Diffstat (limited to 'lib')
 lib/Kconfig | 18
 lib/Kconfig.debug | 257
 lib/Kconfig.kgdb | 24
 lib/Makefile | 20
 lib/atomic64.c | 4
 lib/atomic64_test.c | 166
 lib/average.c | 61
 lib/bitmap.c | 22
 lib/btree.c | 798
 lib/bug.c | 20
 lib/cpu-notifier-error-inject.c | 63
 lib/cpumask.c | 1
 lib/crc32.c | 55
 lib/debug_locks.c | 3
 lib/debugobjects.c | 64
 lib/decompress.c | 5
 lib/decompress_bunzip2.c | 18
 lib/decompress_inflate.c | 23
 lib/decompress_unlzma.c | 85
 lib/decompress_unlzo.c | 127
 lib/decompress_unxz.c | 397
 lib/devres.c | 3
 lib/div64.c | 52
 lib/dma-debug.c | 5
 lib/dynamic_debug.c | 148
 lib/flex_array.c | 37
 lib/gen_crc32table.c | 47
 lib/genalloc.c | 2
 lib/hexdump.c | 72
 lib/hweight.c | 26
 lib/idr.c | 101
 lib/inflate.c | 3
 lib/iommu-helper.c | 9
 lib/ioremap.c | 12
 lib/kasprintf.c | 1
 lib/kobject.c | 160
 lib/kobject_uevent.c | 119
 lib/kref.c | 46
 lib/lcm.c | 15
 lib/list_debug.c | 6
 lib/list_sort.c | 335
 lib/lmb.c | 532
 lib/nlattr.c | 24
 lib/parser.c | 7
 lib/percpu_counter.c | 88
 lib/radix-tree.c | 254
 lib/raid6/.gitignore | 4
 lib/raid6/Makefile | 75
 lib/raid6/algos.c | 154
 lib/raid6/altivec.uc | 130
 lib/raid6/int.uc | 117
 lib/raid6/mktables.c | 132
 lib/raid6/mmx.c | 142
 lib/raid6/recov.c | 132
 lib/raid6/sse1.c | 162
 lib/raid6/sse2.c | 262
 lib/raid6/test/Makefile | 72
 lib/raid6/test/test.c | 124
 lib/raid6/unroll.awk | 20
 lib/raid6/x86.h | 61
 lib/random32.c | 40
 lib/ratelimit.c | 11
 lib/rbtree.c | 68
 lib/rwsem-spinlock.c | 14
 lib/rwsem.c | 153
 lib/scatterlist.c | 38
 lib/show_mem.c | 14
 lib/string.c | 40
 lib/swiotlb.c | 187
 lib/textsearch.c | 1
 lib/timerqueue.c | 107
 lib/uuid.c | 53
 lib/vsprintf.c | 268
 lib/xz/Kconfig | 59
 lib/xz/Makefile | 5
 lib/xz/xz_crc32.c | 59
 lib/xz/xz_dec_bcj.c | 561
 lib/xz/xz_dec_lzma2.c | 1171
 lib/xz/xz_dec_stream.c | 821
 lib/xz/xz_dec_syms.c | 26
 lib/xz/xz_dec_test.c | 220
 lib/xz/xz_lzma2.h | 204
 lib/xz/xz_private.h | 156
 lib/xz/xz_stream.h | 62
 lib/zlib_inflate/inffast.c | 72
 85 files changed, 8913 insertions(+), 1419 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index 97b136ff117e..0ee67e08ad3e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -7,6 +7,9 @@ config BINARY_PRINTF
 
 menu "Library routines"
 
+config RAID6_PQ
+	tristate
+
 config BITREVERSE
 	tristate
 
@@ -103,6 +106,8 @@ config LZO_COMPRESS
 config LZO_DECOMPRESS
 	tristate
 
+source "lib/xz/Kconfig"
+
 #
 # These all provide a common interface (hence the apparent duplication with
 # ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.)
@@ -117,6 +122,10 @@ config DECOMPRESS_BZIP2
 config DECOMPRESS_LZMA
 	tristate
 
+config DECOMPRESS_XZ
+	select XZ_DEC
+	tristate
+
 config DECOMPRESS_LZO
 	select LZO_DECOMPRESS
 	tristate
@@ -160,6 +169,9 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
 	tristate
 
+config BTREE
+	boolean
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
@@ -178,9 +190,6 @@ config HAS_DMA
 config CHECK_SIGNATURE
 	bool
 
-config HAVE_LMB
-	boolean
-
 config CPUMASK_OFFSTACK
 	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
 	help
@@ -207,4 +216,7 @@ config GENERIC_ATOMIC64
 config LRU_CACHE
 	tristate
 
+config AVERAGE
+	bool
+
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..2d05adb98401 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -76,7 +76,6 @@ config UNUSED_SYMBOLS
76 76
77config DEBUG_FS 77config DEBUG_FS
78 bool "Debug Filesystem" 78 bool "Debug Filesystem"
79 depends on SYSFS
80 help 79 help
81 debugfs is a virtual file system that kernel developers use to put 80 debugfs is a virtual file system that kernel developers use to put
82 debugging files into. Enable this option to be able to read and 81 debugging files into. Enable this option to be able to read and
@@ -103,7 +102,8 @@ config HEADERS_CHECK
103 102
104config DEBUG_SECTION_MISMATCH 103config DEBUG_SECTION_MISMATCH
105 bool "Enable full Section mismatch analysis" 104 bool "Enable full Section mismatch analysis"
106 depends on UNDEFINED 105 depends on UNDEFINED || (BLACKFIN)
106 default y
107 # This option is on purpose disabled for now. 107 # This option is on purpose disabled for now.
108 # It will be enabled when we are down to a reasonable number 108 # It will be enabled when we are down to a reasonable number
109 # of section mismatch warnings (< 10 for an allyesconfig build) 109 # of section mismatch warnings (< 10 for an allyesconfig build)
@@ -151,28 +151,34 @@ config DEBUG_SHIRQ
151 Drivers ought to be able to handle interrupts coming in at those 151 Drivers ought to be able to handle interrupts coming in at those
152 points; some don't and need to be caught. 152 points; some don't and need to be caught.
153 153
154config DETECT_SOFTLOCKUP 154config LOCKUP_DETECTOR
155 bool "Detect Soft Lockups" 155 bool "Detect Hard and Soft Lockups"
156 depends on DEBUG_KERNEL && !S390 156 depends on DEBUG_KERNEL && !S390
157 default y
158 help 157 help
159 Say Y here to enable the kernel to detect "soft lockups", 158 Say Y here to enable the kernel to act as a watchdog to detect
160 which are bugs that cause the kernel to loop in kernel 159 hard and soft lockups.
160
161 Softlockups are bugs that cause the kernel to loop in kernel
161 mode for more than 60 seconds, without giving other tasks a 162 mode for more than 60 seconds, without giving other tasks a
162 chance to run. 163 chance to run. The current stack trace is displayed upon
164 detection and the system will stay locked up.
163 165
164 When a soft-lockup is detected, the kernel will print the 166 Hardlockups are bugs that cause the CPU to loop in kernel mode
165 current stack trace (which you should report), but the 167 for more than 60 seconds, without letting other interrupts have a
166 system will stay locked up. This feature has negligible 168 chance to run. The current stack trace is displayed upon detection
167 overhead. 169 and the system will stay locked up.
170
171 The overhead should be minimal. A periodic hrtimer runs to
172 generate interrupts and kick the watchdog task every 10-12 seconds.
173 An NMI is generated every 60 seconds or so to check for hardlockups.
168 174
169 (Note that "hard lockups" are separate type of bugs that 175config HARDLOCKUP_DETECTOR
170 can be detected via the NMI-watchdog, on platforms that 176 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
171 support it.) 177 !ARCH_HAS_NMI_WATCHDOG
172 178
173config BOOTPARAM_SOFTLOCKUP_PANIC 179config BOOTPARAM_SOFTLOCKUP_PANIC
174 bool "Panic (Reboot) On Soft Lockups" 180 bool "Panic (Reboot) On Soft Lockups"
175 depends on DETECT_SOFTLOCKUP 181 depends on LOCKUP_DETECTOR
176 help 182 help
177 Say Y here to enable the kernel to panic on "soft lockups", 183 Say Y here to enable the kernel to panic on "soft lockups",
178 which are bugs that cause the kernel to loop in kernel 184 which are bugs that cause the kernel to loop in kernel
@@ -189,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
189 195
190config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE 196config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
191 int 197 int
192 depends on DETECT_SOFTLOCKUP 198 depends on LOCKUP_DETECTOR
193 range 0 1 199 range 0 1
194 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 200 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
195 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 201 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -306,6 +312,20 @@ config DEBUG_OBJECTS_WORK
306 work queue routines to track the life time of work objects and 312 work queue routines to track the life time of work objects and
307 validate the work operations. 313 validate the work operations.
308 314
315config DEBUG_OBJECTS_RCU_HEAD
316 bool "Debug RCU callbacks objects"
317 depends on DEBUG_OBJECTS && PREEMPT
318 help
319 Enable this to turn on debugging of RCU list heads (call_rcu() usage).
320
321config DEBUG_OBJECTS_PERCPU_COUNTER
322 bool "Debug percpu counter objects"
323 depends on DEBUG_OBJECTS
324 help
325 If you say Y here, additional code will be inserted into the
326 percpu counter routines to track the life time of percpu counter
327 objects and validate the percpu counter operations.
328
309config DEBUG_OBJECTS_ENABLE_DEFAULT 329config DEBUG_OBJECTS_ENABLE_DEFAULT
310 int "debug_objects bootup default value (0-1)" 330 int "debug_objects bootup default value (0-1)"
311 range 0 1 331 range 0 1
@@ -342,7 +362,7 @@ config SLUB_DEBUG_ON
342config SLUB_STATS 362config SLUB_STATS
343 default n 363 default n
344 bool "Enable SLUB performance statistics" 364 bool "Enable SLUB performance statistics"
345 depends on SLUB && SLUB_DEBUG && SYSFS 365 depends on SLUB && SYSFS
346 help 366 help
347 SLUB statistics are useful to debug SLUBs allocation behavior in 367 SLUB statistics are useful to debug SLUBs allocation behavior in
348 order find ways to optimize the allocator. This should never be 368 order find ways to optimize the allocator. This should never be
@@ -355,7 +375,7 @@ config SLUB_STATS
355config DEBUG_KMEMLEAK 375config DEBUG_KMEMLEAK
356 bool "Kernel memory leak detector" 376 bool "Kernel memory leak detector"
357 depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \ 377 depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
358 (X86 || ARM || PPC || S390) 378 (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
359 379
360 select DEBUG_FS if SYSFS 380 select DEBUG_FS if SYSFS
361 select STACKTRACE if STACKTRACE_SUPPORT 381 select STACKTRACE if STACKTRACE_SUPPORT
@@ -399,6 +419,13 @@ config DEBUG_KMEMLEAK_TEST
399 419
400 If unsure, say N. 420 If unsure, say N.
401 421
422config DEBUG_KMEMLEAK_DEFAULT_OFF
423 bool "Default kmemleak to off"
424 depends on DEBUG_KMEMLEAK
425 help
426 Say Y here to disable kmemleak by default. It can then be enabled
427 on the command line via kmemleak=on.
428
402config DEBUG_PREEMPT 429config DEBUG_PREEMPT
403 bool "Debug preemptible kernel" 430 bool "Debug preemptible kernel"
404 depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT 431 depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
@@ -443,6 +470,15 @@ config DEBUG_MUTEXES
443 This feature allows mutex semantics violations to be detected and 470 This feature allows mutex semantics violations to be detected and
444 reported. 471 reported.
445 472
473config BKL
474 bool "Big Kernel Lock" if (SMP || PREEMPT)
475 default y
476 help
477 This is the traditional lock that is used in old code instead
478 of proper locking. All drivers that use the BKL should depend
479 on this symbol.
480 Say Y here unless you are working on removing the BKL.
481
446config DEBUG_LOCK_ALLOC 482config DEBUG_LOCK_ALLOC
447 bool "Lock debugging: detect incorrect freeing of live locks" 483 bool "Lock debugging: detect incorrect freeing of live locks"
448 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 484 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -464,6 +500,7 @@ config PROVE_LOCKING
464 select DEBUG_SPINLOCK 500 select DEBUG_SPINLOCK
465 select DEBUG_MUTEXES 501 select DEBUG_MUTEXES
466 select DEBUG_LOCK_ALLOC 502 select DEBUG_LOCK_ALLOC
503 select TRACE_IRQFLAGS
467 default n 504 default n
468 help 505 help
469 This feature enables the kernel to prove that all locking 506 This feature enables the kernel to prove that all locking
@@ -499,11 +536,52 @@ config PROVE_LOCKING
499 536
500 For more details, see Documentation/lockdep-design.txt. 537 For more details, see Documentation/lockdep-design.txt.
501 538
539config PROVE_RCU
540 bool "RCU debugging: prove RCU correctness"
541 depends on PROVE_LOCKING
542 default n
543 help
544 This feature enables lockdep extensions that check for correct
545 use of RCU APIs. This is currently under development. Say Y
546 if you want to debug RCU usage or help work on the PROVE_RCU
547 feature.
548
549 Say N if you are unsure.
550
551config PROVE_RCU_REPEATEDLY
552 bool "RCU debugging: don't disable PROVE_RCU on first splat"
553 depends on PROVE_RCU
554 default n
555 help
556 By itself, PROVE_RCU will disable checking upon issuing the
557 first warning (or "splat"). This feature prevents such
558 disabling, allowing multiple RCU-lockdep warnings to be printed
559 on a single reboot.
560
561 Say Y to allow multiple RCU-lockdep warnings per boot.
562
563 Say N if you are unsure.
564
565config SPARSE_RCU_POINTER
566 bool "RCU debugging: sparse-based checks for pointer usage"
567 default n
568 help
569 This feature enables the __rcu sparse annotation for
570 RCU-protected pointers. This annotation will cause sparse
571 to flag any non-RCU used of annotated pointers. This can be
572 helpful when debugging RCU usage. Please note that this feature
573 is not intended to enforce code cleanliness; it is instead merely
574 a debugging aid.
575
576 Say Y to make sparse flag questionable use of RCU-protected pointers
577
578 Say N if you are unsure.
579
502config LOCKDEP 580config LOCKDEP
503 bool 581 bool
504 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 582 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
505 select STACKTRACE 583 select STACKTRACE
506 select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 584 select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
507 select KALLSYMS 585 select KALLSYMS
508 select KALLSYMS_ALL 586 select KALLSYMS_ALL
509 587
@@ -520,6 +598,14 @@ config LOCK_STAT
520 598
521 For more details, see Documentation/lockstat.txt 599 For more details, see Documentation/lockstat.txt
522 600
601 This also enables lock events required by "perf lock",
602 subcommand of perf.
603 If you want to use "perf lock", you also need to turn on
604 CONFIG_EVENT_TRACING.
605
606 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
607 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
608
523config DEBUG_LOCKDEP 609config DEBUG_LOCKDEP
524 bool "Lock dependency engine debugging" 610 bool "Lock dependency engine debugging"
525 depends on DEBUG_KERNEL && LOCKDEP 611 depends on DEBUG_KERNEL && LOCKDEP
@@ -529,11 +615,10 @@ config DEBUG_LOCKDEP
529 of more runtime overhead. 615 of more runtime overhead.
530 616
531config TRACE_IRQFLAGS 617config TRACE_IRQFLAGS
532 depends on DEBUG_KERNEL
533 bool 618 bool
534 default y 619 help
535 depends on TRACE_IRQFLAGS_SUPPORT 620 Enables hooks to interrupt enabling and disabling for
536 depends on PROVE_LOCKING 621 either tracing or lock debugging.
537 622
538config DEBUG_SPINLOCK_SLEEP 623config DEBUG_SPINLOCK_SLEEP
539 bool "Spinlock debugging: sleep-inside-spinlock checking" 624 bool "Spinlock debugging: sleep-inside-spinlock checking"
@@ -595,6 +680,19 @@ config DEBUG_INFO
595 680
596 If unsure, say N. 681 If unsure, say N.
597 682
683config DEBUG_INFO_REDUCED
684 bool "Reduce debugging information"
685 depends on DEBUG_INFO
686 help
687 If you say Y here gcc is instructed to generate less debugging
688 information for structure types. This means that tools that
689 need full debugging information (like kgdb or systemtap) won't
690 be happy. But if you merely need debugging information to
691 resolve line numbers there is no loss. Advantage is that
692 build directory object sizes shrink dramatically over a full
693 DEBUG_INFO build and compile times are reduced too.
694 Only works with newer gcc versions.
695
598config DEBUG_VM 696config DEBUG_VM
599 bool "Debug VM" 697 bool "Debug VM"
600 depends on DEBUG_KERNEL 698 depends on DEBUG_KERNEL
@@ -651,6 +749,15 @@ config DEBUG_LIST
651 749
652 If unsure, say N. 750 If unsure, say N.
653 751
752config TEST_LIST_SORT
753 bool "Linked list sorting test"
754 depends on DEBUG_KERNEL
755 help
756 Enable this to turn on 'list_sort()' function test. This test is
757 executed only once during system boot, so affects only boot time.
758
759 If unsure, say N.
760
654config DEBUG_SG 761config DEBUG_SG
655 bool "Debug SG table operations" 762 bool "Debug SG table operations"
656 depends on DEBUG_KERNEL 763 depends on DEBUG_KERNEL
@@ -765,10 +872,46 @@ config RCU_CPU_STALL_DETECTOR
765 CPUs are delaying the current grace period, but only when 872 CPUs are delaying the current grace period, but only when
766 the grace period extends for excessive time periods. 873 the grace period extends for excessive time periods.
767 874
768 Say Y if you want RCU to perform such checks. 875 Say N if you want to disable such checks.
876
877 Say Y if you are unsure.
878
879config RCU_CPU_STALL_TIMEOUT
880 int "RCU CPU stall timeout in seconds"
881 depends on RCU_CPU_STALL_DETECTOR
882 range 3 300
883 default 60
884 help
885 If a given RCU grace period extends more than the specified
886 number of seconds, a CPU stall warning is printed. If the
887 RCU grace period persists, additional CPU stall warnings are
888 printed at more widely spaced intervals.
889
890config RCU_CPU_STALL_DETECTOR_RUNNABLE
891 bool "RCU CPU stall checking starts automatically at boot"
892 depends on RCU_CPU_STALL_DETECTOR
893 default y
894 help
895 If set, start checking for RCU CPU stalls immediately on
896 boot. Otherwise, RCU CPU stall checking must be manually
897 enabled.
898
899 Say Y if you are unsure.
900
901 Say N if you wish to suppress RCU CPU stall checking during boot.
902
903config RCU_CPU_STALL_VERBOSE
904 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
905 depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
906 default y
907 help
908 This option causes RCU to printk detailed per-task information
909 for any tasks that are stalling the current RCU grace period.
769 910
770 Say N if you are unsure. 911 Say N if you are unsure.
771 912
913 Say Y if you want to enable such checks.
914
772config KPROBES_SANITY_TEST 915config KPROBES_SANITY_TEST
773 bool "Kprobes sanity tests" 916 bool "Kprobes sanity tests"
774 depends on DEBUG_KERNEL 917 depends on DEBUG_KERNEL
@@ -840,8 +983,7 @@ config DEBUG_FORCE_WEAK_PER_CPU
840 983
841config LKDTM 984config LKDTM
842 tristate "Linux Kernel Dump Test Tool Module" 985 tristate "Linux Kernel Dump Test Tool Module"
843 depends on DEBUG_KERNEL 986 depends on DEBUG_FS
844 depends on KPROBES
845 depends on BLOCK 987 depends on BLOCK
846 default n 988 default n
847 help 989 help
@@ -852,7 +994,19 @@ config LKDTM
852 called lkdtm. 994 called lkdtm.
853 995
854 Documentation on how to use the module can be found in 996 Documentation on how to use the module can be found in
855 drivers/misc/lkdtm.c 997 Documentation/fault-injection/provoke-crashes.txt
998
999config CPU_NOTIFIER_ERROR_INJECT
1000 tristate "CPU notifier error injection module"
1001 depends on HOTPLUG_CPU && DEBUG_KERNEL
1002 help
1003 This option provides a kernel module that can be used to test
1004 the error handling of the cpu notifiers
1005
1006 To compile this code as a module, choose M here: the module will
1007 be called cpu-notifier-error-inject.
1008
1009 If unsure, say N.
856 1010
857config FAULT_INJECTION 1011config FAULT_INJECTION
858 bool "Fault-injection framework" 1012 bool "Fault-injection framework"
@@ -881,7 +1035,7 @@ config FAIL_MAKE_REQUEST
881 Provide fault-injection capability for disk IO. 1035 Provide fault-injection capability for disk IO.
882 1036
883config FAIL_IO_TIMEOUT 1037config FAIL_IO_TIMEOUT
884 bool "Faul-injection capability for faking disk interrupts" 1038 bool "Fault-injection capability for faking disk interrupts"
885 depends on FAULT_INJECTION && BLOCK 1039 depends on FAULT_INJECTION && BLOCK
886 help 1040 help
887 Provide fault-injection capability on end IO handling. This 1041 Provide fault-injection capability on end IO handling. This
@@ -902,19 +1056,22 @@ config FAULT_INJECTION_STACKTRACE_FILTER
902 depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT 1056 depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
903 depends on !X86_64 1057 depends on !X86_64
904 select STACKTRACE 1058 select STACKTRACE
905 select FRAME_POINTER if !PPC && !S390 1059 select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
906 help 1060 help
907 Provide stacktrace filter for fault-injection capabilities 1061 Provide stacktrace filter for fault-injection capabilities
908 1062
909config LATENCYTOP 1063config LATENCYTOP
910 bool "Latency measuring infrastructure" 1064 bool "Latency measuring infrastructure"
911 select FRAME_POINTER if !MIPS && !PPC && !S390 1065 depends on HAVE_LATENCYTOP_SUPPORT
1066 depends on DEBUG_KERNEL
1067 depends on STACKTRACE_SUPPORT
1068 depends on PROC_FS
1069 select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
912 select KALLSYMS 1070 select KALLSYMS
913 select KALLSYMS_ALL 1071 select KALLSYMS_ALL
914 select STACKTRACE 1072 select STACKTRACE
915 select SCHEDSTATS 1073 select SCHEDSTATS
916 select SCHED_DEBUG 1074 select SCHED_DEBUG
917 depends on HAVE_LATENCYTOP_SUPPORT
918 help 1075 help
919 Enable this option if you want to use the LatencyTOP tool 1076 Enable this option if you want to use the LatencyTOP tool
920 to find out which userspace is blocking on what kernel operations. 1077 to find out which userspace is blocking on what kernel operations.
@@ -995,10 +1152,10 @@ config DYNAMIC_DEBUG
995 1152
996 Usage: 1153 Usage:
997 1154
998 Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file, 1155 Dynamic debugging is controlled via the 'dynamic_debug/control' file,
999 which is contained in the 'debugfs' filesystem. Thus, the debugfs 1156 which is contained in the 'debugfs' filesystem. Thus, the debugfs
1000 filesystem must first be mounted before making use of this feature. 1157 filesystem must first be mounted before making use of this feature.
1001 We refer the control file as: <debugfs>/dynamic_debug/ddebug. This 1158 We refer the control file as: <debugfs>/dynamic_debug/control. This
1002 file contains a list of the debug statements that can be enabled. The 1159 file contains a list of the debug statements that can be enabled. The
1003 format for each line of the file is: 1160 format for each line of the file is:
1004 1161
@@ -1013,7 +1170,7 @@ config DYNAMIC_DEBUG
1013 1170
1014 From a live system: 1171 From a live system:
1015 1172
1016 nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug 1173 nullarbor:~ # cat <debugfs>/dynamic_debug/control
1017 # filename:lineno [module]function flags format 1174 # filename:lineno [module]function flags format
1018 fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" 1175 fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012"
1019 fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" 1176 fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012"
@@ -1023,23 +1180,23 @@ config DYNAMIC_DEBUG
1023 1180
1024 // enable the message at line 1603 of file svcsock.c 1181 // enable the message at line 1603 of file svcsock.c
1025 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > 1182 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
1026 <debugfs>/dynamic_debug/ddebug 1183 <debugfs>/dynamic_debug/control
1027 1184
1028 // enable all the messages in file svcsock.c 1185 // enable all the messages in file svcsock.c
1029 nullarbor:~ # echo -n 'file svcsock.c +p' > 1186 nullarbor:~ # echo -n 'file svcsock.c +p' >
1030 <debugfs>/dynamic_debug/ddebug 1187 <debugfs>/dynamic_debug/control
1031 1188
1032 // enable all the messages in the NFS server module 1189 // enable all the messages in the NFS server module
1033 nullarbor:~ # echo -n 'module nfsd +p' > 1190 nullarbor:~ # echo -n 'module nfsd +p' >
1034 <debugfs>/dynamic_debug/ddebug 1191 <debugfs>/dynamic_debug/control
1035 1192
1036 // enable all 12 messages in the function svc_process() 1193 // enable all 12 messages in the function svc_process()
1037 nullarbor:~ # echo -n 'func svc_process +p' > 1194 nullarbor:~ # echo -n 'func svc_process +p' >
1038 <debugfs>/dynamic_debug/ddebug 1195 <debugfs>/dynamic_debug/control
1039 1196
1040 // disable all 12 messages in the function svc_process() 1197 // disable all 12 messages in the function svc_process()
1041 nullarbor:~ # echo -n 'func svc_process -p' > 1198 nullarbor:~ # echo -n 'func svc_process -p' >
1042 <debugfs>/dynamic_debug/ddebug 1199 <debugfs>/dynamic_debug/control
1043 1200
1044 See Documentation/dynamic-debug-howto.txt for additional information. 1201 See Documentation/dynamic-debug-howto.txt for additional information.
1045 1202
@@ -1054,6 +1211,26 @@ config DMA_API_DEBUG
1054 This option causes a performance degredation. Use only if you want 1211 This option causes a performance degredation. Use only if you want
1055 to debug device drivers. If unsure, say N. 1212 to debug device drivers. If unsure, say N.
1056 1213
1214config ATOMIC64_SELFTEST
1215 bool "Perform an atomic64_t self-test at boot"
1216 help
1217 Enable this option to test the atomic64_t functions at boot.
1218
1219 If unsure, say N.
1220
1221config ASYNC_RAID6_TEST
1222 tristate "Self test for hardware accelerated raid6 recovery"
1223 depends on ASYNC_RAID6_RECOV
1224 select ASYNC_MEMCPY
1225 ---help---
1226 This is a one-shot self test that permutes through the
1227 recovery of all the possible two disk failure scenarios for a
1228 N-disk array. Recovery is performed with the asynchronous
1229 raid6 recovery routines, and will optionally use an offload
1230 engine if one is available.
1231
1232 If unsure, say N.
1233
1057source "samples/Kconfig" 1234source "samples/Kconfig"
1058 1235
1059source "lib/Kconfig.kgdb" 1236source "lib/Kconfig.kgdb"
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 9b5d1d7f2ef7..43cb93fa2651 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB
 	bool
 
 menuconfig KGDB
-	bool "KGDB: kernel debugging with remote gdb"
+	bool "KGDB: kernel debugger"
 	depends on HAVE_ARCH_KGDB
 	depends on DEBUG_KERNEL && EXPERIMENTAL
 	help
@@ -57,4 +57,26 @@ config KGDB_TESTS_BOOT_STRING
 	  information about other strings you could use beyond the
 	  default of V1F100.
 
+config KGDB_LOW_LEVEL_TRAP
+	bool "KGDB: Allow debugging with traps in notifiers"
+	depends on X86 || MIPS
+	default n
+	help
+	  This will add an extra call back to kgdb for the breakpoint
+	  exception handler on which will will allow kgdb to step
+	  through a notify handler.
+
+config KGDB_KDB
+	bool "KGDB_KDB: include kdb frontend for kgdb"
+	default n
+	help
+	  KDB frontend for kernel
+
+config KDB_KEYBOARD
+	bool "KGDB_KDB: keyboard as input device"
+	depends on VT && KGDB_KDB
+	default n
+	help
+	  KDB can use a PS/2 type keyboard for an input device
+
 endif # KGDB
diff --git a/lib/Makefile b/lib/Makefile
index 3b0b4a696db9..cbb774f7d41d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,11 +8,11 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o \
+	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o flex_array.o
+	 is_single_threaded.o plist.o decompress.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o list_sort.o
+	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -39,8 +39,12 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
+
+CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
@@ -65,10 +69,13 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
 obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
+obj-$(CONFIG_XZ_DEC) += xz/
+obj-$(CONFIG_RAID6_PQ) += raid6/
 
 lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
 lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
 lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
+lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
 lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
 
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
@@ -81,11 +88,10 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
-obj-$(CONFIG_HAVE_LMB) += lmb.o
-
 obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
 
 obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
@@ -100,6 +106,10 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
 
+obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
+
+obj-$(CONFIG_AVERAGE) += average.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 8bee16ec7524..a21c12bc727c 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -162,12 +162,12 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
 	spinlock_t *lock = lock_addr(v);
-	int ret = 1;
+	int ret = 0;
 
 	spin_lock_irqsave(lock, flags);
 	if (v->counter != u) {
 		v->counter += a;
-		ret = 0;
+		ret = 1;
 	}
 	spin_unlock_irqrestore(lock, flags);
 	return ret;
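This hunk inverts the return convention of the spinlock-based generic atomic64_add_unless(): it now returns non-zero when the addition was performed (the counter did not equal u) and zero otherwise, matching the 32-bit atomic_add_unless(). A short caller sketch, with an invented helper name, just to pin the semantics down:

#include <linux/types.h>
#include <asm/atomic.h>

/* Hypothetical caller, not part of the patch: take a reference unless the
 * counter has already dropped to zero. With the fix above, the generic
 * implementation returns non-zero when the add was performed, like
 * atomic_add_unless(), so the test reads the right way around. */
static bool example_get_ref(atomic64_t *refcount)
{
	return atomic64_add_unless(refcount, 1LL, 0LL) != 0;
}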
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
new file mode 100644
index 000000000000..44524cc8c32a
--- /dev/null
+++ b/lib/atomic64_test.c
@@ -0,0 +1,166 @@
1/*
2 * Testsuite for atomic64_t functions
3 *
4 * Copyright © 2010 Luca Barbieri
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <asm/atomic.h>
14
15#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
16static __init int test_atomic64(void)
17{
18 long long v0 = 0xaaa31337c001d00dLL;
19 long long v1 = 0xdeadbeefdeafcafeLL;
20 long long v2 = 0xfaceabadf00df001LL;
21 long long onestwos = 0x1111111122222222LL;
22 long long one = 1LL;
23
24 atomic64_t v = ATOMIC64_INIT(v0);
25 long long r = v0;
26 BUG_ON(v.counter != r);
27
28 atomic64_set(&v, v1);
29 r = v1;
30 BUG_ON(v.counter != r);
31 BUG_ON(atomic64_read(&v) != r);
32
33 INIT(v0);
34 atomic64_add(onestwos, &v);
35 r += onestwos;
36 BUG_ON(v.counter != r);
37
38 INIT(v0);
39 atomic64_add(-one, &v);
40 r += -one;
41 BUG_ON(v.counter != r);
42
43 INIT(v0);
44 r += onestwos;
45 BUG_ON(atomic64_add_return(onestwos, &v) != r);
46 BUG_ON(v.counter != r);
47
48 INIT(v0);
49 r += -one;
50 BUG_ON(atomic64_add_return(-one, &v) != r);
51 BUG_ON(v.counter != r);
52
53 INIT(v0);
54 atomic64_sub(onestwos, &v);
55 r -= onestwos;
56 BUG_ON(v.counter != r);
57
58 INIT(v0);
59 atomic64_sub(-one, &v);
60 r -= -one;
61 BUG_ON(v.counter != r);
62
63 INIT(v0);
64 r -= onestwos;
65 BUG_ON(atomic64_sub_return(onestwos, &v) != r);
66 BUG_ON(v.counter != r);
67
68 INIT(v0);
69 r -= -one;
70 BUG_ON(atomic64_sub_return(-one, &v) != r);
71 BUG_ON(v.counter != r);
72
73 INIT(v0);
74 atomic64_inc(&v);
75 r += one;
76 BUG_ON(v.counter != r);
77
78 INIT(v0);
79 r += one;
80 BUG_ON(atomic64_inc_return(&v) != r);
81 BUG_ON(v.counter != r);
82
83 INIT(v0);
84 atomic64_dec(&v);
85 r -= one;
86 BUG_ON(v.counter != r);
87
88 INIT(v0);
89 r -= one;
90 BUG_ON(atomic64_dec_return(&v) != r);
91 BUG_ON(v.counter != r);
92
93 INIT(v0);
94 BUG_ON(atomic64_xchg(&v, v1) != v0);
95 r = v1;
96 BUG_ON(v.counter != r);
97
98 INIT(v0);
99 BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
100 r = v1;
101 BUG_ON(v.counter != r);
102
103 INIT(v0);
104 BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
105 BUG_ON(v.counter != r);
106
107 INIT(v0);
108 BUG_ON(atomic64_add_unless(&v, one, v0));
109 BUG_ON(v.counter != r);
110
111 INIT(v0);
112 BUG_ON(!atomic64_add_unless(&v, one, v1));
113 r += one;
114 BUG_ON(v.counter != r);
115
116#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
117 defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
118 INIT(onestwos);
119 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
120 r -= one;
121 BUG_ON(v.counter != r);
122
123 INIT(0);
124 BUG_ON(atomic64_dec_if_positive(&v) != -one);
125 BUG_ON(v.counter != r);
126
127 INIT(-one);
128 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
129 BUG_ON(v.counter != r);
130#else
131#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above
132#endif
133
134 INIT(onestwos);
135 BUG_ON(!atomic64_inc_not_zero(&v));
136 r += one;
137 BUG_ON(v.counter != r);
138
139 INIT(0);
140 BUG_ON(atomic64_inc_not_zero(&v));
141 BUG_ON(v.counter != r);
142
143 INIT(-one);
144 BUG_ON(!atomic64_inc_not_zero(&v));
145 r += one;
146 BUG_ON(v.counter != r);
147
148#ifdef CONFIG_X86
149 printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
150#ifdef CONFIG_X86_64
151 "x86-64",
152#elif defined(CONFIG_X86_CMPXCHG64)
153 "i586+",
154#else
155 "i386+",
156#endif
157 boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
158 boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
159#else
160 printk(KERN_INFO "atomic64 test passed\n");
161#endif
162
163 return 0;
164}
165
166core_initcall(test_atomic64);
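For reference, every case in this selftest follows the same three-step pattern: INIT(c) seeds both the atomic64_t v and the plain reference value r, the operation under test is applied, and BUG_ON() compares v.counter against r after mirroring the operation on r by hand. A further check would look like this (a hypothetical addition reusing the file's v0 and v2, not something the patch contains):

	INIT(v0);
	BUG_ON(atomic64_xchg(&v, v2) != v0);	/* xchg returns the old value */
	r = v2;					/* mirror the store on r */
	BUG_ON(v.counter != r);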
diff --git a/lib/average.c b/lib/average.c
new file mode 100644
index 000000000000..5576c2841496
--- /dev/null
+++ b/lib/average.c
@@ -0,0 +1,61 @@
1/*
2 * lib/average.c
3 *
4 * This source code is licensed under the GNU General Public License,
5 * Version 2. See the file COPYING for more details.
6 */
7
8#include <linux/module.h>
9#include <linux/average.h>
10#include <linux/bug.h>
11#include <linux/log2.h>
12
13/**
14 * DOC: Exponentially Weighted Moving Average (EWMA)
15 *
16 * These are generic functions for calculating Exponentially Weighted Moving
17 * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
18 * up internal representation of the average value to prevent rounding errors.
19 * The factor for scaling up and the exponential weight (or decay rate) have to
20 * be specified thru the init fuction. The structure should not be accessed
21 * directly but only thru the helper functions.
22 */
23
24/**
25 * ewma_init() - Initialize EWMA parameters
26 * @avg: Average structure
27 * @factor: Factor to use for the scaled up internal value. The maximum value
28 * of averages can be ULONG_MAX/(factor*weight). For performance reasons
29 * factor has to be a power of 2.
30 * @weight: Exponential weight, or decay rate. This defines how fast the
31 * influence of older values decreases. For performance reasons weight has
32 * to be a power of 2.
33 *
34 * Initialize the EWMA parameters for a given struct ewma @avg.
35 */
36void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
37{
38 WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
39
40 avg->weight = ilog2(weight);
41 avg->factor = ilog2(factor);
42 avg->internal = 0;
43}
44EXPORT_SYMBOL(ewma_init);
45
46/**
47 * ewma_add() - Exponentially weighted moving average (EWMA)
48 * @avg: Average structure
49 * @val: Current value
50 *
51 * Add a sample to the average.
52 */
53struct ewma *ewma_add(struct ewma *avg, unsigned long val)
54{
55 avg->internal = avg->internal ?
56 (((avg->internal << avg->weight) - avg->internal) +
57 (val << avg->factor)) >> avg->weight :
58 (val << avg->factor);
59 return avg;
60}
61EXPORT_SYMBOL(ewma_add);
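ewma_add() implements avg_new = (avg_old * (weight - 1) + val) / weight, carried out on a value scaled up by factor so fractional bits are not lost, and the very first sample simply seeds the average. A minimal usage sketch follows; the rssi_avg/rssi_example_* names are invented for illustration, and ewma_read(), which undoes the scaling, is assumed to live in include/linux/average.h since it is not part of this file:

#include <linux/kernel.h>
#include <linux/average.h>

static struct ewma rssi_avg;

static void rssi_example_init(void)
{
	/* factor 1024 scales the stored value; weight 8 means each new
	 * sample contributes roughly 1/8 of the average (both powers of 2) */
	ewma_init(&rssi_avg, 1024, 8);
}

static void rssi_example_sample(unsigned long rssi)
{
	ewma_add(&rssi_avg, rssi);
	pr_debug("smoothed rssi: %lu\n", ewma_read(&rssi_avg));
}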
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 11bf49750583..741fae905ae3 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -359,7 +359,6 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area);
 
 #define CHUNKSZ				32
 #define nbits_to_hold_value(val)	fls(val)
-#define unhex(c)			(isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
 #define BASEDEC 10		/* fancier cpuset lists input in decimal */
 
 /**
@@ -466,7 +465,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
 			if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
 				return -EOVERFLOW;
 
-			chunk = (chunk << 4) | unhex(c);
+			chunk = (chunk << 4) | hex_to_bin(c);
 			ndigits++; totaldigits++;
 		}
 		if (ndigits == 0)
@@ -487,7 +486,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
 EXPORT_SYMBOL(__bitmap_parse);
 
 /**
- * bitmap_parse_user()
+ * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
  *
  * @ubuf: pointer to user buffer containing string.
  * @ulen: buffer size in bytes. If string is smaller than this
@@ -619,7 +618,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 EXPORT_SYMBOL(bitmap_parselist);
 
 /**
- * bitmap_pos_to_ord(buf, pos, bits)
+ * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
  *	@buf: pointer to a bitmap
  *	@pos: a bit position in @buf (0 <= @pos < @bits)
  *	@bits: number of valid bit positions in @buf
@@ -655,7 +654,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
 }
 
 /**
- * bitmap_ord_to_pos(buf, ord, bits)
+ * bitmap_ord_to_pos - find position of n-th set bit in bitmap
  *	@buf: pointer to bitmap
  *	@ord: ordinal bit position (n-th set bit, n >= 0)
 *	@bits: number of valid bit positions in @buf
@@ -733,10 +732,9 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
 	bitmap_zero(dst, bits);
 
 	w = bitmap_weight(new, bits);
-	for (oldbit = find_first_bit(src, bits);
-	     oldbit < bits;
-	     oldbit = find_next_bit(src, bits, oldbit + 1)) {
+	for_each_set_bit(oldbit, src, bits) {
 		int n = bitmap_pos_to_ord(old, oldbit, bits);
+
 		if (n < 0 || w == 0)
 			set_bit(oldbit, dst);	/* identity map */
 		else
@@ -903,9 +901,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
 	 */
 
 	m = 0;
-	for (n = find_first_bit(relmap, bits);
-	     n < bits;
-	     n = find_next_bit(relmap, bits, n + 1)) {
+	for_each_set_bit(n, relmap, bits) {
 		/* m == bitmap_pos_to_ord(relmap, n, bits) */
 		if (test_bit(m, orig))
 			set_bit(n, dst);
@@ -934,9 +930,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
 		return;
 	bitmap_zero(dst, bits);
 
-	for (oldbit = find_first_bit(orig, bits);
-	     oldbit < bits;
-	     oldbit = find_next_bit(orig, bits, oldbit + 1))
+	for_each_set_bit(oldbit, orig, bits)
 		set_bit(oldbit % sz, dst);
 }
 EXPORT_SYMBOL(bitmap_fold);
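The last three hunks convert open-coded find_first_bit()/find_next_bit() loops to the for_each_set_bit() iterator from <linux/bitops.h>; the two spellings walk exactly the same bits, the macro just hides the cursor handling. A side-by-side sketch for comparison (illustrative only, visit_set_bits() is not a function in the patch):

#include <linux/bitops.h>
#include <linux/kernel.h>

static void visit_set_bits(const unsigned long *map, int bits)
{
	int bit;

	/* open-coded form that the patch removes */
	for (bit = find_first_bit(map, bits);
	     bit < bits;
	     bit = find_next_bit(map, bits, bit + 1))
		pr_info("bit %d is set\n", bit);

	/* equivalent iterator that the patch uses instead */
	for_each_set_bit(bit, map, bits)
		pr_info("bit %d is set\n", bit);
}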
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 000000000000..c9c6f0351526
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,798 @@
1/*
2 * lib/btree.c - Simple In-memory B+Tree
3 *
4 * As should be obvious for Linux kernel code, license is GPLv2
5 *
6 * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
7 * Bits and pieces stolen from Peter Zijlstra's code, which is
8 * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
9 * GPLv2
10 *
11 * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
12 *
13 * A relatively simple B+Tree implementation. I have written it as a learning
14 * excercise to understand how B+Trees work. Turned out to be useful as well.
15 *
16 * B+Trees can be used similar to Linux radix trees (which don't have anything
17 * in common with textbook radix trees, beware). Prerequisite for them working
18 * well is that access to a random tree node is much faster than a large number
19 * of operations within each node.
20 *
21 * Disks have fulfilled the prerequisite for a long time. More recently DRAM
22 * has gained similar properties, as memory access times, when measured in cpu
23 * cycles, have increased. Cacheline sizes have increased as well, which also
24 * helps B+Trees.
25 *
26 * Compared to radix trees, B+Trees are more efficient when dealing with a
27 * sparsely populated address space. Between 25% and 50% of the memory is
28 * occupied with valid pointers. When densely populated, radix trees contain
29 * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2%
30 * pointers.
31 *
32 * This particular implementation stores pointers identified by a long value.
33 * Storing NULL pointers is illegal, lookup will return NULL when no entry
34 * was found.
35 *
36 * A tricks was used that is not commonly found in textbooks. The lowest
37 * values are to the right, not to the left. All used slots within a node
38 * are on the left, all unused slots contain NUL values. Most operations
39 * simply loop once over all slots and terminate on the first NUL.
40 */
41
42#include <linux/btree.h>
43#include <linux/cache.h>
44#include <linux/kernel.h>
45#include <linux/slab.h>
46#include <linux/module.h>
47
48#define MAX(a, b) ((a) > (b) ? (a) : (b))
49#define NODESIZE MAX(L1_CACHE_BYTES, 128)
50
51struct btree_geo {
52 int keylen;
53 int no_pairs;
54 int no_longs;
55};
56
57struct btree_geo btree_geo32 = {
58 .keylen = 1,
59 .no_pairs = NODESIZE / sizeof(long) / 2,
60 .no_longs = NODESIZE / sizeof(long) / 2,
61};
62EXPORT_SYMBOL_GPL(btree_geo32);
63
64#define LONG_PER_U64 (64 / BITS_PER_LONG)
65struct btree_geo btree_geo64 = {
66 .keylen = LONG_PER_U64,
67 .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
68 .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
69};
70EXPORT_SYMBOL_GPL(btree_geo64);
71
72struct btree_geo btree_geo128 = {
73 .keylen = 2 * LONG_PER_U64,
74 .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
75 .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
76};
77EXPORT_SYMBOL_GPL(btree_geo128);
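To make the geometry concrete: on a typical 64-bit build with L1_CACHE_BYTES <= 128 (an assumption, not something the patch states), NODESIZE is 128 bytes, i.e. 16 longs per node. btree_geo32 and btree_geo64 then hold 8 key/value pairs per node (8 key longs followed by 8 value longs), while btree_geo128 holds 5 pairs (10 key longs, 5 value longs, one long unused). Roughly:

/*
 * Illustrative btree_geo32 node layout for the assumed 64-bit case:
 *
 *   node[0]  .. node[7]   keys, sorted in descending order, left-packed,
 *                         unused slots hold 0
 *   node[8]  .. node[15]  values cast to unsigned long, value n belongs
 *                         to key n
 */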
78
79static struct kmem_cache *btree_cachep;
80
81void *btree_alloc(gfp_t gfp_mask, void *pool_data)
82{
83 return kmem_cache_alloc(btree_cachep, gfp_mask);
84}
85EXPORT_SYMBOL_GPL(btree_alloc);
86
87void btree_free(void *element, void *pool_data)
88{
89 kmem_cache_free(btree_cachep, element);
90}
91EXPORT_SYMBOL_GPL(btree_free);
92
93static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
94{
95 unsigned long *node;
96
97 node = mempool_alloc(head->mempool, gfp);
98 if (likely(node))
99 memset(node, 0, NODESIZE);
100 return node;
101}
102
103static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
104{
105 size_t i;
106
107 for (i = 0; i < n; i++) {
108 if (l1[i] < l2[i])
109 return -1;
110 if (l1[i] > l2[i])
111 return 1;
112 }
113 return 0;
114}
115
116static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
117 size_t n)
118{
119 size_t i;
120
121 for (i = 0; i < n; i++)
122 dest[i] = src[i];
123 return dest;
124}
125
126static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
127{
128 size_t i;
129
130 for (i = 0; i < n; i++)
131 s[i] = c;
132 return s;
133}
134
135static void dec_key(struct btree_geo *geo, unsigned long *key)
136{
137 unsigned long val;
138 int i;
139
140 for (i = geo->keylen - 1; i >= 0; i--) {
141 val = key[i];
142 key[i] = val - 1;
143 if (val)
144 break;
145 }
146}
147
148static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
149{
150 return &node[n * geo->keylen];
151}
152
153static void *bval(struct btree_geo *geo, unsigned long *node, int n)
154{
155 return (void *)node[geo->no_longs + n];
156}
157
158static void setkey(struct btree_geo *geo, unsigned long *node, int n,
159 unsigned long *key)
160{
161 longcpy(bkey(geo, node, n), key, geo->keylen);
162}
163
164static void setval(struct btree_geo *geo, unsigned long *node, int n,
165 void *val)
166{
167 node[geo->no_longs + n] = (unsigned long) val;
168}
169
170static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
171{
172 longset(bkey(geo, node, n), 0, geo->keylen);
173 node[geo->no_longs + n] = 0;
174}
175
176static inline void __btree_init(struct btree_head *head)
177{
178 head->node = NULL;
179 head->height = 0;
180}
181
182void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
183{
184 __btree_init(head);
185 head->mempool = mempool;
186}
187EXPORT_SYMBOL_GPL(btree_init_mempool);
188
189int btree_init(struct btree_head *head)
190{
191 __btree_init(head);
192 head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
193 if (!head->mempool)
194 return -ENOMEM;
195 return 0;
196}
197EXPORT_SYMBOL_GPL(btree_init);
198
199void btree_destroy(struct btree_head *head)
200{
201 mempool_destroy(head->mempool);
202 head->mempool = NULL;
203}
204EXPORT_SYMBOL_GPL(btree_destroy);
205
206void *btree_last(struct btree_head *head, struct btree_geo *geo,
207 unsigned long *key)
208{
209 int height = head->height;
210 unsigned long *node = head->node;
211
212 if (height == 0)
213 return NULL;
214
215 for ( ; height > 1; height--)
216 node = bval(geo, node, 0);
217
218 longcpy(key, bkey(geo, node, 0), geo->keylen);
219 return bval(geo, node, 0);
220}
221EXPORT_SYMBOL_GPL(btree_last);
222
223static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
224 unsigned long *key)
225{
226 return longcmp(bkey(geo, node, pos), key, geo->keylen);
227}
228
229static int keyzero(struct btree_geo *geo, unsigned long *key)
230{
231 int i;
232
233 for (i = 0; i < geo->keylen; i++)
234 if (key[i])
235 return 0;
236
237 return 1;
238}
239
240void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
241 unsigned long *key)
242{
243 int i, height = head->height;
244 unsigned long *node = head->node;
245
246 if (height == 0)
247 return NULL;
248
249 for ( ; height > 1; height--) {
250 for (i = 0; i < geo->no_pairs; i++)
251 if (keycmp(geo, node, i, key) <= 0)
252 break;
253 if (i == geo->no_pairs)
254 return NULL;
255 node = bval(geo, node, i);
256 if (!node)
257 return NULL;
258 }
259
260 if (!node)
261 return NULL;
262
263 for (i = 0; i < geo->no_pairs; i++)
264 if (keycmp(geo, node, i, key) == 0)
265 return bval(geo, node, i);
266 return NULL;
267}
268EXPORT_SYMBOL_GPL(btree_lookup);
269
270int btree_update(struct btree_head *head, struct btree_geo *geo,
271 unsigned long *key, void *val)
272{
273 int i, height = head->height;
274 unsigned long *node = head->node;
275
276 if (height == 0)
277 return -ENOENT;
278
279 for ( ; height > 1; height--) {
280 for (i = 0; i < geo->no_pairs; i++)
281 if (keycmp(geo, node, i, key) <= 0)
282 break;
283 if (i == geo->no_pairs)
284 return -ENOENT;
285 node = bval(geo, node, i);
286 if (!node)
287 return -ENOENT;
288 }
289
290 if (!node)
291 return -ENOENT;
292
293 for (i = 0; i < geo->no_pairs; i++)
294 if (keycmp(geo, node, i, key) == 0) {
295 setval(geo, node, i, val);
296 return 0;
297 }
298 return -ENOENT;
299}
300EXPORT_SYMBOL_GPL(btree_update);
301
302/*
303 * Usually this function is quite similar to normal lookup. But the key of
304 * a parent node may be smaller than the smallest key of all its siblings.
305 * In such a case we cannot just return NULL, as we have only proven that no
306 * key smaller than __key, but larger than this parent key exists.
307 * So we set __key to the parent key and retry. We have to use the smallest
308 * such parent key, which is the last parent key we encountered.
309 */
310void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
311 unsigned long *__key)
312{
313 int i, height;
314 unsigned long *node, *oldnode;
315 unsigned long *retry_key = NULL, key[geo->keylen];
316
317 if (keyzero(geo, __key))
318 return NULL;
319
320 if (head->height == 0)
321 return NULL;
322retry:
323 longcpy(key, __key, geo->keylen);
324 dec_key(geo, key);
325
326 node = head->node;
327 for (height = head->height ; height > 1; height--) {
328 for (i = 0; i < geo->no_pairs; i++)
329 if (keycmp(geo, node, i, key) <= 0)
330 break;
331 if (i == geo->no_pairs)
332 goto miss;
333 oldnode = node;
334 node = bval(geo, node, i);
335 if (!node)
336 goto miss;
337 retry_key = bkey(geo, oldnode, i);
338 }
339
340 if (!node)
341 goto miss;
342
343 for (i = 0; i < geo->no_pairs; i++) {
344 if (keycmp(geo, node, i, key) <= 0) {
345 if (bval(geo, node, i)) {
346 longcpy(__key, bkey(geo, node, i), geo->keylen);
347 return bval(geo, node, i);
348 } else
349 goto miss;
350 }
351 }
352miss:
353 if (retry_key) {
354 __key = retry_key;
355 retry_key = NULL;
356 goto retry;
357 }
358 return NULL;
359}
360
361static int getpos(struct btree_geo *geo, unsigned long *node,
362 unsigned long *key)
363{
364 int i;
365
366 for (i = 0; i < geo->no_pairs; i++) {
367 if (keycmp(geo, node, i, key) <= 0)
368 break;
369 }
370 return i;
371}
372
373static int getfill(struct btree_geo *geo, unsigned long *node, int start)
374{
375 int i;
376
377 for (i = start; i < geo->no_pairs; i++)
378 if (!bval(geo, node, i))
379 break;
380 return i;
381}
382
383/*
384 * locate the correct leaf node in the btree
385 */
386static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
387 unsigned long *key, int level)
388{
389 unsigned long *node = head->node;
390 int i, height;
391
392 for (height = head->height; height > level; height--) {
393 for (i = 0; i < geo->no_pairs; i++)
394 if (keycmp(geo, node, i, key) <= 0)
395 break;
396
397 if ((i == geo->no_pairs) || !bval(geo, node, i)) {
398 /* right-most key is too large, update it */
399 /* FIXME: If the right-most key on higher levels is
400 * always zero, this wouldn't be necessary. */
401 i--;
402 setkey(geo, node, i, key);
403 }
404 BUG_ON(i < 0);
405 node = bval(geo, node, i);
406 }
407 BUG_ON(!node);
408 return node;
409}
410
411static int btree_grow(struct btree_head *head, struct btree_geo *geo,
412 gfp_t gfp)
413{
414 unsigned long *node;
415 int fill;
416
417 node = btree_node_alloc(head, gfp);
418 if (!node)
419 return -ENOMEM;
420 if (head->node) {
421 fill = getfill(geo, head->node, 0);
422 setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
423 setval(geo, node, 0, head->node);
424 }
425 head->node = node;
426 head->height++;
427 return 0;
428}
429
430static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
431{
432 unsigned long *node;
433 int fill;
434
435 if (head->height <= 1)
436 return;
437
438 node = head->node;
439 fill = getfill(geo, node, 0);
440 BUG_ON(fill > 1);
441 head->node = bval(geo, node, 0);
442 head->height--;
443 mempool_free(node, head->mempool);
444}
445
446static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
447 unsigned long *key, void *val, int level,
448 gfp_t gfp)
449{
450 unsigned long *node;
451 int i, pos, fill, err;
452
453 BUG_ON(!val);
454 if (head->height < level) {
455 err = btree_grow(head, geo, gfp);
456 if (err)
457 return err;
458 }
459
460retry:
461 node = find_level(head, geo, key, level);
462 pos = getpos(geo, node, key);
463 fill = getfill(geo, node, pos);
464 /* two identical keys are not allowed */
465 BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
466
467 if (fill == geo->no_pairs) {
468 /* need to split node */
469 unsigned long *new;
470
471 new = btree_node_alloc(head, gfp);
472 if (!new)
473 return -ENOMEM;
474 err = btree_insert_level(head, geo,
475 bkey(geo, node, fill / 2 - 1),
476 new, level + 1, gfp);
477 if (err) {
478 mempool_free(new, head->mempool);
479 return err;
480 }
481 for (i = 0; i < fill / 2; i++) {
482 setkey(geo, new, i, bkey(geo, node, i));
483 setval(geo, new, i, bval(geo, node, i));
484 setkey(geo, node, i, bkey(geo, node, i + fill / 2));
485 setval(geo, node, i, bval(geo, node, i + fill / 2));
486 clearpair(geo, node, i + fill / 2);
487 }
488 if (fill & 1) {
489 setkey(geo, node, i, bkey(geo, node, fill - 1));
490 setval(geo, node, i, bval(geo, node, fill - 1));
491 clearpair(geo, node, fill - 1);
492 }
493 goto retry;
494 }
495 BUG_ON(fill >= geo->no_pairs);
496
497 /* shift and insert */
498 for (i = fill; i > pos; i--) {
499 setkey(geo, node, i, bkey(geo, node, i - 1));
500 setval(geo, node, i, bval(geo, node, i - 1));
501 }
502 setkey(geo, node, pos, key);
503 setval(geo, node, pos, val);
504
505 return 0;
506}
507
508int btree_insert(struct btree_head *head, struct btree_geo *geo,
509 unsigned long *key, void *val, gfp_t gfp)
510{
511 return btree_insert_level(head, geo, key, val, 1, gfp);
512}
513EXPORT_SYMBOL_GPL(btree_insert);
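A minimal caller sketch for the API exported above (illustrative only, not part of this file; it assumes btree_init(), btree_lookup() and the btree_geo32 geometry defined elsewhere in lib/btree.c, treats btree_geo32 keys as a single unsigned long, and uses some_ptr as a stand-in for any non-NULL payload pointer):

	struct btree_head head;
	unsigned long key = 42;
	void *val;

	btree_init(&head);	/* sets up the head and its node mempool */
	if (!btree_insert(&head, &btree_geo32, &key, some_ptr, GFP_KERNEL)) {
		val = btree_lookup(&head, &btree_geo32, &key);	/* -> some_ptr */
		btree_remove(&head, &btree_geo32, &key);	/* returns some_ptr */
	}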
514
515static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
516 unsigned long *key, int level);
517static void merge(struct btree_head *head, struct btree_geo *geo, int level,
518 unsigned long *left, int lfill,
519 unsigned long *right, int rfill,
520 unsigned long *parent, int lpos)
521{
522 int i;
523
524 for (i = 0; i < rfill; i++) {
525 /* Move all keys to the left */
526 setkey(geo, left, lfill + i, bkey(geo, right, i));
527 setval(geo, left, lfill + i, bval(geo, right, i));
528 }
529 /* Exchange left and right child in parent */
530 setval(geo, parent, lpos, right);
531 setval(geo, parent, lpos + 1, left);
532 /* Remove left (formerly right) child from parent */
533 btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
534 mempool_free(right, head->mempool);
535}
536
537static void rebalance(struct btree_head *head, struct btree_geo *geo,
538 unsigned long *key, int level, unsigned long *child, int fill)
539{
540 unsigned long *parent, *left = NULL, *right = NULL;
541 int i, no_left, no_right;
542
543 if (fill == 0) {
544		/* Because we don't steal entries from a neighbour, this case
545 * can happen. Parent node contains a single child, this
546 * node, so merging with a sibling never happens.
547 */
548 btree_remove_level(head, geo, key, level + 1);
549 mempool_free(child, head->mempool);
550 return;
551 }
552
553 parent = find_level(head, geo, key, level + 1);
554 i = getpos(geo, parent, key);
555 BUG_ON(bval(geo, parent, i) != child);
556
557 if (i > 0) {
558 left = bval(geo, parent, i - 1);
559 no_left = getfill(geo, left, 0);
560 if (fill + no_left <= geo->no_pairs) {
561 merge(head, geo, level,
562 left, no_left,
563 child, fill,
564 parent, i - 1);
565 return;
566 }
567 }
568 if (i + 1 < getfill(geo, parent, i)) {
569 right = bval(geo, parent, i + 1);
570 no_right = getfill(geo, right, 0);
571 if (fill + no_right <= geo->no_pairs) {
572 merge(head, geo, level,
573 child, fill,
574 right, no_right,
575 parent, i);
576 return;
577 }
578 }
579 /*
580 * We could also try to steal one entry from the left or right
581 * neighbor. By not doing so we changed the invariant from
582 * "all nodes are at least half full" to "no two neighboring
583 * nodes can be merged". Which means that the average fill of
584 * all nodes is still half or better.
585 */
586}
587
588static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
589 unsigned long *key, int level)
590{
591 unsigned long *node;
592 int i, pos, fill;
593 void *ret;
594
595 if (level > head->height) {
596 /* we recursed all the way up */
597 head->height = 0;
598 head->node = NULL;
599 return NULL;
600 }
601
602 node = find_level(head, geo, key, level);
603 pos = getpos(geo, node, key);
604 fill = getfill(geo, node, pos);
605 if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
606 return NULL;
607 ret = bval(geo, node, pos);
608
609 /* remove and shift */
610 for (i = pos; i < fill - 1; i++) {
611 setkey(geo, node, i, bkey(geo, node, i + 1));
612 setval(geo, node, i, bval(geo, node, i + 1));
613 }
614 clearpair(geo, node, fill - 1);
615
616 if (fill - 1 < geo->no_pairs / 2) {
617 if (level < head->height)
618 rebalance(head, geo, key, level, node, fill - 1);
619 else if (fill - 1 == 1)
620 btree_shrink(head, geo);
621 }
622
623 return ret;
624}
625
626void *btree_remove(struct btree_head *head, struct btree_geo *geo,
627 unsigned long *key)
628{
629 if (head->height == 0)
630 return NULL;
631
632 return btree_remove_level(head, geo, key, 1);
633}
634EXPORT_SYMBOL_GPL(btree_remove);
635
636int btree_merge(struct btree_head *target, struct btree_head *victim,
637 struct btree_geo *geo, gfp_t gfp)
638{
639 unsigned long key[geo->keylen];
640 unsigned long dup[geo->keylen];
641 void *val;
642 int err;
643
644 BUG_ON(target == victim);
645
646 if (!(target->node)) {
647 /* target is empty, just copy fields over */
648 target->node = victim->node;
649 target->height = victim->height;
650 __btree_init(victim);
651 return 0;
652 }
653
654 /* TODO: This needs some optimizations. Currently we do three tree
655 * walks to remove a single object from the victim.
656 */
657 for (;;) {
658 if (!btree_last(victim, geo, key))
659 break;
660 val = btree_lookup(victim, geo, key);
661 err = btree_insert(target, geo, key, val, gfp);
662 if (err)
663 return err;
664 /* We must make a copy of the key, as the original will get
665 * mangled inside btree_remove. */
666 longcpy(dup, key, geo->keylen);
667 btree_remove(victim, geo, dup);
668 }
669 return 0;
670}
671EXPORT_SYMBOL_GPL(btree_merge);
672
673static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
674 unsigned long *node, unsigned long opaque,
675 void (*func)(void *elem, unsigned long opaque,
676 unsigned long *key, size_t index,
677 void *func2),
678 void *func2, int reap, int height, size_t count)
679{
680 int i;
681 unsigned long *child;
682
683 for (i = 0; i < geo->no_pairs; i++) {
684 child = bval(geo, node, i);
685 if (!child)
686 break;
687 if (height > 1)
688 count = __btree_for_each(head, geo, child, opaque,
689 func, func2, reap, height - 1, count);
690 else
691 func(child, opaque, bkey(geo, node, i), count++,
692 func2);
693 }
694 if (reap)
695 mempool_free(node, head->mempool);
696 return count;
697}
698
699static void empty(void *elem, unsigned long opaque, unsigned long *key,
700 size_t index, void *func2)
701{
702}
703
704void visitorl(void *elem, unsigned long opaque, unsigned long *key,
705 size_t index, void *__func)
706{
707 visitorl_t func = __func;
708
709 func(elem, opaque, *key, index);
710}
711EXPORT_SYMBOL_GPL(visitorl);
712
713void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
714 size_t index, void *__func)
715{
716 visitor32_t func = __func;
717 u32 *key = (void *)__key;
718
719 func(elem, opaque, *key, index);
720}
721EXPORT_SYMBOL_GPL(visitor32);
722
723void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
724 size_t index, void *__func)
725{
726 visitor64_t func = __func;
727 u64 *key = (void *)__key;
728
729 func(elem, opaque, *key, index);
730}
731EXPORT_SYMBOL_GPL(visitor64);
732
733void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
734 size_t index, void *__func)
735{
736 visitor128_t func = __func;
737 u64 *key = (void *)__key;
738
739 func(elem, opaque, key[0], key[1], index);
740}
741EXPORT_SYMBOL_GPL(visitor128);
742
743size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
744 unsigned long opaque,
745 void (*func)(void *elem, unsigned long opaque,
746 unsigned long *key,
747 size_t index, void *func2),
748 void *func2)
749{
750 size_t count = 0;
751
752 if (!func2)
753 func = empty;
754 if (head->node)
755 count = __btree_for_each(head, geo, head->node, opaque, func,
756 func2, 0, head->height, 0);
757 return count;
758}
759EXPORT_SYMBOL_GPL(btree_visitor);
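The double indirection above is easiest to see with a short sketch (illustrative; my_visit() is hypothetical, and visitor64_t is assumed to be the matching callback typedef from include/linux/btree.h): visitor64 converts the raw key words to a u64 and then hands off to the user callback passed as func2.

	static void my_visit(void *elem, unsigned long opaque, u64 key, size_t index)
	{
		/* called once for every element in the tree */
	}

	/* walk the whole tree; 0 is an arbitrary opaque cookie */
	count = btree_visitor(&head, &btree_geo64, 0, visitor64, my_visit);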
760
761size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
762 unsigned long opaque,
763 void (*func)(void *elem, unsigned long opaque,
764 unsigned long *key,
765 size_t index, void *func2),
766 void *func2)
767{
768 size_t count = 0;
769
770 if (!func2)
771 func = empty;
772 if (head->node)
773 count = __btree_for_each(head, geo, head->node, opaque, func,
774 func2, 1, head->height, 0);
775 __btree_init(head);
776 return count;
777}
778EXPORT_SYMBOL_GPL(btree_grim_visitor);
779
780static int __init btree_module_init(void)
781{
782 btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
783 SLAB_HWCACHE_ALIGN, NULL);
784 return 0;
785}
786
787static void __exit btree_module_exit(void)
788{
789 kmem_cache_destroy(btree_cachep);
790}
791
792/* If core code starts using btree, initialization should happen even earlier */
793module_init(btree_module_init);
794module_exit(btree_module_exit);
795
796MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
797MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
798MODULE_LICENSE("GPL");
diff --git a/lib/bug.c b/lib/bug.c
index 300e41afbf97..19552096d16b 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
72 return NULL; 72 return NULL;
73} 73}
74 74
75int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, 75void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
76 struct module *mod) 76 struct module *mod)
77{ 77{
78 char *secstrings; 78 char *secstrings;
79 unsigned int i; 79 unsigned int i;
@@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
97 * could potentially lead to deadlock and thus be counter-productive. 97 * could potentially lead to deadlock and thus be counter-productive.
98 */ 98 */
99 list_add(&mod->bug_list, &module_bug_list); 99 list_add(&mod->bug_list, &module_bug_list);
100
101 return 0;
102} 100}
103 101
104void module_bug_cleanup(struct module *mod) 102void module_bug_cleanup(struct module *mod)
@@ -136,8 +134,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
136 134
137 bug = find_bug(bugaddr); 135 bug = find_bug(bugaddr);
138 136
139 printk(KERN_EMERG "------------[ cut here ]------------\n");
140
141 file = NULL; 137 file = NULL;
142 line = 0; 138 line = 0;
143 warning = 0; 139 warning = 0;
@@ -156,19 +152,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
156 152
157 if (warning) { 153 if (warning) {
158 /* this is a WARN_ON rather than BUG/BUG_ON */ 154 /* this is a WARN_ON rather than BUG/BUG_ON */
155 printk(KERN_WARNING "------------[ cut here ]------------\n");
156
159 if (file) 157 if (file)
160 printk(KERN_ERR "Badness at %s:%u\n", 158 printk(KERN_WARNING "WARNING: at %s:%u\n",
161 file, line); 159 file, line);
162 else 160 else
163 printk(KERN_ERR "Badness at %p " 161 printk(KERN_WARNING "WARNING: at %p "
164 "[verbose debug info unavailable]\n", 162 "[verbose debug info unavailable]\n",
165 (void *)bugaddr); 163 (void *)bugaddr);
166 164
165 print_modules();
167 show_regs(regs); 166 show_regs(regs);
168 add_taint(TAINT_WARN); 167 print_oops_end_marker();
168 add_taint(BUG_GET_TAINT(bug));
169 return BUG_TRAP_TYPE_WARN; 169 return BUG_TRAP_TYPE_WARN;
170 } 170 }
171 171
172 printk(KERN_EMERG "------------[ cut here ]------------\n");
173
172 if (file) 174 if (file)
173 printk(KERN_CRIT "kernel BUG at %s:%u!\n", 175 printk(KERN_CRIT "kernel BUG at %s:%u!\n",
174 file, line); 176 file, line);
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644
index 000000000000..4dc20321b0d5
--- /dev/null
+++ b/lib/cpu-notifier-error-inject.c
@@ -0,0 +1,63 @@
1#include <linux/kernel.h>
2#include <linux/cpu.h>
3#include <linux/module.h>
4#include <linux/notifier.h>
5
6static int priority;
7static int cpu_up_prepare_error;
8static int cpu_down_prepare_error;
9
10module_param(priority, int, 0);
11MODULE_PARM_DESC(priority, "specify cpu notifier priority");
12
13module_param(cpu_up_prepare_error, int, 0644);
14MODULE_PARM_DESC(cpu_up_prepare_error,
15 "specify error code to inject CPU_UP_PREPARE action");
16
17module_param(cpu_down_prepare_error, int, 0644);
18MODULE_PARM_DESC(cpu_down_prepare_error,
19 "specify error code to inject CPU_DOWN_PREPARE action");
20
21static int err_inject_cpu_callback(struct notifier_block *nfb,
22 unsigned long action, void *hcpu)
23{
24 int err = 0;
25
26 switch (action) {
27 case CPU_UP_PREPARE:
28 case CPU_UP_PREPARE_FROZEN:
29 err = cpu_up_prepare_error;
30 break;
31 case CPU_DOWN_PREPARE:
32 case CPU_DOWN_PREPARE_FROZEN:
33 err = cpu_down_prepare_error;
34 break;
35 }
36 if (err)
37 printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
38
39 return notifier_from_errno(err);
40}
41
42static struct notifier_block err_inject_cpu_notifier = {
43 .notifier_call = err_inject_cpu_callback,
44};
45
46static int err_inject_init(void)
47{
48 err_inject_cpu_notifier.priority = priority;
49
50 return register_hotcpu_notifier(&err_inject_cpu_notifier);
51}
52
53static void err_inject_exit(void)
54{
55 unregister_hotcpu_notifier(&err_inject_cpu_notifier);
56}
57
58module_init(err_inject_init);
59module_exit(err_inject_exit);
60
61MODULE_DESCRIPTION("CPU notifier error injection module");
62MODULE_LICENSE("GPL");
63MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
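As a usage sketch (assuming the module builds as cpu-notifier-error-inject.ko): loading it with, say, cpu_down_prepare_error=-16 makes err_inject_cpu_callback() veto CPU_DOWN_PREPARE, so a later attempt to offline a CPU through /sys/devices/system/cpu/cpuN/online should fail with EBUSY; writing 0 back to /sys/module/cpu_notifier_error_inject/parameters/cpu_down_prepare_error (module dashes become underscores in sysfs) stops the injection.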
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 7bb4142a502f..05d6aca7fc19 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -1,3 +1,4 @@
1#include <linux/slab.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/bitops.h> 3#include <linux/bitops.h>
3#include <linux/cpumask.h> 4#include <linux/cpumask.h>
diff --git a/lib/crc32.c b/lib/crc32.c
index 02e3b31b3a79..4855995fcde9 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -25,16 +25,19 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/compiler.h> 26#include <linux/compiler.h>
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/init.h> 28#include <linux/init.h>
30#include <asm/atomic.h> 29#include <asm/atomic.h>
31#include "crc32defs.h" 30#include "crc32defs.h"
32#if CRC_LE_BITS == 8 31#if CRC_LE_BITS == 8
33#define tole(x) __constant_cpu_to_le32(x) 32# define tole(x) __constant_cpu_to_le32(x)
34#define tobe(x) __constant_cpu_to_be32(x)
35#else 33#else
36#define tole(x) (x) 34# define tole(x) (x)
37#define tobe(x) (x) 35#endif
36
37#if CRC_BE_BITS == 8
38# define tobe(x) __constant_cpu_to_be32(x)
39#else
40# define tobe(x) (x)
38#endif 41#endif
39#include "crc32table.h" 42#include "crc32table.h"
40 43
@@ -45,33 +48,37 @@ MODULE_LICENSE("GPL");
45#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 48#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
46 49
47static inline u32 50static inline u32
48crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) 51crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
49{ 52{
50# ifdef __LITTLE_ENDIAN 53# ifdef __LITTLE_ENDIAN
51# define DO_CRC(x) crc = tab[(crc ^ (x)) & 255 ] ^ (crc >> 8) 54# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
55# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
56 tab[2][(crc >> 8) & 255] ^ \
57 tab[1][(crc >> 16) & 255] ^ \
58 tab[0][(crc >> 24) & 255]
52# else 59# else
53# define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) 60# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
61# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
62 tab[1][(crc >> 8) & 255] ^ \
63 tab[2][(crc >> 16) & 255] ^ \
64 tab[3][(crc >> 24) & 255]
54# endif 65# endif
55 const u32 *b = (const u32 *)buf; 66 const u32 *b;
56 size_t rem_len; 67 size_t rem_len;
57 68
58 /* Align it */ 69 /* Align it */
59 if (unlikely((long)b & 3 && len)) { 70 if (unlikely((long)buf & 3 && len)) {
60 u8 *p = (u8 *)b;
61 do { 71 do {
62 DO_CRC(*p++); 72 DO_CRC(*buf++);
63 } while ((--len) && ((long)p)&3); 73 } while ((--len) && ((long)buf)&3);
64 b = (u32 *)p;
65 } 74 }
66 rem_len = len & 3; 75 rem_len = len & 3;
67 /* load data 32 bits wide, xor data 32 bits wide. */ 76 /* load data 32 bits wide, xor data 32 bits wide. */
68 len = len >> 2; 77 len = len >> 2;
78 b = (const u32 *)buf;
69 for (--b; len; --len) { 79 for (--b; len; --len) {
70 crc ^= *++b; /* use pre increment for speed */ 80 crc ^= *++b; /* use pre increment for speed */
71 DO_CRC(0); 81 DO_CRC4;
72 DO_CRC(0);
73 DO_CRC(0);
74 DO_CRC(0);
75 } 82 }
76 len = rem_len; 83 len = rem_len;
77 /* And the last few bytes */ 84 /* And the last few bytes */
@@ -82,6 +89,8 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
82 } while (--len); 89 } while (--len);
83 } 90 }
84 return crc; 91 return crc;
92#undef DO_CRC
93#undef DO_CRC4
85} 94}
86#endif 95#endif
87/** 96/**
@@ -114,14 +123,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
114u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) 123u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
115{ 124{
116# if CRC_LE_BITS == 8 125# if CRC_LE_BITS == 8
117 const u32 *tab = crc32table_le; 126 const u32 (*tab)[] = crc32table_le;
118 127
119 crc = __cpu_to_le32(crc); 128 crc = __cpu_to_le32(crc);
120 crc = crc32_body(crc, p, len, tab); 129 crc = crc32_body(crc, p, len, tab);
121 return __le32_to_cpu(crc); 130 return __le32_to_cpu(crc);
122#undef ENDIAN_SHIFT
123#undef DO_CRC
124
125# elif CRC_LE_BITS == 4 131# elif CRC_LE_BITS == 4
126 while (len--) { 132 while (len--) {
127 crc ^= *p++; 133 crc ^= *p++;
@@ -174,14 +180,11 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
174u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) 180u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
175{ 181{
176# if CRC_BE_BITS == 8 182# if CRC_BE_BITS == 8
177 const u32 *tab = crc32table_be; 183 const u32 (*tab)[] = crc32table_be;
178 184
179 crc = __cpu_to_be32(crc); 185 crc = __cpu_to_be32(crc);
180 crc = crc32_body(crc, p, len, tab); 186 crc = crc32_body(crc, p, len, tab);
181 return __be32_to_cpu(crc); 187 return __be32_to_cpu(crc);
182#undef ENDIAN_SHIFT
183#undef DO_CRC
184
185# elif CRC_BE_BITS == 4 188# elif CRC_BE_BITS == 4
186 while (len--) { 189 while (len--) {
187 crc ^= *p++ << 24; 190 crc ^= *p++ << 24;
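For readers new to the trick behind the DO_CRC4 macro added above (slicing-by-4): after XOR-ing a whole aligned 32-bit word into the CRC, its four bytes are looked up in four separate 256-entry tables and combined in one step, replacing four dependent byte-at-a-time updates. A rough standalone rendering of the little-endian case, assuming the two-dimensional tab[4][256] layout now produced by gen_crc32table:

	static u32 crc32_le_word(u32 crc, u32 word, const u32 tab[4][256])
	{
		crc ^= word;		/* next four little-endian input bytes */
		return tab[3][crc & 0xff] ^
		       tab[2][(crc >> 8) & 0xff] ^
		       tab[1][(crc >> 16) & 0xff] ^
		       tab[0][(crc >> 24) & 0xff];
	}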
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b11731b9c..b1c177307677 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -8,7 +8,6 @@
8 * 8 *
9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
10 */ 10 */
11#include <linux/kernel.h>
12#include <linux/rwsem.h> 11#include <linux/rwsem.h>
13#include <linux/mutex.h> 12#include <linux/mutex.h>
14#include <linux/module.h> 13#include <linux/module.h>
@@ -23,6 +22,7 @@
23 * shut up after that. 22 * shut up after that.
24 */ 23 */
25int debug_locks = 1; 24int debug_locks = 1;
25EXPORT_SYMBOL_GPL(debug_locks);
26 26
27/* 27/*
28 * The locking-testsuite uses <debug_locks_silent> to get a 28 * The locking-testsuite uses <debug_locks_silent> to get a
@@ -38,7 +38,6 @@ int debug_locks_off(void)
38{ 38{
39 if (__debug_locks_off()) { 39 if (__debug_locks_off()) {
40 if (!debug_locks_silent) { 40 if (!debug_locks_silent) {
41 oops_in_progress = 1;
42 console_verbose(); 41 console_verbose();
43 return 1; 42 return 1;
44 } 43 }
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a9a8996d286a..deebcc57d4e6 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -12,6 +12,7 @@
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/debugfs.h> 14#include <linux/debugfs.h>
15#include <linux/slab.h>
15#include <linux/hash.h> 16#include <linux/hash.h>
16 17
17#define ODEBUG_HASH_BITS 14 18#define ODEBUG_HASH_BITS 14
@@ -140,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
140 obj->object = addr; 141 obj->object = addr;
141 obj->descr = descr; 142 obj->descr = descr;
142 obj->state = ODEBUG_STATE_NONE; 143 obj->state = ODEBUG_STATE_NONE;
144 obj->astate = 0;
143 hlist_del(&obj->node); 145 hlist_del(&obj->node);
144 146
145 hlist_add_head(&obj->node, &b->list); 147 hlist_add_head(&obj->node, &b->list);
@@ -251,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
251 253
252 if (limit < 5 && obj->descr != descr_test) { 254 if (limit < 5 && obj->descr != descr_test) {
253 limit++; 255 limit++;
254 WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, 256 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
255 obj_states[obj->state], obj->descr->name); 257 "object type: %s\n",
258 msg, obj_states[obj->state], obj->astate,
259 obj->descr->name);
256 } 260 }
257 debug_objects_warnings++; 261 debug_objects_warnings++;
258} 262}
@@ -446,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
446 case ODEBUG_STATE_INIT: 450 case ODEBUG_STATE_INIT:
447 case ODEBUG_STATE_INACTIVE: 451 case ODEBUG_STATE_INACTIVE:
448 case ODEBUG_STATE_ACTIVE: 452 case ODEBUG_STATE_ACTIVE:
449 obj->state = ODEBUG_STATE_INACTIVE; 453 if (!obj->astate)
454 obj->state = ODEBUG_STATE_INACTIVE;
455 else
456 debug_print_object(obj, "deactivate");
450 break; 457 break;
451 458
452 case ODEBUG_STATE_DESTROYED: 459 case ODEBUG_STATE_DESTROYED:
@@ -552,6 +559,53 @@ out_unlock:
552 raw_spin_unlock_irqrestore(&db->lock, flags); 559 raw_spin_unlock_irqrestore(&db->lock, flags);
553} 560}
554 561
562/**
563 * debug_object_active_state - debug checks object usage state machine
564 * @addr: address of the object
565 * @descr: pointer to an object specific debug description structure
566 * @expect: expected state
567 * @next: state to move to if expected state is found
568 */
569void
570debug_object_active_state(void *addr, struct debug_obj_descr *descr,
571 unsigned int expect, unsigned int next)
572{
573 struct debug_bucket *db;
574 struct debug_obj *obj;
575 unsigned long flags;
576
577 if (!debug_objects_enabled)
578 return;
579
580 db = get_bucket((unsigned long) addr);
581
582 raw_spin_lock_irqsave(&db->lock, flags);
583
584 obj = lookup_object(addr, db);
585 if (obj) {
586 switch (obj->state) {
587 case ODEBUG_STATE_ACTIVE:
588 if (obj->astate == expect)
589 obj->astate = next;
590 else
591 debug_print_object(obj, "active_state");
592 break;
593
594 default:
595 debug_print_object(obj, "active_state");
596 break;
597 }
598 } else {
599 struct debug_obj o = { .object = addr,
600 .state = ODEBUG_STATE_NOTAVAILABLE,
601 .descr = descr };
602
603 debug_print_object(&o, "active_state");
604 }
605
606 raw_spin_unlock_irqrestore(&db->lock, flags);
607}
608
555#ifdef CONFIG_DEBUG_OBJECTS_FREE 609#ifdef CONFIG_DEBUG_OBJECTS_FREE
556static void __debug_check_no_obj_freed(const void *address, unsigned long size) 610static void __debug_check_no_obj_freed(const void *address, unsigned long size)
557{ 611{
@@ -773,7 +827,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state)
773 } 827 }
774} 828}
775 829
776static int 830static int __init
777check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) 831check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
778{ 832{
779 struct debug_bucket *db; 833 struct debug_bucket *db;
@@ -916,7 +970,7 @@ void __init debug_objects_early_init(void)
916/* 970/*
917 * Convert the statically allocated objects to dynamic ones: 971 * Convert the statically allocated objects to dynamic ones:
918 */ 972 */
919static int debug_objects_replace_static_objects(void) 973static int __init debug_objects_replace_static_objects(void)
920{ 974{
921 struct debug_bucket *db = obj_hash; 975 struct debug_bucket *db = obj_hash;
922 struct hlist_node *node, *tmp; 976 struct hlist_node *node, *tmp;
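A hypothetical caller sketch for the debug_object_active_state() helper added in this diff (the names below are invented; a real user is any subsystem that tracks an extra active-substate in obj->astate, which alloc_object() now starts at 0):

	/* active and still idle -> mark queued; anything else is reported */
	debug_object_active_state(obj, &my_debug_descr,
				  MY_ASTATE_IDLE, MY_ASTATE_QUEUED);

	/* and back again once the queued work has completed */
	debug_object_active_state(obj, &my_debug_descr,
				  MY_ASTATE_QUEUED, MY_ASTATE_IDLE);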
diff --git a/lib/decompress.c b/lib/decompress.c
index a7606815541f..3d766b7f60ab 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/decompress/bunzip2.h> 9#include <linux/decompress/bunzip2.h>
10#include <linux/decompress/unlzma.h> 10#include <linux/decompress/unlzma.h>
11#include <linux/decompress/unxz.h>
11#include <linux/decompress/inflate.h> 12#include <linux/decompress/inflate.h>
12#include <linux/decompress/unlzo.h> 13#include <linux/decompress/unlzo.h>
13 14
@@ -23,6 +24,9 @@
23#ifndef CONFIG_DECOMPRESS_LZMA 24#ifndef CONFIG_DECOMPRESS_LZMA
24# define unlzma NULL 25# define unlzma NULL
25#endif 26#endif
27#ifndef CONFIG_DECOMPRESS_XZ
28# define unxz NULL
29#endif
26#ifndef CONFIG_DECOMPRESS_LZO 30#ifndef CONFIG_DECOMPRESS_LZO
27# define unlzo NULL 31# define unlzo NULL
28#endif 32#endif
@@ -36,6 +40,7 @@ static const struct compress_format {
36 { {037, 0236}, "gzip", gunzip }, 40 { {037, 0236}, "gzip", gunzip },
37 { {0x42, 0x5a}, "bzip2", bunzip2 }, 41 { {0x42, 0x5a}, "bzip2", bunzip2 },
38 { {0x5d, 0x00}, "lzma", unlzma }, 42 { {0x5d, 0x00}, "lzma", unlzma },
43 { {0xfd, 0x37}, "xz", unxz },
39 { {0x89, 0x4c}, "lzo", unlzo }, 44 { {0x89, 0x4c}, "lzo", unlzo },
40 { {0, 0}, NULL, NULL } 45 { {0, 0}, NULL, NULL }
41}; 46};
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index a4e971dee102..a7b80c1d6a0d 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -49,7 +49,6 @@
49#define PREBOOT 49#define PREBOOT
50#else 50#else
51#include <linux/decompress/bunzip2.h> 51#include <linux/decompress/bunzip2.h>
52#include <linux/slab.h>
53#endif /* STATIC */ 52#endif /* STATIC */
54 53
55#include <linux/decompress/mm.h> 54#include <linux/decompress/mm.h>
@@ -107,6 +106,8 @@ struct bunzip_data {
107 unsigned char selectors[32768]; /* nSelectors = 15 bits */ 106 unsigned char selectors[32768]; /* nSelectors = 15 bits */
108 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ 107 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
109 int io_error; /* non-zero if we have IO error */ 108 int io_error; /* non-zero if we have IO error */
109 int byteCount[256];
110 unsigned char symToByte[256], mtfSymbol[256];
110}; 111};
111 112
112 113
@@ -158,14 +159,16 @@ static int INIT get_next_block(struct bunzip_data *bd)
158 int *base = NULL; 159 int *base = NULL;
159 int *limit = NULL; 160 int *limit = NULL;
160 int dbufCount, nextSym, dbufSize, groupCount, selector, 161 int dbufCount, nextSym, dbufSize, groupCount, selector,
161 i, j, k, t, runPos, symCount, symTotal, nSelectors, 162 i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
162 byteCount[256]; 163 unsigned char uc, *symToByte, *mtfSymbol, *selectors;
163 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
164 unsigned int *dbuf, origPtr; 164 unsigned int *dbuf, origPtr;
165 165
166 dbuf = bd->dbuf; 166 dbuf = bd->dbuf;
167 dbufSize = bd->dbufSize; 167 dbufSize = bd->dbufSize;
168 selectors = bd->selectors; 168 selectors = bd->selectors;
169 byteCount = bd->byteCount;
170 symToByte = bd->symToByte;
171 mtfSymbol = bd->mtfSymbol;
169 172
170 /* Read in header signature and CRC, then validate signature. 173 /* Read in header signature and CRC, then validate signature.
171 (last block signature means CRC is for whole file, return now) */ 174 (last block signature means CRC is for whole file, return now) */
@@ -678,13 +681,12 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
678 int(*flush)(void*, unsigned int), 681 int(*flush)(void*, unsigned int),
679 unsigned char *outbuf, 682 unsigned char *outbuf,
680 int *pos, 683 int *pos,
681 void(*error_fn)(char *x)) 684 void(*error)(char *x))
682{ 685{
683 struct bunzip_data *bd; 686 struct bunzip_data *bd;
684 int i = -1; 687 int i = -1;
685 unsigned char *inbuf; 688 unsigned char *inbuf;
686 689
687 set_error_fn(error_fn);
688 if (flush) 690 if (flush)
689 outbuf = malloc(BZIP2_IOBUF_SIZE); 691 outbuf = malloc(BZIP2_IOBUF_SIZE);
690 692
@@ -747,8 +749,8 @@ STATIC int INIT decompress(unsigned char *buf, int len,
747 int(*flush)(void*, unsigned int), 749 int(*flush)(void*, unsigned int),
748 unsigned char *outbuf, 750 unsigned char *outbuf,
749 int *pos, 751 int *pos,
750 void(*error_fn)(char *x)) 752 void(*error)(char *x))
751{ 753{
752 return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn); 754 return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
753} 755}
754#endif 756#endif
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index fc686c7a0a0d..19ff89e34eec 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -19,7 +19,6 @@
19#include "zlib_inflate/inflate.h" 19#include "zlib_inflate/inflate.h"
20 20
21#include "zlib_inflate/infutil.h" 21#include "zlib_inflate/infutil.h"
22#include <linux/slab.h>
23 22
24#endif /* STATIC */ 23#endif /* STATIC */
25 24
@@ -27,7 +26,7 @@
27 26
28#define GZIP_IOBUF_SIZE (16*1024) 27#define GZIP_IOBUF_SIZE (16*1024)
29 28
30static int nofill(void *buffer, unsigned int len) 29static int INIT nofill(void *buffer, unsigned int len)
31{ 30{
32 return -1; 31 return -1;
33} 32}
@@ -38,13 +37,12 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
38 int(*flush)(void*, unsigned int), 37 int(*flush)(void*, unsigned int),
39 unsigned char *out_buf, 38 unsigned char *out_buf,
40 int *pos, 39 int *pos,
41 void(*error_fn)(char *x)) { 40 void(*error)(char *x)) {
42 u8 *zbuf; 41 u8 *zbuf;
43 struct z_stream_s *strm; 42 struct z_stream_s *strm;
44 int rc; 43 int rc;
45 size_t out_len; 44 size_t out_len;
46 45
47 set_error_fn(error_fn);
48 rc = -1; 46 rc = -1;
49 if (flush) { 47 if (flush) {
50 out_len = 0x8000; /* 32 K */ 48 out_len = 0x8000; /* 32 K */
@@ -100,13 +98,22 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
100 * possible asciz filename) 98 * possible asciz filename)
101 */ 99 */
102 strm->next_in = zbuf + 10; 100 strm->next_in = zbuf + 10;
101 strm->avail_in = len - 10;
103 /* skip over asciz filename */ 102 /* skip over asciz filename */
104 if (zbuf[3] & 0x8) { 103 if (zbuf[3] & 0x8) {
105 while (strm->next_in[0]) 104 do {
106 strm->next_in++; 105 /*
107 strm->next_in++; 106 * If the filename doesn't fit into the buffer,
107 * the file is very probably corrupt. Don't try
108 * to read more data.
109 */
110 if (strm->avail_in == 0) {
111 error("header error");
112 goto gunzip_5;
113 }
114 --strm->avail_in;
115 } while (*strm->next_in++);
108 } 116 }
109 strm->avail_in = len - (strm->next_in - zbuf);
110 117
111 strm->next_out = out_buf; 118 strm->next_out = out_buf;
112 strm->avail_out = out_len; 119 strm->avail_out = out_len;
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index ca82fde81c8f..476c65af9709 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -33,7 +33,6 @@
33#define PREBOOT 33#define PREBOOT
34#else 34#else
35#include <linux/decompress/unlzma.h> 35#include <linux/decompress/unlzma.h>
36#include <linux/slab.h>
37#endif /* STATIC */ 36#endif /* STATIC */
38 37
39#include <linux/decompress/mm.h> 38#include <linux/decompress/mm.h>
@@ -74,6 +73,7 @@ struct rc {
74 uint32_t code; 73 uint32_t code;
75 uint32_t range; 74 uint32_t range;
76 uint32_t bound; 75 uint32_t bound;
76 void (*error)(char *);
77}; 77};
78 78
79 79
@@ -82,7 +82,7 @@ struct rc {
82#define RC_MODEL_TOTAL_BITS 11 82#define RC_MODEL_TOTAL_BITS 11
83 83
84 84
85static int nofill(void *buffer, unsigned int len) 85static int INIT nofill(void *buffer, unsigned int len)
86{ 86{
87 return -1; 87 return -1;
88} 88}
@@ -92,7 +92,7 @@ static void INIT rc_read(struct rc *rc)
92{ 92{
93 rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); 93 rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE);
94 if (rc->buffer_size <= 0) 94 if (rc->buffer_size <= 0)
95 error("unexpected EOF"); 95 rc->error("unexpected EOF");
96 rc->ptr = rc->buffer; 96 rc->ptr = rc->buffer;
97 rc->buffer_end = rc->buffer + rc->buffer_size; 97 rc->buffer_end = rc->buffer + rc->buffer_size;
98} 98}
@@ -127,12 +127,6 @@ static inline void INIT rc_init_code(struct rc *rc)
127} 127}
128 128
129 129
130/* Called once. TODO: bb_maybe_free() */
131static inline void INIT rc_free(struct rc *rc)
132{
133 free(rc->buffer);
134}
135
136/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ 130/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */
137static void INIT rc_do_normalize(struct rc *rc) 131static void INIT rc_do_normalize(struct rc *rc)
138{ 132{
@@ -169,7 +163,7 @@ static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p)
169 rc->range = rc->bound; 163 rc->range = rc->bound;
170 *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; 164 *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
171} 165}
172static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) 166static inline void INIT rc_update_bit_1(struct rc *rc, uint16_t *p)
173{ 167{
174 rc->range -= rc->bound; 168 rc->range -= rc->bound;
175 rc->code -= rc->bound; 169 rc->code -= rc->bound;
@@ -319,32 +313,38 @@ static inline uint8_t INIT peek_old_byte(struct writer *wr,
319 313
320} 314}
321 315
322static inline void INIT write_byte(struct writer *wr, uint8_t byte) 316static inline int INIT write_byte(struct writer *wr, uint8_t byte)
323{ 317{
324 wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; 318 wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte;
325 if (wr->flush && wr->buffer_pos == wr->header->dict_size) { 319 if (wr->flush && wr->buffer_pos == wr->header->dict_size) {
326 wr->buffer_pos = 0; 320 wr->buffer_pos = 0;
327 wr->global_pos += wr->header->dict_size; 321 wr->global_pos += wr->header->dict_size;
328 wr->flush((char *)wr->buffer, wr->header->dict_size); 322 if (wr->flush((char *)wr->buffer, wr->header->dict_size)
323 != wr->header->dict_size)
324 return -1;
329 } 325 }
326 return 0;
330} 327}
331 328
332 329
333static inline void INIT copy_byte(struct writer *wr, uint32_t offs) 330static inline int INIT copy_byte(struct writer *wr, uint32_t offs)
334{ 331{
335 write_byte(wr, peek_old_byte(wr, offs)); 332 return write_byte(wr, peek_old_byte(wr, offs));
336} 333}
337 334
338static inline void INIT copy_bytes(struct writer *wr, 335static inline int INIT copy_bytes(struct writer *wr,
339 uint32_t rep0, int len) 336 uint32_t rep0, int len)
340{ 337{
341 do { 338 do {
342 copy_byte(wr, rep0); 339 if (copy_byte(wr, rep0))
340 return -1;
343 len--; 341 len--;
344 } while (len != 0 && wr->buffer_pos < wr->header->dst_size); 342 } while (len != 0 && wr->buffer_pos < wr->header->dst_size);
343
344 return len;
345} 345}
346 346
347static inline void INIT process_bit0(struct writer *wr, struct rc *rc, 347static inline int INIT process_bit0(struct writer *wr, struct rc *rc,
348 struct cstate *cst, uint16_t *p, 348 struct cstate *cst, uint16_t *p,
349 int pos_state, uint16_t *prob, 349 int pos_state, uint16_t *prob,
350 int lc, uint32_t literal_pos_mask) { 350 int lc, uint32_t literal_pos_mask) {
@@ -378,16 +378,17 @@ static inline void INIT process_bit0(struct writer *wr, struct rc *rc,
378 uint16_t *prob_lit = prob + mi; 378 uint16_t *prob_lit = prob + mi;
379 rc_get_bit(rc, prob_lit, &mi); 379 rc_get_bit(rc, prob_lit, &mi);
380 } 380 }
381 write_byte(wr, mi);
382 if (cst->state < 4) 381 if (cst->state < 4)
383 cst->state = 0; 382 cst->state = 0;
384 else if (cst->state < 10) 383 else if (cst->state < 10)
385 cst->state -= 3; 384 cst->state -= 3;
386 else 385 else
387 cst->state -= 6; 386 cst->state -= 6;
387
388 return write_byte(wr, mi);
388} 389}
389 390
390static inline void INIT process_bit1(struct writer *wr, struct rc *rc, 391static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
391 struct cstate *cst, uint16_t *p, 392 struct cstate *cst, uint16_t *p,
392 int pos_state, uint16_t *prob) { 393 int pos_state, uint16_t *prob) {
393 int offset; 394 int offset;
@@ -418,8 +419,7 @@ static inline void INIT process_bit1(struct writer *wr, struct rc *rc,
418 419
419 cst->state = cst->state < LZMA_NUM_LIT_STATES ? 420 cst->state = cst->state < LZMA_NUM_LIT_STATES ?
420 9 : 11; 421 9 : 11;
421 copy_byte(wr, cst->rep0); 422 return copy_byte(wr, cst->rep0);
422 return;
423 } else { 423 } else {
424 rc_update_bit_1(rc, prob); 424 rc_update_bit_1(rc, prob);
425 } 425 }
@@ -521,12 +521,15 @@ static inline void INIT process_bit1(struct writer *wr, struct rc *rc,
521 } else 521 } else
522 cst->rep0 = pos_slot; 522 cst->rep0 = pos_slot;
523 if (++(cst->rep0) == 0) 523 if (++(cst->rep0) == 0)
524 return; 524 return 0;
525 if (cst->rep0 > wr->header->dict_size
526 || cst->rep0 > get_pos(wr))
527 return -1;
525 } 528 }
526 529
527 len += LZMA_MATCH_MIN_LEN; 530 len += LZMA_MATCH_MIN_LEN;
528 531
529 copy_bytes(wr, cst->rep0, len); 532 return copy_bytes(wr, cst->rep0, len);
530} 533}
531 534
532 535
@@ -536,7 +539,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
536 int(*flush)(void*, unsigned int), 539 int(*flush)(void*, unsigned int),
537 unsigned char *output, 540 unsigned char *output,
538 int *posp, 541 int *posp,
539 void(*error_fn)(char *x) 542 void(*error)(char *x)
540 ) 543 )
541{ 544{
542 struct lzma_header header; 545 struct lzma_header header;
@@ -552,7 +555,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
552 unsigned char *inbuf; 555 unsigned char *inbuf;
553 int ret = -1; 556 int ret = -1;
554 557
555 set_error_fn(error_fn); 558 rc.error = error;
556 559
557 if (buf) 560 if (buf)
558 inbuf = buf; 561 inbuf = buf;
@@ -580,8 +583,10 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
580 ((unsigned char *)&header)[i] = *rc.ptr++; 583 ((unsigned char *)&header)[i] = *rc.ptr++;
581 } 584 }
582 585
583 if (header.pos >= (9 * 5 * 5)) 586 if (header.pos >= (9 * 5 * 5)) {
584 error("bad header"); 587 error("bad header");
588 goto exit_1;
589 }
585 590
586 mi = 0; 591 mi = 0;
587 lc = header.pos; 592 lc = header.pos;
@@ -627,21 +632,29 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
627 int pos_state = get_pos(&wr) & pos_state_mask; 632 int pos_state = get_pos(&wr) & pos_state_mask;
628 uint16_t *prob = p + LZMA_IS_MATCH + 633 uint16_t *prob = p + LZMA_IS_MATCH +
629 (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; 634 (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state;
630 if (rc_is_bit_0(&rc, prob)) 635 if (rc_is_bit_0(&rc, prob)) {
631 process_bit0(&wr, &rc, &cst, p, pos_state, prob, 636 if (process_bit0(&wr, &rc, &cst, p, pos_state, prob,
632 lc, literal_pos_mask); 637 lc, literal_pos_mask)) {
633 else { 638 error("LZMA data is corrupt");
634 process_bit1(&wr, &rc, &cst, p, pos_state, prob); 639 goto exit_3;
640 }
641 } else {
642 if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) {
643 error("LZMA data is corrupt");
644 goto exit_3;
645 }
635 if (cst.rep0 == 0) 646 if (cst.rep0 == 0)
636 break; 647 break;
637 } 648 }
649 if (rc.buffer_size <= 0)
650 goto exit_3;
638 } 651 }
639 652
640 if (posp) 653 if (posp)
641 *posp = rc.ptr-rc.buffer; 654 *posp = rc.ptr-rc.buffer;
642 if (wr.flush) 655 if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos)
643 wr.flush(wr.buffer, wr.buffer_pos); 656 ret = 0;
644 ret = 0; 657exit_3:
645 large_free(p); 658 large_free(p);
646exit_2: 659exit_2:
647 if (!output) 660 if (!output)
@@ -659,9 +672,9 @@ STATIC int INIT decompress(unsigned char *buf, int in_len,
659 int(*flush)(void*, unsigned int), 672 int(*flush)(void*, unsigned int),
660 unsigned char *output, 673 unsigned char *output,
661 int *posp, 674 int *posp,
662 void(*error_fn)(char *x) 675 void(*error)(char *x)
663 ) 676 )
664{ 677{
665 return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn); 678 return unlzma(buf, in_len - 4, fill, flush, output, posp, error);
666} 679}
667#endif 680#endif
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index db521f45626e..5a7a2adf4c4c 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -33,7 +33,6 @@
33#ifdef STATIC 33#ifdef STATIC
34#include "lzo/lzo1x_decompress.c" 34#include "lzo/lzo1x_decompress.c"
35#else 35#else
36#include <linux/slab.h>
37#include <linux/decompress/unlzo.h> 36#include <linux/decompress/unlzo.h>
38#endif 37#endif
39 38
@@ -49,14 +48,25 @@ static const unsigned char lzop_magic[] = {
49 48
50#define LZO_BLOCK_SIZE (256*1024l) 49#define LZO_BLOCK_SIZE (256*1024l)
51#define HEADER_HAS_FILTER 0x00000800L 50#define HEADER_HAS_FILTER 0x00000800L
51#define HEADER_SIZE_MIN (9 + 7 + 4 + 8 + 1 + 4)
52#define HEADER_SIZE_MAX (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
52 53
53STATIC inline int INIT parse_header(u8 *input, u8 *skip) 54STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
54{ 55{
55 int l; 56 int l;
56 u8 *parse = input; 57 u8 *parse = input;
58 u8 *end = input + in_len;
57 u8 level = 0; 59 u8 level = 0;
58 u16 version; 60 u16 version;
59 61
62 /*
63 * Check that there's enough input to possibly have a valid header.
64 * After that it is safe to parse several fields, up to the point
65 * where the minimum header size may have been consumed.
66 */
67 if (in_len < HEADER_SIZE_MIN)
68 return 0;
69
60 /* read magic: 9 first bits */ 70 /* read magic: 9 first bits */
61 for (l = 0; l < 9; l++) { 71 for (l = 0; l < 9; l++) {
62 if (*parse++ != lzop_magic[l]) 72 if (*parse++ != lzop_magic[l])
@@ -74,6 +84,15 @@ STATIC inline int INIT parse_header(u8 *input, u8 *skip)
74 else 84 else
75 parse += 4; /* flags */ 85 parse += 4; /* flags */
76 86
87 /*
88 * At least mode, mtime_low, filename length, and checksum must
89 * be left to be parsed. If also mtime_high is present, it's OK
90 * because the next input buffer check is after reading the
91 * filename length.
92 */
93 if (end - parse < 8 + 1 + 4)
94 return 0;
95
77 /* skip mode and mtime_low */ 96 /* skip mode and mtime_low */
78 parse += 8; 97 parse += 8;
79 if (version >= 0x0940) 98 if (version >= 0x0940)
@@ -81,6 +100,8 @@ STATIC inline int INIT parse_header(u8 *input, u8 *skip)
81 100
82 l = *parse++; 101 l = *parse++;
83 /* don't care about the file name, and skip checksum */ 102 /* don't care about the file name, and skip checksum */
103 if (end - parse < l + 4)
104 return 0;
84 parse += l + 4; 105 parse += l + 4;
85 106
86 *skip = parse - input; 107 *skip = parse - input;
@@ -91,15 +112,14 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
91 int (*fill) (void *, unsigned int), 112 int (*fill) (void *, unsigned int),
92 int (*flush) (void *, unsigned int), 113 int (*flush) (void *, unsigned int),
93 u8 *output, int *posp, 114 u8 *output, int *posp,
94 void (*error_fn) (char *x)) 115 void (*error) (char *x))
95{ 116{
96 u8 skip = 0, r = 0; 117 u8 r = 0;
118 int skip = 0;
97 u32 src_len, dst_len; 119 u32 src_len, dst_len;
98 size_t tmp; 120 size_t tmp;
99 u8 *in_buf, *in_buf_save, *out_buf; 121 u8 *in_buf, *in_buf_save, *out_buf;
100 int obytes_processed = 0; 122 int ret = -1;
101
102 set_error_fn(error_fn);
103 123
104 if (output) { 124 if (output) {
105 out_buf = output; 125 out_buf = output;
@@ -119,8 +139,8 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
119 goto exit_1; 139 goto exit_1;
120 } else if (input) { 140 } else if (input) {
121 in_buf = input; 141 in_buf = input;
122 } else if (!fill || !posp) { 142 } else if (!fill) {
123 error("NULL input pointer and missing position pointer or fill function"); 143 error("NULL input pointer and missing fill function");
124 goto exit_1; 144 goto exit_1;
125 } else { 145 } else {
126 in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE)); 146 in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
@@ -134,22 +154,47 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
134 if (posp) 154 if (posp)
135 *posp = 0; 155 *posp = 0;
136 156
137 if (fill) 157 if (fill) {
138 fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); 158 /*
159 * Start from in_buf + HEADER_SIZE_MAX to make it possible
160 * to use memcpy() to copy the unused data to the beginning
161 * of the buffer. This way memmove(), which is missing from the
162 * pre-boot environments of most archs, isn't needed.
163 */
164 in_buf += HEADER_SIZE_MAX;
165 in_len = fill(in_buf, HEADER_SIZE_MAX);
166 }
139 167
140 if (!parse_header(input, &skip)) { 168 if (!parse_header(in_buf, &skip, in_len)) {
141 error("invalid header"); 169 error("invalid header");
142 goto exit_2; 170 goto exit_2;
143 } 171 }
144 in_buf += skip; 172 in_buf += skip;
173 in_len -= skip;
174
175 if (fill) {
176 /* Move the unused data to the beginning of the buffer. */
177 memcpy(in_buf_save, in_buf, in_len);
178 in_buf = in_buf_save;
179 }
145 180
146 if (posp) 181 if (posp)
147 *posp = skip; 182 *posp = skip;
148 183
149 for (;;) { 184 for (;;) {
150 /* read uncompressed block size */ 185 /* read uncompressed block size */
186 if (fill && in_len < 4) {
187 skip = fill(in_buf + in_len, 4 - in_len);
188 if (skip > 0)
189 in_len += skip;
190 }
191 if (in_len < 4) {
192 error("file corrupted");
193 goto exit_2;
194 }
151 dst_len = get_unaligned_be32(in_buf); 195 dst_len = get_unaligned_be32(in_buf);
152 in_buf += 4; 196 in_buf += 4;
197 in_len -= 4;
153 198
154 /* exit if last block */ 199 /* exit if last block */
155 if (dst_len == 0) { 200 if (dst_len == 0) {
@@ -164,8 +209,18 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
164 } 209 }
165 210
166 /* read compressed block size, and skip block checksum info */ 211 /* read compressed block size, and skip block checksum info */
212 if (fill && in_len < 8) {
213 skip = fill(in_buf + in_len, 8 - in_len);
214 if (skip > 0)
215 in_len += skip;
216 }
217 if (in_len < 8) {
218 error("file corrupted");
219 goto exit_2;
220 }
167 src_len = get_unaligned_be32(in_buf); 221 src_len = get_unaligned_be32(in_buf);
168 in_buf += 8; 222 in_buf += 8;
223 in_len -= 8;
169 224
170 if (src_len <= 0 || src_len > dst_len) { 225 if (src_len <= 0 || src_len > dst_len) {
171 error("file corrupted"); 226 error("file corrupted");
@@ -173,29 +228,55 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
173 } 228 }
174 229
175 /* decompress */ 230 /* decompress */
231 if (fill && in_len < src_len) {
232 skip = fill(in_buf + in_len, src_len - in_len);
233 if (skip > 0)
234 in_len += skip;
235 }
236 if (in_len < src_len) {
237 error("file corrupted");
238 goto exit_2;
239 }
176 tmp = dst_len; 240 tmp = dst_len;
177 r = lzo1x_decompress_safe((u8 *) in_buf, src_len, 241
242 /* When the input data is not compressed at all,
243 * lzo1x_decompress_safe will fail, so call memcpy()
244 * instead */
245 if (unlikely(dst_len == src_len))
246 memcpy(out_buf, in_buf, src_len);
247 else {
248 r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
178 out_buf, &tmp); 249 out_buf, &tmp);
179 250
180 if (r != LZO_E_OK || dst_len != tmp) { 251 if (r != LZO_E_OK || dst_len != tmp) {
181 error("Compressed data violation"); 252 error("Compressed data violation");
182 goto exit_2; 253 goto exit_2;
254 }
183 } 255 }
184 256
185 obytes_processed += dst_len; 257 if (flush && flush(out_buf, dst_len) != dst_len)
186 if (flush) 258 goto exit_2;
187 flush(out_buf, dst_len);
188 if (output) 259 if (output)
189 out_buf += dst_len; 260 out_buf += dst_len;
190 if (posp) 261 if (posp)
191 *posp += src_len + 12; 262 *posp += src_len + 12;
263
264 in_buf += src_len;
265 in_len -= src_len;
192 if (fill) { 266 if (fill) {
267 /*
268 * If there happens to still be unused data left in
269 * in_buf, move it to the beginning of the buffer.
270 * Use a loop to avoid memmove() dependency.
271 */
272 if (in_len > 0)
273 for (skip = 0; skip < in_len; ++skip)
274 in_buf_save[skip] = in_buf[skip];
193 in_buf = in_buf_save; 275 in_buf = in_buf_save;
194 fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); 276 }
195 } else
196 in_buf += src_len;
197 } 277 }
198 278
279 ret = 0;
199exit_2: 280exit_2:
200 if (!input) 281 if (!input)
201 free(in_buf); 282 free(in_buf);
@@ -203,7 +284,7 @@ exit_1:
203 if (!output) 284 if (!output)
204 free(out_buf); 285 free(out_buf);
205exit: 286exit:
206 return obytes_processed; 287 return ret;
207} 288}
208 289
209#define decompress unlzo 290#define decompress unlzo
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
new file mode 100644
index 000000000000..cecd23df2b9a
--- /dev/null
+++ b/lib/decompress_unxz.c
@@ -0,0 +1,397 @@
1/*
2 * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10/*
11 * Important notes about in-place decompression
12 *
13 * At least on x86, the kernel is decompressed in place: the compressed data
14 * is placed to the end of the output buffer, and the decompressor overwrites
15 * most of the compressed data. There must be enough safety margin to
16 * guarantee that the write position is always behind the read position.
17 *
18 * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
19 * Note that the margin with XZ is bigger than with Deflate (gzip)!
20 *
21 * The worst case for in-place decompression is that the beginning of
22 * the file is compressed extremely well, and the rest of the file is
23 * uncompressible. Thus, we must look for worst-case expansion when the
24 * compressor is encoding uncompressible data.
25 *
26 * The structure of the .xz file in case of a compressed kernel is as follows.
27 * Sizes (in bytes) of the fields are in parentheses.
28 *
29 * Stream Header (12)
30 * Block Header:
31 * Block Header (8-12)
32 * Compressed Data (N)
33 * Block Padding (0-3)
34 * CRC32 (4)
35 * Index (8-20)
36 * Stream Footer (12)
37 *
38 * Normally there is exactly one Block, but let's assume that there are
39 * 2-4 Blocks just in case. Because Stream Header and also Block Header
40 * of the first Block don't make the decompressor produce any uncompressed
41 * data, we can leave them out of our calculations. Block Headers of possible
42 * additional Blocks have to be taken into account still. With these
43 * assumptions, it is safe to assume that the total header overhead is
44 * less than 128 bytes.
45 *
46 * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
47 * doesn't change the size of the data, it is enough to calculate the
48 * safety margin for LZMA2.
49 *
50 * LZMA2 stores the data in chunks. Each chunk has a header whose size is
51 * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that
52 * the maximum chunk header size is 8 bytes. After the chunk header, there
53 * may be up to 64 KiB of actual payload in the chunk. Often the payload is
54 * quite a bit smaller though; to be safe, let's assume that an average
55 * chunk has only 32 KiB of payload.
56 *
57 * The maximum uncompressed size of the payload is 2 MiB. The minimum
58 * uncompressed size of the payload is in practice never less than the
59 * payload size itself. The LZMA2 format would allow uncompressed size
60 * to be less than the payload size, but no sane compressor creates such
61 * files. LZMA2 supports storing uncompressible data in uncompressed form,
62 * so there's never a need to create payloads whose uncompressed size is
63 * smaller than the compressed size.
64 *
65 * The assumption, that the uncompressed size of the payload is never
66 * smaller than the payload itself, is valid only when talking about
67 * the payload as a whole. It is possible that the payload has parts where
68 * the decompressor consumes more input than it produces output. Calculating
69 * the worst case for this would be tricky. Instead of trying to do that,
70 * let's simply make sure that the decompressor never overwrites any bytes
71 * of the payload which it is currently reading.
72 *
73 * Now we have enough information to calculate the safety margin. We need
74 * - 128 bytes for the .xz file format headers;
75 * - 8 bytes for every 32 KiB of uncompressed size (one LZMA2 chunk header
76 * per chunk, each chunk having average payload size of 32 KiB); and
77 * - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
78 * the decompressor never overwrites anything from the LZMA2 chunk
79 * payload it is currently reading.
80 *
81 * We get the following formula:
82 *
83 * safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
84 * = 128 + (uncompressed_size >> 12) + 65536
85 *
86 * For comparison, according to arch/x86/boot/compressed/misc.c, the
87 * equivalent formula for Deflate is this:
88 *
89 * safety_margin = 18 + (uncompressed_size >> 12) + 32768
90 *
91 * Thus, when updating Deflate-only in-place kernel decompressor to
92 * support XZ, the fixed overhead has to be increased from 18+32768 bytes
93 * to 128+65536 bytes.
94 */
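To make the formula above concrete, a worked example (the 20 MiB figure is purely illustrative, not taken from this patch): for an uncompressed kernel of 20 MiB = 20971520 bytes,

	safety_margin = 128 + (20971520 >> 12) + 65536
	              = 128 + 5120 + 65536
	              = 70784 bytes

so roughly 69 KiB of slack is needed between the write and read positions.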
95
96/*
97 * STATIC is defined to "static" if we are being built for kernel
98 * decompression (pre-boot code). <linux/decompress/mm.h> will define
99 * STATIC to empty if it wasn't already defined. Since we will need to
100 * know later if we are being used for kernel decompression, we define
101 * XZ_PREBOOT here.
102 */
103#ifdef STATIC
104# define XZ_PREBOOT
105#endif
106#ifdef __KERNEL__
107# include <linux/decompress/mm.h>
108#endif
109#define XZ_EXTERN STATIC
110
111#ifndef XZ_PREBOOT
112# include <linux/slab.h>
113# include <linux/xz.h>
114#else
115/*
116 * Use the internal CRC32 code instead of kernel's CRC32 module, which
117 * is not available in early phase of booting.
118 */
119#define XZ_INTERNAL_CRC32 1
120
121/*
122 * For boot time use, we enable only the BCJ filter of the current
123 * architecture or none if no BCJ filter is available for the architecture.
124 */
125#ifdef CONFIG_X86
126# define XZ_DEC_X86
127#endif
128#ifdef CONFIG_PPC
129# define XZ_DEC_POWERPC
130#endif
131#ifdef CONFIG_ARM
132# define XZ_DEC_ARM
133#endif
134#ifdef CONFIG_IA64
135# define XZ_DEC_IA64
136#endif
137#ifdef CONFIG_SPARC
138# define XZ_DEC_SPARC
139#endif
140
141/*
142 * This will get the basic headers so that memeq() and others
143 * can be defined.
144 */
145#include "xz/xz_private.h"
146
147/*
148 * Replace the normal allocation functions with the versions from
149 * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL)
150 * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it.
151 * Workaround it here because the other decompressors don't need it.
152 */
153#undef kmalloc
154#undef kfree
155#undef vmalloc
156#undef vfree
157#define kmalloc(size, flags) malloc(size)
158#define kfree(ptr) free(ptr)
159#define vmalloc(size) malloc(size)
160#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0)
161
162/*
163 * FIXME: Not all basic memory functions are provided in architecture-specific
164 * files (yet). We define our own versions here for now, but this should be
165 * only a temporary solution.
166 *
167 * memeq and memzero are not used much and any remotely sane implementation
168 * is fast enough. memcpy/memmove speed matters in multi-call mode, but
169 * the kernel image is decompressed in single-call mode, in which only
170 * memcpy speed can matter and only if there is a lot of uncompressible data
171 * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
172 * functions below should just be kept small; it's probably not worth
173 * optimizing for speed.
174 */
175
176#ifndef memeq
177static bool memeq(const void *a, const void *b, size_t size)
178{
179 const uint8_t *x = a;
180 const uint8_t *y = b;
181 size_t i;
182
183 for (i = 0; i < size; ++i)
184 if (x[i] != y[i])
185 return false;
186
187 return true;
188}
189#endif
190
191#ifndef memzero
192static void memzero(void *buf, size_t size)
193{
194 uint8_t *b = buf;
195 uint8_t *e = b + size;
196
197 while (b != e)
198 *b++ = '\0';
199}
200#endif
201
202#ifndef memmove
203/* Not static to avoid a conflict with the prototype in the Linux headers. */
204void *memmove(void *dest, const void *src, size_t size)
205{
206 uint8_t *d = dest;
207 const uint8_t *s = src;
208 size_t i;
209
210 if (d < s) {
211 for (i = 0; i < size; ++i)
212 d[i] = s[i];
213 } else if (d > s) {
214 i = size;
215 while (i-- > 0)
216 d[i] = s[i];
217 }
218
219 return dest;
220}
221#endif
222
223/*
224 * Since we need memmove anyway, it could be used as memcpy too.
225 * Commented out for now to avoid breaking things.
226 */
227/*
228#ifndef memcpy
229# define memcpy memmove
230#endif
231*/
232
233#include "xz/xz_crc32.c"
234#include "xz/xz_dec_stream.c"
235#include "xz/xz_dec_lzma2.c"
236#include "xz/xz_dec_bcj.c"
237
238#endif /* XZ_PREBOOT */
239
240/* Size of the input and output buffers in multi-call mode */
241#define XZ_IOBUF_SIZE 4096
242
243/*
244 * This function implements the API defined in <linux/decompress/generic.h>.
245 *
246 * This wrapper will automatically choose single-call or multi-call mode
247 * of the native XZ decoder API. The single-call mode can be used only when
248 * both input and output buffers are available as a single chunk, i.e. when
249 * fill() and flush() won't be used.
250 */
251STATIC int INIT unxz(unsigned char *in, int in_size,
252 int (*fill)(void *dest, unsigned int size),
253 int (*flush)(void *src, unsigned int size),
254 unsigned char *out, int *in_used,
255 void (*error)(char *x))
256{
257 struct xz_buf b;
258 struct xz_dec *s;
259 enum xz_ret ret;
260 bool must_free_in = false;
261
262#if XZ_INTERNAL_CRC32
263 xz_crc32_init();
264#endif
265
266 if (in_used != NULL)
267 *in_used = 0;
268
269 if (fill == NULL && flush == NULL)
270 s = xz_dec_init(XZ_SINGLE, 0);
271 else
272 s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);
273
274 if (s == NULL)
275 goto error_alloc_state;
276
277 if (flush == NULL) {
278 b.out = out;
279 b.out_size = (size_t)-1;
280 } else {
281 b.out_size = XZ_IOBUF_SIZE;
282 b.out = malloc(XZ_IOBUF_SIZE);
283 if (b.out == NULL)
284 goto error_alloc_out;
285 }
286
287 if (in == NULL) {
288 must_free_in = true;
289 in = malloc(XZ_IOBUF_SIZE);
290 if (in == NULL)
291 goto error_alloc_in;
292 }
293
294 b.in = in;
295 b.in_pos = 0;
296 b.in_size = in_size;
297 b.out_pos = 0;
298
299 if (fill == NULL && flush == NULL) {
300 ret = xz_dec_run(s, &b);
301 } else {
302 do {
303 if (b.in_pos == b.in_size && fill != NULL) {
304 if (in_used != NULL)
305 *in_used += b.in_pos;
306
307 b.in_pos = 0;
308
309 in_size = fill(in, XZ_IOBUF_SIZE);
310 if (in_size < 0) {
311 /*
312 * This isn't an optimal error code
313 * but it probably isn't worth making
314 * a new one either.
315 */
316 ret = XZ_BUF_ERROR;
317 break;
318 }
319
320 b.in_size = in_size;
321 }
322
323 ret = xz_dec_run(s, &b);
324
325 if (flush != NULL && (b.out_pos == b.out_size
326 || (ret != XZ_OK && b.out_pos > 0))) {
327 /*
328 * Setting ret here may hide an error
329 * returned by xz_dec_run(), but probably
330 * it's not too bad.
331 */
332 if (flush(b.out, b.out_pos) != (int)b.out_pos)
333 ret = XZ_BUF_ERROR;
334
335 b.out_pos = 0;
336 }
337 } while (ret == XZ_OK);
338
339 if (must_free_in)
340 free(in);
341
342 if (flush != NULL)
343 free(b.out);
344 }
345
346 if (in_used != NULL)
347 *in_used += b.in_pos;
348
349 xz_dec_end(s);
350
351 switch (ret) {
352 case XZ_STREAM_END:
353 return 0;
354
355 case XZ_MEM_ERROR:
356 /* This can occur only in multi-call mode. */
357 error("XZ decompressor ran out of memory");
358 break;
359
360 case XZ_FORMAT_ERROR:
361 error("Input is not in the XZ format (wrong magic bytes)");
362 break;
363
364 case XZ_OPTIONS_ERROR:
365 error("Input was encoded with settings that are not "
366 "supported by this XZ decoder");
367 break;
368
369 case XZ_DATA_ERROR:
370 case XZ_BUF_ERROR:
371 error("XZ-compressed data is corrupt");
372 break;
373
374 default:
375 error("Bug in the XZ decompressor");
376 break;
377 }
378
379 return -1;
380
381error_alloc_in:
382 if (flush != NULL)
383 free(b.out);
384
385error_alloc_out:
386 xz_dec_end(s);
387
388error_alloc_state:
389 error("XZ decompressor ran out of memory");
390 return -1;
391}
392
393/*
394 * This macro is used by architecture-specific files to decompress
395 * the kernel image.
396 */
397#define decompress unxz
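
The wrapper above is the only glue between the generic pre-boot decompression API and the XZ decoder: passing NULL for both fill() and flush() selects single-call (XZ_SINGLE) mode, anything else falls back to multi-call (XZ_DYNALLOC) with XZ_IOBUF_SIZE bounce buffers. A minimal sketch of a single-call invocation follows; the names decompress_whole_image and report_error are illustrative only and not part of the patch.

    static void report_error(char *msg)
    {
            /* a real pre-boot environment would print via its own putstr()-style hook */
    }

    static int decompress_whole_image(unsigned char *in, int in_size,
                                      unsigned char *out)
    {
            int in_used = 0;

            /* decompress is #defined to unxz above; returns 0 on success, -1 on error */
            return decompress(in, in_size, NULL, NULL, out, &in_used, report_error);
    }
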
diff --git a/lib/devres.c b/lib/devres.c
index 72c8909006da..6efddf53b90c 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,5 +1,6 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/io.h> 2#include <linux/io.h>
3#include <linux/gfp.h>
3#include <linux/module.h> 4#include <linux/module.h>
4 5
5void devm_ioremap_release(struct device *dev, void *res) 6void devm_ioremap_release(struct device *dev, void *res)
@@ -327,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
327 * @pdev: PCI device to map IO resources for 328 * @pdev: PCI device to map IO resources for
328 * @mask: Mask of BARs to unmap and release 329 * @mask: Mask of BARs to unmap and release
329 * 330 *
330 * Unamp and release regions specified by @mask. 331 * Unmap and release regions specified by @mask.
331 */ 332 */
332void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) 333void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
333{ 334{
diff --git a/lib/div64.c b/lib/div64.c
index a111eb8de9cf..5b4919191778 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -77,26 +77,58 @@ s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
77EXPORT_SYMBOL(div_s64_rem); 77EXPORT_SYMBOL(div_s64_rem);
78#endif 78#endif
79 79
80/* 64bit divisor, dividend and result. dynamic precision */ 80/**
81 * div64_u64 - unsigned 64bit divide with 64bit divisor
82 * @dividend: 64bit dividend
83 * @divisor: 64bit divisor
84 *
85 * This implementation is a modified version of the algorithm proposed
 86 * in the book 'Hacker's Delight'. The original source and full proof
 87 * can be found here and are available for use without restriction.
88 *
89 * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c'
90 */
81#ifndef div64_u64 91#ifndef div64_u64
82u64 div64_u64(u64 dividend, u64 divisor) 92u64 div64_u64(u64 dividend, u64 divisor)
83{ 93{
84 u32 high, d; 94 u32 high = divisor >> 32;
95 u64 quot;
85 96
86 high = divisor >> 32; 97 if (high == 0) {
87 if (high) { 98 quot = div_u64(dividend, divisor);
88 unsigned int shift = fls(high); 99 } else {
100 int n = 1 + fls(high);
101 quot = div_u64(dividend >> n, divisor >> n);
89 102
90 d = divisor >> shift; 103 if (quot != 0)
91 dividend >>= shift; 104 quot--;
92 } else 105 if ((dividend - quot * divisor) >= divisor)
93 d = divisor; 106 quot++;
107 }
94 108
95 return div_u64(dividend, d); 109 return quot;
96} 110}
97EXPORT_SYMBOL(div64_u64); 111EXPORT_SYMBOL(div64_u64);
98#endif 112#endif
99 113
114/**
115 * div64_s64 - signed 64bit divide with 64bit divisor
116 * @dividend: 64bit dividend
117 * @divisor: 64bit divisor
118 */
119#ifndef div64_s64
120s64 div64_s64(s64 dividend, s64 divisor)
121{
122 s64 quot, t;
123
124 quot = div64_u64(abs64(dividend), abs64(divisor));
125 t = (dividend ^ divisor) >> 63;
126
127 return (quot ^ t) - t;
128}
129EXPORT_SYMBOL(div64_s64);
130#endif
131
100#endif /* BITS_PER_LONG == 32 */ 132#endif /* BITS_PER_LONG == 32 */
101 133
102/* 134/*
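
For readers without the book at hand, the new div64_u64() estimates the quotient by shifting both operands right until the divisor fits in 32 bits, divides the shifted values with the 64-by-32 primitive, and then corrects the estimate. The sketch below restates that in plain user-space C, assuming only a 64-by-32 division helper is available; the names div_u64_32, fls32 and div64_u64_sketch are illustrative. The signed variant then just divides the absolute values and applies (quot ^ t) - t, where t is all ones exactly when the operand signs differ, to restore the sign.

    #include <stdint.h>

    /* stand-in for the kernel's div_u64(): 64-bit dividend, 32-bit divisor */
    static uint64_t div_u64_32(uint64_t dividend, uint32_t divisor)
    {
            return dividend / divisor;
    }

    static int fls32(uint32_t x)            /* highest set bit, 1-based */
    {
            return 32 - __builtin_clz(x);
    }

    static uint64_t div64_u64_sketch(uint64_t dividend, uint64_t divisor)
    {
            uint32_t high = divisor >> 32;
            uint64_t quot;

            if (high == 0) {
                    quot = div_u64_32(dividend, (uint32_t)divisor);
            } else {
                    /* shift so the divisor fits in 32 bits, divide the shifted
                     * values, then adjust: the estimate may be off by one, so
                     * step down once and back up if the remainder is still
                     * at least the divisor */
                    int n = 1 + fls32(high);

                    quot = div_u64_32(dividend >> n, (uint32_t)(divisor >> n));
                    if (quot != 0)
                            quot--;
                    if ((dividend - quot * divisor) >= divisor)
                            quot++;
            }
            return quot;
    }
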
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 7d2f0b33e5a8..4bfb0471f106 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -570,7 +570,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
570 * Now parse out the first token and use it as the name for the 570 * Now parse out the first token and use it as the name for the
571 * driver to filter for. 571 * driver to filter for.
572 */ 572 */
573 for (i = 0; i < NAME_MAX_LEN; ++i) { 573 for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
574 current_driver_name[i] = buf[i]; 574 current_driver_name[i] = buf[i];
575 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) 575 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
576 break; 576 break;
@@ -587,9 +587,10 @@ out_unlock:
587 return count; 587 return count;
588} 588}
589 589
590const struct file_operations filter_fops = { 590static const struct file_operations filter_fops = {
591 .read = filter_read, 591 .read = filter_read,
592 .write = filter_write, 592 .write = filter_write,
593 .llseek = default_llseek,
593}; 594};
594 595
595static int dma_debug_fs_init(void) 596static int dma_debug_fs_init(void)
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index f93502915988..b335acb43be2 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -25,19 +25,12 @@
25#include <linux/uaccess.h> 25#include <linux/uaccess.h>
26#include <linux/dynamic_debug.h> 26#include <linux/dynamic_debug.h>
27#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/slab.h>
29#include <linux/jump_label.h>
28 30
29extern struct _ddebug __start___verbose[]; 31extern struct _ddebug __start___verbose[];
30extern struct _ddebug __stop___verbose[]; 32extern struct _ddebug __stop___verbose[];
31 33
32/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which
33 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
34 * use independent hash functions, to reduce the chance of false positives.
35 */
36long long dynamic_debug_enabled;
37EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
38long long dynamic_debug_enabled2;
39EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
40
41struct ddebug_table { 34struct ddebug_table {
42 struct list_head link; 35 struct list_head link;
43 char *mod_name; 36 char *mod_name;
@@ -87,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
87} 80}
88 81
89/* 82/*
90 * must be called with ddebug_lock held
91 */
92
93static int disabled_hash(char hash, bool first_table)
94{
95 struct ddebug_table *dt;
96 char table_hash_value;
97
98 list_for_each_entry(dt, &ddebug_tables, link) {
99 if (first_table)
100 table_hash_value = dt->ddebugs->primary_hash;
101 else
102 table_hash_value = dt->ddebugs->secondary_hash;
103 if (dt->num_enabled && (hash == table_hash_value))
104 return 0;
105 }
106 return 1;
107}
108
109/*
110 * Search the tables for _ddebug's which match the given 83 * Search the tables for _ddebug's which match the given
111 * `query' and apply the `flags' and `mask' to them. Tells 84 * `query' and apply the `flags' and `mask' to them. Tells
112 * the user which ddebug's were changed, or whether none 85 * the user which ddebug's were changed, or whether none
@@ -168,19 +141,10 @@ static void ddebug_change(const struct ddebug_query *query,
168 else if (!dp->flags) 141 else if (!dp->flags)
169 dt->num_enabled++; 142 dt->num_enabled++;
170 dp->flags = newflags; 143 dp->flags = newflags;
171 if (newflags) { 144 if (newflags)
172 dynamic_debug_enabled |= 145 dp->enabled = 1;
173 (1LL << dp->primary_hash); 146 else
174 dynamic_debug_enabled2 |= 147 dp->enabled = 0;
175 (1LL << dp->secondary_hash);
176 } else {
177 if (disabled_hash(dp->primary_hash, true))
178 dynamic_debug_enabled &=
179 ~(1LL << dp->primary_hash);
180 if (disabled_hash(dp->secondary_hash, false))
181 dynamic_debug_enabled2 &=
182 ~(1LL << dp->secondary_hash);
183 }
184 if (verbose) 148 if (verbose)
185 printk(KERN_INFO 149 printk(KERN_INFO
186 "ddebug: changed %s:%d [%s]%s %s\n", 150 "ddebug: changed %s:%d [%s]%s %s\n",
@@ -428,6 +392,40 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
428 return 0; 392 return 0;
429} 393}
430 394
395static int ddebug_exec_query(char *query_string)
396{
397 unsigned int flags = 0, mask = 0;
398 struct ddebug_query query;
399#define MAXWORDS 9
400 int nwords;
401 char *words[MAXWORDS];
402
403 nwords = ddebug_tokenize(query_string, words, MAXWORDS);
404 if (nwords <= 0)
405 return -EINVAL;
406 if (ddebug_parse_query(words, nwords-1, &query))
407 return -EINVAL;
408 if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
409 return -EINVAL;
410
411 /* actually go and implement the change */
412 ddebug_change(&query, flags, mask);
413 return 0;
414}
415
416static __initdata char ddebug_setup_string[1024];
417static __init int ddebug_setup_query(char *str)
418{
419 if (strlen(str) >= 1024) {
420 pr_warning("ddebug boot param string too large\n");
421 return 0;
422 }
423 strcpy(ddebug_setup_string, str);
424 return 1;
425}
426
427__setup("ddebug_query=", ddebug_setup_query);
428
431/* 429/*
 432 * File_ops->write method for <debugfs>/dynamic_debug/control. Gathers the 430
433 * command text from userspace, parses and executes it. 431 * command text from userspace, parses and executes it.
@@ -435,12 +433,8 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
435static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, 433static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
436 size_t len, loff_t *offp) 434 size_t len, loff_t *offp)
437{ 435{
438 unsigned int flags = 0, mask = 0;
439 struct ddebug_query query;
440#define MAXWORDS 9
441 int nwords;
442 char *words[MAXWORDS];
443 char tmpbuf[256]; 436 char tmpbuf[256];
437 int ret;
444 438
445 if (len == 0) 439 if (len == 0)
446 return 0; 440 return 0;
@@ -454,16 +448,9 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
454 printk(KERN_INFO "%s: read %d bytes from userspace\n", 448 printk(KERN_INFO "%s: read %d bytes from userspace\n",
455 __func__, (int)len); 449 __func__, (int)len);
456 450
457 nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); 451 ret = ddebug_exec_query(tmpbuf);
458 if (nwords < 0) 452 if (ret)
459 return -EINVAL; 453 return ret;
460 if (ddebug_parse_query(words, nwords-1, &query))
461 return -EINVAL;
462 if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
463 return -EINVAL;
464
465 /* actually go and implement the change */
466 ddebug_change(&query, flags, mask);
467 454
468 *offp += len; 455 *offp += len;
469 return len; 456 return len;
@@ -691,7 +678,7 @@ static void ddebug_table_free(struct ddebug_table *dt)
691 * Called in response to a module being unloaded. Removes 678 * Called in response to a module being unloaded. Removes
692 * any ddebug_table's which point at the module. 679 * any ddebug_table's which point at the module.
693 */ 680 */
694int ddebug_remove_module(char *mod_name) 681int ddebug_remove_module(const char *mod_name)
695{ 682{
696 struct ddebug_table *dt, *nextdt; 683 struct ddebug_table *dt, *nextdt;
697 int ret = -ENOENT; 684 int ret = -ENOENT;
@@ -724,13 +711,14 @@ static void ddebug_remove_all_tables(void)
724 mutex_unlock(&ddebug_lock); 711 mutex_unlock(&ddebug_lock);
725} 712}
726 713
727static int __init dynamic_debug_init(void) 714static __initdata int ddebug_init_success;
715
716static int __init dynamic_debug_init_debugfs(void)
728{ 717{
729 struct dentry *dir, *file; 718 struct dentry *dir, *file;
730 struct _ddebug *iter, *iter_start; 719
731 const char *modname = NULL; 720 if (!ddebug_init_success)
732 int ret = 0; 721 return -ENODEV;
733 int n = 0;
734 722
735 dir = debugfs_create_dir("dynamic_debug", NULL); 723 dir = debugfs_create_dir("dynamic_debug", NULL);
736 if (!dir) 724 if (!dir)
@@ -741,6 +729,16 @@ static int __init dynamic_debug_init(void)
741 debugfs_remove(dir); 729 debugfs_remove(dir);
742 return -ENOMEM; 730 return -ENOMEM;
743 } 731 }
732 return 0;
733}
734
735static int __init dynamic_debug_init(void)
736{
737 struct _ddebug *iter, *iter_start;
738 const char *modname = NULL;
739 int ret = 0;
740 int n = 0;
741
744 if (__start___verbose != __stop___verbose) { 742 if (__start___verbose != __stop___verbose) {
745 iter = __start___verbose; 743 iter = __start___verbose;
746 modname = iter->modname; 744 modname = iter->modname;
@@ -758,12 +756,26 @@ static int __init dynamic_debug_init(void)
758 } 756 }
759 ret = ddebug_add_module(iter_start, n, modname); 757 ret = ddebug_add_module(iter_start, n, modname);
760 } 758 }
759
760 /* ddebug_query boot param got passed -> set it up */
761 if (ddebug_setup_string[0] != '\0') {
762 ret = ddebug_exec_query(ddebug_setup_string);
763 if (ret)
764 pr_warning("Invalid ddebug boot param %s",
765 ddebug_setup_string);
766 else
767 pr_info("ddebug initialized with string %s",
768 ddebug_setup_string);
769 }
770
761out_free: 771out_free:
762 if (ret) { 772 if (ret)
763 ddebug_remove_all_tables(); 773 ddebug_remove_all_tables();
764 debugfs_remove(dir); 774 else
765 debugfs_remove(file); 775 ddebug_init_success = 1;
766 }
767 return 0; 776 return 0;
768} 777}
769module_init(dynamic_debug_init); 778/* Allow early initialization for boot messages via boot param */
779arch_initcall(dynamic_debug_init);
780/* Debugfs setup must be done later */
781module_init(dynamic_debug_init_debugfs);
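
Taken together, these hunks split query execution out into ddebug_exec_query() and add a ddebug_query= boot parameter that is applied from dynamic_debug_init() at arch_initcall time, so pr_debug() sites can be switched on before debugfs is even available. Assuming the usual dynamic debug query grammar (match keywords such as file/module/func/line followed by a flags change), a hypothetical use looks like:

        ddebug_query="file svcsock.c +p"          (on the kernel command line)

which has the same effect as the later run-time command

        echo "file svcsock.c +p" > <debugfs>/dynamic_debug/control
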
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 66eef2e4483e..c0ea40ba2082 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -23,6 +23,7 @@
23#include <linux/flex_array.h> 23#include <linux/flex_array.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/stddef.h> 25#include <linux/stddef.h>
26#include <linux/module.h>
26 27
27struct flex_array_part { 28struct flex_array_part {
28 char elements[FLEX_ARRAY_PART_SIZE]; 29 char elements[FLEX_ARRAY_PART_SIZE];
@@ -99,10 +100,11 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
99 ret->element_size = element_size; 100 ret->element_size = element_size;
100 ret->total_nr_elements = total; 101 ret->total_nr_elements = total;
101 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) 102 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
102 memset(ret->parts[0], FLEX_ARRAY_FREE, 103 memset(&ret->parts[0], FLEX_ARRAY_FREE,
103 FLEX_ARRAY_BASE_BYTES_LEFT); 104 FLEX_ARRAY_BASE_BYTES_LEFT);
104 return ret; 105 return ret;
105} 106}
107EXPORT_SYMBOL(flex_array_alloc);
106 108
107static int fa_element_to_part_nr(struct flex_array *fa, 109static int fa_element_to_part_nr(struct flex_array *fa,
108 unsigned int element_nr) 110 unsigned int element_nr)
@@ -126,12 +128,14 @@ void flex_array_free_parts(struct flex_array *fa)
126 for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) 128 for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
127 kfree(fa->parts[part_nr]); 129 kfree(fa->parts[part_nr]);
128} 130}
131EXPORT_SYMBOL(flex_array_free_parts);
129 132
130void flex_array_free(struct flex_array *fa) 133void flex_array_free(struct flex_array *fa)
131{ 134{
132 flex_array_free_parts(fa); 135 flex_array_free_parts(fa);
133 kfree(fa); 136 kfree(fa);
134} 137}
138EXPORT_SYMBOL(flex_array_free);
135 139
136static unsigned int index_inside_part(struct flex_array *fa, 140static unsigned int index_inside_part(struct flex_array *fa,
137 unsigned int element_nr) 141 unsigned int element_nr)
@@ -171,6 +175,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
171 * Note that this *copies* the contents of @src into 175 * Note that this *copies* the contents of @src into
172 * the array. If you are trying to store an array of 176 * the array. If you are trying to store an array of
173 * pointers, make sure to pass in &ptr instead of ptr. 177 * pointers, make sure to pass in &ptr instead of ptr.
178 * You may instead wish to use the flex_array_put_ptr()
179 * helper function.
174 * 180 *
175 * Locking must be provided by the caller. 181 * Locking must be provided by the caller.
176 */ 182 */
@@ -194,6 +200,7 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
194 memcpy(dst, src, fa->element_size); 200 memcpy(dst, src, fa->element_size);
195 return 0; 201 return 0;
196} 202}
203EXPORT_SYMBOL(flex_array_put);
197 204
198/** 205/**
199 * flex_array_clear - clear element in array at @element_nr 206 * flex_array_clear - clear element in array at @element_nr
@@ -221,6 +228,7 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
221 memset(dst, FLEX_ARRAY_FREE, fa->element_size); 228 memset(dst, FLEX_ARRAY_FREE, fa->element_size);
222 return 0; 229 return 0;
223} 230}
231EXPORT_SYMBOL(flex_array_clear);
224 232
225/** 233/**
226 * flex_array_prealloc - guarantee that array space exists 234 * flex_array_prealloc - guarantee that array space exists
@@ -257,6 +265,7 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
257 } 265 }
258 return 0; 266 return 0;
259} 267}
268EXPORT_SYMBOL(flex_array_prealloc);
260 269
261/** 270/**
262 * flex_array_get - pull data back out of the array 271 * flex_array_get - pull data back out of the array
@@ -265,7 +274,8 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
265 * 274 *
266 * Returns a pointer to the data at index @element_nr. Note 275 * Returns a pointer to the data at index @element_nr. Note
267 * that this is a copy of the data that was passed in. If you 276 * that this is a copy of the data that was passed in. If you
268 * are using this to store pointers, you'll get back &ptr. 277 * are using this to store pointers, you'll get back &ptr. You
278 * may instead wish to use the flex_array_get_ptr helper.
269 * 279 *
270 * Locking must be provided by the caller. 280 * Locking must be provided by the caller.
271 */ 281 */
@@ -285,6 +295,28 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
285 } 295 }
286 return &part->elements[index_inside_part(fa, element_nr)]; 296 return &part->elements[index_inside_part(fa, element_nr)];
287} 297}
298EXPORT_SYMBOL(flex_array_get);
299
300/**
301 * flex_array_get_ptr - pull a ptr back out of the array
302 * @fa: the flex array from which to extract data
303 * @element_nr: index of the element to fetch from the array
304 *
305 * Returns the pointer placed in the flex array at element_nr using
306 * flex_array_put_ptr(). This function should not be called if the
307 * element in question was not set using the _put_ptr() helper.
308 */
309void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
310{
311 void **tmp;
312
313 tmp = flex_array_get(fa, element_nr);
314 if (!tmp)
315 return NULL;
316
317 return *tmp;
318}
319EXPORT_SYMBOL(flex_array_get_ptr);
288 320
289static int part_is_free(struct flex_array_part *part) 321static int part_is_free(struct flex_array_part *part)
290{ 322{
@@ -325,3 +357,4 @@ int flex_array_shrink(struct flex_array *fa)
325 } 357 }
326 return ret; 358 return ret;
327} 359}
360EXPORT_SYMBOL(flex_array_shrink);
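
With the symbols exported and flex_array_get_ptr() added, storing pointers in a flex array amounts to copying the pointer value itself in and pulling it back out. A minimal sketch, assuming a hypothetical struct my_item and an array created with element_size == sizeof(void *):

    static struct my_item *stash_and_fetch(struct flex_array *fa,
                                           struct my_item *item)
    {
            /* fa is assumed to come from flex_array_alloc(sizeof(void *), n, GFP_KERNEL) */
            if (flex_array_put(fa, 0, &item, GFP_KERNEL))   /* copies the pointer value */
                    return NULL;

            return flex_array_get_ptr(fa, 0);               /* returns item itself */
    }
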
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index bea5d97df991..85d0e412a04f 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -7,8 +7,8 @@
7#define LE_TABLE_SIZE (1 << CRC_LE_BITS) 7#define LE_TABLE_SIZE (1 << CRC_LE_BITS)
8#define BE_TABLE_SIZE (1 << CRC_BE_BITS) 8#define BE_TABLE_SIZE (1 << CRC_BE_BITS)
9 9
10static uint32_t crc32table_le[LE_TABLE_SIZE]; 10static uint32_t crc32table_le[4][LE_TABLE_SIZE];
11static uint32_t crc32table_be[BE_TABLE_SIZE]; 11static uint32_t crc32table_be[4][BE_TABLE_SIZE];
12 12
13/** 13/**
14 * crc32init_le() - allocate and initialize LE table data 14 * crc32init_le() - allocate and initialize LE table data
@@ -22,12 +22,19 @@ static void crc32init_le(void)
22 unsigned i, j; 22 unsigned i, j;
23 uint32_t crc = 1; 23 uint32_t crc = 1;
24 24
25 crc32table_le[0] = 0; 25 crc32table_le[0][0] = 0;
26 26
27 for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { 27 for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
28 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); 28 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
29 for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) 29 for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
30 crc32table_le[i + j] = crc ^ crc32table_le[j]; 30 crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
31 }
32 for (i = 0; i < LE_TABLE_SIZE; i++) {
33 crc = crc32table_le[0][i];
34 for (j = 1; j < 4; j++) {
35 crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
36 crc32table_le[j][i] = crc;
37 }
31 } 38 }
32} 39}
33 40
@@ -39,25 +46,35 @@ static void crc32init_be(void)
39 unsigned i, j; 46 unsigned i, j;
40 uint32_t crc = 0x80000000; 47 uint32_t crc = 0x80000000;
41 48
42 crc32table_be[0] = 0; 49 crc32table_be[0][0] = 0;
43 50
44 for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { 51 for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
45 crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); 52 crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
46 for (j = 0; j < i; j++) 53 for (j = 0; j < i; j++)
47 crc32table_be[i + j] = crc ^ crc32table_be[j]; 54 crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
55 }
56 for (i = 0; i < BE_TABLE_SIZE; i++) {
57 crc = crc32table_be[0][i];
58 for (j = 1; j < 4; j++) {
59 crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
60 crc32table_be[j][i] = crc;
61 }
48 } 62 }
49} 63}
50 64
51static void output_table(uint32_t table[], int len, char *trans) 65static void output_table(uint32_t table[4][256], int len, char *trans)
52{ 66{
53 int i; 67 int i, j;
54 68
55 for (i = 0; i < len - 1; i++) { 69 for (j = 0 ; j < 4; j++) {
56 if (i % ENTRIES_PER_LINE == 0) 70 printf("{");
57 printf("\n"); 71 for (i = 0; i < len - 1; i++) {
58 printf("%s(0x%8.8xL), ", trans, table[i]); 72 if (i % ENTRIES_PER_LINE == 0)
73 printf("\n");
74 printf("%s(0x%8.8xL), ", trans, table[j][i]);
75 }
76 printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
59 } 77 }
60 printf("%s(0x%8.8xL)\n", trans, table[len - 1]);
61} 78}
62 79
63int main(int argc, char** argv) 80int main(int argc, char** argv)
@@ -66,14 +83,14 @@ int main(int argc, char** argv)
66 83
67 if (CRC_LE_BITS > 1) { 84 if (CRC_LE_BITS > 1) {
68 crc32init_le(); 85 crc32init_le();
69 printf("static const u32 crc32table_le[] = {"); 86 printf("static const u32 crc32table_le[4][256] = {");
70 output_table(crc32table_le, LE_TABLE_SIZE, "tole"); 87 output_table(crc32table_le, LE_TABLE_SIZE, "tole");
71 printf("};\n"); 88 printf("};\n");
72 } 89 }
73 90
74 if (CRC_BE_BITS > 1) { 91 if (CRC_BE_BITS > 1) {
75 crc32init_be(); 92 crc32init_be();
76 printf("static const u32 crc32table_be[] = {"); 93 printf("static const u32 crc32table_be[4][256] = {");
77 output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); 94 output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
78 printf("};\n"); 95 printf("};\n");
79 } 96 }
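
The generator now emits four 256-entry tables per byte order instead of one, which is what a slice-by-4 CRC loop consumes: XOR a whole 32-bit word into the running CRC, then look each of its bytes up in a different sub-table. The sketch below shows that consumption pattern for the little-endian case; it ignores the tole()/tobe() wrapping the real tables go through and assumes a little-endian host, a 4-byte-aligned buffer and a length that is a multiple of 4.

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t crc32_le_by4(uint32_t crc, const uint32_t *p, size_t len,
                                 const uint32_t tab[4][256])
    {
            size_t i;

            for (i = 0; i < len / 4; i++) {
                    crc ^= p[i];
                    /* the byte with 3 more bytes still to pass through uses
                     * tab[3]; the last byte of the word uses tab[0] */
                    crc = tab[3][crc & 0xff] ^
                          tab[2][(crc >> 8) & 0xff] ^
                          tab[1][(crc >> 16) & 0xff] ^
                          tab[0][crc >> 24];
            }
            return crc;
    }
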
diff --git a/lib/genalloc.c b/lib/genalloc.c
index e67f97495dd5..1923f1490e72 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -10,6 +10,7 @@
10 * Version 2. See the file COPYING for more details. 10 * Version 2. See the file COPYING for more details.
11 */ 11 */
12 12
13#include <linux/slab.h>
13#include <linux/module.h> 14#include <linux/module.h>
14#include <linux/bitmap.h> 15#include <linux/bitmap.h>
15#include <linux/genalloc.h> 16#include <linux/genalloc.h>
@@ -127,7 +128,6 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
127 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 128 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
128 129
129 end_bit = (chunk->end_addr - chunk->start_addr) >> order; 130 end_bit = (chunk->end_addr - chunk->start_addr) >> order;
130 end_bit -= nbits + 1;
131 131
132 spin_lock_irqsave(&chunk->lock, flags); 132 spin_lock_irqsave(&chunk->lock, flags);
133 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, 133 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 39af2560f765..f5fe6ba7a3ab 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -16,6 +16,40 @@ const char hex_asc[] = "0123456789abcdef";
16EXPORT_SYMBOL(hex_asc); 16EXPORT_SYMBOL(hex_asc);
17 17
18/** 18/**
19 * hex_to_bin - convert a hex digit to its real value
20 * @ch: ascii character represents hex digit
21 *
22 * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
23 * input.
24 */
25int hex_to_bin(char ch)
26{
27 if ((ch >= '0') && (ch <= '9'))
28 return ch - '0';
29 ch = tolower(ch);
30 if ((ch >= 'a') && (ch <= 'f'))
31 return ch - 'a' + 10;
32 return -1;
33}
34EXPORT_SYMBOL(hex_to_bin);
35
36/**
37 * hex2bin - convert an ascii hexadecimal string to its binary representation
38 * @dst: binary result
39 * @src: ascii hexadecimal string
40 * @count: result length
41 */
42void hex2bin(u8 *dst, const char *src, size_t count)
43{
44 while (count--) {
45 *dst = hex_to_bin(*src++) << 4;
46 *dst += hex_to_bin(*src++);
47 dst++;
48 }
49}
50EXPORT_SYMBOL(hex2bin);
51
52/**
19 * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory 53 * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
20 * @buf: data blob to dump 54 * @buf: data blob to dump
21 * @len: number of bytes in the @buf 55 * @len: number of bytes in the @buf
@@ -34,7 +68,7 @@ EXPORT_SYMBOL(hex_asc);
34 * 68 *
35 * E.g.: 69 * E.g.:
36 * hex_dump_to_buffer(frame->data, frame->len, 16, 1, 70 * hex_dump_to_buffer(frame->data, frame->len, 16, 1,
37 * linebuf, sizeof(linebuf), 1); 71 * linebuf, sizeof(linebuf), true);
38 * 72 *
39 * example output buffer: 73 * example output buffer:
40 * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO 74 * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
@@ -65,8 +99,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
65 99
66 for (j = 0; j < ngroups; j++) 100 for (j = 0; j < ngroups; j++)
67 lx += scnprintf(linebuf + lx, linebuflen - lx, 101 lx += scnprintf(linebuf + lx, linebuflen - lx,
68 "%s%16.16llx", j ? " " : "", 102 "%s%16.16llx", j ? " " : "",
69 (unsigned long long)*(ptr8 + j)); 103 (unsigned long long)*(ptr8 + j));
70 ascii_column = 17 * ngroups + 2; 104 ascii_column = 17 * ngroups + 2;
71 break; 105 break;
72 } 106 }
@@ -77,7 +111,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
77 111
78 for (j = 0; j < ngroups; j++) 112 for (j = 0; j < ngroups; j++)
79 lx += scnprintf(linebuf + lx, linebuflen - lx, 113 lx += scnprintf(linebuf + lx, linebuflen - lx,
80 "%s%8.8x", j ? " " : "", *(ptr4 + j)); 114 "%s%8.8x", j ? " " : "", *(ptr4 + j));
81 ascii_column = 9 * ngroups + 2; 115 ascii_column = 9 * ngroups + 2;
82 break; 116 break;
83 } 117 }
@@ -88,7 +122,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
88 122
89 for (j = 0; j < ngroups; j++) 123 for (j = 0; j < ngroups; j++)
90 lx += scnprintf(linebuf + lx, linebuflen - lx, 124 lx += scnprintf(linebuf + lx, linebuflen - lx,
91 "%s%4.4x", j ? " " : "", *(ptr2 + j)); 125 "%s%4.4x", j ? " " : "", *(ptr2 + j));
92 ascii_column = 5 * ngroups + 2; 126 ascii_column = 5 * ngroups + 2;
93 break; 127 break;
94 } 128 }
@@ -111,14 +145,16 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
111 145
112 while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) 146 while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
113 linebuf[lx++] = ' '; 147 linebuf[lx++] = ' ';
114 for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) 148 for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
115 linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] 149 ch = ptr[j];
116 : '.'; 150 linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
151 }
117nil: 152nil:
118 linebuf[lx++] = '\0'; 153 linebuf[lx++] = '\0';
119} 154}
120EXPORT_SYMBOL(hex_dump_to_buffer); 155EXPORT_SYMBOL(hex_dump_to_buffer);
121 156
157#ifdef CONFIG_PRINTK
122/** 158/**
123 * print_hex_dump - print a text hex dump to syslog for a binary blob of data 159 * print_hex_dump - print a text hex dump to syslog for a binary blob of data
124 * @level: kernel log level (e.g. KERN_DEBUG) 160 * @level: kernel log level (e.g. KERN_DEBUG)
@@ -143,7 +179,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
143 * 179 *
144 * E.g.: 180 * E.g.:
145 * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, 181 * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS,
146 * 16, 1, frame->data, frame->len, 1); 182 * 16, 1, frame->data, frame->len, true);
147 * 183 *
148 * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: 184 * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode:
149 * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO 185 * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
@@ -151,12 +187,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
151 * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. 187 * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~.
152 */ 188 */
153void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, 189void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
154 int rowsize, int groupsize, 190 int rowsize, int groupsize,
155 const void *buf, size_t len, bool ascii) 191 const void *buf, size_t len, bool ascii)
156{ 192{
157 const u8 *ptr = buf; 193 const u8 *ptr = buf;
158 int i, linelen, remaining = len; 194 int i, linelen, remaining = len;
159 unsigned char linebuf[200]; 195 unsigned char linebuf[32 * 3 + 2 + 32 + 1];
160 196
161 if (rowsize != 16 && rowsize != 32) 197 if (rowsize != 16 && rowsize != 32)
162 rowsize = 16; 198 rowsize = 16;
@@ -164,13 +200,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
164 for (i = 0; i < len; i += rowsize) { 200 for (i = 0; i < len; i += rowsize) {
165 linelen = min(remaining, rowsize); 201 linelen = min(remaining, rowsize);
166 remaining -= rowsize; 202 remaining -= rowsize;
203
167 hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, 204 hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
168 linebuf, sizeof(linebuf), ascii); 205 linebuf, sizeof(linebuf), ascii);
169 206
170 switch (prefix_type) { 207 switch (prefix_type) {
171 case DUMP_PREFIX_ADDRESS: 208 case DUMP_PREFIX_ADDRESS:
172 printk("%s%s%*p: %s\n", level, prefix_str, 209 printk("%s%s%p: %s\n",
173 (int)(2 * sizeof(void *)), ptr + i, linebuf); 210 level, prefix_str, ptr + i, linebuf);
174 break; 211 break;
175 case DUMP_PREFIX_OFFSET: 212 case DUMP_PREFIX_OFFSET:
176 printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); 213 printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
@@ -196,9 +233,10 @@ EXPORT_SYMBOL(print_hex_dump);
196 * rowsize of 16, groupsize of 1, and ASCII output included. 233 * rowsize of 16, groupsize of 1, and ASCII output included.
197 */ 234 */
198void print_hex_dump_bytes(const char *prefix_str, int prefix_type, 235void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
199 const void *buf, size_t len) 236 const void *buf, size_t len)
200{ 237{
201 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, 238 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
202 buf, len, 1); 239 buf, len, true);
203} 240}
204EXPORT_SYMBOL(print_hex_dump_bytes); 241EXPORT_SYMBOL(print_hex_dump_bytes);
242#endif
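
The two new helpers cover the common "parse a fixed-length hex string" case. A minimal sketch follows; the MAC-style literal and buffer name are illustrative only. Note that this hex2bin() returns nothing and does no validation, so the input must already be known-good hex of the right length, while hex_to_bin() works digit by digit and reports bad input with -1.

    static void parse_mac(u8 mac[6])
    {
            /* 12 hex digits -> 6 bytes */
            hex2bin(mac, "001b44a0fe01", 6);
    }

    /* single-digit use: hex_to_bin('a') == 10, hex_to_bin('x') == -1 */
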
diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb129..3c79d50814cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,37 +9,45 @@
9 * The Hamming Weight of a number is the total number of bits set in it. 9 * The Hamming Weight of a number is the total number of bits set in it.
10 */ 10 */
11 11
12unsigned int hweight32(unsigned int w) 12unsigned int __sw_hweight32(unsigned int w)
13{ 13{
14#ifdef ARCH_HAS_FAST_MULTIPLIER
15 w -= (w >> 1) & 0x55555555;
16 w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
17 w = (w + (w >> 4)) & 0x0f0f0f0f;
18 return (w * 0x01010101) >> 24;
19#else
14 unsigned int res = w - ((w >> 1) & 0x55555555); 20 unsigned int res = w - ((w >> 1) & 0x55555555);
15 res = (res & 0x33333333) + ((res >> 2) & 0x33333333); 21 res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
16 res = (res + (res >> 4)) & 0x0F0F0F0F; 22 res = (res + (res >> 4)) & 0x0F0F0F0F;
17 res = res + (res >> 8); 23 res = res + (res >> 8);
18 return (res + (res >> 16)) & 0x000000FF; 24 return (res + (res >> 16)) & 0x000000FF;
25#endif
19} 26}
20EXPORT_SYMBOL(hweight32); 27EXPORT_SYMBOL(__sw_hweight32);
21 28
22unsigned int hweight16(unsigned int w) 29unsigned int __sw_hweight16(unsigned int w)
23{ 30{
24 unsigned int res = w - ((w >> 1) & 0x5555); 31 unsigned int res = w - ((w >> 1) & 0x5555);
25 res = (res & 0x3333) + ((res >> 2) & 0x3333); 32 res = (res & 0x3333) + ((res >> 2) & 0x3333);
26 res = (res + (res >> 4)) & 0x0F0F; 33 res = (res + (res >> 4)) & 0x0F0F;
27 return (res + (res >> 8)) & 0x00FF; 34 return (res + (res >> 8)) & 0x00FF;
28} 35}
29EXPORT_SYMBOL(hweight16); 36EXPORT_SYMBOL(__sw_hweight16);
30 37
31unsigned int hweight8(unsigned int w) 38unsigned int __sw_hweight8(unsigned int w)
32{ 39{
33 unsigned int res = w - ((w >> 1) & 0x55); 40 unsigned int res = w - ((w >> 1) & 0x55);
34 res = (res & 0x33) + ((res >> 2) & 0x33); 41 res = (res & 0x33) + ((res >> 2) & 0x33);
35 return (res + (res >> 4)) & 0x0F; 42 return (res + (res >> 4)) & 0x0F;
36} 43}
37EXPORT_SYMBOL(hweight8); 44EXPORT_SYMBOL(__sw_hweight8);
38 45
39unsigned long hweight64(__u64 w) 46unsigned long __sw_hweight64(__u64 w)
40{ 47{
41#if BITS_PER_LONG == 32 48#if BITS_PER_LONG == 32
42 return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); 49 return __sw_hweight32((unsigned int)(w >> 32)) +
50 __sw_hweight32((unsigned int)w);
43#elif BITS_PER_LONG == 64 51#elif BITS_PER_LONG == 64
44#ifdef ARCH_HAS_FAST_MULTIPLIER 52#ifdef ARCH_HAS_FAST_MULTIPLIER
45 w -= (w >> 1) & 0x5555555555555555ul; 53 w -= (w >> 1) & 0x5555555555555555ul;
@@ -56,4 +64,4 @@ unsigned long hweight64(__u64 w)
56#endif 64#endif
57#endif 65#endif
58} 66}
59EXPORT_SYMBOL(hweight64); 67EXPORT_SYMBOL(__sw_hweight64);
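
Besides the rename, the 32-bit software routine gains an ARCH_HAS_FAST_MULTIPLIER path. The idea: after the first three masking steps each byte of w holds the population count of that byte (0..8), and multiplying by 0x01010101 adds the four byte counts into the most significant byte, which the final shift extracts. A standalone user-space restatement, useful only for checking the arithmetic:

    #include <assert.h>
    #include <stdint.h>

    static unsigned int popcount32_mul(uint32_t w)
    {
            w -= (w >> 1) & 0x55555555;                      /* 2-bit sums      */
            w  = (w & 0x33333333) + ((w >> 2) & 0x33333333); /* 4-bit sums      */
            w  = (w + (w >> 4)) & 0x0f0f0f0f;                /* per-byte counts */
            return (w * 0x01010101) >> 24;                   /* sum the bytes   */
    }

    int main(void)
    {
            assert(popcount32_mul(0x00000000) == 0);
            assert(popcount32_mul(0x00000001) == 1);
            assert(popcount32_mul(0xf0f0f0f0) == 16);
            assert(popcount32_mul(0xffffffff) == 32);
            return 0;
    }
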
diff --git a/lib/idr.c b/lib/idr.c
index 1cac726c44bc..e15502e8b21e 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -106,16 +106,17 @@ static void idr_mark_full(struct idr_layer **pa, int id)
106} 106}
107 107
108/** 108/**
109 * idr_pre_get - reserver resources for idr allocation 109 * idr_pre_get - reserve resources for idr allocation
110 * @idp: idr handle 110 * @idp: idr handle
111 * @gfp_mask: memory allocation flags 111 * @gfp_mask: memory allocation flags
112 * 112 *
113 * This function should be called prior to locking and calling the 113 * This function should be called prior to calling the idr_get_new* functions.
114 * idr_get_new* functions. It preallocates enough memory to satisfy 114 * It preallocates enough memory to satisfy the worst possible allocation. The
115 * the worst possible allocation. 115 * caller should pass in GFP_KERNEL if possible. This of course requires that
116 * no spinning locks be held.
116 * 117 *
117 * If the system is REALLY out of memory this function returns 0, 118 * If the system is REALLY out of memory this function returns %0,
118 * otherwise 1. 119 * otherwise %1.
119 */ 120 */
120int idr_pre_get(struct idr *idp, gfp_t gfp_mask) 121int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
121{ 122{
@@ -156,10 +157,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
156 id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; 157 id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
157 158
158 /* if already at the top layer, we need to grow */ 159 /* if already at the top layer, we need to grow */
159 if (!(p = pa[l])) { 160 if (id >= 1 << (idp->layers * IDR_BITS)) {
160 *starting_id = id; 161 *starting_id = id;
161 return IDR_NEED_TO_GROW; 162 return IDR_NEED_TO_GROW;
162 } 163 }
164 p = pa[l];
165 BUG_ON(!p);
163 166
164 /* If we need to go up one layer, continue the 167 /* If we need to go up one layer, continue the
165 * loop; otherwise, restart from the top. 168 * loop; otherwise, restart from the top.
@@ -282,17 +285,19 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
282 * idr_get_new_above - allocate new idr entry above or equal to a start id 285 * idr_get_new_above - allocate new idr entry above or equal to a start id
283 * @idp: idr handle 286 * @idp: idr handle
284 * @ptr: pointer you want associated with the id 287 * @ptr: pointer you want associated with the id
285 * @start_id: id to start search at 288 * @starting_id: id to start search at
286 * @id: pointer to the allocated handle 289 * @id: pointer to the allocated handle
287 * 290 *
288 * This is the allocate id function. It should be called with any 291 * This is the allocate id function. It should be called with any
289 * required locks. 292 * required locks.
290 * 293 *
291 * If memory is required, it will return -EAGAIN, you should unlock 294 * If allocation from IDR's private freelist fails, idr_get_new_above() will
292 * and go back to the idr_pre_get() call. If the idr is full, it will 295 * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
293 * return -ENOSPC. 296 * IDR's preallocation and then retry the idr_get_new_above() call.
297 *
298 * If the idr is full idr_get_new_above() will return %-ENOSPC.
294 * 299 *
295 * @id returns a value in the range @starting_id ... 0x7fffffff 300 * @id returns a value in the range @starting_id ... %0x7fffffff
296 */ 301 */
297int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) 302int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
298{ 303{
@@ -316,14 +321,13 @@ EXPORT_SYMBOL(idr_get_new_above);
316 * @ptr: pointer you want associated with the id 321 * @ptr: pointer you want associated with the id
317 * @id: pointer to the allocated handle 322 * @id: pointer to the allocated handle
318 * 323 *
319 * This is the allocate id function. It should be called with any 324 * If allocation from IDR's private freelist fails, idr_get_new_above() will
320 * required locks. 325 * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
326 * IDR's preallocation and then retry the idr_get_new_above() call.
321 * 327 *
322 * If memory is required, it will return -EAGAIN, you should unlock 328 * If the idr is full idr_get_new_above() will return %-ENOSPC.
323 * and go back to the idr_pre_get() call. If the idr is full, it will
324 * return -ENOSPC.
325 * 329 *
326 * @id returns a value in the range 0 ... 0x7fffffff 330 * @id returns a value in the range %0 ... %0x7fffffff
327 */ 331 */
328int idr_get_new(struct idr *idp, void *ptr, int *id) 332int idr_get_new(struct idr *idp, void *ptr, int *id)
329{ 333{
@@ -386,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
386} 390}
387 391
388/** 392/**
389 * idr_remove - remove the given id and free it's slot 393 * idr_remove - remove the given id and free its slot
390 * @idp: idr handle 394 * @idp: idr handle
391 * @id: unique key 395 * @id: unique key
392 */ 396 */
@@ -435,7 +439,7 @@ EXPORT_SYMBOL(idr_remove);
435 * function will remove all id mappings and leave all idp_layers 439 * function will remove all id mappings and leave all idp_layers
436 * unused. 440 * unused.
437 * 441 *
438 * A typical clean-up sequence for objects stored in an idr tree, will 442 * A typical clean-up sequence for objects stored in an idr tree will
 439 * use idr_for_each() to free all objects, if necessary, then 443
440 * idr_remove_all() to remove all ids, and idr_destroy() to free 444 * idr_remove_all() to remove all ids, and idr_destroy() to free
441 * up the cached idr_layers. 445 * up the cached idr_layers.
@@ -443,6 +447,7 @@ EXPORT_SYMBOL(idr_remove);
443void idr_remove_all(struct idr *idp) 447void idr_remove_all(struct idr *idp)
444{ 448{
445 int n, id, max; 449 int n, id, max;
450 int bt_mask;
446 struct idr_layer *p; 451 struct idr_layer *p;
447 struct idr_layer *pa[MAX_LEVEL]; 452 struct idr_layer *pa[MAX_LEVEL];
448 struct idr_layer **paa = &pa[0]; 453 struct idr_layer **paa = &pa[0];
@@ -460,8 +465,10 @@ void idr_remove_all(struct idr *idp)
460 p = p->ary[(id >> n) & IDR_MASK]; 465 p = p->ary[(id >> n) & IDR_MASK];
461 } 466 }
462 467
468 bt_mask = id;
463 id += 1 << n; 469 id += 1 << n;
464 while (n < fls(id)) { 470 /* Get the highest bit that the above add changed from 0->1. */
471 while (n < fls(id ^ bt_mask)) {
465 if (p) 472 if (p)
466 free_layer(p); 473 free_layer(p);
467 n += IDR_BITS; 474 n += IDR_BITS;
@@ -474,7 +481,7 @@ EXPORT_SYMBOL(idr_remove_all);
474 481
475/** 482/**
476 * idr_destroy - release all cached layers within an idr tree 483 * idr_destroy - release all cached layers within an idr tree
477 * idp: idr handle 484 * @idp: idr handle
478 */ 485 */
479void idr_destroy(struct idr *idp) 486void idr_destroy(struct idr *idp)
480{ 487{
@@ -502,7 +509,7 @@ void *idr_find(struct idr *idp, int id)
502 int n; 509 int n;
503 struct idr_layer *p; 510 struct idr_layer *p;
504 511
505 p = rcu_dereference(idp->top); 512 p = rcu_dereference_raw(idp->top);
506 if (!p) 513 if (!p)
507 return NULL; 514 return NULL;
508 n = (p->layer+1) * IDR_BITS; 515 n = (p->layer+1) * IDR_BITS;
@@ -517,7 +524,7 @@ void *idr_find(struct idr *idp, int id)
517 while (n > 0 && p) { 524 while (n > 0 && p) {
518 n -= IDR_BITS; 525 n -= IDR_BITS;
519 BUG_ON(n != p->layer*IDR_BITS); 526 BUG_ON(n != p->layer*IDR_BITS);
520 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 527 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
521 } 528 }
522 return((void *)p); 529 return((void *)p);
523} 530}
@@ -537,7 +544,7 @@ EXPORT_SYMBOL(idr_find);
537 * not allowed. 544 * not allowed.
538 * 545 *
539 * We check the return of @fn each time. If it returns anything other 546 * We check the return of @fn each time. If it returns anything other
540 * than 0, we break out and return that value. 547 * than %0, we break out and return that value.
541 * 548 *
542 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). 549 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
543 */ 550 */
@@ -550,7 +557,7 @@ int idr_for_each(struct idr *idp,
550 struct idr_layer **paa = &pa[0]; 557 struct idr_layer **paa = &pa[0];
551 558
552 n = idp->layers * IDR_BITS; 559 n = idp->layers * IDR_BITS;
553 p = rcu_dereference(idp->top); 560 p = rcu_dereference_raw(idp->top);
554 max = 1 << n; 561 max = 1 << n;
555 562
556 id = 0; 563 id = 0;
@@ -558,7 +565,7 @@ int idr_for_each(struct idr *idp,
558 while (n > 0 && p) { 565 while (n > 0 && p) {
559 n -= IDR_BITS; 566 n -= IDR_BITS;
560 *paa++ = p; 567 *paa++ = p;
561 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 568 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
562 } 569 }
563 570
564 if (p) { 571 if (p) {
@@ -581,10 +588,11 @@ EXPORT_SYMBOL(idr_for_each);
581/** 588/**
582 * idr_get_next - lookup next object of id to given id. 589 * idr_get_next - lookup next object of id to given id.
583 * @idp: idr handle 590 * @idp: idr handle
584 * @id: pointer to lookup key 591 * @nextidp: pointer to lookup key
585 * 592 *
586 * Returns pointer to registered object with id, which is next number to 593 * Returns pointer to registered object with id, which is next number to
587 * given id. 594 * given id. After being looked up, *@nextidp will be updated for the next
595 * iteration.
588 */ 596 */
589 597
590void *idr_get_next(struct idr *idp, int *nextidp) 598void *idr_get_next(struct idr *idp, int *nextidp)
@@ -597,7 +605,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
597 /* find first ent */ 605 /* find first ent */
598 n = idp->layers * IDR_BITS; 606 n = idp->layers * IDR_BITS;
599 max = 1 << n; 607 max = 1 << n;
600 p = rcu_dereference(idp->top); 608 p = rcu_dereference_raw(idp->top);
601 if (!p) 609 if (!p)
602 return NULL; 610 return NULL;
603 611
@@ -605,7 +613,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
605 while (n > 0 && p) { 613 while (n > 0 && p) {
606 n -= IDR_BITS; 614 n -= IDR_BITS;
607 *paa++ = p; 615 *paa++ = p;
608 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 616 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
609 } 617 }
610 618
611 if (p) { 619 if (p) {
@@ -621,7 +629,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
621 } 629 }
622 return NULL; 630 return NULL;
623} 631}
624 632EXPORT_SYMBOL(idr_get_next);
625 633
626 634
627/** 635/**
@@ -631,8 +639,8 @@ void *idr_get_next(struct idr *idp, int *nextidp)
631 * @id: lookup key 639 * @id: lookup key
632 * 640 *
633 * Replace the pointer registered with an id and return the old value. 641 * Replace the pointer registered with an id and return the old value.
634 * A -ENOENT return indicates that @id was not found. 642 * A %-ENOENT return indicates that @id was not found.
635 * A -EINVAL return indicates that @id was not within valid constraints. 643 * A %-EINVAL return indicates that @id was not within valid constraints.
636 * 644 *
637 * The caller must serialize with writers. 645 * The caller must serialize with writers.
638 */ 646 */
@@ -690,10 +698,11 @@ void idr_init(struct idr *idp)
690EXPORT_SYMBOL(idr_init); 698EXPORT_SYMBOL(idr_init);
691 699
692 700
693/* 701/**
702 * DOC: IDA description
694 * IDA - IDR based ID allocator 703 * IDA - IDR based ID allocator
695 * 704 *
696 * this is id allocator without id -> pointer translation. Memory 705 * This is id allocator without id -> pointer translation. Memory
697 * usage is much lower than full blown idr because each id only 706 * usage is much lower than full blown idr because each id only
698 * occupies a bit. ida uses a custom leaf node which contains 707 * occupies a bit. ida uses a custom leaf node which contains
699 * IDA_BITMAP_BITS slots. 708 * IDA_BITMAP_BITS slots.
@@ -726,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
726 * following function. It preallocates enough memory to satisfy the 735 * following function. It preallocates enough memory to satisfy the
727 * worst possible allocation. 736 * worst possible allocation.
728 * 737 *
729 * If the system is REALLY out of memory this function returns 0, 738 * If the system is REALLY out of memory this function returns %0,
730 * otherwise 1. 739 * otherwise %1.
731 */ 740 */
732int ida_pre_get(struct ida *ida, gfp_t gfp_mask) 741int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
733{ 742{
@@ -753,17 +762,17 @@ EXPORT_SYMBOL(ida_pre_get);
753/** 762/**
754 * ida_get_new_above - allocate new ID above or equal to a start id 763 * ida_get_new_above - allocate new ID above or equal to a start id
755 * @ida: ida handle 764 * @ida: ida handle
756 * @staring_id: id to start search at 765 * @starting_id: id to start search at
757 * @p_id: pointer to the allocated handle 766 * @p_id: pointer to the allocated handle
758 * 767 *
759 * Allocate new ID above or equal to @ida. It should be called with 768 * Allocate new ID above or equal to @ida. It should be called with
760 * any required locks. 769 * any required locks.
761 * 770 *
762 * If memory is required, it will return -EAGAIN, you should unlock 771 * If memory is required, it will return %-EAGAIN, you should unlock
763 * and go back to the ida_pre_get() call. If the ida is full, it will 772 * and go back to the ida_pre_get() call. If the ida is full, it will
764 * return -ENOSPC. 773 * return %-ENOSPC.
765 * 774 *
766 * @p_id returns a value in the range @starting_id ... 0x7fffffff. 775 * @p_id returns a value in the range @starting_id ... %0x7fffffff.
767 */ 776 */
768int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) 777int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
769{ 778{
@@ -845,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above);
845 * 854 *
846 * Allocate new ID. It should be called with any required locks. 855 * Allocate new ID. It should be called with any required locks.
847 * 856 *
848 * If memory is required, it will return -EAGAIN, you should unlock 857 * If memory is required, it will return %-EAGAIN, you should unlock
849 * and go back to the idr_pre_get() call. If the idr is full, it will 858 * and go back to the idr_pre_get() call. If the idr is full, it will
850 * return -ENOSPC. 859 * return %-ENOSPC.
851 * 860 *
852 * @id returns a value in the range 0 ... 0x7fffffff. 861 * @id returns a value in the range %0 ... %0x7fffffff.
853 */ 862 */
854int ida_get_new(struct ida *ida, int *p_id) 863int ida_get_new(struct ida *ida, int *p_id)
855{ 864{
@@ -907,7 +916,7 @@ EXPORT_SYMBOL(ida_remove);
907 916
908/** 917/**
909 * ida_destroy - release all cached layers within an ida tree 918 * ida_destroy - release all cached layers within an ida tree
910 * ida: ida handle 919 * @ida: ida handle
911 */ 920 */
912void ida_destroy(struct ida *ida) 921void ida_destroy(struct ida *ida)
913{ 922{
diff --git a/lib/inflate.c b/lib/inflate.c
index d10255973a9f..013a76193481 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -103,6 +103,9 @@
103 the two sets of lengths. 103 the two sets of lengths.
104 */ 104 */
105#include <linux/compiler.h> 105#include <linux/compiler.h>
106#ifdef NO_INFLATE_MALLOC
107#include <linux/slab.h>
108#endif
106 109
107#ifdef RCSID 110#ifdef RCSID
108static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #"; 111static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c0251f4ad08b..da053313ee5c 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -38,12 +38,3 @@ again:
38 return -1; 38 return -1;
39} 39}
40EXPORT_SYMBOL(iommu_area_alloc); 40EXPORT_SYMBOL(iommu_area_alloc);
41
42unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
43 unsigned long io_page_size)
44{
45 unsigned long size = (addr & (io_page_size - 1)) + len;
46
47 return DIV_ROUND_UP(size, io_page_size);
48}
49EXPORT_SYMBOL(iommu_num_pages);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a2..da4e2ad74b68 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -9,14 +9,15 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/io.h> 11#include <linux/io.h>
12#include <linux/module.h>
12#include <asm/cacheflush.h> 13#include <asm/cacheflush.h>
13#include <asm/pgtable.h> 14#include <asm/pgtable.h>
14 15
15static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, 16static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
16 unsigned long end, unsigned long phys_addr, pgprot_t prot) 17 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
17{ 18{
18 pte_t *pte; 19 pte_t *pte;
19 unsigned long pfn; 20 u64 pfn;
20 21
21 pfn = phys_addr >> PAGE_SHIFT; 22 pfn = phys_addr >> PAGE_SHIFT;
22 pte = pte_alloc_kernel(pmd, addr); 23 pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +32,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
31} 32}
32 33
33static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, 34static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
34 unsigned long end, unsigned long phys_addr, pgprot_t prot) 35 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
35{ 36{
36 pmd_t *pmd; 37 pmd_t *pmd;
37 unsigned long next; 38 unsigned long next;
@@ -49,7 +50,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
49} 50}
50 51
51static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, 52static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
52 unsigned long end, unsigned long phys_addr, pgprot_t prot) 53 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
53{ 54{
54 pud_t *pud; 55 pud_t *pud;
55 unsigned long next; 56 unsigned long next;
@@ -67,7 +68,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
67} 68}
68 69
69int ioremap_page_range(unsigned long addr, 70int ioremap_page_range(unsigned long addr,
70 unsigned long end, unsigned long phys_addr, pgprot_t prot) 71 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
71{ 72{
72 pgd_t *pgd; 73 pgd_t *pgd;
73 unsigned long start; 74 unsigned long start;
@@ -90,3 +91,4 @@ int ioremap_page_range(unsigned long addr,
90 91
91 return err; 92 return err;
92} 93}
94EXPORT_SYMBOL_GPL(ioremap_page_range);
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index c5ff1fd10030..9c4233b23783 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -6,6 +6,7 @@
6 6
7#include <stdarg.h> 7#include <stdarg.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/slab.h>
9#include <linux/types.h> 10#include <linux/types.h>
10#include <linux/string.h> 11#include <linux/string.h>
11 12
diff --git a/lib/kobject.c b/lib/kobject.c
index b512b746d2af..82dc34c095c2 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
700 return ret; 700 return ret;
701} 701}
702 702
703struct sysfs_ops kobj_sysfs_ops = { 703const struct sysfs_ops kobj_sysfs_ops = {
704 .show = kobj_attr_show, 704 .show = kobj_attr_show,
705 .store = kobj_attr_store, 705 .store = kobj_attr_store,
706}; 706};
@@ -746,17 +746,56 @@ void kset_unregister(struct kset *k)
746 */ 746 */
747struct kobject *kset_find_obj(struct kset *kset, const char *name) 747struct kobject *kset_find_obj(struct kset *kset, const char *name)
748{ 748{
749 return kset_find_obj_hinted(kset, name, NULL);
750}
751
752/**
753 * kset_find_obj_hinted - search for object in kset given a predecessor hint.
754 * @kset: kset we're looking in.
755 * @name: object's name.
756 * @hint: hint to possible object's predecessor.
757 *
758 * Check the hint's next object and if it is a match return it directly,
759 * otherwise, fall back to the behavior of kset_find_obj(). Either way
760 * a reference for the returned object is held and the reference on the
761 * hinted object is released.
762 */
763struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name,
764 struct kobject *hint)
765{
749 struct kobject *k; 766 struct kobject *k;
750 struct kobject *ret = NULL; 767 struct kobject *ret = NULL;
751 768
752 spin_lock(&kset->list_lock); 769 spin_lock(&kset->list_lock);
770
771 if (!hint)
772 goto slow_search;
773
774 /* end of list detection */
775 if (hint->entry.next == kset->list.next)
776 goto slow_search;
777
778 k = container_of(hint->entry.next, struct kobject, entry);
779 if (!kobject_name(k) || strcmp(kobject_name(k), name))
780 goto slow_search;
781
782 ret = kobject_get(k);
783 goto unlock_exit;
784
785slow_search:
753 list_for_each_entry(k, &kset->list, entry) { 786 list_for_each_entry(k, &kset->list, entry) {
754 if (kobject_name(k) && !strcmp(kobject_name(k), name)) { 787 if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
755 ret = kobject_get(k); 788 ret = kobject_get(k);
756 break; 789 break;
757 } 790 }
758 } 791 }
792
793unlock_exit:
759 spin_unlock(&kset->list_lock); 794 spin_unlock(&kset->list_lock);
795
796 if (hint)
797 kobject_put(hint);
798
760 return ret; 799 return ret;
761} 800}
762 801
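
The hint variant pays off when callers look objects up roughly in list order, because each previous result can be fed back in. A hypothetical walk follows (my_kset and the port names are placeholders); note the reference handling the comment above describes: the function always drops the reference on the hint and returns a new reference on a match.

    static void lookup_ports(struct kset *my_kset)
    {
            static const char *names[] = { "port0", "port1", "port2" };
            struct kobject *k = NULL, *hint = NULL;
            size_t i;

            for (i = 0; i < ARRAY_SIZE(names); i++) {
                    /* fast path: checks hint->entry.next before the full scan */
                    k = kset_find_obj_hinted(my_kset, names[i], hint);
                    if (!k)
                            break;          /* reference on hint already dropped */
                    hint = k;
            }

            if (k)
                    kobject_put(k);         /* drop the reference on the last match */
    }
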
@@ -789,7 +828,7 @@ static struct kobj_type kset_ktype = {
789 * If the kset was not able to be created, NULL will be returned. 828 * If the kset was not able to be created, NULL will be returned.
790 */ 829 */
791static struct kset *kset_create(const char *name, 830static struct kset *kset_create(const char *name,
792 struct kset_uevent_ops *uevent_ops, 831 const struct kset_uevent_ops *uevent_ops,
793 struct kobject *parent_kobj) 832 struct kobject *parent_kobj)
794{ 833{
795 struct kset *kset; 834 struct kset *kset;
@@ -832,7 +871,7 @@ static struct kset *kset_create(const char *name,
832 * If the kset was not able to be created, NULL will be returned. 871 * If the kset was not able to be created, NULL will be returned.
833 */ 872 */
834struct kset *kset_create_and_add(const char *name, 873struct kset *kset_create_and_add(const char *name,
835 struct kset_uevent_ops *uevent_ops, 874 const struct kset_uevent_ops *uevent_ops,
836 struct kobject *parent_kobj) 875 struct kobject *parent_kobj)
837{ 876{
838 struct kset *kset; 877 struct kset *kset;
@@ -850,6 +889,121 @@ struct kset *kset_create_and_add(const char *name,
850} 889}
851EXPORT_SYMBOL_GPL(kset_create_and_add); 890EXPORT_SYMBOL_GPL(kset_create_and_add);
852 891
892
893static DEFINE_SPINLOCK(kobj_ns_type_lock);
894static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
895
896int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
897{
898 enum kobj_ns_type type = ops->type;
899 int error;
900
901 spin_lock(&kobj_ns_type_lock);
902
903 error = -EINVAL;
904 if (type >= KOBJ_NS_TYPES)
905 goto out;
906
907 error = -EINVAL;
908 if (type <= KOBJ_NS_TYPE_NONE)
909 goto out;
910
911 error = -EBUSY;
912 if (kobj_ns_ops_tbl[type])
913 goto out;
914
915 error = 0;
916 kobj_ns_ops_tbl[type] = ops;
917
918out:
919 spin_unlock(&kobj_ns_type_lock);
920 return error;
921}
922
923int kobj_ns_type_registered(enum kobj_ns_type type)
924{
925 int registered = 0;
926
927 spin_lock(&kobj_ns_type_lock);
928 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES))
929 registered = kobj_ns_ops_tbl[type] != NULL;
930 spin_unlock(&kobj_ns_type_lock);
931
932 return registered;
933}
934
935const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
936{
937 const struct kobj_ns_type_operations *ops = NULL;
938
939 if (parent && parent->ktype->child_ns_type)
940 ops = parent->ktype->child_ns_type(parent);
941
942 return ops;
943}
944
945const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
946{
947 return kobj_child_ns_ops(kobj->parent);
948}
949
950
951const void *kobj_ns_current(enum kobj_ns_type type)
952{
953 const void *ns = NULL;
954
955 spin_lock(&kobj_ns_type_lock);
956 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
957 kobj_ns_ops_tbl[type])
958 ns = kobj_ns_ops_tbl[type]->current_ns();
959 spin_unlock(&kobj_ns_type_lock);
960
961 return ns;
962}
963
964const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
965{
966 const void *ns = NULL;
967
968 spin_lock(&kobj_ns_type_lock);
969 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
970 kobj_ns_ops_tbl[type])
971 ns = kobj_ns_ops_tbl[type]->netlink_ns(sk);
972 spin_unlock(&kobj_ns_type_lock);
973
974 return ns;
975}
976
977const void *kobj_ns_initial(enum kobj_ns_type type)
978{
979 const void *ns = NULL;
980
981 spin_lock(&kobj_ns_type_lock);
982 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
983 kobj_ns_ops_tbl[type])
984 ns = kobj_ns_ops_tbl[type]->initial_ns();
985 spin_unlock(&kobj_ns_type_lock);
986
987 return ns;
988}
989
990/*
991 * kobj_ns_exit - invalidate a namespace tag
992 *
993 * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET)
994 * @ns: the actual namespace being invalidated
995 *
996 * This is called when a tag is no longer valid. For instance,
997 * when a network namespace exits, it uses this helper to
998 * make sure no sb's sysfs_info points to the now-invalidated
999 * netns.
1000 */
1001void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
1002{
1003 sysfs_exit_ns(type, ns);
1004}
1005
1006
853EXPORT_SYMBOL(kobject_get); 1007EXPORT_SYMBOL(kobject_get);
854EXPORT_SYMBOL(kobject_put); 1008EXPORT_SYMBOL(kobject_put);
855EXPORT_SYMBOL(kobject_del); 1009EXPORT_SYMBOL(kobject_del);
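The kobj_ns_type_register() / kobj_ns_ops() machinery added above is driven by a per-namespace-type operations table. A hedged sketch of how a subsystem might register one follows; the callback bodies are placeholders, and only the field names, the KOBJ_NS_TYPE_NET type and kobj_ns_type_register() itself come from the code above.

#include <linux/init.h>
#include <linux/kobject.h>

static const void *example_current_ns(void)
{
	return NULL;	/* would return the calling task's namespace tag */
}

static const void *example_netlink_ns(struct sock *sk)
{
	return NULL;	/* would derive the tag from the netlink socket */
}

static const void *example_initial_ns(void)
{
	return NULL;	/* would return the boot-time (initial) namespace tag */
}

static const struct kobj_ns_type_operations example_ns_ops = {
	.type       = KOBJ_NS_TYPE_NET,
	.current_ns = example_current_ns,
	.netlink_ns = example_netlink_ns,
	.initial_ns = example_initial_ns,
};

static int __init example_ns_init(void)
{
	return kobj_ns_type_register(&example_ns_ops);
}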
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 920a3ca6e259..70af0a7f97c0 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -18,18 +18,25 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/kobject.h> 19#include <linux/kobject.h>
20#include <linux/module.h> 20#include <linux/module.h>
21 21#include <linux/slab.h>
22#include <linux/user_namespace.h>
22#include <linux/socket.h> 23#include <linux/socket.h>
23#include <linux/skbuff.h> 24#include <linux/skbuff.h>
24#include <linux/netlink.h> 25#include <linux/netlink.h>
25#include <net/sock.h> 26#include <net/sock.h>
27#include <net/net_namespace.h>
26 28
27 29
28u64 uevent_seqnum; 30u64 uevent_seqnum;
29char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; 31char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
30static DEFINE_SPINLOCK(sequence_lock); 32static DEFINE_SPINLOCK(sequence_lock);
31#if defined(CONFIG_NET) 33#ifdef CONFIG_NET
32static struct sock *uevent_sock; 34struct uevent_sock {
35 struct list_head list;
36 struct sock *sk;
37};
38static LIST_HEAD(uevent_sock_list);
39static DEFINE_MUTEX(uevent_sock_mutex);
33#endif 40#endif
34 41
35/* the strings here must match the enum in include/linux/kobject.h */ 42/* the strings here must match the enum in include/linux/kobject.h */
@@ -76,6 +83,39 @@ out:
76 return ret; 83 return ret;
77} 84}
78 85
86#ifdef CONFIG_NET
87static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
88{
89 struct kobject *kobj = data;
90 const struct kobj_ns_type_operations *ops;
91
92 ops = kobj_ns_ops(kobj);
93 if (ops) {
94 const void *sock_ns, *ns;
95 ns = kobj->ktype->namespace(kobj);
96 sock_ns = ops->netlink_ns(dsk);
97 return sock_ns != ns;
98 }
99
100 return 0;
101}
102#endif
103
104static int kobj_usermode_filter(struct kobject *kobj)
105{
106 const struct kobj_ns_type_operations *ops;
107
108 ops = kobj_ns_ops(kobj);
109 if (ops) {
110 const void *init_ns, *ns;
111 ns = kobj->ktype->namespace(kobj);
112 init_ns = ops->initial_ns();
113 return ns != init_ns;
114 }
115
116 return 0;
117}
118
79/** 119/**
80 * kobject_uevent_env - send an uevent with environmental data 120 * kobject_uevent_env - send an uevent with environmental data
81 * 121 *
@@ -83,7 +123,7 @@ out:
83 * @kobj: struct kobject that the action is happening to 123 * @kobj: struct kobject that the action is happening to
84 * @envp_ext: pointer to environmental data 124 * @envp_ext: pointer to environmental data
85 * 125 *
86 * Returns 0 if kobject_uevent() is completed with success or the 126 * Returns 0 if kobject_uevent_env() is completed with success or the
87 * corresponding error when it fails. 127 * corresponding error when it fails.
88 */ 128 */
89int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, 129int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
@@ -95,10 +135,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
95 const char *subsystem; 135 const char *subsystem;
96 struct kobject *top_kobj; 136 struct kobject *top_kobj;
97 struct kset *kset; 137 struct kset *kset;
98 struct kset_uevent_ops *uevent_ops; 138 const struct kset_uevent_ops *uevent_ops;
99 u64 seq; 139 u64 seq;
100 int i = 0; 140 int i = 0;
101 int retval = 0; 141 int retval = 0;
142#ifdef CONFIG_NET
143 struct uevent_sock *ue_sk;
144#endif
102 145
103 pr_debug("kobject: '%s' (%p): %s\n", 146 pr_debug("kobject: '%s' (%p): %s\n",
104 kobject_name(kobj), kobj, __func__); 147 kobject_name(kobj), kobj, __func__);
@@ -210,7 +253,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
210 253
211#if defined(CONFIG_NET) 254#if defined(CONFIG_NET)
212 /* send netlink message */ 255 /* send netlink message */
213 if (uevent_sock) { 256 mutex_lock(&uevent_sock_mutex);
257 list_for_each_entry(ue_sk, &uevent_sock_list, list) {
258 struct sock *uevent_sock = ue_sk->sk;
214 struct sk_buff *skb; 259 struct sk_buff *skb;
215 size_t len; 260 size_t len;
216 261
@@ -232,18 +277,21 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
232 } 277 }
233 278
234 NETLINK_CB(skb).dst_group = 1; 279 NETLINK_CB(skb).dst_group = 1;
235 retval = netlink_broadcast(uevent_sock, skb, 0, 1, 280 retval = netlink_broadcast_filtered(uevent_sock, skb,
236 GFP_KERNEL); 281 0, 1, GFP_KERNEL,
282 kobj_bcast_filter,
283 kobj);
237 /* ENOBUFS should be handled in userspace */ 284 /* ENOBUFS should be handled in userspace */
238 if (retval == -ENOBUFS) 285 if (retval == -ENOBUFS)
239 retval = 0; 286 retval = 0;
240 } else 287 } else
241 retval = -ENOMEM; 288 retval = -ENOMEM;
242 } 289 }
290 mutex_unlock(&uevent_sock_mutex);
243#endif 291#endif
244 292
245 /* call uevent_helper, usually only enabled during early boot */ 293 /* call uevent_helper, usually only enabled during early boot */
246 if (uevent_helper[0]) { 294 if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
247 char *argv [3]; 295 char *argv [3];
248 296
249 argv [0] = uevent_helper; 297 argv [0] = uevent_helper;
@@ -269,7 +317,7 @@ exit:
269EXPORT_SYMBOL_GPL(kobject_uevent_env); 317EXPORT_SYMBOL_GPL(kobject_uevent_env);
270 318
271/** 319/**
272 * kobject_uevent - notify userspace by ending an uevent 320 * kobject_uevent - notify userspace by sending an uevent
273 * 321 *
274 * @action: action that is happening 322 * @action: action that is happening
275 * @kobj: struct kobject that the action is happening to 323 * @kobj: struct kobject that the action is happening to
@@ -319,18 +367,59 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
319EXPORT_SYMBOL_GPL(add_uevent_var); 367EXPORT_SYMBOL_GPL(add_uevent_var);
320 368
321#if defined(CONFIG_NET) 369#if defined(CONFIG_NET)
322static int __init kobject_uevent_init(void) 370static int uevent_net_init(struct net *net)
323{ 371{
324 uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, 372 struct uevent_sock *ue_sk;
325 1, NULL, NULL, THIS_MODULE); 373
326 if (!uevent_sock) { 374 ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
375 if (!ue_sk)
376 return -ENOMEM;
377
378 ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
379 1, NULL, NULL, THIS_MODULE);
380 if (!ue_sk->sk) {
327 printk(KERN_ERR 381 printk(KERN_ERR
328 "kobject_uevent: unable to create netlink socket!\n"); 382 "kobject_uevent: unable to create netlink socket!\n");
383 kfree(ue_sk);
329 return -ENODEV; 384 return -ENODEV;
330 } 385 }
331 netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); 386 mutex_lock(&uevent_sock_mutex);
387 list_add_tail(&ue_sk->list, &uevent_sock_list);
388 mutex_unlock(&uevent_sock_mutex);
332 return 0; 389 return 0;
333} 390}
334 391
392static void uevent_net_exit(struct net *net)
393{
394 struct uevent_sock *ue_sk;
395
396 mutex_lock(&uevent_sock_mutex);
397 list_for_each_entry(ue_sk, &uevent_sock_list, list) {
398 if (sock_net(ue_sk->sk) == net)
399 goto found;
400 }
401 mutex_unlock(&uevent_sock_mutex);
402 return;
403
404found:
405 list_del(&ue_sk->list);
406 mutex_unlock(&uevent_sock_mutex);
407
408 netlink_kernel_release(ue_sk->sk);
409 kfree(ue_sk);
410}
411
412static struct pernet_operations uevent_net_ops = {
413 .init = uevent_net_init,
414 .exit = uevent_net_exit,
415};
416
417static int __init kobject_uevent_init(void)
418{
419 netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
420 return register_pernet_subsys(&uevent_net_ops);
421}
422
423
335postcore_initcall(kobject_uevent_init); 424postcore_initcall(kobject_uevent_init);
336#endif 425#endif
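The uevent filters above key off the kobject's type: kobj_ns_ops() consults the parent ktype's child_ns_type() hook, while the object's own ktype supplies namespace(). A hedged sketch of such a ktype follows; it is not from this patch and the callbacks are placeholders.

#include <linux/kobject.h>

/* would normally live on the ktype of the *parent* kobject */
static const struct kobj_ns_type_operations *
example_child_ns_type(struct kobject *kobj)
{
	return NULL;	/* would return the ops registered via kobj_ns_type_register() */
}

/* and this on the ktype of the namespaced children themselves */
static const void *example_namespace(struct kobject *kobj)
{
	return NULL;	/* would return the tag this object belongs to */
}

static struct kobj_type example_ktype = {
	.child_ns_type = example_child_ns_type,
	.namespace     = example_namespace,
};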
diff --git a/lib/kref.c b/lib/kref.c
index 9ecd6e865610..3efb882b11db 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -13,17 +13,7 @@
13 13
14#include <linux/kref.h> 14#include <linux/kref.h>
15#include <linux/module.h> 15#include <linux/module.h>
16 16#include <linux/slab.h>
17/**
18 * kref_set - initialize object and set refcount to requested number.
19 * @kref: object in question.
20 * @num: initial reference counter
21 */
22void kref_set(struct kref *kref, int num)
23{
24 atomic_set(&kref->refcount, num);
25 smp_mb();
26}
27 17
28/** 18/**
29 * kref_init - initialize object. 19 * kref_init - initialize object.
@@ -31,7 +21,8 @@ void kref_set(struct kref *kref, int num)
31 */ 21 */
32void kref_init(struct kref *kref) 22void kref_init(struct kref *kref)
33{ 23{
34 kref_set(kref, 1); 24 atomic_set(&kref->refcount, 1);
25 smp_mb();
35} 26}
36 27
37/** 28/**
@@ -71,7 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
71 return 0; 62 return 0;
72} 63}
73 64
74EXPORT_SYMBOL(kref_set); 65
66/**
67 * kref_sub - subtract a number of refcounts for object.
68 * @kref: object.
69 * @count: Number of refcounts to subtract.
70 * @release: pointer to the function that will clean up the object when the
71 * last reference to the object is released.
72 * This pointer is required, and it is not acceptable to pass kfree
73 * in as this function.
74 *
75 * Subtract @count from the refcount, and if 0, call release().
76 * Return 1 if the object was removed, otherwise return 0. Beware, if this
77 * function returns 0, you still cannot count on the kref remaining in
78 * memory. Only use the return value if you want to see if the kref is now
79 * gone, not present.
80 */
81int kref_sub(struct kref *kref, unsigned int count,
82 void (*release)(struct kref *kref))
83{
84 WARN_ON(release == NULL);
85 WARN_ON(release == (void (*)(struct kref *))kfree);
86
87 if (atomic_sub_and_test((int) count, &kref->refcount)) {
88 release(kref);
89 return 1;
90 }
91 return 0;
92}
93
75EXPORT_SYMBOL(kref_init); 94EXPORT_SYMBOL(kref_init);
76EXPORT_SYMBOL(kref_get); 95EXPORT_SYMBOL(kref_get);
77EXPORT_SYMBOL(kref_put); 96EXPORT_SYMBOL(kref_put);
97EXPORT_SYMBOL(kref_sub);
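A hedged sketch of the new kref_sub() in use, dropping several references in one call; struct example_obj and example_release() are hypothetical, while the kref_sub() semantics are those documented above.

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct example_obj {
	struct kref ref;
};

static void example_release(struct kref *kref)
{
	kfree(container_of(kref, struct example_obj, ref));
}

static void example_drop_batch(struct example_obj *obj, unsigned int grabbed)
{
	/* equivalent to calling kref_put(&obj->ref, example_release) 'grabbed' times */
	kref_sub(&obj->ref, grabbed, example_release);
}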
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 000000000000..157cd88a6ffc
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
1#include <linux/kernel.h>
2#include <linux/gcd.h>
3#include <linux/module.h>
4
5/* Lowest common multiple */
6unsigned long lcm(unsigned long a, unsigned long b)
7{
8 if (a && b)
9 return (a * b) / gcd(a, b);
10 else if (b)
11 return b;
12
13 return a;
14}
15EXPORT_SYMBOL_GPL(lcm);
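A quick usage sketch for the new lcm() helper (the caller is hypothetical). Note that the product a * b is formed before dividing by gcd(a, b), so callers are expected to keep the inputs small enough that the multiplication does not overflow unsigned long.

#include <linux/lcm.h>

static unsigned long example_combined_period(void)
{
	/* (4 * 6) / gcd(4, 6) = 24 / 2 = 12 */
	return lcm(4, 6);
}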
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 1a39f4e3ae1f..344c710d16ca 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add);
43 */ 43 */
44void list_del(struct list_head *entry) 44void list_del(struct list_head *entry)
45{ 45{
46 WARN(entry->next == LIST_POISON1,
47 "list_del corruption, next is LIST_POISON1 (%p)\n",
48 LIST_POISON1);
49 WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
50 "list_del corruption, prev is LIST_POISON2 (%p)\n",
51 LIST_POISON2);
46 WARN(entry->prev->next != entry, 52 WARN(entry->prev->next != entry,
47 "list_del corruption. prev->next should be %p, " 53 "list_del corruption. prev->next should be %p, "
48 "but was %p\n", entry, entry->prev->next); 54 "but was %p\n", entry, entry->prev->next);
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 19d11e0bb958..d7325c6b103f 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -4,99 +4,288 @@
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/list.h> 5#include <linux/list.h>
6 6
7#define MAX_LIST_LENGTH_BITS 20
8
9/*
10 * Returns a list organized in an intermediate format suited
11 * to chaining of merge() calls: null-terminated, no reserved or
12 * sentinel head node, "prev" links not maintained.
13 */
14static struct list_head *merge(void *priv,
15 int (*cmp)(void *priv, struct list_head *a,
16 struct list_head *b),
17 struct list_head *a, struct list_head *b)
18{
19 struct list_head head, *tail = &head;
20
21 while (a && b) {
22 /* if equal, take 'a' -- important for sort stability */
23 if ((*cmp)(priv, a, b) <= 0) {
24 tail->next = a;
25 a = a->next;
26 } else {
27 tail->next = b;
28 b = b->next;
29 }
30 tail = tail->next;
31 }
32 tail->next = a?:b;
33 return head.next;
34}
35
36/*
37 * Combine final list merge with restoration of standard doubly-linked
38 * list structure. This approach duplicates code from merge(), but
39 * runs faster than the tidier alternatives of either a separate final
40 * prev-link restoration pass, or maintaining the prev links
41 * throughout.
42 */
43static void merge_and_restore_back_links(void *priv,
44 int (*cmp)(void *priv, struct list_head *a,
45 struct list_head *b),
46 struct list_head *head,
47 struct list_head *a, struct list_head *b)
48{
49 struct list_head *tail = head;
50
51 while (a && b) {
52 /* if equal, take 'a' -- important for sort stability */
53 if ((*cmp)(priv, a, b) <= 0) {
54 tail->next = a;
55 a->prev = tail;
56 a = a->next;
57 } else {
58 tail->next = b;
59 b->prev = tail;
60 b = b->next;
61 }
62 tail = tail->next;
63 }
64 tail->next = a ? : b;
65
66 do {
67 /*
68 * In worst cases this loop may run many iterations.
69 * Continue callbacks to the client even though no
70 * element comparison is needed, so the client's cmp()
71 * routine can invoke cond_resched() periodically.
72 */
73 (*cmp)(priv, tail->next, tail->next);
74
75 tail->next->prev = tail;
76 tail = tail->next;
77 } while (tail->next);
78
79 tail->next = head;
80 head->prev = tail;
81}
82
7/** 83/**
8 * list_sort - sort a list. 84 * list_sort - sort a list
9 * @priv: private data, passed to @cmp 85 * @priv: private data, opaque to list_sort(), passed to @cmp
10 * @head: the list to sort 86 * @head: the list to sort
11 * @cmp: the elements comparison function 87 * @cmp: the elements comparison function
12 * 88 *
13 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It 89 * This function implements "merge sort", which has O(nlog(n))
14 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted 90 * complexity.
15 * in ascending order.
16 * 91 *
17 * The comparison function @cmp is supposed to return a negative value if @a is 92 * The comparison function @cmp must return a negative value if @a
18 * less than @b, and a positive value if @a is greater than @b. If @a and @b 93 * should sort before @b, and a positive value if @a should sort after
19 * are equivalent, then it does not matter what this function returns. 94 * @b. If @a and @b are equivalent, and their original relative
95 * ordering is to be preserved, @cmp must return 0.
20 */ 96 */
21void list_sort(void *priv, struct list_head *head, 97void list_sort(void *priv, struct list_head *head,
22 int (*cmp)(void *priv, struct list_head *a, 98 int (*cmp)(void *priv, struct list_head *a,
23 struct list_head *b)) 99 struct list_head *b))
24{ 100{
25 struct list_head *p, *q, *e, *list, *tail, *oldhead; 101 struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
26 int insize, nmerges, psize, qsize, i; 102 -- last slot is a sentinel */
103 int lev; /* index into part[] */
104 int max_lev = 0;
105 struct list_head *list;
27 106
28 if (list_empty(head)) 107 if (list_empty(head))
29 return; 108 return;
30 109
110 memset(part, 0, sizeof(part));
111
112 head->prev->next = NULL;
31 list = head->next; 113 list = head->next;
32 list_del(head);
33 insize = 1;
34 for (;;) {
35 p = oldhead = list;
36 list = tail = NULL;
37 nmerges = 0;
38
39 while (p) {
40 nmerges++;
41 q = p;
42 psize = 0;
43 for (i = 0; i < insize; i++) {
44 psize++;
45 q = q->next == oldhead ? NULL : q->next;
46 if (!q)
47 break;
48 }
49 114
50 qsize = insize; 115 while (list) {
51 while (psize > 0 || (qsize > 0 && q)) { 116 struct list_head *cur = list;
52 if (!psize) { 117 list = list->next;
53 e = q; 118 cur->next = NULL;
54 q = q->next; 119
55 qsize--; 120 for (lev = 0; part[lev]; lev++) {
56 if (q == oldhead) 121 cur = merge(priv, cmp, part[lev], cur);
57 q = NULL; 122 part[lev] = NULL;
58 } else if (!qsize || !q) { 123 }
59 e = p; 124 if (lev > max_lev) {
60 p = p->next; 125 if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
61 psize--; 126 printk_once(KERN_DEBUG "list passed to"
62 if (p == oldhead) 127 " list_sort() too long for"
63 p = NULL; 128 " efficiency\n");
64 } else if (cmp(priv, p, q) <= 0) { 129 lev--;
65 e = p;
66 p = p->next;
67 psize--;
68 if (p == oldhead)
69 p = NULL;
70 } else {
71 e = q;
72 q = q->next;
73 qsize--;
74 if (q == oldhead)
75 q = NULL;
76 }
77 if (tail)
78 tail->next = e;
79 else
80 list = e;
81 e->prev = tail;
82 tail = e;
83 } 130 }
84 p = q; 131 max_lev = lev;
85 } 132 }
133 part[lev] = cur;
134 }
135
136 for (lev = 0; lev < max_lev; lev++)
137 if (part[lev])
138 list = merge(priv, cmp, part[lev], list);
139
140 merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
141}
142EXPORT_SYMBOL(list_sort);
143
144#ifdef CONFIG_TEST_LIST_SORT
145
146#include <linux/random.h>
86 147
87 tail->next = list; 148/*
88 list->prev = tail; 149 * The pattern of set bits in the list length determines which cases
150 * are hit in list_sort().
151 */
152#define TEST_LIST_LEN (512+128+2) /* not including head */
153
154#define TEST_POISON1 0xDEADBEEF
155#define TEST_POISON2 0xA324354C
89 156
90 if (nmerges <= 1) 157struct debug_el {
91 break; 158 unsigned int poison1;
159 struct list_head list;
160 unsigned int poison2;
161 int value;
162 unsigned serial;
163};
92 164
93 insize *= 2; 165/* Array, containing pointers to all elements in the test list */
166static struct debug_el **elts __initdata;
167
168static int __init check(struct debug_el *ela, struct debug_el *elb)
169{
170 if (ela->serial >= TEST_LIST_LEN) {
171 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
172 ela->serial);
173 return -EINVAL;
94 } 174 }
175 if (elb->serial >= TEST_LIST_LEN) {
176 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
177 elb->serial);
178 return -EINVAL;
179 }
180 if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
181 printk(KERN_ERR "list_sort_test: error: phantom element\n");
182 return -EINVAL;
183 }
184 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
185 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
186 ela->poison1, ela->poison2);
187 return -EINVAL;
188 }
189 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
190 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
191 elb->poison1, elb->poison2);
192 return -EINVAL;
193 }
194 return 0;
195}
196
197static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
198{
199 struct debug_el *ela, *elb;
200
201 ela = container_of(a, struct debug_el, list);
202 elb = container_of(b, struct debug_el, list);
95 203
96 head->next = list; 204 check(ela, elb);
97 head->prev = list->prev; 205 return ela->value - elb->value;
98 list->prev->next = head;
99 list->prev = head;
100} 206}
101 207
102EXPORT_SYMBOL(list_sort); 208static int __init list_sort_test(void)
209{
210 int i, count = 1, err = -EINVAL;
211 struct debug_el *el;
212 struct list_head *cur, *tmp;
213 LIST_HEAD(head);
214
215 printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
216
217 elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
218 if (!elts) {
219 printk(KERN_ERR "list_sort_test: error: cannot allocate "
220 "memory\n");
221 goto exit;
222 }
223
224 for (i = 0; i < TEST_LIST_LEN; i++) {
225 el = kmalloc(sizeof(*el), GFP_KERNEL);
226 if (!el) {
227 printk(KERN_ERR "list_sort_test: error: cannot "
228 "allocate memory\n");
229 goto exit;
230 }
231 /* force some equivalencies */
232 el->value = random32() % (TEST_LIST_LEN/3);
233 el->serial = i;
234 el->poison1 = TEST_POISON1;
235 el->poison2 = TEST_POISON2;
236 elts[i] = el;
237 list_add_tail(&el->list, &head);
238 }
239
240 list_sort(NULL, &head, cmp);
241
242 for (cur = head.next; cur->next != &head; cur = cur->next) {
243 struct debug_el *el1;
244 int cmp_result;
245
246 if (cur->next->prev != cur) {
247 printk(KERN_ERR "list_sort_test: error: list is "
248 "corrupted\n");
249 goto exit;
250 }
251
252 cmp_result = cmp(NULL, cur, cur->next);
253 if (cmp_result > 0) {
254 printk(KERN_ERR "list_sort_test: error: list is not "
255 "sorted\n");
256 goto exit;
257 }
258
259 el = container_of(cur, struct debug_el, list);
260 el1 = container_of(cur->next, struct debug_el, list);
261 if (cmp_result == 0 && el->serial >= el1->serial) {
262 printk(KERN_ERR "list_sort_test: error: order of "
263 "equivalent elements not preserved\n");
264 goto exit;
265 }
266
267 if (check(el, el1)) {
268 printk(KERN_ERR "list_sort_test: error: element check "
269 "failed\n");
270 goto exit;
271 }
272 count++;
273 }
274
275 if (count != TEST_LIST_LEN) {
276 printk(KERN_ERR "list_sort_test: error: bad list length %d",
277 count);
278 goto exit;
279 }
280
281 err = 0;
282exit:
283 kfree(elts);
284 list_for_each_safe(cur, tmp, &head) {
285 list_del(cur);
286 kfree(container_of(cur, struct debug_el, list));
287 }
288 return err;
289}
290module_init(list_sort_test);
291#endif /* CONFIG_TEST_LIST_SORT */
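A hedged usage sketch for list_sort() with the cmp() contract documented above; struct example_item and its key field are hypothetical.

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/list_sort.h>

struct example_item {
	struct list_head node;
	int key;
};

static int example_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct example_item *ia = container_of(a, struct example_item, node);
	struct example_item *ib = container_of(b, struct example_item, node);

	/* negative/zero/positive; returning 0 keeps equivalent items in order */
	return ia->key - ib->key;
}

static void example_sort(struct list_head *items)
{
	list_sort(NULL, items, example_cmp);	/* ascending by ->key */
}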
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index 9cee17142b2c..000000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,532 +0,0 @@
1/*
2 * Procedures for maintaining information about logical memory blocks.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/bitops.h>
16#include <linux/lmb.h>
17
18#define LMB_ALLOC_ANYWHERE 0
19
20struct lmb lmb;
21
22static int lmb_debug;
23
24static int __init early_lmb(char *p)
25{
26 if (p && strstr(p, "debug"))
27 lmb_debug = 1;
28 return 0;
29}
30early_param("lmb", early_lmb);
31
32static void lmb_dump(struct lmb_region *region, char *name)
33{
34 unsigned long long base, size;
35 int i;
36
37 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
38
39 for (i = 0; i < region->cnt; i++) {
40 base = region->region[i].base;
41 size = region->region[i].size;
42
43 pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
44 name, i, base, base + size - 1, size);
45 }
46}
47
48void lmb_dump_all(void)
49{
50 if (!lmb_debug)
51 return;
52
53 pr_info("LMB configuration:\n");
54 pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size);
55 pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
56
57 lmb_dump(&lmb.memory, "memory");
58 lmb_dump(&lmb.reserved, "reserved");
59}
60
61static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
62 u64 size2)
63{
64 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
65}
66
67static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
68{
69 if (base2 == base1 + size1)
70 return 1;
71 else if (base1 == base2 + size2)
72 return -1;
73
74 return 0;
75}
76
77static long lmb_regions_adjacent(struct lmb_region *rgn,
78 unsigned long r1, unsigned long r2)
79{
80 u64 base1 = rgn->region[r1].base;
81 u64 size1 = rgn->region[r1].size;
82 u64 base2 = rgn->region[r2].base;
83 u64 size2 = rgn->region[r2].size;
84
85 return lmb_addrs_adjacent(base1, size1, base2, size2);
86}
87
88static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
89{
90 unsigned long i;
91
92 for (i = r; i < rgn->cnt - 1; i++) {
93 rgn->region[i].base = rgn->region[i + 1].base;
94 rgn->region[i].size = rgn->region[i + 1].size;
95 }
96 rgn->cnt--;
97}
98
99/* Assumption: base addr of region 1 < base addr of region 2 */
100static void lmb_coalesce_regions(struct lmb_region *rgn,
101 unsigned long r1, unsigned long r2)
102{
103 rgn->region[r1].size += rgn->region[r2].size;
104 lmb_remove_region(rgn, r2);
105}
106
107void __init lmb_init(void)
108{
109 /* Create a dummy zero size LMB which will get coalesced away later.
110 * This simplifies the lmb_add() code below...
111 */
112 lmb.memory.region[0].base = 0;
113 lmb.memory.region[0].size = 0;
114 lmb.memory.cnt = 1;
115
116 /* Ditto. */
117 lmb.reserved.region[0].base = 0;
118 lmb.reserved.region[0].size = 0;
119 lmb.reserved.cnt = 1;
120}
121
122void __init lmb_analyze(void)
123{
124 int i;
125
126 lmb.memory.size = 0;
127
128 for (i = 0; i < lmb.memory.cnt; i++)
129 lmb.memory.size += lmb.memory.region[i].size;
130}
131
132static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
133{
134 unsigned long coalesced = 0;
135 long adjacent, i;
136
137 if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
138 rgn->region[0].base = base;
139 rgn->region[0].size = size;
140 return 0;
141 }
142
143 /* First try and coalesce this LMB with another. */
144 for (i = 0; i < rgn->cnt; i++) {
145 u64 rgnbase = rgn->region[i].base;
146 u64 rgnsize = rgn->region[i].size;
147
148 if ((rgnbase == base) && (rgnsize == size))
149 /* Already have this region, so we're done */
150 return 0;
151
152 adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
153 if (adjacent > 0) {
154 rgn->region[i].base -= size;
155 rgn->region[i].size += size;
156 coalesced++;
157 break;
158 } else if (adjacent < 0) {
159 rgn->region[i].size += size;
160 coalesced++;
161 break;
162 }
163 }
164
165 if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
166 lmb_coalesce_regions(rgn, i, i+1);
167 coalesced++;
168 }
169
170 if (coalesced)
171 return coalesced;
172 if (rgn->cnt >= MAX_LMB_REGIONS)
173 return -1;
174
175 /* Couldn't coalesce the LMB, so add it to the sorted table. */
176 for (i = rgn->cnt - 1; i >= 0; i--) {
177 if (base < rgn->region[i].base) {
178 rgn->region[i+1].base = rgn->region[i].base;
179 rgn->region[i+1].size = rgn->region[i].size;
180 } else {
181 rgn->region[i+1].base = base;
182 rgn->region[i+1].size = size;
183 break;
184 }
185 }
186
187 if (base < rgn->region[0].base) {
188 rgn->region[0].base = base;
189 rgn->region[0].size = size;
190 }
191 rgn->cnt++;
192
193 return 0;
194}
195
196long lmb_add(u64 base, u64 size)
197{
198 struct lmb_region *_rgn = &lmb.memory;
199
200 /* On pSeries LPAR systems, the first LMB is our RMO region. */
201 if (base == 0)
202 lmb.rmo_size = size;
203
204 return lmb_add_region(_rgn, base, size);
205
206}
207
208long lmb_remove(u64 base, u64 size)
209{
210 struct lmb_region *rgn = &(lmb.memory);
211 u64 rgnbegin, rgnend;
212 u64 end = base + size;
213 int i;
214
215 rgnbegin = rgnend = 0; /* suppress gcc warnings */
216
217 /* Find the region where (base, size) belongs to */
218 for (i=0; i < rgn->cnt; i++) {
219 rgnbegin = rgn->region[i].base;
220 rgnend = rgnbegin + rgn->region[i].size;
221
222 if ((rgnbegin <= base) && (end <= rgnend))
223 break;
224 }
225
226 /* Didn't find the region */
227 if (i == rgn->cnt)
228 return -1;
229
230 /* Check to see if we are removing entire region */
231 if ((rgnbegin == base) && (rgnend == end)) {
232 lmb_remove_region(rgn, i);
233 return 0;
234 }
235
236 /* Check to see if region is matching at the front */
237 if (rgnbegin == base) {
238 rgn->region[i].base = end;
239 rgn->region[i].size -= size;
240 return 0;
241 }
242
243 /* Check to see if the region is matching at the end */
244 if (rgnend == end) {
245 rgn->region[i].size -= size;
246 return 0;
247 }
248
249 /*
250 * We need to split the entry - adjust the current one to the
251 * beginning of the hole and add the region after the hole.
252 */
253 rgn->region[i].size = base - rgn->region[i].base;
254 return lmb_add_region(rgn, end, rgnend - end);
255}
256
257long __init lmb_reserve(u64 base, u64 size)
258{
259 struct lmb_region *_rgn = &lmb.reserved;
260
261 BUG_ON(0 == size);
262
263 return lmb_add_region(_rgn, base, size);
264}
265
266long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
267{
268 unsigned long i;
269
270 for (i = 0; i < rgn->cnt; i++) {
271 u64 rgnbase = rgn->region[i].base;
272 u64 rgnsize = rgn->region[i].size;
273 if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
274 break;
275 }
276
277 return (i < rgn->cnt) ? i : -1;
278}
279
280static u64 lmb_align_down(u64 addr, u64 size)
281{
282 return addr & ~(size - 1);
283}
284
285static u64 lmb_align_up(u64 addr, u64 size)
286{
287 return (addr + (size - 1)) & ~(size - 1);
288}
289
290static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
291 u64 size, u64 align)
292{
293 u64 base, res_base;
294 long j;
295
296 base = lmb_align_down((end - size), align);
297 while (start <= base) {
298 j = lmb_overlaps_region(&lmb.reserved, base, size);
299 if (j < 0) {
300 /* this area isn't reserved, take it */
301 if (lmb_add_region(&lmb.reserved, base, size) < 0)
302 base = ~(u64)0;
303 return base;
304 }
305 res_base = lmb.reserved.region[j].base;
306 if (res_base < size)
307 break;
308 base = lmb_align_down(res_base - size, align);
309 }
310
311 return ~(u64)0;
312}
313
314static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
315 u64 (*nid_range)(u64, u64, int *),
316 u64 size, u64 align, int nid)
317{
318 u64 start, end;
319
320 start = mp->base;
321 end = start + mp->size;
322
323 start = lmb_align_up(start, align);
324 while (start < end) {
325 u64 this_end;
326 int this_nid;
327
328 this_end = nid_range(start, end, &this_nid);
329 if (this_nid == nid) {
330 u64 ret = lmb_alloc_nid_unreserved(start, this_end,
331 size, align);
332 if (ret != ~(u64)0)
333 return ret;
334 }
335 start = this_end;
336 }
337
338 return ~(u64)0;
339}
340
341u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
342 u64 (*nid_range)(u64 start, u64 end, int *nid))
343{
344 struct lmb_region *mem = &lmb.memory;
345 int i;
346
347 BUG_ON(0 == size);
348
349 size = lmb_align_up(size, align);
350
351 for (i = 0; i < mem->cnt; i++) {
352 u64 ret = lmb_alloc_nid_region(&mem->region[i],
353 nid_range,
354 size, align, nid);
355 if (ret != ~(u64)0)
356 return ret;
357 }
358
359 return lmb_alloc(size, align);
360}
361
362u64 __init lmb_alloc(u64 size, u64 align)
363{
364 return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
365}
366
367u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
368{
369 u64 alloc;
370
371 alloc = __lmb_alloc_base(size, align, max_addr);
372
373 if (alloc == 0)
374 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
375 (unsigned long long) size, (unsigned long long) max_addr);
376
377 return alloc;
378}
379
380u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
381{
382 long i, j;
383 u64 base = 0;
384 u64 res_base;
385
386 BUG_ON(0 == size);
387
388 size = lmb_align_up(size, align);
389
390 /* On some platforms, make sure we allocate lowmem */
391 /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
392 if (max_addr == LMB_ALLOC_ANYWHERE)
393 max_addr = LMB_REAL_LIMIT;
394
395 for (i = lmb.memory.cnt - 1; i >= 0; i--) {
396 u64 lmbbase = lmb.memory.region[i].base;
397 u64 lmbsize = lmb.memory.region[i].size;
398
399 if (lmbsize < size)
400 continue;
401 if (max_addr == LMB_ALLOC_ANYWHERE)
402 base = lmb_align_down(lmbbase + lmbsize - size, align);
403 else if (lmbbase < max_addr) {
404 base = min(lmbbase + lmbsize, max_addr);
405 base = lmb_align_down(base - size, align);
406 } else
407 continue;
408
409 while (base && lmbbase <= base) {
410 j = lmb_overlaps_region(&lmb.reserved, base, size);
411 if (j < 0) {
412 /* this area isn't reserved, take it */
413 if (lmb_add_region(&lmb.reserved, base, size) < 0)
414 return 0;
415 return base;
416 }
417 res_base = lmb.reserved.region[j].base;
418 if (res_base < size)
419 break;
420 base = lmb_align_down(res_base - size, align);
421 }
422 }
423 return 0;
424}
425
426/* You must call lmb_analyze() before this. */
427u64 __init lmb_phys_mem_size(void)
428{
429 return lmb.memory.size;
430}
431
432u64 lmb_end_of_DRAM(void)
433{
434 int idx = lmb.memory.cnt - 1;
435
436 return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
437}
438
439/* You must call lmb_analyze() after this. */
440void __init lmb_enforce_memory_limit(u64 memory_limit)
441{
442 unsigned long i;
443 u64 limit;
444 struct lmb_property *p;
445
446 if (!memory_limit)
447 return;
448
449 /* Truncate the lmb regions to satisfy the memory limit. */
450 limit = memory_limit;
451 for (i = 0; i < lmb.memory.cnt; i++) {
452 if (limit > lmb.memory.region[i].size) {
453 limit -= lmb.memory.region[i].size;
454 continue;
455 }
456
457 lmb.memory.region[i].size = limit;
458 lmb.memory.cnt = i + 1;
459 break;
460 }
461
462 if (lmb.memory.region[0].size < lmb.rmo_size)
463 lmb.rmo_size = lmb.memory.region[0].size;
464
465 memory_limit = lmb_end_of_DRAM();
466
467 /* And truncate any reserves above the limit also. */
468 for (i = 0; i < lmb.reserved.cnt; i++) {
469 p = &lmb.reserved.region[i];
470
471 if (p->base > memory_limit)
472 p->size = 0;
473 else if ((p->base + p->size) > memory_limit)
474 p->size = memory_limit - p->base;
475
476 if (p->size == 0) {
477 lmb_remove_region(&lmb.reserved, i);
478 i--;
479 }
480 }
481}
482
483int __init lmb_is_reserved(u64 addr)
484{
485 int i;
486
487 for (i = 0; i < lmb.reserved.cnt; i++) {
488 u64 upper = lmb.reserved.region[i].base +
489 lmb.reserved.region[i].size - 1;
490 if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
491 return 1;
492 }
493 return 0;
494}
495
496int lmb_is_region_reserved(u64 base, u64 size)
497{
498 return lmb_overlaps_region(&lmb.reserved, base, size);
499}
500
501/*
502 * Given a <base, len>, find which memory regions belong to this range.
503 * Adjust the request and return a contiguous chunk.
504 */
505int lmb_find(struct lmb_property *res)
506{
507 int i;
508 u64 rstart, rend;
509
510 rstart = res->base;
511 rend = rstart + res->size - 1;
512
513 for (i = 0; i < lmb.memory.cnt; i++) {
514 u64 start = lmb.memory.region[i].base;
515 u64 end = start + lmb.memory.region[i].size - 1;
516
517 if (start > rend)
518 return -1;
519
520 if ((end >= rstart) && (start < rend)) {
521 /* adjust the request */
522 if (rstart < start)
523 rstart = start;
524 if (rend > end)
525 rend = end;
526 res->base = rstart;
527 res->size = rend - rstart + 1;
528 return 0;
529 }
530 }
531 return -1;
532}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index c4706eb98d3d..5021cbc34411 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -15,7 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <net/netlink.h> 16#include <net/netlink.h>
17 17
18static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { 18static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
19 [NLA_U8] = sizeof(u8), 19 [NLA_U8] = sizeof(u8),
20 [NLA_U16] = sizeof(u16), 20 [NLA_U16] = sizeof(u16),
21 [NLA_U32] = sizeof(u32), 21 [NLA_U32] = sizeof(u32),
@@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
23 [NLA_NESTED] = NLA_HDRLEN, 23 [NLA_NESTED] = NLA_HDRLEN,
24}; 24};
25 25
26static int validate_nla(struct nlattr *nla, int maxtype, 26static int validate_nla(const struct nlattr *nla, int maxtype,
27 const struct nla_policy *policy) 27 const struct nla_policy *policy)
28{ 28{
29 const struct nla_policy *pt; 29 const struct nla_policy *pt;
@@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype,
115 * 115 *
116 * Returns 0 on success or a negative error code. 116 * Returns 0 on success or a negative error code.
117 */ 117 */
118int nla_validate(struct nlattr *head, int len, int maxtype, 118int nla_validate(const struct nlattr *head, int len, int maxtype,
119 const struct nla_policy *policy) 119 const struct nla_policy *policy)
120{ 120{
121 struct nlattr *nla; 121 const struct nlattr *nla;
122 int rem, err; 122 int rem, err;
123 123
124 nla_for_each_attr(nla, head, len, rem) { 124 nla_for_each_attr(nla, head, len, rem) {
@@ -167,16 +167,16 @@ nla_policy_len(const struct nla_policy *p, int n)
167 * @policy: validation policy 167 * @policy: validation policy
168 * 168 *
169 * Parses a stream of attributes and stores a pointer to each attribute in 169 * Parses a stream of attributes and stores a pointer to each attribute in
170 * the tb array accessable via the attribute type. Attributes with a type 170 * the tb array accessible via the attribute type. Attributes with a type
171 * exceeding maxtype will be silently ignored for backwards compatibility 171 * exceeding maxtype will be silently ignored for backwards compatibility
172 * reasons. policy may be set to NULL if no validation is required. 172 * reasons. policy may be set to NULL if no validation is required.
173 * 173 *
174 * Returns 0 on success or a negative error code. 174 * Returns 0 on success or a negative error code.
175 */ 175 */
176int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, 176int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
177 const struct nla_policy *policy) 177 int len, const struct nla_policy *policy)
178{ 178{
179 struct nlattr *nla; 179 const struct nlattr *nla;
180 int rem, err; 180 int rem, err;
181 181
182 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 182 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
@@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
191 goto errout; 191 goto errout;
192 } 192 }
193 193
194 tb[type] = nla; 194 tb[type] = (struct nlattr *)nla;
195 } 195 }
196 } 196 }
197 197
@@ -212,14 +212,14 @@ errout:
212 * 212 *
213 * Returns the first attribute in the stream matching the specified type. 213 * Returns the first attribute in the stream matching the specified type.
214 */ 214 */
215struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) 215struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype)
216{ 216{
217 struct nlattr *nla; 217 const struct nlattr *nla;
218 int rem; 218 int rem;
219 219
220 nla_for_each_attr(nla, head, len, rem) 220 nla_for_each_attr(nla, head, len, rem)
221 if (nla_type(nla) == attrtype) 221 if (nla_type(nla) == attrtype)
222 return nla; 222 return (struct nlattr *)nla;
223 223
224 return NULL; 224 return NULL;
225} 225}
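A hedged sketch of the constified nla_parse() interface above in use; the attribute enum and policy are hypothetical, while the function signatures are those from this file.

#include <net/netlink.h>

enum {
	EXAMPLE_ATTR_UNSPEC,
	EXAMPLE_ATTR_ID,		/* NLA_U32 */
	EXAMPLE_ATTR_NAME,		/* NLA_NUL_STRING */
	__EXAMPLE_ATTR_MAX,
};
#define EXAMPLE_ATTR_MAX (__EXAMPLE_ATTR_MAX - 1)

static const struct nla_policy example_policy[EXAMPLE_ATTR_MAX + 1] = {
	[EXAMPLE_ATTR_ID]   = { .type = NLA_U32 },
	[EXAMPLE_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = 31 },
};

static int example_parse(const struct nlattr *head, int len, u32 *id)
{
	struct nlattr *tb[EXAMPLE_ATTR_MAX + 1];
	int err;

	err = nla_parse(tb, EXAMPLE_ATTR_MAX, head, len, example_policy);
	if (err)
		return err;
	if (tb[EXAMPLE_ATTR_ID])
		*id = nla_get_u32(tb[EXAMPLE_ATTR_ID]);
	return 0;
}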
diff --git a/lib/parser.c b/lib/parser.c
index fb34977246bb..6e89eca5cca0 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -128,12 +128,13 @@ static int match_number(substring_t *s, int *result, int base)
128 char *endp; 128 char *endp;
129 char *buf; 129 char *buf;
130 int ret; 130 int ret;
131 size_t len = s->to - s->from;
131 132
132 buf = kmalloc(s->to - s->from + 1, GFP_KERNEL); 133 buf = kmalloc(len + 1, GFP_KERNEL);
133 if (!buf) 134 if (!buf)
134 return -ENOMEM; 135 return -ENOMEM;
135 memcpy(buf, s->from, s->to - s->from); 136 memcpy(buf, s->from, len);
136 buf[s->to - s->from] = '\0'; 137 buf[len] = '\0';
137 *result = simple_strtol(buf, &endp, base); 138 *result = simple_strtol(buf, &endp, base);
138 ret = 0; 139 ret = 0;
139 if (endp == buf) 140 if (endp == buf)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d734447..28f2c33c6b53 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -8,10 +8,53 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/cpu.h> 9#include <linux/cpu.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/debugobjects.h>
11 12
12static LIST_HEAD(percpu_counters); 13static LIST_HEAD(percpu_counters);
13static DEFINE_MUTEX(percpu_counters_lock); 14static DEFINE_MUTEX(percpu_counters_lock);
14 15
16#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
17
18static struct debug_obj_descr percpu_counter_debug_descr;
19
20static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
21{
22 struct percpu_counter *fbc = addr;
23
24 switch (state) {
25 case ODEBUG_STATE_ACTIVE:
26 percpu_counter_destroy(fbc);
27 debug_object_free(fbc, &percpu_counter_debug_descr);
28 return 1;
29 default:
30 return 0;
31 }
32}
33
34static struct debug_obj_descr percpu_counter_debug_descr = {
35 .name = "percpu_counter",
36 .fixup_free = percpu_counter_fixup_free,
37};
38
39static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
40{
41 debug_object_init(fbc, &percpu_counter_debug_descr);
42 debug_object_activate(fbc, &percpu_counter_debug_descr);
43}
44
45static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
46{
47 debug_object_deactivate(fbc, &percpu_counter_debug_descr);
48 debug_object_free(fbc, &percpu_counter_debug_descr);
49}
50
51#else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
52static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
53{ }
54static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
55{ }
56#endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
57
15void percpu_counter_set(struct percpu_counter *fbc, s64 amount) 58void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
16{ 59{
17 int cpu; 60 int cpu;
@@ -29,20 +72,18 @@ EXPORT_SYMBOL(percpu_counter_set);
29void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) 72void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
30{ 73{
31 s64 count; 74 s64 count;
32 s32 *pcount;
33 int cpu = get_cpu();
34 75
35 pcount = per_cpu_ptr(fbc->counters, cpu); 76 preempt_disable();
36 count = *pcount + amount; 77 count = __this_cpu_read(*fbc->counters) + amount;
37 if (count >= batch || count <= -batch) { 78 if (count >= batch || count <= -batch) {
38 spin_lock(&fbc->lock); 79 spin_lock(&fbc->lock);
39 fbc->count += count; 80 fbc->count += count;
40 *pcount = 0; 81 __this_cpu_write(*fbc->counters, 0);
41 spin_unlock(&fbc->lock); 82 spin_unlock(&fbc->lock);
42 } else { 83 } else {
43 *pcount = count; 84 __this_cpu_write(*fbc->counters, count);
44 } 85 }
45 put_cpu(); 86 preempt_enable();
46} 87}
47EXPORT_SYMBOL(__percpu_counter_add); 88EXPORT_SYMBOL(__percpu_counter_add);
48 89
@@ -75,7 +116,11 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
75 fbc->counters = alloc_percpu(s32); 116 fbc->counters = alloc_percpu(s32);
76 if (!fbc->counters) 117 if (!fbc->counters)
77 return -ENOMEM; 118 return -ENOMEM;
119
120 debug_percpu_counter_activate(fbc);
121
78#ifdef CONFIG_HOTPLUG_CPU 122#ifdef CONFIG_HOTPLUG_CPU
123 INIT_LIST_HEAD(&fbc->list);
79 mutex_lock(&percpu_counters_lock); 124 mutex_lock(&percpu_counters_lock);
80 list_add(&fbc->list, &percpu_counters); 125 list_add(&fbc->list, &percpu_counters);
81 mutex_unlock(&percpu_counters_lock); 126 mutex_unlock(&percpu_counters_lock);
@@ -89,6 +134,8 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
89 if (!fbc->counters) 134 if (!fbc->counters)
90 return; 135 return;
91 136
137 debug_percpu_counter_deactivate(fbc);
138
92#ifdef CONFIG_HOTPLUG_CPU 139#ifdef CONFIG_HOTPLUG_CPU
93 mutex_lock(&percpu_counters_lock); 140 mutex_lock(&percpu_counters_lock);
94 list_del(&fbc->list); 141 list_del(&fbc->list);
@@ -137,6 +184,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
137 return NOTIFY_OK; 184 return NOTIFY_OK;
138} 185}
139 186
187/*
188 * Compare counter against given value.
189 * Return 1 if greater, 0 if equal and -1 if less
190 */
191int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
192{
193 s64 count;
194
195 count = percpu_counter_read(fbc);
196 /* Check to see if rough count will be sufficient for comparison */
197 if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
198 if (count > rhs)
199 return 1;
200 else
201 return -1;
202 }
203 /* Need to use precise count */
204 count = percpu_counter_sum(fbc);
205 if (count > rhs)
206 return 1;
207 else if (count < rhs)
208 return -1;
209 else
210 return 0;
211}
212EXPORT_SYMBOL(percpu_counter_compare);
213
140static int __init percpu_counter_startup(void) 214static int __init percpu_counter_startup(void)
141{ 215{
142 compute_batch_value(); 216 compute_batch_value();
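A hedged sketch of the new percpu_counter_compare() used for limit enforcement; example_used, example_charge() and the limit are hypothetical, and the counter is assumed to have been set up elsewhere with percpu_counter_init().

#include <linux/errno.h>
#include <linux/percpu_counter.h>

static struct percpu_counter example_used;

static int example_charge(s64 amount, s64 limit)
{
	percpu_counter_add(&example_used, amount);
	/* cheap approximate check first; a precise sum only near the limit */
	if (percpu_counter_compare(&example_used, limit) > 0) {
		percpu_counter_sub(&example_used, amount);
		return -ENOSPC;
	}
	return 0;
}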
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 92cdd9936e3d..5086bb962b4d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/notifier.h> 29#include <linux/notifier.h>
30#include <linux/cpu.h> 30#include <linux/cpu.h>
31#include <linux/gfp.h>
32#include <linux/string.h> 31#include <linux/string.h>
33#include <linux/bitops.h> 32#include <linux/bitops.h>
34#include <linux/rcupdate.h> 33#include <linux/rcupdate.h>
@@ -50,7 +49,7 @@ struct radix_tree_node {
50 unsigned int height; /* Height from the bottom */ 49 unsigned int height; /* Height from the bottom */
51 unsigned int count; 50 unsigned int count;
52 struct rcu_head rcu_head; 51 struct rcu_head rcu_head;
53 void *slots[RADIX_TREE_MAP_SIZE]; 52 void __rcu *slots[RADIX_TREE_MAP_SIZE];
54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
55}; 54};
56 55
@@ -83,6 +82,16 @@ struct radix_tree_preload {
83}; 82};
84static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 83static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
85 84
85static inline void *ptr_to_indirect(void *ptr)
86{
87 return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
88}
89
90static inline void *indirect_to_ptr(void *ptr)
91{
92 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
93}
94
86static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 95static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
87{ 96{
88 return root->gfp_mask & __GFP_BITS_MASK; 97 return root->gfp_mask & __GFP_BITS_MASK;
@@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
175{ 184{
176 struct radix_tree_node *node = 185 struct radix_tree_node *node =
177 container_of(head, struct radix_tree_node, rcu_head); 186 container_of(head, struct radix_tree_node, rcu_head);
187 int i;
178 188
179 /* 189 /*
180 * must only free zeroed nodes into the slab. radix_tree_shrink 190 * must only free zeroed nodes into the slab. radix_tree_shrink
181 * can leave us with a non-NULL entry in the first slot, so clear 191 * can leave us with a non-NULL entry in the first slot, so clear
182 * that here to make sure. 192 * that here to make sure.
183 */ 193 */
184 tag_clear(node, 0, 0); 194 for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
185 tag_clear(node, 1, 0); 195 tag_clear(node, i, 0);
196
186 node->slots[0] = NULL; 197 node->slots[0] = NULL;
187 node->count = 0; 198 node->count = 0;
188 199
@@ -264,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
264 return -ENOMEM; 275 return -ENOMEM;
265 276
266 /* Increase the height. */ 277 /* Increase the height. */
267 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 278 node->slots[0] = indirect_to_ptr(root->rnode);
268 279
269 /* Propagate the aggregated tag info into the new root */ 280 /* Propagate the aggregated tag info into the new root */
270 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 281 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -275,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
275 newheight = root->height+1; 286 newheight = root->height+1;
276 node->height = newheight; 287 node->height = newheight;
277 node->count = 1; 288 node->count = 1;
278 node = radix_tree_ptr_to_indirect(node); 289 node = ptr_to_indirect(node);
279 rcu_assign_pointer(root->rnode, node); 290 rcu_assign_pointer(root->rnode, node);
280 root->height = newheight; 291 root->height = newheight;
281 } while (height > root->height); 292 } while (height > root->height);
@@ -308,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
308 return error; 319 return error;
309 } 320 }
310 321
311 slot = radix_tree_indirect_to_ptr(root->rnode); 322 slot = indirect_to_ptr(root->rnode);
312 323
313 height = root->height; 324 height = root->height;
314 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 325 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -324,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
324 rcu_assign_pointer(node->slots[offset], slot); 335 rcu_assign_pointer(node->slots[offset], slot);
325 node->count++; 336 node->count++;
326 } else 337 } else
327 rcu_assign_pointer(root->rnode, 338 rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
328 radix_tree_ptr_to_indirect(slot));
329 } 339 }
330 340
331 /* Go a level down */ 341 /* Go a level down */
@@ -364,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
364 unsigned int height, shift; 374 unsigned int height, shift;
365 struct radix_tree_node *node, **slot; 375 struct radix_tree_node *node, **slot;
366 376
367 node = rcu_dereference(root->rnode); 377 node = rcu_dereference_raw(root->rnode);
368 if (node == NULL) 378 if (node == NULL)
369 return NULL; 379 return NULL;
370 380
@@ -373,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
373 return NULL; 383 return NULL;
374 return is_slot ? (void *)&root->rnode : node; 384 return is_slot ? (void *)&root->rnode : node;
375 } 385 }
376 node = radix_tree_indirect_to_ptr(node); 386 node = indirect_to_ptr(node);
377 387
378 height = node->height; 388 height = node->height;
379 if (index > radix_tree_maxindex(height)) 389 if (index > radix_tree_maxindex(height))
@@ -384,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
384 do { 394 do {
385 slot = (struct radix_tree_node **) 395 slot = (struct radix_tree_node **)
386 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 396 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
387 node = rcu_dereference(*slot); 397 node = rcu_dereference_raw(*slot);
388 if (node == NULL) 398 if (node == NULL)
389 return NULL; 399 return NULL;
390 400
@@ -392,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
392 height--; 402 height--;
393 } while (height > 0); 403 } while (height > 0);
394 404
395 return is_slot ? (void *)slot:node; 405 return is_slot ? (void *)slot : indirect_to_ptr(node);
396} 406}
397 407
398/** 408/**
@@ -454,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
454 height = root->height; 464 height = root->height;
455 BUG_ON(index > radix_tree_maxindex(height)); 465 BUG_ON(index > radix_tree_maxindex(height));
456 466
457 slot = radix_tree_indirect_to_ptr(root->rnode); 467 slot = indirect_to_ptr(root->rnode);
458 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 468 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
459 469
460 while (height > 0) { 470 while (height > 0) {
@@ -508,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
508 518
509 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 519 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
510 pathp->node = NULL; 520 pathp->node = NULL;
511 slot = radix_tree_indirect_to_ptr(root->rnode); 521 slot = indirect_to_ptr(root->rnode);
512 522
513 while (height > 0) { 523 while (height > 0) {
514 int offset; 524 int offset;
@@ -556,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
556 * 566 *
557 * 0: tag not present or not set 567 * 0: tag not present or not set
558 * 1: tag set 568 * 1: tag set
569 *
570 * Note that the return value of this function may not be relied on, even if
571 * the RCU lock is held, unless tag modification and node deletion are excluded
572 * from concurrency.
559 */ 573 */
560int radix_tree_tag_get(struct radix_tree_root *root, 574int radix_tree_tag_get(struct radix_tree_root *root,
561 unsigned long index, unsigned int tag) 575 unsigned long index, unsigned int tag)
@@ -568,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root,
568 if (!root_tag_get(root, tag)) 582 if (!root_tag_get(root, tag))
569 return 0; 583 return 0;
570 584
571 node = rcu_dereference(root->rnode); 585 node = rcu_dereference_raw(root->rnode);
572 if (node == NULL) 586 if (node == NULL)
573 return 0; 587 return 0;
574 588
575 if (!radix_tree_is_indirect_ptr(node)) 589 if (!radix_tree_is_indirect_ptr(node))
576 return (index == 0); 590 return (index == 0);
577 node = radix_tree_indirect_to_ptr(node); 591 node = indirect_to_ptr(node);
578 592
579 height = node->height; 593 height = node->height;
580 if (index > radix_tree_maxindex(height)) 594 if (index > radix_tree_maxindex(height))
@@ -596,13 +610,9 @@ int radix_tree_tag_get(struct radix_tree_root *root,
596 */ 610 */
597 if (!tag_get(node, tag, offset)) 611 if (!tag_get(node, tag, offset))
598 saw_unset_tag = 1; 612 saw_unset_tag = 1;
599 if (height == 1) { 613 if (height == 1)
600 int ret = tag_get(node, tag, offset); 614 return !!tag_get(node, tag, offset);
601 615 node = rcu_dereference_raw(node->slots[offset]);
602 BUG_ON(ret && saw_unset_tag);
603 return !!ret;
604 }
605 node = rcu_dereference(node->slots[offset]);
606 shift -= RADIX_TREE_MAP_SHIFT; 616 shift -= RADIX_TREE_MAP_SHIFT;
607 height--; 617 height--;
608 } 618 }
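As the note added to radix_tree_tag_get() above says, the result is only stable if tag changes and deletions are excluded while it is read. A hedged caller sketch, with the tree lock and tag index purely illustrative:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static int example_is_tagged(struct radix_tree_root *root,
			     unsigned long index, spinlock_t *tree_lock)
{
	int tagged;

	spin_lock_irq(tree_lock);
	tagged = radix_tree_tag_get(root, index, 0);	/* tag slot 0 */
	spin_unlock_irq(tree_lock);

	return tagged;
}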
@@ -610,6 +620,134 @@ int radix_tree_tag_get(struct radix_tree_root *root,
610EXPORT_SYMBOL(radix_tree_tag_get); 620EXPORT_SYMBOL(radix_tree_tag_get);
611 621
612/** 622/**
623 * radix_tree_range_tag_if_tagged - for each item in given range set given
624 * tag if item has another tag set
625 * @root: radix tree root
626 * @first_indexp: pointer to a starting index of a range to scan
627 * @last_index: last index of a range to scan
628 * @nr_to_tag: maximum number items to tag
629 * @iftag: tag index to test
630 * @settag: tag index to set if tested tag is set
631 *
632 * This function scans the range of the radix tree from first_index to last_index
633 * (inclusive). For each item in the range, if iftag is set, the function also
634 * sets settag. The function stops either after tagging nr_to_tag items or
635 * after reaching last_index.
636 *
637 * The tags must be set from the leaf level only and propagated back up the
638 * path to the root. We must do this so that we resolve the full path before
639 * setting any tags on intermediate nodes. If we set tags as we descend, then
640 * we can get to the leaf node and find that the index that has the iftag
641 * set is outside the range we are scanning. This results in dangling tags and
642 * can lead to problems with later tag operations (e.g. livelocks on lookups).
643 *
644 * The function returns the number of leaves where the tag was set and sets
645 * *first_indexp to the first unscanned index.
646 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
647 * be prepared to handle that.
648 */
649unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
650 unsigned long *first_indexp, unsigned long last_index,
651 unsigned long nr_to_tag,
652 unsigned int iftag, unsigned int settag)
653{
654 unsigned int height = root->height;
655 struct radix_tree_path path[height];
656 struct radix_tree_path *pathp = path;
657 struct radix_tree_node *slot;
658 unsigned int shift;
659 unsigned long tagged = 0;
660 unsigned long index = *first_indexp;
661
662 last_index = min(last_index, radix_tree_maxindex(height));
663 if (index > last_index)
664 return 0;
665 if (!nr_to_tag)
666 return 0;
667 if (!root_tag_get(root, iftag)) {
668 *first_indexp = last_index + 1;
669 return 0;
670 }
671 if (height == 0) {
672 *first_indexp = last_index + 1;
673 root_tag_set(root, settag);
674 return 1;
675 }
676
677 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
678 slot = indirect_to_ptr(root->rnode);
679
680 /*
681 * we fill the path from (root->height - 2) to 0, leaving the index at
682 * (root->height - 1) as a terminator. Zero the node in the terminator
683 * so that we can use this to end walk loops back up the path.
684 */
685 path[height - 1].node = NULL;
686
687 for (;;) {
688 int offset;
689
690 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
691 if (!slot->slots[offset])
692 goto next;
693 if (!tag_get(slot, iftag, offset))
694 goto next;
695 if (height > 1) {
696 /* Go down one level */
697 height--;
698 shift -= RADIX_TREE_MAP_SHIFT;
699 path[height - 1].node = slot;
700 path[height - 1].offset = offset;
701 slot = slot->slots[offset];
702 continue;
703 }
704
705 /* tag the leaf */
706 tagged++;
707 tag_set(slot, settag, offset);
708
709 /* walk back up the path tagging interior nodes */
710 pathp = &path[0];
711 while (pathp->node) {
712 /* stop if we find a node with the tag already set */
713 if (tag_get(pathp->node, settag, pathp->offset))
714 break;
715 tag_set(pathp->node, settag, pathp->offset);
716 pathp++;
717 }
718
719next:
720 /* Go to next item at level determined by 'shift' */
721 index = ((index >> shift) + 1) << shift;
722 /* Overflow can happen when last_index is ~0UL... */
723 if (index > last_index || !index)
724 break;
725 if (tagged >= nr_to_tag)
726 break;
727 while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
728 /*
729 * We've fully scanned this node. Go up. Because
730 * last_index is guaranteed to be in the tree, what
731 * we do below cannot wander astray.
732 */
733 slot = path[height - 1].node;
734 height++;
735 shift += RADIX_TREE_MAP_SHIFT;
736 }
737 }
738 /*
739 * The iftag must have been set somewhere because otherwise
740 * we would return immediately at the beginning of the function
741 */
742 root_tag_set(root, settag);
743 *first_indexp = index;
744
745 return tagged;
746}
747EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
748
749
750/**
613 * radix_tree_next_hole - find the next hole (not-present entry) 751 * radix_tree_next_hole - find the next hole (not-present entry)
614 * @root: tree root 752 * @root: tree root
615 * @index: index key 753 * @index: index key
@@ -657,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
657 * 795 *
658 * Returns: the index of the hole if found, otherwise returns an index 796 * Returns: the index of the hole if found, otherwise returns an index
659 * outside of the set specified (in which case 'index - return >= max_scan' 797 * outside of the set specified (in which case 'index - return >= max_scan'
660 * will be true). In rare cases of wrap-around, LONG_MAX will be returned. 798 * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
661 * 799 *
662 * radix_tree_next_hole may be called under rcu_read_lock. However, like 800 * radix_tree_next_hole may be called under rcu_read_lock. However, like
663 * radix_tree_gang_lookup, this will not atomically search a snapshot of 801 * radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -675,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
675 if (!radix_tree_lookup(root, index)) 813 if (!radix_tree_lookup(root, index))
676 break; 814 break;
677 index--; 815 index--;
678 if (index == LONG_MAX) 816 if (index == ULONG_MAX)
679 break; 817 break;
680 } 818 }
681 819
@@ -711,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
711 } 849 }
712 850
713 shift -= RADIX_TREE_MAP_SHIFT; 851 shift -= RADIX_TREE_MAP_SHIFT;
714 slot = rcu_dereference(slot->slots[i]); 852 slot = rcu_dereference_raw(slot->slots[i]);
715 if (slot == NULL) 853 if (slot == NULL)
716 goto out; 854 goto out;
717 } 855 }
@@ -758,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
758 unsigned long cur_index = first_index; 896 unsigned long cur_index = first_index;
759 unsigned int ret; 897 unsigned int ret;
760 898
761 node = rcu_dereference(root->rnode); 899 node = rcu_dereference_raw(root->rnode);
762 if (!node) 900 if (!node)
763 return 0; 901 return 0;
764 902
@@ -768,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
768 results[0] = node; 906 results[0] = node;
769 return 1; 907 return 1;
770 } 908 }
771 node = radix_tree_indirect_to_ptr(node); 909 node = indirect_to_ptr(node);
772 910
773 max_index = radix_tree_maxindex(node->height); 911 max_index = radix_tree_maxindex(node->height);
774 912
@@ -787,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
787 slot = *(((void ***)results)[ret + i]); 925 slot = *(((void ***)results)[ret + i]);
788 if (!slot) 926 if (!slot)
789 continue; 927 continue;
790 results[ret + nr_found] = rcu_dereference(slot); 928 results[ret + nr_found] =
929 indirect_to_ptr(rcu_dereference_raw(slot));
791 nr_found++; 930 nr_found++;
792 } 931 }
793 ret += nr_found; 932 ret += nr_found;
@@ -826,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
826 unsigned long cur_index = first_index; 965 unsigned long cur_index = first_index;
827 unsigned int ret; 966 unsigned int ret;
828 967
829 node = rcu_dereference(root->rnode); 968 node = rcu_dereference_raw(root->rnode);
830 if (!node) 969 if (!node)
831 return 0; 970 return 0;
832 971
@@ -836,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
836 results[0] = (void **)&root->rnode; 975 results[0] = (void **)&root->rnode;
837 return 1; 976 return 1;
838 } 977 }
839 node = radix_tree_indirect_to_ptr(node); 978 node = indirect_to_ptr(node);
840 979
841 max_index = radix_tree_maxindex(node->height); 980 max_index = radix_tree_maxindex(node->height);
842 981
@@ -915,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
915 } 1054 }
916 } 1055 }
917 shift -= RADIX_TREE_MAP_SHIFT; 1056 shift -= RADIX_TREE_MAP_SHIFT;
918 slot = rcu_dereference(slot->slots[i]); 1057 slot = rcu_dereference_raw(slot->slots[i]);
919 if (slot == NULL) 1058 if (slot == NULL)
920 break; 1059 break;
921 } 1060 }
@@ -951,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
951 if (!root_tag_get(root, tag)) 1090 if (!root_tag_get(root, tag))
952 return 0; 1091 return 0;
953 1092
954 node = rcu_dereference(root->rnode); 1093 node = rcu_dereference_raw(root->rnode);
955 if (!node) 1094 if (!node)
956 return 0; 1095 return 0;
957 1096
@@ -961,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
961 results[0] = node; 1100 results[0] = node;
962 return 1; 1101 return 1;
963 } 1102 }
964 node = radix_tree_indirect_to_ptr(node); 1103 node = indirect_to_ptr(node);
965 1104
966 max_index = radix_tree_maxindex(node->height); 1105 max_index = radix_tree_maxindex(node->height);
967 1106
@@ -980,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
980 slot = *(((void ***)results)[ret + i]); 1119 slot = *(((void ***)results)[ret + i]);
981 if (!slot) 1120 if (!slot)
982 continue; 1121 continue;
983 results[ret + nr_found] = rcu_dereference(slot); 1122 results[ret + nr_found] =
1123 indirect_to_ptr(rcu_dereference_raw(slot));
984 nr_found++; 1124 nr_found++;
985 } 1125 }
986 ret += nr_found; 1126 ret += nr_found;
@@ -1020,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1020 if (!root_tag_get(root, tag)) 1160 if (!root_tag_get(root, tag))
1021 return 0; 1161 return 0;
1022 1162
1023 node = rcu_dereference(root->rnode); 1163 node = rcu_dereference_raw(root->rnode);
1024 if (!node) 1164 if (!node)
1025 return 0; 1165 return 0;
1026 1166
@@ -1030,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1030 results[0] = (void **)&root->rnode; 1170 results[0] = (void **)&root->rnode;
1031 return 1; 1171 return 1;
1032 } 1172 }
1033 node = radix_tree_indirect_to_ptr(node); 1173 node = indirect_to_ptr(node);
1034 1174
1035 max_index = radix_tree_maxindex(node->height); 1175 max_index = radix_tree_maxindex(node->height);
1036 1176
@@ -1066,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1066 void *newptr; 1206 void *newptr;
1067 1207
1068 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1208 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1069 to_free = radix_tree_indirect_to_ptr(to_free); 1209 to_free = indirect_to_ptr(to_free);
1070 1210
1071 /* 1211 /*
1072 * The candidate node has more than one child, or its child 1212 * The candidate node has more than one child, or its child
@@ -1079,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1079 1219
1080 /* 1220 /*
1081 * We don't need rcu_assign_pointer(), since we are simply 1221 * We don't need rcu_assign_pointer(), since we are simply
1082 * moving the node from one part of the tree to another. If 1222 * moving the node from one part of the tree to another: if it
1083 * it was safe to dereference the old pointer to it 1223 * was safe to dereference the old pointer to it
1084 * (to_free->slots[0]), it will be safe to dereference the new 1224 * (to_free->slots[0]), it will be safe to dereference the new
1085 * one (root->rnode). 1225 * one (root->rnode) as far as dependent read barriers go.
1086 */ 1226 */
1087 newptr = to_free->slots[0]; 1227 newptr = to_free->slots[0];
1088 if (root->height > 1) 1228 if (root->height > 1)
1089 newptr = radix_tree_ptr_to_indirect(newptr); 1229 newptr = ptr_to_indirect(newptr);
1090 root->rnode = newptr; 1230 root->rnode = newptr;
1091 root->height--; 1231 root->height--;
1232
1233 /*
1234 * We have a dilemma here. The node's slot[0] must not be
1235 * NULLed in case there are concurrent lookups expecting to
1236 * find the item. However if this was a bottom-level node,
1237 * then it may be subject to the slot pointer being visible
1238 * to callers dereferencing it. If item corresponding to
1239 * slot[0] is subsequently deleted, these callers would expect
1240 * their slot to become empty sooner or later.
1241 *
1242 * For example, lockless pagecache will look up a slot, deref
1243 * the page pointer, and if the page is 0 refcount it means it
1244 * was concurrently deleted from pagecache so try the deref
1245 * again. Fortunately there is already a requirement for logic
1246 * to retry the entire slot lookup -- the indirect pointer
1247 * problem (replacing direct root node with an indirect pointer
1248 * also results in a stale slot). So tag the slot as indirect
1249 * to force callers to retry.
1250 */
1251 if (root->height == 0)
1252 *((unsigned long *)&to_free->slots[0]) |=
1253 RADIX_TREE_INDIRECT_PTR;
1254
1092 radix_tree_node_free(to_free); 1255 radix_tree_node_free(to_free);
1093 } 1256 }
1094} 1257}
@@ -1125,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1125 root->rnode = NULL; 1288 root->rnode = NULL;
1126 goto out; 1289 goto out;
1127 } 1290 }
1128 slot = radix_tree_indirect_to_ptr(slot); 1291 slot = indirect_to_ptr(slot);
1129 1292
1130 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1293 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1131 pathp->node = NULL; 1294 pathp->node = NULL;
@@ -1167,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1167 radix_tree_node_free(to_free); 1330 radix_tree_node_free(to_free);
1168 1331
1169 if (pathp->node->count) { 1332 if (pathp->node->count) {
1170 if (pathp->node == 1333 if (pathp->node == indirect_to_ptr(root->rnode))
1171 radix_tree_indirect_to_ptr(root->rnode))
1172 radix_tree_shrink(root); 1334 radix_tree_shrink(root);
1173 goto out; 1335 goto out;
1174 } 1336 }
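
A minimal caller sketch may help connect the kernel-doc above (batching via nr_to_tag, the updated *first_indexp, the possible wrap at ULONG_MAX) to actual use. This is not part of the patch: the spinlock, the tag numbers and the batch size are assumptions invented for the example, loosely modelled on how a pagecache-style user would drive the function under its tree lock.

#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/sched.h>

#define MY_IFTAG	0	/* hypothetical "source" tag */
#define MY_SETTAG	1	/* hypothetical "destination" tag */
#define MY_BATCH	128	/* arbitrary batch size */

/* Set MY_SETTAG on every MY_IFTAG item in [start, end], one batch at a time. */
static void retag_range(struct radix_tree_root *root, spinlock_t *lock,
			unsigned long start, unsigned long end)
{
	unsigned long index = start;

	while (index <= end) {
		unsigned long tagged;

		spin_lock_irq(lock);
		tagged = radix_tree_range_tag_if_tagged(root, &index, end,
							MY_BATCH,
							MY_IFTAG, MY_SETTAG);
		spin_unlock_irq(lock);

		if (!tagged)	/* nothing tagged in what is left of the range */
			break;
		if (!index)	/* *first_indexp wrapped past ULONG_MAX */
			break;
		cond_resched();	/* the lock is dropped between batches */
	}
}
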
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
new file mode 100644
index 000000000000..162becacf97c
--- /dev/null
+++ b/lib/raid6/.gitignore
@@ -0,0 +1,4 @@
1mktables
2altivec*.c
3int*.c
4tables.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
new file mode 100644
index 000000000000..8a38102770f3
--- /dev/null
+++ b/lib/raid6/Makefile
@@ -0,0 +1,75 @@
1obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
2
3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
5 altivec8.o mmx.o sse1.o sse2.o
6hostprogs-y += mktables
7
8quiet_cmd_unroll = UNROLL $@
9 cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
10 < $< > $@ || ( rm -f $@ && exit 1 )
11
12ifeq ($(CONFIG_ALTIVEC),y)
13altivec_flags := -maltivec -mabi=altivec
14endif
15
16targets += int1.c
17$(obj)/int1.c: UNROLL := 1
18$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE
19 $(call if_changed,unroll)
20
21targets += int2.c
22$(obj)/int2.c: UNROLL := 2
23$(obj)/int2.c: $(src)/int.uc $(src)/unroll.awk FORCE
24 $(call if_changed,unroll)
25
26targets += int4.c
27$(obj)/int4.c: UNROLL := 4
28$(obj)/int4.c: $(src)/int.uc $(src)/unroll.awk FORCE
29 $(call if_changed,unroll)
30
31targets += int8.c
32$(obj)/int8.c: UNROLL := 8
33$(obj)/int8.c: $(src)/int.uc $(src)/unroll.awk FORCE
34 $(call if_changed,unroll)
35
36targets += int16.c
37$(obj)/int16.c: UNROLL := 16
38$(obj)/int16.c: $(src)/int.uc $(src)/unroll.awk FORCE
39 $(call if_changed,unroll)
40
41targets += int32.c
42$(obj)/int32.c: UNROLL := 32
43$(obj)/int32.c: $(src)/int.uc $(src)/unroll.awk FORCE
44 $(call if_changed,unroll)
45
46CFLAGS_altivec1.o += $(altivec_flags)
47targets += altivec1.c
48$(obj)/altivec1.c: UNROLL := 1
49$(obj)/altivec1.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
50 $(call if_changed,unroll)
51
52CFLAGS_altivec2.o += $(altivec_flags)
53targets += altivec2.c
54$(obj)/altivec2.c: UNROLL := 2
55$(obj)/altivec2.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
56 $(call if_changed,unroll)
57
58CFLAGS_altivec4.o += $(altivec_flags)
59targets += altivec4.c
60$(obj)/altivec4.c: UNROLL := 4
61$(obj)/altivec4.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
62 $(call if_changed,unroll)
63
64CFLAGS_altivec8.o += $(altivec_flags)
65targets += altivec8.c
66$(obj)/altivec8.c: UNROLL := 8
67$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
68 $(call if_changed,unroll)
69
70quiet_cmd_mktable = TABLE $@
71 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
72
73targets += tables.c
74$(obj)/tables.c: $(obj)/mktables FORCE
75 $(call if_changed,mktable)
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
new file mode 100644
index 000000000000..b595f560bee7
--- /dev/null
+++ b/lib/raid6/algos.c
@@ -0,0 +1,154 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/algos.c
15 *
16 * Algorithm list and algorithm selection for RAID-6
17 */
18
19#include <linux/raid/pq.h>
20#ifndef __KERNEL__
21#include <sys/mman.h>
22#include <stdio.h>
23#else
24#include <linux/gfp.h>
25#if !RAID6_USE_EMPTY_ZERO_PAGE
26/* In .bss so it's zeroed */
27const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
28EXPORT_SYMBOL(raid6_empty_zero_page);
29#endif
30#endif
31
32struct raid6_calls raid6_call;
33EXPORT_SYMBOL_GPL(raid6_call);
34
35const struct raid6_calls * const raid6_algos[] = {
36 &raid6_intx1,
37 &raid6_intx2,
38 &raid6_intx4,
39 &raid6_intx8,
40#if defined(__ia64__)
41 &raid6_intx16,
42 &raid6_intx32,
43#endif
44#if defined(__i386__) && !defined(__arch_um__)
45 &raid6_mmxx1,
46 &raid6_mmxx2,
47 &raid6_sse1x1,
48 &raid6_sse1x2,
49 &raid6_sse2x1,
50 &raid6_sse2x2,
51#endif
52#if defined(__x86_64__) && !defined(__arch_um__)
53 &raid6_sse2x1,
54 &raid6_sse2x2,
55 &raid6_sse2x4,
56#endif
57#ifdef CONFIG_ALTIVEC
58 &raid6_altivec1,
59 &raid6_altivec2,
60 &raid6_altivec4,
61 &raid6_altivec8,
62#endif
63 NULL
64};
65
66#ifdef __KERNEL__
67#define RAID6_TIME_JIFFIES_LG2 4
68#else
69/* Need more time to be stable in userspace */
70#define RAID6_TIME_JIFFIES_LG2 9
71#define time_before(x, y) ((x) < (y))
72#endif
73
74/* Try to pick the best algorithm */
 75/* This code uses the gfmul table as a convenient data set to abuse */
76
77int __init raid6_select_algo(void)
78{
79 const struct raid6_calls * const * algo;
80 const struct raid6_calls * best;
81 char *syndromes;
82 void *dptrs[(65536/PAGE_SIZE)+2];
83 int i, disks;
84 unsigned long perf, bestperf;
85 int bestprefer;
86 unsigned long j0, j1;
87
88 disks = (65536/PAGE_SIZE)+2;
89 for ( i = 0 ; i < disks-2 ; i++ ) {
90 dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
91 }
92
93 /* Normal code - use a 2-page allocation to avoid D$ conflict */
94 syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
95
96 if ( !syndromes ) {
97 printk("raid6: Yikes! No memory available.\n");
98 return -ENOMEM;
99 }
100
101 dptrs[disks-2] = syndromes;
102 dptrs[disks-1] = syndromes + PAGE_SIZE;
103
104 bestperf = 0; bestprefer = 0; best = NULL;
105
106 for ( algo = raid6_algos ; *algo ; algo++ ) {
107 if ( !(*algo)->valid || (*algo)->valid() ) {
108 perf = 0;
109
110 preempt_disable();
111 j0 = jiffies;
112 while ( (j1 = jiffies) == j0 )
113 cpu_relax();
114 while (time_before(jiffies,
115 j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
116 (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
117 perf++;
118 }
119 preempt_enable();
120
121 if ( (*algo)->prefer > bestprefer ||
122 ((*algo)->prefer == bestprefer &&
123 perf > bestperf) ) {
124 best = *algo;
125 bestprefer = best->prefer;
126 bestperf = perf;
127 }
128 printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
129 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
130 }
131 }
132
133 if (best) {
134 printk("raid6: using algorithm %s (%ld MB/s)\n",
135 best->name,
136 (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
137 raid6_call = *best;
138 } else
139 printk("raid6: Yikes! No algorithm found!\n");
140
141 free_pages((unsigned long)syndromes, 1);
142
143 return best ? 0 : -EINVAL;
144}
145
146static void raid6_exit(void)
147{
148 do { } while (0);
149}
150
151subsys_initcall(raid6_select_algo);
152module_exit(raid6_exit);
153MODULE_LICENSE("GPL");
154MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
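
The MB/s figure printed above compresses a small calculation: each gen_syndrome() pass touches 2^16 bytes of the gfmul table, and the timed loop runs for 2^RAID6_TIME_JIFFIES_LG2 jiffies. As a hedged sketch (raid6_mbps() is a made-up helper, not something this patch adds), the shift in the printk works out as follows:

/*
 * bytes/second = perf * 2^16 * HZ / 2^RAID6_TIME_JIFFIES_LG2
 * MB/second    = bytes/second / 2^20
 *              = (perf * HZ) >> (20 - 16 + RAID6_TIME_JIFFIES_LG2)
 */
static unsigned long raid6_mbps(unsigned long perf, unsigned long hz,
				int time_jiffies_lg2)
{
	return (perf * hz) >> (20 - 16 + time_jiffies_lg2);
}
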
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
new file mode 100644
index 000000000000..2654d5c854be
--- /dev/null
+++ b/lib/raid6/altivec.uc
@@ -0,0 +1,130 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6altivec$#.c
15 *
 16 * $#-way unrolled AltiVec RAID-6 syndrome code
17 *
18 * This file is postprocessed using unroll.awk
19 *
20 * <benh> hpa: in process,
21 * you can just "steal" the vec unit with enable_kernel_altivec() (but
 22 * bracket this with preempt_disable/enable or in a lock)
23 */
24
25#include <linux/raid/pq.h>
26
27#ifdef CONFIG_ALTIVEC
28
29#include <altivec.h>
30#ifdef __KERNEL__
31# include <asm/system.h>
32# include <asm/cputable.h>
33#endif
34
35/*
36 * This is the C data type to use. We use a vector of
37 * signed char so vec_cmpgt() will generate the right
38 * instruction.
39 */
40
41typedef vector signed char unative_t;
42
43#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
44#define NSIZE sizeof(unative_t)
45
46/*
47 * The SHLBYTE() operation shifts each byte left by 1, *not*
48 * rolling over into the next byte
49 */
50static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
51{
52 return vec_add(v,v);
53}
54
55/*
56 * The MASK() operation returns 0xFF in any byte for which the high
57 * bit is 1, 0x00 for any byte for which the high bit is 0.
58 */
59static inline __attribute_const__ unative_t MASK(unative_t v)
60{
61 unative_t zv = NBYTES(0);
62
63 /* vec_cmpgt returns a vector bool char; thus the need for the cast */
64 return (unative_t)vec_cmpgt(zv, v);
65}
66
67
68/* This is noinline to make damned sure that gcc doesn't move any of the
69 Altivec code around the enable/disable code */
70static void noinline
71raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
72{
73 u8 **dptr = (u8 **)ptrs;
74 u8 *p, *q;
75 int d, z, z0;
76
77 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
78 unative_t x1d = NBYTES(0x1d);
79
80 z0 = disks - 3; /* Highest data disk */
81 p = dptr[z0+1]; /* XOR parity */
82 q = dptr[z0+2]; /* RS syndrome */
83
84 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
85 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
86 for ( z = z0-1 ; z >= 0 ; z-- ) {
87 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
88 wp$$ = vec_xor(wp$$, wd$$);
89 w2$$ = MASK(wq$$);
90 w1$$ = SHLBYTE(wq$$);
91 w2$$ = vec_and(w2$$, x1d);
92 w1$$ = vec_xor(w1$$, w2$$);
93 wq$$ = vec_xor(w1$$, wd$$);
94 }
95 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
96 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
97 }
98}
99
100static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
101{
102 preempt_disable();
103 enable_kernel_altivec();
104
105 raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
106
107 preempt_enable();
108}
109
110int raid6_have_altivec(void);
111#if $# == 1
112int raid6_have_altivec(void)
113{
114 /* This assumes either all CPUs have Altivec or none does */
115# ifdef __KERNEL__
116 return cpu_has_feature(CPU_FTR_ALTIVEC);
117# else
118 return 1;
119# endif
120}
121#endif
122
123const struct raid6_calls raid6_altivec$# = {
124 raid6_altivec$#_gen_syndrome,
125 raid6_have_altivec,
126 "altivecx$#",
127 0
128};
129
130#endif /* CONFIG_ALTIVEC */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
new file mode 100644
index 000000000000..d1e276a14fab
--- /dev/null
+++ b/lib/raid6/int.uc
@@ -0,0 +1,117 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6int$#.c
15 *
16 * $#-way unrolled portable integer math RAID-6 instruction set
17 *
18 * This file is postprocessed using unroll.awk
19 */
20
21#include <linux/raid/pq.h>
22
23/*
24 * This is the C data type to use
25 */
26
27/* Change this from BITS_PER_LONG if there is something better... */
28#if BITS_PER_LONG == 64
29# define NBYTES(x) ((x) * 0x0101010101010101UL)
30# define NSIZE 8
31# define NSHIFT 3
32# define NSTRING "64"
33typedef u64 unative_t;
34#else
35# define NBYTES(x) ((x) * 0x01010101U)
36# define NSIZE 4
37# define NSHIFT 2
38# define NSTRING "32"
39typedef u32 unative_t;
40#endif
41
42
43
44/*
45 * IA-64 wants insane amounts of unrolling. On other architectures that
46 * is just a waste of space.
47 */
48#if ($# <= 8) || defined(__ia64__)
49
50
51/*
52 * These sub-operations are separate inlines since they can sometimes be
53 * specially optimized using architecture-specific hacks.
54 */
55
56/*
57 * The SHLBYTE() operation shifts each byte left by 1, *not*
58 * rolling over into the next byte
59 */
60static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
61{
62 unative_t vv;
63
64 vv = (v << 1) & NBYTES(0xfe);
65 return vv;
66}
67
68/*
69 * The MASK() operation returns 0xFF in any byte for which the high
70 * bit is 1, 0x00 for any byte for which the high bit is 0.
71 */
72static inline __attribute_const__ unative_t MASK(unative_t v)
73{
74 unative_t vv;
75
76 vv = v & NBYTES(0x80);
77 vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
78 return vv;
79}
80
81
82static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
83{
84 u8 **dptr = (u8 **)ptrs;
85 u8 *p, *q;
86 int d, z, z0;
87
88 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
89
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0+1]; /* XOR parity */
92 q = dptr[z0+2]; /* RS syndrome */
93
94 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
95 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
96 for ( z = z0-1 ; z >= 0 ; z-- ) {
97 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
98 wp$$ ^= wd$$;
99 w2$$ = MASK(wq$$);
100 w1$$ = SHLBYTE(wq$$);
101 w2$$ &= NBYTES(0x1d);
102 w1$$ ^= w2$$;
103 wq$$ = w1$$ ^ wd$$;
104 }
105 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
106 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
107 }
108}
109
110const struct raid6_calls raid6_intx$# = {
111 raid6_int$#_gen_syndrome,
112 NULL, /* always valid */
113 "int" NSTRING "x$#",
114 0
115};
116
117#endif
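
The SHLBYTE()/MASK() pair above is a branch-free, SIMD-within-a-register way of multiplying every byte by {02} in GF(2^8) with the 0x11d polynomial. A byte-at-a-time userspace check (purely illustrative, not part of the patch) makes the equivalence explicit:

#include <assert.h>
#include <stdint.h>

static uint8_t shlbyte(uint8_t v)		/* SHLBYTE() for one byte */
{
	return (v << 1) & 0xfe;
}

static uint8_t mask(uint8_t v)			/* MASK() for one byte */
{
	uint8_t vv = v & 0x80;

	return (vv << 1) - (vv >> 7);		/* 0xff if high bit set, else 0 */
}

static uint8_t gf_mul2(uint8_t v)		/* reference: multiply by {02} */
{
	return (v << 1) ^ (v & 0x80 ? 0x1d : 0);
}

int main(void)
{
	int v;

	for (v = 0; v < 256; v++)
		assert((shlbyte(v) ^ (mask(v) & 0x1d)) == gf_mul2(v));
	return 0;
}
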
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
new file mode 100644
index 000000000000..3b1500843bba
--- /dev/null
+++ b/lib/raid6/mktables.c
@@ -0,0 +1,132 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * mktables.c
13 *
14 * Make RAID-6 tables. This is a host user space program to be run at
15 * compile time.
16 */
17
18#include <stdio.h>
19#include <string.h>
20#include <inttypes.h>
21#include <stdlib.h>
22#include <time.h>
23
24static uint8_t gfmul(uint8_t a, uint8_t b)
25{
26 uint8_t v = 0;
27
28 while (b) {
29 if (b & 1)
30 v ^= a;
31 a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
32 b >>= 1;
33 }
34
35 return v;
36}
37
38static uint8_t gfpow(uint8_t a, int b)
39{
40 uint8_t v = 1;
41
42 b %= 255;
43 if (b < 0)
44 b += 255;
45
46 while (b) {
47 if (b & 1)
48 v = gfmul(v, a);
49 a = gfmul(a, a);
50 b >>= 1;
51 }
52
53 return v;
54}
55
56int main(int argc, char *argv[])
57{
58 int i, j, k;
59 uint8_t v;
60 uint8_t exptbl[256], invtbl[256];
61
62 printf("#include <linux/raid/pq.h>\n");
63
64 /* Compute multiplication table */
65 printf("\nconst u8 __attribute__((aligned(256)))\n"
66 "raid6_gfmul[256][256] =\n"
67 "{\n");
68 for (i = 0; i < 256; i++) {
69 printf("\t{\n");
70 for (j = 0; j < 256; j += 8) {
71 printf("\t\t");
72 for (k = 0; k < 8; k++)
73 printf("0x%02x,%c", gfmul(i, j + k),
74 (k == 7) ? '\n' : ' ');
75 }
76 printf("\t},\n");
77 }
78 printf("};\n");
79 printf("#ifdef __KERNEL__\n");
80 printf("EXPORT_SYMBOL(raid6_gfmul);\n");
81 printf("#endif\n");
82
83 /* Compute power-of-2 table (exponent) */
84 v = 1;
85 printf("\nconst u8 __attribute__((aligned(256)))\n"
86 "raid6_gfexp[256] =\n" "{\n");
87 for (i = 0; i < 256; i += 8) {
88 printf("\t");
89 for (j = 0; j < 8; j++) {
90 exptbl[i + j] = v;
91 printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
92 v = gfmul(v, 2);
93 if (v == 1)
94 v = 0; /* For entry 255, not a real entry */
95 }
96 }
97 printf("};\n");
98 printf("#ifdef __KERNEL__\n");
99 printf("EXPORT_SYMBOL(raid6_gfexp);\n");
100 printf("#endif\n");
101
102 /* Compute inverse table x^-1 == x^254 */
103 printf("\nconst u8 __attribute__((aligned(256)))\n"
104 "raid6_gfinv[256] =\n" "{\n");
105 for (i = 0; i < 256; i += 8) {
106 printf("\t");
107 for (j = 0; j < 8; j++) {
108 invtbl[i + j] = v = gfpow(i + j, 254);
109 printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
110 }
111 }
112 printf("};\n");
113 printf("#ifdef __KERNEL__\n");
114 printf("EXPORT_SYMBOL(raid6_gfinv);\n");
115 printf("#endif\n");
116
117 /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
118 printf("\nconst u8 __attribute__((aligned(256)))\n"
119 "raid6_gfexi[256] =\n" "{\n");
120 for (i = 0; i < 256; i += 8) {
121 printf("\t");
122 for (j = 0; j < 8; j++)
123 printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
124 (j == 7) ? '\n' : ' ');
125 }
126 printf("};\n");
127 printf("#ifdef __KERNEL__\n");
128 printf("EXPORT_SYMBOL(raid6_gfexi);\n");
129 printf("#endif\n");
130
131 return 0;
132}
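
A quick way to convince yourself that the tables mktables.c prints are consistent is to check the identity they rely on: the multiplicative group of GF(2^8) has order 255, so a^254 is the inverse of every nonzero a, which is exactly what raid6_gfinv[] stores. A standalone sanity check (illustrative only; gfmul()/gfpow() are copied verbatim from above) could look like this:

#include <stdio.h>
#include <stdint.h>

static uint8_t gfmul(uint8_t a, uint8_t b)
{
	uint8_t v = 0;

	while (b) {
		if (b & 1)
			v ^= a;
		a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
		b >>= 1;
	}
	return v;
}

static uint8_t gfpow(uint8_t a, int b)
{
	uint8_t v = 1;

	b %= 255;
	if (b < 0)
		b += 255;
	while (b) {
		if (b & 1)
			v = gfmul(v, a);
		a = gfmul(a, a);
		b >>= 1;
	}
	return v;
}

int main(void)
{
	int a, bad = 0;

	for (a = 1; a < 256; a++)
		if (gfmul(a, gfpow(a, 254)) != 1)
			bad++;
	printf("%d bad inverses\n", bad);	/* expect 0 */
	return bad != 0;
}
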
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
new file mode 100644
index 000000000000..279347f23094
--- /dev/null
+++ b/lib/raid6/mmx.c
@@ -0,0 +1,142 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/mmx.c
15 *
16 * MMX implementation of RAID-6 syndrome functions
17 */
18
19#if defined(__i386__) && !defined(__arch_um__)
20
21#include <linux/raid/pq.h>
22#include "x86.h"
23
24/* Shared with raid6/sse1.c */
25const struct raid6_mmx_constants {
26 u64 x1d;
27} raid6_mmx_constants = {
28 0x1d1d1d1d1d1d1d1dULL,
29};
30
31static int raid6_have_mmx(void)
32{
33 /* Not really "boot_cpu" but "all_cpus" */
34 return boot_cpu_has(X86_FEATURE_MMX);
35}
36
37/*
38 * Plain MMX implementation
39 */
40static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
41{
42 u8 **dptr = (u8 **)ptrs;
43 u8 *p, *q;
44 int d, z, z0;
45
46 z0 = disks - 3; /* Highest data disk */
47 p = dptr[z0+1]; /* XOR parity */
48 q = dptr[z0+2]; /* RS syndrome */
49
50 kernel_fpu_begin();
51
52 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
53 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
54
55 for ( d = 0 ; d < bytes ; d += 8 ) {
56 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
57 asm volatile("movq %mm2,%mm4"); /* Q[0] */
58 for ( z = z0-1 ; z >= 0 ; z-- ) {
59 asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
60 asm volatile("pcmpgtb %mm4,%mm5");
61 asm volatile("paddb %mm4,%mm4");
62 asm volatile("pand %mm0,%mm5");
63 asm volatile("pxor %mm5,%mm4");
64 asm volatile("pxor %mm5,%mm5");
65 asm volatile("pxor %mm6,%mm2");
66 asm volatile("pxor %mm6,%mm4");
67 }
68 asm volatile("movq %%mm2,%0" : "=m" (p[d]));
69 asm volatile("pxor %mm2,%mm2");
70 asm volatile("movq %%mm4,%0" : "=m" (q[d]));
71 asm volatile("pxor %mm4,%mm4");
72 }
73
74 kernel_fpu_end();
75}
76
77const struct raid6_calls raid6_mmxx1 = {
78 raid6_mmx1_gen_syndrome,
79 raid6_have_mmx,
80 "mmxx1",
81 0
82};
83
84/*
85 * Unrolled-by-2 MMX implementation
86 */
87static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
88{
89 u8 **dptr = (u8 **)ptrs;
90 u8 *p, *q;
91 int d, z, z0;
92
93 z0 = disks - 3; /* Highest data disk */
94 p = dptr[z0+1]; /* XOR parity */
95 q = dptr[z0+2]; /* RS syndrome */
96
97 kernel_fpu_begin();
98
99 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
100 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
101 asm volatile("pxor %mm7,%mm7"); /* Zero temp */
102
103 for ( d = 0 ; d < bytes ; d += 16 ) {
104 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
105 asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
106 asm volatile("movq %mm2,%mm4"); /* Q[0] */
107 asm volatile("movq %mm3,%mm6"); /* Q[1] */
108 for ( z = z0-1 ; z >= 0 ; z-- ) {
109 asm volatile("pcmpgtb %mm4,%mm5");
110 asm volatile("pcmpgtb %mm6,%mm7");
111 asm volatile("paddb %mm4,%mm4");
112 asm volatile("paddb %mm6,%mm6");
113 asm volatile("pand %mm0,%mm5");
114 asm volatile("pand %mm0,%mm7");
115 asm volatile("pxor %mm5,%mm4");
116 asm volatile("pxor %mm7,%mm6");
117 asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
118 asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
119 asm volatile("pxor %mm5,%mm2");
120 asm volatile("pxor %mm7,%mm3");
121 asm volatile("pxor %mm5,%mm4");
122 asm volatile("pxor %mm7,%mm6");
123 asm volatile("pxor %mm5,%mm5");
124 asm volatile("pxor %mm7,%mm7");
125 }
126 asm volatile("movq %%mm2,%0" : "=m" (p[d]));
127 asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
128 asm volatile("movq %%mm4,%0" : "=m" (q[d]));
129 asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
130 }
131
132 kernel_fpu_end();
133}
134
135const struct raid6_calls raid6_mmxx2 = {
136 raid6_mmx2_gen_syndrome,
137 raid6_have_mmx,
138 "mmxx2",
139 0
140};
141
142#endif
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
new file mode 100644
index 000000000000..8590d19cf522
--- /dev/null
+++ b/lib/raid6/recov.c
@@ -0,0 +1,132 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/recov.c
15 *
16 * RAID-6 data recovery in dual failure mode. In single failure mode,
17 * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
18 * the syndrome.)
19 */
20
21#include <linux/raid/pq.h>
22
23/* Recover two failed data blocks. */
24void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
25 void **ptrs)
26{
27 u8 *p, *q, *dp, *dq;
28 u8 px, qx, db;
29 const u8 *pbmul; /* P multiplier table for B data */
30 const u8 *qmul; /* Q multiplier table (for both) */
31
32 p = (u8 *)ptrs[disks-2];
33 q = (u8 *)ptrs[disks-1];
34
35 /* Compute syndrome with zero for the missing data pages
36 Use the dead data pages as temporary storage for
37 delta p and delta q */
38 dp = (u8 *)ptrs[faila];
39 ptrs[faila] = (void *)raid6_empty_zero_page;
40 ptrs[disks-2] = dp;
41 dq = (u8 *)ptrs[failb];
42 ptrs[failb] = (void *)raid6_empty_zero_page;
43 ptrs[disks-1] = dq;
44
45 raid6_call.gen_syndrome(disks, bytes, ptrs);
46
47 /* Restore pointer table */
48 ptrs[faila] = dp;
49 ptrs[failb] = dq;
50 ptrs[disks-2] = p;
51 ptrs[disks-1] = q;
52
53 /* Now, pick the proper data tables */
54 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
55 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
56
57 /* Now do it... */
58 while ( bytes-- ) {
59 px = *p ^ *dp;
60 qx = qmul[*q ^ *dq];
61 *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
62 *dp++ = db ^ px; /* Reconstructed A */
63 p++; q++;
64 }
65}
66EXPORT_SYMBOL_GPL(raid6_2data_recov);
67
68/* Recover failure of one data block plus the P block */
69void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
70{
71 u8 *p, *q, *dq;
72 const u8 *qmul; /* Q multiplier table */
73
74 p = (u8 *)ptrs[disks-2];
75 q = (u8 *)ptrs[disks-1];
76
77 /* Compute syndrome with zero for the missing data page
78 Use the dead data page as temporary storage for delta q */
79 dq = (u8 *)ptrs[faila];
80 ptrs[faila] = (void *)raid6_empty_zero_page;
81 ptrs[disks-1] = dq;
82
83 raid6_call.gen_syndrome(disks, bytes, ptrs);
84
85 /* Restore pointer table */
86 ptrs[faila] = dq;
87 ptrs[disks-1] = q;
88
89 /* Now, pick the proper data tables */
90 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
91
92 /* Now do it... */
93 while ( bytes-- ) {
94 *p++ ^= *dq = qmul[*q ^ *dq];
95 q++; dq++;
96 }
97}
98EXPORT_SYMBOL_GPL(raid6_datap_recov);
99
100#ifndef __KERNEL__
101/* Testing only */
102
103/* Recover two failed blocks. */
104void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
105{
106 if ( faila > failb ) {
107 int tmp = faila;
108 faila = failb;
109 failb = tmp;
110 }
111
112 if ( failb == disks-1 ) {
113 if ( faila == disks-2 ) {
114 /* P+Q failure. Just rebuild the syndrome. */
115 raid6_call.gen_syndrome(disks, bytes, ptrs);
116 } else {
117 /* data+Q failure. Reconstruct data from P,
118 then rebuild syndrome. */
119 /* NOT IMPLEMENTED - equivalent to RAID-5 */
120 }
121 } else {
122 if ( failb == disks-2 ) {
123 /* data+P failure. */
124 raid6_datap_recov(disks, bytes, faila, ptrs);
125 } else {
126 /* data+data failure. */
127 raid6_2data_recov(disks, bytes, faila, failb, ptrs);
128 }
129 }
130}
131
132#endif
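
The two lines in the raid6_2data_recov() loop that index pbmul[] and qmul[] encode the standard two-erasure solution of the P/Q equations. A short derivation, written as a comment in the notation the tables use ("+" is XOR, g = {02}, a = faila, b = failb), may help connect the table choices to the algebra; this is commentary only, not code from the patch:

/*
 * With disks a and b replaced by zero pages, the recomputed parities
 * P' and Q' are missing exactly those two contributions, so
 *
 *	Pd = P + P' = Da + Db
 *	Qd = Q + Q' = g^a * Da + g^b * Db
 *
 * Eliminating Da:
 *
 *	Qd + g^a * Pd = (g^a + g^b) * Db
 *	Db = Pd * 1/(g^(b-a) + 1)  +  Qd * 1/(g^a + g^b)
 *	Da = Db + Pd
 *
 * which is exactly the loop above: pbmul[] multiplies by
 * 1/(g^(b-a) + 1), i.e. raid6_gfexi[failb-faila], and qmul[] multiplies
 * by 1/(g^a + g^b), i.e. raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]].
 */
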
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
new file mode 100644
index 000000000000..10dd91948c07
--- /dev/null
+++ b/lib/raid6/sse1.c
@@ -0,0 +1,162 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/sse1.c
15 *
16 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
17 *
18 * This is really an MMX implementation, but it requires SSE-1 or
19 * AMD MMXEXT for prefetch support and a few other features. The
20 * support for nontemporal memory accesses is enough to make this
21 * worthwhile as a separate implementation.
22 */
23
24#if defined(__i386__) && !defined(__arch_um__)
25
26#include <linux/raid/pq.h>
27#include "x86.h"
28
29/* Defined in raid6/mmx.c */
30extern const struct raid6_mmx_constants {
31 u64 x1d;
32} raid6_mmx_constants;
33
34static int raid6_have_sse1_or_mmxext(void)
35{
36 /* Not really boot_cpu but "all_cpus" */
37 return boot_cpu_has(X86_FEATURE_MMX) &&
38 (boot_cpu_has(X86_FEATURE_XMM) ||
39 boot_cpu_has(X86_FEATURE_MMXEXT));
40}
41
42/*
43 * Plain SSE1 implementation
44 */
45static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
46{
47 u8 **dptr = (u8 **)ptrs;
48 u8 *p, *q;
49 int d, z, z0;
50
51 z0 = disks - 3; /* Highest data disk */
52 p = dptr[z0+1]; /* XOR parity */
53 q = dptr[z0+2]; /* RS syndrome */
54
55 kernel_fpu_begin();
56
57 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
58 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
59
60 for ( d = 0 ; d < bytes ; d += 8 ) {
61 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
62 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
63 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
64 asm volatile("movq %mm2,%mm4"); /* Q[0] */
65 asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
66 for ( z = z0-2 ; z >= 0 ; z-- ) {
67 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
68 asm volatile("pcmpgtb %mm4,%mm5");
69 asm volatile("paddb %mm4,%mm4");
70 asm volatile("pand %mm0,%mm5");
71 asm volatile("pxor %mm5,%mm4");
72 asm volatile("pxor %mm5,%mm5");
73 asm volatile("pxor %mm6,%mm2");
74 asm volatile("pxor %mm6,%mm4");
75 asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
76 }
77 asm volatile("pcmpgtb %mm4,%mm5");
78 asm volatile("paddb %mm4,%mm4");
79 asm volatile("pand %mm0,%mm5");
80 asm volatile("pxor %mm5,%mm4");
81 asm volatile("pxor %mm5,%mm5");
82 asm volatile("pxor %mm6,%mm2");
83 asm volatile("pxor %mm6,%mm4");
84
85 asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
86 asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
87 }
88
89 asm volatile("sfence" : : : "memory");
90 kernel_fpu_end();
91}
92
93const struct raid6_calls raid6_sse1x1 = {
94 raid6_sse11_gen_syndrome,
95 raid6_have_sse1_or_mmxext,
96 "sse1x1",
97 1 /* Has cache hints */
98};
99
100/*
101 * Unrolled-by-2 SSE1 implementation
102 */
103static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
104{
105 u8 **dptr = (u8 **)ptrs;
106 u8 *p, *q;
107 int d, z, z0;
108
109 z0 = disks - 3; /* Highest data disk */
110 p = dptr[z0+1]; /* XOR parity */
111 q = dptr[z0+2]; /* RS syndrome */
112
113 kernel_fpu_begin();
114
115 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
116 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
117 asm volatile("pxor %mm7,%mm7"); /* Zero temp */
118
119 /* We uniformly assume a single prefetch covers at least 16 bytes */
120 for ( d = 0 ; d < bytes ; d += 16 ) {
121 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
123 asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
124 asm volatile("movq %mm2,%mm4"); /* Q[0] */
125 asm volatile("movq %mm3,%mm6"); /* Q[1] */
126 for ( z = z0-1 ; z >= 0 ; z-- ) {
127 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128 asm volatile("pcmpgtb %mm4,%mm5");
129 asm volatile("pcmpgtb %mm6,%mm7");
130 asm volatile("paddb %mm4,%mm4");
131 asm volatile("paddb %mm6,%mm6");
132 asm volatile("pand %mm0,%mm5");
133 asm volatile("pand %mm0,%mm7");
134 asm volatile("pxor %mm5,%mm4");
135 asm volatile("pxor %mm7,%mm6");
136 asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
137 asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
138 asm volatile("pxor %mm5,%mm2");
139 asm volatile("pxor %mm7,%mm3");
140 asm volatile("pxor %mm5,%mm4");
141 asm volatile("pxor %mm7,%mm6");
142 asm volatile("pxor %mm5,%mm5");
143 asm volatile("pxor %mm7,%mm7");
144 }
145 asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
146 asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
147 asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
148 asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
149 }
150
151 asm volatile("sfence" : :: "memory");
152 kernel_fpu_end();
153}
154
155const struct raid6_calls raid6_sse1x2 = {
156 raid6_sse12_gen_syndrome,
157 raid6_have_sse1_or_mmxext,
158 "sse1x2",
159 1 /* Has cache hints */
160};
161
162#endif
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
new file mode 100644
index 000000000000..bc2d57daa589
--- /dev/null
+++ b/lib/raid6/sse2.c
@@ -0,0 +1,262 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/sse2.c
15 *
16 * SSE-2 implementation of RAID-6 syndrome functions
17 *
18 */
19
20#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
21
22#include <linux/raid/pq.h>
23#include "x86.h"
24
25static const struct raid6_sse_constants {
26 u64 x1d[2];
27} raid6_sse_constants __attribute__((aligned(16))) = {
28 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
29};
30
31static int raid6_have_sse2(void)
32{
33 /* Not really boot_cpu but "all_cpus" */
34 return boot_cpu_has(X86_FEATURE_MMX) &&
35 boot_cpu_has(X86_FEATURE_FXSR) &&
36 boot_cpu_has(X86_FEATURE_XMM) &&
37 boot_cpu_has(X86_FEATURE_XMM2);
38}
39
40/*
41 * Plain SSE2 implementation
42 */
43static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
44{
45 u8 **dptr = (u8 **)ptrs;
46 u8 *p, *q;
47 int d, z, z0;
48
49 z0 = disks - 3; /* Highest data disk */
50 p = dptr[z0+1]; /* XOR parity */
51 q = dptr[z0+2]; /* RS syndrome */
52
53 kernel_fpu_begin();
54
55 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
56 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
57
58 for ( d = 0 ; d < bytes ; d += 16 ) {
59 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
60 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
61 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
62 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
63 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
64 for ( z = z0-2 ; z >= 0 ; z-- ) {
65 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
66 asm volatile("pcmpgtb %xmm4,%xmm5");
67 asm volatile("paddb %xmm4,%xmm4");
68 asm volatile("pand %xmm0,%xmm5");
69 asm volatile("pxor %xmm5,%xmm4");
70 asm volatile("pxor %xmm5,%xmm5");
71 asm volatile("pxor %xmm6,%xmm2");
72 asm volatile("pxor %xmm6,%xmm4");
73 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
74 }
75 asm volatile("pcmpgtb %xmm4,%xmm5");
76 asm volatile("paddb %xmm4,%xmm4");
77 asm volatile("pand %xmm0,%xmm5");
78 asm volatile("pxor %xmm5,%xmm4");
79 asm volatile("pxor %xmm5,%xmm5");
80 asm volatile("pxor %xmm6,%xmm2");
81 asm volatile("pxor %xmm6,%xmm4");
82
83 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
84 asm volatile("pxor %xmm2,%xmm2");
85 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
86 asm volatile("pxor %xmm4,%xmm4");
87 }
88
89 asm volatile("sfence" : : : "memory");
90 kernel_fpu_end();
91}
92
93const struct raid6_calls raid6_sse2x1 = {
94 raid6_sse21_gen_syndrome,
95 raid6_have_sse2,
96 "sse2x1",
97 1 /* Has cache hints */
98};
99
100/*
101 * Unrolled-by-2 SSE2 implementation
102 */
103static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
104{
105 u8 **dptr = (u8 **)ptrs;
106 u8 *p, *q;
107 int d, z, z0;
108
109 z0 = disks - 3; /* Highest data disk */
110 p = dptr[z0+1]; /* XOR parity */
111 q = dptr[z0+2]; /* RS syndrome */
112
113 kernel_fpu_begin();
114
115 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
116 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
117 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
118
119 /* We uniformly assume a single prefetch covers at least 32 bytes */
120 for ( d = 0 ; d < bytes ; d += 32 ) {
121 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
123 asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
124 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
125 asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
126 for ( z = z0-1 ; z >= 0 ; z-- ) {
127 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128 asm volatile("pcmpgtb %xmm4,%xmm5");
129 asm volatile("pcmpgtb %xmm6,%xmm7");
130 asm volatile("paddb %xmm4,%xmm4");
131 asm volatile("paddb %xmm6,%xmm6");
132 asm volatile("pand %xmm0,%xmm5");
133 asm volatile("pand %xmm0,%xmm7");
134 asm volatile("pxor %xmm5,%xmm4");
135 asm volatile("pxor %xmm7,%xmm6");
136 asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
137 asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
138 asm volatile("pxor %xmm5,%xmm2");
139 asm volatile("pxor %xmm7,%xmm3");
140 asm volatile("pxor %xmm5,%xmm4");
141 asm volatile("pxor %xmm7,%xmm6");
142 asm volatile("pxor %xmm5,%xmm5");
143 asm volatile("pxor %xmm7,%xmm7");
144 }
145 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
146 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
147 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
148 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
149 }
150
151 asm volatile("sfence" : : : "memory");
152 kernel_fpu_end();
153}
154
155const struct raid6_calls raid6_sse2x2 = {
156 raid6_sse22_gen_syndrome,
157 raid6_have_sse2,
158 "sse2x2",
159 1 /* Has cache hints */
160};
161
162#endif
163
164#if defined(__x86_64__) && !defined(__arch_um__)
165
166/*
167 * Unrolled-by-4 SSE2 implementation
168 */
169static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
170{
171 u8 **dptr = (u8 **)ptrs;
172 u8 *p, *q;
173 int d, z, z0;
174
175 z0 = disks - 3; /* Highest data disk */
176 p = dptr[z0+1]; /* XOR parity */
177 q = dptr[z0+2]; /* RS syndrome */
178
179 kernel_fpu_begin();
180
181 asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
182 asm volatile("pxor %xmm2,%xmm2"); /* P[0] */
183 asm volatile("pxor %xmm3,%xmm3"); /* P[1] */
184 asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */
185 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
186 asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */
187 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
188 asm volatile("pxor %xmm10,%xmm10"); /* P[2] */
189 asm volatile("pxor %xmm11,%xmm11"); /* P[3] */
190 asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */
191 asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */
192 asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */
193 asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */
194
195 for ( d = 0 ; d < bytes ; d += 64 ) {
196 for ( z = z0 ; z >= 0 ; z-- ) {
197 /* The second prefetch seems to improve performance... */
198 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
199 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
200 asm volatile("pcmpgtb %xmm4,%xmm5");
201 asm volatile("pcmpgtb %xmm6,%xmm7");
202 asm volatile("pcmpgtb %xmm12,%xmm13");
203 asm volatile("pcmpgtb %xmm14,%xmm15");
204 asm volatile("paddb %xmm4,%xmm4");
205 asm volatile("paddb %xmm6,%xmm6");
206 asm volatile("paddb %xmm12,%xmm12");
207 asm volatile("paddb %xmm14,%xmm14");
208 asm volatile("pand %xmm0,%xmm5");
209 asm volatile("pand %xmm0,%xmm7");
210 asm volatile("pand %xmm0,%xmm13");
211 asm volatile("pand %xmm0,%xmm15");
212 asm volatile("pxor %xmm5,%xmm4");
213 asm volatile("pxor %xmm7,%xmm6");
214 asm volatile("pxor %xmm13,%xmm12");
215 asm volatile("pxor %xmm15,%xmm14");
216 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
217 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
218 asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
219 asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
220 asm volatile("pxor %xmm5,%xmm2");
221 asm volatile("pxor %xmm7,%xmm3");
222 asm volatile("pxor %xmm13,%xmm10");
223 asm volatile("pxor %xmm15,%xmm11");
224 asm volatile("pxor %xmm5,%xmm4");
225 asm volatile("pxor %xmm7,%xmm6");
226 asm volatile("pxor %xmm13,%xmm12");
227 asm volatile("pxor %xmm15,%xmm14");
228 asm volatile("pxor %xmm5,%xmm5");
229 asm volatile("pxor %xmm7,%xmm7");
230 asm volatile("pxor %xmm13,%xmm13");
231 asm volatile("pxor %xmm15,%xmm15");
232 }
233 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
234 asm volatile("pxor %xmm2,%xmm2");
235 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
236 asm volatile("pxor %xmm3,%xmm3");
237 asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
238 asm volatile("pxor %xmm10,%xmm10");
239 asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
240 asm volatile("pxor %xmm11,%xmm11");
241 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
242 asm volatile("pxor %xmm4,%xmm4");
243 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
244 asm volatile("pxor %xmm6,%xmm6");
245 asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
246 asm volatile("pxor %xmm12,%xmm12");
247 asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
248 asm volatile("pxor %xmm14,%xmm14");
249 }
250
251 asm volatile("sfence" : : : "memory");
252 kernel_fpu_end();
253}
254
255const struct raid6_calls raid6_sse2x4 = {
256 raid6_sse24_gen_syndrome,
257 raid6_have_sse2,
258 "sse2x4",
259 1 /* Has cache hints */
260};
261
262#endif
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
new file mode 100644
index 000000000000..aa651697b6dc
--- /dev/null
+++ b/lib/raid6/test/Makefile
@@ -0,0 +1,72 @@
1#
2# This is a simple Makefile to test some of the RAID-6 code
3# from userspace.
4#
5
6CC = gcc
7OPTFLAGS = -O2 # Adjust as desired
8CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
9LD = ld
10AWK = awk -f
11AR = ar
12RANLIB = ranlib
13
14.c.o:
15 $(CC) $(CFLAGS) -c -o $@ $<
16
17%.c: ../%.c
18 cp -f $< $@
19
20%.uc: ../%.uc
21 cp -f $< $@
22
23all: raid6.a raid6test
24
25raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
26 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
27 tables.o
28 rm -f $@
29 $(AR) cq $@ $^
30 $(RANLIB) $@
31
32raid6test: test.c raid6.a
33 $(CC) $(CFLAGS) -o raid6test $^
34
35altivec1.c: altivec.uc ../unroll.awk
36 $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
37
38altivec2.c: altivec.uc ../unroll.awk
39 $(AWK) ../unroll.awk -vN=2 < altivec.uc > $@
40
41altivec4.c: altivec.uc ../unroll.awk
42 $(AWK) ../unroll.awk -vN=4 < altivec.uc > $@
43
44altivec8.c: altivec.uc ../unroll.awk
45 $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
46
47int1.c: int.uc ../unroll.awk
48 $(AWK) ../unroll.awk -vN=1 < int.uc > $@
49
50int2.c: int.uc ../unroll.awk
51 $(AWK) ../unroll.awk -vN=2 < int.uc > $@
52
53int4.c: int.uc ../unroll.awk
54 $(AWK) ../unroll.awk -vN=4 < int.uc > $@
55
56int8.c: int.uc ../unroll.awk
57 $(AWK) ../unroll.awk -vN=8 < int.uc > $@
58
59int16.c: int.uc ../unroll.awk
60 $(AWK) ../unroll.awk -vN=16 < int.uc > $@
61
62int32.c: int.uc ../unroll.awk
63 $(AWK) ../unroll.awk -vN=32 < int.uc > $@
64
65tables.c: mktables
66 ./mktables > tables.c
67
68clean:
69 rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
70
71spotless: clean
72 rm -f *~
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
new file mode 100644
index 000000000000..7a930318b17d
--- /dev/null
+++ b/lib/raid6/test/test.c
@@ -0,0 +1,124 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * raid6test.c
13 *
14 * Test RAID-6 recovery with various algorithms
15 */
16
17#include <stdlib.h>
18#include <stdio.h>
19#include <string.h>
20#include <linux/raid/pq.h>
21
22#define NDISKS 16 /* Including P and Q */
23
24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
25struct raid6_calls raid6_call;
26
27char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE];
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
30
31static void makedata(void)
32{
33 int i, j;
34
35 for (i = 0; i < NDISKS; i++) {
36 for (j = 0; j < PAGE_SIZE; j++)
37 data[i][j] = rand();
38
39 dataptrs[i] = data[i];
40 }
41}
42
43static char disk_type(int d)
44{
45 switch (d) {
46 case NDISKS-2:
47 return 'P';
48 case NDISKS-1:
49 return 'Q';
50 default:
51 return 'D';
52 }
53}
54
55static int test_disks(int i, int j)
56{
57 int erra, errb;
58
59 memset(recovi, 0xf0, PAGE_SIZE);
60 memset(recovj, 0xba, PAGE_SIZE);
61
62 dataptrs[i] = recovi;
63 dataptrs[j] = recovj;
64
65 raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);
66
67 erra = memcmp(data[i], recovi, PAGE_SIZE);
68 errb = memcmp(data[j], recovj, PAGE_SIZE);
69
70 if (i < NDISKS-2 && j == NDISKS-1) {
71 /* We don't implement the DQ failure scenario, since it's
72 equivalent to a RAID-5 failure (XOR, then recompute Q) */
73 erra = errb = 0;
74 } else {
75 printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
76 raid6_call.name,
77 i, disk_type(i),
78 j, disk_type(j),
79 (!erra && !errb) ? "OK" :
80 !erra ? "ERRB" :
81 !errb ? "ERRA" : "ERRAB");
82 }
83
84 dataptrs[i] = data[i];
85 dataptrs[j] = data[j];
86
87 return erra || errb;
88}
89
90int main(int argc, char *argv[])
91{
92 const struct raid6_calls *const *algo;
93 int i, j;
94 int err = 0;
95
96 makedata();
97
98 for (algo = raid6_algos; *algo; algo++) {
99 if (!(*algo)->valid || (*algo)->valid()) {
100 raid6_call = **algo;
101
102 /* Nuke syndromes */
103 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
104
105 /* Generate assumed good syndrome */
106 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
107 (void **)&dataptrs);
108
109 for (i = 0; i < NDISKS-1; i++)
110 for (j = i+1; j < NDISKS; j++)
111 err += test_disks(i, j);
112 }
113 printf("\n");
114 }
115
116 printf("\n");
 117 /* Test raid6_select_algo(), which picks the best algorithm */
118 raid6_select_algo();
119
120 if (err)
121 printf("\n*** ERRORS FOUND ***\n");
122
123 return err;
124}
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
new file mode 100644
index 000000000000..c6aa03631df8
--- /dev/null
+++ b/lib/raid6/unroll.awk
@@ -0,0 +1,20 @@
1
2# This filter requires one command line option of form -vN=n
3# where n must be a decimal number.
4#
5# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
6# Replace each $# with n, and each $* with a single $.
7
8BEGIN {
9 n = N + 0
10}
11{
12 if (/\$\$/) { rep = n } else { rep = 1 }
13 for (i = 0; i < rep; ++i) {
14 tmp = $0
15 gsub(/\$\$/, i, tmp)
16 gsub(/\$\#/, n, tmp)
17 gsub(/\$\*/, "$", tmp)
18 print tmp
19 }
20}
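
To make the substitution rules above concrete, here is what the unroller does to one template line. The template line is only an illustration modeled on the int.uc style, not a quote from it.

/* Template line in a .uc file (illustrative):
 *
 *	wp$$ = vp$$ = dptr[z0][d+$$*NSIZE];
 *
 * Run through "awk -f unroll.awk -vN=2" it is emitted twice, with $$
 * replaced by 0 and 1 (any $# would become 2, and $* a literal $):
 */
	wp0 = vp0 = dptr[z0][d+0*NSIZE];
	wp1 = vp1 = dptr[z0][d+1*NSIZE];
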
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
new file mode 100644
index 000000000000..cb2a8c91c886
--- /dev/null
+++ b/lib/raid6/x86.h
@@ -0,0 +1,61 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
 7 * the Free Software Foundation, Inc., 59 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/x86.h
15 *
16 * Definitions common to x86 and x86-64 RAID-6 code only
17 */
18
19#ifndef LINUX_RAID_RAID6X86_H
20#define LINUX_RAID_RAID6X86_H
21
22#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
23
24#ifdef __KERNEL__ /* Real code */
25
26#include <asm/i387.h>
27
28#else /* Dummy code for user space testing */
29
30static inline void kernel_fpu_begin(void)
31{
32}
33
34static inline void kernel_fpu_end(void)
35{
36}
37
38#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
39#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
40 * (fast save and restore) */
41#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
42#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
43#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
44
45/* Should work well enough on modern CPUs for testing */
46static inline int boot_cpu_has(int flag)
47{
48 u32 eax = (flag >> 5) ? 0x80000001 : 1;
49 u32 edx;
50
51 asm volatile("cpuid"
52 : "+a" (eax), "=d" (edx)
53 : : "ecx", "ebx");
54
55 return (edx >> (flag & 31)) & 1;
56}
57
58#endif /* ndef __KERNEL__ */
59
60#endif
61#endif
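
A small sketch of how these user-space stubs are meant to be used, so the same feature-gated code compiles both in-kernel and in the test harness. The function below is illustrative, not taken from algos.c.

#include <stdio.h>
#include "x86.h"	/* the header above */

static void run_best_simd_path(void)
{
	if (boot_cpu_has(X86_FEATURE_XMM2)) {
		kernel_fpu_begin();	/* a no-op in the user-space build */
		/* ... call an SSE-2 syndrome routine here ... */
		kernel_fpu_end();
	} else {
		printf("SSE2 not available, using the integer routines\n");
	}
}
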
diff --git a/lib/random32.c b/lib/random32.c
index 217d5c4b666d..fc3545a32771 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -39,13 +39,16 @@
39#include <linux/jiffies.h> 39#include <linux/jiffies.h>
40#include <linux/random.h> 40#include <linux/random.h>
41 41
42struct rnd_state {
43 u32 s1, s2, s3;
44};
45
46static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 42static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
47 43
48static u32 __random32(struct rnd_state *state) 44/**
45 * prandom32 - seeded pseudo-random number generator.
46 * @state: pointer to state structure holding seeded state.
47 *
48 * This is used for pseudo-randomness with no outside seeding.
49 * For more random results, use random32().
50 */
51u32 prandom32(struct rnd_state *state)
49{ 52{
50#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) 53#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
51 54
@@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state)
55 58
56 return (state->s1 ^ state->s2 ^ state->s3); 59 return (state->s1 ^ state->s2 ^ state->s3);
57} 60}
58 61EXPORT_SYMBOL(prandom32);
59/*
60 * Handle minimum values for seeds
61 */
62static inline u32 __seed(u32 x, u32 m)
63{
64 return (x < m) ? x + m : x;
65}
66 62
67/** 63/**
68 * random32 - pseudo random number generator 64 * random32 - pseudo random number generator
@@ -75,7 +71,7 @@ u32 random32(void)
75{ 71{
76 unsigned long r; 72 unsigned long r;
77 struct rnd_state *state = &get_cpu_var(net_rand_state); 73 struct rnd_state *state = &get_cpu_var(net_rand_state);
78 r = __random32(state); 74 r = prandom32(state);
79 put_cpu_var(state); 75 put_cpu_var(state);
80 return r; 76 return r;
81} 77}
@@ -118,12 +114,12 @@ static int __init random32_init(void)
118 state->s3 = __seed(LCG(state->s2), 15); 114 state->s3 = __seed(LCG(state->s2), 15);
119 115
120 /* "warm it up" */ 116 /* "warm it up" */
121 __random32(state); 117 prandom32(state);
122 __random32(state); 118 prandom32(state);
123 __random32(state); 119 prandom32(state);
124 __random32(state); 120 prandom32(state);
125 __random32(state); 121 prandom32(state);
126 __random32(state); 122 prandom32(state);
127 } 123 }
128 return 0; 124 return 0;
129} 125}
@@ -131,7 +127,7 @@ core_initcall(random32_init);
131 127
132/* 128/*
133 * Generate better values after random number generator 129 * Generate better values after random number generator
134 * is fully initalized. 130 * is fully initialized.
135 */ 131 */
136static int __init random32_reseed(void) 132static int __init random32_reseed(void)
137{ 133{
@@ -147,7 +143,7 @@ static int __init random32_reseed(void)
147 state->s3 = __seed(seeds[2], 15); 143 state->s3 = __seed(seeds[2], 15);
148 144
149 /* mix it in */ 145 /* mix it in */
150 __random32(state); 146 prandom32(state);
151 } 147 }
152 return 0; 148 return 0;
153} 149}
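
With prandom32() exported, a caller that wants its own reproducible stream uses it roughly as below. This is a sketch that assumes the companion header change exposing struct rnd_state; the seed values are arbitrary.

#include <linux/random.h>

static u32 next_noise_sample(void)
{
	/* Private state: seeded once here, advanced on every call. */
	static struct rnd_state noise_state = {
		/* Arbitrary seeds, chosen well above the small minimum
		 * values the Tausworthe recurrence requires per word. */
		.s1 = 0x12345678, .s2 = 0x87654321, .s3 = 0x0badcafe,
	};

	return prandom32(&noise_state);	/* no outside entropy is mixed in */
}
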
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 09f5ce1810dc..027a03f4c56d 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -16,9 +16,14 @@
16/* 16/*
17 * __ratelimit - rate limiting 17 * __ratelimit - rate limiting
18 * @rs: ratelimit_state data 18 * @rs: ratelimit_state data
19 * @func: name of calling function
19 * 20 *
20 * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks 21 * This enforces a rate limit: not more than @rs->burst callbacks
21 * in every @rs->ratelimit_jiffies 22 * in every @rs->interval
23 *
24 * RETURNS:
25 * 0 means callbacks will be suppressed.
26 * 1 means go ahead and do it.
22 */ 27 */
23int ___ratelimit(struct ratelimit_state *rs, const char *func) 28int ___ratelimit(struct ratelimit_state *rs, const char *func)
24{ 29{
@@ -35,7 +40,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
35 * the entity that is holding the lock already: 40 * the entity that is holding the lock already:
36 */ 41 */
37 if (!spin_trylock_irqsave(&rs->lock, flags)) 42 if (!spin_trylock_irqsave(&rs->lock, flags))
38 return 1; 43 return 0;
39 44
40 if (!rs->begin) 45 if (!rs->begin)
41 rs->begin = jiffies; 46 rs->begin = jiffies;
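
For reference, the usual calling pattern for the return value documented above; the interval, burst and message are made up for the example.

#include <linux/kernel.h>
#include <linux/ratelimit.h>

/* At most 5 messages per 10 seconds from this call site. */
static DEFINE_RATELIMIT_STATE(overrun_rs, 10 * HZ, 5);

static void report_overrun(int id)
{
	if (__ratelimit(&overrun_rs))	/* 1: go ahead, 0: suppressed */
		printk(KERN_WARNING "device %d: ring buffer overrun\n", id);
}
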
diff --git a/lib/rbtree.c b/lib/rbtree.c
index e2aa3be29858..4693f79195d3 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -283,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
283} 283}
284EXPORT_SYMBOL(rb_erase); 284EXPORT_SYMBOL(rb_erase);
285 285
286static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
287{
288 struct rb_node *parent;
289
290up:
291 func(node, data);
292 parent = rb_parent(node);
293 if (!parent)
294 return;
295
296 if (node == parent->rb_left && parent->rb_right)
297 func(parent->rb_right, data);
298 else if (parent->rb_left)
299 func(parent->rb_left, data);
300
301 node = parent;
302 goto up;
303}
304
305/*
306 * after inserting @node into the tree, update the tree to account for
307 * both the new entry and any damage done by rebalance
308 */
309void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
310{
311 if (node->rb_left)
312 node = node->rb_left;
313 else if (node->rb_right)
314 node = node->rb_right;
315
316 rb_augment_path(node, func, data);
317}
318
319/*
320 * before removing the node, find the deepest node on the rebalance path
321 * that will still be there after @node gets removed
322 */
323struct rb_node *rb_augment_erase_begin(struct rb_node *node)
324{
325 struct rb_node *deepest;
326
327 if (!node->rb_right && !node->rb_left)
328 deepest = rb_parent(node);
329 else if (!node->rb_right)
330 deepest = node->rb_left;
331 else if (!node->rb_left)
332 deepest = node->rb_right;
333 else {
334 deepest = rb_next(node);
335 if (deepest->rb_right)
336 deepest = deepest->rb_right;
337 else if (rb_parent(deepest) != node)
338 deepest = rb_parent(deepest);
339 }
340
341 return deepest;
342}
343
344/*
345 * after removal, update the tree to account for the removed entry
346 * and any rebalance damage.
347 */
348void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
349{
350 if (node)
351 rb_augment_path(node, func, data);
352}
353
286/* 354/*
287 * This function returns the first node (in sort order) of the tree. 355 * This function returns the first node (in sort order) of the tree.
288 */ 356 */
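
A minimal sketch of how the three new hooks fit together, using a node that caches its subtree size. The node type and callback are illustrative, not part of the patch.

#include <linux/rbtree.h>

struct sz_node {
	struct rb_node node;
	unsigned int subtree_size;	/* 1 + sizes of both children, cached */
};

/* rb_augment_f callback: recompute the cached value for one node. */
static void sz_update(struct rb_node *rb, void *data)
{
	struct sz_node *n = rb_entry(rb, struct sz_node, node);
	unsigned int size = 1;

	if (rb->rb_left)
		size += rb_entry(rb->rb_left, struct sz_node, node)->subtree_size;
	if (rb->rb_right)
		size += rb_entry(rb->rb_right, struct sz_node, node)->subtree_size;
	n->subtree_size = size;
}

/* Call after the usual rb_link_node() + rb_insert_color(). */
static void sz_insert_done(struct sz_node *n)
{
	rb_augment_insert(&n->node, sz_update, NULL);
}

/* Wrap rb_erase() so the cached sizes stay correct. */
static void sz_erase(struct sz_node *n, struct rb_root *root)
{
	struct rb_node *deepest = rb_augment_erase_begin(&n->node);

	rb_erase(&n->node, root);
	rb_augment_erase_end(deepest, sz_update, NULL);
}
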
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index ccf95bff7984..ffc9fc7f3b05 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -143,13 +143,14 @@ void __sched __down_read(struct rw_semaphore *sem)
143{ 143{
144 struct rwsem_waiter waiter; 144 struct rwsem_waiter waiter;
145 struct task_struct *tsk; 145 struct task_struct *tsk;
146 unsigned long flags;
146 147
147 spin_lock_irq(&sem->wait_lock); 148 spin_lock_irqsave(&sem->wait_lock, flags);
148 149
149 if (sem->activity >= 0 && list_empty(&sem->wait_list)) { 150 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
150 /* granted */ 151 /* granted */
151 sem->activity++; 152 sem->activity++;
152 spin_unlock_irq(&sem->wait_lock); 153 spin_unlock_irqrestore(&sem->wait_lock, flags);
153 goto out; 154 goto out;
154 } 155 }
155 156
@@ -164,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem)
164 list_add_tail(&waiter.list, &sem->wait_list); 165 list_add_tail(&waiter.list, &sem->wait_list);
165 166
166 /* we don't need to touch the semaphore struct anymore */ 167 /* we don't need to touch the semaphore struct anymore */
167 spin_unlock_irq(&sem->wait_lock); 168 spin_unlock_irqrestore(&sem->wait_lock, flags);
168 169
169 /* wait to be given the lock */ 170 /* wait to be given the lock */
170 for (;;) { 171 for (;;) {
@@ -209,13 +210,14 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
209{ 210{
210 struct rwsem_waiter waiter; 211 struct rwsem_waiter waiter;
211 struct task_struct *tsk; 212 struct task_struct *tsk;
213 unsigned long flags;
212 214
213 spin_lock_irq(&sem->wait_lock); 215 spin_lock_irqsave(&sem->wait_lock, flags);
214 216
215 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 217 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
216 /* granted */ 218 /* granted */
217 sem->activity = -1; 219 sem->activity = -1;
218 spin_unlock_irq(&sem->wait_lock); 220 spin_unlock_irqrestore(&sem->wait_lock, flags);
219 goto out; 221 goto out;
220 } 222 }
221 223
@@ -230,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
230 list_add_tail(&waiter.list, &sem->wait_list); 232 list_add_tail(&waiter.list, &sem->wait_list);
231 233
232 /* we don't need to touch the semaphore struct anymore */ 234 /* we don't need to touch the semaphore struct anymore */
233 spin_unlock_irq(&sem->wait_lock); 235 spin_unlock_irqrestore(&sem->wait_lock, flags);
234 236
235 /* wait to be given the lock */ 237 /* wait to be given the lock */
236 for (;;) { 238 for (;;) {
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e5665e..f236d7cd5cf3 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,45 +36,56 @@ struct rwsem_waiter {
36#define RWSEM_WAITING_FOR_WRITE 0x00000002 36#define RWSEM_WAITING_FOR_WRITE 0x00000002
37}; 37};
38 38
39/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
40 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
41 * since the rwsem value was observed.
42 */
43#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
44#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */
45#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
46
39/* 47/*
40 * handle the lock release when processes blocked on it that can now run 48 * handle the lock release when processes blocked on it that can now run
41 * - if we come here from up_xxxx(), then: 49 * - if we come here from up_xxxx(), then:
42 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) 50 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
43 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) 51 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
44 * - there must be someone on the queue 52 * - there must be someone on the queue
45 * - the spinlock must be held by the caller 53 * - the spinlock must be held by the caller
46 * - woken process blocks are discarded from the list after having task zeroed 54 * - woken process blocks are discarded from the list after having task zeroed
47 * - writers are only woken if downgrading is false 55 * - writers are only woken if downgrading is false
48 */ 56 */
49static inline struct rw_semaphore * 57static struct rw_semaphore *
50__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) 58__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
51{ 59{
52 struct rwsem_waiter *waiter; 60 struct rwsem_waiter *waiter;
53 struct task_struct *tsk; 61 struct task_struct *tsk;
54 struct list_head *next; 62 struct list_head *next;
55 signed long oldcount, woken, loop; 63 signed long oldcount, woken, loop, adjustment;
56
57 if (downgrading)
58 goto dont_wake_writers;
59
60 /* if we came through an up_xxxx() call, we only only wake someone up
61 * if we can transition the active part of the count from 0 -> 1
62 */
63 try_again:
64 oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
65 - RWSEM_ACTIVE_BIAS;
66 if (oldcount & RWSEM_ACTIVE_MASK)
67 goto undo;
68 64
69 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 65 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
70
71 /* try to grant a single write lock if there's a writer at the front
72 * of the queue - note we leave the 'active part' of the count
73 * incremented by 1 and the waiting part incremented by 0x00010000
74 */
75 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) 66 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
76 goto readers_only; 67 goto readers_only;
77 68
69 if (wake_type == RWSEM_WAKE_READ_OWNED)
70 /* Another active reader was observed, so wakeup is not
71 * likely to succeed. Save the atomic op.
72 */
73 goto out;
74
75 /* There's a writer at the front of the queue - try to grant it the
76 * write lock. However, we only wake this writer if we can transition
77 * the active part of the count from 0 -> 1
78 */
79 adjustment = RWSEM_ACTIVE_WRITE_BIAS;
80 if (waiter->list.next == &sem->wait_list)
81 adjustment -= RWSEM_WAITING_BIAS;
82
83 try_again_write:
84 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
85 if (oldcount & RWSEM_ACTIVE_MASK)
86 /* Someone grabbed the sem already */
87 goto undo_write;
88
78 /* We must be careful not to touch 'waiter' after we set ->task = NULL. 89 /* We must be careful not to touch 'waiter' after we set ->task = NULL.
 79 * It is allocated on the waiter's stack and may become invalid at 90
80 * any time after that point (due to a wakeup from another source). 91 * any time after that point (due to a wakeup from another source).
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
87 put_task_struct(tsk); 98 put_task_struct(tsk);
88 goto out; 99 goto out;
89 100
90 /* don't want to wake any writers */ 101 readers_only:
91 dont_wake_writers: 102 /* If we come here from up_xxxx(), another thread might have reached
92 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 103 * rwsem_down_failed_common() before we acquired the spinlock and
93 if (waiter->flags & RWSEM_WAITING_FOR_WRITE) 104 * woken up a waiter, making it now active. We prefer to check for
105 * this first in order to not spend too much time with the spinlock
106 * held if we're not going to be able to wake up readers in the end.
107 *
108 * Note that we do not need to update the rwsem count: any writer
109 * trying to acquire rwsem will run rwsem_down_write_failed() due
110 * to the waiting threads and block trying to acquire the spinlock.
111 *
112 * We use a dummy atomic update in order to acquire the cache line
113 * exclusively since we expect to succeed and run the final rwsem
114 * count adjustment pretty soon.
115 */
116 if (wake_type == RWSEM_WAKE_ANY &&
117 rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
118 /* Someone grabbed the sem for write already */
94 goto out; 119 goto out;
95 120
96 /* grant an infinite number of read locks to the readers at the front 121 /* Grant an infinite number of read locks to the readers at the front
97 * of the queue 122 * of the queue. Note we increment the 'active part' of the count by
98 * - note we increment the 'active part' of the count by the number of 123 * the number of readers before waking any processes up.
99 * readers before waking any processes up
100 */ 124 */
101 readers_only:
102 woken = 0; 125 woken = 0;
103 do { 126 do {
104 woken++; 127 woken++;
@@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
111 134
112 } while (waiter->flags & RWSEM_WAITING_FOR_READ); 135 } while (waiter->flags & RWSEM_WAITING_FOR_READ);
113 136
114 loop = woken; 137 adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
115 woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; 138 if (waiter->flags & RWSEM_WAITING_FOR_READ)
116 if (!downgrading) 139 /* hit end of list above */
117 /* we'd already done one increment earlier */ 140 adjustment -= RWSEM_WAITING_BIAS;
118 woken -= RWSEM_ACTIVE_BIAS;
119 141
120 rwsem_atomic_add(woken, sem); 142 rwsem_atomic_add(adjustment, sem);
121 143
122 next = sem->wait_list.next; 144 next = sem->wait_list.next;
123 for (; loop > 0; loop--) { 145 for (loop = woken; loop > 0; loop--) {
124 waiter = list_entry(next, struct rwsem_waiter, list); 146 waiter = list_entry(next, struct rwsem_waiter, list);
125 next = waiter->list.next; 147 next = waiter->list.next;
126 tsk = waiter->task; 148 tsk = waiter->task;
@@ -136,11 +158,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
136 out: 158 out:
137 return sem; 159 return sem;
138 160
139 /* undo the change to count, but check for a transition 1->0 */ 161 /* undo the change to the active count, but check for a transition
140 undo: 162 * 1->0 */
141 if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) 163 undo_write:
164 if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
142 goto out; 165 goto out;
143 goto try_again; 166 goto try_again_write;
144} 167}
145 168
146/* 169/*
@@ -148,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
148 */ 171 */
149static struct rw_semaphore __sched * 172static struct rw_semaphore __sched *
150rwsem_down_failed_common(struct rw_semaphore *sem, 173rwsem_down_failed_common(struct rw_semaphore *sem,
151 struct rwsem_waiter *waiter, signed long adjustment) 174 unsigned int flags, signed long adjustment)
152{ 175{
176 struct rwsem_waiter waiter;
153 struct task_struct *tsk = current; 177 struct task_struct *tsk = current;
154 signed long count; 178 signed long count;
155 179
@@ -157,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
157 181
158 /* set up my own style of waitqueue */ 182 /* set up my own style of waitqueue */
159 spin_lock_irq(&sem->wait_lock); 183 spin_lock_irq(&sem->wait_lock);
160 waiter->task = tsk; 184 waiter.task = tsk;
185 waiter.flags = flags;
161 get_task_struct(tsk); 186 get_task_struct(tsk);
162 187
163 list_add_tail(&waiter->list, &sem->wait_list); 188 if (list_empty(&sem->wait_list))
189 adjustment += RWSEM_WAITING_BIAS;
190 list_add_tail(&waiter.list, &sem->wait_list);
164 191
165 /* we're now waiting on the lock, but no longer actively read-locking */ 192 /* we're now waiting on the lock, but no longer actively locking */
166 count = rwsem_atomic_update(adjustment, sem); 193 count = rwsem_atomic_update(adjustment, sem);
167 194
168 /* if there are no active locks, wake the front queued process(es) up */ 195 /* If there are no active locks, wake the front queued process(es) up.
169 if (!(count & RWSEM_ACTIVE_MASK)) 196 *
170 sem = __rwsem_do_wake(sem, 0); 197 * Alternatively, if we're called from a failed down_write(), there
198 * were already threads queued before us and there are no active
199 * writers, the lock must be read owned; so we try to wake any read
200 * locks that were queued ahead of us. */
201 if (count == RWSEM_WAITING_BIAS)
202 sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
203 else if (count > RWSEM_WAITING_BIAS &&
204 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
205 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
171 206
172 spin_unlock_irq(&sem->wait_lock); 207 spin_unlock_irq(&sem->wait_lock);
173 208
174 /* wait to be given the lock */ 209 /* wait to be given the lock */
175 for (;;) { 210 for (;;) {
176 if (!waiter->task) 211 if (!waiter.task)
177 break; 212 break;
178 schedule(); 213 schedule();
179 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 214 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
@@ -190,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
190asmregparm struct rw_semaphore __sched * 225asmregparm struct rw_semaphore __sched *
191rwsem_down_read_failed(struct rw_semaphore *sem) 226rwsem_down_read_failed(struct rw_semaphore *sem)
192{ 227{
193 struct rwsem_waiter waiter; 228 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
194 229 -RWSEM_ACTIVE_READ_BIAS);
195 waiter.flags = RWSEM_WAITING_FOR_READ;
196 rwsem_down_failed_common(sem, &waiter,
197 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
198 return sem;
199} 230}
200 231
201/* 232/*
@@ -204,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
204asmregparm struct rw_semaphore __sched * 235asmregparm struct rw_semaphore __sched *
205rwsem_down_write_failed(struct rw_semaphore *sem) 236rwsem_down_write_failed(struct rw_semaphore *sem)
206{ 237{
207 struct rwsem_waiter waiter; 238 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
208 239 -RWSEM_ACTIVE_WRITE_BIAS);
209 waiter.flags = RWSEM_WAITING_FOR_WRITE;
210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
211
212 return sem;
213} 240}
214 241
215/* 242/*
@@ -224,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
224 251
225 /* do nothing if list empty */ 252 /* do nothing if list empty */
226 if (!list_empty(&sem->wait_list)) 253 if (!list_empty(&sem->wait_list))
227 sem = __rwsem_do_wake(sem, 0); 254 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
228 255
229 spin_unlock_irqrestore(&sem->wait_lock, flags); 256 spin_unlock_irqrestore(&sem->wait_lock, flags);
230 257
@@ -244,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
244 271
245 /* do nothing if list empty */ 272 /* do nothing if list empty */
246 if (!list_empty(&sem->wait_list)) 273 if (!list_empty(&sem->wait_list))
247 sem = __rwsem_do_wake(sem, 1); 274 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
248 275
249 spin_unlock_irqrestore(&sem->wait_lock, flags); 276 spin_unlock_irqrestore(&sem->wait_lock, flags);
250 277
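
To make the bias arithmetic above easier to follow, these are the typical 32-bit values of the constants involved; they live in the per-architecture rwsem headers, not in this file, and 64-bit builds use wider fields.

/* Typical 32-bit layout (for orientation only; architectures differ): */
#define RWSEM_ACTIVE_BIAS	0x00000001L	/* one active reader or writer */
#define RWSEM_ACTIVE_MASK	0x0000ffffL	/* low half of count: active owners */
#define RWSEM_WAITING_BIAS	(-0x00010000L)	/* added once the wait list is non-empty */
#define RWSEM_ACTIVE_READ_BIAS	RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
 * With these values, count == RWSEM_WAITING_BIAS means "waiters queued,
 * nobody active", which is exactly when rwsem_down_failed_common() wakes
 * with RWSEM_WAKE_NO_ACTIVE; a larger count seen by a failed down_write()
 * means readers still hold the lock, the RWSEM_WAKE_READ_OWNED case.
 */
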
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 0d475d8167bf..4ceb05d772ae 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -7,8 +7,10 @@
7 * Version 2. See the file COPYING for more details. 7 * Version 2. See the file COPYING for more details.
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/slab.h>
10#include <linux/scatterlist.h> 11#include <linux/scatterlist.h>
11#include <linux/highmem.h> 12#include <linux/highmem.h>
13#include <linux/kmemleak.h>
12 14
13/** 15/**
14 * sg_next - return the next scatterlist entry in a list 16 * sg_next - return the next scatterlist entry in a list
@@ -114,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one);
114 */ 116 */
115static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask) 117static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
116{ 118{
117 if (nents == SG_MAX_SINGLE_ALLOC) 119 if (nents == SG_MAX_SINGLE_ALLOC) {
118 return (struct scatterlist *) __get_free_page(gfp_mask); 120 /*
119 else 121 * Kmemleak doesn't track page allocations as they are not
122 * commonly used (in a raw form) for kernel data structures.
123 * As we chain together a list of pages and then a normal
124 * kmalloc (tracked by kmemleak), in order to for that last
125 * allocation not to become decoupled (and thus a
126 * false-positive) we need to inform kmemleak of all the
127 * intermediate allocations.
128 */
129 void *ptr = (void *) __get_free_page(gfp_mask);
130 kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
131 return ptr;
132 } else
120 return kmalloc(nents * sizeof(struct scatterlist), gfp_mask); 133 return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
121} 134}
122 135
123static void sg_kfree(struct scatterlist *sg, unsigned int nents) 136static void sg_kfree(struct scatterlist *sg, unsigned int nents)
124{ 137{
125 if (nents == SG_MAX_SINGLE_ALLOC) 138 if (nents == SG_MAX_SINGLE_ALLOC) {
139 kmemleak_free(sg);
126 free_page((unsigned long) sg); 140 free_page((unsigned long) sg);
127 else 141 } else
128 kfree(sg); 142 kfree(sg);
129} 143}
130 144
@@ -234,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
234 left -= sg_size; 248 left -= sg_size;
235 249
236 sg = alloc_fn(alloc_size, gfp_mask); 250 sg = alloc_fn(alloc_size, gfp_mask);
237 if (unlikely(!sg)) 251 if (unlikely(!sg)) {
238 return -ENOMEM; 252 /*
253 * Adjust entry count to reflect that the last
254 * entry of the previous table won't be used for
255 * linkage. Without this, sg_kfree() may get
256 * confused.
257 */
258 if (prv)
259 table->nents = ++table->orig_nents;
260
261 return -ENOMEM;
262 }
239 263
240 sg_init_table(sg, alloc_size); 264 sg_init_table(sg, alloc_size);
241 table->nents = table->orig_nents += sg_size; 265 table->nents = table->orig_nents += sg_size;
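
sg_kmalloc()/sg_kfree() sit behind the public table API, so a typical caller never sees them; a minimal sketch of that caller (error handling trimmed):

#include <linux/gfp.h>
#include <linux/scatterlist.h>

static int map_two_buffers(void *buf1, void *buf2, size_t len)
{
	struct sg_table table;
	int ret;

	ret = sg_alloc_table(&table, 2, GFP_KERNEL);	/* uses sg_kmalloc() */
	if (ret)
		return ret;

	sg_set_buf(&table.sgl[0], buf1, len);
	sg_set_buf(&table.sgl[1], buf2, len);

	/* ... hand table.sgl to a DMA mapping call here ... */

	sg_free_table(&table);				/* uses sg_kfree() */
	return 0;
}
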
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 238e72a18ce1..fdc77c82f922 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -15,7 +15,7 @@ void show_mem(void)
15 unsigned long total = 0, reserved = 0, shared = 0, 15 unsigned long total = 0, reserved = 0, shared = 0,
16 nonshared = 0, highmem = 0; 16 nonshared = 0, highmem = 0;
17 17
18 printk(KERN_INFO "Mem-Info:\n"); 18 printk("Mem-Info:\n");
19 show_free_areas(); 19 show_free_areas();
20 20
21 for_each_online_pgdat(pgdat) { 21 for_each_online_pgdat(pgdat) {
@@ -49,15 +49,15 @@ void show_mem(void)
49 pgdat_resize_unlock(pgdat, &flags); 49 pgdat_resize_unlock(pgdat, &flags);
50 } 50 }
51 51
52 printk(KERN_INFO "%lu pages RAM\n", total); 52 printk("%lu pages RAM\n", total);
53#ifdef CONFIG_HIGHMEM 53#ifdef CONFIG_HIGHMEM
54 printk(KERN_INFO "%lu pages HighMem\n", highmem); 54 printk("%lu pages HighMem\n", highmem);
55#endif 55#endif
56 printk(KERN_INFO "%lu pages reserved\n", reserved); 56 printk("%lu pages reserved\n", reserved);
57 printk(KERN_INFO "%lu pages shared\n", shared); 57 printk("%lu pages shared\n", shared);
58 printk(KERN_INFO "%lu pages non-shared\n", nonshared); 58 printk("%lu pages non-shared\n", nonshared);
59#ifdef CONFIG_QUICKLIST 59#ifdef CONFIG_QUICKLIST
60 printk(KERN_INFO "%lu pages in pagetable cache\n", 60 printk("%lu pages in pagetable cache\n",
61 quicklist_total_size()); 61 quicklist_total_size());
62#endif 62#endif
63} 63}
diff --git a/lib/string.c b/lib/string.c
index a1cdcfcc42d0..f71bead1be3e 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -36,25 +36,21 @@ int strnicmp(const char *s1, const char *s2, size_t len)
36 /* Yes, Virginia, it had better be unsigned */ 36 /* Yes, Virginia, it had better be unsigned */
37 unsigned char c1, c2; 37 unsigned char c1, c2;
38 38
39 c1 = c2 = 0; 39 if (!len)
40 if (len) { 40 return 0;
41 do { 41
42 c1 = *s1; 42 do {
43 c2 = *s2; 43 c1 = *s1++;
44 s1++; 44 c2 = *s2++;
45 s2++; 45 if (!c1 || !c2)
46 if (!c1) 46 break;
47 break; 47 if (c1 == c2)
48 if (!c2) 48 continue;
49 break; 49 c1 = tolower(c1);
50 if (c1 == c2) 50 c2 = tolower(c2);
51 continue; 51 if (c1 != c2)
52 c1 = tolower(c1); 52 break;
53 c2 = tolower(c2); 53 } while (--len);
54 if (c1 != c2)
55 break;
56 } while (--len);
57 }
58 return (int)c1 - (int)c2; 54 return (int)c1 - (int)c2;
59} 55}
60EXPORT_SYMBOL(strnicmp); 56EXPORT_SYMBOL(strnicmp);
@@ -693,13 +689,13 @@ EXPORT_SYMBOL(strstr);
693 */ 689 */
694char *strnstr(const char *s1, const char *s2, size_t len) 690char *strnstr(const char *s1, const char *s2, size_t len)
695{ 691{
696 size_t l1 = len, l2; 692 size_t l2;
697 693
698 l2 = strlen(s2); 694 l2 = strlen(s2);
699 if (!l2) 695 if (!l2)
700 return (char *)s1; 696 return (char *)s1;
701 while (l1 >= l2) { 697 while (len >= l2) {
702 l1--; 698 len--;
703 if (!memcmp(s1, s2, l2)) 699 if (!memcmp(s1, s2, l2))
704 return (char *)s1; 700 return (char *)s1;
705 s1++; 701 s1++;
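
The rewrites above are behaviour-preserving; as a reminder of the semantics they keep, a short sketch (the strings are arbitrary):

#include <linux/string.h>

static void string_helpers_demo(void)
{
	const char *log = "error: link down";

	/* strnstr(): like strstr(), but never looks past 'len' bytes of s1. */
	char *hit  = strnstr(log, "link", 16);	/* found within the first 16 bytes */
	char *miss = strnstr(log, "down", 10);	/* NULL: "down" begins past byte 10 */

	/* strnicmp(): case-insensitive comparison of at most 'len' characters. */
	int same = strnicmp("Error", "ERROR: 42", 5);	/* 0: first five match */

	(void)hit; (void)miss; (void)same;
}
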
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 437eedb5a53b..c47bbe11b804 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -28,6 +28,7 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/ctype.h> 29#include <linux/ctype.h>
30#include <linux/highmem.h> 30#include <linux/highmem.h>
31#include <linux/gfp.h>
31 32
32#include <asm/io.h> 33#include <asm/io.h>
33#include <asm/dma.h> 34#include <asm/dma.h>
@@ -49,25 +50,17 @@
49 */ 50 */
50#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) 51#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
51 52
52/*
53 * Enumeration for sync targets
54 */
55enum dma_sync_target {
56 SYNC_FOR_CPU = 0,
57 SYNC_FOR_DEVICE = 1,
58};
59
60int swiotlb_force; 53int swiotlb_force;
61 54
62/* 55/*
63 * Used to do a quick range check in unmap_single and 56 * Used to do a quick range check in swiotlb_tbl_unmap_single and
64 * sync_single_*, to see if the memory was in fact allocated by this 57 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
65 * API. 58 * API.
66 */ 59 */
67static char *io_tlb_start, *io_tlb_end; 60static char *io_tlb_start, *io_tlb_end;
68 61
69/* 62/*
70 * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and 63 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
71 * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. 64 * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
72 */ 65 */
73static unsigned long io_tlb_nslabs; 66static unsigned long io_tlb_nslabs;
@@ -77,7 +70,7 @@ static unsigned long io_tlb_nslabs;
77 */ 70 */
78static unsigned long io_tlb_overflow = 32*1024; 71static unsigned long io_tlb_overflow = 32*1024;
79 72
80void *io_tlb_overflow_buffer; 73static void *io_tlb_overflow_buffer;
81 74
82/* 75/*
83 * This is a free list describing the number of free entries available from 76 * This is a free list describing the number of free entries available from
@@ -139,28 +132,14 @@ void swiotlb_print_info(void)
139 (unsigned long long)pend); 132 (unsigned long long)pend);
140} 133}
141 134
142/* 135void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
143 * Statically reserve bounce buffer space and initialize bounce buffer data
144 * structures for the software IO TLB used to implement the DMA API.
145 */
146void __init
147swiotlb_init_with_default_size(size_t default_size, int verbose)
148{ 136{
149 unsigned long i, bytes; 137 unsigned long i, bytes;
150 138
151 if (!io_tlb_nslabs) { 139 bytes = nslabs << IO_TLB_SHIFT;
152 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
153 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
154 }
155
156 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
157 140
158 /* 141 io_tlb_nslabs = nslabs;
159 * Get IO TLB memory from the low pages 142 io_tlb_start = tlb;
160 */
161 io_tlb_start = alloc_bootmem_low_pages(bytes);
162 if (!io_tlb_start)
163 panic("Cannot allocate SWIOTLB buffer");
164 io_tlb_end = io_tlb_start + bytes; 143 io_tlb_end = io_tlb_start + bytes;
165 144
166 /* 145 /*
@@ -168,22 +147,48 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
168 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE 147 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
169 * between io_tlb_start and io_tlb_end. 148 * between io_tlb_start and io_tlb_end.
170 */ 149 */
171 io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); 150 io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
172 for (i = 0; i < io_tlb_nslabs; i++) 151 for (i = 0; i < io_tlb_nslabs; i++)
173 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); 152 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
174 io_tlb_index = 0; 153 io_tlb_index = 0;
175 io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); 154 io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
176 155
177 /* 156 /*
178 * Get the overflow emergency buffer 157 * Get the overflow emergency buffer
179 */ 158 */
180 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); 159 io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
181 if (!io_tlb_overflow_buffer) 160 if (!io_tlb_overflow_buffer)
182 panic("Cannot allocate SWIOTLB overflow buffer!\n"); 161 panic("Cannot allocate SWIOTLB overflow buffer!\n");
183 if (verbose) 162 if (verbose)
184 swiotlb_print_info(); 163 swiotlb_print_info();
185} 164}
186 165
166/*
167 * Statically reserve bounce buffer space and initialize bounce buffer data
168 * structures for the software IO TLB used to implement the DMA API.
169 */
170void __init
171swiotlb_init_with_default_size(size_t default_size, int verbose)
172{
173 unsigned long bytes;
174
175 if (!io_tlb_nslabs) {
176 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
177 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
178 }
179
180 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
181
182 /*
183 * Get IO TLB memory from the low pages
184 */
185 io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
186 if (!io_tlb_start)
187 panic("Cannot allocate SWIOTLB buffer");
188
189 swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
190}
191
187void __init 192void __init
188swiotlb_init(int verbose) 193swiotlb_init(int verbose)
189{ 194{
@@ -303,13 +308,13 @@ void __init swiotlb_free(void)
303 get_order(io_tlb_nslabs << IO_TLB_SHIFT)); 308 get_order(io_tlb_nslabs << IO_TLB_SHIFT));
304 } else { 309 } else {
305 free_bootmem_late(__pa(io_tlb_overflow_buffer), 310 free_bootmem_late(__pa(io_tlb_overflow_buffer),
306 io_tlb_overflow); 311 PAGE_ALIGN(io_tlb_overflow));
307 free_bootmem_late(__pa(io_tlb_orig_addr), 312 free_bootmem_late(__pa(io_tlb_orig_addr),
308 io_tlb_nslabs * sizeof(phys_addr_t)); 313 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
309 free_bootmem_late(__pa(io_tlb_list), 314 free_bootmem_late(__pa(io_tlb_list),
310 io_tlb_nslabs * sizeof(int)); 315 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
311 free_bootmem_late(__pa(io_tlb_start), 316 free_bootmem_late(__pa(io_tlb_start),
312 io_tlb_nslabs << IO_TLB_SHIFT); 317 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
313 } 318 }
314} 319}
315 320
@@ -322,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
322/* 327/*
323 * Bounce: copy the swiotlb buffer back to the original dma location 328 * Bounce: copy the swiotlb buffer back to the original dma location
324 */ 329 */
325static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, 330void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
326 enum dma_data_direction dir) 331 enum dma_data_direction dir)
327{ 332{
328 unsigned long pfn = PFN_DOWN(phys); 333 unsigned long pfn = PFN_DOWN(phys);
329 334
@@ -359,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
359 memcpy(phys_to_virt(phys), dma_addr, size); 364 memcpy(phys_to_virt(phys), dma_addr, size);
360 } 365 }
361} 366}
367EXPORT_SYMBOL_GPL(swiotlb_bounce);
362 368
363/* 369void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
364 * Allocates bounce buffer and returns its kernel virtual address. 370 phys_addr_t phys, size_t size,
365 */ 371 enum dma_data_direction dir)
366static void *
367map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
368{ 372{
369 unsigned long flags; 373 unsigned long flags;
370 char *dma_addr; 374 char *dma_addr;
371 unsigned int nslots, stride, index, wrap; 375 unsigned int nslots, stride, index, wrap;
372 int i; 376 int i;
373 unsigned long start_dma_addr;
374 unsigned long mask; 377 unsigned long mask;
375 unsigned long offset_slots; 378 unsigned long offset_slots;
376 unsigned long max_slots; 379 unsigned long max_slots;
377 380
378 mask = dma_get_seg_boundary(hwdev); 381 mask = dma_get_seg_boundary(hwdev);
379 start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
380 382
381 offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 383 tbl_dma_addr &= mask;
384
385 offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
382 386
383 /* 387 /*
384 * Carefully handle integer overflow which can occur when mask == ~0UL. 388 * Carefully handle integer overflow which can occur when mask == ~0UL.
@@ -465,12 +469,27 @@ found:
465 469
466 return dma_addr; 470 return dma_addr;
467} 471}
472EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
473
474/*
475 * Allocates bounce buffer and returns its kernel virtual address.
476 */
477
478static void *
479map_single(struct device *hwdev, phys_addr_t phys, size_t size,
480 enum dma_data_direction dir)
481{
482 dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
483
484 return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
485}
468 486
469/* 487/*
470 * dma_addr is the kernel virtual address of the bounce buffer to unmap. 488 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
471 */ 489 */
472static void 490void
473do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) 491swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
492 enum dma_data_direction dir)
474{ 493{
475 unsigned long flags; 494 unsigned long flags;
476 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 495 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -508,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
508 } 527 }
509 spin_unlock_irqrestore(&io_tlb_lock, flags); 528 spin_unlock_irqrestore(&io_tlb_lock, flags);
510} 529}
530EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
511 531
512static void 532void
513sync_single(struct device *hwdev, char *dma_addr, size_t size, 533swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
514 int dir, int target) 534 enum dma_data_direction dir,
535 enum dma_sync_target target)
515{ 536{
516 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; 537 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
517 phys_addr_t phys = io_tlb_orig_addr[index]; 538 phys_addr_t phys = io_tlb_orig_addr[index];
@@ -535,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
535 BUG(); 556 BUG();
536 } 557 }
537} 558}
559EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
538 560
539void * 561void *
540swiotlb_alloc_coherent(struct device *hwdev, size_t size, 562swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -558,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
558 } 580 }
559 if (!ret) { 581 if (!ret) {
560 /* 582 /*
561 * We are either out of memory or the device can't DMA 583 * We are either out of memory or the device can't DMA to
562 * to GFP_DMA memory; fall back on map_single(), which 584 * GFP_DMA memory; fall back on map_single(), which
563 * will grab memory from the lowest available address range. 585 * will grab memory from the lowest available address range.
564 */ 586 */
565 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); 587 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
@@ -577,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
577 (unsigned long long)dev_addr); 599 (unsigned long long)dev_addr);
578 600
579 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 601 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
580 do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); 602 swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
581 return NULL; 603 return NULL;
582 } 604 }
583 *dma_handle = dev_addr; 605 *dma_handle = dev_addr;
@@ -595,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
595 if (!is_swiotlb_buffer(paddr)) 617 if (!is_swiotlb_buffer(paddr))
596 free_pages((unsigned long)vaddr, get_order(size)); 618 free_pages((unsigned long)vaddr, get_order(size));
597 else 619 else
598 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 620 /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
599 do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); 621 swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
600} 622}
601EXPORT_SYMBOL(swiotlb_free_coherent); 623EXPORT_SYMBOL(swiotlb_free_coherent);
602 624
603static void 625static void
604swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) 626swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
627 int do_panic)
605{ 628{
606 /* 629 /*
607 * Ran out of IOMMU space for this operation. This is very bad. 630 * Ran out of IOMMU space for this operation. This is very bad.
@@ -679,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
679 * whatever the device wrote there. 702 * whatever the device wrote there.
680 */ 703 */
681static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, 704static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
682 size_t size, int dir) 705 size_t size, enum dma_data_direction dir)
683{ 706{
684 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); 707 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
685 708
686 BUG_ON(dir == DMA_NONE); 709 BUG_ON(dir == DMA_NONE);
687 710
688 if (is_swiotlb_buffer(paddr)) { 711 if (is_swiotlb_buffer(paddr)) {
689 do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); 712 swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
690 return; 713 return;
691 } 714 }
692 715
@@ -722,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
722 */ 745 */
723static void 746static void
724swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, 747swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
725 size_t size, int dir, int target) 748 size_t size, enum dma_data_direction dir,
749 enum dma_sync_target target)
726{ 750{
727 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); 751 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
728 752
729 BUG_ON(dir == DMA_NONE); 753 BUG_ON(dir == DMA_NONE);
730 754
731 if (is_swiotlb_buffer(paddr)) { 755 if (is_swiotlb_buffer(paddr)) {
732 sync_single(hwdev, phys_to_virt(paddr), size, dir, target); 756 swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
757 target);
733 return; 758 return;
734 } 759 }
735 760
@@ -756,37 +781,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
756EXPORT_SYMBOL(swiotlb_sync_single_for_device); 781EXPORT_SYMBOL(swiotlb_sync_single_for_device);
757 782
758/* 783/*
759 * Same as above, but for a sub-range of the mapping.
760 */
761static void
762swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
763 unsigned long offset, size_t size,
764 int dir, int target)
765{
766 swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
767}
768
769void
770swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
771 unsigned long offset, size_t size,
772 enum dma_data_direction dir)
773{
774 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
775 SYNC_FOR_CPU);
776}
777EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
778
779void
780swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
781 unsigned long offset, size_t size,
782 enum dma_data_direction dir)
783{
784 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
785 SYNC_FOR_DEVICE);
786}
787EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
788
789/*
790 * Map a set of buffers described by scatterlist in streaming mode for DMA. 784 * Map a set of buffers described by scatterlist in streaming mode for DMA.
791 * This is the scatter-gather version of the above swiotlb_map_page 785 * This is the scatter-gather version of the above swiotlb_map_page
792 * interface. Here the scatter gather list elements are each tagged with the 786 * interface. Here the scatter gather list elements are each tagged with the
@@ -839,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
839 833
840int 834int
841swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 835swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
842 int dir) 836 enum dma_data_direction dir)
843{ 837{
844 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); 838 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
845} 839}
@@ -866,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
866 860
867void 861void
868swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 862swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
869 int dir) 863 enum dma_data_direction dir)
870{ 864{
871 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); 865 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
872} 866}
@@ -881,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
881 */ 875 */
882static void 876static void
883swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, 877swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
884 int nelems, int dir, int target) 878 int nelems, enum dma_data_direction dir,
879 enum dma_sync_target target)
885{ 880{
886 struct scatterlist *sg; 881 struct scatterlist *sg;
887 int i; 882 int i;
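
The reason map_single() is now a thin wrapper is that other users can supply their own bounce pool through swiotlb_init_with_tbl() and drive it with the exported swiotlb_tbl_* calls. A rough sketch of such a caller follows; the pool, names and error handling are illustrative, and it assumes enum dma_sync_target is now visible from the public header, as its removal above implies.

#include <linux/swiotlb.h>
#include <linux/dma-mapping.h>

static char *my_pool;				/* bounce pool owned by this code */

void __init my_bounce_pool_init(char *pool, unsigned long nslabs)
{
	my_pool = pool;
	swiotlb_init_with_tbl(my_pool, nslabs, 1);	/* verbose */
}

/* Bounce one buffer around a bidirectional device transfer.
 * @pool_dma: bus address of my_pool as seen by @dev (platform specific,
 * not computed here). */
int my_bounce_one(struct device *dev, dma_addr_t pool_dma,
		  phys_addr_t phys, size_t size)
{
	char *vaddr = swiotlb_tbl_map_single(dev, pool_dma, phys, size,
					     DMA_BIDIRECTIONAL);
	if (!vaddr)
		return -ENOMEM;

	/* ... program the device, wait for completion ... */

	swiotlb_tbl_sync_single(dev, vaddr, size, DMA_BIDIRECTIONAL,
				SYNC_FOR_CPU);	/* pull device writes back */
	swiotlb_tbl_unmap_single(dev, vaddr, size, DMA_BIDIRECTIONAL);
	return 0;
}
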
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 9fbcb44c554f..d608331b3e47 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -103,6 +103,7 @@
103#include <linux/rcupdate.h> 103#include <linux/rcupdate.h>
104#include <linux/err.h> 104#include <linux/err.h>
105#include <linux/textsearch.h> 105#include <linux/textsearch.h>
106#include <linux/slab.h>
106 107
107static LIST_HEAD(ts_ops); 108static LIST_HEAD(ts_ops);
108static DEFINE_SPINLOCK(ts_mod_lock); 109static DEFINE_SPINLOCK(ts_mod_lock);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 000000000000..e3a1050e6820
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,107 @@
1/*
2 * Generic Timer-queue
3 *
4 * Manages a simple queue of timers, ordered by expiration time.
5 * Uses rbtrees for quick list adds and expiration.
6 *
7 * NOTE: All of the following functions need to be serialized
 8 * to avoid races. No locking is done by this library code.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/timerqueue.h>
26#include <linux/rbtree.h>
27#include <linux/module.h>
28
29/**
30 * timerqueue_add - Adds timer to timerqueue.
31 *
32 * @head: head of timerqueue
33 * @node: timer node to be added
34 *
35 * Adds the timer node to the timerqueue, sorted by the
36 * node's expires value.
37 */
38void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
39{
40 struct rb_node **p = &head->head.rb_node;
41 struct rb_node *parent = NULL;
42 struct timerqueue_node *ptr;
43
44 /* Make sure we don't add nodes that are already added */
45 WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
46
47 while (*p) {
48 parent = *p;
49 ptr = rb_entry(parent, struct timerqueue_node, node);
50 if (node->expires.tv64 < ptr->expires.tv64)
51 p = &(*p)->rb_left;
52 else
53 p = &(*p)->rb_right;
54 }
55 rb_link_node(&node->node, parent, p);
56 rb_insert_color(&node->node, &head->head);
57
58 if (!head->next || node->expires.tv64 < head->next->expires.tv64)
59 head->next = node;
60}
61EXPORT_SYMBOL_GPL(timerqueue_add);
62
63/**
64 * timerqueue_del - Removes a timer from the timerqueue.
65 *
66 * @head: head of timerqueue
67 * @node: timer node to be removed
68 *
69 * Removes the timer node from the timerqueue.
70 */
71void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
72{
73 WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
74
75 /* update next pointer */
76 if (head->next == node) {
77 struct rb_node *rbn = rb_next(&node->node);
78
79 head->next = rbn ?
80 rb_entry(rbn, struct timerqueue_node, node) : NULL;
81 }
82 rb_erase(&node->node, &head->head);
83 RB_CLEAR_NODE(&node->node);
84}
85EXPORT_SYMBOL_GPL(timerqueue_del);
86
87/**
88 * timerqueue_iterate_next - Returns the timer after the provided timer
89 *
90 * @node: Pointer to a timer.
91 *
92 * Provides the timer that is after the given node. This is used, when
93 * necessary, to iterate through the list of timers in a timer list
94 * without modifying the list.
95 */
96struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
97{
98 struct rb_node *next;
99
100 if (!node)
101 return NULL;
102 next = rb_next(&node->node);
103 if (!next)
104 return NULL;
105 return container_of(next, struct timerqueue_node, node);
106}
107EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
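
A minimal sketch of a client of this queue. It assumes the companion <linux/timerqueue.h> provides the usual init helpers, and the caller is responsible for serialization as the header comment above requires.

#include <linux/timerqueue.h>
#include <linux/ktime.h>

static struct timerqueue_head my_queue;
static struct timerqueue_node my_event;

static void my_queue_setup(void)
{
	timerqueue_init_head(&my_queue);	/* assumed from <linux/timerqueue.h> */
	timerqueue_init(&my_event);		/* clears the rb_node */
}

static void my_event_arm(ktime_t expires)
{
	my_event.expires = expires;
	timerqueue_add(&my_queue, &my_event);	/* caller serializes, see above */
}

static struct timerqueue_node *my_next_to_expire(void)
{
	return my_queue.next;	/* kept pointing at the earliest expiry */
}

static void my_event_cancel(void)
{
	timerqueue_del(&my_queue, &my_event);
}
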
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 000000000000..8fadd7cef46c
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
1/*
2 * Unified UUID/GUID definition
3 *
4 * Copyright (C) 2009, Intel Corp.
5 * Huang Ying <ying.huang@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation;
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/uuid.h>
24#include <linux/random.h>
25
26static void __uuid_gen_common(__u8 b[16])
27{
28 int i;
29 u32 r;
30
31 for (i = 0; i < 4; i++) {
32 r = random32();
33 memcpy(b + i * 4, &r, 4);
34 }
 35 /* set the variant field to 0b10 */
36 b[8] = (b[8] & 0x3F) | 0x80;
37}
38
39void uuid_le_gen(uuid_le *lu)
40{
41 __uuid_gen_common(lu->b);
42 /* version 4 : random generation */
43 lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
44}
45EXPORT_SYMBOL_GPL(uuid_le_gen);
46
47void uuid_be_gen(uuid_be *bu)
48{
49 __uuid_gen_common(bu->b);
50 /* version 4 : random generation */
51 bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
52}
53EXPORT_SYMBOL_GPL(uuid_be_gen);
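
Typical use is to generate an identifier once and store it in a caller-owned field; the structure below is purely illustrative.

#include <linux/uuid.h>
#include <linux/string.h>

struct my_sb_info {
	u8 volume_uuid[16];		/* illustrative on-disk field */
};

static void assign_volume_uuid(struct my_sb_info *sbi)
{
	uuid_le id;

	uuid_le_gen(&id);	/* random (version 4) UUID, little-endian layout */
	memcpy(sbi->volume_uuid, id.b, sizeof(id.b));
}
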
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3b8aeec4e327..d3023df8477f 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -118,6 +118,7 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
118 118
119 return simple_strtoull(cp, endp, base); 119 return simple_strtoull(cp, endp, base);
120} 120}
121EXPORT_SYMBOL(simple_strtoll);
121 122
122/** 123/**
123 * strict_strtoul - convert a string to an unsigned long strictly 124 * strict_strtoul - convert a string to an unsigned long strictly
@@ -145,19 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
145{ 146{
146 char *tail; 147 char *tail;
147 unsigned long val; 148 unsigned long val;
148 size_t len;
149 149
150 *res = 0; 150 *res = 0;
151 len = strlen(cp); 151 if (!*cp)
152 if (len == 0)
153 return -EINVAL; 152 return -EINVAL;
154 153
155 val = simple_strtoul(cp, &tail, base); 154 val = simple_strtoul(cp, &tail, base);
156 if (tail == cp) 155 if (tail == cp)
157 return -EINVAL; 156 return -EINVAL;
158 157
159 if ((*tail == '\0') || 158 if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
160 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
161 *res = val; 159 *res = val;
162 return 0; 160 return 0;
163 } 161 }
@@ -219,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
219{ 217{
220 char *tail; 218 char *tail;
221 unsigned long long val; 219 unsigned long long val;
222 size_t len;
223 220
224 *res = 0; 221 *res = 0;
225 len = strlen(cp); 222 if (!*cp)
226 if (len == 0)
227 return -EINVAL; 223 return -EINVAL;
228 224
229 val = simple_strtoull(cp, &tail, base); 225 val = simple_strtoull(cp, &tail, base);
230 if (tail == cp) 226 if (tail == cp)
231 return -EINVAL; 227 return -EINVAL;
232 if ((*tail == '\0') || 228 if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
233 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
234 *res = val; 229 *res = val;
235 return 0; 230 return 0;
236 } 231 }
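
The contract these helpers keep, a clean number optionally followed by a single trailing newline and nothing else, is what sysfs-style callers rely on; a small sketch:

#include <linux/kernel.h>

static int parse_count(const char *buf, unsigned long *out)
{
	/* Accepts "42" and "42\n"; rejects "", "42 x" and "42\n\n"
	 * with -EINVAL. Base 10 only for this caller. */
	return strict_strtoul(buf, 10, out);
}
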
@@ -266,7 +261,8 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res)
266} 261}
267EXPORT_SYMBOL(strict_strtoll); 262EXPORT_SYMBOL(strict_strtoll);
268 263
269static int skip_atoi(const char **s) 264static noinline_for_stack
265int skip_atoi(const char **s)
270{ 266{
271 int i = 0; 267 int i = 0;
272 268
@@ -286,7 +282,8 @@ static int skip_atoi(const char **s)
286/* Formats correctly any integer in [0,99999]. 282/* Formats correctly any integer in [0,99999].
287 * Outputs from one to five digits depending on input. 283 * Outputs from one to five digits depending on input.
288 * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ 284 * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
289static char *put_dec_trunc(char *buf, unsigned q) 285static noinline_for_stack
286char *put_dec_trunc(char *buf, unsigned q)
290{ 287{
291 unsigned d3, d2, d1, d0; 288 unsigned d3, d2, d1, d0;
292 d1 = (q>>4) & 0xf; 289 d1 = (q>>4) & 0xf;
@@ -323,7 +320,8 @@ static char *put_dec_trunc(char *buf, unsigned q)
323 return buf; 320 return buf;
324} 321}
325/* Same with if's removed. Always emits five digits */ 322/* Same with if's removed. Always emits five digits */
326static char *put_dec_full(char *buf, unsigned q) 323static noinline_for_stack
324char *put_dec_full(char *buf, unsigned q)
327{ 325{
328 /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ 326 /* BTW, if q is in [0,9999], 8-bit ints will be enough, */
329 /* but anyway, gcc produces better code with full-sized ints */ 327 /* but anyway, gcc produces better code with full-sized ints */
@@ -365,7 +363,8 @@ static char *put_dec_full(char *buf, unsigned q)
365 return buf; 363 return buf;
366} 364}
367/* No inlining helps gcc to use registers better */ 365/* No inlining helps gcc to use registers better */
368static noinline char *put_dec(char *buf, unsigned long long num) 366static noinline_for_stack
367char *put_dec(char *buf, unsigned long long num)
369{ 368{
370 while (1) { 369 while (1) {
371 unsigned rem; 370 unsigned rem;
@@ -381,8 +380,8 @@ static noinline char *put_dec(char *buf, unsigned long long num)
381#define PLUS 4 /* show plus */ 380#define PLUS 4 /* show plus */
382#define SPACE 8 /* space if plus */ 381#define SPACE 8 /* space if plus */
383#define LEFT 16 /* left justified */ 382#define LEFT 16 /* left justified */
384#define SMALL 32 /* Must be 32 == 0x20 */ 383#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */
385#define SPECIAL 64 /* 0x */ 384#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */
386 385
387enum format_type { 386enum format_type {
388 FORMAT_TYPE_NONE, /* Just a string part */ 387 FORMAT_TYPE_NONE, /* Just a string part */
@@ -408,16 +407,17 @@ enum format_type {
408}; 407};
409 408
410struct printf_spec { 409struct printf_spec {
411 enum format_type type; 410 u8 type; /* format_type enum */
412 int flags; /* flags to number() */ 411 u8 flags; /* flags to number() */
413 int field_width; /* width of output field */ 412 u8 base; /* number base, 8, 10 or 16 only */
414 int base; 413 u8 qualifier; /* number qualifier, one of 'hHlLtzZ' */
415 int precision; /* # of digits/chars */ 414 s16 field_width; /* width of output field */
416 int qualifier; 415 s16 precision; /* # of digits/chars */
417}; 416};
418 417
419static char *number(char *buf, char *end, unsigned long long num, 418static noinline_for_stack
420 struct printf_spec spec) 419char *number(char *buf, char *end, unsigned long long num,
420 struct printf_spec spec)
421{ 421{
422 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ 422 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
423 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ 423 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -536,7 +536,8 @@ static char *number(char *buf, char *end, unsigned long long num,
536 return buf; 536 return buf;
537} 537}
538 538
539static char *string(char *buf, char *end, const char *s, struct printf_spec spec) 539static noinline_for_stack
540char *string(char *buf, char *end, const char *s, struct printf_spec spec)
540{ 541{
541 int len, i; 542 int len, i;
542 543
@@ -566,8 +567,9 @@ static char *string(char *buf, char *end, const char *s, struct printf_spec spec
566 return buf; 567 return buf;
567} 568}
568 569
569static char *symbol_string(char *buf, char *end, void *ptr, 570static noinline_for_stack
570 struct printf_spec spec, char ext) 571char *symbol_string(char *buf, char *end, void *ptr,
572 struct printf_spec spec, char ext)
571{ 573{
572 unsigned long value = (unsigned long) ptr; 574 unsigned long value = (unsigned long) ptr;
573#ifdef CONFIG_KALLSYMS 575#ifdef CONFIG_KALLSYMS
@@ -587,8 +589,9 @@ static char *symbol_string(char *buf, char *end, void *ptr,
587#endif 589#endif
588} 590}
589 591
590static char *resource_string(char *buf, char *end, struct resource *res, 592static noinline_for_stack
591 struct printf_spec spec, const char *fmt) 593char *resource_string(char *buf, char *end, struct resource *res,
594 struct printf_spec spec, const char *fmt)
592{ 595{
593#ifndef IO_RSRC_PRINTK_SIZE 596#ifndef IO_RSRC_PRINTK_SIZE
594#define IO_RSRC_PRINTK_SIZE 6 597#define IO_RSRC_PRINTK_SIZE 6
@@ -597,22 +600,35 @@ static char *resource_string(char *buf, char *end, struct resource *res,
597#ifndef MEM_RSRC_PRINTK_SIZE 600#ifndef MEM_RSRC_PRINTK_SIZE
598#define MEM_RSRC_PRINTK_SIZE 10 601#define MEM_RSRC_PRINTK_SIZE 10
599#endif 602#endif
600 struct printf_spec hex_spec = { 603 static const struct printf_spec io_spec = {
601 .base = 16, 604 .base = 16,
605 .field_width = IO_RSRC_PRINTK_SIZE,
602 .precision = -1, 606 .precision = -1,
603 .flags = SPECIAL | SMALL | ZEROPAD, 607 .flags = SPECIAL | SMALL | ZEROPAD,
604 }; 608 };
605 struct printf_spec dec_spec = { 609 static const struct printf_spec mem_spec = {
610 .base = 16,
611 .field_width = MEM_RSRC_PRINTK_SIZE,
612 .precision = -1,
613 .flags = SPECIAL | SMALL | ZEROPAD,
614 };
615 static const struct printf_spec bus_spec = {
616 .base = 16,
617 .field_width = 2,
618 .precision = -1,
619 .flags = SMALL | ZEROPAD,
620 };
621 static const struct printf_spec dec_spec = {
606 .base = 10, 622 .base = 10,
607 .precision = -1, 623 .precision = -1,
608 .flags = 0, 624 .flags = 0,
609 }; 625 };
610 struct printf_spec str_spec = { 626 static const struct printf_spec str_spec = {
611 .field_width = -1, 627 .field_width = -1,
612 .precision = 10, 628 .precision = 10,
613 .flags = LEFT, 629 .flags = LEFT,
614 }; 630 };
615 struct printf_spec flag_spec = { 631 static const struct printf_spec flag_spec = {
616 .base = 16, 632 .base = 16,
617 .precision = -1, 633 .precision = -1,
618 .flags = SPECIAL | SMALL, 634 .flags = SPECIAL | SMALL,
@@ -622,47 +638,48 @@ static char *resource_string(char *buf, char *end, struct resource *res,
622 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ 638 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
623#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) 639#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4)
624#define FLAG_BUF_SIZE (2 * sizeof(res->flags)) 640#define FLAG_BUF_SIZE (2 * sizeof(res->flags))
625#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref disabled]") 641#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]")
626#define RAW_BUF_SIZE sizeof("[mem - flags 0x]") 642#define RAW_BUF_SIZE sizeof("[mem - flags 0x]")
627 char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, 643 char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
628 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; 644 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
629 645
630 char *p = sym, *pend = sym + sizeof(sym); 646 char *p = sym, *pend = sym + sizeof(sym);
631 int size = -1, addr = 0;
632 int decode = (fmt[0] == 'R') ? 1 : 0; 647 int decode = (fmt[0] == 'R') ? 1 : 0;
633 648 const struct printf_spec *specp;
634 if (res->flags & IORESOURCE_IO) {
635 size = IO_RSRC_PRINTK_SIZE;
636 addr = 1;
637 } else if (res->flags & IORESOURCE_MEM) {
638 size = MEM_RSRC_PRINTK_SIZE;
639 addr = 1;
640 }
641 649
642 *p++ = '['; 650 *p++ = '[';
643 if (res->flags & IORESOURCE_IO) 651 if (res->flags & IORESOURCE_IO) {
644 p = string(p, pend, "io ", str_spec); 652 p = string(p, pend, "io ", str_spec);
645 else if (res->flags & IORESOURCE_MEM) 653 specp = &io_spec;
654 } else if (res->flags & IORESOURCE_MEM) {
646 p = string(p, pend, "mem ", str_spec); 655 p = string(p, pend, "mem ", str_spec);
647 else if (res->flags & IORESOURCE_IRQ) 656 specp = &mem_spec;
657 } else if (res->flags & IORESOURCE_IRQ) {
648 p = string(p, pend, "irq ", str_spec); 658 p = string(p, pend, "irq ", str_spec);
649 else if (res->flags & IORESOURCE_DMA) 659 specp = &dec_spec;
660 } else if (res->flags & IORESOURCE_DMA) {
650 p = string(p, pend, "dma ", str_spec); 661 p = string(p, pend, "dma ", str_spec);
651 else { 662 specp = &dec_spec;
663 } else if (res->flags & IORESOURCE_BUS) {
664 p = string(p, pend, "bus ", str_spec);
665 specp = &bus_spec;
666 } else {
652 p = string(p, pend, "??? ", str_spec); 667 p = string(p, pend, "??? ", str_spec);
668 specp = &mem_spec;
653 decode = 0; 669 decode = 0;
654 } 670 }
655 hex_spec.field_width = size; 671 p = number(p, pend, res->start, *specp);
656 p = number(p, pend, res->start, addr ? hex_spec : dec_spec);
657 if (res->start != res->end) { 672 if (res->start != res->end) {
658 *p++ = '-'; 673 *p++ = '-';
659 p = number(p, pend, res->end, addr ? hex_spec : dec_spec); 674 p = number(p, pend, res->end, *specp);
660 } 675 }
661 if (decode) { 676 if (decode) {
662 if (res->flags & IORESOURCE_MEM_64) 677 if (res->flags & IORESOURCE_MEM_64)
663 p = string(p, pend, " 64bit", str_spec); 678 p = string(p, pend, " 64bit", str_spec);
664 if (res->flags & IORESOURCE_PREFETCH) 679 if (res->flags & IORESOURCE_PREFETCH)
665 p = string(p, pend, " pref", str_spec); 680 p = string(p, pend, " pref", str_spec);
681 if (res->flags & IORESOURCE_WINDOW)
682 p = string(p, pend, " window", str_spec);
666 if (res->flags & IORESOURCE_DISABLED) 683 if (res->flags & IORESOURCE_DISABLED)
667 p = string(p, pend, " disabled", str_spec); 684 p = string(p, pend, " disabled", str_spec);
668 } else { 685 } else {
@@ -675,30 +692,63 @@ static char *resource_string(char *buf, char *end, struct resource *res,
675 return string(buf, end, sym, spec); 692 return string(buf, end, sym, spec);
676} 693}
677 694
678static char *mac_address_string(char *buf, char *end, u8 *addr, 695static noinline_for_stack
679 struct printf_spec spec, const char *fmt) 696char *mac_address_string(char *buf, char *end, u8 *addr,
697 struct printf_spec spec, const char *fmt)
680{ 698{
681 char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; 699 char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
682 char *p = mac_addr; 700 char *p = mac_addr;
683 int i; 701 int i;
702 char separator;
703
704 if (fmt[1] == 'F') { /* FDDI canonical format */
705 separator = '-';
706 } else {
707 separator = ':';
708 }
684 709
685 for (i = 0; i < 6; i++) { 710 for (i = 0; i < 6; i++) {
686 p = pack_hex_byte(p, addr[i]); 711 p = pack_hex_byte(p, addr[i]);
687 if (fmt[0] == 'M' && i != 5) 712 if (fmt[0] == 'M' && i != 5)
688 *p++ = ':'; 713 *p++ = separator;
689 } 714 }
690 *p = '\0'; 715 *p = '\0';
691 716
692 return string(buf, end, mac_addr, spec); 717 return string(buf, end, mac_addr, spec);
693} 718}
694 719
695static char *ip4_string(char *p, const u8 *addr, bool leading_zeros) 720static noinline_for_stack
721char *ip4_string(char *p, const u8 *addr, const char *fmt)
696{ 722{
697 int i; 723 int i;
698 724 bool leading_zeros = (fmt[0] == 'i');
725 int index;
726 int step;
727
728 switch (fmt[2]) {
729 case 'h':
730#ifdef __BIG_ENDIAN
731 index = 0;
732 step = 1;
733#else
734 index = 3;
735 step = -1;
736#endif
737 break;
738 case 'l':
739 index = 3;
740 step = -1;
741 break;
742 case 'n':
743 case 'b':
744 default:
745 index = 0;
746 step = 1;
747 break;
748 }
699 for (i = 0; i < 4; i++) { 749 for (i = 0; i < 4; i++) {
700 char temp[3]; /* hold each IP quad in reverse order */ 750 char temp[3]; /* hold each IP quad in reverse order */
701 int digits = put_dec_trunc(temp, addr[i]) - temp; 751 int digits = put_dec_trunc(temp, addr[index]) - temp;
702 if (leading_zeros) { 752 if (leading_zeros) {
703 if (digits < 3) 753 if (digits < 3)
704 *p++ = '0'; 754 *p++ = '0';
@@ -710,13 +760,15 @@ static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
710 *p++ = temp[digits]; 760 *p++ = temp[digits];
711 if (i < 3) 761 if (i < 3)
712 *p++ = '.'; 762 *p++ = '.';
763 index += step;
713 } 764 }
714 *p = '\0'; 765 *p = '\0';
715 766
716 return p; 767 return p;
717} 768}
718 769
719static char *ip6_compressed_string(char *p, const char *addr) 770static noinline_for_stack
771char *ip6_compressed_string(char *p, const char *addr)
720{ 772{
721 int i, j, range; 773 int i, j, range;
722 unsigned char zerolength[8]; 774 unsigned char zerolength[8];
@@ -789,14 +841,15 @@ static char *ip6_compressed_string(char *p, const char *addr)
789 if (useIPv4) { 841 if (useIPv4) {
790 if (needcolon) 842 if (needcolon)
791 *p++ = ':'; 843 *p++ = ':';
792 p = ip4_string(p, &in6.s6_addr[12], false); 844 p = ip4_string(p, &in6.s6_addr[12], "I4");
793 } 845 }
794 *p = '\0'; 846 *p = '\0';
795 847
796 return p; 848 return p;
797} 849}
798 850
799static char *ip6_string(char *p, const char *addr, const char *fmt) 851static noinline_for_stack
852char *ip6_string(char *p, const char *addr, const char *fmt)
800{ 853{
801 int i; 854 int i;
802 855
@@ -811,8 +864,9 @@ static char *ip6_string(char *p, const char *addr, const char *fmt)
811 return p; 864 return p;
812} 865}
813 866
814static char *ip6_addr_string(char *buf, char *end, const u8 *addr, 867static noinline_for_stack
815 struct printf_spec spec, const char *fmt) 868char *ip6_addr_string(char *buf, char *end, const u8 *addr,
869 struct printf_spec spec, const char *fmt)
816{ 870{
817 char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; 871 char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
818 872
@@ -824,18 +878,20 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
824 return string(buf, end, ip6_addr, spec); 878 return string(buf, end, ip6_addr, spec);
825} 879}
826 880
827static char *ip4_addr_string(char *buf, char *end, const u8 *addr, 881static noinline_for_stack
828 struct printf_spec spec, const char *fmt) 882char *ip4_addr_string(char *buf, char *end, const u8 *addr,
883 struct printf_spec spec, const char *fmt)
829{ 884{
830 char ip4_addr[sizeof("255.255.255.255")]; 885 char ip4_addr[sizeof("255.255.255.255")];
831 886
832 ip4_string(ip4_addr, addr, fmt[0] == 'i'); 887 ip4_string(ip4_addr, addr, fmt);
833 888
834 return string(buf, end, ip4_addr, spec); 889 return string(buf, end, ip4_addr, spec);
835} 890}
836 891
837static char *uuid_string(char *buf, char *end, const u8 *addr, 892static noinline_for_stack
838 struct printf_spec spec, const char *fmt) 893char *uuid_string(char *buf, char *end, const u8 *addr,
894 struct printf_spec spec, const char *fmt)
839{ 895{
840 char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; 896 char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
841 char *p = uuid; 897 char *p = uuid;
@@ -880,6 +936,8 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
880 return string(buf, end, uuid, spec); 936 return string(buf, end, uuid, spec);
881} 937}
882 938
939int kptr_restrict = 1;
940
883/* 941/*
884 * Show a '%p' thing. A kernel extension is that the '%p' is followed 942 * Show a '%p' thing. A kernel extension is that the '%p' is followed
885 * by an extra set of alphanumeric characters that are extended format 943 * by an extra set of alphanumeric characters that are extended format
@@ -896,12 +954,15 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
896 * - 'M' For a 6-byte MAC address, it prints the address in the 954 * - 'M' For a 6-byte MAC address, it prints the address in the
897 * usual colon-separated hex notation 955 * usual colon-separated hex notation
898 * - 'm' For a 6-byte MAC address, it prints the hex address without colons 956 * - 'm' For a 6-byte MAC address, it prints the hex address without colons
957 * - 'MF' For a 6-byte MAC FDDI address, it prints the address
958 * with a dash-separated hex notation
899 * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way 959 * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
900 * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4) 960 * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
901 * IPv6 uses colon separated network-order 16 bit hex with leading 0's 961 * IPv6 uses colon separated network-order 16 bit hex with leading 0's
902 * - 'i' [46] for 'raw' IPv4/IPv6 addresses 962 * - 'i' [46] for 'raw' IPv4/IPv6 addresses
903 * IPv6 omits the colons (01020304...0f) 963 * IPv6 omits the colons (01020304...0f)
904 * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006) 964 * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
965 * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
905 * - 'I6c' for IPv6 addresses printed as specified by 966 * - 'I6c' for IPv6 addresses printed as specified by
906 * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00 967 * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
907 * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form 968 * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
@@ -915,16 +976,30 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
915 * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] 976 * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
916 * little endian output byte order is: 977 * little endian output byte order is:
917 * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] 978 * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
979 * - 'V' For a struct va_format which contains a format string * and va_list *,
980 * call vsnprintf(->format, *->va_list).
981 * Implements a "recursive vsnprintf".
982 * Do not use this feature without some mechanism to verify the
983 * correctness of the format string and va_list arguments.
984 * - 'K' For a kernel pointer that should be hidden from unprivileged users
918 * 985 *
919 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 986 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
920 * function pointers are really function descriptors, which contain a 987 * function pointers are really function descriptors, which contain a
921 * pointer to the real address. 988 * pointer to the real address.
922 */ 989 */
923static char *pointer(const char *fmt, char *buf, char *end, void *ptr, 990static noinline_for_stack
924 struct printf_spec spec) 991char *pointer(const char *fmt, char *buf, char *end, void *ptr,
992 struct printf_spec spec)
925{ 993{
926 if (!ptr) 994 if (!ptr) {
995 /*
996 * Print (null) with the same width as a pointer so it makes
997 * tabular output look nice.
998 */
999 if (spec.field_width == -1)
1000 spec.field_width = 2 * sizeof(void *);
927 return string(buf, end, "(null)", spec); 1001 return string(buf, end, "(null)", spec);
1002 }
928 1003
929 switch (*fmt) { 1004 switch (*fmt) {
930 case 'F': 1005 case 'F':
@@ -939,6 +1014,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
939 return resource_string(buf, end, ptr, spec, fmt); 1014 return resource_string(buf, end, ptr, spec, fmt);
940 case 'M': /* Colon separated: 00:01:02:03:04:05 */ 1015 case 'M': /* Colon separated: 00:01:02:03:04:05 */
941 case 'm': /* Contiguous: 000102030405 */ 1016 case 'm': /* Contiguous: 000102030405 */
1017 /* [mM]F (FDDI, bit reversed) */
942 return mac_address_string(buf, end, ptr, spec, fmt); 1018 return mac_address_string(buf, end, ptr, spec, fmt);
943 case 'I': /* Formatted IP supported 1019 case 'I': /* Formatted IP supported
944 * 4: 1.2.3.4 1020 * 4: 1.2.3.4
@@ -958,10 +1034,33 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
958 break; 1034 break;
959 case 'U': 1035 case 'U':
960 return uuid_string(buf, end, ptr, spec, fmt); 1036 return uuid_string(buf, end, ptr, spec, fmt);
1037 case 'V':
1038 return buf + vsnprintf(buf, end - buf,
1039 ((struct va_format *)ptr)->fmt,
1040 *(((struct va_format *)ptr)->va));
1041 case 'K':
1042 /*
1043 * %pK cannot be used in IRQ context because its test
1044 * for CAP_SYSLOG would be meaningless.
1045 */
1046 if (in_irq() || in_serving_softirq() || in_nmi()) {
1047 if (spec.field_width == -1)
1048 spec.field_width = 2 * sizeof(void *);
1049 return string(buf, end, "pK-error", spec);
1050 } else if ((kptr_restrict == 0) ||
1051 (kptr_restrict == 1 &&
1052 has_capability_noaudit(current, CAP_SYSLOG)))
1053 break;
1054
1055 if (spec.field_width == -1) {
1056 spec.field_width = 2 * sizeof(void *);
1057 spec.flags |= ZEROPAD;
1058 }
1059 return number(buf, end, 0, spec);
961 } 1060 }
962 spec.flags |= SMALL; 1061 spec.flags |= SMALL;
963 if (spec.field_width == -1) { 1062 if (spec.field_width == -1) {
964 spec.field_width = 2*sizeof(void *); 1063 spec.field_width = 2 * sizeof(void *);
965 spec.flags |= ZEROPAD; 1064 spec.flags |= ZEROPAD;
966 } 1065 }
967 spec.base = 16; 1066 spec.base = 16;
@@ -989,7 +1088,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
989 * @precision: precision of a number 1088 * @precision: precision of a number
990 * @qualifier: qualifier of a number (long, size_t, ...) 1089 * @qualifier: qualifier of a number (long, size_t, ...)
991 */ 1090 */
992static int format_decode(const char *fmt, struct printf_spec *spec) 1091static noinline_for_stack
1092int format_decode(const char *fmt, struct printf_spec *spec)
993{ 1093{
994 const char *start = fmt; 1094 const char *start = fmt;
995 1095
@@ -1297,7 +1397,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1297 break; 1397 break;
1298 1398
1299 case FORMAT_TYPE_NRCHARS: { 1399 case FORMAT_TYPE_NRCHARS: {
1300 int qualifier = spec.qualifier; 1400 u8 qualifier = spec.qualifier;
1301 1401
1302 if (qualifier == 'l') { 1402 if (qualifier == 'l') {
1303 long *ip = va_arg(args, long *); 1403 long *ip = va_arg(args, long *);
@@ -1373,7 +1473,7 @@ EXPORT_SYMBOL(vsnprintf);
1373 * @args: Arguments for the format string 1473 * @args: Arguments for the format string
1374 * 1474 *
1375 * The return value is the number of characters which have been written into 1475 * The return value is the number of characters which have been written into
1376 * the @buf not including the trailing '\0'. If @size is <= 0 the function 1476 * the @buf not including the trailing '\0'. If @size is == 0 the function
1377 * returns 0. 1477 * returns 0.
1378 * 1478 *
1379 * Call this function if you are already dealing with a va_list. 1479 * Call this function if you are already dealing with a va_list.
@@ -1387,7 +1487,11 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
1387 1487
1388 i = vsnprintf(buf, size, fmt, args); 1488 i = vsnprintf(buf, size, fmt, args);
1389 1489
1390 return (i >= size) ? (size - 1) : i; 1490 if (likely(i < size))
1491 return i;
1492 if (size != 0)
1493 return size - 1;
1494 return 0;
1391} 1495}
1392EXPORT_SYMBOL(vscnprintf); 1496EXPORT_SYMBOL(vscnprintf);
1393 1497
@@ -1426,7 +1530,7 @@ EXPORT_SYMBOL(snprintf);
1426 * @...: Arguments for the format string 1530 * @...: Arguments for the format string
1427 * 1531 *
1428 * The return value is the number of characters written into @buf not including 1532 * The return value is the number of characters written into @buf not including
1429 * the trailing '\0'. If @size is <= 0 the function returns 0. 1533 * the trailing '\0'. If @size is == 0 the function returns 0.
1430 */ 1534 */
1431 1535
1432int scnprintf(char *buf, size_t size, const char *fmt, ...) 1536int scnprintf(char *buf, size_t size, const char *fmt, ...)
@@ -1435,10 +1539,10 @@ int scnprintf(char *buf, size_t size, const char *fmt, ...)
1435 int i; 1539 int i;
1436 1540
1437 va_start(args, fmt); 1541 va_start(args, fmt);
1438 i = vsnprintf(buf, size, fmt, args); 1542 i = vscnprintf(buf, size, fmt, args);
1439 va_end(args); 1543 va_end(args);
1440 1544
1441 return (i >= size) ? (size - 1) : i; 1545 return i;
1442} 1546}
1443EXPORT_SYMBOL(scnprintf); 1547EXPORT_SYMBOL(scnprintf);
1444 1548
@@ -1583,7 +1687,7 @@ do { \
1583 1687
1584 case FORMAT_TYPE_NRCHARS: { 1688 case FORMAT_TYPE_NRCHARS: {
1585 /* skip %n 's argument */ 1689 /* skip %n 's argument */
1586 int qualifier = spec.qualifier; 1690 u8 qualifier = spec.qualifier;
1587 void *skip_arg; 1691 void *skip_arg;
1588 if (qualifier == 'l') 1692 if (qualifier == 'l')
1589 skip_arg = va_arg(args, long *); 1693 skip_arg = va_arg(args, long *);
@@ -1849,7 +1953,9 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
1849 char *next; 1953 char *next;
1850 char digit; 1954 char digit;
1851 int num = 0; 1955 int num = 0;
1852 int qualifier, base, field_width; 1956 u8 qualifier;
1957 u8 base;
1958 s16 field_width;
1853 bool is_sign; 1959 bool is_sign;
1854 1960
1855 while (*fmt && *str) { 1961 while (*fmt && *str) {
@@ -1927,7 +2033,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
1927 { 2033 {
1928 char *s = (char *)va_arg(args, char *); 2034 char *s = (char *)va_arg(args, char *);
1929 if (field_width == -1) 2035 if (field_width == -1)
1930 field_width = INT_MAX; 2036 field_width = SHRT_MAX;
1931 /* first, skip leading white space in buffer */ 2037 /* first, skip leading white space in buffer */
1932 str = skip_spaces(str); 2038 str = skip_spaces(str);
1933 2039
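
A hedged usage sketch of the extended %p specifiers documented in the hunk above (illustrative only, not part of the patch; the helper name and sample data are made up, while the format specifiers themselves come from the code being changed here):

#include <linux/kernel.h>
#include <linux/ioport.h>

/* Hypothetical demo helper; only the format specifiers are from the patch. */
static void demo_extended_pointer_formats(const struct resource *res)
{
	u8 mac[6] = { 0x00, 0x16, 0x3e, 0x12, 0x34, 0x56 };
	u8 ip[4]  = { 192, 168, 0, 1 };

	pr_info("mac %pM  fddi %pMF  contiguous %pm\n", mac, mac, mac);
	pr_info("ipv4 %pI4  zero-padded %pi4\n", ip, ip);
	pr_info("res %pR  raw %pr\n", res, res);
	pr_info("ptr %pK\n", res);	/* printed as zeroes for unprivileged readers per kptr_restrict */
}

Each specifier consumes a single pointer argument; %pK additionally respects the kptr_restrict setting and CAP_SYSLOG check introduced in this hunk.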
diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig
new file mode 100644
index 000000000000..e3b6e18fdac5
--- /dev/null
+++ b/lib/xz/Kconfig
@@ -0,0 +1,59 @@
1config XZ_DEC
2 tristate "XZ decompression support"
3 select CRC32
4 help
5 LZMA2 compression algorithm and BCJ filters are supported using
6 the .xz file format as the container. For integrity checking,
7 CRC32 is supported. See Documentation/xz.txt for more information.
8
9config XZ_DEC_X86
10 bool "x86 BCJ filter decoder" if EMBEDDED
11 default y
12 depends on XZ_DEC
13 select XZ_DEC_BCJ
14
15config XZ_DEC_POWERPC
16 bool "PowerPC BCJ filter decoder" if EMBEDDED
17 default y
18 depends on XZ_DEC
19 select XZ_DEC_BCJ
20
21config XZ_DEC_IA64
22 bool "IA-64 BCJ filter decoder" if EMBEDDED
23 default y
24 depends on XZ_DEC
25 select XZ_DEC_BCJ
26
27config XZ_DEC_ARM
28 bool "ARM BCJ filter decoder" if EMBEDDED
29 default y
30 depends on XZ_DEC
31 select XZ_DEC_BCJ
32
33config XZ_DEC_ARMTHUMB
34 bool "ARM-Thumb BCJ filter decoder" if EMBEDDED
35 default y
36 depends on XZ_DEC
37 select XZ_DEC_BCJ
38
39config XZ_DEC_SPARC
40 bool "SPARC BCJ filter decoder" if EMBEDDED
41 default y
42 depends on XZ_DEC
43 select XZ_DEC_BCJ
44
45config XZ_DEC_BCJ
46 bool
47 default n
48
49config XZ_DEC_TEST
50 tristate "XZ decompressor tester"
51 default n
52 depends on XZ_DEC
53 help
54 This allows passing .xz files to the in-kernel XZ decoder via
55 a character special file. It calculates CRC32 of the decompressed
56 data and writes diagnostics to the system log.
57
58 Unless you are developing the XZ decoder, you don't need this
59 and should say N.
diff --git a/lib/xz/Makefile b/lib/xz/Makefile
new file mode 100644
index 000000000000..a7fa7693f0f3
--- /dev/null
+++ b/lib/xz/Makefile
@@ -0,0 +1,5 @@
1obj-$(CONFIG_XZ_DEC) += xz_dec.o
2xz_dec-y := xz_dec_syms.o xz_dec_stream.o xz_dec_lzma2.o
3xz_dec-$(CONFIG_XZ_DEC_BCJ) += xz_dec_bcj.o
4
5obj-$(CONFIG_XZ_DEC_TEST) += xz_dec_test.o
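
For orientation, the objects built here are normally driven through the public API declared in include/linux/xz.h, which is not part of this excerpt. A rough sketch, under the assumption that the usual xz_dec_init()/xz_dec_run()/xz_dec_end() entry points and the struct xz_buf layout used throughout the files below apply (pre-boot users would additionally call xz_crc32_init() first, since they cannot use the kernel's CRC32 code):

#include <linux/xz.h>
#include <linux/errno.h>

/* Hypothetical helper: single-shot decompression of in[] into out[]. */
static int demo_xz_decompress(const u8 *in, size_t in_size,
			      u8 *out, size_t out_size)
{
	struct xz_dec *dec;
	struct xz_buf b = {
		.in = in,   .in_pos = 0,  .in_size = in_size,
		.out = out, .out_pos = 0, .out_size = out_size,
	};
	enum xz_ret ret;

	dec = xz_dec_init(XZ_DYNALLOC, 1 << 26);	/* allow up to a 64 MiB dictionary */
	if (!dec)
		return -ENOMEM;

	ret = xz_dec_run(dec, &b);
	xz_dec_end(dec);

	return ret == XZ_STREAM_END ? 0 : -EINVAL;
}

Real callers loop around xz_dec_run(), refilling b.in and draining b.out on every XZ_OK return; the one-shot call above only works when the whole stream fits into the two buffers.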
diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c
new file mode 100644
index 000000000000..34532d14fd4c
--- /dev/null
+++ b/lib/xz/xz_crc32.c
@@ -0,0 +1,59 @@
1/*
2 * CRC32 using the polynomial from IEEE-802.3
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11/*
12 * This is not the fastest implementation, but it is pretty compact.
13 * The fastest versions of xz_crc32() on modern CPUs without hardware
14 * accelerated CRC instruction are 3-5 times as fast as this version,
15 * but they are bigger and use more memory for the lookup table.
16 */
17
18#include "xz_private.h"
19
20/*
21 * STATIC_RW_DATA is used in the pre-boot environment on some architectures.
22 * See <linux/decompress/mm.h> for details.
23 */
24#ifndef STATIC_RW_DATA
25# define STATIC_RW_DATA static
26#endif
27
28STATIC_RW_DATA uint32_t xz_crc32_table[256];
29
30XZ_EXTERN void xz_crc32_init(void)
31{
32 const uint32_t poly = 0xEDB88320;
33
34 uint32_t i;
35 uint32_t j;
36 uint32_t r;
37
38 for (i = 0; i < 256; ++i) {
39 r = i;
40 for (j = 0; j < 8; ++j)
41 r = (r >> 1) ^ (poly & ~((r & 1) - 1));
42
43 xz_crc32_table[i] = r;
44 }
45
46 return;
47}
48
49XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
50{
51 crc = ~crc;
52
53 while (size != 0) {
54 crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
55 --size;
56 }
57
58 return ~crc;
59}
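
A small sketch of how these two functions fit together (illustrative only; note that the Makefile above does not link this file into xz_dec.o, which suggests this table-driven CRC32 is aimed at pre-boot use, while normal kernel builds lean on the kernel CRC32 pulled in by "select CRC32" in the Kconfig):

#include <stddef.h>
#include <stdint.h>

/* Declarations matching the definitions above (normally provided by the xz headers). */
void xz_crc32_init(void);
uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc);

/* Illustrative helper: CRC32 of two concatenated chunks. */
static uint32_t demo_crc32_two_chunks(const uint8_t *a, size_t alen,
				      const uint8_t *b, size_t blen)
{
	uint32_t crc;

	xz_crc32_init();		/* build the 256-entry lookup table once */
	crc = xz_crc32(a, alen, 0);	/* a zero crc argument starts a new calculation */
	return xz_crc32(b, blen, crc);	/* passing the previous result continues it */
}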
diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c
new file mode 100644
index 000000000000..e51e2558ca9d
--- /dev/null
+++ b/lib/xz/xz_dec_bcj.c
@@ -0,0 +1,561 @@
1/*
2 * Branch/Call/Jump (BCJ) filter decoders
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12
13/*
14 * The rest of the file is inside this ifdef. It makes things a little more
15 * convenient when building without support for any BCJ filters.
16 */
17#ifdef XZ_DEC_BCJ
18
19struct xz_dec_bcj {
20 /* Type of the BCJ filter being used */
21 enum {
22 BCJ_X86 = 4, /* x86 or x86-64 */
23 BCJ_POWERPC = 5, /* Big endian only */
24 BCJ_IA64 = 6, /* Big or little endian */
25 BCJ_ARM = 7, /* Little endian only */
26 BCJ_ARMTHUMB = 8, /* Little endian only */
27 BCJ_SPARC = 9 /* Big or little endian */
28 } type;
29
30 /*
31 * Return value of the next filter in the chain. We need to preserve
32 * this information across calls, because we must not call the next
33 * filter anymore once it has returned XZ_STREAM_END.
34 */
35 enum xz_ret ret;
36
37 /* True if we are operating in single-call mode. */
38 bool single_call;
39
40 /*
41 * Absolute position relative to the beginning of the uncompressed
42 * data (in a single .xz Block). We care only about the lowest 32
43 * bits so this doesn't need to be uint64_t even with big files.
44 */
45 uint32_t pos;
46
47 /* x86 filter state */
48 uint32_t x86_prev_mask;
49
50 /* Temporary space to hold the variables from struct xz_buf */
51 uint8_t *out;
52 size_t out_pos;
53 size_t out_size;
54
55 struct {
56 /* Amount of already filtered data in the beginning of buf */
57 size_t filtered;
58
59 /* Total amount of data currently stored in buf */
60 size_t size;
61
62 /*
63 * Buffer to hold a mix of filtered and unfiltered data. This
64 * needs to be big enough to hold Alignment + 2 * Look-ahead:
65 *
66 * Type Alignment Look-ahead
67 * x86 1 4
68 * PowerPC 4 0
69 * IA-64 16 0
70 * ARM 4 0
71 * ARM-Thumb 2 2
72 * SPARC 4 0
73 */
74 uint8_t buf[16];
75 } temp;
76};
77
78#ifdef XZ_DEC_X86
79/*
80 * This is used to test the most significant byte of a memory address
81 * in an x86 instruction.
82 */
83static inline int bcj_x86_test_msbyte(uint8_t b)
84{
85 return b == 0x00 || b == 0xFF;
86}
87
88static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
89{
90 static const bool mask_to_allowed_status[8]
91 = { true, true, true, false, true, false, false, false };
92
93 static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
94
95 size_t i;
96 size_t prev_pos = (size_t)-1;
97 uint32_t prev_mask = s->x86_prev_mask;
98 uint32_t src;
99 uint32_t dest;
100 uint32_t j;
101 uint8_t b;
102
103 if (size <= 4)
104 return 0;
105
106 size -= 4;
107 for (i = 0; i < size; ++i) {
108 if ((buf[i] & 0xFE) != 0xE8)
109 continue;
110
111 prev_pos = i - prev_pos;
112 if (prev_pos > 3) {
113 prev_mask = 0;
114 } else {
115 prev_mask = (prev_mask << (prev_pos - 1)) & 7;
116 if (prev_mask != 0) {
117 b = buf[i + 4 - mask_to_bit_num[prev_mask]];
118 if (!mask_to_allowed_status[prev_mask]
119 || bcj_x86_test_msbyte(b)) {
120 prev_pos = i;
121 prev_mask = (prev_mask << 1) | 1;
122 continue;
123 }
124 }
125 }
126
127 prev_pos = i;
128
129 if (bcj_x86_test_msbyte(buf[i + 4])) {
130 src = get_unaligned_le32(buf + i + 1);
131 while (true) {
132 dest = src - (s->pos + (uint32_t)i + 5);
133 if (prev_mask == 0)
134 break;
135
136 j = mask_to_bit_num[prev_mask] * 8;
137 b = (uint8_t)(dest >> (24 - j));
138 if (!bcj_x86_test_msbyte(b))
139 break;
140
141 src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
142 }
143
144 dest &= 0x01FFFFFF;
145 dest |= (uint32_t)0 - (dest & 0x01000000);
146 put_unaligned_le32(dest, buf + i + 1);
147 i += 4;
148 } else {
149 prev_mask = (prev_mask << 1) | 1;
150 }
151 }
152
153 prev_pos = i - prev_pos;
154 s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
155 return i;
156}
157#endif
158
159#ifdef XZ_DEC_POWERPC
160static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
161{
162 size_t i;
163 uint32_t instr;
164
165 for (i = 0; i + 4 <= size; i += 4) {
166 instr = get_unaligned_be32(buf + i);
167 if ((instr & 0xFC000003) == 0x48000001) {
168 instr &= 0x03FFFFFC;
169 instr -= s->pos + (uint32_t)i;
170 instr &= 0x03FFFFFC;
171 instr |= 0x48000001;
172 put_unaligned_be32(instr, buf + i);
173 }
174 }
175
176 return i;
177}
178#endif
179
180#ifdef XZ_DEC_IA64
181static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
182{
183 static const uint8_t branch_table[32] = {
184 0, 0, 0, 0, 0, 0, 0, 0,
185 0, 0, 0, 0, 0, 0, 0, 0,
186 4, 4, 6, 6, 0, 0, 7, 7,
187 4, 4, 0, 0, 4, 4, 0, 0
188 };
189
190 /*
 191 * The local variables take a little bit of stack space, but it's less
 192 * than what the LZMA2 decoder takes, so it doesn't make sense to reduce
193 * stack usage here without doing that for the LZMA2 decoder too.
194 */
195
196 /* Loop counters */
197 size_t i;
198 size_t j;
199
200 /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
201 uint32_t slot;
202
203 /* Bitwise offset of the instruction indicated by slot */
204 uint32_t bit_pos;
205
206 /* bit_pos split into byte and bit parts */
207 uint32_t byte_pos;
208 uint32_t bit_res;
209
210 /* Address part of an instruction */
211 uint32_t addr;
212
213 /* Mask used to detect which instructions to convert */
214 uint32_t mask;
215
216 /* 41-bit instruction stored somewhere in the lowest 48 bits */
217 uint64_t instr;
218
219 /* Instruction normalized with bit_res for easier manipulation */
220 uint64_t norm;
221
222 for (i = 0; i + 16 <= size; i += 16) {
223 mask = branch_table[buf[i] & 0x1F];
224 for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
225 if (((mask >> slot) & 1) == 0)
226 continue;
227
228 byte_pos = bit_pos >> 3;
229 bit_res = bit_pos & 7;
230 instr = 0;
231 for (j = 0; j < 6; ++j)
232 instr |= (uint64_t)(buf[i + j + byte_pos])
233 << (8 * j);
234
235 norm = instr >> bit_res;
236
237 if (((norm >> 37) & 0x0F) == 0x05
238 && ((norm >> 9) & 0x07) == 0) {
239 addr = (norm >> 13) & 0x0FFFFF;
240 addr |= ((uint32_t)(norm >> 36) & 1) << 20;
241 addr <<= 4;
242 addr -= s->pos + (uint32_t)i;
243 addr >>= 4;
244
245 norm &= ~((uint64_t)0x8FFFFF << 13);
246 norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
247 norm |= (uint64_t)(addr & 0x100000)
248 << (36 - 20);
249
250 instr &= (1 << bit_res) - 1;
251 instr |= norm << bit_res;
252
253 for (j = 0; j < 6; j++)
254 buf[i + j + byte_pos]
255 = (uint8_t)(instr >> (8 * j));
256 }
257 }
258 }
259
260 return i;
261}
262#endif
263
264#ifdef XZ_DEC_ARM
265static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
266{
267 size_t i;
268 uint32_t addr;
269
270 for (i = 0; i + 4 <= size; i += 4) {
271 if (buf[i + 3] == 0xEB) {
272 addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
273 | ((uint32_t)buf[i + 2] << 16);
274 addr <<= 2;
275 addr -= s->pos + (uint32_t)i + 8;
276 addr >>= 2;
277 buf[i] = (uint8_t)addr;
278 buf[i + 1] = (uint8_t)(addr >> 8);
279 buf[i + 2] = (uint8_t)(addr >> 16);
280 }
281 }
282
283 return i;
284}
285#endif
286
287#ifdef XZ_DEC_ARMTHUMB
288static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
289{
290 size_t i;
291 uint32_t addr;
292
293 for (i = 0; i + 4 <= size; i += 2) {
294 if ((buf[i + 1] & 0xF8) == 0xF0
295 && (buf[i + 3] & 0xF8) == 0xF8) {
296 addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
297 | ((uint32_t)buf[i] << 11)
298 | (((uint32_t)buf[i + 3] & 0x07) << 8)
299 | (uint32_t)buf[i + 2];
300 addr <<= 1;
301 addr -= s->pos + (uint32_t)i + 4;
302 addr >>= 1;
303 buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
304 buf[i] = (uint8_t)(addr >> 11);
305 buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
306 buf[i + 2] = (uint8_t)addr;
307 i += 2;
308 }
309 }
310
311 return i;
312}
313#endif
314
315#ifdef XZ_DEC_SPARC
316static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
317{
318 size_t i;
319 uint32_t instr;
320
321 for (i = 0; i + 4 <= size; i += 4) {
322 instr = get_unaligned_be32(buf + i);
323 if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
324 instr <<= 2;
325 instr -= s->pos + (uint32_t)i;
326 instr >>= 2;
327 instr = ((uint32_t)0x40000000 - (instr & 0x400000))
328 | 0x40000000 | (instr & 0x3FFFFF);
329 put_unaligned_be32(instr, buf + i);
330 }
331 }
332
333 return i;
334}
335#endif
336
337/*
338 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
339 * of data that got filtered.
340 *
341 * NOTE: This is implemented as a switch statement to avoid using function
342 * pointers, which could be problematic in the kernel boot code, which must
343 * avoid pointers to static data (at least on x86).
344 */
345static void bcj_apply(struct xz_dec_bcj *s,
346 uint8_t *buf, size_t *pos, size_t size)
347{
348 size_t filtered;
349
350 buf += *pos;
351 size -= *pos;
352
353 switch (s->type) {
354#ifdef XZ_DEC_X86
355 case BCJ_X86:
356 filtered = bcj_x86(s, buf, size);
357 break;
358#endif
359#ifdef XZ_DEC_POWERPC
360 case BCJ_POWERPC:
361 filtered = bcj_powerpc(s, buf, size);
362 break;
363#endif
364#ifdef XZ_DEC_IA64
365 case BCJ_IA64:
366 filtered = bcj_ia64(s, buf, size);
367 break;
368#endif
369#ifdef XZ_DEC_ARM
370 case BCJ_ARM:
371 filtered = bcj_arm(s, buf, size);
372 break;
373#endif
374#ifdef XZ_DEC_ARMTHUMB
375 case BCJ_ARMTHUMB:
376 filtered = bcj_armthumb(s, buf, size);
377 break;
378#endif
379#ifdef XZ_DEC_SPARC
380 case BCJ_SPARC:
381 filtered = bcj_sparc(s, buf, size);
382 break;
383#endif
384 default:
385 /* Never reached but silence compiler warnings. */
386 filtered = 0;
387 break;
388 }
389
390 *pos += filtered;
391 s->pos += filtered;
392}
393
394/*
395 * Flush pending filtered data from temp to the output buffer.
396 * Move the remaining mixture of possibly filtered and unfiltered
397 * data to the beginning of temp.
398 */
399static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
400{
401 size_t copy_size;
402
403 copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
404 memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
405 b->out_pos += copy_size;
406
407 s->temp.filtered -= copy_size;
408 s->temp.size -= copy_size;
409 memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
410}
411
412/*
 413 * The BCJ filter functions are primitive in the sense that they process the
414 * data in chunks of 1-16 bytes. To hide this issue, this function does
415 * some buffering.
416 */
417XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
418 struct xz_dec_lzma2 *lzma2,
419 struct xz_buf *b)
420{
421 size_t out_start;
422
423 /*
424 * Flush pending already filtered data to the output buffer. Return
 425 * immediately if we couldn't flush everything, or if the next
426 * filter in the chain had already returned XZ_STREAM_END.
427 */
428 if (s->temp.filtered > 0) {
429 bcj_flush(s, b);
430 if (s->temp.filtered > 0)
431 return XZ_OK;
432
433 if (s->ret == XZ_STREAM_END)
434 return XZ_STREAM_END;
435 }
436
437 /*
438 * If we have more output space than what is currently pending in
439 * temp, copy the unfiltered data from temp to the output buffer
440 * and try to fill the output buffer by decoding more data from the
441 * next filter in the chain. Apply the BCJ filter on the new data
442 * in the output buffer. If everything cannot be filtered, copy it
443 * to temp and rewind the output buffer position accordingly.
444 */
445 if (s->temp.size < b->out_size - b->out_pos) {
446 out_start = b->out_pos;
447 memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
448 b->out_pos += s->temp.size;
449
450 s->ret = xz_dec_lzma2_run(lzma2, b);
451 if (s->ret != XZ_STREAM_END
452 && (s->ret != XZ_OK || s->single_call))
453 return s->ret;
454
455 bcj_apply(s, b->out, &out_start, b->out_pos);
456
457 /*
458 * As an exception, if the next filter returned XZ_STREAM_END,
459 * we can do that too, since the last few bytes that remain
460 * unfiltered are meant to remain unfiltered.
461 */
462 if (s->ret == XZ_STREAM_END)
463 return XZ_STREAM_END;
464
465 s->temp.size = b->out_pos - out_start;
466 b->out_pos -= s->temp.size;
467 memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
468 }
469
470 /*
 471 * If we have unfiltered data in temp, try to fill temp by decoding more
472 * data from the next filter. Apply the BCJ filter on temp. Then we
473 * hopefully can fill the actual output buffer by copying filtered
474 * data from temp. A mix of filtered and unfiltered data may be left
 475 * in temp; it will be taken care of on the next call to this function.
476 */
477 if (s->temp.size > 0) {
478 /* Make b->out{,_pos,_size} temporarily point to s->temp. */
479 s->out = b->out;
480 s->out_pos = b->out_pos;
481 s->out_size = b->out_size;
482 b->out = s->temp.buf;
483 b->out_pos = s->temp.size;
484 b->out_size = sizeof(s->temp.buf);
485
486 s->ret = xz_dec_lzma2_run(lzma2, b);
487
488 s->temp.size = b->out_pos;
489 b->out = s->out;
490 b->out_pos = s->out_pos;
491 b->out_size = s->out_size;
492
493 if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
494 return s->ret;
495
496 bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
497
498 /*
499 * If the next filter returned XZ_STREAM_END, we mark that
500 * everything is filtered, since the last unfiltered bytes
501 * of the stream are meant to be left as is.
502 */
503 if (s->ret == XZ_STREAM_END)
504 s->temp.filtered = s->temp.size;
505
506 bcj_flush(s, b);
507 if (s->temp.filtered > 0)
508 return XZ_OK;
509 }
510
511 return s->ret;
512}
513
514XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
515{
516 struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
517 if (s != NULL)
518 s->single_call = single_call;
519
520 return s;
521}
522
523XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
524{
525 switch (id) {
526#ifdef XZ_DEC_X86
527 case BCJ_X86:
528#endif
529#ifdef XZ_DEC_POWERPC
530 case BCJ_POWERPC:
531#endif
532#ifdef XZ_DEC_IA64
533 case BCJ_IA64:
534#endif
535#ifdef XZ_DEC_ARM
536 case BCJ_ARM:
537#endif
538#ifdef XZ_DEC_ARMTHUMB
539 case BCJ_ARMTHUMB:
540#endif
541#ifdef XZ_DEC_SPARC
542 case BCJ_SPARC:
543#endif
544 break;
545
546 default:
547 /* Unsupported Filter ID */
548 return XZ_OPTIONS_ERROR;
549 }
550
551 s->type = id;
552 s->ret = XZ_OK;
553 s->pos = 0;
554 s->x86_prev_mask = 0;
555 s->temp.filtered = 0;
556 s->temp.size = 0;
557
558 return XZ_OK;
559}
560
561#endif
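
To see how the pieces above are meant to be combined, here is a hedged caller fragment in the style of the stream decoder; xz_dec_lzma2_run() appears above, but the creation/reset entry points on the LZMA2 side live in xz_private.h and are assumed here rather than shown:

#include "xz_private.h"

/* Hypothetical caller fragment, in the spirit of xz_dec_stream.c. */
static enum xz_ret demo_run_filter_chain(struct xz_dec_bcj *bcj,
					 struct xz_dec_lzma2 *lzma2,
					 struct xz_buf *b, uint8_t filter_id)
{
	enum xz_ret ret;

	/* Select the BCJ filter announced in the Block Header. */
	ret = xz_dec_bcj_reset(bcj, filter_id);
	if (ret != XZ_OK)
		return ret;	/* XZ_OPTIONS_ERROR for unsupported filter IDs */

	/*
	 * The BCJ layer pulls compressed data through the LZMA2 decoder
	 * and post-processes it; without a BCJ filter the caller would
	 * invoke xz_dec_lzma2_run(lzma2, b) directly instead.
	 */
	return xz_dec_bcj_run(bcj, lzma2, b);
}

The single_call flag given to xz_dec_bcj_create() is expected to match the mode used for the LZMA2 decoder, since xz_dec_bcj_run() returns early from its buffering path when single-call mode is requested.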
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
new file mode 100644
index 000000000000..ea5fa4fe9d67
--- /dev/null
+++ b/lib/xz/xz_dec_lzma2.c
@@ -0,0 +1,1171 @@
1/*
2 * LZMA2 decoder
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#include "xz_private.h"
12#include "xz_lzma2.h"
13
14/*
15 * Range decoder initialization eats the first five bytes of each LZMA chunk.
16 */
17#define RC_INIT_BYTES 5
18
19/*
 20 * Minimum number of usable input bytes needed to safely decode one LZMA symbol.
 21 * The worst case is that we decode 22 bits using probabilities and 26
 22 * direct bits. This may consume at most 20 bytes of input. However,
23 * lzma_main() does an extra normalization before returning, thus we
24 * need to put 21 here.
25 */
26#define LZMA_IN_REQUIRED 21
27
28/*
29 * Dictionary (history buffer)
30 *
31 * These are always true:
32 * start <= pos <= full <= end
33 * pos <= limit <= end
34 *
35 * In multi-call mode, also these are true:
36 * end == size
37 * size <= size_max
38 * allocated <= size
39 *
40 * Most of these variables are size_t to support single-call mode,
41 * in which the dictionary variables address the actual output
42 * buffer directly.
43 */
44struct dictionary {
45 /* Beginning of the history buffer */
46 uint8_t *buf;
47
48 /* Old position in buf (before decoding more data) */
49 size_t start;
50
51 /* Position in buf */
52 size_t pos;
53
54 /*
 55 * How full the dictionary is. This is used to detect corrupt input that
56 * would read beyond the beginning of the uncompressed stream.
57 */
58 size_t full;
59
60 /* Write limit; we don't write to buf[limit] or later bytes. */
61 size_t limit;
62
63 /*
64 * End of the dictionary buffer. In multi-call mode, this is
65 * the same as the dictionary size. In single-call mode, this
66 * indicates the size of the output buffer.
67 */
68 size_t end;
69
70 /*
71 * Size of the dictionary as specified in Block Header. This is used
72 * together with "full" to detect corrupt input that would make us
73 * read beyond the beginning of the uncompressed stream.
74 */
75 uint32_t size;
76
77 /*
78 * Maximum allowed dictionary size in multi-call mode.
79 * This is ignored in single-call mode.
80 */
81 uint32_t size_max;
82
83 /*
84 * Amount of memory currently allocated for the dictionary.
85 * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
86 * size_max is always the same as the allocated size.)
87 */
88 uint32_t allocated;
89
90 /* Operation mode */
91 enum xz_mode mode;
92};
93
94/* Range decoder */
95struct rc_dec {
96 uint32_t range;
97 uint32_t code;
98
99 /*
100 * Number of initializing bytes remaining to be read
101 * by rc_read_init().
102 */
103 uint32_t init_bytes_left;
104
105 /*
106 * Buffer from which we read our input. It can be either
107 * temp.buf or the caller-provided input buffer.
108 */
109 const uint8_t *in;
110 size_t in_pos;
111 size_t in_limit;
112};
113
114/* Probabilities for a length decoder. */
115struct lzma_len_dec {
116 /* Probability of match length being at least 10 */
117 uint16_t choice;
118
119 /* Probability of match length being at least 18 */
120 uint16_t choice2;
121
122 /* Probabilities for match lengths 2-9 */
123 uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
124
125 /* Probabilities for match lengths 10-17 */
126 uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
127
128 /* Probabilities for match lengths 18-273 */
129 uint16_t high[LEN_HIGH_SYMBOLS];
130};
131
132struct lzma_dec {
133 /* Distances of latest four matches */
134 uint32_t rep0;
135 uint32_t rep1;
136 uint32_t rep2;
137 uint32_t rep3;
138
139 /* Types of the most recently seen LZMA symbols */
140 enum lzma_state state;
141
142 /*
143 * Length of a match. This is updated so that dict_repeat can
144 * be called again to finish repeating the whole match.
145 */
146 uint32_t len;
147
148 /*
149 * LZMA properties or related bit masks (number of literal
 150 * context bits, a mask derived from the number of literal
 151 * position bits, and a mask derived from the number of
 152 * position bits)
153 */
154 uint32_t lc;
155 uint32_t literal_pos_mask; /* (1 << lp) - 1 */
156 uint32_t pos_mask; /* (1 << pb) - 1 */
157
158 /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
159 uint16_t is_match[STATES][POS_STATES_MAX];
160
161 /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
162 uint16_t is_rep[STATES];
163
164 /*
165 * If 0, distance of a repeated match is rep0.
166 * Otherwise check is_rep1.
167 */
168 uint16_t is_rep0[STATES];
169
170 /*
171 * If 0, distance of a repeated match is rep1.
172 * Otherwise check is_rep2.
173 */
174 uint16_t is_rep1[STATES];
175
176 /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
177 uint16_t is_rep2[STATES];
178
179 /*
180 * If 1, the repeated match has length of one byte. Otherwise
181 * the length is decoded from rep_len_decoder.
182 */
183 uint16_t is_rep0_long[STATES][POS_STATES_MAX];
184
185 /*
186 * Probability tree for the highest two bits of the match
187 * distance. There is a separate probability tree for match
188 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
189 */
190 uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
191
192 /*
 193 * Probability trees for additional bits for match distance
194 * when the distance is in the range [4, 127].
195 */
196 uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
197
198 /*
199 * Probability tree for the lowest four bits of a match
200 * distance that is equal to or greater than 128.
201 */
202 uint16_t dist_align[ALIGN_SIZE];
203
204 /* Length of a normal match */
205 struct lzma_len_dec match_len_dec;
206
207 /* Length of a repeated match */
208 struct lzma_len_dec rep_len_dec;
209
210 /* Probabilities of literals */
211 uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
212};
213
214struct lzma2_dec {
215 /* Position in xz_dec_lzma2_run(). */
216 enum lzma2_seq {
217 SEQ_CONTROL,
218 SEQ_UNCOMPRESSED_1,
219 SEQ_UNCOMPRESSED_2,
220 SEQ_COMPRESSED_0,
221 SEQ_COMPRESSED_1,
222 SEQ_PROPERTIES,
223 SEQ_LZMA_PREPARE,
224 SEQ_LZMA_RUN,
225 SEQ_COPY
226 } sequence;
227
228 /* Next position after decoding the compressed size of the chunk. */
229 enum lzma2_seq next_sequence;
230
231 /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
232 uint32_t uncompressed;
233
234 /*
235 * Compressed size of LZMA chunk or compressed/uncompressed
236 * size of uncompressed chunk (64 KiB at maximum)
237 */
238 uint32_t compressed;
239
240 /*
241 * True if dictionary reset is needed. This is false before
242 * the first chunk (LZMA or uncompressed).
243 */
244 bool need_dict_reset;
245
246 /*
247 * True if new LZMA properties are needed. This is false
248 * before the first LZMA chunk.
249 */
250 bool need_props;
251};
252
253struct xz_dec_lzma2 {
254 /*
255 * The order below is important on x86 to reduce code size and
256 * it shouldn't hurt on other platforms. Everything up to and
257 * including lzma.pos_mask are in the first 128 bytes on x86-32,
258 * which allows using smaller instructions to access those
259 * variables. On x86-64, fewer variables fit into the first 128
260 * bytes, but this is still the best order without sacrificing
261 * the readability by splitting the structures.
262 */
263 struct rc_dec rc;
264 struct dictionary dict;
265 struct lzma2_dec lzma2;
266 struct lzma_dec lzma;
267
268 /*
 269 * Temporary buffer which holds a small number of input bytes between
270 * decoder calls. See lzma2_lzma() for details.
271 */
272 struct {
273 uint32_t size;
274 uint8_t buf[3 * LZMA_IN_REQUIRED];
275 } temp;
276};
277
278/**************
279 * Dictionary *
280 **************/
281
282/*
283 * Reset the dictionary state. When in single-call mode, set up the beginning
284 * of the dictionary to point to the actual output buffer.
285 */
286static void dict_reset(struct dictionary *dict, struct xz_buf *b)
287{
288 if (DEC_IS_SINGLE(dict->mode)) {
289 dict->buf = b->out + b->out_pos;
290 dict->end = b->out_size - b->out_pos;
291 }
292
293 dict->start = 0;
294 dict->pos = 0;
295 dict->limit = 0;
296 dict->full = 0;
297}
298
299/* Set dictionary write limit */
300static void dict_limit(struct dictionary *dict, size_t out_max)
301{
302 if (dict->end - dict->pos <= out_max)
303 dict->limit = dict->end;
304 else
305 dict->limit = dict->pos + out_max;
306}
307
308/* Return true if at least one byte can be written into the dictionary. */
309static inline bool dict_has_space(const struct dictionary *dict)
310{
311 return dict->pos < dict->limit;
312}
313
314/*
315 * Get a byte from the dictionary at the given distance. The distance is
 316 * assumed to be valid, or, as a special case, zero when the dictionary is
317 * still empty. This special case is needed for single-call decoding to
318 * avoid writing a '\0' to the end of the destination buffer.
319 */
320static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
321{
322 size_t offset = dict->pos - dist - 1;
323
324 if (dist >= dict->pos)
325 offset += dict->end;
326
327 return dict->full > 0 ? dict->buf[offset] : 0;
328}
329
330/*
331 * Put one byte into the dictionary. It is assumed that there is space for it.
332 */
333static inline void dict_put(struct dictionary *dict, uint8_t byte)
334{
335 dict->buf[dict->pos++] = byte;
336
337 if (dict->full < dict->pos)
338 dict->full = dict->pos;
339}
340
341/*
342 * Repeat given number of bytes from the given distance. If the distance is
343 * invalid, false is returned. On success, true is returned and *len is
344 * updated to indicate how many bytes were left to be repeated.
345 */
346static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
347{
348 size_t back;
349 uint32_t left;
350
351 if (dist >= dict->full || dist >= dict->size)
352 return false;
353
354 left = min_t(size_t, dict->limit - dict->pos, *len);
355 *len -= left;
356
357 back = dict->pos - dist - 1;
358 if (dist >= dict->pos)
359 back += dict->end;
360
361 do {
362 dict->buf[dict->pos++] = dict->buf[back++];
363 if (back == dict->end)
364 back = 0;
365 } while (--left > 0);
366
367 if (dict->full < dict->pos)
368 dict->full = dict->pos;
369
370 return true;
371}
372
373/* Copy uncompressed data as is from input to dictionary and output buffers. */
374static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
375 uint32_t *left)
376{
377 size_t copy_size;
378
379 while (*left > 0 && b->in_pos < b->in_size
380 && b->out_pos < b->out_size) {
381 copy_size = min(b->in_size - b->in_pos,
382 b->out_size - b->out_pos);
383 if (copy_size > dict->end - dict->pos)
384 copy_size = dict->end - dict->pos;
385 if (copy_size > *left)
386 copy_size = *left;
387
388 *left -= copy_size;
389
390 memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
391 dict->pos += copy_size;
392
393 if (dict->full < dict->pos)
394 dict->full = dict->pos;
395
396 if (DEC_IS_MULTI(dict->mode)) {
397 if (dict->pos == dict->end)
398 dict->pos = 0;
399
400 memcpy(b->out + b->out_pos, b->in + b->in_pos,
401 copy_size);
402 }
403
404 dict->start = dict->pos;
405
406 b->out_pos += copy_size;
407 b->in_pos += copy_size;
408 }
409}
410
411/*
 412 * Flush pending data from the dictionary to b->out. It is assumed that there is
 413 * enough space in b->out. This is guaranteed because the caller uses dict_limit()
414 * before decoding data into the dictionary.
415 */
416static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
417{
418 size_t copy_size = dict->pos - dict->start;
419
420 if (DEC_IS_MULTI(dict->mode)) {
421 if (dict->pos == dict->end)
422 dict->pos = 0;
423
424 memcpy(b->out + b->out_pos, dict->buf + dict->start,
425 copy_size);
426 }
427
428 dict->start = dict->pos;
429 b->out_pos += copy_size;
430 return copy_size;
431}
432
433/*****************
434 * Range decoder *
435 *****************/
436
437/* Reset the range decoder. */
438static void rc_reset(struct rc_dec *rc)
439{
440 rc->range = (uint32_t)-1;
441 rc->code = 0;
442 rc->init_bytes_left = RC_INIT_BYTES;
443}
444
445/*
446 * Read the first five initial bytes into rc->code if they haven't been
447 * read already. (Yes, the first byte gets completely ignored.)
448 */
449static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
450{
451 while (rc->init_bytes_left > 0) {
452 if (b->in_pos == b->in_size)
453 return false;
454
455 rc->code = (rc->code << 8) + b->in[b->in_pos++];
456 --rc->init_bytes_left;
457 }
458
459 return true;
460}
461
462/* Return true if there may not be enough input for the next decoding loop. */
463static inline bool rc_limit_exceeded(const struct rc_dec *rc)
464{
465 return rc->in_pos > rc->in_limit;
466}
467
468/*
 469 * Return true if it is possible (from the point of view of the range decoder) that
470 * we have reached the end of the LZMA chunk.
471 */
472static inline bool rc_is_finished(const struct rc_dec *rc)
473{
474 return rc->code == 0;
475}
476
477/* Read the next input byte if needed. */
478static __always_inline void rc_normalize(struct rc_dec *rc)
479{
480 if (rc->range < RC_TOP_VALUE) {
481 rc->range <<= RC_SHIFT_BITS;
482 rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
483 }
484}
485
486/*
 487 * Decode one bit. In some versions, this function has been split into three
488 * functions so that the compiler is supposed to be able to more easily avoid
489 * an extra branch. In this particular version of the LZMA decoder, this
490 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 491 * on x86). Using a non-split version results in nicer-looking code too.
492 *
493 * NOTE: This must return an int. Do not make it return a bool or the speed
 494 * of the code generated by GCC 3.x decreases by 10-15 %. (GCC 4.3 doesn't care,
495 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
496 */
497static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
498{
499 uint32_t bound;
500 int bit;
501
502 rc_normalize(rc);
503 bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
504 if (rc->code < bound) {
505 rc->range = bound;
506 *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
507 bit = 0;
508 } else {
509 rc->range -= bound;
510 rc->code -= bound;
511 *prob -= *prob >> RC_MOVE_BITS;
512 bit = 1;
513 }
514
515 return bit;
516}
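
/*
 * A worked example of one rc_bit() step (not from the patch), assuming
 * the usual LZMA range-coder constants from xz_lzma2.h, which is not
 * shown in this hunk: RC_BIT_MODEL_TOTAL_BITS == 11 (so the probability
 * scale RC_BIT_MODEL_TOTAL is 2048) and RC_MOVE_BITS == 5.
 *
 *   range = 0x10000000, code = 0x03000000, *prob = 1024
 *   bound = (range >> 11) * 1024 = 0x08000000
 *
 * code < bound, so the decoded bit is 0, range shrinks to bound
 * (0x08000000), and the probability is nudged towards "bit 0 likely":
 *
 *   *prob += (2048 - 1024) >> 5, giving *prob = 1056
 *
 * Had code been >= bound, the bit would be 1, both range and code
 * would drop by bound, and *prob would decrease by *prob >> 5.
 */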
517
518/* Decode a bittree starting from the most significant bit. */
519static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
520 uint16_t *probs, uint32_t limit)
521{
522 uint32_t symbol = 1;
523
524 do {
525 if (rc_bit(rc, &probs[symbol]))
526 symbol = (symbol << 1) + 1;
527 else
528 symbol <<= 1;
529 } while (symbol < limit);
530
531 return symbol;
532}
533
534/* Decode a bittree starting from the least significant bit. */
535static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
536 uint16_t *probs,
537 uint32_t *dest, uint32_t limit)
538{
539 uint32_t symbol = 1;
540 uint32_t i = 0;
541
542 do {
543 if (rc_bit(rc, &probs[symbol])) {
544 symbol = (symbol << 1) + 1;
545 *dest += 1 << i;
546 } else {
547 symbol <<= 1;
548 }
549 } while (++i < limit);
550}
551
552/* Decode direct bits (fixed fifty-fifty probability) */
553static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
554{
555 uint32_t mask;
556
557 do {
558 rc_normalize(rc);
559 rc->range >>= 1;
560 rc->code -= rc->range;
561 mask = (uint32_t)0 - (rc->code >> 31);
562 rc->code += rc->range & mask;
563 *dest = (*dest << 1) + (mask + 1);
564 } while (--limit > 0);
565}
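/*
 * [Editor's illustrative sketch -- not part of the patch; it relies only
 * on uint32_t, and the _demo name is invented for the example.]
 *
 * The branch-free mask trick in rc_direct() is equivalent to this branchy
 * version operating on the already-halved range:
 */
static uint32_t rc_direct_bit_demo(uint32_t *code, uint32_t range_half,
				   uint32_t *dest)
{
	uint32_t bit = 0;

	if (*code >= range_half) {
		*code -= range_half;
		bit = 1;
	}

	*dest = (*dest << 1) + bit;

	return bit;
}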
566
567/********
568 * LZMA *
569 ********/
570
571/* Get pointer to literal coder probability array. */
572static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
573{
574 uint32_t prev_byte = dict_get(&s->dict, 0);
575 uint32_t low = prev_byte >> (8 - s->lzma.lc);
576 uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
577 return s->lzma.literal[low + high];
578}
579
580/* Decode a literal (one 8-bit byte) */
581static void lzma_literal(struct xz_dec_lzma2 *s)
582{
583 uint16_t *probs;
584 uint32_t symbol;
585 uint32_t match_byte;
586 uint32_t match_bit;
587 uint32_t offset;
588 uint32_t i;
589
590 probs = lzma_literal_probs(s);
591
592 if (lzma_state_is_literal(s->lzma.state)) {
593 symbol = rc_bittree(&s->rc, probs, 0x100);
594 } else {
595 symbol = 1;
596 match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
597 offset = 0x100;
598
599 do {
600 match_bit = match_byte & offset;
601 match_byte <<= 1;
602 i = offset + match_bit + symbol;
603
604 if (rc_bit(&s->rc, &probs[i])) {
605 symbol = (symbol << 1) + 1;
606 offset &= match_bit;
607 } else {
608 symbol <<= 1;
609 offset &= ~match_bit;
610 }
611 } while (symbol < 0x100);
612 }
613
614 dict_put(&s->dict, (uint8_t)symbol);
615 lzma_state_literal(&s->lzma.state);
616}
617
618/* Decode the length of the match into s->lzma.len. */
619static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
620 uint32_t pos_state)
621{
622 uint16_t *probs;
623 uint32_t limit;
624
625 if (!rc_bit(&s->rc, &l->choice)) {
626 probs = l->low[pos_state];
627 limit = LEN_LOW_SYMBOLS;
628 s->lzma.len = MATCH_LEN_MIN;
629 } else {
630 if (!rc_bit(&s->rc, &l->choice2)) {
631 probs = l->mid[pos_state];
632 limit = LEN_MID_SYMBOLS;
633 s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
634 } else {
635 probs = l->high;
636 limit = LEN_HIGH_SYMBOLS;
637 s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
638 + LEN_MID_SYMBOLS;
639 }
640 }
641
642 s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
643}
644
645/* Decode a match. The distance will be stored in s->lzma.rep0. */
646static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
647{
648 uint16_t *probs;
649 uint32_t dist_slot;
650 uint32_t limit;
651
652 lzma_state_match(&s->lzma.state);
653
654 s->lzma.rep3 = s->lzma.rep2;
655 s->lzma.rep2 = s->lzma.rep1;
656 s->lzma.rep1 = s->lzma.rep0;
657
658 lzma_len(s, &s->lzma.match_len_dec, pos_state);
659
660 probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
661 dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
662
663 if (dist_slot < DIST_MODEL_START) {
664 s->lzma.rep0 = dist_slot;
665 } else {
666 limit = (dist_slot >> 1) - 1;
667 s->lzma.rep0 = 2 + (dist_slot & 1);
668
669 if (dist_slot < DIST_MODEL_END) {
670 s->lzma.rep0 <<= limit;
671 probs = s->lzma.dist_special + s->lzma.rep0
672 - dist_slot - 1;
673 rc_bittree_reverse(&s->rc, probs,
674 &s->lzma.rep0, limit);
675 } else {
676 rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
677 s->lzma.rep0 <<= ALIGN_BITS;
678 rc_bittree_reverse(&s->rc, s->lzma.dist_align,
679 &s->lzma.rep0, ALIGN_BITS);
680 }
681 }
682}
683
684/*
685 * Decode a repeated match. The distance is one of the four most recently
686 * seen matches. The distance will be stored in s->lzma.rep0.
687 */
688static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
689{
690 uint32_t tmp;
691
692 if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
693 if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
694 s->lzma.state][pos_state])) {
695 lzma_state_short_rep(&s->lzma.state);
696 s->lzma.len = 1;
697 return;
698 }
699 } else {
700 if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
701 tmp = s->lzma.rep1;
702 } else {
703 if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
704 tmp = s->lzma.rep2;
705 } else {
706 tmp = s->lzma.rep3;
707 s->lzma.rep3 = s->lzma.rep2;
708 }
709
710 s->lzma.rep2 = s->lzma.rep1;
711 }
712
713 s->lzma.rep1 = s->lzma.rep0;
714 s->lzma.rep0 = tmp;
715 }
716
717 lzma_state_long_rep(&s->lzma.state);
718 lzma_len(s, &s->lzma.rep_len_dec, pos_state);
719}
720
721/* LZMA decoder core */
722static bool lzma_main(struct xz_dec_lzma2 *s)
723{
724 uint32_t pos_state;
725
726 /*
727	 * If the dictionary limit was reached during the previous call, try
728	 * to finish the possibly pending repeat in the dictionary.
729 */
730 if (dict_has_space(&s->dict) && s->lzma.len > 0)
731 dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
732
733 /*
734 * Decode more LZMA symbols. One iteration may consume up to
735 * LZMA_IN_REQUIRED - 1 bytes.
736 */
737 while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
738 pos_state = s->dict.pos & s->lzma.pos_mask;
739
740 if (!rc_bit(&s->rc, &s->lzma.is_match[
741 s->lzma.state][pos_state])) {
742 lzma_literal(s);
743 } else {
744 if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
745 lzma_rep_match(s, pos_state);
746 else
747 lzma_match(s, pos_state);
748
749 if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
750 return false;
751 }
752 }
753
754 /*
755 * Having the range decoder always normalized when we are outside
756 * this function makes it easier to correctly handle end of the chunk.
757 */
758 rc_normalize(&s->rc);
759
760 return true;
761}
762
763/*
764 * Reset the LZMA decoder and range decoder state. Dictionary is not reset
765 * here, because LZMA state may be reset without resetting the dictionary.
766 */
767static void lzma_reset(struct xz_dec_lzma2 *s)
768{
769 uint16_t *probs;
770 size_t i;
771
772 s->lzma.state = STATE_LIT_LIT;
773 s->lzma.rep0 = 0;
774 s->lzma.rep1 = 0;
775 s->lzma.rep2 = 0;
776 s->lzma.rep3 = 0;
777
778 /*
779 * All probabilities are initialized to the same value. This hack
780 * makes the code smaller by avoiding a separate loop for each
781 * probability array.
782 *
783	 * This could be optimized so that only the part of the literal
784	 * probabilities that is actually required gets initialized. In the
785	 * common case we would write 12 KiB less.
786 */
787 probs = s->lzma.is_match[0];
788 for (i = 0; i < PROBS_TOTAL; ++i)
789 probs[i] = RC_BIT_MODEL_TOTAL / 2;
790
791 rc_reset(&s->rc);
792}
793
794/*
795 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
796 * from the decoded lp and pb values. On success, the LZMA decoder state is
797 * reset and true is returned.
798 */
799static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
800{
801 if (props > (4 * 5 + 4) * 9 + 8)
802 return false;
803
804 s->lzma.pos_mask = 0;
805 while (props >= 9 * 5) {
806 props -= 9 * 5;
807 ++s->lzma.pos_mask;
808 }
809
810 s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
811
812 s->lzma.literal_pos_mask = 0;
813 while (props >= 9) {
814 props -= 9;
815 ++s->lzma.literal_pos_mask;
816 }
817
818 s->lzma.lc = props;
819
820 if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
821 return false;
822
823 s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
824
825 lzma_reset(s);
826
827 return true;
828}
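/*
 * [Editor's illustrative sketch -- not part of the patch. Standalone
 * userspace code; the _demo name is invented for the example.]
 *
 * The props byte packs the LZMA parameters as (pb * 5 + lp) * 9 + lc,
 * which is what the repeated subtraction above unpacks. The common
 * defaults lc=3, lp=0, pb=2 encode to 93 and yield pos_mask = 3 and
 * literal_pos_mask = 0:
 */
#include <assert.h>
#include <stdint.h>

static void lzma_props_demo(void)
{
	uint8_t props = (2 * 5 + 0) * 9 + 3;	/* pb = 2, lp = 0, lc = 3 */
	uint32_t pb = props / (9 * 5);
	uint32_t lp = (props % (9 * 5)) / 9;
	uint32_t lc = props % 9;

	assert(props == 93);
	assert(pb == 2 && lp == 0 && lc == 3);
	assert(((1u << pb) - 1) == 3);	/* becomes s->lzma.pos_mask */
	assert(((1u << lp) - 1) == 0);	/* becomes s->lzma.literal_pos_mask */
}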
829
830/*********
831 * LZMA2 *
832 *********/
833
834/*
835 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
836 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
837 * wrapper function takes care of making the LZMA decoder's assumption safe.
838 *
839 * As long as there is plenty of input left to be decoded in the current LZMA
840 * chunk, we decode directly from the caller-supplied input buffer until
841 * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
842 * s->temp.buf, which (hopefully) gets filled on the next call to this
843 * function. We decode a few bytes from the temporary buffer so that we can
844 * continue decoding from the caller-supplied input buffer again.
845 */
846static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
847{
848 size_t in_avail;
849 uint32_t tmp;
850
851 in_avail = b->in_size - b->in_pos;
852 if (s->temp.size > 0 || s->lzma2.compressed == 0) {
853 tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
854 if (tmp > s->lzma2.compressed - s->temp.size)
855 tmp = s->lzma2.compressed - s->temp.size;
856 if (tmp > in_avail)
857 tmp = in_avail;
858
859 memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
860
861 if (s->temp.size + tmp == s->lzma2.compressed) {
862 memzero(s->temp.buf + s->temp.size + tmp,
863 sizeof(s->temp.buf)
864 - s->temp.size - tmp);
865 s->rc.in_limit = s->temp.size + tmp;
866 } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
867 s->temp.size += tmp;
868 b->in_pos += tmp;
869 return true;
870 } else {
871 s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
872 }
873
874 s->rc.in = s->temp.buf;
875 s->rc.in_pos = 0;
876
877 if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
878 return false;
879
880 s->lzma2.compressed -= s->rc.in_pos;
881
882 if (s->rc.in_pos < s->temp.size) {
883 s->temp.size -= s->rc.in_pos;
884 memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
885 s->temp.size);
886 return true;
887 }
888
889 b->in_pos += s->rc.in_pos - s->temp.size;
890 s->temp.size = 0;
891 }
892
893 in_avail = b->in_size - b->in_pos;
894 if (in_avail >= LZMA_IN_REQUIRED) {
895 s->rc.in = b->in;
896 s->rc.in_pos = b->in_pos;
897
898 if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
899 s->rc.in_limit = b->in_pos + s->lzma2.compressed;
900 else
901 s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
902
903 if (!lzma_main(s))
904 return false;
905
906 in_avail = s->rc.in_pos - b->in_pos;
907 if (in_avail > s->lzma2.compressed)
908 return false;
909
910 s->lzma2.compressed -= in_avail;
911 b->in_pos = s->rc.in_pos;
912 }
913
914 in_avail = b->in_size - b->in_pos;
915 if (in_avail < LZMA_IN_REQUIRED) {
916 if (in_avail > s->lzma2.compressed)
917 in_avail = s->lzma2.compressed;
918
919 memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
920 s->temp.size = in_avail;
921 b->in_pos += in_avail;
922 }
923
924 return true;
925}
926
927/*
928 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
929 * decoding or copying of uncompressed chunks to other functions.
930 */
931XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
932 struct xz_buf *b)
933{
934 uint32_t tmp;
935
936 while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
937 switch (s->lzma2.sequence) {
938 case SEQ_CONTROL:
939 /*
940 * LZMA2 control byte
941 *
942 * Exact values:
943 * 0x00 End marker
944 * 0x01 Dictionary reset followed by
945 * an uncompressed chunk
946 * 0x02 Uncompressed chunk (no dictionary reset)
947 *
948 * Highest three bits (s->control & 0xE0):
949 * 0xE0 Dictionary reset, new properties and state
950 * reset, followed by LZMA compressed chunk
951 * 0xC0 New properties and state reset, followed
952 * by LZMA compressed chunk (no dictionary
953 * reset)
954 * 0xA0 State reset using old properties,
955 * followed by LZMA compressed chunk (no
956 * dictionary reset)
957 * 0x80 LZMA chunk (no dictionary or state reset)
958 *
959 * For LZMA compressed chunks, the lowest five bits
960			 * (s->control & 0x1F) are the highest bits of the
961 * uncompressed size (bits 16-20).
962 *
963 * A new LZMA2 stream must begin with a dictionary
964 * reset. The first LZMA chunk must set new
965 * properties and reset the LZMA state.
966 *
967 * Values that don't match anything described above
968 * are invalid and we return XZ_DATA_ERROR.
969 */
970 tmp = b->in[b->in_pos++];
971
972 if (tmp >= 0xE0 || tmp == 0x01) {
973 s->lzma2.need_props = true;
974 s->lzma2.need_dict_reset = false;
975 dict_reset(&s->dict, b);
976 } else if (s->lzma2.need_dict_reset) {
977 return XZ_DATA_ERROR;
978 }
979
980 if (tmp >= 0x80) {
981 s->lzma2.uncompressed = (tmp & 0x1F) << 16;
982 s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
983
984 if (tmp >= 0xC0) {
985 /*
986 * When there are new properties,
987 * state reset is done at
988 * SEQ_PROPERTIES.
989 */
990 s->lzma2.need_props = false;
991 s->lzma2.next_sequence
992 = SEQ_PROPERTIES;
993
994 } else if (s->lzma2.need_props) {
995 return XZ_DATA_ERROR;
996
997 } else {
998 s->lzma2.next_sequence
999 = SEQ_LZMA_PREPARE;
1000 if (tmp >= 0xA0)
1001 lzma_reset(s);
1002 }
1003 } else {
1004 if (tmp == 0x00)
1005 return XZ_STREAM_END;
1006
1007 if (tmp > 0x02)
1008 return XZ_DATA_ERROR;
1009
1010 s->lzma2.sequence = SEQ_COMPRESSED_0;
1011 s->lzma2.next_sequence = SEQ_COPY;
1012 }
1013
1014 break;
1015
1016 case SEQ_UNCOMPRESSED_1:
1017 s->lzma2.uncompressed
1018 += (uint32_t)b->in[b->in_pos++] << 8;
1019 s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
1020 break;
1021
1022 case SEQ_UNCOMPRESSED_2:
1023 s->lzma2.uncompressed
1024 += (uint32_t)b->in[b->in_pos++] + 1;
1025 s->lzma2.sequence = SEQ_COMPRESSED_0;
1026 break;
1027
1028 case SEQ_COMPRESSED_0:
1029 s->lzma2.compressed
1030 = (uint32_t)b->in[b->in_pos++] << 8;
1031 s->lzma2.sequence = SEQ_COMPRESSED_1;
1032 break;
1033
1034 case SEQ_COMPRESSED_1:
1035 s->lzma2.compressed
1036 += (uint32_t)b->in[b->in_pos++] + 1;
1037 s->lzma2.sequence = s->lzma2.next_sequence;
1038 break;
1039
1040 case SEQ_PROPERTIES:
1041 if (!lzma_props(s, b->in[b->in_pos++]))
1042 return XZ_DATA_ERROR;
1043
1044 s->lzma2.sequence = SEQ_LZMA_PREPARE;
1045
1046 case SEQ_LZMA_PREPARE:
1047 if (s->lzma2.compressed < RC_INIT_BYTES)
1048 return XZ_DATA_ERROR;
1049
1050 if (!rc_read_init(&s->rc, b))
1051 return XZ_OK;
1052
1053 s->lzma2.compressed -= RC_INIT_BYTES;
1054 s->lzma2.sequence = SEQ_LZMA_RUN;
1055
1056 case SEQ_LZMA_RUN:
1057 /*
1058			 * Set the dictionary limit to indicate how much we
1059			 * want decoded at most. Decode new data into the
1060 * dictionary. Flush the new data from dictionary to
1061 * b->out. Check if we finished decoding this chunk.
1062 * In case the dictionary got full but we didn't fill
1063 * the output buffer yet, we may run this loop
1064 * multiple times without changing s->lzma2.sequence.
1065 */
1066 dict_limit(&s->dict, min_t(size_t,
1067 b->out_size - b->out_pos,
1068 s->lzma2.uncompressed));
1069 if (!lzma2_lzma(s, b))
1070 return XZ_DATA_ERROR;
1071
1072 s->lzma2.uncompressed -= dict_flush(&s->dict, b);
1073
1074 if (s->lzma2.uncompressed == 0) {
1075 if (s->lzma2.compressed > 0 || s->lzma.len > 0
1076 || !rc_is_finished(&s->rc))
1077 return XZ_DATA_ERROR;
1078
1079 rc_reset(&s->rc);
1080 s->lzma2.sequence = SEQ_CONTROL;
1081
1082 } else if (b->out_pos == b->out_size
1083 || (b->in_pos == b->in_size
1084 && s->temp.size
1085 < s->lzma2.compressed)) {
1086 return XZ_OK;
1087 }
1088
1089 break;
1090
1091 case SEQ_COPY:
1092 dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
1093 if (s->lzma2.compressed > 0)
1094 return XZ_OK;
1095
1096 s->lzma2.sequence = SEQ_CONTROL;
1097 break;
1098 }
1099 }
1100
1101 return XZ_OK;
1102}
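/*
 * [Editor's illustrative sketch -- not part of the patch. Standalone
 * userspace code; the _demo names are invented for the example.]
 *
 * The SEQ_CONTROL handling above boils down to this classification of the
 * LZMA2 control byte:
 */
#include <stdint.h>

enum lzma2_chunk_demo {
	CHUNK_END,		/* 0x00: end marker */
	CHUNK_UNCOMP_RESET,	/* 0x01: dictionary reset + uncompressed */
	CHUNK_UNCOMP,		/* 0x02: uncompressed, no dictionary reset */
	CHUNK_LZMA,		/* 0x80-0xFF: LZMA chunk; bits 5-6 select the
				   property/state/dictionary resets */
	CHUNK_INVALID		/* 0x03-0x7F: XZ_DATA_ERROR */
};

static enum lzma2_chunk_demo lzma2_classify_demo(uint8_t control)
{
	if (control == 0x00)
		return CHUNK_END;
	if (control == 0x01)
		return CHUNK_UNCOMP_RESET;
	if (control == 0x02)
		return CHUNK_UNCOMP;
	if (control >= 0x80)
		return CHUNK_LZMA;	/* control & 0x1F = size bits 16-20 */
	return CHUNK_INVALID;
}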
1103
1104XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
1105 uint32_t dict_max)
1106{
1107 struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
1108 if (s == NULL)
1109 return NULL;
1110
1111 s->dict.mode = mode;
1112 s->dict.size_max = dict_max;
1113
1114 if (DEC_IS_PREALLOC(mode)) {
1115 s->dict.buf = vmalloc(dict_max);
1116 if (s->dict.buf == NULL) {
1117 kfree(s);
1118 return NULL;
1119 }
1120 } else if (DEC_IS_DYNALLOC(mode)) {
1121 s->dict.buf = NULL;
1122 s->dict.allocated = 0;
1123 }
1124
1125 return s;
1126}
1127
1128XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
1129{
1130 /* This limits dictionary size to 3 GiB to keep parsing simpler. */
1131 if (props > 39)
1132 return XZ_OPTIONS_ERROR;
1133
1134 s->dict.size = 2 + (props & 1);
1135 s->dict.size <<= (props >> 1) + 11;
1136
1137 if (DEC_IS_MULTI(s->dict.mode)) {
1138 if (s->dict.size > s->dict.size_max)
1139 return XZ_MEMLIMIT_ERROR;
1140
1141 s->dict.end = s->dict.size;
1142
1143 if (DEC_IS_DYNALLOC(s->dict.mode)) {
1144 if (s->dict.allocated < s->dict.size) {
1145 vfree(s->dict.buf);
1146 s->dict.buf = vmalloc(s->dict.size);
1147 if (s->dict.buf == NULL) {
1148 s->dict.allocated = 0;
1149 return XZ_MEM_ERROR;
1150 }
1151 }
1152 }
1153 }
1154
1155 s->lzma.len = 0;
1156
1157 s->lzma2.sequence = SEQ_CONTROL;
1158 s->lzma2.need_dict_reset = true;
1159
1160 s->temp.size = 0;
1161
1162 return XZ_OK;
1163}
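/*
 * [Editor's illustrative sketch -- not part of the patch. Standalone
 * userspace code; the _demo names are invented for the example.]
 *
 * The dictionary size computed above is (2 | (props & 1)) << (props / 2 + 11):
 * 4 KiB, 6 KiB, 8 KiB, 12 KiB, ... up to 3 GiB at props == 39. props == 40
 * would mean 4 GiB, which no longer fits in uint32_t, hence the cap.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t lzma2_dict_size_demo(uint8_t props)
{
	return (uint64_t)(2 + (props & 1)) << ((props >> 1) + 11);
}

static void dict_size_demo(void)
{
	assert(lzma2_dict_size_demo(0) == 4096);	/* 4 KiB */
	assert(lzma2_dict_size_demo(1) == 6144);	/* 6 KiB */
	assert(lzma2_dict_size_demo(39) == 3ULL << 30);	/* 3 GiB, the maximum */
	assert(lzma2_dict_size_demo(40) == 4ULL << 30);	/* rejected above */
}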
1164
1165XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
1166{
1167 if (DEC_IS_MULTI(s->dict.mode))
1168 vfree(s->dict.buf);
1169
1170 kfree(s);
1171}
diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c
new file mode 100644
index 000000000000..ac809b1e64f7
--- /dev/null
+++ b/lib/xz/xz_dec_stream.c
@@ -0,0 +1,821 @@
1/*
2 * .xz Stream decoder
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#include "xz_private.h"
11#include "xz_stream.h"
12
13/* Hash used to validate the Index field */
14struct xz_dec_hash {
15 vli_type unpadded;
16 vli_type uncompressed;
17 uint32_t crc32;
18};
19
20struct xz_dec {
21 /* Position in dec_main() */
22 enum {
23 SEQ_STREAM_HEADER,
24 SEQ_BLOCK_START,
25 SEQ_BLOCK_HEADER,
26 SEQ_BLOCK_UNCOMPRESS,
27 SEQ_BLOCK_PADDING,
28 SEQ_BLOCK_CHECK,
29 SEQ_INDEX,
30 SEQ_INDEX_PADDING,
31 SEQ_INDEX_CRC32,
32 SEQ_STREAM_FOOTER
33 } sequence;
34
35 /* Position in variable-length integers and Check fields */
36 uint32_t pos;
37
38 /* Variable-length integer decoded by dec_vli() */
39 vli_type vli;
40
41 /* Saved in_pos and out_pos */
42 size_t in_start;
43 size_t out_start;
44
45 /* CRC32 value in Block or Index */
46 uint32_t crc32;
47
48 /* Type of the integrity check calculated from uncompressed data */
49 enum xz_check check_type;
50
51 /* Operation mode */
52 enum xz_mode mode;
53
54 /*
55 * True if the next call to xz_dec_run() is allowed to return
56 * XZ_BUF_ERROR.
57 */
58 bool allow_buf_error;
59
60 /* Information stored in Block Header */
61 struct {
62 /*
63 * Value stored in the Compressed Size field, or
64 * VLI_UNKNOWN if Compressed Size is not present.
65 */
66 vli_type compressed;
67
68 /*
69 * Value stored in the Uncompressed Size field, or
70 * VLI_UNKNOWN if Uncompressed Size is not present.
71 */
72 vli_type uncompressed;
73
74 /* Size of the Block Header field */
75 uint32_t size;
76 } block_header;
77
78 /* Information collected when decoding Blocks */
79 struct {
80 /* Observed compressed size of the current Block */
81 vli_type compressed;
82
83 /* Observed uncompressed size of the current Block */
84 vli_type uncompressed;
85
86 /* Number of Blocks decoded so far */
87 vli_type count;
88
89 /*
90 * Hash calculated from the Block sizes. This is used to
91 * validate the Index field.
92 */
93 struct xz_dec_hash hash;
94 } block;
95
96 /* Variables needed when verifying the Index field */
97 struct {
98 /* Position in dec_index() */
99 enum {
100 SEQ_INDEX_COUNT,
101 SEQ_INDEX_UNPADDED,
102 SEQ_INDEX_UNCOMPRESSED
103 } sequence;
104
105 /* Size of the Index in bytes */
106 vli_type size;
107
108 /* Number of Records (matches block.count in valid files) */
109 vli_type count;
110
111 /*
112 * Hash calculated from the Records (matches block.hash in
113 * valid files).
114 */
115 struct xz_dec_hash hash;
116 } index;
117
118 /*
119 * Temporary buffer needed to hold Stream Header, Block Header,
120 * and Stream Footer. The Block Header is the biggest (1 KiB)
121 * so we reserve space according to that. buf[] has to be aligned
122 * to a multiple of four bytes; the size_t variables before it
123 * should guarantee this.
124 */
125 struct {
126 size_t pos;
127 size_t size;
128 uint8_t buf[1024];
129 } temp;
130
131 struct xz_dec_lzma2 *lzma2;
132
133#ifdef XZ_DEC_BCJ
134 struct xz_dec_bcj *bcj;
135 bool bcj_active;
136#endif
137};
138
139#ifdef XZ_DEC_ANY_CHECK
140/* Sizes of the Check field with different Check IDs */
141static const uint8_t check_sizes[16] = {
142 0,
143 4, 4, 4,
144 8, 8, 8,
145 16, 16, 16,
146 32, 32, 32,
147 64, 64, 64
148};
149#endif
150
151/*
152 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
153 * must have set s->temp.pos to indicate how much data we are supposed
154 * to copy into s->temp.buf. Return true once s->temp.pos has reached
155 * s->temp.size.
156 */
157static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
158{
159 size_t copy_size = min_t(size_t,
160 b->in_size - b->in_pos, s->temp.size - s->temp.pos);
161
162 memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
163 b->in_pos += copy_size;
164 s->temp.pos += copy_size;
165
166 if (s->temp.pos == s->temp.size) {
167 s->temp.pos = 0;
168 return true;
169 }
170
171 return false;
172}
173
174/* Decode a variable-length integer (little-endian base-128 encoding) */
175static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
176 size_t *in_pos, size_t in_size)
177{
178 uint8_t byte;
179
180 if (s->pos == 0)
181 s->vli = 0;
182
183 while (*in_pos < in_size) {
184 byte = in[*in_pos];
185 ++*in_pos;
186
187 s->vli |= (vli_type)(byte & 0x7F) << s->pos;
188
189 if ((byte & 0x80) == 0) {
190 /* Don't allow non-minimal encodings. */
191 if (byte == 0 && s->pos != 0)
192 return XZ_DATA_ERROR;
193
194 s->pos = 0;
195 return XZ_STREAM_END;
196 }
197
198 s->pos += 7;
199 if (s->pos == 7 * VLI_BYTES_MAX)
200 return XZ_DATA_ERROR;
201 }
202
203 return XZ_OK;
204}
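/*
 * [Editor's illustrative sketch -- not part of the patch. Standalone
 * userspace code; the _demo names are invented for the example.]
 *
 * dec_vli() reads the usual little-endian base-128 encoding where bit 7 of
 * each byte is a continuation flag. The matching encoder and one worked
 * value:
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static size_t vli_encode_demo(uint64_t value, uint8_t *out)
{
	size_t n = 0;

	while (value >= 0x80) {
		out[n++] = (uint8_t)(value & 0x7F) | 0x80;
		value >>= 7;
	}

	out[n++] = (uint8_t)value;

	return n;
}

static void vli_demo(void)
{
	uint8_t buf[9];

	/* 300 == 0x12C encodes to 0xAC 0x02: the low seven bits come first. */
	assert(vli_encode_demo(300, buf) == 2);
	assert(buf[0] == 0xAC && buf[1] == 0x02);
}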
205
206/*
207 * Decode the Compressed Data field from a Block. Update and validate
208 * the observed compressed and uncompressed sizes of the Block so that
209 * they don't exceed the values possibly stored in the Block Header
210 * (validation assumes that no integer overflow occurs, since vli_type
211 * is normally uint64_t). Update the CRC32 if presence of the CRC32
212 * field was indicated in Stream Header.
213 *
214 * Once the decoding is finished, validate that the observed sizes match
215 * the sizes possibly stored in the Block Header. Update the hash and
216 * Block count, which are later used to validate the Index field.
217 */
218static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
219{
220 enum xz_ret ret;
221
222 s->in_start = b->in_pos;
223 s->out_start = b->out_pos;
224
225#ifdef XZ_DEC_BCJ
226 if (s->bcj_active)
227 ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
228 else
229#endif
230 ret = xz_dec_lzma2_run(s->lzma2, b);
231
232 s->block.compressed += b->in_pos - s->in_start;
233 s->block.uncompressed += b->out_pos - s->out_start;
234
235 /*
236 * There is no need to separately check for VLI_UNKNOWN, since
237 * the observed sizes are always smaller than VLI_UNKNOWN.
238 */
239 if (s->block.compressed > s->block_header.compressed
240 || s->block.uncompressed
241 > s->block_header.uncompressed)
242 return XZ_DATA_ERROR;
243
244 if (s->check_type == XZ_CHECK_CRC32)
245 s->crc32 = xz_crc32(b->out + s->out_start,
246 b->out_pos - s->out_start, s->crc32);
247
248 if (ret == XZ_STREAM_END) {
249 if (s->block_header.compressed != VLI_UNKNOWN
250 && s->block_header.compressed
251 != s->block.compressed)
252 return XZ_DATA_ERROR;
253
254 if (s->block_header.uncompressed != VLI_UNKNOWN
255 && s->block_header.uncompressed
256 != s->block.uncompressed)
257 return XZ_DATA_ERROR;
258
259 s->block.hash.unpadded += s->block_header.size
260 + s->block.compressed;
261
262#ifdef XZ_DEC_ANY_CHECK
263 s->block.hash.unpadded += check_sizes[s->check_type];
264#else
265 if (s->check_type == XZ_CHECK_CRC32)
266 s->block.hash.unpadded += 4;
267#endif
268
269 s->block.hash.uncompressed += s->block.uncompressed;
270 s->block.hash.crc32 = xz_crc32(
271 (const uint8_t *)&s->block.hash,
272 sizeof(s->block.hash), s->block.hash.crc32);
273
274 ++s->block.count;
275 }
276
277 return ret;
278}
279
280/* Update the Index size and the CRC32 value. */
281static void index_update(struct xz_dec *s, const struct xz_buf *b)
282{
283 size_t in_used = b->in_pos - s->in_start;
284 s->index.size += in_used;
285 s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
286}
287
288/*
289 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
290 * fields from the Index field. That is, Index Padding and CRC32 are not
291 * decoded by this function.
292 *
293 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
294 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
295 */
296static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
297{
298 enum xz_ret ret;
299
300 do {
301 ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
302 if (ret != XZ_STREAM_END) {
303 index_update(s, b);
304 return ret;
305 }
306
307 switch (s->index.sequence) {
308 case SEQ_INDEX_COUNT:
309 s->index.count = s->vli;
310
311 /*
312 * Validate that the Number of Records field
313 * indicates the same number of Records as
314 * there were Blocks in the Stream.
315 */
316 if (s->index.count != s->block.count)
317 return XZ_DATA_ERROR;
318
319 s->index.sequence = SEQ_INDEX_UNPADDED;
320 break;
321
322 case SEQ_INDEX_UNPADDED:
323 s->index.hash.unpadded += s->vli;
324 s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
325 break;
326
327 case SEQ_INDEX_UNCOMPRESSED:
328 s->index.hash.uncompressed += s->vli;
329 s->index.hash.crc32 = xz_crc32(
330 (const uint8_t *)&s->index.hash,
331 sizeof(s->index.hash),
332 s->index.hash.crc32);
333 --s->index.count;
334 s->index.sequence = SEQ_INDEX_UNPADDED;
335 break;
336 }
337 } while (s->index.count > 0);
338
339 return XZ_STREAM_END;
340}
341
342/*
343 * Validate that the next four input bytes match the value of s->crc32.
344 * s->pos must be zero when starting to validate the first byte.
345 */
346static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b)
347{
348 do {
349 if (b->in_pos == b->in_size)
350 return XZ_OK;
351
352 if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
353 return XZ_DATA_ERROR;
354
355 s->pos += 8;
356
357 } while (s->pos < 32);
358
359 s->crc32 = 0;
360 s->pos = 0;
361
362 return XZ_STREAM_END;
363}
364
365#ifdef XZ_DEC_ANY_CHECK
366/*
367 * Skip over the Check field when the Check ID is not supported.
368 * Returns true once the whole Check field has been skipped over.
369 */
370static bool check_skip(struct xz_dec *s, struct xz_buf *b)
371{
372 while (s->pos < check_sizes[s->check_type]) {
373 if (b->in_pos == b->in_size)
374 return false;
375
376 ++b->in_pos;
377 ++s->pos;
378 }
379
380 s->pos = 0;
381
382 return true;
383}
384#endif
385
386/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
387static enum xz_ret dec_stream_header(struct xz_dec *s)
388{
389 if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
390 return XZ_FORMAT_ERROR;
391
392 if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
393 != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
394 return XZ_DATA_ERROR;
395
396 if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
397 return XZ_OPTIONS_ERROR;
398
399 /*
400 * Of integrity checks, we support only none (Check ID = 0) and
401 * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
402 * we will accept other check types too, but then the check won't
403 * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
404 */
405 s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
406
407#ifdef XZ_DEC_ANY_CHECK
408 if (s->check_type > XZ_CHECK_MAX)
409 return XZ_OPTIONS_ERROR;
410
411 if (s->check_type > XZ_CHECK_CRC32)
412 return XZ_UNSUPPORTED_CHECK;
413#else
414 if (s->check_type > XZ_CHECK_CRC32)
415 return XZ_OPTIONS_ERROR;
416#endif
417
418 return XZ_OK;
419}
420
421/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
422static enum xz_ret dec_stream_footer(struct xz_dec *s)
423{
424 if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
425 return XZ_DATA_ERROR;
426
427 if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
428 return XZ_DATA_ERROR;
429
430 /*
431 * Validate Backward Size. Note that we never added the size of the
432 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
433 * instead of s->index.size / 4 - 1.
434 */
435 if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
436 return XZ_DATA_ERROR;
437
438 if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
439 return XZ_DATA_ERROR;
440
441 /*
442 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
443 * for the caller.
444 */
445 return XZ_STREAM_END;
446}
447
448/* Decode the Block Header and initialize the filter chain. */
449static enum xz_ret dec_block_header(struct xz_dec *s)
450{
451 enum xz_ret ret;
452
453 /*
454 * Validate the CRC32. We know that the temp buffer is at least
455 * eight bytes so this is safe.
456 */
457 s->temp.size -= 4;
458 if (xz_crc32(s->temp.buf, s->temp.size, 0)
459 != get_le32(s->temp.buf + s->temp.size))
460 return XZ_DATA_ERROR;
461
462 s->temp.pos = 2;
463
464 /*
465 * Catch unsupported Block Flags. We support only one or two filters
466 * in the chain, so we catch that with the same test.
467 */
468#ifdef XZ_DEC_BCJ
469 if (s->temp.buf[1] & 0x3E)
470#else
471 if (s->temp.buf[1] & 0x3F)
472#endif
473 return XZ_OPTIONS_ERROR;
474
475 /* Compressed Size */
476 if (s->temp.buf[1] & 0x40) {
477 if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
478 != XZ_STREAM_END)
479 return XZ_DATA_ERROR;
480
481 s->block_header.compressed = s->vli;
482 } else {
483 s->block_header.compressed = VLI_UNKNOWN;
484 }
485
486 /* Uncompressed Size */
487 if (s->temp.buf[1] & 0x80) {
488 if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
489 != XZ_STREAM_END)
490 return XZ_DATA_ERROR;
491
492 s->block_header.uncompressed = s->vli;
493 } else {
494 s->block_header.uncompressed = VLI_UNKNOWN;
495 }
496
497#ifdef XZ_DEC_BCJ
498 /* If there are two filters, the first one must be a BCJ filter. */
499 s->bcj_active = s->temp.buf[1] & 0x01;
500 if (s->bcj_active) {
501 if (s->temp.size - s->temp.pos < 2)
502 return XZ_OPTIONS_ERROR;
503
504 ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
505 if (ret != XZ_OK)
506 return ret;
507
508 /*
509 * We don't support custom start offset,
510 * so Size of Properties must be zero.
511 */
512 if (s->temp.buf[s->temp.pos++] != 0x00)
513 return XZ_OPTIONS_ERROR;
514 }
515#endif
516
517 /* Valid Filter Flags always take at least two bytes. */
518 if (s->temp.size - s->temp.pos < 2)
519 return XZ_DATA_ERROR;
520
521 /* Filter ID = LZMA2 */
522 if (s->temp.buf[s->temp.pos++] != 0x21)
523 return XZ_OPTIONS_ERROR;
524
525 /* Size of Properties = 1-byte Filter Properties */
526 if (s->temp.buf[s->temp.pos++] != 0x01)
527 return XZ_OPTIONS_ERROR;
528
529 /* Filter Properties contains LZMA2 dictionary size. */
530 if (s->temp.size - s->temp.pos < 1)
531 return XZ_DATA_ERROR;
532
533 ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
534 if (ret != XZ_OK)
535 return ret;
536
537 /* The rest must be Header Padding. */
538 while (s->temp.pos < s->temp.size)
539 if (s->temp.buf[s->temp.pos++] != 0x00)
540 return XZ_OPTIONS_ERROR;
541
542 s->temp.pos = 0;
543 s->block.compressed = 0;
544 s->block.uncompressed = 0;
545
546 return XZ_OK;
547}
548
549static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
550{
551 enum xz_ret ret;
552
553 /*
554 * Store the start position for the case when we are in the middle
555 * of the Index field.
556 */
557 s->in_start = b->in_pos;
558
559 while (true) {
560 switch (s->sequence) {
561 case SEQ_STREAM_HEADER:
562 /*
563 * Stream Header is copied to s->temp, and then
564 * decoded from there. This way if the caller
565			 * gives us only a little input at a time, we can
566 * still keep the Stream Header decoding code
567			 * simple. A similar approach is used in many places
568 * in this file.
569 */
570 if (!fill_temp(s, b))
571 return XZ_OK;
572
573 /*
574 * If dec_stream_header() returns
575 * XZ_UNSUPPORTED_CHECK, it is still possible
576 * to continue decoding if working in multi-call
577 * mode. Thus, update s->sequence before calling
578 * dec_stream_header().
579 */
580 s->sequence = SEQ_BLOCK_START;
581
582 ret = dec_stream_header(s);
583 if (ret != XZ_OK)
584 return ret;
585
586 case SEQ_BLOCK_START:
587 /* We need one byte of input to continue. */
588 if (b->in_pos == b->in_size)
589 return XZ_OK;
590
591 /* See if this is the beginning of the Index field. */
592 if (b->in[b->in_pos] == 0) {
593 s->in_start = b->in_pos++;
594 s->sequence = SEQ_INDEX;
595 break;
596 }
597
598 /*
599 * Calculate the size of the Block Header and
600 * prepare to decode it.
601 */
602 s->block_header.size
603 = ((uint32_t)b->in[b->in_pos] + 1) * 4;
604
605 s->temp.size = s->block_header.size;
606 s->temp.pos = 0;
607 s->sequence = SEQ_BLOCK_HEADER;
608
609 case SEQ_BLOCK_HEADER:
610 if (!fill_temp(s, b))
611 return XZ_OK;
612
613 ret = dec_block_header(s);
614 if (ret != XZ_OK)
615 return ret;
616
617 s->sequence = SEQ_BLOCK_UNCOMPRESS;
618
619 case SEQ_BLOCK_UNCOMPRESS:
620 ret = dec_block(s, b);
621 if (ret != XZ_STREAM_END)
622 return ret;
623
624 s->sequence = SEQ_BLOCK_PADDING;
625
626 case SEQ_BLOCK_PADDING:
627 /*
628 * Size of Compressed Data + Block Padding
629 * must be a multiple of four. We don't need
630 * s->block.compressed for anything else
631 * anymore, so we use it here to test the size
632 * of the Block Padding field.
633 */
634 while (s->block.compressed & 3) {
635 if (b->in_pos == b->in_size)
636 return XZ_OK;
637
638 if (b->in[b->in_pos++] != 0)
639 return XZ_DATA_ERROR;
640
641 ++s->block.compressed;
642 }
643
644 s->sequence = SEQ_BLOCK_CHECK;
645
646 case SEQ_BLOCK_CHECK:
647 if (s->check_type == XZ_CHECK_CRC32) {
648 ret = crc32_validate(s, b);
649 if (ret != XZ_STREAM_END)
650 return ret;
651 }
652#ifdef XZ_DEC_ANY_CHECK
653 else if (!check_skip(s, b)) {
654 return XZ_OK;
655 }
656#endif
657
658 s->sequence = SEQ_BLOCK_START;
659 break;
660
661 case SEQ_INDEX:
662 ret = dec_index(s, b);
663 if (ret != XZ_STREAM_END)
664 return ret;
665
666 s->sequence = SEQ_INDEX_PADDING;
667
668 case SEQ_INDEX_PADDING:
669 while ((s->index.size + (b->in_pos - s->in_start))
670 & 3) {
671 if (b->in_pos == b->in_size) {
672 index_update(s, b);
673 return XZ_OK;
674 }
675
676 if (b->in[b->in_pos++] != 0)
677 return XZ_DATA_ERROR;
678 }
679
680 /* Finish the CRC32 value and Index size. */
681 index_update(s, b);
682
683 /* Compare the hashes to validate the Index field. */
684 if (!memeq(&s->block.hash, &s->index.hash,
685 sizeof(s->block.hash)))
686 return XZ_DATA_ERROR;
687
688 s->sequence = SEQ_INDEX_CRC32;
689
690 case SEQ_INDEX_CRC32:
691 ret = crc32_validate(s, b);
692 if (ret != XZ_STREAM_END)
693 return ret;
694
695 s->temp.size = STREAM_HEADER_SIZE;
696 s->sequence = SEQ_STREAM_FOOTER;
697
698 case SEQ_STREAM_FOOTER:
699 if (!fill_temp(s, b))
700 return XZ_OK;
701
702 return dec_stream_footer(s);
703 }
704 }
705
706 /* Never reached */
707}
708
709/*
710 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
711 * multi-call and single-call decoding.
712 *
713 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
714 * are not going to make any progress anymore. This is to prevent the caller
715 * from calling us infinitely when the input file is truncated or otherwise
716 * corrupt. Since a zlib-style API allows the caller to fill the input buffer
717 * only when the decoder doesn't produce any new output, we have to be careful
718 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
719 * after the second consecutive call to xz_dec_run() that makes no progress.
720 *
721 * In single-call mode, if we couldn't decode everything and no error
722 * occurred, either the input is truncated or the output buffer is too small.
723 * Since we know that the last input byte never produces any output, we know
724 * that if all the input was consumed and decoding wasn't finished, the file
725 * must be corrupt. Otherwise the output buffer has to be too small or the
726 * file is corrupt in a way that decoding it produces output that is too big.
727 *
728 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
729 * their original values. This is because with some filter chains there won't
730 * be any valid uncompressed data in the output buffer unless the decoding
731 * actually succeeds (that's the price to pay for using the output buffer as
732 * the workspace).
733 */
734XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
735{
736 size_t in_start;
737 size_t out_start;
738 enum xz_ret ret;
739
740 if (DEC_IS_SINGLE(s->mode))
741 xz_dec_reset(s);
742
743 in_start = b->in_pos;
744 out_start = b->out_pos;
745 ret = dec_main(s, b);
746
747 if (DEC_IS_SINGLE(s->mode)) {
748 if (ret == XZ_OK)
749 ret = b->in_pos == b->in_size
750 ? XZ_DATA_ERROR : XZ_BUF_ERROR;
751
752 if (ret != XZ_STREAM_END) {
753 b->in_pos = in_start;
754 b->out_pos = out_start;
755 }
756
757 } else if (ret == XZ_OK && in_start == b->in_pos
758 && out_start == b->out_pos) {
759 if (s->allow_buf_error)
760 ret = XZ_BUF_ERROR;
761
762 s->allow_buf_error = true;
763 } else {
764 s->allow_buf_error = false;
765 }
766
767 return ret;
768}
769
770XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
771{
772 struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
773 if (s == NULL)
774 return NULL;
775
776 s->mode = mode;
777
778#ifdef XZ_DEC_BCJ
779 s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
780 if (s->bcj == NULL)
781 goto error_bcj;
782#endif
783
784 s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
785 if (s->lzma2 == NULL)
786 goto error_lzma2;
787
788 xz_dec_reset(s);
789 return s;
790
791error_lzma2:
792#ifdef XZ_DEC_BCJ
793 xz_dec_bcj_end(s->bcj);
794error_bcj:
795#endif
796 kfree(s);
797 return NULL;
798}
799
800XZ_EXTERN void xz_dec_reset(struct xz_dec *s)
801{
802 s->sequence = SEQ_STREAM_HEADER;
803 s->allow_buf_error = false;
804 s->pos = 0;
805 s->crc32 = 0;
806 memzero(&s->block, sizeof(s->block));
807 memzero(&s->index, sizeof(s->index));
808 s->temp.pos = 0;
809 s->temp.size = STREAM_HEADER_SIZE;
810}
811
812XZ_EXTERN void xz_dec_end(struct xz_dec *s)
813{
814 if (s != NULL) {
815 xz_dec_lzma2_end(s->lzma2);
816#ifdef XZ_DEC_BCJ
817 xz_dec_bcj_end(s->bcj);
818#endif
819 kfree(s);
820 }
821}
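/*
 * [Editor's illustrative usage sketch -- not part of the patch. Assumes
 * <linux/xz.h> and <linux/errno.h>; the _demo name is invented and error
 * handling is abbreviated.]
 *
 * A minimal multi-call caller of this API, with the whole input and output
 * available up front; a real caller would refill b.in and drain b.out
 * between xz_dec_run() calls:
 */
static int xz_decode_demo(const uint8_t *in, size_t in_size,
			  uint8_t *out, size_t out_size)
{
	struct xz_buf b = {
		.in = in, .in_pos = 0, .in_size = in_size,
		.out = out, .out_pos = 0, .out_size = out_size,
	};
	struct xz_dec *s;
	enum xz_ret ret;

	s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);
	if (s == NULL)
		return -ENOMEM;

	do {
		ret = xz_dec_run(s, &b);
		/*
		 * Even with fixed buffers the loop terminates: two
		 * consecutive calls without progress return XZ_BUF_ERROR.
		 */
	} while (ret == XZ_OK);

	xz_dec_end(s);

	return ret == XZ_STREAM_END ? 0 : -EINVAL;
}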
diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c
new file mode 100644
index 000000000000..32eb3c03aede
--- /dev/null
+++ b/lib/xz/xz_dec_syms.c
@@ -0,0 +1,26 @@
1/*
2 * XZ decoder module information
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#include <linux/module.h>
11#include <linux/xz.h>
12
13EXPORT_SYMBOL(xz_dec_init);
14EXPORT_SYMBOL(xz_dec_reset);
15EXPORT_SYMBOL(xz_dec_run);
16EXPORT_SYMBOL(xz_dec_end);
17
18MODULE_DESCRIPTION("XZ decompressor");
19MODULE_VERSION("1.0");
20MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov");
21
22/*
23 * This code is in the public domain, but in Linux it's simplest to just
24 * say it's GPL and consider the authors as the copyright holders.
25 */
26MODULE_LICENSE("GPL");
diff --git a/lib/xz/xz_dec_test.c b/lib/xz/xz_dec_test.c
new file mode 100644
index 000000000000..da28a19d6c98
--- /dev/null
+++ b/lib/xz/xz_dec_test.c
@@ -0,0 +1,220 @@
1/*
2 * XZ decoder tester
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#include <linux/kernel.h>
11#include <linux/module.h>
12#include <linux/fs.h>
13#include <linux/uaccess.h>
14#include <linux/crc32.h>
15#include <linux/xz.h>
16
17/* Maximum supported dictionary size */
18#define DICT_MAX (1 << 20)
19
20/* Device name to pass to register_chrdev(). */
21#define DEVICE_NAME "xz_dec_test"
22
23/* Dynamically allocated device major number */
24static int device_major;
25
26/*
27 * We reuse the same decoder state, and thus can decode only one
28 * file at a time.
29 */
30static bool device_is_open;
31
32/* XZ decoder state */
33static struct xz_dec *state;
34
35/*
36 * Return value of xz_dec_run(). We need to avoid calling xz_dec_run() after
37 * it has returned XZ_STREAM_END, so we make this static.
38 */
39static enum xz_ret ret;
40
41/*
42 * Input and output buffers. The input buffer is used as a temporary safe
43 * place for the data coming from the userspace.
44 */
45static uint8_t buffer_in[1024];
46static uint8_t buffer_out[1024];
47
48/*
49 * Structure to pass the input and output buffers to the XZ decoder.
50 * A few of the fields are never modified so we initialize them here.
51 */
52static struct xz_buf buffers = {
53 .in = buffer_in,
54 .out = buffer_out,
55 .out_size = sizeof(buffer_out)
56};
57
58/*
59 * CRC32 of uncompressed data. This is used to give the user a simple way
60 * to check that the decoder produces correct output.
61 */
62static uint32_t crc;
63
64static int xz_dec_test_open(struct inode *i, struct file *f)
65{
66 if (device_is_open)
67 return -EBUSY;
68
69 device_is_open = true;
70
71 xz_dec_reset(state);
72 ret = XZ_OK;
73 crc = 0xFFFFFFFF;
74
75 buffers.in_pos = 0;
76 buffers.in_size = 0;
77 buffers.out_pos = 0;
78
79 printk(KERN_INFO DEVICE_NAME ": opened\n");
80 return 0;
81}
82
83static int xz_dec_test_release(struct inode *i, struct file *f)
84{
85 device_is_open = false;
86
87 if (ret == XZ_OK)
88 printk(KERN_INFO DEVICE_NAME ": input was truncated\n");
89
90 printk(KERN_INFO DEVICE_NAME ": closed\n");
91 return 0;
92}
93
94/*
95 * Decode the data given to us from the userspace. CRC32 of the uncompressed
96 * data is calculated and is printed at the end of successful decoding. The
97 * uncompressed data isn't stored anywhere for further use.
98 *
99 * The .xz file must have exactly one Stream and no Stream Padding. The data
100 * after the first Stream is considered to be garbage.
101 */
102static ssize_t xz_dec_test_write(struct file *file, const char __user *buf,
103 size_t size, loff_t *pos)
104{
105 size_t remaining;
106
107 if (ret != XZ_OK) {
108 if (size > 0)
109 printk(KERN_INFO DEVICE_NAME ": %zu bytes of "
110 "garbage at the end of the file\n",
111 size);
112
113 return -ENOSPC;
114 }
115
116 printk(KERN_INFO DEVICE_NAME ": decoding %zu bytes of input\n",
117 size);
118
119 remaining = size;
120 while ((remaining > 0 || buffers.out_pos == buffers.out_size)
121 && ret == XZ_OK) {
122 if (buffers.in_pos == buffers.in_size) {
123 buffers.in_pos = 0;
124 buffers.in_size = min(remaining, sizeof(buffer_in));
125 if (copy_from_user(buffer_in, buf, buffers.in_size))
126 return -EFAULT;
127
128 buf += buffers.in_size;
129 remaining -= buffers.in_size;
130 }
131
132 buffers.out_pos = 0;
133 ret = xz_dec_run(state, &buffers);
134 crc = crc32(crc, buffer_out, buffers.out_pos);
135 }
136
137 switch (ret) {
138 case XZ_OK:
139 printk(KERN_INFO DEVICE_NAME ": XZ_OK\n");
140 return size;
141
142 case XZ_STREAM_END:
143 printk(KERN_INFO DEVICE_NAME ": XZ_STREAM_END, "
144 "CRC32 = 0x%08X\n", ~crc);
145 return size - remaining - (buffers.in_size - buffers.in_pos);
146
147 case XZ_MEMLIMIT_ERROR:
148 printk(KERN_INFO DEVICE_NAME ": XZ_MEMLIMIT_ERROR\n");
149 break;
150
151 case XZ_FORMAT_ERROR:
152 printk(KERN_INFO DEVICE_NAME ": XZ_FORMAT_ERROR\n");
153 break;
154
155 case XZ_OPTIONS_ERROR:
156 printk(KERN_INFO DEVICE_NAME ": XZ_OPTIONS_ERROR\n");
157 break;
158
159 case XZ_DATA_ERROR:
160 printk(KERN_INFO DEVICE_NAME ": XZ_DATA_ERROR\n");
161 break;
162
163 case XZ_BUF_ERROR:
164 printk(KERN_INFO DEVICE_NAME ": XZ_BUF_ERROR\n");
165 break;
166
167 default:
168 printk(KERN_INFO DEVICE_NAME ": Bug detected!\n");
169 break;
170 }
171
172 return -EIO;
173}
174
175/* Allocate the XZ decoder state and register the character device. */
176static int __init xz_dec_test_init(void)
177{
178 static const struct file_operations fileops = {
179 .owner = THIS_MODULE,
180 .open = &xz_dec_test_open,
181 .release = &xz_dec_test_release,
182 .write = &xz_dec_test_write
183 };
184
185 state = xz_dec_init(XZ_PREALLOC, DICT_MAX);
186 if (state == NULL)
187 return -ENOMEM;
188
189 device_major = register_chrdev(0, DEVICE_NAME, &fileops);
190 if (device_major < 0) {
191 xz_dec_end(state);
192 return device_major;
193 }
194
195 printk(KERN_INFO DEVICE_NAME ": module loaded\n");
196 printk(KERN_INFO DEVICE_NAME ": Create a device node with "
197 "'mknod " DEVICE_NAME " c %d 0' and write .xz files "
198 "to it.\n", device_major);
199 return 0;
200}
201
202static void __exit xz_dec_test_exit(void)
203{
204 unregister_chrdev(device_major, DEVICE_NAME);
205 xz_dec_end(state);
206 printk(KERN_INFO DEVICE_NAME ": module unloaded\n");
207}
208
209module_init(xz_dec_test_init);
210module_exit(xz_dec_test_exit);
211
212MODULE_DESCRIPTION("XZ decompressor tester");
213MODULE_VERSION("1.0");
214MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org>");
215
216/*
217 * This code is in the public domain, but in Linux it's simplest to just
218 * say it's GPL and consider the authors as the copyright holders.
219 */
220MODULE_LICENSE("GPL");
diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h
new file mode 100644
index 000000000000..071d67bee9f5
--- /dev/null
+++ b/lib/xz/xz_lzma2.h
@@ -0,0 +1,204 @@
1/*
2 * LZMA2 definitions
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11#ifndef XZ_LZMA2_H
12#define XZ_LZMA2_H
13
14/* Range coder constants */
15#define RC_SHIFT_BITS 8
16#define RC_TOP_BITS 24
17#define RC_TOP_VALUE (1 << RC_TOP_BITS)
18#define RC_BIT_MODEL_TOTAL_BITS 11
19#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
20#define RC_MOVE_BITS 5
21
22/*
23 * Maximum number of position states. A position state is the lowest pb
24 * number of bits of the current uncompressed offset. In some places there
25 * are different sets of probabilities for different position states.
26 */
27#define POS_STATES_MAX (1 << 4)
28
29/*
30 * This enum is used to track which LZMA symbols have occurred most recently
31 * and in which order. This information is used to predict the next symbol.
32 *
33 * Symbols:
34 * - Literal: One 8-bit byte
35 * - Match: Repeat a chunk of data at some distance
36 * - Long repeat: Multi-byte match at a recently seen distance
37 * - Short repeat: One-byte repeat at a recently seen distance
38 *
39 * The symbol names are of the form STATE_oldest_older_previous. REP means
40 * either short or long repeated match, and NONLIT means any non-literal.
41 */
42enum lzma_state {
43 STATE_LIT_LIT,
44 STATE_MATCH_LIT_LIT,
45 STATE_REP_LIT_LIT,
46 STATE_SHORTREP_LIT_LIT,
47 STATE_MATCH_LIT,
48 STATE_REP_LIT,
49 STATE_SHORTREP_LIT,
50 STATE_LIT_MATCH,
51 STATE_LIT_LONGREP,
52 STATE_LIT_SHORTREP,
53 STATE_NONLIT_MATCH,
54 STATE_NONLIT_REP
55};
56
57/* Total number of states */
58#define STATES 12
59
60/* The lowest 7 states indicate that the previous state was a literal. */
61#define LIT_STATES 7
62
63/* Indicate that the latest symbol was a literal. */
64static inline void lzma_state_literal(enum lzma_state *state)
65{
66 if (*state <= STATE_SHORTREP_LIT_LIT)
67 *state = STATE_LIT_LIT;
68 else if (*state <= STATE_LIT_SHORTREP)
69 *state -= 3;
70 else
71 *state -= 6;
72}
73
74/* Indicate that the latest symbol was a match. */
75static inline void lzma_state_match(enum lzma_state *state)
76{
77 *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
78}
79
80/* Indicate that the latest symbol was a long repeated match. */
81static inline void lzma_state_long_rep(enum lzma_state *state)
82{
83 *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
84}
85
86/* Indicate that the latest symbol was a short repeated match. */
87static inline void lzma_state_short_rep(enum lzma_state *state)
88{
89 *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
90}
91
92/* Test if the previous symbol was a literal. */
93static inline bool lzma_state_is_literal(enum lzma_state state)
94{
95 return state < LIT_STATES;
96}
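/*
 * [Editor's illustrative sketch -- not part of the patch; it only uses the
 * enum and helpers defined above, and the _demo name is invented for the
 * example.]
 *
 * The state roughly remembers the last three symbol kinds. Starting from
 * STATE_LIT_LIT, one match followed by three literals walks back to
 * STATE_LIT_LIT:
 */
static inline void lzma_state_demo(void)
{
	enum lzma_state state = STATE_LIT_LIT;

	lzma_state_match(&state);	/* STATE_LIT_MATCH */
	lzma_state_literal(&state);	/* STATE_MATCH_LIT */
	lzma_state_literal(&state);	/* STATE_MATCH_LIT_LIT */
	lzma_state_literal(&state);	/* STATE_LIT_LIT */
}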
97
98/* Each literal coder is divided into three sections:
99 * - 0x001-0x0FF: Without match byte
100 * - 0x101-0x1FF: With match byte; match bit is 0
101 * - 0x201-0x2FF: With match byte; match bit is 1
102 *
103 * Match byte is used when the previous LZMA symbol was something other than
104 * a literal (that is, it was some kind of match).
105 */
106#define LITERAL_CODER_SIZE 0x300
107
108/* Maximum number of literal coders */
109#define LITERAL_CODERS_MAX (1 << 4)
110
111/* Minimum length of a match is two bytes. */
112#define MATCH_LEN_MIN 2
113
114/* Match length is encoded with 4, 5, or 10 bits.
115 *
116 * Length Bits
117 * 2-9 4 = Choice=0 + 3 bits
118 * 10-17 5 = Choice=1 + Choice2=0 + 3 bits
119 * 18-273 10 = Choice=1 + Choice2=1 + 8 bits
120 */
121#define LEN_LOW_BITS 3
122#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
123#define LEN_MID_BITS 3
124#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
125#define LEN_HIGH_BITS 8
126#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
127#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
128
129/*
130 * Maximum length of a match is 273 which is a result of the encoding
131 * described above.
132 */
133#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
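/*
 * [Editor's illustrative sketch -- not part of the patch; it relies on the
 * macros above and the integer types pulled in via xz_private.h, and the
 * _demo name is invented for the example.]
 *
 * Mapping a decoded length bucket and bittree symbol back to the match
 * length, mirroring lzma_len() in xz_dec_lzma2.c:
 */
static inline uint32_t match_len_demo(int choice, int choice2, uint32_t symbol)
{
	if (!choice)		/* symbol in 0-7   -> lengths 2-9 */
		return MATCH_LEN_MIN + symbol;

	if (!choice2)		/* symbol in 0-7   -> lengths 10-17 */
		return MATCH_LEN_MIN + LEN_LOW_SYMBOLS + symbol;

	/* symbol in 0-255 -> lengths 18-273 (MATCH_LEN_MAX) */
	return MATCH_LEN_MIN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + symbol;
}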
134
135/*
136 * Different sets of probabilities are used for match distances that have
137 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
138 * set of probabilities for each length. The matches with longer length
139 * use a shared set of probabilities.
140 */
141#define DIST_STATES 4
142
143/*
144 * Get the index of the appropriate probability array for decoding
145 * the distance slot.
146 */
147static inline uint32_t lzma_get_dist_state(uint32_t len)
148{
149 return len < DIST_STATES + MATCH_LEN_MIN
150 ? len - MATCH_LEN_MIN : DIST_STATES - 1;
151}
152
153/*
154 * The highest two bits of a 32-bit match distance are encoded using six bits.
155 * This six-bit value is called a distance slot. This way encoding a 32-bit
156 * value takes 6-36 bits, larger values taking more bits.
157 */
158#define DIST_SLOT_BITS 6
159#define DIST_SLOTS (1 << DIST_SLOT_BITS)
160
161/* Match distances up to 127 are fully encoded using probabilities. Since
162 * the highest two bits (distance slot) are always encoded using six bits,
163 * the distances 0-3 don't need any additional bits to encode, since the
164 * distance slot itself is the same as the actual distance. DIST_MODEL_START
165 * indicates the first distance slot where at least one additional bit is
166 * needed.
167 */
168#define DIST_MODEL_START 4
169
170/*
171 * Match distances greater than 127 are encoded in three pieces:
172 * - distance slot: the highest two bits
173 * - direct bits: 2-26 bits below the highest two bits
174 * - alignment bits: four lowest bits
175 *
176 * Direct bits don't use any probabilities.
177 *
178 * The distance slot value of 14 is for distances 128-191.
179 */
180#define DIST_MODEL_END 14
181
182/* Distance slots that indicate a distance <= 127. */
183#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
184#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
185
186/*
187 * For match distances greater than 127, only the highest two bits and the
188 * lowest four bits (alignment) are encoded using probabilities.
189 */
190#define ALIGN_BITS 4
191#define ALIGN_SIZE (1 << ALIGN_BITS)
192#define ALIGN_MASK (ALIGN_SIZE - 1)
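/*
 * [Editor's illustrative sketch -- not part of the patch; it relies on the
 * macros above and the integer types pulled in via xz_private.h, and the
 * _demo name is invented for the example.]
 *
 * For a distance slot, the base distance and number of extra bits are:
 *
 *	slots 0-3:	distance = slot, no extra bits
 *	slot 13:	base 96, 5 extra bits	(covers 96-127)
 *	slot 14:	base 128, 6 extra bits	(covers 128-191)
 *
 * Slots below DIST_MODEL_END get their extra bits from reversed bittrees
 * (dist_special); higher slots use direct bits plus the four align bits,
 * as lzma_match() in xz_dec_lzma2.c does.
 */
static inline void dist_slot_demo(uint32_t slot, uint32_t *base,
				  uint32_t *extra_bits)
{
	if (slot < DIST_MODEL_START) {
		*base = slot;
		*extra_bits = 0;
	} else {
		*extra_bits = (slot >> 1) - 1;
		*base = (2 + (slot & 1)) << *extra_bits;
	}
}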
193
194/* Total number of all probability variables */
195#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
196
197/*
198 * LZMA remembers the four most recent match distances. Reusing these
199 * distances tends to take less space than re-encoding the actual
200 * distance value.
201 */
202#define REPS 4
203
204#endif
diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h
new file mode 100644
index 000000000000..a65633e06962
--- /dev/null
+++ b/lib/xz/xz_private.h
@@ -0,0 +1,156 @@
1/*
2 * Private includes and definitions
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_PRIVATE_H
11#define XZ_PRIVATE_H
12
13#ifdef __KERNEL__
14# include <linux/xz.h>
15# include <asm/byteorder.h>
16# include <asm/unaligned.h>
17 /* XZ_PREBOOT may be defined only via decompress_unxz.c. */
18# ifndef XZ_PREBOOT
19# include <linux/slab.h>
20# include <linux/vmalloc.h>
21# include <linux/string.h>
22# ifdef CONFIG_XZ_DEC_X86
23# define XZ_DEC_X86
24# endif
25# ifdef CONFIG_XZ_DEC_POWERPC
26# define XZ_DEC_POWERPC
27# endif
28# ifdef CONFIG_XZ_DEC_IA64
29# define XZ_DEC_IA64
30# endif
31# ifdef CONFIG_XZ_DEC_ARM
32# define XZ_DEC_ARM
33# endif
34# ifdef CONFIG_XZ_DEC_ARMTHUMB
35# define XZ_DEC_ARMTHUMB
36# endif
37# ifdef CONFIG_XZ_DEC_SPARC
38# define XZ_DEC_SPARC
39# endif
40# define memeq(a, b, size) (memcmp(a, b, size) == 0)
41# define memzero(buf, size) memset(buf, 0, size)
42# endif
43# define get_le32(p) le32_to_cpup((const uint32_t *)(p))
44#else
45 /*
46 * For userspace builds, use a separate header to define the required
47 * macros and functions. This makes it easier to adapt the code into
48 * different environments and avoids clutter in the Linux kernel tree.
49 */
50# include "xz_config.h"
51#endif
52
53/* If no specific decoding mode is requested, enable support for all modes. */
54#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
55 && !defined(XZ_DEC_DYNALLOC)
56# define XZ_DEC_SINGLE
57# define XZ_DEC_PREALLOC
58# define XZ_DEC_DYNALLOC
59#endif
60
61/*
62 * The DEC_IS_foo(mode) macros are used in "if" statements. If only some
63 * of the supported modes are enabled, these macros will evaluate to true or
64 * false at compile time and thus allow the compiler to omit unneeded code.
65 */
66#ifdef XZ_DEC_SINGLE
67# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
68#else
69# define DEC_IS_SINGLE(mode) (false)
70#endif
71
72#ifdef XZ_DEC_PREALLOC
73# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
74#else
75# define DEC_IS_PREALLOC(mode) (false)
76#endif
77
78#ifdef XZ_DEC_DYNALLOC
79# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
80#else
81# define DEC_IS_DYNALLOC(mode) (false)
82#endif
83
84#if !defined(XZ_DEC_SINGLE)
85# define DEC_IS_MULTI(mode) (true)
86#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
87# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
88#else
89# define DEC_IS_MULTI(mode) (false)
90#endif
91
92/*
93 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
94 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
95 */
96#ifndef XZ_DEC_BCJ
97# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
98 || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
99		|| defined(XZ_DEC_ARMTHUMB) \
100 || defined(XZ_DEC_SPARC)
101# define XZ_DEC_BCJ
102# endif
103#endif
104
105/*
106 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
107 * before calling xz_dec_lzma2_run().
108 */
109XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
110 uint32_t dict_max);
111
112/*
113 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
114 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
115 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
116 * decoder doesn't support.
117 */
118XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
119 uint8_t props);
120
121/* Decode raw LZMA2 stream from b->in to b->out. */
122XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
123 struct xz_buf *b);
124
125/* Free the memory allocated for the LZMA2 decoder. */
126XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
127
128#ifdef XZ_DEC_BCJ
129/*
130 * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
131 * calling xz_dec_bcj_run().
132 */
133XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call);
134
135/*
136 * Decode the Filter ID of a BCJ filter. This implementation doesn't
137 * support custom start offsets, so no decoding of Filter Properties
138 * is needed. Returns XZ_OK if the given Filter ID is supported.
139 * Otherwise XZ_OPTIONS_ERROR is returned.
140 */
141XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
142
143/*
144 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
145 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
146 * must be called directly.
147 */
148XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
149 struct xz_dec_lzma2 *lzma2,
150 struct xz_buf *b);
151
152/* Free the memory allocated for the BCJ filters. */
153#define xz_dec_bcj_end(s) kfree(s)
154#endif
155
156#endif
diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h
new file mode 100644
index 000000000000..66cb5a7055ec
--- /dev/null
+++ b/lib/xz/xz_stream.h
@@ -0,0 +1,62 @@
1/*
2 * Definitions for handling the .xz file format
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10#ifndef XZ_STREAM_H
11#define XZ_STREAM_H
12
13#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
14# include <linux/crc32.h>
15# undef crc32
16# define xz_crc32(buf, size, crc) \
17 (~crc32_le(~(uint32_t)(crc), buf, size))
18#endif
19
20/*
21 * See the .xz file format specification at
22 * http://tukaani.org/xz/xz-file-format.txt
23 * to understand the container format.
24 */
25
26#define STREAM_HEADER_SIZE 12
27
28#define HEADER_MAGIC "\3757zXZ"
29#define HEADER_MAGIC_SIZE 6
30
31#define FOOTER_MAGIC "YZ"
32#define FOOTER_MAGIC_SIZE 2
33
34/*
35 * Variable-length integer can hold a 63-bit unsigned integer or a special
36 * value indicating that the value is unknown.
37 *
38 * Experimental: vli_type can be defined to uint32_t to save a few bytes
39 * in code size (no effect on speed). Doing so limits the uncompressed and
40 * compressed size of the file to less than 256 MiB and may also weaken
41 * error detection slightly.
42 */
43typedef uint64_t vli_type;
44
45#define VLI_MAX ((vli_type)-1 / 2)
46#define VLI_UNKNOWN ((vli_type)-1)
47
48/* Maximum encoded size of a VLI */
49#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
50
51/* Integrity Check types */
52enum xz_check {
53 XZ_CHECK_NONE = 0,
54 XZ_CHECK_CRC32 = 1,
55 XZ_CHECK_CRC64 = 4,
56 XZ_CHECK_SHA256 = 10
57};
58
59/* Maximum possible Check ID */
60#define XZ_CHECK_MAX 15
61
62#endif
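
The variable-length integers referred to above are stored 7 bits per byte, little-endian, with bit 0x80 of each byte acting as a continuation flag; that is why VLI_BYTES_MAX works out to sizeof(vli_type) * 8 / 7 = 9 bytes for a 64-bit vli_type holding a 63-bit value. The following is a simplified, non-incremental sketch written against the format specification, not the in-kernel decoder, and the function name vli_decode_example() is invented for the illustration.

/*
 * Sketch of VLI decoding per the .xz format specification: each byte
 * contributes its low 7 bits, and a clear 0x80 bit terminates the field.
 * Returns true and sets *out / *used on success, false if the input is
 * truncated or longer than VLI_BYTES_MAX bytes.
 */
static bool vli_decode_example(const uint8_t *in, size_t in_size,
			       vli_type *out, size_t *used)
{
	vli_type value = 0;
	size_t i;

	for (i = 0; i < in_size && i < VLI_BYTES_MAX; ++i) {
		value |= (vli_type)(in[i] & 0x7F) << (i * 7);
		if ((in[i] & 0x80) == 0) {
			*out = value;
			*used = i + 1;
			return true;
		}
	}

	return false;
}

For example, the two-byte sequence 0xC0 0x01 decodes to 0x40 | (0x01 << 7) = 0xC0 = 192, the set continuation bit of the first byte pulling in the second.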
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 215447c55261..2c13ecc5bb2c 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -8,21 +8,6 @@
8#include "inflate.h" 8#include "inflate.h"
9#include "inffast.h" 9#include "inffast.h"
10 10
11/* Only do the unaligned "Faster" variant when
12 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set
13 *
14 * On powerpc, it won't be as we don't include autoconf.h
15 * automatically for the boot wrapper, which is intended as
16 * we run in an environment where we may not be able to deal
17 * with (even rare) alignment faults. In addition, we do not
18 * define __KERNEL__ for arch/powerpc/boot unlike x86
19 */
20
21#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
22#include <asm/unaligned.h>
23#include <asm/byteorder.h>
24#endif
25
26#ifndef ASMINF 11#ifndef ASMINF
27 12
28/* Allow machine dependent optimization for post-increment or pre-increment. 13/* Allow machine dependent optimization for post-increment or pre-increment.
@@ -36,14 +21,31 @@
36 - Pentium III (Anderson) 21 - Pentium III (Anderson)
37 - M68060 (Nikl) 22 - M68060 (Nikl)
38 */ 23 */
24union uu {
25 unsigned short us;
26 unsigned char b[2];
27};
28
 29/* Endian-independent version */
30static inline unsigned short
31get_unaligned16(const unsigned short *p)
32{
33 union uu mm;
34 unsigned char *b = (unsigned char *)p;
35
36 mm.b[0] = b[0];
37 mm.b[1] = b[1];
38 return mm.us;
39}
40
39#ifdef POSTINC 41#ifdef POSTINC
40# define OFF 0 42# define OFF 0
41# define PUP(a) *(a)++ 43# define PUP(a) *(a)++
42# define UP_UNALIGNED(a) get_unaligned((a)++) 44# define UP_UNALIGNED(a) get_unaligned16((a)++)
43#else 45#else
44# define OFF 1 46# define OFF 1
45# define PUP(a) *++(a) 47# define PUP(a) *++(a)
46# define UP_UNALIGNED(a) get_unaligned(++(a)) 48# define UP_UNALIGNED(a) get_unaligned16(++(a))
47#endif 49#endif
48 50
49/* 51/*
@@ -256,7 +258,6 @@ void inflate_fast(z_streamp strm, unsigned start)
256 } 258 }
257 } 259 }
258 else { 260 else {
259#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
260 unsigned short *sout; 261 unsigned short *sout;
261 unsigned long loops; 262 unsigned long loops;
262 263
@@ -274,22 +275,25 @@ void inflate_fast(z_streamp strm, unsigned start)
274 sfrom = (unsigned short *)(from - OFF); 275 sfrom = (unsigned short *)(from - OFF);
275 loops = len >> 1; 276 loops = len >> 1;
276 do 277 do
278#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
279 PUP(sout) = PUP(sfrom);
280#else
277 PUP(sout) = UP_UNALIGNED(sfrom); 281 PUP(sout) = UP_UNALIGNED(sfrom);
282#endif
278 while (--loops); 283 while (--loops);
279 out = (unsigned char *)sout + OFF; 284 out = (unsigned char *)sout + OFF;
280 from = (unsigned char *)sfrom + OFF; 285 from = (unsigned char *)sfrom + OFF;
281 } else { /* dist == 1 or dist == 2 */ 286 } else { /* dist == 1 or dist == 2 */
282 unsigned short pat16; 287 unsigned short pat16;
283 288
284 pat16 = *(sout-2+2*OFF); 289 pat16 = *(sout-1+OFF);
285 if (dist == 1) 290 if (dist == 1) {
286#if defined(__BIG_ENDIAN) 291 union uu mm;
287 pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8); 292 /* copy one char pattern to both bytes */
288#elif defined(__LITTLE_ENDIAN) 293 mm.us = pat16;
289 pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8); 294 mm.b[0] = mm.b[1];
290#else 295 pat16 = mm.us;
291#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined 296 }
292#endif
293 loops = len >> 1; 297 loops = len >> 1;
294 do 298 do
295 PUP(sout) = pat16; 299 PUP(sout) = pat16;
@@ -298,20 +302,6 @@ void inflate_fast(z_streamp strm, unsigned start)
298 } 302 }
299 if (len & 1) 303 if (len & 1)
300 PUP(out) = PUP(from); 304 PUP(out) = PUP(from);
301#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
302 from = out - dist; /* copy direct from output */
303 do { /* minimum length is three */
304 PUP(out) = PUP(from);
305 PUP(out) = PUP(from);
306 PUP(out) = PUP(from);
307 len -= 3;
308 } while (len > 2);
309 if (len) {
310 PUP(out) = PUP(from);
311 if (len > 1)
312 PUP(out) = PUP(from);
313 }
314#endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
315 } 305 }
316 } 306 }
317 else if ((op & 64) == 0) { /* 2nd level distance code */ 307 else if ((op & 64) == 0) { /* 2nd level distance code */
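
The replacement code above avoids both <asm/unaligned.h> and the __BIG_ENDIAN/__LITTLE_ENDIAN conditionals by going through a two-byte union: get_unaligned16() assembles a native 16-bit value byte by byte, and the dist == 1 case duplicates the higher-addressed byte of pat16 (always mm.b[1], on either endianness) into both halves. A standalone, user-space sketch of the two constructs follows; the demo_* names and the main() driver are invented for the illustration and are not part of the patch.

#include <stdio.h>

union uu {
	unsigned short us;
	unsigned char b[2];
};

/* Byte-wise load: no alignment or endianness assumptions about *p. */
static unsigned short demo_get_unaligned16(const unsigned short *p)
{
	union uu mm;
	const unsigned char *b = (const unsigned char *)p;

	mm.b[0] = b[0];
	mm.b[1] = b[1];
	return mm.us;		/* same value a native unaligned load would give */
}

/* Fill both halves of a 16-bit pattern with its higher-addressed byte. */
static unsigned short demo_dup_byte(unsigned short pat16)
{
	union uu mm;

	mm.us = pat16;
	mm.b[0] = mm.b[1];	/* b[1] is the byte at the higher address */
	return mm.us;
}

int main(void)
{
	unsigned char out[4] = { 0x11, 0x22, 0x33, 0x44 };
	unsigned short pat16;

	/* Load out[1..2] even though the address is not 2-byte aligned. */
	pat16 = demo_get_unaligned16((const unsigned short *)(out + 1));

	/* Both bytes become 0x33 regardless of host endianness. */
	printf("0x%04x\n", demo_dup_byte(pat16));	/* prints 0x3333 */
	return 0;
}

In the patched inflate_fast(), the byte being duplicated is the most recently written output byte, which always sits at the higher address within pat16, so the union copy produces the same fill pattern that the removed endian-specific shifts did, on either byte order.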