Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig | 12
-rw-r--r--  lib/Kconfig.debug | 257
-rw-r--r--  lib/Kconfig.kgdb | 24
-rw-r--r--  lib/Makefile | 16
-rw-r--r--  lib/atomic64.c | 4
-rw-r--r--  lib/atomic64_test.c | 166
-rw-r--r--  lib/average.c | 61
-rw-r--r--  lib/bitmap.c | 22
-rw-r--r--  lib/btree.c | 798
-rw-r--r--  lib/bug.c | 20
-rw-r--r--  lib/cpu-notifier-error-inject.c | 63
-rw-r--r--  lib/cpumask.c | 1
-rw-r--r--  lib/crc32.c | 55
-rw-r--r--  lib/debug_locks.c | 3
-rw-r--r--  lib/debugobjects.c | 64
-rw-r--r--  lib/decompress_bunzip2.c | 10
-rw-r--r--  lib/decompress_unlzo.c | 22
-rw-r--r--  lib/devres.c | 3
-rw-r--r--  lib/div64.c | 52
-rw-r--r--  lib/dma-debug.c | 5
-rw-r--r--  lib/dynamic_debug.c | 143
-rw-r--r--  lib/flex_array.c | 27
-rw-r--r--  lib/gen_crc32table.c | 47
-rw-r--r--  lib/genalloc.c | 2
-rw-r--r--  lib/hexdump.c | 54
-rw-r--r--  lib/hweight.c | 26
-rw-r--r--  lib/idr.c | 101
-rw-r--r--  lib/inflate.c | 3
-rw-r--r--  lib/iommu-helper.c | 9
-rw-r--r--  lib/ioremap.c | 10
-rw-r--r--  lib/kasprintf.c | 1
-rw-r--r--  lib/kobject.c | 160
-rw-r--r--  lib/kobject_uevent.c | 119
-rw-r--r--  lib/kref.c | 16
-rw-r--r--  lib/lcm.c | 15
-rw-r--r--  lib/list_debug.c | 6
-rw-r--r--  lib/list_sort.c | 335
-rw-r--r--  lib/lmb.c | 532
-rw-r--r--  lib/nlattr.c | 22
-rw-r--r--  lib/parser.c | 7
-rw-r--r--  lib/percpu_counter.c | 88
-rw-r--r--  lib/radix-tree.c | 254
-rw-r--r--  lib/raid6/.gitignore | 4
-rw-r--r--  lib/raid6/Makefile | 75
-rw-r--r--  lib/raid6/algos.c | 154
-rw-r--r--  lib/raid6/altivec.uc | 130
-rw-r--r--  lib/raid6/int.uc | 117
-rw-r--r--  lib/raid6/mktables.c | 132
-rw-r--r--  lib/raid6/mmx.c | 142
-rw-r--r--  lib/raid6/recov.c | 132
-rw-r--r--  lib/raid6/sse1.c | 162
-rw-r--r--  lib/raid6/sse2.c | 262
-rw-r--r--  lib/raid6/test/Makefile | 72
-rw-r--r--  lib/raid6/test/test.c | 124
-rw-r--r--  lib/raid6/unroll.awk | 20
-rw-r--r--  lib/raid6/x86.h | 61
-rw-r--r--  lib/random32.c | 40
-rw-r--r--  lib/ratelimit.c | 11
-rw-r--r--  lib/rbtree.c | 68
-rw-r--r--  lib/rwsem-spinlock.c | 14
-rw-r--r--  lib/rwsem.c | 153
-rw-r--r--  lib/scatterlist.c | 38
-rw-r--r--  lib/show_mem.c | 14
-rw-r--r--  lib/string.c | 40
-rw-r--r--  lib/swiotlb.c | 185
-rw-r--r--  lib/textsearch.c | 1
-rw-r--r--  lib/timerqueue.c | 107
-rw-r--r--  lib/uuid.c | 53
-rw-r--r--  lib/vsprintf.c | 240
-rw-r--r--  lib/zlib_inflate/inffast.c | 72
70 files changed, 4913 insertions, 1345 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 97b136ff117e..3116aa631af6 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -7,6 +7,9 @@ config BINARY_PRINTF
7 7
8menu "Library routines" 8menu "Library routines"
9 9
10config RAID6_PQ
11 tristate
12
10config BITREVERSE 13config BITREVERSE
11 tristate 14 tristate
12 15
@@ -160,6 +163,9 @@ config TEXTSEARCH_BM
160config TEXTSEARCH_FSM 163config TEXTSEARCH_FSM
161 tristate 164 tristate
162 165
166config BTREE
167 boolean
168
163config HAS_IOMEM 169config HAS_IOMEM
164 boolean 170 boolean
165 depends on !NO_IOMEM 171 depends on !NO_IOMEM
@@ -178,9 +184,6 @@ config HAS_DMA
178config CHECK_SIGNATURE 184config CHECK_SIGNATURE
179 bool 185 bool
180 186
181config HAVE_LMB
182 boolean
183
184config CPUMASK_OFFSTACK 187config CPUMASK_OFFSTACK
185 bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS 188 bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
186 help 189 help
@@ -207,4 +210,7 @@ config GENERIC_ATOMIC64
207config LRU_CACHE 210config LRU_CACHE
208 tristate 211 tristate
209 212
213config AVERAGE
214 bool
215
210endmenu 216endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..2d05adb98401 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -76,7 +76,6 @@ config UNUSED_SYMBOLS
76 76
77config DEBUG_FS 77config DEBUG_FS
78 bool "Debug Filesystem" 78 bool "Debug Filesystem"
79 depends on SYSFS
80 help 79 help
81 debugfs is a virtual file system that kernel developers use to put 80 debugfs is a virtual file system that kernel developers use to put
82 debugging files into. Enable this option to be able to read and 81 debugging files into. Enable this option to be able to read and
@@ -103,7 +102,8 @@ config HEADERS_CHECK
103 102
104config DEBUG_SECTION_MISMATCH 103config DEBUG_SECTION_MISMATCH
105 bool "Enable full Section mismatch analysis" 104 bool "Enable full Section mismatch analysis"
106 depends on UNDEFINED 105 depends on UNDEFINED || (BLACKFIN)
106 default y
107 # This option is on purpose disabled for now. 107 # This option is on purpose disabled for now.
108 # It will be enabled when we are down to a reasonable number 108 # It will be enabled when we are down to a reasonable number
109 # of section mismatch warnings (< 10 for an allyesconfig build) 109 # of section mismatch warnings (< 10 for an allyesconfig build)
@@ -151,28 +151,34 @@ config DEBUG_SHIRQ
151 Drivers ought to be able to handle interrupts coming in at those 151 Drivers ought to be able to handle interrupts coming in at those
152 points; some don't and need to be caught. 152 points; some don't and need to be caught.
153 153
154config DETECT_SOFTLOCKUP 154config LOCKUP_DETECTOR
155 bool "Detect Soft Lockups" 155 bool "Detect Hard and Soft Lockups"
156 depends on DEBUG_KERNEL && !S390 156 depends on DEBUG_KERNEL && !S390
157 default y
158 help 157 help
159 Say Y here to enable the kernel to detect "soft lockups", 158 Say Y here to enable the kernel to act as a watchdog to detect
160 which are bugs that cause the kernel to loop in kernel 159 hard and soft lockups.
160
161 Softlockups are bugs that cause the kernel to loop in kernel
161 mode for more than 60 seconds, without giving other tasks a 162 mode for more than 60 seconds, without giving other tasks a
162 chance to run. 163 chance to run. The current stack trace is displayed upon
164 detection and the system will stay locked up.
163 165
164 When a soft-lockup is detected, the kernel will print the 166 Hardlockups are bugs that cause the CPU to loop in kernel mode
165 current stack trace (which you should report), but the 167 for more than 60 seconds, without letting other interrupts have a
166 system will stay locked up. This feature has negligible 168 chance to run. The current stack trace is displayed upon detection
167 overhead. 169 and the system will stay locked up.
170
171 The overhead should be minimal. A periodic hrtimer runs to
172 generate interrupts and kick the watchdog task every 10-12 seconds.
173 An NMI is generated every 60 seconds or so to check for hardlockups.
168 174
169 (Note that "hard lockups" are separate type of bugs that 175config HARDLOCKUP_DETECTOR
170 can be detected via the NMI-watchdog, on platforms that 176 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
171 support it.) 177 !ARCH_HAS_NMI_WATCHDOG
172 178
173config BOOTPARAM_SOFTLOCKUP_PANIC 179config BOOTPARAM_SOFTLOCKUP_PANIC
174 bool "Panic (Reboot) On Soft Lockups" 180 bool "Panic (Reboot) On Soft Lockups"
175 depends on DETECT_SOFTLOCKUP 181 depends on LOCKUP_DETECTOR
176 help 182 help
177 Say Y here to enable the kernel to panic on "soft lockups", 183 Say Y here to enable the kernel to panic on "soft lockups",
178 which are bugs that cause the kernel to loop in kernel 184 which are bugs that cause the kernel to loop in kernel
@@ -189,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
189 195
190config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE 196config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
191 int 197 int
192 depends on DETECT_SOFTLOCKUP 198 depends on LOCKUP_DETECTOR
193 range 0 1 199 range 0 1
194 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 200 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
195 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 201 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -306,6 +312,20 @@ config DEBUG_OBJECTS_WORK
306 work queue routines to track the life time of work objects and 312 work queue routines to track the life time of work objects and
307 validate the work operations. 313 validate the work operations.
308 314
315config DEBUG_OBJECTS_RCU_HEAD
316 bool "Debug RCU callbacks objects"
317 depends on DEBUG_OBJECTS && PREEMPT
318 help
319 Enable this to turn on debugging of RCU list heads (call_rcu() usage).
320
321config DEBUG_OBJECTS_PERCPU_COUNTER
322 bool "Debug percpu counter objects"
323 depends on DEBUG_OBJECTS
324 help
325 If you say Y here, additional code will be inserted into the
326 percpu counter routines to track the life time of percpu counter
327 objects and validate the percpu counter operations.
328
309config DEBUG_OBJECTS_ENABLE_DEFAULT 329config DEBUG_OBJECTS_ENABLE_DEFAULT
310 int "debug_objects bootup default value (0-1)" 330 int "debug_objects bootup default value (0-1)"
311 range 0 1 331 range 0 1
@@ -342,7 +362,7 @@ config SLUB_DEBUG_ON
342config SLUB_STATS 362config SLUB_STATS
343 default n 363 default n
344 bool "Enable SLUB performance statistics" 364 bool "Enable SLUB performance statistics"
345 depends on SLUB && SLUB_DEBUG && SYSFS 365 depends on SLUB && SYSFS
346 help 366 help
347 SLUB statistics are useful to debug SLUBs allocation behavior in 367 SLUB statistics are useful to debug SLUBs allocation behavior in
348 order find ways to optimize the allocator. This should never be 368 order find ways to optimize the allocator. This should never be
@@ -355,7 +375,7 @@ config SLUB_STATS
355config DEBUG_KMEMLEAK 375config DEBUG_KMEMLEAK
356 bool "Kernel memory leak detector" 376 bool "Kernel memory leak detector"
357 depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \ 377 depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
358 (X86 || ARM || PPC || S390) 378 (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
359 379
360 select DEBUG_FS if SYSFS 380 select DEBUG_FS if SYSFS
361 select STACKTRACE if STACKTRACE_SUPPORT 381 select STACKTRACE if STACKTRACE_SUPPORT
@@ -399,6 +419,13 @@ config DEBUG_KMEMLEAK_TEST
399 419
400 If unsure, say N. 420 If unsure, say N.
401 421
422config DEBUG_KMEMLEAK_DEFAULT_OFF
423 bool "Default kmemleak to off"
424 depends on DEBUG_KMEMLEAK
425 help
426 Say Y here to disable kmemleak by default. It can then be enabled
427 on the command line via kmemleak=on.
428
402config DEBUG_PREEMPT 429config DEBUG_PREEMPT
403 bool "Debug preemptible kernel" 430 bool "Debug preemptible kernel"
404 depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT 431 depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
@@ -443,6 +470,15 @@ config DEBUG_MUTEXES
443 This feature allows mutex semantics violations to be detected and 470 This feature allows mutex semantics violations to be detected and
444 reported. 471 reported.
445 472
473config BKL
474 bool "Big Kernel Lock" if (SMP || PREEMPT)
475 default y
476 help
477 This is the traditional lock that is used in old code instead
478 of proper locking. All drivers that use the BKL should depend
479 on this symbol.
480 Say Y here unless you are working on removing the BKL.
481
446config DEBUG_LOCK_ALLOC 482config DEBUG_LOCK_ALLOC
447 bool "Lock debugging: detect incorrect freeing of live locks" 483 bool "Lock debugging: detect incorrect freeing of live locks"
448 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 484 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -464,6 +500,7 @@ config PROVE_LOCKING
464 select DEBUG_SPINLOCK 500 select DEBUG_SPINLOCK
465 select DEBUG_MUTEXES 501 select DEBUG_MUTEXES
466 select DEBUG_LOCK_ALLOC 502 select DEBUG_LOCK_ALLOC
503 select TRACE_IRQFLAGS
467 default n 504 default n
468 help 505 help
469 This feature enables the kernel to prove that all locking 506 This feature enables the kernel to prove that all locking
@@ -499,11 +536,52 @@ config PROVE_LOCKING
499 536
500 For more details, see Documentation/lockdep-design.txt. 537 For more details, see Documentation/lockdep-design.txt.
501 538
539config PROVE_RCU
540 bool "RCU debugging: prove RCU correctness"
541 depends on PROVE_LOCKING
542 default n
543 help
544 This feature enables lockdep extensions that check for correct
545 use of RCU APIs. This is currently under development. Say Y
546 if you want to debug RCU usage or help work on the PROVE_RCU
547 feature.
548
549 Say N if you are unsure.
550
551config PROVE_RCU_REPEATEDLY
552 bool "RCU debugging: don't disable PROVE_RCU on first splat"
553 depends on PROVE_RCU
554 default n
555 help
556 By itself, PROVE_RCU will disable checking upon issuing the
557 first warning (or "splat"). This feature prevents such
558 disabling, allowing multiple RCU-lockdep warnings to be printed
559 on a single reboot.
560
561 Say Y to allow multiple RCU-lockdep warnings per boot.
562
563 Say N if you are unsure.
564
565config SPARSE_RCU_POINTER
566 bool "RCU debugging: sparse-based checks for pointer usage"
567 default n
568 help
569 This feature enables the __rcu sparse annotation for
570 RCU-protected pointers. This annotation will cause sparse
571 to flag any non-RCU use of annotated pointers. This can be
572 helpful when debugging RCU usage. Please note that this feature
573 is not intended to enforce code cleanliness; it is instead merely
574 a debugging aid.
575
576 Say Y to make sparse flag questionable use of RCU-protected pointers
577
578 Say N if you are unsure.
579
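As a rough sketch of what the __rcu annotation catches (the struct, field and function names below are made up for illustration; only __rcu, rcu_read_lock()/rcu_read_unlock() and rcu_dereference() are existing kernel APIs), sparse stays quiet when an annotated pointer is read through the RCU accessors and warns when it is dereferenced directly:

	#include <linux/rcupdate.h>

	/* Hypothetical types, used only for this illustration. */
	struct bar { int field; };
	struct foo { struct bar __rcu *bar; };

	static int read_bar(struct foo *f)
	{
		struct bar *b;
		int val = -1;

		rcu_read_lock();
		b = rcu_dereference(f->bar);	/* correct: goes through the RCU accessor */
		if (b)
			val = b->field;
		rcu_read_unlock();

		/* A bare "f->bar->field" dereference here would draw a sparse warning. */
		return val;
	}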
502config LOCKDEP 580config LOCKDEP
503 bool 581 bool
504 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 582 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
505 select STACKTRACE 583 select STACKTRACE
506 select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 584 select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
507 select KALLSYMS 585 select KALLSYMS
508 select KALLSYMS_ALL 586 select KALLSYMS_ALL
509 587
@@ -520,6 +598,14 @@ config LOCK_STAT
520 598
521 For more details, see Documentation/lockstat.txt 599 For more details, see Documentation/lockstat.txt
522 600
601 This also enables lock events required by "perf lock",
602 subcommand of perf.
603 If you want to use "perf lock", you also need to turn on
604 CONFIG_EVENT_TRACING.
605
606 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
607 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
608
523config DEBUG_LOCKDEP 609config DEBUG_LOCKDEP
524 bool "Lock dependency engine debugging" 610 bool "Lock dependency engine debugging"
525 depends on DEBUG_KERNEL && LOCKDEP 611 depends on DEBUG_KERNEL && LOCKDEP
@@ -529,11 +615,10 @@ config DEBUG_LOCKDEP
529 of more runtime overhead. 615 of more runtime overhead.
530 616
531config TRACE_IRQFLAGS 617config TRACE_IRQFLAGS
532 depends on DEBUG_KERNEL
533 bool 618 bool
534 default y 619 help
535 depends on TRACE_IRQFLAGS_SUPPORT 620 Enables hooks to interrupt enabling and disabling for
536 depends on PROVE_LOCKING 621 either tracing or lock debugging.
537 622
538config DEBUG_SPINLOCK_SLEEP 623config DEBUG_SPINLOCK_SLEEP
539 bool "Spinlock debugging: sleep-inside-spinlock checking" 624 bool "Spinlock debugging: sleep-inside-spinlock checking"
@@ -595,6 +680,19 @@ config DEBUG_INFO
595 680
596 If unsure, say N. 681 If unsure, say N.
597 682
683config DEBUG_INFO_REDUCED
684 bool "Reduce debugging information"
685 depends on DEBUG_INFO
686 help
687 If you say Y here gcc is instructed to generate less debugging
688 information for structure types. This means that tools that
689 need full debugging information (like kgdb or systemtap) won't
690 be happy. But if you merely need debugging information to
691 resolve line numbers there is no loss. Advantage is that
692 build directory object sizes shrink dramatically over a full
693 DEBUG_INFO build and compile times are reduced too.
694 Only works with newer gcc versions.
695
598config DEBUG_VM 696config DEBUG_VM
599 bool "Debug VM" 697 bool "Debug VM"
600 depends on DEBUG_KERNEL 698 depends on DEBUG_KERNEL
@@ -651,6 +749,15 @@ config DEBUG_LIST
651 749
652 If unsure, say N. 750 If unsure, say N.
653 751
752config TEST_LIST_SORT
753 bool "Linked list sorting test"
754 depends on DEBUG_KERNEL
755 help
756 Enable this to turn on 'list_sort()' function test. This test is
757 executed only once during system boot, so affects only boot time.
758
759 If unsure, say N.
760
654config DEBUG_SG 761config DEBUG_SG
655 bool "Debug SG table operations" 762 bool "Debug SG table operations"
656 depends on DEBUG_KERNEL 763 depends on DEBUG_KERNEL
@@ -765,10 +872,46 @@ config RCU_CPU_STALL_DETECTOR
765 CPUs are delaying the current grace period, but only when 872 CPUs are delaying the current grace period, but only when
766 the grace period extends for excessive time periods. 873 the grace period extends for excessive time periods.
767 874
768 Say Y if you want RCU to perform such checks. 875 Say N if you want to disable such checks.
876
877 Say Y if you are unsure.
878
879config RCU_CPU_STALL_TIMEOUT
880 int "RCU CPU stall timeout in seconds"
881 depends on RCU_CPU_STALL_DETECTOR
882 range 3 300
883 default 60
884 help
885 If a given RCU grace period extends more than the specified
886 number of seconds, a CPU stall warning is printed. If the
887 RCU grace period persists, additional CPU stall warnings are
888 printed at more widely spaced intervals.
889
890config RCU_CPU_STALL_DETECTOR_RUNNABLE
891 bool "RCU CPU stall checking starts automatically at boot"
892 depends on RCU_CPU_STALL_DETECTOR
893 default y
894 help
895 If set, start checking for RCU CPU stalls immediately on
896 boot. Otherwise, RCU CPU stall checking must be manually
897 enabled.
898
899 Say Y if you are unsure.
900
901 Say N if you wish to suppress RCU CPU stall checking during boot.
902
903config RCU_CPU_STALL_VERBOSE
904 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
905 depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
906 default y
907 help
908 This option causes RCU to printk detailed per-task information
909 for any tasks that are stalling the current RCU grace period.
769 910
770 Say N if you are unsure. 911 Say N if you are unsure.
771 912
913 Say Y if you want to enable such checks.
914
772config KPROBES_SANITY_TEST 915config KPROBES_SANITY_TEST
773 bool "Kprobes sanity tests" 916 bool "Kprobes sanity tests"
774 depends on DEBUG_KERNEL 917 depends on DEBUG_KERNEL
@@ -840,8 +983,7 @@ config DEBUG_FORCE_WEAK_PER_CPU
840 983
841config LKDTM 984config LKDTM
842 tristate "Linux Kernel Dump Test Tool Module" 985 tristate "Linux Kernel Dump Test Tool Module"
843 depends on DEBUG_KERNEL 986 depends on DEBUG_FS
844 depends on KPROBES
845 depends on BLOCK 987 depends on BLOCK
846 default n 988 default n
847 help 989 help
@@ -852,7 +994,19 @@ config LKDTM
852 called lkdtm. 994 called lkdtm.
853 995
854 Documentation on how to use the module can be found in 996 Documentation on how to use the module can be found in
855 drivers/misc/lkdtm.c 997 Documentation/fault-injection/provoke-crashes.txt
998
999config CPU_NOTIFIER_ERROR_INJECT
1000 tristate "CPU notifier error injection module"
1001 depends on HOTPLUG_CPU && DEBUG_KERNEL
1002 help
1003 This option provides a kernel module that can be used to test
1004 the error handling of the cpu notifiers
1005
1006 To compile this code as a module, choose M here: the module will
1007 be called cpu-notifier-error-inject.
1008
1009 If unsure, say N.
856 1010
857config FAULT_INJECTION 1011config FAULT_INJECTION
858 bool "Fault-injection framework" 1012 bool "Fault-injection framework"
@@ -881,7 +1035,7 @@ config FAIL_MAKE_REQUEST
881 Provide fault-injection capability for disk IO. 1035 Provide fault-injection capability for disk IO.
882 1036
883config FAIL_IO_TIMEOUT 1037config FAIL_IO_TIMEOUT
884 bool "Faul-injection capability for faking disk interrupts" 1038 bool "Fault-injection capability for faking disk interrupts"
885 depends on FAULT_INJECTION && BLOCK 1039 depends on FAULT_INJECTION && BLOCK
886 help 1040 help
887 Provide fault-injection capability on end IO handling. This 1041 Provide fault-injection capability on end IO handling. This
@@ -902,19 +1056,22 @@ config FAULT_INJECTION_STACKTRACE_FILTER
902 depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT 1056 depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
903 depends on !X86_64 1057 depends on !X86_64
904 select STACKTRACE 1058 select STACKTRACE
905 select FRAME_POINTER if !PPC && !S390 1059 select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
906 help 1060 help
907 Provide stacktrace filter for fault-injection capabilities 1061 Provide stacktrace filter for fault-injection capabilities
908 1062
909config LATENCYTOP 1063config LATENCYTOP
910 bool "Latency measuring infrastructure" 1064 bool "Latency measuring infrastructure"
911 select FRAME_POINTER if !MIPS && !PPC && !S390 1065 depends on HAVE_LATENCYTOP_SUPPORT
1066 depends on DEBUG_KERNEL
1067 depends on STACKTRACE_SUPPORT
1068 depends on PROC_FS
1069 select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
912 select KALLSYMS 1070 select KALLSYMS
913 select KALLSYMS_ALL 1071 select KALLSYMS_ALL
914 select STACKTRACE 1072 select STACKTRACE
915 select SCHEDSTATS 1073 select SCHEDSTATS
916 select SCHED_DEBUG 1074 select SCHED_DEBUG
917 depends on HAVE_LATENCYTOP_SUPPORT
918 help 1075 help
919 Enable this option if you want to use the LatencyTOP tool 1076 Enable this option if you want to use the LatencyTOP tool
920 to find out which userspace is blocking on what kernel operations. 1077 to find out which userspace is blocking on what kernel operations.
@@ -995,10 +1152,10 @@ config DYNAMIC_DEBUG
995 1152
996 Usage: 1153 Usage:
997 1154
998 Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file, 1155 Dynamic debugging is controlled via the 'dynamic_debug/control' file,
999 which is contained in the 'debugfs' filesystem. Thus, the debugfs 1156 which is contained in the 'debugfs' filesystem. Thus, the debugfs
1000 filesystem must first be mounted before making use of this feature. 1157 filesystem must first be mounted before making use of this feature.
1001 We refer the control file as: <debugfs>/dynamic_debug/ddebug. This 1158 We refer the control file as: <debugfs>/dynamic_debug/control. This
1002 file contains a list of the debug statements that can be enabled. The 1159 file contains a list of the debug statements that can be enabled. The
1003 format for each line of the file is: 1160 format for each line of the file is:
1004 1161
@@ -1013,7 +1170,7 @@ config DYNAMIC_DEBUG
1013 1170
1014 From a live system: 1171 From a live system:
1015 1172
1016 nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug 1173 nullarbor:~ # cat <debugfs>/dynamic_debug/control
1017 # filename:lineno [module]function flags format 1174 # filename:lineno [module]function flags format
1018 fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" 1175 fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012"
1019 fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" 1176 fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012"
@@ -1023,23 +1180,23 @@ config DYNAMIC_DEBUG
1023 1180
1024 // enable the message at line 1603 of file svcsock.c 1181 // enable the message at line 1603 of file svcsock.c
1025 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > 1182 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
1026 <debugfs>/dynamic_debug/ddebug 1183 <debugfs>/dynamic_debug/control
1027 1184
1028 // enable all the messages in file svcsock.c 1185 // enable all the messages in file svcsock.c
1029 nullarbor:~ # echo -n 'file svcsock.c +p' > 1186 nullarbor:~ # echo -n 'file svcsock.c +p' >
1030 <debugfs>/dynamic_debug/ddebug 1187 <debugfs>/dynamic_debug/control
1031 1188
1032 // enable all the messages in the NFS server module 1189 // enable all the messages in the NFS server module
1033 nullarbor:~ # echo -n 'module nfsd +p' > 1190 nullarbor:~ # echo -n 'module nfsd +p' >
1034 <debugfs>/dynamic_debug/ddebug 1191 <debugfs>/dynamic_debug/control
1035 1192
1036 // enable all 12 messages in the function svc_process() 1193 // enable all 12 messages in the function svc_process()
1037 nullarbor:~ # echo -n 'func svc_process +p' > 1194 nullarbor:~ # echo -n 'func svc_process +p' >
1038 <debugfs>/dynamic_debug/ddebug 1195 <debugfs>/dynamic_debug/control
1039 1196
1040 // disable all 12 messages in the function svc_process() 1197 // disable all 12 messages in the function svc_process()
1041 nullarbor:~ # echo -n 'func svc_process -p' > 1198 nullarbor:~ # echo -n 'func svc_process -p' >
1042 <debugfs>/dynamic_debug/ddebug 1199 <debugfs>/dynamic_debug/control
1043 1200
1044 See Documentation/dynamic-debug-howto.txt for additional information. 1201 See Documentation/dynamic-debug-howto.txt for additional information.
1045 1202
@@ -1054,6 +1211,26 @@ config DMA_API_DEBUG
1054 This option causes a performance degredation. Use only if you want 1211 This option causes a performance degredation. Use only if you want
1055 to debug device drivers. If unsure, say N. 1212 to debug device drivers. If unsure, say N.
1056 1213
1214config ATOMIC64_SELFTEST
1215 bool "Perform an atomic64_t self-test at boot"
1216 help
1217 Enable this option to test the atomic64_t functions at boot.
1218
1219 If unsure, say N.
1220
1221config ASYNC_RAID6_TEST
1222 tristate "Self test for hardware accelerated raid6 recovery"
1223 depends on ASYNC_RAID6_RECOV
1224 select ASYNC_MEMCPY
1225 ---help---
1226 This is a one-shot self test that permutes through the
1227 recovery of all the possible two disk failure scenarios for a
1228 N-disk array. Recovery is performed with the asynchronous
1229 raid6 recovery routines, and will optionally use an offload
1230 engine if one is available.
1231
1232 If unsure, say N.
1233
1057source "samples/Kconfig" 1234source "samples/Kconfig"
1058 1235
1059source "lib/Kconfig.kgdb" 1236source "lib/Kconfig.kgdb"
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 9b5d1d7f2ef7..43cb93fa2651 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB
3 bool 3 bool
4 4
5menuconfig KGDB 5menuconfig KGDB
6 bool "KGDB: kernel debugging with remote gdb" 6 bool "KGDB: kernel debugger"
7 depends on HAVE_ARCH_KGDB 7 depends on HAVE_ARCH_KGDB
8 depends on DEBUG_KERNEL && EXPERIMENTAL 8 depends on DEBUG_KERNEL && EXPERIMENTAL
9 help 9 help
@@ -57,4 +57,26 @@ config KGDB_TESTS_BOOT_STRING
57 information about other strings you could use beyond the 57 information about other strings you could use beyond the
58 default of V1F100. 58 default of V1F100.
59 59
60config KGDB_LOW_LEVEL_TRAP
61 bool "KGDB: Allow debugging with traps in notifiers"
62 depends on X86 || MIPS
63 default n
64 help
65 This will add an extra call back to kgdb for the breakpoint
66 exception handler, which will allow kgdb to step
67 through a notify handler.
68
69config KGDB_KDB
70 bool "KGDB_KDB: include kdb frontend for kgdb"
71 default n
72 help
73 KDB frontend for kernel
74
75config KDB_KEYBOARD
76 bool "KGDB_KDB: keyboard as input device"
77 depends on VT && KGDB_KDB
78 default n
79 help
80 KDB can use a PS/2 type keyboard for an input device
81
60endif # KGDB 82endif # KGDB
diff --git a/lib/Makefile b/lib/Makefile
index 3b0b4a696db9..d7b6e30a3a1e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
8endif 8endif
9 9
10lib-y := ctype.o string.o vsprintf.o cmdline.o \ 10lib-y := ctype.o string.o vsprintf.o cmdline.o \
11 rbtree.o radix-tree.o dump_stack.o \ 11 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
12 idr.o int_sqrt.o extable.o prio_tree.o \ 12 idr.o int_sqrt.o extable.o prio_tree.o \
13 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 13 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
14 proportions.o prio_heap.o ratelimit.o show_mem.o \ 14 proportions.o prio_heap.o ratelimit.o show_mem.o \
@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
21 21
22obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ 22obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
23 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ 23 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
24 string_helpers.o gcd.o list_sort.o 24 string_helpers.o gcd.o lcm.o list_sort.o uuid.o
25 25
26ifeq ($(CONFIG_DEBUG_KOBJECT),y) 26ifeq ($(CONFIG_DEBUG_KOBJECT),y)
27CFLAGS_kobject.o += -DDEBUG 27CFLAGS_kobject.o += -DDEBUG
@@ -39,8 +39,12 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
39lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o 39lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
40lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o 40lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
41obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o 41obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
42
43CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
42obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o 44obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
45
43obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o 46obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
47obj-$(CONFIG_BTREE) += btree.o
44obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o 48obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
45obj-$(CONFIG_DEBUG_LIST) += list_debug.o 49obj-$(CONFIG_DEBUG_LIST) += list_debug.o
46obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o 50obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
@@ -65,6 +69,7 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
65obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ 69obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
66obj-$(CONFIG_LZO_COMPRESS) += lzo/ 70obj-$(CONFIG_LZO_COMPRESS) += lzo/
67obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ 71obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
72obj-$(CONFIG_RAID6_PQ) += raid6/
68 73
69lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o 74lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
70lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o 75lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
@@ -81,11 +86,10 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
81obj-$(CONFIG_SWIOTLB) += swiotlb.o 86obj-$(CONFIG_SWIOTLB) += swiotlb.o
82obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o 87obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
83obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o 88obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
89obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
84 90
85lib-$(CONFIG_GENERIC_BUG) += bug.o 91lib-$(CONFIG_GENERIC_BUG) += bug.o
86 92
87obj-$(CONFIG_HAVE_LMB) += lmb.o
88
89obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o 93obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
90 94
91obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o 95obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
@@ -100,6 +104,10 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o
100 104
101obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o 105obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
102 106
107obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
108
109obj-$(CONFIG_AVERAGE) += average.o
110
103hostprogs-y := gen_crc32table 111hostprogs-y := gen_crc32table
104clean-files := crc32table.h 112clean-files := crc32table.h
105 113
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 8bee16ec7524..a21c12bc727c 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -162,12 +162,12 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u)
162{ 162{
163 unsigned long flags; 163 unsigned long flags;
164 spinlock_t *lock = lock_addr(v); 164 spinlock_t *lock = lock_addr(v);
165 int ret = 1; 165 int ret = 0;
166 166
167 spin_lock_irqsave(lock, flags); 167 spin_lock_irqsave(lock, flags);
168 if (v->counter != u) { 168 if (v->counter != u) {
169 v->counter += a; 169 v->counter += a;
170 ret = 0; 170 ret = 1;
171 } 171 }
172 spin_unlock_irqrestore(lock, flags); 172 spin_unlock_irqrestore(lock, flags);
173 return ret; 173 return ret;
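The two-line change above flips a previously inverted return value: atomic64_add_unless() is meant to return non-zero when the addition was performed and 0 when the counter already equals @u, mirroring atomic_add_unless(). A minimal caller-side sketch of the intended contract (the refcount variable is a made-up example):

	#include <asm/atomic.h>

	static atomic64_t refs = ATOMIC64_INIT(1);

	/* Take a reference unless the count has already dropped to zero. */
	static int get_ref(void)
	{
		/* Non-zero means the increment happened; 0 means refs was already 0. */
		return atomic64_add_unless(&refs, 1, 0);
	}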
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
new file mode 100644
index 000000000000..44524cc8c32a
--- /dev/null
+++ b/lib/atomic64_test.c
@@ -0,0 +1,166 @@
1/*
2 * Testsuite for atomic64_t functions
3 *
4 * Copyright © 2010 Luca Barbieri
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <asm/atomic.h>
14
15#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
16static __init int test_atomic64(void)
17{
18 long long v0 = 0xaaa31337c001d00dLL;
19 long long v1 = 0xdeadbeefdeafcafeLL;
20 long long v2 = 0xfaceabadf00df001LL;
21 long long onestwos = 0x1111111122222222LL;
22 long long one = 1LL;
23
24 atomic64_t v = ATOMIC64_INIT(v0);
25 long long r = v0;
26 BUG_ON(v.counter != r);
27
28 atomic64_set(&v, v1);
29 r = v1;
30 BUG_ON(v.counter != r);
31 BUG_ON(atomic64_read(&v) != r);
32
33 INIT(v0);
34 atomic64_add(onestwos, &v);
35 r += onestwos;
36 BUG_ON(v.counter != r);
37
38 INIT(v0);
39 atomic64_add(-one, &v);
40 r += -one;
41 BUG_ON(v.counter != r);
42
43 INIT(v0);
44 r += onestwos;
45 BUG_ON(atomic64_add_return(onestwos, &v) != r);
46 BUG_ON(v.counter != r);
47
48 INIT(v0);
49 r += -one;
50 BUG_ON(atomic64_add_return(-one, &v) != r);
51 BUG_ON(v.counter != r);
52
53 INIT(v0);
54 atomic64_sub(onestwos, &v);
55 r -= onestwos;
56 BUG_ON(v.counter != r);
57
58 INIT(v0);
59 atomic64_sub(-one, &v);
60 r -= -one;
61 BUG_ON(v.counter != r);
62
63 INIT(v0);
64 r -= onestwos;
65 BUG_ON(atomic64_sub_return(onestwos, &v) != r);
66 BUG_ON(v.counter != r);
67
68 INIT(v0);
69 r -= -one;
70 BUG_ON(atomic64_sub_return(-one, &v) != r);
71 BUG_ON(v.counter != r);
72
73 INIT(v0);
74 atomic64_inc(&v);
75 r += one;
76 BUG_ON(v.counter != r);
77
78 INIT(v0);
79 r += one;
80 BUG_ON(atomic64_inc_return(&v) != r);
81 BUG_ON(v.counter != r);
82
83 INIT(v0);
84 atomic64_dec(&v);
85 r -= one;
86 BUG_ON(v.counter != r);
87
88 INIT(v0);
89 r -= one;
90 BUG_ON(atomic64_dec_return(&v) != r);
91 BUG_ON(v.counter != r);
92
93 INIT(v0);
94 BUG_ON(atomic64_xchg(&v, v1) != v0);
95 r = v1;
96 BUG_ON(v.counter != r);
97
98 INIT(v0);
99 BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
100 r = v1;
101 BUG_ON(v.counter != r);
102
103 INIT(v0);
104 BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
105 BUG_ON(v.counter != r);
106
107 INIT(v0);
108 BUG_ON(atomic64_add_unless(&v, one, v0));
109 BUG_ON(v.counter != r);
110
111 INIT(v0);
112 BUG_ON(!atomic64_add_unless(&v, one, v1));
113 r += one;
114 BUG_ON(v.counter != r);
115
116#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
117 defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
118 INIT(onestwos);
119 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
120 r -= one;
121 BUG_ON(v.counter != r);
122
123 INIT(0);
124 BUG_ON(atomic64_dec_if_positive(&v) != -one);
125 BUG_ON(v.counter != r);
126
127 INIT(-one);
128 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
129 BUG_ON(v.counter != r);
130#else
131#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above
132#endif
133
134 INIT(onestwos);
135 BUG_ON(!atomic64_inc_not_zero(&v));
136 r += one;
137 BUG_ON(v.counter != r);
138
139 INIT(0);
140 BUG_ON(atomic64_inc_not_zero(&v));
141 BUG_ON(v.counter != r);
142
143 INIT(-one);
144 BUG_ON(!atomic64_inc_not_zero(&v));
145 r += one;
146 BUG_ON(v.counter != r);
147
148#ifdef CONFIG_X86
149 printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
150#ifdef CONFIG_X86_64
151 "x86-64",
152#elif defined(CONFIG_X86_CMPXCHG64)
153 "i586+",
154#else
155 "i386+",
156#endif
157 boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
158 boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
159#else
160 printk(KERN_INFO "atomic64 test passed\n");
161#endif
162
163 return 0;
164}
165
166core_initcall(test_atomic64);
diff --git a/lib/average.c b/lib/average.c
new file mode 100644
index 000000000000..5576c2841496
--- /dev/null
+++ b/lib/average.c
@@ -0,0 +1,61 @@
1/*
2 * lib/average.c
3 *
4 * This source code is licensed under the GNU General Public License,
5 * Version 2. See the file COPYING for more details.
6 */
7
8#include <linux/module.h>
9#include <linux/average.h>
10#include <linux/bug.h>
11#include <linux/log2.h>
12
13/**
14 * DOC: Exponentially Weighted Moving Average (EWMA)
15 *
16 * These are generic functions for calculating Exponentially Weighted Moving
17 * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
18 * up internal representation of the average value to prevent rounding errors.
19 * The factor for scaling up and the exponential weight (or decay rate) have to
20 * be specified thru the init fuction. The structure should not be accessed
21 * directly but only thru the helper functions.
22 */
23
24/**
25 * ewma_init() - Initialize EWMA parameters
26 * @avg: Average structure
27 * @factor: Factor to use for the scaled up internal value. The maximum value
28 * of averages can be ULONG_MAX/(factor*weight). For performance reasons
29 * factor has to be a power of 2.
30 * @weight: Exponential weight, or decay rate. This defines how fast the
31 * influence of older values decreases. For performance reasons weight has
32 * to be a power of 2.
33 *
34 * Initialize the EWMA parameters for a given struct ewma @avg.
35 */
36void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
37{
38 WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
39
40 avg->weight = ilog2(weight);
41 avg->factor = ilog2(factor);
42 avg->internal = 0;
43}
44EXPORT_SYMBOL(ewma_init);
45
46/**
47 * ewma_add() - Exponentially weighted moving average (EWMA)
48 * @avg: Average structure
49 * @val: Current value
50 *
51 * Add a sample to the average.
52 */
53struct ewma *ewma_add(struct ewma *avg, unsigned long val)
54{
55 avg->internal = avg->internal ?
56 (((avg->internal << avg->weight) - avg->internal) +
57 (val << avg->factor)) >> avg->weight :
58 (val << avg->factor);
59 return avg;
60}
61EXPORT_SYMBOL(ewma_add);
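In terms of the code above, the stored value evolves as internal = (internal * (2^weight - 1) + val * 2^factor) / 2^weight, so the unscaled average is internal / 2^factor. A short usage sketch of the two exported helpers (the variable names are invented; reading the average back relies on a read helper such as ewma_read(), assumed to live in <linux/average.h> and not part of this file):

	#include <linux/average.h>

	static struct ewma avg_rssi;

	static void rssi_avg_setup(void)
	{
		/* factor 1024 scales the stored value, weight 8 sets the decay rate;
		 * both must be powers of two. */
		ewma_init(&avg_rssi, 1024, 8);
	}

	static void rssi_avg_sample(unsigned long rssi)
	{
		ewma_add(&avg_rssi, rssi);
	}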
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 11bf49750583..741fae905ae3 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -359,7 +359,6 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area);
359 359
360#define CHUNKSZ 32 360#define CHUNKSZ 32
361#define nbits_to_hold_value(val) fls(val) 361#define nbits_to_hold_value(val) fls(val)
362#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
363#define BASEDEC 10 /* fancier cpuset lists input in decimal */ 362#define BASEDEC 10 /* fancier cpuset lists input in decimal */
364 363
365/** 364/**
@@ -466,7 +465,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
466 if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1)) 465 if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
467 return -EOVERFLOW; 466 return -EOVERFLOW;
468 467
469 chunk = (chunk << 4) | unhex(c); 468 chunk = (chunk << 4) | hex_to_bin(c);
470 ndigits++; totaldigits++; 469 ndigits++; totaldigits++;
471 } 470 }
472 if (ndigits == 0) 471 if (ndigits == 0)
@@ -487,7 +486,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
487EXPORT_SYMBOL(__bitmap_parse); 486EXPORT_SYMBOL(__bitmap_parse);
488 487
489/** 488/**
490 * bitmap_parse_user() 489 * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
491 * 490 *
492 * @ubuf: pointer to user buffer containing string. 491 * @ubuf: pointer to user buffer containing string.
493 * @ulen: buffer size in bytes. If string is smaller than this 492 * @ulen: buffer size in bytes. If string is smaller than this
@@ -619,7 +618,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
619EXPORT_SYMBOL(bitmap_parselist); 618EXPORT_SYMBOL(bitmap_parselist);
620 619
621/** 620/**
622 * bitmap_pos_to_ord(buf, pos, bits) 621 * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
623 * @buf: pointer to a bitmap 622 * @buf: pointer to a bitmap
624 * @pos: a bit position in @buf (0 <= @pos < @bits) 623 * @pos: a bit position in @buf (0 <= @pos < @bits)
625 * @bits: number of valid bit positions in @buf 624 * @bits: number of valid bit positions in @buf
@@ -655,7 +654,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
655} 654}
656 655
657/** 656/**
658 * bitmap_ord_to_pos(buf, ord, bits) 657 * bitmap_ord_to_pos - find position of n-th set bit in bitmap
659 * @buf: pointer to bitmap 658 * @buf: pointer to bitmap
660 * @ord: ordinal bit position (n-th set bit, n >= 0) 659 * @ord: ordinal bit position (n-th set bit, n >= 0)
661 * @bits: number of valid bit positions in @buf 660 * @bits: number of valid bit positions in @buf
@@ -733,10 +732,9 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
733 bitmap_zero(dst, bits); 732 bitmap_zero(dst, bits);
734 733
735 w = bitmap_weight(new, bits); 734 w = bitmap_weight(new, bits);
736 for (oldbit = find_first_bit(src, bits); 735 for_each_set_bit(oldbit, src, bits) {
737 oldbit < bits;
738 oldbit = find_next_bit(src, bits, oldbit + 1)) {
739 int n = bitmap_pos_to_ord(old, oldbit, bits); 736 int n = bitmap_pos_to_ord(old, oldbit, bits);
737
740 if (n < 0 || w == 0) 738 if (n < 0 || w == 0)
741 set_bit(oldbit, dst); /* identity map */ 739 set_bit(oldbit, dst); /* identity map */
742 else 740 else
@@ -903,9 +901,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
903 */ 901 */
904 902
905 m = 0; 903 m = 0;
906 for (n = find_first_bit(relmap, bits); 904 for_each_set_bit(n, relmap, bits) {
907 n < bits;
908 n = find_next_bit(relmap, bits, n + 1)) {
909 /* m == bitmap_pos_to_ord(relmap, n, bits) */ 905 /* m == bitmap_pos_to_ord(relmap, n, bits) */
910 if (test_bit(m, orig)) 906 if (test_bit(m, orig))
911 set_bit(n, dst); 907 set_bit(n, dst);
@@ -934,9 +930,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
934 return; 930 return;
935 bitmap_zero(dst, bits); 931 bitmap_zero(dst, bits);
936 932
937 for (oldbit = find_first_bit(orig, bits); 933 for_each_set_bit(oldbit, orig, bits)
938 oldbit < bits;
939 oldbit = find_next_bit(orig, bits, oldbit + 1))
940 set_bit(oldbit % sz, dst); 934 set_bit(oldbit % sz, dst);
941} 935}
942EXPORT_SYMBOL(bitmap_fold); 936EXPORT_SYMBOL(bitmap_fold);
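The three hunks above convert open-coded find_first_bit()/find_next_bit() loops to the for_each_set_bit() iterator. A small, self-contained sketch of the idiom (the function and variable names are invented for illustration):

	#include <linux/kernel.h>
	#include <linux/bitops.h>

	static void report_set_bits(const unsigned long *map, unsigned int nbits)
	{
		unsigned int bit;

		/* Equivalent to:
		 *   for (bit = find_first_bit(map, nbits); bit < nbits;
		 *        bit = find_next_bit(map, nbits, bit + 1))
		 */
		for_each_set_bit(bit, map, nbits)
			printk(KERN_DEBUG "bit %u is set\n", bit);
	}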
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 000000000000..c9c6f0351526
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,798 @@
1/*
2 * lib/btree.c - Simple In-memory B+Tree
3 *
4 * As should be obvious for Linux kernel code, license is GPLv2
5 *
6 * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
7 * Bits and pieces stolen from Peter Zijlstra's code, which is
8 * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
9 * GPLv2
10 *
11 * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
12 *
13 * A relatively simple B+Tree implementation. I have written it as a learning
14 * exercise to understand how B+Trees work. Turned out to be useful as well.
15 *
16 * B+Trees can be used similar to Linux radix trees (which don't have anything
17 * in common with textbook radix trees, beware). Prerequisite for them working
18 * well is that access to a random tree node is much faster than a large number
19 * of operations within each node.
20 *
21 * Disks have fulfilled the prerequisite for a long time. More recently DRAM
22 * has gained similar properties, as memory access times, when measured in cpu
23 * cycles, have increased. Cacheline sizes have increased as well, which also
24 * helps B+Trees.
25 *
26 * Compared to radix trees, B+Trees are more efficient when dealing with a
27 * sparsely populated address space. Between 25% and 50% of the memory is
28 * occupied with valid pointers. When densely populated, radix trees contain
29 * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2%
30 * pointers.
31 *
32 * This particular implementation stores pointers identified by a long value.
33 * Storing NULL pointers is illegal, lookup will return NULL when no entry
34 * was found.
35 *
36 * A trick was used that is not commonly found in textbooks. The lowest
37 * values are to the right, not to the left. All used slots within a node
38 * are on the left, all unused slots contain NUL values. Most operations
39 * simply loop once over all slots and terminate on the first NUL.
40 */
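To make the calling convention concrete, here is a rough usage sketch against the API added in this file, using the 32-bit key geometry; error handling is trimmed and the stored pointer is an arbitrary example (it only has to be non-NULL). The extern declarations for btree_geo32 and the helpers are assumed to come from <linux/btree.h>:

	#include <linux/kernel.h>
	#include <linux/btree.h>
	#include <linux/slab.h>

	static struct btree_head head;

	static int btree_example(void)
	{
		unsigned long key = 42;
		void *val = (void *)0x1000;	/* any non-NULL pointer; NULL values are illegal */
		int err;

		err = btree_init(&head);
		if (err)
			return err;

		err = btree_insert(&head, &btree_geo32, &key, val, GFP_KERNEL);
		if (!err) {
			/* Lookup returns the stored pointer, or NULL when the key is absent. */
			WARN_ON(btree_lookup(&head, &btree_geo32, &key) != val);
			btree_remove(&head, &btree_geo32, &key);
		}
		btree_destroy(&head);
		return err;
	}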
41
42#include <linux/btree.h>
43#include <linux/cache.h>
44#include <linux/kernel.h>
45#include <linux/slab.h>
46#include <linux/module.h>
47
48#define MAX(a, b) ((a) > (b) ? (a) : (b))
49#define NODESIZE MAX(L1_CACHE_BYTES, 128)
50
51struct btree_geo {
52 int keylen;
53 int no_pairs;
54 int no_longs;
55};
56
57struct btree_geo btree_geo32 = {
58 .keylen = 1,
59 .no_pairs = NODESIZE / sizeof(long) / 2,
60 .no_longs = NODESIZE / sizeof(long) / 2,
61};
62EXPORT_SYMBOL_GPL(btree_geo32);
63
64#define LONG_PER_U64 (64 / BITS_PER_LONG)
65struct btree_geo btree_geo64 = {
66 .keylen = LONG_PER_U64,
67 .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
68 .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
69};
70EXPORT_SYMBOL_GPL(btree_geo64);
71
72struct btree_geo btree_geo128 = {
73 .keylen = 2 * LONG_PER_U64,
74 .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
75 .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
76};
77EXPORT_SYMBOL_GPL(btree_geo128);
78
79static struct kmem_cache *btree_cachep;
80
81void *btree_alloc(gfp_t gfp_mask, void *pool_data)
82{
83 return kmem_cache_alloc(btree_cachep, gfp_mask);
84}
85EXPORT_SYMBOL_GPL(btree_alloc);
86
87void btree_free(void *element, void *pool_data)
88{
89 kmem_cache_free(btree_cachep, element);
90}
91EXPORT_SYMBOL_GPL(btree_free);
92
93static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
94{
95 unsigned long *node;
96
97 node = mempool_alloc(head->mempool, gfp);
98 if (likely(node))
99 memset(node, 0, NODESIZE);
100 return node;
101}
102
103static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
104{
105 size_t i;
106
107 for (i = 0; i < n; i++) {
108 if (l1[i] < l2[i])
109 return -1;
110 if (l1[i] > l2[i])
111 return 1;
112 }
113 return 0;
114}
115
116static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
117 size_t n)
118{
119 size_t i;
120
121 for (i = 0; i < n; i++)
122 dest[i] = src[i];
123 return dest;
124}
125
126static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
127{
128 size_t i;
129
130 for (i = 0; i < n; i++)
131 s[i] = c;
132 return s;
133}
134
135static void dec_key(struct btree_geo *geo, unsigned long *key)
136{
137 unsigned long val;
138 int i;
139
140 for (i = geo->keylen - 1; i >= 0; i--) {
141 val = key[i];
142 key[i] = val - 1;
143 if (val)
144 break;
145 }
146}
147
148static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
149{
150 return &node[n * geo->keylen];
151}
152
153static void *bval(struct btree_geo *geo, unsigned long *node, int n)
154{
155 return (void *)node[geo->no_longs + n];
156}
157
158static void setkey(struct btree_geo *geo, unsigned long *node, int n,
159 unsigned long *key)
160{
161 longcpy(bkey(geo, node, n), key, geo->keylen);
162}
163
164static void setval(struct btree_geo *geo, unsigned long *node, int n,
165 void *val)
166{
167 node[geo->no_longs + n] = (unsigned long) val;
168}
169
170static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
171{
172 longset(bkey(geo, node, n), 0, geo->keylen);
173 node[geo->no_longs + n] = 0;
174}
175
176static inline void __btree_init(struct btree_head *head)
177{
178 head->node = NULL;
179 head->height = 0;
180}
181
182void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
183{
184 __btree_init(head);
185 head->mempool = mempool;
186}
187EXPORT_SYMBOL_GPL(btree_init_mempool);
188
189int btree_init(struct btree_head *head)
190{
191 __btree_init(head);
192 head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
193 if (!head->mempool)
194 return -ENOMEM;
195 return 0;
196}
197EXPORT_SYMBOL_GPL(btree_init);
198
199void btree_destroy(struct btree_head *head)
200{
201 mempool_destroy(head->mempool);
202 head->mempool = NULL;
203}
204EXPORT_SYMBOL_GPL(btree_destroy);
205
206void *btree_last(struct btree_head *head, struct btree_geo *geo,
207 unsigned long *key)
208{
209 int height = head->height;
210 unsigned long *node = head->node;
211
212 if (height == 0)
213 return NULL;
214
215 for ( ; height > 1; height--)
216 node = bval(geo, node, 0);
217
218 longcpy(key, bkey(geo, node, 0), geo->keylen);
219 return bval(geo, node, 0);
220}
221EXPORT_SYMBOL_GPL(btree_last);
222
223static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
224 unsigned long *key)
225{
226 return longcmp(bkey(geo, node, pos), key, geo->keylen);
227}
228
229static int keyzero(struct btree_geo *geo, unsigned long *key)
230{
231 int i;
232
233 for (i = 0; i < geo->keylen; i++)
234 if (key[i])
235 return 0;
236
237 return 1;
238}
239
240void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
241 unsigned long *key)
242{
243 int i, height = head->height;
244 unsigned long *node = head->node;
245
246 if (height == 0)
247 return NULL;
248
249 for ( ; height > 1; height--) {
250 for (i = 0; i < geo->no_pairs; i++)
251 if (keycmp(geo, node, i, key) <= 0)
252 break;
253 if (i == geo->no_pairs)
254 return NULL;
255 node = bval(geo, node, i);
256 if (!node)
257 return NULL;
258 }
259
260 if (!node)
261 return NULL;
262
263 for (i = 0; i < geo->no_pairs; i++)
264 if (keycmp(geo, node, i, key) == 0)
265 return bval(geo, node, i);
266 return NULL;
267}
268EXPORT_SYMBOL_GPL(btree_lookup);
269
270int btree_update(struct btree_head *head, struct btree_geo *geo,
271 unsigned long *key, void *val)
272{
273 int i, height = head->height;
274 unsigned long *node = head->node;
275
276 if (height == 0)
277 return -ENOENT;
278
279 for ( ; height > 1; height--) {
280 for (i = 0; i < geo->no_pairs; i++)
281 if (keycmp(geo, node, i, key) <= 0)
282 break;
283 if (i == geo->no_pairs)
284 return -ENOENT;
285 node = bval(geo, node, i);
286 if (!node)
287 return -ENOENT;
288 }
289
290 if (!node)
291 return -ENOENT;
292
293 for (i = 0; i < geo->no_pairs; i++)
294 if (keycmp(geo, node, i, key) == 0) {
295 setval(geo, node, i, val);
296 return 0;
297 }
298 return -ENOENT;
299}
300EXPORT_SYMBOL_GPL(btree_update);
301
302/*
303 * Usually this function is quite similar to normal lookup. But the key of
304 * a parent node may be smaller than the smallest key of all its siblings.
305 * In such a case we cannot just return NULL, as we have only proven that no
306 * key smaller than __key, but larger than this parent key exists.
307 * So we set __key to the parent key and retry. We have to use the smallest
308 * such parent key, which is the last parent key we encountered.
309 */
310void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
311 unsigned long *__key)
312{
313 int i, height;
314 unsigned long *node, *oldnode;
315 unsigned long *retry_key = NULL, key[geo->keylen];
316
317 if (keyzero(geo, __key))
318 return NULL;
319
320 if (head->height == 0)
321 return NULL;
322retry:
323 longcpy(key, __key, geo->keylen);
324 dec_key(geo, key);
325
326 node = head->node;
327 for (height = head->height ; height > 1; height--) {
328 for (i = 0; i < geo->no_pairs; i++)
329 if (keycmp(geo, node, i, key) <= 0)
330 break;
331 if (i == geo->no_pairs)
332 goto miss;
333 oldnode = node;
334 node = bval(geo, node, i);
335 if (!node)
336 goto miss;
337 retry_key = bkey(geo, oldnode, i);
338 }
339
340 if (!node)
341 goto miss;
342
343 for (i = 0; i < geo->no_pairs; i++) {
344 if (keycmp(geo, node, i, key) <= 0) {
345 if (bval(geo, node, i)) {
346 longcpy(__key, bkey(geo, node, i), geo->keylen);
347 return bval(geo, node, i);
348 } else
349 goto miss;
350 }
351 }
352miss:
353 if (retry_key) {
354 __key = retry_key;
355 retry_key = NULL;
356 goto retry;
357 }
358 return NULL;
359}
360
361static int getpos(struct btree_geo *geo, unsigned long *node,
362 unsigned long *key)
363{
364 int i;
365
366 for (i = 0; i < geo->no_pairs; i++) {
367 if (keycmp(geo, node, i, key) <= 0)
368 break;
369 }
370 return i;
371}
372
373static int getfill(struct btree_geo *geo, unsigned long *node, int start)
374{
375 int i;
376
377 for (i = start; i < geo->no_pairs; i++)
378 if (!bval(geo, node, i))
379 break;
380 return i;
381}
382
383/*
384 * locate the correct leaf node in the btree
385 */
386static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
387 unsigned long *key, int level)
388{
389 unsigned long *node = head->node;
390 int i, height;
391
392 for (height = head->height; height > level; height--) {
393 for (i = 0; i < geo->no_pairs; i++)
394 if (keycmp(geo, node, i, key) <= 0)
395 break;
396
397 if ((i == geo->no_pairs) || !bval(geo, node, i)) {
398 /* right-most key is too large, update it */
399 /* FIXME: If the right-most key on higher levels is
400 * always zero, this wouldn't be necessary. */
401 i--;
402 setkey(geo, node, i, key);
403 }
404 BUG_ON(i < 0);
405 node = bval(geo, node, i);
406 }
407 BUG_ON(!node);
408 return node;
409}
410
411static int btree_grow(struct btree_head *head, struct btree_geo *geo,
412 gfp_t gfp)
413{
414 unsigned long *node;
415 int fill;
416
417 node = btree_node_alloc(head, gfp);
418 if (!node)
419 return -ENOMEM;
420 if (head->node) {
421 fill = getfill(geo, head->node, 0);
422 setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
423 setval(geo, node, 0, head->node);
424 }
425 head->node = node;
426 head->height++;
427 return 0;
428}
429
430static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
431{
432 unsigned long *node;
433 int fill;
434
435 if (head->height <= 1)
436 return;
437
438 node = head->node;
439 fill = getfill(geo, node, 0);
440 BUG_ON(fill > 1);
441 head->node = bval(geo, node, 0);
442 head->height--;
443 mempool_free(node, head->mempool);
444}
445
446static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
447 unsigned long *key, void *val, int level,
448 gfp_t gfp)
449{
450 unsigned long *node;
451 int i, pos, fill, err;
452
453 BUG_ON(!val);
454 if (head->height < level) {
455 err = btree_grow(head, geo, gfp);
456 if (err)
457 return err;
458 }
459
460retry:
461 node = find_level(head, geo, key, level);
462 pos = getpos(geo, node, key);
463 fill = getfill(geo, node, pos);
464 /* two identical keys are not allowed */
465 BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
466
467 if (fill == geo->no_pairs) {
468 /* need to split node */
469 unsigned long *new;
470
471 new = btree_node_alloc(head, gfp);
472 if (!new)
473 return -ENOMEM;
474 err = btree_insert_level(head, geo,
475 bkey(geo, node, fill / 2 - 1),
476 new, level + 1, gfp);
477 if (err) {
478 mempool_free(new, head->mempool);
479 return err;
480 }
481 for (i = 0; i < fill / 2; i++) {
482 setkey(geo, new, i, bkey(geo, node, i));
483 setval(geo, new, i, bval(geo, node, i));
484 setkey(geo, node, i, bkey(geo, node, i + fill / 2));
485 setval(geo, node, i, bval(geo, node, i + fill / 2));
486 clearpair(geo, node, i + fill / 2);
487 }
488 if (fill & 1) {
489 setkey(geo, node, i, bkey(geo, node, fill - 1));
490 setval(geo, node, i, bval(geo, node, fill - 1));
491 clearpair(geo, node, fill - 1);
492 }
493 goto retry;
494 }
495 BUG_ON(fill >= geo->no_pairs);
496
497 /* shift and insert */
498 for (i = fill; i > pos; i--) {
499 setkey(geo, node, i, bkey(geo, node, i - 1));
500 setval(geo, node, i, bval(geo, node, i - 1));
501 }
502 setkey(geo, node, pos, key);
503 setval(geo, node, pos, val);
504
505 return 0;
506}
507
508int btree_insert(struct btree_head *head, struct btree_geo *geo,
509 unsigned long *key, void *val, gfp_t gfp)
510{
511 return btree_insert_level(head, geo, key, val, 1, gfp);
512}
513EXPORT_SYMBOL_GPL(btree_insert);
514
515static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
516 unsigned long *key, int level);
517static void merge(struct btree_head *head, struct btree_geo *geo, int level,
518 unsigned long *left, int lfill,
519 unsigned long *right, int rfill,
520 unsigned long *parent, int lpos)
521{
522 int i;
523
524 for (i = 0; i < rfill; i++) {
525 /* Move all keys to the left */
526 setkey(geo, left, lfill + i, bkey(geo, right, i));
527 setval(geo, left, lfill + i, bval(geo, right, i));
528 }
529 /* Exchange left and right child in parent */
530 setval(geo, parent, lpos, right);
531 setval(geo, parent, lpos + 1, left);
532 /* Remove left (formerly right) child from parent */
533 btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
534 mempool_free(right, head->mempool);
535}
536
537static void rebalance(struct btree_head *head, struct btree_geo *geo,
538 unsigned long *key, int level, unsigned long *child, int fill)
539{
540 unsigned long *parent, *left = NULL, *right = NULL;
541 int i, no_left, no_right;
542
543 if (fill == 0) {
544 /* Because we don't steal entries from a neigbour, this case
545 * can happen. Parent node contains a single child, this
546 * node, so merging with a sibling never happens.
547 */
548 btree_remove_level(head, geo, key, level + 1);
549 mempool_free(child, head->mempool);
550 return;
551 }
552
553 parent = find_level(head, geo, key, level + 1);
554 i = getpos(geo, parent, key);
555 BUG_ON(bval(geo, parent, i) != child);
556
557 if (i > 0) {
558 left = bval(geo, parent, i - 1);
559 no_left = getfill(geo, left, 0);
560 if (fill + no_left <= geo->no_pairs) {
561 merge(head, geo, level,
562 left, no_left,
563 child, fill,
564 parent, i - 1);
565 return;
566 }
567 }
568 if (i + 1 < getfill(geo, parent, i)) {
569 right = bval(geo, parent, i + 1);
570 no_right = getfill(geo, right, 0);
571 if (fill + no_right <= geo->no_pairs) {
572 merge(head, geo, level,
573 child, fill,
574 right, no_right,
575 parent, i);
576 return;
577 }
578 }
579 /*
580 * We could also try to steal one entry from the left or right
581 * neighbor. By not doing so we changed the invariant from
582 * "all nodes are at least half full" to "no two neighboring
583 * nodes can be merged". Which means that the average fill of
584 * all nodes is still half or better.
585 */
586}
587
588static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
589 unsigned long *key, int level)
590{
591 unsigned long *node;
592 int i, pos, fill;
593 void *ret;
594
595 if (level > head->height) {
596 /* we recursed all the way up */
597 head->height = 0;
598 head->node = NULL;
599 return NULL;
600 }
601
602 node = find_level(head, geo, key, level);
603 pos = getpos(geo, node, key);
604 fill = getfill(geo, node, pos);
605 if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
606 return NULL;
607 ret = bval(geo, node, pos);
608
609 /* remove and shift */
610 for (i = pos; i < fill - 1; i++) {
611 setkey(geo, node, i, bkey(geo, node, i + 1));
612 setval(geo, node, i, bval(geo, node, i + 1));
613 }
614 clearpair(geo, node, fill - 1);
615
616 if (fill - 1 < geo->no_pairs / 2) {
617 if (level < head->height)
618 rebalance(head, geo, key, level, node, fill - 1);
619 else if (fill - 1 == 1)
620 btree_shrink(head, geo);
621 }
622
623 return ret;
624}
625
626void *btree_remove(struct btree_head *head, struct btree_geo *geo,
627 unsigned long *key)
628{
629 if (head->height == 0)
630 return NULL;
631
632 return btree_remove_level(head, geo, key, 1);
633}
634EXPORT_SYMBOL_GPL(btree_remove);
635
636int btree_merge(struct btree_head *target, struct btree_head *victim,
637 struct btree_geo *geo, gfp_t gfp)
638{
639 unsigned long key[geo->keylen];
640 unsigned long dup[geo->keylen];
641 void *val;
642 int err;
643
644 BUG_ON(target == victim);
645
646 if (!(target->node)) {
647 /* target is empty, just copy fields over */
648 target->node = victim->node;
649 target->height = victim->height;
650 __btree_init(victim);
651 return 0;
652 }
653
654 /* TODO: This needs some optimizations. Currently we do three tree
655 * walks to remove a single object from the victim.
656 */
657 for (;;) {
658 if (!btree_last(victim, geo, key))
659 break;
660 val = btree_lookup(victim, geo, key);
661 err = btree_insert(target, geo, key, val, gfp);
662 if (err)
663 return err;
664 /* We must make a copy of the key, as the original will get
665 * mangled inside btree_remove. */
666 longcpy(dup, key, geo->keylen);
667 btree_remove(victim, geo, dup);
668 }
669 return 0;
670}
671EXPORT_SYMBOL_GPL(btree_merge);
672
673static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
674 unsigned long *node, unsigned long opaque,
675 void (*func)(void *elem, unsigned long opaque,
676 unsigned long *key, size_t index,
677 void *func2),
678 void *func2, int reap, int height, size_t count)
679{
680 int i;
681 unsigned long *child;
682
683 for (i = 0; i < geo->no_pairs; i++) {
684 child = bval(geo, node, i);
685 if (!child)
686 break;
687 if (height > 1)
688 count = __btree_for_each(head, geo, child, opaque,
689 func, func2, reap, height - 1, count);
690 else
691 func(child, opaque, bkey(geo, node, i), count++,
692 func2);
693 }
694 if (reap)
695 mempool_free(node, head->mempool);
696 return count;
697}
698
699static void empty(void *elem, unsigned long opaque, unsigned long *key,
700 size_t index, void *func2)
701{
702}
703
704void visitorl(void *elem, unsigned long opaque, unsigned long *key,
705 size_t index, void *__func)
706{
707 visitorl_t func = __func;
708
709 func(elem, opaque, *key, index);
710}
711EXPORT_SYMBOL_GPL(visitorl);
712
713void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
714 size_t index, void *__func)
715{
716 visitor32_t func = __func;
717 u32 *key = (void *)__key;
718
719 func(elem, opaque, *key, index);
720}
721EXPORT_SYMBOL_GPL(visitor32);
722
723void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
724 size_t index, void *__func)
725{
726 visitor64_t func = __func;
727 u64 *key = (void *)__key;
728
729 func(elem, opaque, *key, index);
730}
731EXPORT_SYMBOL_GPL(visitor64);
732
733void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
734 size_t index, void *__func)
735{
736 visitor128_t func = __func;
737 u64 *key = (void *)__key;
738
739 func(elem, opaque, key[0], key[1], index);
740}
741EXPORT_SYMBOL_GPL(visitor128);
742
743size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
744 unsigned long opaque,
745 void (*func)(void *elem, unsigned long opaque,
746 unsigned long *key,
747 size_t index, void *func2),
748 void *func2)
749{
750 size_t count = 0;
751
752 if (!func2)
753 func = empty;
754 if (head->node)
755 count = __btree_for_each(head, geo, head->node, opaque, func,
756 func2, 0, head->height, 0);
757 return count;
758}
759EXPORT_SYMBOL_GPL(btree_visitor);
760
761size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
762 unsigned long opaque,
763 void (*func)(void *elem, unsigned long opaque,
764 unsigned long *key,
765 size_t index, void *func2),
766 void *func2)
767{
768 size_t count = 0;
769
770 if (!func2)
771 func = empty;
772 if (head->node)
773 count = __btree_for_each(head, geo, head->node, opaque, func,
774 func2, 1, head->height, 0);
775 __btree_init(head);
776 return count;
777}
778EXPORT_SYMBOL_GPL(btree_grim_visitor);
779
780static int __init btree_module_init(void)
781{
782 btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
783 SLAB_HWCACHE_ALIGN, NULL);
784 return 0;
785}
786
787static void __exit btree_module_exit(void)
788{
789 kmem_cache_destroy(btree_cachep);
790}
791
792/* If core code starts using btree, initialization should happen even earlier */
793module_init(btree_module_init);
794module_exit(btree_module_exit);
795
796MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
797MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
798MODULE_LICENSE("GPL");
diff --git a/lib/bug.c b/lib/bug.c
index 300e41afbf97..19552096d16b 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
72 return NULL; 72 return NULL;
73} 73}
74 74
75int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, 75void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
76 struct module *mod) 76 struct module *mod)
77{ 77{
78 char *secstrings; 78 char *secstrings;
79 unsigned int i; 79 unsigned int i;
@@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
97 * could potentially lead to deadlock and thus be counter-productive. 97 * could potentially lead to deadlock and thus be counter-productive.
98 */ 98 */
99 list_add(&mod->bug_list, &module_bug_list); 99 list_add(&mod->bug_list, &module_bug_list);
100
101 return 0;
102} 100}
103 101
104void module_bug_cleanup(struct module *mod) 102void module_bug_cleanup(struct module *mod)
@@ -136,8 +134,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
136 134
137 bug = find_bug(bugaddr); 135 bug = find_bug(bugaddr);
138 136
139 printk(KERN_EMERG "------------[ cut here ]------------\n");
140
141 file = NULL; 137 file = NULL;
142 line = 0; 138 line = 0;
143 warning = 0; 139 warning = 0;
@@ -156,19 +152,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
156 152
157 if (warning) { 153 if (warning) {
158 /* this is a WARN_ON rather than BUG/BUG_ON */ 154 /* this is a WARN_ON rather than BUG/BUG_ON */
155 printk(KERN_WARNING "------------[ cut here ]------------\n");
156
159 if (file) 157 if (file)
160 printk(KERN_ERR "Badness at %s:%u\n", 158 printk(KERN_WARNING "WARNING: at %s:%u\n",
161 file, line); 159 file, line);
162 else 160 else
163 printk(KERN_ERR "Badness at %p " 161 printk(KERN_WARNING "WARNING: at %p "
164 "[verbose debug info unavailable]\n", 162 "[verbose debug info unavailable]\n",
165 (void *)bugaddr); 163 (void *)bugaddr);
166 164
165 print_modules();
167 show_regs(regs); 166 show_regs(regs);
168 add_taint(TAINT_WARN); 167 print_oops_end_marker();
168 add_taint(BUG_GET_TAINT(bug));
169 return BUG_TRAP_TYPE_WARN; 169 return BUG_TRAP_TYPE_WARN;
170 } 170 }
171 171
172 printk(KERN_EMERG "------------[ cut here ]------------\n");
173
172 if (file) 174 if (file)
173 printk(KERN_CRIT "kernel BUG at %s:%u!\n", 175 printk(KERN_CRIT "kernel BUG at %s:%u!\n",
174 file, line); 176 file, line);
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644
index 000000000000..4dc20321b0d5
--- /dev/null
+++ b/lib/cpu-notifier-error-inject.c
@@ -0,0 +1,63 @@
1#include <linux/kernel.h>
2#include <linux/cpu.h>
3#include <linux/module.h>
4#include <linux/notifier.h>
5
6static int priority;
7static int cpu_up_prepare_error;
8static int cpu_down_prepare_error;
9
10module_param(priority, int, 0);
11MODULE_PARM_DESC(priority, "specify cpu notifier priority");
12
13module_param(cpu_up_prepare_error, int, 0644);
14MODULE_PARM_DESC(cpu_up_prepare_error,
15 "specify error code to inject CPU_UP_PREPARE action");
16
17module_param(cpu_down_prepare_error, int, 0644);
18MODULE_PARM_DESC(cpu_down_prepare_error,
19 "specify error code to inject CPU_DOWN_PREPARE action");
20
21static int err_inject_cpu_callback(struct notifier_block *nfb,
22 unsigned long action, void *hcpu)
23{
24 int err = 0;
25
26 switch (action) {
27 case CPU_UP_PREPARE:
28 case CPU_UP_PREPARE_FROZEN:
29 err = cpu_up_prepare_error;
30 break;
31 case CPU_DOWN_PREPARE:
32 case CPU_DOWN_PREPARE_FROZEN:
33 err = cpu_down_prepare_error;
34 break;
35 }
36 if (err)
37 printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
38
39 return notifier_from_errno(err);
40}
41
42static struct notifier_block err_inject_cpu_notifier = {
43 .notifier_call = err_inject_cpu_callback,
44};
45
46static int err_inject_init(void)
47{
48 err_inject_cpu_notifier.priority = priority;
49
50 return register_hotcpu_notifier(&err_inject_cpu_notifier);
51}
52
53static void err_inject_exit(void)
54{
55 unregister_hotcpu_notifier(&err_inject_cpu_notifier);
56}
57
58module_init(err_inject_init);
59module_exit(err_inject_exit);
60
61MODULE_DESCRIPTION("CPU notifier error injection module");
62MODULE_LICENSE("GPL");
63MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 7bb4142a502f..05d6aca7fc19 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -1,3 +1,4 @@
1#include <linux/slab.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/bitops.h> 3#include <linux/bitops.h>
3#include <linux/cpumask.h> 4#include <linux/cpumask.h>
diff --git a/lib/crc32.c b/lib/crc32.c
index 02e3b31b3a79..4855995fcde9 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -25,16 +25,19 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/compiler.h> 26#include <linux/compiler.h>
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/init.h> 28#include <linux/init.h>
30#include <asm/atomic.h> 29#include <asm/atomic.h>
31#include "crc32defs.h" 30#include "crc32defs.h"
32#if CRC_LE_BITS == 8 31#if CRC_LE_BITS == 8
33#define tole(x) __constant_cpu_to_le32(x) 32# define tole(x) __constant_cpu_to_le32(x)
34#define tobe(x) __constant_cpu_to_be32(x)
35#else 33#else
36#define tole(x) (x) 34# define tole(x) (x)
37#define tobe(x) (x) 35#endif
36
37#if CRC_BE_BITS == 8
38# define tobe(x) __constant_cpu_to_be32(x)
39#else
40# define tobe(x) (x)
38#endif 41#endif
39#include "crc32table.h" 42#include "crc32table.h"
40 43
@@ -45,33 +48,37 @@ MODULE_LICENSE("GPL");
45#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 48#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
46 49
47static inline u32 50static inline u32
48crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab) 51crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
49{ 52{
50# ifdef __LITTLE_ENDIAN 53# ifdef __LITTLE_ENDIAN
51# define DO_CRC(x) crc = tab[(crc ^ (x)) & 255 ] ^ (crc >> 8) 54# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
55# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
56 tab[2][(crc >> 8) & 255] ^ \
57 tab[1][(crc >> 16) & 255] ^ \
58 tab[0][(crc >> 24) & 255]
52# else 59# else
53# define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) 60# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
61# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
62 tab[1][(crc >> 8) & 255] ^ \
63 tab[2][(crc >> 16) & 255] ^ \
64 tab[3][(crc >> 24) & 255]
54# endif 65# endif
55 const u32 *b = (const u32 *)buf; 66 const u32 *b;
56 size_t rem_len; 67 size_t rem_len;
57 68
58 /* Align it */ 69 /* Align it */
59 if (unlikely((long)b & 3 && len)) { 70 if (unlikely((long)buf & 3 && len)) {
60 u8 *p = (u8 *)b;
61 do { 71 do {
62 DO_CRC(*p++); 72 DO_CRC(*buf++);
63 } while ((--len) && ((long)p)&3); 73 } while ((--len) && ((long)buf)&3);
64 b = (u32 *)p;
65 } 74 }
66 rem_len = len & 3; 75 rem_len = len & 3;
67 /* load data 32 bits wide, xor data 32 bits wide. */ 76 /* load data 32 bits wide, xor data 32 bits wide. */
68 len = len >> 2; 77 len = len >> 2;
78 b = (const u32 *)buf;
69 for (--b; len; --len) { 79 for (--b; len; --len) {
70 crc ^= *++b; /* use pre increment for speed */ 80 crc ^= *++b; /* use pre increment for speed */
71 DO_CRC(0); 81 DO_CRC4;
72 DO_CRC(0);
73 DO_CRC(0);
74 DO_CRC(0);
75 } 82 }
76 len = rem_len; 83 len = rem_len;
77 /* And the last few bytes */ 84 /* And the last few bytes */
@@ -82,6 +89,8 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
82 } while (--len); 89 } while (--len);
83 } 90 }
84 return crc; 91 return crc;
92#undef DO_CRC
93#undef DO_CRC4
85} 94}
86#endif 95#endif
87/** 96/**
@@ -114,14 +123,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
114u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) 123u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
115{ 124{
116# if CRC_LE_BITS == 8 125# if CRC_LE_BITS == 8
117 const u32 *tab = crc32table_le; 126 const u32 (*tab)[] = crc32table_le;
118 127
119 crc = __cpu_to_le32(crc); 128 crc = __cpu_to_le32(crc);
120 crc = crc32_body(crc, p, len, tab); 129 crc = crc32_body(crc, p, len, tab);
121 return __le32_to_cpu(crc); 130 return __le32_to_cpu(crc);
122#undef ENDIAN_SHIFT
123#undef DO_CRC
124
125# elif CRC_LE_BITS == 4 131# elif CRC_LE_BITS == 4
126 while (len--) { 132 while (len--) {
127 crc ^= *p++; 133 crc ^= *p++;
@@ -174,14 +180,11 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
174u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) 180u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
175{ 181{
176# if CRC_BE_BITS == 8 182# if CRC_BE_BITS == 8
177 const u32 *tab = crc32table_be; 183 const u32 (*tab)[] = crc32table_be;
178 184
179 crc = __cpu_to_be32(crc); 185 crc = __cpu_to_be32(crc);
180 crc = crc32_body(crc, p, len, tab); 186 crc = crc32_body(crc, p, len, tab);
181 return __be32_to_cpu(crc); 187 return __be32_to_cpu(crc);
182#undef ENDIAN_SHIFT
183#undef DO_CRC
184
185# elif CRC_BE_BITS == 4 188# elif CRC_BE_BITS == 4
186 while (len--) { 189 while (len--) {
187 crc ^= *p++ << 24; 190 crc ^= *p++ << 24;
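The crc32_body() change above switches from one table lookup per input byte to slicing-by-4: four 256-entry tables let one aligned 32-bit word be folded into the CRC per loop iteration (the DO_CRC4 macro). The stand-alone user-space sketch below is not the kernel code; all names are invented, alignment handling is omitted, and it only illustrates how the extra tables are derived (mirroring the gen_crc32table.c change further down) and how DO_CRC4 combines them in the little-endian case.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint32_t tab[4][256];

    static void init_tables(void)
    {
            uint32_t crc;
            int i, j;

            /* Classic byte-at-a-time table (reversed polynomial 0xedb88320). */
            for (i = 0; i < 256; i++) {
                    crc = i;
                    for (j = 0; j < 8; j++)
                            crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
                    tab[0][i] = crc;
            }
            /* Derived tables, same recurrence as the new gen_crc32table.c. */
            for (i = 0; i < 256; i++)
                    for (j = 1; j < 4; j++)
                            tab[j][i] = tab[0][tab[j - 1][i] & 0xff] ^ (tab[j - 1][i] >> 8);
    }

    static uint32_t crc32_le_by4(uint32_t crc, const unsigned char *p, size_t len)
    {
            crc = ~crc;
            while (len >= 4) {
                    uint32_t w;

                    memcpy(&w, p, 4);       /* assumes a little-endian host */
                    crc ^= w;
                    /* The DO_CRC4 step: one lookup per byte of the running CRC. */
                    crc = tab[3][crc & 0xff] ^ tab[2][(crc >> 8) & 0xff] ^
                          tab[1][(crc >> 16) & 0xff] ^ tab[0][(crc >> 24) & 0xff];
                    p += 4;
                    len -= 4;
            }
            while (len--)                   /* trailing bytes, one at a time */
                    crc = tab[0][(crc ^ *p++) & 0xff] ^ (crc >> 8);
            return ~crc;
    }

    int main(void)
    {
            init_tables();
            /* Prints cbf43926, the standard CRC-32 check value. */
            printf("%08x\n", (unsigned)crc32_le_by4(0, (const unsigned char *)"123456789", 9));
            return 0;
    }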
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b11731b9c..b1c177307677 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -8,7 +8,6 @@
8 * 8 *
9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
10 */ 10 */
11#include <linux/kernel.h>
12#include <linux/rwsem.h> 11#include <linux/rwsem.h>
13#include <linux/mutex.h> 12#include <linux/mutex.h>
14#include <linux/module.h> 13#include <linux/module.h>
@@ -23,6 +22,7 @@
23 * shut up after that. 22 * shut up after that.
24 */ 23 */
25int debug_locks = 1; 24int debug_locks = 1;
25EXPORT_SYMBOL_GPL(debug_locks);
26 26
27/* 27/*
28 * The locking-testsuite uses <debug_locks_silent> to get a 28 * The locking-testsuite uses <debug_locks_silent> to get a
@@ -38,7 +38,6 @@ int debug_locks_off(void)
38{ 38{
39 if (__debug_locks_off()) { 39 if (__debug_locks_off()) {
40 if (!debug_locks_silent) { 40 if (!debug_locks_silent) {
41 oops_in_progress = 1;
42 console_verbose(); 41 console_verbose();
43 return 1; 42 return 1;
44 } 43 }
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a9a8996d286a..deebcc57d4e6 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -12,6 +12,7 @@
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <linux/debugfs.h> 14#include <linux/debugfs.h>
15#include <linux/slab.h>
15#include <linux/hash.h> 16#include <linux/hash.h>
16 17
17#define ODEBUG_HASH_BITS 14 18#define ODEBUG_HASH_BITS 14
@@ -140,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
140 obj->object = addr; 141 obj->object = addr;
141 obj->descr = descr; 142 obj->descr = descr;
142 obj->state = ODEBUG_STATE_NONE; 143 obj->state = ODEBUG_STATE_NONE;
144 obj->astate = 0;
143 hlist_del(&obj->node); 145 hlist_del(&obj->node);
144 146
145 hlist_add_head(&obj->node, &b->list); 147 hlist_add_head(&obj->node, &b->list);
@@ -251,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
251 253
252 if (limit < 5 && obj->descr != descr_test) { 254 if (limit < 5 && obj->descr != descr_test) {
253 limit++; 255 limit++;
254 WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, 256 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
255 obj_states[obj->state], obj->descr->name); 257 "object type: %s\n",
258 msg, obj_states[obj->state], obj->astate,
259 obj->descr->name);
256 } 260 }
257 debug_objects_warnings++; 261 debug_objects_warnings++;
258} 262}
@@ -446,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
446 case ODEBUG_STATE_INIT: 450 case ODEBUG_STATE_INIT:
447 case ODEBUG_STATE_INACTIVE: 451 case ODEBUG_STATE_INACTIVE:
448 case ODEBUG_STATE_ACTIVE: 452 case ODEBUG_STATE_ACTIVE:
449 obj->state = ODEBUG_STATE_INACTIVE; 453 if (!obj->astate)
454 obj->state = ODEBUG_STATE_INACTIVE;
455 else
456 debug_print_object(obj, "deactivate");
450 break; 457 break;
451 458
452 case ODEBUG_STATE_DESTROYED: 459 case ODEBUG_STATE_DESTROYED:
@@ -552,6 +559,53 @@ out_unlock:
552 raw_spin_unlock_irqrestore(&db->lock, flags); 559 raw_spin_unlock_irqrestore(&db->lock, flags);
553} 560}
554 561
562/**
563 * debug_object_active_state - debug checks object usage state machine
564 * @addr: address of the object
565 * @descr: pointer to an object specific debug description structure
566 * @expect: expected state
567 * @next: state to move to if expected state is found
568 */
569void
570debug_object_active_state(void *addr, struct debug_obj_descr *descr,
571 unsigned int expect, unsigned int next)
572{
573 struct debug_bucket *db;
574 struct debug_obj *obj;
575 unsigned long flags;
576
577 if (!debug_objects_enabled)
578 return;
579
580 db = get_bucket((unsigned long) addr);
581
582 raw_spin_lock_irqsave(&db->lock, flags);
583
584 obj = lookup_object(addr, db);
585 if (obj) {
586 switch (obj->state) {
587 case ODEBUG_STATE_ACTIVE:
588 if (obj->astate == expect)
589 obj->astate = next;
590 else
591 debug_print_object(obj, "active_state");
592 break;
593
594 default:
595 debug_print_object(obj, "active_state");
596 break;
597 }
598 } else {
599 struct debug_obj o = { .object = addr,
600 .state = ODEBUG_STATE_NOTAVAILABLE,
601 .descr = descr };
602
603 debug_print_object(&o, "active_state");
604 }
605
606 raw_spin_unlock_irqrestore(&db->lock, flags);
607}
608
555#ifdef CONFIG_DEBUG_OBJECTS_FREE 609#ifdef CONFIG_DEBUG_OBJECTS_FREE
556static void __debug_check_no_obj_freed(const void *address, unsigned long size) 610static void __debug_check_no_obj_freed(const void *address, unsigned long size)
557{ 611{
@@ -773,7 +827,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state)
773 } 827 }
774} 828}
775 829
776static int 830static int __init
777check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) 831check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
778{ 832{
779 struct debug_bucket *db; 833 struct debug_bucket *db;
@@ -916,7 +970,7 @@ void __init debug_objects_early_init(void)
916/* 970/*
917 * Convert the statically allocated objects to dynamic ones: 971 * Convert the statically allocated objects to dynamic ones:
918 */ 972 */
919static int debug_objects_replace_static_objects(void) 973static int __init debug_objects_replace_static_objects(void)
920{ 974{
921 struct debug_bucket *db = obj_hash; 975 struct debug_bucket *db = obj_hash;
922 struct hlist_node *node, *tmp; 976 struct hlist_node *node, *tmp;
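The new debug_object_active_state() lets a subsystem track an extra usage state (astate) while an object is ODEBUG_STATE_ACTIVE, and the debug_object_deactivate() hunk refuses to deactivate an object whose astate is non-zero. A hedged sketch of the intended calling pattern follows; the descriptor, state names and functions below are invented for illustration, and only the debug_object_active_state() call itself comes from this patch.

    #include <linux/debugobjects.h>

    /* Hypothetical extra life cycle tracked on top of "active"; astate
     * starts at 0, so the initial state must be 0. */
    enum { MY_OBJ_IDLE = 0, MY_OBJ_QUEUED = 1 };

    static struct debug_obj_descr my_obj_descr = {
            .name = "my_obj",
    };

    static void my_obj_queue(void *obj)
    {
            /* Warn unless the object is active and still MY_OBJ_IDLE,
             * then advance it to MY_OBJ_QUEUED. */
            debug_object_active_state(obj, &my_obj_descr, MY_OBJ_IDLE, MY_OBJ_QUEUED);
            /* ... hand the object to the queueing machinery ... */
    }

    static void my_obj_dequeue(void *obj)
    {
            /* Reverse transition; with astate back at 0 the object can be
             * deactivated again. */
            debug_object_active_state(obj, &my_obj_descr, MY_OBJ_QUEUED, MY_OBJ_IDLE);
    }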
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index a4e971dee102..81c8bb1cc6aa 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -107,6 +107,8 @@ struct bunzip_data {
107 unsigned char selectors[32768]; /* nSelectors = 15 bits */ 107 unsigned char selectors[32768]; /* nSelectors = 15 bits */
108 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ 108 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
109 int io_error; /* non-zero if we have IO error */ 109 int io_error; /* non-zero if we have IO error */
110 int byteCount[256];
111 unsigned char symToByte[256], mtfSymbol[256];
110}; 112};
111 113
112 114
@@ -158,14 +160,16 @@ static int INIT get_next_block(struct bunzip_data *bd)
158 int *base = NULL; 160 int *base = NULL;
159 int *limit = NULL; 161 int *limit = NULL;
160 int dbufCount, nextSym, dbufSize, groupCount, selector, 162 int dbufCount, nextSym, dbufSize, groupCount, selector,
161 i, j, k, t, runPos, symCount, symTotal, nSelectors, 163 i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
162 byteCount[256]; 164 unsigned char uc, *symToByte, *mtfSymbol, *selectors;
163 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
164 unsigned int *dbuf, origPtr; 165 unsigned int *dbuf, origPtr;
165 166
166 dbuf = bd->dbuf; 167 dbuf = bd->dbuf;
167 dbufSize = bd->dbufSize; 168 dbufSize = bd->dbufSize;
168 selectors = bd->selectors; 169 selectors = bd->selectors;
170 byteCount = bd->byteCount;
171 symToByte = bd->symToByte;
172 mtfSymbol = bd->mtfSymbol;
169 173
170 /* Read in header signature and CRC, then validate signature. 174 /* Read in header signature and CRC, then validate signature.
171 (last block signature means CRC is for whole file, return now) */ 175 (last block signature means CRC is for whole file, return now) */
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index db521f45626e..bcb3a4bd68ff 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -97,7 +97,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
97 u32 src_len, dst_len; 97 u32 src_len, dst_len;
98 size_t tmp; 98 size_t tmp;
99 u8 *in_buf, *in_buf_save, *out_buf; 99 u8 *in_buf, *in_buf_save, *out_buf;
100 int obytes_processed = 0; 100 int ret = -1;
101 101
102 set_error_fn(error_fn); 102 set_error_fn(error_fn);
103 103
@@ -174,15 +174,22 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
174 174
175 /* decompress */ 175 /* decompress */
176 tmp = dst_len; 176 tmp = dst_len;
177 r = lzo1x_decompress_safe((u8 *) in_buf, src_len, 177
178 /* When the input data is not compressed at all,
179 * lzo1x_decompress_safe will fail, so call memcpy()
180 * instead */
181 if (unlikely(dst_len == src_len))
182 memcpy(out_buf, in_buf, src_len);
183 else {
184 r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
178 out_buf, &tmp); 185 out_buf, &tmp);
179 186
180 if (r != LZO_E_OK || dst_len != tmp) { 187 if (r != LZO_E_OK || dst_len != tmp) {
181 error("Compressed data violation"); 188 error("Compressed data violation");
182 goto exit_2; 189 goto exit_2;
190 }
183 } 191 }
184 192
185 obytes_processed += dst_len;
186 if (flush) 193 if (flush)
187 flush(out_buf, dst_len); 194 flush(out_buf, dst_len);
188 if (output) 195 if (output)
@@ -196,6 +203,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
196 in_buf += src_len; 203 in_buf += src_len;
197 } 204 }
198 205
206 ret = 0;
199exit_2: 207exit_2:
200 if (!input) 208 if (!input)
201 free(in_buf); 209 free(in_buf);
@@ -203,7 +211,7 @@ exit_1:
203 if (!output) 211 if (!output)
204 free(out_buf); 212 free(out_buf);
205exit: 213exit:
206 return obytes_processed; 214 return ret;
207} 215}
208 216
209#define decompress unlzo 217#define decompress unlzo
diff --git a/lib/devres.c b/lib/devres.c
index 72c8909006da..6efddf53b90c 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,5 +1,6 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/io.h> 2#include <linux/io.h>
3#include <linux/gfp.h>
3#include <linux/module.h> 4#include <linux/module.h>
4 5
5void devm_ioremap_release(struct device *dev, void *res) 6void devm_ioremap_release(struct device *dev, void *res)
@@ -327,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
327 * @pdev: PCI device to map IO resources for 328 * @pdev: PCI device to map IO resources for
328 * @mask: Mask of BARs to unmap and release 329 * @mask: Mask of BARs to unmap and release
329 * 330 *
330 * Unamp and release regions specified by @mask. 331 * Unmap and release regions specified by @mask.
331 */ 332 */
332void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) 333void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
333{ 334{
diff --git a/lib/div64.c b/lib/div64.c
index a111eb8de9cf..5b4919191778 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -77,26 +77,58 @@ s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
77EXPORT_SYMBOL(div_s64_rem); 77EXPORT_SYMBOL(div_s64_rem);
78#endif 78#endif
79 79
80/* 64bit divisor, dividend and result. dynamic precision */ 80/**
81 * div64_u64 - unsigned 64bit divide with 64bit divisor
82 * @dividend: 64bit dividend
83 * @divisor: 64bit divisor
84 *
85 * This implementation is a modified version of the algorithm proposed
86 * by the book 'Hacker's Delight'. The original source and full proof
87 * can be found here and are available for use without restriction.
88 *
89 * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c'
90 */
81#ifndef div64_u64 91#ifndef div64_u64
82u64 div64_u64(u64 dividend, u64 divisor) 92u64 div64_u64(u64 dividend, u64 divisor)
83{ 93{
84 u32 high, d; 94 u32 high = divisor >> 32;
95 u64 quot;
85 96
86 high = divisor >> 32; 97 if (high == 0) {
87 if (high) { 98 quot = div_u64(dividend, divisor);
88 unsigned int shift = fls(high); 99 } else {
100 int n = 1 + fls(high);
101 quot = div_u64(dividend >> n, divisor >> n);
89 102
90 d = divisor >> shift; 103 if (quot != 0)
91 dividend >>= shift; 104 quot--;
92 } else 105 if ((dividend - quot * divisor) >= divisor)
93 d = divisor; 106 quot++;
107 }
94 108
95 return div_u64(dividend, d); 109 return quot;
96} 110}
97EXPORT_SYMBOL(div64_u64); 111EXPORT_SYMBOL(div64_u64);
98#endif 112#endif
99 113
114/**
115 * div64_s64 - signed 64bit divide with 64bit divisor
116 * @dividend: 64bit dividend
117 * @divisor: 64bit divisor
118 */
119#ifndef div64_s64
120s64 div64_s64(s64 dividend, s64 divisor)
121{
122 s64 quot, t;
123
124 quot = div64_u64(abs64(dividend), abs64(divisor));
125 t = (dividend ^ divisor) >> 63;
126
127 return (quot ^ t) - t;
128}
129EXPORT_SYMBOL(div64_s64);
130#endif
131
100#endif /* BITS_PER_LONG == 32 */ 132#endif /* BITS_PER_LONG == 32 */
101 133
102/* 134/*
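A user-space rendition of the new div64_u64() (plus the sign handling in div64_s64()) may make the algorithm easier to follow: when the divisor has bits above 2^32, both operands are shifted right until the divisor fits in 32 bits, a 64/32 estimate is taken, and that estimate is corrected by at most one. This is only an illustration; the names are invented, and div_u64(), fls() and abs64() are emulated with plain C.

    #include <stdint.h>
    #include <stdio.h>

    static int fls32(uint32_t x)            /* index of highest set bit, 1..32, 0 for x == 0 */
    {
            return x ? 32 - __builtin_clz(x) : 0;
    }

    static uint64_t my_div64_u64(uint64_t dividend, uint64_t divisor)
    {
            uint32_t high = divisor >> 32;
            uint64_t quot;

            if (high == 0) {
                    quot = dividend / (uint32_t)divisor;    /* the plain div_u64() case */
            } else {
                    int n = 1 + fls32(high);        /* shift so the divisor fits in 32 bits */

                    quot = (dividend >> n) / (uint32_t)(divisor >> n);
                    if (quot != 0)          /* raw estimate is exact or one too large... */
                            quot--;
                    if (dividend - quot * divisor >= divisor)
                            quot++;         /* ...after the decrement it may be one too small */
            }
            return quot;
    }

    static int64_t my_div64_s64(int64_t dividend, int64_t divisor)
    {
            uint64_t a = dividend < 0 ? -(uint64_t)dividend : (uint64_t)dividend;  /* abs64() */
            uint64_t b = divisor  < 0 ? -(uint64_t)divisor  : (uint64_t)divisor;
            int64_t t = (dividend ^ divisor) >> 63;         /* all ones iff the signs differ */

            return ((int64_t)my_div64_u64(a, b) ^ t) - t;   /* conditional negation */
    }

    int main(void)
    {
            printf("%llu\n", (unsigned long long)my_div64_u64(~0ULL, 3000000000ULL));
            printf("%lld\n", (long long)my_div64_s64(-10, 3));      /* prints -3 */
            return 0;
    }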
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 7d2f0b33e5a8..4bfb0471f106 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -570,7 +570,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
570 * Now parse out the first token and use it as the name for the 570 * Now parse out the first token and use it as the name for the
571 * driver to filter for. 571 * driver to filter for.
572 */ 572 */
573 for (i = 0; i < NAME_MAX_LEN; ++i) { 573 for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
574 current_driver_name[i] = buf[i]; 574 current_driver_name[i] = buf[i];
575 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) 575 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
576 break; 576 break;
@@ -587,9 +587,10 @@ out_unlock:
587 return count; 587 return count;
588} 588}
589 589
590const struct file_operations filter_fops = { 590static const struct file_operations filter_fops = {
591 .read = filter_read, 591 .read = filter_read,
592 .write = filter_write, 592 .write = filter_write,
593 .llseek = default_llseek,
593}; 594};
594 595
595static int dma_debug_fs_init(void) 596static int dma_debug_fs_init(void)
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index f93502915988..3094318bfea7 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -25,19 +25,12 @@
25#include <linux/uaccess.h> 25#include <linux/uaccess.h>
26#include <linux/dynamic_debug.h> 26#include <linux/dynamic_debug.h>
27#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/slab.h>
29#include <linux/jump_label.h>
28 30
29extern struct _ddebug __start___verbose[]; 31extern struct _ddebug __start___verbose[];
30extern struct _ddebug __stop___verbose[]; 32extern struct _ddebug __stop___verbose[];
31 33
32/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which
33 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
34 * use independent hash functions, to reduce the chance of false positives.
35 */
36long long dynamic_debug_enabled;
37EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
38long long dynamic_debug_enabled2;
39EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
40
41struct ddebug_table { 34struct ddebug_table {
42 struct list_head link; 35 struct list_head link;
43 char *mod_name; 36 char *mod_name;
@@ -87,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
87} 80}
88 81
89/* 82/*
90 * must be called with ddebug_lock held
91 */
92
93static int disabled_hash(char hash, bool first_table)
94{
95 struct ddebug_table *dt;
96 char table_hash_value;
97
98 list_for_each_entry(dt, &ddebug_tables, link) {
99 if (first_table)
100 table_hash_value = dt->ddebugs->primary_hash;
101 else
102 table_hash_value = dt->ddebugs->secondary_hash;
103 if (dt->num_enabled && (hash == table_hash_value))
104 return 0;
105 }
106 return 1;
107}
108
109/*
110 * Search the tables for _ddebug's which match the given 83 * Search the tables for _ddebug's which match the given
111 * `query' and apply the `flags' and `mask' to them. Tells 84 * `query' and apply the `flags' and `mask' to them. Tells
112 * the user which ddebug's were changed, or whether none 85 * the user which ddebug's were changed, or whether none
@@ -169,17 +142,9 @@ static void ddebug_change(const struct ddebug_query *query,
169 dt->num_enabled++; 142 dt->num_enabled++;
170 dp->flags = newflags; 143 dp->flags = newflags;
171 if (newflags) { 144 if (newflags) {
172 dynamic_debug_enabled |= 145 jump_label_enable(&dp->enabled);
173 (1LL << dp->primary_hash);
174 dynamic_debug_enabled2 |=
175 (1LL << dp->secondary_hash);
176 } else { 146 } else {
177 if (disabled_hash(dp->primary_hash, true)) 147 jump_label_disable(&dp->enabled);
178 dynamic_debug_enabled &=
179 ~(1LL << dp->primary_hash);
180 if (disabled_hash(dp->secondary_hash, false))
181 dynamic_debug_enabled2 &=
182 ~(1LL << dp->secondary_hash);
183 } 148 }
184 if (verbose) 149 if (verbose)
185 printk(KERN_INFO 150 printk(KERN_INFO
@@ -428,6 +393,40 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
428 return 0; 393 return 0;
429} 394}
430 395
396static int ddebug_exec_query(char *query_string)
397{
398 unsigned int flags = 0, mask = 0;
399 struct ddebug_query query;
400#define MAXWORDS 9
401 int nwords;
402 char *words[MAXWORDS];
403
404 nwords = ddebug_tokenize(query_string, words, MAXWORDS);
405 if (nwords <= 0)
406 return -EINVAL;
407 if (ddebug_parse_query(words, nwords-1, &query))
408 return -EINVAL;
409 if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
410 return -EINVAL;
411
412 /* actually go and implement the change */
413 ddebug_change(&query, flags, mask);
414 return 0;
415}
416
417static __initdata char ddebug_setup_string[1024];
418static __init int ddebug_setup_query(char *str)
419{
420 if (strlen(str) >= 1024) {
421 pr_warning("ddebug boot param string too large\n");
422 return 0;
423 }
424 strcpy(ddebug_setup_string, str);
425 return 1;
426}
427
428__setup("ddebug_query=", ddebug_setup_query);
429
431/* 430/*
432 * File_ops->write method for <debugfs>/dynamic_debug/control. Gathers the 431
433 * command text from userspace, parses and executes it. 432 * command text from userspace, parses and executes it.
@@ -435,12 +434,8 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
435static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, 434static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
436 size_t len, loff_t *offp) 435 size_t len, loff_t *offp)
437{ 436{
438 unsigned int flags = 0, mask = 0;
439 struct ddebug_query query;
440#define MAXWORDS 9
441 int nwords;
442 char *words[MAXWORDS];
443 char tmpbuf[256]; 437 char tmpbuf[256];
438 int ret;
444 439
445 if (len == 0) 440 if (len == 0)
446 return 0; 441 return 0;
@@ -454,16 +449,9 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
454 printk(KERN_INFO "%s: read %d bytes from userspace\n", 449 printk(KERN_INFO "%s: read %d bytes from userspace\n",
455 __func__, (int)len); 450 __func__, (int)len);
456 451
457 nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); 452 ret = ddebug_exec_query(tmpbuf);
458 if (nwords < 0) 453 if (ret)
459 return -EINVAL; 454 return ret;
460 if (ddebug_parse_query(words, nwords-1, &query))
461 return -EINVAL;
462 if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
463 return -EINVAL;
464
465 /* actually go and implement the change */
466 ddebug_change(&query, flags, mask);
467 455
468 *offp += len; 456 *offp += len;
469 return len; 457 return len;
@@ -691,7 +679,7 @@ static void ddebug_table_free(struct ddebug_table *dt)
691 * Called in response to a module being unloaded. Removes 679 * Called in response to a module being unloaded. Removes
692 * any ddebug_table's which point at the module. 680 * any ddebug_table's which point at the module.
693 */ 681 */
694int ddebug_remove_module(char *mod_name) 682int ddebug_remove_module(const char *mod_name)
695{ 683{
696 struct ddebug_table *dt, *nextdt; 684 struct ddebug_table *dt, *nextdt;
697 int ret = -ENOENT; 685 int ret = -ENOENT;
@@ -724,13 +712,14 @@ static void ddebug_remove_all_tables(void)
724 mutex_unlock(&ddebug_lock); 712 mutex_unlock(&ddebug_lock);
725} 713}
726 714
727static int __init dynamic_debug_init(void) 715static __initdata int ddebug_init_success;
716
717static int __init dynamic_debug_init_debugfs(void)
728{ 718{
729 struct dentry *dir, *file; 719 struct dentry *dir, *file;
730 struct _ddebug *iter, *iter_start; 720
731 const char *modname = NULL; 721 if (!ddebug_init_success)
732 int ret = 0; 722 return -ENODEV;
733 int n = 0;
734 723
735 dir = debugfs_create_dir("dynamic_debug", NULL); 724 dir = debugfs_create_dir("dynamic_debug", NULL);
736 if (!dir) 725 if (!dir)
@@ -741,6 +730,16 @@ static int __init dynamic_debug_init(void)
741 debugfs_remove(dir); 730 debugfs_remove(dir);
742 return -ENOMEM; 731 return -ENOMEM;
743 } 732 }
733 return 0;
734}
735
736static int __init dynamic_debug_init(void)
737{
738 struct _ddebug *iter, *iter_start;
739 const char *modname = NULL;
740 int ret = 0;
741 int n = 0;
742
744 if (__start___verbose != __stop___verbose) { 743 if (__start___verbose != __stop___verbose) {
745 iter = __start___verbose; 744 iter = __start___verbose;
746 modname = iter->modname; 745 modname = iter->modname;
@@ -758,12 +757,26 @@ static int __init dynamic_debug_init(void)
758 } 757 }
759 ret = ddebug_add_module(iter_start, n, modname); 758 ret = ddebug_add_module(iter_start, n, modname);
760 } 759 }
760
761 /* ddebug_query boot param got passed -> set it up */
762 if (ddebug_setup_string[0] != '\0') {
763 ret = ddebug_exec_query(ddebug_setup_string);
764 if (ret)
765 pr_warning("Invalid ddebug boot param %s",
766 ddebug_setup_string);
767 else
768 pr_info("ddebug initialized with string %s",
769 ddebug_setup_string);
770 }
771
761out_free: 772out_free:
762 if (ret) { 773 if (ret)
763 ddebug_remove_all_tables(); 774 ddebug_remove_all_tables();
764 debugfs_remove(dir); 775 else
765 debugfs_remove(file); 776 ddebug_init_success = 1;
766 }
767 return 0; 777 return 0;
768} 778}
769module_init(dynamic_debug_init); 779/* Allow early initialization for boot messages via boot param */
780arch_initcall(dynamic_debug_init);
781/* Debugfs setup must be done later */
782module_init(dynamic_debug_init_debugfs);
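The practical effect of the new ddebug_query= handling: a single query in the documented dynamic-debug grammar (keywords such as module, file, func and line, plus flag changes like +p) can now be passed on the kernel command line and is applied by dynamic_debug_init() at arch_initcall time, long before the debugfs control file exists. A hedged example, where mymodule is only a placeholder name:

    ddebug_query="module mymodule +p"

After boot the same query string can still be written to <debugfs>/dynamic_debug/control through ddebug_proc_write(), since both paths now go through ddebug_exec_query().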
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 66eef2e4483e..77a6fea7481e 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -99,7 +99,7 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
99 ret->element_size = element_size; 99 ret->element_size = element_size;
100 ret->total_nr_elements = total; 100 ret->total_nr_elements = total;
101 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) 101 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
102 memset(ret->parts[0], FLEX_ARRAY_FREE, 102 memset(&ret->parts[0], FLEX_ARRAY_FREE,
103 FLEX_ARRAY_BASE_BYTES_LEFT); 103 FLEX_ARRAY_BASE_BYTES_LEFT);
104 return ret; 104 return ret;
105} 105}
@@ -171,6 +171,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
171 * Note that this *copies* the contents of @src into 171 * Note that this *copies* the contents of @src into
172 * the array. If you are trying to store an array of 172 * the array. If you are trying to store an array of
173 * pointers, make sure to pass in &ptr instead of ptr. 173 * pointers, make sure to pass in &ptr instead of ptr.
174 * You may instead wish to use the flex_array_put_ptr()
175 * helper function.
174 * 176 *
175 * Locking must be provided by the caller. 177 * Locking must be provided by the caller.
176 */ 178 */
@@ -265,7 +267,8 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
265 * 267 *
266 * Returns a pointer to the data at index @element_nr. Note 268 * Returns a pointer to the data at index @element_nr. Note
267 * that this is a copy of the data that was passed in. If you 269 * that this is a copy of the data that was passed in. If you
268 * are using this to store pointers, you'll get back &ptr. 270 * are using this to store pointers, you'll get back &ptr. You
271 * may instead wish to use the flex_array_get_ptr helper.
269 * 272 *
270 * Locking must be provided by the caller. 273 * Locking must be provided by the caller.
271 */ 274 */
@@ -286,6 +289,26 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
286 return &part->elements[index_inside_part(fa, element_nr)]; 289 return &part->elements[index_inside_part(fa, element_nr)];
287} 290}
288 291
292/**
293 * flex_array_get_ptr - pull a ptr back out of the array
294 * @fa: the flex array from which to extract data
295 * @element_nr: index of the element to fetch from the array
296 *
297 * Returns the pointer placed in the flex array at element_nr using
298 * flex_array_put_ptr(). This function should not be called if the
299 * element in question was not set using the _put_ptr() helper.
300 */
301void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
302{
303 void **tmp;
304
305 tmp = flex_array_get(fa, element_nr);
306 if (!tmp)
307 return NULL;
308
309 return *tmp;
310}
311
289static int part_is_free(struct flex_array_part *part) 312static int part_is_free(struct flex_array_part *part)
290{ 313{
291 int i; 314 int i;
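A short sketch of how the new flex_array_get_ptr() pairs with the existing API when a flex_array stores pointers: the pointer is stored with flex_array_put(&ptr, ...), as the comment above recommends, and read back directly as a pointer. demo_flex_ptr and its arguments are invented names, and error handling is trimmed to the essentials.

    #include <linux/flex_array.h>
    #include <linux/err.h>
    #include <linux/gfp.h>

    /* Sketch: keep an array of object pointers and read one back with
     * the new flex_array_get_ptr() helper. nr must be below the 128
     * elements allocated here. */
    static void *demo_flex_ptr(void *item, unsigned int nr)
    {
            struct flex_array *fa;
            int err;

            fa = flex_array_alloc(sizeof(void *), 128, GFP_KERNEL);
            if (!fa)
                    return ERR_PTR(-ENOMEM);

            /* Store the pointer value itself: pass &item, not item. */
            err = flex_array_put(fa, nr, &item, GFP_KERNEL);
            if (err) {
                    flex_array_free(fa);
                    return ERR_PTR(err);
            }

            /* In real code the flex_array would be kept around; it is
             * leaked here only to keep the sketch short. */
            return flex_array_get_ptr(fa, nr);      /* -> item */
    }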
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index bea5d97df991..85d0e412a04f 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -7,8 +7,8 @@
7#define LE_TABLE_SIZE (1 << CRC_LE_BITS) 7#define LE_TABLE_SIZE (1 << CRC_LE_BITS)
8#define BE_TABLE_SIZE (1 << CRC_BE_BITS) 8#define BE_TABLE_SIZE (1 << CRC_BE_BITS)
9 9
10static uint32_t crc32table_le[LE_TABLE_SIZE]; 10static uint32_t crc32table_le[4][LE_TABLE_SIZE];
11static uint32_t crc32table_be[BE_TABLE_SIZE]; 11static uint32_t crc32table_be[4][BE_TABLE_SIZE];
12 12
13/** 13/**
14 * crc32init_le() - allocate and initialize LE table data 14 * crc32init_le() - allocate and initialize LE table data
@@ -22,12 +22,19 @@ static void crc32init_le(void)
22 unsigned i, j; 22 unsigned i, j;
23 uint32_t crc = 1; 23 uint32_t crc = 1;
24 24
25 crc32table_le[0] = 0; 25 crc32table_le[0][0] = 0;
26 26
27 for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { 27 for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
28 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); 28 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
29 for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) 29 for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
30 crc32table_le[i + j] = crc ^ crc32table_le[j]; 30 crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
31 }
32 for (i = 0; i < LE_TABLE_SIZE; i++) {
33 crc = crc32table_le[0][i];
34 for (j = 1; j < 4; j++) {
35 crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
36 crc32table_le[j][i] = crc;
37 }
31 } 38 }
32} 39}
33 40
@@ -39,25 +46,35 @@ static void crc32init_be(void)
39 unsigned i, j; 46 unsigned i, j;
40 uint32_t crc = 0x80000000; 47 uint32_t crc = 0x80000000;
41 48
42 crc32table_be[0] = 0; 49 crc32table_be[0][0] = 0;
43 50
44 for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { 51 for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
45 crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); 52 crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
46 for (j = 0; j < i; j++) 53 for (j = 0; j < i; j++)
47 crc32table_be[i + j] = crc ^ crc32table_be[j]; 54 crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
55 }
56 for (i = 0; i < BE_TABLE_SIZE; i++) {
57 crc = crc32table_be[0][i];
58 for (j = 1; j < 4; j++) {
59 crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
60 crc32table_be[j][i] = crc;
61 }
48 } 62 }
49} 63}
50 64
51static void output_table(uint32_t table[], int len, char *trans) 65static void output_table(uint32_t table[4][256], int len, char *trans)
52{ 66{
53 int i; 67 int i, j;
54 68
55 for (i = 0; i < len - 1; i++) { 69 for (j = 0 ; j < 4; j++) {
56 if (i % ENTRIES_PER_LINE == 0) 70 printf("{");
57 printf("\n"); 71 for (i = 0; i < len - 1; i++) {
58 printf("%s(0x%8.8xL), ", trans, table[i]); 72 if (i % ENTRIES_PER_LINE == 0)
73 printf("\n");
74 printf("%s(0x%8.8xL), ", trans, table[j][i]);
75 }
76 printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
59 } 77 }
60 printf("%s(0x%8.8xL)\n", trans, table[len - 1]);
61} 78}
62 79
63int main(int argc, char** argv) 80int main(int argc, char** argv)
@@ -66,14 +83,14 @@ int main(int argc, char** argv)
66 83
67 if (CRC_LE_BITS > 1) { 84 if (CRC_LE_BITS > 1) {
68 crc32init_le(); 85 crc32init_le();
69 printf("static const u32 crc32table_le[] = {"); 86 printf("static const u32 crc32table_le[4][256] = {");
70 output_table(crc32table_le, LE_TABLE_SIZE, "tole"); 87 output_table(crc32table_le, LE_TABLE_SIZE, "tole");
71 printf("};\n"); 88 printf("};\n");
72 } 89 }
73 90
74 if (CRC_BE_BITS > 1) { 91 if (CRC_BE_BITS > 1) {
75 crc32init_be(); 92 crc32init_be();
76 printf("static const u32 crc32table_be[] = {"); 93 printf("static const u32 crc32table_be[4][256] = {");
77 output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); 94 output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
78 printf("};\n"); 95 printf("};\n");
79 } 96 }
diff --git a/lib/genalloc.c b/lib/genalloc.c
index e67f97495dd5..1923f1490e72 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -10,6 +10,7 @@
10 * Version 2. See the file COPYING for more details. 10 * Version 2. See the file COPYING for more details.
11 */ 11 */
12 12
13#include <linux/slab.h>
13#include <linux/module.h> 14#include <linux/module.h>
14#include <linux/bitmap.h> 15#include <linux/bitmap.h>
15#include <linux/genalloc.h> 16#include <linux/genalloc.h>
@@ -127,7 +128,6 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
127 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 128 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
128 129
129 end_bit = (chunk->end_addr - chunk->start_addr) >> order; 130 end_bit = (chunk->end_addr - chunk->start_addr) >> order;
130 end_bit -= nbits + 1;
131 131
132 spin_lock_irqsave(&chunk->lock, flags); 132 spin_lock_irqsave(&chunk->lock, flags);
133 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, 133 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 39af2560f765..5d7a4802c562 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -16,6 +16,24 @@ const char hex_asc[] = "0123456789abcdef";
16EXPORT_SYMBOL(hex_asc); 16EXPORT_SYMBOL(hex_asc);
17 17
18/** 18/**
19 * hex_to_bin - convert a hex digit to its real value
20 * @ch: ascii character represents hex digit
21 *
22 * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
23 * input.
24 */
25int hex_to_bin(char ch)
26{
27 if ((ch >= '0') && (ch <= '9'))
28 return ch - '0';
29 ch = tolower(ch);
30 if ((ch >= 'a') && (ch <= 'f'))
31 return ch - 'a' + 10;
32 return -1;
33}
34EXPORT_SYMBOL(hex_to_bin);
35
36/**
19 * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory 37 * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
20 * @buf: data blob to dump 38 * @buf: data blob to dump
21 * @len: number of bytes in the @buf 39 * @len: number of bytes in the @buf
@@ -34,7 +52,7 @@ EXPORT_SYMBOL(hex_asc);
34 * 52 *
35 * E.g.: 53 * E.g.:
36 * hex_dump_to_buffer(frame->data, frame->len, 16, 1, 54 * hex_dump_to_buffer(frame->data, frame->len, 16, 1,
37 * linebuf, sizeof(linebuf), 1); 55 * linebuf, sizeof(linebuf), true);
38 * 56 *
39 * example output buffer: 57 * example output buffer:
40 * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO 58 * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
@@ -65,8 +83,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
65 83
66 for (j = 0; j < ngroups; j++) 84 for (j = 0; j < ngroups; j++)
67 lx += scnprintf(linebuf + lx, linebuflen - lx, 85 lx += scnprintf(linebuf + lx, linebuflen - lx,
68 "%s%16.16llx", j ? " " : "", 86 "%s%16.16llx", j ? " " : "",
69 (unsigned long long)*(ptr8 + j)); 87 (unsigned long long)*(ptr8 + j));
70 ascii_column = 17 * ngroups + 2; 88 ascii_column = 17 * ngroups + 2;
71 break; 89 break;
72 } 90 }
@@ -77,7 +95,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
77 95
78 for (j = 0; j < ngroups; j++) 96 for (j = 0; j < ngroups; j++)
79 lx += scnprintf(linebuf + lx, linebuflen - lx, 97 lx += scnprintf(linebuf + lx, linebuflen - lx,
80 "%s%8.8x", j ? " " : "", *(ptr4 + j)); 98 "%s%8.8x", j ? " " : "", *(ptr4 + j));
81 ascii_column = 9 * ngroups + 2; 99 ascii_column = 9 * ngroups + 2;
82 break; 100 break;
83 } 101 }
@@ -88,7 +106,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
88 106
89 for (j = 0; j < ngroups; j++) 107 for (j = 0; j < ngroups; j++)
90 lx += scnprintf(linebuf + lx, linebuflen - lx, 108 lx += scnprintf(linebuf + lx, linebuflen - lx,
91 "%s%4.4x", j ? " " : "", *(ptr2 + j)); 109 "%s%4.4x", j ? " " : "", *(ptr2 + j));
92 ascii_column = 5 * ngroups + 2; 110 ascii_column = 5 * ngroups + 2;
93 break; 111 break;
94 } 112 }
@@ -111,9 +129,10 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
111 129
112 while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) 130 while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
113 linebuf[lx++] = ' '; 131 linebuf[lx++] = ' ';
114 for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) 132 for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
115 linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] 133 ch = ptr[j];
116 : '.'; 134 linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
135 }
117nil: 136nil:
118 linebuf[lx++] = '\0'; 137 linebuf[lx++] = '\0';
119} 138}
@@ -143,7 +162,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
143 * 162 *
144 * E.g.: 163 * E.g.:
145 * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, 164 * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS,
146 * 16, 1, frame->data, frame->len, 1); 165 * 16, 1, frame->data, frame->len, true);
147 * 166 *
148 * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: 167 * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode:
149 * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO 168 * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
@@ -151,12 +170,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
151 * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. 170 * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~.
152 */ 171 */
153void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, 172void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
154 int rowsize, int groupsize, 173 int rowsize, int groupsize,
155 const void *buf, size_t len, bool ascii) 174 const void *buf, size_t len, bool ascii)
156{ 175{
157 const u8 *ptr = buf; 176 const u8 *ptr = buf;
158 int i, linelen, remaining = len; 177 int i, linelen, remaining = len;
159 unsigned char linebuf[200]; 178 unsigned char linebuf[32 * 3 + 2 + 32 + 1];
160 179
161 if (rowsize != 16 && rowsize != 32) 180 if (rowsize != 16 && rowsize != 32)
162 rowsize = 16; 181 rowsize = 16;
@@ -164,13 +183,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
164 for (i = 0; i < len; i += rowsize) { 183 for (i = 0; i < len; i += rowsize) {
165 linelen = min(remaining, rowsize); 184 linelen = min(remaining, rowsize);
166 remaining -= rowsize; 185 remaining -= rowsize;
186
167 hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, 187 hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
168 linebuf, sizeof(linebuf), ascii); 188 linebuf, sizeof(linebuf), ascii);
169 189
170 switch (prefix_type) { 190 switch (prefix_type) {
171 case DUMP_PREFIX_ADDRESS: 191 case DUMP_PREFIX_ADDRESS:
172 printk("%s%s%*p: %s\n", level, prefix_str, 192 printk("%s%s%p: %s\n",
173 (int)(2 * sizeof(void *)), ptr + i, linebuf); 193 level, prefix_str, ptr + i, linebuf);
174 break; 194 break;
175 case DUMP_PREFIX_OFFSET: 195 case DUMP_PREFIX_OFFSET:
176 printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); 196 printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
@@ -196,9 +216,9 @@ EXPORT_SYMBOL(print_hex_dump);
196 * rowsize of 16, groupsize of 1, and ASCII output included. 216 * rowsize of 16, groupsize of 1, and ASCII output included.
197 */ 217 */
198void print_hex_dump_bytes(const char *prefix_str, int prefix_type, 218void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
199 const void *buf, size_t len) 219 const void *buf, size_t len)
200{ 220{
201 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, 221 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
202 buf, len, 1); 222 buf, len, true);
203} 223}
204EXPORT_SYMBOL(print_hex_dump_bytes); 224EXPORT_SYMBOL(print_hex_dump_bytes);
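The new hex_to_bin() returns the value of one hex digit or -1. A typical caller combines two digits into a byte, as in this sketch; parse_hex_byte is an invented name, and the declaration is assumed to live in <linux/kernel.h> next to hex_asc.

    #include <linux/kernel.h>       /* hex_to_bin() */
    #include <linux/types.h>
    #include <linux/errno.h>

    /* Sketch: parse two hex characters into one byte, e.g. "4f" -> 0x4f.
     * Returns -EINVAL if either character is not a hex digit. */
    static int parse_hex_byte(const char *s, u8 *out)
    {
            int hi = hex_to_bin(s[0]);
            int lo = hex_to_bin(s[1]);

            if (hi < 0 || lo < 0)
                    return -EINVAL;
            *out = (hi << 4) | lo;
            return 0;
    }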
diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb129..3c79d50814cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,37 +9,45 @@
9 * The Hamming Weight of a number is the total number of bits set in it. 9 * The Hamming Weight of a number is the total number of bits set in it.
10 */ 10 */
11 11
12unsigned int hweight32(unsigned int w) 12unsigned int __sw_hweight32(unsigned int w)
13{ 13{
14#ifdef ARCH_HAS_FAST_MULTIPLIER
15 w -= (w >> 1) & 0x55555555;
16 w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
17 w = (w + (w >> 4)) & 0x0f0f0f0f;
18 return (w * 0x01010101) >> 24;
19#else
14 unsigned int res = w - ((w >> 1) & 0x55555555); 20 unsigned int res = w - ((w >> 1) & 0x55555555);
15 res = (res & 0x33333333) + ((res >> 2) & 0x33333333); 21 res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
16 res = (res + (res >> 4)) & 0x0F0F0F0F; 22 res = (res + (res >> 4)) & 0x0F0F0F0F;
17 res = res + (res >> 8); 23 res = res + (res >> 8);
18 return (res + (res >> 16)) & 0x000000FF; 24 return (res + (res >> 16)) & 0x000000FF;
25#endif
19} 26}
20EXPORT_SYMBOL(hweight32); 27EXPORT_SYMBOL(__sw_hweight32);
21 28
22unsigned int hweight16(unsigned int w) 29unsigned int __sw_hweight16(unsigned int w)
23{ 30{
24 unsigned int res = w - ((w >> 1) & 0x5555); 31 unsigned int res = w - ((w >> 1) & 0x5555);
25 res = (res & 0x3333) + ((res >> 2) & 0x3333); 32 res = (res & 0x3333) + ((res >> 2) & 0x3333);
26 res = (res + (res >> 4)) & 0x0F0F; 33 res = (res + (res >> 4)) & 0x0F0F;
27 return (res + (res >> 8)) & 0x00FF; 34 return (res + (res >> 8)) & 0x00FF;
28} 35}
29EXPORT_SYMBOL(hweight16); 36EXPORT_SYMBOL(__sw_hweight16);
30 37
31unsigned int hweight8(unsigned int w) 38unsigned int __sw_hweight8(unsigned int w)
32{ 39{
33 unsigned int res = w - ((w >> 1) & 0x55); 40 unsigned int res = w - ((w >> 1) & 0x55);
34 res = (res & 0x33) + ((res >> 2) & 0x33); 41 res = (res & 0x33) + ((res >> 2) & 0x33);
35 return (res + (res >> 4)) & 0x0F; 42 return (res + (res >> 4)) & 0x0F;
36} 43}
37EXPORT_SYMBOL(hweight8); 44EXPORT_SYMBOL(__sw_hweight8);
38 45
39unsigned long hweight64(__u64 w) 46unsigned long __sw_hweight64(__u64 w)
40{ 47{
41#if BITS_PER_LONG == 32 48#if BITS_PER_LONG == 32
42 return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); 49 return __sw_hweight32((unsigned int)(w >> 32)) +
50 __sw_hweight32((unsigned int)w);
43#elif BITS_PER_LONG == 64 51#elif BITS_PER_LONG == 64
44#ifdef ARCH_HAS_FAST_MULTIPLIER 52#ifdef ARCH_HAS_FAST_MULTIPLIER
45 w -= (w >> 1) & 0x5555555555555555ul; 53 w -= (w >> 1) & 0x5555555555555555ul;
@@ -56,4 +64,4 @@ unsigned long hweight64(__u64 w)
56#endif 64#endif
57#endif 65#endif
58} 66}
59EXPORT_SYMBOL(hweight64); 67EXPORT_SYMBOL(__sw_hweight64);
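For reference, the ARCH_HAS_FAST_MULTIPLIER branch added to __sw_hweight32() works in two stages: the first three lines reduce w so that every byte holds the population count of its own original byte (a value between 0 and 8), and the multiply by 0x01010101 then sums those four byte counts into the most significant byte. A stand-alone user-space illustration with an invented function name:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned int popcount32_mul(uint32_t w)
    {
            w -= (w >> 1) & 0x55555555;                     /* 2-bit partial sums */
            w = (w & 0x33333333) + ((w >> 2) & 0x33333333); /* 4-bit partial sums */
            w = (w + (w >> 4)) & 0x0f0f0f0f;                /* one count per byte, each 0..8 */
            return (w * 0x01010101) >> 24;                  /* top byte = sum of the four counts */
    }

    int main(void)
    {
            printf("%u\n", popcount32_mul(0xdeadbeef));     /* prints 24 */
            return 0;
    }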
diff --git a/lib/idr.c b/lib/idr.c
index 1cac726c44bc..e15502e8b21e 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -106,16 +106,17 @@ static void idr_mark_full(struct idr_layer **pa, int id)
106} 106}
107 107
108/** 108/**
109 * idr_pre_get - reserver resources for idr allocation 109 * idr_pre_get - reserve resources for idr allocation
110 * @idp: idr handle 110 * @idp: idr handle
111 * @gfp_mask: memory allocation flags 111 * @gfp_mask: memory allocation flags
112 * 112 *
113 * This function should be called prior to locking and calling the 113 * This function should be called prior to calling the idr_get_new* functions.
114 * idr_get_new* functions. It preallocates enough memory to satisfy 114 * It preallocates enough memory to satisfy the worst possible allocation. The
115 * the worst possible allocation. 115 * caller should pass in GFP_KERNEL if possible. This of course requires that
116 * no spinning locks be held.
116 * 117 *
117 * If the system is REALLY out of memory this function returns 0, 118 * If the system is REALLY out of memory this function returns %0,
118 * otherwise 1. 119 * otherwise %1.
119 */ 120 */
120int idr_pre_get(struct idr *idp, gfp_t gfp_mask) 121int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
121{ 122{
@@ -156,10 +157,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
156 id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; 157 id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
157 158
158 /* if already at the top layer, we need to grow */ 159 /* if already at the top layer, we need to grow */
159 if (!(p = pa[l])) { 160 if (id >= 1 << (idp->layers * IDR_BITS)) {
160 *starting_id = id; 161 *starting_id = id;
161 return IDR_NEED_TO_GROW; 162 return IDR_NEED_TO_GROW;
162 } 163 }
164 p = pa[l];
165 BUG_ON(!p);
163 166
164 /* If we need to go up one layer, continue the 167 /* If we need to go up one layer, continue the
165 * loop; otherwise, restart from the top. 168 * loop; otherwise, restart from the top.
@@ -282,17 +285,19 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
282 * idr_get_new_above - allocate new idr entry above or equal to a start id 285 * idr_get_new_above - allocate new idr entry above or equal to a start id
283 * @idp: idr handle 286 * @idp: idr handle
284 * @ptr: pointer you want associated with the id 287 * @ptr: pointer you want associated with the id
285 * @start_id: id to start search at 288 * @starting_id: id to start search at
286 * @id: pointer to the allocated handle 289 * @id: pointer to the allocated handle
287 * 290 *
288 * This is the allocate id function. It should be called with any 291 * This is the allocate id function. It should be called with any
289 * required locks. 292 * required locks.
290 * 293 *
291 * If memory is required, it will return -EAGAIN, you should unlock 294 * If allocation from IDR's private freelist fails, idr_get_new_above() will
292 * and go back to the idr_pre_get() call. If the idr is full, it will 295 * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
293 * return -ENOSPC. 296 * IDR's preallocation and then retry the idr_get_new_above() call.
297 *
298 * If the idr is full idr_get_new_above() will return %-ENOSPC.
294 * 299 *
295 * @id returns a value in the range @starting_id ... 0x7fffffff 300 * @id returns a value in the range @starting_id ... %0x7fffffff
296 */ 301 */
297int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) 302int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
298{ 303{
@@ -316,14 +321,13 @@ EXPORT_SYMBOL(idr_get_new_above);
316 * @ptr: pointer you want associated with the id 321 * @ptr: pointer you want associated with the id
317 * @id: pointer to the allocated handle 322 * @id: pointer to the allocated handle
318 * 323 *
319 * This is the allocate id function. It should be called with any 324 * If allocation from IDR's private freelist fails, idr_get_new_above() will
320 * required locks. 325 * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
326 * IDR's preallocation and then retry the idr_get_new_above() call.
321 * 327 *
322 * If memory is required, it will return -EAGAIN, you should unlock 328 * If the idr is full idr_get_new_above() will return %-ENOSPC.
323 * and go back to the idr_pre_get() call. If the idr is full, it will
324 * return -ENOSPC.
325 * 329 *
326 * @id returns a value in the range 0 ... 0x7fffffff 330 * @id returns a value in the range %0 ... %0x7fffffff
327 */ 331 */
328int idr_get_new(struct idr *idp, void *ptr, int *id) 332int idr_get_new(struct idr *idp, void *ptr, int *id)
329{ 333{
@@ -386,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
386} 390}
387 391
388/** 392/**
389 * idr_remove - remove the given id and free it's slot 393 * idr_remove - remove the given id and free its slot
390 * @idp: idr handle 394 * @idp: idr handle
391 * @id: unique key 395 * @id: unique key
392 */ 396 */
@@ -435,7 +439,7 @@ EXPORT_SYMBOL(idr_remove);
435 * function will remove all id mappings and leave all idp_layers 439 * function will remove all id mappings and leave all idp_layers
436 * unused. 440 * unused.
437 * 441 *
438 * A typical clean-up sequence for objects stored in an idr tree, will 442 * A typical clean-up sequence for objects stored in an idr tree will
439 use idr_for_each() to free all objects, if necessary, then 443 use idr_for_each() to free all objects, if necessary, then
440 * idr_remove_all() to remove all ids, and idr_destroy() to free 444 * idr_remove_all() to remove all ids, and idr_destroy() to free
441 * up the cached idr_layers. 445 * up the cached idr_layers.
@@ -443,6 +447,7 @@ EXPORT_SYMBOL(idr_remove);
443void idr_remove_all(struct idr *idp) 447void idr_remove_all(struct idr *idp)
444{ 448{
445 int n, id, max; 449 int n, id, max;
450 int bt_mask;
446 struct idr_layer *p; 451 struct idr_layer *p;
447 struct idr_layer *pa[MAX_LEVEL]; 452 struct idr_layer *pa[MAX_LEVEL];
448 struct idr_layer **paa = &pa[0]; 453 struct idr_layer **paa = &pa[0];
@@ -460,8 +465,10 @@ void idr_remove_all(struct idr *idp)
460 p = p->ary[(id >> n) & IDR_MASK]; 465 p = p->ary[(id >> n) & IDR_MASK];
461 } 466 }
462 467
468 bt_mask = id;
463 id += 1 << n; 469 id += 1 << n;
464 while (n < fls(id)) { 470 /* Get the highest bit that the above add changed from 0->1. */
471 while (n < fls(id ^ bt_mask)) {
465 if (p) 472 if (p)
466 free_layer(p); 473 free_layer(p);
467 n += IDR_BITS; 474 n += IDR_BITS;
@@ -474,7 +481,7 @@ EXPORT_SYMBOL(idr_remove_all);
474 481
475/** 482/**
476 * idr_destroy - release all cached layers within an idr tree 483 * idr_destroy - release all cached layers within an idr tree
477 * idp: idr handle 484 * @idp: idr handle
478 */ 485 */
479void idr_destroy(struct idr *idp) 486void idr_destroy(struct idr *idp)
480{ 487{
@@ -502,7 +509,7 @@ void *idr_find(struct idr *idp, int id)
502 int n; 509 int n;
503 struct idr_layer *p; 510 struct idr_layer *p;
504 511
505 p = rcu_dereference(idp->top); 512 p = rcu_dereference_raw(idp->top);
506 if (!p) 513 if (!p)
507 return NULL; 514 return NULL;
508 n = (p->layer+1) * IDR_BITS; 515 n = (p->layer+1) * IDR_BITS;
@@ -517,7 +524,7 @@ void *idr_find(struct idr *idp, int id)
517 while (n > 0 && p) { 524 while (n > 0 && p) {
518 n -= IDR_BITS; 525 n -= IDR_BITS;
519 BUG_ON(n != p->layer*IDR_BITS); 526 BUG_ON(n != p->layer*IDR_BITS);
520 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 527 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
521 } 528 }
522 return((void *)p); 529 return((void *)p);
523} 530}
@@ -537,7 +544,7 @@ EXPORT_SYMBOL(idr_find);
537 * not allowed. 544 * not allowed.
538 * 545 *
539 * We check the return of @fn each time. If it returns anything other 546 * We check the return of @fn each time. If it returns anything other
540 * than 0, we break out and return that value. 547 * than %0, we break out and return that value.
541 * 548 *
542 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). 549 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
543 */ 550 */
@@ -550,7 +557,7 @@ int idr_for_each(struct idr *idp,
550 struct idr_layer **paa = &pa[0]; 557 struct idr_layer **paa = &pa[0];
551 558
552 n = idp->layers * IDR_BITS; 559 n = idp->layers * IDR_BITS;
553 p = rcu_dereference(idp->top); 560 p = rcu_dereference_raw(idp->top);
554 max = 1 << n; 561 max = 1 << n;
555 562
556 id = 0; 563 id = 0;
@@ -558,7 +565,7 @@ int idr_for_each(struct idr *idp,
558 while (n > 0 && p) { 565 while (n > 0 && p) {
559 n -= IDR_BITS; 566 n -= IDR_BITS;
560 *paa++ = p; 567 *paa++ = p;
561 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 568 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
562 } 569 }
563 570
564 if (p) { 571 if (p) {
@@ -581,10 +588,11 @@ EXPORT_SYMBOL(idr_for_each);
581/** 588/**
582 * idr_get_next - lookup next object of id to given id. 589 * idr_get_next - lookup next object of id to given id.
583 * @idp: idr handle 590 * @idp: idr handle
584 * @id: pointer to lookup key 591 * @nextidp: pointer to lookup key
585 * 592 *
586 * Returns pointer to registered object with id, which is next number to 593 * Returns pointer to registered object with id, which is next number to
587 * given id. 594 * given id. After being looked up, *@nextidp will be updated for the next
595 * iteration.
588 */ 596 */
589 597
590void *idr_get_next(struct idr *idp, int *nextidp) 598void *idr_get_next(struct idr *idp, int *nextidp)
@@ -597,7 +605,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
597 /* find first ent */ 605 /* find first ent */
598 n = idp->layers * IDR_BITS; 606 n = idp->layers * IDR_BITS;
599 max = 1 << n; 607 max = 1 << n;
600 p = rcu_dereference(idp->top); 608 p = rcu_dereference_raw(idp->top);
601 if (!p) 609 if (!p)
602 return NULL; 610 return NULL;
603 611
@@ -605,7 +613,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
605 while (n > 0 && p) { 613 while (n > 0 && p) {
606 n -= IDR_BITS; 614 n -= IDR_BITS;
607 *paa++ = p; 615 *paa++ = p;
608 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 616 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
609 } 617 }
610 618
611 if (p) { 619 if (p) {
@@ -621,7 +629,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
621 } 629 }
622 return NULL; 630 return NULL;
623} 631}
624 632EXPORT_SYMBOL(idr_get_next);
625 633
626 634
627/** 635/**
@@ -631,8 +639,8 @@ void *idr_get_next(struct idr *idp, int *nextidp)
631 * @id: lookup key 639 * @id: lookup key
632 * 640 *
633 * Replace the pointer registered with an id and return the old value. 641 * Replace the pointer registered with an id and return the old value.
634 * A -ENOENT return indicates that @id was not found. 642 * A %-ENOENT return indicates that @id was not found.
635 * A -EINVAL return indicates that @id was not within valid constraints. 643 * A %-EINVAL return indicates that @id was not within valid constraints.
636 * 644 *
637 * The caller must serialize with writers. 645 * The caller must serialize with writers.
638 */ 646 */
@@ -690,10 +698,11 @@ void idr_init(struct idr *idp)
690EXPORT_SYMBOL(idr_init); 698EXPORT_SYMBOL(idr_init);
691 699
692 700
693/* 701/**
702 * DOC: IDA description
694 * IDA - IDR based ID allocator 703 * IDA - IDR based ID allocator
695 * 704 *
696 * this is id allocator without id -> pointer translation. Memory 705 * This is id allocator without id -> pointer translation. Memory
697 * usage is much lower than full blown idr because each id only 706 * usage is much lower than full blown idr because each id only
698 * occupies a bit. ida uses a custom leaf node which contains 707 * occupies a bit. ida uses a custom leaf node which contains
699 * IDA_BITMAP_BITS slots. 708 * IDA_BITMAP_BITS slots.
@@ -726,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
726 * following function. It preallocates enough memory to satisfy the 735 * following function. It preallocates enough memory to satisfy the
727 * worst possible allocation. 736 * worst possible allocation.
728 * 737 *
729 * If the system is REALLY out of memory this function returns 0, 738 * If the system is REALLY out of memory this function returns %0,
730 * otherwise 1. 739 * otherwise %1.
731 */ 740 */
732int ida_pre_get(struct ida *ida, gfp_t gfp_mask) 741int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
733{ 742{
@@ -753,17 +762,17 @@ EXPORT_SYMBOL(ida_pre_get);
753/** 762/**
754 * ida_get_new_above - allocate new ID above or equal to a start id 763 * ida_get_new_above - allocate new ID above or equal to a start id
755 * @ida: ida handle 764 * @ida: ida handle
756 * @staring_id: id to start search at 765 * @starting_id: id to start search at
757 * @p_id: pointer to the allocated handle 766 * @p_id: pointer to the allocated handle
758 * 767 *
759 * Allocate new ID above or equal to @ida. It should be called with 768 * Allocate new ID above or equal to @ida. It should be called with
760 * any required locks. 769 * any required locks.
761 * 770 *
762 * If memory is required, it will return -EAGAIN, you should unlock 771 * If memory is required, it will return %-EAGAIN, you should unlock
763 * and go back to the ida_pre_get() call. If the ida is full, it will 772 * and go back to the ida_pre_get() call. If the ida is full, it will
764 * return -ENOSPC. 773 * return %-ENOSPC.
765 * 774 *
766 * @p_id returns a value in the range @starting_id ... 0x7fffffff. 775 * @p_id returns a value in the range @starting_id ... %0x7fffffff.
767 */ 776 */
768int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) 777int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
769{ 778{
@@ -845,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above);
845 * 854 *
846 * Allocate new ID. It should be called with any required locks. 855 * Allocate new ID. It should be called with any required locks.
847 * 856 *
848 * If memory is required, it will return -EAGAIN, you should unlock 857 * If memory is required, it will return %-EAGAIN, you should unlock
849 * and go back to the idr_pre_get() call. If the idr is full, it will 858 * and go back to the idr_pre_get() call. If the idr is full, it will
850 * return -ENOSPC. 859 * return %-ENOSPC.
851 * 860 *
852 * @id returns a value in the range 0 ... 0x7fffffff. 861 * @id returns a value in the range %0 ... %0x7fffffff.
853 */ 862 */
854int ida_get_new(struct ida *ida, int *p_id) 863int ida_get_new(struct ida *ida, int *p_id)
855{ 864{
@@ -907,7 +916,7 @@ EXPORT_SYMBOL(ida_remove);
907 916
908/** 917/**
909 * ida_destroy - release all cached layers within an ida tree 918 * ida_destroy - release all cached layers within an ida tree
910 * ida: ida handle 919 * @ida: ida handle
911 */ 920 */
912void ida_destroy(struct ida *ida) 921void ida_destroy(struct ida *ida)
913{ 922{
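The reworded kernel-doc above spells out the intended calling pattern: preallocate with idr_pre_get() while no spinlocks are held, take the caller's own lock, try idr_get_new_above(), and loop on -EAGAIN. A minimal sketch of that pattern, assuming a hypothetical my_objects idr, my_lock spinlock and struct my_obj, none of which come from this patch:

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/errno.h>

struct my_obj {
	int id;
};

static DEFINE_IDR(my_objects);
static DEFINE_SPINLOCK(my_lock);

static int my_obj_register(struct my_obj *obj)
{
	int id, ret;

	do {
		/* no locks held here, so GFP_KERNEL is fine */
		if (!idr_pre_get(&my_objects, GFP_KERNEL))
			return -ENOMEM;		/* really out of memory */
		spin_lock(&my_lock);
		ret = idr_get_new_above(&my_objects, obj, 1, &id);
		spin_unlock(&my_lock);
	} while (ret == -EAGAIN);		/* preallocation consumed: refill, retry */

	if (ret)
		return ret;			/* -ENOSPC when the idr is full */
	obj->id = id;
	return 0;
}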
diff --git a/lib/inflate.c b/lib/inflate.c
index d10255973a9f..013a76193481 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -103,6 +103,9 @@
103 the two sets of lengths. 103 the two sets of lengths.
104 */ 104 */
105#include <linux/compiler.h> 105#include <linux/compiler.h>
106#ifdef NO_INFLATE_MALLOC
107#include <linux/slab.h>
108#endif
106 109
107#ifdef RCSID 110#ifdef RCSID
108static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #"; 111static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c0251f4ad08b..da053313ee5c 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -38,12 +38,3 @@ again:
38 return -1; 38 return -1;
39} 39}
40EXPORT_SYMBOL(iommu_area_alloc); 40EXPORT_SYMBOL(iommu_area_alloc);
41
42unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
43 unsigned long io_page_size)
44{
45 unsigned long size = (addr & (io_page_size - 1)) + len;
46
47 return DIV_ROUND_UP(size, io_page_size);
48}
49EXPORT_SYMBOL(iommu_num_pages);
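For reference, the helper removed here counted how many IO pages a buffer spans once the offset inside the first page is included. The same arithmetic in a tiny standalone program, with purely illustrative numbers:

#include <stdio.h>

/* Pages spanned by @len bytes starting at @addr for a given IO page size. */
static unsigned long num_pages(unsigned long addr, unsigned long len,
			       unsigned long io_page_size)
{
	unsigned long size = (addr & (io_page_size - 1)) + len;

	return (size + io_page_size - 1) / io_page_size;	/* DIV_ROUND_UP */
}

int main(void)
{
	/* 0x40 bytes ending just past a 4 KiB boundary span two pages */
	printf("%lu\n", num_pages(0x1fff0, 0x40, 0x1000));	/* prints 2 */
	return 0;
}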
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a2..5730ecd3eb66 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,10 +13,10 @@
13#include <asm/pgtable.h> 13#include <asm/pgtable.h>
14 14
15static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, 15static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
16 unsigned long end, unsigned long phys_addr, pgprot_t prot) 16 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
17{ 17{
18 pte_t *pte; 18 pte_t *pte;
19 unsigned long pfn; 19 u64 pfn;
20 20
21 pfn = phys_addr >> PAGE_SHIFT; 21 pfn = phys_addr >> PAGE_SHIFT;
22 pte = pte_alloc_kernel(pmd, addr); 22 pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
31} 31}
32 32
33static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, 33static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
34 unsigned long end, unsigned long phys_addr, pgprot_t prot) 34 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
35{ 35{
36 pmd_t *pmd; 36 pmd_t *pmd;
37 unsigned long next; 37 unsigned long next;
@@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
49} 49}
50 50
51static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, 51static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
52 unsigned long end, unsigned long phys_addr, pgprot_t prot) 52 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
53{ 53{
54 pud_t *pud; 54 pud_t *pud;
55 unsigned long next; 55 unsigned long next;
@@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
67} 67}
68 68
69int ioremap_page_range(unsigned long addr, 69int ioremap_page_range(unsigned long addr,
70 unsigned long end, unsigned long phys_addr, pgprot_t prot) 70 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
71{ 71{
72 pgd_t *pgd; 72 pgd_t *pgd;
73 unsigned long start; 73 unsigned long start;
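Switching the physical address argument from unsigned long to phys_addr_t matters on 32-bit kernels with a wider physical address space (PAE and similar), where unsigned long would silently drop the upper bits before the page tables were built; the pfn becomes a u64 for the same reason. A userspace-sized illustration of the truncation, with the typedefs standing in for the kernel types:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t old_phys_t;	/* stand-in for a 32-bit "unsigned long" */
typedef uint64_t phys_addr_t;	/* stand-in for CONFIG_PHYS_ADDR_T_64BIT */

int main(void)
{
	phys_addr_t phys = 0x100002000ULL;		/* 8 KiB above the 4 GiB mark */
	old_phys_t truncated = (old_phys_t)phys;	/* what the old prototype saw */

	printf("full:      %#llx\n", (unsigned long long)phys);
	printf("truncated: %#lx\n", (unsigned long)truncated);	/* 0x2000 */
	return 0;
}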
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index c5ff1fd10030..9c4233b23783 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -6,6 +6,7 @@
6 6
7#include <stdarg.h> 7#include <stdarg.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/slab.h>
9#include <linux/types.h> 10#include <linux/types.h>
10#include <linux/string.h> 11#include <linux/string.h>
11 12
diff --git a/lib/kobject.c b/lib/kobject.c
index b512b746d2af..82dc34c095c2 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
700 return ret; 700 return ret;
701} 701}
702 702
703struct sysfs_ops kobj_sysfs_ops = { 703const struct sysfs_ops kobj_sysfs_ops = {
704 .show = kobj_attr_show, 704 .show = kobj_attr_show,
705 .store = kobj_attr_store, 705 .store = kobj_attr_store,
706}; 706};
@@ -746,17 +746,56 @@ void kset_unregister(struct kset *k)
746 */ 746 */
747struct kobject *kset_find_obj(struct kset *kset, const char *name) 747struct kobject *kset_find_obj(struct kset *kset, const char *name)
748{ 748{
749 return kset_find_obj_hinted(kset, name, NULL);
750}
751
752/**
753 * kset_find_obj_hinted - search for object in kset given a predecessor hint.
754 * @kset: kset we're looking in.
755 * @name: object's name.
756 * @hint: hint to possible object's predecessor.
757 *
758 * Check the hint's next object and if it is a match return it directly,
759 * otherwise, fall back to the behavior of kset_find_obj(). Either way
760 * a reference for the returned object is held and the reference on the
761 * hinted object is released.
762 */
763struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name,
764 struct kobject *hint)
765{
749 struct kobject *k; 766 struct kobject *k;
750 struct kobject *ret = NULL; 767 struct kobject *ret = NULL;
751 768
752 spin_lock(&kset->list_lock); 769 spin_lock(&kset->list_lock);
770
771 if (!hint)
772 goto slow_search;
773
774 /* end of list detection */
775 if (hint->entry.next == kset->list.next)
776 goto slow_search;
777
778 k = container_of(hint->entry.next, struct kobject, entry);
779 if (!kobject_name(k) || strcmp(kobject_name(k), name))
780 goto slow_search;
781
782 ret = kobject_get(k);
783 goto unlock_exit;
784
785slow_search:
753 list_for_each_entry(k, &kset->list, entry) { 786 list_for_each_entry(k, &kset->list, entry) {
754 if (kobject_name(k) && !strcmp(kobject_name(k), name)) { 787 if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
755 ret = kobject_get(k); 788 ret = kobject_get(k);
756 break; 789 break;
757 } 790 }
758 } 791 }
792
793unlock_exit:
759 spin_unlock(&kset->list_lock); 794 spin_unlock(&kset->list_lock);
795
796 if (hint)
797 kobject_put(hint);
798
760 return ret; 799 return ret;
761} 800}
762 801
@@ -789,7 +828,7 @@ static struct kobj_type kset_ktype = {
789 * If the kset was not able to be created, NULL will be returned. 828 * If the kset was not able to be created, NULL will be returned.
790 */ 829 */
791static struct kset *kset_create(const char *name, 830static struct kset *kset_create(const char *name,
792 struct kset_uevent_ops *uevent_ops, 831 const struct kset_uevent_ops *uevent_ops,
793 struct kobject *parent_kobj) 832 struct kobject *parent_kobj)
794{ 833{
795 struct kset *kset; 834 struct kset *kset;
@@ -832,7 +871,7 @@ static struct kset *kset_create(const char *name,
832 * If the kset was not able to be created, NULL will be returned. 871 * If the kset was not able to be created, NULL will be returned.
833 */ 872 */
834struct kset *kset_create_and_add(const char *name, 873struct kset *kset_create_and_add(const char *name,
835 struct kset_uevent_ops *uevent_ops, 874 const struct kset_uevent_ops *uevent_ops,
836 struct kobject *parent_kobj) 875 struct kobject *parent_kobj)
837{ 876{
838 struct kset *kset; 877 struct kset *kset;
@@ -850,6 +889,121 @@ struct kset *kset_create_and_add(const char *name,
850} 889}
851EXPORT_SYMBOL_GPL(kset_create_and_add); 890EXPORT_SYMBOL_GPL(kset_create_and_add);
852 891
892
893static DEFINE_SPINLOCK(kobj_ns_type_lock);
894static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
895
896int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
897{
898 enum kobj_ns_type type = ops->type;
899 int error;
900
901 spin_lock(&kobj_ns_type_lock);
902
903 error = -EINVAL;
904 if (type >= KOBJ_NS_TYPES)
905 goto out;
906
907 error = -EINVAL;
908 if (type <= KOBJ_NS_TYPE_NONE)
909 goto out;
910
911 error = -EBUSY;
912 if (kobj_ns_ops_tbl[type])
913 goto out;
914
915 error = 0;
916 kobj_ns_ops_tbl[type] = ops;
917
918out:
919 spin_unlock(&kobj_ns_type_lock);
920 return error;
921}
922
923int kobj_ns_type_registered(enum kobj_ns_type type)
924{
925 int registered = 0;
926
927 spin_lock(&kobj_ns_type_lock);
928 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES))
929 registered = kobj_ns_ops_tbl[type] != NULL;
930 spin_unlock(&kobj_ns_type_lock);
931
932 return registered;
933}
934
935const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
936{
937 const struct kobj_ns_type_operations *ops = NULL;
938
939 if (parent && parent->ktype->child_ns_type)
940 ops = parent->ktype->child_ns_type(parent);
941
942 return ops;
943}
944
945const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
946{
947 return kobj_child_ns_ops(kobj->parent);
948}
949
950
951const void *kobj_ns_current(enum kobj_ns_type type)
952{
953 const void *ns = NULL;
954
955 spin_lock(&kobj_ns_type_lock);
956 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
957 kobj_ns_ops_tbl[type])
958 ns = kobj_ns_ops_tbl[type]->current_ns();
959 spin_unlock(&kobj_ns_type_lock);
960
961 return ns;
962}
963
964const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
965{
966 const void *ns = NULL;
967
968 spin_lock(&kobj_ns_type_lock);
969 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
970 kobj_ns_ops_tbl[type])
971 ns = kobj_ns_ops_tbl[type]->netlink_ns(sk);
972 spin_unlock(&kobj_ns_type_lock);
973
974 return ns;
975}
976
977const void *kobj_ns_initial(enum kobj_ns_type type)
978{
979 const void *ns = NULL;
980
981 spin_lock(&kobj_ns_type_lock);
982 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
983 kobj_ns_ops_tbl[type])
984 ns = kobj_ns_ops_tbl[type]->initial_ns();
985 spin_unlock(&kobj_ns_type_lock);
986
987 return ns;
988}
989
990/*
991 * kobj_ns_exit - invalidate a namespace tag
992 *
993 * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET)
994 * @ns: the actual namespace being invalidated
995 *
996 * This is called when a tag is no longer valid. For instance,
997 * when a network namespace exits, it uses this helper to
998 * make sure no sb's sysfs_info points to the now-invalidated
999 * netns.
1000 */
1001void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
1002{
1003 sysfs_exit_ns(type, ns);
1004}
1005
1006
853EXPORT_SYMBOL(kobject_get); 1007EXPORT_SYMBOL(kobject_get);
854EXPORT_SYMBOL(kobject_put); 1008EXPORT_SYMBOL(kobject_put);
855EXPORT_SYMBOL(kobject_del); 1009EXPORT_SYMBOL(kobject_del);
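kset_find_obj_hinted() is aimed at callers that look objects up in roughly the order they sit on the kset's list: if the hint's neighbour matches, the list walk is skipped, and in either case the hint's reference is dropped while a fresh reference is returned for the match. A hedged sketch of such a caller; touch_in_order() and names[] are illustrative only, not part of the patch:

#include <linux/kernel.h>
#include <linux/kobject.h>

static void touch_in_order(struct kset *kset, const char * const *names, int n)
{
	struct kobject *k, *hint = NULL;
	int i;

	for (i = 0; i < n; i++) {
		/* drops the reference on @hint, returns a new reference or NULL */
		k = kset_find_obj_hinted(kset, names[i], hint);
		if (!k) {
			hint = NULL;	/* the old hint's reference is already gone */
			continue;
		}
		pr_info("found %s\n", kobject_name(k));
		hint = k;		/* hand this reference back as the next hint */
	}
	if (hint)
		kobject_put(hint);	/* drop the last reference we still hold */
}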
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 920a3ca6e259..70af0a7f97c0 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -18,18 +18,25 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/kobject.h> 19#include <linux/kobject.h>
20#include <linux/module.h> 20#include <linux/module.h>
21 21#include <linux/slab.h>
22#include <linux/user_namespace.h>
22#include <linux/socket.h> 23#include <linux/socket.h>
23#include <linux/skbuff.h> 24#include <linux/skbuff.h>
24#include <linux/netlink.h> 25#include <linux/netlink.h>
25#include <net/sock.h> 26#include <net/sock.h>
27#include <net/net_namespace.h>
26 28
27 29
28u64 uevent_seqnum; 30u64 uevent_seqnum;
29char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; 31char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
30static DEFINE_SPINLOCK(sequence_lock); 32static DEFINE_SPINLOCK(sequence_lock);
31#if defined(CONFIG_NET) 33#ifdef CONFIG_NET
32static struct sock *uevent_sock; 34struct uevent_sock {
35 struct list_head list;
36 struct sock *sk;
37};
38static LIST_HEAD(uevent_sock_list);
39static DEFINE_MUTEX(uevent_sock_mutex);
33#endif 40#endif
34 41
35/* the strings here must match the enum in include/linux/kobject.h */ 42/* the strings here must match the enum in include/linux/kobject.h */
@@ -76,6 +83,39 @@ out:
76 return ret; 83 return ret;
77} 84}
78 85
86#ifdef CONFIG_NET
87static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
88{
89 struct kobject *kobj = data;
90 const struct kobj_ns_type_operations *ops;
91
92 ops = kobj_ns_ops(kobj);
93 if (ops) {
94 const void *sock_ns, *ns;
95 ns = kobj->ktype->namespace(kobj);
96 sock_ns = ops->netlink_ns(dsk);
97 return sock_ns != ns;
98 }
99
100 return 0;
101}
102#endif
103
104static int kobj_usermode_filter(struct kobject *kobj)
105{
106 const struct kobj_ns_type_operations *ops;
107
108 ops = kobj_ns_ops(kobj);
109 if (ops) {
110 const void *init_ns, *ns;
111 ns = kobj->ktype->namespace(kobj);
112 init_ns = ops->initial_ns();
113 return ns != init_ns;
114 }
115
116 return 0;
117}
118
79/** 119/**
80 * kobject_uevent_env - send an uevent with environmental data 120 * kobject_uevent_env - send an uevent with environmental data
81 * 121 *
@@ -83,7 +123,7 @@ out:
83 * @kobj: struct kobject that the action is happening to 123 * @kobj: struct kobject that the action is happening to
84 * @envp_ext: pointer to environmental data 124 * @envp_ext: pointer to environmental data
85 * 125 *
86 * Returns 0 if kobject_uevent() is completed with success or the 126 * Returns 0 if kobject_uevent_env() is completed with success or the
87 * corresponding error when it fails. 127 * corresponding error when it fails.
88 */ 128 */
89int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, 129int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
@@ -95,10 +135,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
95 const char *subsystem; 135 const char *subsystem;
96 struct kobject *top_kobj; 136 struct kobject *top_kobj;
97 struct kset *kset; 137 struct kset *kset;
98 struct kset_uevent_ops *uevent_ops; 138 const struct kset_uevent_ops *uevent_ops;
99 u64 seq; 139 u64 seq;
100 int i = 0; 140 int i = 0;
101 int retval = 0; 141 int retval = 0;
142#ifdef CONFIG_NET
143 struct uevent_sock *ue_sk;
144#endif
102 145
103 pr_debug("kobject: '%s' (%p): %s\n", 146 pr_debug("kobject: '%s' (%p): %s\n",
104 kobject_name(kobj), kobj, __func__); 147 kobject_name(kobj), kobj, __func__);
@@ -210,7 +253,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
210 253
211#if defined(CONFIG_NET) 254#if defined(CONFIG_NET)
212 /* send netlink message */ 255 /* send netlink message */
213 if (uevent_sock) { 256 mutex_lock(&uevent_sock_mutex);
257 list_for_each_entry(ue_sk, &uevent_sock_list, list) {
258 struct sock *uevent_sock = ue_sk->sk;
214 struct sk_buff *skb; 259 struct sk_buff *skb;
215 size_t len; 260 size_t len;
216 261
@@ -232,18 +277,21 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
232 } 277 }
233 278
234 NETLINK_CB(skb).dst_group = 1; 279 NETLINK_CB(skb).dst_group = 1;
235 retval = netlink_broadcast(uevent_sock, skb, 0, 1, 280 retval = netlink_broadcast_filtered(uevent_sock, skb,
236 GFP_KERNEL); 281 0, 1, GFP_KERNEL,
282 kobj_bcast_filter,
283 kobj);
237 /* ENOBUFS should be handled in userspace */ 284 /* ENOBUFS should be handled in userspace */
238 if (retval == -ENOBUFS) 285 if (retval == -ENOBUFS)
239 retval = 0; 286 retval = 0;
240 } else 287 } else
241 retval = -ENOMEM; 288 retval = -ENOMEM;
242 } 289 }
290 mutex_unlock(&uevent_sock_mutex);
243#endif 291#endif
244 292
245 /* call uevent_helper, usually only enabled during early boot */ 293 /* call uevent_helper, usually only enabled during early boot */
246 if (uevent_helper[0]) { 294 if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
247 char *argv [3]; 295 char *argv [3];
248 296
249 argv [0] = uevent_helper; 297 argv [0] = uevent_helper;
@@ -269,7 +317,7 @@ exit:
269EXPORT_SYMBOL_GPL(kobject_uevent_env); 317EXPORT_SYMBOL_GPL(kobject_uevent_env);
270 318
271/** 319/**
272 * kobject_uevent - notify userspace by ending an uevent 320 * kobject_uevent - notify userspace by sending an uevent
273 * 321 *
274 * @action: action that is happening 322 * @action: action that is happening
275 * @kobj: struct kobject that the action is happening to 323 * @kobj: struct kobject that the action is happening to
@@ -319,18 +367,59 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
319EXPORT_SYMBOL_GPL(add_uevent_var); 367EXPORT_SYMBOL_GPL(add_uevent_var);
320 368
321#if defined(CONFIG_NET) 369#if defined(CONFIG_NET)
322static int __init kobject_uevent_init(void) 370static int uevent_net_init(struct net *net)
323{ 371{
324 uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, 372 struct uevent_sock *ue_sk;
325 1, NULL, NULL, THIS_MODULE); 373
326 if (!uevent_sock) { 374 ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
375 if (!ue_sk)
376 return -ENOMEM;
377
378 ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
379 1, NULL, NULL, THIS_MODULE);
380 if (!ue_sk->sk) {
327 printk(KERN_ERR 381 printk(KERN_ERR
328 "kobject_uevent: unable to create netlink socket!\n"); 382 "kobject_uevent: unable to create netlink socket!\n");
383 kfree(ue_sk);
329 return -ENODEV; 384 return -ENODEV;
330 } 385 }
331 netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); 386 mutex_lock(&uevent_sock_mutex);
387 list_add_tail(&ue_sk->list, &uevent_sock_list);
388 mutex_unlock(&uevent_sock_mutex);
332 return 0; 389 return 0;
333} 390}
334 391
392static void uevent_net_exit(struct net *net)
393{
394 struct uevent_sock *ue_sk;
395
396 mutex_lock(&uevent_sock_mutex);
397 list_for_each_entry(ue_sk, &uevent_sock_list, list) {
398 if (sock_net(ue_sk->sk) == net)
399 goto found;
400 }
401 mutex_unlock(&uevent_sock_mutex);
402 return;
403
404found:
405 list_del(&ue_sk->list);
406 mutex_unlock(&uevent_sock_mutex);
407
408 netlink_kernel_release(ue_sk->sk);
409 kfree(ue_sk);
410}
411
412static struct pernet_operations uevent_net_ops = {
413 .init = uevent_net_init,
414 .exit = uevent_net_exit,
415};
416
417static int __init kobject_uevent_init(void)
418{
419 netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
420 return register_pernet_subsys(&uevent_net_ops);
421}
422
423
335postcore_initcall(kobject_uevent_init); 424postcore_initcall(kobject_uevent_init);
336#endif 425#endif
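After this change each network namespace gets its own NETLINK_KOBJECT_UEVENT socket and kobj_bcast_filter() keeps namespace-tagged events inside the matching namespace. For context, this is roughly what a userspace listener on the receiving end of those broadcasts looks like; it is not part of the patch, just the conventional group-1 bind:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

int main(void)
{
	struct sockaddr_nl sa = {
		.nl_family = AF_NETLINK,
		.nl_groups = 1,		/* the single uevent multicast group */
	};
	char buf[4096];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);

	if (fd < 0 || bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	for (;;) {
		ssize_t len = recv(fd, buf, sizeof(buf) - 1, 0);

		if (len <= 0)
			break;
		buf[len] = '\0';
		printf("%s\n", buf);	/* prints the header, e.g. "add@/devices/..." */
	}
	close(fd);
	return 0;
}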
diff --git a/lib/kref.c b/lib/kref.c
index 9ecd6e865610..d3d227a08a4b 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -13,17 +13,7 @@
13 13
14#include <linux/kref.h> 14#include <linux/kref.h>
15#include <linux/module.h> 15#include <linux/module.h>
16 16#include <linux/slab.h>
17/**
18 * kref_set - initialize object and set refcount to requested number.
19 * @kref: object in question.
20 * @num: initial reference counter
21 */
22void kref_set(struct kref *kref, int num)
23{
24 atomic_set(&kref->refcount, num);
25 smp_mb();
26}
27 17
28/** 18/**
29 * kref_init - initialize object. 19 * kref_init - initialize object.
@@ -31,7 +21,8 @@ void kref_set(struct kref *kref, int num)
31 */ 21 */
32void kref_init(struct kref *kref) 22void kref_init(struct kref *kref)
33{ 23{
34 kref_set(kref, 1); 24 atomic_set(&kref->refcount, 1);
25 smp_mb();
35} 26}
36 27
37/** 28/**
@@ -71,7 +62,6 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
71 return 0; 62 return 0;
72} 63}
73 64
74EXPORT_SYMBOL(kref_set);
75EXPORT_SYMBOL(kref_init); 65EXPORT_SYMBOL(kref_init);
76EXPORT_SYMBOL(kref_get); 66EXPORT_SYMBOL(kref_get);
77EXPORT_SYMBOL(kref_put); 67EXPORT_SYMBOL(kref_put);
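With kref_set() gone, every kref simply starts at one via kref_init() and the usual get/put discipline is unchanged. A short sketch of that discipline; struct my_data and its helpers are hypothetical:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct my_data {
	struct kref kref;
	int payload;
};

static void my_data_release(struct kref *kref)
{
	kfree(container_of(kref, struct my_data, kref));
}

static struct my_data *my_data_alloc(void)
{
	struct my_data *d = kzalloc(sizeof(*d), GFP_KERNEL);

	if (d)
		kref_init(&d->kref);	/* refcount starts at 1, no kref_set() needed */
	return d;
}

static inline void my_data_get(struct my_data *d)
{
	kref_get(&d->kref);
}

static inline void my_data_put(struct my_data *d)
{
	kref_put(&d->kref, my_data_release);	/* frees on the last put */
}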
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 000000000000..157cd88a6ffc
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
1#include <linux/kernel.h>
2#include <linux/gcd.h>
3#include <linux/module.h>
4
5/* Lowest common multiple */
6unsigned long lcm(unsigned long a, unsigned long b)
7{
8 if (a && b)
9 return (a * b) / gcd(a, b);
10 else if (b)
11 return b;
12
13 return a;
14}
15EXPORT_SYMBOL_GPL(lcm);
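As a worked example, lcm(4, 6) = (4 * 6) / gcd(4, 6) = 24 / 2 = 12, and lcm(0, 9) falls through to 9. A userspace sketch of the same identity follows; note that it divides before multiplying to delay overflow, a reordering the helper above does not do, so very large inputs remain the caller's concern either way:

#include <stdio.h>

static unsigned long gcd(unsigned long a, unsigned long b)
{
	while (b) {
		unsigned long t = a % b;

		a = b;
		b = t;
	}
	return a;
}

static unsigned long lcm(unsigned long a, unsigned long b)
{
	if (a && b)
		return (a / gcd(a, b)) * b;	/* divide first to delay overflow */
	return a ? a : b;			/* matches the kernel's zero handling */
}

int main(void)
{
	printf("lcm(4, 6) = %lu\n", lcm(4, 6));	/* 12 */
	printf("lcm(0, 9) = %lu\n", lcm(0, 9));	/* 9 */
	return 0;
}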
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 1a39f4e3ae1f..344c710d16ca 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add);
43 */ 43 */
44void list_del(struct list_head *entry) 44void list_del(struct list_head *entry)
45{ 45{
46 WARN(entry->next == LIST_POISON1,
47 "list_del corruption, next is LIST_POISON1 (%p)\n",
48 LIST_POISON1);
49 WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
50 "list_del corruption, prev is LIST_POISON2 (%p)\n",
51 LIST_POISON2);
46 WARN(entry->prev->next != entry, 52 WARN(entry->prev->next != entry,
47 "list_del corruption. prev->next should be %p, " 53 "list_del corruption. prev->next should be %p, "
48 "but was %p\n", entry, entry->prev->next); 54 "but was %p\n", entry, entry->prev->next);
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 19d11e0bb958..d7325c6b103f 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -4,99 +4,288 @@
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/list.h> 5#include <linux/list.h>
6 6
7#define MAX_LIST_LENGTH_BITS 20
8
9/*
10 * Returns a list organized in an intermediate format suited
11 * to chaining of merge() calls: null-terminated, no reserved or
12 * sentinel head node, "prev" links not maintained.
13 */
14static struct list_head *merge(void *priv,
15 int (*cmp)(void *priv, struct list_head *a,
16 struct list_head *b),
17 struct list_head *a, struct list_head *b)
18{
19 struct list_head head, *tail = &head;
20
21 while (a && b) {
22 /* if equal, take 'a' -- important for sort stability */
23 if ((*cmp)(priv, a, b) <= 0) {
24 tail->next = a;
25 a = a->next;
26 } else {
27 tail->next = b;
28 b = b->next;
29 }
30 tail = tail->next;
31 }
32 tail->next = a?:b;
33 return head.next;
34}
35
36/*
37 * Combine final list merge with restoration of standard doubly-linked
38 * list structure. This approach duplicates code from merge(), but
39 * runs faster than the tidier alternatives of either a separate final
40 * prev-link restoration pass, or maintaining the prev links
41 * throughout.
42 */
43static void merge_and_restore_back_links(void *priv,
44 int (*cmp)(void *priv, struct list_head *a,
45 struct list_head *b),
46 struct list_head *head,
47 struct list_head *a, struct list_head *b)
48{
49 struct list_head *tail = head;
50
51 while (a && b) {
52 /* if equal, take 'a' -- important for sort stability */
53 if ((*cmp)(priv, a, b) <= 0) {
54 tail->next = a;
55 a->prev = tail;
56 a = a->next;
57 } else {
58 tail->next = b;
59 b->prev = tail;
60 b = b->next;
61 }
62 tail = tail->next;
63 }
64 tail->next = a ? : b;
65
66 do {
67 /*
68 * In worst cases this loop may run many iterations.
69 * Continue callbacks to the client even though no
70 * element comparison is needed, so the client's cmp()
71 * routine can invoke cond_resched() periodically.
72 */
73 (*cmp)(priv, tail->next, tail->next);
74
75 tail->next->prev = tail;
76 tail = tail->next;
77 } while (tail->next);
78
79 tail->next = head;
80 head->prev = tail;
81}
82
7/** 83/**
8 * list_sort - sort a list. 84 * list_sort - sort a list
9 * @priv: private data, passed to @cmp 85 * @priv: private data, opaque to list_sort(), passed to @cmp
10 * @head: the list to sort 86 * @head: the list to sort
11 * @cmp: the elements comparison function 87 * @cmp: the elements comparison function
12 * 88 *
13 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It 89 * This function implements "merge sort", which has O(nlog(n))
14 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted 90 * complexity.
15 * in ascending order.
16 * 91 *
17 * The comparison function @cmp is supposed to return a negative value if @a is 92 * The comparison function @cmp must return a negative value if @a
18 * less than @b, and a positive value if @a is greater than @b. If @a and @b 93 * should sort before @b, and a positive value if @a should sort after
19 * are equivalent, then it does not matter what this function returns. 94 * @b. If @a and @b are equivalent, and their original relative
95 * ordering is to be preserved, @cmp must return 0.
20 */ 96 */
21void list_sort(void *priv, struct list_head *head, 97void list_sort(void *priv, struct list_head *head,
22 int (*cmp)(void *priv, struct list_head *a, 98 int (*cmp)(void *priv, struct list_head *a,
23 struct list_head *b)) 99 struct list_head *b))
24{ 100{
25 struct list_head *p, *q, *e, *list, *tail, *oldhead; 101 struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
26 int insize, nmerges, psize, qsize, i; 102 -- last slot is a sentinel */
103 int lev; /* index into part[] */
104 int max_lev = 0;
105 struct list_head *list;
27 106
28 if (list_empty(head)) 107 if (list_empty(head))
29 return; 108 return;
30 109
110 memset(part, 0, sizeof(part));
111
112 head->prev->next = NULL;
31 list = head->next; 113 list = head->next;
32 list_del(head);
33 insize = 1;
34 for (;;) {
35 p = oldhead = list;
36 list = tail = NULL;
37 nmerges = 0;
38
39 while (p) {
40 nmerges++;
41 q = p;
42 psize = 0;
43 for (i = 0; i < insize; i++) {
44 psize++;
45 q = q->next == oldhead ? NULL : q->next;
46 if (!q)
47 break;
48 }
49 114
50 qsize = insize; 115 while (list) {
51 while (psize > 0 || (qsize > 0 && q)) { 116 struct list_head *cur = list;
52 if (!psize) { 117 list = list->next;
53 e = q; 118 cur->next = NULL;
54 q = q->next; 119
55 qsize--; 120 for (lev = 0; part[lev]; lev++) {
56 if (q == oldhead) 121 cur = merge(priv, cmp, part[lev], cur);
57 q = NULL; 122 part[lev] = NULL;
58 } else if (!qsize || !q) { 123 }
59 e = p; 124 if (lev > max_lev) {
60 p = p->next; 125 if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
61 psize--; 126 printk_once(KERN_DEBUG "list passed to"
62 if (p == oldhead) 127 " list_sort() too long for"
63 p = NULL; 128 " efficiency\n");
64 } else if (cmp(priv, p, q) <= 0) { 129 lev--;
65 e = p;
66 p = p->next;
67 psize--;
68 if (p == oldhead)
69 p = NULL;
70 } else {
71 e = q;
72 q = q->next;
73 qsize--;
74 if (q == oldhead)
75 q = NULL;
76 }
77 if (tail)
78 tail->next = e;
79 else
80 list = e;
81 e->prev = tail;
82 tail = e;
83 } 130 }
84 p = q; 131 max_lev = lev;
85 } 132 }
133 part[lev] = cur;
134 }
135
136 for (lev = 0; lev < max_lev; lev++)
137 if (part[lev])
138 list = merge(priv, cmp, part[lev], list);
139
140 merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
141}
142EXPORT_SYMBOL(list_sort);
143
144#ifdef CONFIG_TEST_LIST_SORT
145
146#include <linux/random.h>
86 147
87 tail->next = list; 148/*
88 list->prev = tail; 149 * The pattern of set bits in the list length determines which cases
150 * are hit in list_sort().
151 */
152#define TEST_LIST_LEN (512+128+2) /* not including head */
153
154#define TEST_POISON1 0xDEADBEEF
155#define TEST_POISON2 0xA324354C
89 156
90 if (nmerges <= 1) 157struct debug_el {
91 break; 158 unsigned int poison1;
159 struct list_head list;
160 unsigned int poison2;
161 int value;
162 unsigned serial;
163};
92 164
93 insize *= 2; 165/* Array, containing pointers to all elements in the test list */
166static struct debug_el **elts __initdata;
167
168static int __init check(struct debug_el *ela, struct debug_el *elb)
169{
170 if (ela->serial >= TEST_LIST_LEN) {
171 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
172 ela->serial);
173 return -EINVAL;
94 } 174 }
175 if (elb->serial >= TEST_LIST_LEN) {
176 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
177 elb->serial);
178 return -EINVAL;
179 }
180 if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
181 printk(KERN_ERR "list_sort_test: error: phantom element\n");
182 return -EINVAL;
183 }
184 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
185 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
186 ela->poison1, ela->poison2);
187 return -EINVAL;
188 }
189 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
190 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
191 elb->poison1, elb->poison2);
192 return -EINVAL;
193 }
194 return 0;
195}
196
197static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
198{
199 struct debug_el *ela, *elb;
200
201 ela = container_of(a, struct debug_el, list);
202 elb = container_of(b, struct debug_el, list);
95 203
96 head->next = list; 204 check(ela, elb);
97 head->prev = list->prev; 205 return ela->value - elb->value;
98 list->prev->next = head;
99 list->prev = head;
100} 206}
101 207
102EXPORT_SYMBOL(list_sort); 208static int __init list_sort_test(void)
209{
210 int i, count = 1, err = -EINVAL;
211 struct debug_el *el;
212 struct list_head *cur, *tmp;
213 LIST_HEAD(head);
214
215 printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
216
217 elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
218 if (!elts) {
219 printk(KERN_ERR "list_sort_test: error: cannot allocate "
220 "memory\n");
221 goto exit;
222 }
223
224 for (i = 0; i < TEST_LIST_LEN; i++) {
225 el = kmalloc(sizeof(*el), GFP_KERNEL);
226 if (!el) {
227 printk(KERN_ERR "list_sort_test: error: cannot "
228 "allocate memory\n");
229 goto exit;
230 }
231 /* force some equivalencies */
232 el->value = random32() % (TEST_LIST_LEN/3);
233 el->serial = i;
234 el->poison1 = TEST_POISON1;
235 el->poison2 = TEST_POISON2;
236 elts[i] = el;
237 list_add_tail(&el->list, &head);
238 }
239
240 list_sort(NULL, &head, cmp);
241
242 for (cur = head.next; cur->next != &head; cur = cur->next) {
243 struct debug_el *el1;
244 int cmp_result;
245
246 if (cur->next->prev != cur) {
247 printk(KERN_ERR "list_sort_test: error: list is "
248 "corrupted\n");
249 goto exit;
250 }
251
252 cmp_result = cmp(NULL, cur, cur->next);
253 if (cmp_result > 0) {
254 printk(KERN_ERR "list_sort_test: error: list is not "
255 "sorted\n");
256 goto exit;
257 }
258
259 el = container_of(cur, struct debug_el, list);
260 el1 = container_of(cur->next, struct debug_el, list);
261 if (cmp_result == 0 && el->serial >= el1->serial) {
262 printk(KERN_ERR "list_sort_test: error: order of "
263 "equivalent elements not preserved\n");
264 goto exit;
265 }
266
267 if (check(el, el1)) {
268 printk(KERN_ERR "list_sort_test: error: element check "
269 "failed\n");
270 goto exit;
271 }
272 count++;
273 }
274
275 if (count != TEST_LIST_LEN) {
276 printk(KERN_ERR "list_sort_test: error: bad list length %d",
277 count);
278 goto exit;
279 }
280
281 err = 0;
282exit:
283 kfree(elts);
284 list_for_each_safe(cur, tmp, &head) {
285 list_del(cur);
286 kfree(container_of(cur, struct debug_el, list));
287 }
288 return err;
289}
290module_init(list_sort_test);
291#endif /* CONFIG_TEST_LIST_SORT */
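The new implementation behaves like a binary counter: part[k] is either empty or holds an already-sorted run of 2^k elements, and each incoming element "carries" through the occupied slots, merging as it goes, which gives O(n log n) work without the old in-place splitting pass. A standalone sketch of the same scheme on a singly linked list of ints; this is illustrative only, not the kernel code:

#include <stdio.h>

struct node { int val; struct node *next; };

/* Stable merge of two sorted, NULL-terminated runs. */
static struct node *merge(struct node *a, struct node *b)
{
	struct node head, *tail = &head;

	while (a && b) {
		if (a->val <= b->val) {		/* take 'a' on ties: stability */
			tail->next = a; a = a->next;
		} else {
			tail->next = b; b = b->next;
		}
		tail = tail->next;
	}
	tail->next = a ? a : b;
	return head.next;
}

#define MAX_BITS 20

static struct node *sort_list(struct node *list)
{
	struct node *part[MAX_BITS + 1] = { NULL };
	struct node *cur;
	int lev, max_lev = 0;

	while (list) {
		cur = list;
		list = list->next;
		cur->next = NULL;

		for (lev = 0; part[lev]; lev++) {	/* "carry" through filled slots */
			cur = merge(part[lev], cur);
			part[lev] = NULL;
		}
		if (lev > max_lev)
			max_lev = lev;
		part[lev] = cur;
	}

	cur = NULL;
	for (lev = 0; lev <= max_lev; lev++)		/* fold the leftover runs */
		if (part[lev])
			cur = merge(part[lev], cur);
	return cur;
}

int main(void)
{
	int vals[] = { 5, 1, 4, 1, 3 };
	struct node nodes[5], *list = NULL, *p;
	int i;

	for (i = 4; i >= 0; i--) {			/* build 5 -> 1 -> 4 -> 1 -> 3 */
		nodes[i].val = vals[i];
		nodes[i].next = list;
		list = &nodes[i];
	}
	for (p = sort_list(list); p; p = p->next)
		printf("%d ", p->val);			/* prints: 1 1 3 4 5 */
	printf("\n");
	return 0;
}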
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index 9cee17142b2c..000000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,532 +0,0 @@
1/*
2 * Procedures for maintaining information about logical memory blocks.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/bitops.h>
16#include <linux/lmb.h>
17
18#define LMB_ALLOC_ANYWHERE 0
19
20struct lmb lmb;
21
22static int lmb_debug;
23
24static int __init early_lmb(char *p)
25{
26 if (p && strstr(p, "debug"))
27 lmb_debug = 1;
28 return 0;
29}
30early_param("lmb", early_lmb);
31
32static void lmb_dump(struct lmb_region *region, char *name)
33{
34 unsigned long long base, size;
35 int i;
36
37 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
38
39 for (i = 0; i < region->cnt; i++) {
40 base = region->region[i].base;
41 size = region->region[i].size;
42
43 pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
44 name, i, base, base + size - 1, size);
45 }
46}
47
48void lmb_dump_all(void)
49{
50 if (!lmb_debug)
51 return;
52
53 pr_info("LMB configuration:\n");
54 pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size);
55 pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
56
57 lmb_dump(&lmb.memory, "memory");
58 lmb_dump(&lmb.reserved, "reserved");
59}
60
61static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
62 u64 size2)
63{
64 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
65}
66
67static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
68{
69 if (base2 == base1 + size1)
70 return 1;
71 else if (base1 == base2 + size2)
72 return -1;
73
74 return 0;
75}
76
77static long lmb_regions_adjacent(struct lmb_region *rgn,
78 unsigned long r1, unsigned long r2)
79{
80 u64 base1 = rgn->region[r1].base;
81 u64 size1 = rgn->region[r1].size;
82 u64 base2 = rgn->region[r2].base;
83 u64 size2 = rgn->region[r2].size;
84
85 return lmb_addrs_adjacent(base1, size1, base2, size2);
86}
87
88static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
89{
90 unsigned long i;
91
92 for (i = r; i < rgn->cnt - 1; i++) {
93 rgn->region[i].base = rgn->region[i + 1].base;
94 rgn->region[i].size = rgn->region[i + 1].size;
95 }
96 rgn->cnt--;
97}
98
99/* Assumption: base addr of region 1 < base addr of region 2 */
100static void lmb_coalesce_regions(struct lmb_region *rgn,
101 unsigned long r1, unsigned long r2)
102{
103 rgn->region[r1].size += rgn->region[r2].size;
104 lmb_remove_region(rgn, r2);
105}
106
107void __init lmb_init(void)
108{
109 /* Create a dummy zero size LMB which will get coalesced away later.
110 * This simplifies the lmb_add() code below...
111 */
112 lmb.memory.region[0].base = 0;
113 lmb.memory.region[0].size = 0;
114 lmb.memory.cnt = 1;
115
116 /* Ditto. */
117 lmb.reserved.region[0].base = 0;
118 lmb.reserved.region[0].size = 0;
119 lmb.reserved.cnt = 1;
120}
121
122void __init lmb_analyze(void)
123{
124 int i;
125
126 lmb.memory.size = 0;
127
128 for (i = 0; i < lmb.memory.cnt; i++)
129 lmb.memory.size += lmb.memory.region[i].size;
130}
131
132static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
133{
134 unsigned long coalesced = 0;
135 long adjacent, i;
136
137 if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
138 rgn->region[0].base = base;
139 rgn->region[0].size = size;
140 return 0;
141 }
142
143 /* First try and coalesce this LMB with another. */
144 for (i = 0; i < rgn->cnt; i++) {
145 u64 rgnbase = rgn->region[i].base;
146 u64 rgnsize = rgn->region[i].size;
147
148 if ((rgnbase == base) && (rgnsize == size))
149 /* Already have this region, so we're done */
150 return 0;
151
152 adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
153 if (adjacent > 0) {
154 rgn->region[i].base -= size;
155 rgn->region[i].size += size;
156 coalesced++;
157 break;
158 } else if (adjacent < 0) {
159 rgn->region[i].size += size;
160 coalesced++;
161 break;
162 }
163 }
164
165 if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
166 lmb_coalesce_regions(rgn, i, i+1);
167 coalesced++;
168 }
169
170 if (coalesced)
171 return coalesced;
172 if (rgn->cnt >= MAX_LMB_REGIONS)
173 return -1;
174
175 /* Couldn't coalesce the LMB, so add it to the sorted table. */
176 for (i = rgn->cnt - 1; i >= 0; i--) {
177 if (base < rgn->region[i].base) {
178 rgn->region[i+1].base = rgn->region[i].base;
179 rgn->region[i+1].size = rgn->region[i].size;
180 } else {
181 rgn->region[i+1].base = base;
182 rgn->region[i+1].size = size;
183 break;
184 }
185 }
186
187 if (base < rgn->region[0].base) {
188 rgn->region[0].base = base;
189 rgn->region[0].size = size;
190 }
191 rgn->cnt++;
192
193 return 0;
194}
195
196long lmb_add(u64 base, u64 size)
197{
198 struct lmb_region *_rgn = &lmb.memory;
199
200 /* On pSeries LPAR systems, the first LMB is our RMO region. */
201 if (base == 0)
202 lmb.rmo_size = size;
203
204 return lmb_add_region(_rgn, base, size);
205
206}
207
208long lmb_remove(u64 base, u64 size)
209{
210 struct lmb_region *rgn = &(lmb.memory);
211 u64 rgnbegin, rgnend;
212 u64 end = base + size;
213 int i;
214
215 rgnbegin = rgnend = 0; /* supress gcc warnings */
216
217 /* Find the region where (base, size) belongs to */
218 for (i=0; i < rgn->cnt; i++) {
219 rgnbegin = rgn->region[i].base;
220 rgnend = rgnbegin + rgn->region[i].size;
221
222 if ((rgnbegin <= base) && (end <= rgnend))
223 break;
224 }
225
226 /* Didn't find the region */
227 if (i == rgn->cnt)
228 return -1;
229
230 /* Check to see if we are removing entire region */
231 if ((rgnbegin == base) && (rgnend == end)) {
232 lmb_remove_region(rgn, i);
233 return 0;
234 }
235
236 /* Check to see if region is matching at the front */
237 if (rgnbegin == base) {
238 rgn->region[i].base = end;
239 rgn->region[i].size -= size;
240 return 0;
241 }
242
243 /* Check to see if the region is matching at the end */
244 if (rgnend == end) {
245 rgn->region[i].size -= size;
246 return 0;
247 }
248
249 /*
250 * We need to split the entry - adjust the current one to the
251 * beginging of the hole and add the region after hole.
252 */
253 rgn->region[i].size = base - rgn->region[i].base;
254 return lmb_add_region(rgn, end, rgnend - end);
255}
256
257long __init lmb_reserve(u64 base, u64 size)
258{
259 struct lmb_region *_rgn = &lmb.reserved;
260
261 BUG_ON(0 == size);
262
263 return lmb_add_region(_rgn, base, size);
264}
265
266long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
267{
268 unsigned long i;
269
270 for (i = 0; i < rgn->cnt; i++) {
271 u64 rgnbase = rgn->region[i].base;
272 u64 rgnsize = rgn->region[i].size;
273 if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
274 break;
275 }
276
277 return (i < rgn->cnt) ? i : -1;
278}
279
280static u64 lmb_align_down(u64 addr, u64 size)
281{
282 return addr & ~(size - 1);
283}
284
285static u64 lmb_align_up(u64 addr, u64 size)
286{
287 return (addr + (size - 1)) & ~(size - 1);
288}
289
290static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
291 u64 size, u64 align)
292{
293 u64 base, res_base;
294 long j;
295
296 base = lmb_align_down((end - size), align);
297 while (start <= base) {
298 j = lmb_overlaps_region(&lmb.reserved, base, size);
299 if (j < 0) {
300 /* this area isn't reserved, take it */
301 if (lmb_add_region(&lmb.reserved, base, size) < 0)
302 base = ~(u64)0;
303 return base;
304 }
305 res_base = lmb.reserved.region[j].base;
306 if (res_base < size)
307 break;
308 base = lmb_align_down(res_base - size, align);
309 }
310
311 return ~(u64)0;
312}
313
314static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
315 u64 (*nid_range)(u64, u64, int *),
316 u64 size, u64 align, int nid)
317{
318 u64 start, end;
319
320 start = mp->base;
321 end = start + mp->size;
322
323 start = lmb_align_up(start, align);
324 while (start < end) {
325 u64 this_end;
326 int this_nid;
327
328 this_end = nid_range(start, end, &this_nid);
329 if (this_nid == nid) {
330 u64 ret = lmb_alloc_nid_unreserved(start, this_end,
331 size, align);
332 if (ret != ~(u64)0)
333 return ret;
334 }
335 start = this_end;
336 }
337
338 return ~(u64)0;
339}
340
341u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
342 u64 (*nid_range)(u64 start, u64 end, int *nid))
343{
344 struct lmb_region *mem = &lmb.memory;
345 int i;
346
347 BUG_ON(0 == size);
348
349 size = lmb_align_up(size, align);
350
351 for (i = 0; i < mem->cnt; i++) {
352 u64 ret = lmb_alloc_nid_region(&mem->region[i],
353 nid_range,
354 size, align, nid);
355 if (ret != ~(u64)0)
356 return ret;
357 }
358
359 return lmb_alloc(size, align);
360}
361
362u64 __init lmb_alloc(u64 size, u64 align)
363{
364 return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
365}
366
367u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
368{
369 u64 alloc;
370
371 alloc = __lmb_alloc_base(size, align, max_addr);
372
373 if (alloc == 0)
374 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
375 (unsigned long long) size, (unsigned long long) max_addr);
376
377 return alloc;
378}
379
380u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
381{
382 long i, j;
383 u64 base = 0;
384 u64 res_base;
385
386 BUG_ON(0 == size);
387
388 size = lmb_align_up(size, align);
389
390 /* On some platforms, make sure we allocate lowmem */
391 /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
392 if (max_addr == LMB_ALLOC_ANYWHERE)
393 max_addr = LMB_REAL_LIMIT;
394
395 for (i = lmb.memory.cnt - 1; i >= 0; i--) {
396 u64 lmbbase = lmb.memory.region[i].base;
397 u64 lmbsize = lmb.memory.region[i].size;
398
399 if (lmbsize < size)
400 continue;
401 if (max_addr == LMB_ALLOC_ANYWHERE)
402 base = lmb_align_down(lmbbase + lmbsize - size, align);
403 else if (lmbbase < max_addr) {
404 base = min(lmbbase + lmbsize, max_addr);
405 base = lmb_align_down(base - size, align);
406 } else
407 continue;
408
409 while (base && lmbbase <= base) {
410 j = lmb_overlaps_region(&lmb.reserved, base, size);
411 if (j < 0) {
412 /* this area isn't reserved, take it */
413 if (lmb_add_region(&lmb.reserved, base, size) < 0)
414 return 0;
415 return base;
416 }
417 res_base = lmb.reserved.region[j].base;
418 if (res_base < size)
419 break;
420 base = lmb_align_down(res_base - size, align);
421 }
422 }
423 return 0;
424}
425
426/* You must call lmb_analyze() before this. */
427u64 __init lmb_phys_mem_size(void)
428{
429 return lmb.memory.size;
430}
431
432u64 lmb_end_of_DRAM(void)
433{
434 int idx = lmb.memory.cnt - 1;
435
436 return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
437}
438
439/* You must call lmb_analyze() after this. */
440void __init lmb_enforce_memory_limit(u64 memory_limit)
441{
442 unsigned long i;
443 u64 limit;
444 struct lmb_property *p;
445
446 if (!memory_limit)
447 return;
448
449 /* Truncate the lmb regions to satisfy the memory limit. */
450 limit = memory_limit;
451 for (i = 0; i < lmb.memory.cnt; i++) {
452 if (limit > lmb.memory.region[i].size) {
453 limit -= lmb.memory.region[i].size;
454 continue;
455 }
456
457 lmb.memory.region[i].size = limit;
458 lmb.memory.cnt = i + 1;
459 break;
460 }
461
462 if (lmb.memory.region[0].size < lmb.rmo_size)
463 lmb.rmo_size = lmb.memory.region[0].size;
464
465 memory_limit = lmb_end_of_DRAM();
466
467 /* And truncate any reserves above the limit also. */
468 for (i = 0; i < lmb.reserved.cnt; i++) {
469 p = &lmb.reserved.region[i];
470
471 if (p->base > memory_limit)
472 p->size = 0;
473 else if ((p->base + p->size) > memory_limit)
474 p->size = memory_limit - p->base;
475
476 if (p->size == 0) {
477 lmb_remove_region(&lmb.reserved, i);
478 i--;
479 }
480 }
481}
482
483int __init lmb_is_reserved(u64 addr)
484{
485 int i;
486
487 for (i = 0; i < lmb.reserved.cnt; i++) {
488 u64 upper = lmb.reserved.region[i].base +
489 lmb.reserved.region[i].size - 1;
490 if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
491 return 1;
492 }
493 return 0;
494}
495
496int lmb_is_region_reserved(u64 base, u64 size)
497{
498 return lmb_overlaps_region(&lmb.reserved, base, size);
499}
500
501/*
502 * Given a <base, len>, find which memory regions belong to this range.
503 * Adjust the request and return a contiguous chunk.
504 */
505int lmb_find(struct lmb_property *res)
506{
507 int i;
508 u64 rstart, rend;
509
510 rstart = res->base;
511 rend = rstart + res->size - 1;
512
513 for (i = 0; i < lmb.memory.cnt; i++) {
514 u64 start = lmb.memory.region[i].base;
515 u64 end = start + lmb.memory.region[i].size - 1;
516
517 if (start > rend)
518 return -1;
519
520 if ((end >= rstart) && (start < rend)) {
521 /* adjust the request */
522 if (rstart < start)
523 rstart = start;
524 if (rend > end)
525 rend = end;
526 res->base = rstart;
527 res->size = rend - rstart + 1;
528 return 0;
529 }
530 }
531 return -1;
532}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index c4706eb98d3d..00e8a02681a6 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -15,7 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <net/netlink.h> 16#include <net/netlink.h>
17 17
18static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { 18static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
19 [NLA_U8] = sizeof(u8), 19 [NLA_U8] = sizeof(u8),
20 [NLA_U16] = sizeof(u16), 20 [NLA_U16] = sizeof(u16),
21 [NLA_U32] = sizeof(u32), 21 [NLA_U32] = sizeof(u32),
@@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
23 [NLA_NESTED] = NLA_HDRLEN, 23 [NLA_NESTED] = NLA_HDRLEN,
24}; 24};
25 25
26static int validate_nla(struct nlattr *nla, int maxtype, 26static int validate_nla(const struct nlattr *nla, int maxtype,
27 const struct nla_policy *policy) 27 const struct nla_policy *policy)
28{ 28{
29 const struct nla_policy *pt; 29 const struct nla_policy *pt;
@@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype,
115 * 115 *
116 * Returns 0 on success or a negative error code. 116 * Returns 0 on success or a negative error code.
117 */ 117 */
118int nla_validate(struct nlattr *head, int len, int maxtype, 118int nla_validate(const struct nlattr *head, int len, int maxtype,
119 const struct nla_policy *policy) 119 const struct nla_policy *policy)
120{ 120{
121 struct nlattr *nla; 121 const struct nlattr *nla;
122 int rem, err; 122 int rem, err;
123 123
124 nla_for_each_attr(nla, head, len, rem) { 124 nla_for_each_attr(nla, head, len, rem) {
@@ -173,10 +173,10 @@ nla_policy_len(const struct nla_policy *p, int n)
173 * 173 *
174 * Returns 0 on success or a negative error code. 174 * Returns 0 on success or a negative error code.
175 */ 175 */
176int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, 176int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
177 const struct nla_policy *policy) 177 int len, const struct nla_policy *policy)
178{ 178{
179 struct nlattr *nla; 179 const struct nlattr *nla;
180 int rem, err; 180 int rem, err;
181 181
182 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 182 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
@@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
191 goto errout; 191 goto errout;
192 } 192 }
193 193
194 tb[type] = nla; 194 tb[type] = (struct nlattr *)nla;
195 } 195 }
196 } 196 }
197 197
@@ -212,14 +212,14 @@ errout:
212 * 212 *
213 * Returns the first attribute in the stream matching the specified type. 213 * Returns the first attribute in the stream matching the specified type.
214 */ 214 */
215struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) 215struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype)
216{ 216{
217 struct nlattr *nla; 217 const struct nlattr *nla;
218 int rem; 218 int rem;
219 219
220 nla_for_each_attr(nla, head, len, rem) 220 nla_for_each_attr(nla, head, len, rem)
221 if (nla_type(nla) == attrtype) 221 if (nla_type(nla) == attrtype)
222 return nla; 222 return (struct nlattr *)nla;
223 223
224 return NULL; 224 return NULL;
225} 225}
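The constification above does not change behaviour: attribute validation still compares each payload against a per-type minimum length taken from nla_attr_minlen. A stripped-down userspace sketch of that table-driven check (the attribute types and the choice to ignore out-of-range types are illustrative, not the netlink API itself):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

enum { ATTR_UNSPEC, ATTR_U8, ATTR_U16, ATTR_U32, ATTR_U64, ATTR_MAX = ATTR_U64 };

/* Minimum payload length per attribute type, indexed like nla_attr_minlen. */
static const uint16_t attr_minlen[ATTR_MAX + 1] = {
        [ATTR_U8]  = sizeof(uint8_t),
        [ATTR_U16] = sizeof(uint16_t),
        [ATTR_U32] = sizeof(uint32_t),
        [ATTR_U64] = sizeof(uint64_t),
};

/* Return 0 if an attribute of 'type' carrying 'payload_len' bytes passes. */
static int validate_attr(int type, size_t payload_len)
{
        if (type <= 0 || type > ATTR_MAX)
                return 0;                       /* this sketch ignores unknown types */
        if (payload_len < attr_minlen[type])
                return -1;                      /* truncated attribute */
        return 0;
}

int main(void)
{
        printf("u32 with 4 bytes: %d\n", validate_attr(ATTR_U32, 4));   /* 0  */
        printf("u32 with 2 bytes: %d\n", validate_attr(ATTR_U32, 2));   /* -1 */
        return 0;
}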
diff --git a/lib/parser.c b/lib/parser.c
index fb34977246bb..6e89eca5cca0 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -128,12 +128,13 @@ static int match_number(substring_t *s, int *result, int base)
128 char *endp; 128 char *endp;
129 char *buf; 129 char *buf;
130 int ret; 130 int ret;
131 size_t len = s->to - s->from;
131 132
132 buf = kmalloc(s->to - s->from + 1, GFP_KERNEL); 133 buf = kmalloc(len + 1, GFP_KERNEL);
133 if (!buf) 134 if (!buf)
134 return -ENOMEM; 135 return -ENOMEM;
135 memcpy(buf, s->from, s->to - s->from); 136 memcpy(buf, s->from, len);
136 buf[s->to - s->from] = '\0'; 137 buf[len] = '\0';
137 *result = simple_strtol(buf, &endp, base); 138 *result = simple_strtol(buf, &endp, base);
138 ret = 0; 139 ret = 0;
139 if (endp == buf) 140 if (endp == buf)
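The parser.c hunk only factors the substring length out into a local, but the pattern it tidies up is worth spelling out: copy a non-NUL-terminated substring into a scratch buffer, terminate it, then hand it to the string-to-number routine. A plain-libc sketch of that pattern (parse_number and the option string are illustrative; the kernel's match_number() works on substring_t and simple_strtol()):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/* Parse the integer in [from, to) without assuming NUL termination. */
static int parse_number(const char *from, const char *to, int base, long *result)
{
        size_t len = to - from;
        char *endp, *buf = malloc(len + 1);
        int ret;

        if (!buf)
                return -ENOMEM;
        memcpy(buf, from, len);
        buf[len] = '\0';                        /* strtol needs a terminated string */
        *result = strtol(buf, &endp, base);
        ret = (endp == buf) ? -EINVAL : 0;      /* no digits consumed at all? */
        free(buf);
        return ret;
}

int main(void)
{
        const char *opt = "mode=0755,ro";
        long mode;

        /* parse just the "0755" span of the option string */
        if (!parse_number(opt + 5, opt + 9, 8, &mode))
                printf("mode = %lo\n", mode);
        return 0;
}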
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d734447..28f2c33c6b53 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -8,10 +8,53 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/cpu.h> 9#include <linux/cpu.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/debugobjects.h>
11 12
12static LIST_HEAD(percpu_counters); 13static LIST_HEAD(percpu_counters);
13static DEFINE_MUTEX(percpu_counters_lock); 14static DEFINE_MUTEX(percpu_counters_lock);
14 15
16#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
17
18static struct debug_obj_descr percpu_counter_debug_descr;
19
20static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
21{
22 struct percpu_counter *fbc = addr;
23
24 switch (state) {
25 case ODEBUG_STATE_ACTIVE:
26 percpu_counter_destroy(fbc);
27 debug_object_free(fbc, &percpu_counter_debug_descr);
28 return 1;
29 default:
30 return 0;
31 }
32}
33
34static struct debug_obj_descr percpu_counter_debug_descr = {
35 .name = "percpu_counter",
36 .fixup_free = percpu_counter_fixup_free,
37};
38
39static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
40{
41 debug_object_init(fbc, &percpu_counter_debug_descr);
42 debug_object_activate(fbc, &percpu_counter_debug_descr);
43}
44
45static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
46{
47 debug_object_deactivate(fbc, &percpu_counter_debug_descr);
48 debug_object_free(fbc, &percpu_counter_debug_descr);
49}
50
51#else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
52static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
53{ }
54static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
55{ }
56#endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
57
15void percpu_counter_set(struct percpu_counter *fbc, s64 amount) 58void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
16{ 59{
17 int cpu; 60 int cpu;
@@ -29,20 +72,18 @@ EXPORT_SYMBOL(percpu_counter_set);
29void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) 72void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
30{ 73{
31 s64 count; 74 s64 count;
32 s32 *pcount;
33 int cpu = get_cpu();
34 75
35 pcount = per_cpu_ptr(fbc->counters, cpu); 76 preempt_disable();
36 count = *pcount + amount; 77 count = __this_cpu_read(*fbc->counters) + amount;
37 if (count >= batch || count <= -batch) { 78 if (count >= batch || count <= -batch) {
38 spin_lock(&fbc->lock); 79 spin_lock(&fbc->lock);
39 fbc->count += count; 80 fbc->count += count;
40 *pcount = 0; 81 __this_cpu_write(*fbc->counters, 0);
41 spin_unlock(&fbc->lock); 82 spin_unlock(&fbc->lock);
42 } else { 83 } else {
43 *pcount = count; 84 __this_cpu_write(*fbc->counters, count);
44 } 85 }
45 put_cpu(); 86 preempt_enable();
46} 87}
47EXPORT_SYMBOL(__percpu_counter_add); 88EXPORT_SYMBOL(__percpu_counter_add);
48 89
@@ -75,7 +116,11 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
75 fbc->counters = alloc_percpu(s32); 116 fbc->counters = alloc_percpu(s32);
76 if (!fbc->counters) 117 if (!fbc->counters)
77 return -ENOMEM; 118 return -ENOMEM;
119
120 debug_percpu_counter_activate(fbc);
121
78#ifdef CONFIG_HOTPLUG_CPU 122#ifdef CONFIG_HOTPLUG_CPU
123 INIT_LIST_HEAD(&fbc->list);
79 mutex_lock(&percpu_counters_lock); 124 mutex_lock(&percpu_counters_lock);
80 list_add(&fbc->list, &percpu_counters); 125 list_add(&fbc->list, &percpu_counters);
81 mutex_unlock(&percpu_counters_lock); 126 mutex_unlock(&percpu_counters_lock);
@@ -89,6 +134,8 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
89 if (!fbc->counters) 134 if (!fbc->counters)
90 return; 135 return;
91 136
137 debug_percpu_counter_deactivate(fbc);
138
92#ifdef CONFIG_HOTPLUG_CPU 139#ifdef CONFIG_HOTPLUG_CPU
93 mutex_lock(&percpu_counters_lock); 140 mutex_lock(&percpu_counters_lock);
94 list_del(&fbc->list); 141 list_del(&fbc->list);
@@ -137,6 +184,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
137 return NOTIFY_OK; 184 return NOTIFY_OK;
138} 185}
139 186
187/*
188 * Compare counter against given value.
189 * Return 1 if greater, 0 if equal and -1 if less
190 */
191int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
192{
193 s64 count;
194
195 count = percpu_counter_read(fbc);
196 /* Check to see if rough count will be sufficient for comparison */
197 if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
198 if (count > rhs)
199 return 1;
200 else
201 return -1;
202 }
203 /* Need to use precise count */
204 count = percpu_counter_sum(fbc);
205 if (count > rhs)
206 return 1;
207 else if (count < rhs)
208 return -1;
209 else
210 return 0;
211}
212EXPORT_SYMBOL(percpu_counter_compare);
213
140static int __init percpu_counter_startup(void) 214static int __init percpu_counter_startup(void)
141{ 215{
142 compute_batch_value(); 216 compute_batch_value();
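percpu_counter_compare() above exploits the batching rule enforced by __percpu_counter_add(): the cheap global count can be off by at most batch per online CPU, so the exact (and expensive) sum is only needed when the comparison value falls inside that error band. A userspace sketch of the same two-step comparison, with a plain array standing in for the per-CPU counters (illustrative only, not the kernel data structure):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4
#define BATCH   32

struct counter {
        long long global;               /* flushed part, cheap to read */
        int       percpu[NR_CPUS];      /* unflushed per-CPU deltas, |delta| < BATCH */
};

static long long approx_read(const struct counter *c)
{
        return c->global;
}

static long long exact_sum(const struct counter *c)
{
        long long sum = c->global;

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                sum += c->percpu[cpu];
        return sum;
}

/* Return 1, 0 or -1 for counter >, == or < rhs. */
static int counter_compare(const struct counter *c, long long rhs)
{
        long long count = approx_read(c);

        /* Outside the worst-case drift, the rough value is already decisive. */
        if (llabs(count - rhs) > (long long)BATCH * NR_CPUS)
                return count > rhs ? 1 : -1;

        /* Otherwise fall back to the exact (and more expensive) sum. */
        count = exact_sum(c);
        return (count > rhs) - (count < rhs);
}

int main(void)
{
        struct counter c = { .global = 1000, .percpu = { 5, -3, 12, 0 } };

        printf("%d %d %d\n",
               counter_compare(&c, 0),          /* clearly greater:  1 */
               counter_compare(&c, 1014),       /* needs exact sum:  0 */
               counter_compare(&c, 5000));      /* clearly less:    -1 */
        return 0;
}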
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 92cdd9936e3d..5086bb962b4d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/notifier.h> 29#include <linux/notifier.h>
30#include <linux/cpu.h> 30#include <linux/cpu.h>
31#include <linux/gfp.h>
32#include <linux/string.h> 31#include <linux/string.h>
33#include <linux/bitops.h> 32#include <linux/bitops.h>
34#include <linux/rcupdate.h> 33#include <linux/rcupdate.h>
@@ -50,7 +49,7 @@ struct radix_tree_node {
50 unsigned int height; /* Height from the bottom */ 49 unsigned int height; /* Height from the bottom */
51 unsigned int count; 50 unsigned int count;
52 struct rcu_head rcu_head; 51 struct rcu_head rcu_head;
53 void *slots[RADIX_TREE_MAP_SIZE]; 52 void __rcu *slots[RADIX_TREE_MAP_SIZE];
54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
55}; 54};
56 55
@@ -83,6 +82,16 @@ struct radix_tree_preload {
83}; 82};
84static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 83static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
85 84
85static inline void *ptr_to_indirect(void *ptr)
86{
87 return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
88}
89
90static inline void *indirect_to_ptr(void *ptr)
91{
92 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
93}
94
86static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 95static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
87{ 96{
88 return root->gfp_mask & __GFP_BITS_MASK; 97 return root->gfp_mask & __GFP_BITS_MASK;
@@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
175{ 184{
176 struct radix_tree_node *node = 185 struct radix_tree_node *node =
177 container_of(head, struct radix_tree_node, rcu_head); 186 container_of(head, struct radix_tree_node, rcu_head);
187 int i;
178 188
179 /* 189 /*
180 * must only free zeroed nodes into the slab. radix_tree_shrink 190 * must only free zeroed nodes into the slab. radix_tree_shrink
181 * can leave us with a non-NULL entry in the first slot, so clear 191 * can leave us with a non-NULL entry in the first slot, so clear
182 * that here to make sure. 192 * that here to make sure.
183 */ 193 */
184 tag_clear(node, 0, 0); 194 for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
185 tag_clear(node, 1, 0); 195 tag_clear(node, i, 0);
196
186 node->slots[0] = NULL; 197 node->slots[0] = NULL;
187 node->count = 0; 198 node->count = 0;
188 199
@@ -264,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
264 return -ENOMEM; 275 return -ENOMEM;
265 276
266 /* Increase the height. */ 277 /* Increase the height. */
267 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 278 node->slots[0] = indirect_to_ptr(root->rnode);
268 279
269 /* Propagate the aggregated tag info into the new root */ 280 /* Propagate the aggregated tag info into the new root */
270 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 281 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -275,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
275 newheight = root->height+1; 286 newheight = root->height+1;
276 node->height = newheight; 287 node->height = newheight;
277 node->count = 1; 288 node->count = 1;
278 node = radix_tree_ptr_to_indirect(node); 289 node = ptr_to_indirect(node);
279 rcu_assign_pointer(root->rnode, node); 290 rcu_assign_pointer(root->rnode, node);
280 root->height = newheight; 291 root->height = newheight;
281 } while (height > root->height); 292 } while (height > root->height);
@@ -308,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
308 return error; 319 return error;
309 } 320 }
310 321
311 slot = radix_tree_indirect_to_ptr(root->rnode); 322 slot = indirect_to_ptr(root->rnode);
312 323
313 height = root->height; 324 height = root->height;
314 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 325 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -324,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
324 rcu_assign_pointer(node->slots[offset], slot); 335 rcu_assign_pointer(node->slots[offset], slot);
325 node->count++; 336 node->count++;
326 } else 337 } else
327 rcu_assign_pointer(root->rnode, 338 rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
328 radix_tree_ptr_to_indirect(slot));
329 } 339 }
330 340
331 /* Go a level down */ 341 /* Go a level down */
@@ -364,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
364 unsigned int height, shift; 374 unsigned int height, shift;
365 struct radix_tree_node *node, **slot; 375 struct radix_tree_node *node, **slot;
366 376
367 node = rcu_dereference(root->rnode); 377 node = rcu_dereference_raw(root->rnode);
368 if (node == NULL) 378 if (node == NULL)
369 return NULL; 379 return NULL;
370 380
@@ -373,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
373 return NULL; 383 return NULL;
374 return is_slot ? (void *)&root->rnode : node; 384 return is_slot ? (void *)&root->rnode : node;
375 } 385 }
376 node = radix_tree_indirect_to_ptr(node); 386 node = indirect_to_ptr(node);
377 387
378 height = node->height; 388 height = node->height;
379 if (index > radix_tree_maxindex(height)) 389 if (index > radix_tree_maxindex(height))
@@ -384,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
384 do { 394 do {
385 slot = (struct radix_tree_node **) 395 slot = (struct radix_tree_node **)
386 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 396 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
387 node = rcu_dereference(*slot); 397 node = rcu_dereference_raw(*slot);
388 if (node == NULL) 398 if (node == NULL)
389 return NULL; 399 return NULL;
390 400
@@ -392,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
392 height--; 402 height--;
393 } while (height > 0); 403 } while (height > 0);
394 404
395 return is_slot ? (void *)slot:node; 405 return is_slot ? (void *)slot : indirect_to_ptr(node);
396} 406}
397 407
398/** 408/**
@@ -454,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
454 height = root->height; 464 height = root->height;
455 BUG_ON(index > radix_tree_maxindex(height)); 465 BUG_ON(index > radix_tree_maxindex(height));
456 466
457 slot = radix_tree_indirect_to_ptr(root->rnode); 467 slot = indirect_to_ptr(root->rnode);
458 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 468 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
459 469
460 while (height > 0) { 470 while (height > 0) {
@@ -508,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
508 518
509 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 519 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
510 pathp->node = NULL; 520 pathp->node = NULL;
511 slot = radix_tree_indirect_to_ptr(root->rnode); 521 slot = indirect_to_ptr(root->rnode);
512 522
513 while (height > 0) { 523 while (height > 0) {
514 int offset; 524 int offset;
@@ -556,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
556 * 566 *
557 * 0: tag not present or not set 567 * 0: tag not present or not set
558 * 1: tag set 568 * 1: tag set
569 *
570 * Note that the return value of this function may not be relied on, even if
571 * the RCU lock is held, unless tag modification and node deletion are excluded
572 * from concurrency.
559 */ 573 */
560int radix_tree_tag_get(struct radix_tree_root *root, 574int radix_tree_tag_get(struct radix_tree_root *root,
561 unsigned long index, unsigned int tag) 575 unsigned long index, unsigned int tag)
@@ -568,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root,
568 if (!root_tag_get(root, tag)) 582 if (!root_tag_get(root, tag))
569 return 0; 583 return 0;
570 584
571 node = rcu_dereference(root->rnode); 585 node = rcu_dereference_raw(root->rnode);
572 if (node == NULL) 586 if (node == NULL)
573 return 0; 587 return 0;
574 588
575 if (!radix_tree_is_indirect_ptr(node)) 589 if (!radix_tree_is_indirect_ptr(node))
576 return (index == 0); 590 return (index == 0);
577 node = radix_tree_indirect_to_ptr(node); 591 node = indirect_to_ptr(node);
578 592
579 height = node->height; 593 height = node->height;
580 if (index > radix_tree_maxindex(height)) 594 if (index > radix_tree_maxindex(height))
@@ -596,13 +610,9 @@ int radix_tree_tag_get(struct radix_tree_root *root,
596 */ 610 */
597 if (!tag_get(node, tag, offset)) 611 if (!tag_get(node, tag, offset))
598 saw_unset_tag = 1; 612 saw_unset_tag = 1;
599 if (height == 1) { 613 if (height == 1)
600 int ret = tag_get(node, tag, offset); 614 return !!tag_get(node, tag, offset);
601 615 node = rcu_dereference_raw(node->slots[offset]);
602 BUG_ON(ret && saw_unset_tag);
603 return !!ret;
604 }
605 node = rcu_dereference(node->slots[offset]);
606 shift -= RADIX_TREE_MAP_SHIFT; 616 shift -= RADIX_TREE_MAP_SHIFT;
607 height--; 617 height--;
608 } 618 }
@@ -610,6 +620,134 @@ int radix_tree_tag_get(struct radix_tree_root *root,
610EXPORT_SYMBOL(radix_tree_tag_get); 620EXPORT_SYMBOL(radix_tree_tag_get);
611 621
612/** 622/**
 623 * radix_tree_range_tag_if_tagged - for each item in a given range, set the
 624 * given tag if the item has another tag set
625 * @root: radix tree root
626 * @first_indexp: pointer to a starting index of a range to scan
627 * @last_index: last index of a range to scan
628 * @nr_to_tag: maximum number items to tag
629 * @iftag: tag index to test
630 * @settag: tag index to set if tested tag is set
631 *
 632 * This function scans the range of the radix tree from first_index to
 633 * last_index (inclusive). For each item in the range that has iftag set,
 634 * the function also sets settag. It stops either after tagging nr_to_tag
 635 * items or after reaching last_index.
636 *
637 * The tags must be set from the leaf level only and propagated back up the
638 * path to the root. We must do this so that we resolve the full path before
639 * setting any tags on intermediate nodes. If we set tags as we descend, then
640 * we can get to the leaf node and find that the index that has the iftag
 641 * set is outside the range we are scanning. This results in dangling tags and
642 * can lead to problems with later tag operations (e.g. livelocks on lookups).
643 *
 644 * The function returns the number of leaves where the tag was set and sets
645 * *first_indexp to the first unscanned index.
646 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
647 * be prepared to handle that.
648 */
649unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
650 unsigned long *first_indexp, unsigned long last_index,
651 unsigned long nr_to_tag,
652 unsigned int iftag, unsigned int settag)
653{
654 unsigned int height = root->height;
655 struct radix_tree_path path[height];
656 struct radix_tree_path *pathp = path;
657 struct radix_tree_node *slot;
658 unsigned int shift;
659 unsigned long tagged = 0;
660 unsigned long index = *first_indexp;
661
662 last_index = min(last_index, radix_tree_maxindex(height));
663 if (index > last_index)
664 return 0;
665 if (!nr_to_tag)
666 return 0;
667 if (!root_tag_get(root, iftag)) {
668 *first_indexp = last_index + 1;
669 return 0;
670 }
671 if (height == 0) {
672 *first_indexp = last_index + 1;
673 root_tag_set(root, settag);
674 return 1;
675 }
676
677 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
678 slot = indirect_to_ptr(root->rnode);
679
680 /*
681 * we fill the path from (root->height - 2) to 0, leaving the index at
682 * (root->height - 1) as a terminator. Zero the node in the terminator
683 * so that we can use this to end walk loops back up the path.
684 */
685 path[height - 1].node = NULL;
686
687 for (;;) {
688 int offset;
689
690 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
691 if (!slot->slots[offset])
692 goto next;
693 if (!tag_get(slot, iftag, offset))
694 goto next;
695 if (height > 1) {
696 /* Go down one level */
697 height--;
698 shift -= RADIX_TREE_MAP_SHIFT;
699 path[height - 1].node = slot;
700 path[height - 1].offset = offset;
701 slot = slot->slots[offset];
702 continue;
703 }
704
705 /* tag the leaf */
706 tagged++;
707 tag_set(slot, settag, offset);
708
709 /* walk back up the path tagging interior nodes */
710 pathp = &path[0];
711 while (pathp->node) {
712 /* stop if we find a node with the tag already set */
713 if (tag_get(pathp->node, settag, pathp->offset))
714 break;
715 tag_set(pathp->node, settag, pathp->offset);
716 pathp++;
717 }
718
719next:
720 /* Go to next item at level determined by 'shift' */
721 index = ((index >> shift) + 1) << shift;
722 /* Overflow can happen when last_index is ~0UL... */
723 if (index > last_index || !index)
724 break;
725 if (tagged >= nr_to_tag)
726 break;
727 while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
728 /*
729 * We've fully scanned this node. Go up. Because
730 * last_index is guaranteed to be in the tree, what
731 * we do below cannot wander astray.
732 */
733 slot = path[height - 1].node;
734 height++;
735 shift += RADIX_TREE_MAP_SHIFT;
736 }
737 }
738 /*
739 * The iftag must have been set somewhere because otherwise
 740 * we would have returned immediately at the beginning of the function
741 */
742 root_tag_set(root, settag);
743 *first_indexp = index;
744
745 return tagged;
746}
747EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
748
749
750/**
613 * radix_tree_next_hole - find the next hole (not-present entry) 751 * radix_tree_next_hole - find the next hole (not-present entry)
614 * @root: tree root 752 * @root: tree root
615 * @index: index key 753 * @index: index key
@@ -657,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
657 * 795 *
658 * Returns: the index of the hole if found, otherwise returns an index 796 * Returns: the index of the hole if found, otherwise returns an index
659 * outside of the set specified (in which case 'index - return >= max_scan' 797 * outside of the set specified (in which case 'index - return >= max_scan'
660 * will be true). In rare cases of wrap-around, LONG_MAX will be returned. 798 * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
661 * 799 *
662 * radix_tree_next_hole may be called under rcu_read_lock. However, like 800 * radix_tree_next_hole may be called under rcu_read_lock. However, like
663 * radix_tree_gang_lookup, this will not atomically search a snapshot of 801 * radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -675,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
675 if (!radix_tree_lookup(root, index)) 813 if (!radix_tree_lookup(root, index))
676 break; 814 break;
677 index--; 815 index--;
678 if (index == LONG_MAX) 816 if (index == ULONG_MAX)
679 break; 817 break;
680 } 818 }
681 819
@@ -711,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
711 } 849 }
712 850
713 shift -= RADIX_TREE_MAP_SHIFT; 851 shift -= RADIX_TREE_MAP_SHIFT;
714 slot = rcu_dereference(slot->slots[i]); 852 slot = rcu_dereference_raw(slot->slots[i]);
715 if (slot == NULL) 853 if (slot == NULL)
716 goto out; 854 goto out;
717 } 855 }
@@ -758,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
758 unsigned long cur_index = first_index; 896 unsigned long cur_index = first_index;
759 unsigned int ret; 897 unsigned int ret;
760 898
761 node = rcu_dereference(root->rnode); 899 node = rcu_dereference_raw(root->rnode);
762 if (!node) 900 if (!node)
763 return 0; 901 return 0;
764 902
@@ -768,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
768 results[0] = node; 906 results[0] = node;
769 return 1; 907 return 1;
770 } 908 }
771 node = radix_tree_indirect_to_ptr(node); 909 node = indirect_to_ptr(node);
772 910
773 max_index = radix_tree_maxindex(node->height); 911 max_index = radix_tree_maxindex(node->height);
774 912
@@ -787,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
787 slot = *(((void ***)results)[ret + i]); 925 slot = *(((void ***)results)[ret + i]);
788 if (!slot) 926 if (!slot)
789 continue; 927 continue;
790 results[ret + nr_found] = rcu_dereference(slot); 928 results[ret + nr_found] =
929 indirect_to_ptr(rcu_dereference_raw(slot));
791 nr_found++; 930 nr_found++;
792 } 931 }
793 ret += nr_found; 932 ret += nr_found;
@@ -826,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
826 unsigned long cur_index = first_index; 965 unsigned long cur_index = first_index;
827 unsigned int ret; 966 unsigned int ret;
828 967
829 node = rcu_dereference(root->rnode); 968 node = rcu_dereference_raw(root->rnode);
830 if (!node) 969 if (!node)
831 return 0; 970 return 0;
832 971
@@ -836,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
836 results[0] = (void **)&root->rnode; 975 results[0] = (void **)&root->rnode;
837 return 1; 976 return 1;
838 } 977 }
839 node = radix_tree_indirect_to_ptr(node); 978 node = indirect_to_ptr(node);
840 979
841 max_index = radix_tree_maxindex(node->height); 980 max_index = radix_tree_maxindex(node->height);
842 981
@@ -915,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
915 } 1054 }
916 } 1055 }
917 shift -= RADIX_TREE_MAP_SHIFT; 1056 shift -= RADIX_TREE_MAP_SHIFT;
918 slot = rcu_dereference(slot->slots[i]); 1057 slot = rcu_dereference_raw(slot->slots[i]);
919 if (slot == NULL) 1058 if (slot == NULL)
920 break; 1059 break;
921 } 1060 }
@@ -951,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
951 if (!root_tag_get(root, tag)) 1090 if (!root_tag_get(root, tag))
952 return 0; 1091 return 0;
953 1092
954 node = rcu_dereference(root->rnode); 1093 node = rcu_dereference_raw(root->rnode);
955 if (!node) 1094 if (!node)
956 return 0; 1095 return 0;
957 1096
@@ -961,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
961 results[0] = node; 1100 results[0] = node;
962 return 1; 1101 return 1;
963 } 1102 }
964 node = radix_tree_indirect_to_ptr(node); 1103 node = indirect_to_ptr(node);
965 1104
966 max_index = radix_tree_maxindex(node->height); 1105 max_index = radix_tree_maxindex(node->height);
967 1106
@@ -980,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
980 slot = *(((void ***)results)[ret + i]); 1119 slot = *(((void ***)results)[ret + i]);
981 if (!slot) 1120 if (!slot)
982 continue; 1121 continue;
983 results[ret + nr_found] = rcu_dereference(slot); 1122 results[ret + nr_found] =
1123 indirect_to_ptr(rcu_dereference_raw(slot));
984 nr_found++; 1124 nr_found++;
985 } 1125 }
986 ret += nr_found; 1126 ret += nr_found;
@@ -1020,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1020 if (!root_tag_get(root, tag)) 1160 if (!root_tag_get(root, tag))
1021 return 0; 1161 return 0;
1022 1162
1023 node = rcu_dereference(root->rnode); 1163 node = rcu_dereference_raw(root->rnode);
1024 if (!node) 1164 if (!node)
1025 return 0; 1165 return 0;
1026 1166
@@ -1030,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1030 results[0] = (void **)&root->rnode; 1170 results[0] = (void **)&root->rnode;
1031 return 1; 1171 return 1;
1032 } 1172 }
1033 node = radix_tree_indirect_to_ptr(node); 1173 node = indirect_to_ptr(node);
1034 1174
1035 max_index = radix_tree_maxindex(node->height); 1175 max_index = radix_tree_maxindex(node->height);
1036 1176
@@ -1066,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1066 void *newptr; 1206 void *newptr;
1067 1207
1068 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1208 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1069 to_free = radix_tree_indirect_to_ptr(to_free); 1209 to_free = indirect_to_ptr(to_free);
1070 1210
1071 /* 1211 /*
1072 * The candidate node has more than one child, or its child 1212 * The candidate node has more than one child, or its child
@@ -1079,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1079 1219
1080 /* 1220 /*
1081 * We don't need rcu_assign_pointer(), since we are simply 1221 * We don't need rcu_assign_pointer(), since we are simply
1082 * moving the node from one part of the tree to another. If 1222 * moving the node from one part of the tree to another: if it
1083 * it was safe to dereference the old pointer to it 1223 * was safe to dereference the old pointer to it
1084 * (to_free->slots[0]), it will be safe to dereference the new 1224 * (to_free->slots[0]), it will be safe to dereference the new
1085 * one (root->rnode). 1225 * one (root->rnode) as far as dependent read barriers go.
1086 */ 1226 */
1087 newptr = to_free->slots[0]; 1227 newptr = to_free->slots[0];
1088 if (root->height > 1) 1228 if (root->height > 1)
1089 newptr = radix_tree_ptr_to_indirect(newptr); 1229 newptr = ptr_to_indirect(newptr);
1090 root->rnode = newptr; 1230 root->rnode = newptr;
1091 root->height--; 1231 root->height--;
1232
1233 /*
1234 * We have a dilemma here. The node's slot[0] must not be
1235 * NULLed in case there are concurrent lookups expecting to
1236 * find the item. However if this was a bottom-level node,
1237 * then it may be subject to the slot pointer being visible
1238 * to callers dereferencing it. If the item corresponding to
1239 * slot[0] is subsequently deleted, these callers would expect
1240 * their slot to become empty sooner or later.
1241 *
1242 * For example, lockless pagecache will look up a slot, deref
1243 * the page pointer, and if the page is 0 refcount it means it
1244 * was concurrently deleted from pagecache so try the deref
1245 * again. Fortunately there is already a requirement for logic
1246 * to retry the entire slot lookup -- the indirect pointer
1247 * problem (replacing direct root node with an indirect pointer
1248 * also results in a stale slot). So tag the slot as indirect
1249 * to force callers to retry.
1250 */
1251 if (root->height == 0)
1252 *((unsigned long *)&to_free->slots[0]) |=
1253 RADIX_TREE_INDIRECT_PTR;
1254
1092 radix_tree_node_free(to_free); 1255 radix_tree_node_free(to_free);
1093 } 1256 }
1094} 1257}
@@ -1125,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1125 root->rnode = NULL; 1288 root->rnode = NULL;
1126 goto out; 1289 goto out;
1127 } 1290 }
1128 slot = radix_tree_indirect_to_ptr(slot); 1291 slot = indirect_to_ptr(slot);
1129 1292
1130 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1293 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1131 pathp->node = NULL; 1294 pathp->node = NULL;
@@ -1167,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1167 radix_tree_node_free(to_free); 1330 radix_tree_node_free(to_free);
1168 1331
1169 if (pathp->node->count) { 1332 if (pathp->node->count) {
1170 if (pathp->node == 1333 if (pathp->node == indirect_to_ptr(root->rnode))
1171 radix_tree_indirect_to_ptr(root->rnode))
1172 radix_tree_shrink(root); 1334 radix_tree_shrink(root);
1173 goto out; 1335 goto out;
1174 } 1336 }
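ptr_to_indirect() and indirect_to_ptr() above simply tag a node pointer in its least significant bit, which is always clear for a suitably aligned allocation; lookups test that bit to tell a direct data entry at index 0 from an interior node. A standalone sketch of the same pointer-tagging trick (the struct is a stand-in for radix_tree_node, and the bit value here is illustrative even though the kernel's RADIX_TREE_INDIRECT_PTR is bit 0 in this era):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

#define INDIRECT_PTR 1UL        /* stored in bit 0 of an aligned pointer */

struct node { void *slots[64]; };       /* stand-in for struct radix_tree_node */

static inline void *ptr_to_indirect(void *ptr)
{
        return (void *)((uintptr_t)ptr | INDIRECT_PTR);
}

static inline void *indirect_to_ptr(void *ptr)
{
        return (void *)((uintptr_t)ptr & ~INDIRECT_PTR);
}

static inline int is_indirect_ptr(const void *ptr)
{
        return (uintptr_t)ptr & INDIRECT_PTR;
}

int main(void)
{
        static struct node n;           /* aligned, so bit 0 is free for the tag */
        void *root = ptr_to_indirect(&n);

        assert(is_indirect_ptr(root));
        assert(indirect_to_ptr(root) == &n);
        printf("indirect bit set: %d\n", is_indirect_ptr(root));
        return 0;
}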
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
new file mode 100644
index 000000000000..162becacf97c
--- /dev/null
+++ b/lib/raid6/.gitignore
@@ -0,0 +1,4 @@
1mktables
2altivec*.c
3int*.c
4tables.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
new file mode 100644
index 000000000000..8a38102770f3
--- /dev/null
+++ b/lib/raid6/Makefile
@@ -0,0 +1,75 @@
1obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
2
3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
5 altivec8.o mmx.o sse1.o sse2.o
6hostprogs-y += mktables
7
8quiet_cmd_unroll = UNROLL $@
9 cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
10 < $< > $@ || ( rm -f $@ && exit 1 )
11
12ifeq ($(CONFIG_ALTIVEC),y)
13altivec_flags := -maltivec -mabi=altivec
14endif
15
16targets += int1.c
17$(obj)/int1.c: UNROLL := 1
18$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE
19 $(call if_changed,unroll)
20
21targets += int2.c
22$(obj)/int2.c: UNROLL := 2
23$(obj)/int2.c: $(src)/int.uc $(src)/unroll.awk FORCE
24 $(call if_changed,unroll)
25
26targets += int4.c
27$(obj)/int4.c: UNROLL := 4
28$(obj)/int4.c: $(src)/int.uc $(src)/unroll.awk FORCE
29 $(call if_changed,unroll)
30
31targets += int8.c
32$(obj)/int8.c: UNROLL := 8
33$(obj)/int8.c: $(src)/int.uc $(src)/unroll.awk FORCE
34 $(call if_changed,unroll)
35
36targets += int16.c
37$(obj)/int16.c: UNROLL := 16
38$(obj)/int16.c: $(src)/int.uc $(src)/unroll.awk FORCE
39 $(call if_changed,unroll)
40
41targets += int32.c
42$(obj)/int32.c: UNROLL := 32
43$(obj)/int32.c: $(src)/int.uc $(src)/unroll.awk FORCE
44 $(call if_changed,unroll)
45
46CFLAGS_altivec1.o += $(altivec_flags)
47targets += altivec1.c
48$(obj)/altivec1.c: UNROLL := 1
49$(obj)/altivec1.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
50 $(call if_changed,unroll)
51
52CFLAGS_altivec2.o += $(altivec_flags)
53targets += altivec2.c
54$(obj)/altivec2.c: UNROLL := 2
55$(obj)/altivec2.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
56 $(call if_changed,unroll)
57
58CFLAGS_altivec4.o += $(altivec_flags)
59targets += altivec4.c
60$(obj)/altivec4.c: UNROLL := 4
61$(obj)/altivec4.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
62 $(call if_changed,unroll)
63
64CFLAGS_altivec8.o += $(altivec_flags)
65targets += altivec8.c
66$(obj)/altivec8.c: UNROLL := 8
67$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
68 $(call if_changed,unroll)
69
70quiet_cmd_mktable = TABLE $@
71 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
72
73targets += tables.c
74$(obj)/tables.c: $(obj)/mktables FORCE
75 $(call if_changed,mktable)
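The unroll rule above runs each .uc template through unroll.awk with N set per target; as I read the convention used by these templates, $# stands for the unroll factor and every line that mentions $$ is emitted once per lane with $$ replaced by 0..N-1. A hand-expanded illustration of what one such line is expected to turn into for UNROLL := 2 (the helper name and data are illustrative):

#include <stdio.h>

/* In int.uc, a template line such as
 *
 *      wp$$ ^= wd$$;
 *
 * should expand, for UNROLL := 2, into the two-lane form below. */
static void xor_two_lanes(unsigned long wp[2], const unsigned long wd[2])
{
        wp[0] ^= wd[0];         /* lane $$ = 0 */
        wp[1] ^= wd[1];         /* lane $$ = 1 */
}

int main(void)
{
        unsigned long wp[2] = { 0xff, 0x0f }, wd[2] = { 0x0f, 0xff };

        xor_two_lanes(wp, wd);
        printf("%lx %lx\n", wp[0], wp[1]);      /* f0 f0 */
        return 0;
}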
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
new file mode 100644
index 000000000000..b595f560bee7
--- /dev/null
+++ b/lib/raid6/algos.c
@@ -0,0 +1,154 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/algos.c
15 *
16 * Algorithm list and algorithm selection for RAID-6
17 */
18
19#include <linux/raid/pq.h>
20#ifndef __KERNEL__
21#include <sys/mman.h>
22#include <stdio.h>
23#else
24#include <linux/gfp.h>
25#if !RAID6_USE_EMPTY_ZERO_PAGE
26/* In .bss so it's zeroed */
27const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
28EXPORT_SYMBOL(raid6_empty_zero_page);
29#endif
30#endif
31
32struct raid6_calls raid6_call;
33EXPORT_SYMBOL_GPL(raid6_call);
34
35const struct raid6_calls * const raid6_algos[] = {
36 &raid6_intx1,
37 &raid6_intx2,
38 &raid6_intx4,
39 &raid6_intx8,
40#if defined(__ia64__)
41 &raid6_intx16,
42 &raid6_intx32,
43#endif
44#if defined(__i386__) && !defined(__arch_um__)
45 &raid6_mmxx1,
46 &raid6_mmxx2,
47 &raid6_sse1x1,
48 &raid6_sse1x2,
49 &raid6_sse2x1,
50 &raid6_sse2x2,
51#endif
52#if defined(__x86_64__) && !defined(__arch_um__)
53 &raid6_sse2x1,
54 &raid6_sse2x2,
55 &raid6_sse2x4,
56#endif
57#ifdef CONFIG_ALTIVEC
58 &raid6_altivec1,
59 &raid6_altivec2,
60 &raid6_altivec4,
61 &raid6_altivec8,
62#endif
63 NULL
64};
65
66#ifdef __KERNEL__
67#define RAID6_TIME_JIFFIES_LG2 4
68#else
69/* Need more time to be stable in userspace */
70#define RAID6_TIME_JIFFIES_LG2 9
71#define time_before(x, y) ((x) < (y))
72#endif
73
74/* Try to pick the best algorithm */
 76/* This code uses the gfmul table as a convenient data set to abuse */
76
77int __init raid6_select_algo(void)
78{
79 const struct raid6_calls * const * algo;
80 const struct raid6_calls * best;
81 char *syndromes;
82 void *dptrs[(65536/PAGE_SIZE)+2];
83 int i, disks;
84 unsigned long perf, bestperf;
85 int bestprefer;
86 unsigned long j0, j1;
87
88 disks = (65536/PAGE_SIZE)+2;
89 for ( i = 0 ; i < disks-2 ; i++ ) {
90 dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
91 }
92
93 /* Normal code - use a 2-page allocation to avoid D$ conflict */
94 syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
95
96 if ( !syndromes ) {
97 printk("raid6: Yikes! No memory available.\n");
98 return -ENOMEM;
99 }
100
101 dptrs[disks-2] = syndromes;
102 dptrs[disks-1] = syndromes + PAGE_SIZE;
103
104 bestperf = 0; bestprefer = 0; best = NULL;
105
106 for ( algo = raid6_algos ; *algo ; algo++ ) {
107 if ( !(*algo)->valid || (*algo)->valid() ) {
108 perf = 0;
109
110 preempt_disable();
111 j0 = jiffies;
112 while ( (j1 = jiffies) == j0 )
113 cpu_relax();
114 while (time_before(jiffies,
115 j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
116 (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
117 perf++;
118 }
119 preempt_enable();
120
121 if ( (*algo)->prefer > bestprefer ||
122 ((*algo)->prefer == bestprefer &&
123 perf > bestperf) ) {
124 best = *algo;
125 bestprefer = best->prefer;
126 bestperf = perf;
127 }
128 printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
129 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
130 }
131 }
132
133 if (best) {
134 printk("raid6: using algorithm %s (%ld MB/s)\n",
135 best->name,
136 (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
137 raid6_call = *best;
138 } else
139 printk("raid6: Yikes! No algorithm found!\n");
140
141 free_pages((unsigned long)syndromes, 1);
142
143 return best ? 0 : -EINVAL;
144}
145
146static void raid6_exit(void)
147{
148 do { } while (0);
149}
150
151subsys_initcall(raid6_select_algo);
152module_exit(raid6_exit);
153MODULE_LICENSE("GPL");
154MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
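The MB/s figure printed by raid6_select_algo() follows from the loop above: each gen_syndrome() call walks 64 KiB of the gfmul table (65536/PAGE_SIZE data pages of PAGE_SIZE bytes each), and the timed window is (1 << RAID6_TIME_JIFFIES_LG2) jiffies, i.e. that many 1/HZ ticks. A standalone sketch of the same arithmetic, with the HZ value and iteration count made up for the example:

#include <stdio.h>

#define HZ                      250     /* assumed tick rate, example only */
#define RAID6_TIME_JIFFIES_LG2  4       /* benchmark window: 16 jiffies */
#define BYTES_PER_CALL          65536ULL /* 2^16 bytes touched per gen_syndrome() */

/* perf = number of gen_syndrome() calls completed inside the window. */
static unsigned long long mb_per_sec(unsigned long long perf)
{
        /* bytes/s = perf * 2^16 * HZ / 2^LG2; dividing by 2^20 for MB/s
         * gives exactly the kernel's (perf*HZ) >> (20 - 16 + LG2). */
        return (perf * HZ) >> (20 - 16 + RAID6_TIME_JIFFIES_LG2);
}

int main(void)
{
        unsigned long long perf = 3200; /* made-up iteration count */

        printf("%llu calls -> ~%llu MB/s\n", perf, mb_per_sec(perf));
        printf("long form: %llu MB/s\n",
               (perf * BYTES_PER_CALL * HZ / (1ULL << RAID6_TIME_JIFFIES_LG2)) >> 20);
        return 0;
}

With the made-up numbers both expressions print 3125.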
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
new file mode 100644
index 000000000000..2654d5c854be
--- /dev/null
+++ b/lib/raid6/altivec.uc
@@ -0,0 +1,130 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6altivec$#.c
15 *
 16 * $#-way unrolled Altivec implementation of the RAID-6 syndrome functions
17 *
18 * This file is postprocessed using unroll.awk
19 *
20 * <benh> hpa: in process,
21 * you can just "steal" the vec unit with enable_kernel_altivec() (but
 22 * bracket this with preempt_disable/enable or in a lock)
23 */
24
25#include <linux/raid/pq.h>
26
27#ifdef CONFIG_ALTIVEC
28
29#include <altivec.h>
30#ifdef __KERNEL__
31# include <asm/system.h>
32# include <asm/cputable.h>
33#endif
34
35/*
36 * This is the C data type to use. We use a vector of
37 * signed char so vec_cmpgt() will generate the right
38 * instruction.
39 */
40
41typedef vector signed char unative_t;
42
43#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
44#define NSIZE sizeof(unative_t)
45
46/*
47 * The SHLBYTE() operation shifts each byte left by 1, *not*
48 * rolling over into the next byte
49 */
50static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
51{
52 return vec_add(v,v);
53}
54
55/*
56 * The MASK() operation returns 0xFF in any byte for which the high
57 * bit is 1, 0x00 for any byte for which the high bit is 0.
58 */
59static inline __attribute_const__ unative_t MASK(unative_t v)
60{
61 unative_t zv = NBYTES(0);
62
63 /* vec_cmpgt returns a vector bool char; thus the need for the cast */
64 return (unative_t)vec_cmpgt(zv, v);
65}
66
67
68/* This is noinline to make damned sure that gcc doesn't move any of the
69 Altivec code around the enable/disable code */
70static void noinline
71raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
72{
73 u8 **dptr = (u8 **)ptrs;
74 u8 *p, *q;
75 int d, z, z0;
76
77 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
78 unative_t x1d = NBYTES(0x1d);
79
80 z0 = disks - 3; /* Highest data disk */
81 p = dptr[z0+1]; /* XOR parity */
82 q = dptr[z0+2]; /* RS syndrome */
83
84 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
85 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
86 for ( z = z0-1 ; z >= 0 ; z-- ) {
87 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
88 wp$$ = vec_xor(wp$$, wd$$);
89 w2$$ = MASK(wq$$);
90 w1$$ = SHLBYTE(wq$$);
91 w2$$ = vec_and(w2$$, x1d);
92 w1$$ = vec_xor(w1$$, w2$$);
93 wq$$ = vec_xor(w1$$, wd$$);
94 }
95 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
96 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
97 }
98}
99
100static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
101{
102 preempt_disable();
103 enable_kernel_altivec();
104
105 raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
106
107 preempt_enable();
108}
109
110int raid6_have_altivec(void);
111#if $# == 1
112int raid6_have_altivec(void)
113{
114 /* This assumes either all CPUs have Altivec or none does */
115# ifdef __KERNEL__
116 return cpu_has_feature(CPU_FTR_ALTIVEC);
117# else
118 return 1;
119# endif
120}
121#endif
122
123const struct raid6_calls raid6_altivec$# = {
124 raid6_altivec$#_gen_syndrome,
125 raid6_have_altivec,
126 "altivecx$#",
127 0
128};
129
130#endif /* CONFIG_ALTIVEC */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
new file mode 100644
index 000000000000..d1e276a14fab
--- /dev/null
+++ b/lib/raid6/int.uc
@@ -0,0 +1,117 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6int$#.c
15 *
16 * $#-way unrolled portable integer math RAID-6 instruction set
17 *
18 * This file is postprocessed using unroll.awk
19 */
20
21#include <linux/raid/pq.h>
22
23/*
24 * This is the C data type to use
25 */
26
27/* Change this from BITS_PER_LONG if there is something better... */
28#if BITS_PER_LONG == 64
29# define NBYTES(x) ((x) * 0x0101010101010101UL)
30# define NSIZE 8
31# define NSHIFT 3
32# define NSTRING "64"
33typedef u64 unative_t;
34#else
35# define NBYTES(x) ((x) * 0x01010101U)
36# define NSIZE 4
37# define NSHIFT 2
38# define NSTRING "32"
39typedef u32 unative_t;
40#endif
41
42
43
44/*
45 * IA-64 wants insane amounts of unrolling. On other architectures that
46 * is just a waste of space.
47 */
48#if ($# <= 8) || defined(__ia64__)
49
50
51/*
52 * These sub-operations are separate inlines since they can sometimes be
53 * specially optimized using architecture-specific hacks.
54 */
55
56/*
57 * The SHLBYTE() operation shifts each byte left by 1, *not*
58 * rolling over into the next byte
59 */
60static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
61{
62 unative_t vv;
63
64 vv = (v << 1) & NBYTES(0xfe);
65 return vv;
66}
67
68/*
69 * The MASK() operation returns 0xFF in any byte for which the high
70 * bit is 1, 0x00 for any byte for which the high bit is 0.
71 */
72static inline __attribute_const__ unative_t MASK(unative_t v)
73{
74 unative_t vv;
75
76 vv = v & NBYTES(0x80);
77 vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
78 return vv;
79}
80
81
82static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
83{
84 u8 **dptr = (u8 **)ptrs;
85 u8 *p, *q;
86 int d, z, z0;
87
88 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
89
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0+1]; /* XOR parity */
92 q = dptr[z0+2]; /* RS syndrome */
93
94 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
95 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
96 for ( z = z0-1 ; z >= 0 ; z-- ) {
97 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
98 wp$$ ^= wd$$;
99 w2$$ = MASK(wq$$);
100 w1$$ = SHLBYTE(wq$$);
101 w2$$ &= NBYTES(0x1d);
102 w1$$ ^= w2$$;
103 wq$$ = w1$$ ^ wd$$;
104 }
105 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
106 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
107 }
108}
109
110const struct raid6_calls raid6_intx$# = {
111 raid6_int$#_gen_syndrome,
112 NULL, /* always valid */
113 "int" NSTRING "x$#",
114 0
115};
116
117#endif
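SHLBYTE() and MASK() above together implement a byte-wise multiplication by 2 in GF(2^8) across a whole machine word: shift every byte left by one, then XOR the 0x1d reduction constant into exactly those bytes whose top bit was set. A standalone check of that word-wide trick against a per-byte reference multiply (the test word is arbitrary):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

#define NBYTES(x) ((x) * 0x0101010101010101ULL)

static uint64_t shlbyte(uint64_t v)     /* shift each byte left by 1 */
{
        return (v << 1) & NBYTES(0xfe);
}

static uint64_t mask(uint64_t v)        /* 0xff where a byte's top bit is set */
{
        uint64_t vv = v & NBYTES(0x80);

        return (vv << 1) - (vv >> 7);   /* overflow on the top bit is OK */
}

static uint8_t gfmul2(uint8_t a)        /* reference: multiply one byte by 0x02 */
{
        return (a << 1) ^ (a & 0x80 ? 0x1d : 0);
}

int main(void)
{
        uint64_t w = 0x00ff80017fc31d02ULL;
        uint64_t fast = shlbyte(w) ^ (mask(w) & NBYTES(0x1d));

        for (int i = 0; i < 8; i++)
                assert(((fast >> (8 * i)) & 0xff) == gfmul2((w >> (8 * i)) & 0xff));
        printf("word-wide GF(256) x2: %016llx -> %016llx\n",
               (unsigned long long)w, (unsigned long long)fast);
        return 0;
}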
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
new file mode 100644
index 000000000000..3b1500843bba
--- /dev/null
+++ b/lib/raid6/mktables.c
@@ -0,0 +1,132 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * mktables.c
13 *
14 * Make RAID-6 tables. This is a host user space program to be run at
15 * compile time.
16 */
17
18#include <stdio.h>
19#include <string.h>
20#include <inttypes.h>
21#include <stdlib.h>
22#include <time.h>
23
24static uint8_t gfmul(uint8_t a, uint8_t b)
25{
26 uint8_t v = 0;
27
28 while (b) {
29 if (b & 1)
30 v ^= a;
31 a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
32 b >>= 1;
33 }
34
35 return v;
36}
37
38static uint8_t gfpow(uint8_t a, int b)
39{
40 uint8_t v = 1;
41
42 b %= 255;
43 if (b < 0)
44 b += 255;
45
46 while (b) {
47 if (b & 1)
48 v = gfmul(v, a);
49 a = gfmul(a, a);
50 b >>= 1;
51 }
52
53 return v;
54}
55
56int main(int argc, char *argv[])
57{
58 int i, j, k;
59 uint8_t v;
60 uint8_t exptbl[256], invtbl[256];
61
62 printf("#include <linux/raid/pq.h>\n");
63
64 /* Compute multiplication table */
65 printf("\nconst u8 __attribute__((aligned(256)))\n"
66 "raid6_gfmul[256][256] =\n"
67 "{\n");
68 for (i = 0; i < 256; i++) {
69 printf("\t{\n");
70 for (j = 0; j < 256; j += 8) {
71 printf("\t\t");
72 for (k = 0; k < 8; k++)
73 printf("0x%02x,%c", gfmul(i, j + k),
74 (k == 7) ? '\n' : ' ');
75 }
76 printf("\t},\n");
77 }
78 printf("};\n");
79 printf("#ifdef __KERNEL__\n");
80 printf("EXPORT_SYMBOL(raid6_gfmul);\n");
81 printf("#endif\n");
82
83 /* Compute power-of-2 table (exponent) */
84 v = 1;
85 printf("\nconst u8 __attribute__((aligned(256)))\n"
86 "raid6_gfexp[256] =\n" "{\n");
87 for (i = 0; i < 256; i += 8) {
88 printf("\t");
89 for (j = 0; j < 8; j++) {
90 exptbl[i + j] = v;
91 printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
92 v = gfmul(v, 2);
93 if (v == 1)
94 v = 0; /* For entry 255, not a real entry */
95 }
96 }
97 printf("};\n");
98 printf("#ifdef __KERNEL__\n");
99 printf("EXPORT_SYMBOL(raid6_gfexp);\n");
100 printf("#endif\n");
101
102 /* Compute inverse table x^-1 == x^254 */
103 printf("\nconst u8 __attribute__((aligned(256)))\n"
104 "raid6_gfinv[256] =\n" "{\n");
105 for (i = 0; i < 256; i += 8) {
106 printf("\t");
107 for (j = 0; j < 8; j++) {
108 invtbl[i + j] = v = gfpow(i + j, 254);
109 printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
110 }
111 }
112 printf("};\n");
113 printf("#ifdef __KERNEL__\n");
114 printf("EXPORT_SYMBOL(raid6_gfinv);\n");
115 printf("#endif\n");
116
117 /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
118 printf("\nconst u8 __attribute__((aligned(256)))\n"
119 "raid6_gfexi[256] =\n" "{\n");
120 for (i = 0; i < 256; i += 8) {
121 printf("\t");
122 for (j = 0; j < 8; j++)
123 printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
124 (j == 7) ? '\n' : ' ');
125 }
126 printf("};\n");
127 printf("#ifdef __KERNEL__\n");
128 printf("EXPORT_SYMBOL(raid6_gfexi);\n");
129 printf("#endif\n");
130
131 return 0;
132}
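The tables emitted above all derive from gfmul() and gfpow(): multiplication modulo the RAID-6 polynomial (the 0x1d reduction, i.e. x^8 + x^4 + x^3 + x^2 + 1) and exponentiation by repeated squaring. A small standalone check of the identities the generated tables rely on, reusing the same two helpers so it compiles on its own:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

static uint8_t gfmul(uint8_t a, uint8_t b)      /* same routine as mktables.c */
{
        uint8_t v = 0;

        while (b) {
                if (b & 1)
                        v ^= a;
                a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
                b >>= 1;
        }
        return v;
}

static uint8_t gfpow(uint8_t a, int b)          /* same routine as mktables.c */
{
        uint8_t v = 1;

        b %= 255;
        if (b < 0)
                b += 255;
        while (b) {
                if (b & 1)
                        v = gfmul(v, a);
                a = gfmul(a, a);
                b >>= 1;
        }
        return v;
}

int main(void)
{
        uint8_t seen[256] = { 0 };
        uint8_t v = 1;

        /* raid6_gfinv[x] = x^254: check x * x^254 == 1 for every nonzero x. */
        for (int x = 1; x < 256; x++)
                assert(gfmul(x, gfpow(x, 254)) == 1);

        /* raid6_gfexp[i] = 2^i: 2 generates the multiplicative group, so the
         * first 255 powers are distinct nonzero values. */
        for (int i = 0; i < 255; i++) {
                assert(v && !seen[v]);
                seen[v] = 1;
                v = gfmul(v, 2);
        }

        printf("GF(256) table identities hold\n");
        return 0;
}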
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
new file mode 100644
index 000000000000..279347f23094
--- /dev/null
+++ b/lib/raid6/mmx.c
@@ -0,0 +1,142 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/mmx.c
15 *
16 * MMX implementation of RAID-6 syndrome functions
17 */
18
19#if defined(__i386__) && !defined(__arch_um__)
20
21#include <linux/raid/pq.h>
22#include "x86.h"
23
24/* Shared with raid6/sse1.c */
25const struct raid6_mmx_constants {
26 u64 x1d;
27} raid6_mmx_constants = {
28 0x1d1d1d1d1d1d1d1dULL,
29};
30
31static int raid6_have_mmx(void)
32{
33 /* Not really "boot_cpu" but "all_cpus" */
34 return boot_cpu_has(X86_FEATURE_MMX);
35}
36
37/*
38 * Plain MMX implementation
39 */
40static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
41{
42 u8 **dptr = (u8 **)ptrs;
43 u8 *p, *q;
44 int d, z, z0;
45
46 z0 = disks - 3; /* Highest data disk */
47 p = dptr[z0+1]; /* XOR parity */
48 q = dptr[z0+2]; /* RS syndrome */
49
50 kernel_fpu_begin();
51
52 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
53 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
54
55 for ( d = 0 ; d < bytes ; d += 8 ) {
56 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
57 asm volatile("movq %mm2,%mm4"); /* Q[0] */
58 for ( z = z0-1 ; z >= 0 ; z-- ) {
59 asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
60 asm volatile("pcmpgtb %mm4,%mm5");
61 asm volatile("paddb %mm4,%mm4");
62 asm volatile("pand %mm0,%mm5");
63 asm volatile("pxor %mm5,%mm4");
64 asm volatile("pxor %mm5,%mm5");
65 asm volatile("pxor %mm6,%mm2");
66 asm volatile("pxor %mm6,%mm4");
67 }
68 asm volatile("movq %%mm2,%0" : "=m" (p[d]));
69 asm volatile("pxor %mm2,%mm2");
70 asm volatile("movq %%mm4,%0" : "=m" (q[d]));
71 asm volatile("pxor %mm4,%mm4");
72 }
73
74 kernel_fpu_end();
75}
76
77const struct raid6_calls raid6_mmxx1 = {
78 raid6_mmx1_gen_syndrome,
79 raid6_have_mmx,
80 "mmxx1",
81 0
82};
83
84/*
85 * Unrolled-by-2 MMX implementation
86 */
87static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
88{
89 u8 **dptr = (u8 **)ptrs;
90 u8 *p, *q;
91 int d, z, z0;
92
93 z0 = disks - 3; /* Highest data disk */
94 p = dptr[z0+1]; /* XOR parity */
95 q = dptr[z0+2]; /* RS syndrome */
96
97 kernel_fpu_begin();
98
99 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
100 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
101 asm volatile("pxor %mm7,%mm7"); /* Zero temp */
102
103 for ( d = 0 ; d < bytes ; d += 16 ) {
104 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
105 asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
106 asm volatile("movq %mm2,%mm4"); /* Q[0] */
107 asm volatile("movq %mm3,%mm6"); /* Q[1] */
108 for ( z = z0-1 ; z >= 0 ; z-- ) {
109 asm volatile("pcmpgtb %mm4,%mm5");
110 asm volatile("pcmpgtb %mm6,%mm7");
111 asm volatile("paddb %mm4,%mm4");
112 asm volatile("paddb %mm6,%mm6");
113 asm volatile("pand %mm0,%mm5");
114 asm volatile("pand %mm0,%mm7");
115 asm volatile("pxor %mm5,%mm4");
116 asm volatile("pxor %mm7,%mm6");
117 asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
118 asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
119 asm volatile("pxor %mm5,%mm2");
120 asm volatile("pxor %mm7,%mm3");
121 asm volatile("pxor %mm5,%mm4");
122 asm volatile("pxor %mm7,%mm6");
123 asm volatile("pxor %mm5,%mm5");
124 asm volatile("pxor %mm7,%mm7");
125 }
126 asm volatile("movq %%mm2,%0" : "=m" (p[d]));
127 asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
128 asm volatile("movq %%mm4,%0" : "=m" (q[d]));
129 asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
130 }
131
132 kernel_fpu_end();
133}
134
135const struct raid6_calls raid6_mmxx2 = {
136 raid6_mmx2_gen_syndrome,
137 raid6_have_mmx,
138 "mmxx2",
139 0
140};
141
142#endif
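The pcmpgtb/paddb/pand/pxor sequence in the loops above is the SIMD form of the same multiply-by-2 step used in int.uc: pcmpgtb against an all-zero register produces 0xff in every byte whose top bit is set (the MASK step), paddb doubles each byte and drops the carry (the SHLBYTE step), and the masked 0x1d constant is XORed back in before the data word is folded into P and Q. A scalar sketch of what one inner-loop iteration computes for an 8-byte lane (plain C, not the MMX code; the sample data is arbitrary):

#include <stdio.h>
#include <stdint.h>

#define NBYTES(x) ((x) * 0x0101010101010101ULL)

/* One inner-loop step for an 8-byte lane: p accumulates plain XOR parity,
 * q is updated as q = q*2 ^ data, with the multiply done byte-wise in GF(2^8). */
static void syndrome_step(uint64_t *p, uint64_t *q, uint64_t data)
{
        uint64_t hi = *q & NBYTES(0x80);
        uint64_t mask = (hi << 1) - (hi >> 7);  /* 0xff where the top bit was set */

        *q = ((*q << 1) & NBYTES(0xfe)) ^ (mask & NBYTES(0x1d)) ^ data;
        *p ^= data;
}

int main(void)
{
        /* three made-up "data disk" words for one 8-byte stripe chunk */
        uint64_t d[3] = { 0x0102030405060708ULL,
                          0x1111111111111111ULL,
                          0x8040201008040201ULL };
        uint64_t p, q;

        p = q = d[2];                           /* start from the highest data disk */
        for (int z = 1; z >= 0; z--)
                syndrome_step(&p, &q, d[z]);

        printf("P = %016llx\nQ = %016llx\n",
               (unsigned long long)p, (unsigned long long)q);
        return 0;
}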
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
new file mode 100644
index 000000000000..8590d19cf522
--- /dev/null
+++ b/lib/raid6/recov.c
@@ -0,0 +1,132 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/recov.c
15 *
16 * RAID-6 data recovery in dual failure mode. In single failure mode,
17 * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
18 * the syndrome.)
19 */
20
21#include <linux/raid/pq.h>
22
23/* Recover two failed data blocks. */
24void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
25 void **ptrs)
26{
27 u8 *p, *q, *dp, *dq;
28 u8 px, qx, db;
29 const u8 *pbmul; /* P multiplier table for B data */
30 const u8 *qmul; /* Q multiplier table (for both) */
31
32 p = (u8 *)ptrs[disks-2];
33 q = (u8 *)ptrs[disks-1];
34
35 /* Compute syndrome with zero for the missing data pages
36 Use the dead data pages as temporary storage for
37 delta p and delta q */
38 dp = (u8 *)ptrs[faila];
39 ptrs[faila] = (void *)raid6_empty_zero_page;
40 ptrs[disks-2] = dp;
41 dq = (u8 *)ptrs[failb];
42 ptrs[failb] = (void *)raid6_empty_zero_page;
43 ptrs[disks-1] = dq;
44
45 raid6_call.gen_syndrome(disks, bytes, ptrs);
46
47 /* Restore pointer table */
48 ptrs[faila] = dp;
49 ptrs[failb] = dq;
50 ptrs[disks-2] = p;
51 ptrs[disks-1] = q;
52
53 /* Now, pick the proper data tables */
54 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
55 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
56
57 /* Now do it... */
58 while ( bytes-- ) {
59 px = *p ^ *dp;
60 qx = qmul[*q ^ *dq];
61 *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
62 *dp++ = db ^ px; /* Reconstructed A */
63 p++; q++;
64 }
65}
66EXPORT_SYMBOL_GPL(raid6_2data_recov);
67
68/* Recover failure of one data block plus the P block */
69void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
70{
71 u8 *p, *q, *dq;
72 const u8 *qmul; /* Q multiplier table */
73
74 p = (u8 *)ptrs[disks-2];
75 q = (u8 *)ptrs[disks-1];
76
77 /* Compute syndrome with zero for the missing data page
78 Use the dead data page as temporary storage for delta q */
79 dq = (u8 *)ptrs[faila];
80 ptrs[faila] = (void *)raid6_empty_zero_page;
81 ptrs[disks-1] = dq;
82
83 raid6_call.gen_syndrome(disks, bytes, ptrs);
84
85 /* Restore pointer table */
86 ptrs[faila] = dq;
87 ptrs[disks-1] = q;
88
89 /* Now, pick the proper data tables */
90 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
91
92 /* Now do it... */
93 while ( bytes-- ) {
94 *p++ ^= *dq = qmul[*q ^ *dq];
95 q++; dq++;
96 }
97}
98EXPORT_SYMBOL_GPL(raid6_datap_recov);
99
100#ifndef __KERNEL__
101/* Testing only */
102
103/* Recover two failed blocks. */
104void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
105{
106 if ( faila > failb ) {
107 int tmp = faila;
108 faila = failb;
109 failb = tmp;
110 }
111
112 if ( failb == disks-1 ) {
113 if ( faila == disks-2 ) {
114 /* P+Q failure. Just rebuild the syndrome. */
115 raid6_call.gen_syndrome(disks, bytes, ptrs);
116 } else {
117 /* data+Q failure. Reconstruct data from P,
118 then rebuild syndrome. */
119 /* NOT IMPLEMENTED - equivalent to RAID-5 */
120 }
121 } else {
122 if ( failb == disks-2 ) {
123 /* data+P failure. */
124 raid6_datap_recov(disks, bytes, faila, ptrs);
125 } else {
126 /* data+data failure. */
127 raid6_2data_recov(disks, bytes, faila, failb, ptrs);
128 }
129 }
130}
131
132#endif
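The multiplier tables picked in raid6_2data_recov() come straight from the syndrome equations: writing P', Q' for the syndromes recomputed with the failed blocks zeroed, P xor P' = Da xor Db and Q xor Q' = g^a*Da xor g^b*Db (g = 2, a = faila, b = failb). Eliminating Da gives Db = (Da xor Db)*inv(g^(b-a) xor 1) xor (Q xor Q')*inv(g^a xor g^b), which is exactly what the raid6_gfexi and raid6_gfinv[gfexp ^ gfexp] lookups multiply by, and then Da = Db xor (Da xor Db). A single-byte sketch that exercises that algebra with the same GF helpers as mktables.c (the data values are arbitrary):

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

static uint8_t gfmul(uint8_t a, uint8_t b)      /* same arithmetic as mktables.c */
{
        uint8_t v = 0;

        while (b) {
                if (b & 1)
                        v ^= a;
                a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
                b >>= 1;
        }
        return v;
}

static uint8_t gfinv(uint8_t a)                 /* a^254 == a^-1 for nonzero a */
{
        uint8_t v = 1;

        for (int i = 0; i < 254; i++)
                v = gfmul(v, a);
        return v;
}

static uint8_t gfexp(int e)                     /* 2^e, e >= 0 */
{
        uint8_t v = 1;

        while (e--)
                v = gfmul(v, 2);
        return v;
}

int main(void)
{
        /* One byte per "disk": four data bytes plus P and Q. */
        uint8_t d[4] = { 0x11, 0x5e, 0xa7, 0x3c };
        uint8_t p = 0, q = 0, dp = 0, dq = 0;
        int faila = 1, failb = 3;

        for (int z = 3; z >= 0; z--) {          /* P = xor, Q = sum of g^z * d[z] */
                p ^= d[z];
                q = gfmul(q, 2) ^ d[z];
        }

        /* Recompute the syndromes with the two failed disks zeroed. */
        for (int z = 3; z >= 0; z--) {
                uint8_t v = (z == faila || z == failb) ? 0 : d[z];

                dp ^= v;
                dq = gfmul(dq, 2) ^ v;
        }

        /* Same algebra as raid6_2data_recov(): pbmul ~ gfexi, qmul ~ gfinv. */
        uint8_t px = p ^ dp;                            /* Da ^ Db */
        uint8_t qx = gfmul(gfinv(gfexp(faila) ^ gfexp(failb)), q ^ dq);
        uint8_t db = gfmul(gfinv(gfexp(failb - faila) ^ 1), px) ^ qx;
        uint8_t da = db ^ px;

        assert(da == d[faila] && db == d[failb]);
        printf("recovered d[%d]=%#x d[%d]=%#x\n", faila, da, failb, db);
        return 0;
}

The data+P case in raid6_datap_recov() is the simpler special case of the same equations: Da = (Q xor Q') * inv(g^a), after which P is rebuilt by plain XOR.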
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
new file mode 100644
index 000000000000..10dd91948c07
--- /dev/null
+++ b/lib/raid6/sse1.c
@@ -0,0 +1,162 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/sse1.c
15 *
16 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
17 *
18 * This is really an MMX implementation, but it requires SSE-1 or
19 * AMD MMXEXT for prefetch support and a few other features. The
20 * support for nontemporal memory accesses is enough to make this
21 * worthwhile as a separate implementation.
22 */
23
24#if defined(__i386__) && !defined(__arch_um__)
25
26#include <linux/raid/pq.h>
27#include "x86.h"
28
29/* Defined in raid6/mmx.c */
30extern const struct raid6_mmx_constants {
31 u64 x1d;
32} raid6_mmx_constants;
33
34static int raid6_have_sse1_or_mmxext(void)
35{
36 /* Not really boot_cpu but "all_cpus" */
37 return boot_cpu_has(X86_FEATURE_MMX) &&
38 (boot_cpu_has(X86_FEATURE_XMM) ||
39 boot_cpu_has(X86_FEATURE_MMXEXT));
40}
41
42/*
43 * Plain SSE1 implementation
44 */
45static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
46{
47 u8 **dptr = (u8 **)ptrs;
48 u8 *p, *q;
49 int d, z, z0;
50
51 z0 = disks - 3; /* Highest data disk */
52 p = dptr[z0+1]; /* XOR parity */
53 q = dptr[z0+2]; /* RS syndrome */
54
55 kernel_fpu_begin();
56
57 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
58 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
59
60 for ( d = 0 ; d < bytes ; d += 8 ) {
61 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
62 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
63 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
64 asm volatile("movq %mm2,%mm4"); /* Q[0] */
65 asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
66 for ( z = z0-2 ; z >= 0 ; z-- ) {
67 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
68 asm volatile("pcmpgtb %mm4,%mm5");
69 asm volatile("paddb %mm4,%mm4");
70 asm volatile("pand %mm0,%mm5");
71 asm volatile("pxor %mm5,%mm4");
72 asm volatile("pxor %mm5,%mm5");
73 asm volatile("pxor %mm6,%mm2");
74 asm volatile("pxor %mm6,%mm4");
75 asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
76 }
77 asm volatile("pcmpgtb %mm4,%mm5");
78 asm volatile("paddb %mm4,%mm4");
79 asm volatile("pand %mm0,%mm5");
80 asm volatile("pxor %mm5,%mm4");
81 asm volatile("pxor %mm5,%mm5");
82 asm volatile("pxor %mm6,%mm2");
83 asm volatile("pxor %mm6,%mm4");
84
85 asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
86 asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
87 }
88
89 asm volatile("sfence" : : : "memory");
90 kernel_fpu_end();
91}
92
93const struct raid6_calls raid6_sse1x1 = {
94 raid6_sse11_gen_syndrome,
95 raid6_have_sse1_or_mmxext,
96 "sse1x1",
97 1 /* Has cache hints */
98};
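
The four-instruction sequence repeated in the inner loop above (pcmpgtb, paddb, pand, pxor against the 0x1d constant) is a branch-free GF(2^8) multiply-by-g applied to every byte of the Q accumulator. A scalar sketch of what each byte lane computes (illustrative C, not part of the patch):

	static inline u8 gf_mul2(u8 w)
	{
		/* mask = 0xff if the top bit of w is set, else 0 (pcmpgtb vs. zero) */
		u8 mask = (w & 0x80) ? 0xff : 0x00;

		/* shift left one bit (paddb w,w), then conditionally reduce by
		 * the field polynomial 0x11d (pand with 0x1d.., pxor into w)   */
		return (u8)(w << 1) ^ (mask & 0x1d);
	}
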
99
100/*
101 * Unrolled-by-2 SSE1 implementation
102 */
103static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
104{
105 u8 **dptr = (u8 **)ptrs;
106 u8 *p, *q;
107 int d, z, z0;
108
109 z0 = disks - 3; /* Highest data disk */
110 p = dptr[z0+1]; /* XOR parity */
111 q = dptr[z0+2]; /* RS syndrome */
112
113 kernel_fpu_begin();
114
115 asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
116 asm volatile("pxor %mm5,%mm5"); /* Zero temp */
117 asm volatile("pxor %mm7,%mm7"); /* Zero temp */
118
119 /* We uniformly assume a single prefetch covers at least 16 bytes */
120 for ( d = 0 ; d < bytes ; d += 16 ) {
121 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122 asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
123 asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
124 asm volatile("movq %mm2,%mm4"); /* Q[0] */
125 asm volatile("movq %mm3,%mm6"); /* Q[1] */
126 for ( z = z0-1 ; z >= 0 ; z-- ) {
127 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128 asm volatile("pcmpgtb %mm4,%mm5");
129 asm volatile("pcmpgtb %mm6,%mm7");
130 asm volatile("paddb %mm4,%mm4");
131 asm volatile("paddb %mm6,%mm6");
132 asm volatile("pand %mm0,%mm5");
133 asm volatile("pand %mm0,%mm7");
134 asm volatile("pxor %mm5,%mm4");
135 asm volatile("pxor %mm7,%mm6");
136 asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
137 asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
138 asm volatile("pxor %mm5,%mm2");
139 asm volatile("pxor %mm7,%mm3");
140 asm volatile("pxor %mm5,%mm4");
141 asm volatile("pxor %mm7,%mm6");
142 asm volatile("pxor %mm5,%mm5");
143 asm volatile("pxor %mm7,%mm7");
144 }
145 asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
146 asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
147 asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
148 asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
149 }
150
151 asm volatile("sfence" : :: "memory");
152 kernel_fpu_end();
153}
154
155const struct raid6_calls raid6_sse1x2 = {
156 raid6_sse12_gen_syndrome,
157 raid6_have_sse1_or_mmxext,
158 "sse1x2",
159 1 /* Has cache hints */
160};
161
162#endif
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
new file mode 100644
index 000000000000..bc2d57daa589
--- /dev/null
+++ b/lib/raid6/sse2.c
@@ -0,0 +1,262 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/sse2.c
15 *
16 * SSE-2 implementation of RAID-6 syndrome functions
17 *
18 */
19
20#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
21
22#include <linux/raid/pq.h>
23#include "x86.h"
24
25static const struct raid6_sse_constants {
26 u64 x1d[2];
27} raid6_sse_constants __attribute__((aligned(16))) = {
28 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
29};
30
31static int raid6_have_sse2(void)
32{
33 /* Not really boot_cpu but "all_cpus" */
34 return boot_cpu_has(X86_FEATURE_MMX) &&
35 boot_cpu_has(X86_FEATURE_FXSR) &&
36 boot_cpu_has(X86_FEATURE_XMM) &&
37 boot_cpu_has(X86_FEATURE_XMM2);
38}
39
40/*
41 * Plain SSE2 implementation
42 */
43static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
44{
45 u8 **dptr = (u8 **)ptrs;
46 u8 *p, *q;
47 int d, z, z0;
48
49 z0 = disks - 3; /* Highest data disk */
50 p = dptr[z0+1]; /* XOR parity */
51 q = dptr[z0+2]; /* RS syndrome */
52
53 kernel_fpu_begin();
54
55 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
56 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
57
58 for ( d = 0 ; d < bytes ; d += 16 ) {
59 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
60 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
61 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
62 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
63 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
64 for ( z = z0-2 ; z >= 0 ; z-- ) {
65 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
66 asm volatile("pcmpgtb %xmm4,%xmm5");
67 asm volatile("paddb %xmm4,%xmm4");
68 asm volatile("pand %xmm0,%xmm5");
69 asm volatile("pxor %xmm5,%xmm4");
70 asm volatile("pxor %xmm5,%xmm5");
71 asm volatile("pxor %xmm6,%xmm2");
72 asm volatile("pxor %xmm6,%xmm4");
73 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
74 }
75 asm volatile("pcmpgtb %xmm4,%xmm5");
76 asm volatile("paddb %xmm4,%xmm4");
77 asm volatile("pand %xmm0,%xmm5");
78 asm volatile("pxor %xmm5,%xmm4");
79 asm volatile("pxor %xmm5,%xmm5");
80 asm volatile("pxor %xmm6,%xmm2");
81 asm volatile("pxor %xmm6,%xmm4");
82
83 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
84 asm volatile("pxor %xmm2,%xmm2");
85 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
86 asm volatile("pxor %xmm4,%xmm4");
87 }
88
89 asm volatile("sfence" : : : "memory");
90 kernel_fpu_end();
91}
92
93const struct raid6_calls raid6_sse2x1 = {
94 raid6_sse21_gen_syndrome,
95 raid6_have_sse2,
96 "sse2x1",
97 1 /* Has cache hints */
98};
99
100/*
101 * Unrolled-by-2 SSE2 implementation
102 */
103static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
104{
105 u8 **dptr = (u8 **)ptrs;
106 u8 *p, *q;
107 int d, z, z0;
108
109 z0 = disks - 3; /* Highest data disk */
110 p = dptr[z0+1]; /* XOR parity */
111 q = dptr[z0+2]; /* RS syndrome */
112
113 kernel_fpu_begin();
114
115 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
116 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
117 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
118
119 /* We uniformly assume a single prefetch covers at least 32 bytes */
120 for ( d = 0 ; d < bytes ; d += 32 ) {
121 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
122 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
123 asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
124 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
125 asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
126 for ( z = z0-1 ; z >= 0 ; z-- ) {
127 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
128 asm volatile("pcmpgtb %xmm4,%xmm5");
129 asm volatile("pcmpgtb %xmm6,%xmm7");
130 asm volatile("paddb %xmm4,%xmm4");
131 asm volatile("paddb %xmm6,%xmm6");
132 asm volatile("pand %xmm0,%xmm5");
133 asm volatile("pand %xmm0,%xmm7");
134 asm volatile("pxor %xmm5,%xmm4");
135 asm volatile("pxor %xmm7,%xmm6");
136 asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
137 asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
138 asm volatile("pxor %xmm5,%xmm2");
139 asm volatile("pxor %xmm7,%xmm3");
140 asm volatile("pxor %xmm5,%xmm4");
141 asm volatile("pxor %xmm7,%xmm6");
142 asm volatile("pxor %xmm5,%xmm5");
143 asm volatile("pxor %xmm7,%xmm7");
144 }
145 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
146 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
147 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
148 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
149 }
150
151 asm volatile("sfence" : : : "memory");
152 kernel_fpu_end();
153}
154
155const struct raid6_calls raid6_sse2x2 = {
156 raid6_sse22_gen_syndrome,
157 raid6_have_sse2,
158 "sse2x2",
159 1 /* Has cache hints */
160};
161
162#endif
163
164#if defined(__x86_64__) && !defined(__arch_um__)
165
166/*
167 * Unrolled-by-4 SSE2 implementation
168 */
169static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
170{
171 u8 **dptr = (u8 **)ptrs;
172 u8 *p, *q;
173 int d, z, z0;
174
175 z0 = disks - 3; /* Highest data disk */
176 p = dptr[z0+1]; /* XOR parity */
177 q = dptr[z0+2]; /* RS syndrome */
178
179 kernel_fpu_begin();
180
181 asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
182 asm volatile("pxor %xmm2,%xmm2"); /* P[0] */
183 asm volatile("pxor %xmm3,%xmm3"); /* P[1] */
184 asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */
185 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
186 asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */
187 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
188 asm volatile("pxor %xmm10,%xmm10"); /* P[2] */
189 asm volatile("pxor %xmm11,%xmm11"); /* P[3] */
190 asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */
191 asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */
192 asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */
193 asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */
194
195 for ( d = 0 ; d < bytes ; d += 64 ) {
196 for ( z = z0 ; z >= 0 ; z-- ) {
197 /* The second prefetch seems to improve performance... */
198 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
199 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
200 asm volatile("pcmpgtb %xmm4,%xmm5");
201 asm volatile("pcmpgtb %xmm6,%xmm7");
202 asm volatile("pcmpgtb %xmm12,%xmm13");
203 asm volatile("pcmpgtb %xmm14,%xmm15");
204 asm volatile("paddb %xmm4,%xmm4");
205 asm volatile("paddb %xmm6,%xmm6");
206 asm volatile("paddb %xmm12,%xmm12");
207 asm volatile("paddb %xmm14,%xmm14");
208 asm volatile("pand %xmm0,%xmm5");
209 asm volatile("pand %xmm0,%xmm7");
210 asm volatile("pand %xmm0,%xmm13");
211 asm volatile("pand %xmm0,%xmm15");
212 asm volatile("pxor %xmm5,%xmm4");
213 asm volatile("pxor %xmm7,%xmm6");
214 asm volatile("pxor %xmm13,%xmm12");
215 asm volatile("pxor %xmm15,%xmm14");
216 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
217 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
218 asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
219 asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
220 asm volatile("pxor %xmm5,%xmm2");
221 asm volatile("pxor %xmm7,%xmm3");
222 asm volatile("pxor %xmm13,%xmm10");
223 asm volatile("pxor %xmm15,%xmm11");
224 asm volatile("pxor %xmm5,%xmm4");
225 asm volatile("pxor %xmm7,%xmm6");
226 asm volatile("pxor %xmm13,%xmm12");
227 asm volatile("pxor %xmm15,%xmm14");
228 asm volatile("pxor %xmm5,%xmm5");
229 asm volatile("pxor %xmm7,%xmm7");
230 asm volatile("pxor %xmm13,%xmm13");
231 asm volatile("pxor %xmm15,%xmm15");
232 }
233 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
234 asm volatile("pxor %xmm2,%xmm2");
235 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
236 asm volatile("pxor %xmm3,%xmm3");
237 asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
238 asm volatile("pxor %xmm10,%xmm10");
239 asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
240 asm volatile("pxor %xmm11,%xmm11");
241 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
242 asm volatile("pxor %xmm4,%xmm4");
243 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
244 asm volatile("pxor %xmm6,%xmm6");
245 asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
246 asm volatile("pxor %xmm12,%xmm12");
247 asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
248 asm volatile("pxor %xmm14,%xmm14");
249 }
250
251 asm volatile("sfence" : : : "memory");
252 kernel_fpu_end();
253}
254
255const struct raid6_calls raid6_sse2x4 = {
256 raid6_sse24_gen_syndrome,
257 raid6_have_sse2,
258 "sse2x4",
259 1 /* Has cache hints */
260};
261
262#endif
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
new file mode 100644
index 000000000000..aa651697b6dc
--- /dev/null
+++ b/lib/raid6/test/Makefile
@@ -0,0 +1,72 @@
1#
2# This is a simple Makefile to test some of the RAID-6 code
3# from userspace.
4#
5
6CC = gcc
7OPTFLAGS = -O2 # Adjust as desired
8CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
9LD = ld
10AWK = awk -f
11AR = ar
12RANLIB = ranlib
13
14.c.o:
15 $(CC) $(CFLAGS) -c -o $@ $<
16
17%.c: ../%.c
18 cp -f $< $@
19
20%.uc: ../%.uc
21 cp -f $< $@
22
23all: raid6.a raid6test
24
25raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
26 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
27 tables.o
28 rm -f $@
29 $(AR) cq $@ $^
30 $(RANLIB) $@
31
32raid6test: test.c raid6.a
33 $(CC) $(CFLAGS) -o raid6test $^
34
35altivec1.c: altivec.uc ../unroll.awk
36 $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
37
38altivec2.c: altivec.uc ../unroll.awk
39 $(AWK) ../unroll.awk -vN=2 < altivec.uc > $@
40
41altivec4.c: altivec.uc ../unroll.awk
42 $(AWK) ../unroll.awk -vN=4 < altivec.uc > $@
43
44altivec8.c: altivec.uc ../unroll.awk
45 $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
46
47int1.c: int.uc ../unroll.awk
48 $(AWK) ../unroll.awk -vN=1 < int.uc > $@
49
50int2.c: int.uc ../unroll.awk
51 $(AWK) ../unroll.awk -vN=2 < int.uc > $@
52
53int4.c: int.uc ../unroll.awk
54 $(AWK) ../unroll.awk -vN=4 < int.uc > $@
55
56int8.c: int.uc ../unroll.awk
57 $(AWK) ../unroll.awk -vN=8 < int.uc > $@
58
59int16.c: int.uc ../unroll.awk
60 $(AWK) ../unroll.awk -vN=16 < int.uc > $@
61
62int32.c: int.uc ../unroll.awk
63 $(AWK) ../unroll.awk -vN=32 < int.uc > $@
64
65tables.c: mktables
66 ./mktables > tables.c
67
68clean:
69 rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
70
71spotless: clean
72 rm -f *~
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
new file mode 100644
index 000000000000..7a930318b17d
--- /dev/null
+++ b/lib/raid6/test/test.c
@@ -0,0 +1,124 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * raid6test.c
13 *
14 * Test RAID-6 recovery with various algorithms
15 */
16
17#include <stdlib.h>
18#include <stdio.h>
19#include <string.h>
20#include <linux/raid/pq.h>
21
22#define NDISKS 16 /* Including P and Q */
23
24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
25struct raid6_calls raid6_call;
26
27char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE];
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
30
31static void makedata(void)
32{
33 int i, j;
34
35 for (i = 0; i < NDISKS; i++) {
36 for (j = 0; j < PAGE_SIZE; j++)
37 data[i][j] = rand();
38
39 dataptrs[i] = data[i];
40 }
41}
42
43static char disk_type(int d)
44{
45 switch (d) {
46 case NDISKS-2:
47 return 'P';
48 case NDISKS-1:
49 return 'Q';
50 default:
51 return 'D';
52 }
53}
54
55static int test_disks(int i, int j)
56{
57 int erra, errb;
58
59 memset(recovi, 0xf0, PAGE_SIZE);
60 memset(recovj, 0xba, PAGE_SIZE);
61
62 dataptrs[i] = recovi;
63 dataptrs[j] = recovj;
64
65 raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);
66
67 erra = memcmp(data[i], recovi, PAGE_SIZE);
68 errb = memcmp(data[j], recovj, PAGE_SIZE);
69
70 if (i < NDISKS-2 && j == NDISKS-1) {
71 /* We don't implement the DQ failure scenario, since it's
72 equivalent to a RAID-5 failure (XOR, then recompute Q) */
73 erra = errb = 0;
74 } else {
75 printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
76 raid6_call.name,
77 i, disk_type(i),
78 j, disk_type(j),
79 (!erra && !errb) ? "OK" :
80 !erra ? "ERRB" :
81 !errb ? "ERRA" : "ERRAB");
82 }
83
84 dataptrs[i] = data[i];
85 dataptrs[j] = data[j];
86
87 return erra || errb;
88}
89
90int main(int argc, char *argv[])
91{
92 const struct raid6_calls *const *algo;
93 int i, j;
94 int err = 0;
95
96 makedata();
97
98 for (algo = raid6_algos; *algo; algo++) {
99 if (!(*algo)->valid || (*algo)->valid()) {
100 raid6_call = **algo;
101
102 /* Nuke syndromes */
103 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
104
105 /* Generate assumed good syndrome */
106 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
107 (void **)&dataptrs);
108
109 for (i = 0; i < NDISKS-1; i++)
110 for (j = i+1; j < NDISKS; j++)
111 err += test_disks(i, j);
112 }
113 printf("\n");
114 }
115
116 printf("\n");
117 /* Pick the best algorithm test */
118 raid6_select_algo();
119
120 if (err)
121 printf("\n*** ERRORS FOUND ***\n");
122
123 return err;
124}
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
new file mode 100644
index 000000000000..c6aa03631df8
--- /dev/null
+++ b/lib/raid6/unroll.awk
@@ -0,0 +1,20 @@
1
2# This filter requires one command line option of form -vN=n
3# where n must be a decimal number.
4#
5# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
6# Replace each $# with n, and each $* with a single $.
7
8BEGIN {
9 n = N + 0
10}
11{
12 if (/\$\$/) { rep = n } else { rep = 1 }
13 for (i = 0; i < rep; ++i) {
14 tmp = $0
15 gsub(/\$\$/, i, tmp)
16 gsub(/\$\#/, n, tmp)
17 gsub(/\$\*/, "$", tmp)
18 print tmp
19 }
20}
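
In practice the .uc ("un-C") sources in this directory are ordinary C with $$ standing for the per-iteration index, so the filter turns one generic statement into N unrolled ones. A representative line in the style of int.uc (illustrative, not quoted from the file):

	/* input line in foo.uc: */
	wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];

	/* after "awk -f unroll.awk -vN=2 < foo.uc > foo2.c": */
	wq0 = wp0 = *(unative_t *)&dptr[z0][d+0*NSIZE];
	wq1 = wp1 = *(unative_t *)&dptr[z0][d+1*NSIZE];
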
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
new file mode 100644
index 000000000000..cb2a8c91c886
--- /dev/null
+++ b/lib/raid6/x86.h
@@ -0,0 +1,61 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/x86.h
15 *
16 * Definitions common to x86 and x86-64 RAID-6 code only
17 */
18
19#ifndef LINUX_RAID_RAID6X86_H
20#define LINUX_RAID_RAID6X86_H
21
22#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
23
24#ifdef __KERNEL__ /* Real code */
25
26#include <asm/i387.h>
27
28#else /* Dummy code for user space testing */
29
30static inline void kernel_fpu_begin(void)
31{
32}
33
34static inline void kernel_fpu_end(void)
35{
36}
37
38#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
39#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
40 * (fast save and restore) */
41#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
42#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
43#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
44
45/* Should work well enough on modern CPUs for testing */
46static inline int boot_cpu_has(int flag)
47{
48 u32 eax = (flag >> 5) ? 0x80000001 : 1;
49 u32 edx;
50
51 asm volatile("cpuid"
52 : "+a" (eax), "=d" (edx)
53 : : "ecx", "ebx");
54
55 return (edx >> (flag & 31)) & 1;
56}
57
58#endif /* ndef __KERNEL__ */
59
60#endif
61#endif
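
The user-space boot_cpu_has() stub relies on the same (word*32 + bit) feature encoding as the kernel: flag >> 5 selects the CPUID leaf (0 maps to leaf 1, 1 to leaf 0x80000001) and flag & 31 selects the EDX bit. A worked example of how the test harness can use it (illustrative):

	/* X86_FEATURE_XMM2 == (0*32 + 26): leaf 1, EDX bit 26 (SSE2) */
	if (boot_cpu_has(X86_FEATURE_XMM2))
		printf("SSE2 syndrome routines usable on this CPU\n");
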
diff --git a/lib/random32.c b/lib/random32.c
index 217d5c4b666d..fc3545a32771 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -39,13 +39,16 @@
39#include <linux/jiffies.h> 39#include <linux/jiffies.h>
40#include <linux/random.h> 40#include <linux/random.h>
41 41
42struct rnd_state {
43 u32 s1, s2, s3;
44};
45
46static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 42static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
47 43
48static u32 __random32(struct rnd_state *state) 44/**
45 * prandom32 - seeded pseudo-random number generator.
46 * @state: pointer to state structure holding seeded state.
47 *
48 * This is used for pseudo-randomness with no outside seeding.
49 * For more random results, use random32().
50 */
51u32 prandom32(struct rnd_state *state)
49{ 52{
50#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) 53#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
51 54
@@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state)
55 58
56 return (state->s1 ^ state->s2 ^ state->s3); 59 return (state->s1 ^ state->s2 ^ state->s3);
57} 60}
58 61EXPORT_SYMBOL(prandom32);
59/*
60 * Handle minimum values for seeds
61 */
62static inline u32 __seed(u32 x, u32 m)
63{
64 return (x < m) ? x + m : x;
65}
66 62
67/** 63/**
68 * random32 - pseudo random number generator 64 * random32 - pseudo random number generator
@@ -75,7 +71,7 @@ u32 random32(void)
75{ 71{
76 unsigned long r; 72 unsigned long r;
77 struct rnd_state *state = &get_cpu_var(net_rand_state); 73 struct rnd_state *state = &get_cpu_var(net_rand_state);
78 r = __random32(state); 74 r = prandom32(state);
79 put_cpu_var(state); 75 put_cpu_var(state);
80 return r; 76 return r;
81} 77}
@@ -118,12 +114,12 @@ static int __init random32_init(void)
118 state->s3 = __seed(LCG(state->s2), 15); 114 state->s3 = __seed(LCG(state->s2), 15);
119 115
120 /* "warm it up" */ 116 /* "warm it up" */
121 __random32(state); 117 prandom32(state);
122 __random32(state); 118 prandom32(state);
123 __random32(state); 119 prandom32(state);
124 __random32(state); 120 prandom32(state);
125 __random32(state); 121 prandom32(state);
126 __random32(state); 122 prandom32(state);
127 } 123 }
128 return 0; 124 return 0;
129} 125}
@@ -131,7 +127,7 @@ core_initcall(random32_init);
131 127
132/* 128/*
133 * Generate better values after random number generator 129 * Generate better values after random number generator
134 * is fully initalized. 130 * is fully initialized.
135 */ 131 */
136static int __init random32_reseed(void) 132static int __init random32_reseed(void)
137{ 133{
@@ -147,7 +143,7 @@ static int __init random32_reseed(void)
147 state->s3 = __seed(seeds[2], 15); 143 state->s3 = __seed(seeds[2], 15);
148 144
149 /* mix it in */ 145 /* mix it in */
150 __random32(state); 146 prandom32(state);
151 } 147 }
152 return 0; 148 return 0;
153} 149}
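
With __random32() renamed and exported as prandom32(), a caller that wants a reproducible, self-contained stream can hold its own struct rnd_state (which this series moves into the random header) instead of going through the shared per-CPU net_rand_state. A minimal sketch, assuming the caller supplies its own seeds and keeps each component above the small minimum that __seed() normally enforces:

	#include <linux/random.h>	/* struct rnd_state, prandom32(), random32() */

	static void prandom32_example(void)
	{
		struct rnd_state rnd = {
			/* illustrative fixed seeds */
			.s1 = 0x12345678U, .s2 = 0x23456789U, .s3 = 0x34567890U,
		};
		u32 a = prandom32(&rnd);	/* reproducible, caller-owned sequence */
		u32 b = random32();		/* shared, boot-time seeded per-CPU state */

		(void)a; (void)b;
	}
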
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 09f5ce1810dc..027a03f4c56d 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -16,9 +16,14 @@
16/* 16/*
17 * __ratelimit - rate limiting 17 * __ratelimit - rate limiting
18 * @rs: ratelimit_state data 18 * @rs: ratelimit_state data
19 * @func: name of calling function
19 * 20 *
20 * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks 21 * This enforces a rate limit: not more than @rs->burst callbacks
21 * in every @rs->ratelimit_jiffies 22 * in every @rs->interval
23 *
24 * RETURNS:
25 * 0 means callbacks will be suppressed.
26 * 1 means go ahead and do it.
22 */ 27 */
23int ___ratelimit(struct ratelimit_state *rs, const char *func) 28int ___ratelimit(struct ratelimit_state *rs, const char *func)
24{ 29{
@@ -35,7 +40,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
35 * the entity that is holding the lock already: 40 * the entity that is holding the lock already:
36 */ 41 */
37 if (!spin_trylock_irqsave(&rs->lock, flags)) 42 if (!spin_trylock_irqsave(&rs->lock, flags))
38 return 1; 43 return 0;
39 44
40 if (!rs->begin) 45 if (!rs->begin)
41 rs->begin = jiffies; 46 rs->begin = jiffies;
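
The clarified return convention matters to callers: ___ratelimit() (normally reached through the __ratelimit() wrapper) now returns 0 whenever the event should be suppressed, including the contended-trylock case changed above, and 1 when it may proceed. A typical call pattern (a sketch; the state name and message are illustrative):

	#include <linux/kernel.h>
	#include <linux/ratelimit.h>

	static DEFINE_RATELIMIT_STATE(my_rs, 5 * HZ, 10);	/* at most 10 per 5 s */

	static void report_event(void)
	{
		if (__ratelimit(&my_rs))
			printk(KERN_WARNING "noisy event happened\n");
		/* a zero return means this occurrence is silently dropped */
	}
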
diff --git a/lib/rbtree.c b/lib/rbtree.c
index e2aa3be29858..4693f79195d3 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -283,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
283} 283}
284EXPORT_SYMBOL(rb_erase); 284EXPORT_SYMBOL(rb_erase);
285 285
286static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
287{
288 struct rb_node *parent;
289
290up:
291 func(node, data);
292 parent = rb_parent(node);
293 if (!parent)
294 return;
295
296 if (node == parent->rb_left && parent->rb_right)
297 func(parent->rb_right, data);
298 else if (parent->rb_left)
299 func(parent->rb_left, data);
300
301 node = parent;
302 goto up;
303}
304
305/*
306 * after inserting @node into the tree, update the tree to account for
307 * both the new entry and any damage done by rebalance
308 */
309void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
310{
311 if (node->rb_left)
312 node = node->rb_left;
313 else if (node->rb_right)
314 node = node->rb_right;
315
316 rb_augment_path(node, func, data);
317}
318
319/*
320 * before removing the node, find the deepest node on the rebalance path
321 * that will still be there after @node gets removed
322 */
323struct rb_node *rb_augment_erase_begin(struct rb_node *node)
324{
325 struct rb_node *deepest;
326
327 if (!node->rb_right && !node->rb_left)
328 deepest = rb_parent(node);
329 else if (!node->rb_right)
330 deepest = node->rb_left;
331 else if (!node->rb_left)
332 deepest = node->rb_right;
333 else {
334 deepest = rb_next(node);
335 if (deepest->rb_right)
336 deepest = deepest->rb_right;
337 else if (rb_parent(deepest) != node)
338 deepest = rb_parent(deepest);
339 }
340
341 return deepest;
342}
343
344/*
345 * after removal, update the tree to account for the removed entry
346 * and any rebalance damage.
347 */
348void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
349{
350 if (node)
351 rb_augment_path(node, func, data);
352}
353
286/* 354/*
287 * This function returns the first node (in sort order) of the tree. 355 * This function returns the first node (in sort order) of the tree.
288 */ 356 */
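
The new augmentation hooks wrap the existing rbtree primitives rather than replacing them: the caller supplies an rb_augment_f callback that recomputes its per-node value, and brackets rb_insert_color()/rb_erase() with the helpers. A minimal usage sketch (the callback and the parent/link variables from the usual search loop are hypothetical):

	#include <linux/rbtree.h>

	static void my_propagate_cb(struct rb_node *node, void *data);	/* hypothetical */

	static void augmented_insert(struct rb_root *root, struct rb_node *node,
				     struct rb_node *parent, struct rb_node **link)
	{
		rb_link_node(node, parent, link);
		rb_insert_color(node, root);
		rb_augment_insert(node, my_propagate_cb, NULL);
	}

	static void augmented_erase(struct rb_root *root, struct rb_node *node)
	{
		struct rb_node *deepest = rb_augment_erase_begin(node);

		rb_erase(node, root);
		rb_augment_erase_end(deepest, my_propagate_cb, NULL);
	}
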
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index ccf95bff7984..ffc9fc7f3b05 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -143,13 +143,14 @@ void __sched __down_read(struct rw_semaphore *sem)
143{ 143{
144 struct rwsem_waiter waiter; 144 struct rwsem_waiter waiter;
145 struct task_struct *tsk; 145 struct task_struct *tsk;
146 unsigned long flags;
146 147
147 spin_lock_irq(&sem->wait_lock); 148 spin_lock_irqsave(&sem->wait_lock, flags);
148 149
149 if (sem->activity >= 0 && list_empty(&sem->wait_list)) { 150 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
150 /* granted */ 151 /* granted */
151 sem->activity++; 152 sem->activity++;
152 spin_unlock_irq(&sem->wait_lock); 153 spin_unlock_irqrestore(&sem->wait_lock, flags);
153 goto out; 154 goto out;
154 } 155 }
155 156
@@ -164,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem)
164 list_add_tail(&waiter.list, &sem->wait_list); 165 list_add_tail(&waiter.list, &sem->wait_list);
165 166
166 /* we don't need to touch the semaphore struct anymore */ 167 /* we don't need to touch the semaphore struct anymore */
167 spin_unlock_irq(&sem->wait_lock); 168 spin_unlock_irqrestore(&sem->wait_lock, flags);
168 169
169 /* wait to be given the lock */ 170 /* wait to be given the lock */
170 for (;;) { 171 for (;;) {
@@ -209,13 +210,14 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
209{ 210{
210 struct rwsem_waiter waiter; 211 struct rwsem_waiter waiter;
211 struct task_struct *tsk; 212 struct task_struct *tsk;
213 unsigned long flags;
212 214
213 spin_lock_irq(&sem->wait_lock); 215 spin_lock_irqsave(&sem->wait_lock, flags);
214 216
215 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 217 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
216 /* granted */ 218 /* granted */
217 sem->activity = -1; 219 sem->activity = -1;
218 spin_unlock_irq(&sem->wait_lock); 220 spin_unlock_irqrestore(&sem->wait_lock, flags);
219 goto out; 221 goto out;
220 } 222 }
221 223
@@ -230,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
230 list_add_tail(&waiter.list, &sem->wait_list); 232 list_add_tail(&waiter.list, &sem->wait_list);
231 233
232 /* we don't need to touch the semaphore struct anymore */ 234 /* we don't need to touch the semaphore struct anymore */
233 spin_unlock_irq(&sem->wait_lock); 235 spin_unlock_irqrestore(&sem->wait_lock, flags);
234 236
235 /* wait to be given the lock */ 237 /* wait to be given the lock */
236 for (;;) { 238 for (;;) {
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e5665e..f236d7cd5cf3 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,45 +36,56 @@ struct rwsem_waiter {
36#define RWSEM_WAITING_FOR_WRITE 0x00000002 36#define RWSEM_WAITING_FOR_WRITE 0x00000002
37}; 37};
38 38
39/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
40 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
41 * since the rwsem value was observed.
42 */
43#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
44#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */
45#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
46
39/* 47/*
40 * handle the lock release when there are processes blocked on it that can now run 48
41 * - if we come here from up_xxxx(), then: 49 * - if we come here from up_xxxx(), then:
42 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) 50 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
43 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) 51 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
44 * - there must be someone on the queue 52 * - there must be someone on the queue
45 * - the spinlock must be held by the caller 53 * - the spinlock must be held by the caller
46 * - woken process blocks are discarded from the list after having task zeroed 54 * - woken process blocks are discarded from the list after having task zeroed
47 * - writers are only woken if downgrading is false 55 * - writers are only woken if downgrading is false
48 */ 56 */
49static inline struct rw_semaphore * 57static struct rw_semaphore *
50__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) 58__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
51{ 59{
52 struct rwsem_waiter *waiter; 60 struct rwsem_waiter *waiter;
53 struct task_struct *tsk; 61 struct task_struct *tsk;
54 struct list_head *next; 62 struct list_head *next;
55 signed long oldcount, woken, loop; 63 signed long oldcount, woken, loop, adjustment;
56
57 if (downgrading)
58 goto dont_wake_writers;
59
60 /* if we came through an up_xxxx() call, we only only wake someone up
61 * if we can transition the active part of the count from 0 -> 1
62 */
63 try_again:
64 oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
65 - RWSEM_ACTIVE_BIAS;
66 if (oldcount & RWSEM_ACTIVE_MASK)
67 goto undo;
68 64
69 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 65 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
70
71 /* try to grant a single write lock if there's a writer at the front
72 * of the queue - note we leave the 'active part' of the count
73 * incremented by 1 and the waiting part incremented by 0x00010000
74 */
75 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) 66 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
76 goto readers_only; 67 goto readers_only;
77 68
69 if (wake_type == RWSEM_WAKE_READ_OWNED)
70 /* Another active reader was observed, so wakeup is not
71 * likely to succeed. Save the atomic op.
72 */
73 goto out;
74
75 /* There's a writer at the front of the queue - try to grant it the
76 * write lock. However, we only wake this writer if we can transition
77 * the active part of the count from 0 -> 1
78 */
79 adjustment = RWSEM_ACTIVE_WRITE_BIAS;
80 if (waiter->list.next == &sem->wait_list)
81 adjustment -= RWSEM_WAITING_BIAS;
82
83 try_again_write:
84 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
85 if (oldcount & RWSEM_ACTIVE_MASK)
86 /* Someone grabbed the sem already */
87 goto undo_write;
88
78 /* We must be careful not to touch 'waiter' after we set ->task = NULL. 89 /* We must be careful not to touch 'waiter' after we set ->task = NULL.
79 * It is allocated on the waiter's stack and may become invalid at 90
80 * any time after that point (due to a wakeup from another source). 91 * any time after that point (due to a wakeup from another source).
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
87 put_task_struct(tsk); 98 put_task_struct(tsk);
88 goto out; 99 goto out;
89 100
90 /* don't want to wake any writers */ 101 readers_only:
91 dont_wake_writers: 102 /* If we come here from up_xxxx(), another thread might have reached
92 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 103 * rwsem_down_failed_common() before we acquired the spinlock and
93 if (waiter->flags & RWSEM_WAITING_FOR_WRITE) 104 * woken up a waiter, making it now active. We prefer to check for
105 * this first in order to not spend too much time with the spinlock
106 * held if we're not going to be able to wake up readers in the end.
107 *
108 * Note that we do not need to update the rwsem count: any writer
109 * trying to acquire rwsem will run rwsem_down_write_failed() due
110 * to the waiting threads and block trying to acquire the spinlock.
111 *
112 * We use a dummy atomic update in order to acquire the cache line
113 * exclusively since we expect to succeed and run the final rwsem
114 * count adjustment pretty soon.
115 */
116 if (wake_type == RWSEM_WAKE_ANY &&
117 rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
118 /* Someone grabbed the sem for write already */
94 goto out; 119 goto out;
95 120
96 /* grant an infinite number of read locks to the readers at the front 121 /* Grant an infinite number of read locks to the readers at the front
97 * of the queue 122 * of the queue. Note we increment the 'active part' of the count by
98 * - note we increment the 'active part' of the count by the number of 123 * the number of readers before waking any processes up.
99 * readers before waking any processes up
100 */ 124 */
101 readers_only:
102 woken = 0; 125 woken = 0;
103 do { 126 do {
104 woken++; 127 woken++;
@@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
111 134
112 } while (waiter->flags & RWSEM_WAITING_FOR_READ); 135 } while (waiter->flags & RWSEM_WAITING_FOR_READ);
113 136
114 loop = woken; 137 adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
115 woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; 138 if (waiter->flags & RWSEM_WAITING_FOR_READ)
116 if (!downgrading) 139 /* hit end of list above */
117 /* we'd already done one increment earlier */ 140 adjustment -= RWSEM_WAITING_BIAS;
118 woken -= RWSEM_ACTIVE_BIAS;
119 141
120 rwsem_atomic_add(woken, sem); 142 rwsem_atomic_add(adjustment, sem);
121 143
122 next = sem->wait_list.next; 144 next = sem->wait_list.next;
123 for (; loop > 0; loop--) { 145 for (loop = woken; loop > 0; loop--) {
124 waiter = list_entry(next, struct rwsem_waiter, list); 146 waiter = list_entry(next, struct rwsem_waiter, list);
125 next = waiter->list.next; 147 next = waiter->list.next;
126 tsk = waiter->task; 148 tsk = waiter->task;
@@ -136,11 +158,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
136 out: 158 out:
137 return sem; 159 return sem;
138 160
139 /* undo the change to count, but check for a transition 1->0 */ 161 /* undo the change to the active count, but check for a transition
140 undo: 162 * 1->0 */
141 if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) 163 undo_write:
164 if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
142 goto out; 165 goto out;
143 goto try_again; 166 goto try_again_write;
144} 167}
145 168
146/* 169/*
@@ -148,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
148 */ 171 */
149static struct rw_semaphore __sched * 172static struct rw_semaphore __sched *
150rwsem_down_failed_common(struct rw_semaphore *sem, 173rwsem_down_failed_common(struct rw_semaphore *sem,
151 struct rwsem_waiter *waiter, signed long adjustment) 174 unsigned int flags, signed long adjustment)
152{ 175{
176 struct rwsem_waiter waiter;
153 struct task_struct *tsk = current; 177 struct task_struct *tsk = current;
154 signed long count; 178 signed long count;
155 179
@@ -157,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
157 181
158 /* set up my own style of waitqueue */ 182 /* set up my own style of waitqueue */
159 spin_lock_irq(&sem->wait_lock); 183 spin_lock_irq(&sem->wait_lock);
160 waiter->task = tsk; 184 waiter.task = tsk;
185 waiter.flags = flags;
161 get_task_struct(tsk); 186 get_task_struct(tsk);
162 187
163 list_add_tail(&waiter->list, &sem->wait_list); 188 if (list_empty(&sem->wait_list))
189 adjustment += RWSEM_WAITING_BIAS;
190 list_add_tail(&waiter.list, &sem->wait_list);
164 191
165 /* we're now waiting on the lock, but no longer actively read-locking */ 192 /* we're now waiting on the lock, but no longer actively locking */
166 count = rwsem_atomic_update(adjustment, sem); 193 count = rwsem_atomic_update(adjustment, sem);
167 194
168 /* if there are no active locks, wake the front queued process(es) up */ 195 /* If there are no active locks, wake the front queued process(es) up.
169 if (!(count & RWSEM_ACTIVE_MASK)) 196 *
170 sem = __rwsem_do_wake(sem, 0); 197 * Alternatively, if we're called from a failed down_write(), there
198 * were already threads queued before us and there are no active
199 * writers, the lock must be read owned; so we try to wake any read
200 * locks that were queued ahead of us. */
201 if (count == RWSEM_WAITING_BIAS)
202 sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
203 else if (count > RWSEM_WAITING_BIAS &&
204 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
205 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
171 206
172 spin_unlock_irq(&sem->wait_lock); 207 spin_unlock_irq(&sem->wait_lock);
173 208
174 /* wait to be given the lock */ 209 /* wait to be given the lock */
175 for (;;) { 210 for (;;) {
176 if (!waiter->task) 211 if (!waiter.task)
177 break; 212 break;
178 schedule(); 213 schedule();
179 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 214 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
@@ -190,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
190asmregparm struct rw_semaphore __sched * 225asmregparm struct rw_semaphore __sched *
191rwsem_down_read_failed(struct rw_semaphore *sem) 226rwsem_down_read_failed(struct rw_semaphore *sem)
192{ 227{
193 struct rwsem_waiter waiter; 228 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
194 229 -RWSEM_ACTIVE_READ_BIAS);
195 waiter.flags = RWSEM_WAITING_FOR_READ;
196 rwsem_down_failed_common(sem, &waiter,
197 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
198 return sem;
199} 230}
200 231
201/* 232/*
@@ -204,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
204asmregparm struct rw_semaphore __sched * 235asmregparm struct rw_semaphore __sched *
205rwsem_down_write_failed(struct rw_semaphore *sem) 236rwsem_down_write_failed(struct rw_semaphore *sem)
206{ 237{
207 struct rwsem_waiter waiter; 238 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
208 239 -RWSEM_ACTIVE_WRITE_BIAS);
209 waiter.flags = RWSEM_WAITING_FOR_WRITE;
210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
211
212 return sem;
213} 240}
214 241
215/* 242/*
@@ -224,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
224 251
225 /* do nothing if list empty */ 252 /* do nothing if list empty */
226 if (!list_empty(&sem->wait_list)) 253 if (!list_empty(&sem->wait_list))
227 sem = __rwsem_do_wake(sem, 0); 254 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
228 255
229 spin_unlock_irqrestore(&sem->wait_lock, flags); 256 spin_unlock_irqrestore(&sem->wait_lock, flags);
230 257
@@ -244,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
244 271
245 /* do nothing if list empty */ 272 /* do nothing if list empty */
246 if (!list_empty(&sem->wait_list)) 273 if (!list_empty(&sem->wait_list))
247 sem = __rwsem_do_wake(sem, 1); 274 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
248 275
249 spin_unlock_irqrestore(&sem->wait_lock, flags); 276 spin_unlock_irqrestore(&sem->wait_lock, flags);
250 277
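
The new count == RWSEM_WAITING_BIAS test in rwsem_down_failed_common() is easiest to read with a concrete count layout in mind. Taking the conventional 32-bit x86 values (ACTIVE_BIAS = 0x00000001, ACTIVE_MASK = 0x0000ffff, WAITING_BIAS = -0x00010000, ACTIVE_WRITE_BIAS = WAITING_BIAS + ACTIVE_BIAS; the exact constants are per-architecture, so treat the numbers below as illustrative):

	/*
	 * idle rwsem                         count == 0x00000000
	 * one reader holding it              count == 0x00000001
	 * reader holding it, writer queued   count == 0x00000001 - 0x00010000
	 *                                           == 0xffff0001
	 * only queued waiters, none active   count == RWSEM_WAITING_BIAS
	 *                                           == 0xffff0000
	 *
	 * So after this thread queues itself and applies its adjustment,
	 * count == RWSEM_WAITING_BIAS means "someone is waiting but nobody
	 * holds the lock", and __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE)
	 * can be called without re-checking the active count.
	 */
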
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 0d475d8167bf..4ceb05d772ae 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -7,8 +7,10 @@
7 * Version 2. See the file COPYING for more details. 7 * Version 2. See the file COPYING for more details.
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/slab.h>
10#include <linux/scatterlist.h> 11#include <linux/scatterlist.h>
11#include <linux/highmem.h> 12#include <linux/highmem.h>
13#include <linux/kmemleak.h>
12 14
13/** 15/**
14 * sg_next - return the next scatterlist entry in a list 16 * sg_next - return the next scatterlist entry in a list
@@ -114,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one);
114 */ 116 */
115static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask) 117static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
116{ 118{
117 if (nents == SG_MAX_SINGLE_ALLOC) 119 if (nents == SG_MAX_SINGLE_ALLOC) {
118 return (struct scatterlist *) __get_free_page(gfp_mask); 120 /*
119 else 121 * Kmemleak doesn't track page allocations as they are not
122 * commonly used (in a raw form) for kernel data structures.
123 * As we chain together a list of pages and then a normal
124 * kmalloc (tracked by kmemleak), in order to for that last
125 * allocation not to become decoupled (and thus a
126 * false-positive) we need to inform kmemleak of all the
127 * intermediate allocations.
128 */
129 void *ptr = (void *) __get_free_page(gfp_mask);
130 kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
131 return ptr;
132 } else
120 return kmalloc(nents * sizeof(struct scatterlist), gfp_mask); 133 return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
121} 134}
122 135
123static void sg_kfree(struct scatterlist *sg, unsigned int nents) 136static void sg_kfree(struct scatterlist *sg, unsigned int nents)
124{ 137{
125 if (nents == SG_MAX_SINGLE_ALLOC) 138 if (nents == SG_MAX_SINGLE_ALLOC) {
139 kmemleak_free(sg);
126 free_page((unsigned long) sg); 140 free_page((unsigned long) sg);
127 else 141 } else
128 kfree(sg); 142 kfree(sg);
129} 143}
130 144
@@ -234,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
234 left -= sg_size; 248 left -= sg_size;
235 249
236 sg = alloc_fn(alloc_size, gfp_mask); 250 sg = alloc_fn(alloc_size, gfp_mask);
237 if (unlikely(!sg)) 251 if (unlikely(!sg)) {
238 return -ENOMEM; 252 /*
253 * Adjust entry count to reflect that the last
254 * entry of the previous table won't be used for
255 * linkage. Without this, sg_kfree() may get
256 * confused.
257 */
258 if (prv)
259 table->nents = ++table->orig_nents;
260
261 return -ENOMEM;
262 }
239 263
240 sg_init_table(sg, alloc_size); 264 sg_init_table(sg, alloc_size);
241 table->nents = table->orig_nents += sg_size; 265 table->nents = table->orig_nents += sg_size;
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 238e72a18ce1..fdc77c82f922 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -15,7 +15,7 @@ void show_mem(void)
15 unsigned long total = 0, reserved = 0, shared = 0, 15 unsigned long total = 0, reserved = 0, shared = 0,
16 nonshared = 0, highmem = 0; 16 nonshared = 0, highmem = 0;
17 17
18 printk(KERN_INFO "Mem-Info:\n"); 18 printk("Mem-Info:\n");
19 show_free_areas(); 19 show_free_areas();
20 20
21 for_each_online_pgdat(pgdat) { 21 for_each_online_pgdat(pgdat) {
@@ -49,15 +49,15 @@ void show_mem(void)
49 pgdat_resize_unlock(pgdat, &flags); 49 pgdat_resize_unlock(pgdat, &flags);
50 } 50 }
51 51
52 printk(KERN_INFO "%lu pages RAM\n", total); 52 printk("%lu pages RAM\n", total);
53#ifdef CONFIG_HIGHMEM 53#ifdef CONFIG_HIGHMEM
54 printk(KERN_INFO "%lu pages HighMem\n", highmem); 54 printk("%lu pages HighMem\n", highmem);
55#endif 55#endif
56 printk(KERN_INFO "%lu pages reserved\n", reserved); 56 printk("%lu pages reserved\n", reserved);
57 printk(KERN_INFO "%lu pages shared\n", shared); 57 printk("%lu pages shared\n", shared);
58 printk(KERN_INFO "%lu pages non-shared\n", nonshared); 58 printk("%lu pages non-shared\n", nonshared);
59#ifdef CONFIG_QUICKLIST 59#ifdef CONFIG_QUICKLIST
60 printk(KERN_INFO "%lu pages in pagetable cache\n", 60 printk("%lu pages in pagetable cache\n",
61 quicklist_total_size()); 61 quicklist_total_size());
62#endif 62#endif
63} 63}
diff --git a/lib/string.c b/lib/string.c
index a1cdcfcc42d0..f71bead1be3e 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -36,25 +36,21 @@ int strnicmp(const char *s1, const char *s2, size_t len)
36 /* Yes, Virginia, it had better be unsigned */ 36 /* Yes, Virginia, it had better be unsigned */
37 unsigned char c1, c2; 37 unsigned char c1, c2;
38 38
39 c1 = c2 = 0; 39 if (!len)
40 if (len) { 40 return 0;
41 do { 41
42 c1 = *s1; 42 do {
43 c2 = *s2; 43 c1 = *s1++;
44 s1++; 44 c2 = *s2++;
45 s2++; 45 if (!c1 || !c2)
46 if (!c1) 46 break;
47 break; 47 if (c1 == c2)
48 if (!c2) 48 continue;
49 break; 49 c1 = tolower(c1);
50 if (c1 == c2) 50 c2 = tolower(c2);
51 continue; 51 if (c1 != c2)
52 c1 = tolower(c1); 52 break;
53 c2 = tolower(c2); 53 } while (--len);
54 if (c1 != c2)
55 break;
56 } while (--len);
57 }
58 return (int)c1 - (int)c2; 54 return (int)c1 - (int)c2;
59} 55}
60EXPORT_SYMBOL(strnicmp); 56EXPORT_SYMBOL(strnicmp);
@@ -693,13 +689,13 @@ EXPORT_SYMBOL(strstr);
693 */ 689 */
694char *strnstr(const char *s1, const char *s2, size_t len) 690char *strnstr(const char *s1, const char *s2, size_t len)
695{ 691{
696 size_t l1 = len, l2; 692 size_t l2;
697 693
698 l2 = strlen(s2); 694 l2 = strlen(s2);
699 if (!l2) 695 if (!l2)
700 return (char *)s1; 696 return (char *)s1;
701 while (l1 >= l2) { 697 while (len >= l2) {
702 l1--; 698 len--;
703 if (!memcmp(s1, s2, l2)) 699 if (!memcmp(s1, s2, l2))
704 return (char *)s1; 700 return (char *)s1;
705 s1++; 701 s1++;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 437eedb5a53b..7c06ee51a29a 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -28,6 +28,7 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/ctype.h> 29#include <linux/ctype.h>
30#include <linux/highmem.h> 30#include <linux/highmem.h>
31#include <linux/gfp.h>
31 32
32#include <asm/io.h> 33#include <asm/io.h>
33#include <asm/dma.h> 34#include <asm/dma.h>
@@ -49,19 +50,11 @@
49 */ 50 */
50#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) 51#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
51 52
52/*
53 * Enumeration for sync targets
54 */
55enum dma_sync_target {
56 SYNC_FOR_CPU = 0,
57 SYNC_FOR_DEVICE = 1,
58};
59
60int swiotlb_force; 53int swiotlb_force;
61 54
62/* 55/*
63 * Used to do a quick range check in unmap_single and 56 * Used to do a quick range check in swiotlb_tbl_unmap_single and
64 * sync_single_*, to see if the memory was in fact allocated by this 57 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
65 * API. 58 * API.
66 */ 59 */
67static char *io_tlb_start, *io_tlb_end; 60static char *io_tlb_start, *io_tlb_end;
@@ -77,7 +70,7 @@ static unsigned long io_tlb_nslabs;
77 */ 70 */
78static unsigned long io_tlb_overflow = 32*1024; 71static unsigned long io_tlb_overflow = 32*1024;
79 72
80void *io_tlb_overflow_buffer; 73static void *io_tlb_overflow_buffer;
81 74
82/* 75/*
83 * This is a free list describing the number of free entries available from 76 * This is a free list describing the number of free entries available from
@@ -139,28 +132,14 @@ void swiotlb_print_info(void)
139 (unsigned long long)pend); 132 (unsigned long long)pend);
140} 133}
141 134
142/* 135void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
143 * Statically reserve bounce buffer space and initialize bounce buffer data
144 * structures for the software IO TLB used to implement the DMA API.
145 */
146void __init
147swiotlb_init_with_default_size(size_t default_size, int verbose)
148{ 136{
149 unsigned long i, bytes; 137 unsigned long i, bytes;
150 138
151 if (!io_tlb_nslabs) { 139 bytes = nslabs << IO_TLB_SHIFT;
152 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
153 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
154 }
155
156 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
157 140
158 /* 141 io_tlb_nslabs = nslabs;
159 * Get IO TLB memory from the low pages 142 io_tlb_start = tlb;
160 */
161 io_tlb_start = alloc_bootmem_low_pages(bytes);
162 if (!io_tlb_start)
163 panic("Cannot allocate SWIOTLB buffer");
164 io_tlb_end = io_tlb_start + bytes; 143 io_tlb_end = io_tlb_start + bytes;
165 144
166 /* 145 /*
@@ -168,22 +147,48 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
168 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE 147 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
169 * between io_tlb_start and io_tlb_end. 148 * between io_tlb_start and io_tlb_end.
170 */ 149 */
171 io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); 150 io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
172 for (i = 0; i < io_tlb_nslabs; i++) 151 for (i = 0; i < io_tlb_nslabs; i++)
173 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); 152 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
174 io_tlb_index = 0; 153 io_tlb_index = 0;
175 io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); 154 io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
176 155
177 /* 156 /*
178 * Get the overflow emergency buffer 157 * Get the overflow emergency buffer
179 */ 158 */
180 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); 159 io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
181 if (!io_tlb_overflow_buffer) 160 if (!io_tlb_overflow_buffer)
182 panic("Cannot allocate SWIOTLB overflow buffer!\n"); 161 panic("Cannot allocate SWIOTLB overflow buffer!\n");
183 if (verbose) 162 if (verbose)
184 swiotlb_print_info(); 163 swiotlb_print_info();
185} 164}
186 165
166/*
167 * Statically reserve bounce buffer space and initialize bounce buffer data
168 * structures for the software IO TLB used to implement the DMA API.
169 */
170void __init
171swiotlb_init_with_default_size(size_t default_size, int verbose)
172{
173 unsigned long bytes;
174
175 if (!io_tlb_nslabs) {
176 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
177 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
178 }
179
180 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
181
182 /*
183 * Get IO TLB memory from the low pages
184 */
185 io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
186 if (!io_tlb_start)
187 panic("Cannot allocate SWIOTLB buffer");
188
189 swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
190}
191
187void __init 192void __init
188swiotlb_init(int verbose) 193swiotlb_init(int verbose)
189{ 194{
@@ -303,13 +308,13 @@ void __init swiotlb_free(void)
303 get_order(io_tlb_nslabs << IO_TLB_SHIFT)); 308 get_order(io_tlb_nslabs << IO_TLB_SHIFT));
304 } else { 309 } else {
305 free_bootmem_late(__pa(io_tlb_overflow_buffer), 310 free_bootmem_late(__pa(io_tlb_overflow_buffer),
306 io_tlb_overflow); 311 PAGE_ALIGN(io_tlb_overflow));
307 free_bootmem_late(__pa(io_tlb_orig_addr), 312 free_bootmem_late(__pa(io_tlb_orig_addr),
308 io_tlb_nslabs * sizeof(phys_addr_t)); 313 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
309 free_bootmem_late(__pa(io_tlb_list), 314 free_bootmem_late(__pa(io_tlb_list),
310 io_tlb_nslabs * sizeof(int)); 315 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
311 free_bootmem_late(__pa(io_tlb_start), 316 free_bootmem_late(__pa(io_tlb_start),
312 io_tlb_nslabs << IO_TLB_SHIFT); 317 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
313 } 318 }
314} 319}
315 320
@@ -322,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
322/* 327/*
323 * Bounce: copy the swiotlb buffer back to the original dma location 328 * Bounce: copy the swiotlb buffer back to the original dma location
324 */ 329 */
325static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, 330void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
326 enum dma_data_direction dir) 331 enum dma_data_direction dir)
327{ 332{
328 unsigned long pfn = PFN_DOWN(phys); 333 unsigned long pfn = PFN_DOWN(phys);
329 334
@@ -359,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
359 memcpy(phys_to_virt(phys), dma_addr, size); 364 memcpy(phys_to_virt(phys), dma_addr, size);
360 } 365 }
361} 366}
367EXPORT_SYMBOL_GPL(swiotlb_bounce);
362 368
363/* 369void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
364 * Allocates bounce buffer and returns its kernel virtual address. 370 phys_addr_t phys, size_t size,
365 */ 371 enum dma_data_direction dir)
366static void *
367map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
368{ 372{
369 unsigned long flags; 373 unsigned long flags;
370 char *dma_addr; 374 char *dma_addr;
371 unsigned int nslots, stride, index, wrap; 375 unsigned int nslots, stride, index, wrap;
372 int i; 376 int i;
373 unsigned long start_dma_addr;
374 unsigned long mask; 377 unsigned long mask;
375 unsigned long offset_slots; 378 unsigned long offset_slots;
376 unsigned long max_slots; 379 unsigned long max_slots;
377 380
378 mask = dma_get_seg_boundary(hwdev); 381 mask = dma_get_seg_boundary(hwdev);
379 start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
380 382
381 offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 383 tbl_dma_addr &= mask;
384
385 offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
382 386
383 /* 387 /*
384 * Carefully handle integer overflow which can occur when mask == ~0UL. 388 * Carefully handle integer overflow which can occur when mask == ~0UL.
@@ -465,12 +469,27 @@ found:
465 469
466 return dma_addr; 470 return dma_addr;
467} 471}
472EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
473
474/*
475 * Allocates bounce buffer and returns its kernel virtual address.
476 */
477
478static void *
479map_single(struct device *hwdev, phys_addr_t phys, size_t size,
480 enum dma_data_direction dir)
481{
482 dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
483
484 return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
485}
468 486
469/* 487/*
470 * dma_addr is the kernel virtual address of the bounce buffer to unmap. 488 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
471 */ 489 */
472static void 490void
473do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) 491swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
492 enum dma_data_direction dir)
474{ 493{
475 unsigned long flags; 494 unsigned long flags;
476 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 495 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -508,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
508 } 527 }
509 spin_unlock_irqrestore(&io_tlb_lock, flags); 528 spin_unlock_irqrestore(&io_tlb_lock, flags);
510} 529}
530EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
511 531
512static void 532void
513sync_single(struct device *hwdev, char *dma_addr, size_t size, 533swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
514 int dir, int target) 534 enum dma_data_direction dir,
535 enum dma_sync_target target)
515{ 536{
516 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; 537 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
517 phys_addr_t phys = io_tlb_orig_addr[index]; 538 phys_addr_t phys = io_tlb_orig_addr[index];
@@ -535,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
535 BUG(); 556 BUG();
536 } 557 }
537} 558}
559EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
538 560
539void * 561void *
540swiotlb_alloc_coherent(struct device *hwdev, size_t size, 562swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -558,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
558 } 580 }
559 if (!ret) { 581 if (!ret) {
560 /* 582 /*
561 * We are either out of memory or the device can't DMA 583 * We are either out of memory or the device can't DMA to
562 * to GFP_DMA memory; fall back on map_single(), which 584 * GFP_DMA memory; fall back on map_single(), which
563 * will grab memory from the lowest available address range. 585 * will grab memory from the lowest available address range.
564 */ 586 */
565 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); 587 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
@@ -577,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
577 (unsigned long long)dev_addr); 599 (unsigned long long)dev_addr);
578 600
579 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 601 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
580 do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); 602 swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
581 return NULL; 603 return NULL;
582 } 604 }
583 *dma_handle = dev_addr; 605 *dma_handle = dev_addr;
@@ -595,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
595 if (!is_swiotlb_buffer(paddr)) 617 if (!is_swiotlb_buffer(paddr))
596 free_pages((unsigned long)vaddr, get_order(size)); 618 free_pages((unsigned long)vaddr, get_order(size));
597 else 619 else
598 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 620 /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
599 do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); 621 swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
600} 622}
601EXPORT_SYMBOL(swiotlb_free_coherent); 623EXPORT_SYMBOL(swiotlb_free_coherent);
602 624
603static void 625static void
604swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) 626swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
627 int do_panic)
605{ 628{
606 /* 629 /*
607 * Ran out of IOMMU space for this operation. This is very bad. 630 * Ran out of IOMMU space for this operation. This is very bad.
@@ -679,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
679 * whatever the device wrote there. 702 * whatever the device wrote there.
680 */ 703 */
681static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, 704static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
682 size_t size, int dir) 705 size_t size, enum dma_data_direction dir)
683{ 706{
684 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); 707 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
685 708
686 BUG_ON(dir == DMA_NONE); 709 BUG_ON(dir == DMA_NONE);
687 710
688 if (is_swiotlb_buffer(paddr)) { 711 if (is_swiotlb_buffer(paddr)) {
689 do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); 712 swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
690 return; 713 return;
691 } 714 }
692 715
@@ -722,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
722 */ 745 */
723static void 746static void
724swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, 747swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
725 size_t size, int dir, int target) 748 size_t size, enum dma_data_direction dir,
749 enum dma_sync_target target)
726{ 750{
727 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); 751 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
728 752
729 BUG_ON(dir == DMA_NONE); 753 BUG_ON(dir == DMA_NONE);
730 754
731 if (is_swiotlb_buffer(paddr)) { 755 if (is_swiotlb_buffer(paddr)) {
732 sync_single(hwdev, phys_to_virt(paddr), size, dir, target); 756 swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
757 target);
733 return; 758 return;
734 } 759 }
735 760
@@ -756,37 +781,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
756EXPORT_SYMBOL(swiotlb_sync_single_for_device); 781EXPORT_SYMBOL(swiotlb_sync_single_for_device);
757 782
758/* 783/*
759 * Same as above, but for a sub-range of the mapping.
760 */
761static void
762swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
763 unsigned long offset, size_t size,
764 int dir, int target)
765{
766 swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
767}
768
769void
770swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
771 unsigned long offset, size_t size,
772 enum dma_data_direction dir)
773{
774 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
775 SYNC_FOR_CPU);
776}
777EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
778
779void
780swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
781 unsigned long offset, size_t size,
782 enum dma_data_direction dir)
783{
784 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
785 SYNC_FOR_DEVICE);
786}
787EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
788
789/*
790 * Map a set of buffers described by scatterlist in streaming mode for DMA. 784 * Map a set of buffers described by scatterlist in streaming mode for DMA.
791 * This is the scatter-gather version of the above swiotlb_map_page 785 * This is the scatter-gather version of the above swiotlb_map_page
792 * interface. Here the scatter gather list elements are each tagged with the 786 * interface. Here the scatter gather list elements are each tagged with the
@@ -839,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
839 833
840int 834int
841swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 835swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
842 int dir) 836 enum dma_data_direction dir)
843{ 837{
844 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); 838 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
845} 839}
@@ -866,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
866 860
867void 861void
868swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 862swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
869 int dir) 863 enum dma_data_direction dir)
870{ 864{
871 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); 865 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
872} 866}
@@ -881,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
881 */ 875 */
882static void 876static void
883swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, 877swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
884 int nelems, int dir, int target) 878 int nelems, enum dma_data_direction dir,
879 enum dma_sync_target target)
885{ 880{
886 struct scatterlist *sg; 881 struct scatterlist *sg;
887 int i; 882 int i;
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 9fbcb44c554f..d608331b3e47 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -103,6 +103,7 @@
103#include <linux/rcupdate.h> 103#include <linux/rcupdate.h>
104#include <linux/err.h> 104#include <linux/err.h>
105#include <linux/textsearch.h> 105#include <linux/textsearch.h>
106#include <linux/slab.h>
106 107
107static LIST_HEAD(ts_ops); 108static LIST_HEAD(ts_ops);
108static DEFINE_SPINLOCK(ts_mod_lock); 109static DEFINE_SPINLOCK(ts_mod_lock);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 000000000000..e3a1050e6820
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,107 @@
1/*
2 * Generic Timer-queue
3 *
4 * Manages a simple queue of timers, ordered by expiration time.
5 * Uses rbtrees for quick list adds and expiration.
6 *
7 * NOTE: All of the following functions need to be serialized
8 * to avoid races. No locking is done by this library code.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/timerqueue.h>
26#include <linux/rbtree.h>
27#include <linux/module.h>
28
29/**
30 * timerqueue_add - Adds timer to timerqueue.
31 *
32 * @head: head of timerqueue
33 * @node: timer node to be added
34 *
35 * Adds the timer node to the timerqueue, sorted by the
36 * node's expires value.
37 */
38void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
39{
40 struct rb_node **p = &head->head.rb_node;
41 struct rb_node *parent = NULL;
42 struct timerqueue_node *ptr;
43
44 /* Make sure we don't add nodes that are already added */
45 WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
46
47 while (*p) {
48 parent = *p;
49 ptr = rb_entry(parent, struct timerqueue_node, node);
50 if (node->expires.tv64 < ptr->expires.tv64)
51 p = &(*p)->rb_left;
52 else
53 p = &(*p)->rb_right;
54 }
55 rb_link_node(&node->node, parent, p);
56 rb_insert_color(&node->node, &head->head);
57
58 if (!head->next || node->expires.tv64 < head->next->expires.tv64)
59 head->next = node;
60}
61EXPORT_SYMBOL_GPL(timerqueue_add);
62
63/**
64 * timerqueue_del - Removes a timer from the timerqueue.
65 *
66 * @head: head of timerqueue
67 * @node: timer node to be removed
68 *
69 * Removes the timer node from the timerqueue.
70 */
71void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
72{
73 WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
74
75 /* update next pointer */
76 if (head->next == node) {
77 struct rb_node *rbn = rb_next(&node->node);
78
79 head->next = rbn ?
80 rb_entry(rbn, struct timerqueue_node, node) : NULL;
81 }
82 rb_erase(&node->node, &head->head);
83 RB_CLEAR_NODE(&node->node);
84}
85EXPORT_SYMBOL_GPL(timerqueue_del);
86
87/**
88 * timerqueue_iterate_next - Returns the timer after the provided timer
89 *
90 * @node: Pointer to a timer.
91 *
92 * Provides the timer that is after the given node. This is used, when
93 * necessary, to iterate through the list of timers in a timer list
94 * without modifying the list.
95 */
96struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
97{
98 struct rb_node *next;
99
100 if (!node)
101 return NULL;
102 next = rb_next(&node->node);
103 if (!next)
104 return NULL;
105 return container_of(next, struct timerqueue_node, node);
106}
107EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
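A short sketch of how a user of the new queue might drive the three exported functions; it assumes the companion include/linux/timerqueue.h supplies timerqueue_init()/timerqueue_init_head() and the ktime_t expires field, and that the caller provides its own locking as the file comment requires:

#include <linux/timerqueue.h>
#include <linux/ktime.h>
#include <linux/kernel.h>

static struct timerqueue_head demo_queue;
static struct timerqueue_node demo_node;

static void demo_timerqueue(ktime_t expires)
{
        struct timerqueue_node *n;

        timerqueue_init_head(&demo_queue);      /* empty rb-tree, next == NULL */
        timerqueue_init(&demo_node);            /* RB_CLEAR_NODE, expected by the WARN_ON */

        demo_node.expires = expires;
        timerqueue_add(&demo_queue, &demo_node);

        /* demo_queue.next caches the earliest-expiring node */
        for (n = demo_queue.next; n; n = timerqueue_iterate_next(n))
                pr_info("timer expires at %lld ns\n", ktime_to_ns(n->expires));

        timerqueue_del(&demo_queue, &demo_node);
}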
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 000000000000..8fadd7cef46c
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
1/*
2 * Unified UUID/GUID definition
3 *
4 * Copyright (C) 2009, Intel Corp.
5 * Huang Ying <ying.huang@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation;
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/uuid.h>
24#include <linux/random.h>
25
26static void __uuid_gen_common(__u8 b[16])
27{
28 int i;
29 u32 r;
30
31 for (i = 0; i < 4; i++) {
32 r = random32();
33 memcpy(b + i * 4, &r, 4);
34 }
35 /* set the UUID variant field to 0b10 */
36 b[8] = (b[8] & 0x3F) | 0x80;
37}
38
39void uuid_le_gen(uuid_le *lu)
40{
41 __uuid_gen_common(lu->b);
42 /* version 4 : random generation */
43 lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
44}
45EXPORT_SYMBOL_GPL(uuid_le_gen);
46
47void uuid_be_gen(uuid_be *bu)
48{
49 __uuid_gen_common(bu->b);
50 /* version 4 : random generation */
51 bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
52}
53EXPORT_SYMBOL_GPL(uuid_be_gen);
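A quick sketch of generating and printing both flavours; the uuid_le/uuid_be types come from the companion include/linux/uuid.h, and %pUl/%pUb are the vsprintf 'U' extensions documented in the lib/vsprintf.c hunks below:

#include <linux/uuid.h>
#include <linux/kernel.h>

static void demo_uuid(void)
{
        uuid_le lu;     /* little-endian (GUID) byte layout */
        uuid_be bu;     /* big-endian (RFC 4122) byte layout */

        uuid_le_gen(&lu);
        uuid_be_gen(&bu);

        /* %pUl / %pUb select the matching byte interpretation */
        pr_info("guid=%pUl uuid=%pUb\n", lu.b, bu.b);
}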
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3b8aeec4e327..c150d3dafff4 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -118,6 +118,7 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
118 118
119 return simple_strtoull(cp, endp, base); 119 return simple_strtoull(cp, endp, base);
120} 120}
121EXPORT_SYMBOL(simple_strtoll);
121 122
122/** 123/**
123 * strict_strtoul - convert a string to an unsigned long strictly 124 * strict_strtoul - convert a string to an unsigned long strictly
@@ -145,19 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
145{ 146{
146 char *tail; 147 char *tail;
147 unsigned long val; 148 unsigned long val;
148 size_t len;
149 149
150 *res = 0; 150 *res = 0;
151 len = strlen(cp); 151 if (!*cp)
152 if (len == 0)
153 return -EINVAL; 152 return -EINVAL;
154 153
155 val = simple_strtoul(cp, &tail, base); 154 val = simple_strtoul(cp, &tail, base);
156 if (tail == cp) 155 if (tail == cp)
157 return -EINVAL; 156 return -EINVAL;
158 157
159 if ((*tail == '\0') || 158 if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
160 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
161 *res = val; 159 *res = val;
162 return 0; 160 return 0;
163 } 161 }
@@ -219,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
219{ 217{
220 char *tail; 218 char *tail;
221 unsigned long long val; 219 unsigned long long val;
222 size_t len;
223 220
224 *res = 0; 221 *res = 0;
225 len = strlen(cp); 222 if (!*cp)
226 if (len == 0)
227 return -EINVAL; 223 return -EINVAL;
228 224
229 val = simple_strtoull(cp, &tail, base); 225 val = simple_strtoull(cp, &tail, base);
230 if (tail == cp) 226 if (tail == cp)
231 return -EINVAL; 227 return -EINVAL;
232 if ((*tail == '\0') || 228 if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
233 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
234 *res = val; 229 *res = val;
235 return 0; 230 return 0;
236 } 231 }
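The rewrites above drop the strlen() calls while keeping the one trailing character these helpers have always tolerated, a single '\n'. Roughly, the resulting behaviour (a sketch of expected results, not a regression test):

#include <linux/kernel.h>
#include <linux/errno.h>

static void demo_strict_strtoul(void)
{
        unsigned long val;

        WARN_ON(strict_strtoul("123", 10, &val) || val != 123);    /* plain number: ok */
        WARN_ON(strict_strtoul("123\n", 10, &val) || val != 123);  /* sysfs-style newline: still ok */
        WARN_ON(strict_strtoul("123x", 10, &val) != -EINVAL);      /* trailing garbage: rejected */
        WARN_ON(strict_strtoul("", 10, &val) != -EINVAL);          /* empty string: rejected */
}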
@@ -266,7 +261,8 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res)
266} 261}
267EXPORT_SYMBOL(strict_strtoll); 262EXPORT_SYMBOL(strict_strtoll);
268 263
269static int skip_atoi(const char **s) 264static noinline_for_stack
265int skip_atoi(const char **s)
270{ 266{
271 int i = 0; 267 int i = 0;
272 268
@@ -286,7 +282,8 @@ static int skip_atoi(const char **s)
286/* Formats correctly any integer in [0,99999]. 282/* Formats correctly any integer in [0,99999].
287 * Outputs from one to five digits depending on input. 283 * Outputs from one to five digits depending on input.
288 * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ 284 * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
289static char *put_dec_trunc(char *buf, unsigned q) 285static noinline_for_stack
286char *put_dec_trunc(char *buf, unsigned q)
290{ 287{
291 unsigned d3, d2, d1, d0; 288 unsigned d3, d2, d1, d0;
292 d1 = (q>>4) & 0xf; 289 d1 = (q>>4) & 0xf;
@@ -323,7 +320,8 @@ static char *put_dec_trunc(char *buf, unsigned q)
323 return buf; 320 return buf;
324} 321}
325/* Same with if's removed. Always emits five digits */ 322/* Same with if's removed. Always emits five digits */
326static char *put_dec_full(char *buf, unsigned q) 323static noinline_for_stack
324char *put_dec_full(char *buf, unsigned q)
327{ 325{
328 /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ 326 /* BTW, if q is in [0,9999], 8-bit ints will be enough, */
329 /* but anyway, gcc produces better code with full-sized ints */ 327 /* but anyway, gcc produces better code with full-sized ints */
@@ -365,7 +363,8 @@ static char *put_dec_full(char *buf, unsigned q)
365 return buf; 363 return buf;
366} 364}
367/* No inlining helps gcc to use registers better */ 365/* No inlining helps gcc to use registers better */
368static noinline char *put_dec(char *buf, unsigned long long num) 366static noinline_for_stack
367char *put_dec(char *buf, unsigned long long num)
369{ 368{
370 while (1) { 369 while (1) {
371 unsigned rem; 370 unsigned rem;
@@ -381,8 +380,8 @@ static noinline char *put_dec(char *buf, unsigned long long num)
381#define PLUS 4 /* show plus */ 380#define PLUS 4 /* show plus */
382#define SPACE 8 /* space if plus */ 381#define SPACE 8 /* space if plus */
383#define LEFT 16 /* left justified */ 382#define LEFT 16 /* left justified */
384#define SMALL 32 /* Must be 32 == 0x20 */ 383#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */
385#define SPECIAL 64 /* 0x */ 384#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */
386 385
387enum format_type { 386enum format_type {
388 FORMAT_TYPE_NONE, /* Just a string part */ 387 FORMAT_TYPE_NONE, /* Just a string part */
@@ -408,16 +407,17 @@ enum format_type {
408}; 407};
409 408
410struct printf_spec { 409struct printf_spec {
411 enum format_type type; 410 u8 type; /* format_type enum */
412 int flags; /* flags to number() */ 411 u8 flags; /* flags to number() */
413 int field_width; /* width of output field */ 412 u8 base; /* number base, 8, 10 or 16 only */
414 int base; 413 u8 qualifier; /* number qualifier, one of 'hHlLtzZ' */
415 int precision; /* # of digits/chars */ 414 s16 field_width; /* width of output field */
416 int qualifier; 415 s16 precision; /* # of digits/chars */
417}; 416};
418 417
419static char *number(char *buf, char *end, unsigned long long num, 418static noinline_for_stack
420 struct printf_spec spec) 419char *number(char *buf, char *end, unsigned long long num,
420 struct printf_spec spec)
421{ 421{
422 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ 422 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
423 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ 423 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -536,7 +536,8 @@ static char *number(char *buf, char *end, unsigned long long num,
536 return buf; 536 return buf;
537} 537}
538 538
539static char *string(char *buf, char *end, const char *s, struct printf_spec spec) 539static noinline_for_stack
540char *string(char *buf, char *end, const char *s, struct printf_spec spec)
540{ 541{
541 int len, i; 542 int len, i;
542 543
@@ -566,8 +567,9 @@ static char *string(char *buf, char *end, const char *s, struct printf_spec spec
566 return buf; 567 return buf;
567} 568}
568 569
569static char *symbol_string(char *buf, char *end, void *ptr, 570static noinline_for_stack
570 struct printf_spec spec, char ext) 571char *symbol_string(char *buf, char *end, void *ptr,
572 struct printf_spec spec, char ext)
571{ 573{
572 unsigned long value = (unsigned long) ptr; 574 unsigned long value = (unsigned long) ptr;
573#ifdef CONFIG_KALLSYMS 575#ifdef CONFIG_KALLSYMS
@@ -587,8 +589,9 @@ static char *symbol_string(char *buf, char *end, void *ptr,
587#endif 589#endif
588} 590}
589 591
590static char *resource_string(char *buf, char *end, struct resource *res, 592static noinline_for_stack
591 struct printf_spec spec, const char *fmt) 593char *resource_string(char *buf, char *end, struct resource *res,
594 struct printf_spec spec, const char *fmt)
592{ 595{
593#ifndef IO_RSRC_PRINTK_SIZE 596#ifndef IO_RSRC_PRINTK_SIZE
594#define IO_RSRC_PRINTK_SIZE 6 597#define IO_RSRC_PRINTK_SIZE 6
@@ -597,22 +600,35 @@ static char *resource_string(char *buf, char *end, struct resource *res,
597#ifndef MEM_RSRC_PRINTK_SIZE 600#ifndef MEM_RSRC_PRINTK_SIZE
598#define MEM_RSRC_PRINTK_SIZE 10 601#define MEM_RSRC_PRINTK_SIZE 10
599#endif 602#endif
600 struct printf_spec hex_spec = { 603 static const struct printf_spec io_spec = {
601 .base = 16, 604 .base = 16,
605 .field_width = IO_RSRC_PRINTK_SIZE,
602 .precision = -1, 606 .precision = -1,
603 .flags = SPECIAL | SMALL | ZEROPAD, 607 .flags = SPECIAL | SMALL | ZEROPAD,
604 }; 608 };
605 struct printf_spec dec_spec = { 609 static const struct printf_spec mem_spec = {
610 .base = 16,
611 .field_width = MEM_RSRC_PRINTK_SIZE,
612 .precision = -1,
613 .flags = SPECIAL | SMALL | ZEROPAD,
614 };
615 static const struct printf_spec bus_spec = {
616 .base = 16,
617 .field_width = 2,
618 .precision = -1,
619 .flags = SMALL | ZEROPAD,
620 };
621 static const struct printf_spec dec_spec = {
606 .base = 10, 622 .base = 10,
607 .precision = -1, 623 .precision = -1,
608 .flags = 0, 624 .flags = 0,
609 }; 625 };
610 struct printf_spec str_spec = { 626 static const struct printf_spec str_spec = {
611 .field_width = -1, 627 .field_width = -1,
612 .precision = 10, 628 .precision = 10,
613 .flags = LEFT, 629 .flags = LEFT,
614 }; 630 };
615 struct printf_spec flag_spec = { 631 static const struct printf_spec flag_spec = {
616 .base = 16, 632 .base = 16,
617 .precision = -1, 633 .precision = -1,
618 .flags = SPECIAL | SMALL, 634 .flags = SPECIAL | SMALL,
@@ -622,47 +638,48 @@ static char *resource_string(char *buf, char *end, struct resource *res,
622 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ 638 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
623#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) 639#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4)
624#define FLAG_BUF_SIZE (2 * sizeof(res->flags)) 640#define FLAG_BUF_SIZE (2 * sizeof(res->flags))
625#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref disabled]") 641#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]")
626#define RAW_BUF_SIZE sizeof("[mem - flags 0x]") 642#define RAW_BUF_SIZE sizeof("[mem - flags 0x]")
627 char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, 643 char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
628 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; 644 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
629 645
630 char *p = sym, *pend = sym + sizeof(sym); 646 char *p = sym, *pend = sym + sizeof(sym);
631 int size = -1, addr = 0;
632 int decode = (fmt[0] == 'R') ? 1 : 0; 647 int decode = (fmt[0] == 'R') ? 1 : 0;
633 648 const struct printf_spec *specp;
634 if (res->flags & IORESOURCE_IO) {
635 size = IO_RSRC_PRINTK_SIZE;
636 addr = 1;
637 } else if (res->flags & IORESOURCE_MEM) {
638 size = MEM_RSRC_PRINTK_SIZE;
639 addr = 1;
640 }
641 649
642 *p++ = '['; 650 *p++ = '[';
643 if (res->flags & IORESOURCE_IO) 651 if (res->flags & IORESOURCE_IO) {
644 p = string(p, pend, "io ", str_spec); 652 p = string(p, pend, "io ", str_spec);
645 else if (res->flags & IORESOURCE_MEM) 653 specp = &io_spec;
654 } else if (res->flags & IORESOURCE_MEM) {
646 p = string(p, pend, "mem ", str_spec); 655 p = string(p, pend, "mem ", str_spec);
647 else if (res->flags & IORESOURCE_IRQ) 656 specp = &mem_spec;
657 } else if (res->flags & IORESOURCE_IRQ) {
648 p = string(p, pend, "irq ", str_spec); 658 p = string(p, pend, "irq ", str_spec);
649 else if (res->flags & IORESOURCE_DMA) 659 specp = &dec_spec;
660 } else if (res->flags & IORESOURCE_DMA) {
650 p = string(p, pend, "dma ", str_spec); 661 p = string(p, pend, "dma ", str_spec);
651 else { 662 specp = &dec_spec;
663 } else if (res->flags & IORESOURCE_BUS) {
664 p = string(p, pend, "bus ", str_spec);
665 specp = &bus_spec;
666 } else {
652 p = string(p, pend, "??? ", str_spec); 667 p = string(p, pend, "??? ", str_spec);
668 specp = &mem_spec;
653 decode = 0; 669 decode = 0;
654 } 670 }
655 hex_spec.field_width = size; 671 p = number(p, pend, res->start, *specp);
656 p = number(p, pend, res->start, addr ? hex_spec : dec_spec);
657 if (res->start != res->end) { 672 if (res->start != res->end) {
658 *p++ = '-'; 673 *p++ = '-';
659 p = number(p, pend, res->end, addr ? hex_spec : dec_spec); 674 p = number(p, pend, res->end, *specp);
660 } 675 }
661 if (decode) { 676 if (decode) {
662 if (res->flags & IORESOURCE_MEM_64) 677 if (res->flags & IORESOURCE_MEM_64)
663 p = string(p, pend, " 64bit", str_spec); 678 p = string(p, pend, " 64bit", str_spec);
664 if (res->flags & IORESOURCE_PREFETCH) 679 if (res->flags & IORESOURCE_PREFETCH)
665 p = string(p, pend, " pref", str_spec); 680 p = string(p, pend, " pref", str_spec);
681 if (res->flags & IORESOURCE_WINDOW)
682 p = string(p, pend, " window", str_spec);
666 if (res->flags & IORESOURCE_DISABLED) 683 if (res->flags & IORESOURCE_DISABLED)
667 p = string(p, pend, " disabled", str_spec); 684 p = string(p, pend, " disabled", str_spec);
668 } else { 685 } else {
@@ -675,30 +692,63 @@ static char *resource_string(char *buf, char *end, struct resource *res,
675 return string(buf, end, sym, spec); 692 return string(buf, end, sym, spec);
676} 693}
677 694
678static char *mac_address_string(char *buf, char *end, u8 *addr, 695static noinline_for_stack
679 struct printf_spec spec, const char *fmt) 696char *mac_address_string(char *buf, char *end, u8 *addr,
697 struct printf_spec spec, const char *fmt)
680{ 698{
681 char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; 699 char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
682 char *p = mac_addr; 700 char *p = mac_addr;
683 int i; 701 int i;
702 char separator;
703
704 if (fmt[1] == 'F') { /* FDDI canonical format */
705 separator = '-';
706 } else {
707 separator = ':';
708 }
684 709
685 for (i = 0; i < 6; i++) { 710 for (i = 0; i < 6; i++) {
686 p = pack_hex_byte(p, addr[i]); 711 p = pack_hex_byte(p, addr[i]);
687 if (fmt[0] == 'M' && i != 5) 712 if (fmt[0] == 'M' && i != 5)
688 *p++ = ':'; 713 *p++ = separator;
689 } 714 }
690 *p = '\0'; 715 *p = '\0';
691 716
692 return string(buf, end, mac_addr, spec); 717 return string(buf, end, mac_addr, spec);
693} 718}
694 719
695static char *ip4_string(char *p, const u8 *addr, bool leading_zeros) 720static noinline_for_stack
721char *ip4_string(char *p, const u8 *addr, const char *fmt)
696{ 722{
697 int i; 723 int i;
698 724 bool leading_zeros = (fmt[0] == 'i');
725 int index;
726 int step;
727
728 switch (fmt[2]) {
729 case 'h':
730#ifdef __BIG_ENDIAN
731 index = 0;
732 step = 1;
733#else
734 index = 3;
735 step = -1;
736#endif
737 break;
738 case 'l':
739 index = 3;
740 step = -1;
741 break;
742 case 'n':
743 case 'b':
744 default:
745 index = 0;
746 step = 1;
747 break;
748 }
699 for (i = 0; i < 4; i++) { 749 for (i = 0; i < 4; i++) {
700 char temp[3]; /* hold each IP quad in reverse order */ 750 char temp[3]; /* hold each IP quad in reverse order */
701 int digits = put_dec_trunc(temp, addr[i]) - temp; 751 int digits = put_dec_trunc(temp, addr[index]) - temp;
702 if (leading_zeros) { 752 if (leading_zeros) {
703 if (digits < 3) 753 if (digits < 3)
704 *p++ = '0'; 754 *p++ = '0';
@@ -710,13 +760,15 @@ static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
710 *p++ = temp[digits]; 760 *p++ = temp[digits];
711 if (i < 3) 761 if (i < 3)
712 *p++ = '.'; 762 *p++ = '.';
763 index += step;
713 } 764 }
714 *p = '\0'; 765 *p = '\0';
715 766
716 return p; 767 return p;
717} 768}
718 769
719static char *ip6_compressed_string(char *p, const char *addr) 770static noinline_for_stack
771char *ip6_compressed_string(char *p, const char *addr)
720{ 772{
721 int i, j, range; 773 int i, j, range;
722 unsigned char zerolength[8]; 774 unsigned char zerolength[8];
@@ -789,14 +841,15 @@ static char *ip6_compressed_string(char *p, const char *addr)
789 if (useIPv4) { 841 if (useIPv4) {
790 if (needcolon) 842 if (needcolon)
791 *p++ = ':'; 843 *p++ = ':';
792 p = ip4_string(p, &in6.s6_addr[12], false); 844 p = ip4_string(p, &in6.s6_addr[12], "I4");
793 } 845 }
794 *p = '\0'; 846 *p = '\0';
795 847
796 return p; 848 return p;
797} 849}
798 850
799static char *ip6_string(char *p, const char *addr, const char *fmt) 851static noinline_for_stack
852char *ip6_string(char *p, const char *addr, const char *fmt)
800{ 853{
801 int i; 854 int i;
802 855
@@ -811,8 +864,9 @@ static char *ip6_string(char *p, const char *addr, const char *fmt)
811 return p; 864 return p;
812} 865}
813 866
814static char *ip6_addr_string(char *buf, char *end, const u8 *addr, 867static noinline_for_stack
815 struct printf_spec spec, const char *fmt) 868char *ip6_addr_string(char *buf, char *end, const u8 *addr,
869 struct printf_spec spec, const char *fmt)
816{ 870{
817 char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; 871 char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
818 872
@@ -824,18 +878,20 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
824 return string(buf, end, ip6_addr, spec); 878 return string(buf, end, ip6_addr, spec);
825} 879}
826 880
827static char *ip4_addr_string(char *buf, char *end, const u8 *addr, 881static noinline_for_stack
828 struct printf_spec spec, const char *fmt) 882char *ip4_addr_string(char *buf, char *end, const u8 *addr,
883 struct printf_spec spec, const char *fmt)
829{ 884{
830 char ip4_addr[sizeof("255.255.255.255")]; 885 char ip4_addr[sizeof("255.255.255.255")];
831 886
832 ip4_string(ip4_addr, addr, fmt[0] == 'i'); 887 ip4_string(ip4_addr, addr, fmt);
833 888
834 return string(buf, end, ip4_addr, spec); 889 return string(buf, end, ip4_addr, spec);
835} 890}
836 891
837static char *uuid_string(char *buf, char *end, const u8 *addr, 892static noinline_for_stack
838 struct printf_spec spec, const char *fmt) 893char *uuid_string(char *buf, char *end, const u8 *addr,
894 struct printf_spec spec, const char *fmt)
839{ 895{
840 char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; 896 char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
841 char *p = uuid; 897 char *p = uuid;
@@ -896,12 +952,15 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
896 * - 'M' For a 6-byte MAC address, it prints the address in the 952 * - 'M' For a 6-byte MAC address, it prints the address in the
897 * usual colon-separated hex notation 953 * usual colon-separated hex notation
898 * - 'm' For a 6-byte MAC address, it prints the hex address without colons 954 * - 'm' For a 6-byte MAC address, it prints the hex address without colons
955 * - 'MF' For a 6-byte MAC FDDI address, it prints the address
956 * with a dash-separated hex notation
899 * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way 957 * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
900 * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4) 958 * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
901 * IPv6 uses colon separated network-order 16 bit hex with leading 0's 959 * IPv6 uses colon separated network-order 16 bit hex with leading 0's
902 * - 'i' [46] for 'raw' IPv4/IPv6 addresses 960 * - 'i' [46] for 'raw' IPv4/IPv6 addresses
903 * IPv6 omits the colons (01020304...0f) 961 * IPv6 omits the colons (01020304...0f)
904 * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006) 962 * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
963 * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
905 * - 'I6c' for IPv6 addresses printed as specified by 964 * - 'I6c' for IPv6 addresses printed as specified by
906 * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00 965 * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
907 * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form 966 * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
@@ -915,16 +974,29 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
915 * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] 974 * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
916 * little endian output byte order is: 975 * little endian output byte order is:
917 * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] 976 * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
977 * - 'V' For a struct va_format which contains a format string * and va_list *,
978 * call vsnprintf(->format, *->va_list).
979 * Implements a "recursive vsnprintf".
980 * Do not use this feature without some mechanism to verify the
981 * correctness of the format string and va_list arguments.
918 * 982 *
919 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 983 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
920 * function pointers are really function descriptors, which contain a 984 * function pointers are really function descriptors, which contain a
921 * pointer to the real address. 985 * pointer to the real address.
922 */ 986 */
923static char *pointer(const char *fmt, char *buf, char *end, void *ptr, 987static noinline_for_stack
924 struct printf_spec spec) 988char *pointer(const char *fmt, char *buf, char *end, void *ptr,
989 struct printf_spec spec)
925{ 990{
926 if (!ptr) 991 if (!ptr) {
992 /*
993 * Print (null) with the same width as a pointer so it makes
994 * tabular output look nice.
995 */
996 if (spec.field_width == -1)
997 spec.field_width = 2 * sizeof(void *);
927 return string(buf, end, "(null)", spec); 998 return string(buf, end, "(null)", spec);
999 }
928 1000
929 switch (*fmt) { 1001 switch (*fmt) {
930 case 'F': 1002 case 'F':
@@ -939,6 +1011,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
939 return resource_string(buf, end, ptr, spec, fmt); 1011 return resource_string(buf, end, ptr, spec, fmt);
940 case 'M': /* Colon separated: 00:01:02:03:04:05 */ 1012 case 'M': /* Colon separated: 00:01:02:03:04:05 */
941 case 'm': /* Contiguous: 000102030405 */ 1013 case 'm': /* Contiguous: 000102030405 */
1014 /* [mM]F (FDDI, bit reversed) */
942 return mac_address_string(buf, end, ptr, spec, fmt); 1015 return mac_address_string(buf, end, ptr, spec, fmt);
943 case 'I': /* Formatted IP supported 1016 case 'I': /* Formatted IP supported
944 * 4: 1.2.3.4 1017 * 4: 1.2.3.4
@@ -958,10 +1031,14 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
958 break; 1031 break;
959 case 'U': 1032 case 'U':
960 return uuid_string(buf, end, ptr, spec, fmt); 1033 return uuid_string(buf, end, ptr, spec, fmt);
1034 case 'V':
1035 return buf + vsnprintf(buf, end - buf,
1036 ((struct va_format *)ptr)->fmt,
1037 *(((struct va_format *)ptr)->va));
961 } 1038 }
962 spec.flags |= SMALL; 1039 spec.flags |= SMALL;
963 if (spec.field_width == -1) { 1040 if (spec.field_width == -1) {
964 spec.field_width = 2*sizeof(void *); 1041 spec.field_width = 2 * sizeof(void *);
965 spec.flags |= ZEROPAD; 1042 spec.flags |= ZEROPAD;
966 } 1043 }
967 spec.base = 16; 1044 spec.base = 16;
@@ -989,7 +1066,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
989 * @precision: precision of a number 1066 * @precision: precision of a number
990 * @qualifier: qualifier of a number (long, size_t, ...) 1067 * @qualifier: qualifier of a number (long, size_t, ...)
991 */ 1068 */
992static int format_decode(const char *fmt, struct printf_spec *spec) 1069static noinline_for_stack
1070int format_decode(const char *fmt, struct printf_spec *spec)
993{ 1071{
994 const char *start = fmt; 1072 const char *start = fmt;
995 1073
@@ -1297,7 +1375,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1297 break; 1375 break;
1298 1376
1299 case FORMAT_TYPE_NRCHARS: { 1377 case FORMAT_TYPE_NRCHARS: {
1300 int qualifier = spec.qualifier; 1378 u8 qualifier = spec.qualifier;
1301 1379
1302 if (qualifier == 'l') { 1380 if (qualifier == 'l') {
1303 long *ip = va_arg(args, long *); 1381 long *ip = va_arg(args, long *);
@@ -1426,7 +1504,7 @@ EXPORT_SYMBOL(snprintf);
1426 * @...: Arguments for the format string 1504 * @...: Arguments for the format string
1427 * 1505 *
1428 * The return value is the number of characters written into @buf not including 1506 * The return value is the number of characters written into @buf not including
1429 * the trailing '\0'. If @size is <= 0 the function returns 0. 1507 * the trailing '\0'. If @size is == 0 the function returns 0.
1430 */ 1508 */
1431 1509
1432int scnprintf(char *buf, size_t size, const char *fmt, ...) 1510int scnprintf(char *buf, size_t size, const char *fmt, ...)
@@ -1438,7 +1516,11 @@ int scnprintf(char *buf, size_t size, const char *fmt, ...)
1438 i = vsnprintf(buf, size, fmt, args); 1516 i = vsnprintf(buf, size, fmt, args);
1439 va_end(args); 1517 va_end(args);
1440 1518
1441 return (i >= size) ? (size - 1) : i; 1519 if (likely(i < size))
1520 return i;
1521 if (size != 0)
1522 return size - 1;
1523 return 0;
1442} 1524}
1443EXPORT_SYMBOL(scnprintf); 1525EXPORT_SYMBOL(scnprintf);
1444 1526
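The corrected return value matters when callers chain writes into one fixed buffer: scnprintf() reports the characters actually stored, so an accumulated offset can never run past the end of the buffer. A small sketch:

#include <linux/kernel.h>

static void demo_scnprintf(void)
{
        char buf[8];
        int len = 0;

        len += scnprintf(buf + len, sizeof(buf) - len, "%s", "hello ");
        len += scnprintf(buf + len, sizeof(buf) - len, "%s", "world");

        /* len == 7 ("hello w"); snprintf() would have claimed 11 */
        pr_info("%d: %s\n", len, buf);
}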
@@ -1583,7 +1665,7 @@ do { \
1583 1665
1584 case FORMAT_TYPE_NRCHARS: { 1666 case FORMAT_TYPE_NRCHARS: {
1585 /* skip %n 's argument */ 1667 /* skip %n 's argument */
1586 int qualifier = spec.qualifier; 1668 u8 qualifier = spec.qualifier;
1587 void *skip_arg; 1669 void *skip_arg;
1588 if (qualifier == 'l') 1670 if (qualifier == 'l')
1589 skip_arg = va_arg(args, long *); 1671 skip_arg = va_arg(args, long *);
@@ -1849,7 +1931,9 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
1849 char *next; 1931 char *next;
1850 char digit; 1932 char digit;
1851 int num = 0; 1933 int num = 0;
1852 int qualifier, base, field_width; 1934 u8 qualifier;
1935 u8 base;
1936 s16 field_width;
1853 bool is_sign; 1937 bool is_sign;
1854 1938
1855 while (*fmt && *str) { 1939 while (*fmt && *str) {
@@ -1927,7 +2011,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
1927 { 2011 {
1928 char *s = (char *)va_arg(args, char *); 2012 char *s = (char *)va_arg(args, char *);
1929 if (field_width == -1) 2013 if (field_width == -1)
1930 field_width = INT_MAX; 2014 field_width = SHRT_MAX;
1931 /* first, skip leading white space in buffer */ 2015 /* first, skip leading white space in buffer */
1932 str = skip_spaces(str); 2016 str = skip_spaces(str);
1933 2017
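Putting the new format extensions together ('MF' for dash-separated FDDI MACs, the 'I4'/'i4' endianness suffixes, and the recursive '%pV'), a hedged sketch of a caller; mydrv_info() and the sample addresses are made up for illustration, and struct va_format is assumed to be the kernel.h structure that the pointer() hunk above dereferences:

#include <linux/kernel.h>
#include <asm/byteorder.h>

/* prefix every message without building an intermediate buffer (%pV) */
static void mydrv_info(const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        printk(KERN_INFO "mydrv: %pV", &vaf);
        va_end(args);
}

static void demo_formats(void)
{
        u8 mac[6] = { 0x00, 0x16, 0x3e, 0x12, 0x34, 0x56 };
        __be32 ip_n = cpu_to_be32(0xc0a80101);  /* 192.168.1.1, network order */
        u32 ip_h = 0xc0a80101;                  /* same address, host order */

        mydrv_info("eth=%pM fddi=%pMF\n", mac, mac);
        mydrv_info("net=%pI4 host=%pI4h\n", &ip_n, &ip_h);
}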
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 215447c55261..2c13ecc5bb2c 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -8,21 +8,6 @@
8#include "inflate.h" 8#include "inflate.h"
9#include "inffast.h" 9#include "inffast.h"
10 10
11/* Only do the unaligned "Faster" variant when
12 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set
13 *
14 * On powerpc, it won't be as we don't include autoconf.h
15 * automatically for the boot wrapper, which is intended as
16 * we run in an environment where we may not be able to deal
17 * with (even rare) alignment faults. In addition, we do not
18 * define __KERNEL__ for arch/powerpc/boot unlike x86
19 */
20
21#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
22#include <asm/unaligned.h>
23#include <asm/byteorder.h>
24#endif
25
26#ifndef ASMINF 11#ifndef ASMINF
27 12
28/* Allow machine dependent optimization for post-increment or pre-increment. 13/* Allow machine dependent optimization for post-increment or pre-increment.
@@ -36,14 +21,31 @@
36 - Pentium III (Anderson) 21 - Pentium III (Anderson)
37 - M68060 (Nikl) 22 - M68060 (Nikl)
38 */ 23 */
24union uu {
25 unsigned short us;
26 unsigned char b[2];
27};
28
29/* Endian independent version */
30static inline unsigned short
31get_unaligned16(const unsigned short *p)
32{
33 union uu mm;
34 unsigned char *b = (unsigned char *)p;
35
36 mm.b[0] = b[0];
37 mm.b[1] = b[1];
38 return mm.us;
39}
40
39#ifdef POSTINC 41#ifdef POSTINC
40# define OFF 0 42# define OFF 0
41# define PUP(a) *(a)++ 43# define PUP(a) *(a)++
42# define UP_UNALIGNED(a) get_unaligned((a)++) 44# define UP_UNALIGNED(a) get_unaligned16((a)++)
43#else 45#else
44# define OFF 1 46# define OFF 1
45# define PUP(a) *++(a) 47# define PUP(a) *++(a)
46# define UP_UNALIGNED(a) get_unaligned(++(a)) 48# define UP_UNALIGNED(a) get_unaligned16(++(a))
47#endif 49#endif
48 50
49/* 51/*
@@ -256,7 +258,6 @@ void inflate_fast(z_streamp strm, unsigned start)
256 } 258 }
257 } 259 }
258 else { 260 else {
259#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
260 unsigned short *sout; 261 unsigned short *sout;
261 unsigned long loops; 262 unsigned long loops;
262 263
@@ -274,22 +275,25 @@ void inflate_fast(z_streamp strm, unsigned start)
274 sfrom = (unsigned short *)(from - OFF); 275 sfrom = (unsigned short *)(from - OFF);
275 loops = len >> 1; 276 loops = len >> 1;
276 do 277 do
278#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
279 PUP(sout) = PUP(sfrom);
280#else
277 PUP(sout) = UP_UNALIGNED(sfrom); 281 PUP(sout) = UP_UNALIGNED(sfrom);
282#endif
278 while (--loops); 283 while (--loops);
279 out = (unsigned char *)sout + OFF; 284 out = (unsigned char *)sout + OFF;
280 from = (unsigned char *)sfrom + OFF; 285 from = (unsigned char *)sfrom + OFF;
281 } else { /* dist == 1 or dist == 2 */ 286 } else { /* dist == 1 or dist == 2 */
282 unsigned short pat16; 287 unsigned short pat16;
283 288
284 pat16 = *(sout-2+2*OFF); 289 pat16 = *(sout-1+OFF);
285 if (dist == 1) 290 if (dist == 1) {
286#if defined(__BIG_ENDIAN) 291 union uu mm;
287 pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8); 292 /* copy one char pattern to both bytes */
288#elif defined(__LITTLE_ENDIAN) 293 mm.us = pat16;
289 pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8); 294 mm.b[0] = mm.b[1];
290#else 295 pat16 = mm.us;
291#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined 296 }
292#endif
293 loops = len >> 1; 297 loops = len >> 1;
294 do 298 do
295 PUP(sout) = pat16; 299 PUP(sout) = pat16;
@@ -298,20 +302,6 @@ void inflate_fast(z_streamp strm, unsigned start)
298 } 302 }
299 if (len & 1) 303 if (len & 1)
300 PUP(out) = PUP(from); 304 PUP(out) = PUP(from);
301#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
302 from = out - dist; /* copy direct from output */
303 do { /* minimum length is three */
304 PUP(out) = PUP(from);
305 PUP(out) = PUP(from);
306 PUP(out) = PUP(from);
307 len -= 3;
308 } while (len > 2);
309 if (len) {
310 PUP(out) = PUP(from);
311 if (len > 1)
312 PUP(out) = PUP(from);
313 }
314#endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
315 } 305 }
316 } 306 }
317 else if ((op & 64) == 0) { /* 2nd level distance code */ 307 else if ((op & 64) == 0) { /* 2nd level distance code */
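The byte-wise union load introduced above is what lets the halfword copy loops run even without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS; the same technique in a standalone userspace sketch (demo only, not kernel code):

#include <stdio.h>

union uu {
        unsigned short us;
        unsigned char b[2];
};

/* byte-wise 16-bit load: safe on a misaligned pointer, any endianness */
static unsigned short get_unaligned16(const unsigned short *p)
{
        union uu mm;
        const unsigned char *b = (const unsigned char *)p;

        mm.b[0] = b[0];
        mm.b[1] = b[1];
        return mm.us;
}

int main(void)
{
        unsigned char stream[5] = { 0xaa, 0x11, 0x22, 0x33, 0x44 };

        /* stream + 1 is odd, so a plain 16-bit load could fault on some CPUs */
        printf("halfword at offset 1: 0x%04x\n",
               get_unaligned16((const unsigned short *)(stream + 1)));
        return 0;
}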