aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2011-01-13 01:06:28 -0500
committerPaul Mundt <lethal@linux-sh.org>2011-01-13 01:06:28 -0500
commitf43dc23d5ea91fca257be02138a255f02d98e806 (patch)
treeb29722f6e965316e90ac97abf79923ced250dc21 /lib
parentf8e53553f452dcbf67cb89c8cba63a1cd6eb4cc0 (diff)
parent4162cf64973df51fc885825bc9ca4d055891c49f (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into common/serial-rework
Conflicts: arch/sh/kernel/cpu/sh2/setup-sh7619.c arch/sh/kernel/cpu/sh2a/setup-mxg.c arch/sh/kernel/cpu/sh2a/setup-sh7201.c arch/sh/kernel/cpu/sh2a/setup-sh7203.c arch/sh/kernel/cpu/sh2a/setup-sh7206.c arch/sh/kernel/cpu/sh3/setup-sh7705.c arch/sh/kernel/cpu/sh3/setup-sh770x.c arch/sh/kernel/cpu/sh3/setup-sh7710.c arch/sh/kernel/cpu/sh3/setup-sh7720.c arch/sh/kernel/cpu/sh4/setup-sh4-202.c arch/sh/kernel/cpu/sh4/setup-sh7750.c arch/sh/kernel/cpu/sh4/setup-sh7760.c arch/sh/kernel/cpu/sh4a/setup-sh7343.c arch/sh/kernel/cpu/sh4a/setup-sh7366.c arch/sh/kernel/cpu/sh4a/setup-sh7722.c arch/sh/kernel/cpu/sh4a/setup-sh7723.c arch/sh/kernel/cpu/sh4a/setup-sh7724.c arch/sh/kernel/cpu/sh4a/setup-sh7763.c arch/sh/kernel/cpu/sh4a/setup-sh7770.c arch/sh/kernel/cpu/sh4a/setup-sh7780.c arch/sh/kernel/cpu/sh4a/setup-sh7785.c arch/sh/kernel/cpu/sh4a/setup-sh7786.c arch/sh/kernel/cpu/sh4a/setup-shx3.c arch/sh/kernel/cpu/sh5/setup-sh5.c drivers/serial/sh-sci.c drivers/serial/sh-sci.h include/linux/serial_sci.h
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig19
-rw-r--r--lib/Kconfig.debug336
-rw-r--r--lib/Kconfig.kgdb24
-rw-r--r--lib/Kconfig.kmemcheck3
-rw-r--r--lib/Makefile21
-rw-r--r--lib/argv_split.c13
-rw-r--r--lib/atomic64.c15
-rw-r--r--lib/atomic64_test.c166
-rw-r--r--lib/average.c61
-rw-r--r--lib/bitmap.c115
-rw-r--r--lib/btree.c798
-rw-r--r--lib/bug.c20
-rw-r--r--lib/checksum.c18
-rw-r--r--lib/cpu-notifier-error-inject.c63
-rw-r--r--lib/cpumask.c1
-rw-r--r--lib/crc32.c150
-rw-r--r--lib/ctype.c50
-rw-r--r--lib/debug_locks.c3
-rw-r--r--lib/debugobjects.c139
-rw-r--r--lib/decompress.c5
-rw-r--r--lib/decompress_bunzip2.c46
-rw-r--r--lib/decompress_inflate.c18
-rw-r--r--lib/decompress_unlzma.c33
-rw-r--r--lib/decompress_unlzo.c217
-rw-r--r--lib/devres.c3
-rw-r--r--lib/div64.c52
-rw-r--r--lib/dma-debug.c82
-rw-r--r--lib/dynamic_debug.c154
-rw-r--r--lib/fault-inject.c1
-rw-r--r--lib/flex_array.c350
-rw-r--r--lib/gen_crc32table.c47
-rw-r--r--lib/genalloc.c35
-rw-r--r--lib/hexdump.c70
-rw-r--r--lib/hweight.c26
-rw-r--r--lib/idr.c105
-rw-r--r--lib/inflate.c5
-rw-r--r--lib/iommu-helper.c68
-rw-r--r--lib/ioremap.c10
-rw-r--r--lib/is_single_threaded.c61
-rw-r--r--lib/kasprintf.c1
-rw-r--r--lib/kernel_lock.c46
-rw-r--r--lib/kobject.c160
-rw-r--r--lib/kobject_uevent.c119
-rw-r--r--lib/kref.c46
-rw-r--r--lib/lcm.c15
-rw-r--r--lib/list_debug.c6
-rw-r--r--lib/list_sort.c291
-rw-r--r--lib/lmb.c527
-rw-r--r--lib/lru_cache.c560
-rw-r--r--lib/lzo/lzo1x_decompress.c9
-rw-r--r--lib/nlattr.c22
-rw-r--r--lib/parser.c18
-rw-r--r--lib/percpu_counter.c88
-rw-r--r--lib/plist.c8
-rw-r--r--lib/radix-tree.c259
-rw-r--r--lib/raid6/.gitignore4
-rw-r--r--lib/raid6/Makefile75
-rw-r--r--lib/raid6/algos.c154
-rw-r--r--lib/raid6/altivec.uc130
-rw-r--r--lib/raid6/int.uc117
-rw-r--r--lib/raid6/mktables.c132
-rw-r--r--lib/raid6/mmx.c142
-rw-r--r--lib/raid6/recov.c132
-rw-r--r--lib/raid6/sse1.c162
-rw-r--r--lib/raid6/sse2.c262
-rw-r--r--lib/raid6/test/Makefile72
-rw-r--r--lib/raid6/test/test.c124
-rw-r--r--lib/raid6/unroll.awk20
-rw-r--r--lib/raid6/x86.h61
-rw-r--r--lib/random32.c40
-rw-r--r--lib/ratelimit.c54
-rw-r--r--lib/rational.c1
-rw-r--r--lib/rbtree.c68
-rw-r--r--lib/rwsem-spinlock.c37
-rw-r--r--lib/rwsem.c153
-rw-r--r--lib/scatterlist.c54
-rw-r--r--lib/show_mem.c14
-rw-r--r--lib/spinlock_debug.c64
-rw-r--r--lib/string.c106
-rw-r--r--lib/swiotlb.c331
-rw-r--r--lib/textsearch.c1
-rw-r--r--lib/timerqueue.c107
-rw-r--r--lib/uuid.c53
-rw-r--r--lib/vsprintf.c871
-rw-r--r--lib/zlib_deflate/deflate.c4
-rw-r--r--lib/zlib_inflate/inffast.c73
86 files changed, 7260 insertions, 1936 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index bb1326d3839..3116aa631af 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -7,6 +7,9 @@ config BINARY_PRINTF
7 7
8menu "Library routines" 8menu "Library routines"
9 9
10config RAID6_PQ
11 tristate
12
10config BITREVERSE 13config BITREVERSE
11 tristate 14 tristate
12 15
@@ -117,6 +120,10 @@ config DECOMPRESS_BZIP2
117config DECOMPRESS_LZMA 120config DECOMPRESS_LZMA
118 tristate 121 tristate
119 122
123config DECOMPRESS_LZO
124 select LZO_DECOMPRESS
125 tristate
126
120# 127#
121# Generic allocator support is selected if needed 128# Generic allocator support is selected if needed
122# 129#
@@ -156,6 +163,9 @@ config TEXTSEARCH_BM
156config TEXTSEARCH_FSM 163config TEXTSEARCH_FSM
157 tristate 164 tristate
158 165
166config BTREE
167 boolean
168
159config HAS_IOMEM 169config HAS_IOMEM
160 boolean 170 boolean
161 depends on !NO_IOMEM 171 depends on !NO_IOMEM
@@ -174,9 +184,6 @@ config HAS_DMA
174config CHECK_SIGNATURE 184config CHECK_SIGNATURE
175 bool 185 bool
176 186
177config HAVE_LMB
178 boolean
179
180config CPUMASK_OFFSTACK 187config CPUMASK_OFFSTACK
181 bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS 188 bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
182 help 189 help
@@ -200,4 +207,10 @@ config NLATTR
200config GENERIC_ATOMIC64 207config GENERIC_ATOMIC64
201 bool 208 bool
202 209
210config LRU_CACHE
211 tristate
212
213config AVERAGE
214 bool
215
203endmenu 216endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 23067ab1a73..2d05adb9840 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -50,6 +50,14 @@ config MAGIC_SYSRQ
50 keys are documented in <file:Documentation/sysrq.txt>. Don't say Y 50 keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
51 unless you really know what this hack does. 51 unless you really know what this hack does.
52 52
53config STRIP_ASM_SYMS
54 bool "Strip assembler-generated symbols during link"
55 default n
56 help
57 Strip internal assembler-generated symbols during a link (symbols
58 that look like '.Lxxx') so they don't pollute the output of
59 get_wchan() and suchlike.
60
53config UNUSED_SYMBOLS 61config UNUSED_SYMBOLS
54 bool "Enable unused/obsolete exported symbols" 62 bool "Enable unused/obsolete exported symbols"
55 default y if X86 63 default y if X86
@@ -68,7 +76,6 @@ config UNUSED_SYMBOLS
68 76
69config DEBUG_FS 77config DEBUG_FS
70 bool "Debug Filesystem" 78 bool "Debug Filesystem"
71 depends on SYSFS
72 help 79 help
73 debugfs is a virtual file system that kernel developers use to put 80 debugfs is a virtual file system that kernel developers use to put
74 debugging files into. Enable this option to be able to read and 81 debugging files into. Enable this option to be able to read and
@@ -95,9 +102,10 @@ config HEADERS_CHECK
95 102
96config DEBUG_SECTION_MISMATCH 103config DEBUG_SECTION_MISMATCH
97 bool "Enable full Section mismatch analysis" 104 bool "Enable full Section mismatch analysis"
98 depends on UNDEFINED 105 depends on UNDEFINED || (BLACKFIN)
106 default y
99 # This option is on purpose disabled for now. 107 # This option is on purpose disabled for now.
100 # It will be enabled when we are down to a resonable number 108 # It will be enabled when we are down to a reasonable number
101 # of section mismatch warnings (< 10 for an allyesconfig build) 109 # of section mismatch warnings (< 10 for an allyesconfig build)
102 help 110 help
103 The section mismatch analysis checks if there are illegal 111 The section mismatch analysis checks if there are illegal
@@ -143,28 +151,34 @@ config DEBUG_SHIRQ
143 Drivers ought to be able to handle interrupts coming in at those 151 Drivers ought to be able to handle interrupts coming in at those
144 points; some don't and need to be caught. 152 points; some don't and need to be caught.
145 153
146config DETECT_SOFTLOCKUP 154config LOCKUP_DETECTOR
147 bool "Detect Soft Lockups" 155 bool "Detect Hard and Soft Lockups"
148 depends on DEBUG_KERNEL && !S390 156 depends on DEBUG_KERNEL && !S390
149 default y
150 help 157 help
151 Say Y here to enable the kernel to detect "soft lockups", 158 Say Y here to enable the kernel to act as a watchdog to detect
152 which are bugs that cause the kernel to loop in kernel 159 hard and soft lockups.
160
161 Softlockups are bugs that cause the kernel to loop in kernel
153 mode for more than 60 seconds, without giving other tasks a 162 mode for more than 60 seconds, without giving other tasks a
154 chance to run. 163 chance to run. The current stack trace is displayed upon
164 detection and the system will stay locked up.
155 165
156 When a soft-lockup is detected, the kernel will print the 166 Hardlockups are bugs that cause the CPU to loop in kernel mode
157 current stack trace (which you should report), but the 167 for more than 60 seconds, without letting other interrupts have a
158 system will stay locked up. This feature has negligible 168 chance to run. The current stack trace is displayed upon detection
159 overhead. 169 and the system will stay locked up.
160 170
161 (Note that "hard lockups" are separate type of bugs that 171 The overhead should be minimal. A periodic hrtimer runs to
162 can be detected via the NMI-watchdog, on platforms that 172 generate interrupts and kick the watchdog task every 10-12 seconds.
163 support it.) 173 An NMI is generated every 60 seconds or so to check for hardlockups.
174
175config HARDLOCKUP_DETECTOR
176 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
177 !ARCH_HAS_NMI_WATCHDOG
164 178
165config BOOTPARAM_SOFTLOCKUP_PANIC 179config BOOTPARAM_SOFTLOCKUP_PANIC
166 bool "Panic (Reboot) On Soft Lockups" 180 bool "Panic (Reboot) On Soft Lockups"
167 depends on DETECT_SOFTLOCKUP 181 depends on LOCKUP_DETECTOR
168 help 182 help
169 Say Y here to enable the kernel to panic on "soft lockups", 183 Say Y here to enable the kernel to panic on "soft lockups",
170 which are bugs that cause the kernel to loop in kernel 184 which are bugs that cause the kernel to loop in kernel
@@ -181,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
181 195
182config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE 196config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
183 int 197 int
184 depends on DETECT_SOFTLOCKUP 198 depends on LOCKUP_DETECTOR
185 range 0 1 199 range 0 1
186 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC 200 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
187 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC 201 default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -290,6 +304,28 @@ config DEBUG_OBJECTS_TIMERS
290 timer routines to track the life time of timer objects and 304 timer routines to track the life time of timer objects and
291 validate the timer operations. 305 validate the timer operations.
292 306
307config DEBUG_OBJECTS_WORK
308 bool "Debug work objects"
309 depends on DEBUG_OBJECTS
310 help
311 If you say Y here, additional code will be inserted into the
312 work queue routines to track the life time of work objects and
313 validate the work operations.
314
315config DEBUG_OBJECTS_RCU_HEAD
316 bool "Debug RCU callbacks objects"
317 depends on DEBUG_OBJECTS && PREEMPT
318 help
319 Enable this to turn on debugging of RCU list heads (call_rcu() usage).
320
321config DEBUG_OBJECTS_PERCPU_COUNTER
322 bool "Debug percpu counter objects"
323 depends on DEBUG_OBJECTS
324 help
325 If you say Y here, additional code will be inserted into the
326 percpu counter routines to track the life time of percpu counter
327 objects and validate the percpu counter operations.
328
293config DEBUG_OBJECTS_ENABLE_DEFAULT 329config DEBUG_OBJECTS_ENABLE_DEFAULT
294 int "debug_objects bootup default value (0-1)" 330 int "debug_objects bootup default value (0-1)"
295 range 0 1 331 range 0 1
@@ -326,7 +362,7 @@ config SLUB_DEBUG_ON
326config SLUB_STATS 362config SLUB_STATS
327 default n 363 default n
328 bool "Enable SLUB performance statistics" 364 bool "Enable SLUB performance statistics"
329 depends on SLUB && SLUB_DEBUG && SYSFS 365 depends on SLUB && SYSFS
330 help 366 help
331 SLUB statistics are useful to debug SLUBs allocation behavior in 367 SLUB statistics are useful to debug SLUBs allocation behavior in
332 order find ways to optimize the allocator. This should never be 368 order find ways to optimize the allocator. This should never be
@@ -338,13 +374,13 @@ config SLUB_STATS
338 374
339config DEBUG_KMEMLEAK 375config DEBUG_KMEMLEAK
340 bool "Kernel memory leak detector" 376 bool "Kernel memory leak detector"
341 depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \ 377 depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
342 !MEMORY_HOTPLUG 378 (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
343 select DEBUG_SLAB if SLAB 379
344 select SLUB_DEBUG if SLUB
345 select DEBUG_FS if SYSFS 380 select DEBUG_FS if SYSFS
346 select STACKTRACE if STACKTRACE_SUPPORT 381 select STACKTRACE if STACKTRACE_SUPPORT
347 select KALLSYMS 382 select KALLSYMS
383 select CRC32
348 help 384 help
349 Say Y here if you want to enable the memory leak 385 Say Y here if you want to enable the memory leak
350 detector. The memory allocation/freeing is traced in a way 386 detector. The memory allocation/freeing is traced in a way
@@ -355,9 +391,24 @@ config DEBUG_KMEMLEAK
355 allocations. See Documentation/kmemleak.txt for more 391 allocations. See Documentation/kmemleak.txt for more
356 details. 392 details.
357 393
394 Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
395 of finding leaks due to the slab objects poisoning.
396
358 In order to access the kmemleak file, debugfs needs to be 397 In order to access the kmemleak file, debugfs needs to be
359 mounted (usually at /sys/kernel/debug). 398 mounted (usually at /sys/kernel/debug).
360 399
400config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
401 int "Maximum kmemleak early log entries"
402 depends on DEBUG_KMEMLEAK
403 range 200 40000
404 default 400
405 help
406 Kmemleak must track all the memory allocations to avoid
407 reporting false positives. Since memory may be allocated or
408 freed before kmemleak is initialised, an early log buffer is
409 used to store these actions. If kmemleak reports "early log
410 buffer exceeded", please increase this value.
411
361config DEBUG_KMEMLEAK_TEST 412config DEBUG_KMEMLEAK_TEST
362 tristate "Simple test for the kernel memory leak detector" 413 tristate "Simple test for the kernel memory leak detector"
363 depends on DEBUG_KMEMLEAK 414 depends on DEBUG_KMEMLEAK
@@ -368,9 +419,16 @@ config DEBUG_KMEMLEAK_TEST
368 419
369 If unsure, say N. 420 If unsure, say N.
370 421
422config DEBUG_KMEMLEAK_DEFAULT_OFF
423 bool "Default kmemleak to off"
424 depends on DEBUG_KMEMLEAK
425 help
426 Say Y here to disable kmemleak by default. It can then be enabled
427 on the command line via kmemleak=on.
428
371config DEBUG_PREEMPT 429config DEBUG_PREEMPT
372 bool "Debug preemptible kernel" 430 bool "Debug preemptible kernel"
373 depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) 431 depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
374 default y 432 default y
375 help 433 help
376 If you say Y here then the kernel will use a debug variant of the 434 If you say Y here then the kernel will use a debug variant of the
@@ -412,6 +470,15 @@ config DEBUG_MUTEXES
412 This feature allows mutex semantics violations to be detected and 470 This feature allows mutex semantics violations to be detected and
413 reported. 471 reported.
414 472
473config BKL
474 bool "Big Kernel Lock" if (SMP || PREEMPT)
475 default y
476 help
477 This is the traditional lock that is used in old code instead
478 of proper locking. All drivers that use the BKL should depend
479 on this symbol.
480 Say Y here unless you are working on removing the BKL.
481
415config DEBUG_LOCK_ALLOC 482config DEBUG_LOCK_ALLOC
416 bool "Lock debugging: detect incorrect freeing of live locks" 483 bool "Lock debugging: detect incorrect freeing of live locks"
417 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 484 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -433,6 +500,7 @@ config PROVE_LOCKING
433 select DEBUG_SPINLOCK 500 select DEBUG_SPINLOCK
434 select DEBUG_MUTEXES 501 select DEBUG_MUTEXES
435 select DEBUG_LOCK_ALLOC 502 select DEBUG_LOCK_ALLOC
503 select TRACE_IRQFLAGS
436 default n 504 default n
437 help 505 help
438 This feature enables the kernel to prove that all locking 506 This feature enables the kernel to prove that all locking
@@ -468,11 +536,52 @@ config PROVE_LOCKING
468 536
469 For more details, see Documentation/lockdep-design.txt. 537 For more details, see Documentation/lockdep-design.txt.
470 538
539config PROVE_RCU
540 bool "RCU debugging: prove RCU correctness"
541 depends on PROVE_LOCKING
542 default n
543 help
544 This feature enables lockdep extensions that check for correct
545 use of RCU APIs. This is currently under development. Say Y
546 if you want to debug RCU usage or help work on the PROVE_RCU
547 feature.
548
549 Say N if you are unsure.
550
551config PROVE_RCU_REPEATEDLY
552 bool "RCU debugging: don't disable PROVE_RCU on first splat"
553 depends on PROVE_RCU
554 default n
555 help
556 By itself, PROVE_RCU will disable checking upon issuing the
557 first warning (or "splat"). This feature prevents such
558 disabling, allowing multiple RCU-lockdep warnings to be printed
559 on a single reboot.
560
561 Say Y to allow multiple RCU-lockdep warnings per boot.
562
563 Say N if you are unsure.
564
565config SPARSE_RCU_POINTER
566 bool "RCU debugging: sparse-based checks for pointer usage"
567 default n
568 help
569 This feature enables the __rcu sparse annotation for
570 RCU-protected pointers. This annotation will cause sparse
571 to flag any non-RCU used of annotated pointers. This can be
572 helpful when debugging RCU usage. Please note that this feature
573 is not intended to enforce code cleanliness; it is instead merely
574 a debugging aid.
575
576 Say Y to make sparse flag questionable use of RCU-protected pointers
577
578 Say N if you are unsure.
579
471config LOCKDEP 580config LOCKDEP
472 bool 581 bool
473 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT 582 depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
474 select STACKTRACE 583 select STACKTRACE
475 select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 584 select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
476 select KALLSYMS 585 select KALLSYMS
477 select KALLSYMS_ALL 586 select KALLSYMS_ALL
478 587
@@ -489,6 +598,14 @@ config LOCK_STAT
489 598
490 For more details, see Documentation/lockstat.txt 599 For more details, see Documentation/lockstat.txt
491 600
601 This also enables lock events required by "perf lock",
602 subcommand of perf.
603 If you want to use "perf lock", you also need to turn on
604 CONFIG_EVENT_TRACING.
605
606 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
607 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
608
492config DEBUG_LOCKDEP 609config DEBUG_LOCKDEP
493 bool "Lock dependency engine debugging" 610 bool "Lock dependency engine debugging"
494 depends on DEBUG_KERNEL && LOCKDEP 611 depends on DEBUG_KERNEL && LOCKDEP
@@ -498,11 +615,10 @@ config DEBUG_LOCKDEP
498 of more runtime overhead. 615 of more runtime overhead.
499 616
500config TRACE_IRQFLAGS 617config TRACE_IRQFLAGS
501 depends on DEBUG_KERNEL
502 bool 618 bool
503 default y 619 help
504 depends on TRACE_IRQFLAGS_SUPPORT 620 Enables hooks to interrupt enabling and disabling for
505 depends on PROVE_LOCKING 621 either tracing or lock debugging.
506 622
507config DEBUG_SPINLOCK_SLEEP 623config DEBUG_SPINLOCK_SLEEP
508 bool "Spinlock debugging: sleep-inside-spinlock checking" 624 bool "Spinlock debugging: sleep-inside-spinlock checking"
@@ -545,7 +661,7 @@ config DEBUG_BUGVERBOSE
545 depends on BUG 661 depends on BUG
546 depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ 662 depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \
547 FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 663 FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300
548 default !EMBEDDED 664 default y
549 help 665 help
550 Say Y here to make BUG() panics output the file name and line number 666 Say Y here to make BUG() panics output the file name and line number
551 of the BUG call as well as the EIP and oops trace. This aids 667 of the BUG call as well as the EIP and oops trace. This aids
@@ -564,6 +680,19 @@ config DEBUG_INFO
564 680
565 If unsure, say N. 681 If unsure, say N.
566 682
683config DEBUG_INFO_REDUCED
684 bool "Reduce debugging information"
685 depends on DEBUG_INFO
686 help
687 If you say Y here gcc is instructed to generate less debugging
688 information for structure types. This means that tools that
689 need full debugging information (like kgdb or systemtap) won't
690 be happy. But if you merely need debugging information to
691 resolve line numbers there is no loss. Advantage is that
692 build directory object sizes shrink dramatically over a full
693 DEBUG_INFO build and compile times are reduced too.
694 Only works with newer gcc versions.
695
567config DEBUG_VM 696config DEBUG_VM
568 bool "Debug VM" 697 bool "Debug VM"
569 depends on DEBUG_KERNEL 698 depends on DEBUG_KERNEL
@@ -620,6 +749,15 @@ config DEBUG_LIST
620 749
621 If unsure, say N. 750 If unsure, say N.
622 751
752config TEST_LIST_SORT
753 bool "Linked list sorting test"
754 depends on DEBUG_KERNEL
755 help
756 Enable this to turn on 'list_sort()' function test. This test is
757 executed only once during system boot, so affects only boot time.
758
759 If unsure, say N.
760
623config DEBUG_SG 761config DEBUG_SG
624 bool "Debug SG table operations" 762 bool "Debug SG table operations"
625 depends on DEBUG_KERNEL 763 depends on DEBUG_KERNEL
@@ -640,6 +778,21 @@ config DEBUG_NOTIFIERS
640 This is a relatively cheap check but if you care about maximum 778 This is a relatively cheap check but if you care about maximum
641 performance, say N. 779 performance, say N.
642 780
781config DEBUG_CREDENTIALS
782 bool "Debug credential management"
783 depends on DEBUG_KERNEL
784 help
785 Enable this to turn on some debug checking for credential
786 management. The additional code keeps track of the number of
787 pointers from task_structs to any given cred struct, and checks to
788 see that this number never exceeds the usage count of the cred
789 struct.
790
791 Furthermore, if SELinux is enabled, this also checks that the
792 security pointer in the cred struct is never seen to be invalid.
793
794 If unsure, say N.
795
643# 796#
644# Select this config option from the architecture Kconfig, if it 797# Select this config option from the architecture Kconfig, if it
645# it is preferred to always offer frame pointers as a config 798# it is preferred to always offer frame pointers as a config
@@ -712,17 +865,53 @@ config RCU_TORTURE_TEST_RUNNABLE
712 865
713config RCU_CPU_STALL_DETECTOR 866config RCU_CPU_STALL_DETECTOR
714 bool "Check for stalled CPUs delaying RCU grace periods" 867 bool "Check for stalled CPUs delaying RCU grace periods"
715 depends on CLASSIC_RCU || TREE_RCU 868 depends on TREE_RCU || TREE_PREEMPT_RCU
716 default n 869 default y
717 help 870 help
718 This option causes RCU to printk information on which 871 This option causes RCU to printk information on which
719 CPUs are delaying the current grace period, but only when 872 CPUs are delaying the current grace period, but only when
720 the grace period extends for excessive time periods. 873 the grace period extends for excessive time periods.
721 874
722 Say Y if you want RCU to perform such checks. 875 Say N if you want to disable such checks.
876
877 Say Y if you are unsure.
878
879config RCU_CPU_STALL_TIMEOUT
880 int "RCU CPU stall timeout in seconds"
881 depends on RCU_CPU_STALL_DETECTOR
882 range 3 300
883 default 60
884 help
885 If a given RCU grace period extends more than the specified
886 number of seconds, a CPU stall warning is printed. If the
887 RCU grace period persists, additional CPU stall warnings are
888 printed at more widely spaced intervals.
889
890config RCU_CPU_STALL_DETECTOR_RUNNABLE
891 bool "RCU CPU stall checking starts automatically at boot"
892 depends on RCU_CPU_STALL_DETECTOR
893 default y
894 help
895 If set, start checking for RCU CPU stalls immediately on
896 boot. Otherwise, RCU CPU stall checking must be manually
897 enabled.
898
899 Say Y if you are unsure.
900
901 Say N if you wish to suppress RCU CPU stall checking during boot.
902
903config RCU_CPU_STALL_VERBOSE
904 bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
905 depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
906 default y
907 help
908 This option causes RCU to printk detailed per-task information
909 for any tasks that are stalling the current RCU grace period.
723 910
724 Say N if you are unsure. 911 Say N if you are unsure.
725 912
913 Say Y if you want to enable such checks.
914
726config KPROBES_SANITY_TEST 915config KPROBES_SANITY_TEST
727 bool "Kprobes sanity tests" 916 bool "Kprobes sanity tests"
728 depends on DEBUG_KERNEL 917 depends on DEBUG_KERNEL
@@ -777,10 +966,24 @@ config DEBUG_BLOCK_EXT_DEVT
777 966
778 Say N if you are unsure. 967 Say N if you are unsure.
779 968
969config DEBUG_FORCE_WEAK_PER_CPU
970 bool "Force weak per-cpu definitions"
971 depends on DEBUG_KERNEL
972 help
973 s390 and alpha require percpu variables in modules to be
974 defined weak to work around addressing range issue which
975 puts the following two restrictions on percpu variable
976 definitions.
977
978 1. percpu symbols must be unique whether static or not
979 2. percpu variables can't be defined inside a function
980
981 To ensure that generic code follows the above rules, this
982 option forces all percpu variables to be defined as weak.
983
780config LKDTM 984config LKDTM
781 tristate "Linux Kernel Dump Test Tool Module" 985 tristate "Linux Kernel Dump Test Tool Module"
782 depends on DEBUG_KERNEL 986 depends on DEBUG_FS
783 depends on KPROBES
784 depends on BLOCK 987 depends on BLOCK
785 default n 988 default n
786 help 989 help
@@ -791,7 +994,19 @@ config LKDTM
791 called lkdtm. 994 called lkdtm.
792 995
793 Documentation on how to use the module can be found in 996 Documentation on how to use the module can be found in
794 drivers/misc/lkdtm.c 997 Documentation/fault-injection/provoke-crashes.txt
998
999config CPU_NOTIFIER_ERROR_INJECT
1000 tristate "CPU notifier error injection module"
1001 depends on HOTPLUG_CPU && DEBUG_KERNEL
1002 help
1003 This option provides a kernel module that can be used to test
1004 the error handling of the cpu notifiers
1005
1006 To compile this code as a module, choose M here: the module will
1007 be called cpu-notifier-error-inject.
1008
1009 If unsure, say N.
795 1010
796config FAULT_INJECTION 1011config FAULT_INJECTION
797 bool "Fault-injection framework" 1012 bool "Fault-injection framework"
@@ -820,7 +1035,7 @@ config FAIL_MAKE_REQUEST
820 Provide fault-injection capability for disk IO. 1035 Provide fault-injection capability for disk IO.
821 1036
822config FAIL_IO_TIMEOUT 1037config FAIL_IO_TIMEOUT
823 bool "Faul-injection capability for faking disk interrupts" 1038 bool "Fault-injection capability for faking disk interrupts"
824 depends on FAULT_INJECTION && BLOCK 1039 depends on FAULT_INJECTION && BLOCK
825 help 1040 help
826 Provide fault-injection capability on end IO handling. This 1041 Provide fault-injection capability on end IO handling. This
@@ -841,26 +1056,29 @@ config FAULT_INJECTION_STACKTRACE_FILTER
841 depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT 1056 depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
842 depends on !X86_64 1057 depends on !X86_64
843 select STACKTRACE 1058 select STACKTRACE
844 select FRAME_POINTER if !PPC && !S390 1059 select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
845 help 1060 help
846 Provide stacktrace filter for fault-injection capabilities 1061 Provide stacktrace filter for fault-injection capabilities
847 1062
848config LATENCYTOP 1063config LATENCYTOP
849 bool "Latency measuring infrastructure" 1064 bool "Latency measuring infrastructure"
850 select FRAME_POINTER if !MIPS && !PPC && !S390 1065 depends on HAVE_LATENCYTOP_SUPPORT
1066 depends on DEBUG_KERNEL
1067 depends on STACKTRACE_SUPPORT
1068 depends on PROC_FS
1069 select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
851 select KALLSYMS 1070 select KALLSYMS
852 select KALLSYMS_ALL 1071 select KALLSYMS_ALL
853 select STACKTRACE 1072 select STACKTRACE
854 select SCHEDSTATS 1073 select SCHEDSTATS
855 select SCHED_DEBUG 1074 select SCHED_DEBUG
856 depends on HAVE_LATENCYTOP_SUPPORT
857 help 1075 help
858 Enable this option if you want to use the LatencyTOP tool 1076 Enable this option if you want to use the LatencyTOP tool
859 to find out which userspace is blocking on what kernel operations. 1077 to find out which userspace is blocking on what kernel operations.
860 1078
861config SYSCTL_SYSCALL_CHECK 1079config SYSCTL_SYSCALL_CHECK
862 bool "Sysctl checks" 1080 bool "Sysctl checks"
863 depends on SYSCTL_SYSCALL 1081 depends on SYSCTL
864 ---help--- 1082 ---help---
865 sys_sysctl uses binary paths that have been found challenging 1083 sys_sysctl uses binary paths that have been found challenging
866 to properly maintain and use. This enables checks that help 1084 to properly maintain and use. This enables checks that help
@@ -934,10 +1152,10 @@ config DYNAMIC_DEBUG
934 1152
935 Usage: 1153 Usage:
936 1154
937 Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file, 1155 Dynamic debugging is controlled via the 'dynamic_debug/control' file,
938 which is contained in the 'debugfs' filesystem. Thus, the debugfs 1156 which is contained in the 'debugfs' filesystem. Thus, the debugfs
939 filesystem must first be mounted before making use of this feature. 1157 filesystem must first be mounted before making use of this feature.
940 We refer the control file as: <debugfs>/dynamic_debug/ddebug. This 1158 We refer the control file as: <debugfs>/dynamic_debug/control. This
941 file contains a list of the debug statements that can be enabled. The 1159 file contains a list of the debug statements that can be enabled. The
942 format for each line of the file is: 1160 format for each line of the file is:
943 1161
@@ -952,7 +1170,7 @@ config DYNAMIC_DEBUG
952 1170
953 From a live system: 1171 From a live system:
954 1172
955 nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug 1173 nullarbor:~ # cat <debugfs>/dynamic_debug/control
956 # filename:lineno [module]function flags format 1174 # filename:lineno [module]function flags format
957 fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" 1175 fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012"
958 fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" 1176 fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012"
@@ -962,23 +1180,23 @@ config DYNAMIC_DEBUG
962 1180
963 // enable the message at line 1603 of file svcsock.c 1181 // enable the message at line 1603 of file svcsock.c
964 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > 1182 nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
965 <debugfs>/dynamic_debug/ddebug 1183 <debugfs>/dynamic_debug/control
966 1184
967 // enable all the messages in file svcsock.c 1185 // enable all the messages in file svcsock.c
968 nullarbor:~ # echo -n 'file svcsock.c +p' > 1186 nullarbor:~ # echo -n 'file svcsock.c +p' >
969 <debugfs>/dynamic_debug/ddebug 1187 <debugfs>/dynamic_debug/control
970 1188
971 // enable all the messages in the NFS server module 1189 // enable all the messages in the NFS server module
972 nullarbor:~ # echo -n 'module nfsd +p' > 1190 nullarbor:~ # echo -n 'module nfsd +p' >
973 <debugfs>/dynamic_debug/ddebug 1191 <debugfs>/dynamic_debug/control
974 1192
975 // enable all 12 messages in the function svc_process() 1193 // enable all 12 messages in the function svc_process()
976 nullarbor:~ # echo -n 'func svc_process +p' > 1194 nullarbor:~ # echo -n 'func svc_process +p' >
977 <debugfs>/dynamic_debug/ddebug 1195 <debugfs>/dynamic_debug/control
978 1196
979 // disable all 12 messages in the function svc_process() 1197 // disable all 12 messages in the function svc_process()
980 nullarbor:~ # echo -n 'func svc_process -p' > 1198 nullarbor:~ # echo -n 'func svc_process -p' >
981 <debugfs>/dynamic_debug/ddebug 1199 <debugfs>/dynamic_debug/control
982 1200
983 See Documentation/dynamic-debug-howto.txt for additional information. 1201 See Documentation/dynamic-debug-howto.txt for additional information.
984 1202
@@ -993,6 +1211,26 @@ config DMA_API_DEBUG
993 This option causes a performance degredation. Use only if you want 1211 This option causes a performance degredation. Use only if you want
994 to debug device drivers. If unsure, say N. 1212 to debug device drivers. If unsure, say N.
995 1213
1214config ATOMIC64_SELFTEST
1215 bool "Perform an atomic64_t self-test at boot"
1216 help
1217 Enable this option to test the atomic64_t functions at boot.
1218
1219 If unsure, say N.
1220
1221config ASYNC_RAID6_TEST
1222 tristate "Self test for hardware accelerated raid6 recovery"
1223 depends on ASYNC_RAID6_RECOV
1224 select ASYNC_MEMCPY
1225 ---help---
1226 This is a one-shot self test that permutes through the
1227 recovery of all the possible two disk failure scenarios for a
1228 N-disk array. Recovery is performed with the asynchronous
1229 raid6 recovery routines, and will optionally use an offload
1230 engine if one is available.
1231
1232 If unsure, say N.
1233
996source "samples/Kconfig" 1234source "samples/Kconfig"
997 1235
998source "lib/Kconfig.kgdb" 1236source "lib/Kconfig.kgdb"
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 9b5d1d7f2ef..43cb93fa265 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB
3 bool 3 bool
4 4
5menuconfig KGDB 5menuconfig KGDB
6 bool "KGDB: kernel debugging with remote gdb" 6 bool "KGDB: kernel debugger"
7 depends on HAVE_ARCH_KGDB 7 depends on HAVE_ARCH_KGDB
8 depends on DEBUG_KERNEL && EXPERIMENTAL 8 depends on DEBUG_KERNEL && EXPERIMENTAL
9 help 9 help
@@ -57,4 +57,26 @@ config KGDB_TESTS_BOOT_STRING
57 information about other strings you could use beyond the 57 information about other strings you could use beyond the
58 default of V1F100. 58 default of V1F100.
59 59
60config KGDB_LOW_LEVEL_TRAP
61 bool "KGDB: Allow debugging with traps in notifiers"
62 depends on X86 || MIPS
63 default n
64 help
65 This will add an extra call back to kgdb for the breakpoint
66 exception handler on which will will allow kgdb to step
67 through a notify handler.
68
69config KGDB_KDB
70 bool "KGDB_KDB: include kdb frontend for kgdb"
71 default n
72 help
73 KDB frontend for kernel
74
75config KDB_KEYBOARD
76 bool "KGDB_KDB: keyboard as input device"
77 depends on VT && KGDB_KDB
78 default n
79 help
80 KDB can use a PS/2 type keyboard for an input device
81
60endif # KGDB 82endif # KGDB
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck
index 603c81b6654..846e039a86b 100644
--- a/lib/Kconfig.kmemcheck
+++ b/lib/Kconfig.kmemcheck
@@ -1,6 +1,8 @@
1config HAVE_ARCH_KMEMCHECK 1config HAVE_ARCH_KMEMCHECK
2 bool 2 bool
3 3
4if HAVE_ARCH_KMEMCHECK
5
4menuconfig KMEMCHECK 6menuconfig KMEMCHECK
5 bool "kmemcheck: trap use of uninitialized memory" 7 bool "kmemcheck: trap use of uninitialized memory"
6 depends on DEBUG_KERNEL 8 depends on DEBUG_KERNEL
@@ -89,3 +91,4 @@ config KMEMCHECK_BITOPS_OK
89 accesses where not all the bits are initialized at the same time. 91 accesses where not all the bits are initialized at the same time.
90 This may also hide some real bugs. 92 This may also hide some real bugs.
91 93
94endif
diff --git a/lib/Makefile b/lib/Makefile
index b6d1857bbf0..d7b6e30a3a1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,11 +8,11 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
8endif 8endif
9 9
10lib-y := ctype.o string.o vsprintf.o cmdline.o \ 10lib-y := ctype.o string.o vsprintf.o cmdline.o \
11 rbtree.o radix-tree.o dump_stack.o \ 11 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
12 idr.o int_sqrt.o extable.o prio_tree.o \ 12 idr.o int_sqrt.o extable.o prio_tree.o \
13 sha1.o irq_regs.o reciprocal_div.o argv_split.o \ 13 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
14 proportions.o prio_heap.o ratelimit.o show_mem.o \ 14 proportions.o prio_heap.o ratelimit.o show_mem.o \
15 is_single_threaded.o plist.o decompress.o 15 is_single_threaded.o plist.o decompress.o flex_array.o
16 16
17lib-$(CONFIG_MMU) += ioremap.o 17lib-$(CONFIG_MMU) += ioremap.o
18lib-$(CONFIG_SMP) += cpumask.o 18lib-$(CONFIG_SMP) += cpumask.o
@@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o
21 21
22obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ 22obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
23 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ 23 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
24 string_helpers.o gcd.o 24 string_helpers.o gcd.o lcm.o list_sort.o uuid.o
25 25
26ifeq ($(CONFIG_DEBUG_KOBJECT),y) 26ifeq ($(CONFIG_DEBUG_KOBJECT),y)
27CFLAGS_kobject.o += -DDEBUG 27CFLAGS_kobject.o += -DDEBUG
@@ -39,8 +39,12 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
39lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o 39lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
40lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o 40lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
41obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o 41obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
42
43CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
42obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o 44obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
45
43obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o 46obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
47obj-$(CONFIG_BTREE) += btree.o
44obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o 48obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
45obj-$(CONFIG_DEBUG_LIST) += list_debug.o 49obj-$(CONFIG_DEBUG_LIST) += list_debug.o
46obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o 50obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
@@ -65,10 +69,12 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
65obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ 69obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
66obj-$(CONFIG_LZO_COMPRESS) += lzo/ 70obj-$(CONFIG_LZO_COMPRESS) += lzo/
67obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ 71obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
72obj-$(CONFIG_RAID6_PQ) += raid6/
68 73
69lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o 74lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
70lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o 75lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
71lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o 76lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
77lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
72 78
73obj-$(CONFIG_TEXTSEARCH) += textsearch.o 79obj-$(CONFIG_TEXTSEARCH) += textsearch.o
74obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o 80obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
@@ -80,23 +86,28 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
80obj-$(CONFIG_SWIOTLB) += swiotlb.o 86obj-$(CONFIG_SWIOTLB) += swiotlb.o
81obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o 87obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
82obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o 88obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
89obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
83 90
84lib-$(CONFIG_GENERIC_BUG) += bug.o 91lib-$(CONFIG_GENERIC_BUG) += bug.o
85 92
86obj-$(CONFIG_HAVE_LMB) += lmb.o
87
88obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o 93obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
89 94
90obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o 95obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
91 96
92obj-$(CONFIG_NLATTR) += nlattr.o 97obj-$(CONFIG_NLATTR) += nlattr.o
93 98
99obj-$(CONFIG_LRU_CACHE) += lru_cache.o
100
94obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o 101obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
95 102
96obj-$(CONFIG_GENERIC_CSUM) += checksum.o 103obj-$(CONFIG_GENERIC_CSUM) += checksum.o
97 104
98obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o 105obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
99 106
107obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
108
109obj-$(CONFIG_AVERAGE) += average.o
110
100hostprogs-y := gen_crc32table 111hostprogs-y := gen_crc32table
101clean-files := crc32table.h 112clean-files := crc32table.h
102 113
diff --git a/lib/argv_split.c b/lib/argv_split.c
index 5205a8dae5b..4b1b083f219 100644
--- a/lib/argv_split.c
+++ b/lib/argv_split.c
@@ -4,17 +4,10 @@
4 4
5#include <linux/kernel.h> 5#include <linux/kernel.h>
6#include <linux/ctype.h> 6#include <linux/ctype.h>
7#include <linux/string.h>
7#include <linux/slab.h> 8#include <linux/slab.h>
8#include <linux/module.h> 9#include <linux/module.h>
9 10
10static const char *skip_sep(const char *cp)
11{
12 while (*cp && isspace(*cp))
13 cp++;
14
15 return cp;
16}
17
18static const char *skip_arg(const char *cp) 11static const char *skip_arg(const char *cp)
19{ 12{
20 while (*cp && !isspace(*cp)) 13 while (*cp && !isspace(*cp))
@@ -28,7 +21,7 @@ static int count_argc(const char *str)
28 int count = 0; 21 int count = 0;
29 22
30 while (*str) { 23 while (*str) {
31 str = skip_sep(str); 24 str = skip_spaces(str);
32 if (*str) { 25 if (*str) {
33 count++; 26 count++;
34 str = skip_arg(str); 27 str = skip_arg(str);
@@ -82,7 +75,7 @@ char **argv_split(gfp_t gfp, const char *str, int *argcp)
82 argvp = argv; 75 argvp = argv;
83 76
84 while (*str) { 77 while (*str) {
85 str = skip_sep(str); 78 str = skip_spaces(str);
86 79
87 if (*str) { 80 if (*str) {
88 const char *p = str; 81 const char *p = str;
diff --git a/lib/atomic64.c b/lib/atomic64.c
index c5e72556241..a21c12bc727 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -13,6 +13,7 @@
13#include <linux/cache.h> 13#include <linux/cache.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/module.h>
16#include <asm/atomic.h> 17#include <asm/atomic.h>
17 18
18/* 19/*
@@ -52,6 +53,7 @@ long long atomic64_read(const atomic64_t *v)
52 spin_unlock_irqrestore(lock, flags); 53 spin_unlock_irqrestore(lock, flags);
53 return val; 54 return val;
54} 55}
56EXPORT_SYMBOL(atomic64_read);
55 57
56void atomic64_set(atomic64_t *v, long long i) 58void atomic64_set(atomic64_t *v, long long i)
57{ 59{
@@ -62,6 +64,7 @@ void atomic64_set(atomic64_t *v, long long i)
62 v->counter = i; 64 v->counter = i;
63 spin_unlock_irqrestore(lock, flags); 65 spin_unlock_irqrestore(lock, flags);
64} 66}
67EXPORT_SYMBOL(atomic64_set);
65 68
66void atomic64_add(long long a, atomic64_t *v) 69void atomic64_add(long long a, atomic64_t *v)
67{ 70{
@@ -72,6 +75,7 @@ void atomic64_add(long long a, atomic64_t *v)
72 v->counter += a; 75 v->counter += a;
73 spin_unlock_irqrestore(lock, flags); 76 spin_unlock_irqrestore(lock, flags);
74} 77}
78EXPORT_SYMBOL(atomic64_add);
75 79
76long long atomic64_add_return(long long a, atomic64_t *v) 80long long atomic64_add_return(long long a, atomic64_t *v)
77{ 81{
@@ -84,6 +88,7 @@ long long atomic64_add_return(long long a, atomic64_t *v)
84 spin_unlock_irqrestore(lock, flags); 88 spin_unlock_irqrestore(lock, flags);
85 return val; 89 return val;
86} 90}
91EXPORT_SYMBOL(atomic64_add_return);
87 92
88void atomic64_sub(long long a, atomic64_t *v) 93void atomic64_sub(long long a, atomic64_t *v)
89{ 94{
@@ -94,6 +99,7 @@ void atomic64_sub(long long a, atomic64_t *v)
94 v->counter -= a; 99 v->counter -= a;
95 spin_unlock_irqrestore(lock, flags); 100 spin_unlock_irqrestore(lock, flags);
96} 101}
102EXPORT_SYMBOL(atomic64_sub);
97 103
98long long atomic64_sub_return(long long a, atomic64_t *v) 104long long atomic64_sub_return(long long a, atomic64_t *v)
99{ 105{
@@ -106,6 +112,7 @@ long long atomic64_sub_return(long long a, atomic64_t *v)
106 spin_unlock_irqrestore(lock, flags); 112 spin_unlock_irqrestore(lock, flags);
107 return val; 113 return val;
108} 114}
115EXPORT_SYMBOL(atomic64_sub_return);
109 116
110long long atomic64_dec_if_positive(atomic64_t *v) 117long long atomic64_dec_if_positive(atomic64_t *v)
111{ 118{
@@ -120,6 +127,7 @@ long long atomic64_dec_if_positive(atomic64_t *v)
120 spin_unlock_irqrestore(lock, flags); 127 spin_unlock_irqrestore(lock, flags);
121 return val; 128 return val;
122} 129}
130EXPORT_SYMBOL(atomic64_dec_if_positive);
123 131
124long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) 132long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
125{ 133{
@@ -134,6 +142,7 @@ long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
134 spin_unlock_irqrestore(lock, flags); 142 spin_unlock_irqrestore(lock, flags);
135 return val; 143 return val;
136} 144}
145EXPORT_SYMBOL(atomic64_cmpxchg);
137 146
138long long atomic64_xchg(atomic64_t *v, long long new) 147long long atomic64_xchg(atomic64_t *v, long long new)
139{ 148{
@@ -147,21 +156,23 @@ long long atomic64_xchg(atomic64_t *v, long long new)
147 spin_unlock_irqrestore(lock, flags); 156 spin_unlock_irqrestore(lock, flags);
148 return val; 157 return val;
149} 158}
159EXPORT_SYMBOL(atomic64_xchg);
150 160
151int atomic64_add_unless(atomic64_t *v, long long a, long long u) 161int atomic64_add_unless(atomic64_t *v, long long a, long long u)
152{ 162{
153 unsigned long flags; 163 unsigned long flags;
154 spinlock_t *lock = lock_addr(v); 164 spinlock_t *lock = lock_addr(v);
155 int ret = 1; 165 int ret = 0;
156 166
157 spin_lock_irqsave(lock, flags); 167 spin_lock_irqsave(lock, flags);
158 if (v->counter != u) { 168 if (v->counter != u) {
159 v->counter += a; 169 v->counter += a;
160 ret = 0; 170 ret = 1;
161 } 171 }
162 spin_unlock_irqrestore(lock, flags); 172 spin_unlock_irqrestore(lock, flags);
163 return ret; 173 return ret;
164} 174}
175EXPORT_SYMBOL(atomic64_add_unless);
165 176
166static int init_atomic64_lock(void) 177static int init_atomic64_lock(void)
167{ 178{
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
new file mode 100644
index 00000000000..44524cc8c32
--- /dev/null
+++ b/lib/atomic64_test.c
@@ -0,0 +1,166 @@
1/*
2 * Testsuite for atomic64_t functions
3 *
4 * Copyright © 2010 Luca Barbieri
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <asm/atomic.h>
14
15#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
16static __init int test_atomic64(void)
17{
18 long long v0 = 0xaaa31337c001d00dLL;
19 long long v1 = 0xdeadbeefdeafcafeLL;
20 long long v2 = 0xfaceabadf00df001LL;
21 long long onestwos = 0x1111111122222222LL;
22 long long one = 1LL;
23
24 atomic64_t v = ATOMIC64_INIT(v0);
25 long long r = v0;
26 BUG_ON(v.counter != r);
27
28 atomic64_set(&v, v1);
29 r = v1;
30 BUG_ON(v.counter != r);
31 BUG_ON(atomic64_read(&v) != r);
32
33 INIT(v0);
34 atomic64_add(onestwos, &v);
35 r += onestwos;
36 BUG_ON(v.counter != r);
37
38 INIT(v0);
39 atomic64_add(-one, &v);
40 r += -one;
41 BUG_ON(v.counter != r);
42
43 INIT(v0);
44 r += onestwos;
45 BUG_ON(atomic64_add_return(onestwos, &v) != r);
46 BUG_ON(v.counter != r);
47
48 INIT(v0);
49 r += -one;
50 BUG_ON(atomic64_add_return(-one, &v) != r);
51 BUG_ON(v.counter != r);
52
53 INIT(v0);
54 atomic64_sub(onestwos, &v);
55 r -= onestwos;
56 BUG_ON(v.counter != r);
57
58 INIT(v0);
59 atomic64_sub(-one, &v);
60 r -= -one;
61 BUG_ON(v.counter != r);
62
63 INIT(v0);
64 r -= onestwos;
65 BUG_ON(atomic64_sub_return(onestwos, &v) != r);
66 BUG_ON(v.counter != r);
67
68 INIT(v0);
69 r -= -one;
70 BUG_ON(atomic64_sub_return(-one, &v) != r);
71 BUG_ON(v.counter != r);
72
73 INIT(v0);
74 atomic64_inc(&v);
75 r += one;
76 BUG_ON(v.counter != r);
77
78 INIT(v0);
79 r += one;
80 BUG_ON(atomic64_inc_return(&v) != r);
81 BUG_ON(v.counter != r);
82
83 INIT(v0);
84 atomic64_dec(&v);
85 r -= one;
86 BUG_ON(v.counter != r);
87
88 INIT(v0);
89 r -= one;
90 BUG_ON(atomic64_dec_return(&v) != r);
91 BUG_ON(v.counter != r);
92
93 INIT(v0);
94 BUG_ON(atomic64_xchg(&v, v1) != v0);
95 r = v1;
96 BUG_ON(v.counter != r);
97
98 INIT(v0);
99 BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
100 r = v1;
101 BUG_ON(v.counter != r);
102
103 INIT(v0);
104 BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
105 BUG_ON(v.counter != r);
106
107 INIT(v0);
108 BUG_ON(atomic64_add_unless(&v, one, v0));
109 BUG_ON(v.counter != r);
110
111 INIT(v0);
112 BUG_ON(!atomic64_add_unless(&v, one, v1));
113 r += one;
114 BUG_ON(v.counter != r);
115
116#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
117 defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
118 INIT(onestwos);
119 BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
120 r -= one;
121 BUG_ON(v.counter != r);
122
123 INIT(0);
124 BUG_ON(atomic64_dec_if_positive(&v) != -one);
125 BUG_ON(v.counter != r);
126
127 INIT(-one);
128 BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
129 BUG_ON(v.counter != r);
130#else
131#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above
132#endif
133
134 INIT(onestwos);
135 BUG_ON(!atomic64_inc_not_zero(&v));
136 r += one;
137 BUG_ON(v.counter != r);
138
139 INIT(0);
140 BUG_ON(atomic64_inc_not_zero(&v));
141 BUG_ON(v.counter != r);
142
143 INIT(-one);
144 BUG_ON(!atomic64_inc_not_zero(&v));
145 r += one;
146 BUG_ON(v.counter != r);
147
148#ifdef CONFIG_X86
149 printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
150#ifdef CONFIG_X86_64
151 "x86-64",
152#elif defined(CONFIG_X86_CMPXCHG64)
153 "i586+",
154#else
155 "i386+",
156#endif
157 boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
158 boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
159#else
160 printk(KERN_INFO "atomic64 test passed\n");
161#endif
162
163 return 0;
164}
165
166core_initcall(test_atomic64);
diff --git a/lib/average.c b/lib/average.c
new file mode 100644
index 00000000000..5576c284149
--- /dev/null
+++ b/lib/average.c
@@ -0,0 +1,61 @@
1/*
2 * lib/average.c
3 *
4 * This source code is licensed under the GNU General Public License,
5 * Version 2. See the file COPYING for more details.
6 */
7
8#include <linux/module.h>
9#include <linux/average.h>
10#include <linux/bug.h>
11#include <linux/log2.h>
12
13/**
14 * DOC: Exponentially Weighted Moving Average (EWMA)
15 *
16 * These are generic functions for calculating Exponentially Weighted Moving
17 * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
18 * up internal representation of the average value to prevent rounding errors.
19 * The factor for scaling up and the exponential weight (or decay rate) have to
20 * be specified thru the init fuction. The structure should not be accessed
21 * directly but only thru the helper functions.
22 */
23
24/**
25 * ewma_init() - Initialize EWMA parameters
26 * @avg: Average structure
27 * @factor: Factor to use for the scaled up internal value. The maximum value
28 * of averages can be ULONG_MAX/(factor*weight). For performance reasons
29 * factor has to be a power of 2.
30 * @weight: Exponential weight, or decay rate. This defines how fast the
31 * influence of older values decreases. For performance reasons weight has
32 * to be a power of 2.
33 *
34 * Initialize the EWMA parameters for a given struct ewma @avg.
35 */
36void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
37{
38 WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
39
40 avg->weight = ilog2(weight);
41 avg->factor = ilog2(factor);
42 avg->internal = 0;
43}
44EXPORT_SYMBOL(ewma_init);
45
46/**
47 * ewma_add() - Exponentially weighted moving average (EWMA)
48 * @avg: Average structure
49 * @val: Current value
50 *
51 * Add a sample to the average.
52 */
53struct ewma *ewma_add(struct ewma *avg, unsigned long val)
54{
55 avg->internal = avg->internal ?
56 (((avg->internal << avg->weight) - avg->internal) +
57 (val << avg->factor)) >> avg->weight :
58 (val << avg->factor);
59 return avg;
60}
61EXPORT_SYMBOL(ewma_add);
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 35a1f7ff414..741fae905ae 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -179,14 +179,16 @@ void __bitmap_shift_left(unsigned long *dst,
179} 179}
180EXPORT_SYMBOL(__bitmap_shift_left); 180EXPORT_SYMBOL(__bitmap_shift_left);
181 181
182void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, 182int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
183 const unsigned long *bitmap2, int bits) 183 const unsigned long *bitmap2, int bits)
184{ 184{
185 int k; 185 int k;
186 int nr = BITS_TO_LONGS(bits); 186 int nr = BITS_TO_LONGS(bits);
187 unsigned long result = 0;
187 188
188 for (k = 0; k < nr; k++) 189 for (k = 0; k < nr; k++)
189 dst[k] = bitmap1[k] & bitmap2[k]; 190 result |= (dst[k] = bitmap1[k] & bitmap2[k]);
191 return result != 0;
190} 192}
191EXPORT_SYMBOL(__bitmap_and); 193EXPORT_SYMBOL(__bitmap_and);
192 194
@@ -212,14 +214,16 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
212} 214}
213EXPORT_SYMBOL(__bitmap_xor); 215EXPORT_SYMBOL(__bitmap_xor);
214 216
215void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, 217int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
216 const unsigned long *bitmap2, int bits) 218 const unsigned long *bitmap2, int bits)
217{ 219{
218 int k; 220 int k;
219 int nr = BITS_TO_LONGS(bits); 221 int nr = BITS_TO_LONGS(bits);
222 unsigned long result = 0;
220 223
221 for (k = 0; k < nr; k++) 224 for (k = 0; k < nr; k++)
222 dst[k] = bitmap1[k] & ~bitmap2[k]; 225 result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
226 return result != 0;
223} 227}
224EXPORT_SYMBOL(__bitmap_andnot); 228EXPORT_SYMBOL(__bitmap_andnot);
225 229
@@ -267,6 +271,87 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
267} 271}
268EXPORT_SYMBOL(__bitmap_weight); 272EXPORT_SYMBOL(__bitmap_weight);
269 273
274#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
275
276void bitmap_set(unsigned long *map, int start, int nr)
277{
278 unsigned long *p = map + BIT_WORD(start);
279 const int size = start + nr;
280 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
281 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
282
283 while (nr - bits_to_set >= 0) {
284 *p |= mask_to_set;
285 nr -= bits_to_set;
286 bits_to_set = BITS_PER_LONG;
287 mask_to_set = ~0UL;
288 p++;
289 }
290 if (nr) {
291 mask_to_set &= BITMAP_LAST_WORD_MASK(size);
292 *p |= mask_to_set;
293 }
294}
295EXPORT_SYMBOL(bitmap_set);
296
297void bitmap_clear(unsigned long *map, int start, int nr)
298{
299 unsigned long *p = map + BIT_WORD(start);
300 const int size = start + nr;
301 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
302 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
303
304 while (nr - bits_to_clear >= 0) {
305 *p &= ~mask_to_clear;
306 nr -= bits_to_clear;
307 bits_to_clear = BITS_PER_LONG;
308 mask_to_clear = ~0UL;
309 p++;
310 }
311 if (nr) {
312 mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
313 *p &= ~mask_to_clear;
314 }
315}
316EXPORT_SYMBOL(bitmap_clear);
317
318/*
319 * bitmap_find_next_zero_area - find a contiguous aligned zero area
320 * @map: The address to base the search on
321 * @size: The bitmap size in bits
322 * @start: The bitnumber to start searching at
323 * @nr: The number of zeroed bits we're looking for
324 * @align_mask: Alignment mask for zero area
325 *
326 * The @align_mask should be one less than a power of 2; the effect is that
327 * the bit offset of all zero areas this function finds is multiples of that
328 * power of 2. A @align_mask of 0 means no alignment is required.
329 */
330unsigned long bitmap_find_next_zero_area(unsigned long *map,
331 unsigned long size,
332 unsigned long start,
333 unsigned int nr,
334 unsigned long align_mask)
335{
336 unsigned long index, end, i;
337again:
338 index = find_next_zero_bit(map, size, start);
339
340 /* Align allocation */
341 index = __ALIGN_MASK(index, align_mask);
342
343 end = index + nr;
344 if (end > size)
345 return end;
346 i = find_next_bit(map, end, index);
347 if (i < end) {
348 start = i + 1;
349 goto again;
350 }
351 return index;
352}
353EXPORT_SYMBOL(bitmap_find_next_zero_area);
354
270/* 355/*
271 * Bitmap printing & parsing functions: first version by Bill Irwin, 356 * Bitmap printing & parsing functions: first version by Bill Irwin,
272 * second version by Paul Jackson, third by Joe Korty. 357 * second version by Paul Jackson, third by Joe Korty.
@@ -274,7 +359,6 @@ EXPORT_SYMBOL(__bitmap_weight);
274 359
275#define CHUNKSZ 32 360#define CHUNKSZ 32
276#define nbits_to_hold_value(val) fls(val) 361#define nbits_to_hold_value(val) fls(val)
277#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
278#define BASEDEC 10 /* fancier cpuset lists input in decimal */ 362#define BASEDEC 10 /* fancier cpuset lists input in decimal */
279 363
280/** 364/**
@@ -381,7 +465,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
381 if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1)) 465 if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
382 return -EOVERFLOW; 466 return -EOVERFLOW;
383 467
384 chunk = (chunk << 4) | unhex(c); 468 chunk = (chunk << 4) | hex_to_bin(c);
385 ndigits++; totaldigits++; 469 ndigits++; totaldigits++;
386 } 470 }
387 if (ndigits == 0) 471 if (ndigits == 0)
@@ -402,7 +486,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
402EXPORT_SYMBOL(__bitmap_parse); 486EXPORT_SYMBOL(__bitmap_parse);
403 487
404/** 488/**
405 * bitmap_parse_user() 489 * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
406 * 490 *
407 * @ubuf: pointer to user buffer containing string. 491 * @ubuf: pointer to user buffer containing string.
408 * @ulen: buffer size in bytes. If string is smaller than this 492 * @ulen: buffer size in bytes. If string is smaller than this
@@ -534,7 +618,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
534EXPORT_SYMBOL(bitmap_parselist); 618EXPORT_SYMBOL(bitmap_parselist);
535 619
536/** 620/**
537 * bitmap_pos_to_ord(buf, pos, bits) 621 * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
538 * @buf: pointer to a bitmap 622 * @buf: pointer to a bitmap
539 * @pos: a bit position in @buf (0 <= @pos < @bits) 623 * @pos: a bit position in @buf (0 <= @pos < @bits)
540 * @bits: number of valid bit positions in @buf 624 * @bits: number of valid bit positions in @buf
@@ -570,7 +654,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
570} 654}
571 655
572/** 656/**
573 * bitmap_ord_to_pos(buf, ord, bits) 657 * bitmap_ord_to_pos - find position of n-th set bit in bitmap
574 * @buf: pointer to bitmap 658 * @buf: pointer to bitmap
575 * @ord: ordinal bit position (n-th set bit, n >= 0) 659 * @ord: ordinal bit position (n-th set bit, n >= 0)
576 * @bits: number of valid bit positions in @buf 660 * @bits: number of valid bit positions in @buf
@@ -648,10 +732,9 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
648 bitmap_zero(dst, bits); 732 bitmap_zero(dst, bits);
649 733
650 w = bitmap_weight(new, bits); 734 w = bitmap_weight(new, bits);
651 for (oldbit = find_first_bit(src, bits); 735 for_each_set_bit(oldbit, src, bits) {
652 oldbit < bits;
653 oldbit = find_next_bit(src, bits, oldbit + 1)) {
654 int n = bitmap_pos_to_ord(old, oldbit, bits); 736 int n = bitmap_pos_to_ord(old, oldbit, bits);
737
655 if (n < 0 || w == 0) 738 if (n < 0 || w == 0)
656 set_bit(oldbit, dst); /* identity map */ 739 set_bit(oldbit, dst); /* identity map */
657 else 740 else
@@ -818,9 +901,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
818 */ 901 */
819 902
820 m = 0; 903 m = 0;
821 for (n = find_first_bit(relmap, bits); 904 for_each_set_bit(n, relmap, bits) {
822 n < bits;
823 n = find_next_bit(relmap, bits, n + 1)) {
824 /* m == bitmap_pos_to_ord(relmap, n, bits) */ 905 /* m == bitmap_pos_to_ord(relmap, n, bits) */
825 if (test_bit(m, orig)) 906 if (test_bit(m, orig))
826 set_bit(n, dst); 907 set_bit(n, dst);
@@ -849,9 +930,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
849 return; 930 return;
850 bitmap_zero(dst, bits); 931 bitmap_zero(dst, bits);
851 932
852 for (oldbit = find_first_bit(orig, bits); 933 for_each_set_bit(oldbit, orig, bits)
853 oldbit < bits;
854 oldbit = find_next_bit(orig, bits, oldbit + 1))
855 set_bit(oldbit % sz, dst); 934 set_bit(oldbit % sz, dst);
856} 935}
857EXPORT_SYMBOL(bitmap_fold); 936EXPORT_SYMBOL(bitmap_fold);
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 00000000000..c9c6f035152
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,798 @@
1/*
2 * lib/btree.c - Simple In-memory B+Tree
3 *
4 * As should be obvious for Linux kernel code, license is GPLv2
5 *
6 * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
7 * Bits and pieces stolen from Peter Zijlstra's code, which is
8 * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
9 * GPLv2
10 *
11 * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
12 *
13 * A relatively simple B+Tree implementation. I have written it as a learning
 14 * excercise to understand how B+Trees work. Turned out to be useful as well.
15 *
16 * B+Trees can be used similar to Linux radix trees (which don't have anything
17 * in common with textbook radix trees, beware). Prerequisite for them working
18 * well is that access to a random tree node is much faster than a large number
19 * of operations within each node.
20 *
21 * Disks have fulfilled the prerequisite for a long time. More recently DRAM
22 * has gained similar properties, as memory access times, when measured in cpu
23 * cycles, have increased. Cacheline sizes have increased as well, which also
24 * helps B+Trees.
25 *
26 * Compared to radix trees, B+Trees are more efficient when dealing with a
27 * sparsely populated address space. Between 25% and 50% of the memory is
28 * occupied with valid pointers. When densely populated, radix trees contain
29 * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2%
30 * pointers.
31 *
32 * This particular implementation stores pointers identified by a long value.
33 * Storing NULL pointers is illegal, lookup will return NULL when no entry
34 * was found.
35 *
 36 * A trick was used that is not commonly found in textbooks. The lowest
37 * values are to the right, not to the left. All used slots within a node
38 * are on the left, all unused slots contain NUL values. Most operations
39 * simply loop once over all slots and terminate on the first NUL.
40 */
41
42#include <linux/btree.h>
43#include <linux/cache.h>
44#include <linux/kernel.h>
45#include <linux/slab.h>
46#include <linux/module.h>
47
48#define MAX(a, b) ((a) > (b) ? (a) : (b))
49#define NODESIZE MAX(L1_CACHE_BYTES, 128)
50
51struct btree_geo {
52 int keylen;
53 int no_pairs;
54 int no_longs;
55};
56
57struct btree_geo btree_geo32 = {
58 .keylen = 1,
59 .no_pairs = NODESIZE / sizeof(long) / 2,
60 .no_longs = NODESIZE / sizeof(long) / 2,
61};
62EXPORT_SYMBOL_GPL(btree_geo32);
63
64#define LONG_PER_U64 (64 / BITS_PER_LONG)
65struct btree_geo btree_geo64 = {
66 .keylen = LONG_PER_U64,
67 .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
68 .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
69};
70EXPORT_SYMBOL_GPL(btree_geo64);
71
72struct btree_geo btree_geo128 = {
73 .keylen = 2 * LONG_PER_U64,
74 .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
75 .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
76};
77EXPORT_SYMBOL_GPL(btree_geo128);
78
79static struct kmem_cache *btree_cachep;
80
81void *btree_alloc(gfp_t gfp_mask, void *pool_data)
82{
83 return kmem_cache_alloc(btree_cachep, gfp_mask);
84}
85EXPORT_SYMBOL_GPL(btree_alloc);
86
87void btree_free(void *element, void *pool_data)
88{
89 kmem_cache_free(btree_cachep, element);
90}
91EXPORT_SYMBOL_GPL(btree_free);
92
93static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
94{
95 unsigned long *node;
96
97 node = mempool_alloc(head->mempool, gfp);
98 if (likely(node))
99 memset(node, 0, NODESIZE);
100 return node;
101}
102
103static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
104{
105 size_t i;
106
107 for (i = 0; i < n; i++) {
108 if (l1[i] < l2[i])
109 return -1;
110 if (l1[i] > l2[i])
111 return 1;
112 }
113 return 0;
114}
115
116static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
117 size_t n)
118{
119 size_t i;
120
121 for (i = 0; i < n; i++)
122 dest[i] = src[i];
123 return dest;
124}
125
126static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
127{
128 size_t i;
129
130 for (i = 0; i < n; i++)
131 s[i] = c;
132 return s;
133}
134
135static void dec_key(struct btree_geo *geo, unsigned long *key)
136{
137 unsigned long val;
138 int i;
139
140 for (i = geo->keylen - 1; i >= 0; i--) {
141 val = key[i];
142 key[i] = val - 1;
143 if (val)
144 break;
145 }
146}
147
148static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
149{
150 return &node[n * geo->keylen];
151}
152
153static void *bval(struct btree_geo *geo, unsigned long *node, int n)
154{
155 return (void *)node[geo->no_longs + n];
156}
157
158static void setkey(struct btree_geo *geo, unsigned long *node, int n,
159 unsigned long *key)
160{
161 longcpy(bkey(geo, node, n), key, geo->keylen);
162}
163
164static void setval(struct btree_geo *geo, unsigned long *node, int n,
165 void *val)
166{
167 node[geo->no_longs + n] = (unsigned long) val;
168}
169
170static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
171{
172 longset(bkey(geo, node, n), 0, geo->keylen);
173 node[geo->no_longs + n] = 0;
174}
175
176static inline void __btree_init(struct btree_head *head)
177{
178 head->node = NULL;
179 head->height = 0;
180}
181
182void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
183{
184 __btree_init(head);
185 head->mempool = mempool;
186}
187EXPORT_SYMBOL_GPL(btree_init_mempool);
188
189int btree_init(struct btree_head *head)
190{
191 __btree_init(head);
192 head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
193 if (!head->mempool)
194 return -ENOMEM;
195 return 0;
196}
197EXPORT_SYMBOL_GPL(btree_init);
198
199void btree_destroy(struct btree_head *head)
200{
201 mempool_destroy(head->mempool);
202 head->mempool = NULL;
203}
204EXPORT_SYMBOL_GPL(btree_destroy);
205
206void *btree_last(struct btree_head *head, struct btree_geo *geo,
207 unsigned long *key)
208{
209 int height = head->height;
210 unsigned long *node = head->node;
211
212 if (height == 0)
213 return NULL;
214
215 for ( ; height > 1; height--)
216 node = bval(geo, node, 0);
217
218 longcpy(key, bkey(geo, node, 0), geo->keylen);
219 return bval(geo, node, 0);
220}
221EXPORT_SYMBOL_GPL(btree_last);
222
223static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
224 unsigned long *key)
225{
226 return longcmp(bkey(geo, node, pos), key, geo->keylen);
227}
228
229static int keyzero(struct btree_geo *geo, unsigned long *key)
230{
231 int i;
232
233 for (i = 0; i < geo->keylen; i++)
234 if (key[i])
235 return 0;
236
237 return 1;
238}
239
240void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
241 unsigned long *key)
242{
243 int i, height = head->height;
244 unsigned long *node = head->node;
245
246 if (height == 0)
247 return NULL;
248
249 for ( ; height > 1; height--) {
250 for (i = 0; i < geo->no_pairs; i++)
251 if (keycmp(geo, node, i, key) <= 0)
252 break;
253 if (i == geo->no_pairs)
254 return NULL;
255 node = bval(geo, node, i);
256 if (!node)
257 return NULL;
258 }
259
260 if (!node)
261 return NULL;
262
263 for (i = 0; i < geo->no_pairs; i++)
264 if (keycmp(geo, node, i, key) == 0)
265 return bval(geo, node, i);
266 return NULL;
267}
268EXPORT_SYMBOL_GPL(btree_lookup);
269
270int btree_update(struct btree_head *head, struct btree_geo *geo,
271 unsigned long *key, void *val)
272{
273 int i, height = head->height;
274 unsigned long *node = head->node;
275
276 if (height == 0)
277 return -ENOENT;
278
279 for ( ; height > 1; height--) {
280 for (i = 0; i < geo->no_pairs; i++)
281 if (keycmp(geo, node, i, key) <= 0)
282 break;
283 if (i == geo->no_pairs)
284 return -ENOENT;
285 node = bval(geo, node, i);
286 if (!node)
287 return -ENOENT;
288 }
289
290 if (!node)
291 return -ENOENT;
292
293 for (i = 0; i < geo->no_pairs; i++)
294 if (keycmp(geo, node, i, key) == 0) {
295 setval(geo, node, i, val);
296 return 0;
297 }
298 return -ENOENT;
299}
300EXPORT_SYMBOL_GPL(btree_update);
301
302/*
303 * Usually this function is quite similar to normal lookup. But the key of
304 * a parent node may be smaller than the smallest key of all its siblings.
305 * In such a case we cannot just return NULL, as we have only proven that no
 306 * key smaller than __key but larger than this parent key exists.
307 * So we set __key to the parent key and retry. We have to use the smallest
308 * such parent key, which is the last parent key we encountered.
309 */
310void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
311 unsigned long *__key)
312{
313 int i, height;
314 unsigned long *node, *oldnode;
315 unsigned long *retry_key = NULL, key[geo->keylen];
316
317 if (keyzero(geo, __key))
318 return NULL;
319
320 if (head->height == 0)
321 return NULL;
322retry:
323 longcpy(key, __key, geo->keylen);
324 dec_key(geo, key);
325
326 node = head->node;
327 for (height = head->height ; height > 1; height--) {
328 for (i = 0; i < geo->no_pairs; i++)
329 if (keycmp(geo, node, i, key) <= 0)
330 break;
331 if (i == geo->no_pairs)
332 goto miss;
333 oldnode = node;
334 node = bval(geo, node, i);
335 if (!node)
336 goto miss;
337 retry_key = bkey(geo, oldnode, i);
338 }
339
340 if (!node)
341 goto miss;
342
343 for (i = 0; i < geo->no_pairs; i++) {
344 if (keycmp(geo, node, i, key) <= 0) {
345 if (bval(geo, node, i)) {
346 longcpy(__key, bkey(geo, node, i), geo->keylen);
347 return bval(geo, node, i);
348 } else
349 goto miss;
350 }
351 }
352miss:
353 if (retry_key) {
354 __key = retry_key;
355 retry_key = NULL;
356 goto retry;
357 }
358 return NULL;
359}
360
361static int getpos(struct btree_geo *geo, unsigned long *node,
362 unsigned long *key)
363{
364 int i;
365
366 for (i = 0; i < geo->no_pairs; i++) {
367 if (keycmp(geo, node, i, key) <= 0)
368 break;
369 }
370 return i;
371}
372
373static int getfill(struct btree_geo *geo, unsigned long *node, int start)
374{
375 int i;
376
377 for (i = start; i < geo->no_pairs; i++)
378 if (!bval(geo, node, i))
379 break;
380 return i;
381}
382
383/*
384 * locate the correct leaf node in the btree
385 */
386static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
387 unsigned long *key, int level)
388{
389 unsigned long *node = head->node;
390 int i, height;
391
392 for (height = head->height; height > level; height--) {
393 for (i = 0; i < geo->no_pairs; i++)
394 if (keycmp(geo, node, i, key) <= 0)
395 break;
396
397 if ((i == geo->no_pairs) || !bval(geo, node, i)) {
398 /* right-most key is too large, update it */
399 /* FIXME: If the right-most key on higher levels is
400 * always zero, this wouldn't be necessary. */
401 i--;
402 setkey(geo, node, i, key);
403 }
404 BUG_ON(i < 0);
405 node = bval(geo, node, i);
406 }
407 BUG_ON(!node);
408 return node;
409}
410
411static int btree_grow(struct btree_head *head, struct btree_geo *geo,
412 gfp_t gfp)
413{
414 unsigned long *node;
415 int fill;
416
417 node = btree_node_alloc(head, gfp);
418 if (!node)
419 return -ENOMEM;
420 if (head->node) {
421 fill = getfill(geo, head->node, 0);
422 setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
423 setval(geo, node, 0, head->node);
424 }
425 head->node = node;
426 head->height++;
427 return 0;
428}
429
430static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
431{
432 unsigned long *node;
433 int fill;
434
435 if (head->height <= 1)
436 return;
437
438 node = head->node;
439 fill = getfill(geo, node, 0);
440 BUG_ON(fill > 1);
441 head->node = bval(geo, node, 0);
442 head->height--;
443 mempool_free(node, head->mempool);
444}
445
446static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
447 unsigned long *key, void *val, int level,
448 gfp_t gfp)
449{
450 unsigned long *node;
451 int i, pos, fill, err;
452
453 BUG_ON(!val);
454 if (head->height < level) {
455 err = btree_grow(head, geo, gfp);
456 if (err)
457 return err;
458 }
459
460retry:
461 node = find_level(head, geo, key, level);
462 pos = getpos(geo, node, key);
463 fill = getfill(geo, node, pos);
464 /* two identical keys are not allowed */
465 BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
466
467 if (fill == geo->no_pairs) {
468 /* need to split node */
469 unsigned long *new;
470
471 new = btree_node_alloc(head, gfp);
472 if (!new)
473 return -ENOMEM;
474 err = btree_insert_level(head, geo,
475 bkey(geo, node, fill / 2 - 1),
476 new, level + 1, gfp);
477 if (err) {
478 mempool_free(new, head->mempool);
479 return err;
480 }
481 for (i = 0; i < fill / 2; i++) {
482 setkey(geo, new, i, bkey(geo, node, i));
483 setval(geo, new, i, bval(geo, node, i));
484 setkey(geo, node, i, bkey(geo, node, i + fill / 2));
485 setval(geo, node, i, bval(geo, node, i + fill / 2));
486 clearpair(geo, node, i + fill / 2);
487 }
488 if (fill & 1) {
489 setkey(geo, node, i, bkey(geo, node, fill - 1));
490 setval(geo, node, i, bval(geo, node, fill - 1));
491 clearpair(geo, node, fill - 1);
492 }
493 goto retry;
494 }
495 BUG_ON(fill >= geo->no_pairs);
496
497 /* shift and insert */
498 for (i = fill; i > pos; i--) {
499 setkey(geo, node, i, bkey(geo, node, i - 1));
500 setval(geo, node, i, bval(geo, node, i - 1));
501 }
502 setkey(geo, node, pos, key);
503 setval(geo, node, pos, val);
504
505 return 0;
506}
507
508int btree_insert(struct btree_head *head, struct btree_geo *geo,
509 unsigned long *key, void *val, gfp_t gfp)
510{
511 return btree_insert_level(head, geo, key, val, 1, gfp);
512}
513EXPORT_SYMBOL_GPL(btree_insert);
514
515static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
516 unsigned long *key, int level);
517static void merge(struct btree_head *head, struct btree_geo *geo, int level,
518 unsigned long *left, int lfill,
519 unsigned long *right, int rfill,
520 unsigned long *parent, int lpos)
521{
522 int i;
523
524 for (i = 0; i < rfill; i++) {
525 /* Move all keys to the left */
526 setkey(geo, left, lfill + i, bkey(geo, right, i));
527 setval(geo, left, lfill + i, bval(geo, right, i));
528 }
529 /* Exchange left and right child in parent */
530 setval(geo, parent, lpos, right);
531 setval(geo, parent, lpos + 1, left);
532 /* Remove left (formerly right) child from parent */
533 btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
534 mempool_free(right, head->mempool);
535}
536
537static void rebalance(struct btree_head *head, struct btree_geo *geo,
538 unsigned long *key, int level, unsigned long *child, int fill)
539{
540 unsigned long *parent, *left = NULL, *right = NULL;
541 int i, no_left, no_right;
542
543 if (fill == 0) {
 544 /* Because we don't steal entries from a neighbour, this case
545 * can happen. Parent node contains a single child, this
546 * node, so merging with a sibling never happens.
547 */
548 btree_remove_level(head, geo, key, level + 1);
549 mempool_free(child, head->mempool);
550 return;
551 }
552
553 parent = find_level(head, geo, key, level + 1);
554 i = getpos(geo, parent, key);
555 BUG_ON(bval(geo, parent, i) != child);
556
557 if (i > 0) {
558 left = bval(geo, parent, i - 1);
559 no_left = getfill(geo, left, 0);
560 if (fill + no_left <= geo->no_pairs) {
561 merge(head, geo, level,
562 left, no_left,
563 child, fill,
564 parent, i - 1);
565 return;
566 }
567 }
568 if (i + 1 < getfill(geo, parent, i)) {
569 right = bval(geo, parent, i + 1);
570 no_right = getfill(geo, right, 0);
571 if (fill + no_right <= geo->no_pairs) {
572 merge(head, geo, level,
573 child, fill,
574 right, no_right,
575 parent, i);
576 return;
577 }
578 }
579 /*
580 * We could also try to steal one entry from the left or right
581 * neighbor. By not doing so we changed the invariant from
582 * "all nodes are at least half full" to "no two neighboring
583 * nodes can be merged". Which means that the average fill of
584 * all nodes is still half or better.
585 */
586}
587
588static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
589 unsigned long *key, int level)
590{
591 unsigned long *node;
592 int i, pos, fill;
593 void *ret;
594
595 if (level > head->height) {
596 /* we recursed all the way up */
597 head->height = 0;
598 head->node = NULL;
599 return NULL;
600 }
601
602 node = find_level(head, geo, key, level);
603 pos = getpos(geo, node, key);
604 fill = getfill(geo, node, pos);
605 if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
606 return NULL;
607 ret = bval(geo, node, pos);
608
609 /* remove and shift */
610 for (i = pos; i < fill - 1; i++) {
611 setkey(geo, node, i, bkey(geo, node, i + 1));
612 setval(geo, node, i, bval(geo, node, i + 1));
613 }
614 clearpair(geo, node, fill - 1);
615
616 if (fill - 1 < geo->no_pairs / 2) {
617 if (level < head->height)
618 rebalance(head, geo, key, level, node, fill - 1);
619 else if (fill - 1 == 1)
620 btree_shrink(head, geo);
621 }
622
623 return ret;
624}
625
626void *btree_remove(struct btree_head *head, struct btree_geo *geo,
627 unsigned long *key)
628{
629 if (head->height == 0)
630 return NULL;
631
632 return btree_remove_level(head, geo, key, 1);
633}
634EXPORT_SYMBOL_GPL(btree_remove);
635
636int btree_merge(struct btree_head *target, struct btree_head *victim,
637 struct btree_geo *geo, gfp_t gfp)
638{
639 unsigned long key[geo->keylen];
640 unsigned long dup[geo->keylen];
641 void *val;
642 int err;
643
644 BUG_ON(target == victim);
645
646 if (!(target->node)) {
647 /* target is empty, just copy fields over */
648 target->node = victim->node;
649 target->height = victim->height;
650 __btree_init(victim);
651 return 0;
652 }
653
654 /* TODO: This needs some optimizations. Currently we do three tree
655 * walks to remove a single object from the victim.
656 */
657 for (;;) {
658 if (!btree_last(victim, geo, key))
659 break;
660 val = btree_lookup(victim, geo, key);
661 err = btree_insert(target, geo, key, val, gfp);
662 if (err)
663 return err;
664 /* We must make a copy of the key, as the original will get
665 * mangled inside btree_remove. */
666 longcpy(dup, key, geo->keylen);
667 btree_remove(victim, geo, dup);
668 }
669 return 0;
670}
671EXPORT_SYMBOL_GPL(btree_merge);
672
673static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
674 unsigned long *node, unsigned long opaque,
675 void (*func)(void *elem, unsigned long opaque,
676 unsigned long *key, size_t index,
677 void *func2),
678 void *func2, int reap, int height, size_t count)
679{
680 int i;
681 unsigned long *child;
682
683 for (i = 0; i < geo->no_pairs; i++) {
684 child = bval(geo, node, i);
685 if (!child)
686 break;
687 if (height > 1)
688 count = __btree_for_each(head, geo, child, opaque,
689 func, func2, reap, height - 1, count);
690 else
691 func(child, opaque, bkey(geo, node, i), count++,
692 func2);
693 }
694 if (reap)
695 mempool_free(node, head->mempool);
696 return count;
697}
698
699static void empty(void *elem, unsigned long opaque, unsigned long *key,
700 size_t index, void *func2)
701{
702}
703
704void visitorl(void *elem, unsigned long opaque, unsigned long *key,
705 size_t index, void *__func)
706{
707 visitorl_t func = __func;
708
709 func(elem, opaque, *key, index);
710}
711EXPORT_SYMBOL_GPL(visitorl);
712
713void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
714 size_t index, void *__func)
715{
716 visitor32_t func = __func;
717 u32 *key = (void *)__key;
718
719 func(elem, opaque, *key, index);
720}
721EXPORT_SYMBOL_GPL(visitor32);
722
723void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
724 size_t index, void *__func)
725{
726 visitor64_t func = __func;
727 u64 *key = (void *)__key;
728
729 func(elem, opaque, *key, index);
730}
731EXPORT_SYMBOL_GPL(visitor64);
732
733void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
734 size_t index, void *__func)
735{
736 visitor128_t func = __func;
737 u64 *key = (void *)__key;
738
739 func(elem, opaque, key[0], key[1], index);
740}
741EXPORT_SYMBOL_GPL(visitor128);
742
743size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
744 unsigned long opaque,
745 void (*func)(void *elem, unsigned long opaque,
746 unsigned long *key,
747 size_t index, void *func2),
748 void *func2)
749{
750 size_t count = 0;
751
752 if (!func2)
753 func = empty;
754 if (head->node)
755 count = __btree_for_each(head, geo, head->node, opaque, func,
756 func2, 0, head->height, 0);
757 return count;
758}
759EXPORT_SYMBOL_GPL(btree_visitor);
760
761size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
762 unsigned long opaque,
763 void (*func)(void *elem, unsigned long opaque,
764 unsigned long *key,
765 size_t index, void *func2),
766 void *func2)
767{
768 size_t count = 0;
769
770 if (!func2)
771 func = empty;
772 if (head->node)
773 count = __btree_for_each(head, geo, head->node, opaque, func,
774 func2, 1, head->height, 0);
775 __btree_init(head);
776 return count;
777}
778EXPORT_SYMBOL_GPL(btree_grim_visitor);
779
780static int __init btree_module_init(void)
781{
782 btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
783 SLAB_HWCACHE_ALIGN, NULL);
784 return 0;
785}
786
787static void __exit btree_module_exit(void)
788{
789 kmem_cache_destroy(btree_cachep);
790}
791
792/* If core code starts using btree, initialization should happen even earlier */
793module_init(btree_module_init);
794module_exit(btree_module_exit);
795
796MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
797MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
798MODULE_LICENSE("GPL");
diff --git a/lib/bug.c b/lib/bug.c
index 300e41afbf9..19552096d16 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
72 return NULL; 72 return NULL;
73} 73}
74 74
75int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, 75void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
76 struct module *mod) 76 struct module *mod)
77{ 77{
78 char *secstrings; 78 char *secstrings;
79 unsigned int i; 79 unsigned int i;
@@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
97 * could potentially lead to deadlock and thus be counter-productive. 97 * could potentially lead to deadlock and thus be counter-productive.
98 */ 98 */
99 list_add(&mod->bug_list, &module_bug_list); 99 list_add(&mod->bug_list, &module_bug_list);
100
101 return 0;
102} 100}
103 101
104void module_bug_cleanup(struct module *mod) 102void module_bug_cleanup(struct module *mod)
@@ -136,8 +134,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
136 134
137 bug = find_bug(bugaddr); 135 bug = find_bug(bugaddr);
138 136
139 printk(KERN_EMERG "------------[ cut here ]------------\n");
140
141 file = NULL; 137 file = NULL;
142 line = 0; 138 line = 0;
143 warning = 0; 139 warning = 0;
@@ -156,19 +152,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
156 152
157 if (warning) { 153 if (warning) {
158 /* this is a WARN_ON rather than BUG/BUG_ON */ 154 /* this is a WARN_ON rather than BUG/BUG_ON */
155 printk(KERN_WARNING "------------[ cut here ]------------\n");
156
159 if (file) 157 if (file)
160 printk(KERN_ERR "Badness at %s:%u\n", 158 printk(KERN_WARNING "WARNING: at %s:%u\n",
161 file, line); 159 file, line);
162 else 160 else
163 printk(KERN_ERR "Badness at %p " 161 printk(KERN_WARNING "WARNING: at %p "
164 "[verbose debug info unavailable]\n", 162 "[verbose debug info unavailable]\n",
165 (void *)bugaddr); 163 (void *)bugaddr);
166 164
165 print_modules();
167 show_regs(regs); 166 show_regs(regs);
168 add_taint(TAINT_WARN); 167 print_oops_end_marker();
168 add_taint(BUG_GET_TAINT(bug));
169 return BUG_TRAP_TYPE_WARN; 169 return BUG_TRAP_TYPE_WARN;
170 } 170 }
171 171
172 printk(KERN_EMERG "------------[ cut here ]------------\n");
173
172 if (file) 174 if (file)
173 printk(KERN_CRIT "kernel BUG at %s:%u!\n", 175 printk(KERN_CRIT "kernel BUG at %s:%u!\n",
174 file, line); 176 file, line);
diff --git a/lib/checksum.c b/lib/checksum.c
index 12e5a1c91cd..097508732f3 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -37,7 +37,8 @@
37 37
38#include <asm/byteorder.h> 38#include <asm/byteorder.h>
39 39
40static inline unsigned short from32to16(unsigned long x) 40#ifndef do_csum
41static inline unsigned short from32to16(unsigned int x)
41{ 42{
42 /* add up 16-bit and 16-bit for 16+c bit */ 43 /* add up 16-bit and 16-bit for 16+c bit */
43 x = (x & 0xffff) + (x >> 16); 44 x = (x & 0xffff) + (x >> 16);
@@ -49,13 +50,17 @@ static inline unsigned short from32to16(unsigned long x)
49static unsigned int do_csum(const unsigned char *buff, int len) 50static unsigned int do_csum(const unsigned char *buff, int len)
50{ 51{
51 int odd, count; 52 int odd, count;
52 unsigned long result = 0; 53 unsigned int result = 0;
53 54
54 if (len <= 0) 55 if (len <= 0)
55 goto out; 56 goto out;
56 odd = 1 & (unsigned long) buff; 57 odd = 1 & (unsigned long) buff;
57 if (odd) { 58 if (odd) {
59#ifdef __LITTLE_ENDIAN
60 result += (*buff << 8);
61#else
58 result = *buff; 62 result = *buff;
63#endif
59 len--; 64 len--;
60 buff++; 65 buff++;
61 } 66 }
@@ -69,9 +74,9 @@ static unsigned int do_csum(const unsigned char *buff, int len)
69 } 74 }
70 count >>= 1; /* nr of 32-bit words.. */ 75 count >>= 1; /* nr of 32-bit words.. */
71 if (count) { 76 if (count) {
72 unsigned long carry = 0; 77 unsigned int carry = 0;
73 do { 78 do {
74 unsigned long w = *(unsigned long *) buff; 79 unsigned int w = *(unsigned int *) buff;
75 count--; 80 count--;
76 buff += 4; 81 buff += 4;
77 result += carry; 82 result += carry;
@@ -87,13 +92,18 @@ static unsigned int do_csum(const unsigned char *buff, int len)
87 } 92 }
88 } 93 }
89 if (len & 1) 94 if (len & 1)
95#ifdef __LITTLE_ENDIAN
96 result += *buff;
97#else
90 result += (*buff << 8); 98 result += (*buff << 8);
99#endif
91 result = from32to16(result); 100 result = from32to16(result);
92 if (odd) 101 if (odd)
93 result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); 102 result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
94out: 103out:
95 return result; 104 return result;
96} 105}
106#endif
97 107
98/* 108/*
99 * This is a version of ip_compute_csum() optimized for IP headers, 109 * This is a version of ip_compute_csum() optimized for IP headers,
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644
index 00000000000..4dc20321b0d
--- /dev/null
+++ b/lib/cpu-notifier-error-inject.c
@@ -0,0 +1,63 @@
1#include <linux/kernel.h>
2#include <linux/cpu.h>
3#include <linux/module.h>
4#include <linux/notifier.h>
5
6static int priority;
7static int cpu_up_prepare_error;
8static int cpu_down_prepare_error;
9
10module_param(priority, int, 0);
11MODULE_PARM_DESC(priority, "specify cpu notifier priority");
12
13module_param(cpu_up_prepare_error, int, 0644);
14MODULE_PARM_DESC(cpu_up_prepare_error,
15 "specify error code to inject CPU_UP_PREPARE action");
16
17module_param(cpu_down_prepare_error, int, 0644);
18MODULE_PARM_DESC(cpu_down_prepare_error,
19 "specify error code to inject CPU_DOWN_PREPARE action");
20
21static int err_inject_cpu_callback(struct notifier_block *nfb,
22 unsigned long action, void *hcpu)
23{
24 int err = 0;
25
26 switch (action) {
27 case CPU_UP_PREPARE:
28 case CPU_UP_PREPARE_FROZEN:
29 err = cpu_up_prepare_error;
30 break;
31 case CPU_DOWN_PREPARE:
32 case CPU_DOWN_PREPARE_FROZEN:
33 err = cpu_down_prepare_error;
34 break;
35 }
36 if (err)
37 printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
38
39 return notifier_from_errno(err);
40}
41
42static struct notifier_block err_inject_cpu_notifier = {
43 .notifier_call = err_inject_cpu_callback,
44};
45
46static int err_inject_init(void)
47{
48 err_inject_cpu_notifier.priority = priority;
49
50 return register_hotcpu_notifier(&err_inject_cpu_notifier);
51}
52
53static void err_inject_exit(void)
54{
55 unregister_hotcpu_notifier(&err_inject_cpu_notifier);
56}
57
58module_init(err_inject_init);
59module_exit(err_inject_exit);
60
61MODULE_DESCRIPTION("CPU notifier error injection module");
62MODULE_LICENSE("GPL");
63MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 7bb4142a502..05d6aca7fc1 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -1,3 +1,4 @@
1#include <linux/slab.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/bitops.h> 3#include <linux/bitops.h>
3#include <linux/cpumask.h> 4#include <linux/cpumask.h>
diff --git a/lib/crc32.c b/lib/crc32.c
index 49d1c9e3ce3..4855995fcde 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -25,16 +25,19 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/compiler.h> 26#include <linux/compiler.h>
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/slab.h>
29#include <linux/init.h> 28#include <linux/init.h>
30#include <asm/atomic.h> 29#include <asm/atomic.h>
31#include "crc32defs.h" 30#include "crc32defs.h"
32#if CRC_LE_BITS == 8 31#if CRC_LE_BITS == 8
33#define tole(x) __constant_cpu_to_le32(x) 32# define tole(x) __constant_cpu_to_le32(x)
34#define tobe(x) __constant_cpu_to_be32(x)
35#else 33#else
36#define tole(x) (x) 34# define tole(x) (x)
37#define tobe(x) (x) 35#endif
36
37#if CRC_BE_BITS == 8
38# define tobe(x) __constant_cpu_to_be32(x)
39#else
40# define tobe(x) (x)
38#endif 41#endif
39#include "crc32table.h" 42#include "crc32table.h"
40 43
@@ -42,6 +45,54 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
42MODULE_DESCRIPTION("Ethernet CRC32 calculations"); 45MODULE_DESCRIPTION("Ethernet CRC32 calculations");
43MODULE_LICENSE("GPL"); 46MODULE_LICENSE("GPL");
44 47
48#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
49
50static inline u32
51crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
52{
53# ifdef __LITTLE_ENDIAN
54# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
55# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
56 tab[2][(crc >> 8) & 255] ^ \
57 tab[1][(crc >> 16) & 255] ^ \
58 tab[0][(crc >> 24) & 255]
59# else
60# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
61# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
62 tab[1][(crc >> 8) & 255] ^ \
63 tab[2][(crc >> 16) & 255] ^ \
64 tab[3][(crc >> 24) & 255]
65# endif
66 const u32 *b;
67 size_t rem_len;
68
69 /* Align it */
70 if (unlikely((long)buf & 3 && len)) {
71 do {
72 DO_CRC(*buf++);
73 } while ((--len) && ((long)buf)&3);
74 }
75 rem_len = len & 3;
76 /* load data 32 bits wide, xor data 32 bits wide. */
77 len = len >> 2;
78 b = (const u32 *)buf;
79 for (--b; len; --len) {
80 crc ^= *++b; /* use pre increment for speed */
81 DO_CRC4;
82 }
83 len = rem_len;
84 /* And the last few bytes */
85 if (len) {
86 u8 *p = (u8 *)(b + 1) - 1;
87 do {
88 DO_CRC(*++p); /* use pre increment for speed */
89 } while (--len);
90 }
91 return crc;
92#undef DO_CRC
93#undef DO_CRC4
94}
95#endif
45/** 96/**
46 * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 97 * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
47 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for 98 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
@@ -72,52 +123,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
72u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) 123u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
73{ 124{
74# if CRC_LE_BITS == 8 125# if CRC_LE_BITS == 8
75 const u32 *b =(u32 *)p; 126 const u32 (*tab)[] = crc32table_le;
76 const u32 *tab = crc32table_le;
77
78# ifdef __LITTLE_ENDIAN
79# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
80# else
81# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
82# endif
83 127
84 crc = __cpu_to_le32(crc); 128 crc = __cpu_to_le32(crc);
85 /* Align it */ 129 crc = crc32_body(crc, p, len, tab);
86 if(unlikely(((long)b)&3 && len)){
87 do {
88 u8 *p = (u8 *)b;
89 DO_CRC(*p++);
90 b = (void *)p;
91 } while ((--len) && ((long)b)&3 );
92 }
93 if(likely(len >= 4)){
94 /* load data 32 bits wide, xor data 32 bits wide. */
95 size_t save_len = len & 3;
96 len = len >> 2;
97 --b; /* use pre increment below(*++b) for speed */
98 do {
99 crc ^= *++b;
100 DO_CRC(0);
101 DO_CRC(0);
102 DO_CRC(0);
103 DO_CRC(0);
104 } while (--len);
105 b++; /* point to next byte(s) */
106 len = save_len;
107 }
108 /* And the last few bytes */
109 if(len){
110 do {
111 u8 *p = (u8 *)b;
112 DO_CRC(*p++);
113 b = (void *)p;
114 } while (--len);
115 }
116
117 return __le32_to_cpu(crc); 130 return __le32_to_cpu(crc);
118#undef ENDIAN_SHIFT
119#undef DO_CRC
120
121# elif CRC_LE_BITS == 4 131# elif CRC_LE_BITS == 4
122 while (len--) { 132 while (len--) {
123 crc ^= *p++; 133 crc ^= *p++;
@@ -170,51 +180,11 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
170u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) 180u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
171{ 181{
172# if CRC_BE_BITS == 8 182# if CRC_BE_BITS == 8
173 const u32 *b =(u32 *)p; 183 const u32 (*tab)[] = crc32table_be;
174 const u32 *tab = crc32table_be;
175
176# ifdef __LITTLE_ENDIAN
177# define DO_CRC(x) crc = tab[ (crc ^ (x)) & 255 ] ^ (crc>>8)
178# else
179# define DO_CRC(x) crc = tab[ ((crc >> 24) ^ (x)) & 255] ^ (crc<<8)
180# endif
181 184
182 crc = __cpu_to_be32(crc); 185 crc = __cpu_to_be32(crc);
183 /* Align it */ 186 crc = crc32_body(crc, p, len, tab);
184 if(unlikely(((long)b)&3 && len)){
185 do {
186 u8 *p = (u8 *)b;
187 DO_CRC(*p++);
188 b = (u32 *)p;
189 } while ((--len) && ((long)b)&3 );
190 }
191 if(likely(len >= 4)){
192 /* load data 32 bits wide, xor data 32 bits wide. */
193 size_t save_len = len & 3;
194 len = len >> 2;
195 --b; /* use pre increment below(*++b) for speed */
196 do {
197 crc ^= *++b;
198 DO_CRC(0);
199 DO_CRC(0);
200 DO_CRC(0);
201 DO_CRC(0);
202 } while (--len);
203 b++; /* point to next byte(s) */
204 len = save_len;
205 }
206 /* And the last few bytes */
207 if(len){
208 do {
209 u8 *p = (u8 *)b;
210 DO_CRC(*p++);
211 b = (void *)p;
212 } while (--len);
213 }
214 return __be32_to_cpu(crc); 187 return __be32_to_cpu(crc);
215#undef ENDIAN_SHIFT
216#undef DO_CRC
217
218# elif CRC_BE_BITS == 4 188# elif CRC_BE_BITS == 4
219 while (len--) { 189 while (len--) {
220 crc ^= *p++ << 24; 190 crc ^= *p++ << 24;
diff --git a/lib/ctype.c b/lib/ctype.c
index d02ace14a32..26baa620e95 100644
--- a/lib/ctype.c
+++ b/lib/ctype.c
@@ -7,30 +7,30 @@
7#include <linux/ctype.h> 7#include <linux/ctype.h>
8#include <linux/module.h> 8#include <linux/module.h>
9 9
10unsigned char _ctype[] = { 10const unsigned char _ctype[] = {
11_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ 11_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */
12_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ 12_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */
13_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ 13_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */
14_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ 14_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */
15_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ 15_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */
16_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ 16_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */
17_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ 17_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */
18_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ 18_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */
19_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ 19_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */
20_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ 20_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */
21_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ 21_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */
22_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ 22_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */
23_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ 23_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */
24_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ 24_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */
25_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ 25_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */
26_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ 26_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */
270,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ 270,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */
280,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ 280,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */
29_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ 29_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
30_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ 30_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */
31_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ 31_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */
32_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ 32_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */
33_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ 33_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */
34_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ 34_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */
35 35
36EXPORT_SYMBOL(_ctype); 36EXPORT_SYMBOL(_ctype);
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b11731b9..b1c17730767 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -8,7 +8,6 @@
8 * 8 *
9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 9 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
10 */ 10 */
11#include <linux/kernel.h>
12#include <linux/rwsem.h> 11#include <linux/rwsem.h>
13#include <linux/mutex.h> 12#include <linux/mutex.h>
14#include <linux/module.h> 13#include <linux/module.h>
@@ -23,6 +22,7 @@
23 * shut up after that. 22 * shut up after that.
24 */ 23 */
25int debug_locks = 1; 24int debug_locks = 1;
25EXPORT_SYMBOL_GPL(debug_locks);
26 26
27/* 27/*
28 * The locking-testsuite uses <debug_locks_silent> to get a 28 * The locking-testsuite uses <debug_locks_silent> to get a
@@ -38,7 +38,6 @@ int debug_locks_off(void)
38{ 38{
39 if (__debug_locks_off()) { 39 if (__debug_locks_off()) {
40 if (!debug_locks_silent) { 40 if (!debug_locks_silent) {
41 oops_in_progress = 1;
42 console_verbose(); 41 console_verbose();
43 return 1; 42 return 1;
44 } 43 }
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2755a3bd16a..deebcc57d4e 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -9,8 +9,10 @@
9 */ 9 */
10#include <linux/debugobjects.h> 10#include <linux/debugobjects.h>
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/sched.h>
12#include <linux/seq_file.h> 13#include <linux/seq_file.h>
13#include <linux/debugfs.h> 14#include <linux/debugfs.h>
15#include <linux/slab.h>
14#include <linux/hash.h> 16#include <linux/hash.h>
15 17
16#define ODEBUG_HASH_BITS 14 18#define ODEBUG_HASH_BITS 14
@@ -25,14 +27,14 @@
25 27
26struct debug_bucket { 28struct debug_bucket {
27 struct hlist_head list; 29 struct hlist_head list;
28 spinlock_t lock; 30 raw_spinlock_t lock;
29}; 31};
30 32
31static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; 33static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
32 34
33static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; 35static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
34 36
35static DEFINE_SPINLOCK(pool_lock); 37static DEFINE_RAW_SPINLOCK(pool_lock);
36 38
37static HLIST_HEAD(obj_pool); 39static HLIST_HEAD(obj_pool);
38 40
@@ -95,10 +97,10 @@ static int fill_pool(void)
95 if (!new) 97 if (!new)
96 return obj_pool_free; 98 return obj_pool_free;
97 99
98 spin_lock_irqsave(&pool_lock, flags); 100 raw_spin_lock_irqsave(&pool_lock, flags);
99 hlist_add_head(&new->node, &obj_pool); 101 hlist_add_head(&new->node, &obj_pool);
100 obj_pool_free++; 102 obj_pool_free++;
101 spin_unlock_irqrestore(&pool_lock, flags); 103 raw_spin_unlock_irqrestore(&pool_lock, flags);
102 } 104 }
103 return obj_pool_free; 105 return obj_pool_free;
104} 106}
@@ -132,13 +134,14 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
132{ 134{
133 struct debug_obj *obj = NULL; 135 struct debug_obj *obj = NULL;
134 136
135 spin_lock(&pool_lock); 137 raw_spin_lock(&pool_lock);
136 if (obj_pool.first) { 138 if (obj_pool.first) {
137 obj = hlist_entry(obj_pool.first, typeof(*obj), node); 139 obj = hlist_entry(obj_pool.first, typeof(*obj), node);
138 140
139 obj->object = addr; 141 obj->object = addr;
140 obj->descr = descr; 142 obj->descr = descr;
141 obj->state = ODEBUG_STATE_NONE; 143 obj->state = ODEBUG_STATE_NONE;
144 obj->astate = 0;
142 hlist_del(&obj->node); 145 hlist_del(&obj->node);
143 146
144 hlist_add_head(&obj->node, &b->list); 147 hlist_add_head(&obj->node, &b->list);
@@ -151,7 +154,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
151 if (obj_pool_free < obj_pool_min_free) 154 if (obj_pool_free < obj_pool_min_free)
152 obj_pool_min_free = obj_pool_free; 155 obj_pool_min_free = obj_pool_free;
153 } 156 }
154 spin_unlock(&pool_lock); 157 raw_spin_unlock(&pool_lock);
155 158
156 return obj; 159 return obj;
157} 160}
@@ -164,7 +167,7 @@ static void free_obj_work(struct work_struct *work)
164 struct debug_obj *obj; 167 struct debug_obj *obj;
165 unsigned long flags; 168 unsigned long flags;
166 169
167 spin_lock_irqsave(&pool_lock, flags); 170 raw_spin_lock_irqsave(&pool_lock, flags);
168 while (obj_pool_free > ODEBUG_POOL_SIZE) { 171 while (obj_pool_free > ODEBUG_POOL_SIZE) {
169 obj = hlist_entry(obj_pool.first, typeof(*obj), node); 172 obj = hlist_entry(obj_pool.first, typeof(*obj), node);
170 hlist_del(&obj->node); 173 hlist_del(&obj->node);
@@ -173,11 +176,11 @@ static void free_obj_work(struct work_struct *work)
173 * We release pool_lock across kmem_cache_free() to 176 * We release pool_lock across kmem_cache_free() to
174 * avoid contention on pool_lock. 177 * avoid contention on pool_lock.
175 */ 178 */
176 spin_unlock_irqrestore(&pool_lock, flags); 179 raw_spin_unlock_irqrestore(&pool_lock, flags);
177 kmem_cache_free(obj_cache, obj); 180 kmem_cache_free(obj_cache, obj);
178 spin_lock_irqsave(&pool_lock, flags); 181 raw_spin_lock_irqsave(&pool_lock, flags);
179 } 182 }
180 spin_unlock_irqrestore(&pool_lock, flags); 183 raw_spin_unlock_irqrestore(&pool_lock, flags);
181} 184}
182 185
183/* 186/*
@@ -189,7 +192,7 @@ static void free_object(struct debug_obj *obj)
189 unsigned long flags; 192 unsigned long flags;
190 int sched = 0; 193 int sched = 0;
191 194
192 spin_lock_irqsave(&pool_lock, flags); 195 raw_spin_lock_irqsave(&pool_lock, flags);
193 /* 196 /*
194 * schedule work when the pool is filled and the cache is 197 * schedule work when the pool is filled and the cache is
195 * initialized: 198 * initialized:
@@ -199,7 +202,7 @@ static void free_object(struct debug_obj *obj)
199 hlist_add_head(&obj->node, &obj_pool); 202 hlist_add_head(&obj->node, &obj_pool);
200 obj_pool_free++; 203 obj_pool_free++;
201 obj_pool_used--; 204 obj_pool_used--;
202 spin_unlock_irqrestore(&pool_lock, flags); 205 raw_spin_unlock_irqrestore(&pool_lock, flags);
203 if (sched) 206 if (sched)
204 schedule_work(&debug_obj_work); 207 schedule_work(&debug_obj_work);
205} 208}
@@ -220,9 +223,9 @@ static void debug_objects_oom(void)
220 printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n"); 223 printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n");
221 224
222 for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { 225 for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
223 spin_lock_irqsave(&db->lock, flags); 226 raw_spin_lock_irqsave(&db->lock, flags);
224 hlist_move_list(&db->list, &freelist); 227 hlist_move_list(&db->list, &freelist);
225 spin_unlock_irqrestore(&db->lock, flags); 228 raw_spin_unlock_irqrestore(&db->lock, flags);
226 229
227 /* Now free them */ 230 /* Now free them */
228 hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { 231 hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
@@ -250,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
250 253
251 if (limit < 5 && obj->descr != descr_test) { 254 if (limit < 5 && obj->descr != descr_test) {
252 limit++; 255 limit++;
253 WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, 256 WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
254 obj_states[obj->state], obj->descr->name); 257 "object type: %s\n",
258 msg, obj_states[obj->state], obj->astate,
259 obj->descr->name);
255 } 260 }
256 debug_objects_warnings++; 261 debug_objects_warnings++;
257} 262}
@@ -302,14 +307,14 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
302 307
303 db = get_bucket((unsigned long) addr); 308 db = get_bucket((unsigned long) addr);
304 309
305 spin_lock_irqsave(&db->lock, flags); 310 raw_spin_lock_irqsave(&db->lock, flags);
306 311
307 obj = lookup_object(addr, db); 312 obj = lookup_object(addr, db);
308 if (!obj) { 313 if (!obj) {
309 obj = alloc_object(addr, db, descr); 314 obj = alloc_object(addr, db, descr);
310 if (!obj) { 315 if (!obj) {
311 debug_objects_enabled = 0; 316 debug_objects_enabled = 0;
312 spin_unlock_irqrestore(&db->lock, flags); 317 raw_spin_unlock_irqrestore(&db->lock, flags);
313 debug_objects_oom(); 318 debug_objects_oom();
314 return; 319 return;
315 } 320 }
@@ -326,7 +331,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
326 case ODEBUG_STATE_ACTIVE: 331 case ODEBUG_STATE_ACTIVE:
327 debug_print_object(obj, "init"); 332 debug_print_object(obj, "init");
328 state = obj->state; 333 state = obj->state;
329 spin_unlock_irqrestore(&db->lock, flags); 334 raw_spin_unlock_irqrestore(&db->lock, flags);
330 debug_object_fixup(descr->fixup_init, addr, state); 335 debug_object_fixup(descr->fixup_init, addr, state);
331 return; 336 return;
332 337
@@ -337,7 +342,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
337 break; 342 break;
338 } 343 }
339 344
340 spin_unlock_irqrestore(&db->lock, flags); 345 raw_spin_unlock_irqrestore(&db->lock, flags);
341} 346}
342 347
343/** 348/**
@@ -384,7 +389,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
384 389
385 db = get_bucket((unsigned long) addr); 390 db = get_bucket((unsigned long) addr);
386 391
387 spin_lock_irqsave(&db->lock, flags); 392 raw_spin_lock_irqsave(&db->lock, flags);
388 393
389 obj = lookup_object(addr, db); 394 obj = lookup_object(addr, db);
390 if (obj) { 395 if (obj) {
@@ -397,7 +402,7 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
397 case ODEBUG_STATE_ACTIVE: 402 case ODEBUG_STATE_ACTIVE:
398 debug_print_object(obj, "activate"); 403 debug_print_object(obj, "activate");
399 state = obj->state; 404 state = obj->state;
400 spin_unlock_irqrestore(&db->lock, flags); 405 raw_spin_unlock_irqrestore(&db->lock, flags);
401 debug_object_fixup(descr->fixup_activate, addr, state); 406 debug_object_fixup(descr->fixup_activate, addr, state);
402 return; 407 return;
403 408
@@ -407,11 +412,11 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr)
407 default: 412 default:
408 break; 413 break;
409 } 414 }
410 spin_unlock_irqrestore(&db->lock, flags); 415 raw_spin_unlock_irqrestore(&db->lock, flags);
411 return; 416 return;
412 } 417 }
413 418
414 spin_unlock_irqrestore(&db->lock, flags); 419 raw_spin_unlock_irqrestore(&db->lock, flags);
415 /* 420 /*
416 * This happens when a static object is activated. We 421 * This happens when a static object is activated. We
417 * let the type specific code decide whether this is 422 * let the type specific code decide whether this is
@@ -437,7 +442,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
437 442
438 db = get_bucket((unsigned long) addr); 443 db = get_bucket((unsigned long) addr);
439 444
440 spin_lock_irqsave(&db->lock, flags); 445 raw_spin_lock_irqsave(&db->lock, flags);
441 446
442 obj = lookup_object(addr, db); 447 obj = lookup_object(addr, db);
443 if (obj) { 448 if (obj) {
@@ -445,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
445 case ODEBUG_STATE_INIT: 450 case ODEBUG_STATE_INIT:
446 case ODEBUG_STATE_INACTIVE: 451 case ODEBUG_STATE_INACTIVE:
447 case ODEBUG_STATE_ACTIVE: 452 case ODEBUG_STATE_ACTIVE:
448 obj->state = ODEBUG_STATE_INACTIVE; 453 if (!obj->astate)
454 obj->state = ODEBUG_STATE_INACTIVE;
455 else
456 debug_print_object(obj, "deactivate");
449 break; 457 break;
450 458
451 case ODEBUG_STATE_DESTROYED: 459 case ODEBUG_STATE_DESTROYED:
@@ -462,7 +470,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
462 debug_print_object(&o, "deactivate"); 470 debug_print_object(&o, "deactivate");
463 } 471 }
464 472
465 spin_unlock_irqrestore(&db->lock, flags); 473 raw_spin_unlock_irqrestore(&db->lock, flags);
466} 474}
467 475
468/** 476/**
@@ -482,7 +490,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
482 490
483 db = get_bucket((unsigned long) addr); 491 db = get_bucket((unsigned long) addr);
484 492
485 spin_lock_irqsave(&db->lock, flags); 493 raw_spin_lock_irqsave(&db->lock, flags);
486 494
487 obj = lookup_object(addr, db); 495 obj = lookup_object(addr, db);
488 if (!obj) 496 if (!obj)
@@ -497,7 +505,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
497 case ODEBUG_STATE_ACTIVE: 505 case ODEBUG_STATE_ACTIVE:
498 debug_print_object(obj, "destroy"); 506 debug_print_object(obj, "destroy");
499 state = obj->state; 507 state = obj->state;
500 spin_unlock_irqrestore(&db->lock, flags); 508 raw_spin_unlock_irqrestore(&db->lock, flags);
501 debug_object_fixup(descr->fixup_destroy, addr, state); 509 debug_object_fixup(descr->fixup_destroy, addr, state);
502 return; 510 return;
503 511
@@ -508,7 +516,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
508 break; 516 break;
509 } 517 }
510out_unlock: 518out_unlock:
511 spin_unlock_irqrestore(&db->lock, flags); 519 raw_spin_unlock_irqrestore(&db->lock, flags);
512} 520}
513 521
514/** 522/**
@@ -528,7 +536,7 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
528 536
529 db = get_bucket((unsigned long) addr); 537 db = get_bucket((unsigned long) addr);
530 538
531 spin_lock_irqsave(&db->lock, flags); 539 raw_spin_lock_irqsave(&db->lock, flags);
532 540
533 obj = lookup_object(addr, db); 541 obj = lookup_object(addr, db);
534 if (!obj) 542 if (!obj)
@@ -538,17 +546,64 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
538 case ODEBUG_STATE_ACTIVE: 546 case ODEBUG_STATE_ACTIVE:
539 debug_print_object(obj, "free"); 547 debug_print_object(obj, "free");
540 state = obj->state; 548 state = obj->state;
541 spin_unlock_irqrestore(&db->lock, flags); 549 raw_spin_unlock_irqrestore(&db->lock, flags);
542 debug_object_fixup(descr->fixup_free, addr, state); 550 debug_object_fixup(descr->fixup_free, addr, state);
543 return; 551 return;
544 default: 552 default:
545 hlist_del(&obj->node); 553 hlist_del(&obj->node);
546 spin_unlock_irqrestore(&db->lock, flags); 554 raw_spin_unlock_irqrestore(&db->lock, flags);
547 free_object(obj); 555 free_object(obj);
548 return; 556 return;
549 } 557 }
550out_unlock: 558out_unlock:
551 spin_unlock_irqrestore(&db->lock, flags); 559 raw_spin_unlock_irqrestore(&db->lock, flags);
560}
561
562/**
563 * debug_object_active_state - debug checks object usage state machine
564 * @addr: address of the object
565 * @descr: pointer to an object specific debug description structure
566 * @expect: expected state
567 * @next: state to move to if expected state is found
568 */
569void
570debug_object_active_state(void *addr, struct debug_obj_descr *descr,
571 unsigned int expect, unsigned int next)
572{
573 struct debug_bucket *db;
574 struct debug_obj *obj;
575 unsigned long flags;
576
577 if (!debug_objects_enabled)
578 return;
579
580 db = get_bucket((unsigned long) addr);
581
582 raw_spin_lock_irqsave(&db->lock, flags);
583
584 obj = lookup_object(addr, db);
585 if (obj) {
586 switch (obj->state) {
587 case ODEBUG_STATE_ACTIVE:
588 if (obj->astate == expect)
589 obj->astate = next;
590 else
591 debug_print_object(obj, "active_state");
592 break;
593
594 default:
595 debug_print_object(obj, "active_state");
596 break;
597 }
598 } else {
599 struct debug_obj o = { .object = addr,
600 .state = ODEBUG_STATE_NOTAVAILABLE,
601 .descr = descr };
602
603 debug_print_object(&o, "active_state");
604 }
605
606 raw_spin_unlock_irqrestore(&db->lock, flags);
552} 607}
553 608
554#ifdef CONFIG_DEBUG_OBJECTS_FREE 609#ifdef CONFIG_DEBUG_OBJECTS_FREE
@@ -574,7 +629,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
574 629
575repeat: 630repeat:
576 cnt = 0; 631 cnt = 0;
577 spin_lock_irqsave(&db->lock, flags); 632 raw_spin_lock_irqsave(&db->lock, flags);
578 hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { 633 hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) {
579 cnt++; 634 cnt++;
580 oaddr = (unsigned long) obj->object; 635 oaddr = (unsigned long) obj->object;
@@ -586,7 +641,7 @@ repeat:
586 debug_print_object(obj, "free"); 641 debug_print_object(obj, "free");
587 descr = obj->descr; 642 descr = obj->descr;
588 state = obj->state; 643 state = obj->state;
589 spin_unlock_irqrestore(&db->lock, flags); 644 raw_spin_unlock_irqrestore(&db->lock, flags);
590 debug_object_fixup(descr->fixup_free, 645 debug_object_fixup(descr->fixup_free,
591 (void *) oaddr, state); 646 (void *) oaddr, state);
592 goto repeat; 647 goto repeat;
@@ -596,7 +651,7 @@ repeat:
596 break; 651 break;
597 } 652 }
598 } 653 }
599 spin_unlock_irqrestore(&db->lock, flags); 654 raw_spin_unlock_irqrestore(&db->lock, flags);
600 655
601 /* Now free them */ 656 /* Now free them */
602 hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { 657 hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
@@ -772,7 +827,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state)
772 } 827 }
773} 828}
774 829
775static int 830static int __init
776check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) 831check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
777{ 832{
778 struct debug_bucket *db; 833 struct debug_bucket *db;
@@ -782,7 +837,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
782 837
783 db = get_bucket((unsigned long) addr); 838 db = get_bucket((unsigned long) addr);
784 839
785 spin_lock_irqsave(&db->lock, flags); 840 raw_spin_lock_irqsave(&db->lock, flags);
786 841
787 obj = lookup_object(addr, db); 842 obj = lookup_object(addr, db);
788 if (!obj && state != ODEBUG_STATE_NONE) { 843 if (!obj && state != ODEBUG_STATE_NONE) {
@@ -806,7 +861,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
806 } 861 }
807 res = 0; 862 res = 0;
808out: 863out:
809 spin_unlock_irqrestore(&db->lock, flags); 864 raw_spin_unlock_irqrestore(&db->lock, flags);
810 if (res) 865 if (res)
811 debug_objects_enabled = 0; 866 debug_objects_enabled = 0;
812 return res; 867 return res;
@@ -906,7 +961,7 @@ void __init debug_objects_early_init(void)
906 int i; 961 int i;
907 962
908 for (i = 0; i < ODEBUG_HASH_SIZE; i++) 963 for (i = 0; i < ODEBUG_HASH_SIZE; i++)
909 spin_lock_init(&obj_hash[i].lock); 964 raw_spin_lock_init(&obj_hash[i].lock);
910 965
911 for (i = 0; i < ODEBUG_POOL_SIZE; i++) 966 for (i = 0; i < ODEBUG_POOL_SIZE; i++)
912 hlist_add_head(&obj_static_pool[i].node, &obj_pool); 967 hlist_add_head(&obj_static_pool[i].node, &obj_pool);
@@ -915,7 +970,7 @@ void __init debug_objects_early_init(void)
915/* 970/*
916 * Convert the statically allocated objects to dynamic ones: 971 * Convert the statically allocated objects to dynamic ones:
917 */ 972 */
918static int debug_objects_replace_static_objects(void) 973static int __init debug_objects_replace_static_objects(void)
919{ 974{
920 struct debug_bucket *db = obj_hash; 975 struct debug_bucket *db = obj_hash;
921 struct hlist_node *node, *tmp; 976 struct hlist_node *node, *tmp;
diff --git a/lib/decompress.c b/lib/decompress.c
index d2842f57167..a7606815541 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -9,6 +9,7 @@
9#include <linux/decompress/bunzip2.h> 9#include <linux/decompress/bunzip2.h>
10#include <linux/decompress/unlzma.h> 10#include <linux/decompress/unlzma.h>
11#include <linux/decompress/inflate.h> 11#include <linux/decompress/inflate.h>
12#include <linux/decompress/unlzo.h>
12 13
13#include <linux/types.h> 14#include <linux/types.h>
14#include <linux/string.h> 15#include <linux/string.h>
@@ -22,6 +23,9 @@
22#ifndef CONFIG_DECOMPRESS_LZMA 23#ifndef CONFIG_DECOMPRESS_LZMA
23# define unlzma NULL 24# define unlzma NULL
24#endif 25#endif
26#ifndef CONFIG_DECOMPRESS_LZO
27# define unlzo NULL
28#endif
25 29
26static const struct compress_format { 30static const struct compress_format {
27 unsigned char magic[2]; 31 unsigned char magic[2];
@@ -32,6 +36,7 @@ static const struct compress_format {
32 { {037, 0236}, "gzip", gunzip }, 36 { {037, 0236}, "gzip", gunzip },
33 { {0x42, 0x5a}, "bzip2", bunzip2 }, 37 { {0x42, 0x5a}, "bzip2", bunzip2 },
34 { {0x5d, 0x00}, "lzma", unlzma }, 38 { {0x5d, 0x00}, "lzma", unlzma },
39 { {0x89, 0x4c}, "lzo", unlzo },
35 { {0, 0}, NULL, NULL } 40 { {0, 0}, NULL, NULL }
36}; 41};
37 42
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index 708e2a86d87..81c8bb1cc6a 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -45,12 +45,14 @@
45*/ 45*/
46 46
47 47
48#ifndef STATIC 48#ifdef STATIC
49#define PREBOOT
50#else
49#include <linux/decompress/bunzip2.h> 51#include <linux/decompress/bunzip2.h>
50#endif /* !STATIC */ 52#include <linux/slab.h>
53#endif /* STATIC */
51 54
52#include <linux/decompress/mm.h> 55#include <linux/decompress/mm.h>
53#include <linux/slab.h>
54 56
55#ifndef INT_MAX 57#ifndef INT_MAX
56#define INT_MAX 0x7fffffff 58#define INT_MAX 0x7fffffff
@@ -105,6 +107,8 @@ struct bunzip_data {
105 unsigned char selectors[32768]; /* nSelectors = 15 bits */ 107 unsigned char selectors[32768]; /* nSelectors = 15 bits */
106 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ 108 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
107 int io_error; /* non-zero if we have IO error */ 109 int io_error; /* non-zero if we have IO error */
110 int byteCount[256];
111 unsigned char symToByte[256], mtfSymbol[256];
108}; 112};
109 113
110 114
@@ -156,14 +160,16 @@ static int INIT get_next_block(struct bunzip_data *bd)
156 int *base = NULL; 160 int *base = NULL;
157 int *limit = NULL; 161 int *limit = NULL;
158 int dbufCount, nextSym, dbufSize, groupCount, selector, 162 int dbufCount, nextSym, dbufSize, groupCount, selector,
159 i, j, k, t, runPos, symCount, symTotal, nSelectors, 163 i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
160 byteCount[256]; 164 unsigned char uc, *symToByte, *mtfSymbol, *selectors;
161 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
162 unsigned int *dbuf, origPtr; 165 unsigned int *dbuf, origPtr;
163 166
164 dbuf = bd->dbuf; 167 dbuf = bd->dbuf;
165 dbufSize = bd->dbufSize; 168 dbufSize = bd->dbufSize;
166 selectors = bd->selectors; 169 selectors = bd->selectors;
170 byteCount = bd->byteCount;
171 symToByte = bd->symToByte;
172 mtfSymbol = bd->mtfSymbol;
167 173
168 /* Read in header signature and CRC, then validate signature. 174 /* Read in header signature and CRC, then validate signature.
169 (last block signature means CRC is for whole file, return now) */ 175 (last block signature means CRC is for whole file, return now) */
@@ -297,7 +303,7 @@ static int INIT get_next_block(struct bunzip_data *bd)
297 again when using them (during symbol decoding).*/ 303 again when using them (during symbol decoding).*/
298 base = hufGroup->base-1; 304 base = hufGroup->base-1;
299 limit = hufGroup->limit-1; 305 limit = hufGroup->limit-1;
300 /* Calculate permute[]. Concurently, initialize 306 /* Calculate permute[]. Concurrently, initialize
301 * temp[] and limit[]. */ 307 * temp[] and limit[]. */
302 pp = 0; 308 pp = 0;
303 for (i = minLen; i <= maxLen; i++) { 309 for (i = minLen; i <= maxLen; i++) {
@@ -635,6 +641,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
635 641
636 /* Allocate bunzip_data. Most fields initialize to zero. */ 642 /* Allocate bunzip_data. Most fields initialize to zero. */
637 bd = *bdp = malloc(i); 643 bd = *bdp = malloc(i);
644 if (!bd)
645 return RETVAL_OUT_OF_MEMORY;
638 memset(bd, 0, sizeof(struct bunzip_data)); 646 memset(bd, 0, sizeof(struct bunzip_data));
639 /* Setup input buffer */ 647 /* Setup input buffer */
640 bd->inbuf = inbuf; 648 bd->inbuf = inbuf;
@@ -662,6 +670,8 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
662 bd->dbufSize = 100000*(i-BZh0); 670 bd->dbufSize = 100000*(i-BZh0);
663 671
664 bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); 672 bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
673 if (!bd->dbuf)
674 return RETVAL_OUT_OF_MEMORY;
665 return RETVAL_OK; 675 return RETVAL_OK;
666} 676}
667 677
@@ -681,12 +691,10 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
681 set_error_fn(error_fn); 691 set_error_fn(error_fn);
682 if (flush) 692 if (flush)
683 outbuf = malloc(BZIP2_IOBUF_SIZE); 693 outbuf = malloc(BZIP2_IOBUF_SIZE);
684 else 694
685 len -= 4; /* Uncompressed size hack active in pre-boot
686 environment */
687 if (!outbuf) { 695 if (!outbuf) {
688 error("Could not allocate output bufer"); 696 error("Could not allocate output bufer");
689 return -1; 697 return RETVAL_OUT_OF_MEMORY;
690 } 698 }
691 if (buf) 699 if (buf)
692 inbuf = buf; 700 inbuf = buf;
@@ -694,6 +702,7 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
694 inbuf = malloc(BZIP2_IOBUF_SIZE); 702 inbuf = malloc(BZIP2_IOBUF_SIZE);
695 if (!inbuf) { 703 if (!inbuf) {
696 error("Could not allocate input bufer"); 704 error("Could not allocate input bufer");
705 i = RETVAL_OUT_OF_MEMORY;
697 goto exit_0; 706 goto exit_0;
698 } 707 }
699 i = start_bunzip(&bd, inbuf, len, fill); 708 i = start_bunzip(&bd, inbuf, len, fill);
@@ -720,11 +729,14 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
720 } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { 729 } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
721 error("Compressed file ends unexpectedly"); 730 error("Compressed file ends unexpectedly");
722 } 731 }
732 if (!bd)
733 goto exit_1;
723 if (bd->dbuf) 734 if (bd->dbuf)
724 large_free(bd->dbuf); 735 large_free(bd->dbuf);
725 if (pos) 736 if (pos)
726 *pos = bd->inbufPos; 737 *pos = bd->inbufPos;
727 free(bd); 738 free(bd);
739exit_1:
728 if (!buf) 740 if (!buf)
729 free(inbuf); 741 free(inbuf);
730exit_0: 742exit_0:
@@ -733,4 +745,14 @@ exit_0:
733 return i; 745 return i;
734} 746}
735 747
736#define decompress bunzip2 748#ifdef PREBOOT
749STATIC int INIT decompress(unsigned char *buf, int len,
750 int(*fill)(void*, unsigned int),
751 int(*flush)(void*, unsigned int),
752 unsigned char *outbuf,
753 int *pos,
754 void(*error_fn)(char *x))
755{
756 return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
757}
758#endif
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index e36b296fc9f..fc686c7a0a0 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -19,13 +19,18 @@
19#include "zlib_inflate/inflate.h" 19#include "zlib_inflate/inflate.h"
20 20
21#include "zlib_inflate/infutil.h" 21#include "zlib_inflate/infutil.h"
22#include <linux/slab.h>
22 23
23#endif /* STATIC */ 24#endif /* STATIC */
24 25
25#include <linux/decompress/mm.h> 26#include <linux/decompress/mm.h>
26#include <linux/slab.h>
27 27
28#define INBUF_LEN (16*1024) 28#define GZIP_IOBUF_SIZE (16*1024)
29
30static int nofill(void *buffer, unsigned int len)
31{
32 return -1;
33}
29 34
30/* Included from initramfs et al code */ 35/* Included from initramfs et al code */
31STATIC int INIT gunzip(unsigned char *buf, int len, 36STATIC int INIT gunzip(unsigned char *buf, int len,
@@ -55,7 +60,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
55 if (buf) 60 if (buf)
56 zbuf = buf; 61 zbuf = buf;
57 else { 62 else {
58 zbuf = malloc(INBUF_LEN); 63 zbuf = malloc(GZIP_IOBUF_SIZE);
59 len = 0; 64 len = 0;
60 } 65 }
61 if (!zbuf) { 66 if (!zbuf) {
@@ -76,8 +81,11 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
76 goto gunzip_nomem4; 81 goto gunzip_nomem4;
77 } 82 }
78 83
84 if (!fill)
85 fill = nofill;
86
79 if (len == 0) 87 if (len == 0)
80 len = fill(zbuf, INBUF_LEN); 88 len = fill(zbuf, GZIP_IOBUF_SIZE);
81 89
82 /* verify the gzip header */ 90 /* verify the gzip header */
83 if (len < 10 || 91 if (len < 10 ||
@@ -113,7 +121,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
113 while (rc == Z_OK) { 121 while (rc == Z_OK) {
114 if (strm->avail_in == 0) { 122 if (strm->avail_in == 0) {
115 /* TODO: handle case where both pos and fill are set */ 123 /* TODO: handle case where both pos and fill are set */
116 len = fill(zbuf, INBUF_LEN); 124 len = fill(zbuf, GZIP_IOBUF_SIZE);
117 if (len < 0) { 125 if (len < 0) {
118 rc = -1; 126 rc = -1;
119 error("read error"); 127 error("read error");
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index 32123a1340e..ca82fde81c8 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -29,12 +29,14 @@
29 *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 29 *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 */ 30 */
31 31
32#ifndef STATIC 32#ifdef STATIC
33#define PREBOOT
34#else
33#include <linux/decompress/unlzma.h> 35#include <linux/decompress/unlzma.h>
36#include <linux/slab.h>
34#endif /* STATIC */ 37#endif /* STATIC */
35 38
36#include <linux/decompress/mm.h> 39#include <linux/decompress/mm.h>
37#include <linux/slab.h>
38 40
39#define MIN(a, b) (((a) < (b)) ? (a) : (b)) 41#define MIN(a, b) (((a) < (b)) ? (a) : (b))
40 42
@@ -80,6 +82,11 @@ struct rc {
80#define RC_MODEL_TOTAL_BITS 11 82#define RC_MODEL_TOTAL_BITS 11
81 83
82 84
85static int nofill(void *buffer, unsigned int len)
86{
87 return -1;
88}
89
83/* Called twice: once at startup and once in rc_normalize() */ 90/* Called twice: once at startup and once in rc_normalize() */
84static void INIT rc_read(struct rc *rc) 91static void INIT rc_read(struct rc *rc)
85{ 92{
@@ -95,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc,
95 int (*fill)(void*, unsigned int), 102 int (*fill)(void*, unsigned int),
96 char *buffer, int buffer_size) 103 char *buffer, int buffer_size)
97{ 104{
98 rc->fill = fill; 105 if (fill)
106 rc->fill = fill;
107 else
108 rc->fill = nofill;
99 rc->buffer = (uint8_t *)buffer; 109 rc->buffer = (uint8_t *)buffer;
100 rc->buffer_size = buffer_size; 110 rc->buffer_size = buffer_size;
101 rc->buffer_end = rc->buffer + rc->buffer_size; 111 rc->buffer_end = rc->buffer + rc->buffer_size;
@@ -543,9 +553,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
543 int ret = -1; 553 int ret = -1;
544 554
545 set_error_fn(error_fn); 555 set_error_fn(error_fn);
546 if (!flush) 556
547 in_len -= 4; /* Uncompressed size hack active in pre-boot
548 environment */
549 if (buf) 557 if (buf)
550 inbuf = buf; 558 inbuf = buf;
551 else 559 else
@@ -645,4 +653,15 @@ exit_0:
645 return ret; 653 return ret;
646} 654}
647 655
648#define decompress unlzma 656#ifdef PREBOOT
657STATIC int INIT decompress(unsigned char *buf, int in_len,
658 int(*fill)(void*, unsigned int),
659 int(*flush)(void*, unsigned int),
660 unsigned char *output,
661 int *posp,
662 void(*error_fn)(char *x)
663 )
664{
665 return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn);
666}
667#endif
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
new file mode 100644
index 00000000000..bcb3a4bd68f
--- /dev/null
+++ b/lib/decompress_unlzo.c
@@ -0,0 +1,217 @@
1/*
2 * LZO decompressor for the Linux kernel. Code borrowed from the lzo
3 * implementation by Markus Franz Xaver Johannes Oberhumer.
4 *
5 * Linux kernel adaptation:
6 * Copyright (C) 2009
7 * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com>
8 *
9 * Original code:
10 * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer
11 * All Rights Reserved.
12 *
13 * lzop and the LZO library are free software; you can redistribute them
14 * and/or modify them under the terms of the GNU General Public License as
15 * published by the Free Software Foundation; either version 2 of
16 * the License, or (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; see the file COPYING.
25 * If not, write to the Free Software Foundation, Inc.,
26 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
27 *
28 * Markus F.X.J. Oberhumer
29 * <markus@oberhumer.com>
30 * http://www.oberhumer.com/opensource/lzop/
31 */
32
33#ifdef STATIC
34#include "lzo/lzo1x_decompress.c"
35#else
36#include <linux/slab.h>
37#include <linux/decompress/unlzo.h>
38#endif
39
40#include <linux/types.h>
41#include <linux/lzo.h>
42#include <linux/decompress/mm.h>
43
44#include <linux/compiler.h>
45#include <asm/unaligned.h>
46
47static const unsigned char lzop_magic[] = {
48 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a };
49
50#define LZO_BLOCK_SIZE (256*1024l)
51#define HEADER_HAS_FILTER 0x00000800L
52
53STATIC inline int INIT parse_header(u8 *input, u8 *skip)
54{
55 int l;
56 u8 *parse = input;
57 u8 level = 0;
58 u16 version;
59
60 /* read magic: 9 first bits */
61 for (l = 0; l < 9; l++) {
62 if (*parse++ != lzop_magic[l])
63 return 0;
64 }
65 /* get version (2bytes), skip library version (2),
66 * 'need to be extracted' version (2) and
67 * method (1) */
68 version = get_unaligned_be16(parse);
69 parse += 7;
70 if (version >= 0x0940)
71 level = *parse++;
72 if (get_unaligned_be32(parse) & HEADER_HAS_FILTER)
73 parse += 8; /* flags + filter info */
74 else
75 parse += 4; /* flags */
76
77 /* skip mode and mtime_low */
78 parse += 8;
79 if (version >= 0x0940)
80 parse += 4; /* skip mtime_high */
81
82 l = *parse++;
83 /* don't care about the file name, and skip checksum */
84 parse += l + 4;
85
86 *skip = parse - input;
87 return 1;
88}
89
90STATIC inline int INIT unlzo(u8 *input, int in_len,
91 int (*fill) (void *, unsigned int),
92 int (*flush) (void *, unsigned int),
93 u8 *output, int *posp,
94 void (*error_fn) (char *x))
95{
96 u8 skip = 0, r = 0;
97 u32 src_len, dst_len;
98 size_t tmp;
99 u8 *in_buf, *in_buf_save, *out_buf;
100 int ret = -1;
101
102 set_error_fn(error_fn);
103
104 if (output) {
105 out_buf = output;
106 } else if (!flush) {
107 error("NULL output pointer and no flush function provided");
108 goto exit;
109 } else {
110 out_buf = malloc(LZO_BLOCK_SIZE);
111 if (!out_buf) {
112 error("Could not allocate output buffer");
113 goto exit;
114 }
115 }
116
117 if (input && fill) {
118 error("Both input pointer and fill function provided, don't know what to do");
119 goto exit_1;
120 } else if (input) {
121 in_buf = input;
122 } else if (!fill || !posp) {
123 error("NULL input pointer and missing position pointer or fill function");
124 goto exit_1;
125 } else {
126 in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
127 if (!in_buf) {
128 error("Could not allocate input buffer");
129 goto exit_1;
130 }
131 }
132 in_buf_save = in_buf;
133
134 if (posp)
135 *posp = 0;
136
137 if (fill)
138 fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
139
140 if (!parse_header(input, &skip)) {
141 error("invalid header");
142 goto exit_2;
143 }
144 in_buf += skip;
145
146 if (posp)
147 *posp = skip;
148
149 for (;;) {
150 /* read uncompressed block size */
151 dst_len = get_unaligned_be32(in_buf);
152 in_buf += 4;
153
154 /* exit if last block */
155 if (dst_len == 0) {
156 if (posp)
157 *posp += 4;
158 break;
159 }
160
161 if (dst_len > LZO_BLOCK_SIZE) {
162 error("dest len longer than block size");
163 goto exit_2;
164 }
165
166 /* read compressed block size, and skip block checksum info */
167 src_len = get_unaligned_be32(in_buf);
168 in_buf += 8;
169
170 if (src_len <= 0 || src_len > dst_len) {
171 error("file corrupted");
172 goto exit_2;
173 }
174
175 /* decompress */
176 tmp = dst_len;
177
178 /* When the input data is not compressed at all,
179 * lzo1x_decompress_safe will fail, so call memcpy()
180 * instead */
181 if (unlikely(dst_len == src_len))
182 memcpy(out_buf, in_buf, src_len);
183 else {
184 r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
185 out_buf, &tmp);
186
187 if (r != LZO_E_OK || dst_len != tmp) {
188 error("Compressed data violation");
189 goto exit_2;
190 }
191 }
192
193 if (flush)
194 flush(out_buf, dst_len);
195 if (output)
196 out_buf += dst_len;
197 if (posp)
198 *posp += src_len + 12;
199 if (fill) {
200 in_buf = in_buf_save;
201 fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
202 } else
203 in_buf += src_len;
204 }
205
206 ret = 0;
207exit_2:
208 if (!input)
209 free(in_buf);
210exit_1:
211 if (!output)
212 free(out_buf);
213exit:
214 return ret;
215}
216
217#define decompress unlzo
diff --git a/lib/devres.c b/lib/devres.c
index 72c8909006d..6efddf53b90 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,5 +1,6 @@
1#include <linux/pci.h> 1#include <linux/pci.h>
2#include <linux/io.h> 2#include <linux/io.h>
3#include <linux/gfp.h>
3#include <linux/module.h> 4#include <linux/module.h>
4 5
5void devm_ioremap_release(struct device *dev, void *res) 6void devm_ioremap_release(struct device *dev, void *res)
@@ -327,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
327 * @pdev: PCI device to map IO resources for 328 * @pdev: PCI device to map IO resources for
328 * @mask: Mask of BARs to unmap and release 329 * @mask: Mask of BARs to unmap and release
329 * 330 *
330 * Unamp and release regions specified by @mask. 331 * Unmap and release regions specified by @mask.
331 */ 332 */
332void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) 333void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
333{ 334{
diff --git a/lib/div64.c b/lib/div64.c
index a111eb8de9c..5b491919177 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -77,26 +77,58 @@ s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
77EXPORT_SYMBOL(div_s64_rem); 77EXPORT_SYMBOL(div_s64_rem);
78#endif 78#endif
79 79
80/* 64bit divisor, dividend and result. dynamic precision */ 80/**
81 * div64_u64 - unsigned 64bit divide with 64bit divisor
82 * @dividend: 64bit dividend
83 * @divisor: 64bit divisor
84 *
85 * This implementation is a modified version of the algorithm proposed
86 * by the book 'Hacker's Delight'. The original source and full proof
87 * can be found here and is available for use without restriction.
88 *
89 * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c'
90 */
81#ifndef div64_u64 91#ifndef div64_u64
82u64 div64_u64(u64 dividend, u64 divisor) 92u64 div64_u64(u64 dividend, u64 divisor)
83{ 93{
84 u32 high, d; 94 u32 high = divisor >> 32;
95 u64 quot;
85 96
86 high = divisor >> 32; 97 if (high == 0) {
87 if (high) { 98 quot = div_u64(dividend, divisor);
88 unsigned int shift = fls(high); 99 } else {
100 int n = 1 + fls(high);
101 quot = div_u64(dividend >> n, divisor >> n);
89 102
90 d = divisor >> shift; 103 if (quot != 0)
91 dividend >>= shift; 104 quot--;
92 } else 105 if ((dividend - quot * divisor) >= divisor)
93 d = divisor; 106 quot++;
107 }
94 108
95 return div_u64(dividend, d); 109 return quot;
96} 110}
97EXPORT_SYMBOL(div64_u64); 111EXPORT_SYMBOL(div64_u64);
98#endif 112#endif
99 113
114/**
115 * div64_s64 - signed 64bit divide with 64bit divisor
116 * @dividend: 64bit dividend
117 * @divisor: 64bit divisor
118 */
119#ifndef div64_s64
120s64 div64_s64(s64 dividend, s64 divisor)
121{
122 s64 quot, t;
123
124 quot = div64_u64(abs64(dividend), abs64(divisor));
125 t = (dividend ^ divisor) >> 63;
126
127 return (quot ^ t) - t;
128}
129EXPORT_SYMBOL(div64_s64);
130#endif
131
100#endif /* BITS_PER_LONG == 32 */ 132#endif /* BITS_PER_LONG == 32 */
101 133
102/* 134/*
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 3b93129a968..4bfb0471f10 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -156,9 +156,13 @@ static bool driver_filter(struct device *dev)
156 return true; 156 return true;
157 157
158 /* driver filter on and initialized */ 158 /* driver filter on and initialized */
159 if (current_driver && dev->driver == current_driver) 159 if (current_driver && dev && dev->driver == current_driver)
160 return true; 160 return true;
161 161
162 /* driver filter on, but we can't filter on a NULL device... */
163 if (!dev)
164 return false;
165
162 if (current_driver || !current_driver_name[0]) 166 if (current_driver || !current_driver_name[0])
163 return false; 167 return false;
164 168
@@ -183,17 +187,17 @@ static bool driver_filter(struct device *dev)
183 return ret; 187 return ret;
184} 188}
185 189
186#define err_printk(dev, entry, format, arg...) do { \ 190#define err_printk(dev, entry, format, arg...) do { \
187 error_count += 1; \ 191 error_count += 1; \
188 if (driver_filter(dev) && \ 192 if (driver_filter(dev) && \
189 (show_all_errors || show_num_errors > 0)) { \ 193 (show_all_errors || show_num_errors > 0)) { \
190 WARN(1, "%s %s: " format, \ 194 WARN(1, "%s %s: " format, \
191 dev_driver_string(dev), \ 195 dev ? dev_driver_string(dev) : "NULL", \
192 dev_name(dev) , ## arg); \ 196 dev ? dev_name(dev) : "NULL", ## arg); \
193 dump_entry_trace(entry); \ 197 dump_entry_trace(entry); \
194 } \ 198 } \
195 if (!show_all_errors && show_num_errors > 0) \ 199 if (!show_all_errors && show_num_errors > 0) \
196 show_num_errors -= 1; \ 200 show_num_errors -= 1; \
197 } while (0); 201 } while (0);
198 202
199/* 203/*
@@ -255,7 +259,7 @@ static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
255 * times. Without a hardware IOMMU this results in the 259 * times. Without a hardware IOMMU this results in the
256 * same device addresses being put into the dma-debug 260 * same device addresses being put into the dma-debug
257 * hash multiple times too. This can result in false 261 * hash multiple times too. This can result in false
258 * positives being reported. Therfore we implement a 262 * positives being reported. Therefore we implement a
259 * best-fit algorithm here which returns the entry from 263 * best-fit algorithm here which returns the entry from
260 * the hash which fits best to the reference value 264 * the hash which fits best to the reference value
261 * instead of the first-fit. 265 * instead of the first-fit.
@@ -566,7 +570,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
566 * Now parse out the first token and use it as the name for the 570 * Now parse out the first token and use it as the name for the
567 * driver to filter for. 571 * driver to filter for.
568 */ 572 */
569 for (i = 0; i < NAME_MAX_LEN; ++i) { 573 for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
570 current_driver_name[i] = buf[i]; 574 current_driver_name[i] = buf[i];
571 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) 575 if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
572 break; 576 break;
@@ -583,9 +587,10 @@ out_unlock:
583 return count; 587 return count;
584} 588}
585 589
586const struct file_operations filter_fops = { 590static const struct file_operations filter_fops = {
587 .read = filter_read, 591 .read = filter_read,
588 .write = filter_write, 592 .write = filter_write,
593 .llseek = default_llseek,
589}; 594};
590 595
591static int dma_debug_fs_init(void) 596static int dma_debug_fs_init(void)
@@ -666,12 +671,13 @@ static int device_dma_allocations(struct device *dev)
666 return count; 671 return count;
667} 672}
668 673
669static int dma_debug_device_change(struct notifier_block *nb, 674static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
670 unsigned long action, void *data)
671{ 675{
672 struct device *dev = data; 676 struct device *dev = data;
673 int count; 677 int count;
674 678
679 if (global_disable)
680 return 0;
675 681
676 switch (action) { 682 switch (action) {
677 case BUS_NOTIFY_UNBOUND_DRIVER: 683 case BUS_NOTIFY_UNBOUND_DRIVER:
@@ -693,6 +699,9 @@ void dma_debug_add_bus(struct bus_type *bus)
693{ 699{
694 struct notifier_block *nb; 700 struct notifier_block *nb;
695 701
702 if (global_disable)
703 return;
704
696 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); 705 nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
697 if (nb == NULL) { 706 if (nb == NULL) {
698 pr_err("dma_debug_add_bus: out of memory\n"); 707 pr_err("dma_debug_add_bus: out of memory\n");
@@ -716,7 +725,7 @@ void dma_debug_init(u32 num_entries)
716 725
717 for (i = 0; i < HASH_SIZE; ++i) { 726 for (i = 0; i < HASH_SIZE; ++i) {
718 INIT_LIST_HEAD(&dma_entry_hash[i].list); 727 INIT_LIST_HEAD(&dma_entry_hash[i].list);
719 dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; 728 spin_lock_init(&dma_entry_hash[i].lock);
720 } 729 }
721 730
722 if (dma_debug_fs_init() != 0) { 731 if (dma_debug_fs_init() != 0) {
@@ -815,9 +824,11 @@ static void check_unmap(struct dma_debug_entry *ref)
815 err_printk(ref->dev, entry, "DMA-API: device driver frees " 824 err_printk(ref->dev, entry, "DMA-API: device driver frees "
816 "DMA memory with different CPU address " 825 "DMA memory with different CPU address "
817 "[device address=0x%016llx] [size=%llu bytes] " 826 "[device address=0x%016llx] [size=%llu bytes] "
818 "[cpu alloc address=%p] [cpu free address=%p]", 827 "[cpu alloc address=0x%016llx] "
828 "[cpu free address=0x%016llx]",
819 ref->dev_addr, ref->size, 829 ref->dev_addr, ref->size,
820 (void *)entry->paddr, (void *)ref->paddr); 830 (unsigned long long)entry->paddr,
831 (unsigned long long)ref->paddr);
821 } 832 }
822 833
823 if (ref->sg_call_ents && ref->type == dma_debug_sg && 834 if (ref->sg_call_ents && ref->type == dma_debug_sg &&
@@ -856,22 +867,21 @@ static void check_for_stack(struct device *dev, void *addr)
856 "stack [addr=%p]\n", addr); 867 "stack [addr=%p]\n", addr);
857} 868}
858 869
859static inline bool overlap(void *addr, u64 size, void *start, void *end) 870static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
860{ 871{
861 void *addr2 = (char *)addr + size; 872 unsigned long a1 = (unsigned long)addr;
873 unsigned long b1 = a1 + len;
874 unsigned long a2 = (unsigned long)start;
875 unsigned long b2 = (unsigned long)end;
862 876
863 return ((addr >= start && addr < end) || 877 return !(b1 <= a2 || a1 >= b2);
864 (addr2 >= start && addr2 < end) ||
865 ((addr < start) && (addr2 >= end)));
866} 878}
867 879
868static void check_for_illegal_area(struct device *dev, void *addr, u64 size) 880static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
869{ 881{
870 if (overlap(addr, size, _text, _etext) || 882 if (overlap(addr, len, _text, _etext) ||
871 overlap(addr, size, __start_rodata, __end_rodata)) 883 overlap(addr, len, __start_rodata, __end_rodata))
872 err_printk(dev, NULL, "DMA-API: device driver maps " 884 err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
873 "memory from kernel text or rodata "
874 "[addr=%p] [size=%llu]\n", addr, size);
875} 885}
876 886
877static void check_sync(struct device *dev, 887static void check_sync(struct device *dev,
@@ -904,6 +914,9 @@ static void check_sync(struct device *dev,
904 ref->size); 914 ref->size);
905 } 915 }
906 916
917 if (entry->direction == DMA_BIDIRECTIONAL)
918 goto out;
919
907 if (ref->direction != entry->direction) { 920 if (ref->direction != entry->direction) {
908 err_printk(dev, entry, "DMA-API: device driver syncs " 921 err_printk(dev, entry, "DMA-API: device driver syncs "
909 "DMA memory with different direction " 922 "DMA memory with different direction "
@@ -914,9 +927,6 @@ static void check_sync(struct device *dev,
914 dir2name[ref->direction]); 927 dir2name[ref->direction]);
915 } 928 }
916 929
917 if (entry->direction == DMA_BIDIRECTIONAL)
918 goto out;
919
920 if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && 930 if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
921 !(ref->direction == DMA_TO_DEVICE)) 931 !(ref->direction == DMA_TO_DEVICE))
922 err_printk(dev, entry, "DMA-API: device driver syncs " 932 err_printk(dev, entry, "DMA-API: device driver syncs "
@@ -939,7 +949,6 @@ static void check_sync(struct device *dev,
939 949
940out: 950out:
941 put_hash_bucket(bucket, &flags); 951 put_hash_bucket(bucket, &flags);
942
943} 952}
944 953
945void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, 954void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
@@ -969,7 +978,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
969 entry->type = dma_debug_single; 978 entry->type = dma_debug_single;
970 979
971 if (!PageHighMem(page)) { 980 if (!PageHighMem(page)) {
972 void *addr = ((char *)page_address(page)) + offset; 981 void *addr = page_address(page) + offset;
982
973 check_for_stack(dev, addr); 983 check_for_stack(dev, addr);
974 check_for_illegal_area(dev, addr, size); 984 check_for_illegal_area(dev, addr, size);
975 } 985 }
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 833139ce1e2..b335acb43be 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -21,22 +21,16 @@
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/string.h>
24#include <linux/uaccess.h> 25#include <linux/uaccess.h>
25#include <linux/dynamic_debug.h> 26#include <linux/dynamic_debug.h>
26#include <linux/debugfs.h> 27#include <linux/debugfs.h>
28#include <linux/slab.h>
29#include <linux/jump_label.h>
27 30
28extern struct _ddebug __start___verbose[]; 31extern struct _ddebug __start___verbose[];
29extern struct _ddebug __stop___verbose[]; 32extern struct _ddebug __stop___verbose[];
30 33
31/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which
32 * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
33 * use independent hash functions, to reduce the chance of false positives.
34 */
35long long dynamic_debug_enabled;
36EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
37long long dynamic_debug_enabled2;
38EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
39
40struct ddebug_table { 34struct ddebug_table {
41 struct list_head link; 35 struct list_head link;
42 char *mod_name; 36 char *mod_name;
@@ -86,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
86} 80}
87 81
88/* 82/*
89 * must be called with ddebug_lock held
90 */
91
92static int disabled_hash(char hash, bool first_table)
93{
94 struct ddebug_table *dt;
95 char table_hash_value;
96
97 list_for_each_entry(dt, &ddebug_tables, link) {
98 if (first_table)
99 table_hash_value = dt->ddebugs->primary_hash;
100 else
101 table_hash_value = dt->ddebugs->secondary_hash;
102 if (dt->num_enabled && (hash == table_hash_value))
103 return 0;
104 }
105 return 1;
106}
107
108/*
109 * Search the tables for _ddebug's which match the given 83 * Search the tables for _ddebug's which match the given
110 * `query' and apply the `flags' and `mask' to them. Tells 84 * `query' and apply the `flags' and `mask' to them. Tells
111 * the user which ddebug's were changed, or whether none 85 * the user which ddebug's were changed, or whether none
@@ -164,22 +138,13 @@ static void ddebug_change(const struct ddebug_query *query,
164 138
165 if (!newflags) 139 if (!newflags)
166 dt->num_enabled--; 140 dt->num_enabled--;
167 else if (!dp-flags) 141 else if (!dp->flags)
168 dt->num_enabled++; 142 dt->num_enabled++;
169 dp->flags = newflags; 143 dp->flags = newflags;
170 if (newflags) { 144 if (newflags)
171 dynamic_debug_enabled |= 145 dp->enabled = 1;
172 (1LL << dp->primary_hash); 146 else
173 dynamic_debug_enabled2 |= 147 dp->enabled = 0;
174 (1LL << dp->secondary_hash);
175 } else {
176 if (disabled_hash(dp->primary_hash, true))
177 dynamic_debug_enabled &=
178 ~(1LL << dp->primary_hash);
179 if (disabled_hash(dp->secondary_hash, false))
180 dynamic_debug_enabled2 &=
181 ~(1LL << dp->secondary_hash);
182 }
183 if (verbose) 148 if (verbose)
184 printk(KERN_INFO 149 printk(KERN_INFO
185 "ddebug: changed %s:%d [%s]%s %s\n", 150 "ddebug: changed %s:%d [%s]%s %s\n",
@@ -209,8 +174,7 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords)
209 char *end; 174 char *end;
210 175
211 /* Skip leading whitespace */ 176 /* Skip leading whitespace */
212 while (*buf && isspace(*buf)) 177 buf = skip_spaces(buf);
213 buf++;
214 if (!*buf) 178 if (!*buf)
215 break; /* oh, it was trailing whitespace */ 179 break; /* oh, it was trailing whitespace */
216 180
@@ -428,6 +392,40 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
428 return 0; 392 return 0;
429} 393}
430 394
395static int ddebug_exec_query(char *query_string)
396{
397 unsigned int flags = 0, mask = 0;
398 struct ddebug_query query;
399#define MAXWORDS 9
400 int nwords;
401 char *words[MAXWORDS];
402
403 nwords = ddebug_tokenize(query_string, words, MAXWORDS);
404 if (nwords <= 0)
405 return -EINVAL;
406 if (ddebug_parse_query(words, nwords-1, &query))
407 return -EINVAL;
408 if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
409 return -EINVAL;
410
411 /* actually go and implement the change */
412 ddebug_change(&query, flags, mask);
413 return 0;
414}
415
416static __initdata char ddebug_setup_string[1024];
417static __init int ddebug_setup_query(char *str)
418{
419 if (strlen(str) >= 1024) {
420 pr_warning("ddebug boot param string too large\n");
421 return 0;
422 }
423 strcpy(ddebug_setup_string, str);
424 return 1;
425}
426
427__setup("ddebug_query=", ddebug_setup_query);
428
431/* 429/*
432 * File_ops->write method for <debugfs>/dynamic_debug/conrol. Gathers the 430 * File_ops->write method for <debugfs>/dynamic_debug/conrol. Gathers the
433 * command text from userspace, parses and executes it. 431 * command text from userspace, parses and executes it.
@@ -435,12 +433,8 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
435static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, 433static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
436 size_t len, loff_t *offp) 434 size_t len, loff_t *offp)
437{ 435{
438 unsigned int flags = 0, mask = 0;
439 struct ddebug_query query;
440#define MAXWORDS 9
441 int nwords;
442 char *words[MAXWORDS];
443 char tmpbuf[256]; 436 char tmpbuf[256];
437 int ret;
444 438
445 if (len == 0) 439 if (len == 0)
446 return 0; 440 return 0;
@@ -454,16 +448,9 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
454 printk(KERN_INFO "%s: read %d bytes from userspace\n", 448 printk(KERN_INFO "%s: read %d bytes from userspace\n",
455 __func__, (int)len); 449 __func__, (int)len);
456 450
457 nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); 451 ret = ddebug_exec_query(tmpbuf);
458 if (nwords < 0) 452 if (ret)
459 return -EINVAL; 453 return ret;
460 if (ddebug_parse_query(words, nwords-1, &query))
461 return -EINVAL;
462 if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
463 return -EINVAL;
464
465 /* actually go and implement the change */
466 ddebug_change(&query, flags, mask);
467 454
468 *offp += len; 455 *offp += len;
469 return len; 456 return len;
@@ -691,7 +678,7 @@ static void ddebug_table_free(struct ddebug_table *dt)
691 * Called in response to a module being unloaded. Removes 678 * Called in response to a module being unloaded. Removes
692 * any ddebug_table's which point at the module. 679 * any ddebug_table's which point at the module.
693 */ 680 */
694int ddebug_remove_module(char *mod_name) 681int ddebug_remove_module(const char *mod_name)
695{ 682{
696 struct ddebug_table *dt, *nextdt; 683 struct ddebug_table *dt, *nextdt;
697 int ret = -ENOENT; 684 int ret = -ENOENT;
@@ -724,13 +711,14 @@ static void ddebug_remove_all_tables(void)
724 mutex_unlock(&ddebug_lock); 711 mutex_unlock(&ddebug_lock);
725} 712}
726 713
727static int __init dynamic_debug_init(void) 714static __initdata int ddebug_init_success;
715
716static int __init dynamic_debug_init_debugfs(void)
728{ 717{
729 struct dentry *dir, *file; 718 struct dentry *dir, *file;
730 struct _ddebug *iter, *iter_start; 719
731 const char *modname = NULL; 720 if (!ddebug_init_success)
732 int ret = 0; 721 return -ENODEV;
733 int n = 0;
734 722
735 dir = debugfs_create_dir("dynamic_debug", NULL); 723 dir = debugfs_create_dir("dynamic_debug", NULL);
736 if (!dir) 724 if (!dir)
@@ -741,6 +729,16 @@ static int __init dynamic_debug_init(void)
741 debugfs_remove(dir); 729 debugfs_remove(dir);
742 return -ENOMEM; 730 return -ENOMEM;
743 } 731 }
732 return 0;
733}
734
735static int __init dynamic_debug_init(void)
736{
737 struct _ddebug *iter, *iter_start;
738 const char *modname = NULL;
739 int ret = 0;
740 int n = 0;
741
744 if (__start___verbose != __stop___verbose) { 742 if (__start___verbose != __stop___verbose) {
745 iter = __start___verbose; 743 iter = __start___verbose;
746 modname = iter->modname; 744 modname = iter->modname;
@@ -758,12 +756,26 @@ static int __init dynamic_debug_init(void)
758 } 756 }
759 ret = ddebug_add_module(iter_start, n, modname); 757 ret = ddebug_add_module(iter_start, n, modname);
760 } 758 }
759
760 /* ddebug_query boot param got passed -> set it up */
761 if (ddebug_setup_string[0] != '\0') {
762 ret = ddebug_exec_query(ddebug_setup_string);
763 if (ret)
764 pr_warning("Invalid ddebug boot param %s",
765 ddebug_setup_string);
766 else
767 pr_info("ddebug initialized with string %s",
768 ddebug_setup_string);
769 }
770
761out_free: 771out_free:
762 if (ret) { 772 if (ret)
763 ddebug_remove_all_tables(); 773 ddebug_remove_all_tables();
764 debugfs_remove(dir); 774 else
765 debugfs_remove(file); 775 ddebug_init_success = 1;
766 }
767 return 0; 776 return 0;
768} 777}
769module_init(dynamic_debug_init); 778/* Allow early initialization for boot messages via boot param */
779arch_initcall(dynamic_debug_init);
780/* Debugfs setup must be done later */
781module_init(dynamic_debug_init_debugfs);
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index f97af55bdd9..7e65af70635 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -1,6 +1,7 @@
1#include <linux/kernel.h> 1#include <linux/kernel.h>
2#include <linux/init.h> 2#include <linux/init.h>
3#include <linux/random.h> 3#include <linux/random.h>
4#include <linux/sched.h>
4#include <linux/stat.h> 5#include <linux/stat.h>
5#include <linux/types.h> 6#include <linux/types.h>
6#include <linux/fs.h> 7#include <linux/fs.h>
diff --git a/lib/flex_array.c b/lib/flex_array.c
new file mode 100644
index 00000000000..77a6fea7481
--- /dev/null
+++ b/lib/flex_array.c
@@ -0,0 +1,350 @@
1/*
2 * Flexible array managed in PAGE_SIZE parts
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2009
19 *
20 * Author: Dave Hansen <dave@linux.vnet.ibm.com>
21 */
22
23#include <linux/flex_array.h>
24#include <linux/slab.h>
25#include <linux/stddef.h>
26
27struct flex_array_part {
28 char elements[FLEX_ARRAY_PART_SIZE];
29};
30
31/*
32 * If a user requests an allocation which is small
33 * enough, we may simply use the space in the
34 * flex_array->parts[] array to store the user
35 * data.
36 */
37static inline int elements_fit_in_base(struct flex_array *fa)
38{
39 int data_size = fa->element_size * fa->total_nr_elements;
40 if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)
41 return 1;
42 return 0;
43}
44
45/**
46 * flex_array_alloc - allocate a new flexible array
47 * @element_size: the size of individual elements in the array
48 * @total: total number of elements that this should hold
49 * @flags: page allocation flags to use for base array
50 *
51 * Note: all locking must be provided by the caller.
52 *
53 * @total is used to size internal structures. If the user ever
54 * accesses any array indexes >=@total, it will produce errors.
55 *
56 * The maximum number of elements is defined as: the number of
57 * elements that can be stored in a page times the number of
58 * page pointers that we can fit in the base structure or (using
59 * integer math):
60 *
61 * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
62 *
63 * Here's a table showing example capacities. Note that the maximum
64 * index that the get/put() functions is just nr_objects-1. This
65 * basically means that you get 4MB of storage on 32-bit and 2MB on
66 * 64-bit.
67 *
68 *
69 * Element size | Objects | Objects |
70 * PAGE_SIZE=4k | 32-bit | 64-bit |
71 * ---------------------------------|
72 * 1 bytes | 4186112 | 2093056 |
73 * 2 bytes | 2093056 | 1046528 |
74 * 3 bytes | 1395030 | 697515 |
75 * 4 bytes | 1046528 | 523264 |
76 * 32 bytes | 130816 | 65408 |
77 * 33 bytes | 126728 | 63364 |
78 * 2048 bytes | 2044 | 1022 |
79 * 2049 bytes | 1022 | 511 |
80 * void * | 1046528 | 261632 |
81 *
82 * Since 64-bit pointers are twice the size, we lose half the
83 * capacity in the base structure. Also note that no effort is made
84 * to efficiently pack objects across page boundaries.
85 */
86struct flex_array *flex_array_alloc(int element_size, unsigned int total,
87 gfp_t flags)
88{
89 struct flex_array *ret;
90 int max_size = FLEX_ARRAY_NR_BASE_PTRS *
91 FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
92
93 /* max_size will end up 0 if element_size > PAGE_SIZE */
94 if (total > max_size)
95 return NULL;
96 ret = kzalloc(sizeof(struct flex_array), flags);
97 if (!ret)
98 return NULL;
99 ret->element_size = element_size;
100 ret->total_nr_elements = total;
101 if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
102 memset(&ret->parts[0], FLEX_ARRAY_FREE,
103 FLEX_ARRAY_BASE_BYTES_LEFT);
104 return ret;
105}
106
107static int fa_element_to_part_nr(struct flex_array *fa,
108 unsigned int element_nr)
109{
110 return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
111}
112
113/**
114 * flex_array_free_parts - just free the second-level pages
115 * @fa: the flex array from which to free parts
116 *
117 * This is to be used in cases where the base 'struct flex_array'
118 * has been statically allocated and should not be free.
119 */
120void flex_array_free_parts(struct flex_array *fa)
121{
122 int part_nr;
123
124 if (elements_fit_in_base(fa))
125 return;
126 for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
127 kfree(fa->parts[part_nr]);
128}
129
130void flex_array_free(struct flex_array *fa)
131{
132 flex_array_free_parts(fa);
133 kfree(fa);
134}
135
136static unsigned int index_inside_part(struct flex_array *fa,
137 unsigned int element_nr)
138{
139 unsigned int part_offset;
140
141 part_offset = element_nr %
142 FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
143 return part_offset * fa->element_size;
144}
145
146static struct flex_array_part *
147__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
148{
149 struct flex_array_part *part = fa->parts[part_nr];
150 if (!part) {
151 part = kmalloc(sizeof(struct flex_array_part), flags);
152 if (!part)
153 return NULL;
154 if (!(flags & __GFP_ZERO))
155 memset(part, FLEX_ARRAY_FREE,
156 sizeof(struct flex_array_part));
157 fa->parts[part_nr] = part;
158 }
159 return part;
160}
161
162/**
163 * flex_array_put - copy data into the array at @element_nr
164 * @fa: the flex array to copy data into
165 * @element_nr: index of the position in which to insert
166 * the new element.
167 * @src: address of data to copy into the array
168 * @flags: page allocation flags to use for array expansion
169 *
170 *
171 * Note that this *copies* the contents of @src into
172 * the array. If you are trying to store an array of
173 * pointers, make sure to pass in &ptr instead of ptr.
174 * You may instead wish to use the flex_array_put_ptr()
175 * helper function.
176 *
177 * Locking must be provided by the caller.
178 */
179int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
180 gfp_t flags)
181{
182 int part_nr = fa_element_to_part_nr(fa, element_nr);
183 struct flex_array_part *part;
184 void *dst;
185
186 if (element_nr >= fa->total_nr_elements)
187 return -ENOSPC;
188 if (elements_fit_in_base(fa))
189 part = (struct flex_array_part *)&fa->parts[0];
190 else {
191 part = __fa_get_part(fa, part_nr, flags);
192 if (!part)
193 return -ENOMEM;
194 }
195 dst = &part->elements[index_inside_part(fa, element_nr)];
196 memcpy(dst, src, fa->element_size);
197 return 0;
198}
199
200/**
201 * flex_array_clear - clear element in array at @element_nr
202 * @fa: the flex array of the element.
203 * @element_nr: index of the position to clear.
204 *
205 * Locking must be provided by the caller.
206 */
207int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
208{
209 int part_nr = fa_element_to_part_nr(fa, element_nr);
210 struct flex_array_part *part;
211 void *dst;
212
213 if (element_nr >= fa->total_nr_elements)
214 return -ENOSPC;
215 if (elements_fit_in_base(fa))
216 part = (struct flex_array_part *)&fa->parts[0];
217 else {
218 part = fa->parts[part_nr];
219 if (!part)
220 return -EINVAL;
221 }
222 dst = &part->elements[index_inside_part(fa, element_nr)];
223 memset(dst, FLEX_ARRAY_FREE, fa->element_size);
224 return 0;
225}
226
227/**
228 * flex_array_prealloc - guarantee that array space exists
229 * @fa: the flex array for which to preallocate parts
230 * @start: index of first array element for which space is allocated
231 * @end: index of last (inclusive) element for which space is allocated
232 * @flags: page allocation flags
233 *
234 * This will guarantee that no future calls to flex_array_put()
235 * will allocate memory. It can be used if you are expecting to
236 * be holding a lock or in some atomic context while writing
237 * data into the array.
238 *
239 * Locking must be provided by the caller.
240 */
241int flex_array_prealloc(struct flex_array *fa, unsigned int start,
242 unsigned int end, gfp_t flags)
243{
244 int start_part;
245 int end_part;
246 int part_nr;
247 struct flex_array_part *part;
248
249 if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
250 return -ENOSPC;
251 if (elements_fit_in_base(fa))
252 return 0;
253 start_part = fa_element_to_part_nr(fa, start);
254 end_part = fa_element_to_part_nr(fa, end);
255 for (part_nr = start_part; part_nr <= end_part; part_nr++) {
256 part = __fa_get_part(fa, part_nr, flags);
257 if (!part)
258 return -ENOMEM;
259 }
260 return 0;
261}
262
263/**
264 * flex_array_get - pull data back out of the array
265 * @fa: the flex array from which to extract data
266 * @element_nr: index of the element to fetch from the array
267 *
268 * Returns a pointer to the data at index @element_nr. Note
269 * that this is a copy of the data that was passed in. If you
270 * are using this to store pointers, you'll get back &ptr. You
271 * may instead wish to use the flex_array_get_ptr helper.
272 *
273 * Locking must be provided by the caller.
274 */
275void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
276{
277 int part_nr = fa_element_to_part_nr(fa, element_nr);
278 struct flex_array_part *part;
279
280 if (element_nr >= fa->total_nr_elements)
281 return NULL;
282 if (elements_fit_in_base(fa))
283 part = (struct flex_array_part *)&fa->parts[0];
284 else {
285 part = fa->parts[part_nr];
286 if (!part)
287 return NULL;
288 }
289 return &part->elements[index_inside_part(fa, element_nr)];
290}
291
292/**
293 * flex_array_get_ptr - pull a ptr back out of the array
294 * @fa: the flex array from which to extract data
295 * @element_nr: index of the element to fetch from the array
296 *
297 * Returns the pointer placed in the flex array at element_nr using
298 * flex_array_put_ptr(). This function should not be called if the
299 * element in question was not set using the _put_ptr() helper.
300 */
301void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
302{
303 void **tmp;
304
305 tmp = flex_array_get(fa, element_nr);
306 if (!tmp)
307 return NULL;
308
309 return *tmp;
310}
311
312static int part_is_free(struct flex_array_part *part)
313{
314 int i;
315
316 for (i = 0; i < sizeof(struct flex_array_part); i++)
317 if (part->elements[i] != FLEX_ARRAY_FREE)
318 return 0;
319 return 1;
320}
321
322/**
323 * flex_array_shrink - free unused second-level pages
324 * @fa: the flex array to shrink
325 *
326 * Frees all second-level pages that consist solely of unused
327 * elements. Returns the number of pages freed.
328 *
329 * Locking must be provided by the caller.
330 */
331int flex_array_shrink(struct flex_array *fa)
332{
333 struct flex_array_part *part;
334 int part_nr;
335 int ret = 0;
336
337 if (elements_fit_in_base(fa))
338 return ret;
339 for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
340 part = fa->parts[part_nr];
341 if (!part)
342 continue;
343 if (part_is_free(part)) {
344 fa->parts[part_nr] = NULL;
345 kfree(part);
346 ret++;
347 }
348 }
349 return ret;
350}
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index bea5d97df99..85d0e412a04 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -7,8 +7,8 @@
7#define LE_TABLE_SIZE (1 << CRC_LE_BITS) 7#define LE_TABLE_SIZE (1 << CRC_LE_BITS)
8#define BE_TABLE_SIZE (1 << CRC_BE_BITS) 8#define BE_TABLE_SIZE (1 << CRC_BE_BITS)
9 9
10static uint32_t crc32table_le[LE_TABLE_SIZE]; 10static uint32_t crc32table_le[4][LE_TABLE_SIZE];
11static uint32_t crc32table_be[BE_TABLE_SIZE]; 11static uint32_t crc32table_be[4][BE_TABLE_SIZE];
12 12
13/** 13/**
14 * crc32init_le() - allocate and initialize LE table data 14 * crc32init_le() - allocate and initialize LE table data
@@ -22,12 +22,19 @@ static void crc32init_le(void)
22 unsigned i, j; 22 unsigned i, j;
23 uint32_t crc = 1; 23 uint32_t crc = 1;
24 24
25 crc32table_le[0] = 0; 25 crc32table_le[0][0] = 0;
26 26
27 for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { 27 for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
28 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); 28 crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
29 for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) 29 for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
30 crc32table_le[i + j] = crc ^ crc32table_le[j]; 30 crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
31 }
32 for (i = 0; i < LE_TABLE_SIZE; i++) {
33 crc = crc32table_le[0][i];
34 for (j = 1; j < 4; j++) {
35 crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
36 crc32table_le[j][i] = crc;
37 }
31 } 38 }
32} 39}
33 40
@@ -39,25 +46,35 @@ static void crc32init_be(void)
39 unsigned i, j; 46 unsigned i, j;
40 uint32_t crc = 0x80000000; 47 uint32_t crc = 0x80000000;
41 48
42 crc32table_be[0] = 0; 49 crc32table_be[0][0] = 0;
43 50
44 for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { 51 for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
45 crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); 52 crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
46 for (j = 0; j < i; j++) 53 for (j = 0; j < i; j++)
47 crc32table_be[i + j] = crc ^ crc32table_be[j]; 54 crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
55 }
56 for (i = 0; i < BE_TABLE_SIZE; i++) {
57 crc = crc32table_be[0][i];
58 for (j = 1; j < 4; j++) {
59 crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
60 crc32table_be[j][i] = crc;
61 }
48 } 62 }
49} 63}
50 64
51static void output_table(uint32_t table[], int len, char *trans) 65static void output_table(uint32_t table[4][256], int len, char *trans)
52{ 66{
53 int i; 67 int i, j;
54 68
55 for (i = 0; i < len - 1; i++) { 69 for (j = 0 ; j < 4; j++) {
56 if (i % ENTRIES_PER_LINE == 0) 70 printf("{");
57 printf("\n"); 71 for (i = 0; i < len - 1; i++) {
58 printf("%s(0x%8.8xL), ", trans, table[i]); 72 if (i % ENTRIES_PER_LINE == 0)
73 printf("\n");
74 printf("%s(0x%8.8xL), ", trans, table[j][i]);
75 }
76 printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
59 } 77 }
60 printf("%s(0x%8.8xL)\n", trans, table[len - 1]);
61} 78}
62 79
63int main(int argc, char** argv) 80int main(int argc, char** argv)
@@ -66,14 +83,14 @@ int main(int argc, char** argv)
66 83
67 if (CRC_LE_BITS > 1) { 84 if (CRC_LE_BITS > 1) {
68 crc32init_le(); 85 crc32init_le();
69 printf("static const u32 crc32table_le[] = {"); 86 printf("static const u32 crc32table_le[4][256] = {");
70 output_table(crc32table_le, LE_TABLE_SIZE, "tole"); 87 output_table(crc32table_le, LE_TABLE_SIZE, "tole");
71 printf("};\n"); 88 printf("};\n");
72 } 89 }
73 90
74 if (CRC_BE_BITS > 1) { 91 if (CRC_BE_BITS > 1) {
75 crc32init_be(); 92 crc32init_be();
76 printf("static const u32 crc32table_be[] = {"); 93 printf("static const u32 crc32table_be[4][256] = {");
77 output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); 94 output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
78 printf("};\n"); 95 printf("};\n");
79 } 96 }
diff --git a/lib/genalloc.c b/lib/genalloc.c
index eed2bdb865e..1923f1490e7 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -10,7 +10,9 @@
10 * Version 2. See the file COPYING for more details. 10 * Version 2. See the file COPYING for more details.
11 */ 11 */
12 12
13#include <linux/slab.h>
13#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/bitmap.h>
14#include <linux/genalloc.h> 16#include <linux/genalloc.h>
15 17
16 18
@@ -114,7 +116,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
114 struct gen_pool_chunk *chunk; 116 struct gen_pool_chunk *chunk;
115 unsigned long addr, flags; 117 unsigned long addr, flags;
116 int order = pool->min_alloc_order; 118 int order = pool->min_alloc_order;
117 int nbits, bit, start_bit, end_bit; 119 int nbits, start_bit, end_bit;
118 120
119 if (size == 0) 121 if (size == 0)
120 return 0; 122 return 0;
@@ -126,32 +128,21 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
126 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 128 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
127 129
128 end_bit = (chunk->end_addr - chunk->start_addr) >> order; 130 end_bit = (chunk->end_addr - chunk->start_addr) >> order;
129 end_bit -= nbits + 1;
130 131
131 spin_lock_irqsave(&chunk->lock, flags); 132 spin_lock_irqsave(&chunk->lock, flags);
132 bit = -1; 133 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
133 while (bit + 1 < end_bit) { 134 nbits, 0);
134 bit = find_next_zero_bit(chunk->bits, end_bit, bit + 1); 135 if (start_bit >= end_bit) {
135 if (bit >= end_bit)
136 break;
137
138 start_bit = bit;
139 if (nbits > 1) {
140 bit = find_next_bit(chunk->bits, bit + nbits,
141 bit + 1);
142 if (bit - start_bit < nbits)
143 continue;
144 }
145
146 addr = chunk->start_addr +
147 ((unsigned long)start_bit << order);
148 while (nbits--)
149 __set_bit(start_bit++, chunk->bits);
150 spin_unlock_irqrestore(&chunk->lock, flags); 136 spin_unlock_irqrestore(&chunk->lock, flags);
151 read_unlock(&pool->lock); 137 continue;
152 return addr;
153 } 138 }
139
140 addr = chunk->start_addr + ((unsigned long)start_bit << order);
141
142 bitmap_set(chunk->bits, start_bit, nbits);
154 spin_unlock_irqrestore(&chunk->lock, flags); 143 spin_unlock_irqrestore(&chunk->lock, flags);
144 read_unlock(&pool->lock);
145 return addr;
155 } 146 }
156 read_unlock(&pool->lock); 147 read_unlock(&pool->lock);
157 return 0; 148 return 0;
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 39af2560f76..b66b2bd6795 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -16,6 +16,40 @@ const char hex_asc[] = "0123456789abcdef";
16EXPORT_SYMBOL(hex_asc); 16EXPORT_SYMBOL(hex_asc);
17 17
18/** 18/**
19 * hex_to_bin - convert a hex digit to its real value
20 * @ch: ascii character represents hex digit
21 *
22 * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
23 * input.
24 */
25int hex_to_bin(char ch)
26{
27 if ((ch >= '0') && (ch <= '9'))
28 return ch - '0';
29 ch = tolower(ch);
30 if ((ch >= 'a') && (ch <= 'f'))
31 return ch - 'a' + 10;
32 return -1;
33}
34EXPORT_SYMBOL(hex_to_bin);
35
36/**
37 * hex2bin - convert an ascii hexadecimal string to its binary representation
38 * @dst: binary result
39 * @src: ascii hexadecimal string
40 * @count: result length
41 */
42void hex2bin(u8 *dst, const char *src, size_t count)
43{
44 while (count--) {
45 *dst = hex_to_bin(*src++) << 4;
46 *dst += hex_to_bin(*src++);
47 dst++;
48 }
49}
50EXPORT_SYMBOL(hex2bin);
51
52/**
19 * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory 53 * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
20 * @buf: data blob to dump 54 * @buf: data blob to dump
21 * @len: number of bytes in the @buf 55 * @len: number of bytes in the @buf
@@ -34,7 +68,7 @@ EXPORT_SYMBOL(hex_asc);
34 * 68 *
35 * E.g.: 69 * E.g.:
36 * hex_dump_to_buffer(frame->data, frame->len, 16, 1, 70 * hex_dump_to_buffer(frame->data, frame->len, 16, 1,
37 * linebuf, sizeof(linebuf), 1); 71 * linebuf, sizeof(linebuf), true);
38 * 72 *
39 * example output buffer: 73 * example output buffer:
40 * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO 74 * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
@@ -65,8 +99,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
65 99
66 for (j = 0; j < ngroups; j++) 100 for (j = 0; j < ngroups; j++)
67 lx += scnprintf(linebuf + lx, linebuflen - lx, 101 lx += scnprintf(linebuf + lx, linebuflen - lx,
68 "%s%16.16llx", j ? " " : "", 102 "%s%16.16llx", j ? " " : "",
69 (unsigned long long)*(ptr8 + j)); 103 (unsigned long long)*(ptr8 + j));
70 ascii_column = 17 * ngroups + 2; 104 ascii_column = 17 * ngroups + 2;
71 break; 105 break;
72 } 106 }
@@ -77,7 +111,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
77 111
78 for (j = 0; j < ngroups; j++) 112 for (j = 0; j < ngroups; j++)
79 lx += scnprintf(linebuf + lx, linebuflen - lx, 113 lx += scnprintf(linebuf + lx, linebuflen - lx,
80 "%s%8.8x", j ? " " : "", *(ptr4 + j)); 114 "%s%8.8x", j ? " " : "", *(ptr4 + j));
81 ascii_column = 9 * ngroups + 2; 115 ascii_column = 9 * ngroups + 2;
82 break; 116 break;
83 } 117 }
@@ -88,7 +122,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
88 122
89 for (j = 0; j < ngroups; j++) 123 for (j = 0; j < ngroups; j++)
90 lx += scnprintf(linebuf + lx, linebuflen - lx, 124 lx += scnprintf(linebuf + lx, linebuflen - lx,
91 "%s%4.4x", j ? " " : "", *(ptr2 + j)); 125 "%s%4.4x", j ? " " : "", *(ptr2 + j));
92 ascii_column = 5 * ngroups + 2; 126 ascii_column = 5 * ngroups + 2;
93 break; 127 break;
94 } 128 }
@@ -111,9 +145,10 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
111 145
112 while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) 146 while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
113 linebuf[lx++] = ' '; 147 linebuf[lx++] = ' ';
114 for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) 148 for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
115 linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] 149 ch = ptr[j];
116 : '.'; 150 linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
151 }
117nil: 152nil:
118 linebuf[lx++] = '\0'; 153 linebuf[lx++] = '\0';
119} 154}
@@ -143,7 +178,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
143 * 178 *
144 * E.g.: 179 * E.g.:
145 * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, 180 * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS,
146 * 16, 1, frame->data, frame->len, 1); 181 * 16, 1, frame->data, frame->len, true);
147 * 182 *
148 * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: 183 * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode:
149 * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO 184 * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
@@ -151,12 +186,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
151 * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. 186 * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~.
152 */ 187 */
153void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, 188void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
154 int rowsize, int groupsize, 189 int rowsize, int groupsize,
155 const void *buf, size_t len, bool ascii) 190 const void *buf, size_t len, bool ascii)
156{ 191{
157 const u8 *ptr = buf; 192 const u8 *ptr = buf;
158 int i, linelen, remaining = len; 193 int i, linelen, remaining = len;
159 unsigned char linebuf[200]; 194 unsigned char linebuf[32 * 3 + 2 + 32 + 1];
160 195
161 if (rowsize != 16 && rowsize != 32) 196 if (rowsize != 16 && rowsize != 32)
162 rowsize = 16; 197 rowsize = 16;
@@ -164,13 +199,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
164 for (i = 0; i < len; i += rowsize) { 199 for (i = 0; i < len; i += rowsize) {
165 linelen = min(remaining, rowsize); 200 linelen = min(remaining, rowsize);
166 remaining -= rowsize; 201 remaining -= rowsize;
202
167 hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, 203 hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
168 linebuf, sizeof(linebuf), ascii); 204 linebuf, sizeof(linebuf), ascii);
169 205
170 switch (prefix_type) { 206 switch (prefix_type) {
171 case DUMP_PREFIX_ADDRESS: 207 case DUMP_PREFIX_ADDRESS:
172 printk("%s%s%*p: %s\n", level, prefix_str, 208 printk("%s%s%p: %s\n",
173 (int)(2 * sizeof(void *)), ptr + i, linebuf); 209 level, prefix_str, ptr + i, linebuf);
174 break; 210 break;
175 case DUMP_PREFIX_OFFSET: 211 case DUMP_PREFIX_OFFSET:
176 printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); 212 printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
@@ -196,9 +232,9 @@ EXPORT_SYMBOL(print_hex_dump);
196 * rowsize of 16, groupsize of 1, and ASCII output included. 232 * rowsize of 16, groupsize of 1, and ASCII output included.
197 */ 233 */
198void print_hex_dump_bytes(const char *prefix_str, int prefix_type, 234void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
199 const void *buf, size_t len) 235 const void *buf, size_t len)
200{ 236{
201 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, 237 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
202 buf, len, 1); 238 buf, len, true);
203} 239}
204EXPORT_SYMBOL(print_hex_dump_bytes); 240EXPORT_SYMBOL(print_hex_dump_bytes);
diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb12..3c79d50814c 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,37 +9,45 @@
9 * The Hamming Weight of a number is the total number of bits set in it. 9 * The Hamming Weight of a number is the total number of bits set in it.
10 */ 10 */
11 11
12unsigned int hweight32(unsigned int w) 12unsigned int __sw_hweight32(unsigned int w)
13{ 13{
14#ifdef ARCH_HAS_FAST_MULTIPLIER
15 w -= (w >> 1) & 0x55555555;
16 w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
17 w = (w + (w >> 4)) & 0x0f0f0f0f;
18 return (w * 0x01010101) >> 24;
19#else
14 unsigned int res = w - ((w >> 1) & 0x55555555); 20 unsigned int res = w - ((w >> 1) & 0x55555555);
15 res = (res & 0x33333333) + ((res >> 2) & 0x33333333); 21 res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
16 res = (res + (res >> 4)) & 0x0F0F0F0F; 22 res = (res + (res >> 4)) & 0x0F0F0F0F;
17 res = res + (res >> 8); 23 res = res + (res >> 8);
18 return (res + (res >> 16)) & 0x000000FF; 24 return (res + (res >> 16)) & 0x000000FF;
25#endif
19} 26}
20EXPORT_SYMBOL(hweight32); 27EXPORT_SYMBOL(__sw_hweight32);
21 28
22unsigned int hweight16(unsigned int w) 29unsigned int __sw_hweight16(unsigned int w)
23{ 30{
24 unsigned int res = w - ((w >> 1) & 0x5555); 31 unsigned int res = w - ((w >> 1) & 0x5555);
25 res = (res & 0x3333) + ((res >> 2) & 0x3333); 32 res = (res & 0x3333) + ((res >> 2) & 0x3333);
26 res = (res + (res >> 4)) & 0x0F0F; 33 res = (res + (res >> 4)) & 0x0F0F;
27 return (res + (res >> 8)) & 0x00FF; 34 return (res + (res >> 8)) & 0x00FF;
28} 35}
29EXPORT_SYMBOL(hweight16); 36EXPORT_SYMBOL(__sw_hweight16);
30 37
31unsigned int hweight8(unsigned int w) 38unsigned int __sw_hweight8(unsigned int w)
32{ 39{
33 unsigned int res = w - ((w >> 1) & 0x55); 40 unsigned int res = w - ((w >> 1) & 0x55);
34 res = (res & 0x33) + ((res >> 2) & 0x33); 41 res = (res & 0x33) + ((res >> 2) & 0x33);
35 return (res + (res >> 4)) & 0x0F; 42 return (res + (res >> 4)) & 0x0F;
36} 43}
37EXPORT_SYMBOL(hweight8); 44EXPORT_SYMBOL(__sw_hweight8);
38 45
39unsigned long hweight64(__u64 w) 46unsigned long __sw_hweight64(__u64 w)
40{ 47{
41#if BITS_PER_LONG == 32 48#if BITS_PER_LONG == 32
42 return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); 49 return __sw_hweight32((unsigned int)(w >> 32)) +
50 __sw_hweight32((unsigned int)w);
43#elif BITS_PER_LONG == 64 51#elif BITS_PER_LONG == 64
44#ifdef ARCH_HAS_FAST_MULTIPLIER 52#ifdef ARCH_HAS_FAST_MULTIPLIER
45 w -= (w >> 1) & 0x5555555555555555ul; 53 w -= (w >> 1) & 0x5555555555555555ul;
@@ -56,4 +64,4 @@ unsigned long hweight64(__u64 w)
56#endif 64#endif
57#endif 65#endif
58} 66}
59EXPORT_SYMBOL(hweight64); 67EXPORT_SYMBOL(__sw_hweight64);
diff --git a/lib/idr.c b/lib/idr.c
index 80ca9aca038..e15502e8b21 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -106,16 +106,17 @@ static void idr_mark_full(struct idr_layer **pa, int id)
106} 106}
107 107
108/** 108/**
109 * idr_pre_get - reserver resources for idr allocation 109 * idr_pre_get - reserve resources for idr allocation
110 * @idp: idr handle 110 * @idp: idr handle
111 * @gfp_mask: memory allocation flags 111 * @gfp_mask: memory allocation flags
112 * 112 *
113 * This function should be called prior to locking and calling the 113 * This function should be called prior to calling the idr_get_new* functions.
114 * idr_get_new* functions. It preallocates enough memory to satisfy 114 * It preallocates enough memory to satisfy the worst possible allocation. The
115 * the worst possible allocation. 115 * caller should pass in GFP_KERNEL if possible. This of course requires that
116 * no spinning locks be held.
116 * 117 *
117 * If the system is REALLY out of memory this function returns 0, 118 * If the system is REALLY out of memory this function returns %0,
118 * otherwise 1. 119 * otherwise %1.
119 */ 120 */
120int idr_pre_get(struct idr *idp, gfp_t gfp_mask) 121int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
121{ 122{
@@ -156,10 +157,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
156 id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; 157 id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
157 158
158 /* if already at the top layer, we need to grow */ 159 /* if already at the top layer, we need to grow */
159 if (!(p = pa[l])) { 160 if (id >= 1 << (idp->layers * IDR_BITS)) {
160 *starting_id = id; 161 *starting_id = id;
161 return IDR_NEED_TO_GROW; 162 return IDR_NEED_TO_GROW;
162 } 163 }
164 p = pa[l];
165 BUG_ON(!p);
163 166
164 /* If we need to go up one layer, continue the 167 /* If we need to go up one layer, continue the
165 * loop; otherwise, restart from the top. 168 * loop; otherwise, restart from the top.
@@ -281,18 +284,20 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
281/** 284/**
282 * idr_get_new_above - allocate new idr entry above or equal to a start id 285 * idr_get_new_above - allocate new idr entry above or equal to a start id
283 * @idp: idr handle 286 * @idp: idr handle
284 * @ptr: pointer you want associated with the ide 287 * @ptr: pointer you want associated with the id
285 * @start_id: id to start search at 288 * @starting_id: id to start search at
286 * @id: pointer to the allocated handle 289 * @id: pointer to the allocated handle
287 * 290 *
288 * This is the allocate id function. It should be called with any 291 * This is the allocate id function. It should be called with any
289 * required locks. 292 * required locks.
290 * 293 *
291 * If memory is required, it will return -EAGAIN, you should unlock 294 * If allocation from IDR's private freelist fails, idr_get_new_above() will
292 * and go back to the idr_pre_get() call. If the idr is full, it will 295 * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
293 * return -ENOSPC. 296 * IDR's preallocation and then retry the idr_get_new_above() call.
297 *
298 * If the idr is full idr_get_new_above() will return %-ENOSPC.
294 * 299 *
295 * @id returns a value in the range @starting_id ... 0x7fffffff 300 * @id returns a value in the range @starting_id ... %0x7fffffff
296 */ 301 */
297int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) 302int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
298{ 303{
@@ -313,17 +318,16 @@ EXPORT_SYMBOL(idr_get_new_above);
313/** 318/**
314 * idr_get_new - allocate new idr entry 319 * idr_get_new - allocate new idr entry
315 * @idp: idr handle 320 * @idp: idr handle
316 * @ptr: pointer you want associated with the ide 321 * @ptr: pointer you want associated with the id
317 * @id: pointer to the allocated handle 322 * @id: pointer to the allocated handle
318 * 323 *
319 * This is the allocate id function. It should be called with any 324 * If allocation from IDR's private freelist fails, idr_get_new_above() will
320 * required locks. 325 * return %-EAGAIN. The caller should retry the idr_pre_get() call to refill
326 * IDR's preallocation and then retry the idr_get_new_above() call.
321 * 327 *
322 * If memory is required, it will return -EAGAIN, you should unlock 328 * If the idr is full idr_get_new_above() will return %-ENOSPC.
323 * and go back to the idr_pre_get() call. If the idr is full, it will
324 * return -ENOSPC.
325 * 329 *
326 * @id returns a value in the range 0 ... 0x7fffffff 330 * @id returns a value in the range %0 ... %0x7fffffff
327 */ 331 */
328int idr_get_new(struct idr *idp, void *ptr, int *id) 332int idr_get_new(struct idr *idp, void *ptr, int *id)
329{ 333{
@@ -386,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
386} 390}
387 391
388/** 392/**
389 * idr_remove - remove the given id and free it's slot 393 * idr_remove - remove the given id and free its slot
390 * @idp: idr handle 394 * @idp: idr handle
391 * @id: unique key 395 * @id: unique key
392 */ 396 */
@@ -435,7 +439,7 @@ EXPORT_SYMBOL(idr_remove);
435 * function will remove all id mappings and leave all idp_layers 439 * function will remove all id mappings and leave all idp_layers
436 * unused. 440 * unused.
437 * 441 *
438 * A typical clean-up sequence for objects stored in an idr tree, will 442 * A typical clean-up sequence for objects stored in an idr tree will
439 * use idr_for_each() to free all objects, if necessay, then 443 * use idr_for_each() to free all objects, if necessay, then
440 * idr_remove_all() to remove all ids, and idr_destroy() to free 444 * idr_remove_all() to remove all ids, and idr_destroy() to free
441 * up the cached idr_layers. 445 * up the cached idr_layers.
@@ -443,6 +447,7 @@ EXPORT_SYMBOL(idr_remove);
443void idr_remove_all(struct idr *idp) 447void idr_remove_all(struct idr *idp)
444{ 448{
445 int n, id, max; 449 int n, id, max;
450 int bt_mask;
446 struct idr_layer *p; 451 struct idr_layer *p;
447 struct idr_layer *pa[MAX_LEVEL]; 452 struct idr_layer *pa[MAX_LEVEL];
448 struct idr_layer **paa = &pa[0]; 453 struct idr_layer **paa = &pa[0];
@@ -460,8 +465,10 @@ void idr_remove_all(struct idr *idp)
460 p = p->ary[(id >> n) & IDR_MASK]; 465 p = p->ary[(id >> n) & IDR_MASK];
461 } 466 }
462 467
468 bt_mask = id;
463 id += 1 << n; 469 id += 1 << n;
464 while (n < fls(id)) { 470 /* Get the highest bit that the above add changed from 0->1. */
471 while (n < fls(id ^ bt_mask)) {
465 if (p) 472 if (p)
466 free_layer(p); 473 free_layer(p);
467 n += IDR_BITS; 474 n += IDR_BITS;
@@ -474,7 +481,7 @@ EXPORT_SYMBOL(idr_remove_all);
474 481
475/** 482/**
476 * idr_destroy - release all cached layers within an idr tree 483 * idr_destroy - release all cached layers within an idr tree
477 * idp: idr handle 484 * @idp: idr handle
478 */ 485 */
479void idr_destroy(struct idr *idp) 486void idr_destroy(struct idr *idp)
480{ 487{
@@ -502,7 +509,7 @@ void *idr_find(struct idr *idp, int id)
502 int n; 509 int n;
503 struct idr_layer *p; 510 struct idr_layer *p;
504 511
505 p = rcu_dereference(idp->top); 512 p = rcu_dereference_raw(idp->top);
506 if (!p) 513 if (!p)
507 return NULL; 514 return NULL;
508 n = (p->layer+1) * IDR_BITS; 515 n = (p->layer+1) * IDR_BITS;
@@ -517,7 +524,7 @@ void *idr_find(struct idr *idp, int id)
517 while (n > 0 && p) { 524 while (n > 0 && p) {
518 n -= IDR_BITS; 525 n -= IDR_BITS;
519 BUG_ON(n != p->layer*IDR_BITS); 526 BUG_ON(n != p->layer*IDR_BITS);
520 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 527 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
521 } 528 }
522 return((void *)p); 529 return((void *)p);
523} 530}
@@ -537,7 +544,7 @@ EXPORT_SYMBOL(idr_find);
537 * not allowed. 544 * not allowed.
538 * 545 *
539 * We check the return of @fn each time. If it returns anything other 546 * We check the return of @fn each time. If it returns anything other
540 * than 0, we break out and return that value. 547 * than %0, we break out and return that value.
541 * 548 *
542 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). 549 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
543 */ 550 */
@@ -550,7 +557,7 @@ int idr_for_each(struct idr *idp,
550 struct idr_layer **paa = &pa[0]; 557 struct idr_layer **paa = &pa[0];
551 558
552 n = idp->layers * IDR_BITS; 559 n = idp->layers * IDR_BITS;
553 p = rcu_dereference(idp->top); 560 p = rcu_dereference_raw(idp->top);
554 max = 1 << n; 561 max = 1 << n;
555 562
556 id = 0; 563 id = 0;
@@ -558,7 +565,7 @@ int idr_for_each(struct idr *idp,
558 while (n > 0 && p) { 565 while (n > 0 && p) {
559 n -= IDR_BITS; 566 n -= IDR_BITS;
560 *paa++ = p; 567 *paa++ = p;
561 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 568 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
562 } 569 }
563 570
564 if (p) { 571 if (p) {
@@ -581,10 +588,11 @@ EXPORT_SYMBOL(idr_for_each);
581/** 588/**
582 * idr_get_next - lookup next object of id to given id. 589 * idr_get_next - lookup next object of id to given id.
583 * @idp: idr handle 590 * @idp: idr handle
584 * @id: pointer to lookup key 591 * @nextidp: pointer to lookup key
585 * 592 *
586 * Returns pointer to registered object with id, which is next number to 593 * Returns pointer to registered object with id, which is next number to
587 * given id. 594 * given id. After being looked up, *@nextidp will be updated for the next
595 * iteration.
588 */ 596 */
589 597
590void *idr_get_next(struct idr *idp, int *nextidp) 598void *idr_get_next(struct idr *idp, int *nextidp)
@@ -597,7 +605,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
597 /* find first ent */ 605 /* find first ent */
598 n = idp->layers * IDR_BITS; 606 n = idp->layers * IDR_BITS;
599 max = 1 << n; 607 max = 1 << n;
600 p = rcu_dereference(idp->top); 608 p = rcu_dereference_raw(idp->top);
601 if (!p) 609 if (!p)
602 return NULL; 610 return NULL;
603 611
@@ -605,7 +613,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
605 while (n > 0 && p) { 613 while (n > 0 && p) {
606 n -= IDR_BITS; 614 n -= IDR_BITS;
607 *paa++ = p; 615 *paa++ = p;
608 p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); 616 p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
609 } 617 }
610 618
611 if (p) { 619 if (p) {
@@ -621,7 +629,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
621 } 629 }
622 return NULL; 630 return NULL;
623} 631}
624 632EXPORT_SYMBOL(idr_get_next);
625 633
626 634
627/** 635/**
@@ -631,8 +639,8 @@ void *idr_get_next(struct idr *idp, int *nextidp)
631 * @id: lookup key 639 * @id: lookup key
632 * 640 *
633 * Replace the pointer registered with an id and return the old value. 641 * Replace the pointer registered with an id and return the old value.
634 * A -ENOENT return indicates that @id was not found. 642 * A %-ENOENT return indicates that @id was not found.
635 * A -EINVAL return indicates that @id was not within valid constraints. 643 * A %-EINVAL return indicates that @id was not within valid constraints.
636 * 644 *
637 * The caller must serialize with writers. 645 * The caller must serialize with writers.
638 */ 646 */
@@ -690,10 +698,11 @@ void idr_init(struct idr *idp)
690EXPORT_SYMBOL(idr_init); 698EXPORT_SYMBOL(idr_init);
691 699
692 700
693/* 701/**
702 * DOC: IDA description
694 * IDA - IDR based ID allocator 703 * IDA - IDR based ID allocator
695 * 704 *
696 * this is id allocator without id -> pointer translation. Memory 705 * This is id allocator without id -> pointer translation. Memory
697 * usage is much lower than full blown idr because each id only 706 * usage is much lower than full blown idr because each id only
698 * occupies a bit. ida uses a custom leaf node which contains 707 * occupies a bit. ida uses a custom leaf node which contains
699 * IDA_BITMAP_BITS slots. 708 * IDA_BITMAP_BITS slots.
@@ -726,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
726 * following function. It preallocates enough memory to satisfy the 735 * following function. It preallocates enough memory to satisfy the
727 * worst possible allocation. 736 * worst possible allocation.
728 * 737 *
729 * If the system is REALLY out of memory this function returns 0, 738 * If the system is REALLY out of memory this function returns %0,
730 * otherwise 1. 739 * otherwise %1.
731 */ 740 */
732int ida_pre_get(struct ida *ida, gfp_t gfp_mask) 741int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
733{ 742{
@@ -753,17 +762,17 @@ EXPORT_SYMBOL(ida_pre_get);
753/** 762/**
754 * ida_get_new_above - allocate new ID above or equal to a start id 763 * ida_get_new_above - allocate new ID above or equal to a start id
755 * @ida: ida handle 764 * @ida: ida handle
756 * @staring_id: id to start search at 765 * @starting_id: id to start search at
757 * @p_id: pointer to the allocated handle 766 * @p_id: pointer to the allocated handle
758 * 767 *
759 * Allocate new ID above or equal to @ida. It should be called with 768 * Allocate new ID above or equal to @ida. It should be called with
760 * any required locks. 769 * any required locks.
761 * 770 *
762 * If memory is required, it will return -EAGAIN, you should unlock 771 * If memory is required, it will return %-EAGAIN, you should unlock
763 * and go back to the ida_pre_get() call. If the ida is full, it will 772 * and go back to the ida_pre_get() call. If the ida is full, it will
764 * return -ENOSPC. 773 * return %-ENOSPC.
765 * 774 *
766 * @p_id returns a value in the range @starting_id ... 0x7fffffff. 775 * @p_id returns a value in the range @starting_id ... %0x7fffffff.
767 */ 776 */
768int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) 777int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
769{ 778{
@@ -845,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above);
845 * 854 *
846 * Allocate new ID. It should be called with any required locks. 855 * Allocate new ID. It should be called with any required locks.
847 * 856 *
848 * If memory is required, it will return -EAGAIN, you should unlock 857 * If memory is required, it will return %-EAGAIN, you should unlock
849 * and go back to the idr_pre_get() call. If the idr is full, it will 858 * and go back to the idr_pre_get() call. If the idr is full, it will
850 * return -ENOSPC. 859 * return %-ENOSPC.
851 * 860 *
852 * @id returns a value in the range 0 ... 0x7fffffff. 861 * @id returns a value in the range %0 ... %0x7fffffff.
853 */ 862 */
854int ida_get_new(struct ida *ida, int *p_id) 863int ida_get_new(struct ida *ida, int *p_id)
855{ 864{
@@ -907,7 +916,7 @@ EXPORT_SYMBOL(ida_remove);
907 916
908/** 917/**
909 * ida_destroy - release all cached layers within an ida tree 918 * ida_destroy - release all cached layers within an ida tree
910 * ida: ida handle 919 * @ida: ida handle
911 */ 920 */
912void ida_destroy(struct ida *ida) 921void ida_destroy(struct ida *ida)
913{ 922{
diff --git a/lib/inflate.c b/lib/inflate.c
index 1a8e8a97812..013a7619348 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -7,7 +7,7 @@
7 * Adapted for booting Linux by Hannu Savolainen 1993 7 * Adapted for booting Linux by Hannu Savolainen 1993
8 * based on gzip-1.0.3 8 * based on gzip-1.0.3
9 * 9 *
10 * Nicolas Pitre <nico@cam.org>, 1999/04/14 : 10 * Nicolas Pitre <nico@fluxnic.net>, 1999/04/14 :
11 * Little mods for all variable to reside either into rodata or bss segments 11 * Little mods for all variable to reside either into rodata or bss segments
12 * by marking constant variables with 'const' and initializing all the others 12 * by marking constant variables with 'const' and initializing all the others
13 * at run-time only. This allows for the kernel uncompressor to run 13 * at run-time only. This allows for the kernel uncompressor to run
@@ -103,6 +103,9 @@
103 the two sets of lengths. 103 the two sets of lengths.
104 */ 104 */
105#include <linux/compiler.h> 105#include <linux/compiler.h>
106#ifdef NO_INFLATE_MALLOC
107#include <linux/slab.h>
108#endif
106 109
107#ifdef RCSID 110#ifdef RCSID
108static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #"; 111static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 75dbda03f4f..da053313ee5 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -3,41 +3,7 @@
3 */ 3 */
4 4
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/bitops.h> 6#include <linux/bitmap.h>
7
8static unsigned long find_next_zero_area(unsigned long *map,
9 unsigned long size,
10 unsigned long start,
11 unsigned int nr,
12 unsigned long align_mask)
13{
14 unsigned long index, end, i;
15again:
16 index = find_next_zero_bit(map, size, start);
17
18 /* Align allocation */
19 index = (index + align_mask) & ~align_mask;
20
21 end = index + nr;
22 if (end >= size)
23 return -1;
24 for (i = index; i < end; i++) {
25 if (test_bit(i, map)) {
26 start = i+1;
27 goto again;
28 }
29 }
30 return index;
31}
32
33void iommu_area_reserve(unsigned long *map, unsigned long i, int len)
34{
35 unsigned long end = i + len;
36 while (i < end) {
37 __set_bit(i, map);
38 i++;
39 }
40}
41 7
42int iommu_is_span_boundary(unsigned int index, unsigned int nr, 8int iommu_is_span_boundary(unsigned int index, unsigned int nr,
43 unsigned long shift, 9 unsigned long shift,
@@ -55,36 +21,20 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
55 unsigned long align_mask) 21 unsigned long align_mask)
56{ 22{
57 unsigned long index; 23 unsigned long index;
24
25 /* We don't want the last of the limit */
26 size -= 1;
58again: 27again:
59 index = find_next_zero_area(map, size, start, nr, align_mask); 28 index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
60 if (index != -1) { 29 if (index < size) {
61 if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { 30 if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
62 /* we could do more effectively */ 31 /* we could do more effectively */
63 start = index + 1; 32 start = index + 1;
64 goto again; 33 goto again;
65 } 34 }
66 iommu_area_reserve(map, index, nr); 35 bitmap_set(map, index, nr);
36 return index;
67 } 37 }
68 return index; 38 return -1;
69} 39}
70EXPORT_SYMBOL(iommu_area_alloc); 40EXPORT_SYMBOL(iommu_area_alloc);
71
72void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
73{
74 unsigned long end = start + nr;
75
76 while (start < end) {
77 __clear_bit(start, map);
78 start++;
79 }
80}
81EXPORT_SYMBOL(iommu_area_free);
82
83unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
84 unsigned long io_page_size)
85{
86 unsigned long size = (addr & (io_page_size - 1)) + len;
87
88 return DIV_ROUND_UP(size, io_page_size);
89}
90EXPORT_SYMBOL(iommu_num_pages);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a..5730ecd3eb6 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -13,10 +13,10 @@
13#include <asm/pgtable.h> 13#include <asm/pgtable.h>
14 14
15static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, 15static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
16 unsigned long end, unsigned long phys_addr, pgprot_t prot) 16 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
17{ 17{
18 pte_t *pte; 18 pte_t *pte;
19 unsigned long pfn; 19 u64 pfn;
20 20
21 pfn = phys_addr >> PAGE_SHIFT; 21 pfn = phys_addr >> PAGE_SHIFT;
22 pte = pte_alloc_kernel(pmd, addr); 22 pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +31,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
31} 31}
32 32
33static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, 33static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
34 unsigned long end, unsigned long phys_addr, pgprot_t prot) 34 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
35{ 35{
36 pmd_t *pmd; 36 pmd_t *pmd;
37 unsigned long next; 37 unsigned long next;
@@ -49,7 +49,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
49} 49}
50 50
51static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, 51static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
52 unsigned long end, unsigned long phys_addr, pgprot_t prot) 52 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
53{ 53{
54 pud_t *pud; 54 pud_t *pud;
55 unsigned long next; 55 unsigned long next;
@@ -67,7 +67,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
67} 67}
68 68
69int ioremap_page_range(unsigned long addr, 69int ioremap_page_range(unsigned long addr,
70 unsigned long end, unsigned long phys_addr, pgprot_t prot) 70 unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
71{ 71{
72 pgd_t *pgd; 72 pgd_t *pgd;
73 unsigned long start; 73 unsigned long start;
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
index f1ed2fe76c6..bd2bea96336 100644
--- a/lib/is_single_threaded.c
+++ b/lib/is_single_threaded.c
@@ -12,34 +12,47 @@
12 12
13#include <linux/sched.h> 13#include <linux/sched.h>
14 14
15/** 15/*
16 * is_single_threaded - Determine if a thread group is single-threaded or not 16 * Returns true if the task does not share ->mm with another thread/process.
17 * @p: A task in the thread group in question
18 *
19 * This returns true if the thread group to which a task belongs is single
20 * threaded, false if it is not.
21 */ 17 */
22bool is_single_threaded(struct task_struct *p) 18bool current_is_single_threaded(void)
23{ 19{
24 struct task_struct *g, *t; 20 struct task_struct *task = current;
25 struct mm_struct *mm = p->mm; 21 struct mm_struct *mm = task->mm;
22 struct task_struct *p, *t;
23 bool ret;
26 24
27 if (atomic_read(&p->signal->count) != 1) 25 if (atomic_read(&task->signal->live) != 1)
28 goto no; 26 return false;
29 27
30 if (atomic_read(&p->mm->mm_users) != 1) { 28 if (atomic_read(&mm->mm_users) == 1)
31 read_lock(&tasklist_lock); 29 return true;
32 do_each_thread(g, t) {
33 if (t->mm == mm && t != p)
34 goto no_unlock;
35 } while_each_thread(g, t);
36 read_unlock(&tasklist_lock);
37 }
38 30
39 return true; 31 ret = false;
32 rcu_read_lock();
33 for_each_process(p) {
34 if (unlikely(p->flags & PF_KTHREAD))
35 continue;
36 if (unlikely(p == task->group_leader))
37 continue;
38
39 t = p;
40 do {
41 if (unlikely(t->mm == mm))
42 goto found;
43 if (likely(t->mm))
44 break;
45 /*
46 * t->mm == NULL. Make sure next_thread/next_task
47 * will see other CLONE_VM tasks which might be
48 * forked before exiting.
49 */
50 smp_rmb();
51 } while_each_thread(p, t);
52 }
53 ret = true;
54found:
55 rcu_read_unlock();
40 56
41no_unlock: 57 return ret;
42 read_unlock(&tasklist_lock);
43no:
44 return false;
45} 58}
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index c5ff1fd1003..9c4233b2378 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -6,6 +6,7 @@
6 6
7#include <stdarg.h> 7#include <stdarg.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/slab.h>
9#include <linux/types.h> 10#include <linux/types.h>
10#include <linux/string.h> 11#include <linux/string.h>
11 12
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 39f1029e352..b135d04aa48 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -5,10 +5,13 @@
5 * relegated to obsolescence, but used by various less 5 * relegated to obsolescence, but used by various less
6 * important (or lazy) subsystems. 6 * important (or lazy) subsystems.
7 */ 7 */
8#include <linux/smp_lock.h>
9#include <linux/module.h> 8#include <linux/module.h>
10#include <linux/kallsyms.h> 9#include <linux/kallsyms.h>
11#include <linux/semaphore.h> 10#include <linux/semaphore.h>
11#include <linux/smp_lock.h>
12
13#define CREATE_TRACE_POINTS
14#include <trace/events/bkl.h>
12 15
13/* 16/*
14 * The 'big kernel lock' 17 * The 'big kernel lock'
@@ -20,7 +23,7 @@
20 * 23 *
21 * Don't use in new code. 24 * Don't use in new code.
22 */ 25 */
23static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); 26static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
24 27
25 28
26/* 29/*
@@ -33,12 +36,12 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
33 * If it successfully gets the lock, it should increment 36 * If it successfully gets the lock, it should increment
34 * the preemption count like any spinlock does. 37 * the preemption count like any spinlock does.
35 * 38 *
36 * (This works on UP too - _raw_spin_trylock will never 39 * (This works on UP too - do_raw_spin_trylock will never
37 * return false in that case) 40 * return false in that case)
38 */ 41 */
39int __lockfunc __reacquire_kernel_lock(void) 42int __lockfunc __reacquire_kernel_lock(void)
40{ 43{
41 while (!_raw_spin_trylock(&kernel_flag)) { 44 while (!do_raw_spin_trylock(&kernel_flag)) {
42 if (need_resched()) 45 if (need_resched())
43 return -EAGAIN; 46 return -EAGAIN;
44 cpu_relax(); 47 cpu_relax();
@@ -49,27 +52,27 @@ int __lockfunc __reacquire_kernel_lock(void)
49 52
50void __lockfunc __release_kernel_lock(void) 53void __lockfunc __release_kernel_lock(void)
51{ 54{
52 _raw_spin_unlock(&kernel_flag); 55 do_raw_spin_unlock(&kernel_flag);
53 preempt_enable_no_resched(); 56 preempt_enable_no_resched();
54} 57}
55 58
56/* 59/*
57 * These are the BKL spinlocks - we try to be polite about preemption. 60 * These are the BKL spinlocks - we try to be polite about preemption.
58 * If SMP is not on (ie UP preemption), this all goes away because the 61 * If SMP is not on (ie UP preemption), this all goes away because the
59 * _raw_spin_trylock() will always succeed. 62 * do_raw_spin_trylock() will always succeed.
60 */ 63 */
61#ifdef CONFIG_PREEMPT 64#ifdef CONFIG_PREEMPT
62static inline void __lock_kernel(void) 65static inline void __lock_kernel(void)
63{ 66{
64 preempt_disable(); 67 preempt_disable();
65 if (unlikely(!_raw_spin_trylock(&kernel_flag))) { 68 if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
66 /* 69 /*
67 * If preemption was disabled even before this 70 * If preemption was disabled even before this
68 * was called, there's nothing we can be polite 71 * was called, there's nothing we can be polite
69 * about - just spin. 72 * about - just spin.
70 */ 73 */
71 if (preempt_count() > 1) { 74 if (preempt_count() > 1) {
72 _raw_spin_lock(&kernel_flag); 75 do_raw_spin_lock(&kernel_flag);
73 return; 76 return;
74 } 77 }
75 78
@@ -79,10 +82,10 @@ static inline void __lock_kernel(void)
79 */ 82 */
80 do { 83 do {
81 preempt_enable(); 84 preempt_enable();
82 while (spin_is_locked(&kernel_flag)) 85 while (raw_spin_is_locked(&kernel_flag))
83 cpu_relax(); 86 cpu_relax();
84 preempt_disable(); 87 preempt_disable();
85 } while (!_raw_spin_trylock(&kernel_flag)); 88 } while (!do_raw_spin_trylock(&kernel_flag));
86 } 89 }
87} 90}
88 91
@@ -93,7 +96,7 @@ static inline void __lock_kernel(void)
93 */ 96 */
94static inline void __lock_kernel(void) 97static inline void __lock_kernel(void)
95{ 98{
96 _raw_spin_lock(&kernel_flag); 99 do_raw_spin_lock(&kernel_flag);
97} 100}
98#endif 101#endif
99 102
@@ -103,7 +106,7 @@ static inline void __unlock_kernel(void)
103 * the BKL is not covered by lockdep, so we open-code the 106 * the BKL is not covered by lockdep, so we open-code the
104 * unlocking sequence (and thus avoid the dep-chain ops): 107 * unlocking sequence (and thus avoid the dep-chain ops):
105 */ 108 */
106 _raw_spin_unlock(&kernel_flag); 109 do_raw_spin_unlock(&kernel_flag);
107 preempt_enable(); 110 preempt_enable();
108} 111}
109 112
@@ -113,21 +116,28 @@ static inline void __unlock_kernel(void)
113 * This cannot happen asynchronously, so we only need to 116 * This cannot happen asynchronously, so we only need to
114 * worry about other CPU's. 117 * worry about other CPU's.
115 */ 118 */
116void __lockfunc lock_kernel(void) 119void __lockfunc _lock_kernel(const char *func, const char *file, int line)
117{ 120{
118 int depth = current->lock_depth+1; 121 int depth = current->lock_depth + 1;
119 if (likely(!depth)) 122
123 trace_lock_kernel(func, file, line);
124
125 if (likely(!depth)) {
126 might_sleep();
120 __lock_kernel(); 127 __lock_kernel();
128 }
121 current->lock_depth = depth; 129 current->lock_depth = depth;
122} 130}
123 131
124void __lockfunc unlock_kernel(void) 132void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
125{ 133{
126 BUG_ON(current->lock_depth < 0); 134 BUG_ON(current->lock_depth < 0);
127 if (likely(--current->lock_depth < 0)) 135 if (likely(--current->lock_depth < 0))
128 __unlock_kernel(); 136 __unlock_kernel();
137
138 trace_unlock_kernel(func, file, line);
129} 139}
130 140
131EXPORT_SYMBOL(lock_kernel); 141EXPORT_SYMBOL(_lock_kernel);
132EXPORT_SYMBOL(unlock_kernel); 142EXPORT_SYMBOL(_unlock_kernel);
133 143
diff --git a/lib/kobject.c b/lib/kobject.c
index b512b746d2a..82dc34c095c 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
700 return ret; 700 return ret;
701} 701}
702 702
703struct sysfs_ops kobj_sysfs_ops = { 703const struct sysfs_ops kobj_sysfs_ops = {
704 .show = kobj_attr_show, 704 .show = kobj_attr_show,
705 .store = kobj_attr_store, 705 .store = kobj_attr_store,
706}; 706};
@@ -746,17 +746,56 @@ void kset_unregister(struct kset *k)
746 */ 746 */
747struct kobject *kset_find_obj(struct kset *kset, const char *name) 747struct kobject *kset_find_obj(struct kset *kset, const char *name)
748{ 748{
749 return kset_find_obj_hinted(kset, name, NULL);
750}
751
752/**
753 * kset_find_obj_hinted - search for object in kset given a predecessor hint.
754 * @kset: kset we're looking in.
755 * @name: object's name.
756 * @hint: hint to possible object's predecessor.
757 *
758 * Check the hint's next object and if it is a match return it directly,
759 * otherwise, fall back to the behavior of kset_find_obj(). Either way
760 * a reference for the returned object is held and the reference on the
761 * hinted object is released.
762 */
763struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name,
764 struct kobject *hint)
765{
749 struct kobject *k; 766 struct kobject *k;
750 struct kobject *ret = NULL; 767 struct kobject *ret = NULL;
751 768
752 spin_lock(&kset->list_lock); 769 spin_lock(&kset->list_lock);
770
771 if (!hint)
772 goto slow_search;
773
774 /* end of list detection */
775 if (hint->entry.next == kset->list.next)
776 goto slow_search;
777
778 k = container_of(hint->entry.next, struct kobject, entry);
779 if (!kobject_name(k) || strcmp(kobject_name(k), name))
780 goto slow_search;
781
782 ret = kobject_get(k);
783 goto unlock_exit;
784
785slow_search:
753 list_for_each_entry(k, &kset->list, entry) { 786 list_for_each_entry(k, &kset->list, entry) {
754 if (kobject_name(k) && !strcmp(kobject_name(k), name)) { 787 if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
755 ret = kobject_get(k); 788 ret = kobject_get(k);
756 break; 789 break;
757 } 790 }
758 } 791 }
792
793unlock_exit:
759 spin_unlock(&kset->list_lock); 794 spin_unlock(&kset->list_lock);
795
796 if (hint)
797 kobject_put(hint);
798
760 return ret; 799 return ret;
761} 800}
762 801
@@ -789,7 +828,7 @@ static struct kobj_type kset_ktype = {
789 * If the kset was not able to be created, NULL will be returned. 828 * If the kset was not able to be created, NULL will be returned.
790 */ 829 */
791static struct kset *kset_create(const char *name, 830static struct kset *kset_create(const char *name,
792 struct kset_uevent_ops *uevent_ops, 831 const struct kset_uevent_ops *uevent_ops,
793 struct kobject *parent_kobj) 832 struct kobject *parent_kobj)
794{ 833{
795 struct kset *kset; 834 struct kset *kset;
@@ -832,7 +871,7 @@ static struct kset *kset_create(const char *name,
832 * If the kset was not able to be created, NULL will be returned. 871 * If the kset was not able to be created, NULL will be returned.
833 */ 872 */
834struct kset *kset_create_and_add(const char *name, 873struct kset *kset_create_and_add(const char *name,
835 struct kset_uevent_ops *uevent_ops, 874 const struct kset_uevent_ops *uevent_ops,
836 struct kobject *parent_kobj) 875 struct kobject *parent_kobj)
837{ 876{
838 struct kset *kset; 877 struct kset *kset;
@@ -850,6 +889,121 @@ struct kset *kset_create_and_add(const char *name,
850} 889}
851EXPORT_SYMBOL_GPL(kset_create_and_add); 890EXPORT_SYMBOL_GPL(kset_create_and_add);
852 891
892
893static DEFINE_SPINLOCK(kobj_ns_type_lock);
894static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
895
896int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
897{
898 enum kobj_ns_type type = ops->type;
899 int error;
900
901 spin_lock(&kobj_ns_type_lock);
902
903 error = -EINVAL;
904 if (type >= KOBJ_NS_TYPES)
905 goto out;
906
907 error = -EINVAL;
908 if (type <= KOBJ_NS_TYPE_NONE)
909 goto out;
910
911 error = -EBUSY;
912 if (kobj_ns_ops_tbl[type])
913 goto out;
914
915 error = 0;
916 kobj_ns_ops_tbl[type] = ops;
917
918out:
919 spin_unlock(&kobj_ns_type_lock);
920 return error;
921}
922
923int kobj_ns_type_registered(enum kobj_ns_type type)
924{
925 int registered = 0;
926
927 spin_lock(&kobj_ns_type_lock);
928 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES))
929 registered = kobj_ns_ops_tbl[type] != NULL;
930 spin_unlock(&kobj_ns_type_lock);
931
932 return registered;
933}
934
935const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
936{
937 const struct kobj_ns_type_operations *ops = NULL;
938
939 if (parent && parent->ktype->child_ns_type)
940 ops = parent->ktype->child_ns_type(parent);
941
942 return ops;
943}
944
945const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
946{
947 return kobj_child_ns_ops(kobj->parent);
948}
949
950
951const void *kobj_ns_current(enum kobj_ns_type type)
952{
953 const void *ns = NULL;
954
955 spin_lock(&kobj_ns_type_lock);
956 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
957 kobj_ns_ops_tbl[type])
958 ns = kobj_ns_ops_tbl[type]->current_ns();
959 spin_unlock(&kobj_ns_type_lock);
960
961 return ns;
962}
963
964const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
965{
966 const void *ns = NULL;
967
968 spin_lock(&kobj_ns_type_lock);
969 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
970 kobj_ns_ops_tbl[type])
971 ns = kobj_ns_ops_tbl[type]->netlink_ns(sk);
972 spin_unlock(&kobj_ns_type_lock);
973
974 return ns;
975}
976
977const void *kobj_ns_initial(enum kobj_ns_type type)
978{
979 const void *ns = NULL;
980
981 spin_lock(&kobj_ns_type_lock);
982 if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
983 kobj_ns_ops_tbl[type])
984 ns = kobj_ns_ops_tbl[type]->initial_ns();
985 spin_unlock(&kobj_ns_type_lock);
986
987 return ns;
988}
989
990/*
991 * kobj_ns_exit - invalidate a namespace tag
992 *
993 * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET)
994 * @ns: the actual namespace being invalidated
995 *
996 * This is called when a tag is no longer valid. For instance,
997 * when a network namespace exits, it uses this helper to
998 * make sure no sb's sysfs_info points to the now-invalidated
999 * netns.
1000 */
1001void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
1002{
1003 sysfs_exit_ns(type, ns);
1004}
1005
1006
853EXPORT_SYMBOL(kobject_get); 1007EXPORT_SYMBOL(kobject_get);
854EXPORT_SYMBOL(kobject_put); 1008EXPORT_SYMBOL(kobject_put);
855EXPORT_SYMBOL(kobject_del); 1009EXPORT_SYMBOL(kobject_del);
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 920a3ca6e25..70af0a7f97c 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -18,18 +18,25 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/kobject.h> 19#include <linux/kobject.h>
20#include <linux/module.h> 20#include <linux/module.h>
21 21#include <linux/slab.h>
22#include <linux/user_namespace.h>
22#include <linux/socket.h> 23#include <linux/socket.h>
23#include <linux/skbuff.h> 24#include <linux/skbuff.h>
24#include <linux/netlink.h> 25#include <linux/netlink.h>
25#include <net/sock.h> 26#include <net/sock.h>
27#include <net/net_namespace.h>
26 28
27 29
28u64 uevent_seqnum; 30u64 uevent_seqnum;
29char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; 31char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
30static DEFINE_SPINLOCK(sequence_lock); 32static DEFINE_SPINLOCK(sequence_lock);
31#if defined(CONFIG_NET) 33#ifdef CONFIG_NET
32static struct sock *uevent_sock; 34struct uevent_sock {
35 struct list_head list;
36 struct sock *sk;
37};
38static LIST_HEAD(uevent_sock_list);
39static DEFINE_MUTEX(uevent_sock_mutex);
33#endif 40#endif
34 41
35/* the strings here must match the enum in include/linux/kobject.h */ 42/* the strings here must match the enum in include/linux/kobject.h */
@@ -76,6 +83,39 @@ out:
76 return ret; 83 return ret;
77} 84}
78 85
86#ifdef CONFIG_NET
87static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
88{
89 struct kobject *kobj = data;
90 const struct kobj_ns_type_operations *ops;
91
92 ops = kobj_ns_ops(kobj);
93 if (ops) {
94 const void *sock_ns, *ns;
95 ns = kobj->ktype->namespace(kobj);
96 sock_ns = ops->netlink_ns(dsk);
97 return sock_ns != ns;
98 }
99
100 return 0;
101}
102#endif
103
104static int kobj_usermode_filter(struct kobject *kobj)
105{
106 const struct kobj_ns_type_operations *ops;
107
108 ops = kobj_ns_ops(kobj);
109 if (ops) {
110 const void *init_ns, *ns;
111 ns = kobj->ktype->namespace(kobj);
112 init_ns = ops->initial_ns();
113 return ns != init_ns;
114 }
115
116 return 0;
117}
118
79/** 119/**
80 * kobject_uevent_env - send an uevent with environmental data 120 * kobject_uevent_env - send an uevent with environmental data
81 * 121 *
@@ -83,7 +123,7 @@ out:
83 * @kobj: struct kobject that the action is happening to 123 * @kobj: struct kobject that the action is happening to
84 * @envp_ext: pointer to environmental data 124 * @envp_ext: pointer to environmental data
85 * 125 *
86 * Returns 0 if kobject_uevent() is completed with success or the 126 * Returns 0 if kobject_uevent_env() is completed with success or the
87 * corresponding error when it fails. 127 * corresponding error when it fails.
88 */ 128 */
89int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, 129int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
@@ -95,10 +135,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
95 const char *subsystem; 135 const char *subsystem;
96 struct kobject *top_kobj; 136 struct kobject *top_kobj;
97 struct kset *kset; 137 struct kset *kset;
98 struct kset_uevent_ops *uevent_ops; 138 const struct kset_uevent_ops *uevent_ops;
99 u64 seq; 139 u64 seq;
100 int i = 0; 140 int i = 0;
101 int retval = 0; 141 int retval = 0;
142#ifdef CONFIG_NET
143 struct uevent_sock *ue_sk;
144#endif
102 145
103 pr_debug("kobject: '%s' (%p): %s\n", 146 pr_debug("kobject: '%s' (%p): %s\n",
104 kobject_name(kobj), kobj, __func__); 147 kobject_name(kobj), kobj, __func__);
@@ -210,7 +253,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
210 253
211#if defined(CONFIG_NET) 254#if defined(CONFIG_NET)
212 /* send netlink message */ 255 /* send netlink message */
213 if (uevent_sock) { 256 mutex_lock(&uevent_sock_mutex);
257 list_for_each_entry(ue_sk, &uevent_sock_list, list) {
258 struct sock *uevent_sock = ue_sk->sk;
214 struct sk_buff *skb; 259 struct sk_buff *skb;
215 size_t len; 260 size_t len;
216 261
@@ -232,18 +277,21 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
232 } 277 }
233 278
234 NETLINK_CB(skb).dst_group = 1; 279 NETLINK_CB(skb).dst_group = 1;
235 retval = netlink_broadcast(uevent_sock, skb, 0, 1, 280 retval = netlink_broadcast_filtered(uevent_sock, skb,
236 GFP_KERNEL); 281 0, 1, GFP_KERNEL,
282 kobj_bcast_filter,
283 kobj);
237 /* ENOBUFS should be handled in userspace */ 284 /* ENOBUFS should be handled in userspace */
238 if (retval == -ENOBUFS) 285 if (retval == -ENOBUFS)
239 retval = 0; 286 retval = 0;
240 } else 287 } else
241 retval = -ENOMEM; 288 retval = -ENOMEM;
242 } 289 }
290 mutex_unlock(&uevent_sock_mutex);
243#endif 291#endif
244 292
245 /* call uevent_helper, usually only enabled during early boot */ 293 /* call uevent_helper, usually only enabled during early boot */
246 if (uevent_helper[0]) { 294 if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
247 char *argv [3]; 295 char *argv [3];
248 296
249 argv [0] = uevent_helper; 297 argv [0] = uevent_helper;
@@ -269,7 +317,7 @@ exit:
269EXPORT_SYMBOL_GPL(kobject_uevent_env); 317EXPORT_SYMBOL_GPL(kobject_uevent_env);
270 318
271/** 319/**
272 * kobject_uevent - notify userspace by ending an uevent 320 * kobject_uevent - notify userspace by sending an uevent
273 * 321 *
274 * @action: action that is happening 322 * @action: action that is happening
275 * @kobj: struct kobject that the action is happening to 323 * @kobj: struct kobject that the action is happening to
@@ -319,18 +367,59 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
319EXPORT_SYMBOL_GPL(add_uevent_var); 367EXPORT_SYMBOL_GPL(add_uevent_var);
320 368
321#if defined(CONFIG_NET) 369#if defined(CONFIG_NET)
322static int __init kobject_uevent_init(void) 370static int uevent_net_init(struct net *net)
323{ 371{
324 uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, 372 struct uevent_sock *ue_sk;
325 1, NULL, NULL, THIS_MODULE); 373
326 if (!uevent_sock) { 374 ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
375 if (!ue_sk)
376 return -ENOMEM;
377
378 ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
379 1, NULL, NULL, THIS_MODULE);
380 if (!ue_sk->sk) {
327 printk(KERN_ERR 381 printk(KERN_ERR
328 "kobject_uevent: unable to create netlink socket!\n"); 382 "kobject_uevent: unable to create netlink socket!\n");
383 kfree(ue_sk);
329 return -ENODEV; 384 return -ENODEV;
330 } 385 }
331 netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); 386 mutex_lock(&uevent_sock_mutex);
387 list_add_tail(&ue_sk->list, &uevent_sock_list);
388 mutex_unlock(&uevent_sock_mutex);
332 return 0; 389 return 0;
333} 390}
334 391
392static void uevent_net_exit(struct net *net)
393{
394 struct uevent_sock *ue_sk;
395
396 mutex_lock(&uevent_sock_mutex);
397 list_for_each_entry(ue_sk, &uevent_sock_list, list) {
398 if (sock_net(ue_sk->sk) == net)
399 goto found;
400 }
401 mutex_unlock(&uevent_sock_mutex);
402 return;
403
404found:
405 list_del(&ue_sk->list);
406 mutex_unlock(&uevent_sock_mutex);
407
408 netlink_kernel_release(ue_sk->sk);
409 kfree(ue_sk);
410}
411
412static struct pernet_operations uevent_net_ops = {
413 .init = uevent_net_init,
414 .exit = uevent_net_exit,
415};
416
417static int __init kobject_uevent_init(void)
418{
419 netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
420 return register_pernet_subsys(&uevent_net_ops);
421}
422
423
335postcore_initcall(kobject_uevent_init); 424postcore_initcall(kobject_uevent_init);
336#endif 425#endif
diff --git a/lib/kref.c b/lib/kref.c
index 9ecd6e86561..3efb882b11d 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -13,17 +13,7 @@
13 13
14#include <linux/kref.h> 14#include <linux/kref.h>
15#include <linux/module.h> 15#include <linux/module.h>
16 16#include <linux/slab.h>
17/**
18 * kref_set - initialize object and set refcount to requested number.
19 * @kref: object in question.
20 * @num: initial reference counter
21 */
22void kref_set(struct kref *kref, int num)
23{
24 atomic_set(&kref->refcount, num);
25 smp_mb();
26}
27 17
28/** 18/**
29 * kref_init - initialize object. 19 * kref_init - initialize object.
@@ -31,7 +21,8 @@ void kref_set(struct kref *kref, int num)
31 */ 21 */
32void kref_init(struct kref *kref) 22void kref_init(struct kref *kref)
33{ 23{
34 kref_set(kref, 1); 24 atomic_set(&kref->refcount, 1);
25 smp_mb();
35} 26}
36 27
37/** 28/**
@@ -71,7 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
71 return 0; 62 return 0;
72} 63}
73 64
74EXPORT_SYMBOL(kref_set); 65
66/**
67 * kref_sub - subtract a number of refcounts for object.
68 * @kref: object.
69 * @count: Number of recounts to subtract.
70 * @release: pointer to the function that will clean up the object when the
71 * last reference to the object is released.
72 * This pointer is required, and it is not acceptable to pass kfree
73 * in as this function.
74 *
75 * Subtract @count from the refcount, and if 0, call release().
76 * Return 1 if the object was removed, otherwise return 0. Beware, if this
77 * function returns 0, you still can not count on the kref from remaining in
78 * memory. Only use the return value if you want to see if the kref is now
79 * gone, not present.
80 */
81int kref_sub(struct kref *kref, unsigned int count,
82 void (*release)(struct kref *kref))
83{
84 WARN_ON(release == NULL);
85 WARN_ON(release == (void (*)(struct kref *))kfree);
86
87 if (atomic_sub_and_test((int) count, &kref->refcount)) {
88 release(kref);
89 return 1;
90 }
91 return 0;
92}
93
75EXPORT_SYMBOL(kref_init); 94EXPORT_SYMBOL(kref_init);
76EXPORT_SYMBOL(kref_get); 95EXPORT_SYMBOL(kref_get);
77EXPORT_SYMBOL(kref_put); 96EXPORT_SYMBOL(kref_put);
97EXPORT_SYMBOL(kref_sub);
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 00000000000..157cd88a6ff
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
1#include <linux/kernel.h>
2#include <linux/gcd.h>
3#include <linux/module.h>
4
5/* Lowest common multiple */
6unsigned long lcm(unsigned long a, unsigned long b)
7{
8 if (a && b)
9 return (a * b) / gcd(a, b);
10 else if (b)
11 return b;
12
13 return a;
14}
15EXPORT_SYMBOL_GPL(lcm);
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 1a39f4e3ae1..344c710d16c 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add);
43 */ 43 */
44void list_del(struct list_head *entry) 44void list_del(struct list_head *entry)
45{ 45{
46 WARN(entry->next == LIST_POISON1,
47 "list_del corruption, next is LIST_POISON1 (%p)\n",
48 LIST_POISON1);
49 WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
50 "list_del corruption, prev is LIST_POISON2 (%p)\n",
51 LIST_POISON2);
46 WARN(entry->prev->next != entry, 52 WARN(entry->prev->next != entry,
47 "list_del corruption. prev->next should be %p, " 53 "list_del corruption. prev->next should be %p, "
48 "but was %p\n", entry, entry->prev->next); 54 "but was %p\n", entry, entry->prev->next);
diff --git a/lib/list_sort.c b/lib/list_sort.c
new file mode 100644
index 00000000000..d7325c6b103
--- /dev/null
+++ b/lib/list_sort.c
@@ -0,0 +1,291 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/list_sort.h>
4#include <linux/slab.h>
5#include <linux/list.h>
6
7#define MAX_LIST_LENGTH_BITS 20
8
9/*
10 * Returns a list organized in an intermediate format suited
11 * to chaining of merge() calls: null-terminated, no reserved or
12 * sentinel head node, "prev" links not maintained.
13 */
14static struct list_head *merge(void *priv,
15 int (*cmp)(void *priv, struct list_head *a,
16 struct list_head *b),
17 struct list_head *a, struct list_head *b)
18{
19 struct list_head head, *tail = &head;
20
21 while (a && b) {
22 /* if equal, take 'a' -- important for sort stability */
23 if ((*cmp)(priv, a, b) <= 0) {
24 tail->next = a;
25 a = a->next;
26 } else {
27 tail->next = b;
28 b = b->next;
29 }
30 tail = tail->next;
31 }
32 tail->next = a?:b;
33 return head.next;
34}
35
36/*
37 * Combine final list merge with restoration of standard doubly-linked
38 * list structure. This approach duplicates code from merge(), but
39 * runs faster than the tidier alternatives of either a separate final
40 * prev-link restoration pass, or maintaining the prev links
41 * throughout.
42 */
43static void merge_and_restore_back_links(void *priv,
44 int (*cmp)(void *priv, struct list_head *a,
45 struct list_head *b),
46 struct list_head *head,
47 struct list_head *a, struct list_head *b)
48{
49 struct list_head *tail = head;
50
51 while (a && b) {
52 /* if equal, take 'a' -- important for sort stability */
53 if ((*cmp)(priv, a, b) <= 0) {
54 tail->next = a;
55 a->prev = tail;
56 a = a->next;
57 } else {
58 tail->next = b;
59 b->prev = tail;
60 b = b->next;
61 }
62 tail = tail->next;
63 }
64 tail->next = a ? : b;
65
66 do {
67 /*
68 * In worst cases this loop may run many iterations.
69 * Continue callbacks to the client even though no
70 * element comparison is needed, so the client's cmp()
71 * routine can invoke cond_resched() periodically.
72 */
73 (*cmp)(priv, tail->next, tail->next);
74
75 tail->next->prev = tail;
76 tail = tail->next;
77 } while (tail->next);
78
79 tail->next = head;
80 head->prev = tail;
81}
82
83/**
84 * list_sort - sort a list
85 * @priv: private data, opaque to list_sort(), passed to @cmp
86 * @head: the list to sort
87 * @cmp: the elements comparison function
88 *
89 * This function implements "merge sort", which has O(nlog(n))
90 * complexity.
91 *
92 * The comparison function @cmp must return a negative value if @a
93 * should sort before @b, and a positive value if @a should sort after
94 * @b. If @a and @b are equivalent, and their original relative
95 * ordering is to be preserved, @cmp must return 0.
96 */
97void list_sort(void *priv, struct list_head *head,
98 int (*cmp)(void *priv, struct list_head *a,
99 struct list_head *b))
100{
101 struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
102 -- last slot is a sentinel */
103 int lev; /* index into part[] */
104 int max_lev = 0;
105 struct list_head *list;
106
107 if (list_empty(head))
108 return;
109
110 memset(part, 0, sizeof(part));
111
112 head->prev->next = NULL;
113 list = head->next;
114
115 while (list) {
116 struct list_head *cur = list;
117 list = list->next;
118 cur->next = NULL;
119
120 for (lev = 0; part[lev]; lev++) {
121 cur = merge(priv, cmp, part[lev], cur);
122 part[lev] = NULL;
123 }
124 if (lev > max_lev) {
125 if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
126 printk_once(KERN_DEBUG "list passed to"
127 " list_sort() too long for"
128 " efficiency\n");
129 lev--;
130 }
131 max_lev = lev;
132 }
133 part[lev] = cur;
134 }
135
136 for (lev = 0; lev < max_lev; lev++)
137 if (part[lev])
138 list = merge(priv, cmp, part[lev], list);
139
140 merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
141}
142EXPORT_SYMBOL(list_sort);
143
144#ifdef CONFIG_TEST_LIST_SORT
145
146#include <linux/random.h>
147
148/*
149 * The pattern of set bits in the list length determines which cases
150 * are hit in list_sort().
151 */
152#define TEST_LIST_LEN (512+128+2) /* not including head */
153
154#define TEST_POISON1 0xDEADBEEF
155#define TEST_POISON2 0xA324354C
156
157struct debug_el {
158 unsigned int poison1;
159 struct list_head list;
160 unsigned int poison2;
161 int value;
162 unsigned serial;
163};
164
165/* Array, containing pointers to all elements in the test list */
166static struct debug_el **elts __initdata;
167
168static int __init check(struct debug_el *ela, struct debug_el *elb)
169{
170 if (ela->serial >= TEST_LIST_LEN) {
171 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
172 ela->serial);
173 return -EINVAL;
174 }
175 if (elb->serial >= TEST_LIST_LEN) {
176 printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
177 elb->serial);
178 return -EINVAL;
179 }
180 if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
181 printk(KERN_ERR "list_sort_test: error: phantom element\n");
182 return -EINVAL;
183 }
184 if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
185 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
186 ela->poison1, ela->poison2);
187 return -EINVAL;
188 }
189 if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
190 printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
191 elb->poison1, elb->poison2);
192 return -EINVAL;
193 }
194 return 0;
195}
196
197static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
198{
199 struct debug_el *ela, *elb;
200
201 ela = container_of(a, struct debug_el, list);
202 elb = container_of(b, struct debug_el, list);
203
204 check(ela, elb);
205 return ela->value - elb->value;
206}
207
208static int __init list_sort_test(void)
209{
210 int i, count = 1, err = -EINVAL;
211 struct debug_el *el;
212 struct list_head *cur, *tmp;
213 LIST_HEAD(head);
214
215 printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
216
217 elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
218 if (!elts) {
219 printk(KERN_ERR "list_sort_test: error: cannot allocate "
220 "memory\n");
221 goto exit;
222 }
223
224 for (i = 0; i < TEST_LIST_LEN; i++) {
225 el = kmalloc(sizeof(*el), GFP_KERNEL);
226 if (!el) {
227 printk(KERN_ERR "list_sort_test: error: cannot "
228 "allocate memory\n");
229 goto exit;
230 }
231 /* force some equivalencies */
232 el->value = random32() % (TEST_LIST_LEN/3);
233 el->serial = i;
234 el->poison1 = TEST_POISON1;
235 el->poison2 = TEST_POISON2;
236 elts[i] = el;
237 list_add_tail(&el->list, &head);
238 }
239
240 list_sort(NULL, &head, cmp);
241
242 for (cur = head.next; cur->next != &head; cur = cur->next) {
243 struct debug_el *el1;
244 int cmp_result;
245
246 if (cur->next->prev != cur) {
247 printk(KERN_ERR "list_sort_test: error: list is "
248 "corrupted\n");
249 goto exit;
250 }
251
252 cmp_result = cmp(NULL, cur, cur->next);
253 if (cmp_result > 0) {
254 printk(KERN_ERR "list_sort_test: error: list is not "
255 "sorted\n");
256 goto exit;
257 }
258
259 el = container_of(cur, struct debug_el, list);
260 el1 = container_of(cur->next, struct debug_el, list);
261 if (cmp_result == 0 && el->serial >= el1->serial) {
262 printk(KERN_ERR "list_sort_test: error: order of "
263 "equivalent elements not preserved\n");
264 goto exit;
265 }
266
267 if (check(el, el1)) {
268 printk(KERN_ERR "list_sort_test: error: element check "
269 "failed\n");
270 goto exit;
271 }
272 count++;
273 }
274
275 if (count != TEST_LIST_LEN) {
276 printk(KERN_ERR "list_sort_test: error: bad list length %d",
277 count);
278 goto exit;
279 }
280
281 err = 0;
282exit:
283 kfree(elts);
284 list_for_each_safe(cur, tmp, &head) {
285 list_del(cur);
286 kfree(container_of(cur, struct debug_el, list));
287 }
288 return err;
289}
290module_init(list_sort_test);
291#endif /* CONFIG_TEST_LIST_SORT */
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index e4a6482d8b2..00000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,527 +0,0 @@
1/*
2 * Procedures for maintaining information about logical memory blocks.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/bitops.h>
16#include <linux/lmb.h>
17
18#define LMB_ALLOC_ANYWHERE 0
19
20struct lmb lmb;
21
22static int lmb_debug;
23
24static int __init early_lmb(char *p)
25{
26 if (p && strstr(p, "debug"))
27 lmb_debug = 1;
28 return 0;
29}
30early_param("lmb", early_lmb);
31
32static void lmb_dump(struct lmb_region *region, char *name)
33{
34 unsigned long long base, size;
35 int i;
36
37 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
38
39 for (i = 0; i < region->cnt; i++) {
40 base = region->region[i].base;
41 size = region->region[i].size;
42
43 pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
44 name, i, base, base + size - 1, size);
45 }
46}
47
48void lmb_dump_all(void)
49{
50 if (!lmb_debug)
51 return;
52
53 pr_info("LMB configuration:\n");
54 pr_info(" rmo_size = 0x%llx\n", (unsigned long long)lmb.rmo_size);
55 pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
56
57 lmb_dump(&lmb.memory, "memory");
58 lmb_dump(&lmb.reserved, "reserved");
59}
60
61static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
62 u64 size2)
63{
64 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
65}
66
67static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
68{
69 if (base2 == base1 + size1)
70 return 1;
71 else if (base1 == base2 + size2)
72 return -1;
73
74 return 0;
75}
76
77static long lmb_regions_adjacent(struct lmb_region *rgn,
78 unsigned long r1, unsigned long r2)
79{
80 u64 base1 = rgn->region[r1].base;
81 u64 size1 = rgn->region[r1].size;
82 u64 base2 = rgn->region[r2].base;
83 u64 size2 = rgn->region[r2].size;
84
85 return lmb_addrs_adjacent(base1, size1, base2, size2);
86}
87
88static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
89{
90 unsigned long i;
91
92 for (i = r; i < rgn->cnt - 1; i++) {
93 rgn->region[i].base = rgn->region[i + 1].base;
94 rgn->region[i].size = rgn->region[i + 1].size;
95 }
96 rgn->cnt--;
97}
98
99/* Assumption: base addr of region 1 < base addr of region 2 */
100static void lmb_coalesce_regions(struct lmb_region *rgn,
101 unsigned long r1, unsigned long r2)
102{
103 rgn->region[r1].size += rgn->region[r2].size;
104 lmb_remove_region(rgn, r2);
105}
106
107void __init lmb_init(void)
108{
109 /* Create a dummy zero size LMB which will get coalesced away later.
110 * This simplifies the lmb_add() code below...
111 */
112 lmb.memory.region[0].base = 0;
113 lmb.memory.region[0].size = 0;
114 lmb.memory.cnt = 1;
115
116 /* Ditto. */
117 lmb.reserved.region[0].base = 0;
118 lmb.reserved.region[0].size = 0;
119 lmb.reserved.cnt = 1;
120}
121
122void __init lmb_analyze(void)
123{
124 int i;
125
126 lmb.memory.size = 0;
127
128 for (i = 0; i < lmb.memory.cnt; i++)
129 lmb.memory.size += lmb.memory.region[i].size;
130}
131
132static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
133{
134 unsigned long coalesced = 0;
135 long adjacent, i;
136
137 if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
138 rgn->region[0].base = base;
139 rgn->region[0].size = size;
140 return 0;
141 }
142
143 /* First try and coalesce this LMB with another. */
144 for (i = 0; i < rgn->cnt; i++) {
145 u64 rgnbase = rgn->region[i].base;
146 u64 rgnsize = rgn->region[i].size;
147
148 if ((rgnbase == base) && (rgnsize == size))
149 /* Already have this region, so we're done */
150 return 0;
151
152 adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
153 if (adjacent > 0) {
154 rgn->region[i].base -= size;
155 rgn->region[i].size += size;
156 coalesced++;
157 break;
158 } else if (adjacent < 0) {
159 rgn->region[i].size += size;
160 coalesced++;
161 break;
162 }
163 }
164
165 if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
166 lmb_coalesce_regions(rgn, i, i+1);
167 coalesced++;
168 }
169
170 if (coalesced)
171 return coalesced;
172 if (rgn->cnt >= MAX_LMB_REGIONS)
173 return -1;
174
175 /* Couldn't coalesce the LMB, so add it to the sorted table. */
176 for (i = rgn->cnt - 1; i >= 0; i--) {
177 if (base < rgn->region[i].base) {
178 rgn->region[i+1].base = rgn->region[i].base;
179 rgn->region[i+1].size = rgn->region[i].size;
180 } else {
181 rgn->region[i+1].base = base;
182 rgn->region[i+1].size = size;
183 break;
184 }
185 }
186
187 if (base < rgn->region[0].base) {
188 rgn->region[0].base = base;
189 rgn->region[0].size = size;
190 }
191 rgn->cnt++;
192
193 return 0;
194}
195
196long lmb_add(u64 base, u64 size)
197{
198 struct lmb_region *_rgn = &lmb.memory;
199
200 /* On pSeries LPAR systems, the first LMB is our RMO region. */
201 if (base == 0)
202 lmb.rmo_size = size;
203
204 return lmb_add_region(_rgn, base, size);
205
206}
207
208long lmb_remove(u64 base, u64 size)
209{
210 struct lmb_region *rgn = &(lmb.memory);
211 u64 rgnbegin, rgnend;
212 u64 end = base + size;
213 int i;
214
215 rgnbegin = rgnend = 0; /* supress gcc warnings */
216
217 /* Find the region where (base, size) belongs to */
218 for (i=0; i < rgn->cnt; i++) {
219 rgnbegin = rgn->region[i].base;
220 rgnend = rgnbegin + rgn->region[i].size;
221
222 if ((rgnbegin <= base) && (end <= rgnend))
223 break;
224 }
225
226 /* Didn't find the region */
227 if (i == rgn->cnt)
228 return -1;
229
230 /* Check to see if we are removing entire region */
231 if ((rgnbegin == base) && (rgnend == end)) {
232 lmb_remove_region(rgn, i);
233 return 0;
234 }
235
236 /* Check to see if region is matching at the front */
237 if (rgnbegin == base) {
238 rgn->region[i].base = end;
239 rgn->region[i].size -= size;
240 return 0;
241 }
242
243 /* Check to see if the region is matching at the end */
244 if (rgnend == end) {
245 rgn->region[i].size -= size;
246 return 0;
247 }
248
249 /*
250 * We need to split the entry - adjust the current one to the
251 * beginging of the hole and add the region after hole.
252 */
253 rgn->region[i].size = base - rgn->region[i].base;
254 return lmb_add_region(rgn, end, rgnend - end);
255}
256
257long __init lmb_reserve(u64 base, u64 size)
258{
259 struct lmb_region *_rgn = &lmb.reserved;
260
261 BUG_ON(0 == size);
262
263 return lmb_add_region(_rgn, base, size);
264}
265
266long __init lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
267{
268 unsigned long i;
269
270 for (i = 0; i < rgn->cnt; i++) {
271 u64 rgnbase = rgn->region[i].base;
272 u64 rgnsize = rgn->region[i].size;
273 if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
274 break;
275 }
276
277 return (i < rgn->cnt) ? i : -1;
278}
279
280static u64 lmb_align_down(u64 addr, u64 size)
281{
282 return addr & ~(size - 1);
283}
284
285static u64 lmb_align_up(u64 addr, u64 size)
286{
287 return (addr + (size - 1)) & ~(size - 1);
288}
289
290static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
291 u64 size, u64 align)
292{
293 u64 base, res_base;
294 long j;
295
296 base = lmb_align_down((end - size), align);
297 while (start <= base) {
298 j = lmb_overlaps_region(&lmb.reserved, base, size);
299 if (j < 0) {
300 /* this area isn't reserved, take it */
301 if (lmb_add_region(&lmb.reserved, base, size) < 0)
302 base = ~(u64)0;
303 return base;
304 }
305 res_base = lmb.reserved.region[j].base;
306 if (res_base < size)
307 break;
308 base = lmb_align_down(res_base - size, align);
309 }
310
311 return ~(u64)0;
312}
313
314static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
315 u64 (*nid_range)(u64, u64, int *),
316 u64 size, u64 align, int nid)
317{
318 u64 start, end;
319
320 start = mp->base;
321 end = start + mp->size;
322
323 start = lmb_align_up(start, align);
324 while (start < end) {
325 u64 this_end;
326 int this_nid;
327
328 this_end = nid_range(start, end, &this_nid);
329 if (this_nid == nid) {
330 u64 ret = lmb_alloc_nid_unreserved(start, this_end,
331 size, align);
332 if (ret != ~(u64)0)
333 return ret;
334 }
335 start = this_end;
336 }
337
338 return ~(u64)0;
339}
340
341u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
342 u64 (*nid_range)(u64 start, u64 end, int *nid))
343{
344 struct lmb_region *mem = &lmb.memory;
345 int i;
346
347 BUG_ON(0 == size);
348
349 size = lmb_align_up(size, align);
350
351 for (i = 0; i < mem->cnt; i++) {
352 u64 ret = lmb_alloc_nid_region(&mem->region[i],
353 nid_range,
354 size, align, nid);
355 if (ret != ~(u64)0)
356 return ret;
357 }
358
359 return lmb_alloc(size, align);
360}
361
362u64 __init lmb_alloc(u64 size, u64 align)
363{
364 return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
365}
366
367u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
368{
369 u64 alloc;
370
371 alloc = __lmb_alloc_base(size, align, max_addr);
372
373 if (alloc == 0)
374 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
375 (unsigned long long) size, (unsigned long long) max_addr);
376
377 return alloc;
378}
379
380u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
381{
382 long i, j;
383 u64 base = 0;
384 u64 res_base;
385
386 BUG_ON(0 == size);
387
388 size = lmb_align_up(size, align);
389
390 /* On some platforms, make sure we allocate lowmem */
391 /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
392 if (max_addr == LMB_ALLOC_ANYWHERE)
393 max_addr = LMB_REAL_LIMIT;
394
395 for (i = lmb.memory.cnt - 1; i >= 0; i--) {
396 u64 lmbbase = lmb.memory.region[i].base;
397 u64 lmbsize = lmb.memory.region[i].size;
398
399 if (lmbsize < size)
400 continue;
401 if (max_addr == LMB_ALLOC_ANYWHERE)
402 base = lmb_align_down(lmbbase + lmbsize - size, align);
403 else if (lmbbase < max_addr) {
404 base = min(lmbbase + lmbsize, max_addr);
405 base = lmb_align_down(base - size, align);
406 } else
407 continue;
408
409 while (base && lmbbase <= base) {
410 j = lmb_overlaps_region(&lmb.reserved, base, size);
411 if (j < 0) {
412 /* this area isn't reserved, take it */
413 if (lmb_add_region(&lmb.reserved, base, size) < 0)
414 return 0;
415 return base;
416 }
417 res_base = lmb.reserved.region[j].base;
418 if (res_base < size)
419 break;
420 base = lmb_align_down(res_base - size, align);
421 }
422 }
423 return 0;
424}
425
426/* You must call lmb_analyze() before this. */
427u64 __init lmb_phys_mem_size(void)
428{
429 return lmb.memory.size;
430}
431
432u64 __init lmb_end_of_DRAM(void)
433{
434 int idx = lmb.memory.cnt - 1;
435
436 return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
437}
438
439/* You must call lmb_analyze() after this. */
440void __init lmb_enforce_memory_limit(u64 memory_limit)
441{
442 unsigned long i;
443 u64 limit;
444 struct lmb_property *p;
445
446 if (!memory_limit)
447 return;
448
449 /* Truncate the lmb regions to satisfy the memory limit. */
450 limit = memory_limit;
451 for (i = 0; i < lmb.memory.cnt; i++) {
452 if (limit > lmb.memory.region[i].size) {
453 limit -= lmb.memory.region[i].size;
454 continue;
455 }
456
457 lmb.memory.region[i].size = limit;
458 lmb.memory.cnt = i + 1;
459 break;
460 }
461
462 if (lmb.memory.region[0].size < lmb.rmo_size)
463 lmb.rmo_size = lmb.memory.region[0].size;
464
465 memory_limit = lmb_end_of_DRAM();
466
467 /* And truncate any reserves above the limit also. */
468 for (i = 0; i < lmb.reserved.cnt; i++) {
469 p = &lmb.reserved.region[i];
470
471 if (p->base > memory_limit)
472 p->size = 0;
473 else if ((p->base + p->size) > memory_limit)
474 p->size = memory_limit - p->base;
475
476 if (p->size == 0) {
477 lmb_remove_region(&lmb.reserved, i);
478 i--;
479 }
480 }
481}
482
483int __init lmb_is_reserved(u64 addr)
484{
485 int i;
486
487 for (i = 0; i < lmb.reserved.cnt; i++) {
488 u64 upper = lmb.reserved.region[i].base +
489 lmb.reserved.region[i].size - 1;
490 if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
491 return 1;
492 }
493 return 0;
494}
495
496/*
497 * Given a <base, len>, find which memory regions belong to this range.
498 * Adjust the request and return a contiguous chunk.
499 */
500int lmb_find(struct lmb_property *res)
501{
502 int i;
503 u64 rstart, rend;
504
505 rstart = res->base;
506 rend = rstart + res->size - 1;
507
508 for (i = 0; i < lmb.memory.cnt; i++) {
509 u64 start = lmb.memory.region[i].base;
510 u64 end = start + lmb.memory.region[i].size - 1;
511
512 if (start > rend)
513 return -1;
514
515 if ((end >= rstart) && (start < rend)) {
516 /* adjust the request */
517 if (rstart < start)
518 rstart = start;
519 if (rend > end)
520 rend = end;
521 res->base = rstart;
522 res->size = rend - rstart + 1;
523 return 0;
524 }
525 }
526 return -1;
527}
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
new file mode 100644
index 00000000000..270de9d31b8
--- /dev/null
+++ b/lib/lru_cache.c
@@ -0,0 +1,560 @@
1/*
2 lru_cache.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
26#include <linux/module.h>
27#include <linux/bitops.h>
28#include <linux/slab.h>
29#include <linux/string.h> /* for memset */
30#include <linux/seq_file.h> /* for seq_printf */
31#include <linux/lru_cache.h>
32
33MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
34 "Lars Ellenberg <lars@linbit.com>");
35MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
36MODULE_LICENSE("GPL");
37
/* this is developers aid only.
 * it catches concurrent access (lack of locking on the users part) */
/* Assert sane entry into an lru_cache operation: a variable named "lc"
 * must be in scope, non-NULL and non-empty, and no other thread may be
 * inside the cache right now (tracked via the __LC_PARANOIA flag bit). */
#define PARANOIA_ENTRY() do {		\
	BUG_ON(!lc);			\
	BUG_ON(!lc->nr_elements);	\
	BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
} while (0)

/* Counterpart to PARANOIA_ENTRY(): drop __LC_PARANOIA, then return x
 * (if any). Every exit path from a PARANOIA_ENTRY() section must go
 * through RETURN(), or the next entry will BUG(). */
#define RETURN(x...)     do { \
	clear_bit(__LC_PARANOIA, &lc->flags); \
	smp_mb__after_clear_bit(); return x ; } while (0)

/* BUG() if e is not one of the elements tracked by lc */
#define PARANOIA_LC_ELEMENT(lc, e) do {	\
	struct lru_cache *lc_ = (lc);	\
	struct lc_element *e_ = (e);	\
	unsigned i = e_->lc_index; \
	BUG_ON(i >= lc_->nr_elements);	\
	BUG_ON(lc_->lc_element[i] != e_); } while (0)
57
58/**
59 * lc_create - prepares to track objects in an active set
60 * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
61 * @e_count: number of elements allowed to be active simultaneously
62 * @e_size: size of the tracked objects
63 * @e_off: offset to the &struct lc_element member in a tracked object
64 *
65 * Returns a pointer to a newly initialized struct lru_cache on success,
66 * or NULL on (allocation) failure.
67 */
68struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
69 unsigned e_count, size_t e_size, size_t e_off)
70{
71 struct hlist_head *slot = NULL;
72 struct lc_element **element = NULL;
73 struct lru_cache *lc;
74 struct lc_element *e;
75 unsigned cache_obj_size = kmem_cache_size(cache);
76 unsigned i;
77
78 WARN_ON(cache_obj_size < e_size);
79 if (cache_obj_size < e_size)
80 return NULL;
81
82 /* e_count too big; would probably fail the allocation below anyways.
83 * for typical use cases, e_count should be few thousand at most. */
84 if (e_count > LC_MAX_ACTIVE)
85 return NULL;
86
87 slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL);
88 if (!slot)
89 goto out_fail;
90 element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
91 if (!element)
92 goto out_fail;
93
94 lc = kzalloc(sizeof(*lc), GFP_KERNEL);
95 if (!lc)
96 goto out_fail;
97
98 INIT_LIST_HEAD(&lc->in_use);
99 INIT_LIST_HEAD(&lc->lru);
100 INIT_LIST_HEAD(&lc->free);
101
102 lc->name = name;
103 lc->element_size = e_size;
104 lc->element_off = e_off;
105 lc->nr_elements = e_count;
106 lc->new_number = LC_FREE;
107 lc->lc_cache = cache;
108 lc->lc_element = element;
109 lc->lc_slot = slot;
110
111 /* preallocate all objects */
112 for (i = 0; i < e_count; i++) {
113 void *p = kmem_cache_alloc(cache, GFP_KERNEL);
114 if (!p)
115 break;
116 memset(p, 0, lc->element_size);
117 e = p + e_off;
118 e->lc_index = i;
119 e->lc_number = LC_FREE;
120 list_add(&e->list, &lc->free);
121 element[i] = e;
122 }
123 if (i == e_count)
124 return lc;
125
126 /* else: could not allocate all elements, give up */
127 for (i--; i; i--) {
128 void *p = element[i];
129 kmem_cache_free(cache, p - e_off);
130 }
131 kfree(lc);
132out_fail:
133 kfree(element);
134 kfree(slot);
135 return NULL;
136}
137
138void lc_free_by_index(struct lru_cache *lc, unsigned i)
139{
140 void *p = lc->lc_element[i];
141 WARN_ON(!p);
142 if (p) {
143 p -= lc->element_off;
144 kmem_cache_free(lc->lc_cache, p);
145 }
146}
147
148/**
149 * lc_destroy - frees memory allocated by lc_create()
150 * @lc: the lru cache to destroy
151 */
152void lc_destroy(struct lru_cache *lc)
153{
154 unsigned i;
155 if (!lc)
156 return;
157 for (i = 0; i < lc->nr_elements; i++)
158 lc_free_by_index(lc, i);
159 kfree(lc->lc_element);
160 kfree(lc->lc_slot);
161 kfree(lc);
162}
163
164/**
165 * lc_reset - does a full reset for @lc and the hash table slots.
166 * @lc: the lru cache to operate on
167 *
168 * It is roughly the equivalent of re-allocating a fresh lru_cache object,
169 * basically a short cut to lc_destroy(lc); lc = lc_create(...);
170 */
171void lc_reset(struct lru_cache *lc)
172{
173 unsigned i;
174
175 INIT_LIST_HEAD(&lc->in_use);
176 INIT_LIST_HEAD(&lc->lru);
177 INIT_LIST_HEAD(&lc->free);
178 lc->used = 0;
179 lc->hits = 0;
180 lc->misses = 0;
181 lc->starving = 0;
182 lc->dirty = 0;
183 lc->changed = 0;
184 lc->flags = 0;
185 lc->changing_element = NULL;
186 lc->new_number = LC_FREE;
187 memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
188
189 for (i = 0; i < lc->nr_elements; i++) {
190 struct lc_element *e = lc->lc_element[i];
191 void *p = e;
192 p -= lc->element_off;
193 memset(p, 0, lc->element_size);
194 /* re-init it */
195 e->lc_index = i;
196 e->lc_number = LC_FREE;
197 list_add(&e->list, &lc->free);
198 }
199}
200
/**
 * lc_seq_printf_stats - print stats about @lc into @seq
 * @seq: the seq_file to print into
 * @lc: the lru cache to print statistics of
 *
 * Returns whatever seq_printf() returns for the emitted line.
 */
size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{
	/* NOTE:
	 * total calls to lc_get are
	 * (starving + hits + misses)
	 * misses include "dirty" count (update from an other thread in
	 * progress) and "changed", when this in fact lead to an successful
	 * update of the cache.
	 */
	return seq_printf(seq, "\t%s: used:%u/%u "
		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
		lc->name, lc->used, lc->nr_elements,
		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
}
220
221static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
222{
223 return lc->lc_slot + (enr % lc->nr_elements);
224}
225
226
/**
 * lc_find - find element by label, if present in the hash table
 * @lc: The lru_cache object
 * @enr: element number
 *
 * Returns the pointer to an element, if the element with the requested
 * "label" or element number is present in the hash table,
 * or NULL if not found. Does not change the refcnt.
 */
struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
{
	struct hlist_node *n;
	struct lc_element *e;

	BUG_ON(!lc);
	BUG_ON(!lc->nr_elements);
	/* only the single hash chain @enr maps to needs to be walked */
	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
		if (e->lc_number == enr)
			return e;
	}
	return NULL;
}
249
250/* returned element will be "recycled" immediately */
251static struct lc_element *lc_evict(struct lru_cache *lc)
252{
253 struct list_head *n;
254 struct lc_element *e;
255
256 if (list_empty(&lc->lru))
257 return NULL;
258
259 n = lc->lru.prev;
260 e = list_entry(n, struct lc_element, list);
261
262 PARANOIA_LC_ELEMENT(lc, e);
263
264 list_del(&e->list);
265 hlist_del(&e->colision);
266 return e;
267}
268
/**
 * lc_del - removes an element from the cache
 * @lc: The lru_cache object
 * @e: The element to remove
 *
 * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
 * sets @e->enr to %LC_FREE.
 */
void lc_del(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt);	/* must not still be referenced */

	e->lc_number = LC_FREE;
	hlist_del_init(&e->colision);	/* no longer findable via lc_find() */
	list_move(&e->list, &lc->free);
	RETURN();
}
288
289static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
290{
291 struct list_head *n;
292
293 if (list_empty(&lc->free))
294 return lc_evict(lc);
295
296 n = lc->free.next;
297 list_del(n);
298 return list_entry(n, struct lc_element, list);
299}
300
301static int lc_unused_element_available(struct lru_cache *lc)
302{
303 if (!list_empty(&lc->free))
304 return 1; /* something on the free list */
305 if (!list_empty(&lc->lru))
306 return 1; /* something to evict */
307
308 return 0;
309}
310
311
/**
 * lc_get - get element by label, maybe change the active set
 * @lc: the lru cache to operate on
 * @enr: the label to look up
 *
 * Finds an element in the cache, increases its usage count,
 * "touches" and returns it.
 *
 * In case the requested number is not present, it needs to be added to the
 * cache. Therefore it is possible that an other element becomes evicted from
 * the cache. In either case, the user is notified so he is able to e.g. keep
 * a persistent log of the cache changes, and therefore the objects in use.
 *
 * Return values:
 *  NULL
 *     The cache was marked %LC_STARVING,
 *     or the requested label was not in the active set
 *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
 *     Or no unused or free element could be recycled (@lc will be marked as
 *     %LC_STARVING, blocking further lc_get() operations).
 *
 *  pointer to the element with the REQUESTED element number.
 *     In this case, it can be used right away
 *
 *  pointer to an UNUSED element with some different element number,
 *          where that different number may also be %LC_FREE.
 *
 *          In this case, the cache is marked %LC_DIRTY (blocking further changes),
 *          and the returned element pointer is removed from the lru list and
 *          hash collision chains.  The user now should do whatever housekeeping
 *          is necessary.
 *          Then he must call lc_changed(lc,element_pointer), to finish
 *          the change.
 *
 * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
 *       any cache set change.
 */
struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e;

	PARANOIA_ENTRY();
	/* while starving, refuse even cache hits until lc_put() clears it */
	if (lc->flags & LC_STARVING) {
		++lc->starving;
		RETURN(NULL);
	}

	e = lc_find(lc, enr);
	if (e) {
		++lc->hits;
		/* first reference takes it off the eviction (lru) list */
		if (e->refcnt++ == 0)
			lc->used++;
		list_move(&e->list, &lc->in_use); /* Not evictable... */
		RETURN(e);
	}

	++lc->misses;

	/* In case there is nothing available and we can not kick out
	 * the LRU element, we have to wait ...
	 */
	if (!lc_unused_element_available(lc)) {
		__set_bit(__LC_STARVING, &lc->flags);
		RETURN(NULL);
	}

	/* it was not present in the active set.
	 * we are going to recycle an unused (or even "free") element.
	 * user may need to commit a transaction to record that change.
	 * we serialize on flags & TF_DIRTY */
	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
		++lc->dirty;
		RETURN(NULL);
	}

	/* we now hold LC_DIRTY and verified availability above,
	 * so recycling must succeed */
	e = lc_get_unused_element(lc);
	BUG_ON(!e);

	clear_bit(__LC_STARVING, &lc->flags);
	BUG_ON(++e->refcnt != 1);
	lc->used++;

	/* remember the pending label change; lc_changed() completes it */
	lc->changing_element = e;
	lc->new_number = enr;

	RETURN(e);
}
399
/* similar to lc_get,
 * but only gets a new reference on an existing element.
 * you either get the requested element, or NULL.
 * will be consolidated into one function.
 */
struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e;

	PARANOIA_ENTRY();
	/* same starvation policy as lc_get(): refuse even cache hits */
	if (lc->flags & LC_STARVING) {
		++lc->starving;
		RETURN(NULL);
	}

	e = lc_find(lc, enr);
	if (e) {
		++lc->hits;
		/* first reference takes it off the eviction (lru) list */
		if (e->refcnt++ == 0)
			lc->used++;
		list_move(&e->list, &lc->in_use); /* Not evictable... */
	}
	RETURN(e);
}
424
/**
 * lc_changed - tell @lc that the change has been recorded
 * @lc: the lru cache to operate on
 * @e: the element pending label change (must be lc->changing_element)
 */
void lc_changed(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	BUG_ON(e != lc->changing_element);
	PARANOIA_LC_ELEMENT(lc, e);
	++lc->changed;
	/* commit the new label and make the element findable again */
	e->lc_number = lc->new_number;
	list_add(&e->list, &lc->in_use);
	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
	lc->changing_element = NULL;
	lc->new_number = LC_FREE;
	/* clearing LC_DIRTY unblocks further label changes via lc_get() */
	clear_bit(&__LC_DIRTY == &__LC_DIRTY ? __LC_DIRTY : __LC_DIRTY, &lc->flags);
	smp_mb__after_clear_bit();
	RETURN();
}
445
446
/**
 * lc_put - give up refcnt of @e
 * @lc: the lru cache to operate on
 * @e: the element to put
 *
 * If refcnt reaches zero, the element is moved to the lru list,
 * and a %LC_STARVING (if set) is cleared.
 * Returns the new (post-decrement) refcnt.
 */
unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_ENTRY();
	PARANOIA_LC_ELEMENT(lc, e);
	BUG_ON(e->refcnt == 0);
	/* an element with a pending label change must not be put */
	BUG_ON(e == lc->changing_element);
	if (--e->refcnt == 0) {
		/* move it to the front of LRU. */
		list_move(&e->list, &lc->lru);
		lc->used--;
		/* a newly evictable element may unblock starving lc_get() callers */
		clear_bit(__LC_STARVING, &lc->flags);
		smp_mb__after_clear_bit();
	}
	RETURN(e->refcnt);
}
471
472/**
473 * lc_element_by_index
474 * @lc: the lru cache to operate on
475 * @i: the index of the element to return
476 */
477struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
478{
479 BUG_ON(i >= lc->nr_elements);
480 BUG_ON(lc->lc_element[i] == NULL);
481 BUG_ON(lc->lc_element[i]->lc_index != i);
482 return lc->lc_element[i];
483}
484
/**
 * lc_index_of
 * @lc: the lru cache to operate on
 * @e: the element to query for its index position in lc->element
 *
 * Returns the index assigned at lc_create() time; the inverse of
 * lc_element_by_index(). BUGs (via PARANOIA_LC_ELEMENT) if @e is not
 * tracked by @lc.
 */
unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
{
	PARANOIA_LC_ELEMENT(lc, e);
	return e->lc_index;
}
495
/**
 * lc_set - associate index with label
 * @lc: the lru cache to operate on
 * @enr: the label to set
 * @index: the element index to associate label with.
 *
 * Used to initialize the active set to some previously recorded state.
 */
void lc_set(struct lru_cache *lc, unsigned int enr, int index)
{
	struct lc_element *e;

	/* silently ignore out-of-range indices */
	if (index < 0 || index >= lc->nr_elements)
		return;

	e = lc_element_by_index(lc, index);
	e->lc_number = enr;

	/* re-hash under the new label */
	hlist_del_init(&e->colision);
	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
	/* referenced elements stay in use; others become eviction candidates */
	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
}
518
519/**
520 * lc_dump - Dump a complete LRU cache to seq in textual form.
521 * @lc: the lru cache to operate on
522 * @seq: the &struct seq_file pointer to seq_printf into
523 * @utext: user supplied "heading" or other info
524 * @detail: function pointer the user may provide to dump further details
525 * of the object the lc_element is embedded in.
526 */
527void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
528 void (*detail) (struct seq_file *, struct lc_element *))
529{
530 unsigned int nr_elements = lc->nr_elements;
531 struct lc_element *e;
532 int i;
533
534 seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
535 for (i = 0; i < nr_elements; i++) {
536 e = lc_element_by_index(lc, i);
537 if (e->lc_number == LC_FREE) {
538 seq_printf(seq, "\t%2d: FREE\n", i);
539 } else {
540 seq_printf(seq, "\t%2d: %4u %4u ", i,
541 e->lc_number, e->refcnt);
542 detail(seq, e);
543 }
544 }
545}
546
547EXPORT_SYMBOL(lc_create);
548EXPORT_SYMBOL(lc_reset);
549EXPORT_SYMBOL(lc_destroy);
550EXPORT_SYMBOL(lc_set);
551EXPORT_SYMBOL(lc_del);
552EXPORT_SYMBOL(lc_try_get);
553EXPORT_SYMBOL(lc_find);
554EXPORT_SYMBOL(lc_get);
555EXPORT_SYMBOL(lc_put);
556EXPORT_SYMBOL(lc_changed);
557EXPORT_SYMBOL(lc_element_by_index);
558EXPORT_SYMBOL(lc_index_of);
559EXPORT_SYMBOL(lc_seq_printf_stats);
560EXPORT_SYMBOL(lc_seq_dump_details);
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
index 5dc6b29c157..f2fd0985022 100644
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -11,11 +11,13 @@
11 * Richard Purdie <rpurdie@openedhand.com> 11 * Richard Purdie <rpurdie@openedhand.com>
12 */ 12 */
13 13
14#ifndef STATIC
14#include <linux/module.h> 15#include <linux/module.h>
15#include <linux/kernel.h> 16#include <linux/kernel.h>
16#include <linux/lzo.h> 17#endif
17#include <asm/byteorder.h> 18
18#include <asm/unaligned.h> 19#include <asm/unaligned.h>
20#include <linux/lzo.h>
19#include "lzodefs.h" 21#include "lzodefs.h"
20 22
21#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) 23#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
@@ -244,9 +246,10 @@ lookbehind_overrun:
244 *out_len = op - out; 246 *out_len = op - out;
245 return LZO_E_LOOKBEHIND_OVERRUN; 247 return LZO_E_LOOKBEHIND_OVERRUN;
246} 248}
247 249#ifndef STATIC
248EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); 250EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
249 251
250MODULE_LICENSE("GPL"); 252MODULE_LICENSE("GPL");
251MODULE_DESCRIPTION("LZO1X Decompressor"); 253MODULE_DESCRIPTION("LZO1X Decompressor");
252 254
255#endif
diff --git a/lib/nlattr.c b/lib/nlattr.c
index c4706eb98d3..00e8a02681a 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -15,7 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <net/netlink.h> 16#include <net/netlink.h>
17 17
18static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { 18static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
19 [NLA_U8] = sizeof(u8), 19 [NLA_U8] = sizeof(u8),
20 [NLA_U16] = sizeof(u16), 20 [NLA_U16] = sizeof(u16),
21 [NLA_U32] = sizeof(u32), 21 [NLA_U32] = sizeof(u32),
@@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
23 [NLA_NESTED] = NLA_HDRLEN, 23 [NLA_NESTED] = NLA_HDRLEN,
24}; 24};
25 25
26static int validate_nla(struct nlattr *nla, int maxtype, 26static int validate_nla(const struct nlattr *nla, int maxtype,
27 const struct nla_policy *policy) 27 const struct nla_policy *policy)
28{ 28{
29 const struct nla_policy *pt; 29 const struct nla_policy *pt;
@@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype,
115 * 115 *
116 * Returns 0 on success or a negative error code. 116 * Returns 0 on success or a negative error code.
117 */ 117 */
118int nla_validate(struct nlattr *head, int len, int maxtype, 118int nla_validate(const struct nlattr *head, int len, int maxtype,
119 const struct nla_policy *policy) 119 const struct nla_policy *policy)
120{ 120{
121 struct nlattr *nla; 121 const struct nlattr *nla;
122 int rem, err; 122 int rem, err;
123 123
124 nla_for_each_attr(nla, head, len, rem) { 124 nla_for_each_attr(nla, head, len, rem) {
@@ -173,10 +173,10 @@ nla_policy_len(const struct nla_policy *p, int n)
173 * 173 *
174 * Returns 0 on success or a negative error code. 174 * Returns 0 on success or a negative error code.
175 */ 175 */
176int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, 176int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
177 const struct nla_policy *policy) 177 int len, const struct nla_policy *policy)
178{ 178{
179 struct nlattr *nla; 179 const struct nlattr *nla;
180 int rem, err; 180 int rem, err;
181 181
182 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 182 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
@@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
191 goto errout; 191 goto errout;
192 } 192 }
193 193
194 tb[type] = nla; 194 tb[type] = (struct nlattr *)nla;
195 } 195 }
196 } 196 }
197 197
@@ -212,14 +212,14 @@ errout:
212 * 212 *
213 * Returns the first attribute in the stream matching the specified type. 213 * Returns the first attribute in the stream matching the specified type.
214 */ 214 */
215struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) 215struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype)
216{ 216{
217 struct nlattr *nla; 217 const struct nlattr *nla;
218 int rem; 218 int rem;
219 219
220 nla_for_each_attr(nla, head, len, rem) 220 nla_for_each_attr(nla, head, len, rem)
221 if (nla_type(nla) == attrtype) 221 if (nla_type(nla) == attrtype)
222 return nla; 222 return (struct nlattr *)nla;
223 223
224 return NULL; 224 return NULL;
225} 225}
diff --git a/lib/parser.c b/lib/parser.c
index b00d02059a5..6e89eca5cca 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -56,13 +56,16 @@ static int match_one(char *s, const char *p, substring_t args[])
56 56
57 args[argc].from = s; 57 args[argc].from = s;
58 switch (*p++) { 58 switch (*p++) {
59 case 's': 59 case 's': {
60 if (strlen(s) == 0) 60 size_t str_len = strlen(s);
61
62 if (str_len == 0)
61 return 0; 63 return 0;
62 else if (len == -1 || len > strlen(s)) 64 if (len == -1 || len > str_len)
63 len = strlen(s); 65 len = str_len;
64 args[argc].to = s + len; 66 args[argc].to = s + len;
65 break; 67 break;
68 }
66 case 'd': 69 case 'd':
67 simple_strtol(s, &args[argc].to, 0); 70 simple_strtol(s, &args[argc].to, 0);
68 goto num; 71 goto num;
@@ -125,12 +128,13 @@ static int match_number(substring_t *s, int *result, int base)
125 char *endp; 128 char *endp;
126 char *buf; 129 char *buf;
127 int ret; 130 int ret;
131 size_t len = s->to - s->from;
128 132
129 buf = kmalloc(s->to - s->from + 1, GFP_KERNEL); 133 buf = kmalloc(len + 1, GFP_KERNEL);
130 if (!buf) 134 if (!buf)
131 return -ENOMEM; 135 return -ENOMEM;
132 memcpy(buf, s->from, s->to - s->from); 136 memcpy(buf, s->from, len);
133 buf[s->to - s->from] = '\0'; 137 buf[len] = '\0';
134 *result = simple_strtol(buf, &endp, base); 138 *result = simple_strtol(buf, &endp, base);
135 ret = 0; 139 ret = 0;
136 if (endp == buf) 140 if (endp == buf)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d73444..28f2c33c6b5 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -8,10 +8,53 @@
8#include <linux/init.h> 8#include <linux/init.h>
9#include <linux/cpu.h> 9#include <linux/cpu.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/debugobjects.h>
11 12
12static LIST_HEAD(percpu_counters); 13static LIST_HEAD(percpu_counters);
13static DEFINE_MUTEX(percpu_counters_lock); 14static DEFINE_MUTEX(percpu_counters_lock);
14 15
16#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
17
18static struct debug_obj_descr percpu_counter_debug_descr;
19
20static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
21{
22 struct percpu_counter *fbc = addr;
23
24 switch (state) {
25 case ODEBUG_STATE_ACTIVE:
26 percpu_counter_destroy(fbc);
27 debug_object_free(fbc, &percpu_counter_debug_descr);
28 return 1;
29 default:
30 return 0;
31 }
32}
33
34static struct debug_obj_descr percpu_counter_debug_descr = {
35 .name = "percpu_counter",
36 .fixup_free = percpu_counter_fixup_free,
37};
38
39static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
40{
41 debug_object_init(fbc, &percpu_counter_debug_descr);
42 debug_object_activate(fbc, &percpu_counter_debug_descr);
43}
44
45static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
46{
47 debug_object_deactivate(fbc, &percpu_counter_debug_descr);
48 debug_object_free(fbc, &percpu_counter_debug_descr);
49}
50
51#else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
52static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
53{ }
54static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
55{ }
56#endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
57
15void percpu_counter_set(struct percpu_counter *fbc, s64 amount) 58void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
16{ 59{
17 int cpu; 60 int cpu;
@@ -29,20 +72,18 @@ EXPORT_SYMBOL(percpu_counter_set);
29void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) 72void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
30{ 73{
31 s64 count; 74 s64 count;
32 s32 *pcount;
33 int cpu = get_cpu();
34 75
35 pcount = per_cpu_ptr(fbc->counters, cpu); 76 preempt_disable();
36 count = *pcount + amount; 77 count = __this_cpu_read(*fbc->counters) + amount;
37 if (count >= batch || count <= -batch) { 78 if (count >= batch || count <= -batch) {
38 spin_lock(&fbc->lock); 79 spin_lock(&fbc->lock);
39 fbc->count += count; 80 fbc->count += count;
40 *pcount = 0; 81 __this_cpu_write(*fbc->counters, 0);
41 spin_unlock(&fbc->lock); 82 spin_unlock(&fbc->lock);
42 } else { 83 } else {
43 *pcount = count; 84 __this_cpu_write(*fbc->counters, count);
44 } 85 }
45 put_cpu(); 86 preempt_enable();
46} 87}
47EXPORT_SYMBOL(__percpu_counter_add); 88EXPORT_SYMBOL(__percpu_counter_add);
48 89
@@ -75,7 +116,11 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
75 fbc->counters = alloc_percpu(s32); 116 fbc->counters = alloc_percpu(s32);
76 if (!fbc->counters) 117 if (!fbc->counters)
77 return -ENOMEM; 118 return -ENOMEM;
119
120 debug_percpu_counter_activate(fbc);
121
78#ifdef CONFIG_HOTPLUG_CPU 122#ifdef CONFIG_HOTPLUG_CPU
123 INIT_LIST_HEAD(&fbc->list);
79 mutex_lock(&percpu_counters_lock); 124 mutex_lock(&percpu_counters_lock);
80 list_add(&fbc->list, &percpu_counters); 125 list_add(&fbc->list, &percpu_counters);
81 mutex_unlock(&percpu_counters_lock); 126 mutex_unlock(&percpu_counters_lock);
@@ -89,6 +134,8 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
89 if (!fbc->counters) 134 if (!fbc->counters)
90 return; 135 return;
91 136
137 debug_percpu_counter_deactivate(fbc);
138
92#ifdef CONFIG_HOTPLUG_CPU 139#ifdef CONFIG_HOTPLUG_CPU
93 mutex_lock(&percpu_counters_lock); 140 mutex_lock(&percpu_counters_lock);
94 list_del(&fbc->list); 141 list_del(&fbc->list);
@@ -137,6 +184,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
137 return NOTIFY_OK; 184 return NOTIFY_OK;
138} 185}
139 186
187/*
188 * Compare counter against given value.
189 * Return 1 if greater, 0 if equal and -1 if less
190 */
191int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
192{
193 s64 count;
194
195 count = percpu_counter_read(fbc);
196 /* Check to see if rough count will be sufficient for comparison */
197 if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
198 if (count > rhs)
199 return 1;
200 else
201 return -1;
202 }
203 /* Need to use precise count */
204 count = percpu_counter_sum(fbc);
205 if (count > rhs)
206 return 1;
207 else if (count < rhs)
208 return -1;
209 else
210 return 0;
211}
212EXPORT_SYMBOL(percpu_counter_compare);
213
140static int __init percpu_counter_startup(void) 214static int __init percpu_counter_startup(void)
141{ 215{
142 compute_batch_value(); 216 compute_batch_value();
diff --git a/lib/plist.c b/lib/plist.c
index d6c64a824e1..1471988d919 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -54,9 +54,11 @@ static void plist_check_list(struct list_head *top)
54 54
55static void plist_check_head(struct plist_head *head) 55static void plist_check_head(struct plist_head *head)
56{ 56{
57 WARN_ON(!head->lock); 57 WARN_ON(!head->rawlock && !head->spinlock);
58 if (head->lock) 58 if (head->rawlock)
59 WARN_ON_SMP(!spin_is_locked(head->lock)); 59 WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
60 if (head->spinlock)
61 WARN_ON_SMP(!spin_is_locked(head->spinlock));
60 plist_check_list(&head->prio_list); 62 plist_check_list(&head->prio_list);
61 plist_check_list(&head->node_list); 63 plist_check_list(&head->node_list);
62} 64}
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 23abbd93cae..5086bb962b4 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -28,7 +28,6 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/notifier.h> 29#include <linux/notifier.h>
30#include <linux/cpu.h> 30#include <linux/cpu.h>
31#include <linux/gfp.h>
32#include <linux/string.h> 31#include <linux/string.h>
33#include <linux/bitops.h> 32#include <linux/bitops.h>
34#include <linux/rcupdate.h> 33#include <linux/rcupdate.h>
@@ -50,7 +49,7 @@ struct radix_tree_node {
50 unsigned int height; /* Height from the bottom */ 49 unsigned int height; /* Height from the bottom */
51 unsigned int count; 50 unsigned int count;
52 struct rcu_head rcu_head; 51 struct rcu_head rcu_head;
53 void *slots[RADIX_TREE_MAP_SIZE]; 52 void __rcu *slots[RADIX_TREE_MAP_SIZE];
54 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
55}; 54};
56 55
@@ -83,6 +82,16 @@ struct radix_tree_preload {
83}; 82};
84static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; 83static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
85 84
85static inline void *ptr_to_indirect(void *ptr)
86{
87 return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
88}
89
90static inline void *indirect_to_ptr(void *ptr)
91{
92 return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
93}
94
86static inline gfp_t root_gfp_mask(struct radix_tree_root *root) 95static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
87{ 96{
88 return root->gfp_mask & __GFP_BITS_MASK; 97 return root->gfp_mask & __GFP_BITS_MASK;
@@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
175{ 184{
176 struct radix_tree_node *node = 185 struct radix_tree_node *node =
177 container_of(head, struct radix_tree_node, rcu_head); 186 container_of(head, struct radix_tree_node, rcu_head);
187 int i;
178 188
179 /* 189 /*
180 * must only free zeroed nodes into the slab. radix_tree_shrink 190 * must only free zeroed nodes into the slab. radix_tree_shrink
181 * can leave us with a non-NULL entry in the first slot, so clear 191 * can leave us with a non-NULL entry in the first slot, so clear
182 * that here to make sure. 192 * that here to make sure.
183 */ 193 */
184 tag_clear(node, 0, 0); 194 for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
185 tag_clear(node, 1, 0); 195 tag_clear(node, i, 0);
196
186 node->slots[0] = NULL; 197 node->slots[0] = NULL;
187 node->count = 0; 198 node->count = 0;
188 199
@@ -200,6 +211,9 @@ radix_tree_node_free(struct radix_tree_node *node)
200 * ensure that the addition of a single element in the tree cannot fail. On 211 * ensure that the addition of a single element in the tree cannot fail. On
201 * success, return zero, with preemption disabled. On error, return -ENOMEM 212 * success, return zero, with preemption disabled. On error, return -ENOMEM
202 * with preemption not disabled. 213 * with preemption not disabled.
214 *
215 * To make use of this facility, the radix tree must be initialised without
216 * __GFP_WAIT being passed to INIT_RADIX_TREE().
203 */ 217 */
204int radix_tree_preload(gfp_t gfp_mask) 218int radix_tree_preload(gfp_t gfp_mask)
205{ 219{
@@ -261,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
261 return -ENOMEM; 275 return -ENOMEM;
262 276
263 /* Increase the height. */ 277 /* Increase the height. */
264 node->slots[0] = radix_tree_indirect_to_ptr(root->rnode); 278 node->slots[0] = indirect_to_ptr(root->rnode);
265 279
266 /* Propagate the aggregated tag info into the new root */ 280 /* Propagate the aggregated tag info into the new root */
267 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 281 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -272,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
272 newheight = root->height+1; 286 newheight = root->height+1;
273 node->height = newheight; 287 node->height = newheight;
274 node->count = 1; 288 node->count = 1;
275 node = radix_tree_ptr_to_indirect(node); 289 node = ptr_to_indirect(node);
276 rcu_assign_pointer(root->rnode, node); 290 rcu_assign_pointer(root->rnode, node);
277 root->height = newheight; 291 root->height = newheight;
278 } while (height > root->height); 292 } while (height > root->height);
@@ -305,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
305 return error; 319 return error;
306 } 320 }
307 321
308 slot = radix_tree_indirect_to_ptr(root->rnode); 322 slot = indirect_to_ptr(root->rnode);
309 323
310 height = root->height; 324 height = root->height;
311 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 325 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -321,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
321 rcu_assign_pointer(node->slots[offset], slot); 335 rcu_assign_pointer(node->slots[offset], slot);
322 node->count++; 336 node->count++;
323 } else 337 } else
324 rcu_assign_pointer(root->rnode, 338 rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
325 radix_tree_ptr_to_indirect(slot));
326 } 339 }
327 340
328 /* Go a level down */ 341 /* Go a level down */
@@ -361,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
361 unsigned int height, shift; 374 unsigned int height, shift;
362 struct radix_tree_node *node, **slot; 375 struct radix_tree_node *node, **slot;
363 376
364 node = rcu_dereference(root->rnode); 377 node = rcu_dereference_raw(root->rnode);
365 if (node == NULL) 378 if (node == NULL)
366 return NULL; 379 return NULL;
367 380
@@ -370,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
370 return NULL; 383 return NULL;
371 return is_slot ? (void *)&root->rnode : node; 384 return is_slot ? (void *)&root->rnode : node;
372 } 385 }
373 node = radix_tree_indirect_to_ptr(node); 386 node = indirect_to_ptr(node);
374 387
375 height = node->height; 388 height = node->height;
376 if (index > radix_tree_maxindex(height)) 389 if (index > radix_tree_maxindex(height))
@@ -381,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
381 do { 394 do {
382 slot = (struct radix_tree_node **) 395 slot = (struct radix_tree_node **)
383 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); 396 (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
384 node = rcu_dereference(*slot); 397 node = rcu_dereference_raw(*slot);
385 if (node == NULL) 398 if (node == NULL)
386 return NULL; 399 return NULL;
387 400
@@ -389,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
389 height--; 402 height--;
390 } while (height > 0); 403 } while (height > 0);
391 404
392 return is_slot ? (void *)slot:node; 405 return is_slot ? (void *)slot : indirect_to_ptr(node);
393} 406}
394 407
395/** 408/**
@@ -451,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
451 height = root->height; 464 height = root->height;
452 BUG_ON(index > radix_tree_maxindex(height)); 465 BUG_ON(index > radix_tree_maxindex(height));
453 466
454 slot = radix_tree_indirect_to_ptr(root->rnode); 467 slot = indirect_to_ptr(root->rnode);
455 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 468 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
456 469
457 while (height > 0) { 470 while (height > 0) {
@@ -505,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
505 518
506 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 519 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
507 pathp->node = NULL; 520 pathp->node = NULL;
508 slot = radix_tree_indirect_to_ptr(root->rnode); 521 slot = indirect_to_ptr(root->rnode);
509 522
510 while (height > 0) { 523 while (height > 0) {
511 int offset; 524 int offset;
@@ -543,7 +556,6 @@ out:
543} 556}
544EXPORT_SYMBOL(radix_tree_tag_clear); 557EXPORT_SYMBOL(radix_tree_tag_clear);
545 558
546#ifndef __KERNEL__ /* Only the test harness uses this at present */
547/** 559/**
548 * radix_tree_tag_get - get a tag on a radix tree node 560 * radix_tree_tag_get - get a tag on a radix tree node
549 * @root: radix tree root 561 * @root: radix tree root
@@ -554,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
554 * 566 *
555 * 0: tag not present or not set 567 * 0: tag not present or not set
556 * 1: tag set 568 * 1: tag set
569 *
570 * Note that the return value of this function may not be relied on, even if
571 * the RCU lock is held, unless tag modification and node deletion are excluded
572 * from concurrency.
557 */ 573 */
558int radix_tree_tag_get(struct radix_tree_root *root, 574int radix_tree_tag_get(struct radix_tree_root *root,
559 unsigned long index, unsigned int tag) 575 unsigned long index, unsigned int tag)
@@ -566,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root,
566 if (!root_tag_get(root, tag)) 582 if (!root_tag_get(root, tag))
567 return 0; 583 return 0;
568 584
569 node = rcu_dereference(root->rnode); 585 node = rcu_dereference_raw(root->rnode);
570 if (node == NULL) 586 if (node == NULL)
571 return 0; 587 return 0;
572 588
573 if (!radix_tree_is_indirect_ptr(node)) 589 if (!radix_tree_is_indirect_ptr(node))
574 return (index == 0); 590 return (index == 0);
575 node = radix_tree_indirect_to_ptr(node); 591 node = indirect_to_ptr(node);
576 592
577 height = node->height; 593 height = node->height;
578 if (index > radix_tree_maxindex(height)) 594 if (index > radix_tree_maxindex(height))
@@ -594,19 +610,142 @@ int radix_tree_tag_get(struct radix_tree_root *root,
594 */ 610 */
595 if (!tag_get(node, tag, offset)) 611 if (!tag_get(node, tag, offset))
596 saw_unset_tag = 1; 612 saw_unset_tag = 1;
597 if (height == 1) { 613 if (height == 1)
598 int ret = tag_get(node, tag, offset); 614 return !!tag_get(node, tag, offset);
599 615 node = rcu_dereference_raw(node->slots[offset]);
600 BUG_ON(ret && saw_unset_tag);
601 return !!ret;
602 }
603 node = rcu_dereference(node->slots[offset]);
604 shift -= RADIX_TREE_MAP_SHIFT; 616 shift -= RADIX_TREE_MAP_SHIFT;
605 height--; 617 height--;
606 } 618 }
607} 619}
608EXPORT_SYMBOL(radix_tree_tag_get); 620EXPORT_SYMBOL(radix_tree_tag_get);
609#endif 621
622/**
623 * radix_tree_range_tag_if_tagged - for each item in given range set given
624 * tag if item has another tag set
625 * @root: radix tree root
626 * @first_indexp: pointer to a starting index of a range to scan
627 * @last_index: last index of a range to scan
628 * @nr_to_tag: maximum number items to tag
629 * @iftag: tag index to test
630 * @settag: tag index to set if tested tag is set
631 *
632 * This function scans range of radix tree from first_index to last_index
633 * (inclusive). For each item in the range if iftag is set, the function sets
634 * also settag. The function stops either after tagging nr_to_tag items or
635 * after reaching last_index.
636 *
637 * The tags must be set from the leaf level only and propagated back up the
638 * path to the root. We must do this so that we resolve the full path before
639 * setting any tags on intermediate nodes. If we set tags as we descend, then
640 * we can get to the leaf node and find that the index that has the iftag
641 * set is outside the range we are scanning. This reults in dangling tags and
642 * can lead to problems with later tag operations (e.g. livelocks on lookups).
643 *
644 * The function returns number of leaves where the tag was set and sets
645 * *first_indexp to the first unscanned index.
646 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
647 * be prepared to handle that.
648 */
649unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
650 unsigned long *first_indexp, unsigned long last_index,
651 unsigned long nr_to_tag,
652 unsigned int iftag, unsigned int settag)
653{
654 unsigned int height = root->height;
655 struct radix_tree_path path[height];
656 struct radix_tree_path *pathp = path;
657 struct radix_tree_node *slot;
658 unsigned int shift;
659 unsigned long tagged = 0;
660 unsigned long index = *first_indexp;
661
662 last_index = min(last_index, radix_tree_maxindex(height));
663 if (index > last_index)
664 return 0;
665 if (!nr_to_tag)
666 return 0;
667 if (!root_tag_get(root, iftag)) {
668 *first_indexp = last_index + 1;
669 return 0;
670 }
671 if (height == 0) {
672 *first_indexp = last_index + 1;
673 root_tag_set(root, settag);
674 return 1;
675 }
676
677 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
678 slot = indirect_to_ptr(root->rnode);
679
680 /*
681 * we fill the path from (root->height - 2) to 0, leaving the index at
682 * (root->height - 1) as a terminator. Zero the node in the terminator
683 * so that we can use this to end walk loops back up the path.
684 */
685 path[height - 1].node = NULL;
686
687 for (;;) {
688 int offset;
689
690 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
691 if (!slot->slots[offset])
692 goto next;
693 if (!tag_get(slot, iftag, offset))
694 goto next;
695 if (height > 1) {
696 /* Go down one level */
697 height--;
698 shift -= RADIX_TREE_MAP_SHIFT;
699 path[height - 1].node = slot;
700 path[height - 1].offset = offset;
701 slot = slot->slots[offset];
702 continue;
703 }
704
705 /* tag the leaf */
706 tagged++;
707 tag_set(slot, settag, offset);
708
709 /* walk back up the path tagging interior nodes */
710 pathp = &path[0];
711 while (pathp->node) {
712 /* stop if we find a node with the tag already set */
713 if (tag_get(pathp->node, settag, pathp->offset))
714 break;
715 tag_set(pathp->node, settag, pathp->offset);
716 pathp++;
717 }
718
719next:
720 /* Go to next item at level determined by 'shift' */
721 index = ((index >> shift) + 1) << shift;
722 /* Overflow can happen when last_index is ~0UL... */
723 if (index > last_index || !index)
724 break;
725 if (tagged >= nr_to_tag)
726 break;
727 while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
728 /*
729 * We've fully scanned this node. Go up. Because
730 * last_index is guaranteed to be in the tree, what
731 * we do below cannot wander astray.
732 */
733 slot = path[height - 1].node;
734 height++;
735 shift += RADIX_TREE_MAP_SHIFT;
736 }
737 }
738 /*
739 * The iftag must have been set somewhere because otherwise
740 * we would return immediated at the beginning of the function
741 */
742 root_tag_set(root, settag);
743 *first_indexp = index;
744
745 return tagged;
746}
747EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
748
610 749
611/** 750/**
612 * radix_tree_next_hole - find the next hole (not-present entry) 751 * radix_tree_next_hole - find the next hole (not-present entry)
@@ -656,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
656 * 795 *
657 * Returns: the index of the hole if found, otherwise returns an index 796 * Returns: the index of the hole if found, otherwise returns an index
658 * outside of the set specified (in which case 'index - return >= max_scan' 797 * outside of the set specified (in which case 'index - return >= max_scan'
659 * will be true). In rare cases of wrap-around, LONG_MAX will be returned. 798 * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
660 * 799 *
661 * radix_tree_next_hole may be called under rcu_read_lock. However, like 800 * radix_tree_next_hole may be called under rcu_read_lock. However, like
662 * radix_tree_gang_lookup, this will not atomically search a snapshot of 801 * radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -674,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
674 if (!radix_tree_lookup(root, index)) 813 if (!radix_tree_lookup(root, index))
675 break; 814 break;
676 index--; 815 index--;
677 if (index == LONG_MAX) 816 if (index == ULONG_MAX)
678 break; 817 break;
679 } 818 }
680 819
@@ -710,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
710 } 849 }
711 850
712 shift -= RADIX_TREE_MAP_SHIFT; 851 shift -= RADIX_TREE_MAP_SHIFT;
713 slot = rcu_dereference(slot->slots[i]); 852 slot = rcu_dereference_raw(slot->slots[i]);
714 if (slot == NULL) 853 if (slot == NULL)
715 goto out; 854 goto out;
716 } 855 }
@@ -757,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
757 unsigned long cur_index = first_index; 896 unsigned long cur_index = first_index;
758 unsigned int ret; 897 unsigned int ret;
759 898
760 node = rcu_dereference(root->rnode); 899 node = rcu_dereference_raw(root->rnode);
761 if (!node) 900 if (!node)
762 return 0; 901 return 0;
763 902
@@ -767,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
767 results[0] = node; 906 results[0] = node;
768 return 1; 907 return 1;
769 } 908 }
770 node = radix_tree_indirect_to_ptr(node); 909 node = indirect_to_ptr(node);
771 910
772 max_index = radix_tree_maxindex(node->height); 911 max_index = radix_tree_maxindex(node->height);
773 912
@@ -786,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
786 slot = *(((void ***)results)[ret + i]); 925 slot = *(((void ***)results)[ret + i]);
787 if (!slot) 926 if (!slot)
788 continue; 927 continue;
789 results[ret + nr_found] = rcu_dereference(slot); 928 results[ret + nr_found] =
929 indirect_to_ptr(rcu_dereference_raw(slot));
790 nr_found++; 930 nr_found++;
791 } 931 }
792 ret += nr_found; 932 ret += nr_found;
@@ -825,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
825 unsigned long cur_index = first_index; 965 unsigned long cur_index = first_index;
826 unsigned int ret; 966 unsigned int ret;
827 967
828 node = rcu_dereference(root->rnode); 968 node = rcu_dereference_raw(root->rnode);
829 if (!node) 969 if (!node)
830 return 0; 970 return 0;
831 971
@@ -835,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
835 results[0] = (void **)&root->rnode; 975 results[0] = (void **)&root->rnode;
836 return 1; 976 return 1;
837 } 977 }
838 node = radix_tree_indirect_to_ptr(node); 978 node = indirect_to_ptr(node);
839 979
840 max_index = radix_tree_maxindex(node->height); 980 max_index = radix_tree_maxindex(node->height);
841 981
@@ -914,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
914 } 1054 }
915 } 1055 }
916 shift -= RADIX_TREE_MAP_SHIFT; 1056 shift -= RADIX_TREE_MAP_SHIFT;
917 slot = rcu_dereference(slot->slots[i]); 1057 slot = rcu_dereference_raw(slot->slots[i]);
918 if (slot == NULL) 1058 if (slot == NULL)
919 break; 1059 break;
920 } 1060 }
@@ -950,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
950 if (!root_tag_get(root, tag)) 1090 if (!root_tag_get(root, tag))
951 return 0; 1091 return 0;
952 1092
953 node = rcu_dereference(root->rnode); 1093 node = rcu_dereference_raw(root->rnode);
954 if (!node) 1094 if (!node)
955 return 0; 1095 return 0;
956 1096
@@ -960,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
960 results[0] = node; 1100 results[0] = node;
961 return 1; 1101 return 1;
962 } 1102 }
963 node = radix_tree_indirect_to_ptr(node); 1103 node = indirect_to_ptr(node);
964 1104
965 max_index = radix_tree_maxindex(node->height); 1105 max_index = radix_tree_maxindex(node->height);
966 1106
@@ -979,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
979 slot = *(((void ***)results)[ret + i]); 1119 slot = *(((void ***)results)[ret + i]);
980 if (!slot) 1120 if (!slot)
981 continue; 1121 continue;
982 results[ret + nr_found] = rcu_dereference(slot); 1122 results[ret + nr_found] =
1123 indirect_to_ptr(rcu_dereference_raw(slot));
983 nr_found++; 1124 nr_found++;
984 } 1125 }
985 ret += nr_found; 1126 ret += nr_found;
@@ -1019,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1019 if (!root_tag_get(root, tag)) 1160 if (!root_tag_get(root, tag))
1020 return 0; 1161 return 0;
1021 1162
1022 node = rcu_dereference(root->rnode); 1163 node = rcu_dereference_raw(root->rnode);
1023 if (!node) 1164 if (!node)
1024 return 0; 1165 return 0;
1025 1166
@@ -1029,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1029 results[0] = (void **)&root->rnode; 1170 results[0] = (void **)&root->rnode;
1030 return 1; 1171 return 1;
1031 } 1172 }
1032 node = radix_tree_indirect_to_ptr(node); 1173 node = indirect_to_ptr(node);
1033 1174
1034 max_index = radix_tree_maxindex(node->height); 1175 max_index = radix_tree_maxindex(node->height);
1035 1176
@@ -1065,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1065 void *newptr; 1206 void *newptr;
1066 1207
1067 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1208 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1068 to_free = radix_tree_indirect_to_ptr(to_free); 1209 to_free = indirect_to_ptr(to_free);
1069 1210
1070 /* 1211 /*
1071 * The candidate node has more than one child, or its child 1212 * The candidate node has more than one child, or its child
@@ -1078,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1078 1219
1079 /* 1220 /*
1080 * We don't need rcu_assign_pointer(), since we are simply 1221 * We don't need rcu_assign_pointer(), since we are simply
1081 * moving the node from one part of the tree to another. If 1222 * moving the node from one part of the tree to another: if it
1082 * it was safe to dereference the old pointer to it 1223 * was safe to dereference the old pointer to it
1083 * (to_free->slots[0]), it will be safe to dereference the new 1224 * (to_free->slots[0]), it will be safe to dereference the new
1084 * one (root->rnode). 1225 * one (root->rnode) as far as dependent read barriers go.
1085 */ 1226 */
1086 newptr = to_free->slots[0]; 1227 newptr = to_free->slots[0];
1087 if (root->height > 1) 1228 if (root->height > 1)
1088 newptr = radix_tree_ptr_to_indirect(newptr); 1229 newptr = ptr_to_indirect(newptr);
1089 root->rnode = newptr; 1230 root->rnode = newptr;
1090 root->height--; 1231 root->height--;
1232
1233 /*
1234 * We have a dilemma here. The node's slot[0] must not be
1235 * NULLed in case there are concurrent lookups expecting to
1236 * find the item. However if this was a bottom-level node,
1237 * then it may be subject to the slot pointer being visible
1238 * to callers dereferencing it. If item corresponding to
1239 * slot[0] is subsequently deleted, these callers would expect
1240 * their slot to become empty sooner or later.
1241 *
1242 * For example, lockless pagecache will look up a slot, deref
1243 * the page pointer, and if the page is 0 refcount it means it
1244 * was concurrently deleted from pagecache so try the deref
1245 * again. Fortunately there is already a requirement for logic
1246 * to retry the entire slot lookup -- the indirect pointer
1247 * problem (replacing direct root node with an indirect pointer
1248 * also results in a stale slot). So tag the slot as indirect
1249 * to force callers to retry.
1250 */
1251 if (root->height == 0)
1252 *((unsigned long *)&to_free->slots[0]) |=
1253 RADIX_TREE_INDIRECT_PTR;
1254
1091 radix_tree_node_free(to_free); 1255 radix_tree_node_free(to_free);
1092 } 1256 }
1093} 1257}
@@ -1124,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1124 root->rnode = NULL; 1288 root->rnode = NULL;
1125 goto out; 1289 goto out;
1126 } 1290 }
1127 slot = radix_tree_indirect_to_ptr(slot); 1291 slot = indirect_to_ptr(slot);
1128 1292
1129 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 1293 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1130 pathp->node = NULL; 1294 pathp->node = NULL;
@@ -1166,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1166 radix_tree_node_free(to_free); 1330 radix_tree_node_free(to_free);
1167 1331
1168 if (pathp->node->count) { 1332 if (pathp->node->count) {
1169 if (pathp->node == 1333 if (pathp->node == indirect_to_ptr(root->rnode))
1170 radix_tree_indirect_to_ptr(root->rnode))
1171 radix_tree_shrink(root); 1334 radix_tree_shrink(root);
1172 goto out; 1335 goto out;
1173 } 1336 }
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
new file mode 100644
index 00000000000..162becacf97
--- /dev/null
+++ b/lib/raid6/.gitignore
@@ -0,0 +1,4 @@
1mktables
2altivec*.c
3int*.c
4tables.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
new file mode 100644
index 00000000000..8a38102770f
--- /dev/null
+++ b/lib/raid6/Makefile
@@ -0,0 +1,75 @@
1obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
2
3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
5 altivec8.o mmx.o sse1.o sse2.o
6hostprogs-y += mktables
7
8quiet_cmd_unroll = UNROLL $@
9 cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
10 < $< > $@ || ( rm -f $@ && exit 1 )
11
12ifeq ($(CONFIG_ALTIVEC),y)
13altivec_flags := -maltivec -mabi=altivec
14endif
15
16targets += int1.c
17$(obj)/int1.c: UNROLL := 1
18$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE
19 $(call if_changed,unroll)
20
21targets += int2.c
22$(obj)/int2.c: UNROLL := 2
23$(obj)/int2.c: $(src)/int.uc $(src)/unroll.awk FORCE
24 $(call if_changed,unroll)
25
26targets += int4.c
27$(obj)/int4.c: UNROLL := 4
28$(obj)/int4.c: $(src)/int.uc $(src)/unroll.awk FORCE
29 $(call if_changed,unroll)
30
31targets += int8.c
32$(obj)/int8.c: UNROLL := 8
33$(obj)/int8.c: $(src)/int.uc $(src)/unroll.awk FORCE
34 $(call if_changed,unroll)
35
36targets += int16.c
37$(obj)/int16.c: UNROLL := 16
38$(obj)/int16.c: $(src)/int.uc $(src)/unroll.awk FORCE
39 $(call if_changed,unroll)
40
41targets += int32.c
42$(obj)/int32.c: UNROLL := 32
43$(obj)/int32.c: $(src)/int.uc $(src)/unroll.awk FORCE
44 $(call if_changed,unroll)
45
46CFLAGS_altivec1.o += $(altivec_flags)
47targets += altivec1.c
48$(obj)/altivec1.c: UNROLL := 1
49$(obj)/altivec1.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
50 $(call if_changed,unroll)
51
52CFLAGS_altivec2.o += $(altivec_flags)
53targets += altivec2.c
54$(obj)/altivec2.c: UNROLL := 2
55$(obj)/altivec2.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
56 $(call if_changed,unroll)
57
58CFLAGS_altivec4.o += $(altivec_flags)
59targets += altivec4.c
60$(obj)/altivec4.c: UNROLL := 4
61$(obj)/altivec4.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
62 $(call if_changed,unroll)
63
64CFLAGS_altivec8.o += $(altivec_flags)
65targets += altivec8.c
66$(obj)/altivec8.c: UNROLL := 8
67$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
68 $(call if_changed,unroll)
69
70quiet_cmd_mktable = TABLE $@
71 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
72
73targets += tables.c
74$(obj)/tables.c: $(obj)/mktables FORCE
75 $(call if_changed,mktable)
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
new file mode 100644
index 00000000000..b595f560bee
--- /dev/null
+++ b/lib/raid6/algos.c
@@ -0,0 +1,154 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/algos.c
15 *
16 * Algorithm list and algorithm selection for RAID-6
17 */
18
19#include <linux/raid/pq.h>
20#ifndef __KERNEL__
21#include <sys/mman.h>
22#include <stdio.h>
23#else
24#include <linux/gfp.h>
25#if !RAID6_USE_EMPTY_ZERO_PAGE
26/* In .bss so it's zeroed */
27const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
28EXPORT_SYMBOL(raid6_empty_zero_page);
29#endif
30#endif
31
32struct raid6_calls raid6_call;
33EXPORT_SYMBOL_GPL(raid6_call);
34
35const struct raid6_calls * const raid6_algos[] = {
36 &raid6_intx1,
37 &raid6_intx2,
38 &raid6_intx4,
39 &raid6_intx8,
40#if defined(__ia64__)
41 &raid6_intx16,
42 &raid6_intx32,
43#endif
44#if defined(__i386__) && !defined(__arch_um__)
45 &raid6_mmxx1,
46 &raid6_mmxx2,
47 &raid6_sse1x1,
48 &raid6_sse1x2,
49 &raid6_sse2x1,
50 &raid6_sse2x2,
51#endif
52#if defined(__x86_64__) && !defined(__arch_um__)
53 &raid6_sse2x1,
54 &raid6_sse2x2,
55 &raid6_sse2x4,
56#endif
57#ifdef CONFIG_ALTIVEC
58 &raid6_altivec1,
59 &raid6_altivec2,
60 &raid6_altivec4,
61 &raid6_altivec8,
62#endif
63 NULL
64};
65
66#ifdef __KERNEL__
67#define RAID6_TIME_JIFFIES_LG2 4
68#else
69/* Need more time to be stable in userspace */
70#define RAID6_TIME_JIFFIES_LG2 9
71#define time_before(x, y) ((x) < (y))
72#endif
73
74/* Try to pick the best algorithm */
75/* This code uses the gfmul table as convenient data set to abuse */
76
77int __init raid6_select_algo(void)
78{
79 const struct raid6_calls * const * algo;
80 const struct raid6_calls * best;
81 char *syndromes;
82 void *dptrs[(65536/PAGE_SIZE)+2];
83 int i, disks;
84 unsigned long perf, bestperf;
85 int bestprefer;
86 unsigned long j0, j1;
87
88 disks = (65536/PAGE_SIZE)+2;
89 for ( i = 0 ; i < disks-2 ; i++ ) {
90 dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
91 }
92
93 /* Normal code - use a 2-page allocation to avoid D$ conflict */
94 syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
95
96 if ( !syndromes ) {
97 printk("raid6: Yikes! No memory available.\n");
98 return -ENOMEM;
99 }
100
101 dptrs[disks-2] = syndromes;
102 dptrs[disks-1] = syndromes + PAGE_SIZE;
103
104 bestperf = 0; bestprefer = 0; best = NULL;
105
106 for ( algo = raid6_algos ; *algo ; algo++ ) {
107 if ( !(*algo)->valid || (*algo)->valid() ) {
108 perf = 0;
109
110 preempt_disable();
111 j0 = jiffies;
112 while ( (j1 = jiffies) == j0 )
113 cpu_relax();
114 while (time_before(jiffies,
115 j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
116 (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
117 perf++;
118 }
119 preempt_enable();
120
121 if ( (*algo)->prefer > bestprefer ||
122 ((*algo)->prefer == bestprefer &&
123 perf > bestperf) ) {
124 best = *algo;
125 bestprefer = best->prefer;
126 bestperf = perf;
127 }
128 printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
129 (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
130 }
131 }
132
133 if (best) {
134 printk("raid6: using algorithm %s (%ld MB/s)\n",
135 best->name,
136 (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
137 raid6_call = *best;
138 } else
139 printk("raid6: Yikes! No algorithm found!\n");
140
141 free_pages((unsigned long)syndromes, 1);
142
143 return best ? 0 : -EINVAL;
144}
145
146static void raid6_exit(void)
147{
148 do { } while (0);
149}
150
151subsys_initcall(raid6_select_algo);
152module_exit(raid6_exit);
153MODULE_LICENSE("GPL");
154MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
new file mode 100644
index 00000000000..2654d5c854b
--- /dev/null
+++ b/lib/raid6/altivec.uc
@@ -0,0 +1,130 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6altivec$#.c
15 *
16 * $#-way unrolled portable integer math RAID-6 instruction set
17 *
18 * This file is postprocessed using unroll.awk
19 *
20 * <benh> hpa: in process,
21 * you can just "steal" the vec unit with enable_kernel_altivec() (but
22 * bracked this with preempt_disable/enable or in a lock)
23 */
24
25#include <linux/raid/pq.h>
26
27#ifdef CONFIG_ALTIVEC
28
29#include <altivec.h>
30#ifdef __KERNEL__
31# include <asm/system.h>
32# include <asm/cputable.h>
33#endif
34
35/*
36 * This is the C data type to use. We use a vector of
37 * signed char so vec_cmpgt() will generate the right
38 * instruction.
39 */
40
41typedef vector signed char unative_t;
42
43#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
44#define NSIZE sizeof(unative_t)
45
46/*
47 * The SHLBYTE() operation shifts each byte left by 1, *not*
48 * rolling over into the next byte
49 */
50static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
51{
52 return vec_add(v,v);
53}
54
55/*
56 * The MASK() operation returns 0xFF in any byte for which the high
57 * bit is 1, 0x00 for any byte for which the high bit is 0.
58 */
59static inline __attribute_const__ unative_t MASK(unative_t v)
60{
61 unative_t zv = NBYTES(0);
62
63 /* vec_cmpgt returns a vector bool char; thus the need for the cast */
64 return (unative_t)vec_cmpgt(zv, v);
65}
66
67
68/* This is noinline to make damned sure that gcc doesn't move any of the
69 Altivec code around the enable/disable code */
70static void noinline
71raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
72{
73 u8 **dptr = (u8 **)ptrs;
74 u8 *p, *q;
75 int d, z, z0;
76
77 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
78 unative_t x1d = NBYTES(0x1d);
79
80 z0 = disks - 3; /* Highest data disk */
81 p = dptr[z0+1]; /* XOR parity */
82 q = dptr[z0+2]; /* RS syndrome */
83
84 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
85 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
86 for ( z = z0-1 ; z >= 0 ; z-- ) {
87 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
88 wp$$ = vec_xor(wp$$, wd$$);
89 w2$$ = MASK(wq$$);
90 w1$$ = SHLBYTE(wq$$);
91 w2$$ = vec_and(w2$$, x1d);
92 w1$$ = vec_xor(w1$$, w2$$);
93 wq$$ = vec_xor(w1$$, wd$$);
94 }
95 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
96 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
97 }
98}
99
100static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
101{
102 preempt_disable();
103 enable_kernel_altivec();
104
105 raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
106
107 preempt_enable();
108}
109
110int raid6_have_altivec(void);
111#if $# == 1
112int raid6_have_altivec(void)
113{
114 /* This assumes either all CPUs have Altivec or none does */
115# ifdef __KERNEL__
116 return cpu_has_feature(CPU_FTR_ALTIVEC);
117# else
118 return 1;
119# endif
120}
121#endif
122
123const struct raid6_calls raid6_altivec$# = {
124 raid6_altivec$#_gen_syndrome,
125 raid6_have_altivec,
126 "altivecx$#",
127 0
128};
129
130#endif /* CONFIG_ALTIVEC */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
new file mode 100644
index 00000000000..d1e276a14fa
--- /dev/null
+++ b/lib/raid6/int.uc
@@ -0,0 +1,117 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6int$#.c
15 *
16 * $#-way unrolled portable integer math RAID-6 instruction set
17 *
18 * This file is postprocessed using unroll.awk
19 */
20
21#include <linux/raid/pq.h>
22
23/*
24 * This is the C data type to use
25 */
26
27/* Change this from BITS_PER_LONG if there is something better... */
28#if BITS_PER_LONG == 64
29# define NBYTES(x) ((x) * 0x0101010101010101UL)
30# define NSIZE 8
31# define NSHIFT 3
32# define NSTRING "64"
33typedef u64 unative_t;
34#else
35# define NBYTES(x) ((x) * 0x01010101U)
36# define NSIZE 4
37# define NSHIFT 2
38# define NSTRING "32"
39typedef u32 unative_t;
40#endif
41
42
43
44/*
45 * IA-64 wants insane amounts of unrolling. On other architectures that
46 * is just a waste of space.
47 */
48#if ($# <= 8) || defined(__ia64__)
49
50
51/*
52 * These sub-operations are separate inlines since they can sometimes be
53 * specially optimized using architecture-specific hacks.
54 */
55
56/*
57 * The SHLBYTE() operation shifts each byte left by 1, *not*
58 * rolling over into the next byte
59 */
60static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
61{
62 unative_t vv;
63
64 vv = (v << 1) & NBYTES(0xfe);
65 return vv;
66}
67
68/*
69 * The MASK() operation returns 0xFF in any byte for which the high
70 * bit is 1, 0x00 for any byte for which the high bit is 0.
71 */
72static inline __attribute_const__ unative_t MASK(unative_t v)
73{
74 unative_t vv;
75
76 vv = v & NBYTES(0x80);
77 vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
78 return vv;
79}
80
81
82static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
83{
84 u8 **dptr = (u8 **)ptrs;
85 u8 *p, *q;
86 int d, z, z0;
87
88 unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
89
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0+1]; /* XOR parity */
92 q = dptr[z0+2]; /* RS syndrome */
93
94 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
95 wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
96 for ( z = z0-1 ; z >= 0 ; z-- ) {
97 wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
98 wp$$ ^= wd$$;
99 w2$$ = MASK(wq$$);
100 w1$$ = SHLBYTE(wq$$);
101 w2$$ &= NBYTES(0x1d);
102 w1$$ ^= w2$$;
103 wq$$ = w1$$ ^ wd$$;
104 }
105 *(unative_t *)&p[d+NSIZE*$$] = wp$$;
106 *(unative_t *)&q[d+NSIZE*$$] = wq$$;
107 }
108}
109
110const struct raid6_calls raid6_intx$# = {
111 raid6_int$#_gen_syndrome,
112 NULL, /* always valid */
113 "int" NSTRING "x$#",
114 0
115};
116
117#endif
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
new file mode 100644
index 00000000000..3b1500843bb
--- /dev/null
+++ b/lib/raid6/mktables.c
@@ -0,0 +1,132 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * mktables.c
13 *
14 * Make RAID-6 tables. This is a host user space program to be run at
15 * compile time.
16 */
17
18#include <stdio.h>
19#include <string.h>
20#include <inttypes.h>
21#include <stdlib.h>
22#include <time.h>
23
/*
 * Multiply a by b in GF(2^8) with the RAID-6 reduction polynomial
 * 0x11d (x^8 + x^4 + x^3 + x^2 + 1), shift-and-add style.
 */
static uint8_t gfmul(uint8_t a, uint8_t b)
{
	uint8_t prod = 0;

	for (; b != 0; b >>= 1) {
		if (b & 1)
			prod ^= a;
		/* a *= x, reducing by 0x1d when the top bit falls out */
		a = (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0));
	}

	return prod;
}
37
/*
 * Raise a to the power b in GF(2^8) by square-and-multiply.
 * The exponent is reduced modulo 255 (the multiplicative group order),
 * so negative b is handled too.
 */
static uint8_t gfpow(uint8_t a, int b)
{
	uint8_t v = 1;

	b %= 255;
	if (b < 0)
		b += 255;

	for (; b != 0; b >>= 1, a = gfmul(a, a))
		if (b & 1)
			v = gfmul(v, a);

	return v;
}
55
/*
 * Table generator: emits the RAID-6 GF(2^8) lookup tables as C source
 * on stdout.  Run on the build host at compile time; the output becomes
 * tables.c.  Tables produced: raid6_gfmul (full 256x256 product table),
 * raid6_gfexp (powers of 2), raid6_gfinv (multiplicative inverses) and
 * raid6_gfexi (inverse of 2^x + 1, used for dual-data-disk recovery).
 */
int main(int argc, char *argv[])
{
	int i, j, k;
	uint8_t v;
	uint8_t exptbl[256], invtbl[256];

	printf("#include <linux/raid/pq.h>\n");

	/* Compute multiplication table */
	printf("\nconst u8 __attribute__((aligned(256)))\n"
		"raid6_gfmul[256][256] =\n"
		"{\n");
	for (i = 0; i < 256; i++) {
		printf("\t{\n");
		for (j = 0; j < 256; j += 8) {
			printf("\t\t");
			for (k = 0; k < 8; k++)
				printf("0x%02x,%c", gfmul(i, j + k),
				       (k == 7) ? '\n' : ' ');
		}
		printf("\t},\n");
	}
	printf("};\n");
	printf("#ifdef __KERNEL__\n");
	printf("EXPORT_SYMBOL(raid6_gfmul);\n");
	printf("#endif\n");

	/* Compute power-of-2 table (exponent) */
	v = 1;
	printf("\nconst u8 __attribute__((aligned(256)))\n"
	       "raid6_gfexp[256] =\n" "{\n");
	for (i = 0; i < 256; i += 8) {
		printf("\t");
		for (j = 0; j < 8; j++) {
			exptbl[i + j] = v;	/* kept for the gfexi table below */
			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
			v = gfmul(v, 2);
			if (v == 1)
				v = 0;	/* For entry 255, not a real entry */
		}
	}
	printf("};\n");
	printf("#ifdef __KERNEL__\n");
	printf("EXPORT_SYMBOL(raid6_gfexp);\n");
	printf("#endif\n");

	/* Compute inverse table x^-1 == x^254 */
	printf("\nconst u8 __attribute__((aligned(256)))\n"
	       "raid6_gfinv[256] =\n" "{\n");
	for (i = 0; i < 256; i += 8) {
		printf("\t");
		for (j = 0; j < 8; j++) {
			/* also kept in invtbl[] for the gfexi table below */
			invtbl[i + j] = v = gfpow(i + j, 254);
			printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
		}
	}
	printf("};\n");
	printf("#ifdef __KERNEL__\n");
	printf("EXPORT_SYMBOL(raid6_gfinv);\n");
	printf("#endif\n");

	/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
	printf("\nconst u8 __attribute__((aligned(256)))\n"
	       "raid6_gfexi[256] =\n" "{\n");
	for (i = 0; i < 256; i += 8) {
		printf("\t");
		for (j = 0; j < 8; j++)
			printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
			       (j == 7) ? '\n' : ' ');
	}
	printf("};\n");
	printf("#ifdef __KERNEL__\n");
	printf("EXPORT_SYMBOL(raid6_gfexi);\n");
	printf("#endif\n");

	return 0;
}
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
new file mode 100644
index 00000000000..279347f2309
--- /dev/null
+++ b/lib/raid6/mmx.c
@@ -0,0 +1,142 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/mmx.c
15 *
16 * MMX implementation of RAID-6 syndrome functions
17 */
18
19#if defined(__i386__) && !defined(__arch_um__)
20
21#include <linux/raid/pq.h>
22#include "x86.h"
23
/* Shared with raid6/sse1.c */
const struct raid6_mmx_constants {
	u64 x1d;	/* GF(2^8) reduction constant 0x1d in every byte */
} raid6_mmx_constants = {
	0x1d1d1d1d1d1d1d1dULL,
};
30
/* "valid" hook for the MMX variants: require MMX on every CPU */
static int raid6_have_mmx(void)
{
	/* Not really "boot_cpu" but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX);
}
36
/*
 * Plain MMX implementation
 *
 * Computes P (XOR parity) and Q (GF(2^8) Reed-Solomon syndrome)
 * 8 bytes per iteration.  ptrs[0..disks-3] = data pages,
 * ptrs[disks-2] = P, ptrs[disks-1] = Q.
 *
 * Register use: mm0 = 0x1d in every byte, mm5 = zero/temp,
 * mm2 = running P, mm4 = running Q, mm6 = current data.
 */
static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
			/* Multiply Q (mm4) by 2 in GF(2^8): */
			asm volatile("pcmpgtb %mm4,%mm5"); /* 0xff where a byte's top bit is set (signed 0 > byte) */
			asm volatile("paddb %mm4,%mm4");   /* per-byte shift left by 1 */
			asm volatile("pand %mm0,%mm5");	   /* keep 0x1d where reduction is needed */
			asm volatile("pxor %mm5,%mm4");	   /* apply the reduction */
			asm volatile("pxor %mm5,%mm5");	   /* re-zero the temp */
			/* Fold in this disk's data */
			asm volatile("pxor %mm6,%mm2");	   /* P ^= data */
			asm volatile("pxor %mm6,%mm4");	   /* Q ^= data */
		}
		asm volatile("movq %%mm2,%0" : "=m" (p[d]));
		asm volatile("pxor %mm2,%mm2");
		asm volatile("movq %%mm4,%0" : "=m" (q[d]));
		asm volatile("pxor %mm4,%mm4");
	}

	kernel_fpu_end();
}
76
/* Algorithm descriptor: plain (1-wide) MMX syndrome generator */
const struct raid6_calls raid6_mmxx1 = {
	raid6_mmx1_gen_syndrome,
	raid6_have_mmx,
	"mmxx1",
	0		/* no cache hints */
};
83
/*
 * Unrolled-by-2 MMX implementation
 *
 * Same algorithm as raid6_mmx1_gen_syndrome() but runs two 8-byte
 * lanes per iteration: mm2/mm3 accumulate P, mm4/mm6 accumulate Q,
 * mm5/mm7 are zero/temp registers, mm0 = 0x1d in every byte.
 */
static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5"); /* Zero temp */
	asm volatile("pxor %mm7,%mm7"); /* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
		asm volatile("movq %mm2,%mm4"); /* Q[0] */
		asm volatile("movq %mm3,%mm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			/* Q *= 2 in GF(2^8), both lanes: build the top-bit
			   mask, double each byte, xor in 0x1d where needed */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Fold in this disk's data, then re-zero the temps */
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		asm volatile("movq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
	}

	kernel_fpu_end();
}
134
/* Algorithm descriptor: 2-wide MMX syndrome generator */
const struct raid6_calls raid6_mmxx2 = {
	raid6_mmx2_gen_syndrome,
	raid6_have_mmx,
	"mmxx2",
	0		/* no cache hints */
};
141
142#endif
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
new file mode 100644
index 00000000000..8590d19cf52
--- /dev/null
+++ b/lib/raid6/recov.c
@@ -0,0 +1,132 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/recov.c
15 *
16 * RAID-6 data recovery in dual failure mode. In single failure mode,
17 * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
18 * the syndrome.)
19 */
20
21#include <linux/raid/pq.h>
22
/* Recover two failed data blocks. */
/*
 * disks:  total disk count (data + P + Q); ptrs[disks-2] = P page,
 *         ptrs[disks-1] = Q page, the rest are data pages of `bytes` each.
 * faila, failb: indices of the two failed data disks (faila < failb,
 *         both below disks-2).
 * The failed pages are rebuilt in place at ptrs[faila] / ptrs[failb].
 */
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
		       void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	u8 px, qx, db;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data pages
	   Use the dead data pages as temporary storage for
	   delta p and delta q */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;	/* gen_syndrome writes the partial P here */
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;	/* ...and the partial Q here */

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
	qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];

	/* Now do it... */
	while ( bytes-- ) {
		px = *p ^ *dp;		/* P xor partial-P */
		qx = qmul[*q ^ *dq];	/* (Q xor partial-Q), rescaled */
		*dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
		*dp++ = db ^ px;	/* Reconstructed A */
		p++; q++;
	}
}
EXPORT_SYMBOL_GPL(raid6_2data_recov);
67
/* Recover failure of one data block plus the P block */
/*
 * Rebuilds data disk faila from Q alone; P is regenerated as a side
 * effect of gen_syndrome() and then corrected with the restored data.
 * ptrs layout is the same as in raid6_2data_recov().
 */
void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data page
	   Use the dead data page as temporary storage for delta q */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;	/* gen_syndrome writes the partial Q here */

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];

	/* Now do it... */
	while ( bytes-- ) {
		/* Rebuild the data byte from Q, then fold it into P */
		*p++ ^= *dq = qmul[*q ^ *dq];
		q++; dq++;
	}
}
EXPORT_SYMBOL_GPL(raid6_datap_recov);
99
100#ifndef __KERNEL__
101/* Testing only */
102
103/* Recover two failed blocks. */
104void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
105{
106 if ( faila > failb ) {
107 int tmp = faila;
108 faila = failb;
109 failb = tmp;
110 }
111
112 if ( failb == disks-1 ) {
113 if ( faila == disks-2 ) {
114 /* P+Q failure. Just rebuild the syndrome. */
115 raid6_call.gen_syndrome(disks, bytes, ptrs);
116 } else {
117 /* data+Q failure. Reconstruct data from P,
118 then rebuild syndrome. */
119 /* NOT IMPLEMENTED - equivalent to RAID-5 */
120 }
121 } else {
122 if ( failb == disks-2 ) {
123 /* data+P failure. */
124 raid6_datap_recov(disks, bytes, faila, ptrs);
125 } else {
126 /* data+data failure. */
127 raid6_2data_recov(disks, bytes, faila, failb, ptrs);
128 }
129 }
130}
131
132#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
new file mode 100644
index 00000000000..10dd91948c0
--- /dev/null
+++ b/lib/raid6/sse1.c
@@ -0,0 +1,162 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/sse1.c
15 *
16 * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
17 *
18 * This is really an MMX implementation, but it requires SSE-1 or
19 * AMD MMXEXT for prefetch support and a few other features. The
20 * support for nontemporal memory accesses is enough to make this
21 * worthwhile as a separate implementation.
22 */
23
24#if defined(__i386__) && !defined(__arch_um__)
25
26#include <linux/raid/pq.h>
27#include "x86.h"
28
29/* Defined in raid6/mmx.c */
30extern const struct raid6_mmx_constants {
31 u64 x1d;
32} raid6_mmx_constants;
33
/* Valid when MMX plus either SSE (XMM) or AMD MMXEXT is present */
static int raid6_have_sse1_or_mmxext(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		(boot_cpu_has(X86_FEATURE_XMM) ||
		 boot_cpu_has(X86_FEATURE_MMXEXT));
}
41
/*
 * Plain SSE1 implementation
 *
 * MMX datapath (8 bytes per iteration) with prefetchnta and movntq
 * non-temporal stores.  Software-pipelined: mm6 always holds the next
 * disk's data while Q (mm4) is multiplied by 2 in GF(2^8).
 * mm0 = 0x1d in every byte, mm5 = zero/temp, mm2 = running P.
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4"); /* Q[0] */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* Q *= 2 in GF(2^8) (mask/double/reduce) */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5");
			/* Fold in the data already in mm6, load the next */
			asm volatile("pxor %mm6,%mm2");
			asm volatile("pxor %mm6,%mm4");
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
		/* Peeled final round: fold in the last data block (mm6) */
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		/* Non-temporal stores: results bypass the cache */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");	/* order the NT stores */
	kernel_fpu_end();
}
92
/* Algorithm descriptor: plain SSE-1/MMXEXT syndrome generator */
const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x1",
	1			/* Has cache hints */
};
99
/*
 * Unrolled-by-2 SSE1 implementation
 *
 * Two 8-byte MMX lanes per iteration with prefetchnta ahead of the
 * loads and movntq non-temporal stores.  mm2/mm3 accumulate P,
 * mm4/mm6 accumulate Q, mm5/mm7 are zero/temp registers,
 * mm0 = 0x1d in every byte.
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5"); /* Zero temp */
	asm volatile("pxor %mm7,%mm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 16 bytes */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
		asm volatile("movq %mm2,%mm4"); /* Q[0] */
		asm volatile("movq %mm3,%mm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* Q *= 2 in GF(2^8), both lanes
			   (mask/double/reduce) */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Fold in this disk's data, then re-zero temps */
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2");
			asm volatile("pxor %mm7,%mm3");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		/* Non-temporal stores: results bypass the cache */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	asm volatile("sfence" : :: "memory");	/* order the NT stores */
	kernel_fpu_end();
}
154
/* Algorithm descriptor: 2-wide SSE-1/MMXEXT syndrome generator */
const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,
	raid6_have_sse1_or_mmxext,
	"sse1x2",
	1			/* Has cache hints */
};
161
162#endif
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
new file mode 100644
index 00000000000..bc2d57daa58
--- /dev/null
+++ b/lib/raid6/sse2.c
@@ -0,0 +1,262 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/sse2.c
15 *
16 * SSE-2 implementation of RAID-6 syndrome functions
17 *
18 */
19
20#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
21
22#include <linux/raid/pq.h>
23#include "x86.h"
24
/* 0x1d (GF(2^8) reduction constant) replicated through a full XMM
   register; 16-byte aligned so movdqa can load it */
static const struct raid6_sse_constants {
	u64 x1d[2];
} raid6_sse_constants __attribute__((aligned(16))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};
30
/* Valid when the full SSE2 feature stack (MMX, FXSR, SSE, SSE2) is present */
static int raid6_have_sse2(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		boot_cpu_has(X86_FEATURE_FXSR) &&
		boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2);
}
39
/*
 * Plain SSE2 implementation
 *
 * 16 bytes per iteration, software-pipelined: xmm6 always holds the
 * next disk's data while Q (xmm4) is multiplied by 2 in GF(2^8).
 * xmm0 = 0x1d in every byte, xmm5 = zero/temp, xmm2 = running P.
 * Uses prefetchnta and movntdq non-temporal stores.
 */
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* Q *= 2 in GF(2^8) (mask/double/reduce) */
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm5,%xmm5");
			/* Fold in the data already in xmm6, load the next */
			asm volatile("pxor %xmm6,%xmm2");
			asm volatile("pxor %xmm6,%xmm4");
			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
		}
		/* Peeled final round: fold in the last data block (xmm6) */
		asm volatile("pcmpgtb %xmm4,%xmm5");
		asm volatile("paddb %xmm4,%xmm4");
		asm volatile("pand %xmm0,%xmm5");
		asm volatile("pxor %xmm5,%xmm4");
		asm volatile("pxor %xmm5,%xmm5");
		asm volatile("pxor %xmm6,%xmm2");
		asm volatile("pxor %xmm6,%xmm4");

		/* Non-temporal stores; re-zero the accumulators */
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
	}

	asm volatile("sfence" : : : "memory");	/* order the NT stores */
	kernel_fpu_end();
}
92
/* Algorithm descriptor: plain SSE2 syndrome generator */
const struct raid6_calls raid6_sse2x1 = {
	raid6_sse21_gen_syndrome,
	raid6_have_sse2,
	"sse2x1",
	1			/* Has cache hints */
};
99
/*
 * Unrolled-by-2 SSE2 implementation
 *
 * Two 16-byte lanes per iteration: xmm2/xmm3 accumulate P, xmm4/xmm6
 * accumulate Q, xmm5/xmm7 are zero/temp registers, xmm0 = 0x1d in
 * every byte.  Uses prefetchnta and movntdq non-temporal stores.
 */
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* Q *= 2 in GF(2^8), both lanes
			   (mask/double/reduce) */
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			/* Fold in this disk's data, then re-zero temps */
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
		}
		/* Non-temporal stores: results bypass the cache */
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
	}

	asm volatile("sfence" : : : "memory");	/* order the NT stores */
	kernel_fpu_end();
}
154
/* Algorithm descriptor: 2-wide SSE2 syndrome generator */
const struct raid6_calls raid6_sse2x2 = {
	raid6_sse22_gen_syndrome,
	raid6_have_sse2,
	"sse2x2",
	1			/* Has cache hints */
};
161
162#endif
163
164#if defined(__x86_64__) && !defined(__arch_um__)
165
/*
 * Unrolled-by-4 SSE2 implementation
 *
 * x86-64 only: uses the extra xmm8-xmm15 registers.  Four 16-byte
 * lanes per iteration.  Unlike the narrower versions, the P/Q
 * accumulators start at zero and the disk loop runs all the way from
 * z0 down to 0, so no first/last iteration is peeled.
 * xmm2/3/10/11 accumulate P, xmm4/6/12/14 accumulate Q,
 * xmm5/7/13/15 are zero/temp registers, xmm0 = 0x1d in every byte.
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4"); 	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6"); 	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7"); 	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12"); 	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14"); 	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15"); 	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			/* Q *= 2 in GF(2^8), all four lanes
			   (mask/double/reduce) */
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			/* Load this disk's data into the (zeroed) temps */
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			/* Fold the data into P and Q, then re-zero temps */
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		/* Non-temporal stores; re-zero each accumulator for the
		   next 64-byte stripe */
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	asm volatile("sfence" : : : "memory");	/* order the NT stores */
	kernel_fpu_end();
}
254
/* Algorithm descriptor: 4-wide SSE2 syndrome generator (x86-64) */
const struct raid6_calls raid6_sse2x4 = {
	raid6_sse24_gen_syndrome,
	raid6_have_sse2,
	"sse2x4",
	1			/* Has cache hints */
};
261
262#endif
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
new file mode 100644
index 00000000000..aa651697b6d
--- /dev/null
+++ b/lib/raid6/test/Makefile
@@ -0,0 +1,72 @@
#
# This is a simple Makefile to test some of the RAID-6 code
# from userspace.
#

CC	 = gcc
OPTFLAGS = -O2		# Adjust as desired
CFLAGS	 = -I.. -I ../../../include -g $(OPTFLAGS)
LD	 = ld
AWK	 = awk -f
AR	 = ar
RANLIB	 = ranlib

.c.o:
	$(CC) $(CFLAGS) -c -o $@ $<

# Implementation sources are copied in from the parent lib/raid6 directory
%.c: ../%.c
	cp -f $< $@

%.uc: ../%.uc
	cp -f $< $@

all:	raid6.a raid6test

# Static library bundling every syndrome implementation, the recovery
# code and the generated GF tables; linked into the test driver below.
raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
	 tables.o
	 rm -f $@
	 $(AR) cq $@ $^
	 $(RANLIB) $@

raid6test: test.c raid6.a
	$(CC) $(CFLAGS) -o raid6test $^

# The intN/altivecN sources are generated by expanding the .uc
# templates N ways with unroll.awk.
altivec1.c: altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < altivec.uc > $@

altivec2.c: altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=2 < altivec.uc > $@

altivec4.c: altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=4 < altivec.uc > $@

altivec8.c: altivec.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@

int1.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=1 < int.uc > $@

int2.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=2 < int.uc > $@

int4.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=4 < int.uc > $@

int8.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=8 < int.uc > $@

int16.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=16 < int.uc > $@

int32.c: int.uc ../unroll.awk
	$(AWK) ../unroll.awk -vN=32 < int.uc > $@

# The GF lookup tables are generated by the mktables host program
tables.c: mktables
	./mktables > tables.c

clean:
	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test

spotless: clean
	rm -f *~
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
new file mode 100644
index 00000000000..7a930318b17
--- /dev/null
+++ b/lib/raid6/test/test.c
@@ -0,0 +1,124 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * raid6test.c
13 *
14 * Test RAID-6 recovery with various algorithms
15 */
16
17#include <stdlib.h>
18#include <stdio.h>
19#include <string.h>
20#include <linux/raid/pq.h>
21
#define NDISKS		16	/* Including P and Q */

/* Page of zeroes standing in for a missing data disk during recovery */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
/* The algorithm currently under test */
struct raid6_calls raid6_call;

char *dataptrs[NDISKS];			/* per-disk page pointers handed to the library */
char data[NDISKS][PAGE_SIZE];		/* reference pages: data disks, then P, then Q */
char recovi[PAGE_SIZE], recovj[PAGE_SIZE];	/* scratch pages for recovery results */
30
31static void makedata(void)
32{
33 int i, j;
34
35 for (i = 0; i < NDISKS; i++) {
36 for (j = 0; j < PAGE_SIZE; j++)
37 data[i][j] = rand();
38
39 dataptrs[i] = data[i];
40 }
41}
42
43static char disk_type(int d)
44{
45 switch (d) {
46 case NDISKS-2:
47 return 'P';
48 case NDISKS-1:
49 return 'Q';
50 default:
51 return 'D';
52 }
53}
54
/*
 * Simulate failure of disks i and j (i < j): replace their pages with
 * junk-filled scratch pages, run dual recovery, and compare the result
 * against the reference data.  Returns nonzero on a bad reconstruction.
 */
static int test_disks(int i, int j)
{
	int erra, errb;

	/* Distinct junk patterns so stale data cannot masquerade as success */
	memset(recovi, 0xf0, PAGE_SIZE);
	memset(recovj, 0xba, PAGE_SIZE);

	dataptrs[i] = recovi;
	dataptrs[j] = recovj;

	raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);

	erra = memcmp(data[i], recovi, PAGE_SIZE);
	errb = memcmp(data[j], recovj, PAGE_SIZE);

	if (i < NDISKS-2 && j == NDISKS-1) {
		/* We don't implement the DQ failure scenario, since it's
		   equivalent to a RAID-5 failure (XOR, then recompute Q) */
		erra = errb = 0;	/* so don't count it as an error */
	} else {
		printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
		       raid6_call.name,
		       i, disk_type(i),
		       j, disk_type(j),
		       (!erra && !errb) ? "OK" :
		       !erra ? "ERRB" :
		       !errb ? "ERRA" : "ERRAB");
	}

	/* Restore the original data pointers */
	dataptrs[i] = data[i];
	dataptrs[j] = data[j];

	return erra || errb;
}
89
90int main(int argc, char *argv[])
91{
92 const struct raid6_calls *const *algo;
93 int i, j;
94 int err = 0;
95
96 makedata();
97
98 for (algo = raid6_algos; *algo; algo++) {
99 if (!(*algo)->valid || (*algo)->valid()) {
100 raid6_call = **algo;
101
102 /* Nuke syndromes */
103 memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
104
105 /* Generate assumed good syndrome */
106 raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
107 (void **)&dataptrs);
108
109 for (i = 0; i < NDISKS-1; i++)
110 for (j = i+1; j < NDISKS; j++)
111 err += test_disks(i, j);
112 }
113 printf("\n");
114 }
115
116 printf("\n");
117 /* Pick the best algorithm test */
118 raid6_select_algo();
119
120 if (err)
121 printf("\n*** ERRORS FOUND ***\n");
122
123 return err;
124}
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
new file mode 100644
index 00000000000..c6aa03631df
--- /dev/null
+++ b/lib/raid6/unroll.awk
@@ -0,0 +1,20 @@

# This filter requires one command line option of form -vN=n
# where n must be a decimal number.
#
# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
# Replace each $# with n, and each $* with a single $.

BEGIN {
	# Coerce the -vN=n option to a number (0 if absent/non-numeric)
	n = N + 0
}
{
	# Lines containing $$ are emitted n times; everything else once
	if (/\$\$/) { rep = n } else { rep = 1 }
	for (i = 0; i < rep; ++i) {
		tmp = $0
		gsub(/\$\$/, i, tmp)	# current unroll index
		gsub(/\$\#/, n, tmp)	# total unroll factor
		gsub(/\$\*/, "$", tmp)	# escaped literal $
		print tmp
	}
}
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
new file mode 100644
index 00000000000..cb2a8c91c88
--- /dev/null
+++ b/lib/raid6/x86.h
@@ -0,0 +1,61 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
12
13/*
14 * raid6/x86.h
15 *
16 * Definitions common to x86 and x86-64 RAID-6 code only
17 */
18
19#ifndef LINUX_RAID_RAID6X86_H
20#define LINUX_RAID_RAID6X86_H
21
22#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
23
24#ifdef __KERNEL__ /* Real code */
25
26#include <asm/i387.h>
27
28#else /* Dummy code for user space testing */
29
30static inline void kernel_fpu_begin(void)
31{
32}
33
34static inline void kernel_fpu_end(void)
35{
36}
37
38#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
39#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
40 * (fast save and restore) */
41#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
42#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
43#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
44
45/* Should work well enough on modern CPUs for testing */
46static inline int boot_cpu_has(int flag)
47{
48 u32 eax = (flag >> 5) ? 0x80000001 : 1;
49 u32 edx;
50
51 asm volatile("cpuid"
52 : "+a" (eax), "=d" (edx)
53 : : "ecx", "ebx");
54
55 return (edx >> (flag & 31)) & 1;
56}
57
58#endif /* ndef __KERNEL__ */
59
60#endif
61#endif
diff --git a/lib/random32.c b/lib/random32.c
index 217d5c4b666..fc3545a3277 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -39,13 +39,16 @@
39#include <linux/jiffies.h> 39#include <linux/jiffies.h>
40#include <linux/random.h> 40#include <linux/random.h>
41 41
42struct rnd_state {
43 u32 s1, s2, s3;
44};
45
46static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 42static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
47 43
48static u32 __random32(struct rnd_state *state) 44/**
45 * prandom32 - seeded pseudo-random number generator.
46 * @state: pointer to state structure holding seeded state.
47 *
48 * This is used for pseudo-randomness with no outside seeding.
49 * For more random results, use random32().
50 */
51u32 prandom32(struct rnd_state *state)
49{ 52{
50#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) 53#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
51 54
@@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state)
55 58
56 return (state->s1 ^ state->s2 ^ state->s3); 59 return (state->s1 ^ state->s2 ^ state->s3);
57} 60}
58 61EXPORT_SYMBOL(prandom32);
59/*
60 * Handle minimum values for seeds
61 */
62static inline u32 __seed(u32 x, u32 m)
63{
64 return (x < m) ? x + m : x;
65}
66 62
67/** 63/**
68 * random32 - pseudo random number generator 64 * random32 - pseudo random number generator
@@ -75,7 +71,7 @@ u32 random32(void)
75{ 71{
76 unsigned long r; 72 unsigned long r;
77 struct rnd_state *state = &get_cpu_var(net_rand_state); 73 struct rnd_state *state = &get_cpu_var(net_rand_state);
78 r = __random32(state); 74 r = prandom32(state);
79 put_cpu_var(state); 75 put_cpu_var(state);
80 return r; 76 return r;
81} 77}
@@ -118,12 +114,12 @@ static int __init random32_init(void)
118 state->s3 = __seed(LCG(state->s2), 15); 114 state->s3 = __seed(LCG(state->s2), 15);
119 115
120 /* "warm it up" */ 116 /* "warm it up" */
121 __random32(state); 117 prandom32(state);
122 __random32(state); 118 prandom32(state);
123 __random32(state); 119 prandom32(state);
124 __random32(state); 120 prandom32(state);
125 __random32(state); 121 prandom32(state);
126 __random32(state); 122 prandom32(state);
127 } 123 }
128 return 0; 124 return 0;
129} 125}
@@ -131,7 +127,7 @@ core_initcall(random32_init);
131 127
132/* 128/*
133 * Generate better values after random number generator 129 * Generate better values after random number generator
134 * is fully initalized. 130 * is fully initialized.
135 */ 131 */
136static int __init random32_reseed(void) 132static int __init random32_reseed(void)
137{ 133{
@@ -147,7 +143,7 @@ static int __init random32_reseed(void)
147 state->s3 = __seed(seeds[2], 15); 143 state->s3 = __seed(seeds[2], 15);
148 144
149 /* mix it in */ 145 /* mix it in */
150 __random32(state); 146 prandom32(state);
151 } 147 }
152 return 0; 148 return 0;
153} 149}
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 26187edcc7e..027a03f4c56 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -7,51 +7,61 @@
7 * parameter. Now every user can use their own standalone ratelimit_state. 7 * parameter. Now every user can use their own standalone ratelimit_state.
8 * 8 *
9 * This file is released under the GPLv2. 9 * This file is released under the GPLv2.
10 *
11 */ 10 */
12 11
13#include <linux/kernel.h> 12#include <linux/ratelimit.h>
14#include <linux/jiffies.h> 13#include <linux/jiffies.h>
15#include <linux/module.h> 14#include <linux/module.h>
16 15
17static DEFINE_SPINLOCK(ratelimit_lock);
18
19/* 16/*
20 * __ratelimit - rate limiting 17 * __ratelimit - rate limiting
21 * @rs: ratelimit_state data 18 * @rs: ratelimit_state data
19 * @func: name of calling function
20 *
21 * This enforces a rate limit: not more than @rs->burst callbacks
22 * in every @rs->interval
22 * 23 *
23 * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks 24 * RETURNS:
24 * in every @rs->ratelimit_jiffies 25 * 0 means callbacks will be suppressed.
26 * 1 means go ahead and do it.
25 */ 27 */
26int __ratelimit(struct ratelimit_state *rs) 28int ___ratelimit(struct ratelimit_state *rs, const char *func)
27{ 29{
28 unsigned long flags; 30 unsigned long flags;
31 int ret;
29 32
30 if (!rs->interval) 33 if (!rs->interval)
31 return 1; 34 return 1;
32 35
33 spin_lock_irqsave(&ratelimit_lock, flags); 36 /*
37 * If we contend on this state's lock then almost
38 * by definition we are too busy to print a message,
39 * in addition to the one that will be printed by
40 * the entity that is holding the lock already:
41 */
42 if (!spin_trylock_irqsave(&rs->lock, flags))
43 return 0;
44
34 if (!rs->begin) 45 if (!rs->begin)
35 rs->begin = jiffies; 46 rs->begin = jiffies;
36 47
37 if (time_is_before_jiffies(rs->begin + rs->interval)) { 48 if (time_is_before_jiffies(rs->begin + rs->interval)) {
38 if (rs->missed) 49 if (rs->missed)
39 printk(KERN_WARNING "%s: %d callbacks suppressed\n", 50 printk(KERN_WARNING "%s: %d callbacks suppressed\n",
40 __func__, rs->missed); 51 func, rs->missed);
41 rs->begin = 0; 52 rs->begin = 0;
42 rs->printed = 0; 53 rs->printed = 0;
43 rs->missed = 0; 54 rs->missed = 0;
44 } 55 }
45 if (rs->burst && rs->burst > rs->printed) 56 if (rs->burst && rs->burst > rs->printed) {
46 goto print; 57 rs->printed++;
47 58 ret = 1;
48 rs->missed++; 59 } else {
49 spin_unlock_irqrestore(&ratelimit_lock, flags); 60 rs->missed++;
50 return 0; 61 ret = 0;
62 }
63 spin_unlock_irqrestore(&rs->lock, flags);
51 64
52print: 65 return ret;
53 rs->printed++;
54 spin_unlock_irqrestore(&ratelimit_lock, flags);
55 return 1;
56} 66}
57EXPORT_SYMBOL(__ratelimit); 67EXPORT_SYMBOL(___ratelimit);
diff --git a/lib/rational.c b/lib/rational.c
index b3c099b5478..3ed247b8066 100644
--- a/lib/rational.c
+++ b/lib/rational.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/rational.h> 9#include <linux/rational.h>
10#include <linux/module.h>
10 11
11/* 12/*
12 * calculate best rational approximation for a given fraction 13 * calculate best rational approximation for a given fraction
diff --git a/lib/rbtree.c b/lib/rbtree.c
index e2aa3be2985..4693f79195d 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -283,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
283} 283}
284EXPORT_SYMBOL(rb_erase); 284EXPORT_SYMBOL(rb_erase);
285 285
286static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
287{
288 struct rb_node *parent;
289
290up:
291 func(node, data);
292 parent = rb_parent(node);
293 if (!parent)
294 return;
295
296 if (node == parent->rb_left && parent->rb_right)
297 func(parent->rb_right, data);
298 else if (parent->rb_left)
299 func(parent->rb_left, data);
300
301 node = parent;
302 goto up;
303}
304
305/*
306 * after inserting @node into the tree, update the tree to account for
307 * both the new entry and any damage done by rebalance
308 */
309void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
310{
311 if (node->rb_left)
312 node = node->rb_left;
313 else if (node->rb_right)
314 node = node->rb_right;
315
316 rb_augment_path(node, func, data);
317}
318
319/*
320 * before removing the node, find the deepest node on the rebalance path
321 * that will still be there after @node gets removed
322 */
323struct rb_node *rb_augment_erase_begin(struct rb_node *node)
324{
325 struct rb_node *deepest;
326
327 if (!node->rb_right && !node->rb_left)
328 deepest = rb_parent(node);
329 else if (!node->rb_right)
330 deepest = node->rb_left;
331 else if (!node->rb_left)
332 deepest = node->rb_right;
333 else {
334 deepest = rb_next(node);
335 if (deepest->rb_right)
336 deepest = deepest->rb_right;
337 else if (rb_parent(deepest) != node)
338 deepest = rb_parent(deepest);
339 }
340
341 return deepest;
342}
343
344/*
345 * after removal, update the tree to account for the removed entry
346 * and any rebalance damage.
347 */
348void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
349{
350 if (node)
351 rb_augment_path(node, func, data);
352}
353
286/* 354/*
287 * This function returns the first node (in sort order) of the tree. 355 * This function returns the first node (in sort order) of the tree.
288 */ 356 */
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 9df3ca56db1..ffc9fc7f3b0 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -17,6 +17,19 @@ struct rwsem_waiter {
17#define RWSEM_WAITING_FOR_WRITE 0x00000002 17#define RWSEM_WAITING_FOR_WRITE 0x00000002
18}; 18};
19 19
20int rwsem_is_locked(struct rw_semaphore *sem)
21{
22 int ret = 1;
23 unsigned long flags;
24
25 if (spin_trylock_irqsave(&sem->wait_lock, flags)) {
26 ret = (sem->activity != 0);
27 spin_unlock_irqrestore(&sem->wait_lock, flags);
28 }
29 return ret;
30}
31EXPORT_SYMBOL(rwsem_is_locked);
32
20/* 33/*
21 * initialise the semaphore 34 * initialise the semaphore
22 */ 35 */
@@ -34,6 +47,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
34 spin_lock_init(&sem->wait_lock); 47 spin_lock_init(&sem->wait_lock);
35 INIT_LIST_HEAD(&sem->wait_list); 48 INIT_LIST_HEAD(&sem->wait_list);
36} 49}
50EXPORT_SYMBOL(__init_rwsem);
37 51
38/* 52/*
39 * handle the lock release when processes blocked on it that can now run 53 * handle the lock release when processes blocked on it that can now run
@@ -129,13 +143,14 @@ void __sched __down_read(struct rw_semaphore *sem)
129{ 143{
130 struct rwsem_waiter waiter; 144 struct rwsem_waiter waiter;
131 struct task_struct *tsk; 145 struct task_struct *tsk;
146 unsigned long flags;
132 147
133 spin_lock_irq(&sem->wait_lock); 148 spin_lock_irqsave(&sem->wait_lock, flags);
134 149
135 if (sem->activity >= 0 && list_empty(&sem->wait_list)) { 150 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
136 /* granted */ 151 /* granted */
137 sem->activity++; 152 sem->activity++;
138 spin_unlock_irq(&sem->wait_lock); 153 spin_unlock_irqrestore(&sem->wait_lock, flags);
139 goto out; 154 goto out;
140 } 155 }
141 156
@@ -150,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem)
150 list_add_tail(&waiter.list, &sem->wait_list); 165 list_add_tail(&waiter.list, &sem->wait_list);
151 166
152 /* we don't need to touch the semaphore struct anymore */ 167 /* we don't need to touch the semaphore struct anymore */
153 spin_unlock_irq(&sem->wait_lock); 168 spin_unlock_irqrestore(&sem->wait_lock, flags);
154 169
155 /* wait to be given the lock */ 170 /* wait to be given the lock */
156 for (;;) { 171 for (;;) {
@@ -195,13 +210,14 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
195{ 210{
196 struct rwsem_waiter waiter; 211 struct rwsem_waiter waiter;
197 struct task_struct *tsk; 212 struct task_struct *tsk;
213 unsigned long flags;
198 214
199 spin_lock_irq(&sem->wait_lock); 215 spin_lock_irqsave(&sem->wait_lock, flags);
200 216
201 if (sem->activity == 0 && list_empty(&sem->wait_list)) { 217 if (sem->activity == 0 && list_empty(&sem->wait_list)) {
202 /* granted */ 218 /* granted */
203 sem->activity = -1; 219 sem->activity = -1;
204 spin_unlock_irq(&sem->wait_lock); 220 spin_unlock_irqrestore(&sem->wait_lock, flags);
205 goto out; 221 goto out;
206 } 222 }
207 223
@@ -216,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
216 list_add_tail(&waiter.list, &sem->wait_list); 232 list_add_tail(&waiter.list, &sem->wait_list);
217 233
218 /* we don't need to touch the semaphore struct anymore */ 234 /* we don't need to touch the semaphore struct anymore */
219 spin_unlock_irq(&sem->wait_lock); 235 spin_unlock_irqrestore(&sem->wait_lock, flags);
220 236
221 /* wait to be given the lock */ 237 /* wait to be given the lock */
222 for (;;) { 238 for (;;) {
@@ -305,12 +321,3 @@ void __downgrade_write(struct rw_semaphore *sem)
305 spin_unlock_irqrestore(&sem->wait_lock, flags); 321 spin_unlock_irqrestore(&sem->wait_lock, flags);
306} 322}
307 323
308EXPORT_SYMBOL(__init_rwsem);
309EXPORT_SYMBOL(__down_read);
310EXPORT_SYMBOL(__down_read_trylock);
311EXPORT_SYMBOL(__down_write_nested);
312EXPORT_SYMBOL(__down_write);
313EXPORT_SYMBOL(__down_write_trylock);
314EXPORT_SYMBOL(__up_read);
315EXPORT_SYMBOL(__up_write);
316EXPORT_SYMBOL(__downgrade_write);
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e5665..f236d7cd5cf 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,45 +36,56 @@ struct rwsem_waiter {
36#define RWSEM_WAITING_FOR_WRITE 0x00000002 36#define RWSEM_WAITING_FOR_WRITE 0x00000002
37}; 37};
38 38
39/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
40 * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
41 * since the rwsem value was observed.
42 */
43#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
44#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */
45#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
46
39/* 47/*
40 * handle the lock release when processes blocked on it that can now run 48 * handle the lock release when processes blocked on it that can now run
41 * - if we come here from up_xxxx(), then: 49 * - if we come here from up_xxxx(), then:
42 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) 50 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
43 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) 51 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
44 * - there must be someone on the queue 52 * - there must be someone on the queue
45 * - the spinlock must be held by the caller 53 * - the spinlock must be held by the caller
46 * - woken process blocks are discarded from the list after having task zeroed 54 * - woken process blocks are discarded from the list after having task zeroed
47 * - writers are only woken if downgrading is false 55 * - writers are only woken if downgrading is false
48 */ 56 */
49static inline struct rw_semaphore * 57static struct rw_semaphore *
50__rwsem_do_wake(struct rw_semaphore *sem, int downgrading) 58__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
51{ 59{
52 struct rwsem_waiter *waiter; 60 struct rwsem_waiter *waiter;
53 struct task_struct *tsk; 61 struct task_struct *tsk;
54 struct list_head *next; 62 struct list_head *next;
55 signed long oldcount, woken, loop; 63 signed long oldcount, woken, loop, adjustment;
56
57 if (downgrading)
58 goto dont_wake_writers;
59
60 /* if we came through an up_xxxx() call, we only only wake someone up
61 * if we can transition the active part of the count from 0 -> 1
62 */
63 try_again:
64 oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
65 - RWSEM_ACTIVE_BIAS;
66 if (oldcount & RWSEM_ACTIVE_MASK)
67 goto undo;
68 64
69 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 65 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
70
71 /* try to grant a single write lock if there's a writer at the front
72 * of the queue - note we leave the 'active part' of the count
73 * incremented by 1 and the waiting part incremented by 0x00010000
74 */
75 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) 66 if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
76 goto readers_only; 67 goto readers_only;
77 68
69 if (wake_type == RWSEM_WAKE_READ_OWNED)
70 /* Another active reader was observed, so wakeup is not
71 * likely to succeed. Save the atomic op.
72 */
73 goto out;
74
75 /* There's a writer at the front of the queue - try to grant it the
76 * write lock. However, we only wake this writer if we can transition
77 * the active part of the count from 0 -> 1
78 */
79 adjustment = RWSEM_ACTIVE_WRITE_BIAS;
80 if (waiter->list.next == &sem->wait_list)
81 adjustment -= RWSEM_WAITING_BIAS;
82
83 try_again_write:
84 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
85 if (oldcount & RWSEM_ACTIVE_MASK)
86 /* Someone grabbed the sem already */
87 goto undo_write;
88
78 /* We must be careful not to touch 'waiter' after we set ->task = NULL. 89 /* We must be careful not to touch 'waiter' after we set ->task = NULL.
79 * It is an allocated on the waiter's stack and may become invalid at 90 * It is an allocated on the waiter's stack and may become invalid at
80 * any time after that point (due to a wakeup from another source). 91 * any time after that point (due to a wakeup from another source).
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
87 put_task_struct(tsk); 98 put_task_struct(tsk);
88 goto out; 99 goto out;
89 100
90 /* don't want to wake any writers */ 101 readers_only:
91 dont_wake_writers: 102 /* If we come here from up_xxxx(), another thread might have reached
92 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); 103 * rwsem_down_failed_common() before we acquired the spinlock and
93 if (waiter->flags & RWSEM_WAITING_FOR_WRITE) 104 * woken up a waiter, making it now active. We prefer to check for
105 * this first in order to not spend too much time with the spinlock
106 * held if we're not going to be able to wake up readers in the end.
107 *
108 * Note that we do not need to update the rwsem count: any writer
109 * trying to acquire rwsem will run rwsem_down_write_failed() due
110 * to the waiting threads and block trying to acquire the spinlock.
111 *
112 * We use a dummy atomic update in order to acquire the cache line
113 * exclusively since we expect to succeed and run the final rwsem
114 * count adjustment pretty soon.
115 */
116 if (wake_type == RWSEM_WAKE_ANY &&
117 rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
118 /* Someone grabbed the sem for write already */
94 goto out; 119 goto out;
95 120
96 /* grant an infinite number of read locks to the readers at the front 121 /* Grant an infinite number of read locks to the readers at the front
97 * of the queue 122 * of the queue. Note we increment the 'active part' of the count by
98 * - note we increment the 'active part' of the count by the number of 123 * the number of readers before waking any processes up.
99 * readers before waking any processes up
100 */ 124 */
101 readers_only:
102 woken = 0; 125 woken = 0;
103 do { 126 do {
104 woken++; 127 woken++;
@@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
111 134
112 } while (waiter->flags & RWSEM_WAITING_FOR_READ); 135 } while (waiter->flags & RWSEM_WAITING_FOR_READ);
113 136
114 loop = woken; 137 adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
115 woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; 138 if (waiter->flags & RWSEM_WAITING_FOR_READ)
116 if (!downgrading) 139 /* hit end of list above */
117 /* we'd already done one increment earlier */ 140 adjustment -= RWSEM_WAITING_BIAS;
118 woken -= RWSEM_ACTIVE_BIAS;
119 141
120 rwsem_atomic_add(woken, sem); 142 rwsem_atomic_add(adjustment, sem);
121 143
122 next = sem->wait_list.next; 144 next = sem->wait_list.next;
123 for (; loop > 0; loop--) { 145 for (loop = woken; loop > 0; loop--) {
124 waiter = list_entry(next, struct rwsem_waiter, list); 146 waiter = list_entry(next, struct rwsem_waiter, list);
125 next = waiter->list.next; 147 next = waiter->list.next;
126 tsk = waiter->task; 148 tsk = waiter->task;
@@ -136,11 +158,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
136 out: 158 out:
137 return sem; 159 return sem;
138 160
139 /* undo the change to count, but check for a transition 1->0 */ 161 /* undo the change to the active count, but check for a transition
140 undo: 162 * 1->0 */
141 if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) 163 undo_write:
164 if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
142 goto out; 165 goto out;
143 goto try_again; 166 goto try_again_write;
144} 167}
145 168
146/* 169/*
@@ -148,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
148 */ 171 */
149static struct rw_semaphore __sched * 172static struct rw_semaphore __sched *
150rwsem_down_failed_common(struct rw_semaphore *sem, 173rwsem_down_failed_common(struct rw_semaphore *sem,
151 struct rwsem_waiter *waiter, signed long adjustment) 174 unsigned int flags, signed long adjustment)
152{ 175{
176 struct rwsem_waiter waiter;
153 struct task_struct *tsk = current; 177 struct task_struct *tsk = current;
154 signed long count; 178 signed long count;
155 179
@@ -157,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
157 181
158 /* set up my own style of waitqueue */ 182 /* set up my own style of waitqueue */
159 spin_lock_irq(&sem->wait_lock); 183 spin_lock_irq(&sem->wait_lock);
160 waiter->task = tsk; 184 waiter.task = tsk;
185 waiter.flags = flags;
161 get_task_struct(tsk); 186 get_task_struct(tsk);
162 187
163 list_add_tail(&waiter->list, &sem->wait_list); 188 if (list_empty(&sem->wait_list))
189 adjustment += RWSEM_WAITING_BIAS;
190 list_add_tail(&waiter.list, &sem->wait_list);
164 191
165 /* we're now waiting on the lock, but no longer actively read-locking */ 192 /* we're now waiting on the lock, but no longer actively locking */
166 count = rwsem_atomic_update(adjustment, sem); 193 count = rwsem_atomic_update(adjustment, sem);
167 194
168 /* if there are no active locks, wake the front queued process(es) up */ 195 /* If there are no active locks, wake the front queued process(es) up.
169 if (!(count & RWSEM_ACTIVE_MASK)) 196 *
170 sem = __rwsem_do_wake(sem, 0); 197 * Alternatively, if we're called from a failed down_write(), there
198 * were already threads queued before us and there are no active
199 * writers, the lock must be read owned; so we try to wake any read
200 * locks that were queued ahead of us. */
201 if (count == RWSEM_WAITING_BIAS)
202 sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
203 else if (count > RWSEM_WAITING_BIAS &&
204 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
205 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
171 206
172 spin_unlock_irq(&sem->wait_lock); 207 spin_unlock_irq(&sem->wait_lock);
173 208
174 /* wait to be given the lock */ 209 /* wait to be given the lock */
175 for (;;) { 210 for (;;) {
176 if (!waiter->task) 211 if (!waiter.task)
177 break; 212 break;
178 schedule(); 213 schedule();
179 set_task_state(tsk, TASK_UNINTERRUPTIBLE); 214 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
@@ -190,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
190asmregparm struct rw_semaphore __sched * 225asmregparm struct rw_semaphore __sched *
191rwsem_down_read_failed(struct rw_semaphore *sem) 226rwsem_down_read_failed(struct rw_semaphore *sem)
192{ 227{
193 struct rwsem_waiter waiter; 228 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
194 229 -RWSEM_ACTIVE_READ_BIAS);
195 waiter.flags = RWSEM_WAITING_FOR_READ;
196 rwsem_down_failed_common(sem, &waiter,
197 RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
198 return sem;
199} 230}
200 231
201/* 232/*
@@ -204,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
204asmregparm struct rw_semaphore __sched * 235asmregparm struct rw_semaphore __sched *
205rwsem_down_write_failed(struct rw_semaphore *sem) 236rwsem_down_write_failed(struct rw_semaphore *sem)
206{ 237{
207 struct rwsem_waiter waiter; 238 return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
208 239 -RWSEM_ACTIVE_WRITE_BIAS);
209 waiter.flags = RWSEM_WAITING_FOR_WRITE;
210 rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
211
212 return sem;
213} 240}
214 241
215/* 242/*
@@ -224,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
224 251
225 /* do nothing if list empty */ 252 /* do nothing if list empty */
226 if (!list_empty(&sem->wait_list)) 253 if (!list_empty(&sem->wait_list))
227 sem = __rwsem_do_wake(sem, 0); 254 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
228 255
229 spin_unlock_irqrestore(&sem->wait_lock, flags); 256 spin_unlock_irqrestore(&sem->wait_lock, flags);
230 257
@@ -244,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
244 271
245 /* do nothing if list empty */ 272 /* do nothing if list empty */
246 if (!list_empty(&sem->wait_list)) 273 if (!list_empty(&sem->wait_list))
247 sem = __rwsem_do_wake(sem, 1); 274 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
248 275
249 spin_unlock_irqrestore(&sem->wait_lock, flags); 276 spin_unlock_irqrestore(&sem->wait_lock, flags);
250 277
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a295e404e90..4ceb05d772a 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -7,8 +7,10 @@
7 * Version 2. See the file COPYING for more details. 7 * Version 2. See the file COPYING for more details.
8 */ 8 */
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/slab.h>
10#include <linux/scatterlist.h> 11#include <linux/scatterlist.h>
11#include <linux/highmem.h> 12#include <linux/highmem.h>
13#include <linux/kmemleak.h>
12 14
13/** 15/**
14 * sg_next - return the next scatterlist entry in a list 16 * sg_next - return the next scatterlist entry in a list
@@ -114,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one);
114 */ 116 */
115static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask) 117static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
116{ 118{
117 if (nents == SG_MAX_SINGLE_ALLOC) 119 if (nents == SG_MAX_SINGLE_ALLOC) {
118 return (struct scatterlist *) __get_free_page(gfp_mask); 120 /*
119 else 121 * Kmemleak doesn't track page allocations as they are not
122 * commonly used (in a raw form) for kernel data structures.
123 * As we chain together a list of pages and then a normal
124 * kmalloc (tracked by kmemleak), in order to for that last
125 * allocation not to become decoupled (and thus a
126 * false-positive) we need to inform kmemleak of all the
127 * intermediate allocations.
128 */
129 void *ptr = (void *) __get_free_page(gfp_mask);
130 kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
131 return ptr;
132 } else
120 return kmalloc(nents * sizeof(struct scatterlist), gfp_mask); 133 return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
121} 134}
122 135
123static void sg_kfree(struct scatterlist *sg, unsigned int nents) 136static void sg_kfree(struct scatterlist *sg, unsigned int nents)
124{ 137{
125 if (nents == SG_MAX_SINGLE_ALLOC) 138 if (nents == SG_MAX_SINGLE_ALLOC) {
139 kmemleak_free(sg);
126 free_page((unsigned long) sg); 140 free_page((unsigned long) sg);
127 else 141 } else
128 kfree(sg); 142 kfree(sg);
129} 143}
130 144
@@ -234,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
234 left -= sg_size; 248 left -= sg_size;
235 249
236 sg = alloc_fn(alloc_size, gfp_mask); 250 sg = alloc_fn(alloc_size, gfp_mask);
237 if (unlikely(!sg)) 251 if (unlikely(!sg)) {
238 return -ENOMEM; 252 /*
253 * Adjust entry count to reflect that the last
254 * entry of the previous table won't be used for
255 * linkage. Without this, sg_kfree() may get
256 * confused.
257 */
258 if (prv)
259 table->nents = ++table->orig_nents;
260
261 return -ENOMEM;
262 }
239 263
240 sg_init_table(sg, alloc_size); 264 sg_init_table(sg, alloc_size);
241 table->nents = table->orig_nents += sg_size; 265 table->nents = table->orig_nents += sg_size;
@@ -314,6 +338,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
314 miter->__sg = sgl; 338 miter->__sg = sgl;
315 miter->__nents = nents; 339 miter->__nents = nents;
316 miter->__offset = 0; 340 miter->__offset = 0;
341 WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
317 miter->__flags = flags; 342 miter->__flags = flags;
318} 343}
319EXPORT_SYMBOL(sg_miter_start); 344EXPORT_SYMBOL(sg_miter_start);
@@ -394,6 +419,9 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
394 if (miter->addr) { 419 if (miter->addr) {
395 miter->__offset += miter->consumed; 420 miter->__offset += miter->consumed;
396 421
422 if (miter->__flags & SG_MITER_TO_SG)
423 flush_kernel_dcache_page(miter->page);
424
397 if (miter->__flags & SG_MITER_ATOMIC) { 425 if (miter->__flags & SG_MITER_ATOMIC) {
398 WARN_ON(!irqs_disabled()); 426 WARN_ON(!irqs_disabled());
399 kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ); 427 kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
@@ -426,8 +454,14 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
426 unsigned int offset = 0; 454 unsigned int offset = 0;
427 struct sg_mapping_iter miter; 455 struct sg_mapping_iter miter;
428 unsigned long flags; 456 unsigned long flags;
457 unsigned int sg_flags = SG_MITER_ATOMIC;
458
459 if (to_buffer)
460 sg_flags |= SG_MITER_FROM_SG;
461 else
462 sg_flags |= SG_MITER_TO_SG;
429 463
430 sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC); 464 sg_miter_start(&miter, sgl, nents, sg_flags);
431 465
432 local_irq_save(flags); 466 local_irq_save(flags);
433 467
@@ -438,10 +472,8 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
438 472
439 if (to_buffer) 473 if (to_buffer)
440 memcpy(buf + offset, miter.addr, len); 474 memcpy(buf + offset, miter.addr, len);
441 else { 475 else
442 memcpy(miter.addr, buf + offset, len); 476 memcpy(miter.addr, buf + offset, len);
443 flush_kernel_dcache_page(miter.page);
444 }
445 477
446 offset += len; 478 offset += len;
447 } 479 }
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 238e72a18ce..fdc77c82f92 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -15,7 +15,7 @@ void show_mem(void)
15 unsigned long total = 0, reserved = 0, shared = 0, 15 unsigned long total = 0, reserved = 0, shared = 0,
16 nonshared = 0, highmem = 0; 16 nonshared = 0, highmem = 0;
17 17
18 printk(KERN_INFO "Mem-Info:\n"); 18 printk("Mem-Info:\n");
19 show_free_areas(); 19 show_free_areas();
20 20
21 for_each_online_pgdat(pgdat) { 21 for_each_online_pgdat(pgdat) {
@@ -49,15 +49,15 @@ void show_mem(void)
49 pgdat_resize_unlock(pgdat, &flags); 49 pgdat_resize_unlock(pgdat, &flags);
50 } 50 }
51 51
52 printk(KERN_INFO "%lu pages RAM\n", total); 52 printk("%lu pages RAM\n", total);
53#ifdef CONFIG_HIGHMEM 53#ifdef CONFIG_HIGHMEM
54 printk(KERN_INFO "%lu pages HighMem\n", highmem); 54 printk("%lu pages HighMem\n", highmem);
55#endif 55#endif
56 printk(KERN_INFO "%lu pages reserved\n", reserved); 56 printk("%lu pages reserved\n", reserved);
57 printk(KERN_INFO "%lu pages shared\n", shared); 57 printk("%lu pages shared\n", shared);
58 printk(KERN_INFO "%lu pages non-shared\n", nonshared); 58 printk("%lu pages non-shared\n", nonshared);
59#ifdef CONFIG_QUICKLIST 59#ifdef CONFIG_QUICKLIST
60 printk(KERN_INFO "%lu pages in pagetable cache\n", 60 printk("%lu pages in pagetable cache\n",
61 quicklist_total_size()); 61 quicklist_total_size());
62#endif 62#endif
63} 63}
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 9c4b0256490..4755b98b6df 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -13,8 +13,8 @@
13#include <linux/delay.h> 13#include <linux/delay.h>
14#include <linux/module.h> 14#include <linux/module.h>
15 15
16void __spin_lock_init(spinlock_t *lock, const char *name, 16void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
17 struct lock_class_key *key) 17 struct lock_class_key *key)
18{ 18{
19#ifdef CONFIG_DEBUG_LOCK_ALLOC 19#ifdef CONFIG_DEBUG_LOCK_ALLOC
20 /* 20 /*
@@ -23,13 +23,13 @@ void __spin_lock_init(spinlock_t *lock, const char *name,
23 debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 23 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
24 lockdep_init_map(&lock->dep_map, name, key, 0); 24 lockdep_init_map(&lock->dep_map, name, key, 0);
25#endif 25#endif
26 lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 26 lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
27 lock->magic = SPINLOCK_MAGIC; 27 lock->magic = SPINLOCK_MAGIC;
28 lock->owner = SPINLOCK_OWNER_INIT; 28 lock->owner = SPINLOCK_OWNER_INIT;
29 lock->owner_cpu = -1; 29 lock->owner_cpu = -1;
30} 30}
31 31
32EXPORT_SYMBOL(__spin_lock_init); 32EXPORT_SYMBOL(__raw_spin_lock_init);
33 33
34void __rwlock_init(rwlock_t *lock, const char *name, 34void __rwlock_init(rwlock_t *lock, const char *name,
35 struct lock_class_key *key) 35 struct lock_class_key *key)
@@ -41,7 +41,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
41 debug_check_no_locks_freed((void *)lock, sizeof(*lock)); 41 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
42 lockdep_init_map(&lock->dep_map, name, key, 0); 42 lockdep_init_map(&lock->dep_map, name, key, 0);
43#endif 43#endif
44 lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED; 44 lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
45 lock->magic = RWLOCK_MAGIC; 45 lock->magic = RWLOCK_MAGIC;
46 lock->owner = SPINLOCK_OWNER_INIT; 46 lock->owner = SPINLOCK_OWNER_INIT;
47 lock->owner_cpu = -1; 47 lock->owner_cpu = -1;
@@ -49,7 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name,
49 49
50EXPORT_SYMBOL(__rwlock_init); 50EXPORT_SYMBOL(__rwlock_init);
51 51
52static void spin_bug(spinlock_t *lock, const char *msg) 52static void spin_bug(raw_spinlock_t *lock, const char *msg)
53{ 53{
54 struct task_struct *owner = NULL; 54 struct task_struct *owner = NULL;
55 55
@@ -73,7 +73,7 @@ static void spin_bug(spinlock_t *lock, const char *msg)
73#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) 73#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
74 74
75static inline void 75static inline void
76debug_spin_lock_before(spinlock_t *lock) 76debug_spin_lock_before(raw_spinlock_t *lock)
77{ 77{
78 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); 78 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
79 SPIN_BUG_ON(lock->owner == current, lock, "recursion"); 79 SPIN_BUG_ON(lock->owner == current, lock, "recursion");
@@ -81,16 +81,16 @@ debug_spin_lock_before(spinlock_t *lock)
81 lock, "cpu recursion"); 81 lock, "cpu recursion");
82} 82}
83 83
84static inline void debug_spin_lock_after(spinlock_t *lock) 84static inline void debug_spin_lock_after(raw_spinlock_t *lock)
85{ 85{
86 lock->owner_cpu = raw_smp_processor_id(); 86 lock->owner_cpu = raw_smp_processor_id();
87 lock->owner = current; 87 lock->owner = current;
88} 88}
89 89
90static inline void debug_spin_unlock(spinlock_t *lock) 90static inline void debug_spin_unlock(raw_spinlock_t *lock)
91{ 91{
92 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); 92 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
93 SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked"); 93 SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
94 SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); 94 SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
95 SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), 95 SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
96 lock, "wrong CPU"); 96 lock, "wrong CPU");
@@ -98,7 +98,7 @@ static inline void debug_spin_unlock(spinlock_t *lock)
98 lock->owner_cpu = -1; 98 lock->owner_cpu = -1;
99} 99}
100 100
101static void __spin_lock_debug(spinlock_t *lock) 101static void __spin_lock_debug(raw_spinlock_t *lock)
102{ 102{
103 u64 i; 103 u64 i;
104 u64 loops = loops_per_jiffy * HZ; 104 u64 loops = loops_per_jiffy * HZ;
@@ -106,7 +106,7 @@ static void __spin_lock_debug(spinlock_t *lock)
106 106
107 for (;;) { 107 for (;;) {
108 for (i = 0; i < loops; i++) { 108 for (i = 0; i < loops; i++) {
109 if (__raw_spin_trylock(&lock->raw_lock)) 109 if (arch_spin_trylock(&lock->raw_lock))
110 return; 110 return;
111 __delay(1); 111 __delay(1);
112 } 112 }
@@ -125,17 +125,17 @@ static void __spin_lock_debug(spinlock_t *lock)
125 } 125 }
126} 126}
127 127
128void _raw_spin_lock(spinlock_t *lock) 128void do_raw_spin_lock(raw_spinlock_t *lock)
129{ 129{
130 debug_spin_lock_before(lock); 130 debug_spin_lock_before(lock);
131 if (unlikely(!__raw_spin_trylock(&lock->raw_lock))) 131 if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
132 __spin_lock_debug(lock); 132 __spin_lock_debug(lock);
133 debug_spin_lock_after(lock); 133 debug_spin_lock_after(lock);
134} 134}
135 135
136int _raw_spin_trylock(spinlock_t *lock) 136int do_raw_spin_trylock(raw_spinlock_t *lock)
137{ 137{
138 int ret = __raw_spin_trylock(&lock->raw_lock); 138 int ret = arch_spin_trylock(&lock->raw_lock);
139 139
140 if (ret) 140 if (ret)
141 debug_spin_lock_after(lock); 141 debug_spin_lock_after(lock);
@@ -148,10 +148,10 @@ int _raw_spin_trylock(spinlock_t *lock)
148 return ret; 148 return ret;
149} 149}
150 150
151void _raw_spin_unlock(spinlock_t *lock) 151void do_raw_spin_unlock(raw_spinlock_t *lock)
152{ 152{
153 debug_spin_unlock(lock); 153 debug_spin_unlock(lock);
154 __raw_spin_unlock(&lock->raw_lock); 154 arch_spin_unlock(&lock->raw_lock);
155} 155}
156 156
157static void rwlock_bug(rwlock_t *lock, const char *msg) 157static void rwlock_bug(rwlock_t *lock, const char *msg)
@@ -176,7 +176,7 @@ static void __read_lock_debug(rwlock_t *lock)
176 176
177 for (;;) { 177 for (;;) {
178 for (i = 0; i < loops; i++) { 178 for (i = 0; i < loops; i++) {
179 if (__raw_read_trylock(&lock->raw_lock)) 179 if (arch_read_trylock(&lock->raw_lock))
180 return; 180 return;
181 __delay(1); 181 __delay(1);
182 } 182 }
@@ -193,15 +193,15 @@ static void __read_lock_debug(rwlock_t *lock)
193} 193}
194#endif 194#endif
195 195
196void _raw_read_lock(rwlock_t *lock) 196void do_raw_read_lock(rwlock_t *lock)
197{ 197{
198 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); 198 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
199 __raw_read_lock(&lock->raw_lock); 199 arch_read_lock(&lock->raw_lock);
200} 200}
201 201
202int _raw_read_trylock(rwlock_t *lock) 202int do_raw_read_trylock(rwlock_t *lock)
203{ 203{
204 int ret = __raw_read_trylock(&lock->raw_lock); 204 int ret = arch_read_trylock(&lock->raw_lock);
205 205
206#ifndef CONFIG_SMP 206#ifndef CONFIG_SMP
207 /* 207 /*
@@ -212,10 +212,10 @@ int _raw_read_trylock(rwlock_t *lock)
212 return ret; 212 return ret;
213} 213}
214 214
215void _raw_read_unlock(rwlock_t *lock) 215void do_raw_read_unlock(rwlock_t *lock)
216{ 216{
217 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); 217 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
218 __raw_read_unlock(&lock->raw_lock); 218 arch_read_unlock(&lock->raw_lock);
219} 219}
220 220
221static inline void debug_write_lock_before(rwlock_t *lock) 221static inline void debug_write_lock_before(rwlock_t *lock)
@@ -251,7 +251,7 @@ static void __write_lock_debug(rwlock_t *lock)
251 251
252 for (;;) { 252 for (;;) {
253 for (i = 0; i < loops; i++) { 253 for (i = 0; i < loops; i++) {
254 if (__raw_write_trylock(&lock->raw_lock)) 254 if (arch_write_trylock(&lock->raw_lock))
255 return; 255 return;
256 __delay(1); 256 __delay(1);
257 } 257 }
@@ -268,16 +268,16 @@ static void __write_lock_debug(rwlock_t *lock)
268} 268}
269#endif 269#endif
270 270
271void _raw_write_lock(rwlock_t *lock) 271void do_raw_write_lock(rwlock_t *lock)
272{ 272{
273 debug_write_lock_before(lock); 273 debug_write_lock_before(lock);
274 __raw_write_lock(&lock->raw_lock); 274 arch_write_lock(&lock->raw_lock);
275 debug_write_lock_after(lock); 275 debug_write_lock_after(lock);
276} 276}
277 277
278int _raw_write_trylock(rwlock_t *lock) 278int do_raw_write_trylock(rwlock_t *lock)
279{ 279{
280 int ret = __raw_write_trylock(&lock->raw_lock); 280 int ret = arch_write_trylock(&lock->raw_lock);
281 281
282 if (ret) 282 if (ret)
283 debug_write_lock_after(lock); 283 debug_write_lock_after(lock);
@@ -290,8 +290,8 @@ int _raw_write_trylock(rwlock_t *lock)
290 return ret; 290 return ret;
291} 291}
292 292
293void _raw_write_unlock(rwlock_t *lock) 293void do_raw_write_unlock(rwlock_t *lock)
294{ 294{
295 debug_write_unlock(lock); 295 debug_write_unlock(lock);
296 __raw_write_unlock(&lock->raw_lock); 296 arch_write_unlock(&lock->raw_lock);
297} 297}
diff --git a/lib/string.c b/lib/string.c
index b19b87af65a..f71bead1be3 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -36,25 +36,21 @@ int strnicmp(const char *s1, const char *s2, size_t len)
36 /* Yes, Virginia, it had better be unsigned */ 36 /* Yes, Virginia, it had better be unsigned */
37 unsigned char c1, c2; 37 unsigned char c1, c2;
38 38
39 c1 = c2 = 0; 39 if (!len)
40 if (len) { 40 return 0;
41 do { 41
42 c1 = *s1; 42 do {
43 c2 = *s2; 43 c1 = *s1++;
44 s1++; 44 c2 = *s2++;
45 s2++; 45 if (!c1 || !c2)
46 if (!c1) 46 break;
47 break; 47 if (c1 == c2)
48 if (!c2) 48 continue;
49 break; 49 c1 = tolower(c1);
50 if (c1 == c2) 50 c2 = tolower(c2);
51 continue; 51 if (c1 != c2)
52 c1 = tolower(c1); 52 break;
53 c2 = tolower(c2); 53 } while (--len);
54 if (c1 != c2)
55 break;
56 } while (--len);
57 }
58 return (int)c1 - (int)c2; 54 return (int)c1 - (int)c2;
59} 55}
60EXPORT_SYMBOL(strnicmp); 56EXPORT_SYMBOL(strnicmp);
@@ -246,13 +242,17 @@ EXPORT_SYMBOL(strlcat);
246#undef strcmp 242#undef strcmp
247int strcmp(const char *cs, const char *ct) 243int strcmp(const char *cs, const char *ct)
248{ 244{
249 signed char __res; 245 unsigned char c1, c2;
250 246
251 while (1) { 247 while (1) {
252 if ((__res = *cs - *ct++) != 0 || !*cs++) 248 c1 = *cs++;
249 c2 = *ct++;
250 if (c1 != c2)
251 return c1 < c2 ? -1 : 1;
252 if (!c1)
253 break; 253 break;
254 } 254 }
255 return __res; 255 return 0;
256} 256}
257EXPORT_SYMBOL(strcmp); 257EXPORT_SYMBOL(strcmp);
258#endif 258#endif
@@ -266,14 +266,18 @@ EXPORT_SYMBOL(strcmp);
266 */ 266 */
267int strncmp(const char *cs, const char *ct, size_t count) 267int strncmp(const char *cs, const char *ct, size_t count)
268{ 268{
269 signed char __res = 0; 269 unsigned char c1, c2;
270 270
271 while (count) { 271 while (count) {
272 if ((__res = *cs - *ct++) != 0 || !*cs++) 272 c1 = *cs++;
273 c2 = *ct++;
274 if (c1 != c2)
275 return c1 < c2 ? -1 : 1;
276 if (!c1)
273 break; 277 break;
274 count--; 278 count--;
275 } 279 }
276 return __res; 280 return 0;
277} 281}
278EXPORT_SYMBOL(strncmp); 282EXPORT_SYMBOL(strncmp);
279#endif 283#endif
@@ -330,20 +334,34 @@ EXPORT_SYMBOL(strnchr);
330#endif 334#endif
331 335
332/** 336/**
333 * strstrip - Removes leading and trailing whitespace from @s. 337 * skip_spaces - Removes leading whitespace from @str.
338 * @str: The string to be stripped.
339 *
340 * Returns a pointer to the first non-whitespace character in @str.
341 */
342char *skip_spaces(const char *str)
343{
344 while (isspace(*str))
345 ++str;
346 return (char *)str;
347}
348EXPORT_SYMBOL(skip_spaces);
349
350/**
351 * strim - Removes leading and trailing whitespace from @s.
334 * @s: The string to be stripped. 352 * @s: The string to be stripped.
335 * 353 *
336 * Note that the first trailing whitespace is replaced with a %NUL-terminator 354 * Note that the first trailing whitespace is replaced with a %NUL-terminator
337 * in the given string @s. Returns a pointer to the first non-whitespace 355 * in the given string @s. Returns a pointer to the first non-whitespace
338 * character in @s. 356 * character in @s.
339 */ 357 */
340char *strstrip(char *s) 358char *strim(char *s)
341{ 359{
342 size_t size; 360 size_t size;
343 char *end; 361 char *end;
344 362
363 s = skip_spaces(s);
345 size = strlen(s); 364 size = strlen(s);
346
347 if (!size) 365 if (!size)
348 return s; 366 return s;
349 367
@@ -352,12 +370,9 @@ char *strstrip(char *s)
352 end--; 370 end--;
353 *(end + 1) = '\0'; 371 *(end + 1) = '\0';
354 372
355 while (*s && isspace(*s))
356 s++;
357
358 return s; 373 return s;
359} 374}
360EXPORT_SYMBOL(strstrip); 375EXPORT_SYMBOL(strim);
361 376
362#ifndef __HAVE_ARCH_STRLEN 377#ifndef __HAVE_ARCH_STRLEN
363/** 378/**
@@ -648,7 +663,7 @@ EXPORT_SYMBOL(memscan);
648 */ 663 */
649char *strstr(const char *s1, const char *s2) 664char *strstr(const char *s1, const char *s2)
650{ 665{
651 int l1, l2; 666 size_t l1, l2;
652 667
653 l2 = strlen(s2); 668 l2 = strlen(s2);
654 if (!l2) 669 if (!l2)
@@ -665,6 +680,31 @@ char *strstr(const char *s1, const char *s2)
665EXPORT_SYMBOL(strstr); 680EXPORT_SYMBOL(strstr);
666#endif 681#endif
667 682
683#ifndef __HAVE_ARCH_STRNSTR
684/**
685 * strnstr - Find the first substring in a length-limited string
686 * @s1: The string to be searched
687 * @s2: The string to search for
688 * @len: the maximum number of characters to search
689 */
690char *strnstr(const char *s1, const char *s2, size_t len)
691{
692 size_t l2;
693
694 l2 = strlen(s2);
695 if (!l2)
696 return (char *)s1;
697 while (len >= l2) {
698 len--;
699 if (!memcmp(s1, s2, l2))
700 return (char *)s1;
701 s1++;
702 }
703 return NULL;
704}
705EXPORT_SYMBOL(strnstr);
706#endif
707
668#ifndef __HAVE_ARCH_MEMCHR 708#ifndef __HAVE_ARCH_MEMCHR
669/** 709/**
670 * memchr - Find a character in an area of memory. 710 * memchr - Find a character in an area of memory.
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bffe6d7ef9d..7c06ee51a29 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -28,6 +28,7 @@
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/ctype.h> 29#include <linux/ctype.h>
30#include <linux/highmem.h> 30#include <linux/highmem.h>
31#include <linux/gfp.h>
31 32
32#include <asm/io.h> 33#include <asm/io.h>
33#include <asm/dma.h> 34#include <asm/dma.h>
@@ -49,19 +50,11 @@
49 */ 50 */
50#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) 51#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
51 52
52/*
53 * Enumeration for sync targets
54 */
55enum dma_sync_target {
56 SYNC_FOR_CPU = 0,
57 SYNC_FOR_DEVICE = 1,
58};
59
60int swiotlb_force; 53int swiotlb_force;
61 54
62/* 55/*
63 * Used to do a quick range check in unmap_single and 56 * Used to do a quick range check in swiotlb_tbl_unmap_single and
64 * sync_single_*, to see if the memory was in fact allocated by this 57 * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
65 * API. 58 * API.
66 */ 59 */
67static char *io_tlb_start, *io_tlb_end; 60static char *io_tlb_start, *io_tlb_end;
@@ -77,7 +70,7 @@ static unsigned long io_tlb_nslabs;
77 */ 70 */
78static unsigned long io_tlb_overflow = 32*1024; 71static unsigned long io_tlb_overflow = 32*1024;
79 72
80void *io_tlb_overflow_buffer; 73static void *io_tlb_overflow_buffer;
81 74
82/* 75/*
83 * This is a free list describing the number of free entries available from 76 * This is a free list describing the number of free entries available from
@@ -97,6 +90,8 @@ static phys_addr_t *io_tlb_orig_addr;
97 */ 90 */
98static DEFINE_SPINLOCK(io_tlb_lock); 91static DEFINE_SPINLOCK(io_tlb_lock);
99 92
93static int late_alloc;
94
100static int __init 95static int __init
101setup_io_tlb_npages(char *str) 96setup_io_tlb_npages(char *str)
102{ 97{
@@ -109,55 +104,22 @@ setup_io_tlb_npages(char *str)
109 ++str; 104 ++str;
110 if (!strcmp(str, "force")) 105 if (!strcmp(str, "force"))
111 swiotlb_force = 1; 106 swiotlb_force = 1;
107
112 return 1; 108 return 1;
113} 109}
114__setup("swiotlb=", setup_io_tlb_npages); 110__setup("swiotlb=", setup_io_tlb_npages);
115/* make io_tlb_overflow tunable too? */ 111/* make io_tlb_overflow tunable too? */
116 112
117void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) 113/* Note that this doesn't work with highmem page */
118{
119 return alloc_bootmem_low_pages(size);
120}
121
122void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
123{
124 return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
125}
126
127dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
128{
129 return paddr;
130}
131
132phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
133{
134 return baddr;
135}
136
137static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, 114static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
138 volatile void *address) 115 volatile void *address)
139{ 116{
140 return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); 117 return phys_to_dma(hwdev, virt_to_phys(address));
141}
142
143void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
144{
145 return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
146}
147
148int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
149 dma_addr_t addr, size_t size)
150{
151 return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
152} 118}
153 119
154int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) 120void swiotlb_print_info(void)
155{
156 return 0;
157}
158
159static void swiotlb_print_info(unsigned long bytes)
160{ 121{
122 unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
161 phys_addr_t pstart, pend; 123 phys_addr_t pstart, pend;
162 124
163 pstart = virt_to_phys(io_tlb_start); 125 pstart = virt_to_phys(io_tlb_start);
@@ -170,28 +132,14 @@ static void swiotlb_print_info(unsigned long bytes)
170 (unsigned long long)pend); 132 (unsigned long long)pend);
171} 133}
172 134
173/* 135void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
174 * Statically reserve bounce buffer space and initialize bounce buffer data
175 * structures for the software IO TLB used to implement the DMA API.
176 */
177void __init
178swiotlb_init_with_default_size(size_t default_size)
179{ 136{
180 unsigned long i, bytes; 137 unsigned long i, bytes;
181 138
182 if (!io_tlb_nslabs) { 139 bytes = nslabs << IO_TLB_SHIFT;
183 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
184 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
185 }
186
187 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
188 140
189 /* 141 io_tlb_nslabs = nslabs;
190 * Get IO TLB memory from the low pages 142 io_tlb_start = tlb;
191 */
192 io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
193 if (!io_tlb_start)
194 panic("Cannot allocate SWIOTLB buffer");
195 io_tlb_end = io_tlb_start + bytes; 143 io_tlb_end = io_tlb_start + bytes;
196 144
197 /* 145 /*
@@ -199,26 +147,52 @@ swiotlb_init_with_default_size(size_t default_size)
199 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE 147 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
200 * between io_tlb_start and io_tlb_end. 148 * between io_tlb_start and io_tlb_end.
201 */ 149 */
202 io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); 150 io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
203 for (i = 0; i < io_tlb_nslabs; i++) 151 for (i = 0; i < io_tlb_nslabs; i++)
204 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); 152 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
205 io_tlb_index = 0; 153 io_tlb_index = 0;
206 io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); 154 io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
207 155
208 /* 156 /*
209 * Get the overflow emergency buffer 157 * Get the overflow emergency buffer
210 */ 158 */
211 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); 159 io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
212 if (!io_tlb_overflow_buffer) 160 if (!io_tlb_overflow_buffer)
213 panic("Cannot allocate SWIOTLB overflow buffer!\n"); 161 panic("Cannot allocate SWIOTLB overflow buffer!\n");
162 if (verbose)
163 swiotlb_print_info();
164}
165
166/*
167 * Statically reserve bounce buffer space and initialize bounce buffer data
168 * structures for the software IO TLB used to implement the DMA API.
169 */
170void __init
171swiotlb_init_with_default_size(size_t default_size, int verbose)
172{
173 unsigned long bytes;
174
175 if (!io_tlb_nslabs) {
176 io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
177 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
178 }
179
180 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
181
182 /*
183 * Get IO TLB memory from the low pages
184 */
185 io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
186 if (!io_tlb_start)
187 panic("Cannot allocate SWIOTLB buffer");
214 188
215 swiotlb_print_info(bytes); 189 swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
216} 190}
217 191
218void __init 192void __init
219swiotlb_init(void) 193swiotlb_init(int verbose)
220{ 194{
221 swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ 195 swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */
222} 196}
223 197
224/* 198/*
@@ -245,7 +219,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
245 bytes = io_tlb_nslabs << IO_TLB_SHIFT; 219 bytes = io_tlb_nslabs << IO_TLB_SHIFT;
246 220
247 while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { 221 while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
248 io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs); 222 io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
223 order);
249 if (io_tlb_start) 224 if (io_tlb_start)
250 break; 225 break;
251 order--; 226 order--;
@@ -294,7 +269,9 @@ swiotlb_late_init_with_default_size(size_t default_size)
294 if (!io_tlb_overflow_buffer) 269 if (!io_tlb_overflow_buffer)
295 goto cleanup4; 270 goto cleanup4;
296 271
297 swiotlb_print_info(bytes); 272 swiotlb_print_info();
273
274 late_alloc = 1;
298 275
299 return 0; 276 return 0;
300 277
@@ -315,27 +292,43 @@ cleanup1:
315 return -ENOMEM; 292 return -ENOMEM;
316} 293}
317 294
318static inline int 295void __init swiotlb_free(void)
319address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
320{ 296{
321 return swiotlb_arch_address_needs_mapping(hwdev, addr, size); 297 if (!io_tlb_overflow_buffer)
322} 298 return;
323 299
324static inline int range_needs_mapping(phys_addr_t paddr, size_t size) 300 if (late_alloc) {
325{ 301 free_pages((unsigned long)io_tlb_overflow_buffer,
326 return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); 302 get_order(io_tlb_overflow));
303 free_pages((unsigned long)io_tlb_orig_addr,
304 get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
305 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
306 sizeof(int)));
307 free_pages((unsigned long)io_tlb_start,
308 get_order(io_tlb_nslabs << IO_TLB_SHIFT));
309 } else {
310 free_bootmem_late(__pa(io_tlb_overflow_buffer),
311 PAGE_ALIGN(io_tlb_overflow));
312 free_bootmem_late(__pa(io_tlb_orig_addr),
313 PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
314 free_bootmem_late(__pa(io_tlb_list),
315 PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
316 free_bootmem_late(__pa(io_tlb_start),
317 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
318 }
327} 319}
328 320
329static int is_swiotlb_buffer(char *addr) 321static int is_swiotlb_buffer(phys_addr_t paddr)
330{ 322{
331 return addr >= io_tlb_start && addr < io_tlb_end; 323 return paddr >= virt_to_phys(io_tlb_start) &&
324 paddr < virt_to_phys(io_tlb_end);
332} 325}
333 326
334/* 327/*
335 * Bounce: copy the swiotlb buffer back to the original dma location 328 * Bounce: copy the swiotlb buffer back to the original dma location
336 */ 329 */
337static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, 330void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
338 enum dma_data_direction dir) 331 enum dma_data_direction dir)
339{ 332{
340 unsigned long pfn = PFN_DOWN(phys); 333 unsigned long pfn = PFN_DOWN(phys);
341 334
@@ -371,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
371 memcpy(phys_to_virt(phys), dma_addr, size); 364 memcpy(phys_to_virt(phys), dma_addr, size);
372 } 365 }
373} 366}
367EXPORT_SYMBOL_GPL(swiotlb_bounce);
374 368
375/* 369void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
376 * Allocates bounce buffer and returns its kernel virtual address. 370 phys_addr_t phys, size_t size,
377 */ 371 enum dma_data_direction dir)
378static void *
379map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
380{ 372{
381 unsigned long flags; 373 unsigned long flags;
382 char *dma_addr; 374 char *dma_addr;
383 unsigned int nslots, stride, index, wrap; 375 unsigned int nslots, stride, index, wrap;
384 int i; 376 int i;
385 unsigned long start_dma_addr;
386 unsigned long mask; 377 unsigned long mask;
387 unsigned long offset_slots; 378 unsigned long offset_slots;
388 unsigned long max_slots; 379 unsigned long max_slots;
389 380
390 mask = dma_get_seg_boundary(hwdev); 381 mask = dma_get_seg_boundary(hwdev);
391 start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
392 382
393 offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 383 tbl_dma_addr &= mask;
384
385 offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
394 386
395 /* 387 /*
396 * Carefully handle integer overflow which can occur when mask == ~0UL. 388 * Carefully handle integer overflow which can occur when mask == ~0UL.
@@ -477,12 +469,27 @@ found:
477 469
478 return dma_addr; 470 return dma_addr;
479} 471}
472EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
473
474/*
475 * Allocates bounce buffer and returns its kernel virtual address.
476 */
477
478static void *
479map_single(struct device *hwdev, phys_addr_t phys, size_t size,
480 enum dma_data_direction dir)
481{
482 dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
483
484 return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
485}
480 486
481/* 487/*
482 * dma_addr is the kernel virtual address of the bounce buffer to unmap. 488 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
483 */ 489 */
484static void 490void
485do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) 491swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
492 enum dma_data_direction dir)
486{ 493{
487 unsigned long flags; 494 unsigned long flags;
488 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; 495 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -497,7 +504,7 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
497 504
498 /* 505 /*
499 * Return the buffer to the free list by setting the corresponding 506 * Return the buffer to the free list by setting the corresponding
500 * entries to indicate the number of contigous entries available. 507 * entries to indicate the number of contiguous entries available.
501 * While returning the entries to the free list, we merge the entries 508 * While returning the entries to the free list, we merge the entries
502 * with slots below and above the pool being returned. 509 * with slots below and above the pool being returned.
503 */ 510 */
@@ -520,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
520 } 527 }
521 spin_unlock_irqrestore(&io_tlb_lock, flags); 528 spin_unlock_irqrestore(&io_tlb_lock, flags);
522} 529}
530EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
523 531
524static void 532void
525sync_single(struct device *hwdev, char *dma_addr, size_t size, 533swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
526 int dir, int target) 534 enum dma_data_direction dir,
535 enum dma_sync_target target)
527{ 536{
528 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; 537 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
529 phys_addr_t phys = io_tlb_orig_addr[index]; 538 phys_addr_t phys = io_tlb_orig_addr[index];
@@ -547,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
547 BUG(); 556 BUG();
548 } 557 }
549} 558}
559EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
550 560
551void * 561void *
552swiotlb_alloc_coherent(struct device *hwdev, size_t size, 562swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -561,9 +571,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
561 dma_mask = hwdev->coherent_dma_mask; 571 dma_mask = hwdev->coherent_dma_mask;
562 572
563 ret = (void *)__get_free_pages(flags, order); 573 ret = (void *)__get_free_pages(flags, order);
564 if (ret && 574 if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
565 !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(hwdev, ret),
566 size)) {
567 /* 575 /*
568 * The allocated memory isn't reachable by the device. 576 * The allocated memory isn't reachable by the device.
569 */ 577 */
@@ -572,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
572 } 580 }
573 if (!ret) { 581 if (!ret) {
574 /* 582 /*
575 * We are either out of memory or the device can't DMA 583 * We are either out of memory or the device can't DMA to
576 * to GFP_DMA memory; fall back on map_single(), which 584 * GFP_DMA memory; fall back on map_single(), which
577 * will grab memory from the lowest available address range. 585 * will grab memory from the lowest available address range.
578 */ 586 */
579 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); 587 ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
@@ -585,13 +593,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
585 dev_addr = swiotlb_virt_to_bus(hwdev, ret); 593 dev_addr = swiotlb_virt_to_bus(hwdev, ret);
586 594
587 /* Confirm address can be DMA'd by device */ 595 /* Confirm address can be DMA'd by device */
588 if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) { 596 if (dev_addr + size - 1 > dma_mask) {
589 printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", 597 printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
590 (unsigned long long)dma_mask, 598 (unsigned long long)dma_mask,
591 (unsigned long long)dev_addr); 599 (unsigned long long)dev_addr);
592 600
593 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 601 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
594 do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); 602 swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
595 return NULL; 603 return NULL;
596 } 604 }
597 *dma_handle = dev_addr; 605 *dma_handle = dev_addr;
@@ -601,19 +609,22 @@ EXPORT_SYMBOL(swiotlb_alloc_coherent);
601 609
602void 610void
603swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, 611swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
604 dma_addr_t dma_handle) 612 dma_addr_t dev_addr)
605{ 613{
614 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
615
606 WARN_ON(irqs_disabled()); 616 WARN_ON(irqs_disabled());
607 if (!is_swiotlb_buffer(vaddr)) 617 if (!is_swiotlb_buffer(paddr))
608 free_pages((unsigned long) vaddr, get_order(size)); 618 free_pages((unsigned long)vaddr, get_order(size));
609 else 619 else
610 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ 620 /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
611 do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); 621 swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
612} 622}
613EXPORT_SYMBOL(swiotlb_free_coherent); 623EXPORT_SYMBOL(swiotlb_free_coherent);
614 624
615static void 625static void
616swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) 626swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
627 int do_panic)
617{ 628{
618 /* 629 /*
619 * Ran out of IOMMU space for this operation. This is very bad. 630 * Ran out of IOMMU space for this operation. This is very bad.
@@ -625,12 +636,15 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
625 printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " 636 printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
626 "device %s\n", size, dev ? dev_name(dev) : "?"); 637 "device %s\n", size, dev ? dev_name(dev) : "?");
627 638
628 if (size > io_tlb_overflow && do_panic) { 639 if (size <= io_tlb_overflow || !do_panic)
629 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 640 return;
630 panic("DMA: Memory would be corrupted\n"); 641
631 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) 642 if (dir == DMA_BIDIRECTIONAL)
632 panic("DMA: Random memory would be DMAed\n"); 643 panic("DMA: Random memory could be DMA accessed\n");
633 } 644 if (dir == DMA_FROM_DEVICE)
645 panic("DMA: Random memory could be DMA written\n");
646 if (dir == DMA_TO_DEVICE)
647 panic("DMA: Random memory could be DMA read\n");
634} 648}
635 649
636/* 650/*
@@ -646,7 +660,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
646 struct dma_attrs *attrs) 660 struct dma_attrs *attrs)
647{ 661{
648 phys_addr_t phys = page_to_phys(page) + offset; 662 phys_addr_t phys = page_to_phys(page) + offset;
649 dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); 663 dma_addr_t dev_addr = phys_to_dma(dev, phys);
650 void *map; 664 void *map;
651 665
652 BUG_ON(dir == DMA_NONE); 666 BUG_ON(dir == DMA_NONE);
@@ -655,8 +669,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
655 * we can safely return the device addr and not worry about bounce 669 * we can safely return the device addr and not worry about bounce
656 * buffering it. 670 * buffering it.
657 */ 671 */
658 if (!address_needs_mapping(dev, dev_addr, size) && 672 if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
659 !range_needs_mapping(phys, size))
660 return dev_addr; 673 return dev_addr;
661 674
662 /* 675 /*
@@ -673,7 +686,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
673 /* 686 /*
674 * Ensure that the address returned is DMA'ble 687 * Ensure that the address returned is DMA'ble
675 */ 688 */
676 if (address_needs_mapping(dev, dev_addr, size)) 689 if (!dma_capable(dev, dev_addr, size))
677 panic("map_single: bounce buffer is not DMA'ble"); 690 panic("map_single: bounce buffer is not DMA'ble");
678 691
679 return dev_addr; 692 return dev_addr;
@@ -689,21 +702,27 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
689 * whatever the device wrote there. 702 * whatever the device wrote there.
690 */ 703 */
691static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, 704static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
692 size_t size, int dir) 705 size_t size, enum dma_data_direction dir)
693{ 706{
694 char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); 707 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
695 708
696 BUG_ON(dir == DMA_NONE); 709 BUG_ON(dir == DMA_NONE);
697 710
698 if (is_swiotlb_buffer(dma_addr)) { 711 if (is_swiotlb_buffer(paddr)) {
699 do_unmap_single(hwdev, dma_addr, size, dir); 712 swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
700 return; 713 return;
701 } 714 }
702 715
703 if (dir != DMA_FROM_DEVICE) 716 if (dir != DMA_FROM_DEVICE)
704 return; 717 return;
705 718
706 dma_mark_clean(dma_addr, size); 719 /*
720 * phys_to_virt doesn't work with hihgmem page but we could
721 * call dma_mark_clean() with hihgmem page here. However, we
722 * are fine since dma_mark_clean() is null on POWERPC. We can
723 * make dma_mark_clean() take a physical address if necessary.
724 */
725 dma_mark_clean(phys_to_virt(paddr), size);
707} 726}
708 727
709void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, 728void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
@@ -726,21 +745,23 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
726 */ 745 */
727static void 746static void
728swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, 747swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
729 size_t size, int dir, int target) 748 size_t size, enum dma_data_direction dir,
749 enum dma_sync_target target)
730{ 750{
731 char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); 751 phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
732 752
733 BUG_ON(dir == DMA_NONE); 753 BUG_ON(dir == DMA_NONE);
734 754
735 if (is_swiotlb_buffer(dma_addr)) { 755 if (is_swiotlb_buffer(paddr)) {
736 sync_single(hwdev, dma_addr, size, dir, target); 756 swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
757 target);
737 return; 758 return;
738 } 759 }
739 760
740 if (dir != DMA_FROM_DEVICE) 761 if (dir != DMA_FROM_DEVICE)
741 return; 762 return;
742 763
743 dma_mark_clean(dma_addr, size); 764 dma_mark_clean(phys_to_virt(paddr), size);
744} 765}
745 766
746void 767void
@@ -760,37 +781,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
760EXPORT_SYMBOL(swiotlb_sync_single_for_device); 781EXPORT_SYMBOL(swiotlb_sync_single_for_device);
761 782
762/* 783/*
763 * Same as above, but for a sub-range of the mapping.
764 */
765static void
766swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
767 unsigned long offset, size_t size,
768 int dir, int target)
769{
770 swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
771}
772
773void
774swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
775 unsigned long offset, size_t size,
776 enum dma_data_direction dir)
777{
778 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
779 SYNC_FOR_CPU);
780}
781EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
782
783void
784swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
785 unsigned long offset, size_t size,
786 enum dma_data_direction dir)
787{
788 swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
789 SYNC_FOR_DEVICE);
790}
791EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
792
793/*
794 * Map a set of buffers described by scatterlist in streaming mode for DMA. 784 * Map a set of buffers described by scatterlist in streaming mode for DMA.
795 * This is the scatter-gather version of the above swiotlb_map_page 785 * This is the scatter-gather version of the above swiotlb_map_page
796 * interface. Here the scatter gather list elements are each tagged with the 786 * interface. Here the scatter gather list elements are each tagged with the
@@ -817,10 +807,10 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
817 807
818 for_each_sg(sgl, sg, nelems, i) { 808 for_each_sg(sgl, sg, nelems, i) {
819 phys_addr_t paddr = sg_phys(sg); 809 phys_addr_t paddr = sg_phys(sg);
820 dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); 810 dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
821 811
822 if (range_needs_mapping(paddr, sg->length) || 812 if (swiotlb_force ||
823 address_needs_mapping(hwdev, dev_addr, sg->length)) { 813 !dma_capable(hwdev, dev_addr, sg->length)) {
824 void *map = map_single(hwdev, sg_phys(sg), 814 void *map = map_single(hwdev, sg_phys(sg),
825 sg->length, dir); 815 sg->length, dir);
826 if (!map) { 816 if (!map) {
@@ -843,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
843 833
844int 834int
845swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 835swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
846 int dir) 836 enum dma_data_direction dir)
847{ 837{
848 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); 838 return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
849} 839}
@@ -870,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
870 860
871void 861void
872swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, 862swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
873 int dir) 863 enum dma_data_direction dir)
874{ 864{
875 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); 865 return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
876} 866}
@@ -885,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
885 */ 875 */
886static void 876static void
887swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, 877swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
888 int nelems, int dir, int target) 878 int nelems, enum dma_data_direction dir,
879 enum dma_sync_target target)
889{ 880{
890 struct scatterlist *sg; 881 struct scatterlist *sg;
891 int i; 882 int i;
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 9fbcb44c554..d608331b3e4 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -103,6 +103,7 @@
103#include <linux/rcupdate.h> 103#include <linux/rcupdate.h>
104#include <linux/err.h> 104#include <linux/err.h>
105#include <linux/textsearch.h> 105#include <linux/textsearch.h>
106#include <linux/slab.h>
106 107
107static LIST_HEAD(ts_ops); 108static LIST_HEAD(ts_ops);
108static DEFINE_SPINLOCK(ts_mod_lock); 109static DEFINE_SPINLOCK(ts_mod_lock);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 00000000000..e3a1050e682
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,107 @@
1/*
2 * Generic Timer-queue
3 *
4 * Manages a simple queue of timers, ordered by expiration time.
5 * Uses rbtrees for quick list adds and expiration.
6 *
7 * NOTE: All of the following functions need to be serialized
8 * to avoid races. No locking is done by this libary code.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/timerqueue.h>
26#include <linux/rbtree.h>
27#include <linux/module.h>
28
29/**
30 * timerqueue_add - Adds timer to timerqueue.
31 *
32 * @head: head of timerqueue
33 * @node: timer node to be added
34 *
35 * Adds the timer node to the timerqueue, sorted by the
36 * node's expires value.
37 */
38void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
39{
40 struct rb_node **p = &head->head.rb_node;
41 struct rb_node *parent = NULL;
42 struct timerqueue_node *ptr;
43
44 /* Make sure we don't add nodes that are already added */
45 WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
46
47 while (*p) {
48 parent = *p;
49 ptr = rb_entry(parent, struct timerqueue_node, node);
50 if (node->expires.tv64 < ptr->expires.tv64)
51 p = &(*p)->rb_left;
52 else
53 p = &(*p)->rb_right;
54 }
55 rb_link_node(&node->node, parent, p);
56 rb_insert_color(&node->node, &head->head);
57
58 if (!head->next || node->expires.tv64 < head->next->expires.tv64)
59 head->next = node;
60}
61EXPORT_SYMBOL_GPL(timerqueue_add);
62
63/**
64 * timerqueue_del - Removes a timer from the timerqueue.
65 *
66 * @head: head of timerqueue
67 * @node: timer node to be removed
68 *
69 * Removes the timer node from the timerqueue.
70 */
71void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
72{
73 WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
74
75 /* update next pointer */
76 if (head->next == node) {
77 struct rb_node *rbn = rb_next(&node->node);
78
79 head->next = rbn ?
80 rb_entry(rbn, struct timerqueue_node, node) : NULL;
81 }
82 rb_erase(&node->node, &head->head);
83 RB_CLEAR_NODE(&node->node);
84}
85EXPORT_SYMBOL_GPL(timerqueue_del);
86
87/**
88 * timerqueue_iterate_next - Returns the timer after the provided timer
89 *
90 * @node: Pointer to a timer.
91 *
92 * Provides the timer that is after the given node. This is used, when
93 * necessary, to iterate through the list of timers in a timer list
94 * without modifying the list.
95 */
96struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
97{
98 struct rb_node *next;
99
100 if (!node)
101 return NULL;
102 next = rb_next(&node->node);
103 if (!next)
104 return NULL;
105 return container_of(next, struct timerqueue_node, node);
106}
107EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 00000000000..8fadd7cef46
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
1/*
2 * Unified UUID/GUID definition
3 *
4 * Copyright (C) 2009, Intel Corp.
5 * Huang Ying <ying.huang@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation;
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/uuid.h>
24#include <linux/random.h>
25
26static void __uuid_gen_common(__u8 b[16])
27{
28 int i;
29 u32 r;
30
31 for (i = 0; i < 4; i++) {
32 r = random32();
33 memcpy(b + i * 4, &r, 4);
34 }
35 /* reversion 0b10 */
36 b[8] = (b[8] & 0x3F) | 0x80;
37}
38
39void uuid_le_gen(uuid_le *lu)
40{
41 __uuid_gen_common(lu->b);
42 /* version 4 : random generation */
43 lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
44}
45EXPORT_SYMBOL_GPL(uuid_le_gen);
46
47void uuid_be_gen(uuid_be *bu)
48{
49 __uuid_gen_common(bu->b);
50 /* version 4 : random generation */
51 bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
52}
53EXPORT_SYMBOL_GPL(uuid_be_gen);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 756ccafa9ce..c150d3dafff 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -9,7 +9,7 @@
9 * Wirzenius wrote this portably, Torvalds fucked it up :-) 9 * Wirzenius wrote this portably, Torvalds fucked it up :-)
10 */ 10 */
11 11
12/* 12/*
13 * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com> 13 * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
14 * - changed to provide snprintf and vsnprintf functions 14 * - changed to provide snprintf and vsnprintf functions
15 * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de> 15 * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de>
@@ -25,6 +25,7 @@
25#include <linux/kallsyms.h> 25#include <linux/kallsyms.h>
26#include <linux/uaccess.h> 26#include <linux/uaccess.h>
27#include <linux/ioport.h> 27#include <linux/ioport.h>
28#include <net/addrconf.h>
28 29
29#include <asm/page.h> /* for PAGE_SIZE */ 30#include <asm/page.h> /* for PAGE_SIZE */
30#include <asm/div64.h> 31#include <asm/div64.h>
@@ -46,14 +47,14 @@ static unsigned int simple_guess_base(const char *cp)
46} 47}
47 48
48/** 49/**
49 * simple_strtoul - convert a string to an unsigned long 50 * simple_strtoull - convert a string to an unsigned long long
50 * @cp: The start of the string 51 * @cp: The start of the string
51 * @endp: A pointer to the end of the parsed string will be placed here 52 * @endp: A pointer to the end of the parsed string will be placed here
52 * @base: The number base to use 53 * @base: The number base to use
53 */ 54 */
54unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) 55unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
55{ 56{
56 unsigned long result = 0; 57 unsigned long long result = 0;
57 58
58 if (!base) 59 if (!base)
59 base = simple_guess_base(cp); 60 base = simple_guess_base(cp);
@@ -70,58 +71,39 @@ unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
70 result = result * base + value; 71 result = result * base + value;
71 cp++; 72 cp++;
72 } 73 }
73
74 if (endp) 74 if (endp)
75 *endp = (char *)cp; 75 *endp = (char *)cp;
76
76 return result; 77 return result;
77} 78}
78EXPORT_SYMBOL(simple_strtoul); 79EXPORT_SYMBOL(simple_strtoull);
79 80
80/** 81/**
81 * simple_strtol - convert a string to a signed long 82 * simple_strtoul - convert a string to an unsigned long
82 * @cp: The start of the string 83 * @cp: The start of the string
83 * @endp: A pointer to the end of the parsed string will be placed here 84 * @endp: A pointer to the end of the parsed string will be placed here
84 * @base: The number base to use 85 * @base: The number base to use
85 */ 86 */
86long simple_strtol(const char *cp, char **endp, unsigned int base) 87unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
87{ 88{
88 if(*cp == '-') 89 return simple_strtoull(cp, endp, base);
89 return -simple_strtoul(cp + 1, endp, base);
90 return simple_strtoul(cp, endp, base);
91} 90}
92EXPORT_SYMBOL(simple_strtol); 91EXPORT_SYMBOL(simple_strtoul);
93 92
94/** 93/**
95 * simple_strtoull - convert a string to an unsigned long long 94 * simple_strtol - convert a string to a signed long
96 * @cp: The start of the string 95 * @cp: The start of the string
97 * @endp: A pointer to the end of the parsed string will be placed here 96 * @endp: A pointer to the end of the parsed string will be placed here
98 * @base: The number base to use 97 * @base: The number base to use
99 */ 98 */
100unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) 99long simple_strtol(const char *cp, char **endp, unsigned int base)
101{ 100{
102 unsigned long long result = 0; 101 if (*cp == '-')
103 102 return -simple_strtoul(cp + 1, endp, base);
104 if (!base)
105 base = simple_guess_base(cp);
106
107 if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
108 cp += 2;
109
110 while (isxdigit(*cp)) {
111 unsigned int value;
112
113 value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
114 if (value >= base)
115 break;
116 result = result * base + value;
117 cp++;
118 }
119 103
120 if (endp) 104 return simple_strtoul(cp, endp, base);
121 *endp = (char *)cp;
122 return result;
123} 105}
124EXPORT_SYMBOL(simple_strtoull); 106EXPORT_SYMBOL(simple_strtol);
125 107
126/** 108/**
127 * simple_strtoll - convert a string to a signed long long 109 * simple_strtoll - convert a string to a signed long long
@@ -131,10 +113,12 @@ EXPORT_SYMBOL(simple_strtoull);
131 */ 113 */
132long long simple_strtoll(const char *cp, char **endp, unsigned int base) 114long long simple_strtoll(const char *cp, char **endp, unsigned int base)
133{ 115{
134 if(*cp=='-') 116 if (*cp == '-')
135 return -simple_strtoull(cp + 1, endp, base); 117 return -simple_strtoull(cp + 1, endp, base);
118
136 return simple_strtoull(cp, endp, base); 119 return simple_strtoull(cp, endp, base);
137} 120}
121EXPORT_SYMBOL(simple_strtoll);
138 122
139/** 123/**
140 * strict_strtoul - convert a string to an unsigned long strictly 124 * strict_strtoul - convert a string to an unsigned long strictly
@@ -162,18 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
162{ 146{
163 char *tail; 147 char *tail;
164 unsigned long val; 148 unsigned long val;
165 size_t len;
166 149
167 *res = 0; 150 *res = 0;
168 len = strlen(cp); 151 if (!*cp)
169 if (len == 0)
170 return -EINVAL; 152 return -EINVAL;
171 153
172 val = simple_strtoul(cp, &tail, base); 154 val = simple_strtoul(cp, &tail, base);
173 if (tail == cp) 155 if (tail == cp)
174 return -EINVAL; 156 return -EINVAL;
175 if ((*tail == '\0') || 157
176 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) { 158 if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
177 *res = val; 159 *res = val;
178 return 0; 160 return 0;
179 } 161 }
@@ -235,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
235{ 217{
236 char *tail; 218 char *tail;
237 unsigned long long val; 219 unsigned long long val;
238 size_t len;
239 220
240 *res = 0; 221 *res = 0;
241 len = strlen(cp); 222 if (!*cp)
242 if (len == 0)
243 return -EINVAL; 223 return -EINVAL;
244 224
245 val = simple_strtoull(cp, &tail, base); 225 val = simple_strtoull(cp, &tail, base);
246 if (tail == cp) 226 if (tail == cp)
247 return -EINVAL; 227 return -EINVAL;
248 if ((*tail == '\0') || 228 if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
249 ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
250 *res = val; 229 *res = val;
251 return 0; 230 return 0;
252 } 231 }
@@ -282,12 +261,14 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res)
282} 261}
283EXPORT_SYMBOL(strict_strtoll); 262EXPORT_SYMBOL(strict_strtoll);
284 263
285static int skip_atoi(const char **s) 264static noinline_for_stack
265int skip_atoi(const char **s)
286{ 266{
287 int i=0; 267 int i = 0;
288 268
289 while (isdigit(**s)) 269 while (isdigit(**s))
290 i = i*10 + *((*s)++) - '0'; 270 i = i*10 + *((*s)++) - '0';
271
291 return i; 272 return i;
292} 273}
293 274
@@ -301,7 +282,8 @@ static int skip_atoi(const char **s)
301/* Formats correctly any integer in [0,99999]. 282/* Formats correctly any integer in [0,99999].
302 * Outputs from one to five digits depending on input. 283 * Outputs from one to five digits depending on input.
303 * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ 284 * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
304static char* put_dec_trunc(char *buf, unsigned q) 285static noinline_for_stack
286char *put_dec_trunc(char *buf, unsigned q)
305{ 287{
306 unsigned d3, d2, d1, d0; 288 unsigned d3, d2, d1, d0;
307 d1 = (q>>4) & 0xf; 289 d1 = (q>>4) & 0xf;
@@ -330,14 +312,16 @@ static char* put_dec_trunc(char *buf, unsigned q)
330 d3 = d3 - 10*q; 312 d3 = d3 - 10*q;
331 *buf++ = d3 + '0'; /* next digit */ 313 *buf++ = d3 + '0'; /* next digit */
332 if (q != 0) 314 if (q != 0)
333 *buf++ = q + '0'; /* most sign. digit */ 315 *buf++ = q + '0'; /* most sign. digit */
334 } 316 }
335 } 317 }
336 } 318 }
319
337 return buf; 320 return buf;
338} 321}
339/* Same with if's removed. Always emits five digits */ 322/* Same with if's removed. Always emits five digits */
340static char* put_dec_full(char *buf, unsigned q) 323static noinline_for_stack
324char *put_dec_full(char *buf, unsigned q)
341{ 325{
342 /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ 326 /* BTW, if q is in [0,9999], 8-bit ints will be enough, */
343 /* but anyway, gcc produces better code with full-sized ints */ 327 /* but anyway, gcc produces better code with full-sized ints */
@@ -346,14 +330,15 @@ static char* put_dec_full(char *buf, unsigned q)
346 d2 = (q>>8) & 0xf; 330 d2 = (q>>8) & 0xf;
347 d3 = (q>>12); 331 d3 = (q>>12);
348 332
349 /* Possible ways to approx. divide by 10 */ 333 /*
350 /* gcc -O2 replaces multiply with shifts and adds */ 334 * Possible ways to approx. divide by 10
351 // (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386) 335 * gcc -O2 replaces multiply with shifts and adds
352 // (x * 0x67) >> 10: 1100111 336 * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)
353 // (x * 0x34) >> 9: 110100 - same 337 * (x * 0x67) >> 10: 1100111
354 // (x * 0x1a) >> 8: 11010 - same 338 * (x * 0x34) >> 9: 110100 - same
355 // (x * 0x0d) >> 7: 1101 - same, shortest code (on i386) 339 * (x * 0x1a) >> 8: 11010 - same
356 340 * (x * 0x0d) >> 7: 1101 - same, shortest code (on i386)
341 */
357 d0 = 6*(d3 + d2 + d1) + (q & 0xf); 342 d0 = 6*(d3 + d2 + d1) + (q & 0xf);
358 q = (d0 * 0xcd) >> 11; 343 q = (d0 * 0xcd) >> 11;
359 d0 = d0 - 10*q; 344 d0 = d0 - 10*q;
@@ -374,10 +359,12 @@ static char* put_dec_full(char *buf, unsigned q)
374 d3 = d3 - 10*q; 359 d3 = d3 - 10*q;
375 *buf++ = d3 + '0'; 360 *buf++ = d3 + '0';
376 *buf++ = q + '0'; 361 *buf++ = q + '0';
362
377 return buf; 363 return buf;
378} 364}
379/* No inlining helps gcc to use registers better */ 365/* No inlining helps gcc to use registers better */
380static noinline char* put_dec(char *buf, unsigned long long num) 366static noinline_for_stack
367char *put_dec(char *buf, unsigned long long num)
381{ 368{
382 while (1) { 369 while (1) {
383 unsigned rem; 370 unsigned rem;
@@ -393,8 +380,8 @@ static noinline char* put_dec(char *buf, unsigned long long num)
393#define PLUS 4 /* show plus */ 380#define PLUS 4 /* show plus */
394#define SPACE 8 /* space if plus */ 381#define SPACE 8 /* space if plus */
395#define LEFT 16 /* left justified */ 382#define LEFT 16 /* left justified */
396#define SMALL 32 /* Must be 32 == 0x20 */ 383#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */
397#define SPECIAL 64 /* 0x */ 384#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */
398 385
399enum format_type { 386enum format_type {
400 FORMAT_TYPE_NONE, /* Just a string part */ 387 FORMAT_TYPE_NONE, /* Just a string part */
@@ -420,16 +407,17 @@ enum format_type {
420}; 407};
421 408
422struct printf_spec { 409struct printf_spec {
423 enum format_type type; 410 u8 type; /* format_type enum */
424 int flags; /* flags to number() */ 411 u8 flags; /* flags to number() */
425 int field_width; /* width of output field */ 412 u8 base; /* number base, 8, 10 or 16 only */
426 int base; 413 u8 qualifier; /* number qualifier, one of 'hHlLtzZ' */
427 int precision; /* # of digits/chars */ 414 s16 field_width; /* width of output field */
428 int qualifier; 415 s16 precision; /* # of digits/chars */
429}; 416};
430 417
431static char *number(char *buf, char *end, unsigned long long num, 418static noinline_for_stack
432 struct printf_spec spec) 419char *number(char *buf, char *end, unsigned long long num,
420 struct printf_spec spec)
433{ 421{
434 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ 422 /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
435 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ 423 static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -447,9 +435,9 @@ static char *number(char *buf, char *end, unsigned long long num,
447 spec.flags &= ~ZEROPAD; 435 spec.flags &= ~ZEROPAD;
448 sign = 0; 436 sign = 0;
449 if (spec.flags & SIGN) { 437 if (spec.flags & SIGN) {
450 if ((signed long long) num < 0) { 438 if ((signed long long)num < 0) {
451 sign = '-'; 439 sign = '-';
452 num = - (signed long long) num; 440 num = -(signed long long)num;
453 spec.field_width--; 441 spec.field_width--;
454 } else if (spec.flags & PLUS) { 442 } else if (spec.flags & PLUS) {
455 sign = '+'; 443 sign = '+';
@@ -477,7 +465,9 @@ static char *number(char *buf, char *end, unsigned long long num,
477 else if (spec.base != 10) { /* 8 or 16 */ 465 else if (spec.base != 10) { /* 8 or 16 */
478 int mask = spec.base - 1; 466 int mask = spec.base - 1;
479 int shift = 3; 467 int shift = 3;
480 if (spec.base == 16) shift = 4; 468
469 if (spec.base == 16)
470 shift = 4;
481 do { 471 do {
482 tmp[i++] = (digits[((unsigned char)num) & mask] | locase); 472 tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
483 num >>= shift; 473 num >>= shift;
@@ -492,7 +482,7 @@ static char *number(char *buf, char *end, unsigned long long num,
492 /* leading space padding */ 482 /* leading space padding */
493 spec.field_width -= spec.precision; 483 spec.field_width -= spec.precision;
494 if (!(spec.flags & (ZEROPAD+LEFT))) { 484 if (!(spec.flags & (ZEROPAD+LEFT))) {
495 while(--spec.field_width >= 0) { 485 while (--spec.field_width >= 0) {
496 if (buf < end) 486 if (buf < end)
497 *buf = ' '; 487 *buf = ' ';
498 ++buf; 488 ++buf;
@@ -542,15 +532,17 @@ static char *number(char *buf, char *end, unsigned long long num,
542 *buf = ' '; 532 *buf = ' ';
543 ++buf; 533 ++buf;
544 } 534 }
535
545 return buf; 536 return buf;
546} 537}
547 538
548static char *string(char *buf, char *end, char *s, struct printf_spec spec) 539static noinline_for_stack
540char *string(char *buf, char *end, const char *s, struct printf_spec spec)
549{ 541{
550 int len, i; 542 int len, i;
551 543
552 if ((unsigned long)s < PAGE_SIZE) 544 if ((unsigned long)s < PAGE_SIZE)
553 s = "<NULL>"; 545 s = "(null)";
554 546
555 len = strnlen(s, spec.precision); 547 len = strnlen(s, spec.precision);
556 548
@@ -571,123 +563,379 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec)
571 *buf = ' '; 563 *buf = ' ';
572 ++buf; 564 ++buf;
573 } 565 }
566
574 return buf; 567 return buf;
575} 568}
576 569
577static char *symbol_string(char *buf, char *end, void *ptr, 570static noinline_for_stack
578 struct printf_spec spec, char ext) 571char *symbol_string(char *buf, char *end, void *ptr,
572 struct printf_spec spec, char ext)
579{ 573{
580 unsigned long value = (unsigned long) ptr; 574 unsigned long value = (unsigned long) ptr;
581#ifdef CONFIG_KALLSYMS 575#ifdef CONFIG_KALLSYMS
582 char sym[KSYM_SYMBOL_LEN]; 576 char sym[KSYM_SYMBOL_LEN];
583 if (ext != 'f') 577 if (ext != 'f' && ext != 's')
584 sprint_symbol(sym, value); 578 sprint_symbol(sym, value);
585 else 579 else
586 kallsyms_lookup(value, NULL, NULL, NULL, sym); 580 kallsyms_lookup(value, NULL, NULL, NULL, sym);
581
587 return string(buf, end, sym, spec); 582 return string(buf, end, sym, spec);
588#else 583#else
589 spec.field_width = 2*sizeof(void *); 584 spec.field_width = 2 * sizeof(void *);
590 spec.flags |= SPECIAL | SMALL | ZEROPAD; 585 spec.flags |= SPECIAL | SMALL | ZEROPAD;
591 spec.base = 16; 586 spec.base = 16;
587
592 return number(buf, end, value, spec); 588 return number(buf, end, value, spec);
593#endif 589#endif
594} 590}
595 591
596static char *resource_string(char *buf, char *end, struct resource *res, 592static noinline_for_stack
597 struct printf_spec spec) 593char *resource_string(char *buf, char *end, struct resource *res,
594 struct printf_spec spec, const char *fmt)
598{ 595{
599#ifndef IO_RSRC_PRINTK_SIZE 596#ifndef IO_RSRC_PRINTK_SIZE
600#define IO_RSRC_PRINTK_SIZE 4 597#define IO_RSRC_PRINTK_SIZE 6
601#endif 598#endif
602 599
603#ifndef MEM_RSRC_PRINTK_SIZE 600#ifndef MEM_RSRC_PRINTK_SIZE
604#define MEM_RSRC_PRINTK_SIZE 8 601#define MEM_RSRC_PRINTK_SIZE 10
605#endif 602#endif
606 struct printf_spec num_spec = { 603 static const struct printf_spec io_spec = {
607 .base = 16, 604 .base = 16,
605 .field_width = IO_RSRC_PRINTK_SIZE,
608 .precision = -1, 606 .precision = -1,
609 .flags = SPECIAL | SMALL | ZEROPAD, 607 .flags = SPECIAL | SMALL | ZEROPAD,
610 }; 608 };
611 /* room for the actual numbers, the two "0x", -, [, ] and the final zero */ 609 static const struct printf_spec mem_spec = {
612 char sym[4*sizeof(resource_size_t) + 8]; 610 .base = 16,
613 char *p = sym, *pend = sym + sizeof(sym); 611 .field_width = MEM_RSRC_PRINTK_SIZE,
614 int size = -1; 612 .precision = -1,
613 .flags = SPECIAL | SMALL | ZEROPAD,
614 };
615 static const struct printf_spec bus_spec = {
616 .base = 16,
617 .field_width = 2,
618 .precision = -1,
619 .flags = SMALL | ZEROPAD,
620 };
621 static const struct printf_spec dec_spec = {
622 .base = 10,
623 .precision = -1,
624 .flags = 0,
625 };
626 static const struct printf_spec str_spec = {
627 .field_width = -1,
628 .precision = 10,
629 .flags = LEFT,
630 };
631 static const struct printf_spec flag_spec = {
632 .base = 16,
633 .precision = -1,
634 .flags = SPECIAL | SMALL,
635 };
615 636
616 if (res->flags & IORESOURCE_IO) 637 /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8)
617 size = IO_RSRC_PRINTK_SIZE; 638 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
618 else if (res->flags & IORESOURCE_MEM) 639#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4)
619 size = MEM_RSRC_PRINTK_SIZE; 640#define FLAG_BUF_SIZE (2 * sizeof(res->flags))
641#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]")
642#define RAW_BUF_SIZE sizeof("[mem - flags 0x]")
643 char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
644 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
645
646 char *p = sym, *pend = sym + sizeof(sym);
647 int decode = (fmt[0] == 'R') ? 1 : 0;
648 const struct printf_spec *specp;
620 649
621 *p++ = '['; 650 *p++ = '[';
622 num_spec.field_width = size; 651 if (res->flags & IORESOURCE_IO) {
623 p = number(p, pend, res->start, num_spec); 652 p = string(p, pend, "io ", str_spec);
624 *p++ = '-'; 653 specp = &io_spec;
625 p = number(p, pend, res->end, num_spec); 654 } else if (res->flags & IORESOURCE_MEM) {
655 p = string(p, pend, "mem ", str_spec);
656 specp = &mem_spec;
657 } else if (res->flags & IORESOURCE_IRQ) {
658 p = string(p, pend, "irq ", str_spec);
659 specp = &dec_spec;
660 } else if (res->flags & IORESOURCE_DMA) {
661 p = string(p, pend, "dma ", str_spec);
662 specp = &dec_spec;
663 } else if (res->flags & IORESOURCE_BUS) {
664 p = string(p, pend, "bus ", str_spec);
665 specp = &bus_spec;
666 } else {
667 p = string(p, pend, "??? ", str_spec);
668 specp = &mem_spec;
669 decode = 0;
670 }
671 p = number(p, pend, res->start, *specp);
672 if (res->start != res->end) {
673 *p++ = '-';
674 p = number(p, pend, res->end, *specp);
675 }
676 if (decode) {
677 if (res->flags & IORESOURCE_MEM_64)
678 p = string(p, pend, " 64bit", str_spec);
679 if (res->flags & IORESOURCE_PREFETCH)
680 p = string(p, pend, " pref", str_spec);
681 if (res->flags & IORESOURCE_WINDOW)
682 p = string(p, pend, " window", str_spec);
683 if (res->flags & IORESOURCE_DISABLED)
684 p = string(p, pend, " disabled", str_spec);
685 } else {
686 p = string(p, pend, " flags ", str_spec);
687 p = number(p, pend, res->flags, flag_spec);
688 }
626 *p++ = ']'; 689 *p++ = ']';
627 *p = 0; 690 *p = '\0';
628 691
629 return string(buf, end, sym, spec); 692 return string(buf, end, sym, spec);
630} 693}
631 694
632static char *mac_address_string(char *buf, char *end, u8 *addr, 695static noinline_for_stack
633 struct printf_spec spec) 696char *mac_address_string(char *buf, char *end, u8 *addr,
697 struct printf_spec spec, const char *fmt)
634{ 698{
635 char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */ 699 char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
636 char *p = mac_addr; 700 char *p = mac_addr;
637 int i; 701 int i;
702 char separator;
703
704 if (fmt[1] == 'F') { /* FDDI canonical format */
705 separator = '-';
706 } else {
707 separator = ':';
708 }
638 709
639 for (i = 0; i < 6; i++) { 710 for (i = 0; i < 6; i++) {
640 p = pack_hex_byte(p, addr[i]); 711 p = pack_hex_byte(p, addr[i]);
641 if (!(spec.flags & SPECIAL) && i != 5) 712 if (fmt[0] == 'M' && i != 5)
642 *p++ = ':'; 713 *p++ = separator;
643 } 714 }
644 *p = '\0'; 715 *p = '\0';
645 spec.flags &= ~SPECIAL;
646 716
647 return string(buf, end, mac_addr, spec); 717 return string(buf, end, mac_addr, spec);
648} 718}
649 719
650static char *ip6_addr_string(char *buf, char *end, u8 *addr, 720static noinline_for_stack
651 struct printf_spec spec) 721char *ip4_string(char *p, const u8 *addr, const char *fmt)
652{ 722{
653 char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */
654 char *p = ip6_addr;
655 int i; 723 int i;
724 bool leading_zeros = (fmt[0] == 'i');
725 int index;
726 int step;
727
728 switch (fmt[2]) {
729 case 'h':
730#ifdef __BIG_ENDIAN
731 index = 0;
732 step = 1;
733#else
734 index = 3;
735 step = -1;
736#endif
737 break;
738 case 'l':
739 index = 3;
740 step = -1;
741 break;
742 case 'n':
743 case 'b':
744 default:
745 index = 0;
746 step = 1;
747 break;
748 }
749 for (i = 0; i < 4; i++) {
750 char temp[3]; /* hold each IP quad in reverse order */
751 int digits = put_dec_trunc(temp, addr[index]) - temp;
752 if (leading_zeros) {
753 if (digits < 3)
754 *p++ = '0';
755 if (digits < 2)
756 *p++ = '0';
757 }
758 /* reverse the digits in the quad */
759 while (digits--)
760 *p++ = temp[digits];
761 if (i < 3)
762 *p++ = '.';
763 index += step;
764 }
765 *p = '\0';
656 766
657 for (i = 0; i < 8; i++) { 767 return p;
658 p = pack_hex_byte(p, addr[2 * i]); 768}
659 p = pack_hex_byte(p, addr[2 * i + 1]); 769
660 if (!(spec.flags & SPECIAL) && i != 7) 770static noinline_for_stack
771char *ip6_compressed_string(char *p, const char *addr)
772{
773 int i, j, range;
774 unsigned char zerolength[8];
775 int longest = 1;
776 int colonpos = -1;
777 u16 word;
778 u8 hi, lo;
779 bool needcolon = false;
780 bool useIPv4;
781 struct in6_addr in6;
782
783 memcpy(&in6, addr, sizeof(struct in6_addr));
784
785 useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
786
787 memset(zerolength, 0, sizeof(zerolength));
788
789 if (useIPv4)
790 range = 6;
791 else
792 range = 8;
793
794 /* find position of longest 0 run */
795 for (i = 0; i < range; i++) {
796 for (j = i; j < range; j++) {
797 if (in6.s6_addr16[j] != 0)
798 break;
799 zerolength[i]++;
800 }
801 }
802 for (i = 0; i < range; i++) {
803 if (zerolength[i] > longest) {
804 longest = zerolength[i];
805 colonpos = i;
806 }
807 }
808
809 /* emit address */
810 for (i = 0; i < range; i++) {
811 if (i == colonpos) {
812 if (needcolon || i == 0)
813 *p++ = ':';
661 *p++ = ':'; 814 *p++ = ':';
815 needcolon = false;
816 i += longest - 1;
817 continue;
818 }
819 if (needcolon) {
820 *p++ = ':';
821 needcolon = false;
822 }
823 /* hex u16 without leading 0s */
824 word = ntohs(in6.s6_addr16[i]);
825 hi = word >> 8;
826 lo = word & 0xff;
827 if (hi) {
828 if (hi > 0x0f)
829 p = pack_hex_byte(p, hi);
830 else
831 *p++ = hex_asc_lo(hi);
832 p = pack_hex_byte(p, lo);
833 }
834 else if (lo > 0x0f)
835 p = pack_hex_byte(p, lo);
836 else
837 *p++ = hex_asc_lo(lo);
838 needcolon = true;
839 }
840
841 if (useIPv4) {
842 if (needcolon)
843 *p++ = ':';
844 p = ip4_string(p, &in6.s6_addr[12], "I4");
662 } 845 }
663 *p = '\0'; 846 *p = '\0';
664 spec.flags &= ~SPECIAL;
665 847
666 return string(buf, end, ip6_addr, spec); 848 return p;
667} 849}
668 850
669static char *ip4_addr_string(char *buf, char *end, u8 *addr, 851static noinline_for_stack
670 struct printf_spec spec) 852char *ip6_string(char *p, const char *addr, const char *fmt)
671{ 853{
672 char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */ 854 int i;
673 char temp[3]; /* hold each IP quad in reverse order */
674 char *p = ip4_addr;
675 int i, digits;
676 855
677 for (i = 0; i < 4; i++) { 856 for (i = 0; i < 8; i++) {
678 digits = put_dec_trunc(temp, addr[i]) - temp; 857 p = pack_hex_byte(p, *addr++);
679 /* reverse the digits in the quad */ 858 p = pack_hex_byte(p, *addr++);
680 while (digits--) 859 if (fmt[0] == 'I' && i != 7)
681 *p++ = temp[digits]; 860 *p++ = ':';
682 if (i != 3)
683 *p++ = '.';
684 } 861 }
685 *p = '\0'; 862 *p = '\0';
686 spec.flags &= ~SPECIAL; 863
864 return p;
865}
866
867static noinline_for_stack
868char *ip6_addr_string(char *buf, char *end, const u8 *addr,
869 struct printf_spec spec, const char *fmt)
870{
871 char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
872
873 if (fmt[0] == 'I' && fmt[2] == 'c')
874 ip6_compressed_string(ip6_addr, addr);
875 else
876 ip6_string(ip6_addr, addr, fmt);
877
878 return string(buf, end, ip6_addr, spec);
879}
880
881static noinline_for_stack
882char *ip4_addr_string(char *buf, char *end, const u8 *addr,
883 struct printf_spec spec, const char *fmt)
884{
885 char ip4_addr[sizeof("255.255.255.255")];
886
887 ip4_string(ip4_addr, addr, fmt);
687 888
688 return string(buf, end, ip4_addr, spec); 889 return string(buf, end, ip4_addr, spec);
689} 890}
690 891
892static noinline_for_stack
893char *uuid_string(char *buf, char *end, const u8 *addr,
894 struct printf_spec spec, const char *fmt)
895{
896 char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
897 char *p = uuid;
898 int i;
899 static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
900 static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
901 const u8 *index = be;
902 bool uc = false;
903
904 switch (*(++fmt)) {
905 case 'L':
906 uc = true; /* fall-through */
907 case 'l':
908 index = le;
909 break;
910 case 'B':
911 uc = true;
912 break;
913 }
914
915 for (i = 0; i < 16; i++) {
916 p = pack_hex_byte(p, addr[index[i]]);
917 switch (i) {
918 case 3:
919 case 5:
920 case 7:
921 case 9:
922 *p++ = '-';
923 break;
924 }
925 }
926
927 *p = 0;
928
929 if (uc) {
930 p = uuid;
931 do {
932 *p = toupper(*p);
933 } while (*(++p));
934 }
935
936 return string(buf, end, uuid, spec);
937}
938
691/* 939/*
692 * Show a '%p' thing. A kernel extension is that the '%p' is followed 940 * Show a '%p' thing. A kernel extension is that the '%p' is followed
693 * by an extra set of alphanumeric characters that are extended format 941 * by an extra set of alphanumeric characters that are extended format
@@ -697,25 +945,58 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr,
697 * 945 *
698 * - 'F' For symbolic function descriptor pointers with offset 946 * - 'F' For symbolic function descriptor pointers with offset
699 * - 'f' For simple symbolic function names without offset 947 * - 'f' For simple symbolic function names without offset
700 * - 'S' For symbolic direct pointers 948 * - 'S' For symbolic direct pointers with offset
701 * - 'R' For a struct resource pointer, it prints the range of 949 * - 's' For symbolic direct pointers without offset
702 * addresses (not the name nor the flags) 950 * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
951 * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201]
703 * - 'M' For a 6-byte MAC address, it prints the address in the 952 * - 'M' For a 6-byte MAC address, it prints the address in the
704 * usual colon-separated hex notation 953 * usual colon-separated hex notation
705 * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way (dot-separated 954 * - 'm' For a 6-byte MAC address, it prints the hex address without colons
706 * decimal for v4 and colon separated network-order 16 bit hex for v6) 955 * - 'MF' For a 6-byte MAC FDDI address, it prints the address
707 * - 'i' [46] for 'raw' IPv4/IPv6 addresses, IPv6 omits the colons, IPv4 is 956 * with a dash-separated hex notation
708 * currently the same 957 * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
958 * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
959 * IPv6 uses colon separated network-order 16 bit hex with leading 0's
960 * - 'i' [46] for 'raw' IPv4/IPv6 addresses
961 * IPv6 omits the colons (01020304...0f)
962 * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
963 * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
964 * - 'I6c' for IPv6 addresses printed as specified by
965 * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
966 * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
967 * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
968 * Options for %pU are:
969 * b big endian lower case hex (default)
970 * B big endian UPPER case hex
971 * l little endian lower case hex
972 * L little endian UPPER case hex
973 * big endian output byte order is:
974 * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
975 * little endian output byte order is:
976 * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
977 * - 'V' For a struct va_format which contains a format string * and va_list *,
978 * call vsnprintf(->format, *->va_list).
979 * Implements a "recursive vsnprintf".
980 * Do not use this feature without some mechanism to verify the
981 * correctness of the format string and va_list arguments.
709 * 982 *
710 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 983 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
711 * function pointers are really function descriptors, which contain a 984 * function pointers are really function descriptors, which contain a
712 * pointer to the real address. 985 * pointer to the real address.
713 */ 986 */
714static char *pointer(const char *fmt, char *buf, char *end, void *ptr, 987static noinline_for_stack
715 struct printf_spec spec) 988char *pointer(const char *fmt, char *buf, char *end, void *ptr,
989 struct printf_spec spec)
716{ 990{
717 if (!ptr) 991 if (!ptr) {
992 /*
993 * Print (null) with the same width as a pointer so it makes
994 * tabular output look nice.
995 */
996 if (spec.field_width == -1)
997 spec.field_width = 2 * sizeof(void *);
718 return string(buf, end, "(null)", spec); 998 return string(buf, end, "(null)", spec);
999 }
719 1000
720 switch (*fmt) { 1001 switch (*fmt) {
721 case 'F': 1002 case 'F':
@@ -723,28 +1004,41 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
723 ptr = dereference_function_descriptor(ptr); 1004 ptr = dereference_function_descriptor(ptr);
724 /* Fallthrough */ 1005 /* Fallthrough */
725 case 'S': 1006 case 'S':
1007 case 's':
726 return symbol_string(buf, end, ptr, spec, *fmt); 1008 return symbol_string(buf, end, ptr, spec, *fmt);
727 case 'R': 1009 case 'R':
728 return resource_string(buf, end, ptr, spec); 1010 case 'r':
729 case 'm': 1011 return resource_string(buf, end, ptr, spec, fmt);
730 spec.flags |= SPECIAL; 1012 case 'M': /* Colon separated: 00:01:02:03:04:05 */
731 /* Fallthrough */ 1013 case 'm': /* Contiguous: 000102030405 */
732 case 'M': 1014 /* [mM]F (FDDI, bit reversed) */
733 return mac_address_string(buf, end, ptr, spec); 1015 return mac_address_string(buf, end, ptr, spec, fmt);
734 case 'i': 1016 case 'I': /* Formatted IP supported
735 spec.flags |= SPECIAL; 1017 * 4: 1.2.3.4
736 /* Fallthrough */ 1018 * 6: 0001:0203:...:0708
737 case 'I': 1019 * 6c: 1::708 or 1::1.2.3.4
738 if (fmt[1] == '6') 1020 */
739 return ip6_addr_string(buf, end, ptr, spec); 1021 case 'i': /* Contiguous:
740 if (fmt[1] == '4') 1022 * 4: 001.002.003.004
741 return ip4_addr_string(buf, end, ptr, spec); 1023 * 6: 000102...0f
742 spec.flags &= ~SPECIAL; 1024 */
1025 switch (fmt[1]) {
1026 case '6':
1027 return ip6_addr_string(buf, end, ptr, spec, fmt);
1028 case '4':
1029 return ip4_addr_string(buf, end, ptr, spec, fmt);
1030 }
743 break; 1031 break;
1032 case 'U':
1033 return uuid_string(buf, end, ptr, spec, fmt);
1034 case 'V':
1035 return buf + vsnprintf(buf, end - buf,
1036 ((struct va_format *)ptr)->fmt,
1037 *(((struct va_format *)ptr)->va));
744 } 1038 }
745 spec.flags |= SMALL; 1039 spec.flags |= SMALL;
746 if (spec.field_width == -1) { 1040 if (spec.field_width == -1) {
747 spec.field_width = 2*sizeof(void *); 1041 spec.field_width = 2 * sizeof(void *);
748 spec.flags |= ZEROPAD; 1042 spec.flags |= ZEROPAD;
749 } 1043 }
750 spec.base = 16; 1044 spec.base = 16;
@@ -772,7 +1066,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
772 * @precision: precision of a number 1066 * @precision: precision of a number
773 * @qualifier: qualifier of a number (long, size_t, ...) 1067 * @qualifier: qualifier of a number (long, size_t, ...)
774 */ 1068 */
775static int format_decode(const char *fmt, struct printf_spec *spec) 1069static noinline_for_stack
1070int format_decode(const char *fmt, struct printf_spec *spec)
776{ 1071{
777 const char *start = fmt; 1072 const char *start = fmt;
778 1073
@@ -858,8 +1153,8 @@ precision:
858qualifier: 1153qualifier:
859 /* get the conversion qualifier */ 1154 /* get the conversion qualifier */
860 spec->qualifier = -1; 1155 spec->qualifier = -1;
861 if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || 1156 if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
862 *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { 1157 TOLOWER(*fmt) == 'z' || *fmt == 't') {
863 spec->qualifier = *fmt++; 1158 spec->qualifier = *fmt++;
864 if (unlikely(spec->qualifier == *fmt)) { 1159 if (unlikely(spec->qualifier == *fmt)) {
865 if (spec->qualifier == 'l') { 1160 if (spec->qualifier == 'l') {
@@ -926,7 +1221,7 @@ qualifier:
926 spec->type = FORMAT_TYPE_LONG; 1221 spec->type = FORMAT_TYPE_LONG;
927 else 1222 else
928 spec->type = FORMAT_TYPE_ULONG; 1223 spec->type = FORMAT_TYPE_ULONG;
929 } else if (spec->qualifier == 'Z' || spec->qualifier == 'z') { 1224 } else if (TOLOWER(spec->qualifier) == 'z') {
930 spec->type = FORMAT_TYPE_SIZE_T; 1225 spec->type = FORMAT_TYPE_SIZE_T;
931 } else if (spec->qualifier == 't') { 1226 } else if (spec->qualifier == 't') {
932 spec->type = FORMAT_TYPE_PTRDIFF; 1227 spec->type = FORMAT_TYPE_PTRDIFF;
@@ -958,10 +1253,23 @@ qualifier:
958 * @args: Arguments for the format string 1253 * @args: Arguments for the format string
959 * 1254 *
960 * This function follows C99 vsnprintf, but has some extensions: 1255 * This function follows C99 vsnprintf, but has some extensions:
961 * %pS output the name of a text symbol 1256 * %pS output the name of a text symbol with offset
1257 * %ps output the name of a text symbol without offset
962 * %pF output the name of a function pointer with its offset 1258 * %pF output the name of a function pointer with its offset
963 * %pf output the name of a function pointer without its offset 1259 * %pf output the name of a function pointer without its offset
964 * %pR output the address range in a struct resource 1260 * %pR output the address range in a struct resource with decoded flags
1261 * %pr output the address range in a struct resource with raw flags
1262 * %pM output a 6-byte MAC address with colons
1263 * %pm output a 6-byte MAC address without colons
1264 * %pI4 print an IPv4 address without leading zeros
1265 * %pi4 print an IPv4 address with leading zeros
1266 * %pI6 print an IPv6 address with colons
1267 * %pi6 print an IPv6 address without colons
1268 * %pI6c print an IPv6 address as specified by
1269 * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
1270 * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper
1271 * case.
1272 * %n is ignored
965 * 1273 *
966 * The return value is the number of characters which would 1274 * The return value is the number of characters which would
967 * be generated for the given input, excluding the trailing 1275 * be generated for the given input, excluding the trailing
@@ -977,19 +1285,13 @@ qualifier:
977int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) 1285int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
978{ 1286{
979 unsigned long long num; 1287 unsigned long long num;
980 char *str, *end, c; 1288 char *str, *end;
981 int read;
982 struct printf_spec spec = {0}; 1289 struct printf_spec spec = {0};
983 1290
984 /* Reject out-of-range values early. Large positive sizes are 1291 /* Reject out-of-range values early. Large positive sizes are
985 used for unknown buffer sizes. */ 1292 used for unknown buffer sizes. */
986 if (unlikely((int) size < 0)) { 1293 if (WARN_ON_ONCE((int) size < 0))
987 /* There can be only one.. */
988 static char warn = 1;
989 WARN_ON(warn);
990 warn = 0;
991 return 0; 1294 return 0;
992 }
993 1295
994 str = buf; 1296 str = buf;
995 end = buf + size; 1297 end = buf + size;
@@ -1002,8 +1304,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1002 1304
1003 while (*fmt) { 1305 while (*fmt) {
1004 const char *old_fmt = fmt; 1306 const char *old_fmt = fmt;
1005 1307 int read = format_decode(fmt, &spec);
1006 read = format_decode(fmt, &spec);
1007 1308
1008 fmt += read; 1309 fmt += read;
1009 1310
@@ -1027,7 +1328,9 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1027 spec.precision = va_arg(args, int); 1328 spec.precision = va_arg(args, int);
1028 break; 1329 break;
1029 1330
1030 case FORMAT_TYPE_CHAR: 1331 case FORMAT_TYPE_CHAR: {
1332 char c;
1333
1031 if (!(spec.flags & LEFT)) { 1334 if (!(spec.flags & LEFT)) {
1032 while (--spec.field_width > 0) { 1335 while (--spec.field_width > 0) {
1033 if (str < end) 1336 if (str < end)
@@ -1046,6 +1349,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1046 ++str; 1349 ++str;
1047 } 1350 }
1048 break; 1351 break;
1352 }
1049 1353
1050 case FORMAT_TYPE_STR: 1354 case FORMAT_TYPE_STR:
1051 str = string(str, end, va_arg(args, char *), spec); 1355 str = string(str, end, va_arg(args, char *), spec);
@@ -1071,13 +1375,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1071 break; 1375 break;
1072 1376
1073 case FORMAT_TYPE_NRCHARS: { 1377 case FORMAT_TYPE_NRCHARS: {
1074 int qualifier = spec.qualifier; 1378 u8 qualifier = spec.qualifier;
1075 1379
1076 if (qualifier == 'l') { 1380 if (qualifier == 'l') {
1077 long *ip = va_arg(args, long *); 1381 long *ip = va_arg(args, long *);
1078 *ip = (str - buf); 1382 *ip = (str - buf);
1079 } else if (qualifier == 'Z' || 1383 } else if (TOLOWER(qualifier) == 'z') {
1080 qualifier == 'z') {
1081 size_t *ip = va_arg(args, size_t *); 1384 size_t *ip = va_arg(args, size_t *);
1082 *ip = (str - buf); 1385 *ip = (str - buf);
1083 } else { 1386 } else {
@@ -1160,7 +1463,8 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
1160{ 1463{
1161 int i; 1464 int i;
1162 1465
1163 i=vsnprintf(buf,size,fmt,args); 1466 i = vsnprintf(buf, size, fmt, args);
1467
1164 return (i >= size) ? (size - 1) : i; 1468 return (i >= size) ? (size - 1) : i;
1165} 1469}
1166EXPORT_SYMBOL(vscnprintf); 1470EXPORT_SYMBOL(vscnprintf);
@@ -1179,14 +1483,15 @@ EXPORT_SYMBOL(vscnprintf);
1179 * 1483 *
1180 * See the vsnprintf() documentation for format string extensions over C99. 1484 * See the vsnprintf() documentation for format string extensions over C99.
1181 */ 1485 */
1182int snprintf(char * buf, size_t size, const char *fmt, ...) 1486int snprintf(char *buf, size_t size, const char *fmt, ...)
1183{ 1487{
1184 va_list args; 1488 va_list args;
1185 int i; 1489 int i;
1186 1490
1187 va_start(args, fmt); 1491 va_start(args, fmt);
1188 i=vsnprintf(buf,size,fmt,args); 1492 i = vsnprintf(buf, size, fmt, args);
1189 va_end(args); 1493 va_end(args);
1494
1190 return i; 1495 return i;
1191} 1496}
1192EXPORT_SYMBOL(snprintf); 1497EXPORT_SYMBOL(snprintf);
@@ -1199,10 +1504,10 @@ EXPORT_SYMBOL(snprintf);
1199 * @...: Arguments for the format string 1504 * @...: Arguments for the format string
1200 * 1505 *
1201 * The return value is the number of characters written into @buf not including 1506 * The return value is the number of characters written into @buf not including
1202 * the trailing '\0'. If @size is <= 0 the function returns 0. 1507 * the trailing '\0'. If @size is == 0 the function returns 0.
1203 */ 1508 */
1204 1509
1205int scnprintf(char * buf, size_t size, const char *fmt, ...) 1510int scnprintf(char *buf, size_t size, const char *fmt, ...)
1206{ 1511{
1207 va_list args; 1512 va_list args;
1208 int i; 1513 int i;
@@ -1210,7 +1515,12 @@ int scnprintf(char * buf, size_t size, const char *fmt, ...)
1210 va_start(args, fmt); 1515 va_start(args, fmt);
1211 i = vsnprintf(buf, size, fmt, args); 1516 i = vsnprintf(buf, size, fmt, args);
1212 va_end(args); 1517 va_end(args);
1213 return (i >= size) ? (size - 1) : i; 1518
1519 if (likely(i < size))
1520 return i;
1521 if (size != 0)
1522 return size - 1;
1523 return 0;
1214} 1524}
1215EXPORT_SYMBOL(scnprintf); 1525EXPORT_SYMBOL(scnprintf);
1216 1526
@@ -1247,14 +1557,15 @@ EXPORT_SYMBOL(vsprintf);
1247 * 1557 *
1248 * See the vsnprintf() documentation for format string extensions over C99. 1558 * See the vsnprintf() documentation for format string extensions over C99.
1249 */ 1559 */
1250int sprintf(char * buf, const char *fmt, ...) 1560int sprintf(char *buf, const char *fmt, ...)
1251{ 1561{
1252 va_list args; 1562 va_list args;
1253 int i; 1563 int i;
1254 1564
1255 va_start(args, fmt); 1565 va_start(args, fmt);
1256 i=vsnprintf(buf, INT_MAX, fmt, args); 1566 i = vsnprintf(buf, INT_MAX, fmt, args);
1257 va_end(args); 1567 va_end(args);
1568
1258 return i; 1569 return i;
1259} 1570}
1260EXPORT_SYMBOL(sprintf); 1571EXPORT_SYMBOL(sprintf);
@@ -1287,7 +1598,6 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
1287{ 1598{
1288 struct printf_spec spec = {0}; 1599 struct printf_spec spec = {0};
1289 char *str, *end; 1600 char *str, *end;
1290 int read;
1291 1601
1292 str = (char *)bin_buf; 1602 str = (char *)bin_buf;
1293 end = (char *)(bin_buf + size); 1603 end = (char *)(bin_buf + size);
@@ -1312,14 +1622,15 @@ do { \
1312 str += sizeof(type); \ 1622 str += sizeof(type); \
1313} while (0) 1623} while (0)
1314 1624
1315
1316 while (*fmt) { 1625 while (*fmt) {
1317 read = format_decode(fmt, &spec); 1626 int read = format_decode(fmt, &spec);
1318 1627
1319 fmt += read; 1628 fmt += read;
1320 1629
1321 switch (spec.type) { 1630 switch (spec.type) {
1322 case FORMAT_TYPE_NONE: 1631 case FORMAT_TYPE_NONE:
1632 case FORMAT_TYPE_INVALID:
1633 case FORMAT_TYPE_PERCENT_CHAR:
1323 break; 1634 break;
1324 1635
1325 case FORMAT_TYPE_WIDTH: 1636 case FORMAT_TYPE_WIDTH:
@@ -1334,13 +1645,14 @@ do { \
1334 case FORMAT_TYPE_STR: { 1645 case FORMAT_TYPE_STR: {
1335 const char *save_str = va_arg(args, char *); 1646 const char *save_str = va_arg(args, char *);
1336 size_t len; 1647 size_t len;
1648
1337 if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE 1649 if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
1338 || (unsigned long)save_str < PAGE_SIZE) 1650 || (unsigned long)save_str < PAGE_SIZE)
1339 save_str = "<NULL>"; 1651 save_str = "(null)";
1340 len = strlen(save_str); 1652 len = strlen(save_str) + 1;
1341 if (str + len + 1 < end) 1653 if (str + len < end)
1342 memcpy(str, save_str, len + 1); 1654 memcpy(str, save_str, len);
1343 str += len + 1; 1655 str += len;
1344 break; 1656 break;
1345 } 1657 }
1346 1658
@@ -1351,19 +1663,13 @@ do { \
1351 fmt++; 1663 fmt++;
1352 break; 1664 break;
1353 1665
1354 case FORMAT_TYPE_PERCENT_CHAR:
1355 break;
1356
1357 case FORMAT_TYPE_INVALID:
1358 break;
1359
1360 case FORMAT_TYPE_NRCHARS: { 1666 case FORMAT_TYPE_NRCHARS: {
1361 /* skip %n 's argument */ 1667 /* skip %n 's argument */
1362 int qualifier = spec.qualifier; 1668 u8 qualifier = spec.qualifier;
1363 void *skip_arg; 1669 void *skip_arg;
1364 if (qualifier == 'l') 1670 if (qualifier == 'l')
1365 skip_arg = va_arg(args, long *); 1671 skip_arg = va_arg(args, long *);
1366 else if (qualifier == 'Z' || qualifier == 'z') 1672 else if (TOLOWER(qualifier) == 'z')
1367 skip_arg = va_arg(args, size_t *); 1673 skip_arg = va_arg(args, size_t *);
1368 else 1674 else
1369 skip_arg = va_arg(args, int *); 1675 skip_arg = va_arg(args, int *);
@@ -1399,8 +1705,8 @@ do { \
1399 } 1705 }
1400 } 1706 }
1401 } 1707 }
1402 return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
1403 1708
1709 return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
1404#undef save_arg 1710#undef save_arg
1405} 1711}
1406EXPORT_SYMBOL_GPL(vbin_printf); 1712EXPORT_SYMBOL_GPL(vbin_printf);
@@ -1417,11 +1723,7 @@ EXPORT_SYMBOL_GPL(vbin_printf);
1417 * a binary buffer that generated by vbin_printf. 1723 * a binary buffer that generated by vbin_printf.
1418 * 1724 *
1419 * The format follows C99 vsnprintf, but has some extensions: 1725 * The format follows C99 vsnprintf, but has some extensions:
1420 * %pS output the name of a text symbol 1726 * see vsnprintf comment for details.
1421 * %pF output the name of a function pointer with its offset
1422 * %pf output the name of a function pointer without its offset
1423 * %pR output the address range in a struct resource
1424 * %n is ignored
1425 * 1727 *
1426 * The return value is the number of characters which would 1728 * The return value is the number of characters which would
1427 * be generated for the given input, excluding the trailing 1729 * be generated for the given input, excluding the trailing
@@ -1433,19 +1735,12 @@ EXPORT_SYMBOL_GPL(vbin_printf);
1433 */ 1735 */
1434int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) 1736int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1435{ 1737{
1436 unsigned long long num;
1437 char *str, *end, c;
1438 const char *args = (const char *)bin_buf;
1439
1440 struct printf_spec spec = {0}; 1738 struct printf_spec spec = {0};
1739 char *str, *end;
1740 const char *args = (const char *)bin_buf;
1441 1741
1442 if (unlikely((int) size < 0)) { 1742 if (WARN_ON_ONCE((int) size < 0))
1443 /* There can be only one.. */
1444 static char warn = 1;
1445 WARN_ON(warn);
1446 warn = 0;
1447 return 0; 1743 return 0;
1448 }
1449 1744
1450 str = buf; 1745 str = buf;
1451 end = buf + size; 1746 end = buf + size;
@@ -1472,10 +1767,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1472 } 1767 }
1473 1768
1474 while (*fmt) { 1769 while (*fmt) {
1475 int read;
1476 const char *old_fmt = fmt; 1770 const char *old_fmt = fmt;
1477 1771 int read = format_decode(fmt, &spec);
1478 read = format_decode(fmt, &spec);
1479 1772
1480 fmt += read; 1773 fmt += read;
1481 1774
@@ -1499,7 +1792,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1499 spec.precision = get_arg(int); 1792 spec.precision = get_arg(int);
1500 break; 1793 break;
1501 1794
1502 case FORMAT_TYPE_CHAR: 1795 case FORMAT_TYPE_CHAR: {
1796 char c;
1797
1503 if (!(spec.flags & LEFT)) { 1798 if (!(spec.flags & LEFT)) {
1504 while (--spec.field_width > 0) { 1799 while (--spec.field_width > 0) {
1505 if (str < end) 1800 if (str < end)
@@ -1517,11 +1812,11 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1517 ++str; 1812 ++str;
1518 } 1813 }
1519 break; 1814 break;
1815 }
1520 1816
1521 case FORMAT_TYPE_STR: { 1817 case FORMAT_TYPE_STR: {
1522 const char *str_arg = args; 1818 const char *str_arg = args;
1523 size_t len = strlen(str_arg); 1819 args += strlen(str_arg) + 1;
1524 args += len + 1;
1525 str = string(str, end, (char *)str_arg, spec); 1820 str = string(str, end, (char *)str_arg, spec);
1526 break; 1821 break;
1527 } 1822 }
@@ -1533,11 +1828,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1533 break; 1828 break;
1534 1829
1535 case FORMAT_TYPE_PERCENT_CHAR: 1830 case FORMAT_TYPE_PERCENT_CHAR:
1536 if (str < end)
1537 *str = '%';
1538 ++str;
1539 break;
1540
1541 case FORMAT_TYPE_INVALID: 1831 case FORMAT_TYPE_INVALID:
1542 if (str < end) 1832 if (str < end)
1543 *str = '%'; 1833 *str = '%';
@@ -1548,15 +1838,15 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1548 /* skip */ 1838 /* skip */
1549 break; 1839 break;
1550 1840
1551 default: 1841 default: {
1842 unsigned long long num;
1843
1552 switch (spec.type) { 1844 switch (spec.type) {
1553 1845
1554 case FORMAT_TYPE_LONG_LONG: 1846 case FORMAT_TYPE_LONG_LONG:
1555 num = get_arg(long long); 1847 num = get_arg(long long);
1556 break; 1848 break;
1557 case FORMAT_TYPE_ULONG: 1849 case FORMAT_TYPE_ULONG:
1558 num = get_arg(unsigned long);
1559 break;
1560 case FORMAT_TYPE_LONG: 1850 case FORMAT_TYPE_LONG:
1561 num = get_arg(unsigned long); 1851 num = get_arg(unsigned long);
1562 break; 1852 break;
@@ -1586,8 +1876,9 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
1586 } 1876 }
1587 1877
1588 str = number(str, end, num, spec); 1878 str = number(str, end, num, spec);
1589 } 1879 } /* default: */
1590 } 1880 } /* switch(spec.type) */
1881 } /* while(*fmt) */
1591 1882
1592 if (size > 0) { 1883 if (size > 0) {
1593 if (str < end) 1884 if (str < end)
@@ -1621,6 +1912,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
1621 va_start(args, fmt); 1912 va_start(args, fmt);
1622 ret = vbin_printf(bin_buf, size, fmt, args); 1913 ret = vbin_printf(bin_buf, size, fmt, args);
1623 va_end(args); 1914 va_end(args);
1915
1624 return ret; 1916 return ret;
1625} 1917}
1626EXPORT_SYMBOL_GPL(bprintf); 1918EXPORT_SYMBOL_GPL(bprintf);
@@ -1633,27 +1925,25 @@ EXPORT_SYMBOL_GPL(bprintf);
1633 * @fmt: format of buffer 1925 * @fmt: format of buffer
1634 * @args: arguments 1926 * @args: arguments
1635 */ 1927 */
1636int vsscanf(const char * buf, const char * fmt, va_list args) 1928int vsscanf(const char *buf, const char *fmt, va_list args)
1637{ 1929{
1638 const char *str = buf; 1930 const char *str = buf;
1639 char *next; 1931 char *next;
1640 char digit; 1932 char digit;
1641 int num = 0; 1933 int num = 0;
1642 int qualifier; 1934 u8 qualifier;
1643 int base; 1935 u8 base;
1644 int field_width; 1936 s16 field_width;
1645 int is_sign = 0; 1937 bool is_sign;
1646 1938
1647 while(*fmt && *str) { 1939 while (*fmt && *str) {
1648 /* skip any white space in format */ 1940 /* skip any white space in format */
1649 /* white space in format matchs any amount of 1941 /* white space in format matchs any amount of
1650 * white space, including none, in the input. 1942 * white space, including none, in the input.
1651 */ 1943 */
1652 if (isspace(*fmt)) { 1944 if (isspace(*fmt)) {
1653 while (isspace(*fmt)) 1945 fmt = skip_spaces(++fmt);
1654 ++fmt; 1946 str = skip_spaces(str);
1655 while (isspace(*str))
1656 ++str;
1657 } 1947 }
1658 1948
1659 /* anything that is not a conversion must match exactly */ 1949 /* anything that is not a conversion must match exactly */
@@ -1666,12 +1956,12 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
1666 if (!*fmt) 1956 if (!*fmt)
1667 break; 1957 break;
1668 ++fmt; 1958 ++fmt;
1669 1959
1670 /* skip this conversion. 1960 /* skip this conversion.
1671 * advance both strings to next white space 1961 * advance both strings to next white space
1672 */ 1962 */
1673 if (*fmt == '*') { 1963 if (*fmt == '*') {
1674 while (!isspace(*fmt) && *fmt) 1964 while (!isspace(*fmt) && *fmt != '%' && *fmt)
1675 fmt++; 1965 fmt++;
1676 while (!isspace(*str) && *str) 1966 while (!isspace(*str) && *str)
1677 str++; 1967 str++;
@@ -1685,8 +1975,8 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
1685 1975
1686 /* get conversion qualifier */ 1976 /* get conversion qualifier */
1687 qualifier = -1; 1977 qualifier = -1;
1688 if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || 1978 if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
1689 *fmt == 'Z' || *fmt == 'z') { 1979 TOLOWER(*fmt) == 'z') {
1690 qualifier = *fmt++; 1980 qualifier = *fmt++;
1691 if (unlikely(qualifier == *fmt)) { 1981 if (unlikely(qualifier == *fmt)) {
1692 if (qualifier == 'h') { 1982 if (qualifier == 'h') {
@@ -1698,16 +1988,17 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
1698 } 1988 }
1699 } 1989 }
1700 } 1990 }
1701 base = 10;
1702 is_sign = 0;
1703 1991
1704 if (!*fmt || !*str) 1992 if (!*fmt || !*str)
1705 break; 1993 break;
1706 1994
1707 switch(*fmt++) { 1995 base = 10;
1996 is_sign = 0;
1997
1998 switch (*fmt++) {
1708 case 'c': 1999 case 'c':
1709 { 2000 {
1710 char *s = (char *) va_arg(args,char*); 2001 char *s = (char *)va_arg(args, char*);
1711 if (field_width == -1) 2002 if (field_width == -1)
1712 field_width = 1; 2003 field_width = 1;
1713 do { 2004 do {
@@ -1718,17 +2009,15 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
1718 continue; 2009 continue;
1719 case 's': 2010 case 's':
1720 { 2011 {
1721 char *s = (char *) va_arg(args, char *); 2012 char *s = (char *)va_arg(args, char *);
1722 if(field_width == -1) 2013 if (field_width == -1)
1723 field_width = INT_MAX; 2014 field_width = SHRT_MAX;
1724 /* first, skip leading white space in buffer */ 2015 /* first, skip leading white space in buffer */
1725 while (isspace(*str)) 2016 str = skip_spaces(str);
1726 str++;
1727 2017
1728 /* now copy until next white space */ 2018 /* now copy until next white space */
1729 while (*str && !isspace(*str) && field_width--) { 2019 while (*str && !isspace(*str) && field_width--)
1730 *s++ = *str++; 2020 *s++ = *str++;
1731 }
1732 *s = '\0'; 2021 *s = '\0';
1733 num++; 2022 num++;
1734 } 2023 }
@@ -1736,7 +2025,7 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
1736 case 'n': 2025 case 'n':
1737 /* return number of characters read so far */ 2026 /* return number of characters read so far */
1738 { 2027 {
1739 int *i = (int *)va_arg(args,int*); 2028 int *i = (int *)va_arg(args, int*);
1740 *i = str - buf; 2029 *i = str - buf;
1741 } 2030 }
1742 continue; 2031 continue;
@@ -1748,14 +2037,14 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
1748 base = 16; 2037 base = 16;
1749 break; 2038 break;
1750 case 'i': 2039 case 'i':
1751 base = 0; 2040 base = 0;
1752 case 'd': 2041 case 'd':
1753 is_sign = 1; 2042 is_sign = 1;
1754 case 'u': 2043 case 'u':
1755 break; 2044 break;
1756 case '%': 2045 case '%':
1757 /* looking for '%' in str */ 2046 /* looking for '%' in str */
1758 if (*str++ != '%') 2047 if (*str++ != '%')
1759 return num; 2048 return num;
1760 continue; 2049 continue;
1761 default: 2050 default:
@@ -1766,71 +2055,70 @@ int vsscanf(const char * buf, const char * fmt, va_list args)
1766 /* have some sort of integer conversion. 2055 /* have some sort of integer conversion.
1767 * first, skip white space in buffer. 2056 * first, skip white space in buffer.
1768 */ 2057 */
1769 while (isspace(*str)) 2058 str = skip_spaces(str);
1770 str++;
1771 2059
1772 digit = *str; 2060 digit = *str;
1773 if (is_sign && digit == '-') 2061 if (is_sign && digit == '-')
1774 digit = *(str + 1); 2062 digit = *(str + 1);
1775 2063
1776 if (!digit 2064 if (!digit
1777 || (base == 16 && !isxdigit(digit)) 2065 || (base == 16 && !isxdigit(digit))
1778 || (base == 10 && !isdigit(digit)) 2066 || (base == 10 && !isdigit(digit))
1779 || (base == 8 && (!isdigit(digit) || digit > '7')) 2067 || (base == 8 && (!isdigit(digit) || digit > '7'))
1780 || (base == 0 && !isdigit(digit))) 2068 || (base == 0 && !isdigit(digit)))
1781 break; 2069 break;
1782 2070
1783 switch(qualifier) { 2071 switch (qualifier) {
1784 case 'H': /* that's 'hh' in format */ 2072 case 'H': /* that's 'hh' in format */
1785 if (is_sign) { 2073 if (is_sign) {
1786 signed char *s = (signed char *) va_arg(args,signed char *); 2074 signed char *s = (signed char *)va_arg(args, signed char *);
1787 *s = (signed char) simple_strtol(str,&next,base); 2075 *s = (signed char)simple_strtol(str, &next, base);
1788 } else { 2076 } else {
1789 unsigned char *s = (unsigned char *) va_arg(args, unsigned char *); 2077 unsigned char *s = (unsigned char *)va_arg(args, unsigned char *);
1790 *s = (unsigned char) simple_strtoul(str, &next, base); 2078 *s = (unsigned char)simple_strtoul(str, &next, base);
1791 } 2079 }
1792 break; 2080 break;
1793 case 'h': 2081 case 'h':
1794 if (is_sign) { 2082 if (is_sign) {
1795 short *s = (short *) va_arg(args,short *); 2083 short *s = (short *)va_arg(args, short *);
1796 *s = (short) simple_strtol(str,&next,base); 2084 *s = (short)simple_strtol(str, &next, base);
1797 } else { 2085 } else {
1798 unsigned short *s = (unsigned short *) va_arg(args, unsigned short *); 2086 unsigned short *s = (unsigned short *)va_arg(args, unsigned short *);
1799 *s = (unsigned short) simple_strtoul(str, &next, base); 2087 *s = (unsigned short)simple_strtoul(str, &next, base);
1800 } 2088 }
1801 break; 2089 break;
1802 case 'l': 2090 case 'l':
1803 if (is_sign) { 2091 if (is_sign) {
1804 long *l = (long *) va_arg(args,long *); 2092 long *l = (long *)va_arg(args, long *);
1805 *l = simple_strtol(str,&next,base); 2093 *l = simple_strtol(str, &next, base);
1806 } else { 2094 } else {
1807 unsigned long *l = (unsigned long*) va_arg(args,unsigned long*); 2095 unsigned long *l = (unsigned long *)va_arg(args, unsigned long *);
1808 *l = simple_strtoul(str,&next,base); 2096 *l = simple_strtoul(str, &next, base);
1809 } 2097 }
1810 break; 2098 break;
1811 case 'L': 2099 case 'L':
1812 if (is_sign) { 2100 if (is_sign) {
1813 long long *l = (long long*) va_arg(args,long long *); 2101 long long *l = (long long *)va_arg(args, long long *);
1814 *l = simple_strtoll(str,&next,base); 2102 *l = simple_strtoll(str, &next, base);
1815 } else { 2103 } else {
1816 unsigned long long *l = (unsigned long long*) va_arg(args,unsigned long long*); 2104 unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *);
1817 *l = simple_strtoull(str,&next,base); 2105 *l = simple_strtoull(str, &next, base);
1818 } 2106 }
1819 break; 2107 break;
1820 case 'Z': 2108 case 'Z':
1821 case 'z': 2109 case 'z':
1822 { 2110 {
1823 size_t *s = (size_t*) va_arg(args,size_t*); 2111 size_t *s = (size_t *)va_arg(args, size_t *);
1824 *s = (size_t) simple_strtoul(str,&next,base); 2112 *s = (size_t)simple_strtoul(str, &next, base);
1825 } 2113 }
1826 break; 2114 break;
1827 default: 2115 default:
1828 if (is_sign) { 2116 if (is_sign) {
1829 int *i = (int *) va_arg(args, int*); 2117 int *i = (int *)va_arg(args, int *);
1830 *i = (int) simple_strtol(str,&next,base); 2118 *i = (int)simple_strtol(str, &next, base);
1831 } else { 2119 } else {
1832 unsigned int *i = (unsigned int*) va_arg(args, unsigned int*); 2120 unsigned int *i = (unsigned int *)va_arg(args, unsigned int*);
1833 *i = (unsigned int) simple_strtoul(str,&next,base); 2121 *i = (unsigned int)simple_strtoul(str, &next, base);
1834 } 2122 }
1835 break; 2123 break;
1836 } 2124 }
@@ -1861,14 +2149,15 @@ EXPORT_SYMBOL(vsscanf);
1861 * @fmt: formatting of buffer 2149 * @fmt: formatting of buffer
1862 * @...: resulting arguments 2150 * @...: resulting arguments
1863 */ 2151 */
1864int sscanf(const char * buf, const char * fmt, ...) 2152int sscanf(const char *buf, const char *fmt, ...)
1865{ 2153{
1866 va_list args; 2154 va_list args;
1867 int i; 2155 int i;
1868 2156
1869 va_start(args,fmt); 2157 va_start(args, fmt);
1870 i = vsscanf(buf,fmt,args); 2158 i = vsscanf(buf, fmt, args);
1871 va_end(args); 2159 va_end(args);
2160
1872 return i; 2161 return i;
1873} 2162}
1874EXPORT_SYMBOL(sscanf); 2163EXPORT_SYMBOL(sscanf);
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
index c3e4a2baf83..46a31e5f49c 100644
--- a/lib/zlib_deflate/deflate.c
+++ b/lib/zlib_deflate/deflate.c
@@ -135,7 +135,7 @@ static const config configuration_table[10] = {
135 135
136/* =========================================================================== 136/* ===========================================================================
137 * Update a hash value with the given input byte 137 * Update a hash value with the given input byte
138 * IN assertion: all calls to to UPDATE_HASH are made with consecutive 138 * IN assertion: all calls to UPDATE_HASH are made with consecutive
139 * input characters, so that a running hash key can be computed from the 139 * input characters, so that a running hash key can be computed from the
140 * previous key instead of complete recalculation each time. 140 * previous key instead of complete recalculation each time.
141 */ 141 */
@@ -146,7 +146,7 @@ static const config configuration_table[10] = {
146 * Insert string str in the dictionary and set match_head to the previous head 146 * Insert string str in the dictionary and set match_head to the previous head
147 * of the hash chain (the most recent string with same hash key). Return 147 * of the hash chain (the most recent string with same hash key). Return
148 * the previous length of the hash chain. 148 * the previous length of the hash chain.
149 * IN assertion: all calls to to INSERT_STRING are made with consecutive 149 * IN assertion: all calls to INSERT_STRING are made with consecutive
150 * input characters and the first MIN_MATCH bytes of str are valid 150 * input characters and the first MIN_MATCH bytes of str are valid
151 * (except for the last MIN_MATCH-1 bytes of the input file). 151 * (except for the last MIN_MATCH-1 bytes of the input file).
152 */ 152 */
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 8550b0c05d0..2c13ecc5bb2 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -21,12 +21,31 @@
21 - Pentium III (Anderson) 21 - Pentium III (Anderson)
22 - M68060 (Nikl) 22 - M68060 (Nikl)
23 */ 23 */
24union uu {
25 unsigned short us;
26 unsigned char b[2];
27};
28
29/* Endian independed version */
30static inline unsigned short
31get_unaligned16(const unsigned short *p)
32{
33 union uu mm;
34 unsigned char *b = (unsigned char *)p;
35
36 mm.b[0] = b[0];
37 mm.b[1] = b[1];
38 return mm.us;
39}
40
24#ifdef POSTINC 41#ifdef POSTINC
25# define OFF 0 42# define OFF 0
26# define PUP(a) *(a)++ 43# define PUP(a) *(a)++
44# define UP_UNALIGNED(a) get_unaligned16((a)++)
27#else 45#else
28# define OFF 1 46# define OFF 1
29# define PUP(a) *++(a) 47# define PUP(a) *++(a)
48# define UP_UNALIGNED(a) get_unaligned16(++(a))
30#endif 49#endif
31 50
32/* 51/*
@@ -239,18 +258,50 @@ void inflate_fast(z_streamp strm, unsigned start)
239 } 258 }
240 } 259 }
241 else { 260 else {
261 unsigned short *sout;
262 unsigned long loops;
263
242 from = out - dist; /* copy direct from output */ 264 from = out - dist; /* copy direct from output */
243 do { /* minimum length is three */ 265 /* minimum length is three */
244 PUP(out) = PUP(from); 266 /* Align out addr */
245 PUP(out) = PUP(from); 267 if (!((long)(out - 1 + OFF) & 1)) {
246 PUP(out) = PUP(from); 268 PUP(out) = PUP(from);
247 len -= 3; 269 len--;
248 } while (len > 2); 270 }
249 if (len) { 271 sout = (unsigned short *)(out - OFF);
250 PUP(out) = PUP(from); 272 if (dist > 2) {
251 if (len > 1) 273 unsigned short *sfrom;
252 PUP(out) = PUP(from); 274
253 } 275 sfrom = (unsigned short *)(from - OFF);
276 loops = len >> 1;
277 do
278#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
279 PUP(sout) = PUP(sfrom);
280#else
281 PUP(sout) = UP_UNALIGNED(sfrom);
282#endif
283 while (--loops);
284 out = (unsigned char *)sout + OFF;
285 from = (unsigned char *)sfrom + OFF;
286 } else { /* dist == 1 or dist == 2 */
287 unsigned short pat16;
288
289 pat16 = *(sout-1+OFF);
290 if (dist == 1) {
291 union uu mm;
292 /* copy one char pattern to both bytes */
293 mm.us = pat16;
294 mm.b[0] = mm.b[1];
295 pat16 = mm.us;
296 }
297 loops = len >> 1;
298 do
299 PUP(sout) = pat16;
300 while (--loops);
301 out = (unsigned char *)sout + OFF;
302 }
303 if (len & 1)
304 PUP(out) = PUP(from);
254 } 305 }
255 } 306 }
256 else if ((op & 64) == 0) { /* 2nd level distance code */ 307 else if ((op & 64) == 0) { /* 2nd level distance code */