85 files changed, 8913 insertions, 1419 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 97b136ff117..0ee67e08ad3 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -7,6 +7,9 @@ config BINARY_PRINTF
 menu "Library routines"
+config RAID6_PQ
+        tristate
 config BITREVERSE
        tristate
@@ -103,6 +106,8 @@ config LZO_COMPRESS
 config LZO_DECOMPRESS
        tristate
+source "lib/xz/Kconfig"
 #
 # These all provide a common interface (hence the apparent duplication with
 # ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.)
@@ -117,6 +122,10 @@ config DECOMPRESS_BZIP2
 config DECOMPRESS_LZMA
        tristate
+config DECOMPRESS_XZ
+        select XZ_DEC
+        tristate
 config DECOMPRESS_LZO
        select LZO_DECOMPRESS
        tristate
@@ -160,6 +169,9 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
        tristate
+config BTREE
+        boolean
 config HAS_IOMEM
        boolean
        depends on !NO_IOMEM
@@ -178,9 +190,6 @@ config HAS_DMA
 config CHECK_SIGNATURE
        bool
-config HAVE_LMB
-        boolean
 config CPUMASK_OFFSTACK
        bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
        help
@@ -207,4 +216,7 @@ config GENERIC_ATOMIC64
 config LRU_CACHE
        tristate
+config AVERAGE
+        bool
 endmenu
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c5..2d05adb9840 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -76,7 +76,6 @@ config UNUSED_SYMBOLS
 config DEBUG_FS
        bool "Debug Filesystem"
-        depends on SYSFS
        help
          debugfs is a virtual file system that kernel developers use to put
          debugging files into.  Enable this option to be able to read and
@@ -103,7 +102,8 @@ config HEADERS_CHECK
 config DEBUG_SECTION_MISMATCH
        bool "Enable full Section mismatch analysis"
-        depends on UNDEFINED
+        depends on UNDEFINED || (BLACKFIN)
+        default y
        # This option is on purpose disabled for now.
        # It will be enabled when we are down to a reasonable number
        # of section mismatch warnings (< 10 for an allyesconfig build)
@@ -151,28 +151,34 @@ config DEBUG_SHIRQ
          Drivers ought to be able to handle interrupts coming in at those
          points; some don't and need to be caught.
-config DETECT_SOFTLOCKUP
+config LOCKUP_DETECTOR
-        bool "Detect Soft Lockups"
+        bool "Detect Hard and Soft Lockups"
        depends on DEBUG_KERNEL && !S390
-        default y
        help
-          Say Y here to enable the kernel to detect "soft lockups",
+          Say Y here to enable the kernel to act as a watchdog to detect
-          which are bugs that cause the kernel to loop in kernel
+          hard and soft lockups.
+          Softlockups are bugs that cause the kernel to loop in kernel
          mode for more than 60 seconds, without giving other tasks a
-          chance to run.
+          chance to run.  The current stack trace is displayed upon
+          detection and the system will stay locked up.
-          When a soft-lockup is detected, the kernel will print the
+          Hardlockups are bugs that cause the CPU to loop in kernel mode
-          current stack trace (which you should report), but the
+          for more than 60 seconds, without letting other interrupts have a
-          system will stay locked up. This feature has negligible
+          chance to run.  The current stack trace is displayed upon detection
-          overhead.
+          and the system will stay locked up.
+          The overhead should be minimal.  A periodic hrtimer runs to
+          generate interrupts and kick the watchdog task every 10-12 seconds.
+          An NMI is generated every 60 seconds or so to check for hardlockups.
-          (Note that "hard lockups" are separate type of bugs that
+config HARDLOCKUP_DETECTOR
-           can be detected via the NMI-watchdog, on platforms that
+        def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
-           support it.)
+                 !ARCH_HAS_NMI_WATCHDOG
 config BOOTPARAM_SOFTLOCKUP_PANIC
        bool "Panic (Reboot) On Soft Lockups"
-        depends on DETECT_SOFTLOCKUP
+        depends on LOCKUP_DETECTOR
        help
          Say Y here to enable the kernel to panic on "soft lockups",
          which are bugs that cause the kernel to loop in kernel
@@ -189,7 +195,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
 config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
        int
-        depends on DETECT_SOFTLOCKUP
+        depends on LOCKUP_DETECTOR
        range 0 1
        default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
        default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
@@ -306,6 +312,20 @@ config DEBUG_OBJECTS_WORK
          work queue routines to track the life time of work objects and
          validate the work operations.
+config DEBUG_OBJECTS_RCU_HEAD
+        bool "Debug RCU callbacks objects"
+        depends on DEBUG_OBJECTS && PREEMPT
+        help
+          Enable this to turn on debugging of RCU list heads (call_rcu() usage).
+config DEBUG_OBJECTS_PERCPU_COUNTER
+        bool "Debug percpu counter objects"
+        depends on DEBUG_OBJECTS
+        help
+          If you say Y here, additional code will be inserted into the
+          percpu counter routines to track the life time of percpu counter
+          objects and validate the percpu counter operations.
 config DEBUG_OBJECTS_ENABLE_DEFAULT
        int "debug_objects bootup default value (0-1)"
        range 0 1
@@ -342,7 +362,7 @@ config SLUB_DEBUG_ON
 config SLUB_STATS
        default n
        bool "Enable SLUB performance statistics"
-        depends on SLUB && SLUB_DEBUG && SYSFS
+        depends on SLUB && SYSFS
        help
          SLUB statistics are useful to debug SLUBs allocation behavior in
          order find ways to optimize the allocator. This should never be
@@ -355,7 +375,7 @@ config SLUB_STATS
 config DEBUG_KMEMLEAK
        bool "Kernel memory leak detector"
        depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
-                (X86 || ARM || PPC || S390)
+                (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE || TILE)
        select DEBUG_FS if SYSFS
        select STACKTRACE if STACKTRACE_SUPPORT
@@ -399,6 +419,13 @@ config DEBUG_KMEMLEAK_TEST
          If unsure, say N.
+config DEBUG_KMEMLEAK_DEFAULT_OFF
+        bool "Default kmemleak to off"
+        depends on DEBUG_KMEMLEAK
+        help
+          Say Y here to disable kmemleak by default. It can then be enabled
+          on the command line via kmemleak=on.
 config DEBUG_PREEMPT
        bool "Debug preemptible kernel"
        depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
@@ -443,6 +470,15 @@ config DEBUG_MUTEXES
         This feature allows mutex semantics violations to be detected and
         reported.
+config BKL
+        bool "Big Kernel Lock" if (SMP || PREEMPT)
+        default y
+        help
+          This is the traditional lock that is used in old code instead
+          of proper locking. All drivers that use the BKL should depend
+          on this symbol.
+          Say Y here unless you are working on removing the BKL.
 config DEBUG_LOCK_ALLOC
        bool "Lock debugging: detect incorrect freeing of live locks"
        depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -464,6 +500,7 @@ config PROVE_LOCKING
        select DEBUG_SPINLOCK
        select DEBUG_MUTEXES
        select DEBUG_LOCK_ALLOC
+        select TRACE_IRQFLAGS
        default n
        help
         This feature enables the kernel to prove that all locking
@@ -499,11 +536,52 @@ config PROVE_LOCKING
         For more details, see Documentation/lockdep-design.txt.
+config PROVE_RCU
+        bool "RCU debugging: prove RCU correctness"
+        depends on PROVE_LOCKING
+        default n
+        help
+         This feature enables lockdep extensions that check for correct
+         use of RCU APIs.  This is currently under development.  Say Y
+         if you want to debug RCU usage or help work on the PROVE_RCU
+         feature.
+         Say N if you are unsure.
+config PROVE_RCU_REPEATEDLY
+        bool "RCU debugging: don't disable PROVE_RCU on first splat"
+        depends on PROVE_RCU
+        default n
+        help
+         By itself, PROVE_RCU will disable checking upon issuing the
+         first warning (or "splat").  This feature prevents such
+         disabling, allowing multiple RCU-lockdep warnings to be printed
+         on a single reboot.
+         Say Y to allow multiple RCU-lockdep warnings per boot.
+         Say N if you are unsure.
+config SPARSE_RCU_POINTER
+        bool "RCU debugging: sparse-based checks for pointer usage"
+        default n
+        help
+         This feature enables the __rcu sparse annotation for
+         RCU-protected pointers.  This annotation will cause sparse
+         to flag any non-RCU use of annotated pointers.  This can be
+         helpful when debugging RCU usage.  Please note that this feature
+         is not intended to enforce code cleanliness; it is instead merely
+         a debugging aid.
+         Say Y to make sparse flag questionable use of RCU-protected pointers.
+         Say N if you are unsure.
 config LOCKDEP
        bool
        depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
        select STACKTRACE
-        select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390
+        select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
        select KALLSYMS
        select KALLSYMS_ALL
@@ -520,6 +598,14 @@ config LOCK_STAT
         For more details, see Documentation/lockstat.txt
+         This also enables lock events required by "perf lock",
+         subcommand of perf.
+         If you want to use "perf lock", you also need to turn on
+         CONFIG_EVENT_TRACING.
+         CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
+         (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 config DEBUG_LOCKDEP
        bool "Lock dependency engine debugging"
        depends on DEBUG_KERNEL && LOCKDEP
@@ -529,11 +615,10 @@ config DEBUG_LOCKDEP
          of more runtime overhead.
 config TRACE_IRQFLAGS
-        depends on DEBUG_KERNEL
        bool
-        default y
+        help
-        depends on TRACE_IRQFLAGS_SUPPORT
+          Enables hooks to interrupt enabling and disabling for
-        depends on PROVE_LOCKING
+          either tracing or lock debugging.
 config DEBUG_SPINLOCK_SLEEP
        bool "Spinlock debugging: sleep-inside-spinlock checking"
@@ -595,6 +680,19 @@ config DEBUG_INFO
          If unsure, say N.
+config DEBUG_INFO_REDUCED
+        bool "Reduce debugging information"
+        depends on DEBUG_INFO
+        help
+          If you say Y here gcc is instructed to generate less debugging
+          information for structure types. This means that tools that
+          need full debugging information (like kgdb or systemtap) won't
+          be happy. But if you merely need debugging information to
+          resolve line numbers there is no loss. Advantage is that
+          build directory object sizes shrink dramatically over a full
+          DEBUG_INFO build and compile times are reduced too.
+          Only works with newer gcc versions.
 config DEBUG_VM
        bool "Debug VM"
        depends on DEBUG_KERNEL
@@ -651,6 +749,15 @@ config DEBUG_LIST
          If unsure, say N.
+config TEST_LIST_SORT
+        bool "Linked list sorting test"
+        depends on DEBUG_KERNEL
+        help
+          Enable this to turn on 'list_sort()' function test. This test is
+          executed only once during system boot, so affects only boot time.
+          If unsure, say N.
 config DEBUG_SG
        bool "Debug SG table operations"
        depends on DEBUG_KERNEL
@@ -765,10 +872,46 @@ config RCU_CPU_STALL_DETECTOR
          CPUs are delaying the current grace period, but only when
          the grace period extends for excessive time periods.
-          Say Y if you want RCU to perform such checks.
+          Say N if you want to disable such checks.
+          Say Y if you are unsure.
+config RCU_CPU_STALL_TIMEOUT
+        int "RCU CPU stall timeout in seconds"
+        depends on RCU_CPU_STALL_DETECTOR
+        range 3 300
+        default 60
+        help
+          If a given RCU grace period extends more than the specified
+          number of seconds, a CPU stall warning is printed.  If the
+          RCU grace period persists, additional CPU stall warnings are
+          printed at more widely spaced intervals.
+config RCU_CPU_STALL_DETECTOR_RUNNABLE
+        bool "RCU CPU stall checking starts automatically at boot"
+        depends on RCU_CPU_STALL_DETECTOR
+        default y
+        help
+          If set, start checking for RCU CPU stalls immediately on
+          boot.  Otherwise, RCU CPU stall checking must be manually
+          enabled.
+          Say Y if you are unsure.
+          Say N if you wish to suppress RCU CPU stall checking during boot.
+config RCU_CPU_STALL_VERBOSE
+        bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
+        depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
+        default y
+        help
+          This option causes RCU to printk detailed per-task information
+          for any tasks that are stalling the current RCU grace period.
          Say N if you are unsure.
+          Say Y if you want to enable such checks.
 config KPROBES_SANITY_TEST
        bool "Kprobes sanity tests"
        depends on DEBUG_KERNEL
@@ -840,8 +983,7 @@ config DEBUG_FORCE_WEAK_PER_CPU
 config LKDTM
        tristate "Linux Kernel Dump Test Tool Module"
-        depends on DEBUG_KERNEL
+        depends on DEBUG_FS
-        depends on KPROBES
        depends on BLOCK
        default n
        help
@@ -852,7 +994,19 @@ config LKDTM
        called lkdtm.
        Documentation on how to use the module can be found in
-        drivers/misc/lkdtm.c
+        Documentation/fault-injection/provoke-crashes.txt
+config CPU_NOTIFIER_ERROR_INJECT
+        tristate "CPU notifier error injection module"
+        depends on HOTPLUG_CPU && DEBUG_KERNEL
+        help
+          This option provides a kernel module that can be used to test
+          the error handling of the cpu notifiers.
+          To compile this code as a module, choose M here: the module will
+          be called cpu-notifier-error-inject.
+          If unsure, say N.
 config FAULT_INJECTION
        bool "Fault-injection framework"
@@ -881,7 +1035,7 @@ config FAIL_MAKE_REQUEST
          Provide fault-injection capability for disk IO.
 config FAIL_IO_TIMEOUT
-        bool "Faul-injection capability for faking disk interrupts"
+        bool "Fault-injection capability for faking disk interrupts"
        depends on FAULT_INJECTION && BLOCK
        help
          Provide fault-injection capability on end IO handling. This
@@ -902,19 +1056,22 @@ config FAULT_INJECTION_STACKTRACE_FILTER
        depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
        depends on !X86_64
        select STACKTRACE
-        select FRAME_POINTER if !PPC && !S390
+        select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
        help
          Provide stacktrace filter for fault-injection capabilities
 config LATENCYTOP
        bool "Latency measuring infrastructure"
-        select FRAME_POINTER if !MIPS && !PPC && !S390
+        depends on HAVE_LATENCYTOP_SUPPORT
+        depends on DEBUG_KERNEL
+        depends on STACKTRACE_SUPPORT
+        depends on PROC_FS
+        select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
        select KALLSYMS
        select KALLSYMS_ALL
        select STACKTRACE
        select SCHEDSTATS
        select SCHED_DEBUG
-        depends on HAVE_LATENCYTOP_SUPPORT
        help
          Enable this option if you want to use the LatencyTOP tool
          to find out which userspace is blocking on what kernel operations.
@@ -995,10 +1152,10 @@ config DYNAMIC_DEBUG
          Usage:
-          Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file,
+          Dynamic debugging is controlled via the 'dynamic_debug/control' file,
          which is contained in the 'debugfs' filesystem. Thus, the debugfs
          filesystem must first be mounted before making use of this feature.
-          We refer the control file as: <debugfs>/dynamic_debug/ddebug. This
+          We refer the control file as: <debugfs>/dynamic_debug/control. This
          file contains a list of the debug statements that can be enabled. The
          format for each line of the file is:
@@ -1013,7 +1170,7 @@ config DYNAMIC_DEBUG
          From a live system:
-                nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug
+                nullarbor:~ # cat <debugfs>/dynamic_debug/control
                # filename:lineno [module]function flags format
                fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012"
                fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012"
@@ -1023,23 +1180,23 @@ config DYNAMIC_DEBUG
                // enable the message at line 1603 of file svcsock.c
                nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
-                                                <debugfs>/dynamic_debug/ddebug
+                                                <debugfs>/dynamic_debug/control
                // enable all the messages in file svcsock.c
                nullarbor:~ # echo -n 'file svcsock.c +p' >
-                                                <debugfs>/dynamic_debug/ddebug
+                                                <debugfs>/dynamic_debug/control
                // enable all the messages in the NFS server module
                nullarbor:~ # echo -n 'module nfsd +p' >
-                                                <debugfs>/dynamic_debug/ddebug
+                                                <debugfs>/dynamic_debug/control
                // enable all 12 messages in the function svc_process()
                nullarbor:~ # echo -n 'func svc_process +p' >
-                                                <debugfs>/dynamic_debug/ddebug
+                                                <debugfs>/dynamic_debug/control
                // disable all 12 messages in the function svc_process()
                nullarbor:~ # echo -n 'func svc_process -p' >
-                                                <debugfs>/dynamic_debug/ddebug
+                                                <debugfs>/dynamic_debug/control
          See Documentation/dynamic-debug-howto.txt for additional information.
@@ -1054,6 +1211,26 @@ config DMA_API_DEBUG
          This option causes a performance degredation.  Use only if you want
          to debug device drivers. If unsure, say N.
+config ATOMIC64_SELFTEST
+        bool "Perform an atomic64_t self-test at boot"
+        help
+          Enable this option to test the atomic64_t functions at boot.
+          If unsure, say N.
+config ASYNC_RAID6_TEST
+        tristate "Self test for hardware accelerated raid6 recovery"
+        depends on ASYNC_RAID6_RECOV
+        select ASYNC_MEMCPY
+        ---help---
+          This is a one-shot self test that permutes through the
+          recovery of all the possible two disk failure scenarios for a
+          N-disk array.  Recovery is performed with the asynchronous
+          raid6 recovery routines, and will optionally use an offload
+          engine if one is available.
+          If unsure, say N.
 source "samples/Kconfig"
 source "lib/Kconfig.kgdb"
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 9b5d1d7f2ef..43cb93fa265 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB
        bool
 menuconfig KGDB
-        bool "KGDB: kernel debugging with remote gdb"
+        bool "KGDB: kernel debugger"
        depends on HAVE_ARCH_KGDB
        depends on DEBUG_KERNEL && EXPERIMENTAL
        help
@@ -57,4 +57,26 @@ config KGDB_TESTS_BOOT_STRING
          information about other strings you could use beyond the
          default of V1F100.
+config KGDB_LOW_LEVEL_TRAP
+       bool "KGDB: Allow debugging with traps in notifiers"
+       depends on X86 || MIPS
+       default n
+       help
+         This will add an extra call back to kgdb for the breakpoint
+         exception handler which will allow kgdb to step
+         through a notify handler.
+config KGDB_KDB
+        bool "KGDB_KDB: include kdb frontend for kgdb"
+        default n
+        help
+          KDB frontend for kernel
+config KDB_KEYBOARD
+        bool "KGDB_KDB: keyboard as input device"
+        depends on VT && KGDB_KDB
+        default n
+        help
+          KDB can use a PS/2 type keyboard for an input device
 endif # KGDB
diff --git a/lib/Makefile b/lib/Makefile
index 3b0b4a696db..cbb774f7d41 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,11 +8,11 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-         rbtree.o radix-tree.o dump_stack.o \
+         rbtree.o radix-tree.o dump_stack.o timerqueue.o\
         idr.o int_sqrt.o extable.o prio_tree.o \
         sha1.o irq_regs.o reciprocal_div.o argv_split.o \
         proportions.o prio_heap.o ratelimit.o show_mem.o \
-         is_single_threaded.o plist.o decompress.o flex_array.o
+         is_single_threaded.o plist.o decompress.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
         bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-         string_helpers.o gcd.o list_sort.o
+         string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -39,8 +39,12 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
+CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
@@ -65,10 +69,13 @@ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
 obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
+obj-$(CONFIG_XZ_DEC) += xz/
+obj-$(CONFIG_RAID6_PQ) += raid6/
 lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
 lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
 lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
+lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
 lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
 obj-$(CONFIG_TEXTSEARCH) += textsearch.o
@@ -81,11 +88,10 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 lib-$(CONFIG_GENERIC_BUG) += bug.o
-obj-$(CONFIG_HAVE_LMB) += lmb.o
 obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
 obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
@@ -100,6 +106,10 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
+obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
+obj-$(CONFIG_AVERAGE) += average.o
 hostprogs-y     := gen_crc32table
 clean-files     := crc32table.h
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 8bee16ec752..a21c12bc727 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -162,12 +162,12 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
        unsigned long flags;
        spinlock_t *lock = lock_addr(v);
-        int ret = 1;
+        int ret = 0;
        spin_lock_irqsave(lock, flags);
        if (v->counter != u) {
                v->counter += a;
-                ret = 0;
+                ret = 1;
        }
        spin_unlock_irqrestore(lock, flags);
        return ret;
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
new file mode 100644
index 00000000000..44524cc8c32
--- /dev/null
+++ b/lib/atomic64_test.c
@@ -0,0 +1,166 @@
+/*
+ * Testsuite for atomic64_t functions
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/atomic.h>
+#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
+static __init int test_atomic64(void)
+{
+        long long v0 = 0xaaa31337c001d00dLL;
+        long long v1 = 0xdeadbeefdeafcafeLL;
+        long long v2 = 0xfaceabadf00df001LL;
+        long long onestwos = 0x1111111122222222LL;
+        long long one = 1LL;
+        atomic64_t v = ATOMIC64_INIT(v0);
+        long long r = v0;
+        BUG_ON(v.counter != r);
+        atomic64_set(&v, v1);
+        r = v1;
+        BUG_ON(v.counter != r);
+        BUG_ON(atomic64_read(&v) != r);
+        INIT(v0);
+        atomic64_add(onestwos, &v);
+        r += onestwos;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        atomic64_add(-one, &v);
+        r += -one;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        r += onestwos;
+        BUG_ON(atomic64_add_return(onestwos, &v) != r);
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        r += -one;
+        BUG_ON(atomic64_add_return(-one, &v) != r);
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        atomic64_sub(onestwos, &v);
+        r -= onestwos;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        atomic64_sub(-one, &v);
+        r -= -one;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        r -= onestwos;
+        BUG_ON(atomic64_sub_return(onestwos, &v) != r);
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        r -= -one;
+        BUG_ON(atomic64_sub_return(-one, &v) != r);
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        atomic64_inc(&v);
+        r += one;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        r += one;
+        BUG_ON(atomic64_inc_return(&v) != r);
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        atomic64_dec(&v);
+        r -= one;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        r -= one;
+        BUG_ON(atomic64_dec_return(&v) != r);
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        BUG_ON(atomic64_xchg(&v, v1) != v0);
+        r = v1;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
+        r = v1;
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        BUG_ON(atomic64_add_unless(&v, one, v0));
+        BUG_ON(v.counter != r);
+        INIT(v0);
+        BUG_ON(!atomic64_add_unless(&v, one, v1));
+        r += one;
+        BUG_ON(v.counter != r);
+#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
+    defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
+        INIT(onestwos);
+        BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
+        r -= one;
+        BUG_ON(v.counter != r);
+        INIT(0);
+        BUG_ON(atomic64_dec_if_positive(&v) != -one);
+        BUG_ON(v.counter != r);
+        INIT(-one);
+        BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
+        BUG_ON(v.counter != r);
+#else
+#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above
+#endif
+        INIT(onestwos);
+        BUG_ON(!atomic64_inc_not_zero(&v));
+        r += one;
+        BUG_ON(v.counter != r);
+        INIT(0);
+        BUG_ON(atomic64_inc_not_zero(&v));
+        BUG_ON(v.counter != r);
+        INIT(-one);
+        BUG_ON(!atomic64_inc_not_zero(&v));
+        r += one;
+        BUG_ON(v.counter != r);
+#ifdef CONFIG_X86
+        printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
+#ifdef CONFIG_X86_64
+               "x86-64",
+#elif defined(CONFIG_X86_CMPXCHG64)
+               "i586+",
+#else
+               "i386+",
+#endif
+               boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
+               boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
+#else
+        printk(KERN_INFO "atomic64 test passed\n");
+#endif
+        return 0;
+}
+core_initcall(test_atomic64);
diff --git a/lib/average.c b/lib/average.c
new file mode 100644
index 00000000000..5576c284149
--- /dev/null
+++ b/lib/average.c
@@ -0,0 +1,61 @@
+/*
+ * lib/average.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <linux/module.h>
+#include <linux/average.h>
+#include <linux/bug.h>
+#include <linux/log2.h>
+/**
+ * DOC: Exponentially Weighted Moving Average (EWMA)
+ *
+ * These are generic functions for calculating Exponentially Weighted Moving
+ * Averages (EWMA). We keep a structure with the EWMA parameters and a scaled
+ * up internal representation of the average value to prevent rounding errors.
+ * The factor for scaling up and the exponential weight (or decay rate) have to
+ * be specified thru the init function. The structure should not be accessed
+ * directly but only thru the helper functions.
+ */
+/**
+ * ewma_init() - Initialize EWMA parameters
+ * @avg: Average structure
+ * @factor: Factor to use for the scaled up internal value. The maximum value
+ *      of averages can be ULONG_MAX/(factor*weight). For performance reasons
+ *      factor has to be a power of 2.
+ * @weight: Exponential weight, or decay rate. This defines how fast the
+ *      influence of older values decreases. For performance reasons weight has
+ *      to be a power of 2.
+ *
+ * Initialize the EWMA parameters for a given struct ewma @avg.
+ */
+void ewma_init(struct ewma *avg, unsigned long factor, unsigned long weight)
+{
+        WARN_ON(!is_power_of_2(weight) || !is_power_of_2(factor));
+        avg->weight = ilog2(weight);
+        avg->factor = ilog2(factor);
+        avg->internal = 0;
+}
+EXPORT_SYMBOL(ewma_init);
+/**
+ * ewma_add() - Exponentially weighted moving average (EWMA)
+ * @avg: Average structure
+ * @val: Current value
+ *
+ * Add a sample to the average.
+ */
+struct ewma *ewma_add(struct ewma *avg, unsigned long val)
+{
+        avg->internal = avg->internal  ?
+                (((avg->internal << avg->weight) - avg->internal) +
+                        (val << avg->factor)) >> avg->weight :
+                (val << avg->factor);
+        return avg;
+}
+EXPORT_SYMBOL(ewma_add);
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 11bf4975058..741fae905ae 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -359,7 +359,6 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area);
 #define CHUNKSZ                         32
 #define nbits_to_hold_value(val)        fls(val)
-#define unhex(c)                        (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
 #define BASEDEC 10              /* fancier cpuset lists input in decimal */
 /**
@@ -466,7 +465,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
                        if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
                                return -EOVERFLOW;
-                        chunk = (chunk << 4) | unhex(c);
+                        chunk = (chunk << 4) | hex_to_bin(c);
                        ndigits++; totaldigits++;
                }
                if (ndigits == 0)
@@ -487,7 +486,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
 EXPORT_SYMBOL(__bitmap_parse);
 /**
- * bitmap_parse_user()
+ * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
 *
 * @ubuf: pointer to user buffer containing string.
 * @ulen: buffer size in bytes.  If string is smaller than this
@@ -619,7 +618,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 EXPORT_SYMBOL(bitmap_parselist);
 /**
- * bitmap_pos_to_ord(buf, pos, bits)
+ * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
 *      @buf: pointer to a bitmap
 *      @pos: a bit position in @buf (0 <= @pos < @bits)
 *      @bits: number of valid bit positions in @buf
@@ -655,7 +654,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
 }
 /**
- * bitmap_ord_to_pos(buf, ord, bits)
+ * bitmap_ord_to_pos - find position of n-th set bit in bitmap
 *      @buf: pointer to bitmap
 *      @ord: ordinal bit position (n-th set bit, n >= 0)
 *      @bits: number of valid bit positions in @buf
@@ -733,10 +732,9 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
        bitmap_zero(dst, bits);
        w = bitmap_weight(new, bits);
-        for (oldbit = find_first_bit(src, bits);
+        for_each_set_bit(oldbit, src, bits) {
-             oldbit < bits;
-             oldbit = find_next_bit(src, bits, oldbit + 1)) {
                int n = bitmap_pos_to_ord(old, oldbit, bits);
                if (n < 0 || w == 0)
                        set_bit(oldbit, dst);   /* identity map */
                else
@@ -903,9 +901,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
         */
        m = 0;
-        for (n = find_first_bit(relmap, bits);
+        for_each_set_bit(n, relmap, bits) {
-             n < bits;
-             n = find_next_bit(relmap, bits, n + 1)) {
                /* m == bitmap_pos_to_ord(relmap, n, bits) */
                if (test_bit(m, orig))
                        set_bit(n, dst);
@@ -934,9 +930,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
                return;
        bitmap_zero(dst, bits);
-        for (oldbit = find_first_bit(orig, bits);
+        for_each_set_bit(oldbit, orig, bits)
-             oldbit < bits;
-             oldbit = find_next_bit(orig, bits, oldbit + 1))
                set_bit(oldbit % sz, dst);
 }
 EXPORT_SYMBOL(bitmap_fold);
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 00000000000..c9c6f035152
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,798 @@
+/*
+ * lib/btree.c  - Simple In-memory B+Tree
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Bits and pieces stolen from Peter Zijlstra's code, which is
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * GPLv2
+ *
+ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
+ *
+ * A relatively simple B+Tree implementation.  I have written it as a learning
+ * exercise to understand how B+Trees work.  Turned out to be useful as well.
+ *
+ * B+Trees can be used similar to Linux radix trees (which don't have anything
+ * in common with textbook radix trees, beware).  Prerequisite for them working
+ * well is that access to a random tree node is much faster than a large number
+ * of operations within each node.
+ *
+ * Disks have fulfilled the prerequisite for a long time.  More recently DRAM
+ * has gained similar properties, as memory access times, when measured in cpu
+ * cycles, have increased.  Cacheline sizes have increased as well, which also
+ * helps B+Trees.
+ *
+ * Compared to radix trees, B+Trees are more efficient when dealing with a
+ * sparsely populated address space.  Between 25% and 50% of the memory is
+ * occupied with valid pointers.  When densely populated, radix trees contain
+ * ~98% pointers - hard to beat.  Very sparse radix trees contain only ~2%
+ * pointers.
+ *
+ * This particular implementation stores pointers identified by a long value.
+ * Storing NULL pointers is illegal, lookup will return NULL when no entry
+ * was found.
+ *
+ * A trick was used that is not commonly found in textbooks.  The lowest
+ * values are to the right, not to the left.  All used slots within a node
+ * are on the left, all unused slots contain zero values.  Most operations
+ * simply loop once over all slots and terminate on the first zero.
+ */
+#include <linux/btree.h>
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define NODESIZE MAX(L1_CACHE_BYTES, 128)
+struct btree_geo {
+        int keylen;
+        int no_pairs;
+        int no_longs;
+};
+struct btree_geo btree_geo32 = {
+        .keylen = 1,
+        .no_pairs = NODESIZE / sizeof(long) / 2,
+        .no_longs = NODESIZE / sizeof(long) / 2,
+};
+EXPORT_SYMBOL_GPL(btree_geo32);
+#define LONG_PER_U64 (64 / BITS_PER_LONG)
+struct btree_geo btree_geo64 = {
+        .keylen = LONG_PER_U64,
+        .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
+        .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo64);
+struct btree_geo btree_geo128 = {
+        .keylen = 2 * LONG_PER_U64,
+        .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
+        .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo128);
+static struct kmem_cache *btree_cachep;
+void *btree_alloc(gfp_t gfp_mask, void *pool_data)
+{
+        return kmem_cache_alloc(btree_cachep, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(btree_alloc);
+void btree_free(void *element, void *pool_data)
+{
+        kmem_cache_free(btree_cachep, element);
+}
+EXPORT_SYMBOL_GPL(btree_free);
+static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
+{
+        unsigned long *node;
+        node = mempool_alloc(head->mempool, gfp);
+        if (likely(node))
+                memset(node, 0, NODESIZE);
+        return node;
+}
+static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
+{
+        size_t i;
+        for (i = 0; i < n; i++) {
+                if (l1[i] < l2[i])
+                        return -1;
+                if (l1[i] > l2[i])
+                        return 1;
+        }
+        return 0;
+}
+static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
+                size_t n)
+{
+        size_t i;
+        for (i = 0; i < n; i++)
+                dest[i] = src[i];
+        return dest;
+}
+static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
+{
+        size_t i;
+        for (i = 0; i < n; i++)
+                s[i] = c;
+        return s;
+}
+static void dec_key(struct btree_geo *geo, unsigned long *key)
+{
+        unsigned long val;
+        int i;
+        for (i = geo->keylen - 1; i >= 0; i--) {
+                val = key[i];
+                key[i] = val - 1;
+                if (val)
+                        break;
+        }
+}
+static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
+{
+        return &node[n * geo->keylen];
+}
+static void *bval(struct btree_geo *geo, unsigned long *node, int n)
+{
+        return (void *)node[geo->no_longs + n];
+}
+static void setkey(struct btree_geo *geo, unsigned long *node, int n,
+                   unsigned long *key)
+{
+        longcpy(bkey(geo, node, n), key, geo->keylen);
+}
+static void setval(struct btree_geo *geo, unsigned long *node, int n,
+                   void *val)
+{
+        node[geo->no_longs + n] = (unsigned long) val;
+}
+static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
+{
+        longset(bkey(geo, node, n), 0, geo->keylen);
+        node[geo->no_longs + n] = 0;
+}
+static inline void __btree_init(struct btree_head *head)
+{
+        head->node = NULL;
+        head->height = 0;
+}
+void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
+{
+        __btree_init(head);
+        head->mempool = mempool;
+}
+EXPORT_SYMBOL_GPL(btree_init_mempool);
+int btree_init(struct btree_head *head)
+{
+        __btree_init(head);
+        head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
+        if (!head->mempool)
+                return -ENOMEM;
+        return 0;
+}
+EXPORT_SYMBOL_GPL(btree_init);
+/*
+ * Release what @head still owns: its root node, if any, and its mempool.
+ *
+ * A tree whose last entry was removed keeps its (now empty) root node,
+ * because btree_remove_level() neither frees nor shrinks a root that has
+ * dropped to zero entries.  Free it here so it is not leaked when the pool
+ * goes away.  Only the root is released; nodes below it are not walked, so
+ * callers are presumably expected to have emptied the tree first.
+ */
+void btree_destroy(struct btree_head *head)
+{
+        /* head->node is NULL for a tree that was never populated */
+        if (head->node)
+                mempool_free(head->node, head->mempool);
+        head->node = NULL;
+        head->height = 0;
+        mempool_destroy(head->mempool);
+        head->mempool = NULL;
+}
+EXPORT_SYMBOL_GPL(btree_destroy);
+void *btree_last(struct btree_head *head, struct btree_geo *geo,
+                 unsigned long *key)
+{
+        int height = head->height;
+        unsigned long *node = head->node;
+        if (height == 0)
+                return NULL;
+        for ( ; height > 1; height--)
+                node = bval(geo, node, 0);
+        longcpy(key, bkey(geo, node, 0), geo->keylen);
+        return bval(geo, node, 0);
+}
+EXPORT_SYMBOL_GPL(btree_last);
+static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
+                  unsigned long *key)
+{
+        return longcmp(bkey(geo, node, pos), key, geo->keylen);
+}
+static int keyzero(struct btree_geo *geo, unsigned long *key)
+{
+        int i;
+        for (i = 0; i < geo->keylen; i++)
+                if (key[i])
+                        return 0;
+        return 1;
+}
+void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
+                unsigned long *key)
+{
+        int i, height = head->height;
+        unsigned long *node = head->node;
+        if (height == 0)
+                return NULL;
+        for ( ; height > 1; height--) {
+                for (i = 0; i < geo->no_pairs; i++)
+                        if (keycmp(geo, node, i, key) <= 0)
+                                break;
+                if (i == geo->no_pairs)
+                        return NULL;
+                node = bval(geo, node, i);
+                if (!node)
+                        return NULL;
+        }
+        if (!node)
+                return NULL;
+        for (i = 0; i < geo->no_pairs; i++)
+                if (keycmp(geo, node, i, key) == 0)
+                        return bval(geo, node, i);
+        return NULL;
+}
+EXPORT_SYMBOL_GPL(btree_lookup);
+int btree_update(struct btree_head *head, struct btree_geo *geo,
+                 unsigned long *key, void *val)
+{
+        int i, height = head->height;
+        unsigned long *node = head->node;
+        if (height == 0)
+                return -ENOENT;
+        for ( ; height > 1; height--) {
+                for (i = 0; i < geo->no_pairs; i++)
+                        if (keycmp(geo, node, i, key) <= 0)
+                                break;
+                if (i == geo->no_pairs)
+                        return -ENOENT;
+                node = bval(geo, node, i);
+                if (!node)
+                        return -ENOENT;
+        }
+        if (!node)
+                return -ENOENT;
+        for (i = 0; i < geo->no_pairs; i++)
+                if (keycmp(geo, node, i, key) == 0) {
+                        setval(geo, node, i, val);
+                        return 0;
+                }
+        return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(btree_update);
+/*
+ * btree_get_prev - find the entry with the largest key smaller than *__key
+ * @head: tree to search
+ * @geo: geometry (key length, pairs per node) of the tree
+ * @__key: in: key to search below; out: key of the entry that was found
+ *
+ * Returns the value stored for the found entry and copies its key into
+ * @__key.  Returns NULL, leaving @__key untouched, if @__key is all zero,
+ * the tree is empty or no matching entry exists.
+ *
+ * Usually this function is quite similar to normal lookup.  But the key of
+ * a parent node may be smaller than the smallest key of all its siblings.
+ * In such a case we cannot just return NULL, as we have only proven that no
+ * key smaller than the search key, but larger than this parent key exists.
+ * So we set the search key to the parent key and retry.  We have to use the
+ * smallest such parent key, which is the last parent key we encountered.
+ *
+ * The retry works on the local copy of the key only.  @__key must keep
+ * pointing at the caller's buffer: redirecting it to the parent key would
+ * make the final longcpy() overwrite a key inside the tree and never hand
+ * the result back to the caller.
+ */
+void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
+                     unsigned long *__key)
+{
+        int i, height;
+        unsigned long *node, *oldnode;
+        unsigned long *retry_key = NULL, key[geo->keylen];
+        if (keyzero(geo, __key))
+                return NULL;
+        if (head->height == 0)
+                return NULL;
+        longcpy(key, __key, geo->keylen);
+retry:
+        /* search for the largest key <= (search key - 1) */
+        dec_key(geo, key);
+        node = head->node;
+        for (height = head->height ; height > 1; height--) {
+                for (i = 0; i < geo->no_pairs; i++)
+                        if (keycmp(geo, node, i, key) <= 0)
+                                break;
+                if (i == geo->no_pairs)
+                        goto miss;
+                oldnode = node;
+                node = bval(geo, node, i);
+                if (!node)
+                        goto miss;
+                /* remember the parent key we descended through */
+                retry_key = bkey(geo, oldnode, i);
+        }
+        if (!node)
+                goto miss;
+        for (i = 0; i < geo->no_pairs; i++) {
+                if (keycmp(geo, node, i, key) <= 0) {
+                        if (bval(geo, node, i)) {
+                                longcpy(__key, bkey(geo, node, i), geo->keylen);
+                                return bval(geo, node, i);
+                        } else
+                                goto miss;
+                }
+        }
+miss:
+        if (retry_key) {
+                /* restart below the last parent key; one retry per miss */
+                longcpy(key, retry_key, geo->keylen);
+                retry_key = NULL;
+                goto retry;
+        }
+        return NULL;
+}
+static int getpos(struct btree_geo *geo, unsigned long *node,
+                unsigned long *key)
+{
+        int i;
+        for (i = 0; i < geo->no_pairs; i++) {
+                if (keycmp(geo, node, i, key) <= 0)
+                        break;
+        }
+        return i;
+}
+static int getfill(struct btree_geo *geo, unsigned long *node, int start)
+{
+        int i;
+        for (i = start; i < geo->no_pairs; i++)
+                if (!bval(geo, node, i))
+                        break;
+        return i;
+}
+/*
+ * locate the node at @level that covers @key (level 1 is the leaf level)
+ */
+static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
+                unsigned long *key, int level)
+{
+        unsigned long *node = head->node;
+        int i, height;
+        for (height = head->height; height > level; height--) {
+                for (i = 0; i < geo->no_pairs; i++)
+                        if (keycmp(geo, node, i, key) <= 0)
+                                break;
+                if ((i == geo->no_pairs) || !bval(geo, node, i)) {
+                        /* right-most key is too large, update it */
+                        /* FIXME: If the right-most key on higher levels is
+                         * always zero, this wouldn't be necessary. */
+                        i--;
+                        setkey(geo, node, i, key);
+                }
+                BUG_ON(i < 0);
+                node = bval(geo, node, i);
+        }
+        BUG_ON(!node);
+        return node;
+}
+static int btree_grow(struct btree_head *head, struct btree_geo *geo,
+                      gfp_t gfp)
+{
+        unsigned long *node;
+        int fill;
+        node = btree_node_alloc(head, gfp);
+        if (!node)
+                return -ENOMEM;
+        if (head->node) {
+                fill = getfill(geo, head->node, 0);
+                setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
+                setval(geo, node, 0, head->node);
+        }
+        head->node = node;
+        head->height++;
+        return 0;
+}
+static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
+{
+        unsigned long *node;
+        int fill;
+        if (head->height <= 1)
+                return;
+        node = head->node;
+        fill = getfill(geo, node, 0);
+        BUG_ON(fill > 1);
+        head->node = bval(geo, node, 0);
+        head->height--;
+        mempool_free(node, head->mempool);
+}
+static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
+                              unsigned long *key, void *val, int level,
+                              gfp_t gfp)
+{
+        unsigned long *node;
+        int i, pos, fill, err;
+        BUG_ON(!val);
+        if (head->height < level) {
+                err = btree_grow(head, geo, gfp);
+                if (err)
+                        return err;
+        }
+retry:
+        node = find_level(head, geo, key, level);
+        pos = getpos(geo, node, key);
+        fill = getfill(geo, node, pos);
+        /* two identical keys are not allowed */
+        BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
+        if (fill == geo->no_pairs) {
+                /* need to split node */
+                unsigned long *new;
+                new = btree_node_alloc(head, gfp);
+                if (!new)
+                        return -ENOMEM;
+                err = btree_insert_level(head, geo,
+                                bkey(geo, node, fill / 2 - 1),
+                                new, level + 1, gfp);
+                if (err) {
+                        mempool_free(new, head->mempool);
+                        return err;
+                }
+                for (i = 0; i < fill / 2; i++) {
+                        setkey(geo, new, i, bkey(geo, node, i));
+                        setval(geo, new, i, bval(geo, node, i));
+                        setkey(geo, node, i, bkey(geo, node, i + fill / 2));
+                        setval(geo, node, i, bval(geo, node, i + fill / 2));
+                        clearpair(geo, node, i + fill / 2);
+                }
+                if (fill & 1) {
+                        setkey(geo, node, i, bkey(geo, node, fill - 1));
+                        setval(geo, node, i, bval(geo, node, fill - 1));
+                        clearpair(geo, node, fill - 1);
+                }
+                goto retry;
+        }
+        BUG_ON(fill >= geo->no_pairs);
+        /* shift and insert */
+        for (i = fill; i > pos; i--) {
+                setkey(geo, node, i, bkey(geo, node, i - 1));
+                setval(geo, node, i, bval(geo, node, i - 1));
+        }
+        setkey(geo, node, pos, key);
+        setval(geo, node, pos, val);
+        return 0;
+}
+int btree_insert(struct btree_head *head, struct btree_geo *geo,
+                unsigned long *key, void *val, gfp_t gfp)
+{
+        return btree_insert_level(head, geo, key, val, 1, gfp);
+}
+EXPORT_SYMBOL_GPL(btree_insert);
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+                unsigned long *key, int level);
+static void merge(struct btree_head *head, struct btree_geo *geo, int level,
+                unsigned long *left, int lfill,
+                unsigned long *right, int rfill,
+                unsigned long *parent, int lpos)
+{
+        int i;
+        for (i = 0; i < rfill; i++) {
+                /* Move all keys to the left */
+                setkey(geo, left, lfill + i, bkey(geo, right, i));
+                setval(geo, left, lfill + i, bval(geo, right, i));
+        }
+        /* Exchange left and right child in parent */
+        setval(geo, parent, lpos, right);
+        setval(geo, parent, lpos + 1, left);
+        /* Remove left (formerly right) child from parent */
+        btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
+        mempool_free(right, head->mempool);
+}
+static void rebalance(struct btree_head *head, struct btree_geo *geo,
+                unsigned long *key, int level, unsigned long *child, int fill)
+{
+        unsigned long *parent, *left = NULL, *right = NULL;
+        int i, no_left, no_right;
+        if (fill == 0) {
+                /* Because we don't steal entries from a neighbor, this case
+                 * can happen.  Parent node contains a single child, this
+                 * node, so merging with a sibling never happens.
+                 */
+                btree_remove_level(head, geo, key, level + 1);
+                mempool_free(child, head->mempool);
+                return;
+        }
+        parent = find_level(head, geo, key, level + 1);
+        i = getpos(geo, parent, key);
+        BUG_ON(bval(geo, parent, i) != child);
+        if (i > 0) {
+                left = bval(geo, parent, i - 1);
+                no_left = getfill(geo, left, 0);
+                if (fill + no_left <= geo->no_pairs) {
+                        merge(head, geo, level,
+                                        left, no_left,
+                                        child, fill,
+                                        parent, i - 1);
+                        return;
+                }
+        }
+        if (i + 1 < getfill(geo, parent, i)) {
+                right = bval(geo, parent, i + 1);
+                no_right = getfill(geo, right, 0);
+                if (fill + no_right <= geo->no_pairs) {
+                        merge(head, geo, level,
+                                        child, fill,
+                                        right, no_right,
+                                        parent, i);
+                        return;
+                }
+        }
+        /*
+         * We could also try to steal one entry from the left or right
+         * neighbor.  By not doing so we changed the invariant from
+         * "all nodes are at least half full" to "no two neighboring
+         * nodes can be merged".  Which means that the average fill of
+         * all nodes is still half or better.
+         */
+}
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+                unsigned long *key, int level)
+{
+        unsigned long *node;
+        int i, pos, fill;
+        void *ret;
+        if (level > head->height) {
+                /* we recursed all the way up */
+                head->height = 0;
+                head->node = NULL;
+                return NULL;
+        }
+        node = find_level(head, geo, key, level);
+        pos = getpos(geo, node, key);
+        fill = getfill(geo, node, pos);
+        if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
+                return NULL;
+        ret = bval(geo, node, pos);
+        /* remove and shift */
+        for (i = pos; i < fill - 1; i++) {
+                setkey(geo, node, i, bkey(geo, node, i + 1));
+                setval(geo, node, i, bval(geo, node, i + 1));
+        }
+        clearpair(geo, node, fill - 1);
+        if (fill - 1 < geo->no_pairs / 2) {
+                if (level < head->height)
+                        rebalance(head, geo, key, level, node, fill - 1);
+                else if (fill - 1 == 1)
+                        btree_shrink(head, geo);
+        }
+        return ret;
+}
+void *btree_remove(struct btree_head *head, struct btree_geo *geo,
+                unsigned long *key)
+{
+        if (head->height == 0)
+                return NULL;
+        return btree_remove_level(head, geo, key, 1);
+}
+EXPORT_SYMBOL_GPL(btree_remove);
+int btree_merge(struct btree_head *target, struct btree_head *victim,
+                struct btree_geo *geo, gfp_t gfp)
+{
+        unsigned long key[geo->keylen];
+        unsigned long dup[geo->keylen];
+        void *val;
+        int err;
+        BUG_ON(target == victim);
+        if (!(target->node)) {
+                /* target is empty, just copy fields over */
+                target->node = victim->node;
+                target->height = victim->height;
+                __btree_init(victim);
+                return 0;
+        }
+        /* TODO: This needs some optimizations.  Currently we do three tree
+         * walks to remove a single object from the victim.
+         */
+        for (;;) {
+                if (!btree_last(victim, geo, key))
+                        break;
+                val = btree_lookup(victim, geo, key);
+                err = btree_insert(target, geo, key, val, gfp);
+                if (err)
+                        return err;
+                /* We must make a copy of the key, as the original will get
+                 * mangled inside btree_remove. */
+                longcpy(dup, key, geo->keylen);
+                btree_remove(victim, geo, dup);
+        }
+        return 0;
+}
+EXPORT_SYMBOL_GPL(btree_merge);
+static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
+                               unsigned long *node, unsigned long opaque,
+                               void (*func)(void *elem, unsigned long opaque,
+                                            unsigned long *key, size_t index,
+                                            void *func2),
+                               void *func2, int reap, int height, size_t count)
+{
+        int i;
+        unsigned long *child;
+        for (i = 0; i < geo->no_pairs; i++) {
+                child = bval(geo, node, i);
+                if (!child)
+                        break;
+                if (height > 1)
+                        count = __btree_for_each(head, geo, child, opaque,
+                                        func, func2, reap, height - 1, count);
+                else
+                        func(child, opaque, bkey(geo, node, i), count++,
+                                        func2);
+        }
+        if (reap)
+                mempool_free(node, head->mempool);
+        return count;
+}
+static void empty(void *elem, unsigned long opaque, unsigned long *key,
+                  size_t index, void *func2)
+{
+}
+void visitorl(void *elem, unsigned long opaque, unsigned long *key,
+              size_t index, void *__func)
+{
+        visitorl_t func = __func;
+        func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitorl);
+void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
+               size_t index, void *__func)
+{
+        visitor32_t func = __func;
+        u32 *key = (void *)__key;
+        func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor32);
+void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
+               size_t index, void *__func)
+{
+        visitor64_t func = __func;
+        u64 *key = (void *)__key;
+        func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor64);
+void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
+                size_t index, void *__func)
+{
+        visitor128_t func = __func;
+        u64 *key = (void *)__key;
+        func(elem, opaque, key[0], key[1], index);
+}
+EXPORT_SYMBOL_GPL(visitor128);
+size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
+                     unsigned long opaque,
+                     void (*func)(void *elem, unsigned long opaque,
+                                  unsigned long *key,
+                                  size_t index, void *func2),
+                     void *func2)
+{
+        size_t count = 0;
+        if (!func2)
+                func = empty;
+        if (head->node)
+                count = __btree_for_each(head, geo, head->node, opaque, func,
+                                func2, 0, head->height, 0);
+        return count;
+}
+EXPORT_SYMBOL_GPL(btree_visitor);
+size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
+                          unsigned long opaque,
+                          void (*func)(void *elem, unsigned long opaque,
+                                       unsigned long *key,
+                                       size_t index, void *func2),
+                          void *func2)
+{
+        size_t count = 0;
+        if (!func2)
+                func = empty;
+        if (head->node)
+                count = __btree_for_each(head, geo, head->node, opaque, func,
+                                func2, 1, head->height, 0);
+        __btree_init(head);
+        return count;
+}
+EXPORT_SYMBOL_GPL(btree_grim_visitor);
+/*
+ * Create the slab cache that btree_alloc()/btree_free() use for tree nodes.
+ * Returns 0 on success or -ENOMEM if the cache could not be created, so a
+ * failed setup is reported at init time instead of surfacing later as a
+ * NULL cache pointer in btree_alloc().
+ */
+static int __init btree_module_init(void)
+{
+        btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
+                        SLAB_HWCACHE_ALIGN, NULL);
+        if (!btree_cachep)
+                return -ENOMEM;
+        return 0;
+}
+static void __exit btree_module_exit(void)
+{
+        kmem_cache_destroy(btree_cachep);
+}
+/* If core code starts using btree, initialization should happen even earlier */
+module_init(btree_module_init);
+module_exit(btree_module_exit);
+MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_LICENSE("GPL");
diff --git a/lib/bug.c b/lib/bug.c
index 300e41afbf9..19552096d16 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr)
        return NULL;
 }
-int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
-                        struct module *mod)
+                         struct module *mod)
 {
        char *secstrings;
        unsigned int i;
@@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
         * could potentially lead to deadlock and thus be counter-productive.
         */
        list_add(&mod->bug_list, &module_bug_list);
-        return 0;
 }
 void module_bug_cleanup(struct module *mod)
@@ -136,8 +134,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
        bug = find_bug(bugaddr);
-        printk(KERN_EMERG "------------[ cut here ]------------\n");
        file = NULL;
        line = 0;
        warning = 0;
@@ -156,19 +152,25 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
        if (warning) {
                /* this is a WARN_ON rather than BUG/BUG_ON */
+                printk(KERN_WARNING "------------[ cut here ]------------\n");
                if (file)
-                        printk(KERN_ERR "Badness at %s:%u\n",
+                        printk(KERN_WARNING "WARNING: at %s:%u\n",
                               file, line);
                else
-                        printk(KERN_ERR "Badness at %p "
+                        printk(KERN_WARNING "WARNING: at %p "
                               "[verbose debug info unavailable]\n",
                               (void *)bugaddr);
+                print_modules();
                show_regs(regs);
-                add_taint(TAINT_WARN);
+                print_oops_end_marker();
+                add_taint(BUG_GET_TAINT(bug));
                return BUG_TRAP_TYPE_WARN;
        }
+        printk(KERN_EMERG "------------[ cut here ]------------\n");
        if (file)
                printk(KERN_CRIT "kernel BUG at %s:%u!\n",
                       file, line);
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644
index 00000000000..4dc20321b0d
--- /dev/null
+++ b/lib/cpu-notifier-error-inject.c
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+static int priority;
+static int cpu_up_prepare_error;
+static int cpu_down_prepare_error;
+module_param(priority, int, 0);
+MODULE_PARM_DESC(priority, "specify cpu notifier priority");
+module_param(cpu_up_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_up_prepare_error,
+                "specify error code to inject CPU_UP_PREPARE action");
+module_param(cpu_down_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_down_prepare_error,
+                "specify error code to inject CPU_DOWN_PREPARE action");
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+                                unsigned long action, void *hcpu)
+{
+        int err = 0;
+        switch (action) {
+        case CPU_UP_PREPARE:
+        case CPU_UP_PREPARE_FROZEN:
+                err = cpu_up_prepare_error;
+                break;
+        case CPU_DOWN_PREPARE:
+        case CPU_DOWN_PREPARE_FROZEN:
+                err = cpu_down_prepare_error;
+                break;
+        }
+        if (err)
+                printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
+        return notifier_from_errno(err);
+}
+static struct notifier_block err_inject_cpu_notifier = {
+        .notifier_call = err_inject_cpu_callback,
+};
+static int err_inject_init(void)
+{
+        err_inject_cpu_notifier.priority = priority;
+        return register_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+static void err_inject_exit(void)
+{
+        unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+MODULE_DESCRIPTION("CPU notifier error injection module");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 7bb4142a502..05d6aca7fc1 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -1,3 +1,4 @@
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/cpumask.h>
diff --git a/lib/crc32.c b/lib/crc32.c
index 02e3b31b3a7..4855995fcde 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -25,16 +25,19 @@
 #include <linux/module.h>
 #include <linux/compiler.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 #include <asm/atomic.h>
 #include "crc32defs.h"
 #if CRC_LE_BITS == 8
-#define tole(x) __constant_cpu_to_le32(x)
+# define tole(x) __constant_cpu_to_le32(x)
-#define tobe(x) __constant_cpu_to_be32(x)
 #else
-#define tole(x) (x)
+# define tole(x) (x)
-#define tobe(x) (x)
+#endif
+#if CRC_BE_BITS == 8
+# define tobe(x) __constant_cpu_to_be32(x)
+#else
+# define tobe(x) (x)
 #endif
 #include "crc32table.h"
@@ -45,33 +48,37 @@ MODULE_LICENSE("GPL");
 #if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
 static inline u32
-crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
+crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
 # ifdef __LITTLE_ENDIAN
-#  define DO_CRC(x) crc = tab[(crc ^ (x)) & 255 ] ^ (crc >> 8)
+#  define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
+#  define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
+                tab[2][(crc >> 8) & 255] ^ \
+                tab[1][(crc >> 16) & 255] ^ \
+                tab[0][(crc >> 24) & 255]
 # else
-#  define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+#  define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+#  define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
+                tab[1][(crc >> 8) & 255] ^ \
+                tab[2][(crc >> 16) & 255] ^ \
+                tab[3][(crc >> 24) & 255]
 # endif
-        const u32 *b = (const u32 *)buf;
+        const u32 *b;
        size_t    rem_len;
        /* Align it */
-        if (unlikely((long)b & 3 && len)) {
+        if (unlikely((long)buf & 3 && len)) {
-                u8 *p = (u8 *)b;
                do {
-                        DO_CRC(*p++);
+                        DO_CRC(*buf++);
-                } while ((--len) && ((long)p)&3);
+                } while ((--len) && ((long)buf)&3);
-                b = (u32 *)p;
        }
        rem_len = len & 3;
        /* load data 32 bits wide, xor data 32 bits wide. */
        len = len >> 2;
+        b = (const u32 *)buf;
        for (--b; len; --len) {
                crc ^= *++b; /* use pre increment for speed */
-                DO_CRC(0);
+                DO_CRC4;
-                DO_CRC(0);
-                DO_CRC(0);
-                DO_CRC(0);
        }
        len = rem_len;
        /* And the last few bytes */
@@ -82,6 +89,8 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
                } while (--len);
        }
        return crc;
+#undef DO_CRC
+#undef DO_CRC4
 }
 #endif
 /**
@@ -114,14 +123,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
 # if CRC_LE_BITS == 8
-        const u32      *tab = crc32table_le;
+        const u32      (*tab)[] = crc32table_le;
        crc = __cpu_to_le32(crc);
        crc = crc32_body(crc, p, len, tab);
        return __le32_to_cpu(crc);
-#undef ENDIAN_SHIFT
-#undef DO_CRC
 # elif CRC_LE_BITS == 4
        while (len--) {
                crc ^= *p++;
@@ -174,14 +180,11 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 # if CRC_BE_BITS == 8
-        const u32      *tab = crc32table_be;
+        const u32      (*tab)[] = crc32table_be;
        crc = __cpu_to_be32(crc);
        crc = crc32_body(crc, p, len, tab);
        return __be32_to_cpu(crc);
-#undef ENDIAN_SHIFT
-#undef DO_CRC
 # elif CRC_BE_BITS == 4
        while (len--) {
                crc ^= *p++ << 24;
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b11731b9..b1c17730767 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -8,7 +8,6 @@
 *
 *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 */
-#include <linux/kernel.h>
 #include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -23,6 +22,7 @@
 * shut up after that.
 */
 int debug_locks = 1;
+EXPORT_SYMBOL_GPL(debug_locks);
 /*
 * The locking-testsuite uses <debug_locks_silent> to get a
@@ -38,7 +38,6 @@ int debug_locks_off(void)
 {
        if (__debug_locks_off()) {
                if (!debug_locks_silent) {
-                        oops_in_progress = 1;
                        console_verbose();
                        return 1;
                }
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a9a8996d286..deebcc57d4e 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 #include <linux/hash.h>
 #define ODEBUG_HASH_BITS        14
@@ -140,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
                obj->object = addr;
                obj->descr  = descr;
                obj->state  = ODEBUG_STATE_NONE;
+                obj->astate = 0;
                hlist_del(&obj->node);
                hlist_add_head(&obj->node, &b->list);
@@ -251,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
        if (limit < 5 && obj->descr != descr_test) {
                limit++;
-                WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg,
+                WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
-                       obj_states[obj->state], obj->descr->name);
+                                 "object type: %s\n",
+                        msg, obj_states[obj->state], obj->astate,
+                        obj->descr->name);
        }
        debug_objects_warnings++;
 }
@@ -446,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
                case ODEBUG_STATE_INIT:
                case ODEBUG_STATE_INACTIVE:
                case ODEBUG_STATE_ACTIVE:
-                        obj->state = ODEBUG_STATE_INACTIVE;
+                        if (!obj->astate)
+                                obj->state = ODEBUG_STATE_INACTIVE;
+                        else
+                                debug_print_object(obj, "deactivate");
                        break;
                case ODEBUG_STATE_DESTROYED:
@@ -552,6 +559,53 @@ out_unlock:
        raw_spin_unlock_irqrestore(&db->lock, flags);
 }
+/**
+ * debug_object_active_state - check and advance an object's active sub-state
+ * @addr:       address of the object
+ * @descr:      pointer to an object specific debug description structure
+ * @expect:     sub-state (astate) the object must currently be in
+ * @next:       sub-state stored when @expect matches
+ *
+ * Does nothing while debug_objects_enabled is zero. Otherwise the object is
+ * looked up in its hash bucket under the bucket lock. Only an object in
+ * ODEBUG_STATE_ACTIVE whose astate equals @expect is moved to @next; any
+ * other tracked object, or an untracked address, is reported through
+ * debug_print_object() with the message "active_state".
+ */
+void
+debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+                          unsigned int expect, unsigned int next)
+{
+        struct debug_bucket *bucket;
+        struct debug_obj *found;
+        unsigned long irqflags;
+        if (!debug_objects_enabled)
+                return;
+        bucket = get_bucket((unsigned long) addr);
+        raw_spin_lock_irqsave(&bucket->lock, irqflags);
+        found = lookup_object(addr, bucket);
+        if (!found) {
+                /* Untracked address: report it via a temporary object. */
+                struct debug_obj missing = { .object = addr,
+                                             .state = ODEBUG_STATE_NOTAVAILABLE,
+                                             .descr = descr };
+                debug_print_object(&missing, "active_state");
+        } else if (found->state == ODEBUG_STATE_ACTIVE &&
+                   found->astate == expect) {
+                found->astate = next;
+        } else {
+                /* Wrong main state, or active with an unexpected sub-state. */
+                debug_print_object(found, "active_state");
+        }
+        raw_spin_unlock_irqrestore(&bucket->lock, irqflags);
+}
 #ifdef CONFIG_DEBUG_OBJECTS_FREE
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
@@ -773,7 +827,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state)
        }
 }
-static int
+static int __init
 check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
 {
        struct debug_bucket *db;
@@ -916,7 +970,7 @@ void __init debug_objects_early_init(void)
 /*
 * Convert the statically allocated objects to dynamic ones:
 */
-static int debug_objects_replace_static_objects(void)
+static int __init debug_objects_replace_static_objects(void)
 {
        struct debug_bucket *db = obj_hash;
        struct hlist_node *node, *tmp;
diff --git a/lib/decompress.c b/lib/decompress.c
index a7606815541..3d766b7f60a 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -8,6 +8,7 @@
 #include <linux/decompress/bunzip2.h>
 #include <linux/decompress/unlzma.h>
+#include <linux/decompress/unxz.h>
 #include <linux/decompress/inflate.h>
 #include <linux/decompress/unlzo.h>
@@ -23,6 +24,9 @@
 #ifndef CONFIG_DECOMPRESS_LZMA
 # define unlzma NULL
 #endif
+#ifndef CONFIG_DECOMPRESS_XZ
+# define unxz NULL
+#endif
 #ifndef CONFIG_DECOMPRESS_LZO
 # define unlzo NULL
 #endif
@@ -36,6 +40,7 @@ static const struct compress_format {
        { {037, 0236}, "gzip", gunzip },
        { {0x42, 0x5a}, "bzip2", bunzip2 },
        { {0x5d, 0x00}, "lzma", unlzma },
+        { {0xfd, 0x37}, "xz", unxz },
        { {0x89, 0x4c}, "lzo", unlzo },
        { {0, 0}, NULL, NULL }
 };
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index a4e971dee10..a7b80c1d6a0 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -49,7 +49,6 @@
 #define PREBOOT
 #else
 #include <linux/decompress/bunzip2.h>
-#include <linux/slab.h>
 #endif /* STATIC */
 #include <linux/decompress/mm.h>
@@ -107,6 +106,8 @@ struct bunzip_data {
        unsigned char selectors[32768];         /* nSelectors = 15 bits */
        struct group_data groups[MAX_GROUPS];   /* Huffman coding tables */
        int io_error;                   /* non-zero if we have IO error */
+        int byteCount[256];
+        unsigned char symToByte[256], mtfSymbol[256];
 };
@@ -158,14 +159,16 @@ static int INIT get_next_block(struct bunzip_data *bd)
        int *base = NULL;
        int *limit = NULL;
        int dbufCount, nextSym, dbufSize, groupCount, selector,
-                i, j, k, t, runPos, symCount, symTotal, nSelectors,
+                i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
-                byteCount[256];
+        unsigned char uc, *symToByte, *mtfSymbol, *selectors;
-        unsigned char uc, symToByte[256], mtfSymbol[256], *selectors;
        unsigned int *dbuf, origPtr;
        dbuf = bd->dbuf;
        dbufSize = bd->dbufSize;
        selectors = bd->selectors;
+        byteCount = bd->byteCount;
+        symToByte = bd->symToByte;
+        mtfSymbol = bd->mtfSymbol;
        /* Read in header signature and CRC, then validate signature.
           (last block signature means CRC is for whole file, return now) */
@@ -678,13 +681,12 @@ STATIC int INIT bunzip2(unsigned char *buf, int len,
                        int(*flush)(void*, unsigned int),
                        unsigned char *outbuf,
                        int *pos,
-                        void(*error_fn)(char *x))
+                        void(*error)(char *x))
 {
        struct bunzip_data *bd;
        int i = -1;
        unsigned char *inbuf;
-        set_error_fn(error_fn);
        if (flush)
                outbuf = malloc(BZIP2_IOBUF_SIZE);
@@ -747,8 +749,8 @@ STATIC int INIT decompress(unsigned char *buf, int len,
                        int(*flush)(void*, unsigned int),
                        unsigned char *outbuf,
                        int *pos,
-                        void(*error_fn)(char *x))
+                        void(*error)(char *x))
 {
-        return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
+        return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
 }
 #endif
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
index fc686c7a0a0..19ff89e34ee 100644
--- a/lib/decompress_inflate.c
+++ b/lib/decompress_inflate.c
@@ -19,7 +19,6 @@
 #include "zlib_inflate/inflate.h"
 #include "zlib_inflate/infutil.h"
-#include <linux/slab.h>
 #endif /* STATIC */
@@ -27,7 +26,7 @@
 #define GZIP_IOBUF_SIZE (16*1024)
-static int nofill(void *buffer, unsigned int len)
+static int INIT nofill(void *buffer, unsigned int len)
 {
        return -1;
 }
@@ -38,13 +37,12 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
                       int(*flush)(void*, unsigned int),
                       unsigned char *out_buf,
                       int *pos,
-                       void(*error_fn)(char *x)) {
+                       void(*error)(char *x)) {
        u8 *zbuf;
        struct z_stream_s *strm;
        int rc;
        size_t out_len;
-        set_error_fn(error_fn);
        rc = -1;
        if (flush) {
                out_len = 0x8000; /* 32 K */
@@ -100,13 +98,22 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
         * possible asciz filename)
         */
        strm->next_in = zbuf + 10;
+        strm->avail_in = len - 10;
        /* skip over asciz filename */
        if (zbuf[3] & 0x8) {
-                while (strm->next_in[0])
+                do {
-                        strm->next_in++;
+                        /*
-                strm->next_in++;
+                         * If the filename doesn't fit into the buffer,
+                         * the file is very probably corrupt. Don't try
+                         * to read more data.
+                         */
+                        if (strm->avail_in == 0) {
+                                error("header error");
+                                goto gunzip_5;
+                        }
+                        --strm->avail_in;
+                } while (*strm->next_in++);
        }
-        strm->avail_in = len - (strm->next_in - zbuf);
        strm->next_out = out_buf;
        strm->avail_out = out_len;
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
index ca82fde81c8..476c65af970 100644
--- a/lib/decompress_unlzma.c
+++ b/lib/decompress_unlzma.c
@@ -33,7 +33,6 @@
 #define PREBOOT
 #else
 #include <linux/decompress/unlzma.h>
-#include <linux/slab.h>
 #endif /* STATIC */
 #include <linux/decompress/mm.h>
@@ -74,6 +73,7 @@ struct rc {
        uint32_t code;
        uint32_t range;
        uint32_t bound;
+        void (*error)(char *);
 };
@@ -82,7 +82,7 @@ struct rc {
 #define RC_MODEL_TOTAL_BITS 11
-static int nofill(void *buffer, unsigned int len)
+static int INIT nofill(void *buffer, unsigned int len)
 {
        return -1;
 }
@@ -92,7 +92,7 @@ static void INIT rc_read(struct rc *rc)
 {
        rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE);
        if (rc->buffer_size <= 0)
-                error("unexpected EOF");
+                rc->error("unexpected EOF");
        rc->ptr = rc->buffer;
        rc->buffer_end = rc->buffer + rc->buffer_size;
 }
@@ -127,12 +127,6 @@ static inline void INIT rc_init_code(struct rc *rc)
 }
-/* Called once. TODO: bb_maybe_free() */
-static inline void INIT rc_free(struct rc *rc)
-{
-        free(rc->buffer);
-}
 /* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */
 static void INIT rc_do_normalize(struct rc *rc)
 {
@@ -169,7 +163,7 @@ static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p)
        rc->range = rc->bound;
        *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
 }
-static inline void rc_update_bit_1(struct rc *rc, uint16_t *p)
+static inline void INIT rc_update_bit_1(struct rc *rc, uint16_t *p)
 {
        rc->range -= rc->bound;
        rc->code -= rc->bound;
@@ -319,32 +313,38 @@ static inline uint8_t INIT peek_old_byte(struct writer *wr,
 }
-static inline void INIT write_byte(struct writer *wr, uint8_t byte)
+static inline int INIT write_byte(struct writer *wr, uint8_t byte)
 {
        wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte;
        if (wr->flush && wr->buffer_pos == wr->header->dict_size) {
                wr->buffer_pos = 0;
                wr->global_pos += wr->header->dict_size;
-                wr->flush((char *)wr->buffer, wr->header->dict_size);
+                if (wr->flush((char *)wr->buffer, wr->header->dict_size)
+                                != wr->header->dict_size)
+                        return -1;
        }
+        return 0;
 }
-static inline void INIT copy_byte(struct writer *wr, uint32_t offs)
+static inline int INIT copy_byte(struct writer *wr, uint32_t offs)
 {
-        write_byte(wr, peek_old_byte(wr, offs));
+        return write_byte(wr, peek_old_byte(wr, offs));
 }
-static inline void INIT copy_bytes(struct writer *wr,
+static inline int INIT copy_bytes(struct writer *wr,
                                         uint32_t rep0, int len)
 {
        do {
-                copy_byte(wr, rep0);
+                if (copy_byte(wr, rep0))
+                        return -1;
                len--;
        } while (len != 0 && wr->buffer_pos < wr->header->dst_size);
+        return len;
 }
-static inline void INIT process_bit0(struct writer *wr, struct rc *rc,
+static inline int INIT process_bit0(struct writer *wr, struct rc *rc,
                                     struct cstate *cst, uint16_t *p,
                                     int pos_state, uint16_t *prob,
                                     int lc, uint32_t literal_pos_mask) {
@@ -378,16 +378,17 @@ static inline void INIT process_bit0(struct writer *wr, struct rc *rc,
                uint16_t *prob_lit = prob + mi;
                rc_get_bit(rc, prob_lit, &mi);
        }
-        write_byte(wr, mi);
        if (cst->state < 4)
                cst->state = 0;
        else if (cst->state < 10)
                cst->state -= 3;
        else
                cst->state -= 6;
+        return write_byte(wr, mi);
 }
-static inline void INIT process_bit1(struct writer *wr, struct rc *rc,
+static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
                                            struct cstate *cst, uint16_t *p,
                                            int pos_state, uint16_t *prob) {
  int offset;
@@ -418,8 +419,7 @@ static inline void INIT process_bit1(struct writer *wr, struct rc *rc,
                                cst->state = cst->state < LZMA_NUM_LIT_STATES ?
                                        9 : 11;
-                                copy_byte(wr, cst->rep0);
+                                return copy_byte(wr, cst->rep0);
-                                return;
                        } else {
                                rc_update_bit_1(rc, prob);
                        }
@@ -521,12 +521,15 @@ static inline void INIT process_bit1(struct writer *wr, struct rc *rc,
                } else
                        cst->rep0 = pos_slot;
                if (++(cst->rep0) == 0)
-                        return;
+                        return 0;
+                if (cst->rep0 > wr->header->dict_size
+                                || cst->rep0 > get_pos(wr))
+                        return -1;
        }
        len += LZMA_MATCH_MIN_LEN;
-        copy_bytes(wr, cst->rep0, len);
+        return copy_bytes(wr, cst->rep0, len);
 }
@@ -536,7 +539,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
                              int(*flush)(void*, unsigned int),
                              unsigned char *output,
                              int *posp,
-                              void(*error_fn)(char *x)
+                              void(*error)(char *x)
        )
 {
        struct lzma_header header;
@@ -552,7 +555,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
        unsigned char *inbuf;
        int ret = -1;
-        set_error_fn(error_fn);
+        rc.error = error;
        if (buf)
                inbuf = buf;
@@ -580,8 +583,10 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
                ((unsigned char *)&header)[i] = *rc.ptr++;
        }
-        if (header.pos >= (9 * 5 * 5))
+        if (header.pos >= (9 * 5 * 5)) {
                error("bad header");
+                goto exit_1;
+        }
        mi = 0;
        lc = header.pos;
@@ -627,21 +632,29 @@ STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
                int pos_state = get_pos(&wr) & pos_state_mask;
                uint16_t *prob = p + LZMA_IS_MATCH +
                        (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state;
-                if (rc_is_bit_0(&rc, prob))
+                if (rc_is_bit_0(&rc, prob)) {
-                        process_bit0(&wr, &rc, &cst, p, pos_state, prob,
+                        if (process_bit0(&wr, &rc, &cst, p, pos_state, prob,
-                                     lc, literal_pos_mask);
+                                        lc, literal_pos_mask)) {
-                else {
+                                error("LZMA data is corrupt");
-                        process_bit1(&wr, &rc, &cst, p, pos_state, prob);
+                                goto exit_3;
+                        }
+                } else {
+                        if (process_bit1(&wr, &rc, &cst, p, pos_state, prob)) {
+                                error("LZMA data is corrupt");
+                                goto exit_3;
+                        }
                        if (cst.rep0 == 0)
                                break;
                }
+                if (rc.buffer_size <= 0)
+                        goto exit_3;
        }
        if (posp)
                *posp = rc.ptr-rc.buffer;
-        if (wr.flush)
+        if (!wr.flush || wr.flush(wr.buffer, wr.buffer_pos) == wr.buffer_pos)
-                wr.flush(wr.buffer, wr.buffer_pos);
+                ret = 0;
-        ret = 0;
+exit_3:
        large_free(p);
 exit_2:
        if (!output)
@@ -659,9 +672,9 @@ STATIC int INIT decompress(unsigned char *buf, int in_len,
                              int(*flush)(void*, unsigned int),
                              unsigned char *output,
                              int *posp,
-                              void(*error_fn)(char *x)
+                              void(*error)(char *x)
        )
 {
-        return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn);
+        return unlzma(buf, in_len - 4, fill, flush, output, posp, error);
 }
 #endif
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index db521f45626..5a7a2adf4c4 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -33,7 +33,6 @@
 #ifdef STATIC
 #include "lzo/lzo1x_decompress.c"
 #else
-#include <linux/slab.h>
 #include <linux/decompress/unlzo.h>
 #endif
@@ -49,14 +48,25 @@ static const unsigned char lzop_magic[] = {
 #define LZO_BLOCK_SIZE        (256*1024l)
 #define HEADER_HAS_FILTER      0x00000800L
+#define HEADER_SIZE_MIN       (9 + 7     + 4 + 8     + 1       + 4)
+#define HEADER_SIZE_MAX       (9 + 7 + 1 + 8 + 8 + 4 + 1 + 255 + 4)
-STATIC inline int INIT parse_header(u8 *input, u8 *skip)
+STATIC inline int INIT parse_header(u8 *input, int *skip, int in_len)
 {
        int l;
        u8 *parse = input;
+        u8 *end = input + in_len;
        u8 level = 0;
        u16 version;
+        /*
+         * Check that there's enough input to possibly have a valid header.
+         * Then it is possible to parse several fields until the minimum
+         * size may have been used.
+         */
+        if (in_len < HEADER_SIZE_MIN)
+                return 0;
        /* read magic: 9 first bits */
        for (l = 0; l < 9; l++) {
                if (*parse++ != lzop_magic[l])
@@ -74,6 +84,15 @@ STATIC inline int INIT parse_header(u8 *input, u8 *skip)
        else
                parse += 4; /* flags */
+        /*
+         * At least mode, mtime_low, filename length, and checksum must
+         * be left to be parsed. If also mtime_high is present, it's OK
+         * because the next input buffer check is after reading the
+         * filename length.
+         */
+        if (end - parse < 8 + 1 + 4)
+                return 0;
        /* skip mode and mtime_low */
        parse += 8;
        if (version >= 0x0940)
@@ -81,6 +100,8 @@ STATIC inline int INIT parse_header(u8 *input, u8 *skip)
        l = *parse++;
        /* don't care about the file name, and skip checksum */
+        if (end - parse < l + 4)
+                return 0;
        parse += l + 4;
        *skip = parse - input;
@@ -91,15 +112,14 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
                                int (*fill) (void *, unsigned int),
                                int (*flush) (void *, unsigned int),
                                u8 *output, int *posp,
-                                void (*error_fn) (char *x))
+                                void (*error) (char *x))
 {
-        u8 skip = 0, r = 0;
+        u8 r = 0;
+        int skip = 0;
        u32 src_len, dst_len;
        size_t tmp;
        u8 *in_buf, *in_buf_save, *out_buf;
-        int obytes_processed = 0;
+        int ret = -1;
-        set_error_fn(error_fn);
        if (output) {
                out_buf = output;
@@ -119,8 +139,8 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
                goto exit_1;
        } else if (input) {
                in_buf = input;
-        } else if (!fill || !posp) {
+        } else if (!fill) {
-                error("NULL input pointer and missing position pointer or fill function");
+                error("NULL input pointer and missing fill function");
                goto exit_1;
        } else {
                in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
@@ -134,22 +154,47 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
        if (posp)
                *posp = 0;
-        if (fill)
+        if (fill) {
-                fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+                /*
+                 * Start from in_buf + HEADER_SIZE_MAX to make it possible
+                 * to use memcpy() to copy the unused data to the beginning
+                 * of the buffer. This way memmove() isn't needed which
+                 * is missing from pre-boot environments of most archs.
+                 */
+                in_buf += HEADER_SIZE_MAX;
+                in_len = fill(in_buf, HEADER_SIZE_MAX);
+        }
-        if (!parse_header(input, &skip)) {
+        if (!parse_header(in_buf, &skip, in_len)) {
                error("invalid header");
                goto exit_2;
        }
        in_buf += skip;
+        in_len -= skip;
+        if (fill) {
+                /* Move the unused data to the beginning of the buffer. */
+                memcpy(in_buf_save, in_buf, in_len);
+                in_buf = in_buf_save;
+        }
        if (posp)
                *posp = skip;
        for (;;) {
                /* read uncompressed block size */
+                if (fill && in_len < 4) {
+                        skip = fill(in_buf + in_len, 4 - in_len);
+                        if (skip > 0)
+                                in_len += skip;
+                }
+                if (in_len < 4) {
+                        error("file corrupted");
+                        goto exit_2;
+                }
                dst_len = get_unaligned_be32(in_buf);
                in_buf += 4;
+                in_len -= 4;
                /* exit if last block */
                if (dst_len == 0) {
@@ -164,8 +209,18 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
                }
                /* read compressed block size, and skip block checksum info */
+                if (fill && in_len < 8) {
+                        skip = fill(in_buf + in_len, 8 - in_len);
+                        if (skip > 0)
+                                in_len += skip;
+                }
+                if (in_len < 8) {
+                        error("file corrupted");
+                        goto exit_2;
+                }
                src_len = get_unaligned_be32(in_buf);
                in_buf += 8;
+                in_len -= 8;
                if (src_len <= 0 || src_len > dst_len) {
                        error("file corrupted");
@@ -173,29 +228,55 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
                }
                /* decompress */
+                if (fill && in_len < src_len) {
+                        skip = fill(in_buf + in_len, src_len - in_len);
+                        if (skip > 0)
+                                in_len += skip;
+                }
+                if (in_len < src_len) {
+                        error("file corrupted");
+                        goto exit_2;
+                }
                tmp = dst_len;
-                r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
+                /* When the input data is not compressed at all,
+                 * lzo1x_decompress_safe will fail, so call memcpy()
+                 * instead */
+                if (unlikely(dst_len == src_len))
+                        memcpy(out_buf, in_buf, src_len);
+                else {
+                        r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
                                                out_buf, &tmp);
-                if (r != LZO_E_OK || dst_len != tmp) {
+                        if (r != LZO_E_OK || dst_len != tmp) {
-                        error("Compressed data violation");
+                                error("Compressed data violation");
-                        goto exit_2;
+                                goto exit_2;
+                        }
                }
-                obytes_processed += dst_len;
+                if (flush && flush(out_buf, dst_len) != dst_len)
-                if (flush)
+                        goto exit_2;
-                        flush(out_buf, dst_len);
                if (output)
                        out_buf += dst_len;
                if (posp)
                        *posp += src_len + 12;
+                in_buf += src_len;
+                in_len -= src_len;
                if (fill) {
+                        /*
+                         * If there happens to still be unused data left in
+                         * in_buf, move it to the beginning of the buffer.
+                         * Use a loop to avoid memmove() dependency.
+                         */
+                        if (in_len > 0)
+                                for (skip = 0; skip < in_len; ++skip)
+                                        in_buf_save[skip] = in_buf[skip];
                        in_buf = in_buf_save;
-                        fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+                }
-                } else
-                        in_buf += src_len;
        }
+        ret = 0;
 exit_2:
        if (!input)
                free(in_buf);
@@ -203,7 +284,7 @@ exit_1:
        if (!output)
                free(out_buf);
 exit:
-        return obytes_processed;
+        return ret;
 }
 #define decompress unlzo
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c
new file mode 100644
index 00000000000..cecd23df2b9
--- /dev/null
+++ b/lib/decompress_unxz.c
@@ -0,0 +1,397 @@
+/*
+ * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+/*
+ * Important notes about in-place decompression
+ *
+ * At least on x86, the kernel is decompressed in place: the compressed data
+ * is placed to the end of the output buffer, and the decompressor overwrites
+ * most of the compressed data. There must be enough safety margin to
+ * guarantee that the write position is always behind the read position.
+ *
+ * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
+ * Note that the margin with XZ is bigger than with Deflate (gzip)!
+ *
+ * The worst case for in-place decompression is that the beginning of
+ * the file is compressed extremely well, and the rest of the file is
+ * uncompressible. Thus, we must look for worst-case expansion when the
+ * compressor is encoding uncompressible data.
+ *
+ * The structure of the .xz file in case of a compressed kernel is as follows.
+ * Sizes (in bytes) of the fields are in parentheses.
+ *
+ *    Stream Header (12)
+ *    Block:
+ *      Block Header (8-12)
+ *      Compressed Data (N)
+ *      Block Padding (0-3)
+ *      CRC32 (4)
+ *    Index (8-20)
+ *    Stream Footer (12)
+ *
+ * Normally there is exactly one Block, but let's assume that there are
+ * 2-4 Blocks just in case. Because Stream Header and also Block Header
+ * of the first Block don't make the decompressor produce any uncompressed
+ * data, we can ignore them from our calculations. Block Headers of possible
+ * additional Blocks have to be taken into account still. With these
+ * assumptions, it is safe to assume that the total header overhead is
+ * less than 128 bytes.
+ *
+ * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
+ * doesn't change the size of the data, it is enough to calculate the
+ * safety margin for LZMA2.
+ *
+ * LZMA2 stores the data in chunks. Each chunk has a header whose size is
+ * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that
+ * the maximum chunk header size is 8 bytes. After the chunk header, there
+ * may be up to 64 KiB of actual payload in the chunk. Often the payload is
+ * quite a bit smaller though; to be safe, let's assume that an average
+ * chunk has only 32 KiB of payload.
+ *
+ * The maximum uncompressed size of the payload is 2 MiB. The minimum
+ * uncompressed size of the payload is in practice never less than the
+ * payload size itself. The LZMA2 format would allow uncompressed size
+ * to be less than the payload size, but no sane compressor creates such
+ * files. LZMA2 supports storing uncompressible data in uncompressed form,
+ * so there's never a need to create payloads whose uncompressed size is
+ * smaller than the compressed size.
+ *
+ * The assumption, that the uncompressed size of the payload is never
+ * smaller than the payload itself, is valid only when talking about
+ * the payload as a whole. It is possible that the payload has parts where
+ * the decompressor consumes more input than it produces output. Calculating
+ * the worst case for this would be tricky. Instead of trying to do that,
+ * let's simply make sure that the decompressor never overwrites any bytes
+ * of the payload which it is currently reading.
+ *
+ * Now we have enough information to calculate the safety margin. We need
+ *   - 128 bytes for the .xz file format headers;
+ *   - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header
+ *     per chunk, each chunk having average payload size of 32 KiB); and
+ *   - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
+ *     the decompressor never overwrites anything from the LZMA2 chunk
+ *     payload it is currently reading.
+ *
+ * We get the following formula:
+ *
+ *    safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
+ *                  = 128 + (uncompressed_size >> 12) + 65536
+ *
+ * For comparison, according to arch/x86/boot/compressed/misc.c, the
+ * equivalent formula for Deflate is this:
+ *
+ *    safety_margin = 18 + (uncompressed_size >> 12) + 32768
+ *
+ * Thus, when updating Deflate-only in-place kernel decompressor to
+ * support XZ, the fixed overhead has to be increased from 18+32768 bytes
+ * to 128+65536 bytes.
+ */
+/*
+ * STATIC is defined to "static" if we are being built for kernel
+ * decompression (pre-boot code). <linux/decompress/mm.h> will define
+ * STATIC to empty if it wasn't already defined. Since we will need to
+ * know later if we are being used for kernel decompression, we define
+ * XZ_PREBOOT here.
+ */
+#ifdef STATIC
+#       define XZ_PREBOOT
+#endif
+#ifdef __KERNEL__
+#       include <linux/decompress/mm.h>
+#endif
+#define XZ_EXTERN STATIC
+#ifndef XZ_PREBOOT
+#       include <linux/slab.h>
+#       include <linux/xz.h>
+#else
+/*
+ * Use the internal CRC32 code instead of kernel's CRC32 module, which
+ * is not available in early phase of booting.
+ */
+#define XZ_INTERNAL_CRC32 1
+/*
+ * For boot time use, we enable only the BCJ filter of the current
+ * architecture or none if no BCJ filter is available for the architecture.
+ */
+#ifdef CONFIG_X86
+#       define XZ_DEC_X86
+#endif
+#ifdef CONFIG_PPC
+#       define XZ_DEC_POWERPC
+#endif
+#ifdef CONFIG_ARM
+#       define XZ_DEC_ARM
+#endif
+#ifdef CONFIG_IA64
+#       define XZ_DEC_IA64
+#endif
+#ifdef CONFIG_SPARC
+#       define XZ_DEC_SPARC
+#endif
+/*
+ * This will get the basic headers so that memeq() and others
+ * can be defined.
+ */
+#include "xz/xz_private.h"
+/*
+ * Replace the normal allocation functions with the versions from
+ * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL)
+ * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it.
+ * Work around it here because the other decompressors don't need it.
+ */
+#undef kmalloc
+#undef kfree
+#undef vmalloc
+#undef vfree
+#define kmalloc(size, flags) malloc(size)
+#define kfree(ptr) free(ptr)
+#define vmalloc(size) malloc(size)
+#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0)
+/*
+ * FIXME: Not all basic memory functions are provided in architecture-specific
+ * files (yet). We define our own versions here for now, but this should be
+ * only a temporary solution.
+ *
+ * memeq and memzero are not used much and any remotely sane implementation
+ * is fast enough. memcpy/memmove speed matters in multi-call mode, but
+ * the kernel image is decompressed in single-call mode, in which only
+ * memcpy speed can matter and only if there is a lot of uncompressible data
+ * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
+ * functions below should just be kept small; it's probably not worth
+ * optimizing for speed.
+ */
+#ifndef memeq
+static bool memeq(const void *a, const void *b, size_t size)
+{
+        const uint8_t *x = a;
+        const uint8_t *y = b;
+        size_t i;
+        for (i = 0; i < size; ++i)
+                if (x[i] != y[i])
+                        return false;
+        return true;
+}
+#endif
+#ifndef memzero
+static void memzero(void *buf, size_t size)
+{
+        uint8_t *b = buf;
+        uint8_t *e = b + size;
+        while (b != e)
+                *b++ = '\0';
+}
+#endif
+#ifndef memmove
+/* Not static to avoid a conflict with the prototype in the Linux headers. */
+void *memmove(void *dest, const void *src, size_t size)
+{
+        uint8_t *d = dest;
+        const uint8_t *s = src;
+        size_t i;
+        if (d < s) {
+                for (i = 0; i < size; ++i)
+                        d[i] = s[i];
+        } else if (d > s) {
+                i = size;
+                while (i-- > 0)
+                        d[i] = s[i];
+        }
+        return dest;
+}
+#endif
+/*
+ * Since we need memmove anyway, we could use it as memcpy too.
+ * Commented out for now to avoid breaking things.
+ */
+/*
+#ifndef memcpy
+#       define memcpy memmove
+#endif
+*/
+#include "xz/xz_crc32.c"
+#include "xz/xz_dec_stream.c"
+#include "xz/xz_dec_lzma2.c"
+#include "xz/xz_dec_bcj.c"
+#endif /* XZ_PREBOOT */
+/* Size of the input and output buffers in multi-call mode */
+#define XZ_IOBUF_SIZE 4096
+/*
+ * This function implements the API defined in <linux/decompress/generic.h>.
+ *
+ * This wrapper will automatically choose single-call or multi-call mode
+ * of the native XZ decoder API. The single-call mode can be used only when
+ * both input and output buffers are available as a single chunk, i.e. when
+ * fill() and flush() won't be used.
+ */
+STATIC int INIT unxz(unsigned char *in, int in_size,
+                     int (*fill)(void *dest, unsigned int size),
+                     int (*flush)(void *src, unsigned int size),
+                     unsigned char *out, int *in_used,
+                     void (*error)(char *x))
+{
+        struct xz_buf b;
+        struct xz_dec *s;
+        enum xz_ret ret;
+        bool must_free_in = false;
+#if XZ_INTERNAL_CRC32
+        xz_crc32_init();
+#endif
+        if (in_used != NULL)
+                *in_used = 0;
+        if (fill == NULL && flush == NULL)
+                s = xz_dec_init(XZ_SINGLE, 0);
+        else
+                s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);
+        if (s == NULL)
+                goto error_alloc_state;
+        if (flush == NULL) {
+                b.out = out;
+                b.out_size = (size_t)-1;
+        } else {
+                b.out_size = XZ_IOBUF_SIZE;
+                b.out = malloc(XZ_IOBUF_SIZE);
+                if (b.out == NULL)
+                        goto error_alloc_out;
+        }
+        if (in == NULL) {
+                must_free_in = true;
+                in = malloc(XZ_IOBUF_SIZE);
+                if (in == NULL)
+                        goto error_alloc_in;
+        }
+        b.in = in;
+        b.in_pos = 0;
+        b.in_size = in_size;
+        b.out_pos = 0;
+        if (fill == NULL && flush == NULL) {
+                ret = xz_dec_run(s, &b);
+        } else {
+                do {
+                        if (b.in_pos == b.in_size && fill != NULL) {
+                                if (in_used != NULL)
+                                        *in_used += b.in_pos;
+                                b.in_pos = 0;
+                                in_size = fill(in, XZ_IOBUF_SIZE);
+                                if (in_size < 0) {
+                                        /*
+                                         * This isn't an optimal error code
+                                         * but it probably isn't worth making
+                                         * a new one either.
+                                         */
+                                        ret = XZ_BUF_ERROR;
+                                        break;
+                                }
+                                b.in_size = in_size;
+                        }
+                        ret = xz_dec_run(s, &b);
+                        if (flush != NULL && (b.out_pos == b.out_size
+                                        || (ret != XZ_OK && b.out_pos > 0))) {
+                                /*
+                                 * Setting ret here may hide an error
+                                 * returned by xz_dec_run(), but probably
+                                 * it's not too bad.
+                                 */
+                                if (flush(b.out, b.out_pos) != (int)b.out_pos)
+                                        ret = XZ_BUF_ERROR;
+                                b.out_pos = 0;
+                        }
+                } while (ret == XZ_OK);
+                if (must_free_in)
+                        free(in);
+                if (flush != NULL)
+                        free(b.out);
+        }
+        if (in_used != NULL)
+                *in_used += b.in_pos;
+        xz_dec_end(s);
+        switch (ret) {
+        case XZ_STREAM_END:
+                return 0;
+        case XZ_MEM_ERROR:
+                /* This can occur only in multi-call mode. */
+                error("XZ decompressor ran out of memory");
+                break;
+        case XZ_FORMAT_ERROR:
+                error("Input is not in the XZ format (wrong magic bytes)");
+                break;
+        case XZ_OPTIONS_ERROR:
+                error("Input was encoded with settings that are not "
+                                "supported by this XZ decoder");
+                break;
+        case XZ_DATA_ERROR:
+        case XZ_BUF_ERROR:
+                error("XZ-compressed data is corrupt");
+                break;
+        default:
+                error("Bug in the XZ decompressor");
+                break;
+        }
+        return -1;
+error_alloc_in:
+        if (flush != NULL)
+                free(b.out);
+error_alloc_out:
+        xz_dec_end(s);
+error_alloc_state:
+        error("XZ decompressor ran out of memory");
+        return -1;
+}
+/*
+ * This macro is used by architecture-specific files to decompress
+ * the kernel image.
+ */
+#define decompress unxz
diff --git a/lib/devres.c b/lib/devres.c
index 72c8909006d..6efddf53b90 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,5 +1,6 @@
 #include <linux/pci.h>
 #include <linux/io.h>
+#include <linux/gfp.h>
 #include <linux/module.h>
 void devm_ioremap_release(struct device *dev, void *res)
@@ -327,7 +328,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
 * @pdev: PCI device to map IO resources for
 * @mask: Mask of BARs to unmap and release
 *
- * Unamp and release regions specified by @mask.
+ * Unmap and release regions specified by @mask.
 */
 void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
 {
diff --git a/lib/div64.c b/lib/div64.c
index a111eb8de9c..5b491919177 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -77,26 +77,58 @@ s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
 EXPORT_SYMBOL(div_s64_rem);
 #endif
-/* 64bit divisor, dividend and result. dynamic precision */
+/**
+ * div64_u64 - unsigned 64bit divide with 64bit divisor
+ * @dividend:   64bit dividend
+ * @divisor:    64bit divisor
+ *
+ * This implementation is a modified version of the algorithm proposed
+ * by the book 'Hacker's Delight'.  The original source and full proof
+ * can be found here and is available for use without restriction.
+ *
+ * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c'
+ */
 #ifndef div64_u64
 u64 div64_u64(u64 dividend, u64 divisor)
 {
-        u32 high, d;
+        u32 high = divisor >> 32;
+        u64 quot;
-        high = divisor >> 32;
+        if (high == 0) {
-        if (high) {
+                quot = div_u64(dividend, divisor);
-                unsigned int shift = fls(high);
+        } else {
+                int n = 1 + fls(high);
+                quot = div_u64(dividend >> n, divisor >> n);
-                d = divisor >> shift;
+                if (quot != 0)
-                dividend >>= shift;
+                        quot--;
-        } else
+                if ((dividend - quot * divisor) >= divisor)
-                d = divisor;
+                        quot++;
+        }
-        return div_u64(dividend, d);
+        return quot;
 }
 EXPORT_SYMBOL(div64_u64);
 #endif
+/**
+ * div64_s64 - signed 64bit divide with 64bit divisor
+ * @dividend:   64bit dividend
+ * @divisor:    64bit divisor
+ */
+#ifndef div64_s64
+s64 div64_s64(s64 dividend, s64 divisor)
+{
+        s64 quot, t;
+        quot = div64_u64(abs64(dividend), abs64(divisor));
+        t = (dividend ^ divisor) >> 63;
+        return (quot ^ t) - t;
+}
+EXPORT_SYMBOL(div64_s64);
+#endif
 #endif /* BITS_PER_LONG == 32 */
 /*
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 7d2f0b33e5a..4bfb0471f10 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -570,7 +570,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
         * Now parse out the first token and use it as the name for the
         * driver to filter for.
         */
-        for (i = 0; i < NAME_MAX_LEN; ++i) {
+        for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
                current_driver_name[i] = buf[i];
                if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
                        break;
@@ -587,9 +587,10 @@ out_unlock:
        return count;
 }
-const struct file_operations filter_fops = {
+static const struct file_operations filter_fops = {
        .read  = filter_read,
        .write = filter_write,
+        .llseek = default_llseek,
 };
 static int dma_debug_fs_init(void)
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index f9350291598..b335acb43be 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -25,19 +25,12 @@
 #include <linux/uaccess.h>
 #include <linux/dynamic_debug.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/jump_label.h>
 extern struct _ddebug __start___verbose[];
 extern struct _ddebug __stop___verbose[];
-/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which
- * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They
- * use independent hash functions, to reduce the chance of false positives.
- */
-long long dynamic_debug_enabled;
-EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
-long long dynamic_debug_enabled2;
-EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
 struct ddebug_table {
        struct list_head link;
        char *mod_name;
@@ -87,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
 }
 /*
- * must be called with ddebug_lock held
- */
-static int disabled_hash(char hash, bool first_table)
-{
-        struct ddebug_table *dt;
-        char table_hash_value;
-        list_for_each_entry(dt, &ddebug_tables, link) {
-                if (first_table)
-                        table_hash_value = dt->ddebugs->primary_hash;
-                else
-                        table_hash_value = dt->ddebugs->secondary_hash;
-                if (dt->num_enabled && (hash == table_hash_value))
-                        return 0;
-        }
-        return 1;
-}
-/*
 * Search the tables for _ddebug's which match the given
 * `query' and apply the `flags' and `mask' to them.  Tells
 * the user which ddebug's were changed, or whether none
@@ -168,19 +141,10 @@ static void ddebug_change(const struct ddebug_query *query,
                        else if (!dp->flags)
                                dt->num_enabled++;
                        dp->flags = newflags;
-                        if (newflags) {
+                        if (newflags)
-                                dynamic_debug_enabled |=
+                                dp->enabled = 1;
-                                                (1LL << dp->primary_hash);
+                        else
-                                dynamic_debug_enabled2 |=
+                                dp->enabled = 0;
-                                                (1LL << dp->secondary_hash);
-                        } else {
-                                if (disabled_hash(dp->primary_hash, true))
-                                        dynamic_debug_enabled &=
-                                                ~(1LL << dp->primary_hash);
-                                if (disabled_hash(dp->secondary_hash, false))
-                                        dynamic_debug_enabled2 &=
-                                                ~(1LL << dp->secondary_hash);
-                        }
                        if (verbose)
                                printk(KERN_INFO
                                        "ddebug: changed %s:%d [%s]%s %s\n",
@@ -428,6 +392,40 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
        return 0;
 }
+static int ddebug_exec_query(char *query_string)
+{
+        unsigned int flags = 0, mask = 0;
+        struct ddebug_query query;
+#define MAXWORDS 9
+        int nwords;
+        char *words[MAXWORDS];
+        nwords = ddebug_tokenize(query_string, words, MAXWORDS);
+        if (nwords <= 0)
+                return -EINVAL;
+        if (ddebug_parse_query(words, nwords-1, &query))
+                return -EINVAL;
+        if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
+                return -EINVAL;
+        /* actually go and implement the change */
+        ddebug_change(&query, flags, mask);
+        return 0;
+}
+static __initdata char ddebug_setup_string[1024];
+static __init int ddebug_setup_query(char *str)
+{
+        if (strlen(str) >= 1024) {
+                pr_warning("ddebug boot param string too large\n");
+                return 0;
+        }
+        strcpy(ddebug_setup_string, str);
+        return 1;
+}
+__setup("ddebug_query=", ddebug_setup_query);
 /*
 * File_ops->write method for <debugfs>/dynamic_debug/conrol.  Gathers the
 * command text from userspace, parses and executes it.
@@ -435,12 +433,8 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
 static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
                                  size_t len, loff_t *offp)
 {
-        unsigned int flags = 0, mask = 0;
-        struct ddebug_query query;
-#define MAXWORDS 9
-        int nwords;
-        char *words[MAXWORDS];
        char tmpbuf[256];
+        int ret;
        if (len == 0)
                return 0;
@@ -454,16 +448,9 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
                printk(KERN_INFO "%s: read %d bytes from userspace\n",
                        __func__, (int)len);
-        nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS);
+        ret = ddebug_exec_query(tmpbuf);
-        if (nwords < 0)
+        if (ret)
-                return -EINVAL;
+                return ret;
-        if (ddebug_parse_query(words, nwords-1, &query))
-                return -EINVAL;
-        if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
-                return -EINVAL;
-        /* actually go and implement the change */
-        ddebug_change(&query, flags, mask);
        *offp += len;
        return len;
@@ -691,7 +678,7 @@ static void ddebug_table_free(struct ddebug_table *dt)
 * Called in response to a module being unloaded.  Removes
 * any ddebug_table's which point at the module.
 */
-int ddebug_remove_module(char *mod_name)
+int ddebug_remove_module(const char *mod_name)
 {
        struct ddebug_table *dt, *nextdt;
        int ret = -ENOENT;
@@ -724,13 +711,14 @@ static void ddebug_remove_all_tables(void)
        mutex_unlock(&ddebug_lock);
 }
-static int __init dynamic_debug_init(void)
+static __initdata int ddebug_init_success;
+static int __init dynamic_debug_init_debugfs(void)
 {
        struct dentry *dir, *file;
-        struct _ddebug *iter, *iter_start;
-        const char *modname = NULL;
+        if (!ddebug_init_success)
-        int ret = 0;
+                return -ENODEV;
-        int n = 0;
        dir = debugfs_create_dir("dynamic_debug", NULL);
        if (!dir)
@@ -741,6 +729,16 @@ static int __init dynamic_debug_init(void)
                debugfs_remove(dir);
                return -ENOMEM;
        }
+        return 0;
+}
+static int __init dynamic_debug_init(void)
+{
+        struct _ddebug *iter, *iter_start;
+        const char *modname = NULL;
+        int ret = 0;
+        int n = 0;
        if (__start___verbose != __stop___verbose) {
                iter = __start___verbose;
                modname = iter->modname;
@@ -758,12 +756,26 @@ static int __init dynamic_debug_init(void)
                }
                ret = ddebug_add_module(iter_start, n, modname);
        }
+        /* ddebug_query boot param got passed -> set it up */
+        if (ddebug_setup_string[0] != '\0') {
+                ret = ddebug_exec_query(ddebug_setup_string);
+                if (ret)
+                        pr_warning("Invalid ddebug boot param %s",
+                                   ddebug_setup_string);
+                else
+                        pr_info("ddebug initialized with string %s",
+                                ddebug_setup_string);
+        }
 out_free:
-        if (ret) {
+        if (ret)
                ddebug_remove_all_tables();
-                debugfs_remove(dir);
+        else
-                debugfs_remove(file);
+                ddebug_init_success = 1;
-        }
        return 0;
 }
-module_init(dynamic_debug_init);
+/* Allow early initialization for boot messages via boot param */
+arch_initcall(dynamic_debug_init);
+/* Debugfs setup must be done later */
+module_init(dynamic_debug_init_debugfs);
diff --git a/lib/flex_array.c b/lib/flex_array.c
index 66eef2e4483..c0ea40ba208 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -23,6 +23,7 @@
 #include <linux/flex_array.h>
 #include <linux/slab.h>
 #include <linux/stddef.h>
+#include <linux/module.h>
 struct flex_array_part {
        char elements[FLEX_ARRAY_PART_SIZE];
@@ -99,10 +100,11 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
        ret->element_size = element_size;
        ret->total_nr_elements = total;
        if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
-                memset(ret->parts[0], FLEX_ARRAY_FREE,
+                memset(&ret->parts[0], FLEX_ARRAY_FREE,
                                                FLEX_ARRAY_BASE_BYTES_LEFT);
        return ret;
 }
+EXPORT_SYMBOL(flex_array_alloc);
 static int fa_element_to_part_nr(struct flex_array *fa,
                                        unsigned int element_nr)
@@ -126,12 +128,14 @@ void flex_array_free_parts(struct flex_array *fa)
        for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
                kfree(fa->parts[part_nr]);
 }
+EXPORT_SYMBOL(flex_array_free_parts);
 void flex_array_free(struct flex_array *fa)
 {
        flex_array_free_parts(fa);
        kfree(fa);
 }
+EXPORT_SYMBOL(flex_array_free);
 static unsigned int index_inside_part(struct flex_array *fa,
                                        unsigned int element_nr)
@@ -171,6 +175,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
 * Note that this *copies* the contents of @src into
 * the array.  If you are trying to store an array of
 * pointers, make sure to pass in &ptr instead of ptr.
+ * You may instead wish to use the flex_array_put_ptr()
+ * helper function.
 *
 * Locking must be provided by the caller.
 */
@@ -194,6 +200,7 @@ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
        memcpy(dst, src, fa->element_size);
        return 0;
 }
+EXPORT_SYMBOL(flex_array_put);
 /**
 * flex_array_clear - clear element in array at @element_nr
@@ -221,6 +228,7 @@ int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
        memset(dst, FLEX_ARRAY_FREE, fa->element_size);
        return 0;
 }
+EXPORT_SYMBOL(flex_array_clear);
 /**
 * flex_array_prealloc - guarantee that array space exists
@@ -257,6 +265,7 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
        }
        return 0;
 }
+EXPORT_SYMBOL(flex_array_prealloc);
 /**
 * flex_array_get - pull data back out of the array
@@ -265,7 +274,8 @@ int flex_array_prealloc(struct flex_array *fa, unsigned int start,
 *
 * Returns a pointer to the data at index @element_nr.  Note
 * that this is a copy of the data that was passed in.  If you
- * are using this to store pointers, you'll get back &ptr.
+ * are using this to store pointers, you'll get back &ptr.  You
+ * may instead wish to use the flex_array_get_ptr helper.
 *
 * Locking must be provided by the caller.
 */
@@ -285,6 +295,28 @@ void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
        }
        return &part->elements[index_inside_part(fa, element_nr)];
 }
+EXPORT_SYMBOL(flex_array_get);
+/**
+ * flex_array_get_ptr - pull a ptr back out of the array
+ * @fa:         the flex array from which to extract data
+ * @element_nr: index of the element to fetch from the array
+ *
+ * Returns the pointer placed in the flex array at element_nr using
+ * flex_array_put_ptr().  This function should not be called if the
+ * element in question was not set using the _put_ptr() helper.
+ */
+void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
+{
+        void **tmp;
+        tmp = flex_array_get(fa, element_nr);
+        if (!tmp)
+                return NULL;
+        return *tmp;
+}
+EXPORT_SYMBOL(flex_array_get_ptr);
 static int part_is_free(struct flex_array_part *part)
 {
@@ -325,3 +357,4 @@ int flex_array_shrink(struct flex_array *fa)
        }
        return ret;
 }
+EXPORT_SYMBOL(flex_array_shrink);
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index bea5d97df99..85d0e412a04 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -7,8 +7,8 @@
 #define LE_TABLE_SIZE (1 << CRC_LE_BITS)
 #define BE_TABLE_SIZE (1 << CRC_BE_BITS)
-static uint32_t crc32table_le[LE_TABLE_SIZE];
+static uint32_t crc32table_le[4][LE_TABLE_SIZE];
-static uint32_t crc32table_be[BE_TABLE_SIZE];
+static uint32_t crc32table_be[4][BE_TABLE_SIZE];
 /**
 * crc32init_le() - allocate and initialize LE table data
@@ -22,12 +22,19 @@ static void crc32init_le(void)
        unsigned i, j;
        uint32_t crc = 1;
-        crc32table_le[0] = 0;
+        crc32table_le[0][0] = 0;
        for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
                crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
                for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
-                        crc32table_le[i + j] = crc ^ crc32table_le[j];
+                        crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
+        }
+        for (i = 0; i < LE_TABLE_SIZE; i++) {
+                crc = crc32table_le[0][i];
+                for (j = 1; j < 4; j++) {
+                        crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
+                        crc32table_le[j][i] = crc;
+                }
        }
 }
@@ -39,25 +46,35 @@ static void crc32init_be(void)
        unsigned i, j;
        uint32_t crc = 0x80000000;
-        crc32table_be[0] = 0;
+        crc32table_be[0][0] = 0;
        for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
                crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
                for (j = 0; j < i; j++)
-                        crc32table_be[i + j] = crc ^ crc32table_be[j];
+                        crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
+        }
+        for (i = 0; i < BE_TABLE_SIZE; i++) {
+                crc = crc32table_be[0][i];
+                for (j = 1; j < 4; j++) {
+                        crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
+                        crc32table_be[j][i] = crc;
+                }
        }
 }
-static void output_table(uint32_t table[], int len, char *trans)
+static void output_table(uint32_t table[4][256], int len, char *trans)
 {
-        int i;
+        int i, j;
-        for (i = 0; i < len - 1; i++) {
+        for (j = 0 ; j < 4; j++) {
-                if (i % ENTRIES_PER_LINE == 0)
+                printf("{");
-                        printf("\n");
+                for (i = 0; i < len - 1; i++) {
-                printf("%s(0x%8.8xL), ", trans, table[i]);
+                        if (i % ENTRIES_PER_LINE == 0)
+                                printf("\n");
+                        printf("%s(0x%8.8xL), ", trans, table[j][i]);
+                }
+                printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
        }
-        printf("%s(0x%8.8xL)\n", trans, table[len - 1]);
 }
 int main(int argc, char** argv)
@@ -66,14 +83,14 @@ int main(int argc, char** argv)
        if (CRC_LE_BITS > 1) {
                crc32init_le();
-                printf("static const u32 crc32table_le[] = {");
+                printf("static const u32 crc32table_le[4][256] = {");
                output_table(crc32table_le, LE_TABLE_SIZE, "tole");
                printf("};\n");
        }
        if (CRC_BE_BITS > 1) {
                crc32init_be();
-                printf("static const u32 crc32table_be[] = {");
+                printf("static const u32 crc32table_be[4][256] = {");
                output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
                printf("};\n");
        }
diff --git a/lib/genalloc.c b/lib/genalloc.c
index e67f97495dd..1923f1490e7 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -10,6 +10,7 @@
 * Version 2.  See the file COPYING for more details.
 */
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/bitmap.h>
 #include <linux/genalloc.h>
@@ -127,7 +128,6 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
                chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
                end_bit = (chunk->end_addr - chunk->start_addr) >> order;
-                end_bit -= nbits + 1;
                spin_lock_irqsave(&chunk->lock, flags);
                start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 39af2560f76..f5fe6ba7a3a 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -16,6 +16,40 @@ const char hex_asc[] = "0123456789abcdef";
 EXPORT_SYMBOL(hex_asc);
 /**
+ * hex_to_bin - convert a hex digit to its real value
+ * @ch: ascii character that represents a hex digit
+ *
+ * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
+ * input.
+ */
+int hex_to_bin(char ch)
+{
+        char lower;
+        /* Decimal digits map straight onto 0..9. */
+        if (ch >= '0' && ch <= '9')
+                return ch - '0';
+        /* Fold case so 'A'..'F' and 'a'..'f' share a single range check. */
+        lower = tolower(ch);
+        if (lower < 'a' || lower > 'f')
+                return -1;
+        return lower - 'a' + 10;
+}
+EXPORT_SYMBOL(hex_to_bin);
+/**
+ * hex2bin - decode a string of ascii hex digits into raw bytes
+ * @dst: buffer receiving @count decoded bytes
+ * @src: ascii hexadecimal string; two digits are consumed per output byte
+ * @count: number of bytes to produce
+ *
+ * The first digit of each pair becomes the high nibble.  Digits are not
+ * validated here: whatever hex_to_bin() returns is used as is.
+ */
+void hex2bin(u8 *dst, const char *src, size_t count)
+{
+        size_t i;
+        for (i = 0; i < count; i++) {
+                /* store the high nibble first, then add the low nibble */
+                dst[i] = hex_to_bin(src[2 * i]) << 4;
+                dst[i] += hex_to_bin(src[2 * i + 1]);
+        }
+}
+EXPORT_SYMBOL(hex2bin);
+/**
 * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
 * @buf: data blob to dump
 * @len: number of bytes in the @buf
@@ -34,7 +68,7 @@ EXPORT_SYMBOL(hex_asc);
 *
 * E.g.:
 *   hex_dump_to_buffer(frame->data, frame->len, 16, 1,
- *                      linebuf, sizeof(linebuf), 1);
+ *                      linebuf, sizeof(linebuf), true);
 *
 * example output buffer:
 * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f  @ABCDEFGHIJKLMNO
@@ -65,8 +99,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
                for (j = 0; j < ngroups; j++)
                        lx += scnprintf(linebuf + lx, linebuflen - lx,
-                                "%s%16.16llx", j ? " " : "",
+                                        "%s%16.16llx", j ? " " : "",
-                                (unsigned long long)*(ptr8 + j));
+                                        (unsigned long long)*(ptr8 + j));
                ascii_column = 17 * ngroups + 2;
                break;
        }
@@ -77,7 +111,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
                for (j = 0; j < ngroups; j++)
                        lx += scnprintf(linebuf + lx, linebuflen - lx,
-                                "%s%8.8x", j ? " " : "", *(ptr4 + j));
+                                        "%s%8.8x", j ? " " : "", *(ptr4 + j));
                ascii_column = 9 * ngroups + 2;
                break;
        }
@@ -88,7 +122,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
                for (j = 0; j < ngroups; j++)
                        lx += scnprintf(linebuf + lx, linebuflen - lx,
-                                "%s%4.4x", j ? " " : "", *(ptr2 + j));
+                                        "%s%4.4x", j ? " " : "", *(ptr2 + j));
                ascii_column = 5 * ngroups + 2;
                break;
        }
@@ -111,14 +145,16 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
        while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
                linebuf[lx++] = ' ';
-        for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
+        for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
-                linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
+                ch = ptr[j];
-                                : '.';
+                linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
+        }
 nil:
        linebuf[lx++] = '\0';
 }
 EXPORT_SYMBOL(hex_dump_to_buffer);
+#ifdef CONFIG_PRINTK
 /**
 * print_hex_dump - print a text hex dump to syslog for a binary blob of data
 * @level: kernel log level (e.g. KERN_DEBUG)
@@ -143,7 +179,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
 *
 * E.g.:
 *   print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS,
- *              16, 1, frame->data, frame->len, 1);
+ *                  16, 1, frame->data, frame->len, true);
 *
 * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode:
 * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f  @ABCDEFGHIJKLMNO
@@ -151,12 +187,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
 * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c  pqrstuvwxyz{|}~.
 */
 void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
-                        int rowsize, int groupsize,
+                    int rowsize, int groupsize,
-                        const void *buf, size_t len, bool ascii)
+                    const void *buf, size_t len, bool ascii)
 {
        const u8 *ptr = buf;
        int i, linelen, remaining = len;
-        unsigned char linebuf[200];
+        unsigned char linebuf[32 * 3 + 2 + 32 + 1];
        if (rowsize != 16 && rowsize != 32)
                rowsize = 16;
@@ -164,13 +200,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
        for (i = 0; i < len; i += rowsize) {
                linelen = min(remaining, rowsize);
                remaining -= rowsize;
                hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
-                                linebuf, sizeof(linebuf), ascii);
+                                   linebuf, sizeof(linebuf), ascii);
                switch (prefix_type) {
                case DUMP_PREFIX_ADDRESS:
-                        printk("%s%s%*p: %s\n", level, prefix_str,
+                        printk("%s%s%p: %s\n",
-                                (int)(2 * sizeof(void *)), ptr + i, linebuf);
+                               level, prefix_str, ptr + i, linebuf);
                        break;
                case DUMP_PREFIX_OFFSET:
                        printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
@@ -196,9 +233,10 @@ EXPORT_SYMBOL(print_hex_dump);
 * rowsize of 16, groupsize of 1, and ASCII output included.
 */
 void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-                        const void *buf, size_t len)
+                          const void *buf, size_t len)
 {
        print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
-                        buf, len, 1);
+                       buf, len, true);
 }
 EXPORT_SYMBOL(print_hex_dump_bytes);
+#endif
diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb12..3c79d50814c 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,37 +9,45 @@
 * The Hamming Weight of a number is the total number of bits set in it.
 */
-unsigned int hweight32(unsigned int w)
+unsigned int __sw_hweight32(unsigned int w)
 {
+#ifdef ARCH_HAS_FAST_MULTIPLIER
+        w -= (w >> 1) & 0x55555555;
+        w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
+        w =  (w + (w >> 4)) & 0x0f0f0f0f;
+        return (w * 0x01010101) >> 24;
+#else
        unsigned int res = w - ((w >> 1) & 0x55555555);
        res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
        res = (res + (res >> 4)) & 0x0F0F0F0F;
        res = res + (res >> 8);
        return (res + (res >> 16)) & 0x000000FF;
+#endif
 }
-EXPORT_SYMBOL(hweight32);
+EXPORT_SYMBOL(__sw_hweight32);
-unsigned int hweight16(unsigned int w)
+unsigned int __sw_hweight16(unsigned int w)
 {
        unsigned int res = w - ((w >> 1) & 0x5555);
        res = (res & 0x3333) + ((res >> 2) & 0x3333);
        res = (res + (res >> 4)) & 0x0F0F;
        return (res + (res >> 8)) & 0x00FF;
 }
-EXPORT_SYMBOL(hweight16);
+EXPORT_SYMBOL(__sw_hweight16);
-unsigned int hweight8(unsigned int w)
+unsigned int __sw_hweight8(unsigned int w)
 {
        unsigned int res = w - ((w >> 1) & 0x55);
        res = (res & 0x33) + ((res >> 2) & 0x33);
        return (res + (res >> 4)) & 0x0F;
 }
-EXPORT_SYMBOL(hweight8);
+EXPORT_SYMBOL(__sw_hweight8);
-unsigned long hweight64(__u64 w)
+unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
-        return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+        return __sw_hweight32((unsigned int)(w >> 32)) +
+               __sw_hweight32((unsigned int)w);
 #elif BITS_PER_LONG == 64
 #ifdef ARCH_HAS_FAST_MULTIPLIER
        w -= (w >> 1) & 0x5555555555555555ul;
@@ -56,4 +64,4 @@ unsigned long hweight64(__u64 w)
 #endif
 #endif
 }
-EXPORT_SYMBOL(hweight64);
+EXPORT_SYMBOL(__sw_hweight64);
diff --git a/lib/idr.c b/lib/idr.c
index 1cac726c44b..e15502e8b21 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -106,16 +106,17 @@ static void idr_mark_full(struct idr_layer **pa, int id)
 }
 /**
- * idr_pre_get - reserver resources for idr allocation
+ * idr_pre_get - reserve resources for idr allocation
 * @idp:        idr handle
 * @gfp_mask:   memory allocation flags
 *
- * This function should be called prior to locking and calling the
+ * This function should be called prior to calling the idr_get_new* functions.
- * idr_get_new* functions. It preallocates enough memory to satisfy
+ * It preallocates enough memory to satisfy the worst possible allocation. The
- * the worst possible allocation.
+ * caller should pass in GFP_KERNEL if possible.  This of course requires that
+ * no spinning locks be held.
 *
- * If the system is REALLY out of memory this function returns 0,
+ * If the system is REALLY out of memory this function returns %0,
- * otherwise 1.
+ * otherwise %1.
 */
 int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
 {
@@ -156,10 +157,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
                        id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
                        /* if already at the top layer, we need to grow */
-                        if (!(p = pa[l])) {
+                        if (id >= 1 << (idp->layers * IDR_BITS)) {
                                *starting_id = id;
                                return IDR_NEED_TO_GROW;
                        }
+                        p = pa[l];
+                        BUG_ON(!p);
                        /* If we need to go up one layer, continue the
                         * loop; otherwise, restart from the top.
@@ -282,17 +285,19 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
 * idr_get_new_above - allocate new idr entry above or equal to a start id
 * @idp: idr handle
 * @ptr: pointer you want associated with the id
- * @start_id: id to start search at
+ * @starting_id: id to start search at
 * @id: pointer to the allocated handle
 *
 * This is the allocate id function.  It should be called with any
 * required locks.
 *
- * If memory is required, it will return -EAGAIN, you should unlock
+ * If allocation from IDR's private freelist fails, idr_get_new_above() will
- * and go back to the idr_pre_get() call.  If the idr is full, it will
+ * return %-EAGAIN.  The caller should retry the idr_pre_get() call to refill
- * return -ENOSPC.
+ * IDR's preallocation and then retry the idr_get_new_above() call.
+ *
+ * If the idr is full idr_get_new_above() will return %-ENOSPC.
 *
- * @id returns a value in the range @starting_id ... 0x7fffffff
+ * @id returns a value in the range @starting_id ... %0x7fffffff
 */
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 {
@@ -316,14 +321,13 @@ EXPORT_SYMBOL(idr_get_new_above);
 * @ptr: pointer you want associated with the id
 * @id: pointer to the allocated handle
 *
- * This is the allocate id function.  It should be called with any
+ * If allocation from IDR's private freelist fails, idr_get_new() will
- * required locks.
+ * return %-EAGAIN.  The caller should retry the idr_pre_get() call to refill
+ * IDR's preallocation and then retry the idr_get_new() call.
 *
- * If memory is required, it will return -EAGAIN, you should unlock
+ * If the idr is full idr_get_new() will return %-ENOSPC.
- * and go back to the idr_pre_get() call.  If the idr is full, it will
- * return -ENOSPC.
 *
- * @id returns a value in the range 0 ... 0x7fffffff
+ * @id returns a value in the range %0 ... %0x7fffffff
 */
 int idr_get_new(struct idr *idp, void *ptr, int *id)
 {
@@ -386,7 +390,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
 }
 /**
- * idr_remove - remove the given id and free it's slot
+ * idr_remove - remove the given id and free its slot
 * @idp: idr handle
 * @id: unique key
 */
@@ -435,7 +439,7 @@ EXPORT_SYMBOL(idr_remove);
 * function will remove all id mappings and leave all idp_layers
 * unused.
 *
- * A typical clean-up sequence for objects stored in an idr tree, will
+ * A typical clean-up sequence for objects stored in an idr tree will
 * use idr_for_each() to free all objects, if necessay, then
 * idr_remove_all() to remove all ids, and idr_destroy() to free
 * up the cached idr_layers.
@@ -443,6 +447,7 @@ EXPORT_SYMBOL(idr_remove);
 void idr_remove_all(struct idr *idp)
 {
        int n, id, max;
+        int bt_mask;
        struct idr_layer *p;
        struct idr_layer *pa[MAX_LEVEL];
        struct idr_layer **paa = &pa[0];
@@ -460,8 +465,10 @@ void idr_remove_all(struct idr *idp)
                        p = p->ary[(id >> n) & IDR_MASK];
                }
+                bt_mask = id;
                id += 1 << n;
-                while (n < fls(id)) {
+                /* Get the highest bit that the above add changed from 0->1. */
+                while (n < fls(id ^ bt_mask)) {
                        if (p)
                                free_layer(p);
                        n += IDR_BITS;
@@ -474,7 +481,7 @@ EXPORT_SYMBOL(idr_remove_all);
 /**
 * idr_destroy - release all cached layers within an idr tree
- * idp: idr handle
+ * @idp: idr handle
 */
 void idr_destroy(struct idr *idp)
 {
@@ -502,7 +509,7 @@ void *idr_find(struct idr *idp, int id)
        int n;
        struct idr_layer *p;
-        p = rcu_dereference(idp->top);
+        p = rcu_dereference_raw(idp->top);
        if (!p)
                return NULL;
        n = (p->layer+1) * IDR_BITS;
@@ -517,7 +524,7 @@ void *idr_find(struct idr *idp, int id)
        while (n > 0 && p) {
                n -= IDR_BITS;
                BUG_ON(n != p->layer*IDR_BITS);
-                p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+                p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
        }
        return((void *)p);
 }
@@ -537,7 +544,7 @@ EXPORT_SYMBOL(idr_find);
 * not allowed.
 *
 * We check the return of @fn each time. If it returns anything other
- * than 0, we break out and return that value.
+ * than %0, we break out and return that value.
 *
 * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
 */
@@ -550,7 +557,7 @@ int idr_for_each(struct idr *idp,
        struct idr_layer **paa = &pa[0];
        n = idp->layers * IDR_BITS;
-        p = rcu_dereference(idp->top);
+        p = rcu_dereference_raw(idp->top);
        max = 1 << n;
        id = 0;
@@ -558,7 +565,7 @@ int idr_for_each(struct idr *idp,
                while (n > 0 && p) {
                        n -= IDR_BITS;
                        *paa++ = p;
-                        p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+                        p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
                }
                if (p) {
@@ -581,10 +588,11 @@ EXPORT_SYMBOL(idr_for_each);
 /**
 * idr_get_next - lookup next object of id to given id.
 * @idp: idr handle
- * @id:  pointer to lookup key
+ * @nextidp:  pointer to lookup key
 *
 * Returns pointer to registered object with id, which is next number to
- * given id.
+ * given id. After being looked up, *@nextidp will be updated for the next
+ * iteration.
 */
 void *idr_get_next(struct idr *idp, int *nextidp)
@@ -597,7 +605,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
        /* find first ent */
        n = idp->layers * IDR_BITS;
        max = 1 << n;
-        p = rcu_dereference(idp->top);
+        p = rcu_dereference_raw(idp->top);
        if (!p)
                return NULL;
@@ -605,7 +613,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
                while (n > 0 && p) {
                        n -= IDR_BITS;
                        *paa++ = p;
-                        p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+                        p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
                }
                if (p) {
@@ -621,7 +629,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
        }
        return NULL;
 }
+EXPORT_SYMBOL(idr_get_next);
 /**
@@ -631,8 +639,8 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 * @id: lookup key
 *
 * Replace the pointer registered with an id and return the old value.
- * A -ENOENT return indicates that @id was not found.
+ * A %-ENOENT return indicates that @id was not found.
- * A -EINVAL return indicates that @id was not within valid constraints.
+ * A %-EINVAL return indicates that @id was not within valid constraints.
 *
 * The caller must serialize with writers.
 */
@@ -690,10 +698,11 @@ void idr_init(struct idr *idp)
 EXPORT_SYMBOL(idr_init);
-/*
+/**
+ * DOC: IDA description
 * IDA - IDR based ID allocator
 *
- * this is id allocator without id -> pointer translation.  Memory
+ * This is id allocator without id -> pointer translation.  Memory
 * usage is much lower than full blown idr because each id only
 * occupies a bit.  ida uses a custom leaf node which contains
 * IDA_BITMAP_BITS slots.
@@ -726,8 +735,8 @@ static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
 * following function.  It preallocates enough memory to satisfy the
 * worst possible allocation.
 *
- * If the system is REALLY out of memory this function returns 0,
+ * If the system is REALLY out of memory this function returns %0,
- * otherwise 1.
+ * otherwise %1.
 */
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
 {
@@ -753,17 +762,17 @@ EXPORT_SYMBOL(ida_pre_get);
 /**
 * ida_get_new_above - allocate new ID above or equal to a start id
 * @ida:        ida handle
- * @staring_id: id to start search at
+ * @starting_id: id to start search at
 * @p_id:       pointer to the allocated handle
 *
 * Allocate new ID above or equal to @ida.  It should be called with
 * any required locks.
 *
- * If memory is required, it will return -EAGAIN, you should unlock
+ * If memory is required, it will return %-EAGAIN, you should unlock
 * and go back to the ida_pre_get() call.  If the ida is full, it will
- * return -ENOSPC.
+ * return %-ENOSPC.
 *
- * @p_id returns a value in the range @starting_id ... 0x7fffffff.
+ * @p_id returns a value in the range @starting_id ... %0x7fffffff.
 */
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 {
@@ -845,11 +854,11 @@ EXPORT_SYMBOL(ida_get_new_above);
 *
 * Allocate new ID.  It should be called with any required locks.
 *
- * If memory is required, it will return -EAGAIN, you should unlock
+ * If memory is required, it will return %-EAGAIN, you should unlock
 * and go back to the idr_pre_get() call.  If the idr is full, it will
- * return -ENOSPC.
+ * return %-ENOSPC.
 *
- * @id returns a value in the range 0 ... 0x7fffffff.
+ * @id returns a value in the range %0 ... %0x7fffffff.
 */
 int ida_get_new(struct ida *ida, int *p_id)
 {
@@ -907,7 +916,7 @@ EXPORT_SYMBOL(ida_remove);
 /**
 * ida_destroy - release all cached layers within an ida tree
- * ida:         ida handle
+ * @ida:                ida handle
 */
 void ida_destroy(struct ida *ida)
 {
diff --git a/lib/inflate.c b/lib/inflate.c
index d10255973a9..013a7619348 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -103,6 +103,9 @@
      the two sets of lengths.
 */
 #include <linux/compiler.h>
+#ifdef NO_INFLATE_MALLOC
+#include <linux/slab.h>
+#endif
 #ifdef RCSID
 static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c0251f4ad08..da053313ee5 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -38,12 +38,3 @@ again:
        return -1;
 }
 EXPORT_SYMBOL(iommu_area_alloc);
-unsigned long iommu_num_pages(unsigned long addr, unsigned long len,
-                              unsigned long io_page_size)
-{
-        unsigned long size = (addr & (io_page_size - 1)) + len;
-        return DIV_ROUND_UP(size, io_page_size);
-}
-EXPORT_SYMBOL(iommu_num_pages);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 14c6078f17a..da4e2ad74b6 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -9,14 +9,15 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/io.h>
+#include <linux/module.h>
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
-                unsigned long end, unsigned long phys_addr, pgprot_t prot)
+                unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
        pte_t *pte;
-        unsigned long pfn;
+        u64 pfn;
        pfn = phys_addr >> PAGE_SHIFT;
        pte = pte_alloc_kernel(pmd, addr);
@@ -31,7 +32,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 }
 static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
-                unsigned long end, unsigned long phys_addr, pgprot_t prot)
+                unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
        pmd_t *pmd;
        unsigned long next;
@@ -49,7 +50,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 }
 static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
-                unsigned long end, unsigned long phys_addr, pgprot_t prot)
+                unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
        pud_t *pud;
        unsigned long next;
@@ -67,7 +68,7 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
 }
 int ioremap_page_range(unsigned long addr,
-                       unsigned long end, unsigned long phys_addr, pgprot_t prot)
+                       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
 {
        pgd_t *pgd;
        unsigned long start;
@@ -90,3 +91,4 @@ int ioremap_page_range(unsigned long addr,
        return err;
 }
+EXPORT_SYMBOL_GPL(ioremap_page_range);
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index c5ff1fd1003..9c4233b2378 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -6,6 +6,7 @@
 #include <stdarg.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/string.h>
diff --git a/lib/kobject.c b/lib/kobject.c
index b512b746d2a..82dc34c095c 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
        return ret;
 }
-struct sysfs_ops kobj_sysfs_ops = {
+const struct sysfs_ops kobj_sysfs_ops = {
        .show   = kobj_attr_show,
        .store  = kobj_attr_store,
 };
@@ -746,17 +746,56 @@ void kset_unregister(struct kset *k)
 */
 struct kobject *kset_find_obj(struct kset *kset, const char *name)
 {
+        return kset_find_obj_hinted(kset, name, NULL);
+}
+/**
+ * kset_find_obj_hinted - search for object in kset given a predecessor hint.
+ * @kset: kset we're looking in.
+ * @name: object's name.
+ * @hint: hint to possible object's predecessor, or NULL for a plain search.
+ *
+ * Check the hint's next object and if it is a match return it directly,
+ * otherwise (no hint, hint is the last entry, or the names differ) fall
+ * back to the behavior of kset_find_obj().  Either way a reference for the
+ * returned object is held and the reference on the hinted object is released.
+ */
+struct kobject *kset_find_obj_hinted(struct kset *kset, const char *name,
+                                     struct kobject *hint)
+{
        struct kobject *k;
        struct kobject *ret = NULL;
        spin_lock(&kset->list_lock);
+        if (!hint)
+                goto slow_search;
+        /* hint is the last entry: its successor is the list head, not a kobject */
+        if (hint->entry.next == &kset->list)
+                goto slow_search;
+        k = container_of(hint->entry.next, struct kobject, entry);
+        if (!kobject_name(k) || strcmp(kobject_name(k), name))
+                goto slow_search;
+        ret = kobject_get(k);
+        goto unlock_exit;
+slow_search:
        list_for_each_entry(k, &kset->list, entry) {
                if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
                        ret = kobject_get(k);
                        break;
                }
        }
+unlock_exit:
        spin_unlock(&kset->list_lock);
+        if (hint)
+                kobject_put(hint);
        return ret;
 }
@@ -789,7 +828,7 @@ static struct kobj_type kset_ktype = {
 * If the kset was not able to be created, NULL will be returned.
 */
 static struct kset *kset_create(const char *name,
-                                struct kset_uevent_ops *uevent_ops,
+                                const struct kset_uevent_ops *uevent_ops,
                                struct kobject *parent_kobj)
 {
        struct kset *kset;
@@ -832,7 +871,7 @@ static struct kset *kset_create(const char *name,
 * If the kset was not able to be created, NULL will be returned.
 */
 struct kset *kset_create_and_add(const char *name,
-                                 struct kset_uevent_ops *uevent_ops,
+                                 const struct kset_uevent_ops *uevent_ops,
                                 struct kobject *parent_kobj)
 {
        struct kset *kset;
@@ -850,6 +889,121 @@ struct kset *kset_create_and_add(const char *name,
 }
 EXPORT_SYMBOL_GPL(kset_create_and_add);
+static DEFINE_SPINLOCK(kobj_ns_type_lock);
+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
+/*
+ * Install @ops in the slot for ops->type.  Returns 0 on success, -EINVAL if
+ * the type is outside (KOBJ_NS_TYPE_NONE, KOBJ_NS_TYPES), or -EBUSY if the
+ * slot is already occupied.
+ */
+int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
+{
+        enum kobj_ns_type type = ops->type;
+        int error;
+        spin_lock(&kobj_ns_type_lock);
+        if (type >= KOBJ_NS_TYPES || type <= KOBJ_NS_TYPE_NONE) {
+                error = -EINVAL;
+        } else if (kobj_ns_ops_tbl[type]) {
+                error = -EBUSY;
+        } else {
+                kobj_ns_ops_tbl[type] = ops;
+                error = 0;
+        }
+        spin_unlock(&kobj_ns_type_lock);
+        return error;
+}
+/* Returns 1 if operations are registered for a valid @type, 0 otherwise. */
+int kobj_ns_type_registered(enum kobj_ns_type type)
+{
+        int found;
+        spin_lock(&kobj_ns_type_lock);
+        if (type <= KOBJ_NS_TYPE_NONE || type >= KOBJ_NS_TYPES)
+                found = 0;
+        else
+                found = kobj_ns_ops_tbl[type] ? 1 : 0;
+        spin_unlock(&kobj_ns_type_lock);
+        return found;
+}
+/*
+ * Ask @parent's ktype which namespace operations apply to its children.
+ * Returns NULL when there is no parent, the parent has no ktype, or the
+ * ktype provides no child_ns_type() callback.
+ */
+const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
+{
+        const struct kobj_ns_type_operations *ops = NULL;
+        /* a kobject without a ktype must not be dereferenced further */
+        if (parent && parent->ktype && parent->ktype->child_ns_type)
+                ops = parent->ktype->child_ns_type(parent);
+        return ops;
+}
+/* Namespace operations for @kobj, as reported for children of its parent. */
+const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
+{
+        struct kobject *parent = kobj->parent;
+        return kobj_child_ns_ops(parent);
+}
+/*
+ * Result of the registered current_ns() callback for @type, or NULL when
+ * @type is out of range or has no registered operations.
+ */
+const void *kobj_ns_current(enum kobj_ns_type type)
+{
+        const struct kobj_ns_type_operations *ops;
+        const void *tag = NULL;
+        spin_lock(&kobj_ns_type_lock);
+        if (type > KOBJ_NS_TYPE_NONE && type < KOBJ_NS_TYPES) {
+                ops = kobj_ns_ops_tbl[type];
+                if (ops)
+                        tag = ops->current_ns();
+        }
+        spin_unlock(&kobj_ns_type_lock);
+        return tag;
+}
+/*
+ * Result of the registered netlink_ns() callback for @type applied to @sk,
+ * or NULL when @type is out of range or has no registered operations.
+ */
+const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
+{
+        const struct kobj_ns_type_operations *ops;
+        const void *tag = NULL;
+        spin_lock(&kobj_ns_type_lock);
+        if (type > KOBJ_NS_TYPE_NONE && type < KOBJ_NS_TYPES) {
+                ops = kobj_ns_ops_tbl[type];
+                if (ops)
+                        tag = ops->netlink_ns(sk);
+        }
+        spin_unlock(&kobj_ns_type_lock);
+        return tag;
+}
+/*
+ * Result of the registered initial_ns() callback for @type, or NULL when
+ * @type is out of range or has no registered operations.
+ */
+const void *kobj_ns_initial(enum kobj_ns_type type)
+{
+        const struct kobj_ns_type_operations *ops;
+        const void *tag = NULL;
+        spin_lock(&kobj_ns_type_lock);
+        if (type > KOBJ_NS_TYPE_NONE && type < KOBJ_NS_TYPES) {
+                ops = kobj_ns_ops_tbl[type];
+                if (ops)
+                        tag = ops->initial_ns();
+        }
+        spin_unlock(&kobj_ns_type_lock);
+        return tag;
+}
+/*
+ * kobj_ns_exit - invalidate a namespace tag
+ *
+ * @type: the namespace type (e.g. KOBJ_NS_TYPE_NET)
+ * @ns: the actual namespace being invalidated
+ *
+ * This is called when a tag is no longer valid.  For instance,
+ * when a network namespace exits, it uses this helper to
+ * make sure no sb's sysfs_info points to the now-invalidated
+ * netns.  The work itself is delegated to sysfs_exit_ns().
+ */
+void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
+{
+        sysfs_exit_ns(type, ns);
+}
 EXPORT_SYMBOL(kobject_get);
 EXPORT_SYMBOL(kobject_put);
 EXPORT_SYMBOL(kobject_del);
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 920a3ca6e25..70af0a7f97c 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -18,18 +18,25 @@
 #include <linux/string.h>
 #include <linux/kobject.h>
 #include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 u64 uevent_seqnum;
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 static DEFINE_SPINLOCK(sequence_lock);
-#if defined(CONFIG_NET)
+#ifdef CONFIG_NET
-static struct sock *uevent_sock;
+/* One uevent socket; entries are chained on uevent_sock_list. */
+struct uevent_sock {
+        struct list_head list;  /* link used when walking uevent_sock_list */
+        struct sock *sk;        /* socket the uevent is broadcast through */
+};
+static LIST_HEAD(uevent_sock_list);
+static DEFINE_MUTEX(uevent_sock_mutex);
 #endif
 /* the strings here must match the enum in include/linux/kobject.h */
@@ -76,6 +83,39 @@ out:
        return ret;
 }
+#ifdef CONFIG_NET
+/*
+ * Broadcast filter callback: returns non-zero when the namespace tag of the
+ * destination socket @dsk differs from the tag of the kobject passed in
+ * @data, and 0 when they match or the kobject has no namespace operations.
+ */
+static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+        struct kobject *kobj = data;
+        const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
+        const void *kobj_tag, *sk_tag;
+        if (!ns_ops)
+                return 0;
+        /* query the object's tag first, then the destination socket's */
+        kobj_tag = kobj->ktype->namespace(kobj);
+        sk_tag = ns_ops->netlink_ns(dsk);
+        return sk_tag != kobj_tag;
+}
+#endif
+/*
+ * Returns non-zero when @kobj carries a namespace tag other than the one
+ * reported by initial_ns(), and 0 when the tags match or @kobj has no
+ * namespace operations.
+ */
+static int kobj_usermode_filter(struct kobject *kobj)
+{
+        const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
+        const void *kobj_tag, *init_tag;
+        if (!ns_ops)
+                return 0;
+        /* query the object's tag first, then the initial namespace */
+        kobj_tag = kobj->ktype->namespace(kobj);
+        init_tag = ns_ops->initial_ns();
+        return kobj_tag != init_tag;
+}
 /**
 * kobject_uevent_env - send an uevent with environmental data
 *
@@ -83,7 +123,7 @@ out:
 * @kobj: struct kobject that the action is happening to
 * @envp_ext: pointer to environmental data
 *
- * Returns 0 if kobject_uevent() is completed with success or the
+ * Returns 0 if kobject_uevent_env() is completed with success or the
 * corresponding error when it fails.
 */
 int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
@@ -95,10 +135,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
        const char *subsystem;
        struct kobject *top_kobj;
        struct kset *kset;
-        struct kset_uevent_ops *uevent_ops;
+        const struct kset_uevent_ops *uevent_ops;
        u64 seq;
        int i = 0;
        int retval = 0;
+#ifdef CONFIG_NET
+        struct uevent_sock *ue_sk;
+#endif
        pr_debug("kobject: '%s' (%p): %s\n",
                 kobject_name(kobj), kobj, __func__);
@@ -210,7 +253,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 #if defined(CONFIG_NET)
        /* send netlink message */
-        if (uevent_sock) {
+        mutex_lock(&uevent_sock_mutex);
+        list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+                struct sock *uevent_sock = ue_sk->sk;
                struct sk_buff *skb;
                size_t len;
@@ -232,18 +277,21 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
                        }
                        NETLINK_CB(skb).dst_group = 1;
-                        retval = netlink_broadcast(uevent_sock, skb, 0, 1,
+                        retval = netlink_broadcast_filtered(uevent_sock, skb,
-                                                   GFP_KERNEL);
+                                                            0, 1, GFP_KERNEL,
+                                                            kobj_bcast_filter,
+                                                            kobj);
                        /* ENOBUFS should be handled in userspace */
                        if (retval == -ENOBUFS)
                                retval = 0;
                } else
                        retval = -ENOMEM;
        }
+        mutex_unlock(&uevent_sock_mutex);
 #endif
        /* call uevent_helper, usually only enabled during early boot */
-        if (uevent_helper[0]) {
+        if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
                char *argv [3];
                argv [0] = uevent_helper;
@@ -269,7 +317,7 @@ exit:
 EXPORT_SYMBOL_GPL(kobject_uevent_env);
 /**
- * kobject_uevent - notify userspace by ending an uevent
+ * kobject_uevent - notify userspace by sending an uevent
 *
 * @action: action that is happening
 * @kobj: struct kobject that the action is happening to
@@ -319,18 +367,59 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 #if defined(CONFIG_NET)
-static int __init kobject_uevent_init(void)
+static int uevent_net_init(struct net *net)
 {
-        uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT,
+        struct uevent_sock *ue_sk;
-                                            1, NULL, NULL, THIS_MODULE);
-        if (!uevent_sock) {
+        ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
+        if (!ue_sk)
+                return -ENOMEM;
+        ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
+                                          1, NULL, NULL, THIS_MODULE);
+        if (!ue_sk->sk) {
                printk(KERN_ERR
                       "kobject_uevent: unable to create netlink socket!\n");
+                kfree(ue_sk);
                return -ENODEV;
        }
-        netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+        mutex_lock(&uevent_sock_mutex);
+        list_add_tail(&ue_sk->list, &uevent_sock_list);
+        mutex_unlock(&uevent_sock_mutex);
        return 0;
 }
+/*
+ * uevent_net_exit - pernet exit hook: drop the uevent socket of @net
+ * @net: network namespace whose uevent socket is to be removed
+ *
+ * Searches uevent_sock_list for the entry whose socket belongs to @net.
+ * If there is none, nothing is done.  Otherwise the entry is unlinked
+ * while uevent_sock_mutex is held; the socket is released and the entry
+ * freed only after the mutex has been dropped.
+ */
+static void uevent_net_exit(struct net *net)
+{
+        struct uevent_sock *ue_sk;
+        mutex_lock(&uevent_sock_mutex);
+        list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+                if (sock_net(ue_sk->sk) == net)
+                        goto found;
+        }
+        mutex_unlock(&uevent_sock_mutex);
+        return;
+found:
+        list_del(&ue_sk->list);
+        mutex_unlock(&uevent_sock_mutex);
+        netlink_kernel_release(ue_sk->sk);
+        kfree(ue_sk);
+}
+/*
+ * Init/exit hooks that create and destroy the uevent netlink socket of a
+ * struct net; registered by kobject_uevent_init().
+ */
+static struct pernet_operations uevent_net_ops = {
+        .init   = uevent_net_init,
+        .exit   = uevent_net_exit,
+};
+/*
+ * kobject_uevent_init - set up uevent delivery over netlink
+ *
+ * Sets NL_NONROOT_RECV for the NETLINK_KOBJECT_UEVENT protocol and
+ * registers uevent_net_ops.  Returns whatever register_pernet_subsys()
+ * returns.  Hooked up as a postcore_initcall.
+ */
+static int __init kobject_uevent_init(void)
+{
+        netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+        return register_pernet_subsys(&uevent_net_ops);
+}
 postcore_initcall(kobject_uevent_init);
 #endif
diff --git a/lib/kref.c b/lib/kref.c
index 9ecd6e86561..3efb882b11d 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -13,17 +13,7 @@
 #include <linux/kref.h>
 #include <linux/module.h>
+#include <linux/slab.h>
-/**
- * kref_set - initialize object and set refcount to requested number.
- * @kref: object in question.
- * @num: initial reference counter
- */
-void kref_set(struct kref *kref, int num)
-{
-        atomic_set(&kref->refcount, num);
-        smp_mb();
-}
 /**
 * kref_init - initialize object.
@@ -31,7 +21,8 @@ void kref_set(struct kref *kref, int num)
 */
 void kref_init(struct kref *kref)
 {
-        kref_set(kref, 1);
+        atomic_set(&kref->refcount, 1);
+        smp_mb();
 }
 /**
@@ -71,7 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
        return 0;
 }
-EXPORT_SYMBOL(kref_set);
+/**
+ * kref_sub - subtract a number of refcounts for object.
+ * @kref: object.
+ * @count: Number of refcounts to subtract.
+ * @release: pointer to the function that will clean up the object when the
+ *           last reference to the object is released.
+ *           This pointer is required, and it is not acceptable to pass kfree
+ *           in as this function.
+ *
+ * Subtract @count from the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0.  Beware, if this
+ * function returns 0, you still can not count on the kref remaining in
+ * memory.  Only use the return value if you want to see if the kref is now
+ * gone, not present.
+ */
+int kref_sub(struct kref *kref, unsigned int count,
+             void (*release)(struct kref *kref))
+{
+        WARN_ON(release == NULL);
+        WARN_ON(release == (void (*)(struct kref *))kfree);
+        if (atomic_sub_and_test((int) count, &kref->refcount)) {
+                release(kref);
+                return 1;
+        }
+        return 0;
+}
 EXPORT_SYMBOL(kref_init);
 EXPORT_SYMBOL(kref_get);
 EXPORT_SYMBOL(kref_put);
+EXPORT_SYMBOL(kref_sub);
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 00000000000..157cd88a6ff
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+/*
+ * lcm - least common multiple of @a and @b
+ *
+ * If exactly one argument is zero the other one is returned; if both are
+ * zero the result is zero.
+ *
+ * The division is done before the multiplication (gcd(a, b) divides @a
+ * exactly), so the intermediate value never exceeds the result itself.
+ * The result still wraps if the true LCM does not fit in unsigned long.
+ */
+unsigned long lcm(unsigned long a, unsigned long b)
+{
+        if (a && b)
+                return (a / gcd(a, b)) * b;
+        else if (b)
+                return b;
+        return a;
+}
+EXPORT_SYMBOL_GPL(lcm);
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 1a39f4e3ae1..344c710d16c 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -43,6 +43,12 @@ EXPORT_SYMBOL(__list_add);
 */
 void list_del(struct list_head *entry)
 {
+        WARN(entry->next == LIST_POISON1,
+                "list_del corruption, next is LIST_POISON1 (%p)\n",
+                LIST_POISON1);
+        WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
+                "list_del corruption, prev is LIST_POISON2 (%p)\n",
+                LIST_POISON2);
        WARN(entry->prev->next != entry,
                "list_del corruption. prev->next should be %p, "
                "but was %p\n", entry, entry->prev->next);
diff --git a/lib/list_sort.c b/lib/list_sort.c
index 19d11e0bb95..d7325c6b103 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -4,99 +4,288 @@
 #include <linux/slab.h>
 #include <linux/list.h>
+#define MAX_LIST_LENGTH_BITS 20
+/*
+ * Merge the two null-terminated lists @a and @b into one, ordering
+ * elements with @cmp (which is passed @priv), and return the first
+ * element of the result.  Only the "next" links are written.
+ *
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+static struct list_head *merge(void *priv,
+                                int (*cmp)(void *priv, struct list_head *a,
+                                        struct list_head *b),
+                                struct list_head *a, struct list_head *b)
+{
+        struct list_head head, *tail = &head;
+        while (a && b) {
+                /* if equal, take 'a' -- important for sort stability */
+                if ((*cmp)(priv, a, b) <= 0) {
+                        tail->next = a;
+                        a = a->next;
+                } else {
+                        tail->next = b;
+                        b = b->next;
+                }
+                tail = tail->next;
+        }
+        /* one input is exhausted: append whatever is left of the other */
+        tail->next = a?:b;
+        return head.next;
+}
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure.  This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ *
+ * @a and @b are null-terminated lists; the merged result is hung off
+ * @head, with every "prev" link rewritten and the last element linked
+ * back to @head.  At least one of @a and @b must be non-NULL: the
+ * trailing loop dereferences tail->next without checking it first.
+ */
+static void merge_and_restore_back_links(void *priv,
+                                int (*cmp)(void *priv, struct list_head *a,
+                                        struct list_head *b),
+                                struct list_head *head,
+                                struct list_head *a, struct list_head *b)
+{
+        struct list_head *tail = head;
+        while (a && b) {
+                /* if equal, take 'a' -- important for sort stability */
+                if ((*cmp)(priv, a, b) <= 0) {
+                        tail->next = a;
+                        a->prev = tail;
+                        a = a->next;
+                } else {
+                        tail->next = b;
+                        b->prev = tail;
+                        b = b->next;
+                }
+                tail = tail->next;
+        }
+        /* append the remainder; its prev links are fixed up below */
+        tail->next = a ? : b;
+        do {
+                /*
+                 * In worst cases this loop may run many iterations.
+                 * Continue callbacks to the client even though no
+                 * element comparison is needed, so the client's cmp()
+                 * routine can invoke cond_resched() periodically.
+                 */
+                (*cmp)(priv, tail->next, tail->next);
+                tail->next->prev = tail;
+                tail = tail->next;
+        } while (tail->next);
+        /* close the ring through the list head */
+        tail->next = head;
+        head->prev = tail;
+}
 /**
- * list_sort - sort a list.
+ * list_sort - sort a list
- * @priv: private data, passed to @cmp
+ * @priv: private data, opaque to list_sort(), passed to @cmp
 * @head: the list to sort
 * @cmp: the elements comparison function
 *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
+ * This function implements "merge sort", which has O(nlog(n))
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
+ * complexity.
- * in ascending order.
 *
- * The comparison function @cmp is supposed to return a negative value if @a is
+ * The comparison function @cmp must return a negative value if @a
- * less than @b, and a positive value if @a is greater than @b. If @a and @b
+ * should sort before @b, and a positive value if @a should sort after
- * are equivalent, then it does not matter what this function returns.
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
 */
 void list_sort(void *priv, struct list_head *head,
-               int (*cmp)(void *priv, struct list_head *a,
+                int (*cmp)(void *priv, struct list_head *a,
-                          struct list_head *b))
+                        struct list_head *b))
 {
-        struct list_head *p, *q, *e, *list, *tail, *oldhead;
+        struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
-        int insize, nmerges, psize, qsize, i;
+                                                -- last slot is a sentinel */
+        int lev;  /* index into part[] */
+        int max_lev = 0;
+        struct list_head *list;
        if (list_empty(head))
                return;
+        memset(part, 0, sizeof(part));
+        head->prev->next = NULL;
        list = head->next;
-        list_del(head);
-        insize = 1;
-        for (;;) {
-                p = oldhead = list;
-                list = tail = NULL;
-                nmerges = 0;
-                while (p) {
-                        nmerges++;
-                        q = p;
-                        psize = 0;
-                        for (i = 0; i < insize; i++) {
-                                psize++;
-                                q = q->next == oldhead ? NULL : q->next;
-                                if (!q)
-                                        break;
-                        }
-                        qsize = insize;
+        while (list) {
-                        while (psize > 0 || (qsize > 0 && q)) {
+                struct list_head *cur = list;
-                                if (!psize) {
+                list = list->next;
-                                        e = q;
+                cur->next = NULL;
-                                        q = q->next;
-                                        qsize--;
+                for (lev = 0; part[lev]; lev++) {
-                                        if (q == oldhead)
+                        cur = merge(priv, cmp, part[lev], cur);
-                                                q = NULL;
+                        part[lev] = NULL;
-                                } else if (!qsize || !q) {
+                }
-                                        e = p;
+                if (lev > max_lev) {
-                                        p = p->next;
+                        if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
-                                        psize--;
+                                printk_once(KERN_DEBUG "list passed to"
-                                        if (p == oldhead)
+                                        " list_sort() too long for"
-                                                p = NULL;
+                                        " efficiency\n");
-                                } else if (cmp(priv, p, q) <= 0) {
+                                lev--;
-                                        e = p;
-                                        p = p->next;
-                                        psize--;
-                                        if (p == oldhead)
-                                                p = NULL;
-                                } else {
-                                        e = q;
-                                        q = q->next;
-                                        qsize--;
-                                        if (q == oldhead)
-                                                q = NULL;
-                                }
-                                if (tail)
-                                        tail->next = e;
-                                else
-                                        list = e;
-                                e->prev = tail;
-                                tail = e;
                        }
-                        p = q;
+                        max_lev = lev;
                }
+                part[lev] = cur;
+        }
+        for (lev = 0; lev < max_lev; lev++)
+                if (part[lev])
+                        list = merge(priv, cmp, part[lev], list);
+        merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
+}
+EXPORT_SYMBOL(list_sort);
+#ifdef CONFIG_TEST_LIST_SORT
+#include <linux/random.h>
-                tail->next = list;
+/*
-                list->prev = tail;
+ * The pattern of set bits in the list length determines which cases
+ * are hit in list_sort().
+ */
+#define TEST_LIST_LEN (512+128+2) /* not including head */
+#define TEST_POISON1 0xDEADBEEF
+#define TEST_POISON2 0xA324354C
-                if (nmerges <= 1)
+struct debug_el {
-                        break;
+        unsigned int poison1;
+        struct list_head list;
+        unsigned int poison2;
+        int value;
+        unsigned serial;
+};
-                insize *= 2;
+/* Array, containing pointers to all elements in the test list */
+static struct debug_el **elts __initdata;
+/*
+ * Sanity-check two test elements: each serial must be below
+ * TEST_LIST_LEN, elts[] must point back at each element, and both
+ * poison words around the embedded list_head must be intact.
+ * Returns 0 if everything matches, -EINVAL (after logging) otherwise.
+ * serial is unsigned, so it is printed with %u.
+ */
+static int __init check(struct debug_el *ela, struct debug_el *elb)
+{
+        if (ela->serial >= TEST_LIST_LEN) {
+                printk(KERN_ERR "list_sort_test: error: incorrect serial %u\n",
+                                ela->serial);
+                return -EINVAL;
        }
+        if (elb->serial >= TEST_LIST_LEN) {
+                printk(KERN_ERR "list_sort_test: error: incorrect serial %u\n",
+                                elb->serial);
+                return -EINVAL;
+        }
+        if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
+                printk(KERN_ERR "list_sort_test: error: phantom element\n");
+                return -EINVAL;
+        }
+        if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
+                printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
+                                ela->poison1, ela->poison2);
+                return -EINVAL;
+        }
+        if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
+                printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
+                                elb->poison1, elb->poison2);
+                return -EINVAL;
+        }
+        return 0;
+}
+static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+        struct debug_el *ela, *elb;
+        ela = container_of(a, struct debug_el, list);
+        elb = container_of(b, struct debug_el, list);
-        head->next = list;
+        check(ela, elb);
-        head->prev = list->prev;
+        return ela->value - elb->value;
-        list->prev->next = head;
-        list->prev = head;
 }
-EXPORT_SYMBOL(list_sort);
+/*
+ * Boot-time self test for list_sort().
+ *
+ * Builds a list of TEST_LIST_LEN elements with random values (reduced
+ * modulo TEST_LIST_LEN/3 so that duplicates occur), sorts it and then
+ * verifies that the prev links are consistent, the values are in
+ * non-decreasing order, equal values kept their original relative
+ * order, every element passes check(), and no element was lost.
+ *
+ * Returns 0 on success and -EINVAL on any failure, including failure to
+ * allocate memory.  All elements and the elts[] array are freed on exit.
+ */
+static int __init list_sort_test(void)
+{
+        int i, count = 1, err = -EINVAL;
+        struct debug_el *el;
+        struct list_head *cur, *tmp;
+        LIST_HEAD(head);
+        printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
+        elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL);
+        if (!elts) {
+                printk(KERN_ERR "list_sort_test: error: cannot allocate "
+                                "memory\n");
+                goto exit;
+        }
+        for (i = 0; i < TEST_LIST_LEN; i++) {
+                el = kmalloc(sizeof(*el), GFP_KERNEL);
+                if (!el) {
+                        printk(KERN_ERR "list_sort_test: error: cannot "
+                                        "allocate memory\n");
+                        goto exit;
+                }
+                 /* force some equivalencies */
+                el->value = random32() % (TEST_LIST_LEN/3);
+                el->serial = i;
+                el->poison1 = TEST_POISON1;
+                el->poison2 = TEST_POISON2;
+                elts[i] = el;
+                list_add_tail(&el->list, &head);
+        }
+        list_sort(NULL, &head, cmp);
+        /*
+         * The walk stops on the last element without counting it, which
+         * is why count starts at 1.
+         */
+        for (cur = head.next; cur->next != &head; cur = cur->next) {
+                struct debug_el *el1;
+                int cmp_result;
+                if (cur->next->prev != cur) {
+                        printk(KERN_ERR "list_sort_test: error: list is "
+                                        "corrupted\n");
+                        goto exit;
+                }
+                cmp_result = cmp(NULL, cur, cur->next);
+                if (cmp_result > 0) {
+                        printk(KERN_ERR "list_sort_test: error: list is not "
+                                        "sorted\n");
+                        goto exit;
+                }
+                el = container_of(cur, struct debug_el, list);
+                el1 = container_of(cur->next, struct debug_el, list);
+                /* serials were assigned in insertion order */
+                if (cmp_result == 0 && el->serial >= el1->serial) {
+                        printk(KERN_ERR "list_sort_test: error: order of "
+                                        "equivalent elements not preserved\n");
+                        goto exit;
+                }
+                if (check(el, el1)) {
+                        printk(KERN_ERR "list_sort_test: error: element check "
+                                        "failed\n");
+                        goto exit;
+                }
+                count++;
+        }
+        if (count != TEST_LIST_LEN) {
+                printk(KERN_ERR "list_sort_test: error: bad list length %d\n",
+                                count);
+                goto exit;
+        }
+        err = 0;
+exit:
+        kfree(elts);
+        list_for_each_safe(cur, tmp, &head) {
+                list_del(cur);
+                kfree(container_of(cur, struct debug_el, list));
+        }
+        return err;
+}
+module_init(list_sort_test);
+#endif /* CONFIG_TEST_LIST_SORT */
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index 9cee17142b2..00000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,532 +0,0 @@
-/*
- * Procedures for maintaining information about logical memory blocks.
- *
- * Peter Bergner, IBM Corp.     June 2001.
- * Copyright (C) 2001 Peter Bergner.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/lmb.h>
-#define LMB_ALLOC_ANYWHERE      0
-struct lmb lmb;
-static int lmb_debug;
-static int __init early_lmb(char *p)
-{
-        if (p && strstr(p, "debug"))
-                lmb_debug = 1;
-        return 0;
-}
-early_param("lmb", early_lmb);
-static void lmb_dump(struct lmb_region *region, char *name)
-{
-        unsigned long long base, size;
-        int i;
-        pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);
-        for (i = 0; i < region->cnt; i++) {
-                base = region->region[i].base;
-                size = region->region[i].size;
-                pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
-                    name, i, base, base + size - 1, size);
-        }
-}
-void lmb_dump_all(void)
-{
-        if (!lmb_debug)
-                return;
-        pr_info("LMB configuration:\n");
-        pr_info(" rmo_size    = 0x%llx\n", (unsigned long long)lmb.rmo_size);
-        pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
-        lmb_dump(&lmb.memory, "memory");
-        lmb_dump(&lmb.reserved, "reserved");
-}
-static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
-                                        u64 size2)
-{
-        return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
-}
-static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
-{
-        if (base2 == base1 + size1)
-                return 1;
-        else if (base1 == base2 + size2)
-                return -1;
-        return 0;
-}
-static long lmb_regions_adjacent(struct lmb_region *rgn,
-                unsigned long r1, unsigned long r2)
-{
-        u64 base1 = rgn->region[r1].base;
-        u64 size1 = rgn->region[r1].size;
-        u64 base2 = rgn->region[r2].base;
-        u64 size2 = rgn->region[r2].size;
-        return lmb_addrs_adjacent(base1, size1, base2, size2);
-}
-static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
-{
-        unsigned long i;
-        for (i = r; i < rgn->cnt - 1; i++) {
-                rgn->region[i].base = rgn->region[i + 1].base;
-                rgn->region[i].size = rgn->region[i + 1].size;
-        }
-        rgn->cnt--;
-}
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void lmb_coalesce_regions(struct lmb_region *rgn,
-                unsigned long r1, unsigned long r2)
-{
-        rgn->region[r1].size += rgn->region[r2].size;
-        lmb_remove_region(rgn, r2);
-}
-void __init lmb_init(void)
-{
-        /* Create a dummy zero size LMB which will get coalesced away later.
-         * This simplifies the lmb_add() code below...
-         */
-        lmb.memory.region[0].base = 0;
-        lmb.memory.region[0].size = 0;
-        lmb.memory.cnt = 1;
-        /* Ditto. */
-        lmb.reserved.region[0].base = 0;
-        lmb.reserved.region[0].size = 0;
-        lmb.reserved.cnt = 1;
-}
-void __init lmb_analyze(void)
-{
-        int i;
-        lmb.memory.size = 0;
-        for (i = 0; i < lmb.memory.cnt; i++)
-                lmb.memory.size += lmb.memory.region[i].size;
-}
-static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-        unsigned long coalesced = 0;
-        long adjacent, i;
-        if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
-                rgn->region[0].base = base;
-                rgn->region[0].size = size;
-                return 0;
-        }
-        /* First try and coalesce this LMB with another. */
-        for (i = 0; i < rgn->cnt; i++) {
-                u64 rgnbase = rgn->region[i].base;
-                u64 rgnsize = rgn->region[i].size;
-                if ((rgnbase == base) && (rgnsize == size))
-                        /* Already have this region, so we're done */
-                        return 0;
-                adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
-                if (adjacent > 0) {
-                        rgn->region[i].base -= size;
-                        rgn->region[i].size += size;
-                        coalesced++;
-                        break;
-                } else if (adjacent < 0) {
-                        rgn->region[i].size += size;
-                        coalesced++;
-                        break;
-                }
-        }
-        if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
-                lmb_coalesce_regions(rgn, i, i+1);
-                coalesced++;
-        }
-        if (coalesced)
-                return coalesced;
-        if (rgn->cnt >= MAX_LMB_REGIONS)
-                return -1;
-        /* Couldn't coalesce the LMB, so add it to the sorted table. */
-        for (i = rgn->cnt - 1; i >= 0; i--) {
-                if (base < rgn->region[i].base) {
-                        rgn->region[i+1].base = rgn->region[i].base;
-                        rgn->region[i+1].size = rgn->region[i].size;
-                } else {
-                        rgn->region[i+1].base = base;
-                        rgn->region[i+1].size = size;
-                        break;
-                }
-        }
-        if (base < rgn->region[0].base) {
-                rgn->region[0].base = base;
-                rgn->region[0].size = size;
-        }
-        rgn->cnt++;
-        return 0;
-}
-long lmb_add(u64 base, u64 size)
-{
-        struct lmb_region *_rgn = &lmb.memory;
-        /* On pSeries LPAR systems, the first LMB is our RMO region. */
-        if (base == 0)
-                lmb.rmo_size = size;
-        return lmb_add_region(_rgn, base, size);
-}
-long lmb_remove(u64 base, u64 size)
-{
-        struct lmb_region *rgn = &(lmb.memory);
-        u64 rgnbegin, rgnend;
-        u64 end = base + size;
-        int i;
-        rgnbegin = rgnend = 0; /* supress gcc warnings */
-        /* Find the region where (base, size) belongs to */
-        for (i=0; i < rgn->cnt; i++) {
-                rgnbegin = rgn->region[i].base;
-                rgnend = rgnbegin + rgn->region[i].size;
-                if ((rgnbegin <= base) && (end <= rgnend))
-                        break;
-        }
-        /* Didn't find the region */
-        if (i == rgn->cnt)
-                return -1;
-        /* Check to see if we are removing entire region */
-        if ((rgnbegin == base) && (rgnend == end)) {
-                lmb_remove_region(rgn, i);
-                return 0;
-        }
-        /* Check to see if region is matching at the front */
-        if (rgnbegin == base) {
-                rgn->region[i].base = end;
-                rgn->region[i].size -= size;
-                return 0;
-        }
-        /* Check to see if the region is matching at the end */
-        if (rgnend == end) {
-                rgn->region[i].size -= size;
-                return 0;
-        }
-        /*
-         * We need to split the entry -  adjust the current one to the
-         * beginging of the hole and add the region after hole.
-         */
-        rgn->region[i].size = base - rgn->region[i].base;
-        return lmb_add_region(rgn, end, rgnend - end);
-}
-long __init lmb_reserve(u64 base, u64 size)
-{
-        struct lmb_region *_rgn = &lmb.reserved;
-        BUG_ON(0 == size);
-        return lmb_add_region(_rgn, base, size);
-}
-long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-        unsigned long i;
-        for (i = 0; i < rgn->cnt; i++) {
-                u64 rgnbase = rgn->region[i].base;
-                u64 rgnsize = rgn->region[i].size;
-                if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
-                        break;
-        }
-        return (i < rgn->cnt) ? i : -1;
-}
-static u64 lmb_align_down(u64 addr, u64 size)
-{
-        return addr & ~(size - 1);
-}
-static u64 lmb_align_up(u64 addr, u64 size)
-{
-        return (addr + (size - 1)) & ~(size - 1);
-}
-static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
-                                           u64 size, u64 align)
-{
-        u64 base, res_base;
-        long j;
-        base = lmb_align_down((end - size), align);
-        while (start <= base) {
-                j = lmb_overlaps_region(&lmb.reserved, base, size);
-                if (j < 0) {
-                        /* this area isn't reserved, take it */
-                        if (lmb_add_region(&lmb.reserved, base, size) < 0)
-                                base = ~(u64)0;
-                        return base;
-                }
-                res_base = lmb.reserved.region[j].base;
-                if (res_base < size)
-                        break;
-                base = lmb_align_down(res_base - size, align);
-        }
-        return ~(u64)0;
-}
-static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
-                                       u64 (*nid_range)(u64, u64, int *),
-                                       u64 size, u64 align, int nid)
-{
-        u64 start, end;
-        start = mp->base;
-        end = start + mp->size;
-        start = lmb_align_up(start, align);
-        while (start < end) {
-                u64 this_end;
-                int this_nid;
-                this_end = nid_range(start, end, &this_nid);
-                if (this_nid == nid) {
-                        u64 ret = lmb_alloc_nid_unreserved(start, this_end,
-                                                           size, align);
-                        if (ret != ~(u64)0)
-                                return ret;
-                }
-                start = this_end;
-        }
-        return ~(u64)0;
-}
-u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
-                         u64 (*nid_range)(u64 start, u64 end, int *nid))
-{
-        struct lmb_region *mem = &lmb.memory;
-        int i;
-        BUG_ON(0 == size);
-        size = lmb_align_up(size, align);
-        for (i = 0; i < mem->cnt; i++) {
-                u64 ret = lmb_alloc_nid_region(&mem->region[i],
-                                               nid_range,
-                                               size, align, nid);
-                if (ret != ~(u64)0)
-                        return ret;
-        }
-        return lmb_alloc(size, align);
-}
-u64 __init lmb_alloc(u64 size, u64 align)
-{
-        return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
-}
-u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-        u64 alloc;
-        alloc = __lmb_alloc_base(size, align, max_addr);
-        if (alloc == 0)
-                panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
-                      (unsigned long long) size, (unsigned long long) max_addr);
-        return alloc;
-}
-u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-        long i, j;
-        u64 base = 0;
-        u64 res_base;
-        BUG_ON(0 == size);
-        size = lmb_align_up(size, align);
-        /* On some platforms, make sure we allocate lowmem */
-        /* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
-        if (max_addr == LMB_ALLOC_ANYWHERE)
-                max_addr = LMB_REAL_LIMIT;
-        for (i = lmb.memory.cnt - 1; i >= 0; i--) {
-                u64 lmbbase = lmb.memory.region[i].base;
-                u64 lmbsize = lmb.memory.region[i].size;
-                if (lmbsize < size)
-                        continue;
-                if (max_addr == LMB_ALLOC_ANYWHERE)
-                        base = lmb_align_down(lmbbase + lmbsize - size, align);
-                else if (lmbbase < max_addr) {
-                        base = min(lmbbase + lmbsize, max_addr);
-                        base = lmb_align_down(base - size, align);
-                } else
-                        continue;
-                while (base && lmbbase <= base) {
-                        j = lmb_overlaps_region(&lmb.reserved, base, size);
-                        if (j < 0) {
-                                /* this area isn't reserved, take it */
-                                if (lmb_add_region(&lmb.reserved, base, size) < 0)
-                                        return 0;
-                                return base;
-                        }
-                        res_base = lmb.reserved.region[j].base;
-                        if (res_base < size)
-                                break;
-                        base = lmb_align_down(res_base - size, align);
-                }
-        }
-        return 0;
-}
-/* You must call lmb_analyze() before this. */
-u64 __init lmb_phys_mem_size(void)
-{
-        return lmb.memory.size;
-}
-u64 lmb_end_of_DRAM(void)
-{
-        int idx = lmb.memory.cnt - 1;
-        return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
-}
-/* You must call lmb_analyze() after this. */
-void __init lmb_enforce_memory_limit(u64 memory_limit)
-{
-        unsigned long i;
-        u64 limit;
-        struct lmb_property *p;
-        if (!memory_limit)
-                return;
-        /* Truncate the lmb regions to satisfy the memory limit. */
-        limit = memory_limit;
-        for (i = 0; i < lmb.memory.cnt; i++) {
-                if (limit > lmb.memory.region[i].size) {
-                        limit -= lmb.memory.region[i].size;
-                        continue;
-                }
-                lmb.memory.region[i].size = limit;
-                lmb.memory.cnt = i + 1;
-                break;
-        }
-        if (lmb.memory.region[0].size < lmb.rmo_size)
-                lmb.rmo_size = lmb.memory.region[0].size;
-        memory_limit = lmb_end_of_DRAM();
-        /* And truncate any reserves above the limit also. */
-        for (i = 0; i < lmb.reserved.cnt; i++) {
-                p = &lmb.reserved.region[i];
-                if (p->base > memory_limit)
-                        p->size = 0;
-                else if ((p->base + p->size) > memory_limit)
-                        p->size = memory_limit - p->base;
-                if (p->size == 0) {
-                        lmb_remove_region(&lmb.reserved, i);
-                        i--;
-                }
-        }
-}
-int __init lmb_is_reserved(u64 addr)
-{
-        int i;
-        for (i = 0; i < lmb.reserved.cnt; i++) {
-                u64 upper = lmb.reserved.region[i].base +
-                        lmb.reserved.region[i].size - 1;
-                if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
-                        return 1;
-        }
-        return 0;
-}
-int lmb_is_region_reserved(u64 base, u64 size)
-{
-        return lmb_overlaps_region(&lmb.reserved, base, size);
-}
-/*
- * Given a <base, len>, find which memory regions belong to this range.
- * Adjust the request and return a contiguous chunk.
- */
-int lmb_find(struct lmb_property *res)
-{
-        int i;
-        u64 rstart, rend;
-        rstart = res->base;
-        rend = rstart + res->size - 1;
-        for (i = 0; i < lmb.memory.cnt; i++) {
-                u64 start = lmb.memory.region[i].base;
-                u64 end = start + lmb.memory.region[i].size - 1;
-                if (start > rend)
-                        return -1;
-                if ((end >= rstart) && (start < rend)) {
-                        /* adjust the request */
-                        if (rstart < start)
-                                rstart = start;
-                        if (rend > end)
-                                rend = end;
-                        res->base = rstart;
-                        res->size = rend - rstart + 1;
-                        return 0;
-                }
-        }
-        return -1;
-}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index c4706eb98d3..5021cbc3441 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -15,7 +15,7 @@
 #include <linux/types.h>
 #include <net/netlink.h>
-static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
+static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
        [NLA_U8]        = sizeof(u8),
        [NLA_U16]       = sizeof(u16),
        [NLA_U32]       = sizeof(u32),
@@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
        [NLA_NESTED]    = NLA_HDRLEN,
 };
-static int validate_nla(struct nlattr *nla, int maxtype,
+static int validate_nla(const struct nlattr *nla, int maxtype,
                        const struct nla_policy *policy)
 {
        const struct nla_policy *pt;
@@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 *
 * Returns 0 on success or a negative error code.
 */
-int nla_validate(struct nlattr *head, int len, int maxtype,
+int nla_validate(const struct nlattr *head, int len, int maxtype,
                 const struct nla_policy *policy)
 {
-        struct nlattr *nla;
+        const struct nlattr *nla;
        int rem, err;
        nla_for_each_attr(nla, head, len, rem) {
@@ -167,16 +167,16 @@ nla_policy_len(const struct nla_policy *p, int n)
 * @policy: validation policy
 *
 * Parses a stream of attributes and stores a pointer to each attribute in
- * the tb array accessable via the attribute type. Attributes with a type
+ * the tb array accessible via the attribute type. Attributes with a type
 * exceeding maxtype will be silently ignored for backwards compatibility
 * reasons. policy may be set to NULL if no validation is required.
 *
 * Returns 0 on success or a negative error code.
 */
-int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
-              const struct nla_policy *policy)
+              int len, const struct nla_policy *policy)
 {
-        struct nlattr *nla;
+        const struct nlattr *nla;
        int rem, err;
        memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
@@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
                                        goto errout;
                        }
-                        tb[type] = nla;
+                        tb[type] = (struct nlattr *)nla;
                }
        }
@@ -212,14 +212,14 @@ errout:
 *
 * Returns the first attribute in the stream matching the specified type.
 */
-struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
+struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype)
 {
-        struct nlattr *nla;
+        const struct nlattr *nla;
        int rem;
        nla_for_each_attr(nla, head, len, rem)
                if (nla_type(nla) == attrtype)
-                        return nla;
+                        return (struct nlattr *)nla;
        return NULL;
 }
diff --git a/lib/parser.c b/lib/parser.c
index fb34977246b..6e89eca5cca 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -128,12 +128,13 @@ static int match_number(substring_t *s, int *result, int base)
        char *endp;
        char *buf;
        int ret;
+        size_t len = s->to - s->from;
-        buf = kmalloc(s->to - s->from + 1, GFP_KERNEL);
+        buf = kmalloc(len + 1, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
-        memcpy(buf, s->from, s->to - s->from);
+        memcpy(buf, s->from, len);
-        buf[s->to - s->from] = '\0';
+        buf[len] = '\0';
        *result = simple_strtol(buf, &endp, base);
        ret = 0;
        if (endp == buf)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d73444..28f2c33c6b5 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -8,10 +8,53 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
+#include <linux/debugobjects.h>
 static LIST_HEAD(percpu_counters);
 static DEFINE_MUTEX(percpu_counters_lock);
+#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
+static struct debug_obj_descr percpu_counter_debug_descr;
+static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
+{
+        struct percpu_counter *fbc = addr;
+        switch (state) {
+        case ODEBUG_STATE_ACTIVE:
+                percpu_counter_destroy(fbc);
+                debug_object_free(fbc, &percpu_counter_debug_descr);
+                return 1;
+        default:
+                return 0;
+        }
+}
+static struct debug_obj_descr percpu_counter_debug_descr = {
+        .name           = "percpu_counter",
+        .fixup_free     = percpu_counter_fixup_free,
+};
+static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
+{
+        debug_object_init(fbc, &percpu_counter_debug_descr);
+        debug_object_activate(fbc, &percpu_counter_debug_descr);
+}
+static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
+{
+        debug_object_deactivate(fbc, &percpu_counter_debug_descr);
+        debug_object_free(fbc, &percpu_counter_debug_descr);
+}
+#else   /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
+static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
+{ }
+static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
+{ }
+#endif  /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 {
        int cpu;
@@ -29,20 +72,18 @@ EXPORT_SYMBOL(percpu_counter_set);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
        s64 count;
-        s32 *pcount;
-        int cpu = get_cpu();
-        pcount = per_cpu_ptr(fbc->counters, cpu);
+        preempt_disable();
-        count = *pcount + amount;
+        count = __this_cpu_read(*fbc->counters) + amount;
        if (count >= batch || count <= -batch) {
                spin_lock(&fbc->lock);
                fbc->count += count;
-                *pcount = 0;
+                __this_cpu_write(*fbc->counters, 0);
                spin_unlock(&fbc->lock);
        } else {
-                *pcount = count;
+                __this_cpu_write(*fbc->counters, count);
        }
-        put_cpu();
+        preempt_enable();
 }
 EXPORT_SYMBOL(__percpu_counter_add);
@@ -75,7 +116,11 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
        fbc->counters = alloc_percpu(s32);
        if (!fbc->counters)
                return -ENOMEM;
+        debug_percpu_counter_activate(fbc);
 #ifdef CONFIG_HOTPLUG_CPU
+        INIT_LIST_HEAD(&fbc->list);
        mutex_lock(&percpu_counters_lock);
        list_add(&fbc->list, &percpu_counters);
        mutex_unlock(&percpu_counters_lock);
@@ -89,6 +134,8 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
        if (!fbc->counters)
                return;
+        debug_percpu_counter_deactivate(fbc);
 #ifdef CONFIG_HOTPLUG_CPU
        mutex_lock(&percpu_counters_lock);
        list_del(&fbc->list);
@@ -137,6 +184,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
        return NOTIFY_OK;
 }
+/*
+ * Compare counter against given value.
+ * Return 1 if greater, 0 if equal and -1 if less
+ */
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+        s64     count;
+        count = percpu_counter_read(fbc);
+        /* Check to see if rough count will be sufficient for comparison */
+        if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+                if (count > rhs)
+                        return 1;
+                else
+                        return -1;
+        }
+        /* Need to use precise count */
+        count = percpu_counter_sum(fbc);
+        if (count > rhs)
+                return 1;
+        else if (count < rhs)
+                return -1;
+        else
+                return 0;
+}
+EXPORT_SYMBOL(percpu_counter_compare);
 static int __init percpu_counter_startup(void)
 {
        compute_batch_value();
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 92cdd9936e3..5086bb962b4 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
-#include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
@@ -50,7 +49,7 @@ struct radix_tree_node {
        unsigned int    height;         /* Height from the bottom */
        unsigned int    count;
        struct rcu_head rcu_head;
-        void            *slots[RADIX_TREE_MAP_SIZE];
+        void __rcu      *slots[RADIX_TREE_MAP_SIZE];
        unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
@@ -83,6 +82,16 @@ struct radix_tree_preload {
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
+static inline void *ptr_to_indirect(void *ptr)
+{
+        return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
+}
+static inline void *indirect_to_ptr(void *ptr)
+{
+        return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
+}
 static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
 {
        return root->gfp_mask & __GFP_BITS_MASK;
@@ -175,14 +184,16 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
 {
        struct radix_tree_node *node =
                        container_of(head, struct radix_tree_node, rcu_head);
+        int i;
        /*
         * must only free zeroed nodes into the slab. radix_tree_shrink
         * can leave us with a non-NULL entry in the first slot, so clear
         * that here to make sure.
         */
-        tag_clear(node, 0, 0);
+        for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
-        tag_clear(node, 1, 0);
+                tag_clear(node, i, 0);
        node->slots[0] = NULL;
        node->count = 0;
@@ -264,7 +275,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
                        return -ENOMEM;
                /* Increase the height.  */
-                node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
+                node->slots[0] = indirect_to_ptr(root->rnode);
                /* Propagate the aggregated tag info into the new root */
                for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -275,7 +286,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
                newheight = root->height+1;
                node->height = newheight;
                node->count = 1;
-                node = radix_tree_ptr_to_indirect(node);
+                node = ptr_to_indirect(node);
                rcu_assign_pointer(root->rnode, node);
                root->height = newheight;
        } while (height > root->height);
@@ -308,7 +319,7 @@ int radix_tree_insert(struct radix_tree_root *root,
                        return error;
        }
-        slot = radix_tree_indirect_to_ptr(root->rnode);
+        slot = indirect_to_ptr(root->rnode);
        height = root->height;
        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
@@ -324,8 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
                                rcu_assign_pointer(node->slots[offset], slot);
                                node->count++;
                        } else
-                                rcu_assign_pointer(root->rnode,
+                                rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
-                                        radix_tree_ptr_to_indirect(slot));
                }
                /* Go a level down */
@@ -364,7 +374,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
        unsigned int height, shift;
        struct radix_tree_node *node, **slot;
-        node = rcu_dereference(root->rnode);
+        node = rcu_dereference_raw(root->rnode);
        if (node == NULL)
                return NULL;
@@ -373,7 +383,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
                        return NULL;
                return is_slot ? (void *)&root->rnode : node;
        }
-        node = radix_tree_indirect_to_ptr(node);
+        node = indirect_to_ptr(node);
        height = node->height;
        if (index > radix_tree_maxindex(height))
@@ -384,7 +394,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
        do {
                slot = (struct radix_tree_node **)
                        (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
-                node = rcu_dereference(*slot);
+                node = rcu_dereference_raw(*slot);
                if (node == NULL)
                        return NULL;
@@ -392,7 +402,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
                height--;
        } while (height > 0);
-        return is_slot ? (void *)slot:node;
+        return is_slot ? (void *)slot : indirect_to_ptr(node);
 }
 /**
@@ -454,7 +464,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
        height = root->height;
        BUG_ON(index > radix_tree_maxindex(height));
-        slot = radix_tree_indirect_to_ptr(root->rnode);
+        slot = indirect_to_ptr(root->rnode);
        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
        while (height > 0) {
@@ -508,7 +518,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
        pathp->node = NULL;
-        slot = radix_tree_indirect_to_ptr(root->rnode);
+        slot = indirect_to_ptr(root->rnode);
        while (height > 0) {
                int offset;
@@ -556,6 +566,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
 *
 *  0: tag not present or not set
 *  1: tag set
+ *
+ * Note that the return value of this function may not be relied on, even if
+ * the RCU lock is held, unless tag modification and node deletion are excluded
+ * from concurrency.
 */
 int radix_tree_tag_get(struct radix_tree_root *root,
                        unsigned long index, unsigned int tag)
@@ -568,13 +582,13 @@ int radix_tree_tag_get(struct radix_tree_root *root,
        if (!root_tag_get(root, tag))
                return 0;
-        node = rcu_dereference(root->rnode);
+        node = rcu_dereference_raw(root->rnode);
        if (node == NULL)
                return 0;
        if (!radix_tree_is_indirect_ptr(node))
                return (index == 0);
-        node = radix_tree_indirect_to_ptr(node);
+        node = indirect_to_ptr(node);
        height = node->height;
        if (index > radix_tree_maxindex(height))
@@ -596,13 +610,9 @@ int radix_tree_tag_get(struct radix_tree_root *root,
                 */
                if (!tag_get(node, tag, offset))
                        saw_unset_tag = 1;
-                if (height == 1) {
+                if (height == 1)
-                        int ret = tag_get(node, tag, offset);
+                        return !!tag_get(node, tag, offset);
+                node = rcu_dereference_raw(node->slots[offset]);
-                        BUG_ON(ret && saw_unset_tag);
-                        return !!ret;
-                }
-                node = rcu_dereference(node->slots[offset]);
                shift -= RADIX_TREE_MAP_SHIFT;
                height--;
        }
@@ -610,6 +620,134 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 EXPORT_SYMBOL(radix_tree_tag_get);
 /**
+ * radix_tree_range_tag_if_tagged - for each item in given range set given
+ *                                 tag if item has another tag set
+ * @root:               radix tree root
+ * @first_indexp:       pointer to a starting index of a range to scan
+ * @last_index:         last index of a range to scan
+ * @nr_to_tag:          maximum number of items to tag
+ * @iftag:              tag index to test
+ * @settag:             tag index to set if tested tag is set
+ *
+ * This function scans the range of the radix tree from *first_indexp to
+ * last_index (inclusive).  For each item in the range, if iftag is set, the
+ * function also sets settag. The function stops either after tagging nr_to_tag
+ * items or after reaching last_index.
+ *
+ * The tags must be set from the leaf level only and propagated back up the
+ * path to the root. We must do this so that we resolve the full path before
+ * setting any tags on intermediate nodes. If we set tags as we descend, then
+ * we can get to the leaf node and find that the index that has the iftag
+ * set is outside the range we are scanning. This results in dangling tags and
+ * can lead to problems with later tag operations (e.g. livelocks on lookups).
+ *
+ * The function returns number of leaves where the tag was set and sets
+ * *first_indexp to the first unscanned index.
+ * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
+ * be prepared to handle that.
+ */
+unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
+                unsigned long *first_indexp, unsigned long last_index,
+                unsigned long nr_to_tag,
+                unsigned int iftag, unsigned int settag)
+{
+        unsigned int height = root->height;
+        struct radix_tree_path path[height];
+        struct radix_tree_path *pathp = path;
+        struct radix_tree_node *slot;
+        unsigned int shift;
+        unsigned long tagged = 0;
+        unsigned long index = *first_indexp;
+        last_index = min(last_index, radix_tree_maxindex(height));
+        if (index > last_index)
+                return 0;
+        if (!nr_to_tag)
+                return 0;
+        if (!root_tag_get(root, iftag)) {
+                *first_indexp = last_index + 1;
+                return 0;
+        }
+        if (height == 0) {
+                *first_indexp = last_index + 1;
+                root_tag_set(root, settag);
+                return 1;
+        }
+        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+        slot = indirect_to_ptr(root->rnode);
+        /*
+         * we fill the path from (root->height - 2) to 0, leaving the index at
+         * (root->height - 1) as a terminator. Zero the node in the terminator
+         * so that we can use this to end walk loops back up the path.
+         */
+        path[height - 1].node = NULL;
+        for (;;) {
+                int offset;
+                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+                if (!slot->slots[offset])
+                        goto next;
+                if (!tag_get(slot, iftag, offset))
+                        goto next;
+                if (height > 1) {
+                        /* Go down one level */
+                        height--;
+                        shift -= RADIX_TREE_MAP_SHIFT;
+                        path[height - 1].node = slot;
+                        path[height - 1].offset = offset;
+                        slot = slot->slots[offset];
+                        continue;
+                }
+                /* tag the leaf */
+                tagged++;
+                tag_set(slot, settag, offset);
+                /* walk back up the path tagging interior nodes */
+                pathp = &path[0];
+                while (pathp->node) {
+                        /* stop if we find a node with the tag already set */
+                        if (tag_get(pathp->node, settag, pathp->offset))
+                                break;
+                        tag_set(pathp->node, settag, pathp->offset);
+                        pathp++;
+                }
+next:
+                /* Go to next item at level determined by 'shift' */
+                index = ((index >> shift) + 1) << shift;
+                /* Overflow can happen when last_index is ~0UL... */
+                if (index > last_index || !index)
+                        break;
+                if (tagged >= nr_to_tag)
+                        break;
+                while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
+                        /*
+                         * We've fully scanned this node. Go up. Because
+                         * last_index is guaranteed to be in the tree, what
+                         * we do below cannot wander astray.
+                         */
+                        slot = path[height - 1].node;
+                        height++;
+                        shift += RADIX_TREE_MAP_SHIFT;
+                }
+        }
+        /*
+         * The iftag must have been set somewhere because otherwise
+         * we would return immediately at the beginning of the function
+         */
+        root_tag_set(root, settag);
+        *first_indexp = index;
+        return tagged;
+}
+EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
+/**
 *      radix_tree_next_hole    -    find the next hole (not-present entry)
 *      @root:          tree root
 *      @index:         index key
@@ -657,7 +795,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
 *
 *      Returns: the index of the hole if found, otherwise returns an index
 *      outside of the set specified (in which case 'index - return >= max_scan'
- *      will be true). In rare cases of wrap-around, LONG_MAX will be returned.
+ *      will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
 *
 *      radix_tree_next_hole may be called under rcu_read_lock. However, like
 *      radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -675,7 +813,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
                if (!radix_tree_lookup(root, index))
                        break;
                index--;
-                if (index == LONG_MAX)
+                if (index == ULONG_MAX)
                        break;
        }
@@ -711,7 +849,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
                }
                shift -= RADIX_TREE_MAP_SHIFT;
-                slot = rcu_dereference(slot->slots[i]);
+                slot = rcu_dereference_raw(slot->slots[i]);
                if (slot == NULL)
                        goto out;
        }
@@ -758,7 +896,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
        unsigned long cur_index = first_index;
        unsigned int ret;
-        node = rcu_dereference(root->rnode);
+        node = rcu_dereference_raw(root->rnode);
        if (!node)
                return 0;
@@ -768,7 +906,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                results[0] = node;
                return 1;
        }
-        node = radix_tree_indirect_to_ptr(node);
+        node = indirect_to_ptr(node);
        max_index = radix_tree_maxindex(node->height);
@@ -787,7 +925,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                        slot = *(((void ***)results)[ret + i]);
                        if (!slot)
                                continue;
-                        results[ret + nr_found] = rcu_dereference(slot);
+                        results[ret + nr_found] =
+                                indirect_to_ptr(rcu_dereference_raw(slot));
                        nr_found++;
                }
                ret += nr_found;
@@ -826,7 +965,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
        unsigned long cur_index = first_index;
        unsigned int ret;
-        node = rcu_dereference(root->rnode);
+        node = rcu_dereference_raw(root->rnode);
        if (!node)
                return 0;
@@ -836,7 +975,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
                results[0] = (void **)&root->rnode;
                return 1;
        }
-        node = radix_tree_indirect_to_ptr(node);
+        node = indirect_to_ptr(node);
        max_index = radix_tree_maxindex(node->height);
@@ -915,7 +1054,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
                        }
                }
                shift -= RADIX_TREE_MAP_SHIFT;
-                slot = rcu_dereference(slot->slots[i]);
+                slot = rcu_dereference_raw(slot->slots[i]);
                if (slot == NULL)
                        break;
        }
@@ -951,7 +1090,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
        if (!root_tag_get(root, tag))
                return 0;
-        node = rcu_dereference(root->rnode);
+        node = rcu_dereference_raw(root->rnode);
        if (!node)
                return 0;
@@ -961,7 +1100,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
                results[0] = node;
                return 1;
        }
-        node = radix_tree_indirect_to_ptr(node);
+        node = indirect_to_ptr(node);
        max_index = radix_tree_maxindex(node->height);
@@ -980,7 +1119,8 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
                        slot = *(((void ***)results)[ret + i]);
                        if (!slot)
                                continue;
-                        results[ret + nr_found] = rcu_dereference(slot);
+                        results[ret + nr_found] =
+                                indirect_to_ptr(rcu_dereference_raw(slot));
                        nr_found++;
                }
                ret += nr_found;
@@ -1020,7 +1160,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
        if (!root_tag_get(root, tag))
                return 0;
-        node = rcu_dereference(root->rnode);
+        node = rcu_dereference_raw(root->rnode);
        if (!node)
                return 0;
@@ -1030,7 +1170,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
                results[0] = (void **)&root->rnode;
                return 1;
        }
-        node = radix_tree_indirect_to_ptr(node);
+        node = indirect_to_ptr(node);
        max_index = radix_tree_maxindex(node->height);
@@ -1066,7 +1206,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
                void *newptr;
                BUG_ON(!radix_tree_is_indirect_ptr(to_free));
-                to_free = radix_tree_indirect_to_ptr(to_free);
+                to_free = indirect_to_ptr(to_free);
                /*
                 * The candidate node has more than one child, or its child
@@ -1079,16 +1219,39 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
                /*
                 * We don't need rcu_assign_pointer(), since we are simply
-                 * moving the node from one part of the tree to another. If
+                 * moving the node from one part of the tree to another: if it
-                 * it was safe to dereference the old pointer to it
+                 * was safe to dereference the old pointer to it
                 * (to_free->slots[0]), it will be safe to dereference the new
-                 * one (root->rnode).
+                 * one (root->rnode) as far as dependent read barriers go.
                 */
                newptr = to_free->slots[0];
                if (root->height > 1)
-                        newptr = radix_tree_ptr_to_indirect(newptr);
+                        newptr = ptr_to_indirect(newptr);
                root->rnode = newptr;
                root->height--;
+                /*
+                 * We have a dilemma here. The node's slot[0] must not be
+                 * NULLed in case there are concurrent lookups expecting to
+                 * find the item. However if this was a bottom-level node,
+                 * then it may be subject to the slot pointer being visible
+                 * to callers dereferencing it. If item corresponding to
+                 * slot[0] is subsequently deleted, these callers would expect
+                 * their slot to become empty sooner or later.
+                 *
+                 * For example, lockless pagecache will look up a slot, deref
+                 * the page pointer, and if the page is 0 refcount it means it
+                 * was concurrently deleted from pagecache so try the deref
+                 * again. Fortunately there is already a requirement for logic
+                 * to retry the entire slot lookup -- the indirect pointer
+                 * problem (replacing direct root node with an indirect pointer
+                 * also results in a stale slot). So tag the slot as indirect
+                 * to force callers to retry.
+                 */
+                if (root->height == 0)
+                        *((unsigned long *)&to_free->slots[0]) |=
+                                                RADIX_TREE_INDIRECT_PTR;
                radix_tree_node_free(to_free);
        }
 }
@@ -1125,7 +1288,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
                root->rnode = NULL;
                goto out;
        }
-        slot = radix_tree_indirect_to_ptr(slot);
+        slot = indirect_to_ptr(slot);
        shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
        pathp->node = NULL;
@@ -1167,8 +1330,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
                        radix_tree_node_free(to_free);
                if (pathp->node->count) {
-                        if (pathp->node ==
+                        if (pathp->node == indirect_to_ptr(root->rnode))
-                                        radix_tree_indirect_to_ptr(root->rnode))
                                radix_tree_shrink(root);
                        goto out;
                }
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
new file mode 100644
index 00000000000..162becacf97
--- /dev/null
+++ b/lib/raid6/.gitignore
@@ -0,0 +1,4 @@
+mktables
+altivec*.c
+int*.c
+tables.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
new file mode 100644
index 00000000000..8a38102770f
--- /dev/null
+++ b/lib/raid6/Makefile
@@ -0,0 +1,75 @@
+# Build the RAID-6 P/Q library (raid6_pq) when CONFIG_RAID6_PQ is enabled.
+obj-$(CONFIG_RAID6_PQ)  += raid6_pq.o
+# Objects that make up raid6_pq.  The int*.c, altivec*.c and tables.c
+# sources are generated by the rules further down in this file.
+raid6_pq-y      += algos.o recov.o tables.o int1.o int2.o int4.o \
+                   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
+                   altivec8.o mmx.o sse1.o sse2.o
+# mktables is a host program; it is run at build time to emit tables.c.
+hostprogs-y     += mktables
+# Generate an unrolled source: run unroll.awk with N set to the per-target
+# UNROLL value, reading the first prerequisite and writing the target.
+# If awk fails, the partial output is removed and the command fails.
+quiet_cmd_unroll = UNROLL  $@
+      cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
+                   < $< > $@ || ( rm -f $@ && exit 1 )
+# Extra compiler flags for the altivec*.o objects, set only when
+# CONFIG_ALTIVEC=y (otherwise altivec_flags stays empty).
+ifeq ($(CONFIG_ALTIVEC),y)
+altivec_flags := -maltivec -mabi=altivec
+endif
+# Integer implementations: int<N>.c is int.uc unrolled N ways.
+targets += int1.c
+$(obj)/int1.c:   UNROLL := 1
+$(obj)/int1.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+targets += int2.c
+$(obj)/int2.c:   UNROLL := 2
+$(obj)/int2.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+targets += int4.c
+$(obj)/int4.c:   UNROLL := 4
+$(obj)/int4.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+targets += int8.c
+$(obj)/int8.c:   UNROLL := 8
+$(obj)/int8.c:   $(src)/int.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+targets += int16.c
+$(obj)/int16.c:  UNROLL := 16
+$(obj)/int16.c:  $(src)/int.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+targets += int32.c
+$(obj)/int32.c:  UNROLL := 32
+$(obj)/int32.c:  $(src)/int.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+# Altivec implementations: altivec<N>.c is altivec.uc unrolled N ways and
+# is compiled with $(altivec_flags).
+CFLAGS_altivec1.o += $(altivec_flags)
+targets += altivec1.c
+$(obj)/altivec1.c:   UNROLL := 1
+$(obj)/altivec1.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+CFLAGS_altivec2.o += $(altivec_flags)
+targets += altivec2.c
+$(obj)/altivec2.c:   UNROLL := 2
+$(obj)/altivec2.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+CFLAGS_altivec4.o += $(altivec_flags)
+targets += altivec4.c
+$(obj)/altivec4.c:   UNROLL := 4
+$(obj)/altivec4.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+CFLAGS_altivec8.o += $(altivec_flags)
+targets += altivec8.c
+$(obj)/altivec8.c:   UNROLL := 8
+$(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
+        $(call if_changed,unroll)
+# Generate tables.c by capturing the standard output of the mktables host
+# program; a partial output file is removed if mktables fails.
+quiet_cmd_mktable = TABLE   $@
+      cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
+targets += tables.c
+$(obj)/tables.c: $(obj)/mktables FORCE
+        $(call if_changed,mktable)
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
new file mode 100644
index 00000000000..b595f560bee
--- /dev/null
+++ b/lib/raid6/algos.c
@@ -0,0 +1,154 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6/algos.c
+ *
+ * Algorithm list and algorithm selection for RAID-6
+ */
+#include <linux/raid/pq.h>
+#ifndef __KERNEL__
+#include <sys/mman.h>
+#include <stdio.h>
+#else
+#include <linux/gfp.h>
+#if !RAID6_USE_EMPTY_ZERO_PAGE
+/* In .bss so it's zeroed */
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+EXPORT_SYMBOL(raid6_empty_zero_page);
+#endif
+#endif
+struct raid6_calls raid6_call;
+EXPORT_SYMBOL_GPL(raid6_call);
+/*
+ * NULL-terminated list of candidate gen_syndrome implementations.
+ * The intx1..intx8 entries are always present; the remaining entries are
+ * compiled in per architecture by the preprocessor checks below.
+ * raid6_select_algo() walks this list until it reaches the NULL entry.
+ */
+const struct raid6_calls * const raid6_algos[] = {
+        &raid6_intx1,
+        &raid6_intx2,
+        &raid6_intx4,
+        &raid6_intx8,
+#if defined(__ia64__)
+        &raid6_intx16,
+        &raid6_intx32,
+#endif
+#if defined(__i386__) && !defined(__arch_um__)
+        &raid6_mmxx1,
+        &raid6_mmxx2,
+        &raid6_sse1x1,
+        &raid6_sse1x2,
+        &raid6_sse2x1,
+        &raid6_sse2x2,
+#endif
+#if defined(__x86_64__) && !defined(__arch_um__)
+        &raid6_sse2x1,
+        &raid6_sse2x2,
+        &raid6_sse2x4,
+#endif
+#ifdef CONFIG_ALTIVEC
+        &raid6_altivec1,
+        &raid6_altivec2,
+        &raid6_altivec4,
+        &raid6_altivec8,
+#endif
+        NULL    /* list terminator */
+};
+#ifdef __KERNEL__
+#define RAID6_TIME_JIFFIES_LG2  4
+#else
+/* Need more time to be stable in userspace */
+#define RAID6_TIME_JIFFIES_LG2  9
+#define time_before(x, y) ((x) < (y))
+#endif
+/*
+ * raid6_select_algo - benchmark the candidates in raid6_algos[] and install
+ * the winner in raid6_call.
+ *
+ * This code uses the gfmul table as a convenient data set to abuse: the
+ * 64 KiB raid6_gfmul table is presented to gen_syndrome() as
+ * 65536/PAGE_SIZE data "disks" of PAGE_SIZE bytes each, and a freshly
+ * allocated 2-page buffer supplies the P and Q output pages.
+ *
+ * Every usable candidate (no valid() hook, or valid() returns non-zero) is
+ * run back to back for 1 << RAID6_TIME_JIFFIES_LG2 jiffies with preemption
+ * disabled, and the number of completed passes is counted.  A higher
+ * ->prefer always wins; the pass count only breaks ties between candidates
+ * with equal ->prefer.
+ *
+ * Returns 0 when an algorithm was selected, -ENOMEM if the scratch pages
+ * could not be allocated, or -EINVAL if no candidate was usable.
+ */
+int __init raid6_select_algo(void)
+{
+        const struct raid6_calls * const * algo;
+        const struct raid6_calls * best;
+        char *syndromes;
+        void *dptrs[(65536/PAGE_SIZE)+2];
+        int i, disks;
+        unsigned long perf, bestperf;
+        int bestprefer;
+        unsigned long j0, j1;
+        disks = (65536/PAGE_SIZE)+2;
+        /* Data "disks": consecutive PAGE_SIZE slices of the gfmul table */
+        for ( i = 0 ; i < disks-2 ; i++ ) {
+                dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
+        }
+        /* Normal code - use a 2-page allocation to avoid D$ conflict */
+        syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+        if ( !syndromes ) {
+                printk("raid6: Yikes!  No memory available.\n");
+                return -ENOMEM;
+        }
+        /* The last two slots receive P and Q respectively */
+        dptrs[disks-2] = syndromes;
+        dptrs[disks-1] = syndromes + PAGE_SIZE;
+        bestperf = 0;  bestprefer = 0;  best = NULL;
+        for ( algo = raid6_algos ; *algo ; algo++ ) {
+                if ( !(*algo)->valid || (*algo)->valid() ) {
+                        perf = 0;
+                        preempt_disable();
+                        /* Wait for a jiffy boundary so the timed window
+                           starts on a fresh tick */
+                        j0 = jiffies;
+                        while ( (j1 = jiffies) == j0 )
+                                cpu_relax();
+                        while (time_before(jiffies,
+                                            j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+                                (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
+                                perf++;
+                        }
+                        preempt_enable();
+                        if ( (*algo)->prefer > bestprefer ||
+                             ((*algo)->prefer == bestprefer &&
+                              perf > bestperf) ) {
+                                best = *algo;
+                                bestprefer = best->prefer;
+                                bestperf = perf;
+                        }
+                        /*
+                         * Each pass covers 2^16 bytes and the window is
+                         * 2^RAID6_TIME_JIFFIES_LG2 jiffies long, so
+                         * perf*HZ >> (20-16+LG2) is in units of 2^20
+                         * bytes per second.  The value is unsigned long,
+                         * hence %lu.
+                         */
+                        printk("raid6: %-8s %5lu MB/s\n", (*algo)->name,
+                               (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+                }
+        }
+        if (best) {
+                printk("raid6: using algorithm %s (%lu MB/s)\n",
+                       best->name,
+                       (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+                raid6_call = *best;
+        } else
+                printk("raid6: Yikes!  No algorithm found!\n");
+        free_pages((unsigned long)syndromes, 1);
+        return best ? 0 : -EINVAL;
+}
+/* Module exit hook: this library has nothing to tear down. */
+static void raid6_exit(void)
+{
+}
+subsys_initcall(raid6_select_algo);
+module_exit(raid6_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
new file mode 100644
index 00000000000..2654d5c854b
--- /dev/null
+++ b/lib/raid6/altivec.uc
@@ -0,0 +1,130 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6altivec$#.c
+ *
+ * $#-way unrolled Altivec RAID-6 instruction set
+ *
+ * This file is postprocessed using unroll.awk
+ *
+ * <benh> hpa: in process,
+ * you can just "steal" the vec unit with enable_kernel_altivec() (but
+ * bracket this with preempt_disable/enable or in a lock)
+ */
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+#include <altivec.h>
+#ifdef __KERNEL__
+# include <asm/system.h>
+# include <asm/cputable.h>
+#endif
+/*
+ * This is the C data type to use.  We use a vector of
+ * signed char so vec_cmpgt() will generate the right
+ * instruction.
+ */
+typedef vector signed char unative_t;
+#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
+#define NSIZE   sizeof(unative_t)
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ *
+ * It is implemented as v + v on a vector of bytes.
+ * NOTE(review): this relies on vec_add() on byte vectors not carrying
+ * between elements - confirm against the AltiVec documentation.
+ */
+static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
+{
+        return vec_add(v,v);
+}
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ *
+ * The bytes are signed, so "0 > byte" holds exactly when the high bit of
+ * the byte is set; that comparison against an all-zero vector is what
+ * produces the mask.
+ */
+static inline __attribute_const__ unative_t MASK(unative_t v)
+{
+        unative_t zv = NBYTES(0);
+        /* vec_cmpgt returns a vector bool char; thus the need for the cast */
+        return (unative_t)vec_cmpgt(zv, v);
+}
+/* This is noinline to make damned sure that gcc doesn't move any of the
+   Altivec code around the enable/disable code */
+/*
+ * Compute the P (XOR parity) and Q (syndrome) blocks.  ptrs[0..disks-3]
+ * are the data blocks, ptrs[disks-2] receives P and ptrs[disks-1]
+ * receives Q; "bytes" is the length of each block.
+ *
+ * NOTE(review): "$#" and "$$" are placeholders for unroll.awk;
+ * presumably "$#" becomes the unroll factor and each line containing "$$"
+ * is replicated once per unrolled lane - confirm against unroll.awk.
+ */
+static void noinline
+raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+        unative_t x1d = NBYTES(0x1d);
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+                /* Seed both accumulators with the highest data disk */
+                wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+                /* Walk the remaining data disks from high to low */
+                for ( z = z0-1 ; z >= 0 ; z-- ) {
+                        wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+                        wp$$ = vec_xor(wp$$, wd$$);     /* P ^= data */
+                        /* Q = (Q shifted left per byte, with 0x1d XORed
+                           into bytes whose high bit was set) ^ data */
+                        w2$$ = MASK(wq$$);
+                        w1$$ = SHLBYTE(wq$$);
+                        w2$$ = vec_and(w2$$, x1d);
+                        w1$$ = vec_xor(w1$$, w2$$);
+                        wq$$ = vec_xor(w1$$, wd$$);
+                }
+                *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+                *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+        }
+}
+/*
+ * gen_syndrome entry point for the Altivec implementation: disables
+ * preemption, calls enable_kernel_altivec(), runs the real computation and
+ * re-enables preemption afterwards (see the note in the file header about
+ * bracketing Altivec use with preempt_disable/enable).
+ */
+static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        preempt_disable();
+        enable_kernel_altivec();
+        raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
+        preempt_enable();
+}
+/*
+ * Validity hook shared by all raid6_altivec* tables.  The prototype is
+ * emitted in every expansion of this template, but the body only when
+ * $# == 1, so one generated file carries the single definition.
+ * Returns non-zero when the Altivec implementation may be used.
+ */
+int raid6_have_altivec(void);
+#if $# == 1
+int raid6_have_altivec(void)
+{
+        /* This assumes either all CPUs have Altivec or none does */
+# ifdef __KERNEL__
+        return cpu_has_feature(CPU_FTR_ALTIVEC);
+# else
+        /* Outside the kernel no CPU check is made: always report usable */
+        return 1;
+# endif
+}
+#endif
+/*
+ * Algorithm descriptor referenced from raid6_algos[].
+ * NOTE(review): the initializer is positional; the order is presumably
+ * gen_syndrome, valid, name, prefer - confirm against struct raid6_calls
+ * in <linux/raid/pq.h>.
+ */
+const struct raid6_calls raid6_altivec$# = {
+        raid6_altivec$#_gen_syndrome,
+        raid6_have_altivec,
+        "altivecx$#",
+        0
+};
+#endif /* CONFIG_ALTIVEC */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
new file mode 100644
index 00000000000..d1e276a14fa
--- /dev/null
+++ b/lib/raid6/int.uc
@@ -0,0 +1,117 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6int$#.c
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ *
+ * This file is postprocessed using unroll.awk
+ */
+#include <linux/raid/pq.h>
+/*
+ * This is the C data type to use
+ */
+/* Change this from BITS_PER_LONG if there is something better... */
+#if BITS_PER_LONG == 64
+# define NBYTES(x) ((x) * 0x0101010101010101UL)
+# define NSIZE  8
+# define NSHIFT 3
+# define NSTRING "64"
+typedef u64 unative_t;
+#else
+# define NBYTES(x) ((x) * 0x01010101U)
+# define NSIZE  4
+# define NSHIFT 2
+# define NSTRING "32"
+typedef u32 unative_t;
+#endif
+/*
+ * IA-64 wants insane amounts of unrolling.  On other architectures that
+ * is just a waste of space.
+ */
+#if ($# <= 8) || defined(__ia64__)
+/*
+ * These sub-operations are separate inlines since they can sometimes be
+ * specially optimized using architecture-specific hacks.
+ */
+/*
+ * SHLBYTE(): shift every byte of v left by one bit.  The NBYTES(0xfe)
+ * mask clears the low bit of each byte, so the bit shifted out of one
+ * byte does not roll over into the next byte.
+ */
+static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
+{
+        return (v << 1) & NBYTES(0xfe);
+}
+/*
+ * MASK(): for each byte of v, produce 0xFF when the byte's high bit is 1
+ * and 0x00 when it is 0.
+ */
+static inline __attribute_const__ unative_t MASK(unative_t v)
+{
+        /* Keep only the high bit of every byte */
+        const unative_t top = v & NBYTES(0x80);
+        /* Per byte: (0x80 << 1) - (0x80 >> 7) leaves 0xFF in that byte.
+           Overflow on the top bit is OK */
+        return (top << 1) - (top >> 7);
+}
+/*
+ * Compute the P (XOR parity) and Q (syndrome) blocks using plain integer
+ * arithmetic on unative_t words.  ptrs[0..disks-3] are the data blocks,
+ * ptrs[disks-2] receives P and ptrs[disks-1] receives Q; "bytes" is the
+ * length of each block.
+ *
+ * NOTE(review): "$#" and "$$" are placeholders for unroll.awk;
+ * presumably "$#" becomes the unroll factor and each line containing "$$"
+ * is replicated once per unrolled lane - confirm against unroll.awk.
+ */
+static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+                /* Seed both accumulators with the highest data disk */
+                wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+                /* Walk the remaining data disks from high to low */
+                for ( z = z0-1 ; z >= 0 ; z-- ) {
+                        wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+                        wp$$ ^= wd$$;           /* P ^= data */
+                        /* Q = (Q shifted left per byte, with 0x1d XORed
+                           into bytes whose high bit was set) ^ data */
+                        w2$$ = MASK(wq$$);
+                        w1$$ = SHLBYTE(wq$$);
+                        w2$$ &= NBYTES(0x1d);
+                        w1$$ ^= w2$$;
+                        wq$$ = w1$$ ^ wd$$;
+                }
+                *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+                *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+        }
+}
+/*
+ * Algorithm descriptor referenced from raid6_algos[].  The name is built
+ * from the word size (NSTRING) and the unroll placeholder.
+ * NOTE(review): the initializer is positional; the order is presumably
+ * gen_syndrome, valid, name, prefer - confirm against struct raid6_calls
+ * in <linux/raid/pq.h>.
+ */
+const struct raid6_calls raid6_intx$# = {
+        raid6_int$#_gen_syndrome,
+        NULL,           /* always valid */
+        "int" NSTRING "x$#",
+        0
+};
+#endif
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
new file mode 100644
index 00000000000..3b1500843bb
--- /dev/null
+++ b/lib/raid6/mktables.c
@@ -0,0 +1,132 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * mktables.c
+ *
+ * Make RAID-6 tables.  This is a host user space program to be run at
+ * compile time.
+ */
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <time.h>
+/*
+ * Multiply a by b using shift-and-XOR: for every set bit of b (lowest
+ * first) the current value of a is XORed into the product, and a is then
+ * doubled, XORing in 0x1d whenever its high bit was set before the shift.
+ */
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+        uint8_t product = 0;
+        for (; b != 0; b >>= 1) {
+                const uint8_t high_bit = a & 0x80;
+                if (b & 1)
+                        product ^= a;
+                a <<= 1;
+                if (high_bit)
+                        a ^= 0x1d;
+        }
+        return product;
+}
+/*
+ * Raise a to the power b by square-and-multiply on top of gfmul().  The
+ * exponent is first reduced modulo 255 into the range 0..254, so negative
+ * exponents are accepted.
+ */
+static uint8_t gfpow(uint8_t a, int b)
+{
+        uint8_t result = 1;
+        int exponent = b % 255;
+        if (exponent < 0)
+                exponent += 255;
+        for (; exponent != 0; exponent >>= 1) {
+                if (exponent & 1)
+                        result = gfmul(result, a);
+                a = gfmul(a, a);
+        }
+        return result;
+}
+/*
+ * Print the generated C source for the RAID-6 lookup tables to standard
+ * output.  Four tables are emitted, each followed by an EXPORT_SYMBOL()
+ * wrapped in #ifdef __KERNEL__:
+ *   raid6_gfmul[256][256] - gfmul(i, j) for every pair of bytes
+ *   raid6_gfexp[256]      - successive powers of 2 (entry 255 is 0)
+ *   raid6_gfinv[256]      - x^254 for every byte x
+ *   raid6_gfexi[256]      - raid6_gfinv[raid6_gfexp[i] ^ 1]
+ * argc and argv are not used.  Always returns 0.
+ */
+int main(int argc, char *argv[])
+{
+        int i, j, k;
+        uint8_t v;
+        /* Local copies of the exp and inv tables, needed to build gfexi */
+        uint8_t exptbl[256], invtbl[256];
+        printf("#include <linux/raid/pq.h>\n");
+        /* Compute multiplication table */
+        printf("\nconst u8  __attribute__((aligned(256)))\n"
+                "raid6_gfmul[256][256] =\n"
+                "{\n");
+        for (i = 0; i < 256; i++) {
+                printf("\t{\n");
+                /* Eight values per output line */
+                for (j = 0; j < 256; j += 8) {
+                        printf("\t\t");
+                        for (k = 0; k < 8; k++)
+                                printf("0x%02x,%c", gfmul(i, j + k),
+                                       (k == 7) ? '\n' : ' ');
+                }
+                printf("\t},\n");
+        }
+        printf("};\n");
+        printf("#ifdef __KERNEL__\n");
+        printf("EXPORT_SYMBOL(raid6_gfmul);\n");
+        printf("#endif\n");
+        /* Compute power-of-2 table (exponent) */
+        v = 1;
+        printf("\nconst u8 __attribute__((aligned(256)))\n"
+               "raid6_gfexp[256] =\n" "{\n");
+        for (i = 0; i < 256; i += 8) {
+                printf("\t");
+                for (j = 0; j < 8; j++) {
+                        exptbl[i + j] = v;
+                        printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+                        v = gfmul(v, 2);
+                        if (v == 1)
+                                v = 0;  /* For entry 255, not a real entry */
+                }
+        }
+        printf("};\n");
+        printf("#ifdef __KERNEL__\n");
+        printf("EXPORT_SYMBOL(raid6_gfexp);\n");
+        printf("#endif\n");
+        /* Compute inverse table x^-1 == x^254 */
+        printf("\nconst u8 __attribute__((aligned(256)))\n"
+               "raid6_gfinv[256] =\n" "{\n");
+        for (i = 0; i < 256; i += 8) {
+                printf("\t");
+                for (j = 0; j < 8; j++) {
+                        invtbl[i + j] = v = gfpow(i + j, 254);
+                        printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+                }
+        }
+        printf("};\n");
+        printf("#ifdef __KERNEL__\n");
+        printf("EXPORT_SYMBOL(raid6_gfinv);\n");
+        printf("#endif\n");
+        /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
+        printf("\nconst u8 __attribute__((aligned(256)))\n"
+               "raid6_gfexi[256] =\n" "{\n");
+        for (i = 0; i < 256; i += 8) {
+                printf("\t");
+                for (j = 0; j < 8; j++)
+                        printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
+                               (j == 7) ? '\n' : ' ');
+        }
+        printf("};\n");
+        printf("#ifdef __KERNEL__\n");
+        printf("EXPORT_SYMBOL(raid6_gfexi);\n");
+        printf("#endif\n");
+        return 0;
+}
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
new file mode 100644
index 00000000000..279347f2309
--- /dev/null
+++ b/lib/raid6/mmx.c
@@ -0,0 +1,142 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6/mmx.c
+ *
+ * MMX implementation of RAID-6 syndrome functions
+ */
+#if defined(__i386__) && !defined(__arch_um__)
+#include <linux/raid/pq.h>
+#include "x86.h"
+/* Shared with raid6/sse1.c */
+const struct raid6_mmx_constants {
+        u64 x1d;
+} raid6_mmx_constants = {
+        0x1d1d1d1d1d1d1d1dULL,
+};
+/*
+ * Validity hook for the MMX implementations: non-zero when the boot CPU
+ * reports the MMX feature.
+ */
+static int raid6_have_mmx(void)
+{
+        /* Not really "boot_cpu" but "all_cpus" */
+        return boot_cpu_has(X86_FEATURE_MMX);
+}
+/*
+ * Plain MMX implementation
+ *
+ * Computes the P (XOR parity) and Q (syndrome) blocks 8 bytes at a time.
+ * ptrs[0..disks-3] are the data blocks, ptrs[disks-2] receives P and
+ * ptrs[disks-1] receives Q.  The MMX code is bracketed by
+ * kernel_fpu_begin()/kernel_fpu_end().
+ *
+ * Register use: mm0 = 0x1d in every byte, mm2 = running P,
+ * mm4 = running Q, mm5 = scratch mask (zero between uses),
+ * mm6 = current data word.
+ */
+static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        kernel_fpu_begin();
+        asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+        asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+        for ( d = 0 ; d < bytes ; d += 8 ) {
+                asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+                asm volatile("movq %mm2,%mm4"); /* Q[0] */
+                for ( z = z0-1 ; z >= 0 ; z-- ) {
+                        asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
+                        asm volatile("pcmpgtb %mm4,%mm5"); /* mask = (0 > Q), per signed byte */
+                        asm volatile("paddb %mm4,%mm4"); /* Q += Q, per byte */
+                        asm volatile("pand %mm0,%mm5"); /* mask &= 0x1d */
+                        asm volatile("pxor %mm5,%mm4"); /* Q ^= mask */
+                        asm volatile("pxor %mm5,%mm5"); /* Re-zero temp */
+                        asm volatile("pxor %mm6,%mm2"); /* P ^= data */
+                        asm volatile("pxor %mm6,%mm4"); /* Q ^= data */
+                }
+                asm volatile("movq %%mm2,%0" : "=m" (p[d]));
+                asm volatile("pxor %mm2,%mm2"); /* Clear P accumulator */
+                asm volatile("movq %%mm4,%0" : "=m" (q[d]));
+                asm volatile("pxor %mm4,%mm4"); /* Clear Q accumulator */
+        }
+        kernel_fpu_end();
+}
+/*
+ * Algorithm descriptor for the plain MMX implementation, referenced from
+ * raid6_algos[].
+ * NOTE(review): the initializer is positional; the order is presumably
+ * gen_syndrome, valid, name, prefer - confirm against struct raid6_calls
+ * in <linux/raid/pq.h>.
+ */
+const struct raid6_calls raid6_mmxx1 = {
+        raid6_mmx1_gen_syndrome,
+        raid6_have_mmx,
+        "mmxx1",
+        0
+};
+/*
+ * Unrolled-by-2 MMX implementation
+ *
+ * Same computation as the plain MMX version, but two 8-byte words are
+ * processed per iteration (16 bytes per step).  ptrs[0..disks-3] are the
+ * data blocks, ptrs[disks-2] receives P and ptrs[disks-1] receives Q.
+ *
+ * Register use: mm0 = 0x1d in every byte, mm2/mm3 = running P,
+ * mm4/mm6 = running Q, mm5/mm7 = scratch.  The scratch registers hold
+ * first the mask and then the data words, and are zeroed again at the
+ * end of each inner iteration.
+ */
+static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        kernel_fpu_begin();
+        asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+        asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+        asm volatile("pxor %mm7,%mm7"); /* Zero temp */
+        for ( d = 0 ; d < bytes ; d += 16 ) {
+                asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+                asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
+                asm volatile("movq %mm2,%mm4"); /* Q[0] */
+                asm volatile("movq %mm3,%mm6"); /* Q[1] */
+                for ( z = z0-1 ; z >= 0 ; z-- ) {
+                        /* mask = (0 > Q), per signed byte */
+                        asm volatile("pcmpgtb %mm4,%mm5");
+                        asm volatile("pcmpgtb %mm6,%mm7");
+                        /* Q += Q, per byte */
+                        asm volatile("paddb %mm4,%mm4");
+                        asm volatile("paddb %mm6,%mm6");
+                        /* mask &= 0x1d */
+                        asm volatile("pand %mm0,%mm5");
+                        asm volatile("pand %mm0,%mm7");
+                        /* Q ^= mask */
+                        asm volatile("pxor %mm5,%mm4");
+                        asm volatile("pxor %mm7,%mm6");
+                        /* Reuse the scratch registers for the data words */
+                        asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
+                        asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
+                        /* P ^= data */
+                        asm volatile("pxor %mm5,%mm2");
+                        asm volatile("pxor %mm7,%mm3");
+                        /* Q ^= data */
+                        asm volatile("pxor %mm5,%mm4");
+                        asm volatile("pxor %mm7,%mm6");
+                        /* Re-zero the scratch registers */
+                        asm volatile("pxor %mm5,%mm5");
+                        asm volatile("pxor %mm7,%mm7");
+                }
+                asm volatile("movq %%mm2,%0" : "=m" (p[d]));
+                asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
+                asm volatile("movq %%mm4,%0" : "=m" (q[d]));
+                asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
+        }
+        kernel_fpu_end();
+}
+/*
+ * Algorithm descriptor for the unrolled-by-2 MMX implementation,
+ * referenced from raid6_algos[].
+ * NOTE(review): the initializer is positional; the order is presumably
+ * gen_syndrome, valid, name, prefer - confirm against struct raid6_calls
+ * in <linux/raid/pq.h>.
+ */
+const struct raid6_calls raid6_mmxx2 = {
+        raid6_mmx2_gen_syndrome,
+        raid6_have_mmx,
+        "mmxx2",
+        0
+};
+#endif
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
new file mode 100644
index 00000000000..8590d19cf52
--- /dev/null
+++ b/lib/raid6/recov.c
@@ -0,0 +1,132 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6/recov.c
+ *
+ * RAID-6 data recovery in dual failure mode.  In single failure mode,
+ * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
+ * the syndrome.)
+ */
+#include <linux/raid/pq.h>
+/* Recover two failed data blocks. */
+/*
+ * disks is the total number of blocks in ptrs[] (data, then P at
+ * disks-2 and Q at disks-1), bytes the length of each block, faila and
+ * failb the indices of the two failed data blocks.  The recovered
+ * contents are written into ptrs[faila] and ptrs[failb]; ptrs[] itself
+ * is modified temporarily and restored before the per-byte loop.
+ * NOTE(review): raid6_gfexi is indexed with failb-faila, so callers
+ * presumably must pass faila < failb - confirm against the callers.
+ */
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+                       void **ptrs)
+{
+        u8 *p, *q, *dp, *dq;
+        u8 px, qx, db;
+        const u8 *pbmul;        /* P multiplier table for B data */
+        const u8 *qmul;         /* Q multiplier table (for both) */
+        p = (u8 *)ptrs[disks-2];
+        q = (u8 *)ptrs[disks-1];
+        /* Compute syndrome with zero for the missing data pages
+           Use the dead data pages as temporary storage for
+           delta p and delta q */
+        dp = (u8 *)ptrs[faila];
+        ptrs[faila] = (void *)raid6_empty_zero_page;
+        ptrs[disks-2] = dp;
+        dq = (u8 *)ptrs[failb];
+        ptrs[failb] = (void *)raid6_empty_zero_page;
+        ptrs[disks-1] = dq;
+        raid6_call.gen_syndrome(disks, bytes, ptrs);
+        /* Restore pointer table */
+        ptrs[faila]   = dp;
+        ptrs[failb]   = dq;
+        ptrs[disks-2] = p;
+        ptrs[disks-1] = q;
+        /* Now, pick the proper data tables */
+        pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+        qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+        /* Now do it... */
+        while ( bytes-- ) {
+                /* px/qx combine the stored P/Q with the values just
+                   recomputed from the surviving data */
+                px    = *p ^ *dp;
+                qx    = qmul[*q ^ *dq];
+                *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
+                *dp++ = db ^ px; /* Reconstructed A */
+                p++; q++;
+        }
+}
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
+/* Recover failure of one data block plus the P block */
+/*
+ * disks is the total number of blocks in ptrs[] (data, then P at
+ * disks-2 and Q at disks-1), bytes the length of each block and faila
+ * the index of the failed data block.  The recovered data is written
+ * into ptrs[faila] and P is rebuilt in place in ptrs[disks-2].
+ */
+void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
+{
+        u8 *p, *q, *dq;
+        const u8 *qmul;         /* Q multiplier table */
+        p = (u8 *)ptrs[disks-2];
+        q = (u8 *)ptrs[disks-1];
+        /* Compute syndrome with zero for the missing data page
+           Use the dead data page as temporary storage for delta q */
+        dq = (u8 *)ptrs[faila];
+        ptrs[faila] = (void *)raid6_empty_zero_page;
+        ptrs[disks-1] = dq;
+        /* ptrs[disks-2] still points at p, so this call also overwrites
+           P with the parity of the surviving data */
+        raid6_call.gen_syndrome(disks, bytes, ptrs);
+        /* Restore pointer table */
+        ptrs[faila]   = dq;
+        ptrs[disks-1] = q;
+        /* Now, pick the proper data tables */
+        qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+        /* Now do it... */
+        while ( bytes-- ) {
+                /* Store the recovered data byte in *dq, then fold the
+                   same byte into the recomputed P */
+                *p++ ^= *dq = qmul[*q ^ *dq];
+                q++; dq++;
+        }
+}
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
+#ifndef __KERNEL__
+/*
+ * Testing only.
+ * Recover two failed blocks: order the two failed indices, then dispatch
+ * on which of P (index disks-2) and Q (index disks-1) are among them.
+ */
+void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
+{
+        const int first = ( faila > failb ) ? failb : faila;
+        const int second = ( faila > failb ) ? faila : failb;
+        const int p_slot = disks - 2;
+        const int q_slot = disks - 1;
+        if ( second == q_slot ) {
+                /* P+Q failure.  Just rebuild the syndrome. */
+                if ( first == p_slot )
+                        raid6_call.gen_syndrome(disks, bytes, ptrs);
+                /* Otherwise data+Q failure: reconstruct data from P,
+                   then rebuild syndrome.
+                   NOT IMPLEMENTED - equivalent to RAID-5 */
+                return;
+        }
+        if ( second == p_slot ) {
+                /* data+P failure. */
+                raid6_datap_recov(disks, bytes, first, ptrs);
+                return;
+        }
+        /* data+data failure. */
+        raid6_2data_recov(disks, bytes, first, second, ptrs);
+}
+#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
new file mode 100644
index 00000000000..10dd91948c0
--- /dev/null
+++ b/lib/raid6/sse1.c
@@ -0,0 +1,162 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6/sse1.c
+ *
+ * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
+ *
+ * This is really an MMX implementation, but it requires SSE-1 or
+ * AMD MMXEXT for prefetch support and a few other features.  The
+ * support for nontemporal memory accesses is enough to make this
+ * worthwhile as a separate implementation.
+ */
+#if defined(__i386__) && !defined(__arch_um__)
+#include <linux/raid/pq.h>
+#include "x86.h"
+/* Defined in raid6/mmx.c; only the declaration is repeated here.  The
+ * x1d member is loaded into %mm0 by the syndrome loops in this file and
+ * used there as an AND mask. */
+extern const struct raid6_mmx_constants {
+        u64 x1d;
+} raid6_mmx_constants;
+static int raid6_have_sse1_or_mmxext(void)
+{
+        /* Not really boot_cpu but "all_cpus" */
+        /* MMX is mandatory ... */
+        if (!boot_cpu_has(X86_FEATURE_MMX))
+                return 0;
+        /* ... plus either SSE or the AMD MMX extensions */
+        return boot_cpu_has(X86_FEATURE_XMM) ||
+                boot_cpu_has(X86_FEATURE_MMXEXT);
+}
+/*
+ * Plain SSE1 implementation
+ *
+ * Fills in the P and Q blocks from the data blocks, one 8-byte MMX word
+ * per outer-loop pass, writing the results with non-temporal stores.
+ *
+ * @disks: number of entries in @ptrs; the last two are the P and Q
+ *         destinations, everything before them is a data source.
+ * @bytes: length of each block.  NOTE(review): the loop advances by 8
+ *         with no tail handling, so this presumably has to be a
+ *         multiple of 8 - confirm against callers.
+ * @ptrs:  block pointers: data first, then P, then Q.
+ *
+ * NOTE(review): dptr[z0-1] is read unconditionally, so this looks like
+ * it needs at least two data blocks (disks >= 4) - confirm.
+ */
+static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        kernel_fpu_begin();
+        asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+        asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+        for ( d = 0 ; d < bytes ; d += 8 ) {
+                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+                asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+                asm volatile("movq %mm2,%mm4"); /* Q[0] */
+                asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); /* next data word, consumed one step later */
+                for ( z = z0-2 ; z >= 0 ; z-- ) {
+                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+                        asm volatile("pcmpgtb %mm4,%mm5"); /* 0xff in bytes of Q whose top bit is set */
+                        asm volatile("paddb %mm4,%mm4"); /* double every byte of Q */
+                        asm volatile("pand %mm0,%mm5"); /* keep x1d only in the flagged bytes */
+                        asm volatile("pxor %mm5,%mm4"); /* fold that into the doubled Q */
+                        asm volatile("pxor %mm5,%mm5"); /* temp back to zero */
+                        asm volatile("pxor %mm6,%mm2"); /* P ^= previously loaded data */
+                        asm volatile("pxor %mm6,%mm4"); /* Q ^= previously loaded data */
+                        asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
+                }
+                /* Same step once more for the word still held in mm6 */
+                asm volatile("pcmpgtb %mm4,%mm5");
+                asm volatile("paddb %mm4,%mm4");
+                asm volatile("pand %mm0,%mm5");
+                asm volatile("pxor %mm5,%mm4");
+                asm volatile("pxor %mm5,%mm5");
+                asm volatile("pxor %mm6,%mm2");
+                asm volatile("pxor %mm6,%mm4");
+                asm volatile("movntq %%mm2,%0" : "=m" (p[d])); /* non-temporal store of P */
+                asm volatile("movntq %%mm4,%0" : "=m" (q[d])); /* non-temporal store of Q */
+        }
+        asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */
+        kernel_fpu_end();
+}
+const struct raid6_calls raid6_sse1x1 = {
+        raid6_sse11_gen_syndrome,       /* syndrome generator */
+        raid6_have_sse1_or_mmxext,      /* CPU capability check */
+        "sse1x1",                       /* algorithm name */
+        1                       /* Has cache hints */
+};
+/*
+ * Unrolled-by-2 SSE1 implementation
+ *
+ * Same job as the plain variant, but handles two 8-byte MMX words
+ * (16 bytes) per outer-loop pass, with a second set of registers
+ * (mm3/mm6/mm7) for the upper word.
+ *
+ * @disks: number of entries in @ptrs; the last two are the P and Q
+ *         destinations, everything before them is a data source.
+ * @bytes: length of each block.  NOTE(review): the loop advances by 16
+ *         with no tail handling, so this presumably has to be a
+ *         multiple of 16 - confirm against callers.
+ * @ptrs:  block pointers: data first, then P, then Q.
+ */
+static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        kernel_fpu_begin();
+        asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+        asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+        asm volatile("pxor %mm7,%mm7"); /* Zero temp */
+        /* We uniformly assume a single prefetch covers at least 16 bytes */
+        for ( d = 0 ; d < bytes ; d += 16 ) {
+                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+                asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+                asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
+                asm volatile("movq %mm2,%mm4"); /* Q[0] */
+                asm volatile("movq %mm3,%mm6"); /* Q[1] */
+                for ( z = z0-1 ; z >= 0 ; z-- ) {
+                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+                        asm volatile("pcmpgtb %mm4,%mm5"); /* 0xff in bytes of Q[0] whose top bit is set */
+                        asm volatile("pcmpgtb %mm6,%mm7"); /* likewise for Q[1] */
+                        asm volatile("paddb %mm4,%mm4"); /* double every byte of Q[0] */
+                        asm volatile("paddb %mm6,%mm6"); /* ... and of Q[1] */
+                        asm volatile("pand %mm0,%mm5"); /* keep x1d only in the flagged bytes */
+                        asm volatile("pand %mm0,%mm7");
+                        asm volatile("pxor %mm5,%mm4"); /* fold that into the doubled Q */
+                        asm volatile("pxor %mm7,%mm6");
+                        asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); /* temps now carry this disk's data */
+                        asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
+                        asm volatile("pxor %mm5,%mm2"); /* P ^= data */
+                        asm volatile("pxor %mm7,%mm3");
+                        asm volatile("pxor %mm5,%mm4"); /* Q ^= data */
+                        asm volatile("pxor %mm7,%mm6");
+                        asm volatile("pxor %mm5,%mm5"); /* temps back to zero */
+                        asm volatile("pxor %mm7,%mm7");
+                }
+                asm volatile("movntq %%mm2,%0" : "=m" (p[d])); /* non-temporal stores of P and Q */
+                asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
+                asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
+                asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
+        }
+        asm volatile("sfence" : :: "memory"); /* fence after the non-temporal stores */
+        kernel_fpu_end();
+}
+const struct raid6_calls raid6_sse1x2 = {
+        raid6_sse12_gen_syndrome,       /* syndrome generator */
+        raid6_have_sse1_or_mmxext,      /* CPU capability check */
+        "sse1x2",                       /* algorithm name */
+        1                       /* Has cache hints */
+};
+#endif
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
new file mode 100644
index 00000000000..bc2d57daa58
--- /dev/null
+++ b/lib/raid6/sse2.c
@@ -0,0 +1,262 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6/sse2.c
+ *
+ * SSE-2 implementation of RAID-6 syndrome functions
+ *
+ */
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+#include <linux/raid/pq.h>
+#include "x86.h"
+static const struct raid6_sse_constants {
+        u64 x1d[2];             /* 16 bytes, every byte 0x1d; loaded whole into %xmm0 below */
+} raid6_sse_constants  __attribute__((aligned(16))) = {         /* it is read with movdqa, an aligned 16-byte load */
+        { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
+};
+static int raid6_have_sse2(void)
+{
+        int usable;
+        /* Not really boot_cpu but "all_cpus" */
+        /* All four features are required; checks stop at the first miss */
+        usable = boot_cpu_has(X86_FEATURE_MMX) &&
+                boot_cpu_has(X86_FEATURE_FXSR);
+        usable = usable &&
+                boot_cpu_has(X86_FEATURE_XMM) &&
+                boot_cpu_has(X86_FEATURE_XMM2);
+        return usable;
+}
+/*
+ * Plain SSE2 implementation
+ *
+ * Fills in the P and Q blocks from the data blocks, one 16-byte XMM
+ * word per outer-loop pass, writing the results with non-temporal
+ * stores.
+ *
+ * @disks: number of entries in @ptrs; the last two are the P and Q
+ *         destinations, everything before them is a data source.
+ * @bytes: length of each block.  NOTE(review): the loop advances by 16
+ *         with no tail handling, so this presumably has to be a
+ *         multiple of 16 - confirm against callers.
+ * @ptrs:  block pointers: data first, then P, then Q.  They are accessed
+ *         with movdqa/movntdq, which are aligned 16-byte accesses.
+ *
+ * NOTE(review): dptr[z0-1] is read unconditionally, so this looks like
+ * it needs at least two data blocks (disks >= 4) - confirm.
+ */
+static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        kernel_fpu_begin();
+        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+        asm volatile("pxor %xmm5,%xmm5");       /* Zero temp */
+        for ( d = 0 ; d < bytes ; d += 16 ) {
+                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+                asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
+                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+                asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
+                asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); /* next data word, consumed one step later */
+                for ( z = z0-2 ; z >= 0 ; z-- ) {
+                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+                        asm volatile("pcmpgtb %xmm4,%xmm5"); /* 0xff in bytes of Q whose top bit is set */
+                        asm volatile("paddb %xmm4,%xmm4"); /* double every byte of Q */
+                        asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */
+                        asm volatile("pxor %xmm5,%xmm4"); /* fold that into the doubled Q */
+                        asm volatile("pxor %xmm5,%xmm5"); /* temp back to zero */
+                        asm volatile("pxor %xmm6,%xmm2"); /* P ^= previously loaded data */
+                        asm volatile("pxor %xmm6,%xmm4"); /* Q ^= previously loaded data */
+                        asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
+                }
+                /* Same step once more for the word still held in xmm6 */
+                asm volatile("pcmpgtb %xmm4,%xmm5");
+                asm volatile("paddb %xmm4,%xmm4");
+                asm volatile("pand %xmm0,%xmm5");
+                asm volatile("pxor %xmm5,%xmm4");
+                asm volatile("pxor %xmm5,%xmm5");
+                asm volatile("pxor %xmm6,%xmm2");
+                asm volatile("pxor %xmm6,%xmm4");
+                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* non-temporal store of P */
+                asm volatile("pxor %xmm2,%xmm2"); /* xmm2 is reloaded at the top of the next pass anyway */
+                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); /* non-temporal store of Q */
+                asm volatile("pxor %xmm4,%xmm4"); /* xmm4 is likewise overwritten at the top of the loop */
+        }
+        asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */
+        kernel_fpu_end();
+}
+const struct raid6_calls raid6_sse2x1 = {
+        raid6_sse21_gen_syndrome,       /* syndrome generator */
+        raid6_have_sse2,                /* CPU capability check */
+        "sse2x1",                       /* algorithm name */
+        1                       /* Has cache hints */
+};
+/*
+ * Unrolled-by-2 SSE2 implementation
+ *
+ * Same job as the plain SSE2 variant, but handles two 16-byte XMM words
+ * (32 bytes) per outer-loop pass, with a second set of registers
+ * (xmm3/xmm6/xmm7) for the upper word.
+ *
+ * @disks: number of entries in @ptrs; the last two are the P and Q
+ *         destinations, everything before them is a data source.
+ * @bytes: length of each block.  NOTE(review): the loop advances by 32
+ *         with no tail handling, so this presumably has to be a
+ *         multiple of 32 - confirm against callers.
+ * @ptrs:  block pointers: data first, then P, then Q.  They are accessed
+ *         with movdqa/movntdq, which are aligned 16-byte accesses.
+ */
+static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        kernel_fpu_begin();
+        asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+        asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
+        asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
+        /* We uniformly assume a single prefetch covers at least 32 bytes */
+        for ( d = 0 ; d < bytes ; d += 32 ) {
+                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+                asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
+                asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
+                asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
+                asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
+                for ( z = z0-1 ; z >= 0 ; z-- ) {
+                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+                        asm volatile("pcmpgtb %xmm4,%xmm5"); /* 0xff in bytes of Q[0] whose top bit is set */
+                        asm volatile("pcmpgtb %xmm6,%xmm7"); /* likewise for Q[1] */
+                        asm volatile("paddb %xmm4,%xmm4"); /* double every byte of Q[0] */
+                        asm volatile("paddb %xmm6,%xmm6"); /* ... and of Q[1] */
+                        asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */
+                        asm volatile("pand %xmm0,%xmm7");
+                        asm volatile("pxor %xmm5,%xmm4"); /* fold that into the doubled Q */
+                        asm volatile("pxor %xmm7,%xmm6");
+                        asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); /* temps now carry this disk's data */
+                        asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
+                        asm volatile("pxor %xmm5,%xmm2"); /* P ^= data */
+                        asm volatile("pxor %xmm7,%xmm3");
+                        asm volatile("pxor %xmm5,%xmm4"); /* Q ^= data */
+                        asm volatile("pxor %xmm7,%xmm6");
+                        asm volatile("pxor %xmm5,%xmm5"); /* temps back to zero */
+                        asm volatile("pxor %xmm7,%xmm7");
+                }
+                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* non-temporal stores of P and Q */
+                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+        }
+        asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */
+        kernel_fpu_end();
+}
+const struct raid6_calls raid6_sse2x2 = {
+        raid6_sse22_gen_syndrome,       /* syndrome generator */
+        raid6_have_sse2,                /* CPU capability check */
+        "sse2x2",                       /* algorithm name */
+        1                       /* Has cache hints */
+};
+#endif
+#if defined(__x86_64__) && !defined(__arch_um__)
+/*
+ * Unrolled-by-4 SSE2 implementation
+ *
+ * Handles four 16-byte XMM words (64 bytes) per outer-loop pass.  It
+ * uses %xmm10-%xmm15, which is why it sits in an x86-64-only section.
+ * Unlike the narrower variants, the P/Q accumulators start at zero and
+ * every data disk, including the highest one, goes through the same
+ * inner-loop step; the accumulators are cleared again right after each
+ * store.
+ *
+ * @disks: number of entries in @ptrs; the last two are the P and Q
+ *         destinations, everything before them is a data source.
+ * @bytes: length of each block.  NOTE(review): the loop advances by 64
+ *         with no tail handling, so this presumably has to be a
+ *         multiple of 64 - confirm against callers.
+ * @ptrs:  block pointers: data first, then P, then Q.  They are accessed
+ *         with movdqa/movntdq, which are aligned 16-byte accesses.
+ */
+static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+        u8 **dptr = (u8 **)ptrs;
+        u8 *p, *q;
+        int d, z, z0;
+        z0 = disks - 3;         /* Highest data disk */
+        p = dptr[z0+1];         /* XOR parity */
+        q = dptr[z0+2];         /* RS syndrome */
+        kernel_fpu_begin();
+        asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+        asm volatile("pxor %xmm2,%xmm2");       /* P[0] */
+        asm volatile("pxor %xmm3,%xmm3");       /* P[1] */
+        asm volatile("pxor %xmm4,%xmm4");       /* Q[0] */
+        asm volatile("pxor %xmm5,%xmm5");       /* Zero temp */
+        asm volatile("pxor %xmm6,%xmm6");       /* Q[1] */
+        asm volatile("pxor %xmm7,%xmm7");       /* Zero temp */
+        asm volatile("pxor %xmm10,%xmm10");     /* P[2] */
+        asm volatile("pxor %xmm11,%xmm11");     /* P[3] */
+        asm volatile("pxor %xmm12,%xmm12");     /* Q[2] */
+        asm volatile("pxor %xmm13,%xmm13");     /* Zero temp */
+        asm volatile("pxor %xmm14,%xmm14");     /* Q[3] */
+        asm volatile("pxor %xmm15,%xmm15");     /* Zero temp */
+        for ( d = 0 ; d < bytes ; d += 64 ) {
+                for ( z = z0 ; z >= 0 ; z-- ) {
+                        /* The second prefetch seems to improve performance... */
+                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+                        asm volatile("pcmpgtb %xmm4,%xmm5"); /* 0xff in bytes of each Q word whose top bit is set */
+                        asm volatile("pcmpgtb %xmm6,%xmm7");
+                        asm volatile("pcmpgtb %xmm12,%xmm13");
+                        asm volatile("pcmpgtb %xmm14,%xmm15");
+                        asm volatile("paddb %xmm4,%xmm4"); /* double every byte of each Q word */
+                        asm volatile("paddb %xmm6,%xmm6");
+                        asm volatile("paddb %xmm12,%xmm12");
+                        asm volatile("paddb %xmm14,%xmm14");
+                        asm volatile("pand %xmm0,%xmm5"); /* keep 0x1d only in the flagged bytes */
+                        asm volatile("pand %xmm0,%xmm7");
+                        asm volatile("pand %xmm0,%xmm13");
+                        asm volatile("pand %xmm0,%xmm15");
+                        asm volatile("pxor %xmm5,%xmm4"); /* fold that into the doubled Q */
+                        asm volatile("pxor %xmm7,%xmm6");
+                        asm volatile("pxor %xmm13,%xmm12");
+                        asm volatile("pxor %xmm15,%xmm14");
+                        asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); /* temps now carry this disk's data */
+                        asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+                        asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+                        asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+                        asm volatile("pxor %xmm5,%xmm2"); /* P ^= data */
+                        asm volatile("pxor %xmm7,%xmm3");
+                        asm volatile("pxor %xmm13,%xmm10");
+                        asm volatile("pxor %xmm15,%xmm11");
+                        asm volatile("pxor %xmm5,%xmm4"); /* Q ^= data */
+                        asm volatile("pxor %xmm7,%xmm6");
+                        asm volatile("pxor %xmm13,%xmm12");
+                        asm volatile("pxor %xmm15,%xmm14");
+                        asm volatile("pxor %xmm5,%xmm5"); /* temps back to zero */
+                        asm volatile("pxor %xmm7,%xmm7");
+                        asm volatile("pxor %xmm13,%xmm13");
+                        asm volatile("pxor %xmm15,%xmm15");
+                }
+                asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); /* store, then clear the accumulator for the next pass */
+                asm volatile("pxor %xmm2,%xmm2");
+                asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+                asm volatile("pxor %xmm3,%xmm3");
+                asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+                asm volatile("pxor %xmm10,%xmm10");
+                asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+                asm volatile("pxor %xmm11,%xmm11");
+                asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+                asm volatile("pxor %xmm4,%xmm4");
+                asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+                asm volatile("pxor %xmm6,%xmm6");
+                asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+                asm volatile("pxor %xmm12,%xmm12");
+                asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+                asm volatile("pxor %xmm14,%xmm14");
+        }
+        asm volatile("sfence" : : : "memory"); /* fence after the non-temporal stores */
+        kernel_fpu_end();
+}
+const struct raid6_calls raid6_sse2x4 = {
+        raid6_sse24_gen_syndrome,       /* syndrome generator */
+        raid6_have_sse2,                /* CPU capability check */
+        "sse2x4",                       /* algorithm name */
+        1                       /* Has cache hints */
+};
+#endif
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
new file mode 100644
index 00000000000..aa651697b6d
--- /dev/null
+++ b/lib/raid6/test/Makefile
@@ -0,0 +1,72 @@
+#
+# This is a simple Makefile to test some of the RAID-6 code
+# from userspace.
+#
+# Sources are copied (or generated) from the parent directory into this
+# one, built into raid6.a, and linked with test.c into raid6test.
+#
+CC       = gcc
+OPTFLAGS = -O2                  # Adjust as desired
+CFLAGS   = -I.. -I ../../../include -g $(OPTFLAGS)
+LD       = ld
+AWK      = awk -f
+AR       = ar
+RANLIB   = ranlib
+# These targets do not name files; keep a stray file called "all",
+# "clean" or "spotless" from silently disabling them.
+.PHONY: all clean spotless
+.c.o:
+        $(CC) $(CFLAGS) -c -o $@ $<
+# Pull plain and to-be-unrolled sources in from the parent directory
+%.c: ../%.c
+        cp -f $< $@
+%.uc: ../%.uc
+        cp -f $< $@
+all:    raid6.a raid6test
+raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
+         altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
+         tables.o
+         rm -f $@
+         $(AR) cq $@ $^
+         $(RANLIB) $@
+raid6test: test.c raid6.a
+        $(CC) $(CFLAGS) -o raid6test $^
+# Each .uc template is expanded by unroll.awk with unroll factor N
+altivec1.c: altivec.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
+altivec2.c: altivec.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=2 < altivec.uc > $@
+altivec4.c: altivec.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=4 < altivec.uc > $@
+altivec8.c: altivec.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+int1.c: int.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=1 < int.uc > $@
+int2.c: int.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=2 < int.uc > $@
+int4.c: int.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=4 < int.uc > $@
+int8.c: int.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=8 < int.uc > $@
+int16.c: int.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=16 < int.uc > $@
+int32.c: int.uc ../unroll.awk
+        $(AWK) ../unroll.awk -vN=32 < int.uc > $@
+tables.c: mktables
+        ./mktables > tables.c
+# Removes build products and every copied or generated source
+clean:
+        rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
+spotless: clean
+        rm -f *~
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
new file mode 100644
index 00000000000..7a930318b17
--- /dev/null
+++ b/lib/raid6/test/test.c
@@ -0,0 +1,124 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6test.c
+ *
+ * Test RAID-6 recovery with various algorithms
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/raid/pq.h>
+#define NDISKS          16      /* Including P and Q */
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));      /* never written here, so it stays all zero */
+struct raid6_calls raid6_call;  /* algorithm under test; assigned from raid6_algos in main() */
+char *dataptrs[NDISKS];         /* pointer table passed to the raid6 routines */
+char data[NDISKS][PAGE_SIZE];   /* reference blocks; the last two rows hold P and Q */
+char recovi[PAGE_SIZE], recovj[PAGE_SIZE];      /* recovery targets for the two "failed" blocks */
+/* Fill every block with rand() output and point dataptrs[] at the blocks. */
+static void makedata(void)
+{
+        int disk, off;
+        for (disk = 0; disk < NDISKS; disk++) {
+                char *page = data[disk];
+                for (off = 0; off < PAGE_SIZE; off++)
+                        page[off] = rand();
+                dataptrs[disk] = page;
+        }
+}
+/* One-letter tag for disk index d: 'P', 'Q', or 'D' for anything else. */
+static char disk_type(int d)
+{
+        if (d == NDISKS-2)
+                return 'P';
+        if (d == NDISKS-1)
+                return 'Q';
+        return 'D';
+}
+/*
+ * Pretend blocks i and j failed, recover them into scratch buffers and
+ * compare against the reference data.  Prints one result line, except
+ * for the data+Q combination, which is not implemented and is counted
+ * as a pass.  Returns non-zero if either block came back wrong.
+ */
+static int test_disks(int i, int j)
+{
+        int bad_i, bad_j;
+        const char *verdict;
+        /* Poison the scratch buffers so stale contents cannot pass */
+        memset(recovi, 0xf0, PAGE_SIZE);
+        memset(recovj, 0xba, PAGE_SIZE);
+        dataptrs[i] = recovi;
+        dataptrs[j] = recovj;
+        raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);
+        bad_i = memcmp(data[i], recovi, PAGE_SIZE);
+        bad_j = memcmp(data[j], recovj, PAGE_SIZE);
+        if (i < NDISKS-2 && j == NDISKS-1) {
+                /* We don't implement the DQ failure scenario, since it's
+                   equivalent to a RAID-5 failure (XOR, then recompute Q) */
+                bad_i = 0;
+                bad_j = 0;
+        } else {
+                if (!bad_i && !bad_j)
+                        verdict = "OK";
+                else if (!bad_i)
+                        verdict = "ERRB";
+                else if (!bad_j)
+                        verdict = "ERRA";
+                else
+                        verdict = "ERRAB";
+                printf("algo=%-8s  faila=%3d(%c)  failb=%3d(%c)  %s\n",
+                       raid6_call.name,
+                       i, disk_type(i),
+                       j, disk_type(j),
+                       verdict);
+        }
+        /* Put the real blocks back for the next pair */
+        dataptrs[i] = data[i];
+        dataptrs[j] = data[j];
+        return bad_i || bad_j;
+}
+/*
+ * For every algorithm in raid6_algos that is usable on this machine,
+ * regenerate P/Q and run test_disks() over every pair of failed blocks;
+ * finally call raid6_select_algo().
+ *
+ * Returns 0 if every tested pair was recovered correctly, 1 otherwise.
+ */
+int main(int argc, char *argv[])
+{
+        const struct raid6_calls *const *algo;
+        int i, j;
+        int err = 0;
+        makedata();
+        for (algo = raid6_algos; *algo; algo++) {
+                /* An algorithm without a valid() hook is always tried */
+                if (!(*algo)->valid || (*algo)->valid()) {
+                        raid6_call = **algo;
+                        /* Nuke syndromes */
+                        memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+                        /* Generate assumed good syndrome */
+                        raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+                                                (void **)&dataptrs);
+                        for (i = 0; i < NDISKS-1; i++)
+                                for (j = i+1; j < NDISKS; j++)
+                                        err += test_disks(i, j);
+                }
+                printf("\n");
+        }
+        printf("\n");
+        /* Pick the best algorithm test */
+        raid6_select_algo();
+        if (err)
+                printf("\n*** ERRORS FOUND ***\n");
+        /*
+         * Do not hand the raw count to the caller: only the low 8 bits
+         * of an exit status are reported, so a count that is a multiple
+         * of 256 would look like success.
+         */
+        return err != 0;
+}
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
new file mode 100644
index 00000000000..c6aa03631df
--- /dev/null
+++ b/lib/raid6/unroll.awk
@@ -0,0 +1,20 @@
+# This filter requires one command line option of form -vN=n
+# where n must be a decimal number.
+#
+# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
+# Replace each $# with n, and each $* with a single $.
+BEGIN {
+        n = N + 0       # adding 0 forces the -vN value to a number (0 if N was not given)
+}
+{
+        if (/\$\$/) { rep = n } else { rep = 1 }        # only lines containing $$ are repeated
+        for (i = 0; i < rep; ++i) {     # with n == 0 a $$ line is emitted zero times
+                tmp = $0
+                gsub(/\$\$/, i, tmp)    # $$ -> current copy index
+                gsub(/\$\#/, n, tmp)    # $# -> unroll factor
+                gsub(/\$\*/, "$", tmp)  # $* -> literal $, done last so it cannot form a new $$ or $#
+                print tmp
+        }
+}
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
new file mode 100644
index 00000000000..cb2a8c91c88
--- /dev/null
+++ b/lib/raid6/x86.h
@@ -0,0 +1,61 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+/*
+ * raid6/x86.h
+ *
+ * Definitions common to x86 and x86-64 RAID-6 code only
+ */
+#ifndef LINUX_RAID_RAID6X86_H
+#define LINUX_RAID_RAID6X86_H
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+#ifdef __KERNEL__ /* Real code */
+#include <asm/i387.h>
+#else /* Dummy code for user space testing */
+static inline void kernel_fpu_begin(void)       /* empty stand-in for the kernel call of the same name */
+{
+}
+static inline void kernel_fpu_end(void)         /* empty stand-in, pairs with kernel_fpu_begin() */
+{
+}
+#define X86_FEATURE_MMX         (0*32+23) /* Multimedia Extensions.
+                                           * Values are word*32 + bit; see
+                                           * boot_cpu_has() below for how
+                                           * the two parts are used. */
+#define X86_FEATURE_FXSR        (0*32+24) /* FXSAVE and FXRSTOR instructions
+                                           * (fast save and restore) */
+#define X86_FEATURE_XMM         (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2        (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_MMXEXT      (1*32+22) /* AMD MMX extensions */
+/*
+ * Should work well enough on modern CPUs for testing.
+ *
+ * Userspace replacement for the kernel helper: runs CPUID and returns
+ * bit (flag & 31) of EDX.  A flag whose word part (flag >> 5) is zero
+ * queries leaf 1; any other word queries leaf 0x80000001.
+ */
+static inline int boot_cpu_has(int flag)
+{
+        u32 leaf;
+        u32 regd;
+        if (flag >> 5)
+                leaf = 0x80000001;
+        else
+                leaf = 1;
+        asm volatile("cpuid"
+                     : "+a" (leaf), "=d" (regd)
+                     : : "ecx", "ebx");
+        return (regd >> (flag & 31)) & 1;
+}
+#endif /* ndef __KERNEL__ */
+#endif
+#endif
diff --git a/lib/random32.c b/lib/random32.c
index 217d5c4b666..fc3545a3277 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -39,13 +39,16 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
-struct rnd_state {
-        u32 s1, s2, s3;
-};
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
-static u32 __random32(struct rnd_state *state)
+/**
+ *      prandom32 - seeded pseudo-random number generator.
+ *      @state: pointer to state structure holding seeded state.
+ *
+ *      This is used for pseudo-randomness with no outside seeding.
+ *      For more random results, use random32().
+ */
+u32 prandom32(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
@@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state)
        return (state->s1 ^ state->s2 ^ state->s3);
 }
+EXPORT_SYMBOL(prandom32);
-/*
- * Handle minimum values for seeds
- */
-static inline u32 __seed(u32 x, u32 m)
-{
-        return (x < m) ? x + m : x;
-}
 /**
 *      random32 - pseudo random number generator
@@ -75,7 +71,7 @@ u32 random32(void)
 {
        unsigned long r;
        struct rnd_state *state = &get_cpu_var(net_rand_state);
-        r = __random32(state);
+        r = prandom32(state);
        put_cpu_var(state);
        return r;
 }
@@ -118,12 +114,12 @@ static int __init random32_init(void)
                state->s3 = __seed(LCG(state->s2), 15);
                /* "warm it up" */
-                __random32(state);
+                prandom32(state);
-                __random32(state);
+                prandom32(state);
-                __random32(state);
+                prandom32(state);
-                __random32(state);
+                prandom32(state);
-                __random32(state);
+                prandom32(state);
-                __random32(state);
+                prandom32(state);
        }
        return 0;
 }
@@ -131,7 +127,7 @@ core_initcall(random32_init);
 /*
 *      Generate better values after random number generator
- *      is fully initalized.
+ *      is fully initialized.
 */
 static int __init random32_reseed(void)
 {
@@ -147,7 +143,7 @@ static int __init random32_reseed(void)
                state->s3 = __seed(seeds[2], 15);
                /* mix it in */
-                __random32(state);
+                prandom32(state);
        }
        return 0;
 }
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 09f5ce1810d..027a03f4c56 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -16,9 +16,14 @@
 /*
 * __ratelimit - rate limiting
 * @rs: ratelimit_state data
+ * @func: name of calling function
 *
- * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
+ * This enforces a rate limit: not more than @rs->burst callbacks
- * in every @rs->ratelimit_jiffies
+ * in every @rs->interval
+ *
+ * RETURNS:
+ * 0 means callbacks will be suppressed.
+ * 1 means go ahead and do it.
 */
 int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
@@ -35,7 +40,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
         * the entity that is holding the lock already:
         */
        if (!spin_trylock_irqsave(&rs->lock, flags))
-                return 1;
+                return 0;
        if (!rs->begin)
                rs->begin = jiffies;
diff --git a/lib/rbtree.c b/lib/rbtree.c
index e2aa3be2985..4693f79195d 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -283,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 }
 EXPORT_SYMBOL(rb_erase);
+/*
+ * Walk from @node up to the root, calling @func on every node on the
+ * way and, at each level, on one child of the parent: the right child
+ * when @node is the left child and a right child exists, otherwise the
+ * left child if there is one.  Note that for a left child without a
+ * right sibling this means @func runs on that same node a second time.
+ */
+static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
+{
+        struct rb_node *up;
+        for (;;) {
+                func(node, data);
+                up = rb_parent(node);
+                if (!up)
+                        break;
+                if (up->rb_left == node && up->rb_right)
+                        func(up->rb_right, data);
+                else if (up->rb_left)
+                        func(up->rb_left, data);
+                node = up;
+        }
+}
+/*
+ * Call after @node has been inserted: refreshes the augmented data for
+ * the new entry and for whatever rebalancing disturbed.  The upward walk
+ * starts from a child of @node when it has one (left preferred),
+ * otherwise from @node itself.
+ */
+void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
+{
+        struct rb_node *start = node;
+        if (node->rb_left)
+                start = node->rb_left;
+        else if (node->rb_right)
+                start = node->rb_right;
+        rb_augment_path(start, func, data);
+}
+/*
+ * Call before removing @node: returns the deepest node on the rebalance
+ * path that will still be in the tree once @node is gone.  The result
+ * can be NULL (a childless node with no parent).
+ */
+struct rb_node *rb_augment_erase_begin(struct rb_node *node)
+{
+        struct rb_node *succ;
+        /* Leaf: the parent is what remains */
+        if (!node->rb_right && !node->rb_left)
+                return rb_parent(node);
+        /* Exactly one child: that child remains */
+        if (!node->rb_right)
+                return node->rb_left;
+        if (!node->rb_left)
+                return node->rb_right;
+        /* Two children: work from the in-order successor */
+        succ = rb_next(node);
+        if (succ->rb_right)
+                return succ->rb_right;
+        if (rb_parent(succ) != node)
+                return rb_parent(succ);
+        return succ;
+}
+/*
+ * Call after the removal with the node obtained from
+ * rb_augment_erase_begin(): refreshes the augmented data for the removed
+ * entry and any rebalance damage.  Does nothing when @node is NULL.
+ */
+void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
+{
+        if (!node)
+                return;
+        rb_augment_path(node, func, data);
+}
 /*
 * This function returns the first node (in sort order) of the tree.
 */
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index ccf95bff798..ffc9fc7f3b0 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -143,13 +143,14 @@ void __sched __down_read(struct rw_semaphore *sem)
 {
        struct rwsem_waiter waiter;
        struct task_struct *tsk;
+        unsigned long flags;
-        spin_lock_irq(&sem->wait_lock);
+        spin_lock_irqsave(&sem->wait_lock, flags);
        if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
                /* granted */
                sem->activity++;
-                spin_unlock_irq(&sem->wait_lock);
+                spin_unlock_irqrestore(&sem->wait_lock, flags);
                goto out;
        }
@@ -164,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem)
        list_add_tail(&waiter.list, &sem->wait_list);
        /* we don't need to touch the semaphore struct anymore */
-        spin_unlock_irq(&sem->wait_lock);
+        spin_unlock_irqrestore(&sem->wait_lock, flags);
        /* wait to be given the lock */
        for (;;) {
@@ -209,13 +210,14 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
        struct rwsem_waiter waiter;
        struct task_struct *tsk;
+        unsigned long flags;
-        spin_lock_irq(&sem->wait_lock);
+        spin_lock_irqsave(&sem->wait_lock, flags);
        if (sem->activity == 0 && list_empty(&sem->wait_list)) {
                /* granted */
                sem->activity = -1;
-                spin_unlock_irq(&sem->wait_lock);
+                spin_unlock_irqrestore(&sem->wait_lock, flags);
                goto out;
        }
@@ -230,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
        list_add_tail(&waiter.list, &sem->wait_list);
        /* we don't need to touch the semaphore struct anymore */
-        spin_unlock_irq(&sem->wait_lock);
+        spin_unlock_irqrestore(&sem->wait_lock, flags);
        /* wait to be given the lock */
        for (;;) {
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e5665..f236d7cd5cf 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -36,45 +36,56 @@ struct rwsem_waiter {
 #define RWSEM_WAITING_FOR_WRITE 0x00000002
 };
+/* Wake types for __rwsem_do_wake().  Note that RWSEM_WAKE_NO_ACTIVE and
+ * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
+ * since the rwsem value was observed.
+ */
+#define RWSEM_WAKE_ANY        0 /* Wake whatever's at head of wait list */
+#define RWSEM_WAKE_NO_ACTIVE  1 /* rwsem was observed with no active thread */
+#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
 /*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
- *   - there must be someone on the queue
+ * - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
-static inline struct rw_semaphore *
+static struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
+__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
 {
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
-        signed long oldcount, woken, loop;
+        signed long oldcount, woken, loop, adjustment;
-        if (downgrading)
-                goto dont_wake_writers;
-        /* if we came through an up_xxxx() call, we only only wake someone up
-         * if we can transition the active part of the count from 0 -> 1
-         */
- try_again:
-        oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem)
-                                                - RWSEM_ACTIVE_BIAS;
-        if (oldcount & RWSEM_ACTIVE_MASK)
-                goto undo;
        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
-        /* try to grant a single write lock if there's a writer at the front
-         * of the queue - note we leave the 'active part' of the count
-         * incremented by 1 and the waiting part incremented by 0x00010000
-         */
        if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
                goto readers_only;
+        if (wake_type == RWSEM_WAKE_READ_OWNED)
+                /* Another active reader was observed, so wakeup is not
+                 * likely to succeed. Save the atomic op.
+                 */
+                goto out;
+        /* There's a writer at the front of the queue - try to grant it the
+         * write lock.  However, we only wake this writer if we can transition
+         * the active part of the count from 0 -> 1
+         */
+        adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+        if (waiter->list.next == &sem->wait_list)
+                adjustment -= RWSEM_WAITING_BIAS;
+ try_again_write:
+        oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+        if (oldcount & RWSEM_ACTIVE_MASK)
+                /* Someone grabbed the sem already */
+                goto undo_write;
        /* We must be careful not to touch 'waiter' after we set ->task = NULL.
         * It is an allocated on the waiter's stack and may become invalid at
         * any time after that point (due to a wakeup from another source).
@@ -87,18 +98,30 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
        put_task_struct(tsk);
        goto out;
-        /* don't want to wake any writers */
+ readers_only:
- dont_wake_writers:
+        /* If we come here from up_xxxx(), another thread might have reached
-        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+         * rwsem_down_failed_common() before we acquired the spinlock and
-        if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
+         * woken up a waiter, making it now active.  We prefer to check for
+         * this first in order to not spend too much time with the spinlock
+         * held if we're not going to be able to wake up readers in the end.
+         *
+         * Note that we do not need to update the rwsem count: any writer
+         * trying to acquire rwsem will run rwsem_down_write_failed() due
+         * to the waiting threads and block trying to acquire the spinlock.
+         *
+         * We use a dummy atomic update in order to acquire the cache line
+         * exclusively since we expect to succeed and run the final rwsem
+         * count adjustment pretty soon.
+         */
+        if (wake_type == RWSEM_WAKE_ANY &&
+            rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
+                /* Someone grabbed the sem for write already */
                goto out;
-        /* grant an infinite number of read locks to the readers at the front
+        /* Grant an infinite number of read locks to the readers at the front
-         * of the queue
+         * of the queue.  Note we increment the 'active part' of the count by
-         * - note we increment the 'active part' of the count by the number of
+         * the number of readers before waking any processes up.
-         *   readers before waking any processes up
         */
- readers_only:
        woken = 0;
        do {
                woken++;
@@ -111,16 +134,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
        } while (waiter->flags & RWSEM_WAITING_FOR_READ);
-        loop = woken;
+        adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
-        woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS;
+        if (waiter->flags & RWSEM_WAITING_FOR_READ)
-        if (!downgrading)
+                /* hit end of list above */
-                /* we'd already done one increment earlier */
+                adjustment -= RWSEM_WAITING_BIAS;
-                woken -= RWSEM_ACTIVE_BIAS;
-        rwsem_atomic_add(woken, sem);
+        rwsem_atomic_add(adjustment, sem);
        next = sem->wait_list.next;
-        for (; loop > 0; loop--) {
+        for (loop = woken; loop > 0; loop--) {
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
@@ -136,11 +158,12 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 out:
        return sem;
-        /* undo the change to count, but check for a transition 1->0 */
+        /* undo the change to the active count, but check for a transition
- undo:
+         * 1->0 */
-        if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0)
+ undo_write:
+        if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
                goto out;
-        goto try_again;
+        goto try_again_write;
 }
 /*
@@ -148,8 +171,9 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 */
 static struct rw_semaphore __sched *
 rwsem_down_failed_common(struct rw_semaphore *sem,
-                        struct rwsem_waiter *waiter, signed long adjustment)
+                         unsigned int flags, signed long adjustment)
 {
+        struct rwsem_waiter waiter;
        struct task_struct *tsk = current;
        signed long count;
@@ -157,23 +181,34 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
        /* set up my own style of waitqueue */
        spin_lock_irq(&sem->wait_lock);
-        waiter->task = tsk;
+        waiter.task = tsk;
+        waiter.flags = flags;
        get_task_struct(tsk);
-        list_add_tail(&waiter->list, &sem->wait_list);
+        if (list_empty(&sem->wait_list))
+                adjustment += RWSEM_WAITING_BIAS;
+        list_add_tail(&waiter.list, &sem->wait_list);
-        /* we're now waiting on the lock, but no longer actively read-locking */
+        /* we're now waiting on the lock, but no longer actively locking */
        count = rwsem_atomic_update(adjustment, sem);
-        /* if there are no active locks, wake the front queued process(es) up */
+        /* If there are no active locks, wake the front queued process(es) up.
-        if (!(count & RWSEM_ACTIVE_MASK))
+         *
-                sem = __rwsem_do_wake(sem, 0);
+         * Alternatively, if we're called from a failed down_write(), there
+         * were already threads queued before us and there are no active
+         * writers, the lock must be read owned; so we try to wake any read
+         * locks that were queued ahead of us. */
+        if (count == RWSEM_WAITING_BIAS)
+                sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
+        else if (count > RWSEM_WAITING_BIAS &&
+                 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
+                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
        spin_unlock_irq(&sem->wait_lock);
        /* wait to be given the lock */
        for (;;) {
-                if (!waiter->task)
+                if (!waiter.task)
                        break;
                schedule();
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
@@ -190,12 +225,8 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
 asmregparm struct rw_semaphore __sched *
 rwsem_down_read_failed(struct rw_semaphore *sem)
 {
-        struct rwsem_waiter waiter;
+        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
+                                        -RWSEM_ACTIVE_READ_BIAS);
-        waiter.flags = RWSEM_WAITING_FOR_READ;
-        rwsem_down_failed_common(sem, &waiter,
-                                RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
-        return sem;
 }
 /*
@@ -204,12 +235,8 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
 asmregparm struct rw_semaphore __sched *
 rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-        struct rwsem_waiter waiter;
+        return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
+                                        -RWSEM_ACTIVE_WRITE_BIAS);
-        waiter.flags = RWSEM_WAITING_FOR_WRITE;
-        rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
-        return sem;
 }
 /*
@@ -224,7 +251,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
-                sem = __rwsem_do_wake(sem, 0);
+                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
        spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -244,7 +271,7 @@ asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
-                sem = __rwsem_do_wake(sem, 1);
+                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
        spin_unlock_irqrestore(&sem->wait_lock, flags);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 0d475d8167b..4ceb05d772a 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -7,8 +7,10 @@
 * Version 2. See the file COPYING for more details.
 */
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/scatterlist.h>
 #include <linux/highmem.h>
+#include <linux/kmemleak.h>
 /**
 * sg_next - return the next scatterlist entry in a list
@@ -114,17 +116,29 @@ EXPORT_SYMBOL(sg_init_one);
 */
 static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
 {
-        if (nents == SG_MAX_SINGLE_ALLOC)
+        if (nents == SG_MAX_SINGLE_ALLOC) {
-                return (struct scatterlist *) __get_free_page(gfp_mask);
+                /*
-        else
+                 * Kmemleak doesn't track page allocations as they are not
+                 * commonly used (in a raw form) for kernel data structures.
+                 * As we chain together a list of pages and then a normal
+                 * kmalloc (tracked by kmemleak), in order for that last
+                 * allocation not to become decoupled (and thus a
+                 * false-positive) we need to inform kmemleak of all the
+                 * intermediate allocations.
+                 */
+                void *ptr = (void *) __get_free_page(gfp_mask);
+                kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
+                return ptr;
+        } else
                return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
 }
 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
 {
-        if (nents == SG_MAX_SINGLE_ALLOC)
+        if (nents == SG_MAX_SINGLE_ALLOC) {
+                kmemleak_free(sg);
                free_page((unsigned long) sg);
-        else
+        } else
                kfree(sg);
 }
@@ -234,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
                left -= sg_size;
                sg = alloc_fn(alloc_size, gfp_mask);
-                if (unlikely(!sg))
+                if (unlikely(!sg)) {
-                        return -ENOMEM;
+                        /*
+                         * Adjust entry count to reflect that the last
+                         * entry of the previous table won't be used for
+                         * linkage.  Without this, sg_kfree() may get
+                         * confused.
+                         */
+                        if (prv)
+                                table->nents = ++table->orig_nents;
+                        return -ENOMEM;
+                }
                sg_init_table(sg, alloc_size);
                table->nents = table->orig_nents += sg_size;
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 238e72a18ce..fdc77c82f92 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -15,7 +15,7 @@ void show_mem(void)
        unsigned long total = 0, reserved = 0, shared = 0,
                nonshared = 0, highmem = 0;
-        printk(KERN_INFO "Mem-Info:\n");
+        printk("Mem-Info:\n");
        show_free_areas();
        for_each_online_pgdat(pgdat) {
@@ -49,15 +49,15 @@ void show_mem(void)
                pgdat_resize_unlock(pgdat, &flags);
        }
-        printk(KERN_INFO "%lu pages RAM\n", total);
+        printk("%lu pages RAM\n", total);
 #ifdef CONFIG_HIGHMEM
-        printk(KERN_INFO "%lu pages HighMem\n", highmem);
+        printk("%lu pages HighMem\n", highmem);
 #endif
-        printk(KERN_INFO "%lu pages reserved\n", reserved);
+        printk("%lu pages reserved\n", reserved);
-        printk(KERN_INFO "%lu pages shared\n", shared);
+        printk("%lu pages shared\n", shared);
-        printk(KERN_INFO "%lu pages non-shared\n", nonshared);
+        printk("%lu pages non-shared\n", nonshared);
 #ifdef CONFIG_QUICKLIST
-        printk(KERN_INFO "%lu pages in pagetable cache\n",
+        printk("%lu pages in pagetable cache\n",
                quicklist_total_size());
 #endif
 }
diff --git a/lib/string.c b/lib/string.c
index a1cdcfcc42d..f71bead1be3 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -36,25 +36,21 @@ int strnicmp(const char *s1, const char *s2, size_t len)
        /* Yes, Virginia, it had better be unsigned */
        unsigned char c1, c2;
-        c1 = c2 = 0;
+        if (!len)
-        if (len) {
+                return 0;
-                do {
-                        c1 = *s1;
+        do {
-                        c2 = *s2;
+                c1 = *s1++;
-                        s1++;
+                c2 = *s2++;
-                        s2++;
+                if (!c1 || !c2)
-                        if (!c1)
+                        break;
-                                break;
+                if (c1 == c2)
-                        if (!c2)
+                        continue;
-                                break;
+                c1 = tolower(c1);
-                        if (c1 == c2)
+                c2 = tolower(c2);
-                                continue;
+                if (c1 != c2)
-                        c1 = tolower(c1);
+                        break;
-                        c2 = tolower(c2);
+        } while (--len);
-                        if (c1 != c2)
-                                break;
-                } while (--len);
-        }
        return (int)c1 - (int)c2;
 }
 EXPORT_SYMBOL(strnicmp);
@@ -693,13 +689,13 @@ EXPORT_SYMBOL(strstr);
 */
 char *strnstr(const char *s1, const char *s2, size_t len)
 {
-        size_t l1 = len, l2;
+        size_t l2;
        l2 = strlen(s2);
        if (!l2)
                return (char *)s1;
-        while (l1 >= l2) {
+        while (len >= l2) {
-                l1--;
+                len--;
                if (!memcmp(s1, s2, l2))
                        return (char *)s1;
                s1++;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 437eedb5a53..c47bbe11b80 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
+#include <linux/gfp.h>
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -49,25 +50,17 @@
 */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
-        SYNC_FOR_CPU = 0,
-        SYNC_FOR_DEVICE = 1,
-};
 int swiotlb_force;
 /*
- * Used to do a quick range check in unmap_single and
+ * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * sync_single_*, to see if the memory was in fact allocated by this
+ * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
 * API.
 */
 static char *io_tlb_start, *io_tlb_end;
 /*
- * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
 * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
 */
 static unsigned long io_tlb_nslabs;
@@ -77,7 +70,7 @@ static unsigned long io_tlb_nslabs;
 */
 static unsigned long io_tlb_overflow = 32*1024;
-void *io_tlb_overflow_buffer;
+static void *io_tlb_overflow_buffer;
 /*
 * This is a free list describing the number of free entries available from
@@ -139,28 +132,14 @@ void swiotlb_print_info(void)
               (unsigned long long)pend);
 }
-/*
+void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void __init
-swiotlb_init_with_default_size(size_t default_size, int verbose)
 {
        unsigned long i, bytes;
-        if (!io_tlb_nslabs) {
+        bytes = nslabs << IO_TLB_SHIFT;
-                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-        }
-        bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-        /*
+        io_tlb_nslabs = nslabs;
-         * Get IO TLB memory from the low pages
+        io_tlb_start = tlb;
-         */
-        io_tlb_start = alloc_bootmem_low_pages(bytes);
-        if (!io_tlb_start)
-                panic("Cannot allocate SWIOTLB buffer");
        io_tlb_end = io_tlb_start + bytes;
        /*
@@ -168,22 +147,48 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
         * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
         * between io_tlb_start and io_tlb_end.
         */
-        io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+        io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
        for (i = 0; i < io_tlb_nslabs; i++)
                io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
        io_tlb_index = 0;
-        io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
+        io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
        /*
         * Get the overflow emergency buffer
         */
-        io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+        io_tlb_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
        if (!io_tlb_overflow_buffer)
                panic("Cannot allocate SWIOTLB overflow buffer!\n");
        if (verbose)
                swiotlb_print_info();
 }
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init
+swiotlb_init_with_default_size(size_t default_size, int verbose)
+{
+        unsigned long bytes;
+        if (!io_tlb_nslabs) {
+                io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+        }
+        bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+        /*
+         * Get IO TLB memory from the low pages
+         */
+        io_tlb_start = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
+        if (!io_tlb_start)
+                panic("Cannot allocate SWIOTLB buffer");
+        swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
+}
 void __init
 swiotlb_init(int verbose)
 {
@@ -303,13 +308,13 @@ void __init swiotlb_free(void)
                           get_order(io_tlb_nslabs << IO_TLB_SHIFT));
        } else {
                free_bootmem_late(__pa(io_tlb_overflow_buffer),
-                                  io_tlb_overflow);
+                                  PAGE_ALIGN(io_tlb_overflow));
                free_bootmem_late(__pa(io_tlb_orig_addr),
-                                  io_tlb_nslabs * sizeof(phys_addr_t));
+                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
                free_bootmem_late(__pa(io_tlb_list),
-                                  io_tlb_nslabs * sizeof(int));
+                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
                free_bootmem_late(__pa(io_tlb_start),
-                                  io_tlb_nslabs << IO_TLB_SHIFT);
+                                  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
        }
 }
@@ -322,8 +327,8 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
 /*
 * Bounce: copy the swiotlb buffer back to the original dma location
 */
-static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-                           enum dma_data_direction dir)
+                    enum dma_data_direction dir)
 {
        unsigned long pfn = PFN_DOWN(phys);
@@ -359,26 +364,25 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
                        memcpy(phys_to_virt(phys), dma_addr, size);
        }
 }
+EXPORT_SYMBOL_GPL(swiotlb_bounce);
-/*
+void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
- * Allocates bounce buffer and returns its kernel virtual address.
+                             phys_addr_t phys, size_t size,
- */
+                             enum dma_data_direction dir)
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
 {
        unsigned long flags;
        char *dma_addr;
        unsigned int nslots, stride, index, wrap;
        int i;
-        unsigned long start_dma_addr;
        unsigned long mask;
        unsigned long offset_slots;
        unsigned long max_slots;
        mask = dma_get_seg_boundary(hwdev);
-        start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start) & mask;
-        offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+        tbl_dma_addr &= mask;
+        offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
        /*
         * Carefully handle integer overflow which can occur when mask == ~0UL.
@@ -465,12 +469,27 @@ found:
        return dma_addr;
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+static void *
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+           enum dma_data_direction dir)
+{
+        dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+        return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
+}
 /*
 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
 */
-static void
+void
-do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
+                        enum dma_data_direction dir)
 {
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -508,10 +527,12 @@ do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
        }
        spin_unlock_irqrestore(&io_tlb_lock, flags);
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
-static void
+void
-sync_single(struct device *hwdev, char *dma_addr, size_t size,
+swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-            int dir, int target)
+                        enum dma_data_direction dir,
+                        enum dma_sync_target target)
 {
        int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
        phys_addr_t phys = io_tlb_orig_addr[index];
@@ -535,6 +556,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size,
                BUG();
        }
 }
+EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
@@ -558,8 +580,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
        }
        if (!ret) {
                /*
-                 * We are either out of memory or the device can't DMA
+                 * We are either out of memory or the device can't DMA to
-                 * to GFP_DMA memory; fall back on map_single(), which
+                 * GFP_DMA memory; fall back on map_single(), which
                 * will grab memory from the lowest available address range.
                 */
                ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
@@ -577,7 +599,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       (unsigned long long)dev_addr);
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-                do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+                swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
                return NULL;
        }
        *dma_handle = dev_addr;
@@ -595,13 +617,14 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
        if (!is_swiotlb_buffer(paddr))
                free_pages((unsigned long)vaddr, get_order(size));
        else
-                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+                /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
-                do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+                swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+             int do_panic)
 {
        /*
         * Ran out of IOMMU space for this operation. This is very bad.
@@ -679,14 +702,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 * whatever the device wrote there.
 */
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-                         size_t size, int dir)
+                         size_t size, enum dma_data_direction dir)
 {
        phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
        BUG_ON(dir == DMA_NONE);
        if (is_swiotlb_buffer(paddr)) {
-                do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+                swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
                return;
        }
@@ -722,14 +745,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 */
 static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-                    size_t size, int dir, int target)
+                    size_t size, enum dma_data_direction dir,
+                    enum dma_sync_target target)
 {
        phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
        BUG_ON(dir == DMA_NONE);
        if (is_swiotlb_buffer(paddr)) {
-                sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
+                swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
+                                       target);
                return;
        }
@@ -756,37 +781,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
 EXPORT_SYMBOL(swiotlb_sync_single_for_device);
 /*
- * Same as above, but for a sub-range of the mapping.
- */
-static void
-swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
-                          unsigned long offset, size_t size,
-                          int dir, int target)
-{
-        swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
-}
-void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-                                  unsigned long offset, size_t size,
-                                  enum dma_data_direction dir)
-{
-        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-                                  SYNC_FOR_CPU);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
-void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
-                                     unsigned long offset, size_t size,
-                                     enum dma_data_direction dir)
-{
-        swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-                                  SYNC_FOR_DEVICE);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-/*
 * Map a set of buffers described by scatterlist in streaming mode for DMA.
 * This is the scatter-gather version of the above swiotlb_map_page
 * interface.  Here the scatter gather list elements are each tagged with the
@@ -839,7 +833,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
 int
 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-               int dir)
+               enum dma_data_direction dir)
 {
        return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
 }
@@ -866,7 +860,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 void
 swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
-                 int dir)
+                 enum dma_data_direction dir)
 {
        return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
 }
@@ -881,7 +875,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
 */
 static void
 swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-                int nelems, int dir, int target)
+                int nelems, enum dma_data_direction dir,
+                enum dma_sync_target target)
 {
        struct scatterlist *sg;
        int i;
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 9fbcb44c554..d608331b3e4 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -103,6 +103,7 @@
 #include <linux/rcupdate.h>
 #include <linux/err.h>
 #include <linux/textsearch.h>
+#include <linux/slab.h>
 static LIST_HEAD(ts_ops);
 static DEFINE_SPINLOCK(ts_mod_lock);
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644
index 00000000000..e3a1050e682
--- /dev/null
+++ b/lib/timerqueue.c
@@ -0,0 +1,107 @@
+/*
+ *  Generic Timer-queue
+ *
+ *  Manages a simple queue of timers, ordered by expiration time.
+ *  Uses rbtrees for quick list adds and expiration.
+ *
+ *  NOTE: All of the following functions need to be serialized
+ *  to avoid races. No locking is done by this library code.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/timerqueue.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+/**
+ * timerqueue_add - Adds timer to timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be added
+ *
+ * Adds the timer node to the timerqueue, sorted by the
+ * node's expires value.
+ */
+void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+        struct rb_node **p = &head->head.rb_node;
+        struct rb_node *parent = NULL;
+        struct timerqueue_node  *ptr;
+        /* Make sure we don't add nodes that are already added */
+        WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
+        while (*p) {
+                parent = *p;
+                ptr = rb_entry(parent, struct timerqueue_node, node);
+                if (node->expires.tv64 < ptr->expires.tv64)
+                        p = &(*p)->rb_left;
+                else
+                        p = &(*p)->rb_right;
+        }
+        rb_link_node(&node->node, parent, p);
+        rb_insert_color(&node->node, &head->head);
+        if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+                head->next = node;
+}
+EXPORT_SYMBOL_GPL(timerqueue_add);
+/**
+ * timerqueue_del - Removes a timer from the timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be removed
+ *
+ * Removes the timer node from the timerqueue.
+ */
+void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+        WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
+        /* update next pointer */
+        if (head->next == node) {
+                struct rb_node *rbn = rb_next(&node->node);
+                head->next = rbn ?
+                        rb_entry(rbn, struct timerqueue_node, node) : NULL;
+        }
+        rb_erase(&node->node, &head->head);
+        RB_CLEAR_NODE(&node->node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_del);
+/**
+ * timerqueue_iterate_next - Returns the timer after the provided timer
+ *
+ * @node: Pointer to a timer.
+ *
+ * Provides the timer that is after the given node. This is used, when
+ * necessary, to iterate through the list of timers in a timer list
+ * without modifying the list.
+ */
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
+{
+        struct rb_node *next;
+        if (!node)
+                return NULL;
+        next = rb_next(&node->node);
+        if (!next)
+                return NULL;
+        return container_of(next, struct timerqueue_node, node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 00000000000..8fadd7cef46
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
+/*
+ * Unified UUID/GUID definition
+ *
+ * Copyright (C) 2009, Intel Corp.
+ *      Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uuid.h>
+#include <linux/random.h>
+static void __uuid_gen_common(__u8 b[16])
+{
+        int i;
+        u32 r;
+        for (i = 0; i < 4; i++) {
+                r = random32();
+                memcpy(b + i * 4, &r, 4);
+        }
+        /* variant 0b10 (RFC 4122) */
+        b[8] = (b[8] & 0x3F) | 0x80;
+}
+void uuid_le_gen(uuid_le *lu)
+{
+        __uuid_gen_common(lu->b);
+        /* version 4 : random generation */
+        lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_le_gen);
+void uuid_be_gen(uuid_be *bu)
+{
+        __uuid_gen_common(bu->b);
+        /* version 4 : random generation */
+        bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_be_gen);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3b8aeec4e32..d3023df8477 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -118,6 +118,7 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
        return simple_strtoull(cp, endp, base);
 }
+EXPORT_SYMBOL(simple_strtoll);
 /**
 * strict_strtoul - convert a string to an unsigned long strictly
@@ -145,19 +146,16 @@ int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
 {
        char *tail;
        unsigned long val;
-        size_t len;
        *res = 0;
-        len = strlen(cp);
+        if (!*cp)
-        if (len == 0)
                return -EINVAL;
        val = simple_strtoul(cp, &tail, base);
        if (tail == cp)
                return -EINVAL;
-        if ((*tail == '\0') ||
+        if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
-                ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
                *res = val;
                return 0;
        }
@@ -219,18 +217,15 @@ int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
 {
        char *tail;
        unsigned long long val;
-        size_t len;
        *res = 0;
-        len = strlen(cp);
+        if (!*cp)
-        if (len == 0)
                return -EINVAL;
        val = simple_strtoull(cp, &tail, base);
        if (tail == cp)
                return -EINVAL;
-        if ((*tail == '\0') ||
+        if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
-                ((len == (size_t)(tail - cp) + 1) && (*tail == '\n'))) {
                *res = val;
                return 0;
        }
@@ -266,7 +261,8 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res)
 }
 EXPORT_SYMBOL(strict_strtoll);
-static int skip_atoi(const char **s)
+static noinline_for_stack
+int skip_atoi(const char **s)
 {
        int i = 0;
@@ -286,7 +282,8 @@ static int skip_atoi(const char **s)
 /* Formats correctly any integer in [0,99999].
 * Outputs from one to five digits depending on input.
 * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
-static char *put_dec_trunc(char *buf, unsigned q)
+static noinline_for_stack
+char *put_dec_trunc(char *buf, unsigned q)
 {
        unsigned d3, d2, d1, d0;
        d1 = (q>>4) & 0xf;
@@ -323,7 +320,8 @@ static char *put_dec_trunc(char *buf, unsigned q)
        return buf;
 }
 /* Same with if's removed. Always emits five digits */
-static char *put_dec_full(char *buf, unsigned q)
+static noinline_for_stack
+char *put_dec_full(char *buf, unsigned q)
 {
        /* BTW, if q is in [0,9999], 8-bit ints will be enough, */
        /* but anyway, gcc produces better code with full-sized ints */
@@ -365,7 +363,8 @@ static char *put_dec_full(char *buf, unsigned q)
        return buf;
 }
 /* No inlining helps gcc to use registers better */
-static noinline char *put_dec(char *buf, unsigned long long num)
+static noinline_for_stack
+char *put_dec(char *buf, unsigned long long num)
 {
        while (1) {
                unsigned rem;
@@ -381,8 +380,8 @@ static noinline char *put_dec(char *buf, unsigned long long num)
 #define PLUS    4               /* show plus */
 #define SPACE   8               /* space if plus */
 #define LEFT    16              /* left justified */
-#define SMALL   32              /* Must be 32 == 0x20 */
+#define SMALL   32              /* use lowercase in hex (must be 32 == 0x20) */
-#define SPECIAL 64              /* 0x */
+#define SPECIAL 64              /* prefix hex with "0x", octal with "0" */
 enum format_type {
        FORMAT_TYPE_NONE, /* Just a string part */
@@ -408,16 +407,17 @@ enum format_type {
 };
 struct printf_spec {
-        enum format_type        type;
+        u8      type;           /* format_type enum */
-        int                     flags;          /* flags to number() */
+        u8      flags;          /* flags to number() */
-        int                     field_width;    /* width of output field */
+        u8      base;           /* number base, 8, 10 or 16 only */
-        int                     base;
+        u8      qualifier;      /* number qualifier, one of 'hHlLtzZ' */
-        int                     precision;      /* # of digits/chars */
+        s16     field_width;    /* width of output field */
-        int                     qualifier;
+        s16     precision;      /* # of digits/chars */
 };
-static char *number(char *buf, char *end, unsigned long long num,
+static noinline_for_stack
-                        struct printf_spec spec)
+char *number(char *buf, char *end, unsigned long long num,
+             struct printf_spec spec)
 {
        /* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
        static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -536,7 +536,8 @@ static char *number(char *buf, char *end, unsigned long long num,
        return buf;
 }
-static char *string(char *buf, char *end, const char *s, struct printf_spec spec)
+static noinline_for_stack
+char *string(char *buf, char *end, const char *s, struct printf_spec spec)
 {
        int len, i;
@@ -566,8 +567,9 @@ static char *string(char *buf, char *end, const char *s, struct printf_spec spec
        return buf;
 }
-static char *symbol_string(char *buf, char *end, void *ptr,
+static noinline_for_stack
-                                struct printf_spec spec, char ext)
+char *symbol_string(char *buf, char *end, void *ptr,
+                    struct printf_spec spec, char ext)
 {
        unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
@@ -587,8 +589,9 @@ static char *symbol_string(char *buf, char *end, void *ptr,
 #endif
 }
-static char *resource_string(char *buf, char *end, struct resource *res,
+static noinline_for_stack
-                                struct printf_spec spec, const char *fmt)
+char *resource_string(char *buf, char *end, struct resource *res,
+                      struct printf_spec spec, const char *fmt)
 {
 #ifndef IO_RSRC_PRINTK_SIZE
 #define IO_RSRC_PRINTK_SIZE     6
@@ -597,22 +600,35 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 #ifndef MEM_RSRC_PRINTK_SIZE
 #define MEM_RSRC_PRINTK_SIZE    10
 #endif
-        struct printf_spec hex_spec = {
+        static const struct printf_spec io_spec = {
                .base = 16,
+                .field_width = IO_RSRC_PRINTK_SIZE,
                .precision = -1,
                .flags = SPECIAL | SMALL | ZEROPAD,
        };
-        struct printf_spec dec_spec = {
+        static const struct printf_spec mem_spec = {
+                .base = 16,
+                .field_width = MEM_RSRC_PRINTK_SIZE,
+                .precision = -1,
+                .flags = SPECIAL | SMALL | ZEROPAD,
+        };
+        static const struct printf_spec bus_spec = {
+                .base = 16,
+                .field_width = 2,
+                .precision = -1,
+                .flags = SMALL | ZEROPAD,
+        };
+        static const struct printf_spec dec_spec = {
                .base = 10,
                .precision = -1,
                .flags = 0,
        };
-        struct printf_spec str_spec = {
+        static const struct printf_spec str_spec = {
                .field_width = -1,
                .precision = 10,
                .flags = LEFT,
        };
-        struct printf_spec flag_spec = {
+        static const struct printf_spec flag_spec = {
                .base = 16,
                .precision = -1,
                .flags = SPECIAL | SMALL,
@@ -622,47 +638,48 @@ static char *resource_string(char *buf, char *end, struct resource *res,
         * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
 #define RSRC_BUF_SIZE           ((2 * sizeof(resource_size_t)) + 4)
 #define FLAG_BUF_SIZE           (2 * sizeof(res->flags))
-#define DECODED_BUF_SIZE        sizeof("[mem - 64bit pref disabled]")
+#define DECODED_BUF_SIZE        sizeof("[mem - 64bit pref window disabled]")
 #define RAW_BUF_SIZE            sizeof("[mem - flags 0x]")
        char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
                     2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
        char *p = sym, *pend = sym + sizeof(sym);
-        int size = -1, addr = 0;
        int decode = (fmt[0] == 'R') ? 1 : 0;
+        const struct printf_spec *specp;
-        if (res->flags & IORESOURCE_IO) {
-                size = IO_RSRC_PRINTK_SIZE;
-                addr = 1;
-        } else if (res->flags & IORESOURCE_MEM) {
-                size = MEM_RSRC_PRINTK_SIZE;
-                addr = 1;
-        }
        *p++ = '[';
-        if (res->flags & IORESOURCE_IO)
+        if (res->flags & IORESOURCE_IO) {
                p = string(p, pend, "io  ", str_spec);
-        else if (res->flags & IORESOURCE_MEM)
+                specp = &io_spec;
+        } else if (res->flags & IORESOURCE_MEM) {
                p = string(p, pend, "mem ", str_spec);
-        else if (res->flags & IORESOURCE_IRQ)
+                specp = &mem_spec;
+        } else if (res->flags & IORESOURCE_IRQ) {
                p = string(p, pend, "irq ", str_spec);
-        else if (res->flags & IORESOURCE_DMA)
+                specp = &dec_spec;
+        } else if (res->flags & IORESOURCE_DMA) {
                p = string(p, pend, "dma ", str_spec);
-        else {
+                specp = &dec_spec;
+        } else if (res->flags & IORESOURCE_BUS) {
+                p = string(p, pend, "bus ", str_spec);
+                specp = &bus_spec;
+        } else {
                p = string(p, pend, "??? ", str_spec);
+                specp = &mem_spec;
                decode = 0;
        }
-        hex_spec.field_width = size;
+        p = number(p, pend, res->start, *specp);
-        p = number(p, pend, res->start, addr ? hex_spec : dec_spec);
        if (res->start != res->end) {
                *p++ = '-';
-                p = number(p, pend, res->end, addr ? hex_spec : dec_spec);
+                p = number(p, pend, res->end, *specp);
        }
        if (decode) {
                if (res->flags & IORESOURCE_MEM_64)
                        p = string(p, pend, " 64bit", str_spec);
                if (res->flags & IORESOURCE_PREFETCH)
                        p = string(p, pend, " pref", str_spec);
+                if (res->flags & IORESOURCE_WINDOW)
+                        p = string(p, pend, " window", str_spec);
                if (res->flags & IORESOURCE_DISABLED)
                        p = string(p, pend, " disabled", str_spec);
        } else {
@@ -675,30 +692,63 @@ static char *resource_string(char *buf, char *end, struct resource *res,
        return string(buf, end, sym, spec);
 }
-static char *mac_address_string(char *buf, char *end, u8 *addr,
+static noinline_for_stack
-                                struct printf_spec spec, const char *fmt)
+char *mac_address_string(char *buf, char *end, u8 *addr,
+                         struct printf_spec spec, const char *fmt)
 {
        char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
        char *p = mac_addr;
        int i;
+        char separator;
+        if (fmt[1] == 'F') {            /* FDDI canonical format */
+                separator = '-';
+        } else {
+                separator = ':';
+        }
        for (i = 0; i < 6; i++) {
                p = pack_hex_byte(p, addr[i]);
                if (fmt[0] == 'M' && i != 5)
-                        *p++ = ':';
+                        *p++ = separator;
        }
        *p = '\0';
        return string(buf, end, mac_addr, spec);
 }
-static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
+static noinline_for_stack
+char *ip4_string(char *p, const u8 *addr, const char *fmt)
 {
        int i;
+        bool leading_zeros = (fmt[0] == 'i');
+        int index;
+        int step;
+        switch (fmt[2]) {
+        case 'h':
+#ifdef __BIG_ENDIAN
+                index = 0;
+                step = 1;
+#else
+                index = 3;
+                step = -1;
+#endif
+                break;
+        case 'l':
+                index = 3;
+                step = -1;
+                break;
+        case 'n':
+        case 'b':
+        default:
+                index = 0;
+                step = 1;
+                break;
+        }
        for (i = 0; i < 4; i++) {
                char temp[3];   /* hold each IP quad in reverse order */
-                int digits = put_dec_trunc(temp, addr[i]) - temp;
+                int digits = put_dec_trunc(temp, addr[index]) - temp;
                if (leading_zeros) {
                        if (digits < 3)
                                *p++ = '0';
@@ -710,13 +760,15 @@ static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
                        *p++ = temp[digits];
                if (i < 3)
                        *p++ = '.';
+                index += step;
        }
        *p = '\0';
        return p;
 }
-static char *ip6_compressed_string(char *p, const char *addr)
+static noinline_for_stack
+char *ip6_compressed_string(char *p, const char *addr)
 {
        int i, j, range;
        unsigned char zerolength[8];
@@ -789,14 +841,15 @@ static char *ip6_compressed_string(char *p, const char *addr)
        if (useIPv4) {
                if (needcolon)
                        *p++ = ':';
-                p = ip4_string(p, &in6.s6_addr[12], false);
+                p = ip4_string(p, &in6.s6_addr[12], "I4");
        }
        *p = '\0';
        return p;
 }
-static char *ip6_string(char *p, const char *addr, const char *fmt)
+static noinline_for_stack
+char *ip6_string(char *p, const char *addr, const char *fmt)
 {
        int i;
@@ -811,8 +864,9 @@ static char *ip6_string(char *p, const char *addr, const char *fmt)
        return p;
 }
-static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
+static noinline_for_stack
-                             struct printf_spec spec, const char *fmt)
+char *ip6_addr_string(char *buf, char *end, const u8 *addr,
+                      struct printf_spec spec, const char *fmt)
 {
        char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
@@ -824,18 +878,20 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
        return string(buf, end, ip6_addr, spec);
 }
-static char *ip4_addr_string(char *buf, char *end, const u8 *addr,
+static noinline_for_stack
-                             struct printf_spec spec, const char *fmt)
+char *ip4_addr_string(char *buf, char *end, const u8 *addr,
+                      struct printf_spec spec, const char *fmt)
 {
        char ip4_addr[sizeof("255.255.255.255")];
-        ip4_string(ip4_addr, addr, fmt[0] == 'i');
+        ip4_string(ip4_addr, addr, fmt);
        return string(buf, end, ip4_addr, spec);
 }
-static char *uuid_string(char *buf, char *end, const u8 *addr,
+static noinline_for_stack
-                         struct printf_spec spec, const char *fmt)
+char *uuid_string(char *buf, char *end, const u8 *addr,
+                  struct printf_spec spec, const char *fmt)
 {
        char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
        char *p = uuid;
@@ -880,6 +936,8 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
        return string(buf, end, uuid, spec);
 }
+int kptr_restrict = 1;
 /*
 * Show a '%p' thing.  A kernel extension is that the '%p' is followed
 * by an extra set of alphanumeric characters that are extended format
@@ -896,12 +954,15 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
 * - 'M' For a 6-byte MAC address, it prints the address in the
 *       usual colon-separated hex notation
 * - 'm' For a 6-byte MAC address, it prints the hex address without colons
+ * - 'MF' For a 6-byte MAC FDDI address, it prints the address
+ *       with a dash-separated hex notation
 * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
 *       IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
 *       IPv6 uses colon separated network-order 16 bit hex with leading 0's
 * - 'i' [46] for 'raw' IPv4/IPv6 addresses
 *       IPv6 omits the colons (01020304...0f)
 *       IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
+ * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
 * - 'I6c' for IPv6 addresses printed as specified by
 *       http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
 * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
@@ -915,16 +976,30 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
 *             [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
 *           little endian output byte order is:
 *             [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
+ * - 'V' For a struct va_format which contains a format string * and va_list *,
+ *       call vsnprintf(->fmt, *->va).
+ *       Implements a "recursive vsnprintf".
+ *       Do not use this feature without some mechanism to verify the
+ *       correctness of the format string and va_list arguments.
+ * - 'K' For a kernel pointer that should be hidden from unprivileged users
 *
 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
 * function pointers are really function descriptors, which contain a
 * pointer to the real address.
 */
-static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+static noinline_for_stack
-                        struct printf_spec spec)
+char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+              struct printf_spec spec)
 {
-        if (!ptr)
+        if (!ptr) {
+                /*
+                 * Print (null) with the same width as a pointer so it makes
+                 * tabular output look nice.
+                 */
+                if (spec.field_width == -1)
+                        spec.field_width = 2 * sizeof(void *);
                return string(buf, end, "(null)", spec);
+        }
        switch (*fmt) {
        case 'F':
@@ -939,6 +1014,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
                return resource_string(buf, end, ptr, spec, fmt);
        case 'M':                       /* Colon separated: 00:01:02:03:04:05 */
        case 'm':                       /* Contiguous: 000102030405 */
+                                        /* [mM]F (FDDI, bit reversed) */
                return mac_address_string(buf, end, ptr, spec, fmt);
        case 'I':                       /* Formatted IP supported
                                         * 4:   1.2.3.4
@@ -958,10 +1034,33 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
                break;
        case 'U':
                return uuid_string(buf, end, ptr, spec, fmt);
+        case 'V':
+                return buf + vsnprintf(buf, end - buf,
+                                       ((struct va_format *)ptr)->fmt,
+                                       *(((struct va_format *)ptr)->va));
+        case 'K':
+                /*
+                 * %pK cannot be used in IRQ context because its test
+                 * for CAP_SYSLOG would be meaningless.
+                 */
+                if (in_irq() || in_serving_softirq() || in_nmi()) {
+                        if (spec.field_width == -1)
+                                spec.field_width = 2 * sizeof(void *);
+                        return string(buf, end, "pK-error", spec);
+                } else if ((kptr_restrict == 0) ||
+                         (kptr_restrict == 1 &&
+                          has_capability_noaudit(current, CAP_SYSLOG)))
+                        break;
+                if (spec.field_width == -1) {
+                        spec.field_width = 2 * sizeof(void *);
+                        spec.flags |= ZEROPAD;
+                }
+                return number(buf, end, 0, spec);
        }
        spec.flags |= SMALL;
        if (spec.field_width == -1) {
-                spec.field_width = 2*sizeof(void *);
+                spec.field_width = 2 * sizeof(void *);
                spec.flags |= ZEROPAD;
        }
        spec.base = 16;
@@ -989,7 +1088,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 * @precision: precision of a number
 * @qualifier: qualifier of a number (long, size_t, ...)
 */
-static int format_decode(const char *fmt, struct printf_spec *spec)
+static noinline_for_stack
+int format_decode(const char *fmt, struct printf_spec *spec)
 {
        const char *start = fmt;
@@ -1297,7 +1397,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
                        break;
                case FORMAT_TYPE_NRCHARS: {
-                        int qualifier = spec.qualifier;
+                        u8 qualifier = spec.qualifier;
                        if (qualifier == 'l') {
                                long *ip = va_arg(args, long *);
@@ -1373,7 +1473,7 @@ EXPORT_SYMBOL(vsnprintf);
 * @args: Arguments for the format string
 *
 * The return value is the number of characters which have been written into
- * the @buf not including the trailing '\0'. If @size is <= 0 the function
+ * the @buf not including the trailing '\0'. If @size is 0 the function
 * returns 0.
 *
 * Call this function if you are already dealing with a va_list.
@@ -1387,7 +1487,11 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
        i = vsnprintf(buf, size, fmt, args);
-        return (i >= size) ? (size - 1) : i;
+        if (likely(i < size))
+                return i;
+        if (size != 0)
+                return size - 1;
+        return 0;
 }
 EXPORT_SYMBOL(vscnprintf);
@@ -1426,7 +1530,7 @@ EXPORT_SYMBOL(snprintf);
 * @...: Arguments for the format string
 *
 * The return value is the number of characters written into @buf not including
- * the trailing '\0'. If @size is <= 0 the function returns 0.
+ * the trailing '\0'. If @size is 0 the function returns 0.
 */
 int scnprintf(char *buf, size_t size, const char *fmt, ...)
@@ -1435,10 +1539,10 @@ int scnprintf(char *buf, size_t size, const char *fmt, ...)
        int i;
        va_start(args, fmt);
-        i = vsnprintf(buf, size, fmt, args);
+        i = vscnprintf(buf, size, fmt, args);
        va_end(args);
-        return (i >= size) ? (size - 1) : i;
+        return i;
 }
 EXPORT_SYMBOL(scnprintf);
@@ -1583,7 +1687,7 @@ do {									\
                case FORMAT_TYPE_NRCHARS: {
                        /* skip %n 's argument */
-                        int qualifier = spec.qualifier;
+                        u8 qualifier = spec.qualifier;
                        void *skip_arg;
                        if (qualifier == 'l')
                                skip_arg = va_arg(args, long *);
@@ -1849,7 +1953,9 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
        char *next;
        char digit;
        int num = 0;
-        int qualifier, base, field_width;
+        u8 qualifier;
+        u8 base;
+        s16 field_width;
        bool is_sign;
        while (*fmt && *str) {
@@ -1927,7 +2033,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
                {
                        char *s = (char *)va_arg(args, char *);
                        if (field_width == -1)
-                                field_width = INT_MAX;
+                                field_width = SHRT_MAX;
                        /* first, skip leading white space in buffer */
                        str = skip_spaces(str);
diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig
new file mode 100644
index 00000000000..e3b6e18fdac
--- /dev/null
+++ b/lib/xz/Kconfig
@@ -0,0 +1,59 @@
+config XZ_DEC
+        tristate "XZ decompression support"
+        select CRC32
+        help
+          LZMA2 compression algorithm and BCJ filters are supported using
+          the .xz file format as the container. For integrity checking,
+          CRC32 is supported. See Documentation/xz.txt for more information.
+config XZ_DEC_X86
+        bool "x86 BCJ filter decoder" if EMBEDDED
+        default y
+        depends on XZ_DEC
+        select XZ_DEC_BCJ
+config XZ_DEC_POWERPC
+        bool "PowerPC BCJ filter decoder" if EMBEDDED
+        default y
+        depends on XZ_DEC
+        select XZ_DEC_BCJ
+config XZ_DEC_IA64
+        bool "IA-64 BCJ filter decoder" if EMBEDDED
+        default y
+        depends on XZ_DEC
+        select XZ_DEC_BCJ
+config XZ_DEC_ARM
+        bool "ARM BCJ filter decoder" if EMBEDDED
+        default y
+        depends on XZ_DEC
+        select XZ_DEC_BCJ
+config XZ_DEC_ARMTHUMB
+        bool "ARM-Thumb BCJ filter decoder" if EMBEDDED
+        default y
+        depends on XZ_DEC
+        select XZ_DEC_BCJ
+config XZ_DEC_SPARC
+        bool "SPARC BCJ filter decoder" if EMBEDDED
+        default y
+        depends on XZ_DEC
+        select XZ_DEC_BCJ
+config XZ_DEC_BCJ
+        bool
+        default n
+config XZ_DEC_TEST
+        tristate "XZ decompressor tester"
+        default n
+        depends on XZ_DEC
+        help
+          This allows passing .xz files to the in-kernel XZ decoder via
+          a character special file. It calculates CRC32 of the decompressed
+          data and writes diagnostics to the system log.
+          Unless you are developing the XZ decoder, you don't need this
+          and should say N.
diff --git a/lib/xz/Makefile b/lib/xz/Makefile
new file mode 100644
index 00000000000..a7fa7693f0f
--- /dev/null
+++ b/lib/xz/Makefile
@@ -0,0 +1,5 @@
+# xz_dec.o is a composite object assembled from the xz_dec-y list.
+obj-$(CONFIG_XZ_DEC) += xz_dec.o
+xz_dec-y := xz_dec_syms.o xz_dec_stream.o xz_dec_lzma2.o
+# The BCJ filter code is added to xz_dec.o only if CONFIG_XZ_DEC_BCJ is y.
+xz_dec-$(CONFIG_XZ_DEC_BCJ) += xz_dec_bcj.o
+# The tester is a separate object controlled by CONFIG_XZ_DEC_TEST.
+obj-$(CONFIG_XZ_DEC_TEST) += xz_dec_test.o
diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c
new file mode 100644
index 00000000000..34532d14fd4
--- /dev/null
+++ b/lib/xz/xz_crc32.c
@@ -0,0 +1,59 @@
+/*
+ * CRC32 using the polynomial from IEEE-802.3
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ *          Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+/*
+ * This is not the fastest implementation, but it is pretty compact.
+ * The fastest versions of xz_crc32() on modern CPUs without hardware
+ * accelerated CRC instruction are 3-5 times as fast as this version,
+ * but they are bigger and use more memory for the lookup table.
+ */
+#include "xz_private.h"
+/*
+ * STATIC_RW_DATA is used in the pre-boot environment on some architectures.
+ * See <linux/decompress/mm.h> for details.
+ */
+#ifndef STATIC_RW_DATA
+#       define STATIC_RW_DATA static
+#endif
+/* Lookup table indexed by one byte; it is filled in by xz_crc32_init(). */
+STATIC_RW_DATA uint32_t xz_crc32_table[256];
+/*
+ * Fill xz_crc32_table[] so that entry N holds the result of pushing the
+ * byte value N through eight shift-and-conditionally-XOR steps with the
+ * polynomial constant 0xEDB88320. Must be called before xz_crc32().
+ */
+XZ_EXTERN void xz_crc32_init(void)
+{
+        const uint32_t poly = 0xEDB88320;
+        uint32_t byte;
+        for (byte = 0; byte < 256; byte++) {
+                uint32_t rem = byte;
+                uint32_t bit;
+                for (bit = 0; bit < 8; bit++) {
+                        /* XOR the polynomial in only if the low bit is set. */
+                        if (rem & 1)
+                                rem = (rem >> 1) ^ poly;
+                        else
+                                rem >>= 1;
+                }
+                xz_crc32_table[byte] = rem;
+        }
+}
+/*
+ * Update a CRC32 value with size bytes from buf using xz_crc32_table[].
+ * The incoming crc is inverted before the bytes are folded in and the
+ * result is inverted again before it is returned, so the return value of
+ * one call can be passed as crc to the next call.
+ */
+XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
+{
+        size_t left;
+        crc = ~crc;
+        for (left = size; left > 0; --left, ++buf)
+                crc = (crc >> 8) ^ xz_crc32_table[(crc & 0xFF) ^ *buf];
+        return ~crc;
+}
diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c
new file mode 100644
index 00000000000..e51e2558ca9
--- /dev/null
+++ b/lib/xz/xz_dec_bcj.c
@@ -0,0 +1,561 @@
+/*
+ * Branch/Call/Jump (BCJ) filter decoders
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ *          Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#include "xz_private.h"
+/*
+ * The rest of the file is inside this ifdef. It makes things a little more
+ * convenient when building without support for any BCJ filters.
+ */
+#ifdef XZ_DEC_BCJ
+/*
+ * State of one BCJ filter decoder. It is allocated by xz_dec_bcj_create()
+ * and set up for a particular filter type by xz_dec_bcj_reset().
+ */
+struct xz_dec_bcj {
+        /*
+         * Type of the BCJ filter being used. xz_dec_bcj_reset() compares
+         * the Filter ID it is given against these values.
+         */
+        enum {
+                BCJ_X86 = 4,        /* x86 or x86-64 */
+                BCJ_POWERPC = 5,    /* Big endian only */
+                BCJ_IA64 = 6,       /* Big or little endian */
+                BCJ_ARM = 7,        /* Little endian only */
+                BCJ_ARMTHUMB = 8,   /* Little endian only */
+                BCJ_SPARC = 9       /* Big or little endian */
+        } type;
+        /*
+         * Return value of the next filter in the chain. We need to preserve
+         * this information across calls, because we must not call the next
+         * filter anymore once it has returned XZ_STREAM_END.
+         */
+        enum xz_ret ret;
+        /* True if we are operating in single-call mode. */
+        bool single_call;
+        /*
+         * Absolute position relative to the beginning of the uncompressed
+         * data (in a single .xz Block). We care only about the lowest 32
+         * bits so this doesn't need to be uint64_t even with big files.
+         */
+        uint32_t pos;
+        /* x86 filter state */
+        uint32_t x86_prev_mask;
+        /* Temporary space to hold the variables from struct xz_buf */
+        uint8_t *out;
+        size_t out_pos;
+        size_t out_size;
+        struct {
+                /* Amount of already filtered data in the beginning of buf */
+                size_t filtered;
+                /* Total amount of data currently stored in buf  */
+                size_t size;
+                /*
+                 * Buffer to hold a mix of filtered and unfiltered data. This
+                 * needs to be big enough to hold Alignment + 2 * Look-ahead:
+                 *
+                 * Type         Alignment   Look-ahead
+                 * x86              1           4
+                 * PowerPC          4           0
+                 * IA-64           16           0
+                 * ARM              4           0
+                 * ARM-Thumb        2           2
+                 * SPARC            4           0
+                 */
+                uint8_t buf[16];
+        } temp;
+};
+#ifdef XZ_DEC_X86
+/*
+ * Tell whether a byte, taken as the most significant byte of a memory
+ * address in an x86 instruction, is either 0x00 or 0xFF.
+ * Returns 1 if it is, 0 otherwise.
+ */
+static inline int bcj_x86_test_msbyte(uint8_t b)
+{
+        if (b == 0x00)
+                return 1;
+        return b == 0xFF;
+}
+/*
+ * BCJ filter for x86. Scans buf for bytes equal to 0xE8 or 0xE9. When the
+ * byte four positions after such a byte passes bcj_x86_test_msbyte(), the
+ * 32-bit little endian value following the opcode byte is rewritten by
+ * subtracting s->pos + i + 5 from it.
+ *
+ * The last four bytes of the buffer are never used as a starting position
+ * and nothing is done when size <= 4. s->x86_prev_mask carries information
+ * about recently seen opcode bytes over to the next call.
+ *
+ * Returns the number of bytes from the beginning of buf that were processed.
+ */
+static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+        static const bool mask_to_allowed_status[8]
+                = { true, true, true, false, true, false, false, false };
+        static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
+        size_t i;
+        size_t prev_pos = (size_t)-1;
+        uint32_t prev_mask = s->x86_prev_mask;
+        uint32_t src;
+        uint32_t dest;
+        uint32_t j;
+        uint8_t b;
+        /* An opcode byte plus a 32-bit operand needs more than four bytes. */
+        if (size <= 4)
+                return 0;
+        size -= 4;
+        for (i = 0; i < size; ++i) {
+                if ((buf[i] & 0xFE) != 0xE8)
+                        continue;
+                /* Distance from the previously seen opcode byte. */
+                prev_pos = i - prev_pos;
+                if (prev_pos > 3) {
+                        prev_mask = 0;
+                } else {
+                        prev_mask = (prev_mask << (prev_pos - 1)) & 7;
+                        if (prev_mask != 0) {
+                                b = buf[i + 4 - mask_to_bit_num[prev_mask]];
+                                if (!mask_to_allowed_status[prev_mask]
+                                                || bcj_x86_test_msbyte(b)) {
+                                        prev_pos = i;
+                                        prev_mask = (prev_mask << 1) | 1;
+                                        continue;
+                                }
+                        }
+                }
+                prev_pos = i;
+                if (bcj_x86_test_msbyte(buf[i + 4])) {
+                        src = get_unaligned_le32(buf + i + 1);
+                        while (true) {
+                                dest = src - (s->pos + (uint32_t)i + 5);
+                                if (prev_mask == 0)
+                                        break;
+                                j = mask_to_bit_num[prev_mask] * 8;
+                                b = (uint8_t)(dest >> (24 - j));
+                                if (!bcj_x86_test_msbyte(b))
+                                        break;
+                                src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
+                        }
+                        /* Keep 25 bits and extend bit 24 into the top bits. */
+                        dest &= 0x01FFFFFF;
+                        dest |= (uint32_t)0 - (dest & 0x01000000);
+                        put_unaligned_le32(dest, buf + i + 1);
+                        /* Skip the four operand bytes that were just written. */
+                        i += 4;
+                } else {
+                        prev_mask = (prev_mask << 1) | 1;
+                }
+        }
+        /* Save the state for the next call. */
+        prev_pos = i - prev_pos;
+        s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
+        return i;
+}
+#endif
+#ifdef XZ_DEC_POWERPC
+/*
+ * BCJ filter for PowerPC. Every aligned 32-bit big endian word whose bits
+ * match 0x48000001 under the mask 0xFC000003 has the field selected by
+ * 0x03FFFFFC reduced by the position of the word (s->pos + offset).
+ *
+ * Returns the number of bytes processed, which is a multiple of four.
+ */
+static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+        size_t off = 0;
+        while (off + 4 <= size) {
+                uint8_t *const word = buf + off;
+                uint32_t insn = get_unaligned_be32(word);
+                if ((insn & 0xFC000003) == 0x48000001) {
+                        uint32_t target = insn & 0x03FFFFFC;
+                        target -= s->pos + (uint32_t)off;
+                        insn = 0x48000001 | (target & 0x03FFFFFC);
+                        put_unaligned_be32(insn, word);
+                }
+                off += 4;
+        }
+        return off;
+}
+#endif
+#ifdef XZ_DEC_IA64
+/*
+ * BCJ filter for IA-64. The buffer is handled in units of 16 bytes. The
+ * lowest five bits of the first byte of a unit index branch_table[], which
+ * gives a bit mask telling which of the three instruction slots have to be
+ * examined. A slot that passes the opcode test below has its address field
+ * adjusted by subtracting s->pos + i.
+ *
+ * Returns the number of bytes processed, which is a multiple of 16.
+ */
+static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+        static const uint8_t branch_table[32] = {
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0,
+                4, 4, 6, 6, 0, 0, 7, 7,
+                4, 4, 0, 0, 4, 4, 0, 0
+        };
+        /*
+         * The local variables take a little bit stack space, but it's less
+         * than what LZMA2 decoder takes, so it doesn't make sense to reduce
+         * stack usage here without doing that for the LZMA2 decoder too.
+         */
+        /* Loop counters */
+        size_t i;
+        size_t j;
+        /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
+        uint32_t slot;
+        /* Bitwise offset of the instruction indicated by slot */
+        uint32_t bit_pos;
+        /* bit_pos split into byte and bit parts */
+        uint32_t byte_pos;
+        uint32_t bit_res;
+        /* Address part of an instruction */
+        uint32_t addr;
+        /* Mask used to detect which instructions to convert */
+        uint32_t mask;
+        /* 41-bit instruction stored somewhere in the lowest 48 bits */
+        uint64_t instr;
+        /* Instruction normalized with bit_res for easier manipulation */
+        uint64_t norm;
+        for (i = 0; i + 16 <= size; i += 16) {
+                mask = branch_table[buf[i] & 0x1F];
+                /* The first slot starts at bit 5; each slot is 41 bits. */
+                for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
+                        if (((mask >> slot) & 1) == 0)
+                                continue;
+                        byte_pos = bit_pos >> 3;
+                        bit_res = bit_pos & 7;
+                        /* Gather six bytes, least significant byte first. */
+                        instr = 0;
+                        for (j = 0; j < 6; ++j)
+                                instr |= (uint64_t)(buf[i + j + byte_pos])
+                                                << (8 * j);
+                        norm = instr >> bit_res;
+                        if (((norm >> 37) & 0x0F) == 0x05
+                                        && ((norm >> 9) & 0x07) == 0) {
+                                /* 20 low address bits plus one bit at 36 */
+                                addr = (norm >> 13) & 0x0FFFFF;
+                                addr |= ((uint32_t)(norm >> 36) & 1) << 20;
+                                addr <<= 4;
+                                addr -= s->pos + (uint32_t)i;
+                                addr >>= 4;
+                                norm &= ~((uint64_t)0x8FFFFF << 13);
+                                norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
+                                norm |= (uint64_t)(addr & 0x100000)
+                                                << (36 - 20);
+                                /* Keep the bits below the slot untouched. */
+                                instr &= (1 << bit_res) - 1;
+                                instr |= norm << bit_res;
+                                for (j = 0; j < 6; j++)
+                                        buf[i + j + byte_pos]
+                                                = (uint8_t)(instr >> (8 * j));
+                        }
+                }
+        }
+        return i;
+}
+#endif
+#ifdef XZ_DEC_ARM
+/*
+ * BCJ filter for ARM. In every aligned four-byte group whose last byte is
+ * 0xEB, the 24-bit little endian value in the first three bytes is scaled
+ * by four, reduced by s->pos + offset + 8, scaled back and stored again.
+ *
+ * Returns the number of bytes processed, which is a multiple of four.
+ */
+static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+        size_t off;
+        for (off = 0; off + 4 <= size; off += 4) {
+                uint8_t *const insn = buf + off;
+                uint32_t addr;
+                if (insn[3] != 0xEB)
+                        continue;
+                addr = (uint32_t)insn[0]
+                                | ((uint32_t)insn[1] << 8)
+                                | ((uint32_t)insn[2] << 16);
+                addr <<= 2;
+                addr -= s->pos + (uint32_t)off + 8;
+                addr >>= 2;
+                insn[0] = (uint8_t)addr;
+                insn[1] = (uint8_t)(addr >> 8);
+                insn[2] = (uint8_t)(addr >> 16);
+        }
+        return off;
+}
+#endif
+#ifdef XZ_DEC_ARMTHUMB
+/*
+ * BCJ filter for ARM-Thumb. Every even offset is checked for a four-byte
+ * pattern in which the top five bits of the second byte are 11110 and the
+ * top five bits of the fourth byte are 11111. The 22 address bits spread
+ * over the four bytes are collected, scaled by two, reduced by
+ * s->pos + i + 4, scaled back and stored in the same bit positions.
+ *
+ * Returns the number of bytes processed.
+ */
+static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+        size_t i;
+        uint32_t addr;
+        for (i = 0; i + 4 <= size; i += 2) {
+                if ((buf[i + 1] & 0xF8) == 0xF0
+                                && (buf[i + 3] & 0xF8) == 0xF8) {
+                        addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
+                                        | ((uint32_t)buf[i] << 11)
+                                        | (((uint32_t)buf[i + 3] & 0x07) << 8)
+                                        | (uint32_t)buf[i + 2];
+                        addr <<= 1;
+                        addr -= s->pos + (uint32_t)i + 4;
+                        addr >>= 1;
+                        buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
+                        buf[i] = (uint8_t)(addr >> 11);
+                        buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
+                        buf[i + 2] = (uint8_t)addr;
+                        /* Together with the loop step this skips all four bytes. */
+                        i += 2;
+                }
+        }
+        return i;
+}
+#endif
+#ifdef XZ_DEC_SPARC
+/*
+ * BCJ filter for SPARC. An aligned 32-bit big endian word is converted when
+ * its top ten bits are 0x100 or 0x1FF: the word is scaled by four, reduced
+ * by s->pos + offset, scaled back and then rebuilt from its low 22 bits,
+ * bit 22 and the fixed 0x40000000 marker.
+ *
+ * Returns the number of bytes processed, which is a multiple of four.
+ */
+static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
+{
+        size_t off;
+        for (off = 0; off + 4 <= size; off += 4) {
+                uint32_t insn = get_unaligned_be32(buf + off);
+                const uint32_t top = insn >> 22;
+                if (top != 0x100 && top != 0x1FF)
+                        continue;
+                insn <<= 2;
+                insn -= s->pos + (uint32_t)off;
+                insn >>= 2;
+                insn = ((uint32_t)0x40000000 - (insn & 0x400000))
+                                | 0x40000000 | (insn & 0x3FFFFF);
+                put_unaligned_be32(insn, buf + off);
+        }
+        return off;
+}
+#endif
+/*
+ * Run the BCJ filter selected by s->type on buf[*pos .. size) and advance
+ * both *pos and s->pos by the number of bytes the filter handled.
+ *
+ * NOTE: A switch statement is used instead of function pointers, because
+ * function pointers could be problematic in the kernel boot code, which
+ * must avoid pointers to static data (at least on x86).
+ */
+static void bcj_apply(struct xz_dec_bcj *s,
+                      uint8_t *buf, size_t *pos, size_t size)
+{
+        uint8_t *const start = buf + *pos;
+        const size_t avail = size - *pos;
+        /* Stays zero if s->type matches none of the compiled-in filters. */
+        size_t done = 0;
+        switch (s->type) {
+#ifdef XZ_DEC_X86
+        case BCJ_X86:
+                done = bcj_x86(s, start, avail);
+                break;
+#endif
+#ifdef XZ_DEC_POWERPC
+        case BCJ_POWERPC:
+                done = bcj_powerpc(s, start, avail);
+                break;
+#endif
+#ifdef XZ_DEC_IA64
+        case BCJ_IA64:
+                done = bcj_ia64(s, start, avail);
+                break;
+#endif
+#ifdef XZ_DEC_ARM
+        case BCJ_ARM:
+                done = bcj_arm(s, start, avail);
+                break;
+#endif
+#ifdef XZ_DEC_ARMTHUMB
+        case BCJ_ARMTHUMB:
+                done = bcj_armthumb(s, start, avail);
+                break;
+#endif
+#ifdef XZ_DEC_SPARC
+        case BCJ_SPARC:
+                done = bcj_sparc(s, start, avail);
+                break;
+#endif
+        default:
+                /* Never reached but silence compiler warnings. */
+                break;
+        }
+        *pos += done;
+        s->pos += done;
+}
+/*
+ * Move as much already filtered data as fits from the beginning of temp
+ * into the output buffer, then slide whatever is left in temp (filtered
+ * or not) down to the start of temp.buf.
+ */
+static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
+{
+        const size_t room = b->out_size - b->out_pos;
+        const size_t n = min_t(size_t, s->temp.filtered, room);
+        memcpy(b->out + b->out_pos, s->temp.buf, n);
+        b->out_pos += n;
+        s->temp.filtered -= n;
+        s->temp.size -= n;
+        /* Source and destination may overlap, hence memmove(). */
+        memmove(s->temp.buf, s->temp.buf + n, s->temp.size);
+}
+/*
+ * The BCJ filter functions are primitive in the sense that they process
+ * the data in chunks of 1-16 bytes. To hide this issue, this function does
+ * some buffering.
+ *
+ * s is the BCJ decoder state, lzma2 is the next filter in the chain (it is
+ * run via xz_dec_lzma2_run()), and b holds the caller's input and output
+ * buffers. Returns XZ_OK while filtered data is still pending in s->temp,
+ * XZ_STREAM_END once the next filter has finished and everything has been
+ * flushed, or otherwise the return value of the next filter.
+ */
+XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
+                                     struct xz_dec_lzma2 *lzma2,
+                                     struct xz_buf *b)
+{
+        size_t out_start;
+        /*
+         * Flush pending already filtered data to the output buffer. Return
+         * immediately if we couldn't flush everything, or if the next
+         * filter in the chain had already returned XZ_STREAM_END.
+         */
+        if (s->temp.filtered > 0) {
+                bcj_flush(s, b);
+                if (s->temp.filtered > 0)
+                        return XZ_OK;
+                if (s->ret == XZ_STREAM_END)
+                        return XZ_STREAM_END;
+        }
+        /*
+         * If we have more output space than what is currently pending in
+         * temp, copy the unfiltered data from temp to the output buffer
+         * and try to fill the output buffer by decoding more data from the
+         * next filter in the chain. Apply the BCJ filter on the new data
+         * in the output buffer. If everything cannot be filtered, copy it
+         * to temp and rewind the output buffer position accordingly.
+         */
+        if (s->temp.size < b->out_size - b->out_pos) {
+                out_start = b->out_pos;
+                memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
+                b->out_pos += s->temp.size;
+                s->ret = xz_dec_lzma2_run(lzma2, b);
+                /*
+                 * Errors are passed up as is. In single-call mode XZ_OK is
+                 * returned here as well, before any filtering is done.
+                 */
+                if (s->ret != XZ_STREAM_END
+                                && (s->ret != XZ_OK || s->single_call))
+                        return s->ret;
+                bcj_apply(s, b->out, &out_start, b->out_pos);
+                /*
+                 * As an exception, if the next filter returned XZ_STREAM_END,
+                 * we can do that too, since the last few bytes that remain
+                 * unfiltered are meant to remain unfiltered.
+                 */
+                if (s->ret == XZ_STREAM_END)
+                        return XZ_STREAM_END;
+                /* Move the unfiltered tail from the output back to temp. */
+                s->temp.size = b->out_pos - out_start;
+                b->out_pos -= s->temp.size;
+                memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
+        }
+        /*
+         * If we have unfiltered data in temp, try to fill by decoding more
+         * data from the next filter. Apply the BCJ filter on temp. Then we
+         * hopefully can fill the actual output buffer by copying filtered
+         * data from temp. A mix of filtered and unfiltered data may be left
+         * in temp; it will be taken care on the next call to this function.
+         */
+        if (s->temp.size > 0) {
+                /* Make b->out{,_pos,_size} temporarily point to s->temp. */
+                s->out = b->out;
+                s->out_pos = b->out_pos;
+                s->out_size = b->out_size;
+                b->out = s->temp.buf;
+                b->out_pos = s->temp.size;
+                b->out_size = sizeof(s->temp.buf);
+                s->ret = xz_dec_lzma2_run(lzma2, b);
+                /* Restore the caller's output buffer before doing anything else. */
+                s->temp.size = b->out_pos;
+                b->out = s->out;
+                b->out_pos = s->out_pos;
+                b->out_size = s->out_size;
+                if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
+                        return s->ret;
+                bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
+                /*
+                 * If the next filter returned XZ_STREAM_END, we mark that
+                 * everything is filtered, since the last unfiltered bytes
+                 * of the stream are meant to be left as is.
+                 */
+                if (s->ret == XZ_STREAM_END)
+                        s->temp.filtered = s->temp.size;
+                bcj_flush(s, b);
+                if (s->temp.filtered > 0)
+                        return XZ_OK;
+        }
+        return s->ret;
+}
+/*
+ * Allocate a BCJ decoder state with kmalloc() and record whether it will be
+ * used in single-call mode. No other member is initialized here.
+ * Returns NULL if the allocation fails.
+ */
+XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
+{
+        struct xz_dec_bcj *s;
+        s = kmalloc(sizeof(*s), GFP_KERNEL);
+        if (s == NULL)
+                return NULL;
+        s->single_call = single_call;
+        return s;
+}
+/*
+ * Prepare the BCJ decoder for a new stream of data filtered with the given
+ * Filter ID. Returns XZ_OPTIONS_ERROR, leaving *s untouched, if support for
+ * the ID has not been compiled in; otherwise clears the per-stream state
+ * and returns XZ_OK.
+ */
+XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
+{
+        switch (id) {
+#ifdef XZ_DEC_X86
+        case BCJ_X86:
+#endif
+#ifdef XZ_DEC_POWERPC
+        case BCJ_POWERPC:
+#endif
+#ifdef XZ_DEC_IA64
+        case BCJ_IA64:
+#endif
+#ifdef XZ_DEC_ARM
+        case BCJ_ARM:
+#endif
+#ifdef XZ_DEC_ARMTHUMB
+        case BCJ_ARMTHUMB:
+#endif
+#ifdef XZ_DEC_SPARC
+        case BCJ_SPARC:
+#endif
+                /* The requested filter is available. */
+                break;
+        default:
+                /* Unsupported Filter ID */
+                return XZ_OPTIONS_ERROR;
+        }
+        /* Nothing buffered, nothing filtered, position back at zero. */
+        s->temp.size = 0;
+        s->temp.filtered = 0;
+        s->x86_prev_mask = 0;
+        s->pos = 0;
+        s->ret = XZ_OK;
+        s->type = id;
+        return XZ_OK;
+}
+#endif
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
new file mode 100644
index 00000000000..ea5fa4fe9d6
--- /dev/null
+++ b/lib/xz/xz_dec_lzma2.c
@@ -0,0 +1,1171 @@
+/*
+ * LZMA2 decoder
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ *          Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#include "xz_private.h"
+#include "xz_lzma2.h"
+/*
+ * Range decoder initialization eats the first five bytes of each LZMA chunk.
+ */
+#define RC_INIT_BYTES 5
+/*
+ * Minimum number of usable input bytes needed to safely decode one LZMA
+ * symbol. The worst case is that we decode 22 bits using probabilities and
+ * 26 direct bits. This may consume at most 20 bytes of input. However,
+ * lzma_main() does an extra normalization before returning, thus we
+ * need to put 21 here.
+ */
+#define LZMA_IN_REQUIRED 21
+/*
+ * Dictionary (history buffer)
+ *
+ * These are always true:
+ *    start <= pos <= full <= end
+ *    pos <= limit <= end
+ *
+ * In multi-call mode, also these are true:
+ *    end == size
+ *    size <= size_max
+ *    allocated <= size
+ *
+ * Most of these variables are size_t to support single-call mode,
+ * in which the dictionary variables address the actual output
+ * buffer directly.
+ */
+struct dictionary {
+        /* Beginning of the history buffer */
+        uint8_t *buf;
+        /* Old position in buf (before decoding more data) */
+        size_t start;
+        /* Position in buf */
+        size_t pos;
+        /*
+         * How full the dictionary is. This is used to detect corrupt input
+         * that would read beyond the beginning of the uncompressed stream.
+         */
+        size_t full;
+        /* Write limit; we don't write to buf[limit] or later bytes. */
+        size_t limit;
+        /*
+         * End of the dictionary buffer. In multi-call mode, this is
+         * the same as the dictionary size. In single-call mode, this
+         * indicates the size of the output buffer.
+         */
+        size_t end;
+        /*
+         * Size of the dictionary as specified in Block Header. This is used
+         * together with "full" to detect corrupt input that would make us
+         * read beyond the beginning of the uncompressed stream.
+         */
+        uint32_t size;
+        /*
+         * Maximum allowed dictionary size in multi-call mode.
+         * This is ignored in single-call mode.
+         */
+        uint32_t size_max;
+        /*
+         * Amount of memory currently allocated for the dictionary.
+         * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
+         * size_max is always the same as the allocated size.)
+         */
+        uint32_t allocated;
+        /* Operation mode */
+        enum xz_mode mode;
+};
+/* Range decoder state */
+struct rc_dec {
+        uint32_t range;
+        uint32_t code;
+        /*
+         * Number of initializing bytes remaining to be read
+         * by rc_read_init().
+         */
+        uint32_t init_bytes_left;
+        /*
+         * Buffer from which we read our input. It can be either
+         * temp.buf or the caller-provided input buffer.
+         *
+         * NOTE(review): in_pos and in_limit are presumably the current
+         * read offset into "in" and the offset at which reading has to
+         * stop; confirm against the rc_*() helpers.
+         */
+        const uint8_t *in;
+        size_t in_pos;
+        size_t in_limit;
+};
+/*
+ * Probabilities for a length decoder. Two choice probabilities pick one of
+ * the three arrays, each of which covers the length range noted on it.
+ */
+struct lzma_len_dec {
+        /* Probability of match length being at least 10 */
+        uint16_t choice;
+        /* Probability of match length being at least 18 */
+        uint16_t choice2;
+        /* Probabilities for match lengths 2-9 */
+        uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
+        /* Probabilities for match lengths 10-17 */
+        uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
+        /* Probabilities for match lengths 18-273 */
+        uint16_t high[LEN_HIGH_SYMBOLS];
+};
+/*
+ * LZMA decoder state: the most recent match distances, the current state
+ * and pending match length, the LZMA properties, and all the probability
+ * arrays used for decoding.
+ */
+struct lzma_dec {
+        /* Distances of latest four matches */
+        uint32_t rep0;
+        uint32_t rep1;
+        uint32_t rep2;
+        uint32_t rep3;
+        /* Types of the most recently seen LZMA symbols */
+        enum lzma_state state;
+        /*
+         * Length of a match. This is updated so that dict_repeat can
+         * be called again to finish repeating the whole match.
+         */
+        uint32_t len;
+        /*
+         * LZMA properties or related bit masks (number of literal
+         * context bits, a mask derived from the number of literal
+         * position bits, and a mask derived from the number of
+         * position bits)
+         */
+        uint32_t lc;
+        uint32_t literal_pos_mask; /* (1 << lp) - 1 */
+        uint32_t pos_mask;         /* (1 << pb) - 1 */
+        /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
+        uint16_t is_match[STATES][POS_STATES_MAX];
+        /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
+        uint16_t is_rep[STATES];
+        /*
+         * If 0, distance of a repeated match is rep0.
+         * Otherwise check is_rep1.
+         */
+        uint16_t is_rep0[STATES];
+        /*
+         * If 0, distance of a repeated match is rep1.
+         * Otherwise check is_rep2.
+         */
+        uint16_t is_rep1[STATES];
+        /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
+        uint16_t is_rep2[STATES];
+        /*
+         * If 1, the repeated match has length of one byte. Otherwise
+         * the length is decoded from rep_len_decoder.
+         */
+        uint16_t is_rep0_long[STATES][POS_STATES_MAX];
+        /*
+         * Probability tree for the highest two bits of the match
+         * distance. There is a separate probability tree for match
+         * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
+         */
+        uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
+        /*
+         * Probability trees for additional bits for match distance
+         * when the distance is in the range [4, 127].
+         */
+        uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
+        /*
+         * Probability tree for the lowest four bits of a match
+         * distance that is equal to or greater than 128.
+         */
+        uint16_t dist_align[ALIGN_SIZE];
+        /* Length of a normal match */
+        struct lzma_len_dec match_len_dec;
+        /* Length of a repeated match */
+        struct lzma_len_dec rep_len_dec;
+        /* Probabilities of literals */
+        uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
+};
+/*
+ * LZMA2 chunk-level state: where the decoder is within the chunk header or
+ * payload, the sizes of the current chunk, and which resets are still
+ * required.
+ */
+struct lzma2_dec {
+        /* Position in xz_dec_lzma2_run(). */
+        enum lzma2_seq {
+                SEQ_CONTROL,
+                SEQ_UNCOMPRESSED_1,
+                SEQ_UNCOMPRESSED_2,
+                SEQ_COMPRESSED_0,
+                SEQ_COMPRESSED_1,
+                SEQ_PROPERTIES,
+                SEQ_LZMA_PREPARE,
+                SEQ_LZMA_RUN,
+                SEQ_COPY
+        } sequence;
+        /* Next position after decoding the compressed size of the chunk. */
+        enum lzma2_seq next_sequence;
+        /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
+        uint32_t uncompressed;
+        /*
+         * Compressed size of LZMA chunk or compressed/uncompressed
+         * size of uncompressed chunk (64 KiB at maximum)
+         */
+        uint32_t compressed;
+        /*
+         * True if dictionary reset is needed. This is false before
+         * the first chunk (LZMA or uncompressed).
+         */
+        bool need_dict_reset;
+        /*
+         * True if new LZMA properties are needed. This is false
+         * before the first LZMA chunk.
+         */
+        bool need_props;
+};
+/*
+ * Complete LZMA2 decoder state: range decoder, dictionary, LZMA2 chunk
+ * state, LZMA state and a small input carry-over buffer. Do not reorder
+ * the members without reading the comment below.
+ */
+struct xz_dec_lzma2 {
+        /*
+         * The order below is important on x86 to reduce code size and
+         * it shouldn't hurt on other platforms. Everything up to and
+         * including lzma.pos_mask are in the first 128 bytes on x86-32,
+         * which allows using smaller instructions to access those
+         * variables. On x86-64, fewer variables fit into the first 128
+         * bytes, but this is still the best order without sacrificing
+         * the readability by splitting the structures.
+         */
+        struct rc_dec rc;
+        struct dictionary dict;
+        struct lzma2_dec lzma2;
+        struct lzma_dec lzma;
+        /*
+         * Temporary buffer which holds small number of input bytes between
+         * decoder calls. See lzma2_lzma() for details.
+         */
+        struct {
+                /* Number of bytes currently held in buf */
+                uint32_t size;
+                uint8_t buf[3 * LZMA_IN_REQUIRED];
+        } temp;
+};
+/**************
+ * Dictionary *
+ **************/
+/*
+ * Put the dictionary back to its empty state. In single-call mode the
+ * dictionary is additionally made to cover the unused part of the output
+ * buffer, so decoded data lands directly in b->out.
+ */
+static void dict_reset(struct dictionary *dict, struct xz_buf *b)
+{
+        dict->full = 0;
+        dict->limit = 0;
+        dict->pos = 0;
+        dict->start = 0;
+        if (!DEC_IS_SINGLE(dict->mode))
+                return;
+        dict->buf = b->out + b->out_pos;
+        dict->end = b->out_size - b->out_pos;
+}
+/*
+ * Set the dictionary write limit so that at most out_max more bytes can be
+ * written, without ever going past the end of the dictionary buffer.
+ */
+static void dict_limit(struct dictionary *dict, size_t out_max)
+{
+        const size_t room = dict->end - dict->pos;
+        dict->limit = (room <= out_max) ? dict->end : dict->pos + out_max;
+}
+/* True when the write limit has not been reached yet. */
+static inline bool dict_has_space(const struct dictionary *dict)
+{
+        return dict->limit > dict->pos;
+}
+/*
+ * Get a byte from the dictionary at the given distance. The distance is
+ * assumed to be valid, or as a special case, zero when the dictionary is
+ * still empty. In the empty case zero is returned without touching the
+ * buffer; this is needed for single-call decoding to avoid writing a '\0'
+ * to the end of the destination buffer.
+ */
+static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
+{
+        size_t offset;
+        if (dict->full == 0)
+                return 0;
+        offset = dict->pos - dist - 1;
+        /* The wanted byte is before the start of buf: wrap around. */
+        if (dist >= dict->pos)
+                offset += dict->end;
+        return dict->buf[offset];
+}
+/*
+ * Append one byte at the current position and raise the fill level if the
+ * position moved past it. The caller must make sure there is room.
+ */
+static inline void dict_put(struct dictionary *dict, uint8_t byte)
+{
+        const size_t at = dict->pos;
+        dict->buf[at] = byte;
+        dict->pos = at + 1;
+        if (dict->pos > dict->full)
+                dict->full = dict->pos;
+}
+/*
+ * Repeat given number of bytes from the given distance. If the distance is
+ * invalid, false is returned. On success, true is returned and *len is
+ * updated to indicate how many bytes were left to be repeated.
+ *
+ * A distance is invalid when it is not smaller than both dict->full and
+ * dict->size. The amount copied by one call is capped by the space left
+ * before dict->limit.
+ *
+ * NOTE(review): the do/while loop copies one byte before it tests the
+ * count, so this looks like it relies on the caller guaranteeing that
+ * *len > 0 and that dict_has_space() is true; confirm against the callers.
+ */
+static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
+{
+        size_t back;
+        uint32_t left;
+        if (dist >= dict->full || dist >= dict->size)
+                return false;
+        left = min_t(size_t, dict->limit - dict->pos, *len);
+        *len -= left;
+        /* Source position; wraps around when it is before the start of buf. */
+        back = dict->pos - dist - 1;
+        if (dist >= dict->pos)
+                back += dict->end;
+        /* Byte by byte, because source and destination may overlap. */
+        do {
+                dict->buf[dict->pos++] = dict->buf[back++];
+                if (back == dict->end)
+                        back = 0;
+        } while (--left > 0);
+        if (dict->full < dict->pos)
+                dict->full = dict->pos;
+        return true;
+}
+/*
+ * Copy uncompressed data as is from input to dictionary and output buffers.
+ *
+ * Each round copies as much as allowed by the remaining input in b, the
+ * remaining output space in b, the room before the end of the dictionary
+ * buffer, and *left. *left is decremented by the number of bytes copied, and
+ * the loop stops when *left reaches zero or input or output space runs out.
+ */
+static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
+                              uint32_t *left)
+{
+        size_t copy_size;
+        while (*left > 0 && b->in_pos < b->in_size
+                        && b->out_pos < b->out_size) {
+                copy_size = min(b->in_size - b->in_pos,
+                                b->out_size - b->out_pos);
+                if (copy_size > dict->end - dict->pos)
+                        copy_size = dict->end - dict->pos;
+                if (copy_size > *left)
+                        copy_size = *left;
+                *left -= copy_size;
+                /*
+                 * Use memmove() instead of memcpy(): if the caller is doing
+                 * in-place decompression and the input is invalid (it
+                 * decodes to more data than the caller expected), the
+                 * source and destination may overlap, which would be
+                 * undefined behavior with memcpy().
+                 */
+                memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
+                dict->pos += copy_size;
+                if (dict->full < dict->pos)
+                        dict->full = dict->pos;
+                if (DEC_IS_MULTI(dict->mode)) {
+                        if (dict->pos == dict->end)
+                                dict->pos = 0;
+                        /*
+                         * Same reasoning as above: the caller-supplied input
+                         * and output buffers may overlap.
+                         */
+                        memmove(b->out + b->out_pos, b->in + b->in_pos,
+                                        copy_size);
+                }
+                dict->start = dict->pos;
+                b->out_pos += copy_size;
+                b->in_pos += copy_size;
+        }
+}
+/*
+ * Hand the bytes written into the dictionary since the previous flush over
+ * to b->out and return how many there were. In multi-call mode the bytes are
+ * copied to b->out and the write position wraps to zero once it has reached
+ * the end of the dictionary. b->out is assumed to have enough space; the
+ * caller guarantees this by using dict_limit() before decoding data into
+ * the dictionary.
+ */
+static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
+{
+        const size_t pending = dict->pos - dict->start;
+        if (DEC_IS_MULTI(dict->mode)) {
+                memcpy(b->out + b->out_pos, dict->buf + dict->start, pending);
+                if (dict->pos == dict->end)
+                        dict->pos = 0;
+        }
+        b->out_pos += pending;
+        dict->start = dict->pos;
+        return pending;
+}
+/*****************
+ * Range decoder *
+ *****************/
+/*
+ * Put the range decoder back into its initial state: full range, zero code,
+ * and all RC_INIT_BYTES initial bytes still to be read.
+ */
+static void rc_reset(struct rc_dec *rc)
+{
+        rc->init_bytes_left = RC_INIT_BYTES;
+        rc->code = 0;
+        rc->range = ~(uint32_t)0;
+}
+/*
+ * Feed the initial bytes that haven't been read yet into rc->code. Returns
+ * false if the input ran out before all of them were read (progress is kept
+ * in rc->init_bytes_left), true once they have all been consumed. (Yes, the
+ * first byte gets completely ignored.)
+ */
+static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
+{
+        for (; rc->init_bytes_left > 0; --rc->init_bytes_left) {
+                if (b->in_pos == b->in_size)
+                        return false;
+                rc->code = (rc->code << 8) + b->in[b->in_pos++];
+        }
+        return true;
+}
+/*
+ * Tell whether the input position has passed the input limit, i.e. whether
+ * there may not be enough input for the next decoding loop.
+ */
+static inline bool rc_limit_exceeded(const struct rc_dec *rc)
+{
+        return rc->in_limit < rc->in_pos;
+}
+/*
+ * Tell whether, from the range decoder's point of view, the end of the LZMA
+ * chunk may have been reached: this is the case when the code value is zero.
+ */
+static inline bool rc_is_finished(const struct rc_dec *rc)
+{
+        return !rc->code;
+}
+/*
+ * Normalize the range decoder: when the range has dropped below
+ * RC_TOP_VALUE, widen it and shift the next input byte into the code.
+ */
+static __always_inline void rc_normalize(struct rc_dec *rc)
+{
+        if (rc->range >= RC_TOP_VALUE)
+                return;
+        rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
+        rc->range <<= RC_SHIFT_BITS;
+}
+/*
+ * Decode one bit with the adaptive probability *prob and return it (0 or 1).
+ *
+ * The range decoder is normalized first. The current range is then divided
+ * at "bound" in proportion to *prob. If the code is below the bound, the bit
+ * is 0, the range shrinks to the bound and *prob is moved towards
+ * RC_BIT_MODEL_TOTAL. Otherwise the bit is 1, the bound is subtracted from
+ * both the range and the code, and *prob is moved towards zero.
+ *
+ * In some versions, this function has been split into three
+ * functions so that the compiler is supposed to be able to more easily avoid
+ * an extra branch. In this particular version of the LZMA decoder, this
+ * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
+ * on x86). Using a non-split version results in nicer looking code too.
+ *
+ * NOTE: This must return an int. Do not make it return a bool or the speed
+ * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
+ * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
+ */
+static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
+{
+        uint32_t bound;
+        int bit;
+        rc_normalize(rc);
+        bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
+        if (rc->code < bound) {
+                rc->range = bound;
+                *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
+                bit = 0;
+        } else {
+                rc->range -= bound;
+                rc->code -= bound;
+                *prob -= *prob >> RC_MOVE_BITS;
+                bit = 1;
+        }
+        return bit;
+}
+/*
+ * Decode a bittree starting from the most significant bit. Each decoded bit
+ * is appended to the symbol, which starts at 1 and also serves as the index
+ * of the probability used for the next bit. Decoding stops when the symbol
+ * is no longer below limit; the final symbol is returned as is.
+ */
+static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
+                                           uint16_t *probs, uint32_t limit)
+{
+        uint32_t node = 1;
+        do {
+                node = (node << 1) + (uint32_t)rc_bit(rc, &probs[node]);
+        } while (node < limit);
+        return node;
+}
+/*
+ * Decode a bittree starting from the least significant bit. limit is the
+ * number of bits to decode; every bit that decodes as one adds its place
+ * value to *dest.
+ */
+static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
+                                               uint16_t *probs,
+                                               uint32_t *dest, uint32_t limit)
+{
+        uint32_t node = 1;
+        uint32_t bit_index = 0;
+        do {
+                if (!rc_bit(rc, &probs[node])) {
+                        node <<= 1;
+                } else {
+                        node = (node << 1) + 1;
+                        *dest += 1 << bit_index;
+                }
+                ++bit_index;
+        } while (bit_index < limit);
+}
+/*
+ * Decode direct bits (fixed fifty-fifty probability).
+ *
+ * limit is the number of bits to decode. The loop body runs before the
+ * counter is tested, so limit must be at least one. Each decoded bit is
+ * shifted into *dest from the low end, so the first bit decoded ends up as
+ * the most significant of the new bits.
+ *
+ * The loop is branchless: after halving the range and subtracting it from
+ * the code, mask is all ones if the top bit of the code got set and zero
+ * otherwise. In the former case the range is added back to the code and the
+ * decoded bit (mask + 1) is 0; in the latter case the decoded bit is 1.
+ */
+static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
+{
+        uint32_t mask;
+        do {
+                rc_normalize(rc);
+                rc->range >>= 1;
+                rc->code -= rc->range;
+                mask = (uint32_t)0 - (rc->code >> 31);
+                rc->code += rc->range & mask;
+                *dest = (*dest << 1) + (mask + 1);
+        } while (--limit > 0);
+}
+/********
+ * LZMA *
+ ********/
+/*
+ * Pick the literal coder probability array for the next literal. The choice
+ * depends on the highest lc bits of the previous byte and on the lowest bits
+ * of the dictionary position selected by literal_pos_mask.
+ */
+static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
+{
+        const uint32_t prev_bits = dict_get(&s->dict, 0) >> (8 - s->lzma.lc);
+        const uint32_t pos_bits = s->dict.pos & s->lzma.literal_pos_mask;
+        return s->lzma.literal[(pos_bits << s->lzma.lc) + prev_bits];
+}
+/*
+ * Decode a literal (one 8-bit byte).
+ *
+ * The probability array comes from lzma_literal_probs(). If the current
+ * state is a literal state (lzma_state_is_literal()), the byte is decoded as
+ * a plain 8-bit bittree. Otherwise the byte at distance rep0 in the
+ * dictionary is used as a "match byte": as long as the decoded bits agree
+ * with the bits of the match byte, the probability is picked from a part of
+ * the array selected by the corresponding match bit; after the first
+ * mismatch offset becomes zero and the remaining bits no longer depend on
+ * the match byte.
+ *
+ * The low eight bits of the decoded symbol are put into the dictionary and
+ * the state is updated with lzma_state_literal().
+ */
+static void lzma_literal(struct xz_dec_lzma2 *s)
+{
+        uint16_t *probs;
+        uint32_t symbol;
+        uint32_t match_byte;
+        uint32_t match_bit;
+        uint32_t offset;
+        uint32_t i;
+        probs = lzma_literal_probs(s);
+        if (lzma_state_is_literal(s->lzma.state)) {
+                symbol = rc_bittree(&s->rc, probs, 0x100);
+        } else {
+                symbol = 1;
+                match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
+                offset = 0x100;
+                do {
+                        match_bit = match_byte & offset;
+                        match_byte <<= 1;
+                        i = offset + match_bit + symbol;
+                        if (rc_bit(&s->rc, &probs[i])) {
+                                symbol = (symbol << 1) + 1;
+                                offset &= match_bit;
+                        } else {
+                                symbol <<= 1;
+                                offset &= ~match_bit;
+                        }
+                } while (symbol < 0x100);
+        }
+        dict_put(&s->dict, (uint8_t)symbol);
+        lzma_state_literal(&s->lzma.state);
+}
+/*
+ * Decode the length of the match into s->lzma.len. Up to two choice bits
+ * select the low, mid or high length range, which determines the bittree,
+ * its size and the base length; the bittree then supplies the offset from
+ * that base.
+ */
+static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
+                     uint32_t pos_state)
+{
+        uint16_t *tree;
+        uint32_t symbols;
+        if (!rc_bit(&s->rc, &l->choice)) {
+                s->lzma.len = MATCH_LEN_MIN;
+                symbols = LEN_LOW_SYMBOLS;
+                tree = l->low[pos_state];
+        } else if (!rc_bit(&s->rc, &l->choice2)) {
+                s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
+                symbols = LEN_MID_SYMBOLS;
+                tree = l->mid[pos_state];
+        } else {
+                s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
+                                + LEN_MID_SYMBOLS;
+                symbols = LEN_HIGH_SYMBOLS;
+                tree = l->high;
+        }
+        /* rc_bittree() returns a value with the top marker bit still set. */
+        s->lzma.len += rc_bittree(&s->rc, tree, symbols) - symbols;
+}
+/*
+ * Decode a match. The distance will be stored in s->lzma.rep0.
+ *
+ * The state is updated with lzma_state_match() and the previous distances
+ * are shifted one step down the rep0..rep3 history. The match length is
+ * decoded first because it selects the distance slot probabilities via
+ * lzma_get_dist_state(). The decoded slot then determines the distance:
+ *
+ *   - Slots below DIST_MODEL_START are the distance itself.
+ *   - For other slots the two highest bits of the distance are
+ *     2 + (dist_slot & 1) and (dist_slot >> 1) - 1 lower bits follow.
+ *     Below DIST_MODEL_END all of the lower bits are decoded with a
+ *     reverse bittree from s->lzma.dist_special. From DIST_MODEL_END
+ *     onwards all but the lowest ALIGN_BITS bits are direct bits and the
+ *     lowest ALIGN_BITS bits come from a reverse bittree using
+ *     s->lzma.dist_align.
+ *
+ * The distance isn't validated here.
+ */
+static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
+{
+        uint16_t *probs;
+        uint32_t dist_slot;
+        uint32_t limit;
+        lzma_state_match(&s->lzma.state);
+        s->lzma.rep3 = s->lzma.rep2;
+        s->lzma.rep2 = s->lzma.rep1;
+        s->lzma.rep1 = s->lzma.rep0;
+        lzma_len(s, &s->lzma.match_len_dec, pos_state);
+        probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
+        dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
+        if (dist_slot < DIST_MODEL_START) {
+                s->lzma.rep0 = dist_slot;
+        } else {
+                limit = (dist_slot >> 1) - 1;
+                s->lzma.rep0 = 2 + (dist_slot & 1);
+                if (dist_slot < DIST_MODEL_END) {
+                        s->lzma.rep0 <<= limit;
+                        probs = s->lzma.dist_special + s->lzma.rep0
+                                        - dist_slot - 1;
+                        rc_bittree_reverse(&s->rc, probs,
+                                        &s->lzma.rep0, limit);
+                } else {
+                        rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
+                        s->lzma.rep0 <<= ALIGN_BITS;
+                        rc_bittree_reverse(&s->rc, s->lzma.dist_align,
+                                        &s->lzma.rep0, ALIGN_BITS);
+                }
+        }
+}
+/*
+ * Decode a repeated match, i.e. a match that reuses one of the four most
+ * recently used distances. The chosen distance is moved to s->lzma.rep0 and
+ * the distances that were ahead of it in the history move one step back.
+ * A "short rep" (rep0 with length one) is handled as a special case that
+ * doesn't decode a length.
+ */
+static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
+{
+        uint32_t dist;
+        if (rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
+                /* Something other than rep0: find out which one. */
+                if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
+                        dist = s->lzma.rep1;
+                } else {
+                        if (rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
+                                dist = s->lzma.rep3;
+                                s->lzma.rep3 = s->lzma.rep2;
+                        } else {
+                                dist = s->lzma.rep2;
+                        }
+                        s->lzma.rep2 = s->lzma.rep1;
+                }
+                s->lzma.rep1 = s->lzma.rep0;
+                s->lzma.rep0 = dist;
+        } else if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
+                        s->lzma.state][pos_state])) {
+                /* Short rep: a single byte from distance rep0. */
+                lzma_state_short_rep(&s->lzma.state);
+                s->lzma.len = 1;
+                return;
+        }
+        lzma_state_long_rep(&s->lzma.state);
+        lzma_len(s, &s->lzma.rep_len_dec, pos_state);
+}
+/*
+ * LZMA decoder core.
+ *
+ * Decodes LZMA symbols into the dictionary for as long as the dictionary has
+ * space below its write limit and the range decoder's input limit hasn't
+ * been exceeded. Each symbol is either a literal, a repeated match or a
+ * normal match; matches are copied with dict_repeat(), which may leave part
+ * of the match pending in s->lzma.len when the write limit is reached.
+ *
+ * Returns false if a newly decoded match has an invalid distance, true
+ * otherwise. Note that the return value of dict_repeat() is not checked
+ * when finishing a pending repeat at the beginning of the function.
+ */
+static bool lzma_main(struct xz_dec_lzma2 *s)
+{
+        uint32_t pos_state;
+        /*
+         * If the dictionary limit was reached during the previous call, try
+         * to finish the possibly pending repeat in the dictionary.
+         */
+        if (dict_has_space(&s->dict) && s->lzma.len > 0)
+                dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
+        /*
+         * Decode more LZMA symbols. One iteration may consume up to
+         * LZMA_IN_REQUIRED - 1 bytes.
+         */
+        while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
+                pos_state = s->dict.pos & s->lzma.pos_mask;
+                if (!rc_bit(&s->rc, &s->lzma.is_match[
+                                s->lzma.state][pos_state])) {
+                        lzma_literal(s);
+                } else {
+                        if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
+                                lzma_rep_match(s, pos_state);
+                        else
+                                lzma_match(s, pos_state);
+                        if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
+                                return false;
+                }
+        }
+        /*
+         * Having the range decoder always normalized when we are outside
+         * this function makes it easier to correctly handle end of the chunk.
+         */
+        rc_normalize(&s->rc);
+        return true;
+}
+/*
+ * Reset the LZMA decoder and range decoder state. Dictionary is not reset
+ * here, because LZMA state may be reset without resetting the dictionary.
+ */
+static void lzma_reset(struct xz_dec_lzma2 *s)
+{
+        uint16_t *const first_prob = s->lzma.is_match[0];
+        uint16_t *prob;
+        s->lzma.state = STATE_LIT_LIT;
+        s->lzma.rep0 = s->lzma.rep1 = s->lzma.rep2 = s->lzma.rep3 = 0;
+        /*
+         * Every probability starts from the same value. As a hack to keep
+         * the code small, all PROBS_TOTAL probabilities are written with a
+         * single loop that starts from the first element of is_match,
+         * instead of having a separate loop for each probability array.
+         *
+         * This could be optimized to initialize only the part of the
+         * literal probabilities that is actually required. In the common
+         * case we would write 12 KiB less.
+         */
+        for (prob = first_prob; prob != first_prob + PROBS_TOTAL; ++prob)
+                *prob = RC_BIT_MODEL_TOTAL / 2;
+        rc_reset(&s->rc);
+}
+/*
+ * Decode and validate the LZMA properties byte, which packs the three
+ * values as (pb * 5 + lp) * 9 + lc, and derive the position bit masks from
+ * pb and lp. Returns false if the byte is out of range or if lc + lp is
+ * greater than four. On success the LZMA decoder state is reset and true is
+ * returned.
+ */
+static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
+{
+        uint32_t pb;
+        uint32_t lp;
+        if (props > (4 * 5 + 4) * 9 + 8)
+                return false;
+        pb = props / (9 * 5);
+        props %= 9 * 5;
+        s->lzma.pos_mask = (1 << pb) - 1;
+        lp = props / 9;
+        s->lzma.lc = props % 9;
+        /* Holds the plain lp value until the check below has passed. */
+        s->lzma.literal_pos_mask = lp;
+        if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
+                return false;
+        s->lzma.literal_pos_mask = (1 << lp) - 1;
+        lzma_reset(s);
+        return true;
+}
+/*********
+ * LZMA2 *
+ *********/
+/*
+ * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
+ * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
+ * wrapper function takes care of making the LZMA decoder's assumption safe.
+ *
+ * As long as there is plenty of input left to be decoded in the current LZMA
+ * chunk, we decode directly from the caller-supplied input buffer until
+ * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied
+ * into s->temp.buf, which (hopefully) gets filled on the next call to this
+ * function. We decode a few bytes from the temporary buffer so that we can
+ * continue decoding from the caller-supplied input buffer again.
+ *
+ * The function works in three steps:
+ *
+ *   1. If s->temp.buf holds data from an earlier call (or no compressed
+ *      bytes remain in the chunk), top the buffer up from b->in and decode
+ *      from it. When the buffer holds the very end of the chunk, the unused
+ *      part of the buffer is zeroed and the input limit is set to the end
+ *      of the data. If there still isn't LZMA_IN_REQUIRED bytes available,
+ *      the new bytes are just kept for the next call.
+ *   2. If b->in has at least LZMA_IN_REQUIRED bytes available, decode
+ *      directly from it.
+ *   3. If less than LZMA_IN_REQUIRED bytes remain in b->in, move them (but
+ *      no more than what is left of the chunk) into s->temp.buf.
+ *
+ * s->lzma2.compressed is decremented by the number of input bytes consumed
+ * by the range decoder. Returns false if lzma_main() fails or if the range
+ * decoder consumed more input than was available to it; otherwise returns
+ * true.
+ */
+static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
+{
+        size_t in_avail;
+        uint32_t tmp;
+        in_avail = b->in_size - b->in_pos;
+        if (s->temp.size > 0 || s->lzma2.compressed == 0) {
+                tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
+                if (tmp > s->lzma2.compressed - s->temp.size)
+                        tmp = s->lzma2.compressed - s->temp.size;
+                if (tmp > in_avail)
+                        tmp = in_avail;
+                memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
+                if (s->temp.size + tmp == s->lzma2.compressed) {
+                        memzero(s->temp.buf + s->temp.size + tmp,
+                                        sizeof(s->temp.buf)
+                                                - s->temp.size - tmp);
+                        s->rc.in_limit = s->temp.size + tmp;
+                } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
+                        s->temp.size += tmp;
+                        b->in_pos += tmp;
+                        return true;
+                } else {
+                        s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
+                }
+                s->rc.in = s->temp.buf;
+                s->rc.in_pos = 0;
+                if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
+                        return false;
+                s->lzma2.compressed -= s->rc.in_pos;
+                if (s->rc.in_pos < s->temp.size) {
+                        s->temp.size -= s->rc.in_pos;
+                        memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
+                                        s->temp.size);
+                        return true;
+                }
+                b->in_pos += s->rc.in_pos - s->temp.size;
+                s->temp.size = 0;
+        }
+        in_avail = b->in_size - b->in_pos;
+        if (in_avail >= LZMA_IN_REQUIRED) {
+                s->rc.in = b->in;
+                s->rc.in_pos = b->in_pos;
+                if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
+                        s->rc.in_limit = b->in_pos + s->lzma2.compressed;
+                else
+                        s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
+                if (!lzma_main(s))
+                        return false;
+                in_avail = s->rc.in_pos - b->in_pos; /* bytes consumed */
+                if (in_avail > s->lzma2.compressed)
+                        return false;
+                s->lzma2.compressed -= in_avail;
+                b->in_pos = s->rc.in_pos;
+        }
+        in_avail = b->in_size - b->in_pos;
+        if (in_avail < LZMA_IN_REQUIRED) {
+                if (in_avail > s->lzma2.compressed)
+                        in_avail = s->lzma2.compressed;
+                memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
+                s->temp.size = in_avail;
+                b->in_pos += in_avail;
+        }
+        return true;
+}
+/*
+ * Take care of the LZMA2 control layer, and forward the job of actual LZMA
+ * decoding or copying of uncompressed chunks to other functions.
+ *
+ * The function is a state machine driven by s->lzma2.sequence, so decoding
+ * can be stopped and resumed at any input byte.
+ *
+ * Returns XZ_STREAM_END when the end marker has been read, XZ_DATA_ERROR if
+ * the input is invalid, and XZ_OK when more input or more output space is
+ * needed to continue.
+ */
+XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
+                                       struct xz_buf *b)
+{
+        uint32_t tmp;
+        while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
+                switch (s->lzma2.sequence) {
+                case SEQ_CONTROL:
+                        /*
+                         * LZMA2 control byte
+                         *
+                         * Exact values:
+                         *   0x00   End marker
+                         *   0x01   Dictionary reset followed by
+                         *          an uncompressed chunk
+                         *   0x02   Uncompressed chunk (no dictionary reset)
+                         *
+                         * Highest three bits (s->control & 0xE0):
+                         *   0xE0   Dictionary reset, new properties and state
+                         *          reset, followed by LZMA compressed chunk
+                         *   0xC0   New properties and state reset, followed
+                         *          by LZMA compressed chunk (no dictionary
+                         *          reset)
+                         *   0xA0   State reset using old properties,
+                         *          followed by LZMA compressed chunk (no
+                         *          dictionary reset)
+                         *   0x80   LZMA chunk (no dictionary or state reset)
+                         *
+                         * For LZMA compressed chunks, the lowest five bits
+                         * (s->control & 1F) are the highest bits of the
+                         * uncompressed size (bits 16-20).
+                         *
+                         * A new LZMA2 stream must begin with a dictionary
+                         * reset. The first LZMA chunk must set new
+                         * properties and reset the LZMA state.
+                         *
+                         * Values that don't match anything described above
+                         * are invalid and we return XZ_DATA_ERROR.
+                         */
+                        tmp = b->in[b->in_pos++];
+                        /*
+                         * The end marker has to be checked before the
+                         * dictionary reset requirement: an empty LZMA2
+                         * stream consists of the end marker alone and
+                         * must not be rejected as corrupt.
+                         */
+                        if (tmp == 0x00)
+                                return XZ_STREAM_END;
+                        if (tmp >= 0xE0 || tmp == 0x01) {
+                                s->lzma2.need_props = true;
+                                s->lzma2.need_dict_reset = false;
+                                dict_reset(&s->dict, b);
+                        } else if (s->lzma2.need_dict_reset) {
+                                return XZ_DATA_ERROR;
+                        }
+                        if (tmp >= 0x80) {
+                                s->lzma2.uncompressed = (tmp & 0x1F) << 16;
+                                s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
+                                if (tmp >= 0xC0) {
+                                        /*
+                                         * When there are new properties,
+                                         * state reset is done at
+                                         * SEQ_PROPERTIES.
+                                         */
+                                        s->lzma2.need_props = false;
+                                        s->lzma2.next_sequence
+                                                        = SEQ_PROPERTIES;
+                                } else if (s->lzma2.need_props) {
+                                        return XZ_DATA_ERROR;
+                                } else {
+                                        s->lzma2.next_sequence
+                                                        = SEQ_LZMA_PREPARE;
+                                        if (tmp >= 0xA0)
+                                                lzma_reset(s);
+                                }
+                        } else {
+                                if (tmp > 0x02)
+                                        return XZ_DATA_ERROR;
+                                s->lzma2.sequence = SEQ_COMPRESSED_0;
+                                s->lzma2.next_sequence = SEQ_COPY;
+                        }
+                        break;
+                case SEQ_UNCOMPRESSED_1:
+                        s->lzma2.uncompressed
+                                        += (uint32_t)b->in[b->in_pos++] << 8;
+                        s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
+                        break;
+                case SEQ_UNCOMPRESSED_2:
+                        /* The size is stored minus one. */
+                        s->lzma2.uncompressed
+                                        += (uint32_t)b->in[b->in_pos++] + 1;
+                        s->lzma2.sequence = SEQ_COMPRESSED_0;
+                        break;
+                case SEQ_COMPRESSED_0:
+                        s->lzma2.compressed
+                                        = (uint32_t)b->in[b->in_pos++] << 8;
+                        s->lzma2.sequence = SEQ_COMPRESSED_1;
+                        break;
+                case SEQ_COMPRESSED_1:
+                        /* The size is stored minus one. */
+                        s->lzma2.compressed
+                                        += (uint32_t)b->in[b->in_pos++] + 1;
+                        s->lzma2.sequence = s->lzma2.next_sequence;
+                        break;
+                case SEQ_PROPERTIES:
+                        if (!lzma_props(s, b->in[b->in_pos++]))
+                                return XZ_DATA_ERROR;
+                        s->lzma2.sequence = SEQ_LZMA_PREPARE;
+                        /* fall through */
+                case SEQ_LZMA_PREPARE:
+                        if (s->lzma2.compressed < RC_INIT_BYTES)
+                                return XZ_DATA_ERROR;
+                        if (!rc_read_init(&s->rc, b))
+                                return XZ_OK;
+                        s->lzma2.compressed -= RC_INIT_BYTES;
+                        s->lzma2.sequence = SEQ_LZMA_RUN;
+                        /* fall through */
+                case SEQ_LZMA_RUN:
+                        /*
+                         * Set dictionary limit to indicate how much we want
+                         * to be decoded at maximum. Decode new data into the
+                         * dictionary. Flush the new data from dictionary to
+                         * b->out. Check if we finished decoding this chunk.
+                         * In case the dictionary got full but we didn't fill
+                         * the output buffer yet, we may run this loop
+                         * multiple times without changing s->lzma2.sequence.
+                         */
+                        dict_limit(&s->dict, min_t(size_t,
+                                        b->out_size - b->out_pos,
+                                        s->lzma2.uncompressed));
+                        if (!lzma2_lzma(s, b))
+                                return XZ_DATA_ERROR;
+                        s->lzma2.uncompressed -= dict_flush(&s->dict, b);
+                        if (s->lzma2.uncompressed == 0) {
+                                /*
+                                 * The chunk must end cleanly: no compressed
+                                 * bytes left over, no pending repeat, and
+                                 * the range decoder in its finished state.
+                                 */
+                                if (s->lzma2.compressed > 0 || s->lzma.len > 0
+                                                || !rc_is_finished(&s->rc))
+                                        return XZ_DATA_ERROR;
+                                rc_reset(&s->rc);
+                                s->lzma2.sequence = SEQ_CONTROL;
+                        } else if (b->out_pos == b->out_size
+                                        || (b->in_pos == b->in_size
+                                                && s->temp.size
+                                                < s->lzma2.compressed)) {
+                                return XZ_OK;
+                        }
+                        break;
+                case SEQ_COPY:
+                        dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
+                        if (s->lzma2.compressed > 0)
+                                return XZ_OK;
+                        s->lzma2.sequence = SEQ_CONTROL;
+                        break;
+                }
+        }
+        return XZ_OK;
+}
+/*
+ * Allocate an LZMA2 decoder for the given operation mode. dict_max is stored
+ * as the maximum allowed dictionary size. In preallocation mode a dictionary
+ * buffer of dict_max bytes is allocated right away; in dynamic allocation
+ * mode the buffer starts out as NULL with an allocated size of zero.
+ * Returns NULL if memory allocation fails.
+ */
+XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
+                                                   uint32_t dict_max)
+{
+        struct xz_dec_lzma2 *dec;
+        dec = kmalloc(sizeof(*dec), GFP_KERNEL);
+        if (!dec)
+                return NULL;
+        dec->dict.mode = mode;
+        dec->dict.size_max = dict_max;
+        if (!DEC_IS_PREALLOC(mode)) {
+                if (DEC_IS_DYNALLOC(mode)) {
+                        dec->dict.allocated = 0;
+                        dec->dict.buf = NULL;
+                }
+                return dec;
+        }
+        dec->dict.buf = vmalloc(dict_max);
+        if (!dec->dict.buf) {
+                kfree(dec);
+                return NULL;
+        }
+        return dec;
+}
+/*
+ * Prepare the decoder for a new LZMA2 stream.
+ *
+ * props is the dictionary size byte: the size is (2 + (props & 1)) shifted
+ * left by (props >> 1) + 11 bits. In the multi-call modes the size is
+ * checked against the maximum given to xz_dec_lzma2_create(), and in
+ * dynamic allocation mode the dictionary buffer is reallocated if the
+ * current allocation is too small.
+ *
+ * Returns XZ_OPTIONS_ERROR if props is out of range, XZ_MEMLIMIT_ERROR if
+ * the dictionary would exceed the maximum size, XZ_MEM_ERROR if allocating
+ * the dictionary fails, and XZ_OK otherwise.
+ */
+XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
+{
+        /* This limits dictionary size to 3 GiB to keep parsing simpler. */
+        if (props > 39)
+                return XZ_OPTIONS_ERROR;
+        s->dict.size = 2 + (props & 1);
+        s->dict.size <<= (props >> 1) + 11;
+        if (DEC_IS_MULTI(s->dict.mode)) {
+                if (s->dict.size > s->dict.size_max)
+                        return XZ_MEMLIMIT_ERROR;
+                s->dict.end = s->dict.size;
+                if (DEC_IS_DYNALLOC(s->dict.mode)) {
+                        if (s->dict.allocated < s->dict.size) {
+                                /*
+                                 * Remember the new allocation size. Without
+                                 * this, allocated would stay at zero and the
+                                 * buffer would be needlessly freed and
+                                 * reallocated on every reset.
+                                 */
+                                s->dict.allocated = s->dict.size;
+                                vfree(s->dict.buf);
+                                s->dict.buf = vmalloc(s->dict.size);
+                                if (s->dict.buf == NULL) {
+                                        s->dict.allocated = 0;
+                                        return XZ_MEM_ERROR;
+                                }
+                        }
+                }
+        }
+        s->lzma.len = 0;
+        s->lzma2.sequence = SEQ_CONTROL;
+        s->lzma2.need_dict_reset = true;
+        s->temp.size = 0;
+        return XZ_OK;
+}
+/*
+ * Free the decoder. In the multi-call modes the dictionary buffer is
+ * released with vfree() before the decoder structure itself is freed.
+ */
+XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
+{
+        const bool multi_call = DEC_IS_MULTI(s->dict.mode);
+        if (multi_call)
+                vfree(s->dict.buf);
+        kfree(s);
+}
diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c
new file mode 100644
index 00000000000..ac809b1e64f
--- /dev/null
+++ b/lib/xz/xz_dec_stream.c
@@ -0,0 +1,821 @@
+/*
+ * .xz Stream decoder
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#include "xz_private.h"
+#include "xz_stream.h"
+/*
+ * Hash used to validate the Index field.
+ *
+ * The Unpadded Sizes and Uncompressed Sizes are summed up here, and after
+ * each Block or Index Record the CRC32 is recalculated over the raw bytes
+ * of this whole structure (which include the previous value of crc32).
+ * The hash collected from the Blocks and the one collected from the Index
+ * are finally compared byte for byte with memeq(), so the members and
+ * their layout have to be the same for both.
+ */
+struct xz_dec_hash {
+        vli_type unpadded;
+        vli_type uncompressed;
+        uint32_t crc32;
+};
+/*
+ * State of the .xz Stream decoder. It is allocated by xz_dec_init(),
+ * (re)initialized by xz_dec_reset(), and it carries the decoding position
+ * from one call of xz_dec_run() to the next.
+ */
+struct xz_dec {
+        /* Position in dec_main() */
+        enum {
+                SEQ_STREAM_HEADER,
+                SEQ_BLOCK_START,
+                SEQ_BLOCK_HEADER,
+                SEQ_BLOCK_UNCOMPRESS,
+                SEQ_BLOCK_PADDING,
+                SEQ_BLOCK_CHECK,
+                SEQ_INDEX,
+                SEQ_INDEX_PADDING,
+                SEQ_INDEX_CRC32,
+                SEQ_STREAM_FOOTER
+        } sequence;
+        /* Position in variable-length integers and Check fields */
+        uint32_t pos;
+        /* Variable-length integer decoded by dec_vli() */
+        vli_type vli;
+        /* Saved in_pos and out_pos */
+        size_t in_start;
+        size_t out_start;
+        /* CRC32 value in Block or Index */
+        uint32_t crc32;
+        /* Type of the integrity check calculated from uncompressed data */
+        enum xz_check check_type;
+        /* Operation mode */
+        enum xz_mode mode;
+        /*
+         * True if the next call to xz_dec_run() is allowed to return
+         * XZ_BUF_ERROR.
+         */
+        bool allow_buf_error;
+        /* Information stored in Block Header */
+        struct {
+                /*
+                 * Value stored in the Compressed Size field, or
+                 * VLI_UNKNOWN if Compressed Size is not present.
+                 */
+                vli_type compressed;
+                /*
+                 * Value stored in the Uncompressed Size field, or
+                 * VLI_UNKNOWN if Uncompressed Size is not present.
+                 */
+                vli_type uncompressed;
+                /* Size of the Block Header field */
+                uint32_t size;
+        } block_header;
+        /* Information collected when decoding Blocks */
+        struct {
+                /* Observed compressed size of the current Block */
+                vli_type compressed;
+                /* Observed uncompressed size of the current Block */
+                vli_type uncompressed;
+                /* Number of Blocks decoded so far */
+                vli_type count;
+                /*
+                 * Hash calculated from the Block sizes. This is used to
+                 * validate the Index field.
+                 */
+                struct xz_dec_hash hash;
+        } block;
+        /* Variables needed when verifying the Index field */
+        struct {
+                /* Position in dec_index() */
+                enum {
+                        SEQ_INDEX_COUNT,
+                        SEQ_INDEX_UNPADDED,
+                        SEQ_INDEX_UNCOMPRESSED
+                } sequence;
+                /* Size of the Index in bytes */
+                vli_type size;
+                /* Number of Records (matches block.count in valid files) */
+                vli_type count;
+                /*
+                 * Hash calculated from the Records (matches block.hash in
+                 * valid files).
+                 */
+                struct xz_dec_hash hash;
+        } index;
+        /*
+         * Temporary buffer needed to hold Stream Header, Block Header,
+         * and Stream Footer. The Block Header is the biggest (1 KiB)
+         * so we reserve space according to that. buf[] has to be aligned
+         * to a multiple of four bytes; the size_t variables before it
+         * should guarantee this.
+         */
+        struct {
+                size_t pos;
+                size_t size;
+                uint8_t buf[1024];
+        } temp;
+        /* LZMA2 decoder state, created in xz_dec_init() */
+        struct xz_dec_lzma2 *lzma2;
+#ifdef XZ_DEC_BCJ
+        /*
+         * BCJ filter state, created in xz_dec_init(). bcj_active is set
+         * for each Block from its Block Header and tells dec_block()
+         * whether to run the data through the BCJ filter.
+         */
+        struct xz_dec_bcj *bcj;
+        bool bcj_active;
+#endif
+};
+#ifdef XZ_DEC_ANY_CHECK
+/*
+ * Sizes (in bytes) of the Check field with different Check IDs. The table
+ * is indexed with s->check_type, i.e. the Check ID read from the Stream
+ * Header.
+ */
+static const uint8_t check_sizes[16] = {
+        0,
+        4, 4, 4,
+        8, 8, 8,
+        16, 16, 16,
+        32, 32, 32,
+        64, 64, 64
+};
+#endif
+/*
+ * Continue filling s->temp.buf from b->in[b->in_pos]. The caller must have
+ * set s->temp.size to the total number of bytes wanted; s->temp.pos tracks
+ * how many of them have been copied so far. Returns true (and rewinds
+ * s->temp.pos to zero) once s->temp.size bytes are available, false if
+ * more input is needed.
+ */
+static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
+{
+        size_t in_avail = b->in_size - b->in_pos;
+        size_t missing = s->temp.size - s->temp.pos;
+        size_t n = min_t(size_t, in_avail, missing);
+        memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, n);
+        s->temp.pos += n;
+        b->in_pos += n;
+        if (s->temp.pos != s->temp.size)
+                return false;
+        s->temp.pos = 0;
+        return true;
+}
+/*
+ * Decode a variable-length integer (little-endian base-128 encoding).
+ *
+ * The decoding state lives in s->vli and s->pos, so the integer may be
+ * split across several calls. Returns XZ_STREAM_END when the integer is
+ * complete (the value is in s->vli), XZ_OK if the input ran out first,
+ * or XZ_DATA_ERROR if the encoding is too long or not minimal.
+ */
+static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
+                           size_t *in_pos, size_t in_size)
+{
+        uint8_t cur;
+        if (s->pos == 0)
+                s->vli = 0;
+        while (*in_pos < in_size) {
+                cur = in[(*in_pos)++];
+                s->vli |= (vli_type)(cur & 0x7F) << s->pos;
+                if (cur & 0x80) {
+                        /* Continuation bit is set: more bytes follow. */
+                        s->pos += 7;
+                        if (s->pos == 7 * VLI_BYTES_MAX)
+                                return XZ_DATA_ERROR;
+                        continue;
+                }
+                /* Don't allow non-minimal encodings. */
+                if (cur == 0 && s->pos != 0)
+                        return XZ_DATA_ERROR;
+                s->pos = 0;
+                return XZ_STREAM_END;
+        }
+        return XZ_OK;
+}
+/*
+ * Decode the Compressed Data field from a Block. Update and validate
+ * the observed compressed and uncompressed sizes of the Block so that
+ * they don't exceed the values possibly stored in the Block Header
+ * (validation assumes that no integer overflow occurs, since vli_type
+ * is normally uint64_t). Update the CRC32 if presence of the CRC32
+ * field was indicated in Stream Header.
+ *
+ * Once the decoding is finished, validate that the observed sizes match
+ * the sizes possibly stored in the Block Header. Update the hash and
+ * Block count, which are later used to validate the Index field.
+ */
+static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
+{
+        enum xz_ret ret;
+        size_t in_used;
+        size_t out_made;
+        /* Remember the starting positions to measure this call's progress. */
+        s->in_start = b->in_pos;
+        s->out_start = b->out_pos;
+#ifdef XZ_DEC_BCJ
+        if (s->bcj_active)
+                ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
+        else
+#endif
+                ret = xz_dec_lzma2_run(s->lzma2, b);
+        in_used = b->in_pos - s->in_start;
+        out_made = b->out_pos - s->out_start;
+        s->block.compressed += in_used;
+        s->block.uncompressed += out_made;
+        /*
+         * A size that is missing from the Block Header is stored as
+         * VLI_UNKNOWN. It needs no special case here, since the observed
+         * sizes are always smaller than VLI_UNKNOWN.
+         */
+        if (s->block.compressed > s->block_header.compressed)
+                return XZ_DATA_ERROR;
+        if (s->block.uncompressed > s->block_header.uncompressed)
+                return XZ_DATA_ERROR;
+        if (s->check_type == XZ_CHECK_CRC32)
+                s->crc32 = xz_crc32(b->out + s->out_start,
+                                out_made, s->crc32);
+        if (ret != XZ_STREAM_END)
+                return ret;
+        /* End of the Block: sizes given in the header must match exactly. */
+        if (s->block_header.compressed != VLI_UNKNOWN) {
+                if (s->block_header.compressed != s->block.compressed)
+                        return XZ_DATA_ERROR;
+        }
+        if (s->block_header.uncompressed != VLI_UNKNOWN) {
+                if (s->block_header.uncompressed != s->block.uncompressed)
+                        return XZ_DATA_ERROR;
+        }
+        /* Unpadded Size = Block Header + Compressed Data + Check. */
+        s->block.hash.unpadded += s->block_header.size
+                        + s->block.compressed;
+#ifdef XZ_DEC_ANY_CHECK
+        s->block.hash.unpadded += check_sizes[s->check_type];
+#else
+        if (s->check_type == XZ_CHECK_CRC32)
+                s->block.hash.unpadded += 4;
+#endif
+        s->block.hash.uncompressed += s->block.uncompressed;
+        s->block.hash.crc32 = xz_crc32((const uint8_t *)&s->block.hash,
+                        sizeof(s->block.hash), s->block.hash.crc32);
+        ++s->block.count;
+        return ret;
+}
+/*
+ * Account for the input consumed since s->in_start: feed those bytes to
+ * the running CRC32 and add their count to the Index size.
+ */
+static void index_update(struct xz_dec *s, const struct xz_buf *b)
+{
+        const uint8_t *chunk = b->in + s->in_start;
+        size_t chunk_size = b->in_pos - s->in_start;
+        s->crc32 = xz_crc32(chunk, chunk_size, s->crc32);
+        s->index.size += chunk_size;
+}
+/*
+ * Decode the Number of Records, Unpadded Size, and Uncompressed Size
+ * fields from the Index field. That is, Index Padding and CRC32 are not
+ * decoded by this function.
+ *
+ * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
+ * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
+ */
+static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
+{
+        enum xz_ret ret;
+        do {
+                ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
+                if (ret != XZ_STREAM_END) {
+                        /*
+                         * Either the input ran out in the middle of the
+                         * Index or the integer was invalid. Account for
+                         * the bytes consumed before returning.
+                         */
+                        index_update(s, b);
+                        return ret;
+                }
+                if (s->index.sequence == SEQ_INDEX_COUNT) {
+                        /*
+                         * The Number of Records field has to indicate
+                         * the same number of Records as there were
+                         * Blocks in the Stream.
+                         */
+                        s->index.count = s->vli;
+                        if (s->index.count != s->block.count)
+                                return XZ_DATA_ERROR;
+                        s->index.sequence = SEQ_INDEX_UNPADDED;
+                } else if (s->index.sequence == SEQ_INDEX_UNPADDED) {
+                        s->index.hash.unpadded += s->vli;
+                        s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
+                } else if (s->index.sequence == SEQ_INDEX_UNCOMPRESSED) {
+                        /* One Record is complete: fold it into the hash. */
+                        s->index.hash.uncompressed += s->vli;
+                        s->index.hash.crc32 = xz_crc32(
+                                        (const uint8_t *)&s->index.hash,
+                                        sizeof(s->index.hash),
+                                        s->index.hash.crc32);
+                        --s->index.count;
+                        s->index.sequence = SEQ_INDEX_UNPADDED;
+                }
+        } while (s->index.count > 0);
+        return XZ_STREAM_END;
+}
+/*
+ * Validate that the next four input bytes match the value of s->crc32.
+ * s->pos must be zero when starting to validate the first byte.
+ */
+static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b)
+{
+        for (;;) {
+                uint8_t expected;
+                /* More input is needed to finish the comparison. */
+                if (b->in_pos == b->in_size)
+                        return XZ_OK;
+                /* s->pos is the bit offset of the byte being compared. */
+                expected = (s->crc32 >> s->pos) & 0xFF;
+                if (expected != b->in[b->in_pos++])
+                        return XZ_DATA_ERROR;
+                s->pos += 8;
+                if (s->pos >= 32)
+                        break;
+        }
+        /* All four bytes matched; get ready for the next CRC32. */
+        s->crc32 = 0;
+        s->pos = 0;
+        return XZ_STREAM_END;
+}
+#ifdef XZ_DEC_ANY_CHECK
+/*
+ * Skip over the Check field when the Check ID is not supported.
+ * Returns true once the whole Check field has been skipped over.
+ */
+static bool check_skip(struct xz_dec *s, struct xz_buf *b)
+{
+        const uint32_t check_size = check_sizes[s->check_type];
+        /* s->pos counts the bytes of the Check field skipped so far. */
+        for (; s->pos < check_size; ++s->pos) {
+                if (b->in_pos == b->in_size)
+                        return false;
+                ++b->in_pos;
+        }
+        s->pos = 0;
+        return true;
+}
+#endif
+/*
+ * Decode the Stream Header field (the first 12 bytes of the .xz Stream)
+ * from s->temp.buf.
+ *
+ * Returns XZ_OK if the header is valid and the Check ID is supported,
+ * XZ_FORMAT_ERROR if the magic bytes don't match, XZ_DATA_ERROR if the
+ * CRC32 doesn't match, XZ_OPTIONS_ERROR for unsupported header contents,
+ * or (only with XZ_DEC_ANY_CHECK) XZ_UNSUPPORTED_CHECK if the Check ID is
+ * valid but its check cannot be verified.
+ */
+static enum xz_ret dec_stream_header(struct xz_dec *s)
+{
+        uint8_t check_id;
+        if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
+                return XZ_FORMAT_ERROR;
+        if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
+                        != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
+                return XZ_DATA_ERROR;
+        if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
+                return XZ_OPTIONS_ERROR;
+        /*
+         * Of integrity checks, we support only none (Check ID = 0) and
+         * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
+         * we will accept other check types too, but then the check won't
+         * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
+         *
+         * The raw byte is validated before it is stored into the enum
+         * variable, so s->check_type never holds an out-of-range value.
+         */
+        check_id = s->temp.buf[HEADER_MAGIC_SIZE + 1];
+#ifdef XZ_DEC_ANY_CHECK
+        if (check_id > XZ_CHECK_MAX)
+                return XZ_OPTIONS_ERROR;
+        /*
+         * Store the ID before returning XZ_UNSUPPORTED_CHECK, because
+         * decoding may continue after that warning.
+         */
+        s->check_type = check_id;
+        if (check_id > XZ_CHECK_CRC32)
+                return XZ_UNSUPPORTED_CHECK;
+#else
+        if (check_id > XZ_CHECK_CRC32)
+                return XZ_OPTIONS_ERROR;
+        s->check_type = check_id;
+#endif
+        return XZ_OK;
+}
+/*
+ * Decode the Stream Footer field (the last 12 bytes of the .xz Stream)
+ * from s->temp.buf. Returns XZ_STREAM_END if the footer is valid and
+ * XZ_DATA_ERROR otherwise.
+ */
+static enum xz_ret dec_stream_footer(struct xz_dec *s)
+{
+        const uint8_t *footer = s->temp.buf;
+        /* The magic bytes are at the very end of the footer. */
+        if (!memeq(footer + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
+                return XZ_DATA_ERROR;
+        /* The CRC32 in the first four bytes covers the next six bytes. */
+        if (get_le32(footer) != xz_crc32(footer + 4, 6, 0))
+                return XZ_DATA_ERROR;
+        /*
+         * Validate Backward Size. The size of the Index CRC32 field was
+         * never added to s->index.size, which is why the comparison uses
+         * s->index.size / 4 and not s->index.size / 4 - 1.
+         */
+        if (get_le32(footer + 4) != (s->index.size >> 2))
+                return XZ_DATA_ERROR;
+        /* Byte 8 must be zero and byte 9 must repeat the Check ID. */
+        if (footer[8] != 0)
+                return XZ_DATA_ERROR;
+        if (footer[9] != s->check_type)
+                return XZ_DATA_ERROR;
+        /*
+         * XZ_STREAM_END is returned instead of XZ_OK because it is more
+         * convenient for the caller.
+         */
+        return XZ_STREAM_END;
+}
+/*
+ * Decode the Block Header and initialize the filter chain.
+ *
+ * The whole header (s->temp.size bytes, including its CRC32) must already
+ * be in s->temp.buf. Returns XZ_OK when the header has been accepted and
+ * the filters have been reset, XZ_DATA_ERROR if the header is corrupt,
+ * XZ_OPTIONS_ERROR if it needs something this decoder doesn't support, or
+ * the error returned by xz_dec_bcj_reset() or xz_dec_lzma2_reset().
+ */
+static enum xz_ret dec_block_header(struct xz_dec *s)
+{
+        enum xz_ret ret;
+        /*
+         * Validate the CRC32. We know that the temp buffer is at least
+         * eight bytes so this is safe.
+         */
+        s->temp.size -= 4;
+        if (xz_crc32(s->temp.buf, s->temp.size, 0)
+                        != get_le32(s->temp.buf + s->temp.size))
+                return XZ_DATA_ERROR;
+        /* Skip the size byte (buf[0]) and the Block Flags byte (buf[1]). */
+        s->temp.pos = 2;
+        /*
+         * Catch unsupported Block Flags. We support only one or two filters
+         * in the chain, so we catch that with the same test.
+         */
+#ifdef XZ_DEC_BCJ
+        if (s->temp.buf[1] & 0x3E)
+#else
+        if (s->temp.buf[1] & 0x3F)
+#endif
+                return XZ_OPTIONS_ERROR;
+        /* Compressed Size */
+        if (s->temp.buf[1] & 0x40) {
+                if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
+                                        != XZ_STREAM_END)
+                        return XZ_DATA_ERROR;
+                s->block_header.compressed = s->vli;
+        } else {
+                s->block_header.compressed = VLI_UNKNOWN;
+        }
+        /* Uncompressed Size */
+        if (s->temp.buf[1] & 0x80) {
+                if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
+                                != XZ_STREAM_END)
+                        return XZ_DATA_ERROR;
+                s->block_header.uncompressed = s->vli;
+        } else {
+                s->block_header.uncompressed = VLI_UNKNOWN;
+        }
+#ifdef XZ_DEC_BCJ
+        /* If there are two filters, the first one must be a BCJ filter. */
+        s->bcj_active = s->temp.buf[1] & 0x01;
+        if (s->bcj_active) {
+                if (s->temp.size - s->temp.pos < 2)
+                        return XZ_OPTIONS_ERROR;
+                /* The Filter ID byte selects the BCJ filter to use. */
+                ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
+                if (ret != XZ_OK)
+                        return ret;
+                /*
+                 * We don't support custom start offset,
+                 * so Size of Properties must be zero.
+                 */
+                if (s->temp.buf[s->temp.pos++] != 0x00)
+                        return XZ_OPTIONS_ERROR;
+        }
+#endif
+        /* Valid Filter Flags always take at least two bytes. */
+        if (s->temp.size - s->temp.pos < 2)
+                return XZ_DATA_ERROR;
+        /* Filter ID = LZMA2 */
+        if (s->temp.buf[s->temp.pos++] != 0x21)
+                return XZ_OPTIONS_ERROR;
+        /* Size of Properties = 1-byte Filter Properties */
+        if (s->temp.buf[s->temp.pos++] != 0x01)
+                return XZ_OPTIONS_ERROR;
+        /* Filter Properties contains LZMA2 dictionary size. */
+        if (s->temp.size - s->temp.pos < 1)
+                return XZ_DATA_ERROR;
+        ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
+        if (ret != XZ_OK)
+                return ret;
+        /* The rest must be Header Padding. */
+        while (s->temp.pos < s->temp.size)
+                if (s->temp.buf[s->temp.pos++] != 0x00)
+                        return XZ_OPTIONS_ERROR;
+        /* Start the new Block with zeroed position and size counters. */
+        s->temp.pos = 0;
+        s->block.compressed = 0;
+        s->block.uncompressed = 0;
+        return XZ_OK;
+}
+/*
+ * Main state machine of the Stream decoder. s->sequence records which
+ * field of the Stream is being decoded, so that decoding continues from
+ * the right place when this function is called again. Cases that end
+ * without a break or return deliberately fall through to the case of the
+ * field that follows in the Stream.
+ *
+ * Returns XZ_STREAM_END once the Stream Footer has been validated, XZ_OK
+ * when decoding cannot continue with the buffers given (for example, the
+ * input has run out), or another code propagated from the helpers.
+ */
+static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
+{
+        enum xz_ret ret;
+        /*
+         * Store the start position for the case when we are in the middle
+         * of the Index field.
+         */
+        s->in_start = b->in_pos;
+        while (true) {
+                switch (s->sequence) {
+                case SEQ_STREAM_HEADER:
+                        /*
+                         * Stream Header is copied to s->temp, and then
+                         * decoded from there. This way if the caller
+                         * gives us only little input at a time, we can
+                         * still keep the Stream Header decoding code
+                         * simple. Similar approach is used in many places
+                         * in this file.
+                         */
+                        if (!fill_temp(s, b))
+                                return XZ_OK;
+                        /*
+                         * If dec_stream_header() returns
+                         * XZ_UNSUPPORTED_CHECK, it is still possible
+                         * to continue decoding if working in multi-call
+                         * mode. Thus, update s->sequence before calling
+                         * dec_stream_header().
+                         */
+                        s->sequence = SEQ_BLOCK_START;
+                        ret = dec_stream_header(s);
+                        if (ret != XZ_OK)
+                                return ret;
+                        /* Fall through */
+                case SEQ_BLOCK_START:
+                        /* We need one byte of input to continue. */
+                        if (b->in_pos == b->in_size)
+                                return XZ_OK;
+                        /* See if this is the beginning of the Index field. */
+                        if (b->in[b->in_pos] == 0) {
+                                s->in_start = b->in_pos++;
+                                s->sequence = SEQ_INDEX;
+                                break;
+                        }
+                        /*
+                         * Calculate the size of the Block Header and
+                         * prepare to decode it.
+                         */
+                        s->block_header.size
+                                = ((uint32_t)b->in[b->in_pos] + 1) * 4;
+                        s->temp.size = s->block_header.size;
+                        s->temp.pos = 0;
+                        s->sequence = SEQ_BLOCK_HEADER;
+                        /* Fall through */
+                case SEQ_BLOCK_HEADER:
+                        if (!fill_temp(s, b))
+                                return XZ_OK;
+                        ret = dec_block_header(s);
+                        if (ret != XZ_OK)
+                                return ret;
+                        s->sequence = SEQ_BLOCK_UNCOMPRESS;
+                        /* Fall through */
+                case SEQ_BLOCK_UNCOMPRESS:
+                        ret = dec_block(s, b);
+                        if (ret != XZ_STREAM_END)
+                                return ret;
+                        s->sequence = SEQ_BLOCK_PADDING;
+                        /* Fall through */
+                case SEQ_BLOCK_PADDING:
+                        /*
+                         * Size of Compressed Data + Block Padding
+                         * must be a multiple of four. We don't need
+                         * s->block.compressed for anything else
+                         * anymore, so we use it here to test the size
+                         * of the Block Padding field.
+                         */
+                        while (s->block.compressed & 3) {
+                                if (b->in_pos == b->in_size)
+                                        return XZ_OK;
+                                if (b->in[b->in_pos++] != 0)
+                                        return XZ_DATA_ERROR;
+                                ++s->block.compressed;
+                        }
+                        s->sequence = SEQ_BLOCK_CHECK;
+                        /* Fall through */
+                case SEQ_BLOCK_CHECK:
+                        if (s->check_type == XZ_CHECK_CRC32) {
+                                ret = crc32_validate(s, b);
+                                if (ret != XZ_STREAM_END)
+                                        return ret;
+                        }
+#ifdef XZ_DEC_ANY_CHECK
+                        else if (!check_skip(s, b)) {
+                                return XZ_OK;
+                        }
+#endif
+                        /* Go around the loop to look for the next Block. */
+                        s->sequence = SEQ_BLOCK_START;
+                        break;
+                case SEQ_INDEX:
+                        ret = dec_index(s, b);
+                        if (ret != XZ_STREAM_END)
+                                return ret;
+                        s->sequence = SEQ_INDEX_PADDING;
+                        /* Fall through */
+                case SEQ_INDEX_PADDING:
+                        /*
+                         * s->index.size doesn't yet include the bytes
+                         * consumed since s->in_start, so add them here
+                         * when testing the alignment.
+                         */
+                        while ((s->index.size + (b->in_pos - s->in_start))
+                                        & 3) {
+                                if (b->in_pos == b->in_size) {
+                                        index_update(s, b);
+                                        return XZ_OK;
+                                }
+                                if (b->in[b->in_pos++] != 0)
+                                        return XZ_DATA_ERROR;
+                        }
+                        /* Finish the CRC32 value and Index size. */
+                        index_update(s, b);
+                        /* Compare the hashes to validate the Index field. */
+                        if (!memeq(&s->block.hash, &s->index.hash,
+                                        sizeof(s->block.hash)))
+                                return XZ_DATA_ERROR;
+                        s->sequence = SEQ_INDEX_CRC32;
+                        /* Fall through */
+                case SEQ_INDEX_CRC32:
+                        ret = crc32_validate(s, b);
+                        if (ret != XZ_STREAM_END)
+                                return ret;
+                        /* The Stream Footer is read into s->temp next. */
+                        s->temp.size = STREAM_HEADER_SIZE;
+                        s->sequence = SEQ_STREAM_FOOTER;
+                        /* Fall through */
+                case SEQ_STREAM_FOOTER:
+                        if (!fill_temp(s, b))
+                                return XZ_OK;
+                        return dec_stream_footer(s);
+                }
+        }
+        /* Never reached */
+}
+/*
+ * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
+ * multi-call and single-call decoding.
+ *
+ * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
+ * are not going to make any progress anymore. This is to prevent the caller
+ * from calling us infinitely when the input file is truncated or otherwise
+ * corrupt. Since zlib-style API allows that the caller fills the input buffer
+ * only when the decoder doesn't produce any new output, we have to be careful
+ * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
+ * after the second consecutive call to xz_dec_run() that makes no progress.
+ *
+ * In single-call mode, if we couldn't decode everything and no error
+ * occurred, either the input is truncated or the output buffer is too small.
+ * Since we know that the last input byte never produces any output, we know
+ * that if all the input was consumed and decoding wasn't finished, the file
+ * must be corrupt. Otherwise the output buffer has to be too small or the
+ * file is corrupt in a way that decoding it produces too big output.
+ *
+ * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
+ * their original values. This is because with some filter chains there won't
+ * be any valid uncompressed data in the output buffer unless the decoding
+ * actually succeeds (that's the price to pay of using the output buffer as
+ * the workspace).
+ */
+XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
+{
+        size_t in_start;
+        size_t out_start;
+        enum xz_ret ret;
+        bool stalled;
+        /* Single-call decoding always starts from a fresh state. */
+        if (DEC_IS_SINGLE(s->mode))
+                xz_dec_reset(s);
+        in_start = b->in_pos;
+        out_start = b->out_pos;
+        ret = dec_main(s, b);
+        if (DEC_IS_SINGLE(s->mode)) {
+                /*
+                 * Not finished and no error: if all input was consumed,
+                 * the file is corrupt, otherwise the output didn't fit.
+                 */
+                if (ret == XZ_OK) {
+                        if (b->in_pos == b->in_size)
+                                ret = XZ_DATA_ERROR;
+                        else
+                                ret = XZ_BUF_ERROR;
+                }
+                /* On failure, restore the caller's buffer positions. */
+                if (ret != XZ_STREAM_END) {
+                        b->in_pos = in_start;
+                        b->out_pos = out_start;
+                }
+                return ret;
+        }
+        /*
+         * Multi-call mode: XZ_BUF_ERROR is given only on the second
+         * consecutive call that made no progress at all.
+         */
+        stalled = ret == XZ_OK && b->in_pos == in_start
+                        && b->out_pos == out_start;
+        if (stalled && s->allow_buf_error)
+                ret = XZ_BUF_ERROR;
+        s->allow_buf_error = stalled;
+        return ret;
+}
+/*
+ * Allocate and initialize a decoder state for the given mode, passing
+ * dict_max on to the LZMA2 decoder. Returns NULL if any allocation
+ * fails; whatever had been allocated by then is freed again.
+ */
+XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
+{
+        struct xz_dec *s;
+        s = kmalloc(sizeof(*s), GFP_KERNEL);
+        if (!s)
+                return NULL;
+        s->mode = mode;
+#ifdef XZ_DEC_BCJ
+        s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
+        if (!s->bcj)
+                goto error_bcj;
+#endif
+        s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
+        if (!s->lzma2)
+                goto error_lzma2;
+        xz_dec_reset(s);
+        return s;
+        /* Unwind in the reverse order of the allocations above. */
+error_lzma2:
+#ifdef XZ_DEC_BCJ
+        xz_dec_bcj_end(s->bcj);
+error_bcj:
+#endif
+        kfree(s);
+        return NULL;
+}
+/*
+ * Put the decoder back to the state where it expects the beginning of
+ * a new .xz Stream.
+ */
+XZ_EXTERN void xz_dec_reset(struct xz_dec *s)
+{
+        /* Decoding starts by collecting the Stream Header into s->temp. */
+        s->sequence = SEQ_STREAM_HEADER;
+        s->temp.size = STREAM_HEADER_SIZE;
+        s->temp.pos = 0;
+        /* Forget everything collected from the previous Stream. */
+        memzero(&s->index, sizeof(s->index));
+        memzero(&s->block, sizeof(s->block));
+        s->crc32 = 0;
+        s->pos = 0;
+        s->allow_buf_error = false;
+}
+/* Free the decoder state and its filter states. A NULL s is ignored. */
+XZ_EXTERN void xz_dec_end(struct xz_dec *s)
+{
+        if (s == NULL)
+                return;
+        xz_dec_lzma2_end(s->lzma2);
+#ifdef XZ_DEC_BCJ
+        xz_dec_bcj_end(s->bcj);
+#endif
+        kfree(s);
+}
diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c
new file mode 100644
index 00000000000..32eb3c03aed
--- /dev/null
+++ b/lib/xz/xz_dec_syms.c
@@ -0,0 +1,26 @@
+/*
+ * XZ decoder module information
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#include <linux/module.h>
+#include <linux/xz.h>
+/* Export the four decoder API functions for use by other modules. */
+EXPORT_SYMBOL(xz_dec_init);
+EXPORT_SYMBOL(xz_dec_reset);
+EXPORT_SYMBOL(xz_dec_run);
+EXPORT_SYMBOL(xz_dec_end);
+MODULE_DESCRIPTION("XZ decompressor");
+MODULE_VERSION("1.0");
+MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov");
+/*
+ * This code is in the public domain, but in Linux it's simplest to just
+ * say it's GPL and consider the authors as the copyright holders.
+ */
+MODULE_LICENSE("GPL");
diff --git a/lib/xz/xz_dec_test.c b/lib/xz/xz_dec_test.c
new file mode 100644
index 00000000000..da28a19d6c9
--- /dev/null
+++ b/lib/xz/xz_dec_test.c
@@ -0,0 +1,220 @@
+/*
+ * XZ decoder tester
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/crc32.h>
+#include <linux/xz.h>
+/* Maximum supported dictionary size */
+#define DICT_MAX (1 << 20)
+/* Device name to pass to register_chrdev(). */
+#define DEVICE_NAME "xz_dec_test"
+/* Dynamically allocated device major number */
+static int device_major;
+/*
+ * We reuse the same decoder state, and thus can decode only one
+ * file at a time.
+ *
+ * NOTE(review): xz_dec_test_open() tests and then sets this flag without
+ * a lock or an atomic operation, so two simultaneous opens could
+ * presumably both succeed -- confirm whether that matters for this
+ * test module.
+ */
+static bool device_is_open;
+/* XZ decoder state */
+static struct xz_dec *state;
+/*
+ * Return value of xz_dec_run(). We need to avoid calling xz_dec_run() after
+ * it has returned XZ_STREAM_END, so we make this static.
+ */
+static enum xz_ret ret;
+/*
+ * Input and output buffers. The input buffer is used as a temporary safe
+ * place for the data coming from the userspace.
+ */
+static uint8_t buffer_in[1024];
+static uint8_t buffer_out[1024];
+/*
+ * Structure to pass the input and output buffers to the XZ decoder.
+ * A few of the fields are never modified so we initialize them here.
+ */
+static struct xz_buf buffers = {
+        .in = buffer_in,
+        .out = buffer_out,
+        .out_size = sizeof(buffer_out)
+};
+/*
+ * CRC32 of uncompressed data. This is used to give the user a simple way
+ * to check that the decoder produces correct output.
+ */
+static uint32_t crc;
+/*
+ * Open the test device: claim it for one user at a time and reset the
+ * decoder, the buffers, and the CRC32 for a new file. Returns -EBUSY if
+ * the device is already open.
+ */
+static int xz_dec_test_open(struct inode *i, struct file *f)
+{
+        if (device_is_open)
+                return -EBUSY;
+        device_is_open = true;
+        /* Nothing has been read or decoded yet. */
+        buffers.in_size = 0;
+        buffers.in_pos = 0;
+        buffers.out_pos = 0;
+        crc = 0xFFFFFFFF;
+        ret = XZ_OK;
+        xz_dec_reset(state);
+        printk(KERN_INFO DEVICE_NAME ": opened\n");
+        return 0;
+}
+/*
+ * Close the test device. If the decoder was still expecting more data
+ * (ret is XZ_OK), report that the input was truncated.
+ */
+static int xz_dec_test_release(struct inode *i, struct file *f)
+{
+        device_is_open = false;
+        if (ret == XZ_OK) {
+                printk(KERN_INFO DEVICE_NAME ": input was truncated\n");
+        }
+        printk(KERN_INFO DEVICE_NAME ": closed\n");
+        return 0;
+}
+/*
+ * Decode the data given to us from the userspace. CRC32 of the uncompressed
+ * data is calculated and is printed at the end of successful decoding. The
+ * uncompressed data isn't stored anywhere for further use.
+ *
+ * The .xz file must have exactly one Stream and no Stream Padding. The data
+ * after the first Stream is considered to be garbage.
+ */
+static ssize_t xz_dec_test_write(struct file *file, const char __user *buf,
+                                 size_t size, loff_t *pos)
+{
+        size_t remaining;
+        /* Decoding has already ended; anything more is garbage. */
+        if (ret != XZ_OK) {
+                if (size > 0)
+                        printk(KERN_INFO DEVICE_NAME ": %zu bytes of "
+                                        "garbage at the end of the file\n",
+                                        size);
+                return -ENOSPC;
+        }
+        printk(KERN_INFO DEVICE_NAME ": decoding %zu bytes of input\n",
+                        size);
+        remaining = size;
+        /*
+         * Keep going while the decoder is happy and there is either
+         * input left or the output buffer was filled completely (in
+         * which case more output may be pending).
+         */
+        while (ret == XZ_OK && (remaining > 0
+                        || buffers.out_pos == buffers.out_size)) {
+                if (buffers.in_pos == buffers.in_size) {
+                        /* Input buffer is used up; refill from userspace. */
+                        size_t chunk = min(remaining, sizeof(buffer_in));
+                        buffers.in_pos = 0;
+                        buffers.in_size = chunk;
+                        if (copy_from_user(buffer_in, buf, chunk))
+                                return -EFAULT;
+                        buf += chunk;
+                        remaining -= chunk;
+                }
+                buffers.out_pos = 0;
+                ret = xz_dec_run(state, &buffers);
+                crc = crc32(crc, buffer_out, buffers.out_pos);
+        }
+        if (ret == XZ_OK) {
+                printk(KERN_INFO DEVICE_NAME ": XZ_OK\n");
+                return size;
+        }
+        if (ret == XZ_STREAM_END) {
+                printk(KERN_INFO DEVICE_NAME ": XZ_STREAM_END, "
+                                "CRC32 = 0x%08X\n", ~crc);
+                /* Don't count the input that was left unused. */
+                return size - remaining - (buffers.in_size - buffers.in_pos);
+        }
+        switch (ret) {
+        case XZ_MEMLIMIT_ERROR:
+                printk(KERN_INFO DEVICE_NAME ": XZ_MEMLIMIT_ERROR\n");
+                break;
+        case XZ_FORMAT_ERROR:
+                printk(KERN_INFO DEVICE_NAME ": XZ_FORMAT_ERROR\n");
+                break;
+        case XZ_OPTIONS_ERROR:
+                printk(KERN_INFO DEVICE_NAME ": XZ_OPTIONS_ERROR\n");
+                break;
+        case XZ_DATA_ERROR:
+                printk(KERN_INFO DEVICE_NAME ": XZ_DATA_ERROR\n");
+                break;
+        case XZ_BUF_ERROR:
+                printk(KERN_INFO DEVICE_NAME ": XZ_BUF_ERROR\n");
+                break;
+        default:
+                printk(KERN_INFO DEVICE_NAME ": Bug detected!\n");
+                break;
+        }
+        return -EIO;
+}
+/*
+ * Module init: allocate the XZ decoder state and register the character
+ * device with a dynamically allocated major number. Returns 0 on success,
+ * -ENOMEM if the decoder cannot be allocated, or the negative value
+ * returned by register_chrdev().
+ */
+static int __init xz_dec_test_init(void)
+{
+        static const struct file_operations fileops = {
+                .owner = THIS_MODULE,
+                .open = xz_dec_test_open,
+                .release = xz_dec_test_release,
+                .write = xz_dec_test_write
+        };
+        state = xz_dec_init(XZ_PREALLOC, DICT_MAX);
+        if (!state)
+                return -ENOMEM;
+        device_major = register_chrdev(0, DEVICE_NAME, &fileops);
+        if (device_major >= 0) {
+                printk(KERN_INFO DEVICE_NAME ": module loaded\n");
+                printk(KERN_INFO DEVICE_NAME ": Create a device node with "
+                                "'mknod " DEVICE_NAME " c %d 0' and write .xz files "
+                                "to it.\n", device_major);
+                return 0;
+        }
+        /* Registration failed: don't leak the decoder state. */
+        xz_dec_end(state);
+        return device_major;
+}
+/*
+ * Module exit: undo xz_dec_test_init(). The character device is
+ * unregistered before the decoder state used by its file operations
+ * is freed.
+ */
+static void __exit xz_dec_test_exit(void)
+{
+        unregister_chrdev(device_major, DEVICE_NAME);
+        xz_dec_end(state);
+        printk(KERN_INFO DEVICE_NAME ": module unloaded\n");
+}
+module_init(xz_dec_test_init);
+module_exit(xz_dec_test_exit);
+MODULE_DESCRIPTION("XZ decompressor tester");
+MODULE_VERSION("1.0");
+MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org>");
+/*
+ * This code is in the public domain, but in Linux it's simplest to just
+ * say it's GPL and consider the authors as the copyright holders.
+ */
+MODULE_LICENSE("GPL");
diff --git a/lib/xz/xz_lzma2.h b/lib/xz/xz_lzma2.h
new file mode 100644
index 00000000000..071d67bee9f
--- /dev/null
+++ b/lib/xz/xz_lzma2.h
@@ -0,0 +1,204 @@
+/*
+ * LZMA2 definitions
+ *
+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
+ *          Igor Pavlov <http://7-zip.org/>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#ifndef XZ_LZMA2_H
+#define XZ_LZMA2_H
+/* Range coder constants */
+#define RC_SHIFT_BITS 8
+#define RC_TOP_BITS 24
+#define RC_TOP_VALUE (1 << RC_TOP_BITS)
+#define RC_BIT_MODEL_TOTAL_BITS 11
+#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
+#define RC_MOVE_BITS 5
+/*
+ * Maximum number of position states. A position state is the lowest pb
+ * number of bits of the current uncompressed offset. In some places there
+ * are different sets of probabilities for different position states.
+ */
+#define POS_STATES_MAX (1 << 4)
+/*
+ * This enum is used to track which LZMA symbols have occurred most recently
+ * and in which order. This information is used to predict the next symbol.
+ *
+ * Symbols:
+ *  - Literal: One 8-bit byte
+ *  - Match: Repeat a chunk of data at some distance
+ *  - Long repeat: Multi-byte match at a recently seen distance
+ *  - Short repeat: One-byte repeat at a recently seen distance
+ *
+ * The symbol names are in the form STATE_oldest_older_previous. REP means
+ * either short or long repeated match, and NONLIT means any non-literal.
+ *
+ * The numeric order of the enumerators matters: the first seven (the ones
+ * ending in _LIT) are the values below LIT_STATES, and lzma_state_literal()
+ * moves between states by subtracting 3 or 6 from the current value.
+ */
+enum lzma_state {
+        STATE_LIT_LIT,
+        STATE_MATCH_LIT_LIT,
+        STATE_REP_LIT_LIT,
+        STATE_SHORTREP_LIT_LIT,
+        STATE_MATCH_LIT,
+        STATE_REP_LIT,
+        STATE_SHORTREP_LIT,
+        STATE_LIT_MATCH,
+        STATE_LIT_LONGREP,
+        STATE_LIT_SHORTREP,
+        STATE_NONLIT_MATCH,
+        STATE_NONLIT_REP
+};
+/* Total number of states */
+#define STATES 12
+/* The lowest 7 states indicate that the previous symbol was a literal. */
+#define LIT_STATES 7
+/*
+ * Update the state after a literal: the first four states collapse to
+ * STATE_LIT_LIT, states up to STATE_LIT_SHORTREP move down by three, and
+ * the remaining (NONLIT) states move down by six.
+ */
+static inline void lzma_state_literal(enum lzma_state *state)
+{
+        const enum lzma_state cur = *state;
+        if (cur > STATE_LIT_SHORTREP) {
+                *state = cur - 6;
+                return;
+        }
+        if (cur > STATE_SHORTREP_LIT_LIT) {
+                *state = cur - 3;
+                return;
+        }
+        *state = STATE_LIT_LIT;
+}
+/*
+ * Update the state after a match: STATE_LIT_MATCH if the previous symbol
+ * was a literal, STATE_NONLIT_MATCH otherwise.
+ */
+static inline void lzma_state_match(enum lzma_state *state)
+{
+        if (*state < LIT_STATES)
+                *state = STATE_LIT_MATCH;
+        else
+                *state = STATE_NONLIT_MATCH;
+}
+/*
+ * Update the state after a long repeated match: STATE_LIT_LONGREP if the
+ * previous symbol was a literal, STATE_NONLIT_REP otherwise.
+ */
+static inline void lzma_state_long_rep(enum lzma_state *state)
+{
+        if (*state < LIT_STATES)
+                *state = STATE_LIT_LONGREP;
+        else
+                *state = STATE_NONLIT_REP;
+}
+/*
+ * Update the state after a short repeated match: STATE_LIT_SHORTREP if the
+ * previous symbol was a literal, STATE_NONLIT_REP otherwise.
+ */
+static inline void lzma_state_short_rep(enum lzma_state *state)
+{
+        if (*state < LIT_STATES)
+                *state = STATE_LIT_SHORTREP;
+        else
+                *state = STATE_NONLIT_REP;
+}
+/* Return true when the state says the previous symbol was a literal. */
+static inline bool lzma_state_is_literal(enum lzma_state state)
+{
+        const bool prev_was_literal = state < LIT_STATES;
+        return prev_was_literal;
+}
+/* Each literal coder is divided in three sections:
+ *   - 0x001-0x0FF: Without match byte
+ *   - 0x101-0x1FF: With match byte; match bit is 0
+ *   - 0x201-0x2FF: With match byte; match bit is 1
+ *
+ * Match byte is used when the previous LZMA symbol was something else than
+ * a literal (that is, it was some kind of match).
+ */
+#define LITERAL_CODER_SIZE 0x300
+/* Maximum number of literal coders */
+#define LITERAL_CODERS_MAX (1 << 4)
+/* Minimum length of a match is two bytes. */
+#define MATCH_LEN_MIN 2
+/* Match length is encoded with 4, 5, or 10 bits.
+ *
+ * Length   Bits
+ *  2-9      4 = Choice=0 + 3 bits
+ * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
+ * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
+ */
+#define LEN_LOW_BITS 3
+#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
+#define LEN_MID_BITS 3
+#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
+#define LEN_HIGH_BITS 8
+#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
+#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
+/*
+ * Maximum length of a match is 273 which is a result of the encoding
+ * described above.
+ */
+#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
+/*
+ * Different sets of probabilities are used for match distances that have
+ * very short match length: Lengths of 2, 3, and 4 bytes have a separate
+ * set of probabilities for each length. The matches with longer length
+ * use a shared set of probabilities.
+ */
+#define DIST_STATES 4
+/*
+ * Map a match length to the index of the probability array used for
+ * decoding the distance slot. The shortest lengths each get their own
+ * index (len - MATCH_LEN_MIN); every longer length shares the last one.
+ */
+static inline uint32_t lzma_get_dist_state(uint32_t len)
+{
+        if (len >= DIST_STATES + MATCH_LEN_MIN)
+                return DIST_STATES - 1;
+        return len - MATCH_LEN_MIN;
+}
+/*
+ * The highest two bits of a 32-bit match distance are encoded using six bits.
+ * This six-bit value is called a distance slot. This way encoding a 32-bit
+ * value takes 6-36 bits, larger values taking more bits.
+ */
+#define DIST_SLOT_BITS 6
+#define DIST_SLOTS (1 << DIST_SLOT_BITS)
+/* Match distances up to 127 are fully encoded using probabilities. Since
+ * the highest two bits (distance slot) are always encoded using six bits,
+ * the distances 0-3 don't need any additional bits to encode, since the
+ * distance slot itself is the same as the actual distance. DIST_MODEL_START
+ * indicates the first distance slot where at least one additional bit is
+ * needed.
+ */
+#define DIST_MODEL_START 4
+/*
+ * Match distances greater than 127 are encoded in three pieces:
+ *   - distance slot: the highest two bits
+ *   - direct bits: 2-26 bits below the highest two bits
+ *   - alignment bits: four lowest bits
+ *
+ * Direct bits don't use any probabilities.
+ *
+ * The distance slot value of 14 is for distances 128-191.
+ */
+#define DIST_MODEL_END 14
+/* Distance slots that indicate a distance <= 127. */
+#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
+#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
+/*
+ * For match distances greater than 127, only the highest two bits and the
+ * lowest four bits (alignment) are encoded using probabilities.
+ */
+#define ALIGN_BITS 4
+#define ALIGN_SIZE (1 << ALIGN_BITS)
+#define ALIGN_MASK (ALIGN_SIZE - 1)
+/* Total number of all probability variables */
+#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
+/*
+ * LZMA remembers the four most recent match distances. Reusing these
+ * distances tends to take less space than re-encoding the actual
+ * distance value.
+ */
+#define REPS 4
+#endif
diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h
new file mode 100644
index 00000000000..a65633e0696
--- /dev/null
+++ b/lib/xz/xz_private.h
@@ -0,0 +1,156 @@
+/*
+ * Private includes and definitions
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#ifndef XZ_PRIVATE_H
+#define XZ_PRIVATE_H
+#ifdef __KERNEL__
+#       include <linux/xz.h>
+#       include <asm/byteorder.h>
+#       include <asm/unaligned.h>
+        /* XZ_PREBOOT may be defined only via decompress_unxz.c. */
+#       ifndef XZ_PREBOOT
+#               include <linux/slab.h>
+#               include <linux/vmalloc.h>
+#               include <linux/string.h>
+#               ifdef CONFIG_XZ_DEC_X86
+#                       define XZ_DEC_X86
+#               endif
+#               ifdef CONFIG_XZ_DEC_POWERPC
+#                       define XZ_DEC_POWERPC
+#               endif
+#               ifdef CONFIG_XZ_DEC_IA64
+#                       define XZ_DEC_IA64
+#               endif
+#               ifdef CONFIG_XZ_DEC_ARM
+#                       define XZ_DEC_ARM
+#               endif
+#               ifdef CONFIG_XZ_DEC_ARMTHUMB
+#                       define XZ_DEC_ARMTHUMB
+#               endif
+#               ifdef CONFIG_XZ_DEC_SPARC
+#                       define XZ_DEC_SPARC
+#               endif
+#               define memeq(a, b, size) (memcmp(a, b, size) == 0)
+#               define memzero(buf, size) memset(buf, 0, size)
+#       endif
+#       define get_le32(p) le32_to_cpup((const uint32_t *)(p))
+#else
+        /*
+         * For userspace builds, use a separate header to define the required
+         * macros and functions. This makes it easier to adapt the code into
+         * different environments and avoids clutter in the Linux kernel tree.
+         */
+#       include "xz_config.h"
+#endif
+/* If no specific decoding mode is requested, enable support for all modes. */
+#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
+                && !defined(XZ_DEC_DYNALLOC)
+#       define XZ_DEC_SINGLE
+#       define XZ_DEC_PREALLOC
+#       define XZ_DEC_DYNALLOC
+#endif
+/*
+ * The DEC_IS_foo(mode) macros are used in "if" statements. If only some
+ * of the supported modes are enabled, these macros will evaluate to true or
+ * false at compile time and thus allow the compiler to omit unneeded code.
+ */
+#ifdef XZ_DEC_SINGLE
+#       define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
+#else
+#       define DEC_IS_SINGLE(mode) (false)
+#endif
+#ifdef XZ_DEC_PREALLOC
+#       define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
+#else
+#       define DEC_IS_PREALLOC(mode) (false)
+#endif
+#ifdef XZ_DEC_DYNALLOC
+#       define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
+#else
+#       define DEC_IS_DYNALLOC(mode) (false)
+#endif
+#if !defined(XZ_DEC_SINGLE)
+#       define DEC_IS_MULTI(mode) (true)
+#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
+#       define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
+#else
+#       define DEC_IS_MULTI(mode) (false)
+#endif
+/*
+ * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
+ * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
+ * Each per-architecture XZ_DEC_* switch is tested exactly once below.
+ */
+#ifndef XZ_DEC_BCJ
+#       if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
+                        || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
+                        || defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
+#               define XZ_DEC_BCJ
+#       endif
+#endif
+/*
+ * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
+ * before calling xz_dec_lzma2_run().
+ */
+XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
+                                                   uint32_t dict_max);
+/*
+ * Decode the LZMA2 properties (one byte) and reset the decoder. Return
+ * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
+ * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
+ * decoder doesn't support.
+ */
+XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
+                                         uint8_t props);
+/* Decode raw LZMA2 stream from b->in to b->out. */
+XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
+                                       struct xz_buf *b);
+/* Free the memory allocated for the LZMA2 decoder. */
+XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
+#ifdef XZ_DEC_BCJ
+/*
+ * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
+ * calling xz_dec_bcj_run().
+ */
+XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call);
+/*
+ * Decode the Filter ID of a BCJ filter. This implementation doesn't
+ * support custom start offsets, so no decoding of Filter Properties
+ * is needed. Returns XZ_OK if the given Filter ID is supported.
+ * Otherwise XZ_OPTIONS_ERROR is returned.
+ */
+XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
+/*
+ * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
+ * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
+ * must be called directly.
+ */
+XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
+                                     struct xz_dec_lzma2 *lzma2,
+                                     struct xz_buf *b);
+/* Free the memory allocated for the BCJ filters. */
+#define xz_dec_bcj_end(s) kfree(s)
+#endif
+#endif
diff --git a/lib/xz/xz_stream.h b/lib/xz/xz_stream.h
new file mode 100644
index 00000000000..66cb5a7055e
--- /dev/null
+++ b/lib/xz/xz_stream.h
@@ -0,0 +1,62 @@
+/*
+ * Definitions for handling the .xz file format
+ *
+ * Author: Lasse Collin <lasse.collin@tukaani.org>
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+#ifndef XZ_STREAM_H
+#define XZ_STREAM_H
+#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
+#       include <linux/crc32.h>
+#       undef crc32
+#       define xz_crc32(buf, size, crc) \
+                (~crc32_le(~(uint32_t)(crc), buf, size))
+#endif
+/*
+ * See the .xz file format specification at
+ * http://tukaani.org/xz/xz-file-format.txt
+ * to understand the container format.
+ */
+#define STREAM_HEADER_SIZE 12
+#define HEADER_MAGIC "\3757zXZ"
+#define HEADER_MAGIC_SIZE 6
+#define FOOTER_MAGIC "YZ"
+#define FOOTER_MAGIC_SIZE 2
+/*
+ * Variable-length integer can hold a 63-bit unsigned integer or a special
+ * value indicating that the value is unknown.
+ *
+ * Experimental: vli_type can be defined to uint32_t to save a few bytes
+ * in code size (no effect on speed). Doing so limits the uncompressed and
+ * compressed size of the file to less than 256 MiB and may also weaken
+ * error detection slightly.
+ */
+typedef uint64_t vli_type;
+#define VLI_MAX ((vli_type)-1 / 2)
+#define VLI_UNKNOWN ((vli_type)-1)
+/* Maximum encoded size of a VLI */
+#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
+/*
+ * Integrity Check types. The explicit values are the Check ID numbers;
+ * only these four IDs are given names here, while XZ_CHECK_MAX (defined
+ * right after this enum) puts the largest possible Check ID at 15.
+ */
+enum xz_check {
+        XZ_CHECK_NONE = 0,
+        XZ_CHECK_CRC32 = 1,
+        XZ_CHECK_CRC64 = 4,
+        XZ_CHECK_SHA256 = 10
+};
+/* Maximum possible Check ID */
+#define XZ_CHECK_MAX 15
+#endif
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 215447c5526..2c13ecc5bb2 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -8,21 +8,6 @@
 #include "inflate.h"
 #include "inffast.h"
-/* Only do the unaligned "Faster" variant when
- * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set
- *
- * On powerpc, it won't be as we don't include autoconf.h
- * automatically for the boot wrapper, which is intended as
- * we run in an environment where we may not be able to deal
- * with (even rare) alignment faults. In addition, we do not
- * define __KERNEL__ for arch/powerpc/boot unlike x86
- */
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#include <asm/unaligned.h>
-#include <asm/byteorder.h>
-#endif
 #ifndef ASMINF
 /* Allow machine dependent optimization for post-increment or pre-increment.
@@ -36,14 +21,31 @@
   - Pentium III (Anderson)
   - M68060 (Nikl)
 */
+/* Two-byte overlay used to build or split a 16-bit value bytewise. */
+union uu {
+        unsigned short us;
+        unsigned char b[2];
+};
+/*
+ * Endian-independent unaligned 16-bit load: the two bytes at p are copied
+ * in memory order into a union and returned as an unsigned short, so p is
+ * never dereferenced as an unsigned short. The source is read through a
+ * const-qualified byte pointer so the parameter's const is not cast away.
+ */
+static inline unsigned short
+get_unaligned16(const unsigned short *p)
+{
+        union uu  mm;
+        const unsigned char *b = (const unsigned char *)p;
+        mm.b[0] = b[0];
+        mm.b[1] = b[1];
+        return mm.us;
+}
 #ifdef POSTINC
 #  define OFF 0
 #  define PUP(a) *(a)++
-#  define UP_UNALIGNED(a) get_unaligned((a)++)
+#  define UP_UNALIGNED(a) get_unaligned16((a)++)
 #else
 #  define OFF 1
 #  define PUP(a) *++(a)
-#  define UP_UNALIGNED(a) get_unaligned(++(a))
+#  define UP_UNALIGNED(a) get_unaligned16(++(a))
 #endif
 /*
@@ -256,7 +258,6 @@ void inflate_fast(z_streamp strm, unsigned start)
                    }
                }
                else {
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
                    unsigned short *sout;
                    unsigned long loops;
@@ -274,22 +275,25 @@ void inflate_fast(z_streamp strm, unsigned start)
                        sfrom = (unsigned short *)(from - OFF);
                        loops = len >> 1;
                        do
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+                            PUP(sout) = PUP(sfrom);
+#else
                            PUP(sout) = UP_UNALIGNED(sfrom);
+#endif
                        while (--loops);
                        out = (unsigned char *)sout + OFF;
                        from = (unsigned char *)sfrom + OFF;
                    } else { /* dist == 1 or dist == 2 */
                        unsigned short pat16;
-                        pat16 = *(sout-2+2*OFF);
+                        pat16 = *(sout-1+OFF);
-                        if (dist == 1)
+                        if (dist == 1) {
-#if defined(__BIG_ENDIAN)
+                                union uu mm;
-                            pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8);
+                                /* copy one char pattern to both bytes */
-#elif defined(__LITTLE_ENDIAN)
+                                mm.us = pat16;
-                            pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8);
+                                mm.b[0] = mm.b[1];
-#else
+                                pat16 = mm.us;
-#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
+                        }
-#endif
                        loops = len >> 1;
                        do
                            PUP(sout) = pat16;
@@ -298,20 +302,6 @@ void inflate_fast(z_streamp strm, unsigned start)
                    }
                    if (len & 1)
                        PUP(out) = PUP(from);
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-                    from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-                         PUP(out) = PUP(from);
-                         PUP(out) = PUP(from);
-                         PUP(out) = PUP(from);
-                         len -= 3;
-                    } while (len > 2);
-                    if (len) {
-                         PUP(out) = PUP(from);
-                         if (len > 1)
-                             PUP(out) = PUP(from);
-                    }
-#endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
                }
            }
            else if ((op & 64) == 0) {          /* 2nd level distance code */