aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-09-19 05:27:32 -0400
committerIngo Molnar <mingo@elte.hu>2009-09-19 05:28:41 -0400
commit929bf0d0156562ce631728b6fa53d68004d456d2 (patch)
tree739063990a8077b29ef97e69d73bce94573daae4 /kernel
parentdef0a9b2573e00ab0b486cb5382625203ab4c4a6 (diff)
parent202c4675c55ddf6b443c7e057d2dff6b42ef71aa (diff)
Merge branch 'linus' into perfcounters/core
Merge reason: Bring in tracing changes we depend on. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/cpu.c15
-rw-r--r--kernel/cred.c3
-rw-r--r--kernel/dma-coherent.c176
-rw-r--r--kernel/gcov/Kconfig2
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/perf_counter.c7
-rw-r--r--kernel/power/Kconfig14
-rw-r--r--kernel/power/hibernate.c21
-rw-r--r--kernel/power/main.c17
-rw-r--r--kernel/power/power.h2
-rw-r--r--kernel/power/snapshot.c412
-rw-r--r--kernel/printk.c6
-rw-r--r--kernel/sched.c448
-rw-r--r--kernel/sched_debug.c1
-rw-r--r--kernel/sched_fair.c414
-rw-r--r--kernel/sched_features.h122
-rw-r--r--kernel/sched_idletask.c4
-rw-r--r--kernel/sched_rt.c7
-rw-r--r--kernel/smp.c40
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/sysctl.c14
-rw-r--r--kernel/taskstats.c10
-rw-r--r--kernel/trace/Kconfig28
-rw-r--r--kernel/trace/ftrace.c158
-rw-r--r--kernel/trace/ring_buffer.c17
-rw-r--r--kernel/trace/trace.c127
-rw-r--r--kernel/trace/trace.h276
-rw-r--r--kernel/trace/trace_boot.c8
-rw-r--r--kernel/trace/trace_clock.c24
-rw-r--r--kernel/trace/trace_entries.h383
-rw-r--r--kernel/trace/trace_event_profile.c5
-rw-r--r--kernel/trace/trace_event_types.h178
-rw-r--r--kernel/trace/trace_events.c85
-rw-r--r--kernel/trace/trace_events_filter.c41
-rw-r--r--kernel/trace/trace_export.c284
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_functions_graph.c66
-rw-r--r--kernel/trace/trace_irqsoff.c16
-rw-r--r--kernel/trace/trace_mmiotrace.c10
-rw-r--r--kernel/trace/trace_output.c42
-rw-r--r--kernel/trace/trace_output.h2
-rw-r--r--kernel/trace/trace_sched_wakeup.c52
44 files changed, 2009 insertions, 1543 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index b833bd5cc127..3d9c7e27e3f9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -90,7 +90,6 @@ obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
90obj-$(CONFIG_MARKERS) += marker.o 90obj-$(CONFIG_MARKERS) += marker.o
91obj-$(CONFIG_TRACEPOINTS) += tracepoint.o 91obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
92obj-$(CONFIG_LATENCYTOP) += latencytop.o 92obj-$(CONFIG_LATENCYTOP) += latencytop.o
93obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
94obj-$(CONFIG_FUNCTION_TRACER) += trace/ 93obj-$(CONFIG_FUNCTION_TRACER) += trace/
95obj-$(CONFIG_TRACING) += trace/ 94obj-$(CONFIG_TRACING) += trace/
96obj-$(CONFIG_X86_DS) += trace/ 95obj-$(CONFIG_X86_DS) += trace/
@@ -117,7 +116,7 @@ $(obj)/config_data.gz: .config FORCE
117 $(call if_changed,gzip) 116 $(call if_changed,gzip)
118 117
119quiet_cmd_ikconfiggz = IKCFG $@ 118quiet_cmd_ikconfiggz = IKCFG $@
120 cmd_ikconfiggz = (echo "static const char kernel_config_data[] = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@ 119 cmd_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;") > $@
121targets += config_data.h 120targets += config_data.h
122$(obj)/config_data.h: $(obj)/config_data.gz FORCE 121$(obj)/config_data.h: $(obj)/config_data.gz FORCE
123 $(call if_changed,ikconfiggz) 122 $(call if_changed,ikconfiggz)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ce10043e4ac..6ba0f1ecb212 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -401,6 +401,7 @@ int disable_nonboot_cpus(void)
401 break; 401 break;
402 } 402 }
403 } 403 }
404
404 if (!error) { 405 if (!error) {
405 BUG_ON(num_online_cpus() > 1); 406 BUG_ON(num_online_cpus() > 1);
406 /* Make sure the CPUs won't be enabled by someone else */ 407 /* Make sure the CPUs won't be enabled by someone else */
@@ -413,6 +414,14 @@ int disable_nonboot_cpus(void)
413 return error; 414 return error;
414} 415}
415 416
417void __weak arch_enable_nonboot_cpus_begin(void)
418{
419}
420
421void __weak arch_enable_nonboot_cpus_end(void)
422{
423}
424
416void __ref enable_nonboot_cpus(void) 425void __ref enable_nonboot_cpus(void)
417{ 426{
418 int cpu, error; 427 int cpu, error;
@@ -424,6 +433,9 @@ void __ref enable_nonboot_cpus(void)
424 goto out; 433 goto out;
425 434
426 printk("Enabling non-boot CPUs ...\n"); 435 printk("Enabling non-boot CPUs ...\n");
436
437 arch_enable_nonboot_cpus_begin();
438
427 for_each_cpu(cpu, frozen_cpus) { 439 for_each_cpu(cpu, frozen_cpus) {
428 error = _cpu_up(cpu, 1); 440 error = _cpu_up(cpu, 1);
429 if (!error) { 441 if (!error) {
@@ -432,6 +444,9 @@ void __ref enable_nonboot_cpus(void)
432 } 444 }
433 printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); 445 printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
434 } 446 }
447
448 arch_enable_nonboot_cpus_end();
449
435 cpumask_clear(frozen_cpus); 450 cpumask_clear(frozen_cpus);
436out: 451out:
437 cpu_maps_update_done(); 452 cpu_maps_update_done();
diff --git a/kernel/cred.c b/kernel/cred.c
index 006fcab009d5..d7f7a01082eb 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -147,7 +147,8 @@ static void put_cred_rcu(struct rcu_head *rcu)
147 key_put(cred->thread_keyring); 147 key_put(cred->thread_keyring);
148 key_put(cred->request_key_auth); 148 key_put(cred->request_key_auth);
149 release_tgcred(cred); 149 release_tgcred(cred);
150 put_group_info(cred->group_info); 150 if (cred->group_info)
151 put_group_info(cred->group_info);
151 free_uid(cred->user); 152 free_uid(cred->user);
152 kmem_cache_free(cred_jar, cred); 153 kmem_cache_free(cred_jar, cred);
153} 154}
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
deleted file mode 100644
index 962a3b574f21..000000000000
--- a/kernel/dma-coherent.c
+++ /dev/null
@@ -1,176 +0,0 @@
1/*
2 * Coherent per-device memory handling.
3 * Borrowed from i386
4 */
5#include <linux/kernel.h>
6#include <linux/dma-mapping.h>
7
8struct dma_coherent_mem {
9 void *virt_base;
10 u32 device_base;
11 int size;
12 int flags;
13 unsigned long *bitmap;
14};
15
16int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
17 dma_addr_t device_addr, size_t size, int flags)
18{
19 void __iomem *mem_base = NULL;
20 int pages = size >> PAGE_SHIFT;
21 int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
22
23 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
24 goto out;
25 if (!size)
26 goto out;
27 if (dev->dma_mem)
28 goto out;
29
30 /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
31
32 mem_base = ioremap(bus_addr, size);
33 if (!mem_base)
34 goto out;
35
36 dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
37 if (!dev->dma_mem)
38 goto out;
39 dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
40 if (!dev->dma_mem->bitmap)
41 goto free1_out;
42
43 dev->dma_mem->virt_base = mem_base;
44 dev->dma_mem->device_base = device_addr;
45 dev->dma_mem->size = pages;
46 dev->dma_mem->flags = flags;
47
48 if (flags & DMA_MEMORY_MAP)
49 return DMA_MEMORY_MAP;
50
51 return DMA_MEMORY_IO;
52
53 free1_out:
54 kfree(dev->dma_mem);
55 out:
56 if (mem_base)
57 iounmap(mem_base);
58 return 0;
59}
60EXPORT_SYMBOL(dma_declare_coherent_memory);
61
62void dma_release_declared_memory(struct device *dev)
63{
64 struct dma_coherent_mem *mem = dev->dma_mem;
65
66 if (!mem)
67 return;
68 dev->dma_mem = NULL;
69 iounmap(mem->virt_base);
70 kfree(mem->bitmap);
71 kfree(mem);
72}
73EXPORT_SYMBOL(dma_release_declared_memory);
74
75void *dma_mark_declared_memory_occupied(struct device *dev,
76 dma_addr_t device_addr, size_t size)
77{
78 struct dma_coherent_mem *mem = dev->dma_mem;
79 int pos, err;
80
81 size += device_addr & ~PAGE_MASK;
82
83 if (!mem)
84 return ERR_PTR(-EINVAL);
85
86 pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
87 err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
88 if (err != 0)
89 return ERR_PTR(err);
90 return mem->virt_base + (pos << PAGE_SHIFT);
91}
92EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
93
94/**
95 * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area
96 *
97 * @dev: device from which we allocate memory
98 * @size: size of requested memory area
99 * @dma_handle: This will be filled with the correct dma handle
100 * @ret: This pointer will be filled with the virtual address
101 * to allocated area.
102 *
103 * This function should be only called from per-arch dma_alloc_coherent()
104 * to support allocation from per-device coherent memory pools.
105 *
106 * Returns 0 if dma_alloc_coherent should continue with allocating from
107 * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
108 */
109int dma_alloc_from_coherent(struct device *dev, ssize_t size,
110 dma_addr_t *dma_handle, void **ret)
111{
112 struct dma_coherent_mem *mem;
113 int order = get_order(size);
114 int pageno;
115
116 if (!dev)
117 return 0;
118 mem = dev->dma_mem;
119 if (!mem)
120 return 0;
121
122 *ret = NULL;
123
124 if (unlikely(size > (mem->size << PAGE_SHIFT)))
125 goto err;
126
127 pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
128 if (unlikely(pageno < 0))
129 goto err;
130
131 /*
132 * Memory was found in the per-device area.
133 */
134 *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
135 *ret = mem->virt_base + (pageno << PAGE_SHIFT);
136 memset(*ret, 0, size);
137
138 return 1;
139
140err:
141 /*
142 * In the case where the allocation can not be satisfied from the
143 * per-device area, try to fall back to generic memory if the
144 * constraints allow it.
145 */
146 return mem->flags & DMA_MEMORY_EXCLUSIVE;
147}
148EXPORT_SYMBOL(dma_alloc_from_coherent);
149
150/**
151 * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
152 * @dev: device from which the memory was allocated
153 * @order: the order of pages allocated
154 * @vaddr: virtual address of allocated pages
155 *
156 * This checks whether the memory was allocated from the per-device
157 * coherent memory pool and if so, releases that memory.
158 *
159 * Returns 1 if we correctly released the memory, or 0 if
160 * dma_release_coherent() should proceed with releasing memory from
161 * generic pools.
162 */
163int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
164{
165 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
166
167 if (mem && vaddr >= mem->virt_base && vaddr <
168 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
169 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
170
171 bitmap_release_region(mem->bitmap, page, order);
172 return 1;
173 }
174 return 0;
175}
176EXPORT_SYMBOL(dma_release_from_coherent);
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 22e9dcfaa3d3..654efd09f6a9 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -34,7 +34,7 @@ config GCOV_KERNEL
34config GCOV_PROFILE_ALL 34config GCOV_PROFILE_ALL
35 bool "Profile entire Kernel" 35 bool "Profile entire Kernel"
36 depends on GCOV_KERNEL 36 depends on GCOV_KERNEL
37 depends on S390 || X86 37 depends on S390 || X86 || (PPC && EXPERIMENTAL)
38 default n 38 default n
39 ---help--- 39 ---help---
40 This options activates profiling for the entire kernel. 40 This options activates profiling for the entire kernel.
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 49da79ab8486..05071bf6a37b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -485,6 +485,7 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
485 debug_object_init_on_stack(timer, &hrtimer_debug_descr); 485 debug_object_init_on_stack(timer, &hrtimer_debug_descr);
486 __hrtimer_init(timer, clock_id, mode); 486 __hrtimer_init(timer, clock_id, mode);
487} 487}
488EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
488 489
489void destroy_hrtimer_on_stack(struct hrtimer *timer) 490void destroy_hrtimer_on_stack(struct hrtimer *timer)
490{ 491{
@@ -1477,6 +1478,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
1477 sl->timer.function = hrtimer_wakeup; 1478 sl->timer.function = hrtimer_wakeup;
1478 sl->task = task; 1479 sl->task = task;
1479} 1480}
1481EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
1480 1482
1481static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) 1483static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
1482{ 1484{
diff --git a/kernel/module.c b/kernel/module.c
index 46580edff0cb..05ce49ced8f6 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -369,7 +369,7 @@ EXPORT_SYMBOL_GPL(find_module);
369 369
370#ifdef CONFIG_SMP 370#ifdef CONFIG_SMP
371 371
372#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA 372#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
373 373
374static void *percpu_modalloc(unsigned long size, unsigned long align, 374static void *percpu_modalloc(unsigned long size, unsigned long align,
375 const char *name) 375 const char *name)
@@ -394,7 +394,7 @@ static void percpu_modfree(void *freeme)
394 free_percpu(freeme); 394 free_percpu(freeme);
395} 395}
396 396
397#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ 397#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */
398 398
399/* Number of blocks used and allocated. */ 399/* Number of blocks used and allocated. */
400static unsigned int pcpu_num_used, pcpu_num_allocated; 400static unsigned int pcpu_num_used, pcpu_num_allocated;
@@ -540,7 +540,7 @@ static int percpu_modinit(void)
540} 540}
541__initcall(percpu_modinit); 541__initcall(percpu_modinit);
542 542
543#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ 543#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */
544 544
545static unsigned int find_pcpusec(Elf_Ehdr *hdr, 545static unsigned int find_pcpusec(Elf_Ehdr *hdr,
546 Elf_Shdr *sechdrs, 546 Elf_Shdr *sechdrs,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 06d233a06da5..d013f4e89e9c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -106,16 +106,16 @@ hw_perf_group_sched_in(struct perf_counter *group_leader,
106 106
107void __weak perf_counter_print_debug(void) { } 107void __weak perf_counter_print_debug(void) { }
108 108
109static DEFINE_PER_CPU(int, disable_count); 109static DEFINE_PER_CPU(int, perf_disable_count);
110 110
111void __perf_disable(void) 111void __perf_disable(void)
112{ 112{
113 __get_cpu_var(disable_count)++; 113 __get_cpu_var(perf_disable_count)++;
114} 114}
115 115
116bool __perf_enable(void) 116bool __perf_enable(void)
117{ 117{
118 return !--__get_cpu_var(disable_count); 118 return !--__get_cpu_var(perf_disable_count);
119} 119}
120 120
121void perf_disable(void) 121void perf_disable(void)
@@ -4246,6 +4246,7 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr,
4246 if (val) 4246 if (val)
4247 goto err_size; 4247 goto err_size;
4248 } 4248 }
4249 size = sizeof(*attr);
4249 } 4250 }
4250 4251
4251 ret = copy_from_user(attr, uattr, size); 4252 ret = copy_from_user(attr, uattr, size);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 72067cbdb37f..91e09d3b2eb2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -208,3 +208,17 @@ config APM_EMULATION
208 random kernel OOPSes or reboots that don't seem to be related to 208 random kernel OOPSes or reboots that don't seem to be related to
209 anything, try disabling/enabling this option (or disabling/enabling 209 anything, try disabling/enabling this option (or disabling/enabling
210 APM in your BIOS). 210 APM in your BIOS).
211
212config PM_RUNTIME
213 bool "Run-time PM core functionality"
214 depends on PM
215 ---help---
216 Enable functionality allowing I/O devices to be put into energy-saving
217 (low power) states at run time (or autosuspended) after a specified
218 period of inactivity and woken up in response to a hardware-generated
219 wake-up event or a driver's request.
220
221 Hardware support is generally required for this functionality to work
222 and the bus type drivers of the buses the devices are on are
223 responsible for the actual handling of the autosuspend requests and
224 wake-up events.
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 81d2e7464893..04b3a83d686f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -298,8 +298,8 @@ int hibernation_snapshot(int platform_mode)
298 if (error) 298 if (error)
299 return error; 299 return error;
300 300
301 /* Free memory before shutting down devices. */ 301 /* Preallocate image memory before shutting down devices. */
302 error = swsusp_shrink_memory(); 302 error = hibernate_preallocate_memory();
303 if (error) 303 if (error)
304 goto Close; 304 goto Close;
305 305
@@ -315,6 +315,10 @@ int hibernation_snapshot(int platform_mode)
315 /* Control returns here after successful restore */ 315 /* Control returns here after successful restore */
316 316
317 Resume_devices: 317 Resume_devices:
318 /* We may need to release the preallocated image pages here. */
319 if (error || !in_suspend)
320 swsusp_free();
321
318 dpm_resume_end(in_suspend ? 322 dpm_resume_end(in_suspend ?
319 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 323 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
320 resume_console(); 324 resume_console();
@@ -460,11 +464,11 @@ int hibernation_platform_enter(void)
460 464
461 error = hibernation_ops->prepare(); 465 error = hibernation_ops->prepare();
462 if (error) 466 if (error)
463 goto Platofrm_finish; 467 goto Platform_finish;
464 468
465 error = disable_nonboot_cpus(); 469 error = disable_nonboot_cpus();
466 if (error) 470 if (error)
467 goto Platofrm_finish; 471 goto Platform_finish;
468 472
469 local_irq_disable(); 473 local_irq_disable();
470 sysdev_suspend(PMSG_HIBERNATE); 474 sysdev_suspend(PMSG_HIBERNATE);
@@ -476,7 +480,7 @@ int hibernation_platform_enter(void)
476 * We don't need to reenable the nonboot CPUs or resume consoles, since 480 * We don't need to reenable the nonboot CPUs or resume consoles, since
477 * the system is going to be halted anyway. 481 * the system is going to be halted anyway.
478 */ 482 */
479 Platofrm_finish: 483 Platform_finish:
480 hibernation_ops->finish(); 484 hibernation_ops->finish();
481 485
482 dpm_suspend_noirq(PMSG_RESTORE); 486 dpm_suspend_noirq(PMSG_RESTORE);
@@ -578,7 +582,10 @@ int hibernate(void)
578 goto Thaw; 582 goto Thaw;
579 583
580 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); 584 error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
581 if (in_suspend && !error) { 585 if (error)
586 goto Thaw;
587
588 if (in_suspend) {
582 unsigned int flags = 0; 589 unsigned int flags = 0;
583 590
584 if (hibernation_mode == HIBERNATION_PLATFORM) 591 if (hibernation_mode == HIBERNATION_PLATFORM)
@@ -590,8 +597,8 @@ int hibernate(void)
590 power_down(); 597 power_down();
591 } else { 598 } else {
592 pr_debug("PM: Image restored successfully.\n"); 599 pr_debug("PM: Image restored successfully.\n");
593 swsusp_free();
594 } 600 }
601
595 Thaw: 602 Thaw:
596 thaw_processes(); 603 thaw_processes();
597 Finish: 604 Finish:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f710e36930cc..347d2cc88cd0 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -11,6 +11,7 @@
11#include <linux/kobject.h> 11#include <linux/kobject.h>
12#include <linux/string.h> 12#include <linux/string.h>
13#include <linux/resume-trace.h> 13#include <linux/resume-trace.h>
14#include <linux/workqueue.h>
14 15
15#include "power.h" 16#include "power.h"
16 17
@@ -217,8 +218,24 @@ static struct attribute_group attr_group = {
217 .attrs = g, 218 .attrs = g,
218}; 219};
219 220
221#ifdef CONFIG_PM_RUNTIME
222struct workqueue_struct *pm_wq;
223
224static int __init pm_start_workqueue(void)
225{
226 pm_wq = create_freezeable_workqueue("pm");
227
228 return pm_wq ? 0 : -ENOMEM;
229}
230#else
231static inline int pm_start_workqueue(void) { return 0; }
232#endif
233
220static int __init pm_init(void) 234static int __init pm_init(void)
221{ 235{
236 int error = pm_start_workqueue();
237 if (error)
238 return error;
222 power_kobj = kobject_create_and_add("power", NULL); 239 power_kobj = kobject_create_and_add("power", NULL);
223 if (!power_kobj) 240 if (!power_kobj)
224 return -ENOMEM; 241 return -ENOMEM;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 26d5a26f82e3..46c5a26630a3 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void);
74 74
75extern int create_basic_memory_bitmaps(void); 75extern int create_basic_memory_bitmaps(void);
76extern void free_basic_memory_bitmaps(void); 76extern void free_basic_memory_bitmaps(void);
77extern int swsusp_shrink_memory(void); 77extern int hibernate_preallocate_memory(void);
78 78
79/** 79/**
80 * Auxiliary structure used for reading the snapshot image data and 80 * Auxiliary structure used for reading the snapshot image data and
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 523a451b45d3..97955b0e44f4 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -233,7 +233,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
233 233
234#define BM_END_OF_MAP (~0UL) 234#define BM_END_OF_MAP (~0UL)
235 235
236#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) 236#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE)
237 237
238struct bm_block { 238struct bm_block {
239 struct list_head hook; /* hook into a list of bitmap blocks */ 239 struct list_head hook; /* hook into a list of bitmap blocks */
@@ -275,7 +275,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
275 275
276/** 276/**
277 * create_bm_block_list - create a list of block bitmap objects 277 * create_bm_block_list - create a list of block bitmap objects
278 * @nr_blocks - number of blocks to allocate 278 * @pages - number of pages to track
279 * @list - list to put the allocated blocks into 279 * @list - list to put the allocated blocks into
280 * @ca - chain allocator to be used for allocating memory 280 * @ca - chain allocator to be used for allocating memory
281 */ 281 */
@@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(void)
853 struct zone *zone; 853 struct zone *zone;
854 unsigned int n = 0; 854 unsigned int n = 0;
855 855
856 for_each_zone(zone) { 856 for_each_populated_zone(zone) {
857 unsigned long pfn, max_zone_pfn; 857 unsigned long pfn, max_zone_pfn;
858 858
859 if (!is_highmem(zone)) 859 if (!is_highmem(zone))
@@ -916,7 +916,7 @@ static unsigned int count_data_pages(void)
916 unsigned long pfn, max_zone_pfn; 916 unsigned long pfn, max_zone_pfn;
917 unsigned int n = 0; 917 unsigned int n = 0;
918 918
919 for_each_zone(zone) { 919 for_each_populated_zone(zone) {
920 if (is_highmem(zone)) 920 if (is_highmem(zone))
921 continue; 921 continue;
922 922
@@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1010 struct zone *zone; 1010 struct zone *zone;
1011 unsigned long pfn; 1011 unsigned long pfn;
1012 1012
1013 for_each_zone(zone) { 1013 for_each_populated_zone(zone) {
1014 unsigned long max_zone_pfn; 1014 unsigned long max_zone_pfn;
1015 1015
1016 mark_free_pages(zone); 1016 mark_free_pages(zone);
@@ -1033,6 +1033,25 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1033static unsigned int nr_copy_pages; 1033static unsigned int nr_copy_pages;
1034/* Number of pages needed for saving the original pfns of the image pages */ 1034/* Number of pages needed for saving the original pfns of the image pages */
1035static unsigned int nr_meta_pages; 1035static unsigned int nr_meta_pages;
1036/*
1037 * Numbers of normal and highmem page frames allocated for hibernation image
1038 * before suspending devices.
1039 */
1040unsigned int alloc_normal, alloc_highmem;
1041/*
1042 * Memory bitmap used for marking saveable pages (during hibernation) or
1043 * hibernation image pages (during restore)
1044 */
1045static struct memory_bitmap orig_bm;
1046/*
1047 * Memory bitmap used during hibernation for marking allocated page frames that
1048 * will contain copies of saveable pages. During restore it is initially used
1049 * for marking hibernation image pages, but then the set bits from it are
1050 * duplicated in @orig_bm and it is released. On highmem systems it is next
1051 * used for marking "safe" highmem pages, but it has to be reinitialized for
1052 * this purpose.
1053 */
1054static struct memory_bitmap copy_bm;
1036 1055
1037/** 1056/**
1038 * swsusp_free - free pages allocated for the suspend. 1057 * swsusp_free - free pages allocated for the suspend.
@@ -1046,7 +1065,7 @@ void swsusp_free(void)
1046 struct zone *zone; 1065 struct zone *zone;
1047 unsigned long pfn, max_zone_pfn; 1066 unsigned long pfn, max_zone_pfn;
1048 1067
1049 for_each_zone(zone) { 1068 for_each_populated_zone(zone) {
1050 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 1069 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1051 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1070 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1052 if (pfn_valid(pfn)) { 1071 if (pfn_valid(pfn)) {
@@ -1064,74 +1083,286 @@ void swsusp_free(void)
1064 nr_meta_pages = 0; 1083 nr_meta_pages = 0;
1065 restore_pblist = NULL; 1084 restore_pblist = NULL;
1066 buffer = NULL; 1085 buffer = NULL;
1086 alloc_normal = 0;
1087 alloc_highmem = 0;
1067} 1088}
1068 1089
1090/* Helper functions used for the shrinking of memory. */
1091
1092#define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN)
1093
1069/** 1094/**
1070 * swsusp_shrink_memory - Try to free as much memory as needed 1095 * preallocate_image_pages - Allocate a number of pages for hibernation image
1071 * 1096 * @nr_pages: Number of page frames to allocate.
1072 * ... but do not OOM-kill anyone 1097 * @mask: GFP flags to use for the allocation.
1073 * 1098 *
1074 * Notice: all userland should be stopped before it is called, or 1099 * Return value: Number of page frames actually allocated
1075 * livelock is possible. 1100 */
1101static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1102{
1103 unsigned long nr_alloc = 0;
1104
1105 while (nr_pages > 0) {
1106 struct page *page;
1107
1108 page = alloc_image_page(mask);
1109 if (!page)
1110 break;
1111 memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1112 if (PageHighMem(page))
1113 alloc_highmem++;
1114 else
1115 alloc_normal++;
1116 nr_pages--;
1117 nr_alloc++;
1118 }
1119
1120 return nr_alloc;
1121}
1122
1123static unsigned long preallocate_image_memory(unsigned long nr_pages)
1124{
1125 return preallocate_image_pages(nr_pages, GFP_IMAGE);
1126}
1127
1128#ifdef CONFIG_HIGHMEM
1129static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1130{
1131 return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1132}
1133
1134/**
1135 * __fraction - Compute (an approximation of) x * (multiplier / base)
1076 */ 1136 */
1137static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1138{
1139 x *= multiplier;
1140 do_div(x, base);
1141 return (unsigned long)x;
1142}
1143
1144static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1145 unsigned long highmem,
1146 unsigned long total)
1147{
1148 unsigned long alloc = __fraction(nr_pages, highmem, total);
1077 1149
1078#define SHRINK_BITE 10000 1150 return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1079static inline unsigned long __shrink_memory(long tmp) 1151}
1152#else /* CONFIG_HIGHMEM */
1153static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
1080{ 1154{
1081 if (tmp > SHRINK_BITE) 1155 return 0;
1082 tmp = SHRINK_BITE;
1083 return shrink_all_memory(tmp);
1084} 1156}
1085 1157
1086int swsusp_shrink_memory(void) 1158static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1159 unsigned long highmem,
1160 unsigned long total)
1161{
1162 return 0;
1163}
1164#endif /* CONFIG_HIGHMEM */
1165
1166/**
1167 * free_unnecessary_pages - Release preallocated pages not needed for the image
1168 */
1169static void free_unnecessary_pages(void)
1170{
1171 unsigned long save_highmem, to_free_normal, to_free_highmem;
1172
1173 to_free_normal = alloc_normal - count_data_pages();
1174 save_highmem = count_highmem_pages();
1175 if (alloc_highmem > save_highmem) {
1176 to_free_highmem = alloc_highmem - save_highmem;
1177 } else {
1178 to_free_highmem = 0;
1179 to_free_normal -= save_highmem - alloc_highmem;
1180 }
1181
1182 memory_bm_position_reset(&copy_bm);
1183
1184 while (to_free_normal > 0 && to_free_highmem > 0) {
1185 unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1186 struct page *page = pfn_to_page(pfn);
1187
1188 if (PageHighMem(page)) {
1189 if (!to_free_highmem)
1190 continue;
1191 to_free_highmem--;
1192 alloc_highmem--;
1193 } else {
1194 if (!to_free_normal)
1195 continue;
1196 to_free_normal--;
1197 alloc_normal--;
1198 }
1199 memory_bm_clear_bit(&copy_bm, pfn);
1200 swsusp_unset_page_forbidden(page);
1201 swsusp_unset_page_free(page);
1202 __free_page(page);
1203 }
1204}
1205
1206/**
1207 * minimum_image_size - Estimate the minimum acceptable size of an image
1208 * @saveable: Number of saveable pages in the system.
1209 *
1210 * We want to avoid attempting to free too much memory too hard, so estimate the
1211 * minimum acceptable size of a hibernation image to use as the lower limit for
1212 * preallocating memory.
1213 *
1214 * We assume that the minimum image size should be proportional to
1215 *
1216 * [number of saveable pages] - [number of pages that can be freed in theory]
1217 *
1218 * where the second term is the sum of (1) reclaimable slab pages, (2) active
1219 * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages,
1220 * minus mapped file pages.
1221 */
1222static unsigned long minimum_image_size(unsigned long saveable)
1223{
1224 unsigned long size;
1225
1226 size = global_page_state(NR_SLAB_RECLAIMABLE)
1227 + global_page_state(NR_ACTIVE_ANON)
1228 + global_page_state(NR_INACTIVE_ANON)
1229 + global_page_state(NR_ACTIVE_FILE)
1230 + global_page_state(NR_INACTIVE_FILE)
1231 - global_page_state(NR_FILE_MAPPED);
1232
1233 return saveable <= size ? 0 : saveable - size;
1234}
1235
1236/**
1237 * hibernate_preallocate_memory - Preallocate memory for hibernation image
1238 *
1239 * To create a hibernation image it is necessary to make a copy of every page
1240 * frame in use. We also need a number of page frames to be free during
1241 * hibernation for allocations made while saving the image and for device
1242 * drivers, in case they need to allocate memory from their hibernation
1243 * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES,
1244 * respectively, both of which are rough estimates). To make this happen, we
1245 * compute the total number of available page frames and allocate at least
1246 *
1247 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES
1248 *
1249 * of them, which corresponds to the maximum size of a hibernation image.
1250 *
1251 * If image_size is set below the number following from the above formula,
1252 * the preallocation of memory is continued until the total number of saveable
1253 * pages in the system is below the requested image size or the minimum
1254 * acceptable image size returned by minimum_image_size(), whichever is greater.
1255 */
1256int hibernate_preallocate_memory(void)
1087{ 1257{
1088 long tmp;
1089 struct zone *zone; 1258 struct zone *zone;
1090 unsigned long pages = 0; 1259 unsigned long saveable, size, max_size, count, highmem, pages = 0;
1091 unsigned int i = 0; 1260 unsigned long alloc, save_highmem, pages_highmem;
1092 char *p = "-\\|/";
1093 struct timeval start, stop; 1261 struct timeval start, stop;
1262 int error;
1094 1263
1095 printk(KERN_INFO "PM: Shrinking memory... "); 1264 printk(KERN_INFO "PM: Preallocating image memory... ");
1096 do_gettimeofday(&start); 1265 do_gettimeofday(&start);
1097 do {
1098 long size, highmem_size;
1099
1100 highmem_size = count_highmem_pages();
1101 size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
1102 tmp = size;
1103 size += highmem_size;
1104 for_each_populated_zone(zone) {
1105 tmp += snapshot_additional_pages(zone);
1106 if (is_highmem(zone)) {
1107 highmem_size -=
1108 zone_page_state(zone, NR_FREE_PAGES);
1109 } else {
1110 tmp -= zone_page_state(zone, NR_FREE_PAGES);
1111 tmp += zone->lowmem_reserve[ZONE_NORMAL];
1112 }
1113 }
1114 1266
1115 if (highmem_size < 0) 1267 error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1116 highmem_size = 0; 1268 if (error)
1269 goto err_out;
1117 1270
1118 tmp += highmem_size; 1271 error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1119 if (tmp > 0) { 1272 if (error)
1120 tmp = __shrink_memory(tmp); 1273 goto err_out;
1121 if (!tmp) 1274
1122 return -ENOMEM; 1275 alloc_normal = 0;
1123 pages += tmp; 1276 alloc_highmem = 0;
1124 } else if (size > image_size / PAGE_SIZE) { 1277
1125 tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); 1278 /* Count the number of saveable data pages. */
1126 pages += tmp; 1279 save_highmem = count_highmem_pages();
1127 } 1280 saveable = count_data_pages();
1128 printk("\b%c", p[i++%4]); 1281
1129 } while (tmp > 0); 1282 /*
1283 * Compute the total number of page frames we can use (count) and the
1284 * number of pages needed for image metadata (size).
1285 */
1286 count = saveable;
1287 saveable += save_highmem;
1288 highmem = save_highmem;
1289 size = 0;
1290 for_each_populated_zone(zone) {
1291 size += snapshot_additional_pages(zone);
1292 if (is_highmem(zone))
1293 highmem += zone_page_state(zone, NR_FREE_PAGES);
1294 else
1295 count += zone_page_state(zone, NR_FREE_PAGES);
1296 }
1297 count += highmem;
1298 count -= totalreserve_pages;
1299
1300 /* Compute the maximum number of saveable pages to leave in memory. */
1301 max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES;
1302 size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1303 if (size > max_size)
1304 size = max_size;
1305 /*
1306 * If the maximum is not less than the current number of saveable pages
1307 * in memory, allocate page frames for the image and we're done.
1308 */
1309 if (size >= saveable) {
1310 pages = preallocate_image_highmem(save_highmem);
1311 pages += preallocate_image_memory(saveable - pages);
1312 goto out;
1313 }
1314
1315 /* Estimate the minimum size of the image. */
1316 pages = minimum_image_size(saveable);
1317 if (size < pages)
1318 size = min_t(unsigned long, pages, max_size);
1319
1320 /*
1321 * Let the memory management subsystem know that we're going to need a
1322 * large number of page frames to allocate and make it free some memory.
1323 * NOTE: If this is not done, performance will be hurt badly in some
1324 * test cases.
1325 */
1326 shrink_all_memory(saveable - size);
1327
1328 /*
1329 * The number of saveable pages in memory was too high, so apply some
1330 * pressure to decrease it. First, make room for the largest possible
1331 * image and fail if that doesn't work. Next, try to decrease the size
1332 * of the image as much as indicated by 'size' using allocations from
1333 * highmem and non-highmem zones separately.
1334 */
1335 pages_highmem = preallocate_image_highmem(highmem / 2);
1336 alloc = (count - max_size) - pages_highmem;
1337 pages = preallocate_image_memory(alloc);
1338 if (pages < alloc)
1339 goto err_out;
1340 size = max_size - size;
1341 alloc = size;
1342 size = preallocate_highmem_fraction(size, highmem, count);
1343 pages_highmem += size;
1344 alloc -= size;
1345 pages += preallocate_image_memory(alloc);
1346 pages += pages_highmem;
1347
1348 /*
1349 * We only need as many page frames for the image as there are saveable
1350 * pages in memory, but we have allocated more. Release the excessive
1351 * ones now.
1352 */
1353 free_unnecessary_pages();
1354
1355 out:
1130 do_gettimeofday(&stop); 1356 do_gettimeofday(&stop);
1131 printk("\bdone (%lu pages freed)\n", pages); 1357 printk(KERN_CONT "done (allocated %lu pages)\n", pages);
1132 swsusp_show_speed(&start, &stop, pages, "Freed"); 1358 swsusp_show_speed(&start, &stop, pages, "Allocated");
1133 1359
1134 return 0; 1360 return 0;
1361
1362 err_out:
1363 printk(KERN_CONT "\n");
1364 swsusp_free();
1365 return -ENOMEM;
1135} 1366}
1136 1367
1137#ifdef CONFIG_HIGHMEM 1368#ifdef CONFIG_HIGHMEM
@@ -1142,7 +1373,7 @@ int swsusp_shrink_memory(void)
1142 1373
1143static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1374static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1144{ 1375{
1145 unsigned int free_highmem = count_free_highmem_pages(); 1376 unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1146 1377
1147 if (free_highmem >= nr_highmem) 1378 if (free_highmem >= nr_highmem)
1148 nr_highmem = 0; 1379 nr_highmem = 0;
@@ -1164,19 +1395,17 @@ count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
1164static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1395static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1165{ 1396{
1166 struct zone *zone; 1397 struct zone *zone;
1167 unsigned int free = 0, meta = 0; 1398 unsigned int free = alloc_normal;
1168 1399
1169 for_each_zone(zone) { 1400 for_each_populated_zone(zone)
1170 meta += snapshot_additional_pages(zone);
1171 if (!is_highmem(zone)) 1401 if (!is_highmem(zone))
1172 free += zone_page_state(zone, NR_FREE_PAGES); 1402 free += zone_page_state(zone, NR_FREE_PAGES);
1173 }
1174 1403
1175 nr_pages += count_pages_for_highmem(nr_highmem); 1404 nr_pages += count_pages_for_highmem(nr_highmem);
1176 pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n", 1405 pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
1177 nr_pages, PAGES_FOR_IO, meta, free); 1406 nr_pages, PAGES_FOR_IO, free);
1178 1407
1179 return free > nr_pages + PAGES_FOR_IO + meta; 1408 return free > nr_pages + PAGES_FOR_IO;
1180} 1409}
1181 1410
1182#ifdef CONFIG_HIGHMEM 1411#ifdef CONFIG_HIGHMEM
@@ -1198,7 +1427,7 @@ static inline int get_highmem_buffer(int safe_needed)
1198 */ 1427 */
1199 1428
1200static inline unsigned int 1429static inline unsigned int
1201alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) 1430alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1202{ 1431{
1203 unsigned int to_alloc = count_free_highmem_pages(); 1432 unsigned int to_alloc = count_free_highmem_pages();
1204 1433
@@ -1218,7 +1447,7 @@ alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1218static inline int get_highmem_buffer(int safe_needed) { return 0; } 1447static inline int get_highmem_buffer(int safe_needed) { return 0; }
1219 1448
1220static inline unsigned int 1449static inline unsigned int
1221alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } 1450alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
1222#endif /* CONFIG_HIGHMEM */ 1451#endif /* CONFIG_HIGHMEM */
1223 1452
1224/** 1453/**
@@ -1237,51 +1466,36 @@ static int
1237swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 1466swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1238 unsigned int nr_pages, unsigned int nr_highmem) 1467 unsigned int nr_pages, unsigned int nr_highmem)
1239{ 1468{
1240 int error; 1469 int error = 0;
1241
1242 error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1243 if (error)
1244 goto Free;
1245
1246 error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1247 if (error)
1248 goto Free;
1249 1470
1250 if (nr_highmem > 0) { 1471 if (nr_highmem > 0) {
1251 error = get_highmem_buffer(PG_ANY); 1472 error = get_highmem_buffer(PG_ANY);
1252 if (error) 1473 if (error)
1253 goto Free; 1474 goto err_out;
1254 1475 if (nr_highmem > alloc_highmem) {
1255 nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); 1476 nr_highmem -= alloc_highmem;
1477 nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1478 }
1256 } 1479 }
1257 while (nr_pages-- > 0) { 1480 if (nr_pages > alloc_normal) {
1258 struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); 1481 nr_pages -= alloc_normal;
1259 1482 while (nr_pages-- > 0) {
1260 if (!page) 1483 struct page *page;
1261 goto Free;
1262 1484
1263 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1485 page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1486 if (!page)
1487 goto err_out;
1488 memory_bm_set_bit(copy_bm, page_to_pfn(page));
1489 }
1264 } 1490 }
1491
1265 return 0; 1492 return 0;
1266 1493
1267 Free: 1494 err_out:
1268 swsusp_free(); 1495 swsusp_free();
1269 return -ENOMEM; 1496 return error;
1270} 1497}
1271 1498
1272/* Memory bitmap used for marking saveable pages (during suspend) or the
1273 * suspend image pages (during resume)
1274 */
1275static struct memory_bitmap orig_bm;
1276/* Memory bitmap used on suspend for marking allocated pages that will contain
1277 * the copies of saveable pages. During resume it is initially used for
1278 * marking the suspend image pages, but then its set bits are duplicated in
1279 * @orig_bm and it is released. Next, on systems with high memory, it may be
1280 * used for marking "safe" highmem pages, but it has to be reinitialized for
1281 * this purpose.
1282 */
1283static struct memory_bitmap copy_bm;
1284
1285asmlinkage int swsusp_save(void) 1499asmlinkage int swsusp_save(void)
1286{ 1500{
1287 unsigned int nr_pages, nr_highmem; 1501 unsigned int nr_pages, nr_highmem;
@@ -1474,7 +1688,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
1474 unsigned long pfn, max_zone_pfn; 1688 unsigned long pfn, max_zone_pfn;
1475 1689
1476 /* Clear page flags */ 1690 /* Clear page flags */
1477 for_each_zone(zone) { 1691 for_each_populated_zone(zone) {
1478 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 1692 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1479 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1693 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1480 if (pfn_valid(pfn)) 1694 if (pfn_valid(pfn))
diff --git a/kernel/printk.c b/kernel/printk.c
index e10d193a833a..602033acd6c7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1075,12 +1075,6 @@ void __sched console_conditional_schedule(void)
1075} 1075}
1076EXPORT_SYMBOL(console_conditional_schedule); 1076EXPORT_SYMBOL(console_conditional_schedule);
1077 1077
1078void console_print(const char *s)
1079{
1080 printk(KERN_EMERG "%s", s);
1081}
1082EXPORT_SYMBOL(console_print);
1083
1084void console_unblank(void) 1078void console_unblank(void)
1085{ 1079{
1086 struct console *c; 1080 struct console *c;
diff --git a/kernel/sched.c b/kernel/sched.c
index e27a53685ed9..faf4d463bbff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -119,8 +119,6 @@
119 */ 119 */
120#define RUNTIME_INF ((u64)~0ULL) 120#define RUNTIME_INF ((u64)~0ULL)
121 121
122static void double_rq_lock(struct rq *rq1, struct rq *rq2);
123
124static inline int rt_policy(int policy) 122static inline int rt_policy(int policy)
125{ 123{
126 if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) 124 if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
@@ -295,12 +293,12 @@ struct task_group root_task_group;
295/* Default task group's sched entity on each cpu */ 293/* Default task group's sched entity on each cpu */
296static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); 294static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
297/* Default task group's cfs_rq on each cpu */ 295/* Default task group's cfs_rq on each cpu */
298static DEFINE_PER_CPU(struct cfs_rq, init_tg_cfs_rq) ____cacheline_aligned_in_smp; 296static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq);
299#endif /* CONFIG_FAIR_GROUP_SCHED */ 297#endif /* CONFIG_FAIR_GROUP_SCHED */
300 298
301#ifdef CONFIG_RT_GROUP_SCHED 299#ifdef CONFIG_RT_GROUP_SCHED
302static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); 300static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
303static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; 301static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
304#endif /* CONFIG_RT_GROUP_SCHED */ 302#endif /* CONFIG_RT_GROUP_SCHED */
305#else /* !CONFIG_USER_SCHED */ 303#else /* !CONFIG_USER_SCHED */
306#define root_task_group init_task_group 304#define root_task_group init_task_group
@@ -378,13 +376,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
378 376
379#else 377#else
380 378
381#ifdef CONFIG_SMP
382static int root_task_group_empty(void)
383{
384 return 1;
385}
386#endif
387
388static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } 379static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
389static inline struct task_group *task_group(struct task_struct *p) 380static inline struct task_group *task_group(struct task_struct *p)
390{ 381{
@@ -514,14 +505,6 @@ struct root_domain {
514#ifdef CONFIG_SMP 505#ifdef CONFIG_SMP
515 struct cpupri cpupri; 506 struct cpupri cpupri;
516#endif 507#endif
517#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
518 /*
519 * Preferred wake up cpu nominated by sched_mc balance that will be
520 * used when most cpus are idle in the system indicating overall very
521 * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
522 */
523 unsigned int sched_mc_preferred_wakeup_cpu;
524#endif
525}; 508};
526 509
527/* 510/*
@@ -646,9 +629,10 @@ struct rq {
646 629
647static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 630static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
648 631
649static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) 632static inline
633void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
650{ 634{
651 rq->curr->sched_class->check_preempt_curr(rq, p, sync); 635 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
652} 636}
653 637
654static inline int cpu_of(struct rq *rq) 638static inline int cpu_of(struct rq *rq)
@@ -1509,8 +1493,65 @@ static int tg_nop(struct task_group *tg, void *data)
1509#endif 1493#endif
1510 1494
1511#ifdef CONFIG_SMP 1495#ifdef CONFIG_SMP
1512static unsigned long source_load(int cpu, int type); 1496/* Used instead of source_load when we know the type == 0 */
1513static unsigned long target_load(int cpu, int type); 1497static unsigned long weighted_cpuload(const int cpu)
1498{
1499 return cpu_rq(cpu)->load.weight;
1500}
1501
1502/*
1503 * Return a low guess at the load of a migration-source cpu weighted
1504 * according to the scheduling class and "nice" value.
1505 *
1506 * We want to under-estimate the load of migration sources, to
1507 * balance conservatively.
1508 */
1509static unsigned long source_load(int cpu, int type)
1510{
1511 struct rq *rq = cpu_rq(cpu);
1512 unsigned long total = weighted_cpuload(cpu);
1513
1514 if (type == 0 || !sched_feat(LB_BIAS))
1515 return total;
1516
1517 return min(rq->cpu_load[type-1], total);
1518}
1519
1520/*
1521 * Return a high guess at the load of a migration-target cpu weighted
1522 * according to the scheduling class and "nice" value.
1523 */
1524static unsigned long target_load(int cpu, int type)
1525{
1526 struct rq *rq = cpu_rq(cpu);
1527 unsigned long total = weighted_cpuload(cpu);
1528
1529 if (type == 0 || !sched_feat(LB_BIAS))
1530 return total;
1531
1532 return max(rq->cpu_load[type-1], total);
1533}
1534
1535static struct sched_group *group_of(int cpu)
1536{
1537 struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
1538
1539 if (!sd)
1540 return NULL;
1541
1542 return sd->groups;
1543}
1544
1545static unsigned long power_of(int cpu)
1546{
1547 struct sched_group *group = group_of(cpu);
1548
1549 if (!group)
1550 return SCHED_LOAD_SCALE;
1551
1552 return group->cpu_power;
1553}
1554
1514static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); 1555static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
1515 1556
1516static unsigned long cpu_avg_load_per_task(int cpu) 1557static unsigned long cpu_avg_load_per_task(int cpu)
@@ -1695,6 +1736,8 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
1695 1736
1696#ifdef CONFIG_PREEMPT 1737#ifdef CONFIG_PREEMPT
1697 1738
1739static void double_rq_lock(struct rq *rq1, struct rq *rq2);
1740
1698/* 1741/*
1699 * fair double_lock_balance: Safely acquires both rq->locks in a fair 1742 * fair double_lock_balance: Safely acquires both rq->locks in a fair
1700 * way at the expense of forcing extra atomic operations in all 1743 * way at the expense of forcing extra atomic operations in all
@@ -1959,13 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1959} 2002}
1960 2003
1961#ifdef CONFIG_SMP 2004#ifdef CONFIG_SMP
1962
1963/* Used instead of source_load when we know the type == 0 */
1964static unsigned long weighted_cpuload(const int cpu)
1965{
1966 return cpu_rq(cpu)->load.weight;
1967}
1968
1969/* 2005/*
1970 * Is this task likely cache-hot: 2006 * Is this task likely cache-hot:
1971 */ 2007 */
@@ -2239,185 +2275,6 @@ void kick_process(struct task_struct *p)
2239 preempt_enable(); 2275 preempt_enable();
2240} 2276}
2241EXPORT_SYMBOL_GPL(kick_process); 2277EXPORT_SYMBOL_GPL(kick_process);
2242
2243/*
2244 * Return a low guess at the load of a migration-source cpu weighted
2245 * according to the scheduling class and "nice" value.
2246 *
2247 * We want to under-estimate the load of migration sources, to
2248 * balance conservatively.
2249 */
2250static unsigned long source_load(int cpu, int type)
2251{
2252 struct rq *rq = cpu_rq(cpu);
2253 unsigned long total = weighted_cpuload(cpu);
2254
2255 if (type == 0 || !sched_feat(LB_BIAS))
2256 return total;
2257
2258 return min(rq->cpu_load[type-1], total);
2259}
2260
2261/*
2262 * Return a high guess at the load of a migration-target cpu weighted
2263 * according to the scheduling class and "nice" value.
2264 */
2265static unsigned long target_load(int cpu, int type)
2266{
2267 struct rq *rq = cpu_rq(cpu);
2268 unsigned long total = weighted_cpuload(cpu);
2269
2270 if (type == 0 || !sched_feat(LB_BIAS))
2271 return total;
2272
2273 return max(rq->cpu_load[type-1], total);
2274}
2275
2276/*
2277 * find_idlest_group finds and returns the least busy CPU group within the
2278 * domain.
2279 */
2280static struct sched_group *
2281find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2282{
2283 struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
2284 unsigned long min_load = ULONG_MAX, this_load = 0;
2285 int load_idx = sd->forkexec_idx;
2286 int imbalance = 100 + (sd->imbalance_pct-100)/2;
2287
2288 do {
2289 unsigned long load, avg_load;
2290 int local_group;
2291 int i;
2292
2293 /* Skip over this group if it has no CPUs allowed */
2294 if (!cpumask_intersects(sched_group_cpus(group),
2295 &p->cpus_allowed))
2296 continue;
2297
2298 local_group = cpumask_test_cpu(this_cpu,
2299 sched_group_cpus(group));
2300
2301 /* Tally up the load of all CPUs in the group */
2302 avg_load = 0;
2303
2304 for_each_cpu(i, sched_group_cpus(group)) {
2305 /* Bias balancing toward cpus of our domain */
2306 if (local_group)
2307 load = source_load(i, load_idx);
2308 else
2309 load = target_load(i, load_idx);
2310
2311 avg_load += load;
2312 }
2313
2314 /* Adjust by relative CPU power of the group */
2315 avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
2316
2317 if (local_group) {
2318 this_load = avg_load;
2319 this = group;
2320 } else if (avg_load < min_load) {
2321 min_load = avg_load;
2322 idlest = group;
2323 }
2324 } while (group = group->next, group != sd->groups);
2325
2326 if (!idlest || 100*this_load < imbalance*min_load)
2327 return NULL;
2328 return idlest;
2329}
2330
2331/*
2332 * find_idlest_cpu - find the idlest cpu among the cpus in group.
2333 */
2334static int
2335find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
2336{
2337 unsigned long load, min_load = ULONG_MAX;
2338 int idlest = -1;
2339 int i;
2340
2341 /* Traverse only the allowed CPUs */
2342 for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
2343 load = weighted_cpuload(i);
2344
2345 if (load < min_load || (load == min_load && i == this_cpu)) {
2346 min_load = load;
2347 idlest = i;
2348 }
2349 }
2350
2351 return idlest;
2352}
2353
2354/*
2355 * sched_balance_self: balance the current task (running on cpu) in domains
2356 * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
2357 * SD_BALANCE_EXEC.
2358 *
2359 * Balance, ie. select the least loaded group.
2360 *
2361 * Returns the target CPU number, or the same CPU if no balancing is needed.
2362 *
2363 * preempt must be disabled.
2364 */
2365static int sched_balance_self(int cpu, int flag)
2366{
2367 struct task_struct *t = current;
2368 struct sched_domain *tmp, *sd = NULL;
2369
2370 for_each_domain(cpu, tmp) {
2371 /*
2372 * If power savings logic is enabled for a domain, stop there.
2373 */
2374 if (tmp->flags & SD_POWERSAVINGS_BALANCE)
2375 break;
2376 if (tmp->flags & flag)
2377 sd = tmp;
2378 }
2379
2380 if (sd)
2381 update_shares(sd);
2382
2383 while (sd) {
2384 struct sched_group *group;
2385 int new_cpu, weight;
2386
2387 if (!(sd->flags & flag)) {
2388 sd = sd->child;
2389 continue;
2390 }
2391
2392 group = find_idlest_group(sd, t, cpu);
2393 if (!group) {
2394 sd = sd->child;
2395 continue;
2396 }
2397
2398 new_cpu = find_idlest_cpu(group, t, cpu);
2399 if (new_cpu == -1 || new_cpu == cpu) {
2400 /* Now try balancing at a lower domain level of cpu */
2401 sd = sd->child;
2402 continue;
2403 }
2404
2405 /* Now try balancing at a lower domain level of new_cpu */
2406 cpu = new_cpu;
2407 weight = cpumask_weight(sched_domain_span(sd));
2408 sd = NULL;
2409 for_each_domain(cpu, tmp) {
2410 if (weight <= cpumask_weight(sched_domain_span(tmp)))
2411 break;
2412 if (tmp->flags & flag)
2413 sd = tmp;
2414 }
2415 /* while loop will break here if sd == NULL */
2416 }
2417
2418 return cpu;
2419}
2420
2421#endif /* CONFIG_SMP */ 2278#endif /* CONFIG_SMP */
2422 2279
2423/** 2280/**
@@ -2455,37 +2312,22 @@ void task_oncpu_function_call(struct task_struct *p,
2455 * 2312 *
2456 * returns failure only if the task is already active. 2313 * returns failure only if the task is already active.
2457 */ 2314 */
2458static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) 2315static int try_to_wake_up(struct task_struct *p, unsigned int state,
2316 int wake_flags)
2459{ 2317{
2460 int cpu, orig_cpu, this_cpu, success = 0; 2318 int cpu, orig_cpu, this_cpu, success = 0;
2461 unsigned long flags; 2319 unsigned long flags;
2462 long old_state;
2463 struct rq *rq; 2320 struct rq *rq;
2464 2321
2465 if (!sched_feat(SYNC_WAKEUPS)) 2322 if (!sched_feat(SYNC_WAKEUPS))
2466 sync = 0; 2323 wake_flags &= ~WF_SYNC;
2467
2468#ifdef CONFIG_SMP
2469 if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
2470 struct sched_domain *sd;
2471 2324
2472 this_cpu = raw_smp_processor_id(); 2325 this_cpu = get_cpu();
2473 cpu = task_cpu(p);
2474
2475 for_each_domain(this_cpu, sd) {
2476 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2477 update_shares(sd);
2478 break;
2479 }
2480 }
2481 }
2482#endif
2483 2326
2484 smp_wmb(); 2327 smp_wmb();
2485 rq = task_rq_lock(p, &flags); 2328 rq = task_rq_lock(p, &flags);
2486 update_rq_clock(rq); 2329 update_rq_clock(rq);
2487 old_state = p->state; 2330 if (!(p->state & state))
2488 if (!(old_state & state))
2489 goto out; 2331 goto out;
2490 2332
2491 if (p->se.on_rq) 2333 if (p->se.on_rq)
@@ -2493,27 +2335,29 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2493 2335
2494 cpu = task_cpu(p); 2336 cpu = task_cpu(p);
2495 orig_cpu = cpu; 2337 orig_cpu = cpu;
2496 this_cpu = smp_processor_id();
2497 2338
2498#ifdef CONFIG_SMP 2339#ifdef CONFIG_SMP
2499 if (unlikely(task_running(rq, p))) 2340 if (unlikely(task_running(rq, p)))
2500 goto out_activate; 2341 goto out_activate;
2501 2342
2502 cpu = p->sched_class->select_task_rq(p, sync); 2343 /*
2503 if (cpu != orig_cpu) { 2344 * In order to handle concurrent wakeups and release the rq->lock
2345 * we put the task in TASK_WAKING state.
2346 *
2347 * First fix up the nr_uninterruptible count:
2348 */
2349 if (task_contributes_to_load(p))
2350 rq->nr_uninterruptible--;
2351 p->state = TASK_WAKING;
2352 task_rq_unlock(rq, &flags);
2353
2354 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2355 if (cpu != orig_cpu)
2504 set_task_cpu(p, cpu); 2356 set_task_cpu(p, cpu);
2505 task_rq_unlock(rq, &flags);
2506 /* might preempt at this point */
2507 rq = task_rq_lock(p, &flags);
2508 old_state = p->state;
2509 if (!(old_state & state))
2510 goto out;
2511 if (p->se.on_rq)
2512 goto out_running;
2513 2357
2514 this_cpu = smp_processor_id(); 2358 rq = task_rq_lock(p, &flags);
2515 cpu = task_cpu(p); 2359 WARN_ON(p->state != TASK_WAKING);
2516 } 2360 cpu = task_cpu(p);
2517 2361
2518#ifdef CONFIG_SCHEDSTATS 2362#ifdef CONFIG_SCHEDSTATS
2519 schedstat_inc(rq, ttwu_count); 2363 schedstat_inc(rq, ttwu_count);
@@ -2533,7 +2377,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2533out_activate: 2377out_activate:
2534#endif /* CONFIG_SMP */ 2378#endif /* CONFIG_SMP */
2535 schedstat_inc(p, se.nr_wakeups); 2379 schedstat_inc(p, se.nr_wakeups);
2536 if (sync) 2380 if (wake_flags & WF_SYNC)
2537 schedstat_inc(p, se.nr_wakeups_sync); 2381 schedstat_inc(p, se.nr_wakeups_sync);
2538 if (orig_cpu != cpu) 2382 if (orig_cpu != cpu)
2539 schedstat_inc(p, se.nr_wakeups_migrate); 2383 schedstat_inc(p, se.nr_wakeups_migrate);
@@ -2562,7 +2406,7 @@ out_activate:
2562 2406
2563out_running: 2407out_running:
2564 trace_sched_wakeup(rq, p, success); 2408 trace_sched_wakeup(rq, p, success);
2565 check_preempt_curr(rq, p, sync); 2409 check_preempt_curr(rq, p, wake_flags);
2566 2410
2567 p->state = TASK_RUNNING; 2411 p->state = TASK_RUNNING;
2568#ifdef CONFIG_SMP 2412#ifdef CONFIG_SMP
@@ -2571,6 +2415,7 @@ out_running:
2571#endif 2415#endif
2572out: 2416out:
2573 task_rq_unlock(rq, &flags); 2417 task_rq_unlock(rq, &flags);
2418 put_cpu();
2574 2419
2575 return success; 2420 return success;
2576} 2421}
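
The hunk above replaces try_to_wake_up()'s lone `sync` integer with a `wake_flags` bitmask (WF_SYNC, WF_FORK), so SYNC_WAKEUPS can simply clear one bit while any other hints survive. A minimal standalone sketch of that calling convention follows; the flag values, the feature toggle and the printf are illustrative assumptions, not the kernel code.

/* Illustrative sketch of a wake_flags bitmask (values are assumptions). */
#include <stdio.h>

#define WF_SYNC 0x01	/* waker will sleep right after this wakeup */
#define WF_FORK 0x02	/* wakeup of a freshly forked child */

static int feature_sync_wakeups = 1;	/* stands in for sched_feat(SYNC_WAKEUPS) */

static int try_to_wake_up_sketch(const char *task, unsigned int wake_flags)
{
	if (!feature_sync_wakeups)
		wake_flags &= ~WF_SYNC;	/* drop the hint, keep the other bits */

	printf("%s: sync=%d fork=%d\n", task,
	       !!(wake_flags & WF_SYNC), !!(wake_flags & WF_FORK));
	return 1;			/* "success" */
}

int main(void)
{
	try_to_wake_up_sketch("pipe reader", WF_SYNC);
	try_to_wake_up_sketch("forked child", WF_FORK);
	return 0;
}
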
@@ -2613,6 +2458,7 @@ static void __sched_fork(struct task_struct *p)
2613 p->se.avg_overlap = 0; 2458 p->se.avg_overlap = 0;
2614 p->se.start_runtime = 0; 2459 p->se.start_runtime = 0;
2615 p->se.avg_wakeup = sysctl_sched_wakeup_granularity; 2460 p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
2461 p->se.avg_running = 0;
2616 2462
2617#ifdef CONFIG_SCHEDSTATS 2463#ifdef CONFIG_SCHEDSTATS
2618 p->se.wait_start = 0; 2464 p->se.wait_start = 0;
@@ -2674,11 +2520,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
2674 2520
2675 __sched_fork(p); 2521 __sched_fork(p);
2676 2522
2677#ifdef CONFIG_SMP
2678 cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
2679#endif
2680 set_task_cpu(p, cpu);
2681
2682 /* 2523 /*
2683 * Make sure we do not leak PI boosting priority to the child. 2524 * Make sure we do not leak PI boosting priority to the child.
2684 */ 2525 */
@@ -2709,6 +2550,11 @@ void sched_fork(struct task_struct *p, int clone_flags)
2709 if (!rt_prio(p->prio)) 2550 if (!rt_prio(p->prio))
2710 p->sched_class = &fair_sched_class; 2551 p->sched_class = &fair_sched_class;
2711 2552
2553#ifdef CONFIG_SMP
2554 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
2555#endif
2556 set_task_cpu(p, cpu);
2557
2712#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2558#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2713 if (likely(sched_info_on())) 2559 if (likely(sched_info_on()))
2714 memset(&p->sched_info, 0, sizeof(p->sched_info)); 2560 memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -2754,7 +2600,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2754 inc_nr_running(rq); 2600 inc_nr_running(rq);
2755 } 2601 }
2756 trace_sched_wakeup_new(rq, p, 1); 2602 trace_sched_wakeup_new(rq, p, 1);
2757 check_preempt_curr(rq, p, 0); 2603 check_preempt_curr(rq, p, WF_FORK);
2758#ifdef CONFIG_SMP 2604#ifdef CONFIG_SMP
2759 if (p->sched_class->task_wake_up) 2605 if (p->sched_class->task_wake_up)
2760 p->sched_class->task_wake_up(rq, p); 2606 p->sched_class->task_wake_up(rq, p);
@@ -3263,7 +3109,7 @@ out:
3263void sched_exec(void) 3109void sched_exec(void)
3264{ 3110{
3265 int new_cpu, this_cpu = get_cpu(); 3111 int new_cpu, this_cpu = get_cpu();
3266 new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); 3112 new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
3267 put_cpu(); 3113 put_cpu();
3268 if (new_cpu != this_cpu) 3114 if (new_cpu != this_cpu)
3269 sched_migrate_task(current, new_cpu); 3115 sched_migrate_task(current, new_cpu);
@@ -3683,11 +3529,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
3683 *imbalance = sds->min_load_per_task; 3529 *imbalance = sds->min_load_per_task;
3684 sds->busiest = sds->group_min; 3530 sds->busiest = sds->group_min;
3685 3531
3686 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
3687 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
3688 group_first_cpu(sds->group_leader);
3689 }
3690
3691 return 1; 3532 return 1;
3692 3533
3693} 3534}
@@ -3711,7 +3552,18 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
3711} 3552}
3712#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ 3553#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
3713 3554
3714unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) 3555
3556unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
3557{
3558 return SCHED_LOAD_SCALE;
3559}
3560
3561unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
3562{
3563 return default_scale_freq_power(sd, cpu);
3564}
3565
3566unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
3715{ 3567{
3716 unsigned long weight = cpumask_weight(sched_domain_span(sd)); 3568 unsigned long weight = cpumask_weight(sched_domain_span(sd));
3717 unsigned long smt_gain = sd->smt_gain; 3569 unsigned long smt_gain = sd->smt_gain;
@@ -3721,6 +3573,11 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
3721 return smt_gain; 3573 return smt_gain;
3722} 3574}
3723 3575
3576unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
3577{
3578 return default_scale_smt_power(sd, cpu);
3579}
3580
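
default_scale_freq_power()/default_scale_smt_power() plus the __weak arch_* wrappers above are the usual weak-symbol override pattern: an architecture that provides a strong symbol wins at link time, everyone else falls back to the generic default. Below is a self-contained userspace sketch of that pattern; arch_scale_power() and the 1024 constant are invented stand-ins.

/* Weak-symbol default pattern: a strong arch_scale_power() elsewhere
 * replaces this weak one at link time; otherwise the default is used. */
#include <stdio.h>

unsigned long default_scale_power(int cpu)
{
	(void)cpu;
	return 1024;			/* stands in for SCHED_LOAD_SCALE */
}

__attribute__((weak)) unsigned long arch_scale_power(int cpu)
{
	return default_scale_power(cpu);
}

int main(void)
{
	printf("cpu0 power scale: %lu\n", arch_scale_power(0));
	return 0;
}
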
3724unsigned long scale_rt_power(int cpu) 3581unsigned long scale_rt_power(int cpu)
3725{ 3582{
3726 struct rq *rq = cpu_rq(cpu); 3583 struct rq *rq = cpu_rq(cpu);
@@ -3745,10 +3602,19 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
3745 unsigned long power = SCHED_LOAD_SCALE; 3602 unsigned long power = SCHED_LOAD_SCALE;
3746 struct sched_group *sdg = sd->groups; 3603 struct sched_group *sdg = sd->groups;
3747 3604
3748 /* here we could scale based on cpufreq */ 3605 if (sched_feat(ARCH_POWER))
3606 power *= arch_scale_freq_power(sd, cpu);
3607 else
3608 power *= default_scale_freq_power(sd, cpu);
3609
3610 power >>= SCHED_LOAD_SHIFT;
3749 3611
3750 if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { 3612 if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
3751 power *= arch_scale_smt_power(sd, cpu); 3613 if (sched_feat(ARCH_POWER))
3614 power *= arch_scale_smt_power(sd, cpu);
3615 else
3616 power *= default_scale_smt_power(sd, cpu);
3617
3752 power >>= SCHED_LOAD_SHIFT; 3618 power >>= SCHED_LOAD_SHIFT;
3753 } 3619 }
3754 3620
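
update_cpu_power() now folds the frequency and SMT factors into cpu_power with fixed-point multiply-and-shift steps relative to SCHED_LOAD_SCALE. A worked standalone example with made-up scaling values:

/* Fixed-point scaling as in update_cpu_power(): each factor is given
 * relative to SCHED_LOAD_SCALE and folded in by multiply + shift. */
#include <stdio.h>

#define SCHED_LOAD_SHIFT 10
#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT)

int main(void)
{
	unsigned long power = SCHED_LOAD_SCALE;	/* nominal 1024 */
	unsigned long freq_scale = 768;		/* assume ~75% of full frequency */
	unsigned long smt_scale = 589;		/* assume smt_gain 1178 over 2 siblings */

	power = (power * freq_scale) >> SCHED_LOAD_SHIFT;
	power = (power * smt_scale) >> SCHED_LOAD_SHIFT;

	printf("effective cpu_power: %lu of %lu\n", power, SCHED_LOAD_SCALE);
	return 0;
}
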
@@ -4161,26 +4027,6 @@ ret:
4161 return NULL; 4027 return NULL;
4162} 4028}
4163 4029
4164static struct sched_group *group_of(int cpu)
4165{
4166 struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
4167
4168 if (!sd)
4169 return NULL;
4170
4171 return sd->groups;
4172}
4173
4174static unsigned long power_of(int cpu)
4175{
4176 struct sched_group *group = group_of(cpu);
4177
4178 if (!group)
4179 return SCHED_LOAD_SCALE;
4180
4181 return group->cpu_power;
4182}
4183
4184/* 4030/*
4185 * find_busiest_queue - find the busiest runqueue among the cpus in group. 4031 * find_busiest_queue - find the busiest runqueue among the cpus in group.
4186 */ 4032 */
@@ -5465,14 +5311,13 @@ static inline void schedule_debug(struct task_struct *prev)
5465#endif 5311#endif
5466} 5312}
5467 5313
5468static void put_prev_task(struct rq *rq, struct task_struct *prev) 5314static void put_prev_task(struct rq *rq, struct task_struct *p)
5469{ 5315{
5470 if (prev->state == TASK_RUNNING) { 5316 u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
5471 u64 runtime = prev->se.sum_exec_runtime;
5472 5317
5473 runtime -= prev->se.prev_sum_exec_runtime; 5318 update_avg(&p->se.avg_running, runtime);
5474 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
5475 5319
5320 if (p->state == TASK_RUNNING) {
5476 /* 5321 /*
5477 * In order to avoid avg_overlap growing stale when we are 5322 * In order to avoid avg_overlap growing stale when we are
5478 * indeed overlapping and hence not getting put to sleep, grow 5323 * indeed overlapping and hence not getting put to sleep, grow
@@ -5482,9 +5327,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
5482 * correlates to the amount of cache footprint a task can 5327 * correlates to the amount of cache footprint a task can
5483 * build up. 5328 * build up.
5484 */ 5329 */
5485 update_avg(&prev->se.avg_overlap, runtime); 5330 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
5331 update_avg(&p->se.avg_overlap, runtime);
5332 } else {
5333 update_avg(&p->se.avg_running, 0);
5486 } 5334 }
5487 prev->sched_class->put_prev_task(rq, prev); 5335 p->sched_class->put_prev_task(rq, p);
5488} 5336}
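
The rewritten put_prev_task() feeds every slice into se.avg_running via update_avg(), a decaying average of recent runtime that is pulled back toward zero when the task stops running. The sketch below assumes a 1/8 smoothing step purely for illustration; the real step size is not shown in this diff.

/* Decaying average in the style of avg_running/avg_overlap: each new
 * sample moves the average by a fraction of the difference. */
#include <stdio.h>
#include <stdint.h>

static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)sample - (int64_t)*avg;

	*avg += diff / 8;		/* assumed smoothing step */
}

int main(void)
{
	uint64_t avg_running = 0;
	uint64_t samples[] = { 800000, 900000, 100000, 0, 0 };	/* ns, made up */

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg_running, samples[i]);
		printf("sample %u: avg_running = %llu ns\n", i,
		       (unsigned long long)avg_running);
	}
	return 0;
}
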
5489 5337
5490/* 5338/*
@@ -5716,10 +5564,10 @@ asmlinkage void __sched preempt_schedule_irq(void)
5716 5564
5717#endif /* CONFIG_PREEMPT */ 5565#endif /* CONFIG_PREEMPT */
5718 5566
5719int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, 5567int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
5720 void *key) 5568 void *key)
5721{ 5569{
5722 return try_to_wake_up(curr->private, mode, sync); 5570 return try_to_wake_up(curr->private, mode, wake_flags);
5723} 5571}
5724EXPORT_SYMBOL(default_wake_function); 5572EXPORT_SYMBOL(default_wake_function);
5725 5573
@@ -5733,14 +5581,14 @@ EXPORT_SYMBOL(default_wake_function);
5733 * zero in this (rare) case, and we handle it by continuing to scan the queue. 5581 * zero in this (rare) case, and we handle it by continuing to scan the queue.
5734 */ 5582 */
5735static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 5583static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
5736 int nr_exclusive, int sync, void *key) 5584 int nr_exclusive, int wake_flags, void *key)
5737{ 5585{
5738 wait_queue_t *curr, *next; 5586 wait_queue_t *curr, *next;
5739 5587
5740 list_for_each_entry_safe(curr, next, &q->task_list, task_list) { 5588 list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
5741 unsigned flags = curr->flags; 5589 unsigned flags = curr->flags;
5742 5590
5743 if (curr->func(curr, mode, sync, key) && 5591 if (curr->func(curr, mode, wake_flags, key) &&
5744 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) 5592 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
5745 break; 5593 break;
5746 } 5594 }
@@ -5801,16 +5649,16 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
5801 int nr_exclusive, void *key) 5649 int nr_exclusive, void *key)
5802{ 5650{
5803 unsigned long flags; 5651 unsigned long flags;
5804 int sync = 1; 5652 int wake_flags = WF_SYNC;
5805 5653
5806 if (unlikely(!q)) 5654 if (unlikely(!q))
5807 return; 5655 return;
5808 5656
5809 if (unlikely(!nr_exclusive)) 5657 if (unlikely(!nr_exclusive))
5810 sync = 0; 5658 wake_flags = 0;
5811 5659
5812 spin_lock_irqsave(&q->lock, flags); 5660 spin_lock_irqsave(&q->lock, flags);
5813 __wake_up_common(q, mode, nr_exclusive, sync, key); 5661 __wake_up_common(q, mode, nr_exclusive, wake_flags, key);
5814 spin_unlock_irqrestore(&q->lock, flags); 5662 spin_unlock_irqrestore(&q->lock, flags);
5815} 5663}
5816EXPORT_SYMBOL_GPL(__wake_up_sync_key); 5664EXPORT_SYMBOL_GPL(__wake_up_sync_key);
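
default_wake_function() and __wake_up_common() now merely thread wake_flags through to try_to_wake_up(); the exclusive-waiter accounting is unchanged. The standalone imitation below (structures and flag values invented) shows why all non-exclusive waiters are woken but only nr_exclusive exclusive ones are.

/* Imitation of the __wake_up_common() loop: wake everyone, but stop
 * after nr_exclusive waiters that carry the exclusive flag. */
#include <stdio.h>

#define WQ_FLAG_EXCLUSIVE 0x01

struct waiter {
	const char *name;
	unsigned int flags;
};

static int wake(struct waiter *w, unsigned int mode, int wake_flags)
{
	(void)mode;
	printf("waking %s (wake_flags=%#x)\n", w->name, wake_flags);
	return 1;			/* report a successful wakeup */
}

static void wake_up_common(struct waiter *q, int n, int nr_exclusive,
			   int wake_flags)
{
	for (int i = 0; i < n; i++) {
		if (wake(&q[i], 0, wake_flags) &&
		    (q[i].flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}
}

int main(void)
{
	struct waiter q[] = {
		{ "poller", 0 },
		{ "worker1", WQ_FLAG_EXCLUSIVE },
		{ "worker2", WQ_FLAG_EXCLUSIVE },	/* not woken: nr_exclusive = 1 */
	};

	wake_up_common(q, 3, 1, 0x01 /* assume this means WF_SYNC */);
	return 0;
}
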
@@ -8000,9 +7848,7 @@ static int sd_degenerate(struct sched_domain *sd)
8000 } 7848 }
8001 7849
8002 /* Following flags don't use groups */ 7850 /* Following flags don't use groups */
8003 if (sd->flags & (SD_WAKE_IDLE | 7851 if (sd->flags & (SD_WAKE_AFFINE))
8004 SD_WAKE_AFFINE |
8005 SD_WAKE_BALANCE))
8006 return 0; 7852 return 0;
8007 7853
8008 return 1; 7854 return 1;
@@ -8019,10 +7865,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
8019 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) 7865 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
8020 return 0; 7866 return 0;
8021 7867
8022 /* Does parent contain flags not in child? */
8023 /* WAKE_BALANCE is a subset of WAKE_AFFINE */
8024 if (cflags & SD_WAKE_AFFINE)
8025 pflags &= ~SD_WAKE_BALANCE;
8026 /* Flags needing groups don't count if only 1 group in parent */ 7868 /* Flags needing groups don't count if only 1 group in parent */
8027 if (parent->groups == parent->groups->next) { 7869 if (parent->groups == parent->groups->next) {
8028 pflags &= ~(SD_LOAD_BALANCE | 7870 pflags &= ~(SD_LOAD_BALANCE |
@@ -8708,10 +8550,10 @@ static void set_domain_attribute(struct sched_domain *sd,
8708 request = attr->relax_domain_level; 8550 request = attr->relax_domain_level;
8709 if (request < sd->level) { 8551 if (request < sd->level) {
8710 /* turn off idle balance on this domain */ 8552 /* turn off idle balance on this domain */
8711 sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); 8553 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
8712 } else { 8554 } else {
8713 /* turn on idle balance on this domain */ 8555 /* turn on idle balance on this domain */
8714 sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); 8556 sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
8715 } 8557 }
8716} 8558}
8717 8559
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5ddbd0891267..efb84409bc43 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -395,6 +395,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
395 PN(se.sum_exec_runtime); 395 PN(se.sum_exec_runtime);
396 PN(se.avg_overlap); 396 PN(se.avg_overlap);
397 PN(se.avg_wakeup); 397 PN(se.avg_wakeup);
398 PN(se.avg_running);
398 399
399 nr_switches = p->nvcsw + p->nivcsw; 400 nr_switches = p->nvcsw + p->nivcsw;
400 401
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a097e909e80f..990b188803ce 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -712,7 +712,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
712 712
713 if (!initial) { 713 if (!initial) {
714 /* sleeps up to a single latency don't count. */ 714
715 if (sched_feat(NEW_FAIR_SLEEPERS)) { 715 if (sched_feat(FAIR_SLEEPERS)) {
716 unsigned long thresh = sysctl_sched_latency; 716 unsigned long thresh = sysctl_sched_latency;
717 717
718 /* 718 /*
@@ -726,6 +726,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
726 task_of(se)->policy != SCHED_IDLE)) 726 task_of(se)->policy != SCHED_IDLE))
727 thresh = calc_delta_fair(thresh, se); 727 thresh = calc_delta_fair(thresh, se);
728 728
729 /*
730 * Halve their sleep time's effect, to allow
731 * for a gentler effect of sleepers:
732 */
733 if (sched_feat(GENTLE_FAIR_SLEEPERS))
734 thresh >>= 1;
735
729 vruntime -= thresh; 736 vruntime -= thresh;
730 } 737 }
731 } 738 }
@@ -758,10 +765,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
758 765
759static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) 766static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
760{ 767{
761 if (cfs_rq->last == se) 768 if (!se || cfs_rq->last == se)
762 cfs_rq->last = NULL; 769 cfs_rq->last = NULL;
763 770
764 if (cfs_rq->next == se) 771 if (!se || cfs_rq->next == se)
765 cfs_rq->next = NULL; 772 cfs_rq->next = NULL;
766} 773}
767 774
@@ -1063,83 +1070,6 @@ static void yield_task_fair(struct rq *rq)
1063 se->vruntime = rightmost->vruntime + 1; 1070 se->vruntime = rightmost->vruntime + 1;
1064} 1071}
1065 1072
1066/*
1067 * wake_idle() will wake a task on an idle cpu if task->cpu is
1068 * not idle and an idle cpu is available. The span of cpus to
1069 * search starts with cpus closest then further out as needed,
1070 * so we always favor a closer, idle cpu.
1071 * Domains may include CPUs that are not usable for migration,
1072 * hence we need to mask them out (rq->rd->online)
1073 *
1074 * Returns the CPU we should wake onto.
1075 */
1076#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1077
1078#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
1079
1080static int wake_idle(int cpu, struct task_struct *p)
1081{
1082 struct sched_domain *sd;
1083 int i;
1084 unsigned int chosen_wakeup_cpu;
1085 int this_cpu;
1086 struct rq *task_rq = task_rq(p);
1087
1088 /*
1089 * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
1090 * are idle and this is not a kernel thread and this task's affinity
1091 * allows it to be moved to preferred cpu, then just move!
1092 */
1093
1094 this_cpu = smp_processor_id();
1095 chosen_wakeup_cpu =
1096 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
1097
1098 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
1099 idle_cpu(cpu) && idle_cpu(this_cpu) &&
1100 p->mm && !(p->flags & PF_KTHREAD) &&
1101 cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
1102 return chosen_wakeup_cpu;
1103
1104 /*
1105 * If it is idle, then it is the best cpu to run this task.
1106 *
1107 * This cpu is also the best, if it has more than one task already.
1108 * Siblings must also be busy (in most cases) as they didn't already
1109 * pick up the extra load from this cpu, and hence we need not check
1110 * sibling runqueue info. This avoids the checks and cache-miss
1111 * penalties associated with that.
1112 */
1113 if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
1114 return cpu;
1115
1116 for_each_domain(cpu, sd) {
1117 if ((sd->flags & SD_WAKE_IDLE)
1118 || ((sd->flags & SD_WAKE_IDLE_FAR)
1119 && !task_hot(p, task_rq->clock, sd))) {
1120 for_each_cpu_and(i, sched_domain_span(sd),
1121 &p->cpus_allowed) {
1122 if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
1123 if (i != task_cpu(p)) {
1124 schedstat_inc(p,
1125 se.nr_wakeups_idle);
1126 }
1127 return i;
1128 }
1129 }
1130 } else {
1131 break;
1132 }
1133 }
1134 return cpu;
1135}
1136#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
1137static inline int wake_idle(int cpu, struct task_struct *p)
1138{
1139 return cpu;
1140}
1141#endif
1142
1143#ifdef CONFIG_SMP 1073#ifdef CONFIG_SMP
1144 1074
1145#ifdef CONFIG_FAIR_GROUP_SCHED 1075#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1226,25 +1156,34 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
1226 1156
1227#endif 1157#endif
1228 1158
1229static int 1159static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1230wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1231 struct task_struct *p, int prev_cpu, int this_cpu, int sync,
1232 int idx, unsigned long load, unsigned long this_load,
1233 unsigned int imbalance)
1234{ 1160{
1235 struct task_struct *curr = this_rq->curr; 1161 struct task_struct *curr = current;
1236 struct task_group *tg; 1162 unsigned long this_load, load;
1237 unsigned long tl = this_load; 1163 int idx, this_cpu, prev_cpu;
1238 unsigned long tl_per_task; 1164 unsigned long tl_per_task;
1165 unsigned int imbalance;
1166 struct task_group *tg;
1239 unsigned long weight; 1167 unsigned long weight;
1240 int balanced; 1168 int balanced;
1241 1169
1242 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) 1170 idx = sd->wake_idx;
1243 return 0; 1171 this_cpu = smp_processor_id();
1172 prev_cpu = task_cpu(p);
1173 load = source_load(prev_cpu, idx);
1174 this_load = target_load(this_cpu, idx);
1244 1175
1245 if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || 1176 if (sync) {
1246 p->se.avg_overlap > sysctl_sched_migration_cost)) 1177 if (sched_feat(SYNC_LESS) &&
1247 sync = 0; 1178 (curr->se.avg_overlap > sysctl_sched_migration_cost ||
1179 p->se.avg_overlap > sysctl_sched_migration_cost))
1180 sync = 0;
1181 } else {
1182 if (sched_feat(SYNC_MORE) &&
1183 (curr->se.avg_overlap < sysctl_sched_migration_cost &&
1184 p->se.avg_overlap < sysctl_sched_migration_cost))
1185 sync = 1;
1186 }
1248 1187
1249 /* 1188 /*
1250 * If sync wakeup then subtract the (maximum possible) 1189 * If sync wakeup then subtract the (maximum possible)
@@ -1255,24 +1194,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1255 tg = task_group(current); 1194 tg = task_group(current);
1256 weight = current->se.load.weight; 1195 weight = current->se.load.weight;
1257 1196
1258 tl += effective_load(tg, this_cpu, -weight, -weight); 1197 this_load += effective_load(tg, this_cpu, -weight, -weight);
1259 load += effective_load(tg, prev_cpu, 0, -weight); 1198 load += effective_load(tg, prev_cpu, 0, -weight);
1260 } 1199 }
1261 1200
1262 tg = task_group(p); 1201 tg = task_group(p);
1263 weight = p->se.load.weight; 1202 weight = p->se.load.weight;
1264 1203
1204 imbalance = 100 + (sd->imbalance_pct - 100) / 2;
1205
1265 /* 1206 /*
1266 * In low-load situations, where prev_cpu is idle and this_cpu is idle 1207 * In low-load situations, where prev_cpu is idle and this_cpu is idle
1267 * due to the sync cause above having dropped tl to 0, we'll always have 1208 * due to the sync cause above having dropped this_load to 0, we'll
1268 * an imbalance, but there's really nothing you can do about that, so 1209 * always have an imbalance, but there's really nothing you can do
1269 * that's good too. 1210 * about that, so that's good too.
1270 * 1211 *
1271 * Otherwise check if either cpus are near enough in load to allow this 1212 * Otherwise check if either cpus are near enough in load to allow this
1272 * task to be woken on this_cpu. 1213 * task to be woken on this_cpu.
1273 */ 1214 */
1274 balanced = !tl || 1215 balanced = !this_load ||
1275 100*(tl + effective_load(tg, this_cpu, weight, weight)) <= 1216 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
1276 imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); 1217 imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
1277 1218
1278 /* 1219 /*
@@ -1286,14 +1227,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1286 schedstat_inc(p, se.nr_wakeups_affine_attempts); 1227 schedstat_inc(p, se.nr_wakeups_affine_attempts);
1287 tl_per_task = cpu_avg_load_per_task(this_cpu); 1228 tl_per_task = cpu_avg_load_per_task(this_cpu);
1288 1229
1289 if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= 1230 if (balanced ||
1290 tl_per_task)) { 1231 (this_load <= load &&
1232 this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
1291 /* 1233 /*
1292 * This domain has SD_WAKE_AFFINE and 1234 * This domain has SD_WAKE_AFFINE and
1293 * p is cache cold in this domain, and 1235 * p is cache cold in this domain, and
1294 * there is no bad imbalance. 1236 * there is no bad imbalance.
1295 */ 1237 */
1296 schedstat_inc(this_sd, ttwu_move_affine); 1238 schedstat_inc(sd, ttwu_move_affine);
1297 schedstat_inc(p, se.nr_wakeups_affine); 1239 schedstat_inc(p, se.nr_wakeups_affine);
1298 1240
1299 return 1; 1241 return 1;
@@ -1301,65 +1243,215 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1301 return 0; 1243 return 0;
1302} 1244}
1303 1245
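
The core of the new wake_affine() is the `balanced` test: wake onto this_cpu only when its load, with the woken task added, stays within the domain's imbalance percentage of prev_cpu's load. The numeric example below is simplified; it drops the effective_load() group-scheduling terms and uses invented load figures.

/* Simplified wake_affine() balance check with made-up numbers. */
#include <stdio.h>

int main(void)
{
	unsigned long this_load = 1024;		/* load already on this_cpu */
	unsigned long prev_load = 2048;		/* load on the task's previous cpu */
	unsigned long task_weight = 1024;	/* weight the woken task would add */
	unsigned int imbalance_pct = 125;	/* typical sd->imbalance_pct */
	unsigned int imbalance = 100 + (imbalance_pct - 100) / 2;

	int balanced = !this_load ||
		100 * (this_load + task_weight) <= imbalance * prev_load;

	printf("imbalance=%u -> %s affine wakeup\n",
	       imbalance, balanced ? "allow" : "reject");
	return 0;
}
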
1304static int select_task_rq_fair(struct task_struct *p, int sync) 1246/*
1247 * find_idlest_group finds and returns the least busy CPU group within the
1248 * domain.
1249 */
1250static struct sched_group *
1251find_idlest_group(struct sched_domain *sd, struct task_struct *p,
1252 int this_cpu, int load_idx)
1305{ 1253{
1306 struct sched_domain *sd, *this_sd = NULL; 1254 struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
1307 int prev_cpu, this_cpu, new_cpu; 1255 unsigned long min_load = ULONG_MAX, this_load = 0;
1308 unsigned long load, this_load; 1256 int imbalance = 100 + (sd->imbalance_pct-100)/2;
1309 struct rq *this_rq;
1310 unsigned int imbalance;
1311 int idx;
1312 1257
1313 prev_cpu = task_cpu(p); 1258 do {
1314 this_cpu = smp_processor_id(); 1259 unsigned long load, avg_load;
1315 this_rq = cpu_rq(this_cpu); 1260 int local_group;
1316 new_cpu = prev_cpu; 1261 int i;
1317 1262
1318 /* 1263 /* Skip over this group if it has no CPUs allowed */
1319 * 'this_sd' is the first domain that both 1264 if (!cpumask_intersects(sched_group_cpus(group),
1320 * this_cpu and prev_cpu are present in: 1265 &p->cpus_allowed))
1321 */ 1266 continue;
1322 for_each_domain(this_cpu, sd) { 1267
1323 if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { 1268 local_group = cpumask_test_cpu(this_cpu,
1324 this_sd = sd; 1269 sched_group_cpus(group));
1325 break; 1270
1271 /* Tally up the load of all CPUs in the group */
1272 avg_load = 0;
1273
1274 for_each_cpu(i, sched_group_cpus(group)) {
1275 /* Bias balancing toward cpus of our domain */
1276 if (local_group)
1277 load = source_load(i, load_idx);
1278 else
1279 load = target_load(i, load_idx);
1280
1281 avg_load += load;
1282 }
1283
1284 /* Adjust by relative CPU power of the group */
1285 avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
1286
1287 if (local_group) {
1288 this_load = avg_load;
1289 this = group;
1290 } else if (avg_load < min_load) {
1291 min_load = avg_load;
1292 idlest = group;
1293 }
1294 } while (group = group->next, group != sd->groups);
1295
1296 if (!idlest || 100*this_load < imbalance*min_load)
1297 return NULL;
1298 return idlest;
1299}
1300
1301/*
1302 * find_idlest_cpu - find the idlest cpu among the cpus in group.
1303 */
1304static int
1305find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
1306{
1307 unsigned long load, min_load = ULONG_MAX;
1308 int idlest = -1;
1309 int i;
1310
1311 /* Traverse only the allowed CPUs */
1312 for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
1313 load = weighted_cpuload(i);
1314
1315 if (load < min_load || (load == min_load && i == this_cpu)) {
1316 min_load = load;
1317 idlest = i;
1326 } 1318 }
1327 } 1319 }
1328 1320
1329 if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) 1321 return idlest;
1330 goto out; 1322}
1331 1323
1332 /* 1324/*
1333 * Check for affine wakeup and passive balancing possibilities. 1325 * sched_balance_self: balance the current task (running on cpu) in domains
1334 */ 1326 * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
1335 if (!this_sd) 1327 * SD_BALANCE_EXEC.
1328 *
1329 * Balance, ie. select the least loaded group.
1330 *
1331 * Returns the target CPU number, or the same CPU if no balancing is needed.
1332 *
1333 * preempt must be disabled.
1334 */
1335static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
1336{
1337 struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
1338 int cpu = smp_processor_id();
1339 int prev_cpu = task_cpu(p);
1340 int new_cpu = cpu;
1341 int want_affine = 0;
1342 int want_sd = 1;
1343 int sync = wake_flags & WF_SYNC;
1344
1345 if (sd_flag & SD_BALANCE_WAKE) {
1346 if (sched_feat(AFFINE_WAKEUPS))
1347 want_affine = 1;
1348 new_cpu = prev_cpu;
1349 }
1350
1351 rcu_read_lock();
1352 for_each_domain(cpu, tmp) {
1353 /*
1354 * If power savings logic is enabled for a domain, see if we
1355 * are not overloaded, if so, don't balance wider.
1356 */
1357 if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) {
1358 unsigned long power = 0;
1359 unsigned long nr_running = 0;
1360 unsigned long capacity;
1361 int i;
1362
1363 for_each_cpu(i, sched_domain_span(tmp)) {
1364 power += power_of(i);
1365 nr_running += cpu_rq(i)->cfs.nr_running;
1366 }
1367
1368 capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
1369
1370 if (tmp->flags & SD_POWERSAVINGS_BALANCE)
1371 nr_running /= 2;
1372
1373 if (nr_running < capacity)
1374 want_sd = 0;
1375 }
1376
1377 if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
1378 cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
1379
1380 affine_sd = tmp;
1381 want_affine = 0;
1382 }
1383
1384 if (!want_sd && !want_affine)
1385 break;
1386
1387 if (!(tmp->flags & sd_flag))
1388 continue;
1389
1390 if (want_sd)
1391 sd = tmp;
1392 }
1393
1394 if (sched_feat(LB_SHARES_UPDATE)) {
1395 /*
1396 * Pick the largest domain to update shares over
1397 */
1398 tmp = sd;
1399 if (affine_sd && (!tmp ||
1400 cpumask_weight(sched_domain_span(affine_sd)) >
1401 cpumask_weight(sched_domain_span(sd))))
1402 tmp = affine_sd;
1403
1404 if (tmp)
1405 update_shares(tmp);
1406 }
1407
1408 if (affine_sd && wake_affine(affine_sd, p, sync)) {
1409 new_cpu = cpu;
1336 goto out; 1410 goto out;
1411 }
1337 1412
1338 idx = this_sd->wake_idx; 1413 while (sd) {
1414 int load_idx = sd->forkexec_idx;
1415 struct sched_group *group;
1416 int weight;
1339 1417
1340 imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; 1418 if (!(sd->flags & sd_flag)) {
1419 sd = sd->child;
1420 continue;
1421 }
1341 1422
1342 load = source_load(prev_cpu, idx); 1423 if (sd_flag & SD_BALANCE_WAKE)
1343 this_load = target_load(this_cpu, idx); 1424 load_idx = sd->wake_idx;
1344 1425
1345 if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, 1426 group = find_idlest_group(sd, p, cpu, load_idx);
1346 load, this_load, imbalance)) 1427 if (!group) {
1347 return this_cpu; 1428 sd = sd->child;
1429 continue;
1430 }
1348 1431
1349 /* 1432 new_cpu = find_idlest_cpu(group, p, cpu);
1350 * Start passive balancing when half the imbalance_pct 1433 if (new_cpu == -1 || new_cpu == cpu) {
1351 * limit is reached. 1434 /* Now try balancing at a lower domain level of cpu */
1352 */ 1435 sd = sd->child;
1353 if (this_sd->flags & SD_WAKE_BALANCE) { 1436 continue;
1354 if (imbalance*this_load <= 100*load) {
1355 schedstat_inc(this_sd, ttwu_move_balance);
1356 schedstat_inc(p, se.nr_wakeups_passive);
1357 return this_cpu;
1358 } 1437 }
1438
1439 /* Now try balancing at a lower domain level of new_cpu */
1440 cpu = new_cpu;
1441 weight = cpumask_weight(sched_domain_span(sd));
1442 sd = NULL;
1443 for_each_domain(cpu, tmp) {
1444 if (weight <= cpumask_weight(sched_domain_span(tmp)))
1445 break;
1446 if (tmp->flags & sd_flag)
1447 sd = tmp;
1448 }
1449 /* while loop will break here if sd == NULL */
1359 } 1450 }
1360 1451
1361out: 1452out:
1362 return wake_idle(new_cpu, p); 1453 rcu_read_unlock();
1454 return new_cpu;
1363} 1455}
1364#endif /* CONFIG_SMP */ 1456#endif /* CONFIG_SMP */
1365 1457
@@ -1472,11 +1564,12 @@ static void set_next_buddy(struct sched_entity *se)
1472/* 1564/*
1473 * Preempt the current task with a newly woken task if needed: 1565 * Preempt the current task with a newly woken task if needed:
1474 */ 1566 */
1475static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) 1567static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
1476{ 1568{
1477 struct task_struct *curr = rq->curr; 1569 struct task_struct *curr = rq->curr;
1478 struct sched_entity *se = &curr->se, *pse = &p->se; 1570 struct sched_entity *se = &curr->se, *pse = &p->se;
1479 struct cfs_rq *cfs_rq = task_cfs_rq(curr); 1571 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1572 int sync = wake_flags & WF_SYNC;
1480 1573
1481 update_curr(cfs_rq); 1574 update_curr(cfs_rq);
1482 1575
@@ -1502,7 +1595,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1502 */ 1595 */
1503 if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) 1596 if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
1504 set_last_buddy(se); 1597 set_last_buddy(se);
1505 set_next_buddy(pse); 1598 if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
1599 set_next_buddy(pse);
1506 1600
1507 /* 1601 /*
1508 * We can come here with TIF_NEED_RESCHED already set from new task 1602 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1524,16 +1618,25 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1524 return; 1618 return;
1525 } 1619 }
1526 1620
1527 if (!sched_feat(WAKEUP_PREEMPT)) 1621 if ((sched_feat(WAKEUP_SYNC) && sync) ||
1528 return; 1622 (sched_feat(WAKEUP_OVERLAP) &&
1529 1623 (se->avg_overlap < sysctl_sched_migration_cost &&
1530 if (sched_feat(WAKEUP_OVERLAP) && (sync || 1624 pse->avg_overlap < sysctl_sched_migration_cost))) {
1531 (se->avg_overlap < sysctl_sched_migration_cost &&
1532 pse->avg_overlap < sysctl_sched_migration_cost))) {
1533 resched_task(curr); 1625 resched_task(curr);
1534 return; 1626 return;
1535 } 1627 }
1536 1628
1629 if (sched_feat(WAKEUP_RUNNING)) {
1630 if (pse->avg_running < se->avg_running) {
1631 set_next_buddy(pse);
1632 resched_task(curr);
1633 return;
1634 }
1635 }
1636
1637 if (!sched_feat(WAKEUP_PREEMPT))
1638 return;
1639
1537 find_matching_se(&se, &pse); 1640 find_matching_se(&se, &pse);
1538 1641
1539 BUG_ON(!pse); 1642 BUG_ON(!pse);
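
check_preempt_wakeup() now tries the WAKEUP_SYNC, WAKEUP_OVERLAP and new WAKEUP_RUNNING heuristics before the generic WAKEUP_PREEMPT gate. A compact decision-function sketch follows; the feature settings, threshold and sample values are assumptions for illustration.

/* Sketch of the reordered wakeup-preemption checks. */
#include <stdio.h>

struct ent { unsigned long avg_overlap, avg_running; };

static int feat_wakeup_sync, feat_wakeup_overlap;
static int feat_wakeup_running = 1, feat_wakeup_preempt = 1;
static unsigned long migration_cost = 500000;	/* ns, assumed */

static int should_preempt(struct ent *curr, struct ent *woken, int sync)
{
	if ((feat_wakeup_sync && sync) ||
	    (feat_wakeup_overlap &&
	     curr->avg_overlap < migration_cost &&
	     woken->avg_overlap < migration_cost))
		return 1;

	if (feat_wakeup_running && woken->avg_running < curr->avg_running)
		return 1;			/* favour tasks that run short */

	if (!feat_wakeup_preempt)
		return 0;

	return 1;				/* fall through to the vruntime check */
}

int main(void)
{
	struct ent curr = { 900000, 4000000 }, woken = { 100000, 200000 };

	printf("preempt: %d\n", should_preempt(&curr, &woken, 0));
	return 0;
}
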
@@ -1556,8 +1659,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1556 /* 1659 /*
1557 * If se was a buddy, clear it so that it will have to earn 1660 * If se was a buddy, clear it so that it will have to earn
1558 * the favour again. 1661 * the favour again.
1662 *
1663 * If se was not a buddy, clear the buddies because neither
1664 * was eligible to run, let them earn it again.
1665 *
1666 * IOW. unconditionally clear buddies.
1559 */ 1667 */
1560 __clear_buddies(cfs_rq, se); 1668 __clear_buddies(cfs_rq, NULL);
1561 set_next_entity(cfs_rq, se); 1669 set_next_entity(cfs_rq, se);
1562 cfs_rq = group_cfs_rq(se); 1670 cfs_rq = group_cfs_rq(se);
1563 } while (cfs_rq); 1671 } while (cfs_rq);
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index e2dc63a5815d..0d94083582c7 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -1,17 +1,123 @@
1SCHED_FEAT(NEW_FAIR_SLEEPERS, 0) 1/*
2 * Disregards a certain amount of sleep time (sched_latency_ns) and
3 * considers the task to be running during that period. This gives it
4 * a service deficit on wakeup, allowing it to run sooner.
5 */
6SCHED_FEAT(FAIR_SLEEPERS, 1)
7
8/*
9 * Only give sleepers 50% of their service deficit. This allows
10 * them to run sooner, but does not allow tons of sleepers to
11 * rip the spread apart.
12 */
13SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
14
15/*
16 * By not normalizing the sleep time, heavy tasks get an effective
17 * longer period, and lighter task an effective shorter period they
18 * are considered running.
19 */
2SCHED_FEAT(NORMALIZED_SLEEPER, 0) 20SCHED_FEAT(NORMALIZED_SLEEPER, 0)
3SCHED_FEAT(ADAPTIVE_GRAN, 1) 21
4SCHED_FEAT(WAKEUP_PREEMPT, 1) 22/*
23 * Place new tasks ahead so that they do not starve already running
24 * tasks
25 */
5SCHED_FEAT(START_DEBIT, 1) 26SCHED_FEAT(START_DEBIT, 1)
27
28/*
29 * Should wakeups try to preempt running tasks.
30 */
31SCHED_FEAT(WAKEUP_PREEMPT, 1)
32
33/*
34 * Compute wakeup_gran based on task behaviour, clipped to
35 * [0, sched_wakeup_gran_ns]
36 */
37SCHED_FEAT(ADAPTIVE_GRAN, 1)
38
39/*
40 * When converting the wakeup granularity to virtual time, do it such
41 * that heavier tasks preempting a lighter task have an edge.
42 */
43SCHED_FEAT(ASYM_GRAN, 1)
44
45/*
46 * Always wakeup-preempt SYNC wakeups, see SYNC_WAKEUPS.
47 */
48SCHED_FEAT(WAKEUP_SYNC, 0)
49
50/*
51 * Wakeup preempt based on task behaviour. Tasks that do not overlap
52 * don't get preempted.
53 */
54SCHED_FEAT(WAKEUP_OVERLAP, 0)
55
56/*
57 * Wakeup preemption towards tasks that run short
58 */
59SCHED_FEAT(WAKEUP_RUNNING, 0)
60
61/*
62 * Use the SYNC wakeup hint; pipes and the like use this to indicate that
63 * the remote end is likely to consume the data we just wrote, and
64 * therefore gets a cache benefit from being placed on the same cpu; see
65 * also AFFINE_WAKEUPS.
66 */
67SCHED_FEAT(SYNC_WAKEUPS, 1)
68
69/*
70 * Based on load and program behaviour, see if it makes sense to place
71 * a newly woken task on the same cpu as the task that woke it --
72 * improve cache locality. Typically used with SYNC wakeups as
73 * generated by pipes and the like, see also SYNC_WAKEUPS.
74 */
6SCHED_FEAT(AFFINE_WAKEUPS, 1) 75SCHED_FEAT(AFFINE_WAKEUPS, 1)
76
77/*
78 * Weaken SYNC hint based on overlap
79 */
80SCHED_FEAT(SYNC_LESS, 1)
81
82/*
83 * Add SYNC hint based on overlap
84 */
85SCHED_FEAT(SYNC_MORE, 0)
86
87/*
88 * Prefer to schedule the task we woke last (assuming it failed
89 * wakeup-preemption), since it's likely going to consume data we
90 * touched, which increases cache locality.
91 */
92SCHED_FEAT(NEXT_BUDDY, 0)
93
94/*
95 * Prefer to schedule the task that ran last (when we did
96 * wake-preempt) as that likely will touch the same data, increases
97 * cache locality.
98 */
99SCHED_FEAT(LAST_BUDDY, 1)
100
101/*
102 * Consider buddies to be cache hot; this decreases the likelihood of a
103 * cache buddy being migrated away and increases cache locality.
104 */
7SCHED_FEAT(CACHE_HOT_BUDDY, 1) 105SCHED_FEAT(CACHE_HOT_BUDDY, 1)
8SCHED_FEAT(SYNC_WAKEUPS, 1) 106
107/*
108 * Use arch dependent cpu power functions
109 */
110SCHED_FEAT(ARCH_POWER, 0)
111
9SCHED_FEAT(HRTICK, 0) 112SCHED_FEAT(HRTICK, 0)
10SCHED_FEAT(DOUBLE_TICK, 0) 113SCHED_FEAT(DOUBLE_TICK, 0)
11SCHED_FEAT(ASYM_GRAN, 1)
12SCHED_FEAT(LB_BIAS, 1) 114SCHED_FEAT(LB_BIAS, 1)
13SCHED_FEAT(LB_WAKEUP_UPDATE, 1) 115SCHED_FEAT(LB_SHARES_UPDATE, 1)
14SCHED_FEAT(ASYM_EFF_LOAD, 1) 116SCHED_FEAT(ASYM_EFF_LOAD, 1)
15SCHED_FEAT(WAKEUP_OVERLAP, 0) 117
16SCHED_FEAT(LAST_BUDDY, 1) 118/*
119 * Spin-wait on mutex acquisition when the mutex owner is running on
120 * another cpu -- assumes that when the owner is running, it will soon
121 * release the lock. Decreases scheduling overhead.
122 */
17SCHED_FEAT(OWNER_SPIN, 1) 123SCHED_FEAT(OWNER_SPIN, 1)
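
sched_features.h is consumed as an X-macro: the SCHED_FEAT() list above is expanded once into enum bits and once into the default feature mask, and sched_feat() then tests a bit at runtime. The sketch below imitates that expansion with a cut-down list; it is not the kernel's exact macro set.

/* X-macro imitation of the SCHED_FEAT() mechanism. */
#include <stdio.h>

#define FEATURES(F)			\
	F(FAIR_SLEEPERS, 1)		\
	F(GENTLE_FAIR_SLEEPERS, 1)	\
	F(ARCH_POWER, 0)

#define AS_ENUM(name, enabled) FEAT_##name,
enum { FEATURES(AS_ENUM) NR_FEATURES };

#define AS_DEFAULT(name, enabled) ((enabled) << FEAT_##name) |
static unsigned int sysctl_features = FEATURES(AS_DEFAULT) 0;

#define feat(name) (!!(sysctl_features & (1U << FEAT_##name)))

int main(void)
{
	printf("FAIR_SLEEPERS=%d ARCH_POWER=%d\n",
	       feat(FAIR_SLEEPERS), feat(ARCH_POWER));
	return 0;
}
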
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 499672c10cbd..a8b448af004b 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -6,7 +6,7 @@
6 */ 6 */
7 7
8#ifdef CONFIG_SMP 8#ifdef CONFIG_SMP
9static int select_task_rq_idle(struct task_struct *p, int sync) 9static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
10{ 10{
11 return task_cpu(p); /* IDLE tasks are never migrated */ 11
12} 12}
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync)
14/* 14/*
15 * Idle tasks are unconditionally rescheduled: 15 * Idle tasks are unconditionally rescheduled:
16 */ 16 */
17static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync) 17static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
18{ 18{
19 resched_task(rq->idle); 19 resched_task(rq->idle);
20} 20}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 2eb4bd6a526c..13de7126a6ab 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -938,10 +938,13 @@ static void yield_task_rt(struct rq *rq)
938#ifdef CONFIG_SMP 938#ifdef CONFIG_SMP
939static int find_lowest_rq(struct task_struct *task); 939static int find_lowest_rq(struct task_struct *task);
940 940
941static int select_task_rq_rt(struct task_struct *p, int sync) 941static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
942{ 942{
943 struct rq *rq = task_rq(p); 943 struct rq *rq = task_rq(p);
944 944
945 if (sd_flag != SD_BALANCE_WAKE)
946 return smp_processor_id();
947
945 /* 948 /*
946 * If the current task is an RT task, then 949 * If the current task is an RT task, then
947 * try to see if we can wake this RT task up on another 950 * try to see if we can wake this RT task up on another
@@ -999,7 +1002,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
999/* 1002/*
1000 * Preempt the current task with a newly woken task if needed: 1003 * Preempt the current task with a newly woken task if needed:
1001 */ 1004 */
1002static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) 1005static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1003{ 1006{
1004 if (p->prio < rq->curr->prio) { 1007 if (p->prio < rq->curr->prio) {
1005 resched_task(rq->curr); 1008 resched_task(rq->curr);
diff --git a/kernel/smp.c b/kernel/smp.c
index 94188b8ecc33..8e218500ab14 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -177,6 +177,11 @@ void generic_smp_call_function_interrupt(void)
177 int cpu = get_cpu(); 177 int cpu = get_cpu();
178 178
179 /* 179 /*
180 * Shouldn't receive this interrupt on a cpu that is not yet online.
181 */
182 WARN_ON_ONCE(!cpu_online(cpu));
183
184 /*
180 * Ensure entry is visible on call_function_queue after we have 185 * Ensure entry is visible on call_function_queue after we have
181 * entered the IPI. See comment in smp_call_function_many. 186 * entered the IPI. See comment in smp_call_function_many.
182 * If we don't have this, then we may miss an entry on the list 187 * If we don't have this, then we may miss an entry on the list
@@ -230,6 +235,11 @@ void generic_smp_call_function_single_interrupt(void)
230 unsigned int data_flags; 235 unsigned int data_flags;
231 LIST_HEAD(list); 236 LIST_HEAD(list);
232 237
238 /*
239 * Shouldn't receive this interrupt on a cpu that is not yet online.
240 */
241 WARN_ON_ONCE(!cpu_online(smp_processor_id()));
242
233 spin_lock(&q->lock); 243 spin_lock(&q->lock);
234 list_replace_init(&q->list, &list); 244 list_replace_init(&q->list, &list);
235 spin_unlock(&q->lock); 245 spin_unlock(&q->lock);
@@ -285,8 +295,14 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
285 */ 295 */
286 this_cpu = get_cpu(); 296 this_cpu = get_cpu();
287 297
288 /* Can deadlock when called with interrupts disabled */ 298 /*
289 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); 299 * Can deadlock when called with interrupts disabled.
300 * We allow CPUs that are not yet online though, as no one else can
301 * send smp call function interrupt to this cpu and as such deadlocks
302 * can't happen.
303 */
304 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
305 && !oops_in_progress);
290 306
291 if (cpu == this_cpu) { 307 if (cpu == this_cpu) {
292 local_irq_save(flags); 308 local_irq_save(flags);
@@ -329,8 +345,14 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
329{ 345{
330 csd_lock(data); 346 csd_lock(data);
331 347
332 /* Can deadlock when called with interrupts disabled */ 348 /*
333 WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); 349 * Can deadlock when called with interrupts disabled.
350 * We allow CPUs that are not yet online though, as no one else can
351 * send smp call function interrupt to this cpu and as such deadlocks
352 * can't happen.
353 */
354 WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
355 && !oops_in_progress);
334 356
335 generic_exec_single(cpu, data, wait); 357 generic_exec_single(cpu, data, wait);
336} 358}
@@ -365,8 +387,14 @@ void smp_call_function_many(const struct cpumask *mask,
365 unsigned long flags; 387 unsigned long flags;
366 int cpu, next_cpu, this_cpu = smp_processor_id(); 388 int cpu, next_cpu, this_cpu = smp_processor_id();
367 389
368 /* Can deadlock when called with interrupts disabled */ 390 /*
369 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); 391 * Can deadlock when called with interrupts disabled.
392 * We allow CPUs that are not yet online though, as no one else can
393 * send smp call function interrupt to this cpu and as such deadlocks
394 * can't happen.
395 */
396 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
397 && !oops_in_progress);
370 398
371 /* So, what's a CPU they want? Ignoring this one. */ 399 /* So, what's a CPU they want? Ignoring this one. */
372 cpu = cpumask_first_and(mask, cpu_online_mask); 400 cpu = cpumask_first_and(mask, cpu_online_mask);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7db25067cd2d..f8749e5216e0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -57,7 +57,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
57static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); 57static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
58 58
59char *softirq_to_name[NR_SOFTIRQS] = { 59char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", 60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
61 "TASKLET", "SCHED", "HRTIMER", "RCU" 61 "TASKLET", "SCHED", "HRTIMER", "RCU"
62}; 62};
63 63
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3125cff1c570..1a631ba684a4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -91,6 +91,9 @@ extern int sysctl_nr_trim_pages;
91#ifdef CONFIG_RCU_TORTURE_TEST 91#ifdef CONFIG_RCU_TORTURE_TEST
92extern int rcutorture_runnable; 92extern int rcutorture_runnable;
93#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 93#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94#ifdef CONFIG_BLOCK
95extern int blk_iopoll_enabled;
96#endif
94 97
95/* Constants used for minimum and maximum */ 98/* Constants used for minimum and maximum */
96#ifdef CONFIG_DETECT_SOFTLOCKUP 99#ifdef CONFIG_DETECT_SOFTLOCKUP
@@ -997,7 +1000,16 @@ static struct ctl_table kern_table[] = {
997 .proc_handler = &proc_dointvec, 1000 .proc_handler = &proc_dointvec,
998 }, 1001 },
999#endif 1002#endif
1000 1003#ifdef CONFIG_BLOCK
1004 {
1005 .ctl_name = CTL_UNNUMBERED,
1006 .procname = "blk_iopoll",
1007 .data = &blk_iopoll_enabled,
1008 .maxlen = sizeof(int),
1009 .mode = 0644,
1010 .proc_handler = &proc_dointvec,
1011 },
1012#endif
1001/* 1013/*
1002 * NOTE: do not add new entries to this table unless you have read 1014 * NOTE: do not add new entries to this table unless you have read
1003 * Documentation/sysctl/ctl_unnumbered.txt 1015 * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 888adbcca30c..ea8384d3caa7 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -108,7 +108,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
108/* 108/*
109 * Send taskstats data in @skb to listener with nl_pid @pid 109 * Send taskstats data in @skb to listener with nl_pid @pid
110 */ 110 */
111static int send_reply(struct sk_buff *skb, pid_t pid) 111static int send_reply(struct sk_buff *skb, struct genl_info *info)
112{ 112{
113 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); 113 struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
114 void *reply = genlmsg_data(genlhdr); 114 void *reply = genlmsg_data(genlhdr);
@@ -120,7 +120,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
120 return rc; 120 return rc;
121 } 121 }
122 122
123 return genlmsg_unicast(skb, pid); 123 return genlmsg_reply(skb, info);
124} 124}
125 125
126/* 126/*
@@ -150,7 +150,7 @@ static void send_cpu_listeners(struct sk_buff *skb,
150 if (!skb_next) 150 if (!skb_next)
151 break; 151 break;
152 } 152 }
153 rc = genlmsg_unicast(skb_cur, s->pid); 153 rc = genlmsg_unicast(&init_net, skb_cur, s->pid);
154 if (rc == -ECONNREFUSED) { 154 if (rc == -ECONNREFUSED) {
155 s->valid = 0; 155 s->valid = 0;
156 delcount++; 156 delcount++;
@@ -418,7 +418,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
418 goto err; 418 goto err;
419 } 419 }
420 420
421 rc = send_reply(rep_skb, info->snd_pid); 421 rc = send_reply(rep_skb, info);
422 422
423err: 423err:
424 fput_light(file, fput_needed); 424 fput_light(file, fput_needed);
@@ -487,7 +487,7 @@ free_return_rc:
487 } else 487 } else
488 goto err; 488 goto err;
489 489
490 return send_reply(rep_skb, info->snd_pid); 490 return send_reply(rep_skb, info);
491err: 491err:
492 nlmsg_free(rep_skb); 492 nlmsg_free(rep_skb);
493 return rc; 493 return rc;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1ea0d1234f4a..e71634604400 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -11,12 +11,18 @@ config NOP_TRACER
11 11
12config HAVE_FTRACE_NMI_ENTER 12config HAVE_FTRACE_NMI_ENTER
13 bool 13 bool
14 help
15 See Documentation/trace/ftrace-implementation.txt
14 16
15config HAVE_FUNCTION_TRACER 17config HAVE_FUNCTION_TRACER
16 bool 18 bool
19 help
20 See Documentation/trace/ftrace-implementation.txt
17 21
18config HAVE_FUNCTION_GRAPH_TRACER 22config HAVE_FUNCTION_GRAPH_TRACER
19 bool 23 bool
24 help
25 See Documentation/trace/ftrace-implementation.txt
20 26
21config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
22 bool 28 bool
@@ -28,21 +34,25 @@ config HAVE_FUNCTION_GRAPH_FP_TEST
28config HAVE_FUNCTION_TRACE_MCOUNT_TEST 34config HAVE_FUNCTION_TRACE_MCOUNT_TEST
29 bool 35 bool
30 help 36 help
31 This gets selected when the arch tests the function_trace_stop 37 See Documentation/trace/ftrace-implementation.txt
32 variable at the mcount call site. Otherwise, this variable
33 is tested by the called function.
34 38
35config HAVE_DYNAMIC_FTRACE 39config HAVE_DYNAMIC_FTRACE
36 bool 40 bool
41 help
42 See Documentation/trace/ftrace-implementation.txt
37 43
38config HAVE_FTRACE_MCOUNT_RECORD 44config HAVE_FTRACE_MCOUNT_RECORD
39 bool 45 bool
46 help
47 See Documentation/trace/ftrace-implementation.txt
40 48
41config HAVE_HW_BRANCH_TRACER 49config HAVE_HW_BRANCH_TRACER
42 bool 50 bool
43 51
44config HAVE_SYSCALL_TRACEPOINTS 52config HAVE_SYSCALL_TRACEPOINTS
45 bool 53 bool
54 help
55 See Documentation/trace/ftrace-implementation.txt
46 56
47config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
48 bool 58 bool
@@ -469,6 +479,18 @@ config FTRACE_STARTUP_TEST
469 functioning properly. It will do tests on all the configured 479 functioning properly. It will do tests on all the configured
470 tracers of ftrace. 480 tracers of ftrace.
471 481
482config EVENT_TRACE_TEST_SYSCALLS
483 bool "Run selftest on syscall events"
484 depends on FTRACE_STARTUP_TEST
485 help
486 This option will also enable testing every syscall event.
487 It enables each event, runs various loads with the event enabled,
488 and then disables it again. This adds a bit more time to kernel boot
489 since it is done for every system call defined.
490
491 TBD - enable a way to actually call the syscalls as we test their
492 events
493
472config MMIOTRACE 494config MMIOTRACE
473 bool "Memory mapped IO tracing" 495 bool "Memory mapped IO tracing"
474 depends on HAVE_MMIOTRACE_SUPPORT && PCI 496 depends on HAVE_MMIOTRACE_SUPPORT && PCI
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8c804e24f96f..cc615f84751b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1323,11 +1323,10 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
1323 1323
1324enum { 1324enum {
1325 FTRACE_ITER_FILTER = (1 << 0), 1325 FTRACE_ITER_FILTER = (1 << 0),
1326 FTRACE_ITER_CONT = (1 << 1), 1326 FTRACE_ITER_NOTRACE = (1 << 1),
1327 FTRACE_ITER_NOTRACE = (1 << 2), 1327 FTRACE_ITER_FAILURES = (1 << 2),
1328 FTRACE_ITER_FAILURES = (1 << 3), 1328 FTRACE_ITER_PRINTALL = (1 << 3),
1329 FTRACE_ITER_PRINTALL = (1 << 4), 1329 FTRACE_ITER_HASH = (1 << 4),
1330 FTRACE_ITER_HASH = (1 << 5),
1331}; 1330};
1332 1331
1333#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1332#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
@@ -1337,8 +1336,7 @@ struct ftrace_iterator {
1337 int hidx; 1336 int hidx;
1338 int idx; 1337 int idx;
1339 unsigned flags; 1338 unsigned flags;
1340 unsigned char buffer[FTRACE_BUFF_MAX+1]; 1339 struct trace_parser parser;
1341 unsigned buffer_idx;
1342}; 1340};
1343 1341
1344static void * 1342static void *
@@ -1407,7 +1405,7 @@ static int t_hash_show(struct seq_file *m, void *v)
1407 if (rec->ops->print) 1405 if (rec->ops->print)
1408 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1406 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
1409 1407
1410 seq_printf(m, "%pf:%pf", (void *)rec->ip, (void *)rec->ops->func); 1408 seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func);
1411 1409
1412 if (rec->data) 1410 if (rec->data)
1413 seq_printf(m, ":%p", rec->data); 1411 seq_printf(m, ":%p", rec->data);
@@ -1517,7 +1515,7 @@ static int t_show(struct seq_file *m, void *v)
1517 if (!rec) 1515 if (!rec)
1518 return 0; 1516 return 0;
1519 1517
1520 seq_printf(m, "%pf\n", (void *)rec->ip); 1518 seq_printf(m, "%ps\n", (void *)rec->ip);
1521 1519
1522 return 0; 1520 return 0;
1523} 1521}
@@ -1604,6 +1602,11 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1604 if (!iter) 1602 if (!iter)
1605 return -ENOMEM; 1603 return -ENOMEM;
1606 1604
1605 if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) {
1606 kfree(iter);
1607 return -ENOMEM;
1608 }
1609
1607 mutex_lock(&ftrace_regex_lock); 1610 mutex_lock(&ftrace_regex_lock);
1608 if ((file->f_mode & FMODE_WRITE) && 1611 if ((file->f_mode & FMODE_WRITE) &&
1609 (file->f_flags & O_TRUNC)) 1612 (file->f_flags & O_TRUNC))
@@ -2059,9 +2062,9 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
2059 int i, len = 0; 2062 int i, len = 0;
2060 char *search; 2063 char *search;
2061 2064
2062 if (glob && (strcmp(glob, "*") || !strlen(glob))) 2065 if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
2063 glob = NULL; 2066 glob = NULL;
2064 else { 2067 else if (glob) {
2065 int not; 2068 int not;
2066 2069
2067 type = ftrace_setup_glob(glob, strlen(glob), &search, &not); 2070 type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
@@ -2196,9 +2199,8 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2196 size_t cnt, loff_t *ppos, int enable) 2199 size_t cnt, loff_t *ppos, int enable)
2197{ 2200{
2198 struct ftrace_iterator *iter; 2201 struct ftrace_iterator *iter;
2199 char ch; 2202 struct trace_parser *parser;
2200 size_t read = 0; 2203 ssize_t ret, read;
2201 ssize_t ret;
2202 2204
2203 if (!cnt || cnt < 0) 2205 if (!cnt || cnt < 0)
2204 return 0; 2206 return 0;
@@ -2211,72 +2213,23 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
2211 } else 2213 } else
2212 iter = file->private_data; 2214 iter = file->private_data;
2213 2215
2214 if (!*ppos) { 2216 parser = &iter->parser;
2215 iter->flags &= ~FTRACE_ITER_CONT; 2217 read = trace_get_user(parser, ubuf, cnt, ppos);
2216 iter->buffer_idx = 0;
2217 }
2218
2219 ret = get_user(ch, ubuf++);
2220 if (ret)
2221 goto out;
2222 read++;
2223 cnt--;
2224 2218
2225 /* 2219 if (trace_parser_loaded(parser) &&
2226 * If the parser haven't finished with the last write, 2220 !trace_parser_cont(parser)) {
2227 * continue reading the user input without skipping spaces. 2221 ret = ftrace_process_regex(parser->buffer,
2228 */ 2222 parser->idx, enable);
2229 if (!(iter->flags & FTRACE_ITER_CONT)) {
2230 /* skip white space */
2231 while (cnt && isspace(ch)) {
2232 ret = get_user(ch, ubuf++);
2233 if (ret)
2234 goto out;
2235 read++;
2236 cnt--;
2237 }
2238
2239 /* only spaces were written */
2240 if (isspace(ch)) {
2241 *ppos += read;
2242 ret = read;
2243 goto out;
2244 }
2245
2246 iter->buffer_idx = 0;
2247 }
2248
2249 while (cnt && !isspace(ch)) {
2250 if (iter->buffer_idx < FTRACE_BUFF_MAX)
2251 iter->buffer[iter->buffer_idx++] = ch;
2252 else {
2253 ret = -EINVAL;
2254 goto out;
2255 }
2256 ret = get_user(ch, ubuf++);
2257 if (ret) 2223 if (ret)
2258 goto out; 2224 goto out;
2259 read++;
2260 cnt--;
2261 }
2262 2225
2263 if (isspace(ch)) { 2226 trace_parser_clear(parser);
2264 iter->buffer[iter->buffer_idx] = 0;
2265 ret = ftrace_process_regex(iter->buffer,
2266 iter->buffer_idx, enable);
2267 if (ret)
2268 goto out;
2269 iter->buffer_idx = 0;
2270 } else {
2271 iter->flags |= FTRACE_ITER_CONT;
2272 iter->buffer[iter->buffer_idx++] = ch;
2273 } 2227 }
2274 2228
2275 *ppos += read;
2276 ret = read; 2229 ret = read;
2277 out:
2278 mutex_unlock(&ftrace_regex_lock);
2279 2230
2231 mutex_unlock(&ftrace_regex_lock);
2232out:
2280 return ret; 2233 return ret;
2281} 2234}
2282 2235
@@ -2381,6 +2334,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2381{ 2334{
2382 struct seq_file *m = (struct seq_file *)file->private_data; 2335 struct seq_file *m = (struct seq_file *)file->private_data;
2383 struct ftrace_iterator *iter; 2336 struct ftrace_iterator *iter;
2337 struct trace_parser *parser;
2384 2338
2385 mutex_lock(&ftrace_regex_lock); 2339 mutex_lock(&ftrace_regex_lock);
2386 if (file->f_mode & FMODE_READ) { 2340 if (file->f_mode & FMODE_READ) {
@@ -2390,9 +2344,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2390 } else 2344 } else
2391 iter = file->private_data; 2345 iter = file->private_data;
2392 2346
2393 if (iter->buffer_idx) { 2347 parser = &iter->parser;
2394 iter->buffer[iter->buffer_idx] = 0; 2348 if (trace_parser_loaded(parser)) {
2395 ftrace_match_records(iter->buffer, iter->buffer_idx, enable); 2349 parser->buffer[parser->idx] = 0;
2350 ftrace_match_records(parser->buffer, parser->idx, enable);
2396 } 2351 }
2397 2352
2398 mutex_lock(&ftrace_lock); 2353 mutex_lock(&ftrace_lock);
@@ -2400,7 +2355,9 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
2400 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 2355 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
2401 mutex_unlock(&ftrace_lock); 2356 mutex_unlock(&ftrace_lock);
2402 2357
2358 trace_parser_put(parser);
2403 kfree(iter); 2359 kfree(iter);
2360
2404 mutex_unlock(&ftrace_regex_lock); 2361 mutex_unlock(&ftrace_regex_lock);
2405 return 0; 2362 return 0;
2406} 2363}
@@ -2499,7 +2456,7 @@ static int g_show(struct seq_file *m, void *v)
2499 return 0; 2456 return 0;
2500 } 2457 }
2501 2458
2502 seq_printf(m, "%pf\n", v); 2459 seq_printf(m, "%ps\n", (void *)*ptr);
2503 2460
2504 return 0; 2461 return 0;
2505} 2462}
@@ -2602,12 +2559,10 @@ static ssize_t
2602ftrace_graph_write(struct file *file, const char __user *ubuf, 2559ftrace_graph_write(struct file *file, const char __user *ubuf,
2603 size_t cnt, loff_t *ppos) 2560 size_t cnt, loff_t *ppos)
2604{ 2561{
2605 unsigned char buffer[FTRACE_BUFF_MAX+1]; 2562 struct trace_parser parser;
2606 unsigned long *array; 2563 unsigned long *array;
2607 size_t read = 0; 2564 size_t read = 0;
2608 ssize_t ret; 2565 ssize_t ret;
2609 int index = 0;
2610 char ch;
2611 2566
2612 if (!cnt || cnt < 0) 2567 if (!cnt || cnt < 0)
2613 return 0; 2568 return 0;
@@ -2625,51 +2580,26 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2625 } else 2580 } else
2626 array = file->private_data; 2581 array = file->private_data;
2627 2582
2628 ret = get_user(ch, ubuf++); 2583 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2629 if (ret) 2584 ret = -ENOMEM;
2630 goto out; 2585 goto out;
2631 read++;
2632 cnt--;
2633
2634 /* skip white space */
2635 while (cnt && isspace(ch)) {
2636 ret = get_user(ch, ubuf++);
2637 if (ret)
2638 goto out;
2639 read++;
2640 cnt--;
2641 } 2586 }
2642 2587
2643 if (isspace(ch)) { 2588 read = trace_get_user(&parser, ubuf, cnt, ppos);
2644 *ppos += read;
2645 ret = read;
2646 goto out;
2647 }
2648 2589
2649 while (cnt && !isspace(ch)) { 2590 if (trace_parser_loaded((&parser))) {
2650 if (index < FTRACE_BUFF_MAX) 2591 parser.buffer[parser.idx] = 0;
2651 buffer[index++] = ch; 2592
2652 else { 2593 /* we allow only one expression at a time */
2653 ret = -EINVAL; 2594 ret = ftrace_set_func(array, &ftrace_graph_count,
2654 goto out; 2595 parser.buffer);
2655 }
2656 ret = get_user(ch, ubuf++);
2657 if (ret) 2596 if (ret)
2658 goto out; 2597 goto out;
2659 read++;
2660 cnt--;
2661 } 2598 }
2662 buffer[index] = 0;
2663
2664 /* we allow only one expression at a time */
2665 ret = ftrace_set_func(array, &ftrace_graph_count, buffer);
2666 if (ret)
2667 goto out;
2668
2669 file->f_pos += read;
2670 2599
2671 ret = read; 2600 ret = read;
2672 out: 2601 out:
2602 trace_parser_put(&parser);
2673 mutex_unlock(&graph_lock); 2603 mutex_unlock(&graph_lock);
2674 2604
2675 return ret; 2605 return ret;
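
The rewritten ftrace_regex_write() and ftrace_graph_write() above delegate the byte-by-byte user-input handling to the new trace_parser helpers. A minimal sketch of the calling pattern, using only the helpers introduced in this series; process_token() is a hypothetical stand-in for ftrace_process_regex()/ftrace_set_func(), and error handling is trimmed:

static ssize_t example_write(struct file *file, const char __user *ubuf,
			     size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read, ret;

	/* allocate parser.buffer (FTRACE_BUFF_MAX bytes) */
	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX))
		return -ENOMEM;

	/* copy one whitespace-delimited token from user space */
	read = trace_get_user(&parser, ubuf, cnt, ppos);

	/* act only once a complete token (trailing space seen) is buffered */
	if (trace_parser_loaded(&parser) && !trace_parser_cont(&parser)) {
		parser.buffer[parser.idx] = 0;
		ret = process_token(parser.buffer);	/* hypothetical consumer */
		if (ret < 0)
			read = ret;
	}

	trace_parser_put(&parser);	/* free parser.buffer */
	return read;
}

ftrace_regex_write() keeps its parser inside struct ftrace_iterator so partial writes survive across calls; the sketch uses an on-stack parser, as ftrace_graph_write() does.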
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 454e74e718cf..6eef38923b07 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -701,8 +701,8 @@ static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer,
701 701
702 val &= ~RB_FLAG_MASK; 702 val &= ~RB_FLAG_MASK;
703 703
704 ret = (unsigned long)cmpxchg(&list->next, 704 ret = cmpxchg((unsigned long *)&list->next,
705 val | old_flag, val | new_flag); 705 val | old_flag, val | new_flag);
706 706
707 /* check if the reader took the page */ 707 /* check if the reader took the page */
708 if ((ret & ~RB_FLAG_MASK) != val) 708 if ((ret & ~RB_FLAG_MASK) != val)
@@ -794,7 +794,7 @@ static int rb_head_page_replace(struct buffer_page *old,
794 val = *ptr & ~RB_FLAG_MASK; 794 val = *ptr & ~RB_FLAG_MASK;
795 val |= RB_PAGE_HEAD; 795 val |= RB_PAGE_HEAD;
796 796
797 ret = cmpxchg(ptr, val, &new->list); 797 ret = cmpxchg(ptr, val, (unsigned long)&new->list);
798 798
799 return ret == val; 799 return ret == val;
800} 800}
@@ -2997,15 +2997,12 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
2997} 2997}
2998 2998
2999static struct ring_buffer_event * 2999static struct ring_buffer_event *
3000rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 3000rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
3001{ 3001{
3002 struct ring_buffer_per_cpu *cpu_buffer;
3003 struct ring_buffer_event *event; 3002 struct ring_buffer_event *event;
3004 struct buffer_page *reader; 3003 struct buffer_page *reader;
3005 int nr_loops = 0; 3004 int nr_loops = 0;
3006 3005
3007 cpu_buffer = buffer->buffers[cpu];
3008
3009 again: 3006 again:
3010 /* 3007 /*
3011 * We repeat when a timestamp is encountered. It is possible 3008 * We repeat when a timestamp is encountered. It is possible
@@ -3049,7 +3046,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
3049 case RINGBUF_TYPE_DATA: 3046 case RINGBUF_TYPE_DATA:
3050 if (ts) { 3047 if (ts) {
3051 *ts = cpu_buffer->read_stamp + event->time_delta; 3048 *ts = cpu_buffer->read_stamp + event->time_delta;
3052 ring_buffer_normalize_time_stamp(buffer, 3049 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3053 cpu_buffer->cpu, ts); 3050 cpu_buffer->cpu, ts);
3054 } 3051 }
3055 return event; 3052 return event;
@@ -3168,7 +3165,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
3168 local_irq_save(flags); 3165 local_irq_save(flags);
3169 if (dolock) 3166 if (dolock)
3170 spin_lock(&cpu_buffer->reader_lock); 3167 spin_lock(&cpu_buffer->reader_lock);
3171 event = rb_buffer_peek(buffer, cpu, ts); 3168 event = rb_buffer_peek(cpu_buffer, ts);
3172 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3169 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3173 rb_advance_reader(cpu_buffer); 3170 rb_advance_reader(cpu_buffer);
3174 if (dolock) 3171 if (dolock)
@@ -3237,7 +3234,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3237 if (dolock) 3234 if (dolock)
3238 spin_lock(&cpu_buffer->reader_lock); 3235 spin_lock(&cpu_buffer->reader_lock);
3239 3236
3240 event = rb_buffer_peek(buffer, cpu, ts); 3237 event = rb_buffer_peek(cpu_buffer, ts);
3241 if (event) 3238 if (event)
3242 rb_advance_reader(cpu_buffer); 3239 rb_advance_reader(cpu_buffer);
3243 3240
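
The two cmpxchg() fixes above matter because the ring buffer keeps its head/reader flags in the low bits of the list pointers (masked by RB_FLAG_MASK), so the compare-and-swap has to operate on unsigned long values rather than on pointers. A small userspace illustration of that tagged-pointer update, assuming a GCC __sync builtin in place of the kernel's cmpxchg(); TAG_MASK and set_tag() are illustrative names only:

#include <stdint.h>
#include <stdio.h>

#define TAG_MASK 0x3UL   /* low bits of an aligned pointer are free for flags */

/* atomically replace the tag on *slot, keeping the pointer part intact */
static int set_tag(uintptr_t *slot, uintptr_t old_tag, uintptr_t new_tag)
{
	uintptr_t ptr = *slot & ~TAG_MASK;
	uintptr_t ret;

	ret = __sync_val_compare_and_swap(slot, ptr | old_tag, ptr | new_tag);
	return (ret & ~TAG_MASK) == ptr;   /* 0 if someone moved the pointer */
}

int main(void)
{
	static long page;                       /* stands in for a buffer page */
	uintptr_t slot = (uintptr_t)&page | 1;  /* pointer tagged as "head" */

	printf("retagged: %d, tag now %lu\n",
	       set_tag(&slot, 1, 2), (unsigned long)(slot & TAG_MASK));
	return 0;
}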
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5c75deeefe30..fd52a19dd172 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -339,6 +339,112 @@ static struct {
339 339
340int trace_clock_id; 340int trace_clock_id;
341 341
342/*
343 * trace_parser_get_init - gets the buffer for trace parser
344 */
345int trace_parser_get_init(struct trace_parser *parser, int size)
346{
347 memset(parser, 0, sizeof(*parser));
348
349 parser->buffer = kmalloc(size, GFP_KERNEL);
350 if (!parser->buffer)
351 return 1;
352
353 parser->size = size;
354 return 0;
355}
356
357/*
358 * trace_parser_put - frees the buffer for trace parser
359 */
360void trace_parser_put(struct trace_parser *parser)
361{
362 kfree(parser->buffer);
363}
364
365/*
366 * trace_get_user - reads the user input string separated by space
367 * (matched by isspace(ch))
368 *
369 * For each string found the 'struct trace_parser' is updated,
370 * and the function returns.
371 *
372 * Returns number of bytes read.
373 *
374 * See kernel/trace/trace.h for 'struct trace_parser' details.
375 */
376int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
377 size_t cnt, loff_t *ppos)
378{
379 char ch;
380 size_t read = 0;
381 ssize_t ret;
382
383 if (!*ppos)
384 trace_parser_clear(parser);
385
386 ret = get_user(ch, ubuf++);
387 if (ret)
388 goto out;
389
390 read++;
391 cnt--;
392
393 /*
394 * If the parser has not finished with the last write,
395 * continue reading the user input without skipping spaces.
396 */
397 if (!parser->cont) {
398 /* skip white space */
399 while (cnt && isspace(ch)) {
400 ret = get_user(ch, ubuf++);
401 if (ret)
402 goto out;
403 read++;
404 cnt--;
405 }
406
407 /* only spaces were written */
408 if (isspace(ch)) {
409 *ppos += read;
410 ret = read;
411 goto out;
412 }
413
414 parser->idx = 0;
415 }
416
417 /* read the non-space input */
418 while (cnt && !isspace(ch)) {
419 if (parser->idx < parser->size)
420 parser->buffer[parser->idx++] = ch;
421 else {
422 ret = -EINVAL;
423 goto out;
424 }
425 ret = get_user(ch, ubuf++);
426 if (ret)
427 goto out;
428 read++;
429 cnt--;
430 }
431
432 /* We either got finished input or we have to wait for another call. */
433 if (isspace(ch)) {
434 parser->buffer[parser->idx] = 0;
435 parser->cont = false;
436 } else {
437 parser->cont = true;
438 parser->buffer[parser->idx++] = ch;
439 }
440
441 *ppos += read;
442 ret = read;
443
444out:
445 return ret;
446}
447
342ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) 448ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
343{ 449{
344 int len; 450 int len;
@@ -719,6 +825,11 @@ static void trace_init_cmdlines(void)
719 cmdline_idx = 0; 825 cmdline_idx = 0;
720} 826}
721 827
828int is_tracing_stopped(void)
829{
830 return trace_stop_count;
831}
832
722/** 833/**
723 * ftrace_off_permanent - disable all ftrace code permanently 834 * ftrace_off_permanent - disable all ftrace code permanently
724 * 835 *
@@ -886,7 +997,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
886 997
887 entry->preempt_count = pc & 0xff; 998 entry->preempt_count = pc & 0xff;
888 entry->pid = (tsk) ? tsk->pid : 0; 999 entry->pid = (tsk) ? tsk->pid : 0;
889 entry->tgid = (tsk) ? tsk->tgid : 0; 1000 entry->lock_depth = (tsk) ? tsk->lock_depth : 0;
890 entry->flags = 1001 entry->flags =
891#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 1002#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
892 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 1003 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1068,6 +1179,7 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1068 return; 1179 return;
1069 entry = ring_buffer_event_data(event); 1180 entry = ring_buffer_event_data(event);
1070 1181
1182 entry->tgid = current->tgid;
1071 memset(&entry->caller, 0, sizeof(entry->caller)); 1183 memset(&entry->caller, 0, sizeof(entry->caller));
1072 1184
1073 trace.nr_entries = 0; 1185 trace.nr_entries = 0;
@@ -1094,6 +1206,7 @@ ftrace_trace_special(void *__tr,
1094 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1206 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1095 int pc) 1207 int pc)
1096{ 1208{
1209 struct ftrace_event_call *call = &event_special;
1097 struct ring_buffer_event *event; 1210 struct ring_buffer_event *event;
1098 struct trace_array *tr = __tr; 1211 struct trace_array *tr = __tr;
1099 struct ring_buffer *buffer = tr->buffer; 1212 struct ring_buffer *buffer = tr->buffer;
@@ -1107,7 +1220,9 @@ ftrace_trace_special(void *__tr,
1107 entry->arg1 = arg1; 1220 entry->arg1 = arg1;
1108 entry->arg2 = arg2; 1221 entry->arg2 = arg2;
1109 entry->arg3 = arg3; 1222 entry->arg3 = arg3;
1110 trace_buffer_unlock_commit(buffer, event, 0, pc); 1223
1224 if (!filter_check_discard(call, entry, buffer, event))
1225 trace_buffer_unlock_commit(buffer, event, 0, pc);
1111} 1226}
1112 1227
1113void 1228void
@@ -1530,10 +1645,10 @@ static void print_lat_help_header(struct seq_file *m)
1530 seq_puts(m, "# | / _----=> need-resched \n"); 1645 seq_puts(m, "# | / _----=> need-resched \n");
1531 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1646 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1532 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1647 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1533 seq_puts(m, "# |||| / \n"); 1648 seq_puts(m, "# |||| /_--=> lock-depth \n");
1534 seq_puts(m, "# ||||| delay \n"); 1649 seq_puts(m, "# |||||/ delay \n");
1535 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1650 seq_puts(m, "# cmd pid |||||| time | caller \n");
1536 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1651 seq_puts(m, "# \\ / |||||| \\ | / \n");
1537} 1652}
1538 1653
1539static void print_func_help_header(struct seq_file *m) 1654static void print_func_help_header(struct seq_file *m)
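
trace_get_user() above is the heart of the refactoring: whitespace ends a token, and parser->cont records that a write() stopped mid-token so the next call appends instead of restarting. A standalone sketch of that continuation rule, assuming plain buffers instead of get_user() and omitting the kernel's overflow check (-EINVAL); struct parser and feed() are illustrative names, not kernel code:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>

struct parser { bool cont; char buf[64]; unsigned idx; };

/* feed one "write" worth of bytes; returns true when a full token is ready */
static bool feed(struct parser *p, const char *data, size_t len)
{
	size_t i = 0;

	if (!p->cont) {				/* new token: skip leading spaces */
		while (i < len && isspace((unsigned char)data[i]))
			i++;
		if (i == len)			/* only spaces in this write */
			return false;
		p->idx = 0;
	}

	while (i < len && !isspace((unsigned char)data[i]))
		p->buf[p->idx++] = data[i++];

	p->cont = (i == len);			/* ran out of input mid-token? */
	if (!p->cont)
		p->buf[p->idx] = 0;
	return !p->cont && p->idx;
}

int main(void)
{
	struct parser p = { 0 };

	feed(&p, "  do_sys", 8);		/* partial write, no delimiter yet */
	if (feed(&p, "_open ", 6))		/* continuation + trailing space */
		printf("token: %s\n", p.buf);	/* -> "do_sys_open" */
	return 0;
}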
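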
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fa1dccb579d5..86bcff94791a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -7,6 +7,7 @@
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/tracepoint.h>
10#include <linux/ftrace.h> 11#include <linux/ftrace.h>
11#include <trace/boot.h> 12#include <trace/boot.h>
12#include <linux/kmemtrace.h> 13#include <linux/kmemtrace.h>
@@ -42,157 +43,54 @@ enum trace_type {
42 __TRACE_LAST_TYPE, 43 __TRACE_LAST_TYPE,
43}; 44};
44 45
45/* 46enum kmemtrace_type_id {
46 * Function trace entry - function address and parent function addres: 47 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
47 */ 48 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
48struct ftrace_entry { 49 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
49 struct trace_entry ent;
50 unsigned long ip;
51 unsigned long parent_ip;
52};
53
54/* Function call entry */
55struct ftrace_graph_ent_entry {
56 struct trace_entry ent;
57 struct ftrace_graph_ent graph_ent;
58}; 50};
59 51
60/* Function return entry */
61struct ftrace_graph_ret_entry {
62 struct trace_entry ent;
63 struct ftrace_graph_ret ret;
64};
65extern struct tracer boot_tracer; 52extern struct tracer boot_tracer;
66 53
67/* 54#undef __field
68 * Context switch trace entry - which task (and prio) we switched from/to: 55#define __field(type, item) type item;
69 */
70struct ctx_switch_entry {
71 struct trace_entry ent;
72 unsigned int prev_pid;
73 unsigned char prev_prio;
74 unsigned char prev_state;
75 unsigned int next_pid;
76 unsigned char next_prio;
77 unsigned char next_state;
78 unsigned int next_cpu;
79};
80
81/*
82 * Special (free-form) trace entry:
83 */
84struct special_entry {
85 struct trace_entry ent;
86 unsigned long arg1;
87 unsigned long arg2;
88 unsigned long arg3;
89};
90
91/*
92 * Stack-trace entry:
93 */
94
95#define FTRACE_STACK_ENTRIES 8
96
97struct stack_entry {
98 struct trace_entry ent;
99 unsigned long caller[FTRACE_STACK_ENTRIES];
100};
101
102struct userstack_entry {
103 struct trace_entry ent;
104 unsigned long caller[FTRACE_STACK_ENTRIES];
105};
106
107/*
108 * trace_printk entry:
109 */
110struct bprint_entry {
111 struct trace_entry ent;
112 unsigned long ip;
113 const char *fmt;
114 u32 buf[];
115};
116 56
117struct print_entry { 57#undef __field_struct
118 struct trace_entry ent; 58#define __field_struct(type, item) __field(type, item)
119 unsigned long ip;
120 char buf[];
121};
122 59
123#define TRACE_OLD_SIZE 88 60#undef __field_desc
61#define __field_desc(type, container, item)
124 62
125struct trace_field_cont { 63#undef __array
126 unsigned char type; 64#define __array(type, item, size) type item[size];
127 /* Temporary till we get rid of this completely */
128 char buf[TRACE_OLD_SIZE - 1];
129};
130 65
131struct trace_mmiotrace_rw { 66#undef __array_desc
132 struct trace_entry ent; 67#define __array_desc(type, container, item, size)
133 struct mmiotrace_rw rw;
134};
135 68
136struct trace_mmiotrace_map { 69#undef __dynamic_array
137 struct trace_entry ent; 70#define __dynamic_array(type, item) type item[];
138 struct mmiotrace_map map;
139};
140 71
141struct trace_boot_call { 72#undef F_STRUCT
142 struct trace_entry ent; 73#define F_STRUCT(args...) args
143 struct boot_trace_call boot_call;
144};
145 74
146struct trace_boot_ret { 75#undef FTRACE_ENTRY
147 struct trace_entry ent; 76#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
148 struct boot_trace_ret boot_ret; 77 struct struct_name { \
149}; 78 struct trace_entry ent; \
150 79 tstruct \
151#define TRACE_FUNC_SIZE 30 80 }
152#define TRACE_FILE_SIZE 20
153struct trace_branch {
154 struct trace_entry ent;
155 unsigned line;
156 char func[TRACE_FUNC_SIZE+1];
157 char file[TRACE_FILE_SIZE+1];
158 char correct;
159};
160
161struct hw_branch_entry {
162 struct trace_entry ent;
163 u64 from;
164 u64 to;
165};
166
167struct trace_power {
168 struct trace_entry ent;
169 struct power_trace state_data;
170};
171 81
172enum kmemtrace_type_id { 82#undef TP_ARGS
173 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ 83#define TP_ARGS(args...) args
174 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
175 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
176};
177 84
178struct kmemtrace_alloc_entry { 85#undef FTRACE_ENTRY_DUP
179 struct trace_entry ent; 86#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
180 enum kmemtrace_type_id type_id;
181 unsigned long call_site;
182 const void *ptr;
183 size_t bytes_req;
184 size_t bytes_alloc;
185 gfp_t gfp_flags;
186 int node;
187};
188 87
189struct kmemtrace_free_entry { 88#include "trace_entries.h"
190 struct trace_entry ent;
191 enum kmemtrace_type_id type_id;
192 unsigned long call_site;
193 const void *ptr;
194};
195 89
90/*
91 * syscalls are special, and need special handling, this is why
92 * they are not included in trace_entries.h
93 */
196struct syscall_trace_enter { 94struct syscall_trace_enter {
197 struct trace_entry ent; 95 struct trace_entry ent;
198 int nr; 96 int nr;
@@ -205,13 +103,12 @@ struct syscall_trace_exit {
205 unsigned long ret; 103 unsigned long ret;
206}; 104};
207 105
208
209/* 106/*
210 * trace_flag_type is an enumeration that holds different 107 * trace_flag_type is an enumeration that holds different
211 * states when a trace occurs. These are: 108 * states when a trace occurs. These are:
212 * IRQS_OFF - interrupts were disabled 109 * IRQS_OFF - interrupts were disabled
213 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags 110 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
214 * NEED_RESCED - reschedule is requested 111 * NEED_RESCHED - reschedule is requested
215 * HARDIRQ - inside an interrupt handler 112 * HARDIRQ - inside an interrupt handler
216 * SOFTIRQ - inside a softirq handler 113 * SOFTIRQ - inside a softirq handler
217 */ 114 */
@@ -390,7 +287,6 @@ struct tracer {
390 struct tracer *next; 287 struct tracer *next;
391 int print_max; 288 int print_max;
392 struct tracer_flags *flags; 289 struct tracer_flags *flags;
393 struct tracer_stat *stats;
394}; 290};
395 291
396 292
@@ -469,6 +365,7 @@ void tracing_stop_sched_switch_record(void);
469void tracing_start_sched_switch_record(void); 365void tracing_start_sched_switch_record(void);
470int register_tracer(struct tracer *type); 366int register_tracer(struct tracer *type);
471void unregister_tracer(struct tracer *type); 367void unregister_tracer(struct tracer *type);
368int is_tracing_stopped(void);
472 369
473extern unsigned long nsecs_to_usecs(unsigned long nsecs); 370extern unsigned long nsecs_to_usecs(unsigned long nsecs);
474 371
@@ -509,20 +406,6 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
509 406
510extern cycle_t ftrace_now(int cpu); 407extern cycle_t ftrace_now(int cpu);
511 408
512#ifdef CONFIG_CONTEXT_SWITCH_TRACER
513typedef void
514(*tracer_switch_func_t)(void *private,
515 void *__rq,
516 struct task_struct *prev,
517 struct task_struct *next);
518
519struct tracer_switch_ops {
520 tracer_switch_func_t func;
521 void *private;
522 struct tracer_switch_ops *next;
523};
524#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
525
526extern void trace_find_cmdline(int pid, char comm[]); 409extern void trace_find_cmdline(int pid, char comm[]);
527 410
528#ifdef CONFIG_DYNAMIC_FTRACE 411#ifdef CONFIG_DYNAMIC_FTRACE
@@ -638,6 +521,41 @@ static inline int ftrace_trace_task(struct task_struct *task)
638#endif 521#endif
639 522
640/* 523/*
524 * struct trace_parser - serves for reading the user input separated by spaces
525 * @cont: set if the input is not complete - no final space char was found
526 * @buffer: holds the parsed user input
527 * @idx: user input length
528 * @size: buffer size
529 */
530struct trace_parser {
531 bool cont;
532 char *buffer;
533 unsigned idx;
534 unsigned size;
535};
536
537static inline bool trace_parser_loaded(struct trace_parser *parser)
538{
539 return (parser->idx != 0);
540}
541
542static inline bool trace_parser_cont(struct trace_parser *parser)
543{
544 return parser->cont;
545}
546
547static inline void trace_parser_clear(struct trace_parser *parser)
548{
549 parser->cont = false;
550 parser->idx = 0;
551}
552
553extern int trace_parser_get_init(struct trace_parser *parser, int size);
554extern void trace_parser_put(struct trace_parser *parser);
555extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
556 size_t cnt, loff_t *ppos);
557
558/*
641 * trace_iterator_flags is an enumeration that defines bit 559 * trace_iterator_flags is an enumeration that defines bit
642 * positions into trace_flags that controls the output. 560 * positions into trace_flags that controls the output.
643 * 561 *
@@ -823,58 +741,18 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
823 return 0; 741 return 0;
824} 742}
825 743
826#define DEFINE_COMPARISON_PRED(type) \
827static int filter_pred_##type(struct filter_pred *pred, void *event, \
828 int val1, int val2) \
829{ \
830 type *addr = (type *)(event + pred->offset); \
831 type val = (type)pred->val; \
832 int match = 0; \
833 \
834 switch (pred->op) { \
835 case OP_LT: \
836 match = (*addr < val); \
837 break; \
838 case OP_LE: \
839 match = (*addr <= val); \
840 break; \
841 case OP_GT: \
842 match = (*addr > val); \
843 break; \
844 case OP_GE: \
845 match = (*addr >= val); \
846 break; \
847 default: \
848 break; \
849 } \
850 \
851 return match; \
852}
853
854#define DEFINE_EQUALITY_PRED(size) \
855static int filter_pred_##size(struct filter_pred *pred, void *event, \
856 int val1, int val2) \
857{ \
858 u##size *addr = (u##size *)(event + pred->offset); \
859 u##size val = (u##size)pred->val; \
860 int match; \
861 \
862 match = (val == *addr) ^ pred->not; \
863 \
864 return match; \
865}
866
867extern struct mutex event_mutex; 744extern struct mutex event_mutex;
868extern struct list_head ftrace_events; 745extern struct list_head ftrace_events;
869 746
870extern const char *__start___trace_bprintk_fmt[]; 747extern const char *__start___trace_bprintk_fmt[];
871extern const char *__stop___trace_bprintk_fmt[]; 748extern const char *__stop___trace_bprintk_fmt[];
872 749
873#undef TRACE_EVENT_FORMAT 750#undef FTRACE_ENTRY
874#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 751#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
875 extern struct ftrace_event_call event_##call; 752 extern struct ftrace_event_call event_##call;
876#undef TRACE_EVENT_FORMAT_NOFILTER 753#undef FTRACE_ENTRY_DUP
877#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) 754#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
878#include "trace_event_types.h" 755 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
756#include "trace_entries.h"
879 757
880#endif /* _LINUX_KERNEL_TRACE_H */ 758#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 19bfc75d467e..c21d5f3956ad 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -129,6 +129,7 @@ struct tracer boot_tracer __read_mostly =
129 129
130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) 130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
131{ 131{
132 struct ftrace_event_call *call = &event_boot_call;
132 struct ring_buffer_event *event; 133 struct ring_buffer_event *event;
133 struct ring_buffer *buffer; 134 struct ring_buffer *buffer;
134 struct trace_boot_call *entry; 135 struct trace_boot_call *entry;
@@ -150,13 +151,15 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
150 goto out; 151 goto out;
151 entry = ring_buffer_event_data(event); 152 entry = ring_buffer_event_data(event);
152 entry->boot_call = *bt; 153 entry->boot_call = *bt;
153 trace_buffer_unlock_commit(buffer, event, 0, 0); 154 if (!filter_check_discard(call, entry, buffer, event))
155 trace_buffer_unlock_commit(buffer, event, 0, 0);
154 out: 156 out:
155 preempt_enable(); 157 preempt_enable();
156} 158}
157 159
158void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) 160void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
159{ 161{
162 struct ftrace_event_call *call = &event_boot_ret;
160 struct ring_buffer_event *event; 163 struct ring_buffer_event *event;
161 struct ring_buffer *buffer; 164 struct ring_buffer *buffer;
162 struct trace_boot_ret *entry; 165 struct trace_boot_ret *entry;
@@ -175,7 +178,8 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
175 goto out; 178 goto out;
176 entry = ring_buffer_event_data(event); 179 entry = ring_buffer_event_data(event);
177 entry->boot_ret = *bt; 180 entry->boot_ret = *bt;
178 trace_buffer_unlock_commit(buffer, event, 0, 0); 181 if (!filter_check_discard(call, entry, buffer, event))
182 trace_buffer_unlock_commit(buffer, event, 0, 0);
179 out: 183 out:
180 preempt_enable(); 184 preempt_enable();
181} 185}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index b588fd81f7f9..20c5f92e28a8 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -66,10 +66,14 @@ u64 notrace trace_clock(void)
66 * Used by plugins that need globally coherent timestamps. 66 * Used by plugins that need globally coherent timestamps.
67 */ 67 */
68 68
69static u64 prev_trace_clock_time; 69/* keep prev_time and lock in the same cacheline. */
70 70static struct {
71static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = 71 u64 prev_time;
72 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 72 raw_spinlock_t lock;
73} trace_clock_struct ____cacheline_aligned_in_smp =
74 {
75 .lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
76 };
73 77
74u64 notrace trace_clock_global(void) 78u64 notrace trace_clock_global(void)
75{ 79{
@@ -88,19 +92,19 @@ u64 notrace trace_clock_global(void)
88 if (unlikely(in_nmi())) 92 if (unlikely(in_nmi()))
89 goto out; 93 goto out;
90 94
91 __raw_spin_lock(&trace_clock_lock); 95 __raw_spin_lock(&trace_clock_struct.lock);
92 96
93 /* 97 /*
94 * TODO: if this happens often then maybe we should reset 98 * TODO: if this happens often then maybe we should reset
95 * my_scd->clock to prev_trace_clock_time+1, to make sure 99 * my_scd->clock to prev_time+1, to make sure
96 * we start ticking with the local clock from now on? 100 * we start ticking with the local clock from now on?
97 */ 101 */
98 if ((s64)(now - prev_trace_clock_time) < 0) 102 if ((s64)(now - trace_clock_struct.prev_time) < 0)
99 now = prev_trace_clock_time + 1; 103 now = trace_clock_struct.prev_time + 1;
100 104
101 prev_trace_clock_time = now; 105 trace_clock_struct.prev_time = now;
102 106
103 __raw_spin_unlock(&trace_clock_lock); 107 __raw_spin_unlock(&trace_clock_struct.lock);
104 108
105 out: 109 out:
106 raw_local_irq_restore(flags); 110 raw_local_irq_restore(flags);
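
The trace_clock.c change replaces two separate globals with one ____cacheline_aligned_in_smp struct so prev_time shares a cache line with the lock that protects it, rather than bouncing a second line between CPUs. A minimal userspace sketch of the same grouping, assuming a 64-byte cache line (the kernel macro uses the architecture's real value) and an int standing in for raw_spinlock_t:

#include <stdio.h>

/* keep the data and the lock that guards it in one cache line */
struct clock_state {
	unsigned long long prev_time;
	int lock;				/* stand-in for raw_spinlock_t */
} __attribute__((aligned(64)));

static struct clock_state clock_state;

int main(void)
{
	printf("sizeof=%zu align=%zu\n",
	       sizeof(clock_state), _Alignof(struct clock_state));
	return 0;
}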
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
new file mode 100644
index 000000000000..a431748ddd6e
--- /dev/null
+++ b/kernel/trace/trace_entries.h
@@ -0,0 +1,383 @@
1/*
2 * This file defines the trace event structures that go into the ring
3 * buffer directly. They are created via macros so that changes for them
4 * appear in the format file. Using macros will automate this process.
5 *
6 * The macro used to create a ftrace data structure is:
7 *
8 * FTRACE_ENTRY( name, struct_name, id, structure, print )
9 *
10 * @name: the name used as the event name, as well as the name of
11 * the directory that holds the format file.
12 *
13 * @struct_name: the name of the structure that is created.
14 *
15 * @id: The event identifier that is used to detect what event
16 * this is from the ring buffer.
17 *
18 * @structure: the structure layout
19 *
20 * - __field( type, item )
21 * This is equivalent to declaring
22 * type item;
23 * in the structure.
24 * - __array( type, item, size )
25 * This is equivalent to declaring
26 * type item[size];
27 * in the structure.
28 *
29 * * for structures within structures, the format of the internal
30 * structure is laid out. This allows the internal structure
31 * to be deciphered for the format file. Although these macros
32 * may become out of sync with the internal structure, they
33 * will create a compile error if it happens. Since the
34 * internal structures are just tracing helpers, this is not
35 * an issue.
36 *
37 * When an internal structure is used, it should use:
38 *
39 * __field_struct( type, item )
40 *
41 * instead of __field. This will prevent it from being shown in
42 * the output file. The fields in the structure should use:
43 *
44 * __field_desc( type, container, item )
45 * __array_desc( type, container, item, len )
46 *
47 * type, item and len are the same as __field and __array, but
48 * container is added. This is the name of the item in
49 * __field_struct that this is describing.
50 *
51 *
52 * @print: the print format shown to users in the format file.
53 */
54
55/*
56 * Function trace entry - function address and parent function address:
57 */
58FTRACE_ENTRY(function, ftrace_entry,
59
60 TRACE_FN,
61
62 F_STRUCT(
63 __field( unsigned long, ip )
64 __field( unsigned long, parent_ip )
65 ),
66
67 F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
68);
69
70/* Function call entry */
71FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
72
73 TRACE_GRAPH_ENT,
74
75 F_STRUCT(
76 __field_struct( struct ftrace_graph_ent, graph_ent )
77 __field_desc( unsigned long, graph_ent, func )
78 __field_desc( int, graph_ent, depth )
79 ),
80
81 F_printk("--> %lx (%d)", __entry->func, __entry->depth)
82);
83
84/* Function return entry */
85FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
86
87 TRACE_GRAPH_RET,
88
89 F_STRUCT(
90 __field_struct( struct ftrace_graph_ret, ret )
91 __field_desc( unsigned long, ret, func )
92 __field_desc( unsigned long long, ret, calltime)
93 __field_desc( unsigned long long, ret, rettime )
94 __field_desc( unsigned long, ret, overrun )
95 __field_desc( int, ret, depth )
96 ),
97
98 F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
99 __entry->func, __entry->depth,
100 __entry->calltime, __entry->rettime,
101 __entry->depth)
102);
103
104/*
105 * Context switch trace entry - which task (and prio) we switched from/to:
106 *
107 * This is used for both wakeup and context switches. We only want
108 * to create one structure, but we need two outputs for it.
109 */
110#define FTRACE_CTX_FIELDS \
111 __field( unsigned int, prev_pid ) \
112 __field( unsigned char, prev_prio ) \
113 __field( unsigned char, prev_state ) \
114 __field( unsigned int, next_pid ) \
115 __field( unsigned char, next_prio ) \
116 __field( unsigned char, next_state ) \
117 __field( unsigned int, next_cpu )
118
119FTRACE_ENTRY(context_switch, ctx_switch_entry,
120
121 TRACE_CTX,
122
123 F_STRUCT(
124 FTRACE_CTX_FIELDS
125 ),
126
127 F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
128 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
129 __entry->next_pid, __entry->next_prio, __entry->next_state,
130 __entry->next_cpu
131 )
132);
133
134/*
135 * FTRACE_ENTRY_DUP only creates the format file, it will not
136 * create another structure.
137 */
138FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
139
140 TRACE_WAKE,
141
142 F_STRUCT(
143 FTRACE_CTX_FIELDS
144 ),
145
146 F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
147 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
148 __entry->next_pid, __entry->next_prio, __entry->next_state,
149 __entry->next_cpu
150 )
151);
152
153/*
154 * Special (free-form) trace entry:
155 */
156FTRACE_ENTRY(special, special_entry,
157
158 TRACE_SPECIAL,
159
160 F_STRUCT(
161 __field( unsigned long, arg1 )
162 __field( unsigned long, arg2 )
163 __field( unsigned long, arg3 )
164 ),
165
166 F_printk("(%08lx) (%08lx) (%08lx)",
167 __entry->arg1, __entry->arg2, __entry->arg3)
168);
169
170/*
171 * Stack-trace entry:
172 */
173
174#define FTRACE_STACK_ENTRIES 8
175
176FTRACE_ENTRY(kernel_stack, stack_entry,
177
178 TRACE_STACK,
179
180 F_STRUCT(
181 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
182 ),
183
184 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
185 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
186 __entry->caller[0], __entry->caller[1], __entry->caller[2],
187 __entry->caller[3], __entry->caller[4], __entry->caller[5],
188 __entry->caller[6], __entry->caller[7])
189);
190
191FTRACE_ENTRY(user_stack, userstack_entry,
192
193 TRACE_USER_STACK,
194
195 F_STRUCT(
196 __field( unsigned int, tgid )
197 __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
198 ),
199
200 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
201 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
202 __entry->caller[0], __entry->caller[1], __entry->caller[2],
203 __entry->caller[3], __entry->caller[4], __entry->caller[5],
204 __entry->caller[6], __entry->caller[7])
205);
206
207/*
208 * trace_printk entry:
209 */
210FTRACE_ENTRY(bprint, bprint_entry,
211
212 TRACE_BPRINT,
213
214 F_STRUCT(
215 __field( unsigned long, ip )
216 __field( const char *, fmt )
217 __dynamic_array( u32, buf )
218 ),
219
220 F_printk("%08lx fmt:%p",
221 __entry->ip, __entry->fmt)
222);
223
224FTRACE_ENTRY(print, print_entry,
225
226 TRACE_PRINT,
227
228 F_STRUCT(
229 __field( unsigned long, ip )
230 __dynamic_array( char, buf )
231 ),
232
233 F_printk("%08lx %s",
234 __entry->ip, __entry->buf)
235);
236
237FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
238
239 TRACE_MMIO_RW,
240
241 F_STRUCT(
242 __field_struct( struct mmiotrace_rw, rw )
243 __field_desc( resource_size_t, rw, phys )
244 __field_desc( unsigned long, rw, value )
245 __field_desc( unsigned long, rw, pc )
246 __field_desc( int, rw, map_id )
247 __field_desc( unsigned char, rw, opcode )
248 __field_desc( unsigned char, rw, width )
249 ),
250
251 F_printk("%lx %lx %lx %d %x %x",
252 (unsigned long)__entry->phys, __entry->value, __entry->pc,
253 __entry->map_id, __entry->opcode, __entry->width)
254);
255
256FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
257
258 TRACE_MMIO_MAP,
259
260 F_STRUCT(
261 __field_struct( struct mmiotrace_map, map )
262 __field_desc( resource_size_t, map, phys )
263 __field_desc( unsigned long, map, virt )
264 __field_desc( unsigned long, map, len )
265 __field_desc( int, map, map_id )
266 __field_desc( unsigned char, map, opcode )
267 ),
268
269 F_printk("%lx %lx %lx %d %x",
270 (unsigned long)__entry->phys, __entry->virt, __entry->len,
271 __entry->map_id, __entry->opcode)
272);
273
274FTRACE_ENTRY(boot_call, trace_boot_call,
275
276 TRACE_BOOT_CALL,
277
278 F_STRUCT(
279 __field_struct( struct boot_trace_call, boot_call )
280 __field_desc( pid_t, boot_call, caller )
281 __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
282 ),
283
284 F_printk("%d %s", __entry->caller, __entry->func)
285);
286
287FTRACE_ENTRY(boot_ret, trace_boot_ret,
288
289 TRACE_BOOT_RET,
290
291 F_STRUCT(
292 __field_struct( struct boot_trace_ret, boot_ret )
293 __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
294 __field_desc( int, boot_ret, result )
295 __field_desc( unsigned long, boot_ret, duration )
296 ),
297
298 F_printk("%s %d %lx",
299 __entry->func, __entry->result, __entry->duration)
300);
301
302#define TRACE_FUNC_SIZE 30
303#define TRACE_FILE_SIZE 20
304
305FTRACE_ENTRY(branch, trace_branch,
306
307 TRACE_BRANCH,
308
309 F_STRUCT(
310 __field( unsigned int, line )
311 __array( char, func, TRACE_FUNC_SIZE+1 )
312 __array( char, file, TRACE_FILE_SIZE+1 )
313 __field( char, correct )
314 ),
315
316 F_printk("%u:%s:%s (%u)",
317 __entry->line,
318 __entry->func, __entry->file, __entry->correct)
319);
320
321FTRACE_ENTRY(hw_branch, hw_branch_entry,
322
323 TRACE_HW_BRANCHES,
324
325 F_STRUCT(
326 __field( u64, from )
327 __field( u64, to )
328 ),
329
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331);
332
333FTRACE_ENTRY(power, trace_power,
334
335 TRACE_POWER,
336
337 F_STRUCT(
338 __field_struct( struct power_trace, state_data )
339 __field_desc( s64, state_data, stamp )
340 __field_desc( s64, state_data, end )
341 __field_desc( int, state_data, type )
342 __field_desc( int, state_data, state )
343 ),
344
345 F_printk("%llx->%llx type:%u state:%u",
346 __entry->stamp, __entry->end,
347 __entry->type, __entry->state)
348);
349
350FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
351
352 TRACE_KMEM_ALLOC,
353
354 F_STRUCT(
355 __field( enum kmemtrace_type_id, type_id )
356 __field( unsigned long, call_site )
357 __field( const void *, ptr )
358 __field( size_t, bytes_req )
359 __field( size_t, bytes_alloc )
360 __field( gfp_t, gfp_flags )
361 __field( int, node )
362 ),
363
364 F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
365 " flags:%x node:%d",
366 __entry->type_id, __entry->call_site, __entry->ptr,
367 __entry->bytes_req, __entry->bytes_alloc,
368 __entry->gfp_flags, __entry->node)
369);
370
371FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
372
373 TRACE_KMEM_FREE,
374
375 F_STRUCT(
376 __field( enum kmemtrace_type_id, type_id )
377 __field( unsigned long, call_site )
378 __field( const void *, ptr )
379 ),
380
381 F_printk("type:%u call_site:%lx ptr:%p",
382 __entry->type_id, __entry->call_site, __entry->ptr)
383);
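
Each FTRACE_ENTRY() in this file is expanded more than once: the FTRACE_ENTRY definition added to trace.h earlier in this diff turns it into the entry structure, while trace_export.c (below) re-expands the same text into format-file and field-definition code. With the trace.h definitions, the first entry above expands to roughly:

struct ftrace_entry {
	struct trace_entry	ent;
	unsigned long		ip;		/* from __field(unsigned long, ip) */
	unsigned long		parent_ip;	/* from __field(unsigned long, parent_ip) */
};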
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 11ba5bb4ed0a..55a25c933d15 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -5,6 +5,7 @@
5 * 5 *
6 */ 6 */
7 7
8#include <linux/module.h>
8#include "trace.h" 9#include "trace.h"
9 10
10int ftrace_profile_enable(int event_id) 11int ftrace_profile_enable(int event_id)
@@ -14,7 +15,8 @@ int ftrace_profile_enable(int event_id)
14 15
15 mutex_lock(&event_mutex); 16 mutex_lock(&event_mutex);
16 list_for_each_entry(event, &ftrace_events, list) { 17 list_for_each_entry(event, &ftrace_events, list) {
17 if (event->id == event_id && event->profile_enable) { 18 if (event->id == event_id && event->profile_enable &&
19 try_module_get(event->mod)) {
18 ret = event->profile_enable(event); 20 ret = event->profile_enable(event);
19 break; 21 break;
20 } 22 }
@@ -32,6 +34,7 @@ void ftrace_profile_disable(int event_id)
32 list_for_each_entry(event, &ftrace_events, list) { 34 list_for_each_entry(event, &ftrace_events, list) {
33 if (event->id == event_id) { 35 if (event->id == event_id) {
34 event->profile_disable(event); 36 event->profile_disable(event);
37 module_put(event->mod);
35 break; 38 break;
36 } 39 }
37 } 40 }
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
deleted file mode 100644
index 6db005e12487..000000000000
--- a/kernel/trace/trace_event_types.h
+++ /dev/null
@@ -1,178 +0,0 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM ftrace
3
4/*
5 * We cheat and use the proto type field as the ID
6 * and args as the entry type (minus 'struct')
7 */
8TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore,
9 TRACE_STRUCT(
10 TRACE_FIELD(unsigned long, ip, ip)
11 TRACE_FIELD(unsigned long, parent_ip, parent_ip)
12 ),
13 TP_RAW_FMT(" %lx <-- %lx")
14);
15
16TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT,
17 ftrace_graph_ent_entry, ignore,
18 TRACE_STRUCT(
19 TRACE_FIELD(unsigned long, graph_ent.func, func)
20 TRACE_FIELD(int, graph_ent.depth, depth)
21 ),
22 TP_RAW_FMT("--> %lx (%d)")
23);
24
25TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(unsigned long long, ret.calltime, calltime)
30 TRACE_FIELD(unsigned long long, ret.rettime, rettime)
31 TRACE_FIELD(unsigned long, ret.overrun, overrun)
32 TRACE_FIELD(int, ret.depth, depth)
33 ),
34 TP_RAW_FMT("<-- %lx (%d)")
35);
36
37TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore,
38 TRACE_STRUCT(
39 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
40 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
41 TRACE_FIELD(unsigned char, prev_state, prev_state)
42 TRACE_FIELD(unsigned int, next_pid, next_pid)
43 TRACE_FIELD(unsigned char, next_prio, next_prio)
44 TRACE_FIELD(unsigned char, next_state, next_state)
45 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
46 ),
47 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
48);
49
50TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
51 TRACE_STRUCT(
52 TRACE_FIELD(unsigned int, prev_pid, prev_pid)
53 TRACE_FIELD(unsigned char, prev_prio, prev_prio)
54 TRACE_FIELD(unsigned char, prev_state, prev_state)
55 TRACE_FIELD(unsigned int, next_pid, next_pid)
56 TRACE_FIELD(unsigned char, next_prio, next_prio)
57 TRACE_FIELD(unsigned char, next_state, next_state)
58 TRACE_FIELD(unsigned int, next_cpu, next_cpu)
59 ),
60 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
61);
62
63TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
64 TRACE_STRUCT(
65 TRACE_FIELD(unsigned long, arg1, arg1)
66 TRACE_FIELD(unsigned long, arg2, arg2)
67 TRACE_FIELD(unsigned long, arg3, arg3)
68 ),
69 TP_RAW_FMT("(%08lx) (%08lx) (%08lx)")
70);
71
72/*
73 * Stack-trace entry:
74 */
75
76/* #define FTRACE_STACK_ENTRIES 8 */
77
78TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore,
79 TRACE_STRUCT(
80 TRACE_FIELD(unsigned long, caller[0], stack0)
81 TRACE_FIELD(unsigned long, caller[1], stack1)
82 TRACE_FIELD(unsigned long, caller[2], stack2)
83 TRACE_FIELD(unsigned long, caller[3], stack3)
84 TRACE_FIELD(unsigned long, caller[4], stack4)
85 TRACE_FIELD(unsigned long, caller[5], stack5)
86 TRACE_FIELD(unsigned long, caller[6], stack6)
87 TRACE_FIELD(unsigned long, caller[7], stack7)
88 ),
89 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
90 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
91);
92
93TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore,
94 TRACE_STRUCT(
95 TRACE_FIELD(unsigned long, caller[0], stack0)
96 TRACE_FIELD(unsigned long, caller[1], stack1)
97 TRACE_FIELD(unsigned long, caller[2], stack2)
98 TRACE_FIELD(unsigned long, caller[3], stack3)
99 TRACE_FIELD(unsigned long, caller[4], stack4)
100 TRACE_FIELD(unsigned long, caller[5], stack5)
101 TRACE_FIELD(unsigned long, caller[6], stack6)
102 TRACE_FIELD(unsigned long, caller[7], stack7)
103 ),
104 TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
105 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n")
106);
107
108TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore,
109 TRACE_STRUCT(
110 TRACE_FIELD(unsigned long, ip, ip)
111 TRACE_FIELD(char *, fmt, fmt)
112 TRACE_FIELD_ZERO_CHAR(buf)
113 ),
114 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
115);
116
117TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
118 TRACE_STRUCT(
119 TRACE_FIELD(unsigned long, ip, ip)
120 TRACE_FIELD_ZERO_CHAR(buf)
121 ),
122 TP_RAW_FMT("%08lx (%d) fmt:%p %s")
123);
124
125TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
126 TRACE_STRUCT(
127 TRACE_FIELD(unsigned int, line, line)
128 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
129 TRACE_FUNC_SIZE+1, func)
130 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
131 TRACE_FUNC_SIZE+1, file)
132 TRACE_FIELD(char, correct, correct)
133 ),
134 TP_RAW_FMT("%u:%s:%s (%u)")
135);
136
137TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
138 TRACE_STRUCT(
139 TRACE_FIELD(u64, from, from)
140 TRACE_FIELD(u64, to, to)
141 ),
142 TP_RAW_FMT("from: %llx to: %llx")
143);
144
145TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
146 TRACE_STRUCT(
147 TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
148 TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
149 TRACE_FIELD(int, state_data.type, type)
150 TRACE_FIELD(int, state_data.state, state)
151 ),
152 TP_RAW_FMT("%llx->%llx type:%u state:%u")
153);
154
155TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore,
156 TRACE_STRUCT(
157 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
158 TRACE_FIELD(unsigned long, call_site, call_site)
159 TRACE_FIELD(const void *, ptr, ptr)
160 TRACE_FIELD(size_t, bytes_req, bytes_req)
161 TRACE_FIELD(size_t, bytes_alloc, bytes_alloc)
162 TRACE_FIELD(gfp_t, gfp_flags, gfp_flags)
163 TRACE_FIELD(int, node, node)
164 ),
165 TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu"
166 " flags:%x node:%d")
167);
168
169TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore,
170 TRACE_STRUCT(
171 TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id)
172 TRACE_FIELD(unsigned long, call_site, call_site)
173 TRACE_FIELD(const void *, ptr, ptr)
174 ),
175 TP_RAW_FMT("type:%u call_site:%lx ptr:%p")
176);
177
178#undef TRACE_SYSTEM
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 78b1ed230177..56c260b83a9c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -21,6 +21,7 @@
21 21
22#include "trace_output.h" 22#include "trace_output.h"
23 23
24#undef TRACE_SYSTEM
24#define TRACE_SYSTEM "TRACE_SYSTEM" 25#define TRACE_SYSTEM "TRACE_SYSTEM"
25 26
26DEFINE_MUTEX(event_mutex); 27DEFINE_MUTEX(event_mutex);
@@ -86,7 +87,7 @@ int trace_define_common_fields(struct ftrace_event_call *call)
86 __common_field(unsigned char, flags); 87 __common_field(unsigned char, flags);
87 __common_field(unsigned char, preempt_count); 88 __common_field(unsigned char, preempt_count);
88 __common_field(int, pid); 89 __common_field(int, pid);
89 __common_field(int, tgid); 90 __common_field(int, lock_depth);
90 91
91 return ret; 92 return ret;
92} 93}
@@ -230,11 +231,9 @@ static ssize_t
230ftrace_event_write(struct file *file, const char __user *ubuf, 231ftrace_event_write(struct file *file, const char __user *ubuf,
231 size_t cnt, loff_t *ppos) 232 size_t cnt, loff_t *ppos)
232{ 233{
234 struct trace_parser parser;
233 size_t read = 0; 235 size_t read = 0;
234 int i, set = 1;
235 ssize_t ret; 236 ssize_t ret;
236 char *buf;
237 char ch;
238 237
239 if (!cnt || cnt < 0) 238 if (!cnt || cnt < 0)
240 return 0; 239 return 0;
@@ -243,60 +242,28 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
243 if (ret < 0) 242 if (ret < 0)
244 return ret; 243 return ret;
245 244
246 ret = get_user(ch, ubuf++); 245 if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
247 if (ret)
248 return ret;
249 read++;
250 cnt--;
251
252 /* skip white space */
253 while (cnt && isspace(ch)) {
254 ret = get_user(ch, ubuf++);
255 if (ret)
256 return ret;
257 read++;
258 cnt--;
259 }
260
261 /* Only white space found? */
262 if (isspace(ch)) {
263 file->f_pos += read;
264 ret = read;
265 return ret;
266 }
267
268 buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
269 if (!buf)
270 return -ENOMEM; 246 return -ENOMEM;
271 247
272 if (cnt > EVENT_BUF_SIZE) 248 read = trace_get_user(&parser, ubuf, cnt, ppos);
273 cnt = EVENT_BUF_SIZE; 249
250 if (trace_parser_loaded((&parser))) {
251 int set = 1;
274 252
275 i = 0; 253 if (*parser.buffer == '!')
276 while (cnt && !isspace(ch)) {
277 if (!i && ch == '!')
278 set = 0; 254 set = 0;
279 else
280 buf[i++] = ch;
281 255
282 ret = get_user(ch, ubuf++); 256 parser.buffer[parser.idx] = 0;
257
258 ret = ftrace_set_clr_event(parser.buffer + !set, set);
283 if (ret) 259 if (ret)
284 goto out_free; 260 goto out_put;
285 read++;
286 cnt--;
287 } 261 }
288 buf[i] = 0;
289
290 file->f_pos += read;
291
292 ret = ftrace_set_clr_event(buf, set);
293 if (ret)
294 goto out_free;
295 262
296 ret = read; 263 ret = read;
297 264
298 out_free: 265 out_put:
299 kfree(buf); 266 trace_parser_put(&parser);
300 267
301 return ret; 268 return ret;
302} 269}
@@ -578,7 +545,7 @@ static int trace_write_header(struct trace_seq *s)
578 FIELD(unsigned char, flags), 545 FIELD(unsigned char, flags),
579 FIELD(unsigned char, preempt_count), 546 FIELD(unsigned char, preempt_count),
580 FIELD(int, pid), 547 FIELD(int, pid),
581 FIELD(int, tgid)); 548 FIELD(int, lock_depth));
582} 549}
583 550
584static ssize_t 551static ssize_t
@@ -1187,7 +1154,7 @@ static int trace_module_notify(struct notifier_block *self,
1187} 1154}
1188#endif /* CONFIG_MODULES */ 1155#endif /* CONFIG_MODULES */
1189 1156
1190struct notifier_block trace_module_nb = { 1157static struct notifier_block trace_module_nb = {
1191 .notifier_call = trace_module_notify, 1158 .notifier_call = trace_module_notify,
1192 .priority = 0, 1159 .priority = 0,
1193}; 1160};
@@ -1359,6 +1326,18 @@ static __init void event_trace_self_tests(void)
1359 if (!call->regfunc) 1326 if (!call->regfunc)
1360 continue; 1327 continue;
1361 1328
1329/*
1330 * Testing syscall events here is pretty useless, but
1331 * we still do it if configured. But this is time consuming.
1332 * What we really need is a user thread to perform the
1333 * syscalls as we test.
1334 */
1335#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1336 if (call->system &&
1337 strcmp(call->system, "syscalls") == 0)
1338 continue;
1339#endif
1340
1362 pr_info("Testing event %s: ", call->name); 1341 pr_info("Testing event %s: ", call->name);
1363 1342
1364 /* 1343 /*
@@ -1432,7 +1411,7 @@ static __init void event_trace_self_tests(void)
1432 1411
1433#ifdef CONFIG_FUNCTION_TRACER 1412#ifdef CONFIG_FUNCTION_TRACER
1434 1413
1435static DEFINE_PER_CPU(atomic_t, test_event_disable); 1414static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1436 1415
1437static void 1416static void
1438function_test_events_call(unsigned long ip, unsigned long parent_ip) 1417function_test_events_call(unsigned long ip, unsigned long parent_ip)
@@ -1449,7 +1428,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1449 pc = preempt_count(); 1428 pc = preempt_count();
1450 resched = ftrace_preempt_disable(); 1429 resched = ftrace_preempt_disable();
1451 cpu = raw_smp_processor_id(); 1430 cpu = raw_smp_processor_id();
1452 disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); 1431 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1453 1432
1454 if (disabled != 1) 1433 if (disabled != 1)
1455 goto out; 1434 goto out;
@@ -1468,7 +1447,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1468 trace_nowake_buffer_unlock_commit(buffer, event, flags, pc); 1447 trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1469 1448
1470 out: 1449 out:
1471 atomic_dec(&per_cpu(test_event_disable, cpu)); 1450 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1472 ftrace_preempt_enable(resched); 1451 ftrace_preempt_enable(resched);
1473} 1452}
1474 1453
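
ftrace_event_write() now also goes through trace_parser; the only extra twist is the leading '!', which turns a set into a clear before the token reaches ftrace_set_clr_event(parser.buffer + !set, set). A tiny standalone sketch of that prefix handling (apply_token() is an illustrative name, not a kernel function):

#include <stdio.h>

/* interpret a set_event token: a leading '!' means "clear" */
static void apply_token(const char *tok)
{
	int set = (tok[0] != '!');

	/* tok + !set skips the '!' when clearing, mirroring the kernel call */
	printf("%s %s\n", set ? "enable" : "disable", tok + !set);
}

int main(void)
{
	apply_token("sched:sched_switch");	/* enable  sched:sched_switch */
	apply_token("!sched:sched_switch");	/* disable sched:sched_switch */
	return 0;
}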
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 93660fbbf629..23245785927f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -121,6 +121,47 @@ struct filter_parse_state {
121 } operand; 121 } operand;
122}; 122};
123 123
124#define DEFINE_COMPARISON_PRED(type) \
125static int filter_pred_##type(struct filter_pred *pred, void *event, \
126 int val1, int val2) \
127{ \
128 type *addr = (type *)(event + pred->offset); \
129 type val = (type)pred->val; \
130 int match = 0; \
131 \
132 switch (pred->op) { \
133 case OP_LT: \
134 match = (*addr < val); \
135 break; \
136 case OP_LE: \
137 match = (*addr <= val); \
138 break; \
139 case OP_GT: \
140 match = (*addr > val); \
141 break; \
142 case OP_GE: \
143 match = (*addr >= val); \
144 break; \
145 default: \
146 break; \
147 } \
148 \
149 return match; \
150}
151
152#define DEFINE_EQUALITY_PRED(size) \
153static int filter_pred_##size(struct filter_pred *pred, void *event, \
154 int val1, int val2) \
155{ \
156 u##size *addr = (u##size *)(event + pred->offset); \
157 u##size val = (u##size)pred->val; \
158 int match; \
159 \
160 match = (val == *addr) ^ pred->not; \
161 \
162 return match; \
163}
164
124DEFINE_COMPARISON_PRED(s64); 165DEFINE_COMPARISON_PRED(s64);
125DEFINE_COMPARISON_PRED(u64); 166DEFINE_COMPARISON_PRED(u64);
126DEFINE_COMPARISON_PRED(s32); 167DEFINE_COMPARISON_PRED(s32);
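
The comparison and equality predicate macros were moved here from trace.h unchanged; each invocation stamps out one filter callback. For instance, DEFINE_COMPARISON_PRED(s64) above expands to roughly:

static int filter_pred_s64(struct filter_pred *pred, void *event,
			   int val1, int val2)
{
	s64 *addr = (s64 *)(event + pred->offset);
	s64 val = (s64)pred->val;
	int match = 0;

	switch (pred->op) {
	case OP_LT: match = (*addr < val);  break;
	case OP_LE: match = (*addr <= val); break;
	case OP_GT: match = (*addr > val);  break;
	case OP_GE: match = (*addr >= val); break;
	default:    break;
	}

	return match;
}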
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index df1bf6e48bb9..9753fcc61bc5 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -15,146 +15,125 @@
15 15
16#include "trace_output.h" 16#include "trace_output.h"
17 17
18#undef TRACE_SYSTEM
19#define TRACE_SYSTEM ftrace
18 20
19#undef TRACE_STRUCT 21/* not needed for this file */
20#define TRACE_STRUCT(args...) args 22#undef __field_struct
23#define __field_struct(type, item)
21 24
22extern void __bad_type_size(void); 25#undef __field
26#define __field(type, item) type item;
23 27
24#undef TRACE_FIELD 28#undef __field_desc
25#define TRACE_FIELD(type, item, assign) \ 29#define __field_desc(type, container, item) type item;
26 if (sizeof(type) != sizeof(field.item)) \ 30
27 __bad_type_size(); \ 31#undef __array
32#define __array(type, item, size) type item[size];
33
34#undef __array_desc
35#define __array_desc(type, container, item, size) type item[size];
36
37#undef __dynamic_array
38#define __dynamic_array(type, item) type item[];
39
40#undef F_STRUCT
41#define F_STRUCT(args...) args
42
43#undef F_printk
44#define F_printk(fmt, args...) fmt, args
45
46#undef FTRACE_ENTRY
47#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
48struct ____ftrace_##name { \
49 tstruct \
50}; \
51static void __used ____ftrace_check_##name(void) \
52{ \
53 struct ____ftrace_##name *__entry = NULL; \
54 \
55 /* force compile-time check on F_printk() */ \
56 printk(print); \
57}
58
59#undef FTRACE_ENTRY_DUP
60#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print) \
61 FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
62
63#include "trace_entries.h"
64
65
66#undef __field
67#define __field(type, item) \
28 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
29 "offset:%u;\tsize:%u;\n", \ 69 "offset:%zu;\tsize:%zu;\n", \
30 (unsigned int)offsetof(typeof(field), item), \ 70 offsetof(typeof(field), item), \
31 (unsigned int)sizeof(field.item)); \ 71 sizeof(field.item)); \
32 if (!ret) \ 72 if (!ret) \
33 return 0; 73 return 0;
34 74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item)); \
81 if (!ret) \
82 return 0;
35 83
36#undef TRACE_FIELD_SPECIAL 84#undef __array
37#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ 85#define __array(type, item, len) \
38 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ 86 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
39 "offset:%u;\tsize:%u;\n", \ 87 "offset:%zu;\tsize:%zu;\n", \
40 (unsigned int)offsetof(typeof(field), item), \ 88 offsetof(typeof(field), item), \
41 (unsigned int)sizeof(field.item)); \ 89 sizeof(field.item)); \
42 if (!ret) \ 90 if (!ret) \
43 return 0; 91 return 0;
44 92
45#undef TRACE_FIELD_ZERO_CHAR 93#undef __array_desc
46#define TRACE_FIELD_ZERO_CHAR(item) \ 94#define __array_desc(type, container, item, len) \
47 ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ 95 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
48 "offset:%u;\tsize:0;\n", \ 96 "offset:%zu;\tsize:%zu;\n", \
49 (unsigned int)offsetof(typeof(field), item)); \ 97 offsetof(typeof(field), container.item), \
98 sizeof(field.container.item)); \
50 if (!ret) \ 99 if (!ret) \
51 return 0; 100 return 0;
52 101
53#undef TRACE_FIELD_SIGN 102#undef __dynamic_array
54#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 103#define __dynamic_array(type, item) \
55 TRACE_FIELD(type, item, assign) 104 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
105 "offset:%zu;\tsize:0;\n", \
106 offsetof(typeof(field), item)); \
107 if (!ret) \
108 return 0;
56 109
57#undef TP_RAW_FMT 110#undef F_printk
58#define TP_RAW_FMT(args...) args 111#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
59 112
60#undef TRACE_EVENT_FORMAT 113#undef __entry
61#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 114#define __entry REC
62static int \
63ftrace_format_##call(struct ftrace_event_call *unused, \
64 struct trace_seq *s) \
65{ \
66 struct args field; \
67 int ret; \
68 \
69 tstruct; \
70 \
71 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
72 \
73 return ret; \
74}
75 115
76#undef TRACE_EVENT_FORMAT_NOFILTER 116#undef FTRACE_ENTRY
77#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ 117#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
78 tpfmt) \
79static int \ 118static int \
80ftrace_format_##call(struct ftrace_event_call *unused, \ 119ftrace_format_##name(struct ftrace_event_call *unused, \
81 struct trace_seq *s) \ 120 struct trace_seq *s) \
82{ \ 121{ \
83 struct args field; \ 122 struct struct_name field __attribute__((unused)); \
84 int ret; \ 123 int ret = 0; \
85 \ 124 \
86 tstruct; \ 125 tstruct; \
87 \ 126 \
88 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ 127 trace_seq_printf(s, "\nprint fmt: " print); \
89 \ 128 \
90 return ret; \ 129 return ret; \
91} 130}
92 131
93#include "trace_event_types.h" 132#include "trace_entries.h"
94
95#undef TRACE_ZERO_CHAR
96#define TRACE_ZERO_CHAR(arg)
97
98#undef TRACE_FIELD
99#define TRACE_FIELD(type, item, assign)\
100 entry->item = assign;
101
102#undef TRACE_FIELD
103#define TRACE_FIELD(type, item, assign)\
104 entry->item = assign;
105
106#undef TRACE_FIELD_SIGN
107#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
108 TRACE_FIELD(type, item, assign)
109
110#undef TP_CMD
111#define TP_CMD(cmd...) cmd
112
113#undef TRACE_ENTRY
114#define TRACE_ENTRY entry
115
116#undef TRACE_FIELD_SPECIAL
117#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
118 cmd;
119
120#undef TRACE_EVENT_FORMAT
121#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
122int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \
123static int ftrace_raw_init_event_##call(void); \
124 \
125struct ftrace_event_call __used \
126__attribute__((__aligned__(4))) \
127__attribute__((section("_ftrace_events"))) event_##call = { \
128 .name = #call, \
129 .id = proto, \
130 .system = __stringify(TRACE_SYSTEM), \
131 .raw_init = ftrace_raw_init_event_##call, \
132 .show_format = ftrace_format_##call, \
133 .define_fields = ftrace_define_fields_##call, \
134}; \
135static int ftrace_raw_init_event_##call(void) \
136{ \
137 INIT_LIST_HEAD(&event_##call.fields); \
138 return 0; \
139} \
140
141#undef TRACE_EVENT_FORMAT_NOFILTER
142#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
143 tpfmt) \
144 \
145struct ftrace_event_call __used \
146__attribute__((__aligned__(4))) \
147__attribute__((section("_ftrace_events"))) event_##call = { \
148 .name = #call, \
149 .id = proto, \
150 .system = __stringify(TRACE_SYSTEM), \
151 .show_format = ftrace_format_##call, \
152};
153 133
154#include "trace_event_types.h"
155 134
156#undef TRACE_FIELD 135#undef __field
157#define TRACE_FIELD(type, item, assign) \ 136#define __field(type, item) \
158 ret = trace_define_field(event_call, #type, #item, \ 137 ret = trace_define_field(event_call, #type, #item, \
159 offsetof(typeof(field), item), \ 138 offsetof(typeof(field), item), \
160 sizeof(field.item), \ 139 sizeof(field.item), \
@@ -162,32 +141,45 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
162 if (ret) \ 141 if (ret) \
163 return ret; 142 return ret;
164 143
165#undef TRACE_FIELD_SPECIAL 144#undef __field_desc
166#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ 145#define __field_desc(type, container, item) \
146 ret = trace_define_field(event_call, #type, #item, \
147 offsetof(typeof(field), \
148 container.item), \
149 sizeof(field.container.item), \
150 is_signed_type(type), FILTER_OTHER); \
151 if (ret) \
152 return ret;
153
154#undef __array
155#define __array(type, item, len) \
156 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
167 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 157 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
168 offsetof(typeof(field), item), \ 158 offsetof(typeof(field), item), \
169 sizeof(field.item), 0, FILTER_OTHER); \ 159 sizeof(field.item), 0, FILTER_OTHER); \
170 if (ret) \ 160 if (ret) \
171 return ret; 161 return ret;
172 162
173#undef TRACE_FIELD_SIGN 163#undef __array_desc
174#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ 164#define __array_desc(type, container, item, len) \
175 ret = trace_define_field(event_call, #type, #item, \ 165 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
176 offsetof(typeof(field), item), \ 166 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
177 sizeof(field.item), is_signed, \ 167 offsetof(typeof(field), \
168 container.item), \
169 sizeof(field.container.item), 0, \
178 FILTER_OTHER); \ 170 FILTER_OTHER); \
179 if (ret) \ 171 if (ret) \
180 return ret; 172 return ret;
181 173
182#undef TRACE_FIELD_ZERO_CHAR 174#undef __dynamic_array
183#define TRACE_FIELD_ZERO_CHAR(item) 175#define __dynamic_array(type, item)
184 176
185#undef TRACE_EVENT_FORMAT 177#undef FTRACE_ENTRY
186#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 178#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
187int \ 179int \
188ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ 180ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
189{ \ 181{ \
190 struct args field; \ 182 struct struct_name field; \
191 int ret; \ 183 int ret; \
192 \ 184 \
193 ret = trace_define_common_fields(event_call); \ 185 ret = trace_define_common_fields(event_call); \
@@ -199,8 +191,42 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \
199 return ret; \ 191 return ret; \
200} 192}
201 193
202#undef TRACE_EVENT_FORMAT_NOFILTER 194#include "trace_entries.h"
203#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ 195
204 tpfmt) 196
197#undef __field
198#define __field(type, item)
199
200#undef __field_desc
201#define __field_desc(type, container, item)
202
203#undef __array
204#define __array(type, item, len)
205
206#undef __array_desc
207#define __array_desc(type, container, item, len)
208
209#undef __dynamic_array
210#define __dynamic_array(type, item)
211
212#undef FTRACE_ENTRY
213#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
214static int ftrace_raw_init_event_##call(void); \
215 \
216struct ftrace_event_call __used \
217__attribute__((__aligned__(4))) \
218__attribute__((section("_ftrace_events"))) event_##call = { \
219 .name = #call, \
220 .id = type, \
221 .system = __stringify(TRACE_SYSTEM), \
222 .raw_init = ftrace_raw_init_event_##call, \
223 .show_format = ftrace_format_##call, \
224 .define_fields = ftrace_define_fields_##call, \
225}; \
226static int ftrace_raw_init_event_##call(void) \
227{ \
228 INIT_LIST_HEAD(&event_##call.fields); \
229 return 0; \
230} \
205 231
206#include "trace_event_types.h" 232#include "trace_entries.h"
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 5b01b94518fc..b3f3776b0cd6 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -290,7 +290,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
290{ 290{
291 long count = (long)data; 291 long count = (long)data;
292 292
293 seq_printf(m, "%pf:", (void *)ip); 293 seq_printf(m, "%ps:", (void *)ip);
294 294
295 if (ops == &traceon_probe_ops) 295 if (ops == &traceon_probe_ops)
296 seq_printf(m, "traceon"); 296 seq_printf(m, "traceon");
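
The one-character change above, %pf to %ps, switches the printk symbol specifier: both print a symbol name without an offset, but %pf first treats the pointer as a function descriptor on architectures that use descriptors, while the ip values the tracer records are already plain text addresses. A two-line fragment for comparison only, not compilable on its own; names follow the hunk:

        seq_printf(m, "%pf:", (void *)ip);      /* old: descriptor lookup first */
        seq_printf(m, "%ps:", (void *)ip);      /* new: resolve the raw text address */
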
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b3749a2c3132..45e6c01b2e4d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -124,7 +124,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
124 if (unlikely(current->ret_stack[index].fp != frame_pointer)) { 124 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
125 ftrace_graph_stop(); 125 ftrace_graph_stop();
126 WARN(1, "Bad frame pointer: expected %lx, received %lx\n" 126 WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
127 " from func %pF return to %lx\n", 127 " from func %ps return to %lx\n",
128 current->ret_stack[index].fp, 128 current->ret_stack[index].fp,
129 frame_pointer, 129 frame_pointer,
130 (void *)current->ret_stack[index].func, 130 (void *)current->ret_stack[index].func,
@@ -364,6 +364,15 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
364} 364}
365 365
366 366
367static enum print_line_t
368print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
369{
370 if (!trace_seq_putc(s, ' '))
371 return 0;
372
373 return trace_print_lat_fmt(s, entry);
374}
375
367/* If the pid changed since the last trace, output this event */ 376/* If the pid changed since the last trace, output this event */
368static enum print_line_t 377static enum print_line_t
369verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) 378verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
@@ -521,6 +530,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
521 if (ret == TRACE_TYPE_PARTIAL_LINE) 530 if (ret == TRACE_TYPE_PARTIAL_LINE)
522 return TRACE_TYPE_PARTIAL_LINE; 531 return TRACE_TYPE_PARTIAL_LINE;
523 } 532 }
533
524 /* Proc */ 534 /* Proc */
525 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 535 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
526 ret = print_graph_proc(s, pid); 536 ret = print_graph_proc(s, pid);
@@ -659,7 +669,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
659 return TRACE_TYPE_PARTIAL_LINE; 669 return TRACE_TYPE_PARTIAL_LINE;
660 } 670 }
661 671
662 ret = trace_seq_printf(s, "%pf();\n", (void *)call->func); 672 ret = trace_seq_printf(s, "%ps();\n", (void *)call->func);
663 if (!ret) 673 if (!ret)
664 return TRACE_TYPE_PARTIAL_LINE; 674 return TRACE_TYPE_PARTIAL_LINE;
665 675
@@ -702,7 +712,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
702 return TRACE_TYPE_PARTIAL_LINE; 712 return TRACE_TYPE_PARTIAL_LINE;
703 } 713 }
704 714
705 ret = trace_seq_printf(s, "%pf() {\n", (void *)call->func); 715 ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func);
706 if (!ret) 716 if (!ret)
707 return TRACE_TYPE_PARTIAL_LINE; 717 return TRACE_TYPE_PARTIAL_LINE;
708 718
@@ -758,6 +768,13 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
758 return TRACE_TYPE_PARTIAL_LINE; 768 return TRACE_TYPE_PARTIAL_LINE;
759 } 769 }
760 770
771 /* Latency format */
772 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
773 ret = print_graph_lat_fmt(s, ent);
774 if (ret == TRACE_TYPE_PARTIAL_LINE)
775 return TRACE_TYPE_PARTIAL_LINE;
776 }
777
761 return 0; 778 return 0;
762} 779}
763 780
@@ -952,28 +969,59 @@ print_graph_function(struct trace_iterator *iter)
952 return TRACE_TYPE_HANDLED; 969 return TRACE_TYPE_HANDLED;
953} 970}
954 971
972static void print_lat_header(struct seq_file *s)
973{
974 static const char spaces[] = " " /* 16 spaces */
975 " " /* 4 spaces */
976 " "; /* 17 spaces */
977 int size = 0;
978
979 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
980 size += 16;
981 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
982 size += 4;
983 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
984 size += 17;
985
986 seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces);
987 seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces);
988 seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
989 seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces);
990 seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces);
991 seq_printf(s, "#%.*s|||| / \n", size, spaces);
992}
993
955static void print_graph_headers(struct seq_file *s) 994static void print_graph_headers(struct seq_file *s)
956{ 995{
996 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
997
998 if (lat)
999 print_lat_header(s);
1000
957 /* 1st line */ 1001 /* 1st line */
958 seq_printf(s, "# "); 1002 seq_printf(s, "#");
959 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1003 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
960 seq_printf(s, " TIME "); 1004 seq_printf(s, " TIME ");
961 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1005 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
962 seq_printf(s, "CPU"); 1006 seq_printf(s, " CPU");
963 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1007 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
964 seq_printf(s, " TASK/PID "); 1008 seq_printf(s, " TASK/PID ");
1009 if (lat)
1010 seq_printf(s, "|||||");
965 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1011 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
966 seq_printf(s, " DURATION "); 1012 seq_printf(s, " DURATION ");
967 seq_printf(s, " FUNCTION CALLS\n"); 1013 seq_printf(s, " FUNCTION CALLS\n");
968 1014
969 /* 2nd line */ 1015 /* 2nd line */
970 seq_printf(s, "# "); 1016 seq_printf(s, "#");
971 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1017 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
972 seq_printf(s, " | "); 1018 seq_printf(s, " | ");
973 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1019 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
974 seq_printf(s, "| "); 1020 seq_printf(s, " | ");
975 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1021 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
976 seq_printf(s, " | | "); 1022 seq_printf(s, " | | ");
1023 if (lat)
1024 seq_printf(s, "|||||");
977 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1025 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
978 seq_printf(s, " | | "); 1026 seq_printf(s, " | | ");
979 seq_printf(s, " | | | |\n"); 1027 seq_printf(s, " | | | |\n");
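
print_lat_header() above indents the latency legend with one fixed padding string and a "%.*s" conversion, where the computed precision equals the combined width of whichever optional TIME, CPU and TASK/PID columns are enabled. A small standalone sketch of that formatting trick, with made-up flag variables standing in for tracer_flags.val:

#include <stdio.h>

int main(void)
{
        static const char spaces[] =
                "          " "          "
                "          " "          ";     /* 40 blanks, >= 37 needed */
        int abs_time = 1, cpu = 1, proc = 0;    /* pretend tracer flags */
        int size = 0;

        if (abs_time)
                size += 16;     /* width of the TIME column */
        if (cpu)
                size += 4;      /* width of the CPU column */
        if (proc)
                size += 17;     /* width of the TASK/PID column */

        /* Precision caps how much of the padding string is printed. */
        printf("#%.*s _-----=> irqs-off\n", size, spaces);
        printf("#%.*s / _----=> need-resched\n", size, spaces);
        printf("#%.*s| / _---=> hardirq/softirq\n", size, spaces);
        return 0;
}
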
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 5555b75a0d12..3aa7eaa2114c 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -129,15 +129,10 @@ check_critical_timing(struct trace_array *tr,
129 unsigned long parent_ip, 129 unsigned long parent_ip,
130 int cpu) 130 int cpu)
131{ 131{
132 unsigned long latency, t0, t1;
133 cycle_t T0, T1, delta; 132 cycle_t T0, T1, delta;
134 unsigned long flags; 133 unsigned long flags;
135 int pc; 134 int pc;
136 135
137 /*
138 * usecs conversion is slow so we try to delay the conversion
139 * as long as possible:
140 */
141 T0 = data->preempt_timestamp; 136 T0 = data->preempt_timestamp;
142 T1 = ftrace_now(cpu); 137 T1 = ftrace_now(cpu);
143 delta = T1-T0; 138 delta = T1-T0;
@@ -157,18 +152,15 @@ check_critical_timing(struct trace_array *tr,
157 152
158 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
159 154
160 latency = nsecs_to_usecs(delta);
161
162 if (data->critical_sequence != max_sequence) 155 if (data->critical_sequence != max_sequence)
163 goto out_unlock; 156 goto out_unlock;
164 157
165 tracing_max_latency = delta;
166 t0 = nsecs_to_usecs(T0);
167 t1 = nsecs_to_usecs(T1);
168
169 data->critical_end = parent_ip; 158 data->critical_end = parent_ip;
170 159
171 update_max_tr_single(tr, current, cpu); 160 if (likely(!is_tracing_stopped())) {
161 tracing_max_latency = delta;
162 update_max_tr_single(tr, current, cpu);
163 }
172 164
173 max_sequence++; 165 max_sequence++;
174 166
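
The surviving logic in check_critical_timing() above only publishes a new maximum, tracing_max_latency plus the update_max_tr_single() snapshot, while is_tracing_stopped() is false; the unused usecs conversions are simply dropped. A standalone sketch of that guard, with stand-in names noted in the comments:

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustration only: is_stopped, max_latency and snapshot() are stand-ins
 * for is_tracing_stopped(), tracing_max_latency and update_max_tr_single().
 */
static bool is_stopped;
static unsigned long long max_latency;

static void snapshot(unsigned long long delta)
{
        printf("new max latency: %llu ns\n", delta);
}

static void report_latency_hit(unsigned long long delta)
{
        if (delta <= max_latency)
                return;                 /* the report_latency() check */

        if (!is_stopped) {              /* is_tracing_stopped() equivalent */
                max_latency = delta;
                snapshot(delta);        /* keep the frozen snapshot otherwise */
        }
}

int main(void)
{
        report_latency_hit(1000);       /* recorded */
        is_stopped = true;
        report_latency_hit(5000);       /* ignored: snapshot stays frozen */
        return 0;
}
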
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index c4c9bbda53d3..0acd834659ed 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -307,6 +307,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
307 struct trace_array_cpu *data, 307 struct trace_array_cpu *data,
308 struct mmiotrace_rw *rw) 308 struct mmiotrace_rw *rw)
309{ 309{
310 struct ftrace_event_call *call = &event_mmiotrace_rw;
310 struct ring_buffer *buffer = tr->buffer; 311 struct ring_buffer *buffer = tr->buffer;
311 struct ring_buffer_event *event; 312 struct ring_buffer_event *event;
312 struct trace_mmiotrace_rw *entry; 313 struct trace_mmiotrace_rw *entry;
@@ -320,7 +321,9 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
320 } 321 }
321 entry = ring_buffer_event_data(event); 322 entry = ring_buffer_event_data(event);
322 entry->rw = *rw; 323 entry->rw = *rw;
323 trace_buffer_unlock_commit(buffer, event, 0, pc); 324
325 if (!filter_check_discard(call, entry, buffer, event))
326 trace_buffer_unlock_commit(buffer, event, 0, pc);
324} 327}
325 328
326void mmio_trace_rw(struct mmiotrace_rw *rw) 329void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -334,6 +337,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
334 struct trace_array_cpu *data, 337 struct trace_array_cpu *data,
335 struct mmiotrace_map *map) 338 struct mmiotrace_map *map)
336{ 339{
340 struct ftrace_event_call *call = &event_mmiotrace_map;
337 struct ring_buffer *buffer = tr->buffer; 341 struct ring_buffer *buffer = tr->buffer;
338 struct ring_buffer_event *event; 342 struct ring_buffer_event *event;
339 struct trace_mmiotrace_map *entry; 343 struct trace_mmiotrace_map *entry;
@@ -347,7 +351,9 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
347 } 351 }
348 entry = ring_buffer_event_data(event); 352 entry = ring_buffer_event_data(event);
349 entry->map = *map; 353 entry->map = *map;
350 trace_buffer_unlock_commit(buffer, event, 0, pc); 354
355 if (!filter_check_discard(call, entry, buffer, event))
356 trace_buffer_unlock_commit(buffer, event, 0, pc);
351} 357}
352 358
353void mmio_trace_mapping(struct mmiotrace_map *map) 359void mmio_trace_mapping(struct mmiotrace_map *map)
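
Both mmiotrace helpers above now pass the filled-in entry through filter_check_discard() and only call trace_buffer_unlock_commit() when the filter has not already discarded the event. A standalone sketch of that reserve, fill, filter, commit shape, with the ring buffer and per-event filter reduced to stand-ins:

#include <stdbool.h>
#include <stdio.h>

struct event { int payload; };

static bool filter_says_drop;   /* stand-in for the per-event filter state */

/* Returns nonzero when the event has been discarded, as the call pattern in
 * the hunk implies for filter_check_discard(). */
static int filter_check_discard_stub(struct event *ev)
{
        (void)ev;
        return filter_says_drop;
}

static void commit_stub(struct event *ev)
{
        printf("committed payload %d\n", ev->payload);
}

static void trace_one(int payload)
{
        struct event ev = { .payload = payload };       /* reserve + fill */

        if (!filter_check_discard_stub(&ev))            /* filter */
                commit_stub(&ev);                       /* commit */
}

int main(void)
{
        trace_one(1);
        filter_says_drop = true;
        trace_one(2);           /* dropped, nothing printed */
        return 0;
}
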
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e0c2545622e8..f572f44c6e1e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -407,7 +407,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
407 * since individual threads might have already quit! 407 * since individual threads might have already quit!
408 */ 408 */
409 rcu_read_lock(); 409 rcu_read_lock();
410 task = find_task_by_vpid(entry->ent.tgid); 410 task = find_task_by_vpid(entry->tgid);
411 if (task) 411 if (task)
412 mm = get_task_mm(task); 412 mm = get_task_mm(task);
413 rcu_read_unlock(); 413 rcu_read_unlock();
@@ -460,18 +460,23 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
460 return ret; 460 return ret;
461} 461}
462 462
463static int 463/**
464lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) 464 * trace_print_lat_fmt - print the irq, preempt and lockdep fields
465 * @s: trace seq struct to write to
466 * @entry: The trace entry field from the ring buffer
467 *
468 * Prints the generic fields of irqs off, in hard or softirq, preempt
469 * count and lock depth.
470 */
471int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
465{ 472{
466 int hardirq, softirq; 473 int hardirq, softirq;
467 char comm[TASK_COMM_LEN]; 474 int ret;
468 475
469 trace_find_cmdline(entry->pid, comm);
470 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 476 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
471 softirq = entry->flags & TRACE_FLAG_SOFTIRQ; 477 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
472 478
473 if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", 479 if (!trace_seq_printf(s, "%c%c%c",
474 comm, entry->pid, cpu,
475 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : 480 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
476 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 481 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
477 'X' : '.', 482 'X' : '.',
@@ -481,9 +486,30 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
481 hardirq ? 'h' : softirq ? 's' : '.')) 486 hardirq ? 'h' : softirq ? 's' : '.'))
482 return 0; 487 return 0;
483 488
489 if (entry->lock_depth < 0)
490 ret = trace_seq_putc(s, '.');
491 else
492 ret = trace_seq_printf(s, "%d", entry->lock_depth);
493 if (!ret)
494 return 0;
495
484 if (entry->preempt_count) 496 if (entry->preempt_count)
485 return trace_seq_printf(s, "%x", entry->preempt_count); 497 return trace_seq_printf(s, "%x", entry->preempt_count);
486 return trace_seq_puts(s, "."); 498 return trace_seq_putc(s, '.');
499}
500
501static int
502lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
503{
504 char comm[TASK_COMM_LEN];
505
506 trace_find_cmdline(entry->pid, comm);
507
508 if (!trace_seq_printf(s, "%8.8s-%-5d %3d",
509 comm, entry->pid, cpu))
510 return 0;
511
512 return trace_print_lat_fmt(s, entry);
487} 513}
488 514
489static unsigned long preempt_mark_thresh = 100; 515static unsigned long preempt_mark_thresh = 100;
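
lat_print_generic() above is split so that the generic flag columns live in the exported trace_print_lat_fmt(), which the graph tracer reuses through print_graph_lat_fmt(); the new lock-depth column prints '.' when entry->lock_depth is negative. A standalone sketch of the flag-to-character mapping, simplified to the specifiers visible in the hunk and using made-up FLAG_* constants in place of TRACE_FLAG_*:

#include <stdio.h>

#define FLAG_IRQS_OFF   0x01
#define FLAG_HARDIRQ    0x02
#define FLAG_SOFTIRQ    0x04

static char irq_char(unsigned flags)
{
        return (flags & FLAG_IRQS_OFF) ? 'd' : '.';
}

static char ctx_char(unsigned flags)
{
        if (flags & FLAG_HARDIRQ)
                return 'h';
        return (flags & FLAG_SOFTIRQ) ? 's' : '.';
}

int main(void)
{
        unsigned flags = FLAG_IRQS_OFF | FLAG_SOFTIRQ;
        int lock_depth = -1, preempt_count = 1;

        /* irq flag, context flag, lock depth ('.' when negative), preempt count */
        printf("%c%c", irq_char(flags), ctx_char(flags));
        if (lock_depth < 0)
                printf(".");
        else
                printf("%d", lock_depth);
        if (preempt_count)
                printf("%x", preempt_count);
        else
                printf(".");
        printf("\n");           /* prints "ds.1" for these values */
        return 0;
}
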
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index d38bec4a9c30..9d91c72ba38b 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -26,6 +26,8 @@ extern struct trace_event *ftrace_find_event(int type);
26 26
27extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 27extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
28 int flags); 28 int flags);
29extern int
30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
29 31
30/* used by module unregistering */ 32/* used by module unregistering */
31extern int __unregister_ftrace_event(struct trace_event *event); 33extern int __unregister_ftrace_event(struct trace_event *event);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ad69f105a7c6..26185d727676 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -24,6 +24,7 @@ static int __read_mostly tracer_enabled;
24 24
25static struct task_struct *wakeup_task; 25static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static int wakeup_current_cpu;
27static unsigned wakeup_prio = -1; 28static unsigned wakeup_prio = -1;
28static int wakeup_rt; 29static int wakeup_rt;
29 30
@@ -56,33 +57,23 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
56 resched = ftrace_preempt_disable(); 57 resched = ftrace_preempt_disable();
57 58
58 cpu = raw_smp_processor_id(); 59 cpu = raw_smp_processor_id();
60 if (cpu != wakeup_current_cpu)
61 goto out_enable;
62
59 data = tr->data[cpu]; 63 data = tr->data[cpu];
60 disabled = atomic_inc_return(&data->disabled); 64 disabled = atomic_inc_return(&data->disabled);
61 if (unlikely(disabled != 1)) 65 if (unlikely(disabled != 1))
62 goto out; 66 goto out;
63 67
64 local_irq_save(flags); 68 local_irq_save(flags);
65 __raw_spin_lock(&wakeup_lock);
66
67 if (unlikely(!wakeup_task))
68 goto unlock;
69
70 /*
71 * The task can't disappear because it needs to
72 * wake up first, and we have the wakeup_lock.
73 */
74 if (task_cpu(wakeup_task) != cpu)
75 goto unlock;
76 69
77 trace_function(tr, ip, parent_ip, flags, pc); 70 trace_function(tr, ip, parent_ip, flags, pc);
78 71
79 unlock:
80 __raw_spin_unlock(&wakeup_lock);
81 local_irq_restore(flags); 72 local_irq_restore(flags);
82 73
83 out: 74 out:
84 atomic_dec(&data->disabled); 75 atomic_dec(&data->disabled);
85 76 out_enable:
86 ftrace_preempt_enable(resched); 77 ftrace_preempt_enable(resched);
87} 78}
88 79
@@ -107,11 +98,18 @@ static int report_latency(cycle_t delta)
107 return 1; 98 return 1;
108} 99}
109 100
101static void probe_wakeup_migrate_task(struct task_struct *task, int cpu)
102{
103 if (task != wakeup_task)
104 return;
105
106 wakeup_current_cpu = cpu;
107}
108
110static void notrace 109static void notrace
111probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, 110probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
112 struct task_struct *next) 111 struct task_struct *next)
113{ 112{
114 unsigned long latency = 0, t0 = 0, t1 = 0;
115 struct trace_array_cpu *data; 113 struct trace_array_cpu *data;
116 cycle_t T0, T1, delta; 114 cycle_t T0, T1, delta;
117 unsigned long flags; 115 unsigned long flags;
@@ -157,10 +155,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 155 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
159 157
160 /*
161 * usecs conversion is slow so we try to delay the conversion
162 * as long as possible:
163 */
164 T0 = data->preempt_timestamp; 158 T0 = data->preempt_timestamp;
165 T1 = ftrace_now(cpu); 159 T1 = ftrace_now(cpu);
166 delta = T1-T0; 160 delta = T1-T0;
@@ -168,13 +162,10 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
168 if (!report_latency(delta)) 162 if (!report_latency(delta))
169 goto out_unlock; 163 goto out_unlock;
170 164
171 latency = nsecs_to_usecs(delta); 165 if (likely(!is_tracing_stopped())) {
172 166 tracing_max_latency = delta;
173 tracing_max_latency = delta; 167 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
174 t0 = nsecs_to_usecs(T0); 168 }
175 t1 = nsecs_to_usecs(T1);
176
177 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
178 169
179out_unlock: 170out_unlock:
180 __wakeup_reset(wakeup_trace); 171 __wakeup_reset(wakeup_trace);
@@ -244,6 +235,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
244 __wakeup_reset(wakeup_trace); 235 __wakeup_reset(wakeup_trace);
245 236
246 wakeup_cpu = task_cpu(p); 237 wakeup_cpu = task_cpu(p);
238 wakeup_current_cpu = wakeup_cpu;
247 wakeup_prio = p->prio; 239 wakeup_prio = p->prio;
248 240
249 wakeup_task = p; 241 wakeup_task = p;
@@ -293,6 +285,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
293 goto fail_deprobe_wake_new; 285 goto fail_deprobe_wake_new;
294 } 286 }
295 287
288 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task);
289 if (ret) {
290 pr_info("wakeup trace: Couldn't activate tracepoint"
291 " probe to kernel_sched_migrate_task\n");
292 return;
293 }
294
296 wakeup_reset(tr); 295 wakeup_reset(tr);
297 296
298 /* 297 /*
@@ -325,6 +324,7 @@ static void stop_wakeup_tracer(struct trace_array *tr)
325 unregister_trace_sched_switch(probe_wakeup_sched_switch); 324 unregister_trace_sched_switch(probe_wakeup_sched_switch);
326 unregister_trace_sched_wakeup_new(probe_wakeup); 325 unregister_trace_sched_wakeup_new(probe_wakeup);
327 unregister_trace_sched_wakeup(probe_wakeup); 326 unregister_trace_sched_wakeup(probe_wakeup);
327 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task);
328} 328}
329 329
330static int __wakeup_tracer_init(struct trace_array *tr) 330static int __wakeup_tracer_init(struct trace_array *tr)
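
The wakeup tracer changes above register probe_wakeup_migrate_task() on the sched_migrate_task tracepoint and mirror the traced task's CPU into wakeup_current_cpu, which lets wakeup_tracer_call() replace the wakeup_lock/task_cpu() check with a single CPU comparison. A standalone sketch of that fast path, with userspace stand-ins for the kernel types and the tracepoint registration elided:

#include <stdio.h>

struct task { int id; };

static struct task *wakeup_task;        /* task whose wakeup latency is traced */
static int wakeup_current_cpu;          /* kept up to date by the migrate probe */

/* Probe attached to the (stand-in) sched_migrate_task tracepoint. */
static void probe_wakeup_migrate_task(struct task *t, int cpu)
{
        if (t != wakeup_task)
                return;
        wakeup_current_cpu = cpu;       /* follow the task as it moves */
}

/* Per-function callback: cheap early exit on every other CPU. */
static void wakeup_tracer_call(int this_cpu, const char *func)
{
        if (this_cpu != wakeup_current_cpu)
                return;                 /* no lock, no task_cpu() lookup */
        printf("cpu%d: traced %s\n", this_cpu, func);
}

int main(void)
{
        struct task woken = { .id = 42 };

        wakeup_task = &woken;
        wakeup_current_cpu = 0;

        wakeup_tracer_call(0, "do_something");  /* traced */
        probe_wakeup_migrate_task(&woken, 2);   /* task migrates to CPU 2 */
        wakeup_tracer_call(0, "do_something");  /* now skipped */
        wakeup_tracer_call(2, "do_something");  /* traced on the new CPU */
        return 0;
}
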