31 files changed, 6460 insertions, 89 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 52462ae26455..e032716c839b 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -61,6 +61,9 @@ OPTIONS
 -r::
 --realtime=::
        Collect data with this RT SCHED_FIFO priority.
+-D::
+--no-delay::
+        Collect data without buffering.
 -A::
 --append::
        Append to the output file to do incremental profiling.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1b9b13ee2a72..2b5387d53ba5 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -227,7 +227,7 @@ ifndef PERF_DEBUG
  CFLAGS_OPTIMIZE = -O6
 endif
-CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 EXTLIBS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 ALL_LDFLAGS = $(LDFLAGS)
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
new file mode 100644
index 000000000000..15130b50dfe3
--- /dev/null
+++ b/tools/perf/arch/s390/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c
new file mode 100644
index 000000000000..e19653e025fa
--- /dev/null
+++ b/tools/perf/arch/s390/util/dwarf-regs.c
@@ -0,0 +1,22 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ *    Copyright IBM Corp. 2010
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+#include <stddef.h>
+#include <dwarf-regs.h>
+#define NUM_GPRS 16
+/* Register names indexed by DWARF register number (0 .. NUM_GPRS - 1). */
+static const char * const gpr_names[NUM_GPRS] = {
+        "%r0", "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
+        "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+};
+/*
+ * Map a DWARF register number to its register name.
+ *
+ * Returns the name string for n in [0, NUM_GPRS), or NULL for any other
+ * register number.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+        return (n >= NUM_GPRS) ? NULL : gpr_names[n];
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bc049035484..df6064ad9bf2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,7 @@ static int			pipe_output			=      0;
 static const char               *output_name                    = "perf.data";
 static int                      group                           =      0;
 static int                      realtime_prio                   =      0;
+static bool                     nodelay                         =  false;
 static bool                     raw_samples                     =  false;
 static bool                     sample_id_all_avail             =   true;
 static bool                     system_wide                     =  false;
@@ -307,6 +308,11 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }
+        if (nodelay) {
+                attr->watermark = 0;
+                attr->wakeup_events = 1;
+        }
        attr->mmap              = track;
        attr->comm              = track;
        attr->inherit           = !no_inherit;
@@ -477,6 +483,7 @@ static void atexit_header(void)
                        process_buildids();
                perf_header__write(&session->header, output, true);
                perf_session__delete(session);
+                perf_evsel_list__delete();
                symbol__exit();
        }
 }
@@ -842,6 +849,8 @@ const struct option record_options[] = {
                    "record events on existing thread id"),
        OPT_INTEGER('r', "realtime", &realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
+        OPT_BOOLEAN('D', "no-delay", &nodelay,
+                    "collect data without buffering"),
        OPT_BOOLEAN('R', "raw-samples", &raw_samples,
                    "collect raw sample records from all opened counters"),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7a4ebeb8b016..29e7ffd85690 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -489,7 +489,8 @@ static void create_tasks(void)
        err = pthread_attr_init(&attr);
        BUG_ON(err);
-        err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
+        err = pthread_attr_setstacksize(&attr,
+                        (size_t) max(16 * 1024, PTHREAD_STACK_MIN));
        BUG_ON(err);
        err = pthread_mutex_lock(&start_work_mutex);
        BUG_ON(err);
@@ -1842,15 +1843,15 @@ static const char *record_args[] = {
        "-f",
        "-m", "1024",
        "-c", "1",
-        "-e", "sched:sched_switch:r",
+        "-e", "sched:sched_switch",
-        "-e", "sched:sched_stat_wait:r",
+        "-e", "sched:sched_stat_wait",
-        "-e", "sched:sched_stat_sleep:r",
+        "-e", "sched:sched_stat_sleep",
-        "-e", "sched:sched_stat_iowait:r",
+        "-e", "sched:sched_stat_iowait",
-        "-e", "sched:sched_stat_runtime:r",
+        "-e", "sched:sched_stat_runtime",
-        "-e", "sched:sched_process_exit:r",
+        "-e", "sched:sched_process_exit",
-        "-e", "sched:sched_process_fork:r",
+        "-e", "sched:sched_process_fork",
-        "-e", "sched:sched_wakeup:r",
+        "-e", "sched:sched_wakeup",
-        "-e", "sched:sched_migrate_task:r",
+        "-e", "sched:sched_migrate_task",
 };
 static int __cmd_record(int argc, const char **argv)
@@ -1861,7 +1862,7 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
-        if (rec_argv)
+        if (rec_argv == NULL)
                return -ENOMEM;
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 02b2d8013a61..0ff11d9b13be 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -316,6 +316,8 @@ static int run_perf_stat(int argc __used, const char **argv)
                                      "\t Consider tweaking"
                                      " /proc/sys/kernel/perf_event_paranoid or running as root.",
                                      system_wide ? "system-wide " : "");
+                        } else if (errno == ENOENT) {
+                                error("%s event is not supported. ", event_name(counter));
                        } else {
                                error("open_counter returned with %d (%s). "
                                      "/bin/dmesg may provide additional information.\n",
@@ -683,8 +685,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
                nr_counters = ARRAY_SIZE(default_attrs);
                for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
-                        pos = perf_evsel__new(default_attrs[c].type,
+                        pos = perf_evsel__new(&default_attrs[c],
-                                              default_attrs[c].config,
                                              nr_counters);
                        if (pos == NULL)
                                goto out;
@@ -742,6 +743,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 out_free_fd:
        list_for_each_entry(pos, &evsel_list, node)
                perf_evsel__free_stat_priv(pos);
+        perf_evsel_list__delete();
 out:
        thread_map__delete(threads);
        threads = NULL;
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 1c984342a579..ed5696198d3d 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -234,6 +234,7 @@ out:
        return err;
 }
+#include "util/cpumap.h"
 #include "util/evsel.h"
 #include <sys/types.h>
@@ -264,6 +265,7 @@ static int test__open_syscall_event(void)
        int err = -1, fd;
        struct thread_map *threads;
        struct perf_evsel *evsel;
+        struct perf_event_attr attr;
        unsigned int nr_open_calls = 111, i;
        int id = trace_event__id("sys_enter_open");
@@ -278,7 +280,10 @@ static int test__open_syscall_event(void)
                return -1;
        }
-        evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
+        memset(&attr, 0, sizeof(attr));
+        attr.type = PERF_TYPE_TRACEPOINT;
+        attr.config = id;
+        evsel = perf_evsel__new(&attr, 0);
        if (evsel == NULL) {
                pr_debug("perf_evsel__new\n");
                goto out_thread_map_delete;
@@ -317,6 +322,111 @@ out_thread_map_delete:
        return err;
 }
+#include <sched.h>
+/*
+ * Open the sys_enter_open tracepoint for this process on every entry of
+ * the cpu map, pin the process to each CPU in turn and issue a distinct
+ * number of open() calls there (nr_open_calls + cpu), then check that
+ * each per-cpu counter saw exactly that many calls.
+ *
+ * Returns 0 on success, -1 on any setup failure or count mismatch.
+ */
+static int test__open_syscall_event_on_all_cpus(void)
+{
+        int err = -1, fd, cpu;
+        struct thread_map *threads;
+        struct cpu_map *cpus;
+        struct perf_evsel *evsel;
+        struct perf_event_attr attr;
+        unsigned int nr_open_calls = 111, i;
+        cpu_set_t *cpu_set;
+        size_t cpu_set_size;
+        int id = trace_event__id("sys_enter_open");
+        if (id < 0) {
+                pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+                return -1;
+        }
+        threads = thread_map__new(-1, getpid());
+        if (threads == NULL) {
+                pr_debug("thread_map__new\n");
+                return -1;
+        }
+        cpus = cpu_map__new(NULL);
+        /* Check the map we just created, and release threads on failure. */
+        if (cpus == NULL) {
+                pr_debug("cpu_map__new\n");
+                goto out_thread_map_delete;
+        }
+        /* NOTE(review): cpus is never released on any path - confirm whether a cpu_map destructor exists. */
+        cpu_set = CPU_ALLOC(cpus->nr);
+        if (cpu_set == NULL)
+                goto out_thread_map_delete;
+        cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
+        CPU_ZERO_S(cpu_set_size, cpu_set);
+        memset(&attr, 0, sizeof(attr));
+        attr.type = PERF_TYPE_TRACEPOINT;
+        attr.config = id;
+        evsel = perf_evsel__new(&attr, 0);
+        if (evsel == NULL) {
+                pr_debug("perf_evsel__new\n");
+                goto out_cpu_free;
+        }
+        if (perf_evsel__open(evsel, cpus, threads) < 0) {
+                pr_debug("failed to open counter: %s, "
+                         "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+                         strerror(errno));
+                goto out_evsel_delete;
+        }
+        for (cpu = 0; cpu < cpus->nr; ++cpu) {
+                unsigned int ncalls = nr_open_calls + cpu;
+                /*
+                 * The set was sized with CPU_ALLOC(), so the _S variants
+                 * must be used to stay within the allocation.
+                 * NOTE(review): the map index is used as the CPU number;
+                 * assumes cpus->map[] is 0..nr-1 - confirm.
+                 */
+                CPU_SET_S(cpu, cpu_set_size, cpu_set);
+                if (sched_setaffinity(0, cpu_set_size, cpu_set) < 0) {
+                        pr_debug("sched_setaffinity() failed on CPU %d: %s\n",
+                                 cpu, strerror(errno));
+                        goto out_close_fd;
+                }
+                for (i = 0; i < ncalls; ++i) {
+                        fd = open("/etc/passwd", O_RDONLY);
+                        close(fd);
+                }
+                CPU_CLR_S(cpu, cpu_set_size, cpu_set);
+        }
+        /*
+         * Here we need to explicitly preallocate the counts, as if
+         * we use the auto allocation it will allocate just for 1 cpu,
+         * as we start by cpu 0.
+         */
+        if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+                pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+                goto out_close_fd;
+        }
+        for (cpu = 0; cpu < cpus->nr; ++cpu) {
+                unsigned int expected;
+                if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+                        pr_debug("perf_evsel__open_read_on_cpu\n");
+                        goto out_close_fd;
+                }
+                expected = nr_open_calls + cpu;
+                if (evsel->counts->cpu[cpu].val != expected) {
+                        pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
+                                 expected, cpu, evsel->counts->cpu[cpu].val);
+                        goto out_close_fd;
+                }
+        }
+        err = 0;
+out_close_fd:
+        /* NOTE(review): fds were opened for cpus->nr cpus but only 1 is passed here - confirm against perf_evsel__close_fd(). */
+        perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+        perf_evsel__delete(evsel);
+out_cpu_free:
+        CPU_FREE(cpu_set);
+out_thread_map_delete:
+        thread_map__delete(threads);
+        return err;
+}
 static struct test {
        const char *desc;
        int (*func)(void);
@@ -330,6 +440,10 @@ static struct test {
                .func = test__open_syscall_event,
        },
        {
+                .desc = "detect open syscall event on all cpus",
+                .func = test__open_syscall_event_on_all_cpus,
+        },
+        {
                .func = NULL,
        },
 };
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1e67ab9c7ebc..05344c6210ac 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1471,6 +1471,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                pos->attr.sample_period = default_interval;
        }
+        sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
        symbol_conf.priv_size = (sizeof(struct sym_entry) +
                                 (nr_counters + 1) * sizeof(unsigned long));
@@ -1488,6 +1490,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 out_free_fd:
        list_for_each_entry(pos, &evsel_list, node)
                perf_evsel__free_mmap(pos);
+        perf_evsel_list__delete();
        return status;
 }
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 5b1ecd66bb36..595d0f4a7103 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -286,8 +286,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
        status = p->fn(argc, argv, prefix);
        exit_browser(status);
-        perf_evsel_list__delete();
        if (status)
                return status & 0xff;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c95267e63c5b..f5cfed60af98 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -6,14 +6,13 @@
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
 {
        struct perf_evsel *evsel = zalloc(sizeof(*evsel));
        if (evsel != NULL) {
                evsel->idx         = idx;
-                evsel->attr.type   = type;
+                evsel->attr        = *attr;
-                evsel->attr.config = config;
                INIT_LIST_HEAD(&evsel->node);
        }
@@ -128,59 +127,75 @@ int __perf_evsel__read(struct perf_evsel *evsel,
        return 0;
 }
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+                              struct thread_map *threads)
 {
-        int cpu;
+        int cpu, thread;
-        if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
+        if (evsel->fd == NULL &&
+            perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
                return -1;
        for (cpu = 0; cpu < cpus->nr; cpu++) {
-                FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
+                for (thread = 0; thread < threads->nr; thread++) {
-                                                        cpus->map[cpu], -1, 0);
+                        FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
-                if (FD(evsel, cpu, 0) < 0)
+                                                                     threads->map[thread],
-                        goto out_close;
+                                                                     cpus->map[cpu], -1, 0);
+                        if (FD(evsel, cpu, thread) < 0)
+                                goto out_close;
+                }
        }
        return 0;
 out_close:
-        while (--cpu >= 0) {
+        do {
-                close(FD(evsel, cpu, 0));
+                while (--thread >= 0) {
-                FD(evsel, cpu, 0) = -1;
+                        close(FD(evsel, cpu, thread));
-        }
+                        FD(evsel, cpu, thread) = -1;
+                }
+                thread = threads->nr;
+        } while (--cpu >= 0);
        return -1;
 }
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+static struct {
+        struct cpu_map map;
+        int cpus[1];
+} empty_cpu_map = {
+        .map.nr = 1,
+        .cpus   = { -1, },
+};
+static struct {
+        struct thread_map map;
+        int threads[1];
+} empty_thread_map = {
+        .map.nr  = 1,
+        .threads = { -1, },
+};
+int perf_evsel__open(struct perf_evsel *evsel,
+                     struct cpu_map *cpus, struct thread_map *threads)
 {
-        int thread;
-        if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
-                return -1;
-        for (thread = 0; thread < threads->nr; thread++) {
+        if (cpus == NULL) {
-                FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
+                /* Work around old compiler warnings about strict aliasing */
-                                                           threads->map[thread], -1, -1, 0);
+                cpus = &empty_cpu_map.map;
-                if (FD(evsel, 0, thread) < 0)
-                        goto out_close;
        }
-        return 0;
+        if (threads == NULL)
+                threads = &empty_thread_map.map;
-out_close:
+        return __perf_evsel__open(evsel, cpus, threads);
-        while (--thread >= 0) {
-                close(FD(evsel, 0, thread));
-                FD(evsel, 0, thread) = -1;
-        }
-        return -1;
 }
-int perf_evsel__open(struct perf_evsel *evsel, 
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
-                     struct cpu_map *cpus, struct thread_map *threads)
 {
-        if (threads == NULL)
+        return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
-                return perf_evsel__open_per_cpu(evsel, cpus);
+}
-        return perf_evsel__open_per_thread(evsel, threads);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+{
+        return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
 }
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a0ccd69c3fc2..b2d755fe88a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -37,7 +37,7 @@ struct perf_evsel {
 struct cpu_map;
 struct thread_map;
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
 void perf_evsel__delete(struct perf_evsel *evsel);
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 649083f27e08..5cb6f4bde905 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -490,6 +490,31 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
        return EVT_HANDLED_ALL;
 }
+/*
+ * Read the numeric id of the tracepoint named by orgname ("sys:event")
+ * from "<debugfs_path>/sys/event/id" and record the id/name pair via
+ * perf_header__push_event().
+ *
+ * Returns 0 when the id file cannot be opened (including when the path
+ * would not fit in PATH_MAX), in which case nothing is stored; -EINVAL
+ * when the file does not contain an integer; otherwise the result of
+ * perf_header__push_event().
+ */
+static int store_event_type(const char *orgname)
+{
+        char filename[PATH_MAX], *c;
+        FILE *file;
+        int id, n;
+        /* Build the whole path in one bounded step instead of sprintf/strcat. */
+        n = snprintf(filename, sizeof(filename), "%s/%s/id",
+                     debugfs_path, orgname);
+        if (n < 0 || (size_t)n >= sizeof(filename))
+                return 0;
+        /* Turn "sys:event" into "sys/event"; only look inside the name part. */
+        c = strchr(filename + strlen(debugfs_path) + 1, ':');
+        if (c)
+                *c = '/';
+        file = fopen(filename, "r");
+        if (!file)
+                return 0;
+        n = fscanf(file, "%i", &id);
+        fclose(file);
+        if (n < 1) {
+                pr_err("cannot store event ID\n");
+                return -EINVAL;
+        }
+        return perf_header__push_event(id, orgname);
+}
 static enum event_result parse_tracepoint_event(const char **strp,
                                    struct perf_event_attr *attr)
@@ -533,9 +558,13 @@ static enum event_result parse_tracepoint_event(const char **strp,
                *strp += strlen(sys_name) + evt_length;
                return parse_multiple_tracepoint_event(sys_name, evt_name,
                                                       flags);
-        } else
+        } else {
+                if (store_event_type(evt_name) < 0)
+                        return EVT_FAILED;
                return parse_single_tracepoint_event(sys_name, evt_name,
                                                     evt_length, attr, strp);
+        }
 }
 static enum event_result
@@ -778,41 +807,11 @@ modifier:
        return ret;
 }
-static int store_event_type(const char *orgname)
-{
-        char filename[PATH_MAX], *c;
-        FILE *file;
-        int id, n;
-        sprintf(filename, "%s/", debugfs_path);
-        strncat(filename, orgname, strlen(orgname));
-        strcat(filename, "/id");
-        c = strchr(filename, ':');
-        if (c)
-                *c = '/';
-        file = fopen(filename, "r");
-        if (!file)
-                return 0;
-        n = fscanf(file, "%i", &id);
-        fclose(file);
-        if (n < 1) {
-                pr_err("cannot store event ID\n");
-                return -EINVAL;
-        }
-        return perf_header__push_event(id, orgname);
-}
 int parse_events(const struct option *opt __used, const char *str, int unset __used)
 {
        struct perf_event_attr attr;
        enum event_result ret;
-        if (strchr(str, ':'))
-                if (store_event_type(str) < 0)
-                        return -1;
        for (;;) {
                memset(&attr, 0, sizeof(attr));
                ret = parse_event_symbols(&str, &attr);
@@ -824,7 +823,7 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
                if (ret != EVT_HANDLED_ALL) {
                        struct perf_evsel *evsel;
-                        evsel = perf_evsel__new(attr.type, attr.config,
+                        evsel = perf_evsel__new(&attr,
                                                nr_counters);
                        if (evsel == NULL)
                                return -1;
@@ -1014,8 +1013,15 @@ void print_events(void)
 int perf_evsel_list__create_default(void)
 {
-        struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
+        struct perf_evsel *evsel;
-                                                   PERF_COUNT_HW_CPU_CYCLES, 0);
+        struct perf_event_attr attr;
+        memset(&attr, 0, sizeof(attr));
+        attr.type = PERF_TYPE_HARDWARE;
+        attr.config = PERF_COUNT_HW_CPU_CYCLES;
+        evsel = perf_evsel__new(&attr, 0);
        if (evsel == NULL)
                return -ENOMEM;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6fb4694d05fa..313dac2d94ce 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,7 +1007,7 @@ more:
        if (size == 0)
                size = 8;
-        if (head + event->header.size >= mmap_size) {
+        if (head + event->header.size > mmap_size) {
                if (mmaps[map_idx]) {
                        munmap(mmaps[map_idx], mmap_size);
                        mmaps[map_idx] = NULL;
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
new file mode 100644
index 000000000000..fd8e1f1297aa
--- /dev/null
+++ b/tools/power/x86/turbostat/Makefile
@@ -0,0 +1,8 @@
+turbostat : turbostat.c
+clean :
+        rm -f turbostat
+install :
+        install turbostat /usr/bin/turbostat
+        install turbostat.8 /usr/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
new file mode 100644
index 000000000000..ff75125deed0
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -0,0 +1,172 @@
+.TH TURBOSTAT 8
+.SH NAME
+turbostat \- Report processor frequency and idle statistics
+.SH SYNOPSIS
+.ft B
+.B turbostat
+.RB [ "\-v" ]
+.RB [ "\-M MSR#" ]
+.RB command
+.br
+.B turbostat
+.RB [ "\-v" ]
+.RB [ "\-M MSR#" ]
+.RB [ "\-i interval_sec" ]
+.SH DESCRIPTION
+\fBturbostat \fP reports processor topology, frequency
+and idle power state statistics on modern X86 processors.
+Either \fBcommand\fP is forked and statistics are printed
+upon its completion, or statistics are printed periodically.
+\fBturbostat \fP
+requires that the processor
+supports an "invariant" TSC, plus the APERF and MPERF MSRs.
+\fBturbostat \fP will report idle cpu power state residency
+on processors that additionally support C-state residency counters.
+.SS Options
+The \fB-v\fP option increases verbosity.
+.PP
+The \fB-M MSR#\fP option dumps the specified MSR,
+in addition to the usual frequency and idle statistics.
+.PP
+The \fB-i interval_sec\fP option prints statistics every \fIinterval_sec\fP seconds.
+The default is 5 seconds.
+.PP
+The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit,
+displays the statistics gathered since it was forked.
+.PP
+.SH FIELD DESCRIPTIONS
+.nf
+\fBpkg\fP processor package number.
+\fBcore\fP processor core number.
+\fBCPU\fP Linux CPU (logical processor) number.
+\fB%c0\fP percent of the interval that the CPU retired instructions.
+\fBGHz\fP average clock rate while the CPU was in c0 state.
+\fBTSC\fP average GHz that the TSC ran during the entire interval.
+\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states.
+\fB%pc3, %pc6\fP percentage residency in hardware package idle states.
+.fi
+.PP
+.SH EXAMPLE
+Without any parameters, turbostat prints out counters every 5 seconds.
+(override interval with "-i sec" option, or specify a command
+for turbostat to fork).
+The first row of statistics reflects the average for the entire system.
+Subsequent rows show per-CPU statistics.
+.nf
+[root@x980]# ./turbostat
+core CPU   %c0   GHz  TSC   %c1    %c3    %c6   %pc3   %pc6
+          0.04 1.62 3.38   0.11   0.00  99.85   0.00  95.07
+  0   0   0.04 1.62 3.38   0.06   0.00  99.90   0.00  95.07
+  0   6   0.02 1.62 3.38   0.08   0.00  99.90   0.00  95.07
+  1   2   0.10 1.62 3.38   0.29   0.00  99.61   0.00  95.07
+  1   8   0.11 1.62 3.38   0.28   0.00  99.61   0.00  95.07
+  2   4   0.01 1.62 3.38   0.01   0.00  99.98   0.00  95.07
+  2  10   0.01 1.61 3.38   0.02   0.00  99.98   0.00  95.07
+  8   1   0.07 1.62 3.38   0.15   0.00  99.78   0.00  95.07
+  8   7   0.03 1.62 3.38   0.19   0.00  99.78   0.00  95.07
+  9   3   0.01 1.62 3.38   0.02   0.00  99.98   0.00  95.07
+  9   9   0.01 1.62 3.38   0.02   0.00  99.98   0.00  95.07
+ 10   5   0.01 1.62 3.38   0.13   0.00  99.86   0.00  95.07
+ 10  11   0.08 1.62 3.38   0.05   0.00  99.86   0.00  95.07
+.fi
+.SH VERBOSE EXAMPLE
+The "-v" option adds verbosity to the output:
+.nf
+GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2)
+12 * 133 = 1600 MHz max efficiency
+25 * 133 = 3333 MHz TSC frequency
+26 * 133 = 3467 MHz max turbo 4 active cores
+26 * 133 = 3467 MHz max turbo 3 active cores
+27 * 133 = 3600 MHz max turbo 2 active cores
+27 * 133 = 3600 MHz max turbo 1 active cores
+.fi
+The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
+available at the minimum package voltage.  The \fBTSC frequency\fP is the nominal
+maximum frequency of the processor if turbo-mode were not available.  This frequency
+should be sustainable on all CPUs indefinitely, given nominal power and cooling.
+The remaining rows show what maximum turbo frequency is possible
+depending on the number of idle cores.  Note that this information is
+not available on all processors.
+.SH FORK EXAMPLE
+If turbostat is invoked with a command, it will fork that command
+and output the statistics gathered when the command exits.
+eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
+until ^C while the other CPUs are mostly idle:
+.nf
+[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
+^Ccore CPU   %c0   GHz  TSC   %c1    %c3    %c6   %pc3   %pc6
+           8.49 3.63 3.38  16.23   0.66  74.63   0.00   0.00
+   0   0   1.22 3.62 3.38  32.18   0.00  66.60   0.00   0.00
+   0   6   0.40 3.61 3.38  33.00   0.00  66.60   0.00   0.00
+   1   2   0.11 3.14 3.38   0.19   3.95  95.75   0.00   0.00
+   1   8   0.05 2.88 3.38   0.25   3.95  95.75   0.00   0.00
+   2   4   0.00 3.13 3.38   0.02   0.00  99.98   0.00   0.00
+   2  10   0.00 3.09 3.38   0.02   0.00  99.98   0.00   0.00
+   8   1   0.04 3.50 3.38  14.43   0.00  85.54   0.00   0.00
+   8   7   0.03 2.98 3.38  14.43   0.00  85.54   0.00   0.00
+   9   3   0.00 3.16 3.38 100.00   0.00   0.00   0.00   0.00
+   9   9  99.93 3.63 3.38   0.06   0.00   0.00   0.00   0.00
+  10   5   0.01 2.82 3.38   0.08   0.00  99.91   0.00   0.00
+  10  11   0.02 3.36 3.38   0.06   0.00  99.91   0.00   0.00
+6.950866 sec
+.fi
+Above, the cycle soaker drives cpu9 up to its 3.6 GHz turbo limit
+while the other processors are generally in various states of idle.
+Note that cpu3 is an HT sibling sharing core9
+with cpu9, and thus it is unable to get to an idle state
+deeper than c1 while cpu9 is busy.
+Note that turbostat reports average GHz of 3.63, while
+the arithmetic average of the GHz column above is 3.24.
+This is a weighted average, where the weight is %c0.  ie. it is the total number of
+un-halted cycles elapsed per time divided by the number of CPUs.
+.SH NOTES
+.B "turbostat "
+must be run as root.
+.B "turbostat "
+reads hardware counters, but doesn't write them.
+So it will not interfere with the OS or other programs, including
+multiple invocations of itself.
+\fBturbostat \fP
+may work poorly on Linux-2.6.20 through 2.6.29,
+as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF
+in those kernels.
+The APERF, MPERF MSRs are defined to count non-halted cycles.
+Although it is not guaranteed by the architecture, turbostat assumes
+that they count at TSC rate, which is true on all processors tested to date.
+.SH REFERENCES
+"Intel® Turbo Boost Technology
+in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
+http://download.intel.com/design/processor/applnots/320354.pdf
+"Intel® 64 and IA-32 Architectures Software Developer's Manual
+Volume 3B: System Programming Guide"
+http://www.intel.com/products/processor/manuals/
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+.SH "SEE ALSO"
+msr(4), vmstat(8)
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
new file mode 100644
index 000000000000..4c6983de6fd9
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -0,0 +1,1048 @@
+/*
+ * turbostat -- show CPU frequency and C-state residency
+ * on modern Intel turbo-capable processors.
+ *
+ * Copyright (c) 2010, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <string.h>
+#include <ctype.h>
+/*
+ * Model-specific register (MSR) numbers.  Each value is handed to
+ * get_msr(), which uses it as the pread() offset into /dev/cpu/<n>/msr.
+ * The *_RESIDENCY registers are sampled by get_counters() and turned
+ * into percentages of TSC cycles by print_pcc().
+ */
+#define MSR_TSC 0x10
+#define MSR_NEHALEM_PLATFORM_INFO       0xCE
+#define MSR_NEHALEM_TURBO_RATIO_LIMIT   0x1AD
+#define MSR_APERF       0xE8
+#define MSR_MPERF       0xE7
+#define MSR_PKG_C2_RESIDENCY    0x60D   /* SNB only */
+#define MSR_PKG_C3_RESIDENCY    0x3F8
+#define MSR_PKG_C6_RESIDENCY    0x3F9
+#define MSR_PKG_C7_RESIDENCY    0x3FA   /* SNB only */
+#define MSR_CORE_C3_RESIDENCY   0x3FC
+#define MSR_CORE_C6_RESIDENCY   0x3FD
+#define MSR_CORE_C7_RESIDENCY   0x3FE   /* SNB only */
+/*
+ * Program-wide state.  Everything here is a plain global that is written
+ * during start-up (cmdline(), check_cpuid(), insert_cpu_counters()) or by
+ * the measurement code, and read by the printing code.
+ */
+/* file scanned by for_all_cpus() to enumerate the CPUs */
+char *proc_stat = "/proc/stat";
+unsigned int interval_sec = 5;  /* set with -i interval_sec */
+unsigned int verbose;           /* set with -v */
+/*
+ * set by compute_delta() when APERF/MPERF went backwards;
+ * print_pcc() then prints "****" instead of the %c0 / %c1 value
+ */
+unsigned int skip_c0;
+unsigned int skip_c1;
+/* column-group enables, decided once in check_cpuid() */
+unsigned int do_nhm_cstates;
+unsigned int do_snb_cstates;
+unsigned int has_aperf;
+/* divisor applied to cycle counts by print_pcc() */
+unsigned int units = 1000000000;        /* Ghz etc */
+/* CPUID results, filled in by check_cpuid() */
+unsigned int genuine_intel;
+unsigned int has_invariant_tsc;
+unsigned int do_nehalem_platform_info;
+unsigned int do_nehalem_turbo_ratio_limit;
+/* MSR number given with -M; 0 means "no extra MSR column" */
+unsigned int extra_msr_offset;
+/* value returned by discover_bclk(); print_nehalem_info() multiplies ratios by it */
+double bclk;
+/* topology column enables, set as a side effect of insert_cpu_counters() */
+unsigned int show_pkg;
+unsigned int show_core;
+unsigned int show_cpu;
+/* latched to 1 by compute_delta() the first time APERF or MPERF goes backwards */
+int aperf_mperf_unstable;
+/* NOTE(review): not referenced anywhere else in this file */
+int backwards_count;
+/* argv[0], stored by cmdline() and used in diagnostics */
+char *progname;
+/* set by get_msr() on open failure and by verify_num_cpus() on a CPU count change */
+int need_reinitialize;
+/* number of "cpuN" lines counted by for_all_cpus() */
+int num_cpus;
+/*
+ * One snapshot (or one delta) of the counters of a single logical CPU.
+ * Nodes are chained through ->next into singly linked lists that
+ * insert_cpu_counters() keeps sorted by pkg, core, cpu.
+ * Fields that are not sampled (because the matching do_* / has_* flag is
+ * clear) stay at the zero value left by calloc().
+ */
+typedef struct per_cpu_counters {
+        unsigned long long tsc;         /* per thread */
+        unsigned long long aperf;       /* per thread */
+        unsigned long long mperf;       /* per thread */
+        unsigned long long c1;  /* per thread (calculated) */
+        unsigned long long c3;  /* per core */
+        unsigned long long c6;  /* per core */
+        unsigned long long c7;  /* per core */
+        unsigned long long pc2; /* per package */
+        unsigned long long pc3; /* per package */
+        unsigned long long pc6; /* per package */
+        unsigned long long pc7; /* per package */
+        unsigned long long extra_msr;   /* per thread */
+        int pkg;
+        int core;
+        int cpu;
+        struct per_cpu_counters *next;
+} PCC;
+/*
+ * pcc_even and pcc_odd hold the two most recent snapshots (the loop
+ * alternates between them), pcc_delta receives their difference from
+ * compute_delta(), and pcc_average is a single node filled in by
+ * compute_average().
+ */
+PCC *pcc_even;
+PCC *pcc_odd;
+PCC *pcc_delta;
+PCC *pcc_average;
+/* wall-clock time of the even/odd snapshots and their difference */
+struct timeval tv_even;
+struct timeval tv_odd;
+struct timeval tv_delta;
+/*
+ * get_msr - read one 64-bit MSR of a CPU through /dev/cpu/<cpu>/msr
+ * @cpu:    logical CPU number used to build the device path
+ * @offset: MSR number, used as the file offset for pread()
+ *
+ * Returns the MSR value.  If the device node cannot be opened, the error
+ * is reported, need_reinitialize is set and 0 is returned so the caller
+ * can re-discover the topology.  A short or failed read is fatal
+ * (exit status -2).
+ */
+unsigned long long get_msr(int cpu, off_t offset)
+{
+        ssize_t retval;
+        unsigned long long msr;
+        char pathname[32];
+        int fd;
+        snprintf(pathname, sizeof(pathname), "/dev/cpu/%d/msr", cpu);
+        fd = open(pathname, O_RDONLY);
+        if (fd < 0) {
+                perror(pathname);
+                need_reinitialize = 1;
+                return 0;
+        }
+        retval = pread(fd, &msr, sizeof msr, offset);
+        /* the descriptor is no longer needed, whatever pread() returned */
+        close(fd);
+        if (retval != (ssize_t)sizeof msr) {
+                /* cast explicitly: off_t and ssize_t have no portable printf length modifier here */
+                fprintf(stderr, "cpu%d pread(..., 0x%llx) = %lld\n",
+                        cpu, (unsigned long long)offset, (long long)retval);
+                exit(-2);
+        }
+        return msr;
+}
+/*
+ * print_header - write the column title line to stderr.
+ * The columns appear in exactly the order print_pcc() emits its values;
+ * each group is present only when the matching show_*, do_* or has_*
+ * flag is set.
+ */
+void print_header()
+{
+        FILE *out = stderr;
+        /* topology columns */
+        if (show_pkg)
+                fprintf(out, "pkg ");
+        if (show_core)
+                fprintf(out, "core");
+        if (show_cpu)
+                fprintf(out, " CPU");
+        /* busy time and frequency */
+        if (do_nhm_cstates)
+                fprintf(out, "   %%c0 ");
+        if (has_aperf)
+                fprintf(out, "  GHz");
+        fprintf(out, "  TSC");
+        /* core C-state residency */
+        if (do_nhm_cstates) {
+                fprintf(out, "   %%c1 ");
+                fprintf(out, "   %%c3 ");
+                fprintf(out, "   %%c6 ");
+        }
+        if (do_snb_cstates) {
+                fprintf(out, "   %%c7 ");
+                /* package C-state residency starts here */
+                fprintf(out, "  %%pc2 ");
+        }
+        if (do_nhm_cstates) {
+                fprintf(out, "  %%pc3 ");
+                fprintf(out, "  %%pc6 ");
+        }
+        if (do_snb_cstates)
+                fprintf(out, "  %%pc7 ");
+        /* optional user-selected MSR */
+        if (extra_msr_offset)
+                fprintf(out, "       MSR 0x%x ", extra_msr_offset);
+        putc('\n', out);
+}
+/*
+ * dump_pcc - debugging aid: print the raw contents of one counter node
+ * to stderr, counters in hexadecimal.
+ * @pcc: node to dump; must not be NULL
+ */
+void dump_pcc(PCC *pcc)
+{
+        FILE *out = stderr;
+
+        /* where the node sits in the topology */
+        fprintf(out, "package: %d ", pcc->pkg);
+        fprintf(out, "core:: %d ", pcc->core);
+        fprintf(out, "CPU: %d ", pcc->cpu);
+        /* per-thread and per-core counters */
+        fprintf(out, "TSC: %016llX\n", pcc->tsc);
+        fprintf(out, "c3: %016llX\n", pcc->c3);
+        fprintf(out, "c6: %016llX\n", pcc->c6);
+        fprintf(out, "c7: %016llX\n", pcc->c7);
+        fprintf(out, "aperf: %016llX\n", pcc->aperf);
+        /* per-package counters */
+        fprintf(out, "pc2: %016llX\n", pcc->pc2);
+        fprintf(out, "pc3: %016llX\n", pcc->pc3);
+        fprintf(out, "pc6: %016llX\n", pcc->pc6);
+        fprintf(out, "pc7: %016llX\n", pcc->pc7);
+        /* value of the MSR selected with -M */
+        fprintf(out, "msr0x%x: %016llX\n", extra_msr_offset, pcc->extra_msr);
+}
+/*
+ * dump_list - debugging aid: print the list head address to stdout, then
+ * dump every node of the list with dump_pcc().
+ * @pcc: first node of the list, may be NULL
+ */
+void dump_list(PCC *pcc)
+{
+        /* %p is only defined for void *, so convert explicitly */
+        printf("dump_list 0x%p\n", (void *)pcc);
+        while (pcc != NULL) {
+                dump_pcc(pcc);
+                pcc = pcc->next;
+        }
+}
+/*
+ * print_pcc - print one result row to stderr.
+ * @p: delta (or average) counters for one CPU
+ *
+ * Residency columns are percentages of p->tsc; frequency columns are
+ * scaled by "units" and by the length of the measurement interval taken
+ * from tv_delta.  The row for pcc_average gets blank topology columns.
+ */
+void print_pcc(PCC *p)
+{
+        FILE *out = stderr;
+        const double interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+        const int is_average = (p == pcc_average);
+
+        /* topology columns, print blanks on 1st (average) line */
+        if (show_pkg) {
+                if (is_average)
+                        fprintf(out, "    ");
+                else
+                        fprintf(out, "%4d", p->pkg);
+        }
+        if (show_core) {
+                if (is_average)
+                        fprintf(out, "    ");
+                else
+                        fprintf(out, "%4d", p->core);
+        }
+        if (show_cpu) {
+                if (is_average)
+                        fprintf(out, "    ");
+                else
+                        fprintf(out, "%4d", p->cpu);
+        }
+
+        /* %c0 */
+        if (do_nhm_cstates) {
+                if (skip_c0)
+                        fprintf(out, "   ****");
+                else
+                        fprintf(out, "%7.2f", 100.0 * p->mperf/p->tsc);
+        }
+
+        /* GHz */
+        if (has_aperf) {
+                const double ghz = 1.0 * p->tsc / units * p->aperf /
+                                   p->mperf / interval_float;
+
+                if (!aperf_mperf_unstable)
+                        fprintf(out, "%5.2f", ghz);
+                else if (p->aperf > p->tsc || p->mperf > p->tsc)
+                        fprintf(out, " ****");
+                else
+                        fprintf(out, "%4.1f*", ghz);
+        }
+
+        /* TSC */
+        fprintf(out, "%5.2f", 1.0 * p->tsc/units/interval_float);
+
+        /* core C-states */
+        if (do_nhm_cstates) {
+                if (skip_c1)
+                        fprintf(out, "   ****");
+                else
+                        fprintf(out, "%7.2f", 100.0 * p->c1/p->tsc);
+                fprintf(out, "%7.2f", 100.0 * p->c3/p->tsc);
+                fprintf(out, "%7.2f", 100.0 * p->c6/p->tsc);
+        }
+        if (do_snb_cstates) {
+                fprintf(out, "%7.2f", 100.0 * p->c7/p->tsc);
+                /* package C-states start here */
+                fprintf(out, "%7.2f", 100.0 * p->pc2/p->tsc);
+        }
+        if (do_nhm_cstates) {
+                fprintf(out, "%7.2f", 100.0 * p->pc3/p->tsc);
+                fprintf(out, "%7.2f", 100.0 * p->pc6/p->tsc);
+        }
+        if (do_snb_cstates)
+                fprintf(out, "%7.2f", 100.0 * p->pc7/p->tsc);
+
+        /* raw value of the MSR selected with -M */
+        if (extra_msr_offset)
+                fprintf(out, "  0x%016llx", p->extra_msr);
+        putc('\n', out);
+}
+/*
+ * print_counters - print the header, the average row (only when there is
+ * more than one CPU) and then one row per node of the list.
+ * @cnt: head of the list of per-CPU deltas
+ */
+void print_counters(PCC *cnt)
+{
+        PCC *cur = cnt;
+
+        print_header();
+        if (num_cpus > 1)
+                print_pcc(pcc_average);
+        while (cur != NULL) {
+                print_pcc(cur);
+                cur = cur->next;
+        }
+}
+/*
+ * Store (after - before) in delta and evaluate to non-zero when the
+ * counter went backwards (before > after).  "after" and "before" are
+ * each evaluated twice, so pass expressions without side effects.
+ */
+#define SUBTRACT_COUNTER(after, before, delta) \
+        ((delta) = ((after) - (before)), ((before) > (after)))
+/*
+ * compute_delta - subtract two counter snapshots, node by node.
+ * @after:  list holding the newer snapshot
+ * @before: list holding the older snapshot
+ * @delta:  list receiving (after - before) plus the derived c1 value
+ *
+ * The three lists are walked in lock step until any of them ends.
+ * Returns -1 as soon as the cpu numbers of "before" and "after" disagree
+ * (the topology changed), 0 otherwise.  Counters that went backwards are
+ * reported on stderr; a TSC delta below one million cycles is fatal
+ * (exit status -3).  skip_c0 / skip_c1 are cleared on entry and set again
+ * if APERF or MPERF went backwards on any CPU.
+ */
+int compute_delta(PCC *after, PCC *before, PCC *delta)
+{
+        int errors = 0;
+        int perf_err = 0;
+        skip_c0 = skip_c1 = 0;
+        for ( ; after && before && delta;
+                after = after->next, before = before->next, delta = delta->next) {
+                unsigned long long idle;
+                if (before->cpu != after->cpu) {
+                        printf("cpu configuration changed: %d != %d\n",
+                                before->cpu, after->cpu);
+                        return -1;
+                }
+                if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) {
+                        fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n",
+                                before->cpu, before->tsc, after->tsc);
+                        errors++;
+                }
+                /* check for TSC < 1 Mcycles over interval */
+                if (delta->tsc < (1000 * 1000)) {
+                        fprintf(stderr, "Insanely slow TSC rate,"
+                                " TSC stops in idle?\n");
+                        fprintf(stderr, "You can disable all c-states"
+                                " by booting with \"idle=poll\"\n");
+                        fprintf(stderr, "or just the deep ones with"
+                                " \"processor.max_cstate=1\"\n");
+                        exit(-3);
+                }
+                if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) {
+                        fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n",
+                                before->cpu, before->c3, after->c3);
+                        errors++;
+                }
+                if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) {
+                        fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n",
+                                before->cpu, before->c6, after->c6);
+                        errors++;
+                }
+                if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) {
+                        fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n",
+                                before->cpu, before->c7, after->c7);
+                        errors++;
+                }
+                if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) {
+                        fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n",
+                                before->cpu, before->pc2, after->pc2);
+                        errors++;
+                }
+                if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) {
+                        fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n",
+                                before->cpu, before->pc3, after->pc3);
+                        errors++;
+                }
+                if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) {
+                        fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n",
+                                before->cpu, before->pc6, after->pc6);
+                        errors++;
+                }
+                if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) {
+                        fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n",
+                                before->cpu, before->pc7, after->pc7);
+                        errors++;
+                }
+                /*
+                 * APERF and MPERF are tested independently so that each
+                 * message is only printed for the counter that really
+                 * went backwards.
+                 */
+                perf_err = 0;
+                if (SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf)) {
+                        fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n",
+                                before->cpu, before->aperf, after->aperf);
+                        perf_err = 1;
+                }
+                if (SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf)) {
+                        fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n",
+                                before->cpu, before->mperf, after->mperf);
+                        perf_err = 1;
+                }
+                if (perf_err) {
+                        if (!aperf_mperf_unstable) {
+                                fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
+                                fprintf(stderr, "* Frequency results do not cover entire interval *\n");
+                                fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
+                                aperf_mperf_unstable = 1;
+                        }
+                        /*
+                         * mperf delta is likely a huge "positive" number
+                         * can not use it for calculating c0 time
+                         */
+                        skip_c0 = 1;
+                        skip_c1 = 1;
+                }
+                /*
+                 * As the counters are not collected atomically, it is
+                 * possible for mperf's non-halted cycles, alone or together
+                 * with the c3/c6/c7 residency, to exceed TSC's all cycles.
+                 * Show c1 = 0% in that case instead of letting the unsigned
+                 * subtraction wrap around to a huge value.
+                 */
+                idle = delta->c3 + delta->c6 + delta->c7;
+                if (delta->mperf > delta->tsc ||
+                    idle > delta->tsc - delta->mperf)
+                        delta->c1 = 0;
+                else /* normal case, derive c1 */
+                        delta->c1 = delta->tsc - delta->mperf - idle;
+                if (delta->mperf == 0)
+                        delta->mperf = 1;       /* divide by 0 protection */
+                /*
+                 * for "extra msr", just copy the latest w/o subtracting
+                 */
+                delta->extra_msr = after->extra_msr;
+                if (errors) {
+                        fprintf(stderr, "ERROR cpu%d before:\n", before->cpu);
+                        dump_pcc(before);
+                        fprintf(stderr, "ERROR cpu%d after:\n", before->cpu);
+                        dump_pcc(after);
+                        /* the error count is per CPU; start afresh for the next node */
+                        errors = 0;
+                }
+        }
+        return 0;
+}
+/*
+ * compute_average - average every counter of a delta list over num_cpus.
+ * @delta: head of the list of per-CPU deltas
+ * @avg:   node receiving the averages; only its counter fields are written
+ *
+ * Does nothing when there is no destination node or no CPU was counted,
+ * which would otherwise be a NULL dereference or a division by zero.
+ */
+void compute_average(PCC *delta, PCC *avg)
+{
+        PCC sum;        /* running totals; a local is enough, no heap needed */
+        if (avg == NULL || num_cpus <= 0)
+                return;
+        memset(&sum, 0, sizeof(sum));
+        for (; delta; delta = delta->next) {
+                sum.tsc += delta->tsc;
+                sum.c1 += delta->c1;
+                sum.c3 += delta->c3;
+                sum.c6 += delta->c6;
+                sum.c7 += delta->c7;
+                sum.aperf += delta->aperf;
+                sum.mperf += delta->mperf;
+                sum.pc2 += delta->pc2;
+                sum.pc3 += delta->pc3;
+                sum.pc6 += delta->pc6;
+                sum.pc7 += delta->pc7;
+        }
+        avg->tsc = sum.tsc/num_cpus;
+        avg->c1 = sum.c1/num_cpus;
+        avg->c3 = sum.c3/num_cpus;
+        avg->c6 = sum.c6/num_cpus;
+        avg->c7 = sum.c7/num_cpus;
+        avg->aperf = sum.aperf/num_cpus;
+        avg->mperf = sum.mperf/num_cpus;
+        avg->pc2 = sum.pc2/num_cpus;
+        avg->pc3 = sum.pc3/num_cpus;
+        avg->pc6 = sum.pc6/num_cpus;
+        avg->pc7 = sum.pc7/num_cpus;
+}
+/*
+ * get_counters - take a snapshot of the hardware counters of every CPU
+ * in the list.
+ * @pcc: head of the list to fill in
+ *
+ * The TSC is always read; the other MSRs are read only when the matching
+ * do_* / has_aperf flag or extra_msr_offset is set.  The reads are issued
+ * in a fixed order through get_msr().
+ */
+void get_counters(PCC *pcc)
+{
+        while (pcc != NULL) {
+                const int cpu = pcc->cpu;
+
+                pcc->tsc = get_msr(cpu, MSR_TSC);
+                if (do_nhm_cstates) {
+                        pcc->c3 = get_msr(cpu, MSR_CORE_C3_RESIDENCY);
+                        pcc->c6 = get_msr(cpu, MSR_CORE_C6_RESIDENCY);
+                }
+                if (do_snb_cstates)
+                        pcc->c7 = get_msr(cpu, MSR_CORE_C7_RESIDENCY);
+                if (has_aperf) {
+                        pcc->aperf = get_msr(cpu, MSR_APERF);
+                        pcc->mperf = get_msr(cpu, MSR_MPERF);
+                }
+                if (do_snb_cstates)
+                        pcc->pc2 = get_msr(cpu, MSR_PKG_C2_RESIDENCY);
+                if (do_nhm_cstates) {
+                        pcc->pc3 = get_msr(cpu, MSR_PKG_C3_RESIDENCY);
+                        pcc->pc6 = get_msr(cpu, MSR_PKG_C6_RESIDENCY);
+                }
+                if (do_snb_cstates)
+                        pcc->pc7 = get_msr(cpu, MSR_PKG_C7_RESIDENCY);
+                if (extra_msr_offset)
+                        pcc->extra_msr = get_msr(cpu, extra_msr_offset);
+
+                pcc = pcc->next;
+        }
+}
+/*
+ * print_nehalem_info - decode and print, on stderr, the ratios found in
+ * MSR_NEHALEM_PLATFORM_INFO and, when do_nehalem_turbo_ratio_limit is set,
+ * in MSR_NEHALEM_TURBO_RATIO_LIMIT.  Both MSRs are read from cpu 0 and
+ * every ratio is multiplied by bclk.  Does nothing unless
+ * do_nehalem_platform_info is set.
+ */
+void print_nehalem_info()
+{
+        FILE *out = stderr;
+        unsigned long long msr;
+        unsigned int ratio;
+
+        if (!do_nehalem_platform_info)
+                return;
+
+        msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO);
+        /* bits 47:40 */
+        ratio = (msr >> 40) & 0xFF;
+        fprintf(out, "%d * %.0f = %.0f MHz max efficiency\n",
+                ratio, bclk, ratio * bclk);
+        /* bits 15:8 */
+        ratio = (msr >> 8) & 0xFF;
+        fprintf(out, "%d * %.0f = %.0f MHz TSC frequency\n",
+                ratio, bclk, ratio * bclk);
+        if (verbose > 1)
+                fprintf(out, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
+
+        if (do_nehalem_turbo_ratio_limit) {
+                /* one byte per active-core count; a zero ratio is not printed */
+                msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT);
+
+                ratio = (msr >> 24) & 0xFF;
+                if (ratio)
+                        fprintf(out, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
+                                ratio, bclk, ratio * bclk);
+
+                ratio = (msr >> 16) & 0xFF;
+                if (ratio)
+                        fprintf(out, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
+                                ratio, bclk, ratio * bclk);
+
+                ratio = (msr >> 8) & 0xFF;
+                if (ratio)
+                        fprintf(out, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
+                                ratio, bclk, ratio * bclk);
+
+                ratio = (msr >> 0) & 0xFF;
+                if (ratio)
+                        fprintf(out, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
+                                ratio, bclk, ratio * bclk);
+        }
+}
+/*
+ * free_counter_list - free() every node of a counter list.
+ * @list: head of the list, may be NULL
+ *
+ * The caller's own head pointer is not cleared.
+ */
+void free_counter_list(PCC *list)
+{
+        while (list != NULL) {
+                /* fetch the successor before the node goes away */
+                PCC *following = list->next;
+
+                free(list);
+                list = following;
+        }
+}
+/*
+ * free_all_counters - release the four global counter lists (even, odd,
+ * delta, average, in that order) and reset their head pointers to NULL.
+ */
+void free_all_counters(void)
+{
+        PCC **heads[] = { &pcc_even, &pcc_odd, &pcc_delta, &pcc_average };
+        unsigned int i;
+
+        for (i = 0; i < sizeof(heads) / sizeof(heads[0]); i++) {
+                free_counter_list(*heads[i]);
+                *heads[i] = NULL;
+        }
+}
+/*
+ * insert_cpu_counters - link a node into a list kept sorted by ascending
+ * package#, core#, cpu#.
+ * @list: address of the list head; updated when the node becomes the
+ *        new first element
+ * @new:  node to insert; its pkg, core and cpu fields must already be set
+ *
+ * Side effects: sets show_cpu as soon as a second node is inserted, and
+ * sets show_pkg / show_core when the search for the insertion point walks
+ * past a node with a smaller package / a smaller core in the same package.
+ * NOTE(review): because the flags are only set while walking, a node that
+ * is inserted at the front does not set show_pkg or show_core even if its
+ * package or core differs from the rest of the list -- confirm that the
+ * callers always feed CPUs in an order where this does not matter.
+ */
+void insert_cpu_counters(PCC **list, PCC *new)
+{
+        PCC *prev;
+        /*
+         * list was empty
+         */
+        if (*list == NULL) {
+                new->next = *list;
+                *list = new;
+                return;
+        }
+        show_cpu = 1;   /* there is more than one CPU */
+        /*
+         * insert on front of list.
+         * It is sorted by ascending package#, core#, cpu#
+         */
+        if (((*list)->pkg > new->pkg) ||
+            (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) ||
+            (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) {
+                new->next = *list;
+                *list = new;
+                return;
+        }
+        prev = *list;
+        /* step 1: skip every node that belongs to a smaller package */
+        while (prev->next && (prev->next->pkg < new->pkg)) {
+                prev = prev->next;
+                show_pkg = 1;   /* there is more than 1 package */
+        }
+        /* step 2: within the same package, skip every smaller core */
+        while (prev->next && (prev->next->pkg == new->pkg)
+                && (prev->next->core < new->core)) {
+                prev = prev->next;
+                show_core = 1;  /* there is more than 1 core */
+        }
+        /* step 3: within the same core, skip every smaller cpu number */
+        while (prev->next && (prev->next->pkg == new->pkg)
+                && (prev->next->core == new->core)
+                && (prev->next->cpu < new->cpu)) {
+                prev = prev->next;
+        }
+        /*
+         * insert after "prev"
+         */
+        new->next = prev->next;
+        prev->next = new;
+        return;
+}
+/*
+ * new_cpu_counters - allocate one zeroed counter node and label it with
+ * its topology coordinates.  Allocation failure is fatal (exit status 1).
+ */
+static PCC *new_cpu_counters(int pkg, int core, int cpu)
+{
+        PCC *new = calloc(1, sizeof(*new));
+        if (new == NULL) {
+                perror("calloc");
+                exit(1);
+        }
+        new->pkg = pkg;
+        new->core = core;
+        new->cpu = cpu;
+        return new;
+}
+/*
+ * alloc_new_cpu_counters - callback for for_all_cpus(): create the
+ * counter nodes of one logical CPU.
+ * @pkg:  physical package id of the CPU
+ * @core: core id of the CPU
+ * @cpu:  logical CPU number
+ *
+ * One node each is inserted, in sorted position, into pcc_odd, pcc_even
+ * and pcc_delta.  pcc_average is a single node rather than a list: the
+ * node left by a previous call is released before the new one is stored,
+ * so that calling this once per CPU does not leak one node per CPU.
+ */
+void alloc_new_cpu_counters(int pkg, int core, int cpu)
+{
+        if (verbose > 1)
+                printf("pkg%d core%d, cpu%d\n", pkg, core, cpu);
+        insert_cpu_counters(&pcc_odd, new_cpu_counters(pkg, core, cpu));
+        insert_cpu_counters(&pcc_even, new_cpu_counters(pkg, core, cpu));
+        insert_cpu_counters(&pcc_delta, new_cpu_counters(pkg, core, cpu));
+        free(pcc_average);
+        pcc_average = new_cpu_counters(pkg, core, cpu);
+}
+/*
+ * get_physical_package_id - read the package id of a CPU from sysfs.
+ * @cpu: logical CPU number
+ *
+ * Returns the integer found in
+ * /sys/devices/system/cpu/cpu<cpu>/topology/physical_package_id.
+ * A file that cannot be opened or parsed is fatal (exit status 1), so an
+ * uninitialized value is never returned.
+ */
+int get_physical_package_id(int cpu)
+{
+        char path[128]; /* fixed part is 56 bytes; leaves room for any int */
+        FILE *filep;
+        int pkg;
+        snprintf(path, sizeof(path),
+                "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
+        filep = fopen(path, "r");
+        if (filep == NULL) {
+                perror(path);
+                exit(1);
+        }
+        if (fscanf(filep, "%d", &pkg) != 1) {
+                fprintf(stderr, "%s: could not parse package id\n", path);
+                exit(1);
+        }
+        fclose(filep);
+        return pkg;
+}
+/*
+ * get_core_id - read the core id of a CPU from sysfs.
+ * @cpu: logical CPU number
+ *
+ * Returns the integer found in
+ * /sys/devices/system/cpu/cpu<cpu>/topology/core_id.
+ * A file that cannot be opened or parsed is fatal (exit status 1), so an
+ * uninitialized value is never returned.
+ */
+int get_core_id(int cpu)
+{
+        char path[64];  /* fixed part is 43 bytes; leaves room for any int */
+        FILE *filep;
+        int core;
+        snprintf(path, sizeof(path),
+                "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+        filep = fopen(path, "r");
+        if (filep == NULL) {
+                perror(path);
+                exit(1);
+        }
+        if (fscanf(filep, "%d", &core) != 1) {
+                fprintf(stderr, "%s: could not parse core id\n", path);
+                exit(1);
+        }
+        fclose(filep);
+        return core;
+}
+/*
+ * run func(pkg, core, cpu) on every cpu in /proc/stat
+ *
+ * The aggregate "cpu" line is skipped first; then every following
+ * "cpuN ..." line yields one call, with the package and core ids looked
+ * up through sysfs.  Scanning stops at the first line that is not a
+ * "cpuN" line.  Returns the number of CPUs found.  A missing or
+ * malformed /proc/stat is fatal (exit status 1).
+ */
+int for_all_cpus(void (func)(int, int, int))
+{
+        FILE *fp;
+        int cpu_count;
+        int retval;
+        fp = fopen(proc_stat, "r");
+        if (fp == NULL) {
+                perror(proc_stat);
+                exit(1);
+        }
+        /* every conversion is suppressed, so success is a return value of 0 */
+        retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+        if (retval != 0) {
+                perror("/proc/stat format");
+                exit(1);
+        }
+        for (cpu_count = 0; ; cpu_count++) {
+                int cpu;
+                /* %d, not %u: the destination is an int */
+                retval = fscanf(fp, "cpu%d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu);
+                if (retval != 1)
+                        break;
+                func(get_physical_package_id(cpu), get_core_id(cpu), cpu);
+        }
+        fclose(fp);
+        return cpu_count;
+}
+/*
+ * re_initialize - throw away all counter lists and rebuild them from the
+ * current contents of /proc/stat, then clear need_reinitialize.
+ * Progress is reported on stdout.
+ */
+void re_initialize(void)
+{
+        fputs("turbostat: topology changed, re-initializing.\n", stdout);
+
+        free_all_counters();
+        num_cpus = for_all_cpus(alloc_new_cpu_counters);
+        need_reinitialize = 0;
+
+        printf("num_cpus is now %d\n", num_cpus);
+}
+/*
+ * dummy - do-nothing callback; verify_num_cpus() passes it to
+ * for_all_cpus() when only the CPU count is of interest.
+ */
+void dummy(int pkg, int core, int cpu)
+{
+        (void)pkg;
+        (void)core;
+        (void)cpu;
+}
+/*
+ * check to see if the number of cpus changed (one came on-line or went
+ * away); if so, request a re-initialization via need_reinitialize
+ */
+void verify_num_cpus()
+{
+        const int new_num_cpus = for_all_cpus(dummy);
+
+        if (new_num_cpus == num_cpus)
+                return;
+
+        if (verbose)
+                printf("num_cpus was %d, is now  %d\n",
+                        num_cpus, new_num_cpus);
+        need_reinitialize = 1;
+}
+/*
+ * turbostat_loop - periodic mode: sample, sleep interval_sec, sample
+ * again and print the difference, forever.
+ *
+ * Snapshots alternate between pcc_even and pcc_odd so that each new
+ * snapshot is compared with the previous one.  Whenever the CPU count
+ * changed, a counter read failed (need_reinitialize) or compute_delta()
+ * reports a changed CPU configuration, all lists are rebuilt and the
+ * measurement starts over instead of printing results derived from
+ * mismatched or missing data.
+ */
+void turbostat_loop()
+{
+restart:
+        get_counters(pcc_even);
+        gettimeofday(&tv_even, (struct timezone *)NULL);
+        while (1) {
+                verify_num_cpus();
+                if (need_reinitialize) {
+                        re_initialize();
+                        goto restart;
+                }
+                sleep(interval_sec);
+                get_counters(pcc_odd);
+                gettimeofday(&tv_odd, (struct timezone *)NULL);
+                /* a failed read leaves zeros behind: do not compute with them */
+                if (need_reinitialize ||
+                    compute_delta(pcc_odd, pcc_even, pcc_delta)) {
+                        re_initialize();
+                        goto restart;
+                }
+                timersub(&tv_odd, &tv_even, &tv_delta);
+                compute_average(pcc_delta, pcc_average);
+                print_counters(pcc_delta);
+                sleep(interval_sec);
+                get_counters(pcc_even);
+                gettimeofday(&tv_even, (struct timezone *)NULL);
+                if (need_reinitialize ||
+                    compute_delta(pcc_even, pcc_odd, pcc_delta)) {
+                        re_initialize();
+                        goto restart;
+                }
+                timersub(&tv_even, &tv_odd, &tv_delta);
+                compute_average(pcc_delta, pcc_average);
+                print_counters(pcc_delta);
+        }
+}
+/*
+ * check_dev_msr - make sure the MSR device node of cpu 0 exists;
+ * otherwise print a hint on stderr and exit with status -5.
+ */
+void check_dev_msr()
+{
+        struct stat sb;
+
+        if (stat("/dev/cpu/0/msr", &sb) == 0)
+                return;
+
+        fprintf(stderr, "no /dev/cpu/0/msr\n");
+        fprintf(stderr, "Try \"# modprobe msr\"\n");
+        exit(-5);
+}
+/*
+ * check_super_user - exit with status -6 unless the real user id is 0.
+ */
+void check_super_user()
+{
+        if (getuid() == 0)
+                return;
+
+        fprintf(stderr, "must be root\n");
+        exit(-6);
+}
+/*
+ * has_nehalem_turbo_ratio_limit - tell whether print_nehalem_info() may
+ * read MSR_NEHALEM_TURBO_RATIO_LIMIT on this processor.
+ * @family: CPUID family
+ * @model:  CPUID model, extended model bits included
+ *
+ * Returns 1 for the listed family 6 models of a genuine Intel CPU,
+ * 0 for everything else.
+ */
+int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+        int supported = 0;
+
+        if (genuine_intel && family == 6) {
+                switch (model) {
+                case 0x1A:      /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
+                case 0x1E:      /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
+                case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
+                case 0x25:      /* Westmere Client - Clarkdale, Arrandale */
+                case 0x2C:      /* Westmere EP - Gulftown */
+                case 0x2A:      /* SNB */
+                case 0x2D:      /* SNB Xeon */
+                        supported = 1;
+                        break;
+                case 0x2E:      /* Nehalem-EX Xeon - Beckton */
+                case 0x2F:      /* Westmere-EX Xeon - Eagleton */
+                default:
+                        break;
+                }
+        }
+        return supported;
+}
+/*
+ * is_snb - return 1 when a genuine Intel CPU reports model 0x2A or 0x2D,
+ * 0 otherwise.
+ * @family: CPUID family (not examined)
+ * @model:  CPUID model, extended model bits included
+ */
+int is_snb(unsigned int family, unsigned int model)
+{
+        (void)family;
+
+        if (!genuine_intel)
+                return 0;
+
+        return (model == 0x2A || model == 0x2D) ? 1 : 0;
+}
+/*
+ * discover_bclk - return the base clock used to turn ratios into
+ * frequencies: 100.00 when is_snb() matches, 133.33 otherwise.
+ */
+double discover_bclk(unsigned int family, unsigned int model)
+{
+        return is_snb(family, model) ? 100.00 : 133.33;
+}
+/*
+ * check_cpuid - query CPUID and set the capability flags used by the
+ * rest of the program (genuine_intel, has_invariant_tsc, has_aperf,
+ * do_nehalem_platform_info, do_nhm_cstates, do_snb_cstates, bclk,
+ * do_nehalem_turbo_ratio_limit).
+ *
+ * Exits with status 1 when the CPU lacks MSR support, an invariant TSC
+ * or the APERF/MPERF MSRs.
+ */
+void check_cpuid()
+{
+        unsigned int eax, ebx, ecx, edx, max_level;
+        unsigned int fms, family, model, stepping;
+        eax = ebx = ecx = edx = 0;
+        /* leaf 0: highest standard leaf and the vendor string in ebx:edx:ecx */
+        asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
+        if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+                genuine_intel = 1;
+        if (verbose)
+                fprintf(stderr, "%.4s%.4s%.4s ",
+                        (char *)&ebx, (char *)&edx, (char *)&ecx);
+        /* leaf 1: family/model/stepping in eax, feature flags in edx */
+        asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
+        family = (fms >> 8) & 0xf;
+        model = (fms >> 4) & 0xf;
+        stepping = fms & 0xf;
+        /* fold the extended model bits in for the families that use them */
+        if (family == 6 || family == 0xf)
+                model += ((fms >> 16) & 0xf) << 4;
+        if (verbose)
+                fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+                        max_level, family, model, stepping, family, model, stepping);
+        /* edx bit 5 of leaf 1 advertises RDMSR/WRMSR support */
+        if (!(edx & (1 << 5))) {
+                fprintf(stderr, "CPUID: no MSR\n");
+                exit(1);
+        }
+        /*
+         * check max extended function levels of CPUID.
+         * This is needed to check for invariant TSC.
+         * This check is valid for both Intel and AMD.
+         */
+        ebx = ecx = edx = 0;
+        asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
+        if (max_level < 0x80000007) {
+                fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
+                exit(1);
+        }
+        /*
+         * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+         * this check is valid for both Intel and AMD
+         */
+        asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
+        /* bitwise AND: test bit 8 itself, not merely "edx is non-zero" */
+        has_invariant_tsc = (edx & (1 << 8)) != 0;
+        if (!has_invariant_tsc) {
+                fprintf(stderr, "No invariant TSC\n");
+                exit(1);
+        }
+        /*
+         * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+         * this check is valid for both Intel and AMD
+         */
+        asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
+        /* bitwise AND: test bit 0 itself, not merely "ecx is non-zero" */
+        has_aperf = (ecx & (1 << 0)) != 0;
+        if (!has_aperf) {
+                fprintf(stderr, "No APERF MSR\n");
+                exit(1);
+        }
+        do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
+        do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
+        do_snb_cstates = is_snb(family, model);
+        bclk = discover_bclk(family, model);
+        do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
+}
+/*
+ * usage - print the command line synopsis on stderr and exit with
+ * status 1.  Does not return.
+ */
+void usage()
+{
+        fprintf(stderr,
+                "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n",
+                progname);
+
+        exit(1);
+}
+/*
+ * in /dev/cpu/ return success for names that are numbers
+ * ie. filter out ".", "..", "microcode".
+ *
+ * Only the first character of the name is examined.  It is converted to
+ * unsigned char first because isdigit() is undefined for negative
+ * values, which a plain char can hold for non-ASCII names.
+ * Returns 1 for a leading digit, 0 otherwise.
+ */
+int dir_filter(const struct dirent *dirp)
+{
+        return isdigit((unsigned char)dirp->d_name[0]) != 0;
+}
+/*
+ * open_dev_cpu_msr - placeholder: ignores its argument and returns 0.
+ */
+int open_dev_cpu_msr(int dummy1)
+{
+        (void)dummy1;
+
+        return 0;
+}
+/*
+ * turbostat_init - one-time start-up: check the prerequisites (CPUID
+ * features, MSR device node, root), build the per-CPU counter lists and,
+ * in verbose mode, print the platform ratio information.
+ */
+void turbostat_init()
+{
+        /* each of these exits the program when its requirement is not met */
+        check_cpuid();
+        check_dev_msr();
+        check_super_user();
+
+        num_cpus = for_all_cpus(alloc_new_cpu_counters);
+
+        if (verbose) {
+                print_nehalem_info();
+        }
+}
+/*
+ * fork_it - command mode: measure the counters across the run of a
+ * child command.
+ * @argv: NULL-terminated argument vector of the command; argv[0] is
+ *        looked up through PATH by execvp()
+ *
+ * Takes a snapshot, runs the command, waits for it with SIGINT and
+ * SIGQUIT ignored in the parent, takes a second snapshot and prints the
+ * difference followed by the elapsed time.  Always returns 0; a failing
+ * fork() or waitpid() exits with status 1.
+ */
+int fork_it(char **argv)
+{
+        int retval;
+        pid_t child_pid;
+        get_counters(pcc_even);
+        gettimeofday(&tv_even, (struct timezone *)NULL);
+        child_pid = fork();
+        if (child_pid == -1) {
+                perror("fork");
+                exit(1);
+        }
+        if (!child_pid) {
+                /* child */
+                execvp(argv[0], argv);
+                /*
+                 * execvp() only returns on failure.  Leave right here:
+                 * otherwise the child would fall through and run the
+                 * parent's measurement and printing code a second time.
+                 */
+                perror(argv[0]);
+                _exit(127);
+        } else {
+                int status;
+                /* parent */
+                signal(SIGINT, SIG_IGN);
+                signal(SIGQUIT, SIG_IGN);
+                if (waitpid(child_pid, &status, 0) == -1) {
+                        perror("wait");
+                        exit(1);
+                }
+        }
+        get_counters(pcc_odd);
+        gettimeofday(&tv_odd, (struct timezone *)NULL);
+        retval = compute_delta(pcc_odd, pcc_even, pcc_delta);
+        timersub(&tv_odd, &tv_even, &tv_delta);
+        compute_average(pcc_delta, pcc_average);
+        /* results are only meaningful when the CPU configuration was stable */
+        if (!retval)
+                print_counters(pcc_delta);
+        fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
+        return 0;
+}
+/*
+ * cmdline - parse the command line options.
+ * @argc: argument count as passed to main()
+ * @argv: argument vector as passed to main()
+ *
+ * Recognizes -v (may be repeated), -i interval_sec and -M MSR# (hex).
+ * The leading '+' in the option string stops parsing at the first
+ * non-option, so the options of a command to run are left alone; optind
+ * then indexes that command.  An unknown option, an interval that is not
+ * a non-negative decimal number, or an MSR number that is not hexadecimal
+ * ends the program through usage().
+ */
+void cmdline(int argc, char **argv)
+{
+        int opt;
+        progname = argv[0];
+        while ((opt = getopt(argc, argv, "+vi:M:")) != -1) {
+                switch (opt) {
+                case 'v':
+                        verbose++;
+                        break;
+                case 'i': {
+                        char *end;
+                        long val = strtol(optarg, &end, 10);
+                        /* reject empty, trailing garbage, negative and oversized values */
+                        if (end == optarg || *end != '\0' ||
+                            val < 0 || val > 0x7fffffffL)
+                                usage();
+                        interval_sec = (unsigned int)val;
+                        break;
+                }
+                case 'M':
+                        if (sscanf(optarg, "%x", &extra_msr_offset) != 1)
+                                usage();
+                        if (verbose > 1)
+                                fprintf(stderr, "MSR 0x%X\n", extra_msr_offset);
+                        break;
+                default:
+                        usage();
+                }
+        }
+}
+/*
+ * main - parse the options, initialize, then either measure across a
+ * command (when arguments remain after the options) or loop forever in
+ * periodic mode.
+ */
+int main(int argc, char **argv)
+{
+        cmdline(argc, argv);
+
+        if (verbose > 1) {
+                fprintf(stderr, "turbostat Dec 6, 2010"
+                        " - Len Brown <lenb@kernel.org>\n");
+                fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");
+        }
+
+        turbostat_init();
+
+        /*
+         * no params left: periodic mode
+         */
+        if (argc == optind) {
+                turbostat_loop();
+                return 0;
+        }
+        /*
+         * otherwise what is left must be a command to fork
+         */
+        return fork_it(argv + optind);
+}
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
new file mode 100644
index 000000000000..f458237fdd79
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -0,0 +1,8 @@
+# Build the tool from its single source file; no recipe is given, so
+# make's implicit rule for C programs is relied upon.
+x86_energy_perf_policy : x86_energy_perf_policy.c
+# Remove the built binary.
+clean :
+        rm -f x86_energy_perf_policy
+# Install the binary and its man page into the system directories.
+install :
+        install x86_energy_perf_policy /usr/bin/
+        install x86_energy_perf_policy.8 /usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
new file mode 100644
index 000000000000..8eaaad648cdb
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -0,0 +1,104 @@
+.\"  This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
+.\"  Distributed under the GPL, Copyleft 1994.
+.TH X86_ENERGY_PERF_POLICY 8
+.SH NAME
+x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
+.SH SYNOPSIS
+.ft B
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB "\-r"
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'performance'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'normal'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'powersave'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB n
+.br
+.SH DESCRIPTION
+\fBx86_energy_perf_policy\fP
+allows software to convey
+its policy for the relative importance of performance
+versus energy savings to the processor.
+The processor uses this information in model-specific ways
+when it must select trade-offs between performance and
+energy efficiency.
+This policy hint does not supersede Processor Performance states
+(P-states) or CPU Idle power states (C-states), but allows
+software to have influence where it would otherwise be unable
+to express a preference.
+For example, this setting may tell the hardware how
+aggressively or conservatively to control frequency
+in the "turbo range" above the explicitly OS-controlled
+P-state frequency range.  It may also tell the hardware
+how aggressively it should enter the OS requested C-states.
+Support for this feature is indicated by CPUID.06H.ECX.bit3
+per the Intel Architectures Software Developer's Manual.
+.SS Options
+\fB-c\fP limits operation to a single CPU.
+The default is to operate on all CPUs.
+Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
+logical processor, but that the initial implementations
+of the MSR were shared among all processors in each package.
+.PP
+\fB-v\fP increases verbosity.  By default
+x86_energy_perf_policy is silent.
+.PP
+\fB-r\fP is for "read-only" mode - the unchanged state
+is read and displayed.
+.PP
+.I performance
+Set a policy where performance is paramount.
+The processor will be unwilling to sacrifice any performance
+for the sake of energy saving. This is the hardware default.
+.PP
+.I normal
+Set a policy with a normal balance between performance and energy efficiency.
+The processor will tolerate minor performance compromise
+for potentially significant energy savings.
+This is a reasonable default for most desktops and servers.
+.PP
+.I powersave
+Set a policy where the processor can accept
+a measurable performance hit to maximize energy efficiency.
+.PP
+.I n
+Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
+The range of valid numbers is 0-15, where 0 is maximum
+performance and 15 is maximum energy efficiency.
+.SH NOTES
+.B "x86_energy_perf_policy "
+runs only as root.
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+.SH "SEE ALSO"
+msr(4)
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
new file mode 100644
index 000000000000..d9678a34dd70
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -0,0 +1,325 @@
+/*
+ * x86_energy_perf_policy -- set the energy versus performance
+ * policy preference bias on recent X86 processors.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <string.h>
+unsigned int verbose;           /* set with -v */
+unsigned int read_only;         /* set with -r */
+char *progname;                 /* argv[0]; printed by usage() */
+unsigned long long new_bias;    /* value to write; set by cmdline() when -r is absent */
+int cpu = -1;                   /* set with -c; -1 means act on every CPU */
+/*
+ * Usage:
+ *
+ * -c cpu: limit action to a single CPU (default is all CPUs)
+ * -v: verbose output (can invoke more than once)
+ * -r: read-only, don't change any settings
+ *
+ *  performance
+ *      Performance is paramount.
+ *      Unwilling to sacrifice any performance
+ *      for the sake of energy saving. (hardware default)
+ *
+ *  normal
+ *      Can tolerate minor performance compromise
+ *      for potentially significant energy savings.
+ *      (reasonable default for most desktops and servers)
+ *
+ *  powersave
+ *      Can tolerate significant performance hit
+ *      to maximize energy savings.
+ *
+ * n
+ *      a numerical value to write to the underlying MSR.
+ */
+/*
+ * usage()
+ * Print the one-line synopsis to stdout and exit with status 1.
+ */
+void usage(void)
+{
+        printf("%s: [-c cpu] [-v] (-r | 'performance' | 'normal' | 'powersave' | n)\n",
+                progname);
+        exit(1);
+}
+/* MSR address, passed as the file offset to pread()/pwrite() on /dev/cpu/N/msr */
+#define MSR_IA32_ENERGY_PERF_BIAS       0x000001b0
+/*
+ * Bias values written for the named policies.  BIAS_POWERSAVE is also
+ * the largest numeric value cmdline() accepts.
+ */
+#define BIAS_PERFORMANCE                0
+#define BIAS_BALANCE                    6       /* used for the 'normal' policy */
+#define BIAS_POWERSAVE                  15
+/*
+ * cmdline()
+ * Parse the options and the policy argument into the file-scope settings.
+ *
+ * Options: -c cpu (sets cpu), -r (sets read_only), -v (bumps verbose).
+ * With -r no further argument is allowed.  Without -r exactly one
+ * argument must follow: 'performance', 'normal', 'powersave', or a
+ * number in the range 0..BIAS_POWERSAVE; the result is stored in
+ * new_bias.  Any violation calls usage(), which exits.
+ */
+void cmdline(int argc, char **argv)
+{
+        const char *policy;
+        char *endptr;
+        int opt;
+
+        progname = argv[0];
+        while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
+                switch (opt) {
+                case 'c':
+                        cpu = atoi(optarg);
+                        break;
+                case 'r':
+                        read_only = 1;
+                        break;
+                case 'v':
+                        verbose++;
+                        break;
+                default:
+                        usage();
+                }
+        }
+        if (read_only) {
+                /* if -r, then there must be no additional argument */
+                if (argc > optind)
+                        usage();
+                return;
+        }
+        /* if no -r, then there must be exactly one additional argument */
+        if (argc != optind + 1) {
+                printf("must supply -r or policy param\n");
+                usage();
+        }
+        policy = argv[optind];
+        if (!strcmp("performance", policy)) {
+                new_bias = BIAS_PERFORMANCE;
+        } else if (!strcmp("normal", policy)) {
+                new_bias = BIAS_BALANCE;
+        } else if (!strcmp("powersave", policy)) {
+                new_bias = BIAS_POWERSAVE;
+        } else {
+                /*
+                 * Numeric policy: reject input with no digits, trailing
+                 * garbage such as "5x", and anything above BIAS_POWERSAVE.
+                 */
+                new_bias = strtoull(policy, &endptr, 0);
+                if (endptr == policy || *endptr != '\0' ||
+                    new_bias > BIAS_POWERSAVE) {
+                        fprintf(stderr, "invalid value: %s\n", policy);
+                        usage();
+                }
+        }
+}
+/*
+ * validate_cpuid()
+ * returns on success, quietly exits on failure (make verbose with -v)
+ *
+ * Checks, in order: the CPUID leaf 0 vendor registers spell
+ * "GenuineIntel", CPUID leaf 1 EDX bit 5 is set (reported as "no MSR"
+ * otherwise), and CPUID leaf 6 ECX bit 3 is set (support for
+ * MSR_IA32_ENERGY_PERF_BIAS).
+ */
+void validate_cpuid(void)
+{
+        unsigned int eax, ebx, ecx, edx, max_level;
+        char brand[16];
+        unsigned int fms, family, model, stepping;
+
+        eax = ebx = ecx = edx = 0;
+        asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
+                "=d" (edx) : "a" (0));
+        /*
+         * Assemble the 12-byte vendor string (EBX, EDX, ECX order) now:
+         * ecx/edx are overwritten by the next cpuid, and brand is printed
+         * with %s below, so it has to be filled in and NUL-terminated.
+         */
+        memcpy(brand, &ebx, 4);
+        memcpy(brand + 4, &edx, 4);
+        memcpy(brand + 8, &ecx, 4);
+        brand[12] = '\0';
+        if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
+                if (verbose)
+                        fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
+                                (char *)&ebx, (char *)&edx, (char *)&ecx);
+                exit(1);
+        }
+        asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
+        family = (fms >> 8) & 0xf;
+        model = (fms >> 4) & 0xf;
+        stepping = fms & 0xf;
+        /* families 6 and 0xf extend the model with bits 19:16 */
+        if (family == 6 || family == 0xf)
+                model += ((fms >> 16) & 0xf) << 4;
+        if (verbose > 1)
+                printf("CPUID %s %d levels family:model:stepping "
+                        "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
+                        family, model, stepping, family, model, stepping);
+        if (!(edx & (1 << 5))) {
+                if (verbose)
+                        printf("CPUID: no MSR\n");
+                exit(1);
+        }
+        /*
+         * Support for MSR_IA32_ENERGY_PERF_BIAS
+         * is indicated by CPUID.06H.ECX.bit3
+         */
+        asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
+        if (verbose)
+                printf("CPUID.06H.ECX: 0x%x\n", ecx);
+        if (!(ecx & (1 << 3))) {
+                if (verbose)
+                        printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
+                exit(1);
+        }
+        return; /* success */
+}
+/*
+ * get_msr()
+ * Read the MSR at 'offset' for 'cpu' through /dev/cpu/<cpu>/msr and
+ * return its value.  Exits with status 1 if the device cannot be opened
+ * and with status -2 if the read does not return a full value.
+ */
+unsigned long long get_msr(int cpu, int offset)
+{
+        char msr_path[32];
+        unsigned long long value;
+        int fd, nread;
+
+        sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
+        fd = open(msr_path, O_RDONLY);
+        if (fd < 0) {
+                printf("Try \"# modprobe msr\"\n");
+                perror(msr_path);
+                exit(1);
+        }
+
+        nread = pread(fd, &value, sizeof value, offset);
+        if (nread == sizeof value) {
+                close(fd);
+                return value;
+        }
+
+        printf("pread cpu%d 0x%x = %d\n", cpu, offset, nread);
+        exit(-2);
+}
+/*
+ * put_msr()
+ * Write 'new_msr' to the MSR at 'offset' for 'cpu' through
+ * /dev/cpu/<cpu>/msr and return the value the MSR held beforehand.
+ * Exits with status 1 if the device cannot be opened and with
+ * status -2 if either the read or the write is short.
+ */
+unsigned long long  put_msr(int cpu, unsigned long long new_msr, int offset)
+{
+        unsigned long long old_msr;
+        char msr_path[32];
+        int retval;
+        int fd;
+
+        sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
+        fd = open(msr_path, O_RDWR);
+        if (fd < 0) {
+                perror(msr_path);
+                exit(1);
+        }
+        /* fetch the previous value first so it can be reported */
+        retval = pread(fd, &old_msr, sizeof old_msr, offset);
+        if (retval != sizeof old_msr) {
+                perror("pread");        /* name the call that actually failed */
+                printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
+                exit(-2);
+        }
+        retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
+        if (retval != sizeof new_msr) {
+                perror("pwrite");
+                printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
+                exit(-2);
+        }
+        close(fd);
+        return old_msr;
+}
+/* print_msr(): print the current MSR_IA32_ENERGY_PERF_BIAS value of 'cpu'. */
+void print_msr(int cpu)
+{
+        unsigned long long bias = get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS);
+
+        printf("cpu%d: 0x%016llx\n", cpu, bias);
+}
+/*
+ * update_msr(): write new_bias to MSR_IA32_ENERGY_PERF_BIAS on 'cpu';
+ * with -v also print the old and new values.
+ */
+void update_msr(int cpu)
+{
+        unsigned long long old_bias =
+                put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
+
+        if (!verbose)
+                return;
+        printf("cpu%d  msr0x%x 0x%016llx -> 0x%016llx\n",
+                cpu, MSR_IA32_ENERGY_PERF_BIAS, old_bias, new_bias);
+}
+char *proc_stat = "/proc/stat";
+/*
+ * for_every_cpu()
+ * Call func(N) for each "cpuN ..." line that follows the aggregate
+ * "cpu ..." line at the top of /proc/stat, in file order, stopping at
+ * the first line that does not match.  Exits with status 1 if the file
+ * cannot be opened or the first fscanf() does not return 0.
+ */
+void for_every_cpu(void (func)(int))
+{
+        FILE *fp;
+        int retval;
+
+        fp = fopen(proc_stat, "r");
+        if (fp == NULL) {
+                perror(proc_stat);
+                exit(1);
+        }
+        /* consume the aggregate line; every field is assignment-suppressed */
+        retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+        if (retval != 0) {
+                perror("/proc/stat format");
+                exit(1);
+        }
+        while (1) {
+                unsigned int cpu_num;   /* unsigned to match the %u conversion */
+
+                retval = fscanf(fp,
+                        "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
+                        &cpu_num);
+                if (retval != 1)
+                        break;          /* leave the loop so the stream gets closed */
+                func((int)cpu_num);
+        }
+        fclose(fp);
+}
+/*
+ * main()
+ * Parse the command line, verify CPU support, then either print (-r) or
+ * update the bias MSR on the CPU selected with -c, or on every CPU when
+ * no -c was given.
+ */
+int main(int argc, char **argv)
+{
+        void (*action)(int);
+
+        cmdline(argc, argv);
+        if (verbose > 1) {
+                printf("x86_energy_perf_policy Nov 24, 2010 - Len Brown <lenb@kernel.org>\n");
+                if (!read_only)
+                        printf("new_bias %lld\n", new_bias);
+        }
+        validate_cpuid();
+
+        action = read_only ? print_msr : update_msr;
+        if (cpu == -1)
+                for_every_cpu(action);
+        else
+                action(cpu);
+        return 0;
+}
diff --git a/tools/slub/slabinfo.c b/tools/slub/slabinfo.c
new file mode 100644
index 000000000000..516551c9f172
--- /dev/null
+++ b/tools/slub/slabinfo.c
@@ -0,0 +1,1364 @@
+/*
+ * Slabinfo: Tool to get reports about slabs
+ *
+ * (C) 2007 sgi, Christoph Lameter
+ *
+ * Compile by:
+ *
+ * gcc -o slabinfo slabinfo.c
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <strings.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <regex.h>
+#include <errno.h>
+#define MAX_SLABS 500           /* capacity of slabinfo[] */
+#define MAX_ALIASES 500         /* capacity of aliasinfo[] */
+#define MAX_NODES 1024          /* length of the per-node numa[] arrays */
+/*
+ * Per-cache record.  The code that fills these fields is outside this
+ * view; the reporting functions print them as cache attributes and
+ * event counters (presumably read from per-cache attribute files of the
+ * same names -- TODO confirm against the code that populates them).
+ */
+struct slabinfo {
+        char *name;     /* cache name, also used as the directory part of attribute paths; "*" entries are skipped */
+        int alias;
+        int refs;
+        int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
+        int hwcache_align, object_size, objs_per_slab;
+        int sanity_checks, slab_size, store_user, trace;
+        int order, poison, reclaim_account, red_zone;  /* order: slab size is page_size << order */
+        unsigned long partial, objects, slabs, objects_partial, objects_total;
+        /* event counters printed by slab_stats() */
+        unsigned long alloc_fastpath, alloc_slowpath;
+        unsigned long free_fastpath, free_slowpath;
+        unsigned long free_frozen, free_add_partial, free_remove_partial;
+        unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
+        unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
+        unsigned long deactivate_to_head, deactivate_to_tail;
+        unsigned long deactivate_remote_frees, order_fallback;
+        int numa[MAX_NODES];            /* per-node values printed by slab_numa() */
+        int numa_partial[MAX_NODES];    /* per-node values for the "Partial slabs" row */
+} slabinfo[MAX_SLABS];
+/*
+ * One alias entry.  find_one_alias() matches entries by their slab
+ * pointer and compares their name lengths.
+ */
+struct aliasinfo {
+        char *name;             /* alias name */
+        char *ref;              /* presumably the name of the cache referred to -- TODO confirm */
+        struct slabinfo *slab;  /* record of the cache this alias refers to */
+} aliasinfo[MAX_ALIASES];
+/*
+ * File-scope state.  The show_*/sort_* flags are presumably set by the
+ * option parser, which is outside this view -- confirm there.
+ */
+int slabs = 0;
+int actual_slabs = 0;           /* slabcache() prints a full report() when this is 1 */
+int aliases = 0;                /* number of aliasinfo[] entries walked by find_one_alias() */
+int alias_targets = 0;
+int highest_node = 0;           /* largest node number stored by decode_numa_list(); 0 makes slab_numa() report no NUMA data */
+char buffer[4096];              /* shared scratch buffer filled by read_obj() and read_slab_obj() */
+int show_empty = 0;             /* slabcache(): list only caches that have no slabs */
+int show_report = 0;
+int show_alias = 0;
+int show_slab = 0;
+int skip_zero = 1;              /* skip caches without slabs in slabcache() and slab_numa() */
+int show_numa = 0;
+int show_track = 0;
+int show_first_alias = 0;
+int validate = 0;
+int shrink = 0;
+int show_inverted = 0;
+int show_single_ref = 0;
+int show_totals = 0;
+int sort_size = 0;
+int sort_active = 0;
+int set_debug = 0;
+int show_ops = 0;
+int show_activity = 0;          /* selects the activity line format in first_line() and slabcache() */
+/* Debug options (set by debug_opt_scan()) */
+int sanity = 0;
+int redzone = 0;
+int poison = 0;
+int tracking = 0;
+int tracing = 0;
+int page_size;                  /* shifted left by a cache's order to get its slab size in bytes */
+regex_t pattern;
+/*
+ * Print a printf-style message to stderr and terminate the program with
+ * EXIT_FAILURE.  Does not return.
+ */
+static void fatal(const char *x, ...)
+{
+        va_list args;
+
+        va_start(args, x);
+        vfprintf(stderr, x, args);
+        va_end(args);
+
+        exit(EXIT_FAILURE);
+}
+/*
+ * Print the option summary and the list of debug option letters to
+ * stdout.  Unlike fatal(), this returns to the caller.
+ */
+static void usage(void)
+{
+        printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n"
+                "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
+                "-a|--aliases           Show aliases\n"
+                "-A|--activity          Most active slabs first\n"
+                "-d<options>|--debug=<options> Set/Clear Debug options\n"
+                "-D|--display-active    Switch line format to activity\n"
+                "-e|--empty             Show empty slabs\n"
+                "-f|--first-alias       Show first alias\n"
+                "-h|--help              Show usage information\n"
+                "-i|--inverted          Inverted list\n"
+                "-l|--slabs             Show slabs\n"
+                "-n|--numa              Show NUMA information\n"
+                "-o|--ops               Show kmem_cache_ops\n"
+                "-s|--shrink            Shrink slabs\n"
+                "-r|--report            Detailed report on single slabs\n"
+                "-S|--Size              Sort by size\n"
+                "-t|--tracking          Show alloc/free information\n"
+                "-T|--Totals            Show summary information\n"
+                "-v|--validate          Validate slabs\n"
+                "-z|--zero              Include empty slabs\n"
+                "-1|--1ref              Single reference\n"
+                "\nValid debug options (FZPUT may be combined)\n"
+                "a / A          Switch on all debug options (=FZUP)\n"
+                "-              Switch off all debug options\n"
+                "f / F          Sanity Checks (SLAB_DEBUG_FREE)\n"
+                "z / Z          Redzoning\n"
+                "p / P          Poisoning\n"
+                "u / U          Tracking\n"
+                "t / T          Tracing\n"
+        );
+}
+/*
+ * Read the first line of the file 'name' into the global buffer, strip
+ * one trailing newline, and return the length of the resulting string.
+ * If the file cannot be opened or nothing can be read, buffer is set to
+ * "" and 0 is returned.
+ */
+static unsigned long read_obj(const char *name)
+{
+        FILE *f = fopen(name, "r");
+        size_t len;
+
+        buffer[0] = 0;
+        if (!f)
+                return 0;
+        if (!fgets(buffer, sizeof(buffer), f))
+                buffer[0] = 0;
+        fclose(f);
+        /*
+         * The newline, if present, is the last character (index len - 1);
+         * index len is always the terminating NUL.
+         */
+        len = strlen(buffer);
+        if (len && buffer[len - 1] == '\n')
+                buffer[--len] = 0;
+        return len;
+}
+/*
+ * Return the contents of the attribute file 'name' converted with
+ * atol(), or 0 when read_obj() yields an empty string.
+ */
+static unsigned long get_obj(const char *name)
+{
+        return read_obj(name) ? atol(buffer) : 0;
+}
+/*
+ * Read the attribute file 'name', return its leading decimal number and
+ * hand back the remainder of the line through *x.
+ *
+ * *x is set to NULL when the file is empty/unreadable or when nothing
+ * follows the number (after skipping spaces); otherwise it is a
+ * strdup()ed copy of the remaining text.
+ */
+static unsigned long get_obj_and_str(const char *name, char **x)
+{
+        unsigned long result = 0;
+        char *p;
+
+        *x = NULL;
+        /* *x is already NULL here, so there is nothing more to reset */
+        if (!read_obj(name))
+                return 0;
+        result = strtoul(buffer, &p, 10);
+        while (*p == ' ')
+                p++;
+        if (*p)
+                *x = strdup(p);
+        return result;
+}
+/*
+ * Write the decimal value n, followed by a newline, to the attribute
+ * file "<s->name>/<name>".  Calls fatal() if the file cannot be opened
+ * for writing.
+ */
+static void set_obj(struct slabinfo *s, const char *name, int n)
+{
+        char path[100];
+        FILE *out;
+
+        snprintf(path, sizeof(path), "%s/%s", s->name, name);
+        out = fopen(path, "w");
+        if (out == NULL)
+                fatal("Cannot write to %s\n", path);
+        fprintf(out, "%d\n", n);
+        fclose(out);
+}
+/*
+ * Read the attribute file "<s->name>/<name>" into the global buffer as
+ * a NUL-terminated string and return the number of bytes read.  If the
+ * file cannot be opened, buffer is set to "" and 0 is returned.
+ */
+static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+{
+        char x[100];
+        FILE *f;
+        size_t l;
+
+        snprintf(x, 100, "%s/%s", s->name, name);
+        f = fopen(x, "r");
+        if (!f) {
+                buffer[0] = 0;
+                l = 0;
+        } else {
+                /* leave room for the terminator written just below */
+                l = fread(buffer, 1, sizeof(buffer) - 1, f);
+                buffer[l] = 0;
+                fclose(f);
+        }
+        return l;
+}
+/*
+ * Format 'value' into 'buffer' and return the length of the string.
+ *
+ * Values up to 1000 are printed as plain integers.  Larger values are
+ * scaled to tenths of K (>1000), M (>1000000) or G (>1000000000),
+ * printed with one decimal digit and the unit letter, e.g. 1500 gives
+ * "1.5K".
+ */
+static int store_size(char *buffer, unsigned long value)
+{
+        unsigned long divisor;
+        char trailer;
+        int n;
+
+        /* each divisor is a tenth of the unit so one decimal digit survives */
+        if (value > 1000000000UL) {
+                divisor = 100000000UL;
+                trailer = 'G';
+        } else if (value > 1000000UL) {
+                divisor = 100000UL;
+                trailer = 'M';
+        } else if (value > 1000UL) {
+                divisor = 100;
+                trailer = 'K';
+        } else {
+                divisor = 1;
+                trailer = 0;
+        }
+
+        n = sprintf(buffer, "%ld", value / divisor);
+        if (!trailer)
+                return n;
+
+        buffer[n++] = trailer;
+        buffer[n] = 0;
+        /*
+         * Shift the last digit, the unit letter and the NUL one place to
+         * the right and drop the decimal point into the gap.
+         */
+        memmove(buffer + n - 2, buffer + n - 3, 4);
+        buffer[n - 2] = '.';
+        return n + 1;
+}
+/*
+ * Parse a list of the form "N<node>=<count> N<node>=<count> ..." from t
+ * into numa[], whose MAX_NODES entries are cleared first.  highest_node
+ * is raised to the largest node number stored.  A NULL t leaves numa[]
+ * all zero.  Node numbers outside 0..MAX_NODES-1 are skipped instead of
+ * being written outside the array.
+ */
+static void decode_numa_list(int *numa, char *t)
+{
+        int node;
+        int nr;
+
+        memset(numa, 0, MAX_NODES * sizeof(int));
+        if (!t)
+                return;
+        while (*t == 'N') {
+                t++;
+                node = strtoul(t, &t, 10);
+                if (*t == '=') {
+                        t++;
+                        nr = strtoul(t, &t, 10);
+                        /* numa[] has exactly MAX_NODES entries */
+                        if (node >= 0 && node < MAX_NODES) {
+                                numa[node] = nr;
+                                if (node > highest_node)
+                                        highest_node = node;
+                        }
+                }
+                while (*t == ' ')
+                        t++;
+        }
+}
+/* Write 1 to the cache's "validate" attribute; the "*" entry is skipped. */
+static void slab_validate(struct slabinfo *s)
+{
+        if (strcmp(s->name, "*") != 0)
+                set_obj(s, "validate", 1);
+}
+/* Write 1 to the cache's "shrink" attribute; the "*" entry is skipped. */
+static void slab_shrink(struct slabinfo *s)
+{
+        if (strcmp(s->name, "*") != 0)
+                set_obj(s, "shrink", 1);
+}
+/* Number of rows printed so far; the header is printed while it is 0. */
+int line = 0;
+/*
+ * Print the column header that matches the current line format
+ * (activity format when show_activity is set, default format otherwise).
+ */
+static void first_line(void)
+{
+        if (!show_activity) {
+                printf("Name                   Objects Objsize    Space Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
+                return;
+        }
+        printf("Name                   Objects      Alloc       Free   %%Fast Fallb O\n");
+}
+/*
+ * Pick one alias of the cache 'find'.
+ *
+ * Walks aliasinfo[] and keeps the matching entry with the longest name
+ * seen so far; a newly selected entry whose name starts with "kmall" is
+ * returned immediately.  Returns NULL when no alias refers to 'find'.
+ *
+ * NOTE(review): the previous comment said "shortest alias", but the
+ * length comparison selects the longer name -- confirm which is meant.
+ */
+static struct aliasinfo *find_one_alias(struct slabinfo *find)
+{
+        struct aliasinfo *best = NULL;
+        struct aliasinfo *a;
+
+        for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+                if (a->slab != find)
+                        continue;
+                if (best && strlen(best->name) >= strlen(a->name))
+                        continue;
+                best = a;
+                if (strncmp(a->name, "kmall", 5) == 0)
+                        return best;
+        }
+        return best;
+}
+/* Total bytes held by the cache: slab count times (page_size << order). */
+static unsigned long slab_size(struct slabinfo *s)
+{
+        int bytes_per_slab = page_size << s->order;
+
+        return s->slabs * bytes_per_slab;
+}
+/* Sum of the fast- and slow-path allocation and free counters. */
+static unsigned long slab_activity(struct slabinfo *s)
+{
+        unsigned long allocs = s->alloc_fastpath + s->alloc_slowpath;
+        unsigned long frees = s->free_fastpath + s->free_slowpath;
+
+        return allocs + frees;
+}
+/*
+ * Print the per-node numbers of a cache as one table row, preceded by
+ * a column header while no row has been printed yet (line == 0).
+ *
+ * mode 0: the row is labelled with the cache name.
+ * mode 1: the row is labelled "All slabs" and followed by a
+ *         "Partial slabs" row built from numa_partial[].
+ * Nothing is printed for the "*" entry, or for caches without slabs
+ * when skip_zero is set; without NUMA data a notice is printed instead.
+ */
+static void slab_numa(struct slabinfo *s, int mode)
+{
+        char cell[20];
+        int node;
+
+        if (!strcmp(s->name, "*"))
+                return;
+        if (highest_node == 0) {
+                printf("\n%s: No NUMA information available.\n", s->name);
+                return;
+        }
+        if (skip_zero && s->slabs == 0)
+                return;
+
+        if (line == 0) {
+                /* header: node numbers, then a ruler of matching width */
+                printf("\n%-21s:", mode ? "NUMA nodes" : "Slab");
+                for (node = 0; node <= highest_node; node++)
+                        printf(" %4d", node);
+                printf("\n----------------------");
+                for (node = 0; node <= highest_node; node++)
+                        printf("-----");
+                printf("\n");
+        }
+
+        printf("%-21s ", mode ? "All slabs" : s->name);
+        for (node = 0; node <= highest_node; node++) {
+                store_size(cell, s->numa[node]);
+                printf(" %4s", cell);
+        }
+        printf("\n");
+
+        if (mode) {
+                printf("%-21s ", "Partial slabs");
+                for (node = 0; node <= highest_node; node++) {
+                        store_size(cell, s->numa_partial[node]);
+                        printf(" %4s", cell);
+                }
+                printf("\n");
+        }
+        line++;
+}
+/*
+ * Print the contents of the cache's "alloc_calls" and "free_calls"
+ * attributes under a heading each, or "No Data" when an attribute is
+ * empty or unreadable.
+ */
+static void show_tracking(struct slabinfo *s)
+{
+        printf("\n%s: Kernel object allocation\n", s->name);
+        printf("-----------------------------------------------------------------------\n");
+        /* buffer holds file content: print it verbatim, never as a format */
+        if (read_slab_obj(s, "alloc_calls"))
+                fputs(buffer, stdout);
+        else
+                printf("No Data\n");
+        printf("\n%s: Kernel object freeing\n", s->name);
+        printf("------------------------------------------------------------------------\n");
+        if (read_slab_obj(s, "free_calls"))
+                fputs(buffer, stdout);
+        else
+                printf("No Data\n");
+}
+/*
+ * Print the contents of the cache's "ops" attribute under a heading, or
+ * a one-line notice when the attribute is empty or unreadable.  The "*"
+ * entry is skipped.
+ */
+static void ops(struct slabinfo *s)
+{
+        if (strcmp(s->name, "*") == 0)
+                return;
+        if (read_slab_obj(s, "ops")) {
+                printf("\n%s: kmem_cache operations\n", s->name);
+                printf("--------------------------------------------\n");
+                /* buffer holds file content: print it verbatim, never as a format */
+                fputs(buffer, stdout);
+        } else
+                printf("\n%s has no kmem_cache operations\n", s->name);
+}
+/* Map a flag to a fixed-width, three-character label: "On " or "Off". */
+static const char *onoff(int x)
+{
+        return x ? "On " : "Off";
+}
+/* Integer percentage part * 100 / whole, or 0 when whole is 0. */
+static unsigned long stat_pct(unsigned long part, unsigned long whole)
+{
+        return whole ? part * 100 / whole : 0;
+}
+/*
+ * Print the performance-counter table of a cache: one row per event
+ * pair with the raw alloc-side and free-side counts and each as a
+ * percentage of total allocations / total frees, followed by flush,
+ * refill and deactivation summaries when those counters are non-zero.
+ *
+ * Prints nothing when alloc_slab or the total allocation count is 0.
+ * A total free count of 0 yields 0% in the free column instead of a
+ * division by zero.
+ */
+static void slab_stats(struct slabinfo *s)
+{
+        unsigned long total_alloc;
+        unsigned long total_free;
+        unsigned long total;
+
+        if (!s->alloc_slab)
+                return;
+        total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+        total_free = s->free_fastpath + s->free_slowpath;
+        if (!total_alloc)
+                return;
+        printf("\n");
+        printf("Slab Perf Counter       Alloc     Free %%Al %%Fr\n");
+        printf("--------------------------------------------------\n");
+        printf("Fastpath             %8lu %8lu %3lu %3lu\n",
+                s->alloc_fastpath, s->free_fastpath,
+                stat_pct(s->alloc_fastpath, total_alloc),
+                stat_pct(s->free_fastpath, total_free));
+        printf("Slowpath             %8lu %8lu %3lu %3lu\n",
+                total_alloc - s->alloc_fastpath, s->free_slowpath,
+                stat_pct(total_alloc - s->alloc_fastpath, total_alloc),
+                stat_pct(s->free_slowpath, total_free));
+        printf("Page Alloc           %8lu %8lu %3lu %3lu\n",
+                s->alloc_slab, s->free_slab,
+                stat_pct(s->alloc_slab, total_alloc),
+                stat_pct(s->free_slab, total_free));
+        printf("Add partial          %8lu %8lu %3lu %3lu\n",
+                s->deactivate_to_head + s->deactivate_to_tail,
+                s->free_add_partial,
+                stat_pct(s->deactivate_to_head + s->deactivate_to_tail,
+                        total_alloc),
+                stat_pct(s->free_add_partial, total_free));
+        printf("Remove partial       %8lu %8lu %3lu %3lu\n",
+                s->alloc_from_partial, s->free_remove_partial,
+                stat_pct(s->alloc_from_partial, total_alloc),
+                stat_pct(s->free_remove_partial, total_free));
+        printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
+                s->deactivate_remote_frees, s->free_frozen,
+                stat_pct(s->deactivate_remote_frees, total_alloc),
+                stat_pct(s->free_frozen, total_free));
+        printf("Total                %8lu %8lu\n\n", total_alloc, total_free);
+        if (s->cpuslab_flush)
+                printf("Flushes %8lu\n", s->cpuslab_flush);
+        if (s->alloc_refill)
+                printf("Refill %8lu\n", s->alloc_refill);
+        total = s->deactivate_full + s->deactivate_empty +
+                        s->deactivate_to_head + s->deactivate_to_tail;
+        if (total)
+                printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
+                        "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
+                        s->deactivate_full, stat_pct(s->deactivate_full, total),
+                        s->deactivate_empty, stat_pct(s->deactivate_empty, total),
+                        s->deactivate_to_head, stat_pct(s->deactivate_to_head, total),
+                        s->deactivate_to_tail, stat_pct(s->deactivate_to_tail, total));
+}
+/*
+ * Print the detailed report for one cache: a header line, a note for
+ * each attribute flag that is set, a four-column table (sizes, slab
+ * counts, debug settings, memory figures), and then the output of
+ * ops(), show_tracking(), slab_numa() in mode 1 and slab_stats().
+ * The "*" entry is skipped.
+ */
+static void report(struct slabinfo *s)
+{
+        if (strcmp(s->name, "*") == 0)
+                return;
+        printf("\nSlabcache: %-20s  Aliases: %2d Order : %2d Objects: %lu\n",
+                s->name, s->aliases, s->order, s->objects);
+        if (s->hwcache_align)
+                printf("** Hardware cacheline aligned\n");
+        if (s->cache_dma)
+                printf("** Memory is allocated in a special DMA zone\n");
+        if (s->destroy_by_rcu)
+                printf("** Slabs are destroyed via RCU\n");
+        if (s->reclaim_account)
+                printf("** Reclaim accounting active\n");
+        printf("\nSizes (bytes)     Slabs              Debug                Memory\n");
+        printf("------------------------------------------------------------------------\n");
+        /* Memory "Total": slab count times bytes per slab (page_size << order) */
+        printf("Object : %7d  Total  : %7ld   Sanity Checks : %s  Total: %7ld\n",
+                        s->object_size, s->slabs, onoff(s->sanity_checks),
+                        s->slabs * (page_size << s->order));
+        /* "Full" slabs: all slabs minus the partial ones and the per-cpu ones */
+        printf("SlabObj: %7d  Full   : %7ld   Redzoning     : %s  Used : %7ld\n",
+                        s->slab_size, s->slabs - s->partial - s->cpu_slabs,
+                        onoff(s->red_zone), s->objects * s->object_size);
+        /* Memory "Loss": total bytes minus the bytes used by objects */
+        printf("SlabSiz: %7d  Partial: %7ld   Poisoning     : %s  Loss : %7ld\n",
+                        page_size << s->order, s->partial, onoff(s->poison),
+                        s->slabs * (page_size << s->order) - s->objects * s->object_size);
+        /* "Lalig": per-object difference between slab_size and object_size, times objects */
+        printf("Loss   : %7d  CpuSlab: %7d   Tracking      : %s  Lalig: %7ld\n",
+                        s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user),
+                        (s->slab_size - s->object_size) * s->objects);
+        /* "Lpadd": bytes per slab not covered by objs_per_slab objects, times slabs */
+        printf("Align  : %7d  Objects: %7d   Tracing       : %s  Lpadd: %7ld\n",
+                        s->align, s->objs_per_slab, onoff(s->trace),
+                        ((page_size << s->order) - s->objs_per_slab * s->slab_size) *
+                        s->slabs);
+        ops(s);
+        show_tracking(s);
+        slab_numa(s, 1);
+        slab_stats(s);
+}
+/*
+ * Print the one-line summary of a cache.
+ *
+ * When actual_slabs is 1 the full report() is printed instead.  Caches
+ * without slabs are skipped when skip_zero is set and show_empty is
+ * not; with show_empty only caches without slabs are listed.  The
+ * column header is printed before the first line, and the line format
+ * follows show_activity.  The "*" entry is skipped.
+ */
+static void slabcache(struct slabinfo *s)
+{
+        char size_str[20];
+        char dist_str[40];
+        char flags[20];
+        char *p = flags;
+        if (strcmp(s->name, "*") == 0)
+                return;
+        if (actual_slabs == 1) {
+                report(s);
+                return;
+        }
+        if (skip_zero && !show_empty && !s->slabs)
+                return;
+        if (show_empty && s->slabs)
+                return;
+        store_size(size_str, slab_size(s));
+        /* "Slabs/Part/Cpu" column: non-cpu slabs / partial slabs / cpu slabs */
+        snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs,
+                                                s->partial, s->cpu_slabs);
+        if (!line++)
+                first_line();
+        /* build the flag string, one letter per attribute that is set */
+        if (s->aliases)
+                *p++ = '*';
+        if (s->cache_dma)
+                *p++ = 'd';
+        if (s->hwcache_align)
+                *p++ = 'A';
+        if (s->poison)
+                *p++ = 'P';
+        if (s->reclaim_account)
+                *p++ = 'a';
+        if (s->red_zone)
+                *p++ = 'Z';
+        if (s->sanity_checks)
+                *p++ = 'F';
+        if (s->store_user)
+                *p++ = 'U';
+        if (s->trace)
+                *p++ = 'T';
+        *p = 0;
+        if (show_activity) {
+                unsigned long total_alloc;
+                unsigned long total_free;
+                total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+                total_free = s->free_fastpath + s->free_slowpath;
+                /* fast-path percentages print as 0 when the matching total is 0 */
+                printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d\n",
+                        s->name, s->objects,
+                        total_alloc, total_free,
+                        total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
+                        total_free ? (s->free_fastpath * 100 / total_free) : 0,
+                        s->order_fallback, s->order);
+        }
+        else
+                /* both percentage columns print as 100 for a cache without slabs */
+                printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+                        s->name, s->objects, s->object_size, size_str, dist_str,
+                        s->objs_per_slab, s->order,
+                        s->slabs ? (s->partial * 100) / s->slabs : 100,
+                        s->slabs ? (s->objects * s->object_size * 100) /
+                                (s->slabs * (page_size << s->order)) : 100,
+                        flags);
+}
+/*
+ * Parse the argument of the debug option into the global debug switches.
+ *
+ * A missing or empty argument, or "-", selects nothing. "a" (any case)
+ * selects sanity checks, poisoning, red zoning and tracking. Otherwise
+ * every character selects one switch: F sanity, P poison, Z redzone,
+ * U tracking, T tracing (case-insensitive).
+ *
+ * Returns 1 on success and 0 when a letter is unknown or selects a switch
+ * that is already set.
+ */
+static int debug_opt_scan(char *opt)
+{
+        char *c;
+        if (opt == NULL || *opt == '\0' || !strcmp(opt, "-"))
+                return 1;
+        if (!strcasecmp(opt, "a")) {
+                sanity = poison = redzone = tracking = 1;
+                return 1;
+        }
+        for (c = opt; *c != '\0'; c++) {
+                /* Value of the selected switch before this letter was seen. */
+                int already;
+                switch (*c) {
+                case 'f': case 'F':
+                        already = sanity;
+                        sanity = 1;
+                        break;
+                case 'p': case 'P':
+                        already = poison;
+                        poison = 1;
+                        break;
+                case 'z': case 'Z':
+                        already = redzone;
+                        redzone = 1;
+                        break;
+                case 'u': case 'U':
+                        already = tracking;
+                        tracking = 1;
+                        break;
+                case 't': case 'T':
+                        already = tracing;
+                        tracing = 1;
+                        break;
+                default:
+                        return 0;
+                }
+                /* Selecting the same switch twice is rejected. */
+                if (already)
+                        return 0;
+        }
+        return 1;
+}
+/*
+ * Tell whether a cache currently holds no objects.
+ *
+ * Returns 1 when s->objects is not positive, 0 otherwise. As a side effect,
+ * an object-free cache that still owns slabs gets "shrink" written to it.
+ */
+static int slab_empty(struct slabinfo *s)
+{
+        int empty = !(s->objects > 0);
+        /*
+         * Slabs may linger even though no objects are left; shrinking the
+         * cache removes them.
+         */
+        if (empty && s->slabs != 0)
+                set_obj(s, "shrink", 1);
+        return empty;
+}
+/*
+ * Bring the debug attributes of one cache in line with the switches chosen
+ * by debug_opt_scan() (sanity, redzone, poison, tracking, tracing).
+ *
+ * Each attribute is only written when its current value differs from the
+ * requested one. Red zoning, poisoning and tracking (and turning sanity
+ * checks off) are only changed while slab_empty() reports the cache as
+ * empty; otherwise a message is printed to stderr and the attribute is left
+ * alone. Tracing is only enabled when exactly one cache was selected.
+ * Caches named "*" are skipped.
+ */
+static void slab_debug(struct slabinfo *s)
+{
+        if (strcmp(s->name, "*") == 0)
+                return;
+        /*
+         * The attribute is called "sanity_checks" (that is the name it is
+         * read back under when the slab directory is scanned), not "sanity".
+         */
+        if (sanity && !s->sanity_checks) {
+                set_obj(s, "sanity_checks", 1);
+        }
+        if (!sanity && s->sanity_checks) {
+                if (slab_empty(s))
+                        set_obj(s, "sanity_checks", 0);
+                else
+                        fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name);
+        }
+        if (redzone && !s->red_zone) {
+                if (slab_empty(s))
+                        set_obj(s, "red_zone", 1);
+                else
+                        fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name);
+        }
+        if (!redzone && s->red_zone) {
+                if (slab_empty(s))
+                        set_obj(s, "red_zone", 0);
+                else
+                        fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name);
+        }
+        if (poison && !s->poison) {
+                if (slab_empty(s))
+                        set_obj(s, "poison", 1);
+                else
+                        fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name);
+        }
+        if (!poison && s->poison) {
+                if (slab_empty(s))
+                        set_obj(s, "poison", 0);
+                else
+                        fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name);
+        }
+        if (tracking && !s->store_user) {
+                if (slab_empty(s))
+                        set_obj(s, "store_user", 1);
+                else
+                        fprintf(stderr, "%s not empty cannot enable tracking\n", s->name);
+        }
+        if (!tracking && s->store_user) {
+                if (slab_empty(s))
+                        set_obj(s, "store_user", 0);
+                else
+                        fprintf(stderr, "%s not empty cannot disable tracking\n", s->name);
+        }
+        if (tracing && !s->trace) {
+                if (slabs == 1)
+                        set_obj(s, "trace", 1);
+                else
+                        fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name);
+        }
+        /* Tracing was not requested but is on: switch it off (write 0). */
+        if (!tracing && s->trace)
+                set_obj(s, "trace", 0);
+}
+/*
+ * Print summary statistics over all caches that were read.
+ *
+ * Caches without slabs or without objects are left out of every figure.
+ * For each metric the minimum, maximum and total are collected in one pass;
+ * averages are then derived per active cache or per object and the tables
+ * "Slabcache Totals", "Per Cache" and "Per Object" are printed.
+ * Prints "No objects" / "No slabs" and returns when nothing qualifies.
+ */
+static void totals(void)
+{
+        struct slabinfo *s;
+        int used_slabs = 0;
+        char b1[20], b2[20], b3[20], b4[20];
+        /* Starting value for the running minima. */
+        unsigned long long max = 1ULL << 63;
+        /* Object size */
+        unsigned long long min_objsize = max, max_objsize = 0, avg_objsize;
+        /* Number of partial slabs in a slabcache */
+        unsigned long long min_partial = max, max_partial = 0,
+                                avg_partial, total_partial = 0;
+        /* Number of slabs in a slab cache */
+        unsigned long long min_slabs = max, max_slabs = 0,
+                                avg_slabs, total_slabs = 0;
+        /* Size of the whole slab */
+        unsigned long long min_size = max, max_size = 0,
+                                avg_size, total_size = 0;
+        /* Bytes used for object storage in a slab */
+        unsigned long long min_used = max, max_used = 0,
+                                avg_used, total_used = 0;
+        /* Waste: Bytes used for alignment and padding */
+        unsigned long long min_waste = max, max_waste = 0,
+                                avg_waste, total_waste = 0;
+        /* Number of objects in a slab */
+        unsigned long long min_objects = max, max_objects = 0,
+                                avg_objects, total_objects = 0;
+        /* Waste per object */
+        unsigned long long min_objwaste = max,
+                                max_objwaste = 0, avg_objwaste,
+                                total_objwaste = 0;
+        /* Memory per object */
+        unsigned long long min_memobj = max,
+                                max_memobj = 0, avg_memobj,
+                                total_objsize = 0;
+        /* Percentage of partial slabs per slab */
+        unsigned long min_ppart = 100, max_ppart = 0,
+                                avg_ppart, total_ppart = 0;
+        /*
+         * Number of objects in partial slabs. Must be as wide as "max":
+         * with a 32-bit unsigned long the initial minimum would truncate
+         * to 0 and never be lowered.
+         */
+        unsigned long long min_partobj = max, max_partobj = 0,
+                                avg_partobj, total_partobj = 0;
+        /* Percentage of partial objects of all objects in a slab */
+        unsigned long min_ppartobj = 100, max_ppartobj = 0,
+                                avg_ppartobj, total_ppartobj = 0;
+        for (s = slabinfo; s < slabinfo + slabs; s++) {
+                unsigned long long size;
+                unsigned long used;
+                unsigned long long wasted;
+                unsigned long long objwaste;
+                unsigned long percentage_partial_slabs;
+                unsigned long percentage_partial_objs;
+                /* Only caches that actually hold objects are counted. */
+                if (!s->slabs || !s->objects)
+                        continue;
+                used_slabs++;
+                size = slab_size(s);
+                used = s->objects * s->object_size;
+                wasted = size - used;
+                objwaste = s->slab_size - s->object_size;
+                /* Percentages are clamped to 100. */
+                percentage_partial_slabs = s->partial * 100 / s->slabs;
+                if (percentage_partial_slabs > 100)
+                        percentage_partial_slabs = 100;
+                percentage_partial_objs = s->objects_partial * 100
+                                                        / s->objects;
+                if (percentage_partial_objs > 100)
+                        percentage_partial_objs = 100;
+                /* Running minima */
+                if (s->object_size < min_objsize)
+                        min_objsize = s->object_size;
+                if (s->partial < min_partial)
+                        min_partial = s->partial;
+                if (s->slabs < min_slabs)
+                        min_slabs = s->slabs;
+                if (size < min_size)
+                        min_size = size;
+                if (wasted < min_waste)
+                        min_waste = wasted;
+                if (objwaste < min_objwaste)
+                        min_objwaste = objwaste;
+                if (s->objects < min_objects)
+                        min_objects = s->objects;
+                if (used < min_used)
+                        min_used = used;
+                if (s->objects_partial < min_partobj)
+                        min_partobj = s->objects_partial;
+                if (percentage_partial_slabs < min_ppart)
+                        min_ppart = percentage_partial_slabs;
+                if (percentage_partial_objs < min_ppartobj)
+                        min_ppartobj = percentage_partial_objs;
+                if (s->slab_size < min_memobj)
+                        min_memobj = s->slab_size;
+                /* Running maxima */
+                if (s->object_size > max_objsize)
+                        max_objsize = s->object_size;
+                if (s->partial > max_partial)
+                        max_partial = s->partial;
+                if (s->slabs > max_slabs)
+                        max_slabs = s->slabs;
+                if (size > max_size)
+                        max_size = size;
+                if (wasted > max_waste)
+                        max_waste = wasted;
+                if (objwaste > max_objwaste)
+                        max_objwaste = objwaste;
+                if (s->objects > max_objects)
+                        max_objects = s->objects;
+                if (used > max_used)
+                        max_used = used;
+                if (s->objects_partial > max_partobj)
+                        max_partobj = s->objects_partial;
+                if (percentage_partial_slabs > max_ppart)
+                        max_ppart = percentage_partial_slabs;
+                if (percentage_partial_objs > max_ppartobj)
+                        max_ppartobj = percentage_partial_objs;
+                if (s->slab_size > max_memobj)
+                        max_memobj = s->slab_size;
+                /* Running totals */
+                total_partial += s->partial;
+                total_slabs += s->slabs;
+                total_size += size;
+                total_waste += wasted;
+                total_objects += s->objects;
+                total_used += used;
+                total_partobj += s->objects_partial;
+                total_ppart += percentage_partial_slabs;
+                total_ppartobj += percentage_partial_objs;
+                total_objwaste += s->objects * objwaste;
+                total_objsize += s->objects * s->slab_size;
+        }
+        if (!total_objects) {
+                printf("No objects\n");
+                return;
+        }
+        if (!used_slabs) {
+                printf("No slabs\n");
+                return;
+        }
+        /* Per slab averages */
+        avg_partial = total_partial / used_slabs;
+        avg_slabs = total_slabs / used_slabs;
+        avg_size = total_size / used_slabs;
+        avg_waste = total_waste / used_slabs;
+        avg_objects = total_objects / used_slabs;
+        avg_used = total_used / used_slabs;
+        avg_partobj = total_partobj / used_slabs;
+        avg_ppart = total_ppart / used_slabs;
+        avg_ppartobj = total_ppartobj / used_slabs;
+        /*
+         * Per object object sizes. avg_partobj is deliberately not touched
+         * here: it is the per-cache average shown in the "PartObjs" row.
+         */
+        avg_objsize = total_used / total_objects;
+        avg_objwaste = total_objwaste / total_objects;
+        avg_memobj = total_objsize / total_objects;
+        printf("Slabcache Totals\n");
+        printf("----------------\n");
+        printf("Slabcaches : %3d      Aliases  : %3d->%-3d Active: %3d\n",
+                        slabs, aliases, alias_targets, used_slabs);
+        store_size(b1, total_size);store_size(b2, total_waste);
+        store_size(b3, total_waste * 100 / total_used);
+        printf("Memory used: %6s   # Loss   : %6s   MRatio:%6s%%\n", b1, b2, b3);
+        store_size(b1, total_objects);store_size(b2, total_partobj);
+        store_size(b3, total_partobj * 100 / total_objects);
+        printf("# Objects  : %6s   # PartObj: %6s   ORatio:%6s%%\n", b1, b2, b3);
+        printf("\n");
+        printf("Per Cache    Average         Min         Max       Total\n");
+        printf("---------------------------------------------------------\n");
+        store_size(b1, avg_objects);store_size(b2, min_objects);
+        store_size(b3, max_objects);store_size(b4, total_objects);
+        printf("#Objects  %10s  %10s  %10s  %10s\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_slabs);store_size(b2, min_slabs);
+        store_size(b3, max_slabs);store_size(b4, total_slabs);
+        printf("#Slabs    %10s  %10s  %10s  %10s\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_partial);store_size(b2, min_partial);
+        store_size(b3, max_partial);store_size(b4, total_partial);
+        printf("#PartSlab %10s  %10s  %10s  %10s\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_ppart);store_size(b2, min_ppart);
+        store_size(b3, max_ppart);
+        store_size(b4, total_partial * 100  / total_slabs);
+        printf("%%PartSlab%10s%% %10s%% %10s%% %10s%%\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_partobj);store_size(b2, min_partobj);
+        store_size(b3, max_partobj);
+        store_size(b4, total_partobj);
+        printf("PartObjs  %10s  %10s  %10s  %10s\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj);
+        store_size(b3, max_ppartobj);
+        store_size(b4, total_partobj * 100 / total_objects);
+        printf("%% PartObj%10s%% %10s%% %10s%% %10s%%\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_size);store_size(b2, min_size);
+        store_size(b3, max_size);store_size(b4, total_size);
+        printf("Memory    %10s  %10s  %10s  %10s\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_used);store_size(b2, min_used);
+        store_size(b3, max_used);store_size(b4, total_used);
+        printf("Used      %10s  %10s  %10s  %10s\n",
+                        b1,     b2,     b3,     b4);
+        store_size(b1, avg_waste);store_size(b2, min_waste);
+        store_size(b3, max_waste);store_size(b4, total_waste);
+        printf("Loss      %10s  %10s  %10s  %10s\n",
+                        b1,     b2,     b3,     b4);
+        printf("\n");
+        printf("Per Object   Average         Min         Max\n");
+        printf("---------------------------------------------\n");
+        store_size(b1, avg_memobj);store_size(b2, min_memobj);
+        store_size(b3, max_memobj);
+        printf("Memory    %10s  %10s  %10s\n",
+                        b1,     b2,     b3);
+        store_size(b1, avg_objsize);store_size(b2, min_objsize);
+        store_size(b3, max_objsize);
+        printf("User      %10s  %10s  %10s\n",
+                        b1,     b2,     b3);
+        store_size(b1, avg_objwaste);store_size(b2, min_objwaste);
+        store_size(b3, max_objwaste);
+        printf("Loss      %10s  %10s  %10s\n",
+                        b1,     b2,     b3);
+}
+/*
+ * Sort the slabinfo array in place.
+ *
+ * Ordering is by slab_size() when sort_size is set, else by slab_activity()
+ * when sort_active is set (largest first in both cases), else
+ * case-insensitively by name. show_inverted reverses the chosen order.
+ *
+ * Every comparison yields a three-way result (<0, 0, >0) so that negating
+ * it for show_inverted really reverses the order; a plain 0/1 "less than"
+ * result would negate to 0/-1 and never trigger a swap.
+ */
+static void sort_slabs(void)
+{
+        struct slabinfo *s1,*s2;
+        for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) {
+                for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
+                        int result;
+                        if (sort_size)
+                                result = (slab_size(s1) < slab_size(s2)) -
+                                         (slab_size(s1) > slab_size(s2));
+                        else if (sort_active)
+                                result = (slab_activity(s1) < slab_activity(s2)) -
+                                         (slab_activity(s1) > slab_activity(s2));
+                        else
+                                result = strcasecmp(s1->name, s2->name);
+                        if (show_inverted)
+                                result = -result;
+                        /* Positive: s1 belongs after s2, so swap them. */
+                        if (result > 0) {
+                                struct slabinfo t;
+                                t = *s1;
+                                *s1 = *s2;
+                                *s2 = t;
+                        }
+                }
+        }
+}
+/*
+ * Sort the aliasinfo array in place, case-insensitively.
+ *
+ * The sort key is the alias target (ref) when aliases are being shown
+ * non-inverted, and the alias name otherwise.
+ */
+static void sort_aliases(void)
+{
+        struct aliasinfo *const end = aliasinfo + aliases;
+        const int by_ref = show_alias && !show_inverted;
+        struct aliasinfo *lo, *hi;
+        for (lo = aliasinfo; lo < end; lo++) {
+                for (hi = lo + 1; hi < end; hi++) {
+                        const char *key_lo = by_ref ? lo->ref : lo->name;
+                        const char *key_hi = by_ref ? hi->ref : hi->name;
+                        struct aliasinfo tmp;
+                        if (strcasecmp(key_lo, key_hi) <= 0)
+                                continue;
+                        /* Out of order: exchange the two entries. */
+                        tmp = *lo;
+                        *lo = *hi;
+                        *hi = tmp;
+                }
+        }
+}
+/*
+ * Resolve every alias to the slabinfo entry whose name equals its ref.
+ *
+ * On a match the alias gets a pointer to the cache and the cache's
+ * reference count is incremented. An alias without a matching cache is
+ * reported through fatal().
+ */
+static void link_slabs(void)
+{
+        struct aliasinfo *const alias_end = aliasinfo + aliases;
+        struct slabinfo *const slab_end = slabinfo + slabs;
+        struct aliasinfo *a;
+        struct slabinfo *s;
+        for (a = aliasinfo; a < alias_end; a++) {
+                int resolved = 0;
+                /* Stop at the first cache carrying the referenced name. */
+                for (s = slabinfo; s < slab_end && !resolved; s++) {
+                        if (strcmp(a->ref, s->name) != 0)
+                                continue;
+                        a->slab = s;
+                        s->refs++;
+                        resolved = 1;
+                }
+                if (!resolved)
+                        fatal("Unresolved alias %s\n", a->ref);
+        }
+}
+/*
+ * Print the alias table.
+ *
+ * Aliases are sorted and linked to their caches first. Caches referenced
+ * only once are left out unless show_single_ref is set. In the default
+ * layout, consecutive aliases of the same cache share one
+ * "cache <- alias alias ..." line; with show_inverted each alias gets its
+ * own "alias -> cache" line.
+ */
+static void alias(void)
+{
+        struct aliasinfo *a;
+        /* Name of the cache whose line is currently being extended. */
+        char *active = NULL;
+        sort_aliases();
+        link_slabs();
+        for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+                if (a->slab->refs == 1 && !show_single_ref)
+                        continue;
+                if (show_inverted) {
+                        printf("%-20s -> %s\n", a->name, a->slab->name);
+                        continue;
+                }
+                if (active && !strcmp(a->slab->name, active)) {
+                        /* Same cache as the previous alias: append. */
+                        printf(" %s", a->name);
+                        continue;
+                }
+                /* New cache: the leading newline ends the previous line. */
+                printf("\n%-12s <- %s", a->slab->name, a->name);
+                active = a->slab->name;
+        }
+        /* Terminate the last grouped line, if one was started. */
+        if (active)
+                printf("\n");
+}
+/*
+ * Give caches whose name starts with ':' a readable name.
+ *
+ * Such a cache is renamed to the alias returned by find_one_alias(). When
+ * no alias is returned it is renamed to "*" and actual_slabs is
+ * decremented. Caches with more than one reference are only renamed when
+ * show_first_alias is set.
+ */
+static void rename_slabs(void)
+{
+        struct slabinfo *s;
+        for (s = slabinfo; s < slabinfo + slabs; s++) {
+                struct aliasinfo *a;
+                if (s->name[0] != ':' ||
+                    (s->refs > 1 && !show_first_alias))
+                        continue;
+                a = find_one_alias(s);
+                if (!a) {
+                        s->name = "*";
+                        actual_slabs--;
+                        continue;
+                }
+                s->name = a->name;
+        }
+}
+/*
+ * Match a cache name against the compiled global pattern.
+ * Returns the regexec() result: 0 on a match, non-zero otherwise.
+ */
+static int slab_mismatch(char *slab)
+{
+        int rc = regexec(&pattern, slab, 0, NULL, 0);
+        return rc;
+}
+/*
+ * Scan the SLUB sysfs directory and fill the slabinfo and aliasinfo tables.
+ *
+ * Changes into /sys/kernel/slab (or /sys/slab as a fallback). Entries that
+ * start with '.' are ignored, as are entries that neither start with ':'
+ * nor match the user pattern. Symlinks become aliases (name plus the last
+ * path component of the link target); directories become caches whose
+ * attributes are read one by one. On return slabs, actual_slabs and
+ * aliases hold the number of entries found.
+ *
+ * Any failure is reported through fatal().
+ */
+static void read_slab_dir(void)
+{
+        DIR *dir;
+        struct dirent *de;
+        struct slabinfo *slab = slabinfo;
+        struct aliasinfo *alias = aliasinfo;
+        char *p;
+        char *t;
+        int count;
+        if (chdir("/sys/kernel/slab") && chdir("/sys/slab"))
+                fatal("SYSFS support for SLUB not active\n");
+        dir = opendir(".");
+        if (!dir)
+                fatal("Unable to open slab directory\n");
+        while ((de = readdir(dir))) {
+                if (de->d_name[0] == '.' ||
+                        (de->d_name[0] != ':' && slab_mismatch(de->d_name)))
+                                continue;
+                switch (de->d_type) {
+                case DT_LNK:
+                        /*
+                         * Check before storing, not after the scan.
+                         * Assumes aliasinfo holds MAX_ALIASES entries, as
+                         * the former post-scan check implied.
+                         */
+                        if (alias >= aliasinfo + MAX_ALIASES)
+                                fatal("Too many aliases\n");
+                        alias->name = strdup(de->d_name);
+                        /* Leave room for the terminating NUL added below. */
+                        count = readlink(de->d_name, buffer, sizeof(buffer) - 1);
+                        if (count < 0)
+                                fatal("Cannot read symlink %s\n", de->d_name);
+                        buffer[count] = 0;
+                        /* Keep only the last path component of the target. */
+                        p = buffer + count;
+                        while (p > buffer && p[-1] != '/')
+                                p--;
+                        alias->ref = strdup(p);
+                        alias++;
+                        break;
+                case DT_DIR:
+                        /* Same up-front capacity check as for aliases. */
+                        if (slab >= slabinfo + MAX_SLABS)
+                                fatal("Too many slabs\n");
+                        /* slab->name is not set yet; report the entry name. */
+                        if (chdir(de->d_name))
+                                fatal("Unable to access slab %s\n", de->d_name);
+                        slab->name = strdup(de->d_name);
+                        slab->alias = 0;
+                        slab->refs = 0;
+                        slab->aliases = get_obj("aliases");
+                        slab->align = get_obj("align");
+                        slab->cache_dma = get_obj("cache_dma");
+                        slab->cpu_slabs = get_obj("cpu_slabs");
+                        slab->destroy_by_rcu = get_obj("destroy_by_rcu");
+                        slab->hwcache_align = get_obj("hwcache_align");
+                        slab->object_size = get_obj("object_size");
+                        slab->objects = get_obj("objects");
+                        slab->objects_partial = get_obj("objects_partial");
+                        slab->objects_total = get_obj("objects_total");
+                        slab->objs_per_slab = get_obj("objs_per_slab");
+                        slab->order = get_obj("order");
+                        /* "partial" and "slabs" also carry a per-node list. */
+                        slab->partial = get_obj_and_str("partial", &t);
+                        decode_numa_list(slab->numa_partial, t);
+                        free(t);
+                        slab->poison = get_obj("poison");
+                        slab->reclaim_account = get_obj("reclaim_account");
+                        slab->red_zone = get_obj("red_zone");
+                        slab->sanity_checks = get_obj("sanity_checks");
+                        slab->slab_size = get_obj("slab_size");
+                        slab->slabs = get_obj_and_str("slabs", &t);
+                        decode_numa_list(slab->numa, t);
+                        free(t);
+                        slab->store_user = get_obj("store_user");
+                        slab->trace = get_obj("trace");
+                        slab->alloc_fastpath = get_obj("alloc_fastpath");
+                        slab->alloc_slowpath = get_obj("alloc_slowpath");
+                        slab->free_fastpath = get_obj("free_fastpath");
+                        slab->free_slowpath = get_obj("free_slowpath");
+                        slab->free_frozen= get_obj("free_frozen");
+                        slab->free_add_partial = get_obj("free_add_partial");
+                        slab->free_remove_partial = get_obj("free_remove_partial");
+                        slab->alloc_from_partial = get_obj("alloc_from_partial");
+                        slab->alloc_slab = get_obj("alloc_slab");
+                        slab->alloc_refill = get_obj("alloc_refill");
+                        slab->free_slab = get_obj("free_slab");
+                        slab->cpuslab_flush = get_obj("cpuslab_flush");
+                        slab->deactivate_full = get_obj("deactivate_full");
+                        slab->deactivate_empty = get_obj("deactivate_empty");
+                        slab->deactivate_to_head = get_obj("deactivate_to_head");
+                        slab->deactivate_to_tail = get_obj("deactivate_to_tail");
+                        slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
+                        slab->order_fallback = get_obj("order_fallback");
+                        /* All later reads are relative to the top directory. */
+                        if (chdir(".."))
+                                fatal("Unable to leave slab %s\n", slab->name);
+                        if (slab->name[0] == ':')
+                                alias_targets++;
+                        slab++;
+                        break;
+                default :
+                        /* d_type promotes to int; cast to match %lx. */
+                        fatal("Unknown file type %lx\n",
+                                        (unsigned long)de->d_type);
+                }
+        }
+        closedir(dir);
+        slabs = slab - slabinfo;
+        actual_slabs = slabs;
+        aliases = alias - aliasinfo;
+}
+/*
+ * Run the selected action on every cache that is not flagged as an alias.
+ *
+ * Exactly one action runs per cache; when several modes are set the first
+ * one in this order wins: numa, track, validate, shrink, debug, ops, slab
+ * listing, report.
+ */
+static void output_slabs(void)
+{
+        struct slabinfo *slab;
+        for (slab = slabinfo; slab < slabinfo + slabs; slab++) {
+                if (slab->alias)
+                        continue;
+                if (show_numa) {
+                        slab_numa(slab, 0);
+                        continue;
+                }
+                if (show_track) {
+                        show_tracking(slab);
+                        continue;
+                }
+                if (validate) {
+                        slab_validate(slab);
+                        continue;
+                }
+                if (shrink) {
+                        slab_shrink(slab);
+                        continue;
+                }
+                if (set_debug) {
+                        slab_debug(slab);
+                        continue;
+                }
+                if (show_ops) {
+                        ops(slab);
+                        continue;
+                }
+                if (show_slab) {
+                        slabcache(slab);
+                        continue;
+                }
+                if (show_report)
+                        report(slab);
+        }
+}
+/*
+ * Long option table for getopt_long(). Each entry maps a long option to
+ * the short option character handled in main(); only "debug" takes an
+ * (optional) argument.
+ */
+struct option opts[] = {
+        { "aliases",            no_argument,            NULL, 'a' },
+        { "activity",           no_argument,            NULL, 'A' },
+        { "debug",              optional_argument,      NULL, 'd' },
+        { "display-activity",   no_argument,            NULL, 'D' },
+        { "empty",              no_argument,            NULL, 'e' },
+        { "first-alias",        no_argument,            NULL, 'f' },
+        { "help",               no_argument,            NULL, 'h' },
+        { "inverted",           no_argument,            NULL, 'i' },
+        { "numa",               no_argument,            NULL, 'n' },
+        { "ops",                no_argument,            NULL, 'o' },
+        { "report",             no_argument,            NULL, 'r' },
+        { "shrink",             no_argument,            NULL, 's' },
+        { "slabs",              no_argument,            NULL, 'l' },
+        { "track",              no_argument,            NULL, 't' },
+        { "validate",           no_argument,            NULL, 'v' },
+        { "zero",               no_argument,            NULL, 'z' },
+        { "1ref",               no_argument,            NULL, '1' },
+        { NULL,                 0,                      NULL, 0 }
+};
+/*
+ * Entry point: parse the command line, read the slab directory and run the
+ * selected mode.
+ *
+ * The first non-option argument, if any, is a case-insensitive regular
+ * expression selecting caches by name; the default ".*" selects all. When
+ * no mode option is given the slab listing is shown. Alias display takes
+ * precedence over totals, which takes precedence over per-cache output.
+ *
+ * Returns 0 on success; errors are reported through fatal().
+ */
+int main(int argc, char *argv[])
+{
+        int c;
+        int err;
+        char *pattern_source;
+        page_size = getpagesize();
+        /*
+         * Only characters handled by a case below appear in the option
+         * string, so an unsupported option is rejected by getopt itself
+         * and optopt names it in the message of the default branch.
+         */
+        while ((c = getopt_long(argc, argv, "aAd::Defhil1norstvzTS",
+                                                opts, NULL)) != -1)
+                switch (c) {
+                case '1':
+                        show_single_ref = 1;
+                        break;
+                case 'a':
+                        show_alias = 1;
+                        break;
+                case 'A':
+                        sort_active = 1;
+                        break;
+                case 'd':
+                        set_debug = 1;
+                        if (!debug_opt_scan(optarg))
+                                fatal("Invalid debug option '%s'\n", optarg);
+                        break;
+                case 'D':
+                        show_activity = 1;
+                        break;
+                case 'e':
+                        show_empty = 1;
+                        break;
+                case 'f':
+                        show_first_alias = 1;
+                        break;
+                case 'h':
+                        usage();
+                        return 0;
+                case 'i':
+                        show_inverted = 1;
+                        break;
+                case 'n':
+                        show_numa = 1;
+                        break;
+                case 'o':
+                        show_ops = 1;
+                        break;
+                case 'r':
+                        show_report = 1;
+                        break;
+                case 's':
+                        shrink = 1;
+                        break;
+                case 'l':
+                        show_slab = 1;
+                        break;
+                case 't':
+                        show_track = 1;
+                        break;
+                case 'v':
+                        validate = 1;
+                        break;
+                case 'z':
+                        skip_zero = 0;
+                        break;
+                case 'T':
+                        show_totals = 1;
+                        break;
+                case 'S':
+                        sort_size = 1;
+                        break;
+                default:
+                        fatal("%s: Invalid option '%c'\n", argv[0], optopt);
+                }
+        /* No mode selected: fall back to the slab listing. */
+        if (!show_slab && !show_alias && !show_track && !show_report
+                && !validate && !shrink && !set_debug && !show_ops)
+                        show_slab = 1;
+        if (argc > optind)
+                pattern_source = argv[optind];
+        else
+                pattern_source = ".*";
+        err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
+        if (err)
+                fatal("%s: Invalid pattern '%s' code %d\n",
+                        argv[0], pattern_source, err);
+        read_slab_dir();
+        if (show_alias)
+                alias();
+        else if (show_totals)
+                totals();
+        else {
+                /* Per-cache output works on linked, renamed, sorted caches. */
+                link_slabs();
+                rename_slabs();
+                sort_slabs();
+                output_slabs();
+        }
+        return 0;
+}
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
new file mode 100755
index 000000000000..9a571e71683c
--- /dev/null
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -0,0 +1,30 @@
+#!/usr/bin/perl
+#
+# Cross-check the option names used by ktest.pl against the ones mentioned
+# in sample.conf. Both files are read from the current directory.
+#
+# Prints "opt = NAME" for every option found in ktest.pl but not in
+# sample.conf, and "samp = NAME" for every name assigned in sample.conf
+# that ktest.pl does not use.
+#
+# Collect option names from ktest.pl: $opt{NAME} / $opt{"NAME[...]"}
+# accesses and set_test_option("NAME" calls.
+open (IN, "<", "ktest.pl") or die "Cannot open ktest.pl: $!\n";
+while (<IN>) {
+    if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
+        /set_test_option\("(.*?)"/) {
+        $opt{$1} = 1;
+    }
+}
+close IN;
+# Collect names from sample.conf: "NAME =" lines, commented out or not.
+open (IN, "<", "sample.conf") or die "Cannot open sample.conf: $!\n";
+while (<IN>) {
+    if (/^\s*#?\s*(\S+)\s*=/) {
+        $samp{$1} = 1;
+    }
+}
+close IN;
+# Options used by ktest.pl that sample.conf does not mention.
+foreach $opt (keys %opt) {
+    if (!defined($samp{$opt})) {
+        print "opt = $opt\n";
+    }
+}
+# Names in sample.conf that ktest.pl never looks at.
+foreach $samp (keys %samp) {
+    if (!defined($opt{$samp})) {
+        print "samp = $samp\n";
+    }
+}
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
new file mode 100755
index 000000000000..e1c62eeb88f5
--- /dev/null
+++ b/tools/testing/ktest/ktest.pl
@@ -0,0 +1,2023 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+# Licensed under the terms of the GNU GPL License version 2
+#
+use strict;
+use IPC::Open2;
+use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
+use File::Path qw(mkpath);
+use File::Copy qw(cp);
+use FileHandle;
+my $VERSION = "0.2";
+# Make the currently selected output handle flush after every write.
+$| = 1;
+# Options read from the config file; filled in by set_value/read_config.
+my %opt;
+# Test number => ITERATE count, recorded by read_config.
+my %repeat_tests;
+# "NAME[n]" => ITERATE count for options set inside an iterated test.
+my %repeats;
+my %default;
+# Default option values.  read_config copies each one into %opt when the
+# config file left that option unset, and get_ktest_config shows it as the
+# suggested answer when prompting.
+$default{"NUM_TESTS"}           = 1;
+$default{"REBOOT_TYPE"}         = "grub";
+$default{"TEST_TYPE"}           = "test";
+$default{"BUILD_TYPE"}          = "randconfig";
+$default{"MAKE_CMD"}            = "make";
+$default{"TIMEOUT"}             = 120;
+$default{"TMP_DIR"}             = "/tmp/ktest";
+$default{"SLEEP_TIME"}          = 60;   # sleep time between tests
+$default{"BUILD_NOCLEAN"}       = 0;
+$default{"REBOOT_ON_ERROR"}     = 0;
+$default{"POWEROFF_ON_ERROR"}   = 0;
+$default{"REBOOT_ON_SUCCESS"}   = 1;
+$default{"POWEROFF_ON_SUCCESS"} = 0;
+$default{"BUILD_OPTIONS"}       = "";
+$default{"BISECT_SLEEP_TIME"}   = 60;   # sleep time between bisects
+$default{"CLEAR_LOG"}           = 0;
+$default{"SUCCESS_LINE"}        = "login:";
+$default{"BOOTED_TIMEOUT"}      = 1;
+$default{"DIE_ON_FAILURE"}      = 1;
+# The escaped \$NAME words below are stored as literal placeholders:
+# run_command replaces \$SSH_USER and \$MACHINE, run_ssh replaces
+# \$SSH_COMMAND, and run_scp replaces \$SRC_FILE and \$DST_FILE.
+$default{"SSH_EXEC"}            = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND";
+$default{"SCP_TO_TARGET"}       = "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE";
+$default{"REBOOT"}              = "ssh \$SSH_USER\@\$MACHINE reboot";
+$default{"STOP_AFTER_SUCCESS"}  = 10;
+$default{"STOP_AFTER_FAILURE"}  = 60;
+$default{"LOCALVERSION"}        = "-test";
+# File-scope variables shared by the subroutines of this script.  Only the
+# ones given an initial value here are set in this part of the file; where
+# the rest receive their per-test values is not shown in this section.
+my $ktest_config;
+my $version;            # kernel release string, set by get_version
+my $machine;
+my $ssh_user;
+my $tmpdir;
+my $builddir;
+my $outputdir;
+my $output_config;
+my $test_type;
+my $build_type;
+my $build_options;
+my $reboot_type;
+my $reboot_script;
+my $power_cycle;
+my $reboot;
+my $reboot_on_error;
+my $poweroff_on_error;
+my $die_on_failure;
+my $powercycle_after_reboot;
+my $poweroff_after_halt;
+my $ssh_exec;
+my $scp_to_target;
+my $power_off;
+my $grub_menu;
+my $grub_number;        # menu index cached by get_grub_index
+my $target;
+my $make;
+my $post_install;
+my $noclean;
+my $minconfig;
+my $addconfig;
+my $in_bisect = 0;      # set to 1 by run_bisect_test while a step runs
+my $bisect_bad = "";    # first bad commit reported by git bisect
+my $reverse_bisect;
+my $in_patchcheck = 0;
+my $run_test;
+my $redirect;           # when defined, run_command also writes output here
+my $buildlog;
+my $dmesg;
+my $monitor_fp;         # console handle opened by start_monitor
+my $monitor_pid;        # pid of the console command
+my $monitor_cnt = 0;    # nesting count of start_monitor/end_monitor
+my $sleep_time;
+my $bisect_sleep_time;
+my $store_failures;
+my $timeout;
+my $booted_timeout;
+my $console;
+my $success_line;
+my $stop_after_success;
+my $stop_after_failure;
+my $build_target;
+my $target_image;
+my $localversion;
+my $iteration = 0;      # read by fail/do_not_reboot as the current test number
+my $successes = 0;      # incremented by success
+my %entered_configs;    # answers typed at the get_ktest_config prompt
+my %config_help;        # option name => help text shown before prompting
+# Help text printed by get_ktest_config before it prompts for an option.
+# These are double-quoted heredocs, so '@' and '$' that must appear
+# literally in the text are backslash-escaped.
+$config_help{"MACHINE"} = << "EOF"
+ The machine hostname that you will test.
+EOF
+    ;
+$config_help{"SSH_USER"} = << "EOF"
+ The box is expected to have ssh on normal bootup, provide the user
+  (most likely root, since you need privileged operations)
+EOF
+    ;
+$config_help{"BUILD_DIR"} = << "EOF"
+ The directory that contains the Linux source code (full path).
+EOF
+    ;
+$config_help{"OUTPUT_DIR"} = << "EOF"
+ The directory that the objects will be built (full path).
+ (can not be same as BUILD_DIR)
+EOF
+    ;
+$config_help{"BUILD_TARGET"} = << "EOF"
+ The location of the compiled file to copy to the target.
+ (relative to OUTPUT_DIR)
+EOF
+    ;
+$config_help{"TARGET_IMAGE"} = << "EOF"
+ The place to put your image on the test machine.
+EOF
+    ;
+$config_help{"POWER_CYCLE"} = << "EOF"
+ A script or command to reboot the box.
+ Here is a digital loggers power switch example
+ POWER_CYCLE = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL'
+ Here is an example to reboot a virtual box on the current host
+ with the name "Guest".
+ POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+EOF
+    ;
+$config_help{"CONSOLE"} = << "EOF"
+ The script or command that reads the console
+  If you use ttywatch server, something like the following would work.
+CONSOLE = nc -d localhost 3001
+ For a virtual machine with guest name "Guest".
+CONSOLE =  virsh console Guest
+EOF
+    ;
+$config_help{"LOCALVERSION"} = << "EOF"
+ Required version ending to differentiate the test
+ from other linux builds on the system.
+EOF
+    ;
+$config_help{"REBOOT_TYPE"} = << "EOF"
+ Way to reboot the box to the test kernel.
+ Only valid options so far are "grub" and "script".
+ If you specify grub, it will assume grub version 1
+ and will search in /boot/grub/menu.lst for the title \$GRUB_MENU
+ and select that target to reboot to the kernel. If this is not
+ your setup, then specify "script" and have a command or script
+ specified in REBOOT_SCRIPT to boot to the target.
+ The entry in /boot/grub/menu.lst must be entered in manually.
+ The test will not modify that file.
+EOF
+    ;
+$config_help{"GRUB_MENU"} = << "EOF"
+ The grub title name for the test kernel to boot
+ (Only mandatory if REBOOT_TYPE = grub)
+ Note, ktest.pl will not update the grub menu.lst, you need to
+ manually add an option for the test. ktest.pl will search
+ the grub menu.lst for this option to find what kernel to
+ reboot into.
+ For example, if in the /boot/grub/menu.lst the test kernel title has:
+ title Test Kernel
+ kernel vmlinuz-test
+ GRUB_MENU = Test Kernel
+EOF
+    ;
+$config_help{"REBOOT_SCRIPT"} = << "EOF"
+ A script to reboot the target into the test kernel
+ (Only mandatory if REBOOT_TYPE = script)
+EOF
+    ;
+# Prompt on STDIN for a mandatory option that the config file did not set.
+#
+#   $config - option name, e.g. "MACHINE"
+#
+# Returns at once when $opt{$config} is already defined.  Otherwise prints
+# the help text from %config_help (if any) and prompts until a non-blank
+# answer is typed, or a blank answer can be replaced by a true value from
+# %default.  The answer is stored in $entered_configs{$config}.
+#
+# Dies when STDIN reaches end-of-file and no default is available, because
+# no answer can ever arrive.
+sub get_ktest_config {
+    my ($config) = @_;
+    return if (defined($opt{$config}));
+    if (defined($config_help{$config})) {
+        print "\n";
+        print $config_help{$config};
+    }
+    for (;;) {
+        print "$config = ";
+        if (defined($default{$config})) {
+            print "\[$default{$config}\] ";
+        }
+        my $ans = <STDIN>;
+        if (!defined($ans)) {
+            # EOF: re-prompting would loop forever, so either fall back
+            # to the default (treat as a blank answer) or give up.
+            die "\nUnexpected end of input while asking for $config\n"
+                if (!$default{$config});
+            $ans = "";
+        }
+        # trim surrounding white space
+        $ans =~ s/^\s*(.*\S)\s*$/$1/;
+        if ($ans =~ /^\s*$/) {
+            # Only a true default is accepted here; a default of 0 or ""
+            # is shown in the prompt but does not satisfy a blank answer.
+            if ($default{$config}) {
+                $ans = $default{$config};
+            } else {
+                print "Your answer can not be blank\n";
+                next;
+            }
+        }
+        $entered_configs{$config} = $ans;
+        last;
+    }
+}
+# Ask for every mandatory option that is still unset, then for the one
+# option the chosen reboot method needs (GRUB_MENU or REBOOT_SCRIPT).
+sub get_ktest_configs {
+    foreach my $name ("MACHINE", "SSH_USER", "BUILD_DIR", "OUTPUT_DIR",
+                      "BUILD_TARGET", "TARGET_IMAGE", "POWER_CYCLE",
+                      "CONSOLE", "LOCALVERSION") {
+        get_ktest_config($name);
+    }
+    my $rtype = $opt{"REBOOT_TYPE"};
+    if (!defined($rtype)) {
+        if (defined($opt{"GRUB_MENU"})) {
+            # a grub menu entry was given, so grub is implied
+            $rtype = "grub";
+        } else {
+            get_ktest_config("REBOOT_TYPE");
+            $rtype = $entered_configs{"REBOOT_TYPE"};
+        }
+    }
+    get_ktest_config($rtype eq "grub" ? "GRUB_MENU" : "REBOOT_SCRIPT");
+}
+# Store one option in %opt.  Dies if the option was already defined; a
+# value that is empty or only white space removes the entry instead.
+sub set_value {
+    my ($lvalue, $rvalue) = @_;
+    die "Error: Option $lvalue defined more than once!\n"
+        if (defined($opt{$lvalue}));
+    if ($rvalue !~ /^\s*$/) {
+        $opt{$lvalue} = $rvalue;
+    } else {
+        delete $opt{$lvalue};
+    }
+}
+# Parse the config file $config into %opt.
+#
+# The file is a sequence of "NAME = value" lines grouped into sections
+# started by "DEFAULTS" or "TEST_START" lines; lines before the first
+# header count as defaults.  A header may carry "SKIP" (ignore the
+# section's settings) and TEST_START may carry "ITERATE <n>".
+# Settings in a test section are stored as "NAME[<test number>]".
+#
+# After the file is read, NUM_TESTS is derived from the TEST_START
+# sections (if any), missing mandatory options are prompted for, and
+# every option still unset is filled in from %default.
+# Dies on malformed input.
+sub read_config {
+    my ($config) = @_;
+    open(IN, $config) || die "can't read file $config";
+    # file name without its directory part; only used in error messages
+    my $name = $config;
+    $name =~ s,.*/(.*),$1,;
+    my $test_num = 0;       # number of the test section being read (0 = none yet)
+    my $default = 1;        # true while the current section is a defaults section
+    my $repeat = 1;         # ITERATE count of the current test section
+    my $num_tests_set = 0;  # true once NUM_TESTS was assigned explicitly
+    my $skip = 0;           # true while the current section is marked SKIP
+    my $rest;               # unparsed remainder of a section header line
+    while (<IN>) {
+        # ignore blank lines and comments
+        # NOTE(review): the second pattern is not anchored, so any line
+        # containing a '#' anywhere is skipped, not just comment lines.
+        next if (/^\s*$/ || /\s*\#/);
+        if (/^\s*TEST_START(.*)/) {
+            $rest = $1;
+            if ($num_tests_set) {
+                die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+            }
+            # remembered so a SKIP section can undo the numbering below
+            my $old_test_num = $test_num;
+            my $old_repeat = $repeat;
+            # step past all iterations of the previous test section
+            $test_num += $repeat;
+            $default = 0;
+            $repeat = 1;
+            # SKIP is accepted both before and after ITERATE
+            if ($rest =~ /\s+SKIP(.*)/) {
+                $rest = $1;
+                $skip = 1;
+            } else {
+                $skip = 0;
+            }
+            if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) {
+                $repeat = $1;
+                $rest = $2;
+                $repeat_tests{"$test_num"} = $repeat;
+            }
+            if ($rest =~ /\s+SKIP(.*)/) {
+                $rest = $1;
+                $skip = 1;
+            }
+            if ($rest !~ /^\s*$/) {
+                die "$name: $.: Gargbage found after TEST_START\n$_";
+            }
+            # a skipped section does not consume any test numbers
+            if ($skip) {
+                $test_num = $old_test_num;
+                $repeat = $old_repeat;
+            }
+        } elsif (/^\s*DEFAULTS(.*)$/) {
+            $default = 1;
+            $rest = $1;
+            if ($rest =~ /\s+SKIP(.*)/) {
+                $rest = $1;
+                $skip = 1;
+            } else {
+                $skip = 0;
+            }
+            if ($rest !~ /^\s*$/) {
+                die "$name: $.: Gargbage found after DEFAULTS\n$_";
+            }
+        } elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+            next if ($skip);
+            my $lvalue = $1;
+            my $rvalue = $2;
+            # these three options are only allowed in a defaults section
+            if (!$default &&
+                ($lvalue eq "NUM_TESTS" ||
+                 $lvalue eq "LOG_FILE" ||
+                 $lvalue eq "CLEAR_LOG")) {
+                die "$name: $.: $lvalue must be set in DEFAULTS section\n";
+            }
+            if ($lvalue eq "NUM_TESTS") {
+                if ($test_num) {
+                    die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+                }
+                if (!$default) {
+                    die "$name: $.: NUM_TESTS must be set in default section\n";
+                }
+                $num_tests_set = 1;
+            }
+            # defaults and names that already carry an explicit [N] are
+            # stored as written; anything else is tagged with the number
+            # of the test section it appears in
+            if ($default || $lvalue =~ /\[\d+\]$/) {
+                set_value($lvalue, $rvalue);
+            } else {
+                my $val = "$lvalue\[$test_num\]";
+                set_value($val, $rvalue);
+                if ($repeat > 1) {
+                    $repeats{$val} = $repeat;
+                }
+            }
+        } else {
+            die "$name: $.: Garbage found in config\n$_";
+        }
+    }
+    close(IN);
+    # with TEST_START sections the test count is the last section's
+    # number plus its remaining iterations
+    if ($test_num) {
+        $test_num += $repeat - 1;
+        $opt{"NUM_TESTS"} = $test_num;
+    }
+    # make sure we have all mandatory configs
+    get_ktest_configs;
+    # set any defaults
+    foreach my $default (keys %default) {
+        if (!defined($opt{$default})) {
+            $opt{$default} = $default{$default};
+        }
+    }
+}
+# Append the arguments to the LOG_FILE, if one is configured.  The file
+# is opened and closed on every call.
+sub _logit {
+    my $logfile = $opt{"LOG_FILE"};
+    return if (!defined($logfile));
+    open(OUT, ">> $logfile") or die "Can't write to $logfile";
+    print OUT @_;
+    close(OUT);
+}
+# Write the arguments to the log file when one is configured, otherwise
+# to standard output (never both).
+sub logit {
+    if (!defined($opt{"LOG_FILE"})) {
+        print @_;
+    } else {
+        _logit(@_);
+    }
+}
+# Print the arguments to standard output and also append them to the log
+# file (when configured).
+sub doprint {
+    my @msg = @_;
+    print @msg;
+    _logit(@msg);
+}
+sub run_command;
+# Reboot the target with the $reboot command.  If that command fails the
+# box is power cycled instead; if it succeeds and
+# $powercycle_after_reboot is defined, wait that many seconds and power
+# cycle anyway.
+sub reboot {
+    if (!run_command($reboot)) {
+        # the normal reboot did not work: power cycle it
+        run_command("$power_cycle");
+    } elsif (defined($powercycle_after_reboot)) {
+        sleep $powercycle_after_reboot;
+        run_command("$power_cycle");
+    }
+}
+# True when the current test only builds a kernel, i.e. the test type is
+# "build", or it is a patchcheck/bisect whose own type option for this
+# iteration is "build".
+sub do_not_reboot {
+    my $i = $iteration;
+    return 1 if ($test_type eq "build");
+    return 1 if ($test_type eq "patchcheck" &&
+                 $opt{"PATCHCHECK_TYPE[$i]"} eq "build");
+    return ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
+}
+# Report a critical failure and die with the given message.  Before
+# dying, reboot the target when REBOOT_ON_ERROR applies (and the test is
+# not build-only), or else run the power-off command when
+# POWEROFF_ON_ERROR is set and a power-off command is defined.
+sub dodie {
+    my @reason = @_;
+    doprint "CRITICAL FAILURE... ", @reason, "\n";
+    if ($reboot_on_error && !do_not_reboot()) {
+        doprint "REBOOTING\n";
+        reboot();
+    } elsif ($poweroff_on_error && defined($power_off)) {
+        doprint "POWERING OFF\n";
+        `$power_off`;
+    }
+    die @reason, "\n";
+}
+# Start the $console command with its output piped into the handle $fp
+# and switch that handle to non-blocking mode.
+# Returns the pid of the console command; calls dodie on any failure.
+sub open_console {
+    my ($fp) = @_;
+    my $pid = open($fp, "$console|") or
+        dodie "Can't open console $console";
+    my $mode = fcntl($fp, F_GETFL, 0) or
+        dodie "Can't get flags for the socket: $!";
+    $mode = fcntl($fp, F_SETFL, $mode | O_NONBLOCK) or
+        dodie "Can't set flags for the socket: $!";
+    return $pid;
+}
+# Send signal 2 to the console process $pid and close its handle $fp.
+sub close_console {
+    my ($fp, $pid) = @_;
+    doprint "kill child process $pid\n";
+    kill(2, $pid);
+    print "closing!\n";
+    close($fp);
+}
+# Open the console monitor.  Calls nest: $monitor_cnt counts outstanding
+# start_monitor calls and only the first one actually opens the console,
+# storing its handle in $monitor_fp and its pid in $monitor_pid.
+sub start_monitor {
+    if ($monitor_cnt++) {
+        return;
+    }
+    $monitor_fp = \*MONFD;
+    $monitor_pid = open_console $monitor_fp;
+    return;
+    # Never executed (it follows the return): it is here only so that the
+    # name MONFD appears a second time in the source.
+    open(MONFD, "Stop perl from warning about single use of MONFD");
+}
+# Drop one start_monitor reference; the console is closed only when the
+# count reaches zero.
+sub end_monitor {
+    return if (--$monitor_cnt);
+    close_console($monitor_fp, $monitor_pid);
+}
+# Drain the console: keep echoing lines from the monitor until none
+# arrives within $time seconds.
+sub wait_for_monitor {
+    my ($time) = @_;
+    doprint "** Wait for monitor to settle down **\n";
+    # read the monitor until wait_for_input reports nothing more to read
+    while (defined(my $line = wait_for_input($monitor_fp, $time))) {
+        print "$line";
+    }
+    print "** Monitor flushed **\n";
+}
+# Record a test failure.
+#
+# With DIE_ON_FAILURE set this is fatal (dodie).  Otherwise the failure
+# is reported, the target is rebooted unless the test was build-only,
+# and, when $store_failures is defined, the .config, build log and dmesg
+# are copied into a time-stamped directory below it.
+#
+# Always returns 1 so callers can write "fail ... and return 0".
+sub fail {
+        if ($die_on_failure) {
+                dodie @_;
+        }
+        doprint "FAILED\n";
+        my $i = $iteration;
+        # no need to reboot for just building.
+        if (!do_not_reboot) {
+            doprint "REBOOTING\n";
+            reboot;
+            start_monitor;
+            wait_for_monitor $sleep_time;
+            end_monitor;
+        }
+        doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+        doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+        doprint "KTEST RESULT: TEST $i Failed: ", @_, "\n";
+        doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+        doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+        return 1 if (!defined($store_failures));
+        # YYYYMMDDhhmmss stamp.  localtime returns the month as 0..11
+        # and the year as an offset from 1900, so both need adjusting.
+        my @t = localtime;
+        my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+                1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1],$t[0];
+        my $type = $build_type;
+        # "useconfig:<file>" would put a path into the directory name
+        if ($type =~ /useconfig/) {
+            $type = "useconfig";
+        }
+        my $dir = "$machine-$test_type-$type-fail-$date";
+        my $faildir = "$store_failures/$dir";
+        if (!-d $faildir) {
+            mkpath($faildir) or
+                die "can't create $faildir";
+        }
+        if (-f "$output_config") {
+            cp "$output_config", "$faildir/config" or
+                die "failed to copy .config";
+        }
+        if (-f $buildlog) {
+            cp $buildlog, "$faildir/buildlog" or
+                die "failed to move $buildlog";
+        }
+        if (-f $dmesg) {
+            cp $dmesg, "$faildir/dmesg" or
+                die "failed to move $dmesg";
+        }
+        doprint "*** Saved info to $faildir ***\n";
+        return 1;
+}
+# Run a shell command and report whether it succeeded.
+#
+#   $command - command line; the literal placeholders $SSH_USER and
+#              $MACHINE are replaced with $ssh_user and $machine first.
+#
+# The command's stdout and stderr are read through a pipe and copied to
+# the LOG_FILE (when configured) and to the file named by $redirect
+# (when defined); nothing of the output is echoed to the terminal.
+# Returns true when the command's wait status is 0, false otherwise.
+sub run_command {
+    my ($command) = @_;
+    my $dolog = 0;  # copy output to LOG_FILE
+    my $dord = 0;   # copy output to $redirect
+    my $pid;
+    $command =~ s/\$SSH_USER/$ssh_user/g;
+    $command =~ s/\$MACHINE/$machine/g;
+    doprint("$command ... ");
+    $pid = open(CMD, "$command 2>&1 |") or
+        (fail "unable to exec $command" and return 0);
+    if (defined($opt{"LOG_FILE"})) {
+        open(LOG, ">>$opt{LOG_FILE}") or
+            dodie "failed to write to log";
+        $dolog = 1;
+    }
+    if (defined($redirect)) {
+        # note: opened with ">", so the redirect file is truncated
+        open (RD, ">$redirect") or
+            dodie "failed to write to redirect $redirect";
+        $dord = 1;
+    }
+    while (<CMD>) {
+        print LOG if ($dolog);
+        print RD  if ($dord);
+    }
+    # collect the wait status before close(CMD) can change $?
+    waitpid($pid, 0);
+    my $failed = $?;
+    close(CMD);
+    close(LOG) if ($dolog);
+    close(RD)  if ($dord);
+    if ($failed) {
+        doprint "FAILED!\n";
+    } else {
+        doprint "SUCCESS\n";
+    }
+    return !$failed;
+}
+# Run $cmd on the target: substitute it for the $SSH_COMMAND placeholder
+# in the SSH_EXEC template and hand the result to run_command.
+sub run_ssh {
+    my ($cmd) = @_;
+    (my $ssh_line = $ssh_exec) =~ s/\$SSH_COMMAND/$cmd/g;
+    return run_command($ssh_line);
+}
+# Copy $src to $dst using the SCP_TO_TARGET template, whose $SRC_FILE
+# and $DST_FILE placeholders are filled in before run_command runs it.
+sub run_scp {
+    my ($src, $dst) = @_;
+    (my $scp_line = $scp_to_target) =~ s/\$SRC_FILE/$src/g;
+    $scp_line =~ s/\$DST_FILE/$dst/g;
+    return run_command($scp_line);
+}
+# Find the zero-based position of the "title $grub_menu" entry in the
+# target's /boot/grub/menu.lst and cache it in $grub_number.
+#
+# Does nothing unless REBOOT_TYPE is "grub", or when the index has
+# already been determined.  Dies when menu.lst cannot be read or does
+# not contain the requested title.
+sub get_grub_index {
+    if ($reboot_type ne "grub") {
+        return;
+    }
+    return if (defined($grub_number));
+    doprint "Find grub menu ... ";
+    $grub_number = -1;
+    my $ssh_grub = $ssh_exec;
+    $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
+    # This command bypasses run_command, so expand the same placeholders
+    # run_command would (no-ops if they were already expanded).
+    $ssh_grub =~ s/\$SSH_USER/$ssh_user/g;
+    $ssh_grub =~ s/\$MACHINE/$machine/g;
+    # The menu title is plain text, not a pattern: characters such as
+    # '(', '.' or '+' in it must match literally.
+    my $grub_menu_qt = quotemeta($grub_menu);
+    # $grub_number counts every title seen, so it cannot tell "matched"
+    # from "ran off the end of the file"; track the match separately.
+    my $found = 0;
+    open(IN, "$ssh_grub |")
+        or die "unable to get menu.lst";
+    while (<IN>) {
+        if (/^\s*title\s+$grub_menu_qt\s*$/) {
+            $grub_number++;
+            $found = 1;
+            last;
+        } elsif (/^\s*title\s/) {
+            $grub_number++;
+        }
+    }
+    close(IN);
+    die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+        if (!$found);
+    doprint "$grub_number\n";
+}
+# Wait up to $time seconds ($timeout when not given) for data on $fp,
+# then read one character at a time up to and including a newline, or
+# until sysread returns nothing.
+# Returns what was read (possibly a partial line), or undef when nothing
+# was read.
+sub wait_for_input
+{
+    my ($fp, $time) = @_;
+    $time = $timeout if (!defined($time));
+
+    my $rin = '';
+    vec($rin, fileno($fp), 1) = 1;
+    select($rin, undef, undef, $time);
+
+    my $line = "";
+    my $ch;
+    # try to read one char at a time
+    while (sysread($fp, $ch, 1)) {
+        $line .= $ch;
+        last if ($ch eq "\n");
+    }
+    return length($line) ? $line : undef;
+}
+# Boot the target into the test kernel: for grub, select the cached menu
+# entry for one boot and reboot over ssh; otherwise run $reboot_script.
+sub reboot_to {
+    if ($reboot_type ne "grub") {
+        return run_command("$reboot_script");
+    }
+    run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'";
+    return;
+}
+# Resolve $commit to a commit id with "git rev-list --max-count=1".
+# Returns the id without its trailing newline; calls dodie when git
+# exits with a non-zero status.
+sub get_sha1 {
+    my ($commit) = @_;
+    my $git_cmd = "git rev-list --max-count=1 $commit";
+    doprint "$git_cmd ... ";
+    my $sha1 = `$git_cmd`;
+    my $status = $?;
+    logit $sha1;
+    if ($status) {
+        doprint "FAILED\n";
+        dodie "Failed to get git $commit";
+    }
+    print "SUCCESS\n";
+    chomp $sha1;
+    return $sha1;
+}
+# Reboot the target into the test kernel and watch its console.
+#
+# Console output is echoed, logged and written to the $dmesg file.  The
+# boot counts as successful once $success_line shows up; a "call trace:"
+# (outside the kernel's own backtrace self-test) or a "Kernel panic -"
+# marks it as failed.  Watching stops when the console goes quiet for
+# the applicable timeout, or STOP_AFTER_SUCCESS / STOP_AFTER_FAILURE
+# seconds after the respective event.
+#
+# Returns 1 when the box booted without a bug report, 0 otherwise.
+# Outside a bisect, failures are also reported through fail().
+sub monitor {
+    my $booted = 0;
+    my $bug = 0;
+    my $skip_call_trace = 0;
+    wait_for_monitor 5;
+    my $line;
+    my $full_line = "";
+    open(DMESG, "> $dmesg") or
+        die "unable to write to $dmesg";
+    reboot_to;
+    my $success_start;
+    my $failure_start;
+    for (;;) {
+        # once booted, only wait the short $booted_timeout for more output
+        if ($booted) {
+            $line = wait_for_input($monitor_fp, $booted_timeout);
+        } else {
+            $line = wait_for_input($monitor_fp);
+        }
+        last if (!defined($line));
+        doprint $line;
+        print DMESG $line;
+        # we are not guaranteed to get a full line
+        $full_line .= $line;
+        if ($full_line =~ /$success_line/) {
+            $booted = 1;
+            $success_start = time;
+        }
+        if ($booted && defined($stop_after_success) &&
+            $stop_after_success >= 0) {
+            my $now = time;
+            if ($now - $success_start >= $stop_after_success) {
+                doprint "Test forced to stop after $stop_after_success seconds after success\n";
+                last;
+            }
+        }
+        # call traces printed by the backtrace self-test are not bugs
+        if ($full_line =~ /\[ backtrace testing \]/) {
+            $skip_call_trace = 1;
+        }
+        if ($full_line =~ /call trace:/i) {
+            if (!$skip_call_trace) {
+                $bug = 1;
+                $failure_start = time;
+            }
+        }
+        if ($bug && defined($stop_after_failure) &&
+            $stop_after_failure >= 0) {
+            my $now = time;
+            if ($now - $failure_start >= $stop_after_failure) {
+                doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+                last;
+            }
+        }
+        if ($full_line =~ /\[ end of backtrace testing \]/) {
+            $skip_call_trace = 0;
+        }
+        if ($full_line =~ /Kernel panic -/) {
+            $bug = 1;
+            # A panic with no earlier call trace must start the failure
+            # timer too; otherwise the check above subtracts an
+            # undefined start time and stops on the very next line.
+            $failure_start = time if (!defined($failure_start));
+        }
+        if ($line =~ /\n/) {
+            $full_line = "";
+        }
+    }
+    close(DMESG);
+    if ($bug) {
+        return 0 if ($in_bisect);
+        fail "failed - got a bug report" and return 0;
+    }
+    if (!$booted) {
+        return 0 if ($in_bisect);
+        fail "failed - never got a boot prompt." and return 0;
+    }
+    return 1;
+}
+# Install the freshly built kernel on the target.
+#
+# Copies the kernel image, and, when the .config has CONFIG_MODULES=y,
+# installs the modules into $tmpdir, ships them to the target as a
+# tarball and unpacks them there.  Finally runs the POST_INSTALL command
+# (if defined) with $KERNEL_VERSION replaced by the kernel release.
+# Every failing step is fatal (dodie).
+#
+# NOTE(review): the POST_INSTALL step is only reached when modules were
+# installed; confirm whether it should also run for module-less configs.
+sub install {
+    run_scp "$outputdir/$build_target", "$target_image" or
+        dodie "failed to copy image";
+    my $install_mods = 0;
+    # should we process modules?
+    open(IN, "$output_config") or dodie("Can't read config file");
+    while (<IN>) {
+        # the first mention of CONFIG_MODULES decides ("=y" or not set)
+        if (/CONFIG_MODULES(=y)?/) {
+            $install_mods = 1 if (defined($1));
+            last;
+        }
+    }
+    close(IN);
+    if (!$install_mods) {
+        doprint "No modules needed\n";
+        return;
+    }
+    run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or
+        dodie "Failed to install modules";
+    my $modlib = "/lib/modules/$version";
+    my $modtar = "ktest-mods.tar.bz2";
+    run_ssh "rm -rf $modlib" or
+        dodie "failed to remove old mods: $modlib";
+    # would be nice if scp -r did not follow symbolic links
+    run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or
+        dodie "making tarball";
+    run_scp "$tmpdir/$modtar", "/tmp" or
+        dodie "failed to copy modules";
+    unlink "$tmpdir/$modtar";
+    run_ssh "'(cd / && tar xf /tmp/$modtar)'" or
+        dodie "failed to tar modules";
+    run_ssh "rm -f /tmp/$modtar";
+    return if (!defined($post_install));
+    my $cp_post_install = $post_install;
+    # Must bind with =~ : a plain "=" would run the substitution on $_
+    # and overwrite the command with the substitution's result.
+    $cp_post_install =~ s/\$KERNEL_VERSION/$version/g;
+    run_command "$cp_post_install" or
+        dodie "Failed to run post install";
+}
+# Check that none of the files touched by commit $patch produced a
+# warning or error in the build log.
+#
+# The file list is taken from "diffstat -l" plus every "--- a/<file>"
+# header of the patch.  A build log line counts when the text before its
+# first ':' equals one of those files, either as listed or prefixed with
+# $builddir.
+#
+# Returns 1 when the log is clean, 0 (after calling fail) otherwise.
+sub check_buildlog {
+    my ($patch) = @_;
+    my @files = `git show $patch | diffstat -l`;
+    # backticks keep the trailing newline, which would defeat the
+    # string comparisons below
+    chomp @files;
+    open(IN, "git show $patch |") or
+        dodie "failed to show $patch";
+    while (<IN>) {
+        if (m,^--- a/(.*),) {
+            my $name = $1;
+            # append (never overwrite an existing entry), skipping
+            # names diffstat already reported
+            push @files, $name if (!grep { $_ eq $name } @files);
+        }
+    }
+    close(IN);
+    open(IN, $buildlog) or dodie "Can't open $buildlog";
+    while (<IN>) {
+        if (/^\s*(.*?):.*(warning|error)/) {
+            my $err = $1;
+            foreach my $file (@files) {
+                my $fullpath = "$builddir/$file";
+                if ($file eq $err || $fullpath eq $err) {
+                    close(IN);
+                    fail "$file built with warnings" and return 0;
+                }
+            }
+        }
+    }
+    close(IN);
+    return 1;
+}
+# Configure and build the kernel.
+#
+#   $type - the make config target to use (e.g. "randconfig"), or
+#           "oldconfig", or "useconfig:<file>" to start from <file>.
+#
+# Returns 1 on success.  A failed compile returns 0: silently while
+# bisecting, through fail() otherwise.  Failures of the preparatory
+# steps are fatal (dodie).
+sub build {
+    my ($type) = @_;
+    my $defconfig = "";
+    unlink $buildlog;
+    # "useconfig:<file>": install <file> as the .config, then treat the
+    # build like an oldconfig build
+    if ($type =~ /^useconfig:(.*)/) {
+        run_command "cp $1 $output_config" or
+            dodie "could not copy $1 to .config";
+        $type = "oldconfig";
+    }
+    # old config can ask questions
+    if ($type eq "oldconfig") {
+        $type = "oldnoconfig";
+        # allow for empty configs
+        run_command "touch $output_config";
+        # park the .config in config_temp while mrproper runs, then
+        # move it back
+        run_command "mv $output_config $outputdir/config_temp" or
+            dodie "moving .config";
+        if (!$noclean && !run_command "$make mrproper") {
+            dodie "make mrproper";
+        }
+        run_command "mv $outputdir/config_temp $output_config" or
+            dodie "moving config_temp";
+    } elsif (!$noclean) {
+        unlink "$output_config";
+        run_command "$make mrproper" or
+            dodie "make mrproper";
+    }
+    # add something to distinguish this build
+    open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file");
+    print OUT "$localversion\n";
+    close(OUT);
+    # a minimum config, when given, is passed to make as KCONFIG_ALLCONFIG
+    if (defined($minconfig)) {
+        $defconfig = "KCONFIG_ALLCONFIG=$minconfig";
+    }
+    run_command "$defconfig $make $type" or
+        dodie "failed make config";
+    # run_command copies the compile output into $buildlog while
+    # $redirect is defined
+    $redirect = "$buildlog";
+    if (!run_command "$make $build_options") {
+        undef $redirect;
+        # bisect may need this to pass
+        return 0 if ($in_bisect);
+        fail "failed build" and return 0;
+    }
+    undef $redirect;
+    return 1;
+}
+# Halt the target over ssh, optionally followed by the power-off command.
+#
+# As written: when "halt" fails, or a power-off command is defined, the
+# power-off command is run after $poweroff_after_halt seconds, but only
+# if $poweroff_after_halt is defined.  In the remaining case (halt
+# succeeded and $power_off is undefined) the else branch still tries to
+# run $power_off.
+# NOTE(review): this looks inverted compared with reboot(); the else
+# branch can only run with an undefined $power_off.  Confirm the
+# intended logic before relying on it.
+sub halt {
+    if (!run_ssh "halt" or defined($power_off)) {
+        if (defined($poweroff_after_halt)) {
+            sleep $poweroff_after_halt;
+            run_command "$power_off";
+        }
+    } else {
+        # nope? the zap it!
+        run_command "$power_off";
+    }
+}
+# Record that test $i passed: bump the success counter, print the result
+# banner, and, unless this was the last test or a build-only test,
+# reboot the target and let the console settle for $sleep_time seconds.
+sub success {
+    my ($i) = @_;
+    $successes++;
+    doprint "\n\n*******************************************\n";
+    doprint     "*******************************************\n";
+    doprint     "KTEST RESULT: TEST $i SUCCESS!!!!         **\n";
+    doprint     "*******************************************\n";
+    doprint     "*******************************************\n";
+    return if ($i == $opt{"NUM_TESTS"} || do_not_reboot());
+    doprint "Reboot and wait $sleep_time seconds\n";
+    reboot();
+    start_monitor();
+    wait_for_monitor($sleep_time);
+    end_monitor();
+}
+# Store the kernel release name (the last line printed by
+# "make kernelrelease") in $version.
+sub get_version {
+    doprint "$make kernelrelease ... ";
+    chomp($version = `$make kernelrelease | tail -1`);
+    doprint "$version\n";
+}
+# Body of the forked test child: run the $run_test command and exit with
+# status 0 when it succeeded, 1 when it failed.  Never returns.
+sub child_run_test {
+    # child should have no power
+    $reboot_on_error = 0;
+    $poweroff_on_error = 0;
+    $die_on_failure = 1;
+    my $status = run_command($run_test) ? 0 : 1;
+    exit $status;
+}
+# Flag polled by do_run_test; set once the forked test child has exited.
+my $child_done;
+# SIGCHLD handler installed by do_run_test.
+sub child_finished {
+    $child_done = 1;
+}
+# Run the user's test command in a forked child while the parent watches
+# the console for a kernel crash.
+#
+# If a "call trace:" or "Kernel panic -" appears on the console, the
+# child is killed.  Returns 1 when the child exited with status 0 and no
+# crash was seen; otherwise returns 0 (silently while bisecting, through
+# fail() otherwise).
+sub do_run_test {
+    my $child_pid;
+    my $child_exit;
+    my $line;
+    my $full_line;
+    my $bug = 0;
+    wait_for_monitor 1;
+    doprint "run test $run_test\n";
+    $child_done = 0;
+    $SIG{CHLD} = qw(child_finished);
+    $child_pid = fork;
+    # fork returns undef on failure; without this check the parent
+    # itself would take the child path below and exit.
+    dodie "unable to fork test process: $!" if (!defined($child_pid));
+    child_run_test if (!$child_pid);
+    $full_line = "";
+    do {
+        $line = wait_for_input($monitor_fp, 1);
+        if (defined($line)) {
+            # we are not guaranteed to get a full line
+            $full_line .= $line;
+            if ($full_line =~ /call trace:/i) {
+                $bug = 1;
+            }
+            if ($full_line =~ /Kernel panic -/) {
+                $bug = 1;
+            }
+            if ($line =~ /\n/) {
+                $full_line = "";
+            }
+        }
+    } while (!$child_done && !$bug);
+    if ($bug) {
+        doprint "Detected kernel crash!\n";
+        # kill the child with extreme prejudice
+        kill 9, $child_pid;
+    }
+    waitpid $child_pid, 0;
+    $child_exit = $?;
+    if ($bug || $child_exit) {
+        return 0 if $in_bisect;
+        fail "test failed" and return 0;
+    }
+    return 1;
+}
+# Run one "git bisect ..." command and interpret its output.
+#
+# The command's output (stdout and stderr) is logged.  A non-zero exit
+# status is fatal (dodie).
+# Returns 0 when git reports the first bad commit (its id is stored in
+# $bisect_bad), 1 otherwise, i.e. when the bisect should continue.
+sub run_git_bisect {
+    my ($command) = @_;
+    doprint "$command ... ";
+    my $output = `$command 2>&1`;
+    my $ret = $?;
+    logit $output;
+    if ($ret) {
+        doprint "FAILED\n";
+        dodie "Failed to git bisect";
+    }
+    doprint "SUCCESS\n";
+    # progress report: "Bisecting: ... (roughly N steps) [<commit>]"
+    if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) {
+        doprint "$1 [$2]\n";
+    } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) {
+        $bisect_bad = $1;
+        doprint "Found bad commit... $1\n";
+        return 0;
+    } else {
+        # we already logged it, just print it now.
+        print $output;
+    }
+    return 1;
+}
+# Build, and depending on $type also boot and test, the currently checked
+# out tree as one bisect step.
+#
+#   $type      - "build", "boot", or anything else for a full test run
+#   $buildtype - build type passed on to build()
+#
+# $in_bisect is set for the duration so that build/monitor/do_run_test
+# return 0 instead of calling fail().  A failure of an earlier stage than
+# the one being bisected is fatal (dodie).  After a failed boot or test
+# the target is rebooted and given $bisect_sleep_time seconds to settle.
+#
+# returns 1 on success, 0 on failure
+sub run_bisect_test {
+    my ($type, $buildtype) = @_;
+    my $failed = 0;
+    my $result;
+    my $output;
+    my $ret;
+    $in_bisect = 1;
+    build $buildtype or $failed = 1;
+    if ($type ne "build") {
+        dodie "Failed on build" if $failed;
+        # Now boot the box
+        get_grub_index;
+        get_version;
+        install;
+        start_monitor;
+        monitor or $failed = 1;
+        if ($type ne "boot") {
+            dodie "Failed on boot" if $failed;
+            do_run_test or $failed = 1;
+        }
+        end_monitor;
+    }
+    if ($failed) {
+        $result = 0;
+        # reboot the box to a good kernel
+        if ($type ne "build") {
+            doprint "Reboot and sleep $bisect_sleep_time seconds\n";
+            reboot;
+            start_monitor;
+            wait_for_monitor $bisect_sleep_time;
+            end_monitor;
+        }
+    } else {
+        $result = 1;
+    }
+    $in_bisect = 0;
+    return $result;
+}
+# Run one bisect step of kind $type and translate the outcome into the
+# word git bisect expects: "good" or "bad" (swapped for a reverse
+# bisect).  Builds from the minimum config when one is defined,
+# otherwise with oldconfig.
+sub run_bisect {
+    my ($type) = @_;
+    # We should have a minconfig to use?
+    my $buildtype = defined($minconfig) ? "useconfig:$minconfig" : "oldconfig";
+    my $passed = run_bisect_test($type, $buildtype);
+    # Are we looking for where it worked, not failed?
+    $passed = !$passed if ($reverse_bisect);
+    return $passed ? "good" : "bad";
+}
+# Run a complete git bisect for test number $i.
+#
+# Requires BISECT_GOOD[$i], BISECT_BAD[$i] and BISECT_TYPE[$i]; also
+# honours BISECT_START, BISECT_REPLAY, BISECT_REVERSE and BISECT_CHECK
+# for the same test.  The bisect is driven by repeatedly testing the
+# tree git checks out and feeding the verdict back to "git bisect" until
+# git names the first bad commit, after which the bisect log is captured
+# and the bisect is reset.
+sub bisect {
+    my ($i) = @_;
+    my $result;
+    die "BISECT_GOOD[$i] not defined\n" if (!defined($opt{"BISECT_GOOD[$i]"}));
+    die "BISECT_BAD[$i] not defined\n"  if (!defined($opt{"BISECT_BAD[$i]"}));
+    die "BISECT_TYPE[$i] not defined\n" if (!defined($opt{"BISECT_TYPE[$i]"}));
+    my $good = $opt{"BISECT_GOOD[$i]"};
+    my $bad = $opt{"BISECT_BAD[$i]"};
+    my $type = $opt{"BISECT_TYPE[$i]"};
+    my $start = $opt{"BISECT_START[$i]"};
+    my $replay = $opt{"BISECT_REPLAY[$i]"};
+    # convert to true sha1's
+    $good = get_sha1($good);
+    $bad = get_sha1($bad);
+    if (defined($opt{"BISECT_REVERSE[$i]"}) &&
+        $opt{"BISECT_REVERSE[$i]"} == 1) {
+        doprint "Performing a reverse bisect (bad is good, good is bad!)\n";
+        $reverse_bisect = 1;
+    } else {
+        $reverse_bisect = 0;
+    }
+    # Can't have a test without having a test to run
+    if ($type eq "test" && !defined($run_test)) {
+        $type = "boot";
+    }
+    # BISECT_CHECK: first verify that the end points behave as claimed.
+    # "good" checks only the good commit, "bad" only the bad one, any
+    # other value except "0" checks both.
+    my $check = $opt{"BISECT_CHECK[$i]"};
+    if (defined($check) && $check ne "0") {
+        # get current HEAD
+        my $head = get_sha1("HEAD");
+        if ($check ne "good") {
+            doprint "TESTING BISECT BAD [$bad]\n";
+            run_command "git checkout $bad" or
+                die "Failed to checkout $bad";
+            $result = run_bisect $type;
+            if ($result ne "bad") {
+                fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0;
+            }
+        }
+        if ($check ne "bad") {
+            doprint "TESTING BISECT GOOD [$good]\n";
+            run_command "git checkout $good" or
+                die "Failed to checkout $good";
+            $result = run_bisect $type;
+            if ($result ne "good") {
+                fail "Tested BISECT_GOOD [$good] and it failed" and return 0;
+            }
+        }
+        # checkout where we started
+        run_command "git checkout $head" or
+            die "Failed to checkout $head";
+    }
+    run_command "git bisect start" or
+        dodie "could not start bisect";
+    run_command "git bisect good $good" or
+        dodie "could not set bisect good to $good";
+    run_git_bisect "git bisect bad $bad" or
+        dodie "could not set bisect bad to $bad";
+    if (defined($replay)) {
+        run_command "git bisect replay $replay" or
+            dodie "failed to run replay";
+    }
+    if (defined($start)) {
+        run_command "git checkout $start" or
+            dodie "failed to checkout $start";
+    }
+    # test the current checkout and report the verdict; run_git_bisect
+    # returns 0 once git has identified the first bad commit
+    my $test;
+    do {
+        $result = run_bisect $type;
+        $test = run_git_bisect "git bisect $result";
+    } while ($test);
+    run_command "git bisect log" or
+        dodie "could not capture git bisect log";
+    run_command "git bisect reset" or
+        dodie "could not reset git bisect";
+    doprint "Bad commit was [$bisect_bad]\n";
+    success $i;
+}
+# State shared by the config handling subroutines below.
+# CONFIG name => full .config line; filled by process_config_ignore and
+# process_passed, written out as-is by create_config.
+my %config_ignore;
+# CONFIG name => line that create_config writes for that option.
+my %config_set;
+# Entries here are moved into %config_ignore by process_passed.
+my %config_list;
+my %null_config;
+# CONFIG name => array ref of CONFIG names, walked by get_dependencies.
+my %dependency;
+# Read the config file $config and remember each CONFIG line in
+# %config_ignore, keyed by option name.  Both "CONFIG_X=..." and
+# "... CONFIG_X is not set" lines are recorded; the stored value is the
+# line from its start through the "=value" / " is not set" part.
+sub process_config_ignore {
+    my ($config) = @_;
+    open (IN, $config)
+        or dodie "Failed to read $config";
+    while (<IN>) {
+        next if (!/^(.*?(CONFIG\S*)(=.*| is not set))/);
+        $config_ignore{$2} = $1;
+    }
+    close(IN);
+}
+# Replace the contents of the hash referenced by $config_ref with the
+# "CONFIG_X=value" settings found in the current .config
+# ($output_config), as CONFIG_X => value.  Options that are not set are
+# not recorded.  Fatal (dodie) when the .config cannot be read, since an
+# empty hash would be indistinguishable from an empty config.
+sub read_current_config {
+    my ($config_ref) = @_;
+    %{$config_ref} = ();
+    open (IN, "$output_config")
+        or dodie "Failed to read $output_config";
+    while (<IN>) {
+        if (/^(CONFIG\S+)=(.*)/) {
+            ${$config_ref}{$1} = $2;
+        }
+    }
+    close(IN);
+}
+# Return the dependencies listed for $config in %dependency, each of them
+# followed by its own dependencies, gathered recursively.
+sub get_dependencies {
+    my $config = shift;
+    my $direct = $dependency{$config};
+    return () unless defined($direct);
+    my @all = @{$direct};
+    for my $dep (@{$direct}) {
+        print "ADD DEP $dep\n";
+        push @all, get_dependencies($dep);    # parens: sub not yet predeclared here
+    }
+    return @all;
+}
+# Write a new $output_config: the %config_set line of every option in @_
+# and of each of its dependencies, followed by every line remembered in
+# %config_ignore.  "$make oldnoconfig" is then run on the result.
+sub create_config {
+    my @wanted = @_;
+    open(OUT, ">$output_config") or dodie "Can not write to $output_config";
+    for my $name (@wanted) {
+        print OUT "$config_set{$name}\n";
+        for my $dep (get_dependencies($name)) {
+            print OUT "$config_set{$dep}\n";
+        }
+    }
+    for my $name (keys %config_ignore) {
+        print OUT "$config_ignore{$name}\n";
+    }
+    close(OUT);
+    run_command "$make oldnoconfig" or dodie "failed make config oldconfig";
+}
+# Compare the option names of two configs given as hash references (a
+# flattened "%x, %y" list can not be split and is taken as one hash).
+# Returns 1 if the first has a name the second lacks, -1 if only the
+# second has extras, 0 if the names match.
+sub compare_configs {
+    my %a = (ref($_[0]) eq "HASH") ? %{$_[0]} : @_;
+    my %b = (ref($_[1]) eq "HASH") ? %{$_[1]} : ();
+    foreach my $item (keys %a) {
+        next if (defined(delete $b{$item}));
+        print "diff $item\n";
+        return 1;
+    }
+    my @keys = keys %b;
+    print "diff2 $keys[0]\n" if (@keys);
+    return @keys ? -1 : 0;
+}
+sub run_config_bisect_test {
+    my $type = shift;        # kind of test, passed on unchanged
+    return run_bisect_test($type, "oldconfig");
+}
+# Called with the name => value pairs of a config that passed the test.
+# Each of its options still pending in %config_list is moved over to
+# %config_ignore, and the passing .config is kept as config_good.
+sub process_passed {
+    my %passed = @_;
+    doprint "These configs had no failure: (Enabling them for further compiles)\n";
+    for my $name (keys %passed) {
+        next unless defined($config_list{$name});
+        # part of a good compile: from now on treat it like a min option
+        doprint " removing $name\n";
+        $config_ignore{$name} = delete $config_list{$name};
+    }
+    doprint "config copied to $outputdir/config_good\n";
+    run_command "cp -f $output_config $outputdir/config_good";
+}
+sub process_failed {    # print a banner naming the config found to be bad
+    my ($config) = @_;    # name of the offending config option
+    doprint "\n\n***************************************\n";
+    doprint "Found bad config: $config\n";
+    doprint "***************************************\n\n";
+}
+# Run one round of the config bisect over the options left in %config_list;
+# the kind of test comes from CONFIG_BISECT_TYPE of the current iteration.
+# Half of the options are written to a new .config and tested; on a failure
+# the set is narrowed to that half and split again until one option is left.
+# Returns  0 when a tested config passed (its options go to %config_ignore),
+#          1 when a single failing option was found,
+#         -1 when nothing is left to test or no testable config could be made.
+sub run_config_bisect {
+    my @start_list = keys %config_list;
+    if ($#start_list < 0) {
+        doprint "No more configs to test!!!\n";
+        return -1;
+    }
+    doprint "***** RUN TEST ***\n";
+    my $type = $opt{"CONFIG_BISECT_TYPE[$iteration]"};
+    my $ret;
+    my %current_config;
+    my $count = $#start_list + 1;
+    doprint "  $count configs to test\n";
+    my $half = int($#start_list / 2);
+    do {
+        my @tophalf = @start_list[0 .. $half];
+        create_config @tophalf;
+        read_current_config \%current_config;
+        $count = $#tophalf + 1;
+        doprint "Testing $count configs\n";
+        my $found = 0;
+        # make sure we test something
+        foreach my $config (@tophalf) {
+            if (defined($current_config{$config})) {
+                logit " $config\n";
+                $found = 1;
+            }
+        }
+        if (!$found) {
+            # Try the other half.  It starts at $half + 1 so that it never
+            # overlaps the top half; with an overlap a two entry list would
+            # be "split" into itself and could not be narrowed any further.
+            doprint "Top half produced no set configs, trying bottom half\n";
+            @tophalf = @start_list[$half + 1 .. $#start_list];
+            create_config @tophalf;
+            read_current_config \%current_config;
+            foreach my $config (@tophalf) {
+                if (defined($current_config{$config})) {
+                    logit " $config\n";
+                    $found = 1;
+                }
+            }
+            if (!$found) {
+                doprint "Failed: Can't make new config with current configs\n";
+                foreach my $config (@start_list) {
+                    doprint "  CONFIG: $config\n";
+                }
+                return -1;
+            }
+            $count = $#tophalf + 1;
+            doprint "Testing $count configs\n";
+        }
+        $ret = run_config_bisect_test $type;
+        if ($ret) {
+            process_passed %current_config;
+            return 0;
+        }
+        doprint "This config had a failure.\n";
+        doprint "Removing these configs that were not set in this config:\n";
+        doprint "config copied to $outputdir/config_bad\n";
+        run_command "cp -f $output_config $outputdir/config_bad";
+        # A config exists in this group that was bad.
+        foreach my $config (keys %config_list) {
+            if (!defined($current_config{$config})) {
+                doprint " removing $config\n";
+                delete $config_list{$config};
+            }
+        }
+        @start_list = @tophalf;
+        # Halve what is left and go again.  Looping on the list size (not on
+        # $half, which is already 0 for two entries) makes sure that two
+        # remaining suspects are still told apart.
+        $half = int($#start_list / 2);
+    } while ($#start_list > 0);
+    # A single config is left and the run that contained it failed.
+    process_failed $start_list[0];
+    return 1;
+}
+# Find the config option that makes the config in CONFIG_BISECT[$i] fail.
+# Options that also come from MIN_CONFIG/ADD_CONFIG are excluded from the
+# search (%config_ignore); all other set options end up in %config_list and
+# run_config_bisect is repeated on them until it stops returning 0.
+sub config_bisect {
+    my ($i) = @_;
+    my $start_config = $opt{"CONFIG_BISECT[$i]"};
+    my $tmpconfig = "$tmpdir/use_config";
+    # Make the file with the bad config and the min config
+    if (defined($minconfig)) {
+        # read the min config for things to ignore
+        run_command "cp $minconfig $tmpconfig" or
+            dodie "failed to copy $minconfig to $tmpconfig";
+    } else {
+        unlink $tmpconfig;
+    }
+    # Add other configs
+    if (defined($addconfig)) {
+        run_command "cat $addconfig >> $tmpconfig" or dodie "failed to append $addconfig";
+    }
+    my $defconfig = "";
+    if (-f $tmpconfig) {
+        $defconfig = "KCONFIG_ALLCONFIG=$tmpconfig";
+        process_config_ignore $tmpconfig;
+    }
+    # now process the start config
+    run_command "cp $start_config $output_config" or
+        dodie "failed to copy $start_config to $output_config";
+    # read directly what we want to check
+    my %config_check;
+    open (IN, $output_config) or dodie "failed to open $output_config";
+    while (<IN>) {
+        if (/^((CONFIG\S*)=.*)/) {
+            $config_check{$2} = $1;
+        }
+    }
+    close(IN);
+    # Now run oldconfig with the minconfig (and addconfigs)
+    run_command "$defconfig $make oldnoconfig" or dodie "failed make config oldconfig";
+    # check to see what we lost (or gained)
+    open (IN, $output_config) or dodie "Failed to read $output_config";
+    my %removed_configs;
+    my %added_configs;
+    while (<IN>) {
+        if (/^((CONFIG\S*)=.*)/) {
+            # save off all options
+            $config_set{$2} = $1;
+            if (defined($config_check{$2})) {
+                if (defined($config_ignore{$2})) {
+                    $removed_configs{$2} = $1;
+                } else {
+                    $config_list{$2} = $1;
+                }
+            } elsif (!defined($config_ignore{$2})) {
+                $added_configs{$2} = $1;
+                $config_list{$2} = $1;
+            }
+        }
+    }
+    close(IN);
+    my @confs = keys %removed_configs;
+    if ($#confs >= 0) {
+        doprint "Configs overridden by default configs and removed from check:\n";
+        foreach my $config (@confs) {
+            doprint " $config\n";
+        }
+    }
+    @confs = keys %added_configs;
+    if ($#confs >= 0) {
+        doprint "Configs appearing in make oldconfig and added:\n";
+        foreach my $config (@confs) {
+            doprint " $config\n";
+        }
+    }
+    my %config_test;
+    my $once = 0;
+    # Sometimes kconfig does weird things. We must make sure
+    # that the config we autocreate has everything we need
+    # to test, otherwise we may miss testing configs, or
+    # may not be able to create a new config.
+    # Here we create a config with everything set.
+    create_config (keys %config_list);
+    read_current_config \%config_test;
+    foreach my $config (keys %config_list) {
+        if (!defined($config_test{$config})) {
+            if (!$once) {
+                $once = 1;
+                doprint "Configs not produced by kconfig (will not be checked):\n";
+            }
+            doprint "  $config\n";
+            delete $config_list{$config};
+        }
+    }
+    my $ret;
+    do {
+        $ret = run_config_bisect;
+    } while (!$ret);
+    return $ret if ($ret < 0);
+    success $i;
+}
+# Build every commit from PATCHCHECK_START[$i] up to PATCHCHECK_END[$i]
+# (default HEAD), oldest first; unless PATCHCHECK_TYPE[$i] is "build" each
+# one is also installed and monitored.  Returns 1 if all passed, else 0.
+sub patchcheck {
+    my ($i) = @_;
+    die "PATCHCHECK_START[$i] not defined\n" if (!defined($opt{"PATCHCHECK_START[$i]"}));
+    die "PATCHCHECK_TYPE[$i] not defined\n" if (!defined($opt{"PATCHCHECK_TYPE[$i]"}));
+    my $start = $opt{"PATCHCHECK_START[$i]"};
+    my $end = "HEAD";
+    if (defined($opt{"PATCHCHECK_END[$i]"})) {
+        $end = $opt{"PATCHCHECK_END[$i]"};
+    }
+    # Get the true sha1's since we can use things like HEAD~3
+    $start = get_sha1($start);
+    $end = get_sha1($end);
+    my $type = $opt{"PATCHCHECK_TYPE[$i]"};
+    # Can't have a test without having a test to run
+    if ($type eq "test" && !defined($run_test)) {
+        $type = "boot";
+    }
+    open (IN, "git log --pretty=oneline $end|") or dodie "could not get git list";
+    my @list;
+    while (<IN>) {
+        chomp;
+        push @list, $_;
+        last if (/^$start/);
+    }
+    close(IN);
+    # Without the start commit the list holds the whole history of $end (or
+    # nothing at all), so give up instead of testing every commit in it.
+    if (!@list || $list[$#list] !~ /^$start/) {
+        fail "SHA1 $start not found" and return 0;
+    }
+    # go backwards in the list
+    @list = reverse @list;
+    my $save_clean = $noclean;
+    $in_patchcheck = 1;
+    foreach my $item (@list) {
+        my $sha1 = $item;
+        $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+        doprint "\nProcessing commit $item\n\n";
+        run_command "git checkout $sha1" or die "Failed to checkout $sha1";
+        # only clean on the first and last patch
+        if ($item eq $list[0] || $item eq $list[$#list]) {
+            $noclean = $save_clean;
+        } else {
+            $noclean = 1;
+        }
+        if (defined($minconfig)) {
+            build "useconfig:$minconfig" or return 0;
+        } else {
+            # ?? no config to use?
+            build "oldconfig" or return 0;
+        }
+        check_buildlog $sha1 or return 0;
+        next if ($type eq "build");
+        get_grub_index;
+        get_version;
+        install;
+        my $failed = 0;
+        start_monitor;
+        monitor or $failed = 1;
+        if (!$failed && $type ne "boot"){
+            do_run_test or $failed = 1;
+        }
+        end_monitor;
+        return 0 if ($failed);
+    }
+    $in_patchcheck = 0;
+    success $i;
+    return 1;
+}
+# ktest.pl takes at most one argument: the config file to use.
+die "ktest.pl version: $VERSION\n   usage: ktest.pl config-file\n"
+    if ($#ARGV >= 1);
+# Without an argument the config is ktest.conf in the current directory.
+$ktest_config = ($#ARGV == 0) ? $ARGV[0] : "ktest.conf";
+# A missing file that was named on the command line is only created after
+# the user agrees; a missing ktest.conf is created without asking.
+if ($#ARGV == 0 && ! -f $ktest_config) {
+    print "$ktest_config does not exist.\n";
+    my $ans;
+    while (1) {
+        print "Create it? [Y/n] ";
+        $ans = <STDIN>;
+        chomp $ans;
+        # just pressing enter selects the default answer, yes
+        $ans = "y" if ($ans =~ /^\s*$/);
+        last if ($ans =~ /^[yn]$/i);
+        print "Please answer either 'y' or 'n'.\n";
+    }
+    exit 0 unless ($ans =~ /^y$/i);
+}
+# Seed a missing config file with a generated header, a TEST_START label
+# and a DEFAULTS label.
+if (! -f $ktest_config) {
+    open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+    print OUT << "EOF"
+# Generated by ktest.pl
+#
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+DEFAULTS
+EOF
+;
+    close(OUT);
+}
+read_config $ktest_config;
+# Append any configs entered in manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+    print "\nAppending entered in configs to $ktest_config\n";
+    open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+    @opt{@new_configs} = @entered_configs{@new_configs};
+    print OUT "$_ = $opt{$_}\n" foreach (@new_configs);
+    # Close explicitly: the lines are flushed now and a failed write is reported.
+    close(OUT) or die "Can not append to $ktest_config";
+}
+if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
+    unlink $opt{"LOG_FILE"};
+}
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+# Print the default options (index 0), then the options of every test.
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+    # ITERATE only widens the step of its own test; reset it for the next one.
+    $repeat = 1;
+    if (!$i) {
+        doprint "DEFAULT OPTIONS:\n";
+    } else {
+        doprint "\nTEST $i OPTIONS";
+        if (defined($repeat_tests{$i})) {
+            $repeat = $repeat_tests{$i};
+            doprint " ITERATE $repeat";
+        }
+        doprint "\n";
+    }
+    foreach my $option (sort keys %opt) {
+        # "NAME[n]" belongs to test n, a plain "NAME" is a default
+        next if (($option =~ /\[(\d+)\]$/) ? ($i != $1) : $i);
+        doprint "$option = $opt{$option}\n";
+    }
+}
+# Look up option $name for test number $i.  The test's own "NAME[i]" wins,
+# then the "NAME[t]" of an iterated test t whose range covers $i, then the
+# default "NAME"; undef is returned when none of them is defined.
+sub set_test_option {
+    my ($name, $i) = @_;
+    # 1) set for exactly this test
+    my $own = $opt{"$name\[$i\]"};
+    return $own if (defined($own));
+    # 2) set on the iterated test this run is a repeat of
+    for my $first (keys %repeat_tests) {
+        next if ($i < $first ||
+                 $i >= $first + $repeat_tests{$first});
+        my $inherited = $opt{"$name\[$first\]"};
+        return $inherited if (defined($inherited));
+    }
+    # 3) the default
+    my $default = $opt{$name};
+    return $default if (defined($default));
+    return undef;
+}
+# Run the tests one by one; each iteration first loads the options of its test.
+for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+    $iteration = $i;
+    my $makecmd = set_test_option("MAKE_CMD", $i);
+    $machine = set_test_option("MACHINE", $i);
+    $ssh_user = set_test_option("SSH_USER", $i);
+    $tmpdir = set_test_option("TMP_DIR", $i);
+    $outputdir = set_test_option("OUTPUT_DIR", $i);
+    $builddir = set_test_option("BUILD_DIR", $i);
+    $test_type = set_test_option("TEST_TYPE", $i);
+    $build_type = set_test_option("BUILD_TYPE", $i);
+    $build_options = set_test_option("BUILD_OPTIONS", $i);
+    $power_cycle = set_test_option("POWER_CYCLE", $i);
+    $reboot = set_test_option("REBOOT", $i);
+    $noclean = set_test_option("BUILD_NOCLEAN", $i);
+    $minconfig = set_test_option("MIN_CONFIG", $i);
+    $run_test = set_test_option("TEST", $i);
+    $addconfig = set_test_option("ADD_CONFIG", $i);
+    $reboot_type = set_test_option("REBOOT_TYPE", $i);
+    $grub_menu = set_test_option("GRUB_MENU", $i);
+    $post_install = set_test_option("POST_INSTALL", $i);
+    $reboot_script = set_test_option("REBOOT_SCRIPT", $i);
+    $reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i);
+    $poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i);
+    $die_on_failure = set_test_option("DIE_ON_FAILURE", $i);
+    $power_off = set_test_option("POWER_OFF", $i);
+    $powercycle_after_reboot = set_test_option("POWERCYCLE_AFTER_REBOOT", $i);
+    $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i);
+    $sleep_time = set_test_option("SLEEP_TIME", $i);
+    $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i);
+    $store_failures = set_test_option("STORE_FAILURES", $i);
+    $timeout = set_test_option("TIMEOUT", $i);
+    $booted_timeout = set_test_option("BOOTED_TIMEOUT", $i);
+    $console = set_test_option("CONSOLE", $i);
+    $success_line = set_test_option("SUCCESS_LINE", $i);
+    $stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i);
+    $stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i);
+    $build_target = set_test_option("BUILD_TARGET", $i);
+    $ssh_exec = set_test_option("SSH_EXEC", $i);
+    $scp_to_target = set_test_option("SCP_TO_TARGET", $i);
+    $target_image = set_test_option("TARGET_IMAGE", $i);
+    $localversion = set_test_option("LOCALVERSION", $i);
+    # "||" binds to $builddir, not to chdir, so a failed chdir went unnoticed;
+    # "or" makes the die depend on the result of chdir itself.
+    chdir($builddir) or die "can't change directory to $builddir";
+    if (!-d $tmpdir) {
+        mkpath($tmpdir) or die "can't create $tmpdir";
+    }
+    $ENV{"SSH_USER"} = $ssh_user;
+    $ENV{"MACHINE"} = $machine;
+    $target = "$ssh_user\@$machine";
+    $buildlog = "$tmpdir/buildlog-$machine";
+    $dmesg = "$tmpdir/dmesg-$machine";
+    $make = "$makecmd O=$outputdir";
+    $output_config = "$outputdir/.config";
+    if ($reboot_type eq "grub") {
+        dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+    } elsif (!defined($reboot_script)) {
+        dodie "REBOOT_SCRIPT not defined"
+    }
+    my $run_type = $build_type;
+    if ($test_type eq "patchcheck") {
+        $run_type = $opt{"PATCHCHECK_TYPE[$i]"};
+    } elsif ($test_type eq "bisect") {
+        $run_type = $opt{"BISECT_TYPE[$i]"};
+    } elsif ($test_type eq "config_bisect") {
+        $run_type = $opt{"CONFIG_BISECT_TYPE[$i]"};
+    }
+    # mistake in config file?
+    if (!defined($run_type)) {
+        $run_type = "ERROR";
+    }
+    doprint "\n\n";
+    doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n";
+    unlink $dmesg;
+    unlink $buildlog;
+    if (!defined($minconfig)) {
+        $minconfig = $addconfig;
+    } elsif (defined($addconfig)) {
+        run_command "cat $addconfig $minconfig > $tmpdir/add_config" or
+            dodie "Failed to create temp config";
+        $minconfig = "$tmpdir/add_config";
+    }
+    my $checkout = $opt{"CHECKOUT[$i]"};
+    if (defined($checkout)) {
+        run_command "git checkout $checkout" or die "failed to checkout $checkout";
+    }
+    if ($test_type eq "bisect") {
+        bisect $i;
+        next;
+    } elsif ($test_type eq "config_bisect") {
+        config_bisect $i;
+        next;
+    } elsif ($test_type eq "patchcheck") {
+        patchcheck $i;
+        next;
+    }
+    if ($build_type ne "nobuild") {
+        build $build_type or next;
+    }
+    if ($test_type ne "build") {
+        get_grub_index;
+        get_version;
+        install;
+        my $failed = 0;
+        start_monitor;
+        monitor or $failed = 1;
+        if (!$failed && $test_type ne "boot" && defined($run_test)) {
+            do_run_test or $failed = 1;
+        }
+        end_monitor;
+        next if ($failed);
+    }
+    success $i;
+}
+# All tests have run.  Halt the target if POWEROFF_ON_SUCCESS is set; else
+# reboot it when REBOOT_ON_SUCCESS is set and do_not_reboot does not veto it.
+if ($opt{"POWEROFF_ON_SUCCESS"}) { halt; }
+elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) { reboot; }
+# report the final tally
+doprint "\n    $successes of $opt{NUM_TESTS} tests were successful\n\n";
+exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
new file mode 100644
index 000000000000..3408c594b2de
--- /dev/null
+++ b/tools/testing/ktest/sample.conf
@@ -0,0 +1,622 @@
+#
+# Config file for ktest.pl
+#
+# Note, all paths must be absolute
+#
+# Options set in the beginning of the file are considered to be
+# default options. These options can be overridden by test specific
+# options, with the following exceptions:
+#
+#  LOG_FILE
+#  CLEAR_LOG
+#  POWEROFF_ON_SUCCESS
+#  REBOOT_ON_SUCCESS
+#
+# Test specific options are set after the label:
+#
+# TEST_START
+#
+# The options after a TEST_START label are specific to that test.
+# Each TEST_START label will set up a new test. If you want to
+# perform a test more than once, you can add the ITERATE label
+# to it followed by the number of times you want that test
+# to iterate. If the ITERATE is left off, the test will only
+# be performed once.
+#
+# TEST_START ITERATE 10
+#
+# You can skip a test by adding SKIP (before or after the ITERATE
+# and number)
+#
+# TEST_START SKIP
+#
+# TEST_START SKIP ITERATE 10
+#
+# TEST_START ITERATE 10 SKIP
+#
+# The SKIP label causes the options and the test itself to be ignored.
+# This is useful to set up several different tests in one config file, and
+# only enabling the ones you want to use for a current test run.
+#
+# You can add default options anywhere in the file as well
+# with the DEFAULTS tag. This allows you to have default options
+# after the test options to keep the test options at the top
+# of the file. You can even place the DEFAULTS tag between
+# test cases (but not in the middle of a single test case)
+#
+# TEST_START
+# MIN_CONFIG = /home/test/config-test1
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-default
+#
+# TEST_START ITERATE 10
+#
+# The above will run the first test with MIN_CONFIG set to
+# /home/test/config-test1. Then 10 tests will be executed
+# with MIN_CONFIG with /home/test/config-default.
+#
+# You can also disable defaults with the SKIP option
+#
+# DEFAULTS SKIP
+# MIN_CONFIG = /home/test/config-use-sometimes
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-most-times
+#
+# The above will ignore the first MIN_CONFIG. If you want to
+# use the first MIN_CONFIG, remove the SKIP from the first
+# DEFAULTS tag and add it to the second. Be careful, options
+# may only be declared once per test or default. If you have
+# the same option name under the same test or as default
+# ktest will fail to execute, and no tests will run.
+#
+#### Mandatory Default Options ####
+# These options must be in the default section, although most
+# may be overridden by test options.
+# The machine hostname that you will test
+#MACHINE = target
+# The box is expected to have ssh on normal bootup, provide the user
+#  (most likely root, since you need privileged operations)
+#SSH_USER = root
+# The directory that contains the Linux source code
+#BUILD_DIR = /home/test/linux.git
+# The directory that the objects will be built
+# (can not be same as BUILD_DIR)
+#OUTPUT_DIR = /home/test/build/target
+# The location of the compiled file to copy to the target
+# (relative to OUTPUT_DIR)
+#BUILD_TARGET = arch/x86/boot/bzImage
+# The place to put your image on the test machine
+#TARGET_IMAGE = /boot/vmlinuz-test
+# A script or command to reboot the box
+#
+# Here is a digital loggers power switch example
+#POWER_CYCLE = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL'
+#
+# Here is an example to reboot a virtual box on the current host
+# with the name "Guest".
+#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+# The script or command that reads the console
+#
+#  If you use ttywatch server, something like the following would work.
+#CONSOLE = nc -d localhost 3001
+#
+# For a virtual machine with guest name "Guest".
+#CONSOLE =  virsh console Guest
+# Required version ending to differentiate the test
+# from other linux builds on the system.
+#LOCALVERSION = -test
+# The grub title name for the test kernel to boot
+# (Only mandatory if REBOOT_TYPE = grub)
+#
+# Note, ktest.pl will not update the grub menu.lst, you need to
+# manually add an option for the test. ktest.pl will search
+# the grub menu.lst for this option to find what kernel to
+# reboot into.
+#
+# For example, if in the /boot/grub/menu.lst the test kernel title has:
+# title Test Kernel
+# kernel vmlinuz-test
+#GRUB_MENU = Test Kernel
+# A script to reboot the target into the test kernel
+# (Only mandatory if REBOOT_TYPE = script)
+#REBOOT_SCRIPT =
+#### Optional Config Options (all have defaults) ####
+# Start a test setup. If you leave this off, all options
+# will be default and the test will run once.
+# This is a label and not really an option (it takes no value).
+# You can append ITERATE and a number after it to iterate the
+# test a number of times, or SKIP to ignore this test.
+#
+#TEST_START
+#TEST_START ITERATE 5
+#TEST_START SKIP
+# Have the following options as default again. Used after tests
+# have already been defined by TEST_START. Optionally, you can
+# just define all default options before the first TEST_START
+# and you do not need this option.
+#
+# This is a label and not really an option (it takes no value).
+# You can append SKIP to this label and the options within this
+# section will be ignored.
+#
+# DEFAULTS
+# DEFAULTS SKIP
+# The default test type (default test)
+# The test types may be:
+#   build - only build the kernel, do nothing else
+#   boot - build and boot the kernel
+#   test - build, boot and if TEST is set, run the test script
+#          (If TEST is not set, it defaults back to boot)
+#   bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
+#   patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
+#TEST_TYPE = test
+# Test to run if there is a successful boot and TEST_TYPE is test.
+# Must exit with 0 on success and non zero on error
+# default (undefined)
+#TEST = ssh user@machine /root/run_test
+# The build type is any make config type or special command
+#  (default randconfig)
+#   nobuild - skip the clean and build step
+#   useconfig:/path/to/config - use the given config and run
+#              oldconfig on it.
+# This option is ignored if TEST_TYPE is patchcheck or bisect
+#BUILD_TYPE = randconfig
+# The make command (default make)
+# If you are building a 32bit x86 on a 64 bit host
+#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386
+# Any build options for the make of the kernel (not for other makes, like configs)
+# (default "")
+#BUILD_OPTIONS = -j20
+# If you need an initrd, you can add a script or code here to install
+# it. The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used. Remember to add the initrd line
+# to your grub menu.lst file.
+#
+# Here's a couple of examples to use:
+#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION
+#
+# or on some systems:
+#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+# Way to reboot the box to the test kernel.
+# Only valid options so far are "grub" and "script"
+# (default grub)
+# If you specify grub, it will assume grub version 1
+# and will search in /boot/grub/menu.lst for the title $GRUB_MENU
+# and select that target to reboot to the kernel. If this is not
+# your setup, then specify "script" and have a command or script
+# specified in REBOOT_SCRIPT to boot to the target.
+#
+# The entry in /boot/grub/menu.lst must be entered in manually.
+# The test will not modify that file.
+#REBOOT_TYPE = grub
+# The min config that is needed to build for the machine
+# A nice way to create this is with the following:
+#
+#   $ ssh target
+#   $ lsmod > mymods
+#   $ scp mymods host:/tmp
+#   $ exit
+#   $ cd linux.git
+#   $ rm .config
+#   $ make LSMOD=mymods localyesconfig
+#   $ grep '^CONFIG' .config > /home/test/config-min
+#
+# If you want even less configs:
+#
+#   log in directly to target (do not ssh)
+#
+#   $ su
+#   # lsmod | cut -d' ' -f1 | xargs rmmod
+#
+#   repeat the above several times
+#
+#   # lsmod > mymods
+#   # reboot
+#
+# May need to reboot to get your network back to copy the mymods
+# to the host, and then remove the previous .config and run the
+# localyesconfig again. The MIN_CONFIG generated like this will
+# not guarantee network activity to the box so the TEST_TYPE of
+# test may fail.
+#
+# You might also want to set:
+#   CONFIG_CMDLINE="<your options here>"
+#  randconfig may set the above and override your real command
+#  line options.
+# (default undefined)
+#MIN_CONFIG = /home/test/config-min
+# Sometimes there's options that just break the boot and
+# you do not care about. Here are a few:
+#   # CONFIG_STAGING is not set
+#  Staging drivers are horrible, and can break the build.
+#   # CONFIG_SCSI_DEBUG is not set
+#  SCSI_DEBUG may change your root partition
+#   # CONFIG_KGDB_SERIAL_CONSOLE is not set
+#  KGDB may cause oops waiting for a connection that's not there.
+# This option points to the file containing config options that will be prepended
+# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set)
+#
+# Note, config options in MIN_CONFIG will override these options.
+#
+# (default undefined)
+#ADD_CONFIG = /home/test/config-broken
+# The location on the host where to write temp files
+# (default /tmp/ktest)
+#TMP_DIR = /tmp/ktest
+# Optional log file to write the status (recommended)
+#  Note, this is a DEFAULT section only option.
+# (default undefined)
+#LOG_FILE = /home/test/logfiles/target.log
+# Remove old logfile if it exists before starting all tests.
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#CLEAR_LOG = 0
+# Line to define a successful boot up in console output.
+# This is what the line contains, not the entire line. If you need
+# the entire line to match, then use regular expression syntax like:
+#  (do not add any quotes around it)
+#
+#  SUCCESS_LINE = ^MyBox Login:$
+#
+# (default "login:")
+#SUCCESS_LINE = login:
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after success is recommended.
+# (in seconds)
+# (default 10)
+#STOP_AFTER_SUCCESS = 10
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after failure is recommended.
+# (in seconds)
+# (default 60)
+#STOP_AFTER_FAILURE = 60
+# Stop testing if a build fails. If set, the script will end if
+# a failure is detected, otherwise it will save off the .config,
+# dmesg and bootlog in a directory called
+# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss
+# if the STORE_FAILURES directory is set.
+# (default 1)
+# Note, even if this is set to zero, there are some errors that still
+# stop the tests.
+#DIE_ON_FAILURE = 1
+# Directory to store failure directories on failure. If this is not
+# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and
+# bootlog. This option is ignored if DIE_ON_FAILURE is set (non-zero).
+# (default undefined)
+#STORE_FAILURES = /home/test/failures
+# Build without doing a make mrproper, or removing .config
+# (default 0)
+#BUILD_NOCLEAN = 0
+# As the test reads the console, after it hits the SUCCESS_LINE
+# the time it waits for the monitor to settle down between reads
+# can usually be lowered.
+# (in seconds) (default 1)
+#BOOTED_TIMEOUT = 1
+# The timeout in seconds when we consider the box hung after
+# the console stops producing output. Be sure to leave enough
+# time here to get past a reboot. Some machines may not produce
+# any console output for a long time during a reboot. You do
+# not want the test to fail just because the system was in
+# the process of rebooting to the test kernel.
+# (default 120)
+#TIMEOUT = 120
+# In between tests, a reboot of the box may occur, and this
+# is the time to wait for the console after it stops producing
+# output. Some machines may produce a large lag on reboot
+# so this should accommodate it.
+# The difference between this and TIMEOUT, is that TIMEOUT happens
+# when rebooting to the test kernel. This sleep time happens
+# after a test has completed and we are about to start running
+# another test. If a reboot to the reliable kernel happens,
+# we wait SLEEP_TIME for the console to stop producing output
+# before starting the next test.
+# (default 60)
+#SLEEP_TIME = 60
+# The time in between bisects to sleep (in seconds)
+# (default 60)
+#BISECT_SLEEP_TIME = 60
+# Reboot the target box on error (default 0)
+#REBOOT_ON_ERROR = 0
+# Power off the target on error (ignored if REBOOT_ON_ERROR is set)
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_ERROR = 0
+# Power off the target after all tests have completed successfully
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_SUCCESS = 0
+# Reboot the target after all tests have completed successfully (default 1)
+# (ignored if POWEROFF_ON_SUCCESS is set)
+#REBOOT_ON_SUCCESS = 1
+# In case there are issues with rebooting, you can specify this
+# to always powercycle after this amount of time after calling
+# reboot.
+# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
+# makes it powercycle immediately after rebooting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWERCYCLE_AFTER_REBOOT = 5
+# In case there are issues with halting, you can specify this
+# to always poweroff after this amount of time after calling
+# halt.
+# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
+# makes it poweroff immediately after halting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWEROFF_AFTER_HALT = 20
+# A script or command to power off the box (default undefined)
+# Needed for POWEROFF_ON_ERROR and SUCCESS
+#
+# Example for digital loggers power switch:
+#POWER_OFF = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF'
+#
+# Example for a virtual guest called "Guest".
+#POWER_OFF = virsh destroy Guest
+# The way to execute a command on the target
+# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND)
+# The variables SSH_USER, MACHINE and SSH_COMMAND are defined
+#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND
+# The way to copy a file to the target
+# (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE)
+# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined.
+#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE
+# The nice way to reboot the target
+# (default ssh $SSH_USER@$MACHINE reboot)
+# The variables SSH_USER and MACHINE are defined.
+#REBOOT = ssh $SSH_USER@$MACHINE reboot
+#### Per test run options ####
+# The following options are only allowed in TEST_START sections.
+# They are ignored in the DEFAULTS sections.
+#
+# All of these are optional and undefined by default, although
+#  some of these options are required for TEST_TYPE of patchcheck
+#  and bisect.
+#
+#
+# CHECKOUT = branch
+#
+#  If the BUILD_DIR is a git repository, then you can set this option
+#  to checkout the given branch before running the TEST. If you
+#  specify this for the first run, that branch will be used for
+#  all subsequent tests until a new CHECKOUT is set.
+#
+#
+#
+# For TEST_TYPE = patchcheck
+#
+#  This expects the BUILD_DIR to be a git repository, and
+#  will checkout the PATCHCHECK_START commit.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  The MIN_CONFIG will be used for all builds of the patchcheck. The build type
+#  used for patchcheck is oldconfig.
+#
+#  PATCHCHECK_START is required and is the first patch to
+#   test (the SHA1 of the commit). You may also specify anything
+#   that git checkout allows (branch name, tag, HEAD~3).
+#
+#  PATCHCHECK_END is the last patch to check (default HEAD)
+#
+#  PATCHCHECK_TYPE is required and is the type of test to run:
+#      build, boot, test.
+#
+#   Note, the build test will look for warnings, if a warning occurred
+#     in a file that a commit touches, the build will fail.
+#
+#   If BUILD_NOCLEAN is set, then make mrproper will not be run on
+#   any of the builds, just like all other TEST_TYPE tests. But
+#   what makes patchcheck different from the other tests, is if
+#   BUILD_NOCLEAN is not set, only the first and last patch run
+#   make mrproper. This helps speed up the test.
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = patchcheck
+#   CHECKOUT = mybranch
+#   PATCHCHECK_TYPE = boot
+#   PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7
+#   PATCHCHECK_END = HEAD~2
+#
+#
+#
+# For TEST_TYPE = bisect
+#
+#  You can specify a git bisect if the BUILD_DIR is a git repository.
+#  The MIN_CONFIG will be used for all builds of the bisect. The build type
+#  used for bisecting is oldconfig.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  BISECT_TYPE is the type of test to perform:
+#       build   - bad fails to build
+#       boot    - bad builds but fails to boot
+#       test    - bad boots but fails a test
+#
+# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types)
+# BISECT_BAD is the commit to label as bad (accepts all git bad commit types)
+#
+# The above three options are required for a bisect operation.
+#
+# BISECT_REPLAY = /path/to/replay/file (optional, default undefined)
+#
+#   If an operation in the bisect fails that was not expected to
+#   fail, then the test ends. The state of the BUILD_DIR will be
+#   left off at where the failure occurred. You can examine the
+#   reason for the failure, and perhaps even find a git commit
+#   that would work to continue with. You can run:
+#
+#   git bisect log > /path/to/replay/file
+#
+#   Then add:
+#
+#    BISECT_REPLAY= /path/to/replay/file
+#
+#   and run the test again. The test will perform the initial
+#    git bisect start, git bisect good, and git bisect bad, and
+#    then it will run git bisect replay on this file, before
+#    continuing with the bisect.
+#
+# BISECT_START = commit (optional, default undefined)
+#
+#   As with BISECT_REPLAY, if the test failed on a commit that
+#   just happens to have a bad commit in the middle of the bisect,
+#   and you need to skip it. If BISECT_START is defined, it
+#   will checkout that commit after doing the initial git bisect start,
+#   git bisect good, git bisect bad, and running the git bisect replay
+#   if the BISECT_REPLAY is set.
+#
+# BISECT_REVERSE = 1 (optional, default 0)
+#
+#   In those strange instances where it was broken forever
+#   and you are trying to find where it started to work!
+#   Set BISECT_GOOD to the commit that was last known to fail
+#   Set BISECT_BAD to the commit that is known to start working.
+#   With BISECT_REVERSE = 1, The test will consider failures as
+#   good, and success as bad.
+#
+# BISECT_CHECK = 1 (optional, default 0)
+#
+#   Just to be sure the good is good and bad is bad, setting
+#   BISECT_CHECK to 1 will start the bisect by first checking
+#   out BISECT_BAD and makes sure it fails, then it will check
+#   out BISECT_GOOD and makes sure it succeeds before starting
+#   the bisect (it works for BISECT_REVERSE too).
+#
+#   You can limit the test to just check BISECT_GOOD or
+#   BISECT_BAD with BISECT_CHECK = good or
+#   BISECT_CHECK = bad, respectively.
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = bisect
+#   BISECT_GOOD = v2.6.36
+#   BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e
+#   BISECT_TYPE = build
+#   MIN_CONFIG = /home/test/config-bisect
+#
+#
+#
+# For TEST_TYPE = config_bisect
+#
+#  In those cases that you have two different configs. One of them
+#  works, the other does not, and you do not know what config causes
+#  the problem.
+#  The TEST_TYPE config_bisect will bisect the bad config looking for
+#  what config causes the failure.
+#
+#  The way it works is this:
+#
+#   First it finds a config to work with. Since a different version, or
+#   MIN_CONFIG may cause different dependencies, it must run through this
+#   preparation.
+#
+#   Overwrites any config set in the bad config with a config set in
+#   either the MIN_CONFIG or ADD_CONFIG. Thus, make sure these configs
+#   are minimal and do not disable configs you want to test:
+#   (ie.  # CONFIG_FOO is not set).
+#
+#   An oldconfig is run on the bad config and any new config that
+#   appears will be added to the configs to test.
+#
+#   Finally, it generates a config with the above result and runs it
+#   again through make oldconfig to produce a config that should be
+#   satisfied by kconfig.
+#
+#   Then it starts the bisect.
+#
+#   The configs to test are cut in half. If all the configs in this
+#   half depend on a config in the other half, then the other half
+#   is tested instead. If no configs are enabled by either half, then
+#   this means a circular dependency exists and the test fails.
+#
+#   A config is created with the test half, and the bisect test is run.
+#
+#   If the bisect succeeds, then all configs in the generated config
+#   are removed from the configs to test and added to the configs that
+#   will be enabled for all builds (they will be enabled, but not be part
+#   of the configs to examine).
+#
+#   If the bisect fails, then all test configs that were not enabled by
+#   the config file are removed from the test. These configs will not
+#   be enabled in future tests. Since current config failed, we consider
+#   this to be a subset of the config that we started with.
+#
+#   When we are down to one config, it is considered the bad config.
+#
+#   Note, the config chosen may not be the true bad config. Due to
+#   dependencies and selections of the kbuild system, multiple
+#   configs may be needed to cause a failure. If you disable the
+#   config that was found and restart the test, if the test fails
+#   again, it is recommended to rerun the config_bisect with a new
+#   bad config without the found config enabled.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  CONFIG_BISECT_TYPE is the type of test to perform:
+#       build   - bad fails to build
+#       boot    - bad builds but fails to boot
+#       test    - bad boots but fails a test
+#
+#   CONFIG_BISECT is the config that failed to boot
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = config_bisect
+#   CONFIG_BISECT_TYPE = build
+#   CONFIG_BISECT = /home/test/config-bad
+#   MIN_CONFIG = /home/test/config-min
+#
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
new file mode 100644
index 000000000000..d1d442ed106a
--- /dev/null
+++ b/tools/virtio/Makefile
@@ -0,0 +1,12 @@
+# Builds the userspace virtio_test binary ("test") and the vhost_test
+# kernel module ("mod").
+all: test mod
+test: virtio_test
+virtio_test: virtio_ring.o virtio_test.o
+# -I. adds this directory to the include path; -MMD makes the compiler
+# write the *.d dependency files that are included at the bottom.
+CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow  -MMD
+# Sources not found here are searched for in ../../drivers/virtio.
+vpath %.c ../../drivers/virtio
+# Run make in the directory two levels up with M= pointing at vhost_test/.
+mod:
+        ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
+.PHONY: all test mod clean
+clean:
+        ${RM} *.o vhost_test/*.o vhost_test/.*.cmd \
+              vhost_test/Module.symvers vhost_test/modules.order *.d
+# The leading '-' keeps make quiet when no dependency files exist yet.
+-include *.d
diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h
new file mode 100644
index 000000000000..4ad7e1df0db5
--- /dev/null
+++ b/tools/virtio/linux/device.h
@@ -0,0 +1,2 @@
+/*
+ * Empty placeholder header: nothing is declared between the guard lines.
+ * NOTE(review): presumably present only so that an include of
+ * <linux/device.h> resolves in this userspace build - confirm.
+ */
+#ifndef LINUX_DEVICE_H
+#endif
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h
new file mode 100644
index 000000000000..81baeac8ae40
--- /dev/null
+++ b/tools/virtio/linux/slab.h
@@ -0,0 +1,2 @@
+/*
+ * Empty placeholder header: nothing is declared between the guard lines.
+ * NOTE(review): presumably present only so that an include of
+ * <linux/slab.h> resolves in this userspace build - confirm.
+ */
+#ifndef LINUX_SLAB_H
+#endif
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
new file mode 100644
index 000000000000..669bcdd45805
--- /dev/null
+++ b/tools/virtio/linux/virtio.h
@@ -0,0 +1,223 @@
+#ifndef LINUX_VIRTIO_H
+#define LINUX_VIRTIO_H
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <linux/types.h>
+#include <errno.h>
+/* Userspace stand-in for the kernel's DMA address type. */
+typedef unsigned long long dma_addr_t;
+/*
+ * Minimal scatterlist entry. page_link holds a struct page pointer value
+ * whose two low bits are used as flags (see sg_mark_end/sg_assign_page).
+ */
+struct scatterlist {
+        unsigned long   page_link;
+        unsigned int    offset;
+        unsigned int    length;
+        dma_addr_t      dma_address;
+};
+/* Placeholder page descriptor; this header only relies on its size. */
+struct page {
+        unsigned long long dummy;
+};
+/* Map the kernel assertion onto assert(): fails when the condition is true. */
+#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
+/*
+ * Address helpers. Every macro argument is parenthesized so that
+ * expression arguments such as "buf + 1" or "&sg" expand correctly.
+ */
+/* Physical == Virtual */
+#define virt_to_phys(p) ((unsigned long)(p))
+#define phys_to_virt(a) ((void *)(unsigned long)(a))
+/* Page address: Virtual / 4K */
+#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \
+                                        sizeof(struct page)))
+#define offset_in_page(p) (((unsigned long)(p)) % 4096)
+/* Inverse of virt_to_page plus the in-page offset; the two flag bits are masked off. */
+#define sg_phys(sg) (((sg)->page_link & ~0x3) / sizeof(struct page) * 4096 + \
+                     (sg)->offset)
+/* Flag @sg as the last entry of its table. */
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+        unsigned long link = sg->page_link;
+        link |= 0x02;   /* termination bit on */
+        link &= ~0x01;  /* chain bit off */
+        sg->page_link = link;
+}
+/* Zero all @nents entries of @sgl, then mark the final one as the end. */
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+        struct scatterlist *last = sgl + (nents - 1);
+        memset(sgl, 0, nents * sizeof(struct scatterlist));
+        sg_mark_end(last);
+}
+/* Store @page in @sg while keeping the two flag bits already in page_link. */
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+        unsigned long addr = (unsigned long) page;
+        unsigned long flags = sg->page_link & 0x3;
+        /*
+         * The two low bits of page_link are borrowed for flags, so the
+         * page value itself must have them clear.
+         */
+        BUG_ON(addr & 0x03);
+        sg->page_link = addr | flags;
+}
+/* Point @sg at @page and record the byte range [@offset, @offset + @len). */
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+                               unsigned int len, unsigned int offset)
+{
+        sg_assign_page(sg, page);
+        sg->length = len;
+        sg->offset = offset;
+}
+/* Describe the @buflen bytes at @buf in @sg as page + offset-in-page. */
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+                              unsigned int buflen)
+{
+        struct page *pg = virt_to_page(buf);
+        unsigned int off = offset_in_page(buf);
+        sg_set_page(sg, pg, buflen, off);
+}
+/* Initialise @sg as a one-entry table covering @buflen bytes at @buf. */
+static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
+{
+        /* one-entry form of sg_init_table(): clear the entry, mark it last */
+        memset(sg, 0, sizeof(*sg));
+        sg_mark_end(sg);
+        sg_set_buf(sg, buf, buflen);
+}
+/* Kernel-style short name for the 16-bit unsigned type. */
+typedef __u16 u16;
+/* Allocation flags; kmalloc() in this header accepts but ignores them. */
+typedef enum {
+        GFP_KERNEL,
+        GFP_ATOMIC,
+} gfp_t;
+/* Interrupt handler return codes. */
+typedef enum {
+        IRQ_NONE,
+        IRQ_HANDLED
+} irqreturn_t;
+/* Userspace kmalloc(): forwards to malloc(); @gfp is accepted but unused. */
+static inline void *kmalloc(size_t s, gfp_t gfp)
+{
+        void *mem = malloc(s);
+        return mem;
+}
+/* Userspace kfree(): forwards to free(). */
+static inline void kfree(void *p)
+{
+        free(p);
+}
+/*
+ * Given @ptr to the field @member inside a @type, yield a pointer to the
+ * enclosing @type. The typed temporary makes the compiler check that
+ * @ptr matches the member's type.
+ */
+#define container_of(ptr, type, member) ({                      \
+        const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+        (type *)( (char *)__mptr - offsetof(type,member) );})
+/* Self-assignment, used in declarations as "type uninitialized_var(name);". */
+#define uninitialized_var(x) x = x
+/* Branch prediction hints; !! normalises the condition to 0 or 1. */
+# ifndef likely
+#  define likely(x)     (__builtin_expect(!!(x), 1))
+# endif
+# ifndef unlikely
+#  define unlikely(x)   (__builtin_expect(!!(x), 0))
+# endif
+/* Kernel logging helpers mapped onto stderr. */
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+/* pr_debug only prints when DEBUG is defined; otherwise it expands to nothing. */
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+/* The @dev argument is dropped; the message goes to stderr. */
+#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+/* Bitmap helpers: index of the long holding bit @nr, and the mask within it. */
+#define BIT_WORD(nr)            ((nr) / BITS_PER_LONG)
+#define BITS_PER_BYTE           8
+#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
+#define BIT_MASK(nr)            (1UL << ((nr) % BITS_PER_LONG))
+/* Clear bit @nr in the bitmap at @addr.
+ * TODO: plain read-modify-write, not atomic as it should be;
+ * nothing important depends on it here. */
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+        unsigned long *word = (unsigned long *)addr + BIT_WORD(nr);
+        *word &= ~BIT_MASK(nr);
+}
+/* Return 1 if bit @nr is set in the bitmap at @addr, 0 otherwise. */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+        unsigned long word = addr[BIT_WORD(nr)];
+        unsigned int shift = nr & (BITS_PER_LONG - 1);
+        return (word >> shift) & 1UL;
+}
+/* The only feature we care to support */
+/* Tests bit @feature in the device's features bitmap. */
+#define virtio_has_feature(dev, feature) \
+        test_bit((feature), (dev)->features)
+/* end of stubs */
+/* Cut-down device: an opaque pointer plus a single long of feature bits. */
+struct virtio_device {
+        void *dev;
+        unsigned long features[1];
+};
+/* Cut-down virtqueue handle. */
+struct virtqueue {
+        /* TODO: commented as list macros are empty stubs for now.
+         * Broken but enough for virtio_ring.c
+         * struct list_head list; */
+        void (*callback)(struct virtqueue *vq);
+        const char *name;
+        struct virtio_device *vdev;
+        /* Opaque pointer for the queue's user. */
+        void *priv;
+};
+/* Expands to an empty function whose name is derived from the exported symbol. */
+#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \
+        void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \
+}
+/* Expands to a string variable holding the license text. */
+#define MODULE_LICENSE(__MODULE_LICENSE_value) \
+        const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value
+#define CONFIG_SMP
+/*
+ * Barrier macros, provided for x86 only: smp_rmb()/smp_wmb() are compiler
+ * barriers, mb()/smp_mb() use __sync_synchronize(). Any other architecture
+ * stops the build with #error.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define barrier() asm volatile("" ::: "memory")
+#define mb() __sync_synchronize()
+#define smp_mb()        mb()
+# define smp_rmb()      barrier()
+# define smp_wmb()      barrier()
+#else
+#error Please fill in barrier macros
+#endif
+/* Interfaces exported by virtio_ring. */
+/*
+ * Only declarations live here; the definitions are not in this header.
+ * NOTE(review): per the Makefile's vpath they presumably come from
+ * drivers/virtio/virtio_ring.c - confirm.
+ */
+int virtqueue_add_buf_gfp(struct virtqueue *vq,
+                          struct scatterlist sg[],
+                          unsigned int out_num,
+                          unsigned int in_num,
+                          void *data,
+                          gfp_t gfp);
+/* Convenience wrapper: virtqueue_add_buf_gfp() with gfp fixed to GFP_ATOMIC. */
+static inline int virtqueue_add_buf(struct virtqueue *vq,
+                                    struct scatterlist sg[],
+                                    unsigned int out_num,
+                                    unsigned int in_num,
+                                    void *data)
+{
+        return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
+}
+void virtqueue_kick(struct virtqueue *vq);
+void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
+void virtqueue_disable_cb(struct virtqueue *vq);
+bool virtqueue_enable_cb(struct virtqueue *vq);
+void *virtqueue_detach_unused_buf(struct virtqueue *vq);
+/* Creates a virtqueue over caller-supplied ring memory (@pages). */
+struct virtqueue *vring_new_virtqueue(unsigned int num,
+                                      unsigned int vring_align,
+                                      struct virtio_device *vdev,
+                                      void *pages,
+                                      void (*notify)(struct virtqueue *vq),
+                                      void (*callback)(struct virtqueue *vq),
+                                      const char *name);
+void vring_del_virtqueue(struct virtqueue *vq);
+#endif
diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile
new file mode 100644
index 000000000000..a1d35b81b314
--- /dev/null
+++ b/tools/virtio/vhost_test/Makefile
@@ -0,0 +1,2 @@
+# Build vhost_test.o as a module; drivers/vhost is added to the include path.
+# NOTE(review): presumably so that vhost_test.c's #include "test.c" resolves
+# to drivers/vhost/test.c - confirm.
+obj-m += vhost_test.o
+EXTRA_CFLAGS += -Idrivers/vhost
diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c
new file mode 100644
index 000000000000..18735189e62b
--- /dev/null
+++ b/tools/virtio/vhost_test/vhost_test.c
@@ -0,0 +1 @@
+#include "test.c"
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
new file mode 100644
index 000000000000..df0c6d2c3860
--- /dev/null
+++ b/tools/virtio/virtio_test.c
@@ -0,0 +1,248 @@
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <linux/vhost.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include "../../drivers/vhost/test.h"
+/* Per-virtqueue state kept by the test. */
+struct vq_info {
+        /* eventfd written by vq_notify() and handed to VHOST_SET_VRING_KICK */
+        int kick;
+        /* eventfd handed to VHOST_SET_VRING_CALL and polled for completions */
+        int call;
+        int num;
+        /* index of this queue within vdev_info.vqs[] */
+        int idx;
+        /* page-aligned ring memory from posix_memalign() */
+        void *ring;
+        /* copy used for control */
+        struct vring vring;
+        struct virtqueue *vq;
+};
+/* Whole-device test state; fds[] and vqs[] each have room for one queue. */
+struct vdev_info {
+        struct virtio_device vdev;
+        /* file descriptor of /dev/vhost-test */
+        int control;
+        struct pollfd fds[1];
+        struct vq_info vqs[1];
+        /* number of entries of vqs[]/fds[] in use */
+        int nvqs;
+        /* data buffer registered with vhost as the only memory region */
+        void *buf;
+        size_t buf_size;
+        struct vhost_memory *mem;
+};
+/* Notify hook: writes a 64-bit 1 to the queue's kick eventfd. */
+void vq_notify(struct virtqueue *vq)
+{
+        struct vq_info *info = vq->priv;
+        unsigned long long one = 1;
+        int written = write(info->kick, &one, sizeof one);
+        /* the write must transfer the whole counter value */
+        assert(written == sizeof one);
+}
+/* Completion callback passed to vring_new_virtqueue(); does nothing. */
+void vq_callback(struct virtqueue *vq)
+{
+}
+/*
+ * Describe one virtqueue to the vhost device behind dev->control.
+ * In order it sets: the feature bits, the ring size, a ring base of 0,
+ * the userspace addresses of the descriptor/avail/used areas, and the
+ * kick and call eventfds. Every ioctl is required to succeed (assert).
+ */
+void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+        struct vhost_vring_state state = { .index = info->idx };
+        struct vhost_vring_file file = { .index = info->idx };
+        unsigned long long features = dev->vdev.features[0];
+        /* Pointers go through unsigned long so they widen to 64 bits without sign extension. */
+        struct vhost_vring_addr addr = {
+                .index = info->idx,
+                .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+                .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+                .used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+        };
+        int r;
+        r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+        assert(r >= 0);
+        /* state is reused: first for the ring size, then for the base (0). */
+        state.num = info->vring.num;
+        r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+        assert(r >= 0);
+        state.num = 0;
+        r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+        assert(r >= 0);
+        r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+        assert(r >= 0);
+        /* file is reused: first for the kick fd, then for the call fd. */
+        file.fd = info->kick;
+        r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+        assert(r >= 0);
+        file.fd = info->call;
+        r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+        assert(r >= 0);
+}
+/*
+ * Create the next virtqueue of @dev with @num ring entries: allocate the
+ * two eventfds and a zeroed, 4096-aligned ring, build the virtqueue on
+ * top of it, register it with vhost and add its call fd to the poll set.
+ * Any failure trips an assert().
+ */
+static void vq_info_add(struct vdev_info *dev, int num)
+{
+        struct vq_info *info;
+        int r;
+        /* vqs[] and fds[] are fixed-size arrays; never index past them. */
+        assert(dev->nvqs >= 0 &&
+               (size_t)dev->nvqs < sizeof dev->vqs / sizeof dev->vqs[0]);
+        info = &dev->vqs[dev->nvqs];
+        info->idx = dev->nvqs;
+        info->kick = eventfd(0, EFD_NONBLOCK);
+        assert(info->kick >= 0);
+        info->call = eventfd(0, EFD_NONBLOCK);
+        assert(info->call >= 0);
+        /*
+         * posix_memalign() returns 0 on success and a positive error
+         * number on failure, so only "== 0" detects a failed allocation.
+         */
+        r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+        assert(r == 0);
+        memset(info->ring, 0, vring_size(num, 4096));
+        vring_init(&info->vring, num, info->ring, 4096);
+        info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+                                       vq_notify, vq_callback, "test");
+        assert(info->vq);
+        /* vq_notify() finds its vq_info through priv. */
+        info->vq->priv = info;
+        vhost_vq_setup(dev, info);
+        dev->fds[info->idx].fd = info->call;
+        dev->fds[info->idx].events = POLLIN;
+        dev->nvqs++;
+}
+/*
+ * Prepare @dev for testing: zero it, record the feature bits, allocate
+ * the 1024-byte data buffer, open /dev/vhost-test, claim ownership of it
+ * and register the data buffer as the single memory region, with the
+ * guest physical address equal to the userspace address.
+ * Every step is checked with assert().
+ */
+static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
+{
+        /* vhost_memory header followed by exactly one region entry */
+        size_t mem_size = offsetof(struct vhost_memory, regions) +
+                          sizeof dev->mem->regions[0];
+        int r;
+        memset(dev, 0, sizeof *dev);
+        /*
+         * vdev.features[] has exactly one element, so only features[0] may
+         * be written; storing the upper half in features[1] would run past
+         * the end of the array.
+         */
+        dev->vdev.features[0] = features;
+        dev->buf_size = 1024;
+        dev->buf = malloc(dev->buf_size);
+        assert(dev->buf);
+        dev->control = open("/dev/vhost-test", O_RDWR);
+        assert(dev->control >= 0);
+        r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+        assert(r >= 0);
+        dev->mem = malloc(mem_size);
+        assert(dev->mem);
+        memset(dev->mem, 0, mem_size);
+        dev->mem->nregions = 1;
+        /*
+         * Go through unsigned long (as vhost_vq_setup does) so a pointer
+         * with the top bit set is not sign-extended when widened.
+         */
+        dev->mem->regions[0].guest_phys_addr = (unsigned long)dev->buf;
+        dev->mem->regions[0].userspace_addr = (unsigned long)dev->buf;
+        dev->mem->regions[0].memory_size = dev->buf_size;
+        r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+        assert(r >= 0);
+}
+/* Block in poll() until a call eventfd is readable, then drain every
+ * readable one with a read().
+ * TODO: this is pretty bad: the wait queue cache line bounces once for
+ * the poll and once more for the read, and the read exists only to
+ * clear the current state. */
+static void wait_for_interrupt(struct vdev_info *dev)
+{
+        unsigned long long val;
+        int i = 0;
+        poll(dev->fds, dev->nvqs, -1);
+        while (i < dev->nvqs) {
+                struct pollfd *pfd = &dev->fds[i++];
+                if (!(pfd->revents & POLLIN))
+                        continue;
+                read(pfd->fd, &val, sizeof val);
+        }
+}
+/*
+ * Push @bufs buffers through @vq and wait until all of them have come
+ * back. The test is switched on with VHOST_TEST_RUN (1) before the loop
+ * and off (0) after it; the number of outer-loop passes that reaped no
+ * completion is printed to stderr at the end.
+ */
+static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
+{
+        struct scatterlist sl;
+        long started = 0, completed = 0;
+        long completed_before;
+        int r, test = 1;
+        unsigned len;
+        long long spurious = 0;
+        r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+        assert(r >= 0);
+        for (;;) {
+                virtqueue_disable_cb(vq->vq);
+                completed_before = completed;
+                /*
+                 * Keep going while either an add succeeded or a buffer was
+                 * reaped: r ends up negative only when nothing could be
+                 * added and nothing was completed in the same pass.
+                 */
+                do {
+                        if (started < bufs) {
+                                /* Same data buffer every time; the token is buf + started. */
+                                sg_init_one(&sl, dev->buf, dev->buf_size);
+                                r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
+                                                      dev->buf + started);
+                                if (likely(r >= 0)) {
+                                        ++started;
+                                        virtqueue_kick(vq->vq);
+                                }
+                        } else
+                                r = -1;
+                        /* Flush out completed bufs if any */
+                        if (virtqueue_get_buf(vq->vq, &len)) {
+                                ++completed;
+                                r = 0;
+                        }
+                } while (r >= 0);
+                /* No progress in this pass counts as a spurious wakeup. */
+                if (completed == completed_before)
+                        ++spurious;
+                assert(completed <= bufs);
+                assert(started <= bufs);
+                if (completed == bufs)
+                        break;
+                /* Sleep only when virtqueue_enable_cb() returns true; otherwise loop again at once. */
+                if (virtqueue_enable_cb(vq->vq)) {
+                        wait_for_interrupt(dev);
+                }
+        }
+        test = 0;
+        r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+        assert(r >= 0);
+        fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+}
+/* Short options for getopt_long(): only -h. */
+const char optstring[] = "h";
+/*
+ * Long options. Fields left out of an initializer are zero. The values
+ * returned are 'h' for --help, 'I' for --indirect and 'i' for
+ * --no-indirect; the empty entry ends the table.
+ */
+const struct option longopts[] = {
+        {
+                .name = "help",
+                .val = 'h',
+        },
+        {
+                .name = "indirect",
+                .val = 'I',
+        },
+        {
+                .name = "no-indirect",
+                .val = 'i',
+        },
+        {
+        }
+};
+/* Write the one-line usage summary to stderr. */
+static void help()
+{
+        fputs("Usage: virtio_test [--help] [--no-indirect]\n", stderr);
+}
+/*
+ * Parse the command line, then set up the vhost test device with one
+ * 256-entry virtqueue and push 0x100000 buffers through it.
+ * Indirect descriptors are requested by default; --no-indirect clears
+ * the feature bit and --indirect sets it again.
+ * Returns 0 after the run; an unrecognised option prints the usage text
+ * and exits with status 2.
+ */
+int main(int argc, char **argv)
+{
+        struct vdev_info dev;
+        unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+        int o;
+        for (;;) {
+                o = getopt_long(argc, argv, optstring, longopts, NULL);
+                switch (o) {
+                case -1:
+                        goto done;
+                case '?':
+                        help();
+                        exit(2);
+                case 'h':
+                        /* Prints the usage text, then still goes on to run the test. */
+                        help();
+                        goto done;
+                case 'I':
+                        /*
+                         * --indirect is listed in longopts; without a case of
+                         * its own it would fall into the assert(0) below.
+                         */
+                        features |= 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+                        break;
+                case 'i':
+                        features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+                        break;
+                default:
+                        assert(0);
+                        break;
+                }
+        }
+done:
+        vdev_info_init(&dev, features);
+        vq_info_add(&dev, 256);
+        run_test(&dev, &dev.vqs[0], 0x100000);
+        return 0;
+}