Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/trace/Kconfig            |  11
 -rw-r--r--  kernel/trace/Makefile           |   1
 -rw-r--r--  kernel/trace/trace_workqueue.c  | 287
 -rw-r--r--  kernel/workqueue.c              |  16
 4 files changed, 314 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 944239296f13..dde1d46f77e5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -284,6 +284,17 @@ config KMEMTRACE
 
           If unsure, say N.
 
+config WORKQUEUE_TRACER
+        bool "Trace workqueues"
+        select TRACING
+        help
+          The workqueue tracer provides some statistical information about
+          each cpu workqueue thread, such as the number of works inserted
+          and executed since its creation. It can help to evaluate the
+          amount of work each of them has to perform. For example, it can
+          help a developer decide whether to choose a per-cpu workqueue
+          instead of a singlethreaded one.
+
 
 config DYNAMIC_FTRACE
         bool "enable/disable ftrace tracepoints dynamically"
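With the Kconfig entry above merged, the tracer is switched on like any other bool option; a minimal configuration fragment might look like the following (CONFIG_TRACING does not need to be set by hand, it is pulled in by the select statement):

    CONFIG_WORKQUEUE_TRACER=y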
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 05c9182061de..f76d48f3527d 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
+obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 000000000000..f8118d39ca9b
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,287 @@
+/*
+ * Workqueue statistical tracer.
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+
+#include <trace/workqueue.h>
+#include <linux/list.h>
+#include "trace_stat.h"
+#include "trace.h"
+
+
+/* A cpu workqueue thread */
+struct cpu_workqueue_stats {
+        struct list_head list;
+        /* Useful to know if we print the cpu headers */
+        bool first_entry;
+        int cpu;
+        pid_t pid;
+        /* Can be inserted from interrupt or user context, need to be atomic */
+        atomic_t inserted;
+        /*
+         * Don't need to be atomic, works are serialized in a single workqueue thread
+         * on a single CPU.
+         */
+        unsigned int executed;
+};
+
+/* List of workqueue threads on one cpu */
+struct workqueue_global_stats {
+        struct list_head list;
+        spinlock_t lock;
+};
+
+/* Don't need a global lock because allocated before the workqueues, and
+ * never freed.
+ */
+static struct workqueue_global_stats *all_workqueue_stat;
+
+/* Insertion of a work */
+static void
+probe_workqueue_insertion(struct task_struct *wq_thread,
+                          struct work_struct *work)
+{
+        int cpu = cpumask_first(&wq_thread->cpus_allowed);
+        struct cpu_workqueue_stats *node, *next;
+        unsigned long flags;
+
+        spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+        list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+                                 list) {
+                if (node->pid == wq_thread->pid) {
+                        atomic_inc(&node->inserted);
+                        goto found;
+                }
+        }
+        pr_debug("trace_workqueue: entry not found\n");
+found:
+        spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+}
+
+/* Execution of a work */
+static void
+probe_workqueue_execution(struct task_struct *wq_thread,
+                          struct work_struct *work)
+{
+        int cpu = cpumask_first(&wq_thread->cpus_allowed);
+        struct cpu_workqueue_stats *node, *next;
+        unsigned long flags;
+
+        spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+        list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+                                 list) {
+                if (node->pid == wq_thread->pid) {
+                        node->executed++;
+                        goto found;
+                }
+        }
+        pr_debug("trace_workqueue: entry not found\n");
+found:
+        spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+}
+
+/* Creation of a cpu workqueue thread */
+static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
+{
+        struct cpu_workqueue_stats *cws;
+        unsigned long flags;
+
+        WARN_ON(cpu < 0 || cpu >= num_possible_cpus());
+
+        /* Workqueues are sometimes created in atomic context */
+        cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
+        if (!cws) {
+                pr_warning("trace_workqueue: not enough memory\n");
+                return;
+        }
+        tracing_record_cmdline(wq_thread);
+
+        INIT_LIST_HEAD(&cws->list);
+        cws->cpu = cpu;
+
+        cws->pid = wq_thread->pid;
+
+        spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+        if (list_empty(&all_workqueue_stat[cpu].list))
+                cws->first_entry = true;
+        list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
+        spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+}
+
+/* Destruction of a cpu workqueue thread */
+static void probe_workqueue_destruction(struct task_struct *wq_thread)
+{
+        /* Workqueue only execute on one cpu */
+        int cpu = cpumask_first(&wq_thread->cpus_allowed);
+        struct cpu_workqueue_stats *node, *next;
+        unsigned long flags;
+
+        spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+        list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+                                 list) {
+                if (node->pid == wq_thread->pid) {
+                        list_del(&node->list);
+                        kfree(node);
+                        goto found;
+                }
+        }
+
+        pr_debug("trace_workqueue: don't find workqueue to destroy\n");
+found:
+        spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+
+}
+
+static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
+{
+        unsigned long flags;
+        struct cpu_workqueue_stats *ret = NULL;
+
+
+        spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+
+        if (!list_empty(&all_workqueue_stat[cpu].list))
+                ret = list_entry(all_workqueue_stat[cpu].list.next,
+                                 struct cpu_workqueue_stats, list);
+
+        spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+
+        return ret;
+}
+
+static void *workqueue_stat_start(void)
+{
+        int cpu;
+        void *ret = NULL;
+
+        for_each_possible_cpu(cpu) {
+                ret = workqueue_stat_start_cpu(cpu);
+                if (ret)
+                        return ret;
+        }
+        return NULL;
+}
+
+static void *workqueue_stat_next(void *prev, int idx)
+{
+        struct cpu_workqueue_stats *prev_cws = prev;
+        int cpu = prev_cws->cpu;
+        unsigned long flags;
+        void *ret = NULL;
+
+        spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+        if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
+                spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+                for (++cpu ; cpu < num_possible_cpus(); cpu++) {
+                        ret = workqueue_stat_start_cpu(cpu);
+                        if (ret)
+                                return ret;
+                }
+                return NULL;
+        }
+        spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+
+        return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
+                          list);
+}
+
+static int workqueue_stat_show(struct seq_file *s, void *p)
+{
+        struct cpu_workqueue_stats *cws = p;
+        unsigned long flags;
+        int cpu = cws->cpu;
+
+        seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
+                   atomic_read(&cws->inserted),
+                   cws->executed,
+                   trace_find_cmdline(cws->pid));
+
+        spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+        if (&cws->list == all_workqueue_stat[cpu].list.next)
+                seq_printf(s, "\n");
+        spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+
+        return 0;
+}
+
+static int workqueue_stat_headers(struct seq_file *s)
+{
+        seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
+        seq_printf(s, "# | | | |\n\n");
+        return 0;
+}
+
+struct tracer_stat workqueue_stats __read_mostly = {
+        .name = "workqueues",
+        .stat_start = workqueue_stat_start,
+        .stat_next = workqueue_stat_next,
+        .stat_show = workqueue_stat_show,
+        .stat_headers = workqueue_stat_headers
+};
+
+
+int __init stat_workqueue_init(void)
+{
+        if (register_stat_tracer(&workqueue_stats)) {
+                pr_warning("Unable to register workqueue stat tracer\n");
+                return 1;
+        }
+
+        return 0;
+}
+fs_initcall(stat_workqueue_init);
+
+/*
+ * Workqueues are created very early, just after pre-smp initcalls.
+ * So we must register our tracepoints at this stage.
+ */
+int __init trace_workqueue_early_init(void)
+{
+        int ret, cpu;
+
+        ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
+        if (ret)
+                goto out;
+
+        ret = register_trace_workqueue_execution(probe_workqueue_execution);
+        if (ret)
+                goto no_insertion;
+
+        ret = register_trace_workqueue_creation(probe_workqueue_creation);
+        if (ret)
+                goto no_execution;
+
+        ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
+        if (ret)
+                goto no_creation;
+
+        all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
+                                     * num_possible_cpus(), GFP_KERNEL);
+
+        if (!all_workqueue_stat) {
+                pr_warning("trace_workqueue: not enough memory\n");
+                goto no_creation;
+        }
+
+        for_each_possible_cpu(cpu) {
+                spin_lock_init(&all_workqueue_stat[cpu].lock);
+                INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
+        }
+
+        return 0;
+
+no_creation:
+        unregister_trace_workqueue_creation(probe_workqueue_creation);
+no_execution:
+        unregister_trace_workqueue_execution(probe_workqueue_execution);
+no_insertion:
+        unregister_trace_workqueue_insertion(probe_workqueue_insertion);
+out:
+        pr_warning("trace_workqueue: unable to trace workqueues\n");
+
+        return 1;
+}
+early_initcall(trace_workqueue_early_init);
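Once built in, the statistics collected above are exposed through the stat tracer framework that register_stat_tracer() hooks into. The listing below is only a sketch of what reading them might look like, assuming debugfs is mounted at /sys/kernel/debug and that the stat framework publishes a file named after the .name field of workqueue_stats; the thread names and counts are invented for illustration, and only the header text and the row layout follow workqueue_stat_headers() and the format string in workqueue_stat_show():

    # cat /sys/kernel/debug/tracing/trace_stat/workqueues
    # CPU INSERTED EXECUTED NAME
    # | | | |

      0    166    166 events/0
      0     36     36 khelper
      0      2      2 kblockd/0
      1    174    174 events/1
      1     15     15 kblockd/1

Comparing INSERTED against EXECUTED per thread gives a rough view of how loaded each cpu workqueue thread is, which is the evaluation the Kconfig help text refers to.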
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2f445833ae37..1fc2bc20603f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,6 +33,7 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
+#include <trace/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -125,9 +126,13 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
         return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
+DEFINE_TRACE(workqueue_insertion);
+
 static void insert_work(struct cpu_workqueue_struct *cwq,
                         struct work_struct *work, struct list_head *head)
 {
+        trace_workqueue_insertion(cwq->thread, work);
+
         set_wq_data(work, cwq);
         /*
          * Ensure that we get the right work->data if we see the
@@ -259,6 +264,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
+DEFINE_TRACE(workqueue_execution);
+
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
         spin_lock_irq(&cwq->lock);
@@ -284,7 +291,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
                  */
                 struct lockdep_map lockdep_map = work->lockdep_map;
 #endif
-
+                trace_workqueue_execution(cwq->thread, work);
                 cwq->current_work = work;
                 list_del_init(cwq->worklist.next);
                 spin_unlock_irq(&cwq->lock);
@@ -765,6 +772,8 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
         return cwq;
 }
 
+DEFINE_TRACE(workqueue_creation);
+
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
         struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -787,6 +796,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
         sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
         cwq->thread = p;
 
+        trace_workqueue_creation(cwq->thread, cpu);
+
         return 0;
 }
 
@@ -868,6 +879,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
+DEFINE_TRACE(workqueue_destruction);
+
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
         /*
@@ -891,6 +904,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
          * checks list_empty(), and a "normal" queue_work() can't use
          * a dead CPU.
          */
+        trace_workqueue_destruction(cwq->thread);
         kthread_stop(cwq->thread);
         cwq->thread = NULL;
 }
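The DEFINE_TRACE() statements and probe registrations above rely on tracepoint declarations from <trace/workqueue.h>, a header that is not part of this diff. Going by the probe signatures registered in trace_workqueue_early_init(), a declaration header of that era would presumably look roughly like the sketch below; this is an assumption for illustration, not the committed file:

    #ifndef __TRACE_WORKQUEUE_H
    #define __TRACE_WORKQUEUE_H

    #include <linux/tracepoint.h>
    #include <linux/workqueue.h>
    #include <linux/sched.h>

    /* A work has been queued on a cpu workqueue thread (fired from insert_work()) */
    DECLARE_TRACE(workqueue_insertion,
               TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
               TPARGS(wq_thread, work));

    /* A queued work is picked up for execution (fired from run_workqueue()) */
    DECLARE_TRACE(workqueue_execution,
               TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
               TPARGS(wq_thread, work));

    /* A cpu workqueue thread has been created for the given cpu */
    DECLARE_TRACE(workqueue_creation,
               TPPROTO(struct task_struct *wq_thread, int cpu),
               TPARGS(wq_thread, cpu));

    /* A cpu workqueue thread is about to be stopped */
    DECLARE_TRACE(workqueue_destruction,
               TPPROTO(struct task_struct *wq_thread),
               TPARGS(wq_thread));

    #endif /* __TRACE_WORKQUEUE_H */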