From 154cd387cdf0e5566ce523cbddf92dd2a062dfd6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 22 Sep 2010 15:58:45 +0200
Subject: genirq: Remove early_init_irq_lock_class()

early_init_irq_lock_class() is called long before anything touches the
irq descriptors. In the SPARSE_IRQ=y case it is a NOP because the radix
tree is still empty at that point. In the SPARSE_IRQ=n case it is
sufficient to set the lock class in early_irq_init().

Signed-off-by: Thomas Gleixner
Reviewed-by: Ingo Molnar
---
 include/linux/lockdep.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux/lockdep.h')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 06aed8305bf3..17d050ce7ab8 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -424,14 +424,6 @@ do {								\
 
 #endif /* CONFIG_LOCKDEP */
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-extern void early_init_irq_lock_class(void);
-#else
-static inline void early_init_irq_lock_class(void)
-{
-}
-#endif
-
 #ifdef CONFIG_TRACE_IRQFLAGS
 extern void early_boot_irqs_off(void);
 extern void early_boot_irqs_on(void);
--
cgit v1.2.2

From 620162505e5d46bc4494b1761743e4b0b3bf8e16 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake
Date: Tue, 5 Oct 2010 18:01:51 +0900
Subject: lockdep: Add improved subclass caching

The current lockdep_map caches only one class, the one with
subclass == 0, and falls back to the class hash table whenever
subclass != 0. That seemed fine because the subclass != 0 case is rare,
but the struct rq locks are acquired with subclass == 1 whenever a task
migration is executed. Task migration is a high-frequency event, so I
modified lockdep to cache subclasses as well.

I measured the score of perf bench sched messaging. The patch has a
small but consistent effect (on the order of milliseconds to tens of
milliseconds) when lots of tasks are running. The results are shown at
the tail of this description.

NR_LOCKDEP_CACHING_CLASSES specifies how many classes can be cached in
each instance of lockdep_map. I discussed this approach with Peter
Zijlstra at LinuxCon Japan, and he pointed out that caching every
subclass (all 8) would clearly be a waste of memory, so the number of
cached classes should be configurable.

=== Score comparison of benchmarks ===
# "min" means best score, and "max" means worst score

 for i in `seq 1 10`; do ./perf bench -f simple sched messaging; done

 before: min: 0.565000, max: 0.583000, avg: 0.572500
 after:  min: 0.559000, max: 0.568000, avg: 0.563300

# with more processes
 for i in `seq 1 10`; do ./perf bench -f simple sched messaging -g 40; done

 before: min: 2.274000, max: 2.298000, avg: 2.286300
 after:  min: 2.242000, max: 2.270000, avg: 2.259700

Signed-off-by: Hitoshi Mitake
Cc: Frederic Weisbecker
Signed-off-by: Peter Zijlstra
LKML-Reference: <1286269311-28336-2-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Ingo Molnar
---
 include/linux/lockdep.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux/lockdep.h')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 06aed8305bf3..2186a64ee4b5 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -31,6 +31,17 @@ extern int lock_stat;
 
 #define MAX_LOCKDEP_SUBCLASSES	8UL
 
+/*
+ * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
+ * cached in the instance of lockdep_map
+ *
+ * Currently the main class (subclass == 0) and the single-depth
+ * subclass are cached in lockdep_map. This optimization mainly
+ * targets rq->lock: double_rq_lock() acquires it at single depth
+ * on a hot, highly contended path.
+ */
+#define NR_LOCKDEP_CACHING_CLASSES	2
+
 /*
  * Lock-classes are keyed via unique addresses, by embedding the
  * lockclass-key into the kernel (or module) .data section. (For
@@ -138,7 +149,7 @@ void clear_lock_stats(struct lock_class *class);
  */
 struct lockdep_map {
 	struct lock_class_key	*key;
-	struct lock_class	*class_cache;
+	struct lock_class	*class_cache[NR_LOCKDEP_CACHING_CLASSES];
 	const char		*name;
 #ifdef CONFIG_LOCK_STAT
 	int			cpu;
--
cgit v1.2.2
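To make the caching scheme above concrete, here is a minimal user-space sketch
of the lookup path it implies: consult a small per-map cache for subclasses 0
and 1, and fall back to a slow lookup only on a miss. All names here
(my_lockdep_map, lookup_class, slow_path_lookup) are illustrative stand-ins,
not the kernel's actual register_lock_class()/look_up_lock_class() code.

/*
 * Sketch of per-map subclass caching (hypothetical names, user-space C).
 */
#include <stdio.h>

#define NR_CACHING_CLASSES	2	/* mirrors NR_LOCKDEP_CACHING_CLASSES */
#define MAX_SUBCLASSES		8	/* mirrors MAX_LOCKDEP_SUBCLASSES */

struct lock_class {
	const char *name;
	unsigned int subclass;
};

struct my_lockdep_map {
	struct lock_class *class_cache[NR_CACHING_CLASSES];	/* subclass 0 and 1 */
	const char *name;
};

/* stand-in for the hash-table walk lockdep performs on a cache miss */
static struct lock_class *slow_path_lookup(struct my_lockdep_map *map,
					   unsigned int subclass)
{
	static struct lock_class classes[MAX_SUBCLASSES];

	/* callers only pass subclass < MAX_SUBCLASSES in this sketch */
	classes[subclass].name = map->name;
	classes[subclass].subclass = subclass;
	printf("slow path: %s, subclass %u\n", map->name, subclass);
	return &classes[subclass];
}

static struct lock_class *lookup_class(struct my_lockdep_map *map,
				       unsigned int subclass)
{
	struct lock_class *class;

	/* fast path: the per-map cache covers the common subclasses 0 and 1 */
	if (subclass < NR_CACHING_CLASSES && map->class_cache[subclass])
		return map->class_cache[subclass];

	class = slow_path_lookup(map, subclass);
	if (subclass < NR_CACHING_CLASSES)
		map->class_cache[subclass] = class;	/* populate the cache */
	return class;
}

int main(void)
{
	struct my_lockdep_map rq_dep = { .name = "rq->lock" };

	lookup_class(&rq_dep, 0);	/* miss, then cached */
	lookup_class(&rq_dep, 1);	/* miss, then cached (the double_rq_lock() case) */
	lookup_class(&rq_dep, 1);	/* cache hit, no slow path */
	return 0;
}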
From e159489baa717dbae70f9903770a6a4990865887 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Sun, 9 Jan 2011 23:32:15 +0100
Subject: workqueue: relax lockdep annotation on flush_work()

Currently, the lockdep annotation in flush_work() requires exclusive
access to the workqueue the target work is queued on, and triggers a
warning if one work tries to flush another work on the same workqueue;
however, this is no longer accurate, as workqueues can now execute
multiple works concurrently.

This patch adds lock_map_acquire_read() and makes process_one_work()
hold read access to the workqueue while executing a work.
start_flush_work() checks for write access if the concurrency level is
one or the workqueue has a rescuer (as only one execution resource -
the rescuer - is guaranteed to be available under memory pressure), and
for read access otherwise.

This better represents what is actually going on and removes the
spurious lockdep warnings triggered by the false dependency chain
created through flush_work().

* Peter pointed out that flushing another work from a WQ_MEM_RECLAIM
  workqueue breaks the forward-progress guarantee under memory
  pressure. The condition check has been updated accordingly.

Signed-off-by: Tejun Heo
Reported-by: "Rafael J. Wysocki"
Tested-by: "Rafael J. Wysocki"
Cc: Peter Zijlstra
Cc: stable@kernel.org
---
 include/linux/lockdep.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/lockdep.h')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 71c09b26c759..9f19430c7d07 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -522,12 +522,15 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 # define lock_map_acquire(l)		lock_acquire(l, 0, 0, 0, 2, NULL, _THIS_IP_)
+# define lock_map_acquire_read(l)	lock_acquire(l, 0, 0, 2, 2, NULL, _THIS_IP_)
 # else
 # define lock_map_acquire(l)		lock_acquire(l, 0, 0, 0, 1, NULL, _THIS_IP_)
+# define lock_map_acquire_read(l)	lock_acquire(l, 0, 0, 2, 1, NULL, _THIS_IP_)
 # endif
 # define lock_map_release(l)		lock_release(l, 1, _THIS_IP_)
 #else
 # define lock_map_acquire(l)		do { } while (0)
+# define lock_map_acquire_read(l)	do { } while (0)
 # define lock_map_release(l)		do { } while (0)
 #endif
--
cgit v1.2.2
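A rough model of the read-vs-write decision described above: works running on a
workqueue take the lockdep map for read, and flushing needs the exclusive
(write) annotation only when a single execution resource could deadlock. This
is a stand-alone sketch with made-up types and helpers (fake_wq,
lockdep_acquire_read/write), not the actual workqueue or lockdep code.

/*
 * Sketch of the flush_work() annotation decision (hypothetical names).
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_wq {
	const char *name;
	int max_active;		/* concurrency level */
	bool has_rescuer;	/* WQ_MEM_RECLAIM workqueues have one */
};

static void lockdep_acquire_write(const char *what) { printf("acquire(W) %s\n", what); }
static void lockdep_acquire_read(const char *what)  { printf("acquire(R) %s\n", what); }

/* a work executing on the wq only takes the map for read */
static void process_one_work(struct fake_wq *wq)
{
	lockdep_acquire_read(wq->name);
}

/*
 * Flushing needs the exclusive annotation only when a single execution
 * resource could deadlock: concurrency level 1, or a rescuer that is the
 * sole guaranteed execution context under memory pressure.
 */
static void start_flush_work(struct fake_wq *wq)
{
	if (wq->max_active == 1 || wq->has_rescuer)
		lockdep_acquire_write(wq->name);
	else
		lockdep_acquire_read(wq->name);
}

int main(void)
{
	struct fake_wq events  = { "events", 256, false };
	struct fake_wq reclaim = { "reclaim-wq", 1, true };

	process_one_work(&events);
	start_flush_work(&events);	/* read: other workers can still run */
	start_flush_work(&reclaim);	/* write: single execution resource */
	return 0;
}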
From 2ce802f62ba32a7d95748ac92bf351f76affb6ff Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 20 Jan 2011 12:06:35 +0100
Subject: lockdep: Move early boot local IRQ enable/disable status to init/main.c

During early boot, local IRQs are disabled until the IRQ subsystem is
properly initialized. During this time, no one should enable local
IRQs, and some operations which usually are not allowed with IRQs
disabled, e.g. operations which might sleep or require communication
with other processors, are allowed.

lockdep tracked this state with the early_boot_irqs_off/on() callbacks.
As other subsystems need this information too, move it to init/main.c
and make it generally available. While at it, invert the boolean to
early_boot_irqs_disabled instead of enabled, so that it can be
initialized with %false and %true indicates the exceptional condition.

Signed-off-by: Tejun Heo
Acked-by: Peter Zijlstra
Acked-by: Pekka Enberg
Cc: Linus Torvalds
LKML-Reference: <20110120110635.GB6036@htj.dyndns.org>
Signed-off-by: Ingo Molnar
---
 include/linux/lockdep.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux/lockdep.h')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 71c09b26c759..f638fd78d106 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -436,16 +436,8 @@ do {								\
 #endif /* CONFIG_LOCKDEP */
 
 #ifdef CONFIG_TRACE_IRQFLAGS
-extern void early_boot_irqs_off(void);
-extern void early_boot_irqs_on(void);
 extern void print_irqtrace_events(struct task_struct *curr);
 #else
-static inline void early_boot_irqs_off(void)
-{
-}
-static inline void early_boot_irqs_on(void)
-{
-}
 static inline void print_irqtrace_events(struct task_struct *curr)
 {
 }
--
cgit v1.2.2

From e4c70a6629f9c74c4b0de258a3951890e9047c82 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 24 May 2011 17:12:03 -0700
Subject: lockdep, mutex: provide mutex_lock_nest_lock

In order to convert i_mmap_lock to a mutex we need a mutex equivalent
to spin_lock_nest_lock(), so provide the mutex_lock_nest_lock()
annotation.

As with spin_lock_nest_lock(), mutex_lock_nest_lock() annotates the
locking pattern where an outer lock serializes the acquisition order
of nested locks. That is, if every time you lock multiple locks of
class A, say A1 and A2, you first acquire N, then the order in which
A1 and A2 are acquired is irrelevant.

Signed-off-by: Peter Zijlstra
Cc: Benjamin Herrenschmidt
Cc: David Miller
Cc: Martin Schwidefsky
Cc: Russell King
Cc: Paul Mundt
Cc: Jeff Dike
Cc: Richard Weinberger
Cc: Tony Luck
Cc: KAMEZAWA Hiroyuki
Cc: Hugh Dickins
Cc: Mel Gorman
Cc: KOSAKI Motohiro
Cc: Nick Piggin
Cc: Namhyung Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/lockdep.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/lockdep.h')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 4aef1dda6406..ef820a3c378b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -487,12 +487,15 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 # define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, NULL, i)
+# define mutex_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 2, n, i)
 # else
 # define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, NULL, i)
+# define mutex_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 1, n, i)
 # endif
 # define mutex_release(l, n, i)		lock_release(l, n, i)
 #else
 # define mutex_acquire(l, s, t, i)		do { } while (0)
+# define mutex_acquire_nest(l, s, t, n, i)	do { } while (0)
 # define mutex_release(l, n, i)		do { } while (0)
 #endif
--
cgit v1.2.2
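A usage sketch for the new annotation: an outer mutex N is taken first, and
several mutexes of the same class are then nested under it with
mutex_lock_nest_lock(). The struct parent/struct child types and the
move_between() helper are hypothetical, and the snippet is kernel-style code
that only builds in a kernel context; only mutex_lock_nest_lock() itself comes
from this patch.

/*
 * Hypothetical usage of mutex_lock_nest_lock(): p->lock plays the role
 * of the outer lock N, c1->lock and c2->lock are two locks of class A.
 */
#include <linux/mutex.h>

struct parent {
	struct mutex lock;		/* the outer lock N */
};

struct child {
	struct mutex lock;		/* many instances of the same class A */
};

static void move_between(struct parent *p, struct child *c1, struct child *c2)
{
	mutex_lock(&p->lock);

	/*
	 * Both child locks belong to the same lock class. Because p->lock
	 * is held, the relative order of c1 and c2 cannot deadlock, and
	 * lockdep is told so instead of warning about A -> A nesting.
	 */
	mutex_lock_nest_lock(&c1->lock, &p->lock);
	mutex_lock_nest_lock(&c2->lock, &p->lock);

	/* ... move state between c1 and c2 ... */

	mutex_unlock(&c2->lock);
	mutex_unlock(&c1->lock);
	mutex_unlock(&p->lock);
}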