From a57eb940d130477a799dfb24a570ee04979c0f7f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 29 Jun 2010 16:49:16 -0700
Subject: rcu: Add a TINY_PREEMPT_RCU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement a small-memory-footprint, uniprocessor-only implementation of
preemptible RCU.  This implementation uses but a single blocked-tasks
list rather than the combinatorial number used per leaf rcu_node by
TREE_PREEMPT_RCU, which reduces memory consumption and greatly
simplifies processing.  This version also takes advantage of
uniprocessor execution to accelerate grace periods in the case where
there are no readers.

The general design is otherwise broadly similar to that of
TREE_PREEMPT_RCU.

This implementation is a step towards having the RCU implementation
driven off of the SMP and PREEMPT kernel configuration variables, which
can happen once this implementation has accumulated sufficient
experience.

Removed ACCESS_ONCE() from __rcu_read_unlock() and added barrier() as
suggested by Steve Rostedt in order to avoid the compiler-reordering
issue noted by Mathieu Desnoyers (http://lkml.org/lkml/2010/8/16/183).

As can be seen below, CONFIG_TINY_PREEMPT_RCU represents an almost
5Kbyte savings compared to CONFIG_TREE_PREEMPT_RCU.  Of course, for
non-real-time workloads, CONFIG_TINY_RCU is even better.

	CONFIG_TREE_PREEMPT_RCU

	  text	  data	  bss	  dec	filename
	    13	     0	    0	   13	kernel/rcupdate.o
	  6170	   825	   28	 7023	kernel/rcutree.o
				 ----
				 7036	Total

	CONFIG_TINY_PREEMPT_RCU

	  text	  data	  bss	  dec	filename
	    13	     0	    0	   13	kernel/rcupdate.o
	  2081	    81	    8	 2170	kernel/rcutiny.o
				 ----
				 2183	Total

	CONFIG_TINY_RCU (non-preemptible)

	  text	  data	  bss	  dec	filename
	    13	     0	    0	   13	kernel/rcupdate.o
	   719	    25	    0	  744	kernel/rcutiny.o
				  ---
				  757	Total

Requested-by: Loïc Minier
Signed-off-by: Paul E. McKenney
---
 include/linux/hardirq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b387669dab..1f4517d55b19 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
 #endif
 
 #if defined(CONFIG_NO_HZ)
-#if defined(CONFIG_TINY_RCU)
+#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
 extern void rcu_enter_nohz(void);
 extern void rcu_exit_nohz(void);
-- cgit v1.2.2
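The hardirq.h hunk above only widens a preprocessor guard: under CONFIG_NO_HZ,
the dyntick-idle hooks rcu_enter_nohz() and rcu_exit_nohz() are now declared
for TINY_PREEMPT_RCU as well as TINY_RCU.  For context, a rough illustrative
sketch of how an idle path brackets its tickless period with these hooks
follows; the function name and loop shape are assumptions, not code from this
patch.

	/*
	 * Illustrative sketch only: cpu_idle_sketch() and the loop shape are
	 * assumptions; only rcu_enter_nohz()/rcu_exit_nohz() come from the
	 * declarations guarded by the hunk above.
	 */
	static void cpu_idle_sketch(void)
	{
		for (;;) {
			rcu_enter_nohz();	/* CPU stops taking scheduler ticks */
			/* arch-specific "wait for interrupt" would go here */
			rcu_exit_nohz();	/* back under RCU's watch */
		}
	}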
From bcdb714c8856c76383ca455294f0074168705eab Mon Sep 17 00:00:00 2001
From: David Howells
Date: Thu, 7 Oct 2010 14:08:53 +0100
Subject: Drop a couple of unnecessary asm/system.h inclusions

Drop the inclusions of asm/system.h from linux/hardirq.h and
linux/list.h, as they're no longer required and they prevent the M68K
arch's IRQ flag handling macros from being made into inlined functions
due to circular dependencies.

Signed-off-by: David Howells
Acked-by: Greg Ungerer
Acked-by: Geert Uytterhoeven
---
 include/linux/hardirq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b387669dab..7dfdc06c7e18 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include <asm/system.h>
 
 /*
  * We put the hardirq and softirq counter into the preemption
-- cgit v1.2.2

From 75e1056f5c57050415b64cb761a3acc35d91f013 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi
Date: Mon, 4 Oct 2010 17:03:16 -0700
Subject: sched: Fix softirq time accounting

Peter Zijlstra found a bug in the way softirq time is accounted in
VIRT_CPU_ACCOUNTING on this thread:

  http://lkml.indiana.edu/hypermail//linux/kernel/1009.2/01366.html

The problem is that softirq processing uses local_bh_disable()
internally.  There is no way, later in the flow, to differentiate
between softirq actually being processed and bh merely having been
disabled.  So a hardirq that arrives while bh is disabled results in
time being wrongly accounted as softirq.

Looking at the code a bit more, the problem exists in
!VIRT_CPU_ACCOUNTING as well: account_system_time() in normal
tick-based accounting also uses softirq_count(), which will be set even
when we are not in softirq but merely have bh disabled.

Peter also suggested the solution of using 2*SOFTIRQ_OFFSET as the irq
count for local_bh_{disable,enable} and just SOFTIRQ_OFFSET while
actually processing softirq.  The patch below does that and adds the
API in_serving_softirq(), which returns whether we are currently
processing softirq or not.

It also changes one of the usages of softirq_count() in
net/sched/cls_cgroup.c to in_serving_softirq().

It looks like many usages of in_softirq() really want
in_serving_softirq().  Those changes can be made individually on a
case-by-case basis.

Signed-off-by: Venkatesh Pallipadi
Signed-off-by: Peter Zijlstra
LKML-Reference: <1286237003-12406-2-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar
---
 include/linux/hardirq.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b387669dab..e37a77cbd588 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -64,6 +64,8 @@
 #define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
 #define NMI_OFFSET	(1UL << NMI_SHIFT)
 
+#define SOFTIRQ_DISABLE_OFFSET	(2 * SOFTIRQ_OFFSET)
+
 #ifndef PREEMPT_ACTIVE
 #define PREEMPT_ACTIVE_BITS	1
 #define PREEMPT_ACTIVE_SHIFT	(NMI_SHIFT + NMI_BITS)
@@ -82,10 +84,13 @@
 /*
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
+ * in_softirq - Are we currently processing softirq or have bh disabled?
+ * in_serving_softirq - Are we currently processing softirq?
  */
 #define in_irq()		(hardirq_count())
 #define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
+#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
 
 /*
  * Are we in NMI context?
-- cgit v1.2.2
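The distinction this patch introduces can be summarised with a short sketch.
After the change, local_bh_disable() raises the softirq part of the preempt
count by SOFTIRQ_DISABLE_OFFSET (2*SOFTIRQ_OFFSET), while actually running
softirq handlers adds just SOFTIRQ_OFFSET, so the two states become
distinguishable.  The helper below is illustrative only, not kernel API; the
macros are the ones added above.

	/* Sketch: where should this tick's system time be charged? */
	static void account_tick_sketch(void)
	{
		if (in_irq()) {
			/* hardirq context: charge hardirq time */
		} else if (in_serving_softirq()) {
			/* a softirq handler is really executing: softirq time */
		} else if (in_softirq()) {
			/* only local_bh_disable() is in effect; before this
			 * patch such time was wrongly charged as softirq */
		} else {
			/* plain process context */
		}
	}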
From e1e10a265d28273ab8c70be19d43dcbdeead6c5a Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi
Date: Mon, 4 Oct 2010 17:03:17 -0700
Subject: sched: Consolidate account_system_vtime extern declaration

Just a minor cleanup patch that makes things easier for the following
patches.  No functionality change in this patch.

Signed-off-by: Venkatesh Pallipadi
Signed-off-by: Peter Zijlstra
LKML-Reference: <1286237003-12406-3-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar
---
 include/linux/hardirq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index e37a77cbd588..41367c5c3c68 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -141,6 +141,8 @@ struct task_struct;
 static inline void account_system_vtime(struct task_struct *tsk)
 {
 }
+#else
+extern void account_system_vtime(struct task_struct *tsk);
 #endif
 
 #if defined(CONFIG_NO_HZ)
-- cgit v1.2.2

From b52bfee445d315549d41eacf2fa7c156e7d153d5 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi
Date: Mon, 4 Oct 2010 17:03:19 -0700
Subject: sched: Add IRQ_TIME_ACCOUNTING, finer accounting of irq time

s390/powerpc/ia64 have support for CONFIG_VIRT_CPU_ACCOUNTING, which
does fine-granularity accounting of user, system, hardirq and softirq
times.  Adding that option on archs like x86 will be challenging,
however, given the state of TSC reliability on various platforms and
also the overhead it would add to syscall entry/exit.

Instead, add a lighter variant that only does finer accounting of
hardirq and softirq times, providing precise irq times (instead of
timer-tick-based samples).  This accounting is added with a new config
option, CONFIG_IRQ_TIME_ACCOUNTING, so that there won't be any overhead
for users not interested in paying the performance penalty.

This accounting is based on sched_clock, with the code being generic,
so other archs may find it useful as well.

This patch just adds the core logic and does not enable it yet.

Signed-off-by: Venkatesh Pallipadi
Signed-off-by: Peter Zijlstra
LKML-Reference: <1286237003-12406-5-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar
---
 include/linux/hardirq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 41367c5c3c68..ff43e9268449 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -137,7 +137,7 @@ extern void synchronize_irq(unsigned int irq);
 
 struct task_struct;
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
 static inline void account_system_vtime(struct task_struct *tsk)
 {
 }
-- cgit v1.2.2
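The mechanism behind IRQ_TIME_ACCOUNTING is conceptually small: on irq entry
and exit, account_system_vtime() reads sched_clock() and accumulates the delta
into per-CPU irq-time counters.  A minimal sketch of that shape follows; the
names carry a _sketch suffix because they are assumptions for illustration,
not the names used by the real kernel/sched.c code, which differs in detail.

	/* Sketch of sched_clock()-based irq time accounting (illustrative). */
	static DEFINE_PER_CPU(u64, irq_start_time_sketch);
	static DEFINE_PER_CPU(u64, cpu_hardirq_time_sketch);
	static DEFINE_PER_CPU(u64, cpu_softirq_time_sketch);

	void account_system_vtime_sketch(struct task_struct *curr)
	{
		u64 now = sched_clock();
		u64 delta = now - __this_cpu_read(irq_start_time_sketch);

		__this_cpu_write(irq_start_time_sketch, now);
		if (hardirq_count())
			__this_cpu_add(cpu_hardirq_time_sketch, delta);
		else if (in_serving_softirq())
			__this_cpu_add(cpu_softirq_time_sketch, delta);
		/* otherwise the time since the last sample was not irq time */
	}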
From 7fe19da4ca38fc20cdbc7020fcf2eca8fc756410 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 28 Oct 2010 16:12:33 +0200
Subject: preempt: fix kernel build with !CONFIG_BKL

The preempt count logic tries to take the BKL into account, which
breaks when CONFIG_BKL is not set.

Use the same preempt_count offset that we use without CONFIG_PREEMPT
when CONFIG_BKL is disabled.

Signed-off-by: Arnd Bergmann
Reported-and-tested-by: Kirill A. Shutemov
Signed-off-by: Linus Torvalds
---
 include/linux/hardirq.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 8a389b608ce3..41cb31f14ee3 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -96,11 +96,15 @@
  */
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
-#if defined(CONFIG_PREEMPT)
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL)
 # define PREEMPT_INATOMIC_BASE kernel_locked()
-# define PREEMPT_CHECK_OFFSET 1
 #else
 # define PREEMPT_INATOMIC_BASE 0
+#endif
+
+#if defined(CONFIG_PREEMPT)
+# define PREEMPT_CHECK_OFFSET 1
+#else
 # define PREEMPT_CHECK_OFFSET 0
 #endif
-- cgit v1.2.2

From 451a3c24b0135bce54542009b5fde43846c7cf67 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 17 Nov 2010 16:26:55 +0100
Subject: BKL: remove extraneous #include <smp_lock.h>

The big kernel lock has been removed from all these files at some
point, leaving only the #include.  Remove this too as a cleanup.

Signed-off-by: Arnd Bergmann
Signed-off-by: Linus Torvalds
---
 include/linux/hardirq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 41cb31f14ee3..8f3f467c57c6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -3,7 +3,6 @@
 #include
 #ifdef CONFIG_PREEMPT
-#include <linux/smp_lock.h>
 #endif
 #include
 #include
-- cgit v1.2.2
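The reason PREEMPT_INATOMIC_BASE exists at all is the BKL's special status:
with CONFIG_PREEMPT and CONFIG_BKL, holding the big kernel lock raises the
preempt count, yet a BKL holder is still allowed to sleep because the lock is
dropped and re-taken across a context switch, so in_atomic() compares the
preempt count against kernel_locked() rather than plain zero.  A minimal,
illustrative sleep-safety check in that spirit follows; the function name is
an assumption, not kernel API.

	/* Sketch: a might_sleep()-style debug check built on in_atomic(). */
	static void check_sleepable_sketch(void)
	{
		if (in_atomic() || irqs_disabled())
			printk(KERN_ERR "sketch: sleeping in atomic context\n");
	}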
From 7957f0a857754c555e07f58a3fb83ac29501478c Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 17 Nov 2010 14:58:36 -0800
Subject: Fix build failure due to hwirq.h needing smp_lock.h

Arnd Bergmann did an automated scripting run to find left-over
instances of <linux/smp_lock.h>, and had it trigger on the normal BKL
use of lock_kernel and unlock_kernel (and apparently
release_kernel_lock and reacquire_kernel_lock too, used by the
scheduler).

That resulted in commit 451a3c24b013 ("BKL: remove extraneous #include
<smp_lock.h>").

However, hardirq.h was the only remaining user of the old
'kernel_locked()' interface, and Arnd's script hadn't checked for that.
So depending on your configuration and what header files had been
included, you would get errors like "implicit declaration of function
'kernel_locked'" during the build.

The right fix is not to just re-instate the smp_lock.h include - it is
to just remove 'kernel_locked()' entirely, since the only use was this
one special low-level detail.  Just make hardirq.h do it directly.

In fact this simplifies and clarifies the code, because some trivial
analysis makes it clear that hardirq.h only ever used _one_ of the two
definitions of kernel_locked(), so we can remove the other one
entirely.

Reported-by: Zimny Lech
Reported-and-acked-by: Randy Dunlap
Acked-by: Arnd Bergmann
Signed-off-by: Linus Torvalds
---
 include/linux/hardirq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 8f3f467c57c6..bea1612d8f5c 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -96,7 +96,7 @@
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL)
-# define PREEMPT_INATOMIC_BASE kernel_locked()
+# define PREEMPT_INATOMIC_BASE (current->lock_depth >= 0)
 #else
 # define PREEMPT_INATOMIC_BASE 0
 #endif
-- cgit v1.2.2

From 0a5b871ea4c6bfb2723ac2ffc7ef5c32452abb89 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 17 Nov 2010 18:36:25 -0800
Subject: hardirq.h: remove now-empty #ifdef/#endif pair

Commit 451a3c24b013 ("BKL: remove extraneous #include <smp_lock.h>")
removed the #include line that was the only thing surrounded by the
#ifdef/#endif.

So now that #ifdef is guarding nothing at all.  Just remove it.

Reported-by: Byeong-ryeol Kim
Signed-off-by: Linus Torvalds
---
 include/linux/hardirq.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index bea1612d8f5c..714da7e5d10c 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -2,8 +2,6 @@
 #define LINUX_HARDIRQ_H
 
 #include
-#ifdef CONFIG_PREEMPT
-#endif
 #include
 #include
 #include
-- cgit v1.2.2

From ed1d77b18c9f4ff06d5b42c65041aa55a1447053 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Thu, 18 Nov 2010 10:56:29 -0800
Subject: hardirq.h: needs sched.h if using BKL

This really isn't the right thing to do, and strictly speaking we
should have the BKL depth count in the thread info, right next to the
preempt count.  The two really do go together.

However, since that would involve a patch to all architectures, and the
BKL is finally going away, it's simply not worth the effort to do the
RightThing(tm).  Just re-instate the sched.h include that we used to
get accidentally from the smp_lock.h one.

This is all fallout from the same old "BKL: remove extraneous #include
<smp_lock.h>" commit.

Reported-by: Ingo Molnar
Tested-by: Randy Dunlap
Cc: Arnd Bergmann
Signed-off-by: Linus Torvalds
---
 include/linux/hardirq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 714da7e5d10c..32f9fd6619b4 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -94,6 +94,7 @@
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL)
+# include <linux/sched.h>
 # define PREEMPT_INATOMIC_BASE (current->lock_depth >= 0)
 #else
 # define PREEMPT_INATOMIC_BASE 0
-- cgit v1.2.2
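The underlying issue in the sched.h patch is a plain C rule: hardirq.h
otherwise only forward-declares struct task_struct, and a forward declaration
lets you pass pointers around but not dereference members such as
current->lock_depth.  A standalone illustration of that rule (not kernel code;
the names here are made up for the example):

	/* Standalone illustration of the incomplete-type problem. */
	struct task_struct;			/* what hardirq.h normally has */

	/*
	 * int depth_bad(struct task_struct *t) { return t->lock_depth; }
	 * would fail to compile here: t points to an incomplete type.
	 */

	struct task_struct { int lock_depth; };	/* stand-in for the sched.h definition */

	int bkl_held_sketch(struct task_struct *t)
	{
		return t->lock_depth >= 0;	/* fine once the full type is visible */
	}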
From 4ba8216cd90560bc402f52076f64d8546e8aefcb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 25 Jan 2011 22:52:22 +0100
Subject: BKL: That's all, folks

This removes the implementation of the big kernel lock, at last.  A lot
of people have worked on this in the past, so the credit for this patch
should be with everyone who participated in the hunt.

The names on the Cc list are the people who were the most active in
this, according to the recorded git history, in alphabetical order.

Signed-off-by: Arnd Bergmann
Acked-by: Alan Cox
Cc: Alessio Igor Bogani
Cc: Al Viro
Cc: Andrew Hendry
Cc: Andrew Morton
Cc: Christoph Hellwig
Cc: Eric W. Biederman
Cc: Frederic Weisbecker
Cc: Hans Verkuil
Acked-by: Ingo Molnar
Cc: Jan Blunck
Cc: John Kacur
Cc: Jonathan Corbet
Cc: Linus Torvalds
Cc: Matthew Wilcox
Cc: Oliver Neukum
Cc: Paul Menage
Acked-by: Thomas Gleixner
Cc: Trond Myklebust
---
 include/linux/hardirq.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 32f9fd6619b4..ba362171e8ae 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -93,13 +93,6 @@
  */
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL)
-# include <linux/sched.h>
-# define PREEMPT_INATOMIC_BASE (current->lock_depth >= 0)
-#else
-# define PREEMPT_INATOMIC_BASE 0
-#endif
-
 #if defined(CONFIG_PREEMPT)
 # define PREEMPT_CHECK_OFFSET 1
 #else
@@ -113,7 +106,7 @@
  * used in the general case to determine whether sleeping is possible.
  * Do not use in_atomic() in driver code.
  */
-#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
+#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
 
 /*
  * Check whether we were atomic before we did preempt_disable():
-- cgit v1.2.2

From 83b11ea1c6ad113519c488853cf06e626c95a64d Mon Sep 17 00:00:00 2001
From: "Bjoern B. Brandenburg"
Date: Tue, 24 Jan 2012 09:36:12 +0100
Subject: Feather-Trace: keep track of interrupt-related interference.

Increment a processor-local counter whenever an interrupt is handled.
This allows Feather-Trace to include a (truncated) counter and a flag
to report interference from interrupts.  This could be used to filter
samples that were disturbed by interrupts.
---
 include/linux/hardirq.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux/hardirq.h')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index ba362171e8ae..e6dd5a456bae 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -6,6 +6,8 @@
 #include
 #include
 
+#include
+
 /*
  * We put the hardirq and softirq counter into the preemption
  * counter. The bitmask has the following meaning:
@@ -186,6 +188,7 @@ extern void rcu_nmi_exit(void);
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
+		ft_irq_fired();				\
 	} while (0)
 
 /*
@@ -216,6 +219,7 @@ extern void irq_exit(void);
 		lockdep_off();				\
 		rcu_nmi_enter();			\
 		trace_hardirq_enter();			\
+		ft_irq_fired();				\
 	} while (0)
 
 #define nmi_exit()					\
-- cgit v1.2.2
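ft_irq_fired() itself is defined elsewhere in the Feather-Trace patch set, so
the hunks above only show the call sites added to the irq_enter() and
nmi_enter() paths.  A plausible minimal sketch of the processor-local counter
described in the message might look like the following; every name here is an
assumption for illustration, not code from the patch.

	/*
	 * Sketch only: a processor-local interrupt counter in the spirit of
	 * the description above.  The names are assumptions.
	 */
	DEFINE_PER_CPU(unsigned int, ft_irq_count_sketch);

	static inline void ft_irq_fired_sketch(void)
	{
		/* irq_enter()/nmi_enter() run with preemption disabled, so a
		 * non-atomic per-CPU increment is sufficient here. */
		__this_cpu_inc(ft_irq_count_sketch);
	}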