From 17d9ddc72fb8bba0d4f67868c9c612e472a594a9 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Wed, 10 Feb 2010 15:23:44 -0800 Subject: rbtree: Add support for augmented rbtrees Add support for augmented rbtrees in core rbtree code. This will be used in subsequent patches, in x86 PAT code, which needs interval trees to efficiently keep track of PAT ranges. Signed-off-by: Venkatesh Pallipadi LKML-Reference: <20100210232343.GA11465@linux-os.sc.intel.com> Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- lib/rbtree.c | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/rbtree.c b/lib/rbtree.c index e2aa3be29858..15e10b1afdd2 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -44,6 +44,11 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) else root->rb_node = right; rb_set_parent(node, right); + + if (root->augment_cb) { + root->augment_cb(node); + root->augment_cb(right); + } } static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) @@ -67,12 +72,20 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) else root->rb_node = left; rb_set_parent(node, left); + + if (root->augment_cb) { + root->augment_cb(node); + root->augment_cb(left); + } } void rb_insert_color(struct rb_node *node, struct rb_root *root) { struct rb_node *parent, *gparent; + if (root->augment_cb) + root->augment_cb(node); + while ((parent = rb_parent(node)) && rb_is_red(parent)) { gparent = rb_parent(parent); @@ -227,12 +240,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root) else { struct rb_node *old = node, *left; + int old_parent_cb = 0; + int successor_parent_cb = 0; node = node->rb_right; while ((left = node->rb_left) != NULL) node = left; if (rb_parent(old)) { + old_parent_cb = 1; if (rb_parent(old)->rb_left == old) rb_parent(old)->rb_left = node; else @@ -247,8 +263,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (parent == old) { parent = node; } else { + successor_parent_cb = 1; if (child) rb_set_parent(child, parent); + parent->rb_left = child; node->rb_right = old->rb_right; @@ -259,6 +277,24 @@ void rb_erase(struct rb_node *node, struct rb_root *root) node->rb_left = old->rb_left; rb_set_parent(old->rb_left, node); + if (root->augment_cb) { + /* + * Here, three different nodes can have new children. + * The parent of the successor node that was selected + * to replace the node to be erased. + * The node that is getting erased and is now replaced + * by its successor. + * The parent of the node getting erased-replaced. + */ + if (successor_parent_cb) + root->augment_cb(parent); + + root->augment_cb(node); + + if (old_parent_cb) + root->augment_cb(rb_parent(old)); + } + goto color; } @@ -267,15 +303,19 @@ void rb_erase(struct rb_node *node, struct rb_root *root) if (child) rb_set_parent(child, parent); - if (parent) - { + + if (parent) { if (parent->rb_left == node) parent->rb_left = child; else parent->rb_right = child; - } - else + + if (root->augment_cb) + root->augment_cb(parent); + + } else { root->rb_node = child; + } color: if (color == RB_BLACK) -- cgit v1.2.2 From 4d1ee80f3a7df7fe9cdec26e651e6201c45b10d4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 29 Jan 2010 20:59:17 +0000 Subject: idr: export idr_get_next() idr_get_next() was accidentally not exported when added. It is about to be used by mtdcore, which may be built as a module. Signed-off-by: Ben Hutchings Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- lib/idr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 1cac726c44bc..21f9266d1e41 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -621,7 +621,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) } return NULL; } - +EXPORT_SYMBOL(idr_get_next); /** -- cgit v1.2.2 From 86a8938078a8bb518c5376de493e348c7490d506 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 10:54:24 +0100 Subject: lib: Add self-test for atomic64_t This patch adds self-test on boot code for atomic64_t. This has been used to test the later changes in this patchset. Signed-off-by: Luca Barbieri LKML-Reference: <1267005265-27958-4-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/Kconfig.debug | 7 +++ lib/Makefile | 2 + lib/atomic64_test.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 lib/atomic64_test.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 25c3ed594c54..3676c517a073 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1054,6 +1054,13 @@ config DMA_API_DEBUG This option causes a performance degredation. Use only if you want to debug device drivers. If unsure, say N. +config ATOMIC64_SELFTEST + bool "Perform an atomic64_t self-test at boot" + help + Enable this option to test the atomic64_t functions at boot. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 347ad8db29d3..4af4786fe281 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -99,6 +99,8 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o +obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c new file mode 100644 index 000000000000..4ff649e46bad --- /dev/null +++ b/lib/atomic64_test.c @@ -0,0 +1,158 @@ +/* + * Testsuite for atomic64_t functions + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include +#include + +#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) +static __init int test_atomic64(void) +{ + long long v0 = 0xaaa31337c001d00dLL; + long long v1 = 0xdeadbeefdeafcafeLL; + long long v2 = 0xfaceabadf00df001LL; + long long onestwos = 0x1111111122222222LL; + long long one = 1LL; + + atomic64_t v = ATOMIC64_INIT(v0); + long long r = v0; + BUG_ON(v.counter != r); + + atomic64_set(&v, v1); + r = v1; + BUG_ON(v.counter != r); + BUG_ON(atomic64_read(&v) != r); + + INIT(v0); + atomic64_add(onestwos, &v); + r += onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_add(-one, &v); + r += -one; + BUG_ON(v.counter != r); + + INIT(v0); + r += onestwos; + BUG_ON(atomic64_add_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r += -one; + BUG_ON(atomic64_add_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(onestwos, &v); + r -= onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(-one, &v); + r -= -one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= onestwos; + BUG_ON(atomic64_sub_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r -= -one; + BUG_ON(atomic64_sub_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_inc(&v); + r += one; + BUG_ON(v.counter != r); + + INIT(v0); + r += one; + BUG_ON(atomic64_inc_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_dec(&v); + r -= one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= one; + BUG_ON(atomic64_dec_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_xchg(&v, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(!atomic64_add_unless(&v, one, v0)); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_add_unless(&v, one, v1)); + r += one; + BUG_ON(v.counter != r); + + INIT(onestwos); + BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); + r -= one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_dec_if_positive(&v) != -one); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); + BUG_ON(v.counter != r); + + INIT(onestwos); + BUG_ON(atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(!atomic64_inc_not_zero(&v)); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + +#ifdef CONFIG_X86 + printk(KERN_INFO "atomic64 test passed for %s+ platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_CMPXCHG64 + "586", +#else + "386", +#endif + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); +#else + printk(KERN_INFO "atomic64 test passed\n"); +#endif + + return 0; +} + +core_initcall(test_atomic64); -- cgit v1.2.2 From 8f4f202b335144bf5be5c9e5b1bc9477ecdae958 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 26 Feb 2010 12:22:40 +0100 Subject: lib: Only test atomic64_dec_if_positive on archs having it Currently atomic64_dec_if_positive() is only supported by PowerPC, MIPS and x86-32. Signed-off-by: Luca Barbieri LKML-Reference: <1267183361-20775-1-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 4ff649e46bad..0effcacbebda 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -112,6 +112,7 @@ static __init int test_atomic64(void) r += one; BUG_ON(v.counter != r); +#if defined(CONFIG_X86_32) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H) INIT(onestwos); BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); r -= one; @@ -124,6 +125,9 @@ static __init int test_atomic64(void) INIT(-one); BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); BUG_ON(v.counter != r); +#else +#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above +#endif INIT(onestwos); BUG_ON(atomic64_inc_not_zero(&v)); -- cgit v1.2.2 From d7f6de1e9c4a12e11ba7186c70f0f40caa76f590 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 26 Feb 2010 12:22:41 +0100 Subject: x86: Implement atomic[64]_dec_if_positive() Add support for atomic_dec_if_positive(), and atomic64_dec_if_positive() for x86-64. atomic64_dec_if_positive() for x86-32 was already implemented in a previous patch. Signed-off-by: Luca Barbieri LKML-Reference: <1267183361-20775-2-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 0effcacbebda..58efdabb3845 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -112,7 +112,7 @@ static __init int test_atomic64(void) r += one; BUG_ON(v.counter != r); -#if defined(CONFIG_X86_32) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H) +#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H) INIT(onestwos); BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); r -= one; -- cgit v1.2.2 From 9efbcd590243045111670c171a951923b877b57d Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:45 +0100 Subject: lib: Fix atomic64_add_unless test atomic64_add_unless must return 1 if it perfomed the add and 0 otherwise. The test assumed the opposite convention. Reported-by: H. Peter Anvin Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-2-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index 58efdabb3845..ee8e6de8b413 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -104,11 +104,11 @@ static __init int test_atomic64(void) BUG_ON(v.counter != r); INIT(v0); - BUG_ON(!atomic64_add_unless(&v, one, v0)); + BUG_ON(atomic64_add_unless(&v, one, v0)); BUG_ON(v.counter != r); INIT(v0); - BUG_ON(atomic64_add_unless(&v, one, v1)); + BUG_ON(!atomic64_add_unless(&v, one, v1)); r += one; BUG_ON(v.counter != r); -- cgit v1.2.2 From 97577896f6b9c056fa0a5e9f6a608110cb3dcd33 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:47 +0100 Subject: lib: Fix atomic64_add_unless return value convention atomic64_add_unless must return 1 if it perfomed the add and 0 otherwise. The generic implementation did the opposite thing. Reported-by: H. Peter Anvin Confirmed-by: Paul Mackerras Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-4-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/atomic64.c b/lib/atomic64.c index 8bee16ec7524..a21c12bc727c 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -162,12 +162,12 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; spinlock_t *lock = lock_addr(v); - int ret = 1; + int ret = 0; spin_lock_irqsave(lock, flags); if (v->counter != u) { v->counter += a; - ret = 0; + ret = 1; } spin_unlock_irqrestore(lock, flags); return ret; -- cgit v1.2.2 From 25a304f277ad70166eeae25a4958d2049005c33a Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Mon, 1 Mar 2010 19:55:48 +0100 Subject: lib: Fix atomic64_inc_not_zero test atomic64_inc_not_zero must return 1 if it perfomed the add and 0 otherwise. The test assumed the opposite convention. Signed-off-by: Luca Barbieri LKML-Reference: <1267469749-11878-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin --- lib/atomic64_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index ee8e6de8b413..f7bb706c9c3a 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -130,16 +130,16 @@ static __init int test_atomic64(void) #endif INIT(onestwos); - BUG_ON(atomic64_inc_not_zero(&v)); + BUG_ON(!atomic64_inc_not_zero(&v)); r += one; BUG_ON(v.counter != r); INIT(0); - BUG_ON(!atomic64_inc_not_zero(&v)); + BUG_ON(atomic64_inc_not_zero(&v)); BUG_ON(v.counter != r); INIT(-one); - BUG_ON(atomic64_inc_not_zero(&v)); + BUG_ON(!atomic64_inc_not_zero(&v)); r += one; BUG_ON(v.counter != r); -- cgit v1.2.2 From a5c9161f27c3e1ae6c0094d262f03a7e98262181 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 1 Mar 2010 11:49:23 -0800 Subject: x86, atomic64: In selftest, distinguish x86-64 from 586+ The x86-64 implementation of the atomics is totally different from the i586+ implementation, which makes it quite confusing to call it "586+". Also fix indentation, and add "i" for "i386" and "i586" as used elsewhere in the kernel. Signed-off-by: H. Peter Anvin Cc: Luca Barbieri LKML-Reference: <1267005265-27958-4-git-send-email-luca@luca-barbieri.com> --- lib/atomic64_test.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index f7bb706c9c3a..65e482caf5e9 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -144,14 +144,16 @@ static __init int test_atomic64(void) BUG_ON(v.counter != r); #ifdef CONFIG_X86 - printk(KERN_INFO "atomic64 test passed for %s+ platform %s CX8 and %s SSE\n", -#ifdef CONFIG_X86_CMPXCHG64 - "586", + printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_64 + "x86-64", +#elif defined(CONFIG_X86_CMPXCHG64) + "i586+", #else - "386", + "i386+", #endif - boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", - boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); #else printk(KERN_INFO "atomic64 test passed\n"); #endif -- cgit v1.2.2 From 1fb2f77c037624601fd214fb7c29faa84cd7bdd7 Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Fri, 26 Mar 2010 20:38:35 +0100 Subject: debugobjects: Section mismatch cleanup This patch marks two functions, which only get called at initialization, as __init. Here is also interesting, that modpost doesn't catch here the right function name. WARNING: lib/built-in.o(.text+0x585f): Section mismatch in reference from the function T.506() to the variable .init.data:obj The function T.506() references the variable __initdata obj. This is often because T.506 lacks a __initdata annotation or the annotation of obj is wrong. Signed-off-by: Henrik Kretzschmar LKML-Reference: <1269632315-19403-1-git-send-email-henne@nachtwindheim.de> Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a9a8996d286a..c4ecd3ce7fd4 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -773,7 +773,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state) } } -static int +static int __init check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) { struct debug_bucket *db; @@ -916,7 +916,7 @@ void __init debug_objects_early_init(void) /* * Convert the statically allocated objects to dynamic ones: */ -static int debug_objects_replace_static_objects(void) +static int __init debug_objects_replace_static_objects(void) { struct debug_bucket *db = obj_hash; struct hlist_node *node, *tmp; -- cgit v1.2.2 From 1527bc8b928dd1399c3d3467dd47d9ede210978a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Feb 2010 15:03:07 +0100 Subject: bitops: Optimize hweight() by making use of compile-time evaluation Rename the extisting runtime hweight() implementations to __arch_hweight(), rename the compile-time versions to __const_hweight() and then have hweight() pick between them. Suggested-by: H. Peter Anvin Signed-off-by: Peter Zijlstra LKML-Reference: <20100318111929.GB11152@aftab> Acked-by: H. Peter Anvin LKML-Reference: <1265028224.24455.154.camel@laptop> Signed-off-by: H. Peter Anvin --- lib/hweight.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/hweight.c b/lib/hweight.c index 63ee4eb1228d..a6927e76840f 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,7 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ -unsigned int hweight32(unsigned int w) +unsigned int __arch_hweight32(unsigned int w) { #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x55555555; @@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w) return (res + (res >> 16)) & 0x000000FF; #endif } -EXPORT_SYMBOL(hweight32); +EXPORT_SYMBOL(__arch_hweight32); -unsigned int hweight16(unsigned int w) +unsigned int __arch_hweight16(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x5555); res = (res & 0x3333) + ((res >> 2) & 0x3333); res = (res + (res >> 4)) & 0x0F0F; return (res + (res >> 8)) & 0x00FF; } -EXPORT_SYMBOL(hweight16); +EXPORT_SYMBOL(__arch_hweight16); -unsigned int hweight8(unsigned int w) +unsigned int __arch_hweight8(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x55); res = (res & 0x33) + ((res >> 2) & 0x33); return (res + (res >> 4)) & 0x0F; } -EXPORT_SYMBOL(hweight8); +EXPORT_SYMBOL(__arch_hweight8); -unsigned long hweight64(__u64 w) +unsigned long __arch_hweight64(__u64 w) { #if BITS_PER_LONG == 32 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); + return __arch_hweight32((unsigned int)(w >> 32)) + + __arch_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; @@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w) #endif #endif } -EXPORT_SYMBOL(hweight64); +EXPORT_SYMBOL(__arch_hweight64); -- cgit v1.2.2 From d61931d89be506372d01a90d1755f6d0a9fafe2d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Mar 2010 17:34:46 +0100 Subject: x86: Add optimized popcnt variants Add support for the hardware version of the Hamming weight function, popcnt, present in CPUs which advertize it under CPUID, Function 0x0000_0001_ECX[23]. On CPUs which don't support it, we fallback to the default lib/hweight.c sw versions. A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost a 3x speedup on a F10h machine. Signed-off-by: Borislav Petkov LKML-Reference: <20100318112015.GC11152@aftab> Signed-off-by: H. Peter Anvin --- lib/Makefile | 3 +++ lib/hweight.c | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 2e152aed7198..abe63a8ad143 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -39,7 +39,10 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o + +CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o + obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o diff --git a/lib/hweight.c b/lib/hweight.c index a6927e76840f..3c79d50814cf 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,7 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ -unsigned int __arch_hweight32(unsigned int w) +unsigned int __sw_hweight32(unsigned int w) { #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x55555555; @@ -24,30 +24,30 @@ unsigned int __arch_hweight32(unsigned int w) return (res + (res >> 16)) & 0x000000FF; #endif } -EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__sw_hweight32); -unsigned int __arch_hweight16(unsigned int w) +unsigned int __sw_hweight16(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x5555); res = (res & 0x3333) + ((res >> 2) & 0x3333); res = (res + (res >> 4)) & 0x0F0F; return (res + (res >> 8)) & 0x00FF; } -EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__sw_hweight16); -unsigned int __arch_hweight8(unsigned int w) +unsigned int __sw_hweight8(unsigned int w) { unsigned int res = w - ((w >> 1) & 0x55); res = (res & 0x33) + ((res >> 2) & 0x33); return (res + (res >> 4)) & 0x0F; } -EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__sw_hweight8); -unsigned long __arch_hweight64(__u64 w) +unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 - return __arch_hweight32((unsigned int)(w >> 32)) + - __arch_hweight32((unsigned int)w); + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 #ifdef ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; @@ -64,4 +64,4 @@ unsigned long __arch_hweight64(__u64 w) #endif #endif } -EXPORT_SYMBOL(__arch_hweight64); +EXPORT_SYMBOL(__sw_hweight64); -- cgit v1.2.2 From 2b3fc35f6919344e3cf722dde8308f47235c0b70 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 Apr 2010 16:23:07 +0800 Subject: rcu: optionally leave lockdep enabled after RCU lockdep splat There is no need to disable lockdep after an RCU lockdep splat, so remove the debug_lockdeps_off() from lockdep_rcu_dereference(). To avoid repeated lockdep splats, use a static variable in the inlined rcu_dereference_check() and rcu_dereference_protected() macros so that a given instance splats only once, but so that multiple instances can be detected per boot. This is controlled by a new config variable CONFIG_PROVE_RCU_REPEATEDLY, which is disabled by default. This provides the normal lockdep behavior by default, but permits people who want to find multiple RCU-lockdep splats per boot to easily do so. Requested-by: Eric Paris Signed-off-by: Lai Jiangshan Tested-by: Eric Paris Signed-off-by: Paul E. McKenney --- lib/Kconfig.debug | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 935248bdbc47..94090b4bb7d2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -512,6 +512,18 @@ config PROVE_RCU Say N if you are unsure. +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say N if you are unsure. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- cgit v1.2.2 From 55ec936ff4e57cc626db336a7bf33b267390e9b4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 13 Apr 2010 12:22:33 -0700 Subject: rcu: enable CPU_STALL_VERBOSE by default The CPU_STALL_VERBOSE kernel configuration parameter was added to 2.6.34 to identify any preempted/blocked tasks that were preventing the current grace period from completing when running preemptible RCU. As is conventional for new configurations parameters, this defaulted disabled. It is now time to enable it by default. Signed-off-by: Paul E. McKenney --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 94090b4bb7d2..930a9e5eae08 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -805,7 +805,7 @@ config RCU_CPU_STALL_DETECTOR config RCU_CPU_STALL_VERBOSE bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU - default n + default y help This option causes RCU to printk detailed per-task information for any tasks that are stalling the current RCU grace period. -- cgit v1.2.2 From a5d8e467f83f6672104f276223a88e3b50cbd375 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sat, 17 Apr 2010 08:48:38 -0400 Subject: Debugobjects transition check Implement a basic state machine checker in the debugobjects. This state machine checker detects races and inconsistencies within the "active" life of a debugobject. The checker only keeps track of the current state; all the state machine logic is kept at the object instance level. The checker works by adding a supplementary "unsigned int astate" field to the debug_obj structure. It keeps track of the current "active state" of the object. The only constraints that are imposed on the states by the debugobjects system is that: - activation of an object sets the current active state to 0, - deactivation of an object expects the current active state to be 0. For the rest of the states, the state mapping is determined by the specific object instance. Therefore, the logic keeping track of the state machine is within the specialized instance, without any need to know about it at the debugobject level. The current object active state is changed by calling: debug_object_active_state(addr, descr, expect, next) where "expect" is the expected state and "next" is the next state to move to if the expected state is found. A warning is generated if the expected is not found. Signed-off-by: Mathieu Desnoyers Reviewed-by: Thomas Gleixner Acked-by: David S. Miller CC: "Paul E. McKenney" CC: akpm@linux-foundation.org CC: mingo@elte.hu CC: laijs@cn.fujitsu.com CC: dipankar@in.ibm.com CC: josh@joshtriplett.org CC: dvhltc@us.ibm.com CC: niv@us.ibm.com CC: peterz@infradead.org CC: rostedt@goodmis.org CC: Valdis.Kletnieks@vt.edu CC: dhowells@redhat.com CC: eric.dumazet@gmail.com CC: Alexey Dobriyan Signed-off-by: Paul E. McKenney --- lib/debugobjects.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index b862b30369ff..076464fd2072 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -141,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; + obj->astate = 0; hlist_del(&obj->node); hlist_add_head(&obj->node, &b->list); @@ -252,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg) if (limit < 5 && obj->descr != descr_test) { limit++; - WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, - obj_states[obj->state], obj->descr->name); + WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " + "object type: %s\n", + msg, obj_states[obj->state], obj->astate, + obj->descr->name); } debug_objects_warnings++; } @@ -447,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: case ODEBUG_STATE_ACTIVE: - obj->state = ODEBUG_STATE_INACTIVE; + if (!obj->astate) + obj->state = ODEBUG_STATE_INACTIVE; + else + debug_print_object(obj, "deactivate"); break; case ODEBUG_STATE_DESTROYED: @@ -553,6 +559,53 @@ out_unlock: raw_spin_unlock_irqrestore(&db->lock, flags); } +/** + * debug_object_active_state - debug checks object usage state machine + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + * @expect: expected state + * @next: state to move to if expected state is found + */ +void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + if (obj->astate == expect) + obj->astate = next; + else + debug_print_object(obj, "active_state"); + break; + + default: + debug_print_object(obj, "active_state"); + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "active_state"); + } + + raw_spin_unlock_irqrestore(&db->lock, flags); +} + #ifdef CONFIG_DEBUG_OBJECTS_FREE static void __debug_check_no_obj_freed(const void *address, unsigned long size) { -- cgit v1.2.2 From b2be05273a1744d175bf4b67f6665637bb9ac7a8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 3 Apr 2010 19:34:56 +0100 Subject: panic: Allow warnings to set different taint flags WARN() is used in some places to report firmware or hardware bugs that are then worked-around. These bugs do not affect the stability of the kernel and should not set the flag for TAINT_WARN. To allow for this, add WARN_TAINT() and WARN_TAINT_ONCE() macros that take a taint number as argument. Architectures that implement warnings using trap instructions instead of calls to warn_slowpath_*() now implement __WARN_TAINT(taint) instead of __WARN(). Signed-off-by: Ben Hutchings Acked-by: Helge Deller Tested-by: Paul Mundt Signed-off-by: David Woodhouse --- lib/bug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bug.c b/lib/bug.c index 300e41afbf97..f13daf435211 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -165,7 +165,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) (void *)bugaddr); show_regs(regs); - add_taint(TAINT_WARN); + add_taint(BUG_GET_TAINT(bug)); return BUG_TRAP_TYPE_WARN; } -- cgit v1.2.2 From dcc7871128e99458ca86186b7bc8bf27ff0c47b5 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:21 -0500 Subject: kgdb: core changes to support kdb These are the minimum changes to the kgdb core in order to enable an API to connect a new front end (kdb) to the debug core. This patch introduces the dbg_kdb_mode variable controls where the user level I/O is routed. It will be routed to the gdbstub (kgdb) or to the kdb front end which is a simple shell available over the kgdboc connection. You can switch back and forth between kdb or the gdb stub mode of operation dynamically. From gdb stub mode you can blindly type "$3#33", or from the kdb mode you can enter "kgdb" to switch to the gdb stub. The logic in the debug core depends on kdb to look for the typical gdb connection sequences and return immediately with KGDB_PASS_EVENT if a gdb serial command sequence is detected. That should allow a reasonably seamless transition between kdb -> gdb without leaving the kernel exception state. The two gdb serial queries that kdb is responsible for detecting are the "?" and "qSupported" packets. CC: Ingo Molnar Signed-off-by: Jason Wessel Acked-by: Martin Hicks --- lib/Kconfig.kgdb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 9b5d1d7f2ef7..78de43a5e902 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB bool menuconfig KGDB - bool "KGDB: kernel debugging with remote gdb" + bool "KGDB: kernel debugger" depends on HAVE_ARCH_KGDB depends on DEBUG_KERNEL && EXPERIMENTAL help @@ -57,4 +57,10 @@ config KGDB_TESTS_BOOT_STRING information about other strings you could use beyond the default of V1F100. +config KGDB_KDB + bool "KGDB_KDB: include kdb frontend for kgdb" + default n + help + KDB frontend for kernel + endif # KGDB -- cgit v1.2.2 From ada64e4c98eb5f04a9ca223c5ff9e7ac22ce6404 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:24 -0500 Subject: kgdboc,keyboard: Keyboard driver for kdb with kgdb This patch adds in the kdb PS/2 keyboard driver. This was mostly a direct port from the original kdb where I cleaned up the code against checkpatch.pl and added the glue to stitch it into kgdb. This patch also enables early kdb debug via kgdbwait and the keyboard. All the access to configure kdb using either a serial console or the keyboard is done via kgdboc. If you want to use only the keyboard and want to break in early you would add to your kernel command arguments: kgdboc=kbd kgdbwait If you wanted serial and or the keyboard access you could use: kgdboc=kbd,ttyS0 You can also configure kgdboc as a kernel module or at run time with the sysfs where you can activate and deactivate kgdb. Turn it on: echo kbd,ttyS0 > /sys/module/kgdboc/parameters/kgdboc Turn it off: echo "" > /sys/module/kgdboc/parameters/kgdboc Signed-off-by: Jason Wessel Reviewed-by: Dmitry Torokhov --- lib/Kconfig.kgdb | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 78de43a5e902..ee8ae7132f20 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -63,4 +63,11 @@ config KGDB_KDB help KDB frontend for kernel +config KDB_KEYBOARD + bool "KGDB_KDB: keyboard as input device" + depends on VT && KGDB_KDB + default n + help + KDB can use a PS/2 type keyboard for an input device + endif # KGDB -- cgit v1.2.2 From f503b5ae53cb557ac351a668fcac1baab1cef0db Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:25 -0500 Subject: x86,kgdb: Add low level debug hook The only way the debugger can handle a trap in inside rcu_lock, notify_die, or atomic_notifier_call_chain without a triple fault is to have a low level "first opportunity handler" in the int3 exception handler. Generally this will be something the vast majority of folks will not need, but for those who need it, it is added as a kernel .config option called KGDB_LOW_LEVEL_TRAP. CC: Ingo Molnar CC: Thomas Gleixner CC: H. Peter Anvin CC: x86@kernel.org Signed-off-by: Jason Wessel --- lib/Kconfig.kgdb | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index ee8ae7132f20..c56ccb4ad292 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -57,6 +57,15 @@ config KGDB_TESTS_BOOT_STRING information about other strings you could use beyond the default of V1F100. +config KGDB_LOW_LEVEL_TRAP + bool "KGDB: Allow debugging with traps in notifiers" + depends on X86 + default n + help + This will add an extra call back to kgdb for the breakpoint + exception handler on which will will allow kgdb to step + through a notify handler. + config KGDB_KDB bool "KGDB_KDB: include kdb frontend for kgdb" default n -- cgit v1.2.2 From 5dd11d5d47d248850c58292513f0e164ba98b01e Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 20 May 2010 21:04:26 -0500 Subject: mips,kgdb: kdb low level trap catch and stack trace The only way the debugger can handle a trap in inside rcu_lock, notify_die, or atomic_notifier_call_chain without a recursive fault is to have a low level "first opportunity handler" do_trap_or_bp() handler. Generally this will be something the vast majority of folks will not need, but for those who need it, it is added as a kernel .config option called KGDB_LOW_LEVEL_TRAP. Also added was a die notification for oops such that kdb can catch an oops for analysis. There appeared to be no obvious way to pass the struct pt_regs from the original exception back to the stack back tracer, so a special case was added to show_stack() for when kdb is active because you generally desire to generally look at the back trace of the original exception. Signed-off-by: Jason Wessel Acked-by: Ralf Baechle --- lib/Kconfig.kgdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index c56ccb4ad292..43cb93fa2651 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -59,7 +59,7 @@ config KGDB_TESTS_BOOT_STRING config KGDB_LOW_LEVEL_TRAP bool "KGDB: Allow debugging with traps in notifiers" - depends on X86 + depends on X86 || MIPS default n help This will add an extra call back to kgdb for the breakpoint -- cgit v1.2.2 From db1afffab0b5d9f6d31f8f4bea44c9cb3bc59351 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Mar 2010 15:14:51 +1100 Subject: kref: remove kref_set Of the three uses of kref_set in the kernel: One really should be kref_put as the code is letting go of a reference, Two really should be kref_init because the kref is being initialised. This suggests that making kref_set available encourages bad code. So fix the three uses and remove kref_set completely. Signed-off-by: NeilBrown Acked-by: Mimi Zohar Acked-by: Serge Hallyn Signed-off-by: Greg Kroah-Hartman --- lib/kref.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/kref.c b/lib/kref.c index 6d19f690380b..d3d227a08a4b 100644 --- a/lib/kref.c +++ b/lib/kref.c @@ -15,24 +15,14 @@ #include #include -/** - * kref_set - initialize object and set refcount to requested number. - * @kref: object in question. - * @num: initial reference counter - */ -void kref_set(struct kref *kref, int num) -{ - atomic_set(&kref->refcount, num); - smp_mb(); -} - /** * kref_init - initialize object. * @kref: object in question. */ void kref_init(struct kref *kref) { - kref_set(kref, 1); + atomic_set(&kref->refcount, 1); + smp_mb(); } /** @@ -72,7 +62,6 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref)) return 0; } -EXPORT_SYMBOL(kref_set); EXPORT_SYMBOL(kref_init); EXPORT_SYMBOL(kref_get); EXPORT_SYMBOL(kref_put); -- cgit v1.2.2 From bc451f2058238013e1cdf4acd443c01734d332f0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 11:31:25 -0700 Subject: kobj: Add basic infrastructure for dealing with namespaces. Move complete knowledge of namespaces into the kobject layer so we can use that information when reporting kobjects to userspace. Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 8115eb1bbf4d..bbb2bb40ee1f 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -850,6 +850,109 @@ struct kset *kset_create_and_add(const char *name, } EXPORT_SYMBOL_GPL(kset_create_and_add); + +static DEFINE_SPINLOCK(kobj_ns_type_lock); +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +{ + enum kobj_ns_type type = ops->type; + int error; + + spin_lock(&kobj_ns_type_lock); + + error = -EINVAL; + if (type >= KOBJ_NS_TYPES) + goto out; + + error = -EINVAL; + if (type <= KOBJ_NS_TYPE_NONE) + goto out; + + error = -EBUSY; + if (kobj_ns_ops_tbl[type]) + goto out; + + error = 0; + kobj_ns_ops_tbl[type] = ops; + +out: + spin_unlock(&kobj_ns_type_lock); + return error; +} + +int kobj_ns_type_registered(enum kobj_ns_type type) +{ + int registered = 0; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES)) + registered = kobj_ns_ops_tbl[type] != NULL; + spin_unlock(&kobj_ns_type_lock); + + return registered; +} + +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) +{ + const struct kobj_ns_type_operations *ops = NULL; + + if (parent && parent->ktype->child_ns_type) + ops = parent->ktype->child_ns_type(parent); + + return ops; +} + +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) +{ + return kobj_child_ns_ops(kobj->parent); +} + + +const void *kobj_ns_current(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->current_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_initial(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->initial_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +{ +} + + EXPORT_SYMBOL(kobject_get); EXPORT_SYMBOL(kobject_put); EXPORT_SYMBOL(kobject_del); -- cgit v1.2.2 From 3ff195b011d7decf501a4d55aeed312731094796 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Mar 2010 11:31:26 -0700 Subject: sysfs: Implement sysfs tagged directory support. The problem. When implementing a network namespace I need to be able to have multiple network devices with the same name. Currently this is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and potentially a few other directories of the form /sys/ ... /net/*. What this patch does is to add an additional tag field to the sysfs dirent structure. For directories that should show different contents depending on the context such as /sys/class/net/, and /sys/devices/virtual/net/ this tag field is used to specify the context in which those directories should be visible. Effectively this is the same as creating multiple distinct directories with the same name but internally to sysfs the result is nicer. I am calling the concept of a single directory that looks like multiple directories all at the same path in the filesystem tagged directories. For the networking namespace the set of directories whose contents I need to filter with tags can depend on the presence or absence of hotplug hardware or which modules are currently loaded. Which means I need a simple race free way to setup those directories as tagged. To achieve a reace free design all tagged directories are created and managed by sysfs itself. Users of this interface: - define a type in the sysfs_tag_type enumeration. - call sysfs_register_ns_types with the type and it's operations - sysfs_exit_ns when an individual tag is no longer valid - Implement mount_ns() which returns the ns of the calling process so we can attach it to a sysfs superblock. - Implement ktype.namespace() which returns the ns of a syfs kobject. Everything else is left up to sysfs and the driver layer. For the network namespace mount_ns and namespace() are essentially one line functions, and look to remain that. Tags are currently represented a const void * pointers as that is both generic, prevides enough information for equality comparisons, and is trivial to create for current users, as it is just the existing namespace pointer. The work needed in sysfs is more extensive. At each directory or symlink creating I need to check if the directory it is being created in is a tagged directory and if so generate the appropriate tag to place on the sysfs_dirent. Likewise at each symlink or directory removal I need to check if the sysfs directory it is being removed from is a tagged directory and if so figure out which tag goes along with the name I am deleting. Currently only directories which hold kobjects, and symlinks are supported. There is not enough information in the current file attribute interfaces to give us anything to discriminate on which makes it useless, and there are no potential users which makes it an uninteresting problem to solve. Signed-off-by: Eric W. Biederman Signed-off-by: Benjamin Thery Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index bbb2bb40ee1f..b2c6d1f56e65 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -950,6 +950,7 @@ const void *kobj_ns_initial(enum kobj_ns_type type) void kobj_ns_exit(enum kobj_ns_type type, const void *ns) { + sysfs_exit_ns(type, ns); } -- cgit v1.2.2 From be867b194a3ae3c680c29521287ae49b4d44d420 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 3 May 2010 16:23:15 -0500 Subject: sysfs: Comment sysfs directory tagging logic Add some in-line comments to explain the new infrastructure, which was introduced to support sysfs directory tagging with namespaces. I think an overall description someplace might be good too, but it didn't really seem to fit into Documentation/filesystems/sysfs.txt, which appears more geared toward users, rather than maintainers, of sysfs. (Tejun, please let me know if I can make anything clearer or failed altogether to comment something that should be commented.) Signed-off-by: Serge E. Hallyn Cc: Eric W. Biederman Cc: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index b2c6d1f56e65..f07c57252e82 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -948,6 +948,17 @@ const void *kobj_ns_initial(enum kobj_ns_type type) return ns; } +/* + * kobj_ns_exit - invalidate a namespace tag + * + * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) + * @ns: the actual namespace being invalidated + * + * This is called when a tag is no longer valid. For instance, + * when a network namespace exits, it uses this helper to + * make sure no sb's sysfs_info points to the now-invalidated + * netns. + */ void kobj_ns_exit(enum kobj_ns_type type, const void *ns) { sysfs_exit_ns(type, ns); -- cgit v1.2.2 From 07e98962fa778b9782c8845dfcb06a84cc050744 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:44 -0700 Subject: kobject: Send hotplug events in all network namespaces Open a copy of the uevent kernel socket in each network namespace so we can send uevents in all network namespaces. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 68 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 7b48d44ced6e..9084f2550c2a 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -24,13 +24,19 @@ #include #include #include +#include u64 uevent_seqnum; char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; static DEFINE_SPINLOCK(sequence_lock); -#if defined(CONFIG_NET) -static struct sock *uevent_sock; +#ifdef CONFIG_NET +struct uevent_sock { + struct list_head list; + struct sock *sk; +}; +static LIST_HEAD(uevent_sock_list); +static DEFINE_MUTEX(uevent_sock_mutex); #endif /* the strings here must match the enum in include/linux/kobject.h */ @@ -100,6 +106,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, u64 seq; int i = 0; int retval = 0; +#ifdef CONFIG_NET + struct uevent_sock *ue_sk; +#endif pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -211,7 +220,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #if defined(CONFIG_NET) /* send netlink message */ - if (uevent_sock) { + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; struct sk_buff *skb; size_t len; @@ -241,6 +252,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } else retval = -ENOMEM; } + mutex_unlock(&uevent_sock_mutex); #endif /* call uevent_helper, usually only enabled during early boot */ @@ -320,18 +332,58 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) -static int __init kobject_uevent_init(void) +static int uevent_net_init(struct net *net) { - uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, - 1, NULL, NULL, THIS_MODULE); - if (!uevent_sock) { + struct uevent_sock *ue_sk; + + ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); + if (!ue_sk) + return -ENOMEM; + + ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); + if (!ue_sk->sk) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); return -ENODEV; } - netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + mutex_lock(&uevent_sock_mutex); + list_add_tail(&ue_sk->list, &uevent_sock_list); + mutex_unlock(&uevent_sock_mutex); return 0; } +static void uevent_net_exit(struct net *net) +{ + struct uevent_sock *ue_sk; + + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + if (sock_net(ue_sk->sk) == net) + goto found; + } + mutex_unlock(&uevent_sock_mutex); + return; + +found: + list_del(&ue_sk->list); + mutex_unlock(&uevent_sock_mutex); + + netlink_kernel_release(ue_sk->sk); + kfree(ue_sk); +} + +static struct pernet_operations uevent_net_ops = { + .init = uevent_net_init, + .exit = uevent_net_exit, +}; + +static int __init kobject_uevent_init(void) +{ + netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + return register_pernet_subsys(&uevent_net_ops); +} + + postcore_initcall(kobject_uevent_init); #endif -- cgit v1.2.2 From 5f71a29629b4717445f8b7f5fb8f50c2d262b68e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:47 -0700 Subject: kobj: Send hotplug events in the proper namespace. Utilize netlink_broacast_filtered to allow sending hotplug events in the proper namespace. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 9084f2550c2a..239c8e83fc28 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -83,6 +83,22 @@ out: return ret; } +static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct kobject *kobj = data; + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *sock_ns, *ns; + ns = kobj->ktype->namespace(kobj); + sock_ns = ops->netlink_ns(dsk); + return sock_ns != ns; + } + + return 0; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -244,8 +260,10 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, } NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast(uevent_sock, skb, 0, 1, - GFP_KERNEL); + retval = netlink_broadcast_filtered(uevent_sock, skb, + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS) retval = 0; -- cgit v1.2.2 From 417daa1e8f893fbac88fd395340ba7779fd3926c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:48 -0700 Subject: hotplug: netns aware uevent_helper It only makes sense for uevent_helper to get events in the intial namespaces. It's invocation is not per namespace and it is not clear how we could make it's invocation namespace aware. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 239c8e83fc28..59c15511d58a 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -19,7 +19,7 @@ #include #include #include - +#include #include #include #include @@ -99,6 +99,21 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) return 0; } +static int kobj_usermode_filter(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *init_ns, *ns; + ns = kobj->ktype->namespace(kobj); + init_ns = ops->initial_ns(); + return ns != init_ns; + } + + return 0; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -274,7 +289,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, #endif /* call uevent_helper, usually only enabled during early boot */ - if (uevent_helper[0]) { + if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { char *argv [3]; argv [0] = uevent_helper; -- cgit v1.2.2