From 5db53f3e80dee2d9dff5e534f9e9fe1db17c9936 Mon Sep 17 00:00:00 2001
From: Joern Engel <joern@logfs.org>
Date: Fri, 20 Nov 2009 20:13:39 +0100
Subject: [LogFS] add new flash file system

This is a new flash file system. See
Documentation/filesystems/logfs.txt

Signed-off-by: Joern Engel <joern@logfs.org>
---
 lib/Kconfig  |   3 +
 lib/Makefile |   1 +
 lib/btree.c  | 797 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 801 insertions(+)
 create mode 100644 lib/btree.c

(limited to 'lib')

diff --git a/lib/Kconfig b/lib/Kconfig
index bb1326d3839c..277fbfb233b9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -156,6 +156,9 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
 	tristate
 
+config BTREE
+	boolean
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index 2e78277eff9d..cff82612e98b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -41,6 +41,7 @@ lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 000000000000..41859a820218
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,797 @@
+/*
+ * lib/btree.c	- Simple In-memory B+Tree
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Bits and pieces stolen from Peter Zijlstra's code, which is
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * GPLv2
+ *
+ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
+ *
+ * A relatively simple B+Tree implementation.  I have written it as a learning
+ * exercise to understand how B+Trees work.  Turned out to be useful as well.
+ *
+ * B+Trees can be used similar to Linux radix trees (which don't have anything
+ * in common with textbook radix trees, beware).  Prerequisite for them working
+ * well is that access to a random tree node is much slower than a large number
+ * of operations within each node.
+ *
+ * Disks have fulfilled the prerequisite for a long time.  More recently DRAM
+ * has gained similar properties, as memory access times, when measured in cpu
+ * cycles, have increased.  Cacheline sizes have increased as well, which also
+ * helps B+Trees.
+ *
+ * Compared to radix trees, B+Trees are more efficient when dealing with a
+ * sparsely populated address space.  Between 25% and 50% of the memory is
+ * occupied with valid pointers.  When densely populated, radix trees contain
+ * ~98% pointers - hard to beat.  Very sparse radix trees contain only ~2%
+ * pointers.
+ *
+ * This particular implementation stores pointers identified by a long value.
+ * Storing NULL pointers is illegal, lookup will return NULL when no entry
+ * was found.
+ *
+ * A trick was used that is not commonly found in textbooks.  The lowest
+ * values are to the right, not to the left.  All used slots within a node
+ * are on the left, all unused slots contain NUL values.  Most operations
+ * simply loop once over all slots and terminate on the first NUL.
+ */
+
+#include <linux/btree.h>
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define NODESIZE MAX(L1_CACHE_BYTES, 128)
+
+struct btree_geo {
+	int keylen;
+	int no_pairs;
+	int no_longs;
+};
+
+struct btree_geo btree_geo32 = {
+	.keylen = 1,
+	.no_pairs = NODESIZE / sizeof(long) / 2,
+	.no_longs = NODESIZE / sizeof(long) / 2,
+};
+EXPORT_SYMBOL_GPL(btree_geo32);
+
+#define LONG_PER_U64 (64 / BITS_PER_LONG)
+struct btree_geo btree_geo64 = {
+	.keylen = LONG_PER_U64,
+	.no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
+	.no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo64);
+
+struct btree_geo btree_geo128 = {
+	.keylen = 2 * LONG_PER_U64,
+	.no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
+	.no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo128);
+
+static struct kmem_cache *btree_cachep;
+
+void *btree_alloc(gfp_t gfp_mask, void *pool_data)
+{
+	return kmem_cache_alloc(btree_cachep, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(btree_alloc);
+
+void btree_free(void *element, void *pool_data)
+{
+	kmem_cache_free(btree_cachep, element);
+}
+EXPORT_SYMBOL_GPL(btree_free);
+
+static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
+{
+	unsigned long *node;
+
+	node = mempool_alloc(head->mempool, gfp);
+	memset(node, 0, NODESIZE);
+	return node;
+}
+
+static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++) {
+		if (l1[i] < l2[i])
+			return -1;
+		if (l1[i] > l2[i])
+			return 1;
+	}
+	return 0;
+}
+
+static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
+		size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		dest[i] = src[i];
+	return dest;
+}
+
+static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		s[i] = c;
+	return s;
+}
+
+static void dec_key(struct btree_geo *geo, unsigned long *key)
+{
+	unsigned long val;
+	int i;
+
+	for (i = geo->keylen - 1; i >= 0; i--) {
+		val = key[i];
+		key[i] = val - 1;
+		if (val)
+			break;
+	}
+}
+
+static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
+{
+	return &node[n * geo->keylen];
+}
+
+static void *bval(struct btree_geo *geo, unsigned long *node, int n)
+{
+	return (void *)node[geo->no_longs + n];
+}
+
+static void setkey(struct btree_geo *geo, unsigned long *node, int n,
+		   unsigned long *key)
+{
+	longcpy(bkey(geo, node, n), key, geo->keylen);
+}
+
+static void setval(struct btree_geo *geo, unsigned long *node, int n,
+		   void *val)
+{
+	node[geo->no_longs + n] = (unsigned long) val;
+}
+
+static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
+{
+	longset(bkey(geo, node, n), 0, geo->keylen);
+	node[geo->no_longs + n] = 0;
+}
+
+static inline void __btree_init(struct btree_head *head)
+{
+	head->node = NULL;
+	head->height = 0;
+}
+
+void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
+{
+	__btree_init(head);
+	head->mempool = mempool;
+}
+EXPORT_SYMBOL_GPL(btree_init_mempool);
+
+int btree_init(struct btree_head *head)
+{
+	__btree_init(head);
+	head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
+	if (!head->mempool)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btree_init);
+
+void btree_destroy(struct btree_head *head)
+{
+	mempool_destroy(head->mempool);
+	head->mempool = NULL;
+}
+EXPORT_SYMBOL_GPL(btree_destroy);
+
+void *btree_last(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key)
+{
+	int height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return NULL;
+
+	for ( ; height > 1; height--)
+		node = bval(geo, node, 0);
+
+	longcpy(key, bkey(geo, node, 0), geo->keylen);
+	return bval(geo, node, 0);
+}
+EXPORT_SYMBOL_GPL(btree_last);
+
+static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
+		  unsigned long *key)
+{
+	return longcmp(bkey(geo, node, pos), key, geo->keylen);
+}
+
+static int keyzero(struct btree_geo *geo, unsigned long *key)
+{
+	int i;
+
+	for (i = 0; i < geo->keylen; i++)
+		if (key[i])
+			return 0;
+
+	return 1;
+}
+
+void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key)
+{
+	int i, height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return NULL;
+
+	for ( ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			return NULL;
+		node = bval(geo, node, i);
+		if (!node)
+			return NULL;
+	}
+
+	if (!node)
+		return NULL;
+
+	for (i = 0; i < geo->no_pairs; i++)
+		if (keycmp(geo, node, i, key) == 0)
+			return bval(geo, node, i);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(btree_lookup);
+
+int btree_update(struct btree_head *head, struct btree_geo *geo,
+		 unsigned long *key, void *val)
+{
+	int i, height = head->height;
+	unsigned long *node = head->node;
+
+	if (height == 0)
+		return -ENOENT;
+
+	for ( ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			return -ENOENT;
+		node = bval(geo, node, i);
+		if (!node)
+			return -ENOENT;
+	}
+
+	if (!node)
+		return -ENOENT;
+
+	for (i = 0; i < geo->no_pairs; i++)
+		if (keycmp(geo, node, i, key) == 0) {
+			setval(geo, node, i, val);
+			return 0;
+		}
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(btree_update);
+
+/*
+ * Usually this function is quite similar to normal lookup.  But the key of
+ * a parent node may be smaller than the smallest key of all its siblings.
+ * In such a case we cannot just return NULL, as we have only proven that no
+ * key smaller than __key, but larger than this parent key exists.
+ * So we set __key to the parent key and retry.  We have to use the smallest
+ * such parent key, which is the last parent key we encountered.
+ */
+void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long *__key)
+{
+	int i, height;
+	unsigned long *node, *oldnode;
+	unsigned long *retry_key = NULL, key[geo->keylen];
+
+	if (keyzero(geo, __key))
+		return NULL;
+
+	if (head->height == 0)
+		return NULL;
+retry:
+	longcpy(key, __key, geo->keylen);
+	dec_key(geo, key);
+
+	node = head->node;
+	for (height = head->height ; height > 1; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+		if (i == geo->no_pairs)
+			goto miss;
+		oldnode = node;
+		node = bval(geo, node, i);
+		if (!node)
+			goto miss;
+		retry_key = bkey(geo, oldnode, i);
+	}
+
+	if (!node)
+		goto miss;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		if (keycmp(geo, node, i, key) <= 0) {
+			if (bval(geo, node, i)) {
+				longcpy(__key, bkey(geo, node, i), geo->keylen);
+				return bval(geo, node, i);
+			} else
+				goto miss;
+		}
+	}
+miss:
+	if (retry_key) {
+		__key = retry_key;
+		retry_key = NULL;
+		goto retry;
+	}
+	return NULL;
+}
+
+static int getpos(struct btree_geo *geo, unsigned long *node,
+		unsigned long *key)
+{
+	int i;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		if (keycmp(geo, node, i, key) <= 0)
+			break;
+	}
+	return i;
+}
+
+static int getfill(struct btree_geo *geo, unsigned long *node, int start)
+{
+	int i;
+
+	for (i = start; i < geo->no_pairs; i++)
+		if (!bval(geo, node, i))
+			break;
+	return i;
+}
+
+/*
+ * locate the node at the given level responsible for key (level 1 is a leaf)
+ */
+static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level)
+{
+	unsigned long *node = head->node;
+	int i, height;
+
+	for (height = head->height; height > level; height--) {
+		for (i = 0; i < geo->no_pairs; i++)
+			if (keycmp(geo, node, i, key) <= 0)
+				break;
+
+		if ((i == geo->no_pairs) || !bval(geo, node, i)) {
+			/* right-most key is too large, update it */
+			/* FIXME: If the right-most key on higher levels is
+			 * always zero, this wouldn't be necessary. */
+			i--;
+			setkey(geo, node, i, key);
+		}
+		BUG_ON(i < 0);
+		node = bval(geo, node, i);
+	}
+	BUG_ON(!node);
+	return node;
+}
+
+static int btree_grow(struct btree_head *head, struct btree_geo *geo,
+		      gfp_t gfp)
+{
+	unsigned long *node;
+	int fill;
+
+	node = btree_node_alloc(head, gfp);
+	if (!node)
+		return -ENOMEM;
+	if (head->node) {
+		fill = getfill(geo, head->node, 0);
+		setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
+		setval(geo, node, 0, head->node);
+	}
+	head->node = node;
+	head->height++;
+	return 0;
+}
+
+static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
+{
+	unsigned long *node;
+	int fill;
+
+	if (head->height <= 1)
+		return;
+
+	node = head->node;
+	fill = getfill(geo, node, 0);
+	BUG_ON(fill > 1);
+	head->node = bval(geo, node, 0);
+	head->height--;
+	mempool_free(node, head->mempool);
+}
+
+static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
+			      unsigned long *key, void *val, int level,
+			      gfp_t gfp)
+{
+	unsigned long *node;
+	int i, pos, fill, err;
+
+	BUG_ON(!val);
+	if (head->height < level) {
+		err = btree_grow(head, geo, gfp);
+		if (err)
+			return err;
+	}
+
+retry:
+	node = find_level(head, geo, key, level);
+	pos = getpos(geo, node, key);
+	fill = getfill(geo, node, pos);
+	/* two identical keys are not allowed */
+	BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
+
+	if (fill == geo->no_pairs) {
+		/* need to split node */
+		unsigned long *new;
+
+		new = btree_node_alloc(head, gfp);
+		if (!new)
+			return -ENOMEM;
+		err = btree_insert_level(head, geo,
+				bkey(geo, node, fill / 2 - 1),
+				new, level + 1, gfp);
+		if (err) {
+			mempool_free(new, head->mempool);
+			return err;
+		}
+		for (i = 0; i < fill / 2; i++) {
+			setkey(geo, new, i, bkey(geo, node, i));
+			setval(geo, new, i, bval(geo, node, i));
+			setkey(geo, node, i, bkey(geo, node, i + fill / 2));
+			setval(geo, node, i, bval(geo, node, i + fill / 2));
+			clearpair(geo, node, i + fill / 2);
+		}
+		if (fill & 1) {
+			setkey(geo, node, i, bkey(geo, node, fill - 1));
+			setval(geo, node, i, bval(geo, node, fill - 1));
+			clearpair(geo, node, fill - 1);
+		}
+		goto retry;
+	}
+	BUG_ON(fill >= geo->no_pairs);
+
+	/* shift and insert */
+	for (i = fill; i > pos; i--) {
+		setkey(geo, node, i, bkey(geo, node, i - 1));
+		setval(geo, node, i, bval(geo, node, i - 1));
+	}
+	setkey(geo, node, pos, key);
+	setval(geo, node, pos, val);
+
+	return 0;
+}
+
+int btree_insert(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, void *val, gfp_t gfp)
+{
+	return btree_insert_level(head, geo, key, val, 1, gfp);
+}
+EXPORT_SYMBOL_GPL(btree_insert);
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level);
+static void merge(struct btree_head *head, struct btree_geo *geo, int level,
+		unsigned long *left, int lfill,
+		unsigned long *right, int rfill,
+		unsigned long *parent, int lpos)
+{
+	int i;
+
+	for (i = 0; i < rfill; i++) {
+		/* Move all keys to the left */
+		setkey(geo, left, lfill + i, bkey(geo, right, i));
+		setval(geo, left, lfill + i, bval(geo, right, i));
+	}
+	/* Exchange left and right child in parent */
+	setval(geo, parent, lpos, right);
+	setval(geo, parent, lpos + 1, left);
+	/* Remove left (formerly right) child from parent */
+	btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
+	mempool_free(right, head->mempool);
+}
+
+static void rebalance(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level, unsigned long *child, int fill)
+{
+	unsigned long *parent, *left = NULL, *right = NULL;
+	int i, no_left, no_right;
+
+	if (fill == 0) {
+		/* Because we don't steal entries from a neighbour, this case
+		 * can happen.  Parent node contains a single child, this
+		 * node, so merging with a sibling never happens.
+		 */
+		btree_remove_level(head, geo, key, level + 1);
+		mempool_free(child, head->mempool);
+		return;
+	}
+
+	parent = find_level(head, geo, key, level + 1);
+	i = getpos(geo, parent, key);
+	BUG_ON(bval(geo, parent, i) != child);
+
+	if (i > 0) {
+		left = bval(geo, parent, i - 1);
+		no_left = getfill(geo, left, 0);
+		if (fill + no_left <= geo->no_pairs) {
+			merge(head, geo, level,
+					left, no_left,
+					child, fill,
+					parent, i - 1);
+			return;
+		}
+	}
+	if (i + 1 < getfill(geo, parent, i)) {
+		right = bval(geo, parent, i + 1);
+		no_right = getfill(geo, right, 0);
+		if (fill + no_right <= geo->no_pairs) {
+			merge(head, geo, level,
+					child, fill,
+					right, no_right,
+					parent, i);
+			return;
+		}
+	}
+	/*
+	 * We could also try to steal one entry from the left or right
+	 * neighbor.  By not doing so we changed the invariant from
+	 * "all nodes are at least half full" to "no two neighboring
+	 * nodes can be merged".  Which means that the average fill of
+	 * all nodes is still half or better.
+	 */
+}
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key, int level)
+{
+	unsigned long *node;
+	int i, pos, fill;
+	void *ret;
+
+	if (level > head->height) {
+		/* we recursed all the way up */
+		head->height = 0;
+		head->node = NULL;
+		return NULL;
+	}
+
+	node = find_level(head, geo, key, level);
+	pos = getpos(geo, node, key);
+	fill = getfill(geo, node, pos);
+	if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
+		return NULL;
+	ret = bval(geo, node, pos);
+
+	/* remove and shift */
+	for (i = pos; i < fill - 1; i++) {
+		setkey(geo, node, i, bkey(geo, node, i + 1));
+		setval(geo, node, i, bval(geo, node, i + 1));
+	}
+	clearpair(geo, node, fill - 1);
+
+	if (fill - 1 < geo->no_pairs / 2) {
+		if (level < head->height)
+			rebalance(head, geo, key, level, node, fill - 1);
+		else if (fill - 1 == 1)
+			btree_shrink(head, geo);
+	}
+
+	return ret;
+}
+
+void *btree_remove(struct btree_head *head, struct btree_geo *geo,
+		unsigned long *key)
+{
+	if (head->height == 0)
+		return NULL;
+
+	return btree_remove_level(head, geo, key, 1);
+}
+EXPORT_SYMBOL_GPL(btree_remove);
+
+int btree_merge(struct btree_head *target, struct btree_head *victim,
+		struct btree_geo *geo, gfp_t gfp)
+{
+	unsigned long key[geo->keylen];
+	unsigned long dup[geo->keylen];
+	void *val;
+	int err;
+
+	BUG_ON(target == victim);
+
+	if (!(target->node)) {
+		/* target is empty, just copy fields over */
+		target->node = victim->node;
+		target->height = victim->height;
+		__btree_init(victim);
+		return 0;
+	}
+
+	/* TODO: This needs some optimizations.  Currently we do three tree
+	 * walks to remove a single object from the victim.
+	 */
+	for (;;) {
+		if (!btree_last(victim, geo, key))
+			break;
+		val = btree_lookup(victim, geo, key);
+		err = btree_insert(target, geo, key, val, gfp);
+		if (err)
+			return err;
+		/* We must make a copy of the key, as the original will get
+		 * mangled inside btree_remove. */
+		longcpy(dup, key, geo->keylen);
+		btree_remove(victim, geo, dup);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(btree_merge);
+
+static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
+			       unsigned long *node, unsigned long opaque,
+			       void (*func)(void *elem, unsigned long opaque,
+					    unsigned long *key, size_t index,
+					    void *func2),
+			       void *func2, int reap, int height, size_t count)
+{
+	int i;
+	unsigned long *child;
+
+	for (i = 0; i < geo->no_pairs; i++) {
+		child = bval(geo, node, i);
+		if (!child)
+			break;
+		if (height > 1)
+			count = __btree_for_each(head, geo, child, opaque,
+					func, func2, reap, height - 1, count);
+		else
+			func(child, opaque, bkey(geo, node, i), count++,
+					func2);
+	}
+	if (reap)
+		mempool_free(node, head->mempool);
+	return count;
+}
+
+static void empty(void *elem, unsigned long opaque, unsigned long *key,
+		  size_t index, void *func2)
+{
+}
+
+void visitorl(void *elem, unsigned long opaque, unsigned long *key,
+	      size_t index, void *__func)
+{
+	visitorl_t func = __func;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitorl);
+
+void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
+	       size_t index, void *__func)
+{
+	visitor32_t func = __func;
+	u32 *key = (void *)__key;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor32);
+
+void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
+	       size_t index, void *__func)
+{
+	visitor64_t func = __func;
+	u64 *key = (void *)__key;
+
+	func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor64);
+
+void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
+		size_t index, void *__func)
+{
+	visitor128_t func = __func;
+	u64 *key = (void *)__key;
+
+	func(elem, opaque, key[0], key[1], index);
+}
+EXPORT_SYMBOL_GPL(visitor128);
+
+size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
+		     unsigned long opaque,
+		     void (*func)(void *elem, unsigned long opaque,
+		     		  unsigned long *key,
+		     		  size_t index, void *func2),
+		     void *func2)
+{
+	size_t count = 0;
+
+	if (!func2)
+		func = empty;
+	if (head->node)
+		count = __btree_for_each(head, geo, head->node, opaque, func,
+				func2, 0, head->height, 0);
+	return count;
+}
+EXPORT_SYMBOL_GPL(btree_visitor);
+
+size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
+			  unsigned long opaque,
+			  void (*func)(void *elem, unsigned long opaque,
+				       unsigned long *key,
+				       size_t index, void *func2),
+			  void *func2)
+{
+	size_t count = 0;
+
+	if (!func2)
+		func = empty;
+	if (head->node)
+		count = __btree_for_each(head, geo, head->node, opaque, func,
+				func2, 1, head->height, 0);
+	__btree_init(head);
+	return count;
+}
+EXPORT_SYMBOL_GPL(btree_grim_visitor);
+
+static int __init btree_module_init(void)
+{
+	btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
+			SLAB_HWCACHE_ALIGN, NULL);
+	return 0;
+}
+
+static void __exit btree_module_exit(void)
+{
+	kmem_cache_destroy(btree_cachep);
+}
+
+/* If core code starts using btree, initialization should happen even earlier */
+module_init(btree_module_init);
+module_exit(btree_module_exit);
+
+MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.2


From 39d997b514e12d5aff0dca206eb8996b3957927e Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Mon, 21 Dec 2009 16:20:16 -0800
Subject: x86, core: Optimize hweight32()

Optimize hweight32 by using the same technique as hweight64.

The proof of this technique can be found in the commit log for
f9b4192923fa6e38331e88214b1fe5fc21583fcc ("bitops: hweight()
speedup").

The userspace benchmark on x86_32 showed 20% speedup with
bitmap_weight() which uses hweight32 to count bits for each
unsigned long on 32bit architectures.

 int main(void)
 {
	#define SZ (1024 * 1024 * 512)

	static DECLARE_BITMAP(bitmap, SZ) = {
	        [0 ... 100] = 1,
	};

	return bitmap_weight(bitmap, SZ);
 }

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1258603932-4590-1-git-send-email-akinobu.mita@gmail.com>
[ only x86 sets ARCH_HAS_FAST_MULTIPLIER so we do this via the x86 tree]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/hweight.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'lib')

diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb129..63ee4eb1228d 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -11,11 +11,18 @@
 
 unsigned int hweight32(unsigned int w)
 {
+#ifdef ARCH_HAS_FAST_MULTIPLIER
+	w -= (w >> 1) & 0x55555555;
+	w =  (w & 0x33333333) + ((w >> 2) & 0x33333333);
+	w =  (w + (w >> 4)) & 0x0f0f0f0f;
+	return (w * 0x01010101) >> 24;
+#else
 	unsigned int res = w - ((w >> 1) & 0x55555555);
 	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
 	res = (res + (res >> 4)) & 0x0F0F0F0F;
 	res = res + (res >> 8);
 	return (res + (res >> 16)) & 0x000000FF;
+#endif
 }
 EXPORT_SYMBOL(hweight32);
 
-- 
cgit v1.2.2


From bc7259a2ce764ea16200eb9e53f6e136e918d065 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 7 Jan 2010 11:43:50 +0000
Subject: lib/vsprintf.c: Add %pMF to format FDDI bit reversed MAC addresses

On Mon, 2010-01-04 at 23:43 +0000, Maciej W. Rozycki wrote:
> The example below shows an address, and the sequence of bits or symbols
> that would be transmitted when the address is used in the Source Address
> or Destination Address fields on the MAC header.  The transmission line
> shows the address bits in the order transmitted, from left to right.  For
> IEEE 802 LANs these correspond to actual bits on the medium.  The FDDI
> symbols line shows how the FDDI PHY sends the address bits as encoded
> symbols.
>
>         MSB:            35:7B:12:00:00:01
>         Canonical:      AC-DE-48-00-00-80
>         Transmission:   00110101 01111011 00010010 00000000 00000000 00000001
>         FDDI Symbols:   35 7B 12 00 00 01"
>
> Please note that this address has its group bit clear.
>
>  This notation is also defined in the "FDDI MEDIA ACCESS CONTROL-2
> (MAC-2)" (X3T9/92-120) document although that book does not have a need
> to use the MSB form and it's skipped.

Adds 6 bytes to object size for x86

New:
$ size lib/vsprintf.o
   text	   data	    bss	    dec	    hex	filename
   8664	      0	      2	   8666	   21da	lib/vsprintf.o
Old:
$ size lib/vsprintf.o
   text    data     bss     dec     hex filename
   8658       0       2    8660    21d4 lib/vsprintf.o

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/vsprintf.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d4996cf46eb6..dc48d2b32ebd 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -25,6 +25,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ioport.h>
+#include <linux/bitrev.h>
 #include <net/addrconf.h>
 
 #include <asm/page.h>		/* for PAGE_SIZE */
@@ -681,11 +682,21 @@ static char *mac_address_string(char *buf, char *end, u8 *addr,
 	char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
 	char *p = mac_addr;
 	int i;
+	bool bitrev;
+	char separator;
+
+	if (fmt[1] == 'F') {		/* FDDI canonical format */
+		bitrev = true;
+		separator = '-';
+	} else {
+		bitrev = false;
+		separator = ':';
+	}
 
 	for (i = 0; i < 6; i++) {
-		p = pack_hex_byte(p, addr[i]);
+		p = pack_hex_byte(p, bitrev ? bitrev8(addr[i]) : addr[i]);
 		if (fmt[0] == 'M' && i != 5)
-			*p++ = ':';
+			*p++ = separator;
 	}
 	*p = '\0';
 
@@ -896,6 +907,10 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
  * - 'M' For a 6-byte MAC address, it prints the address in the
  *       usual colon-separated hex notation
  * - 'm' For a 6-byte MAC address, it prints the hex address without colons
+ * - 'MF' For a 6-byte MAC FDDI address, it prints the address
+ *       with a dash-separated hex notation with bit reversed bytes
+ * - 'mF' For a 6-byte MAC FDDI address, it prints the address
+ *       in hex notation without separators with bit reversed bytes
  * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
  *       IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
  *       IPv6 uses colon separated network-order 16 bit hex with leading 0's
@@ -939,6 +954,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		return resource_string(buf, end, ptr, spec, fmt);
 	case 'M':			/* Colon separated: 00:01:02:03:04:05 */
 	case 'm':			/* Contiguous: 000102030405 */
+					/* [mM]F (FDDI, bit reversed) */
 		return mac_address_string(buf, end, ptr, spec, fmt);
 	case 'I':			/* Formatted IP supported
 					 * 4:	1.2.3.4
-- 
cgit v1.2.2


From c8e000604bce02a87742240a9b716a0f1b680c0b Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 11 Jan 2010 00:44:14 -0800
Subject: lib: Kill bit-reversed FDDI MAC output case, it's bogus.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/vsprintf.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index dc48d2b32ebd..e83e3e79a989 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -25,7 +25,6 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ioport.h>
-#include <linux/bitrev.h>
 #include <net/addrconf.h>
 
 #include <asm/page.h>		/* for PAGE_SIZE */
@@ -682,19 +681,16 @@ static char *mac_address_string(char *buf, char *end, u8 *addr,
 	char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
 	char *p = mac_addr;
 	int i;
-	bool bitrev;
 	char separator;
 
 	if (fmt[1] == 'F') {		/* FDDI canonical format */
-		bitrev = true;
 		separator = '-';
 	} else {
-		bitrev = false;
 		separator = ':';
 	}
 
 	for (i = 0; i < 6; i++) {
-		p = pack_hex_byte(p, bitrev ? bitrev8(addr[i]) : addr[i]);
+		p = pack_hex_byte(p, addr[i]);
 		if (fmt[0] == 'M' && i != 5)
 			*p++ = separator;
 	}
@@ -908,9 +904,7 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
  *       usual colon-separated hex notation
  * - 'm' For a 6-byte MAC address, it prints the hex address without colons
  * - 'MF' For a 6-byte MAC FDDI address, it prints the address
- *       with a dash-separated hex notation with bit reversed bytes
- * - 'mF' For a 6-byte MAC FDDI address, it prints the address
- *       in hex notation without separators with bit reversed bytes
+ *       with a dash-separated hex notation
  * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
  *       IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
  *       IPv6 uses colon separated network-order 16 bit hex with leading 0's
-- 
cgit v1.2.2


From 0159f24ee764927bf44c1a25473bd4517febd21c Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 13 Jan 2010 20:23:30 -0800
Subject: lib/vsprintf.c: Add IPV4 options %pI4[hnbl] for host, network, big
 and little endian

This should allow the removal of the #defines and uses
of NIPQUAD and NIPQUAD_FMT

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/vsprintf.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index e83e3e79a989..add0446dd921 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -699,13 +699,37 @@ static char *mac_address_string(char *buf, char *end, u8 *addr,
 	return string(buf, end, mac_addr, spec);
 }
 
-static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
+static char *ip4_string(char *p, const u8 *addr, const char *fmt)
 {
 	int i;
-
+	bool leading_zeros = (fmt[0] == 'i');
+	int index;
+	int step;
+
+	switch (fmt[2]) {
+	case 'h':
+#ifdef __BIG_ENDIAN
+		index = 0;
+		step = 1;
+#else
+		index = 3;
+		step = -1;
+#endif
+		break;
+	case 'l':
+		index = 3;
+		step = -1;
+		break;
+	case 'n':
+	case 'b':
+	default:
+		index = 0;
+		step = 1;
+		break;
+	}
 	for (i = 0; i < 4; i++) {
 		char temp[3];	/* hold each IP quad in reverse order */
-		int digits = put_dec_trunc(temp, addr[i]) - temp;
+		int digits = put_dec_trunc(temp, addr[index]) - temp;
 		if (leading_zeros) {
 			if (digits < 3)
 				*p++ = '0';
@@ -717,6 +741,7 @@ static char *ip4_string(char *p, const u8 *addr, bool leading_zeros)
 			*p++ = temp[digits];
 		if (i < 3)
 			*p++ = '.';
+		index += step;
 	}
 	*p = '\0';
 
@@ -796,7 +821,7 @@ static char *ip6_compressed_string(char *p, const char *addr)
 	if (useIPv4) {
 		if (needcolon)
 			*p++ = ':';
-		p = ip4_string(p, &in6.s6_addr[12], false);
+		p = ip4_string(p, &in6.s6_addr[12], "I4");
 	}
 	*p = '\0';
 
@@ -836,7 +861,7 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr,
 {
 	char ip4_addr[sizeof("255.255.255.255")];
 
-	ip4_string(ip4_addr, addr, fmt[0] == 'i');
+	ip4_string(ip4_addr, addr, fmt);
 
 	return string(buf, end, ip4_addr, spec);
 }
@@ -911,6 +936,7 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
  * - 'i' [46] for 'raw' IPv4/IPv6 addresses
  *       IPv6 omits the colons (01020304...0f)
  *       IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
+ * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
  * - 'I6c' for IPv6 addresses printed as specified by
  *       http://www.ietf.org/id/draft-kawamura-ipv6-text-representation-03.txt
  * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
-- 
cgit v1.2.2


From 4c54005ca438a8b46dd542b497d4f0dc2ca375e8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 14 Jan 2010 16:10:57 -0800
Subject: rcu: 1Q2010 update for RCU documentation

Add expedited functions.  Review documentation and update
obsolete verbiage.  Also fix the advice for the RCU CPU-stall
kernel configuration parameter, and document RCU CPU-stall
warnings.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <12635142581866-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/Kconfig.debug | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..6bf97d176326 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -765,9 +765,9 @@ config RCU_CPU_STALL_DETECTOR
 	  CPUs are delaying the current grace period, but only when
 	  the grace period extends for excessive time periods.
 
-	  Say Y if you want RCU to perform such checks.
+	  Say N if you want to disable such checks.
 
-	  Say N if you are unsure.
+	  Say Y if you are unsure.
 
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
-- 
cgit v1.2.2


From aeb583d08172e038552bdefe0a79a9aa9e2ecd7c Mon Sep 17 00:00:00 2001
From: Thiago Farina <tfransosi@gmail.com>
Date: Mon, 18 Jan 2010 18:57:33 -0500
Subject: lib/dma-debug.c: mark file-local struct symbol static.

warning: symbol 'filter_fops' was not declared. Should it be static?

Signed-off-by: Thiago Farina <tfransosi@gmail.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 lib/dma-debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 739974460c32..e03995851e60 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -587,7 +587,7 @@ out_unlock:
 	return count;
 }
 
-const struct file_operations filter_fops = {
+static const struct file_operations filter_fops = {
 	.read  = filter_read,
 	.write = filter_write,
 };
-- 
cgit v1.2.2


From 660e2acad81c19b404f7d7d06e57a6d5e6ce7426 Mon Sep 17 00:00:00 2001
From: Chris Smith <chris.smith@st.com>
Date: Wed, 27 Jan 2010 22:03:11 +0900
Subject: sh: kmemleak support.

Enables support for kmemleak on sh.

Signed-off-by: Chris Smith <chris.smith@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..d62e3cdab357 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -355,7 +355,7 @@ config SLUB_STATS
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
 	depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
-		(X86 || ARM || PPC || S390)
+		(X86 || ARM || PPC || S390 || SUPERH)
 
 	select DEBUG_FS if SYSFS
 	select STACKTRACE if STACKTRACE_SUPPORT
-- 
cgit v1.2.2


From 859ddf09743a8cc680af33f7259ccd0fd36bfe9d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Feb 2010 13:43:58 -0800
Subject: idr: fix a critical misallocation bug

Eric Paris located a bug in idr.  With IDR_BITS of 6, it grows to three
layers when id 4096 is first allocated.  When that happens, idr wraps
incorrectly and searches the idr array ignoring the high bits.  The
following test code from Eric demonstrates the bug nicely.

#include <linux/idr.h>
#include <linux/kernel.h>
#include <linux/module.h>

static DEFINE_IDR(test_idr);

int init_module(void)
{
	int ret, forty95, forty96;
	void *addr;

	/* add 2 entries both with 4095 as the start address */
again1:
	if (!idr_pre_get(&test_idr, GFP_KERNEL))
		return -ENOMEM;
	ret = idr_get_new_above(&test_idr, (void *)4095, 4095, &forty95);
	if (ret) {
		if (ret == -EAGAIN)
			goto again1;
		return ret;
	}
	if (forty95 != 4095)
		printk(KERN_ERR "hmmm, forty95=%d\n", forty95);

again2:
	if (!idr_pre_get(&test_idr, GFP_KERNEL))
		return -ENOMEM;
	ret = idr_get_new_above(&test_idr, (void *)4096, 4095, &forty96);
	if (ret) {
		if (ret == -EAGAIN)
			goto again2;
		return ret;
	}
	if (forty96 != 4096)
		printk(KERN_ERR "hmmm, forty96=%d\n", forty96);

	/* try to find the 2 entries, noticing that 4096 broke */
	addr = idr_find(&test_idr, forty95);
	if ((int)addr != forty95)
		printk(KERN_ERR "hmmm, after find forty95=%d addr=%d\n", forty95, (int)addr);
	addr = idr_find(&test_idr, forty96);
	if ((int)addr != forty96)
		printk(KERN_ERR "hmmm, after find forty96=%d addr=%d\n", forty96, (int)addr);
	/* really weird, the entry which should be at 4096 is actually at 0!! */
	addr = idr_find(&test_idr, 0);
	if ((int)addr)
		printk(KERN_ERR "found an entry at id=0 for addr=%d\n", (int)addr);

	idr_remove(&test_idr, forty95);
	idr_remove(&test_idr, forty96);

	return 0;
}

void cleanup_module(void)
{
}

MODULE_AUTHOR("Eric Paris <eparis@redhat.com>");
MODULE_DESCRIPTION("Simple idr test");
MODULE_LICENSE("GPL");

This happens because when sub_alloc() back tracks it doesn't always do it
step-by-step while the over-the-limit detection assumes step-by-step
backtracking.  The logic in sub_alloc() looks like the following.

  restart:
    clear pa[top level + 1] for end cond detection
    l = top level
    while (true) {
	search for empty slot at this level
	if (not found) {
	    push id to the next possible value
	    l++
A:	    if (pa[l] is clear)
	        failed, return asking caller to grow the tree
	    if (going up 1 level gives more slots to search)
	        continue the while loop above with the incremented l
	    else
C:	        goto restart
	}
	adjust id accordingly to the found slot
	if (l == 0)
	    return found id;
	create lower level if not there yet
	record pa[l] and l--
    }

Test A is the fail exit condition but this assumes that failure is
propagated upward one level at a time.  The C optimization path breaks
that assumption and restarts the whole thing with a start value which is
above the possible limit with the current layers.  sub_alloc() assumes the
start id value is inside the limit when called and test A is the only exit
condition check, so it ends up searching for an empty slot while ignoring
the high set bit.

So, for the 4095->4096 test, the level0 search fails but pa[1] contains a
valid pointer.  However, going up 1 level wouldn't give any more empty
slots, so it takes C, and when the whole thing restarts nobody notices the
high bit set beyond the top level.

This patch fixes the bug by changing the fail exit condition check to full
id limit check.

Based-on-patch-from: Eric Paris <eparis@redhat.com>
Reported-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 1cac726c44bc..ba7d37cf7847 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -140,8 +140,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 	id = *starting_id;
  restart:
 	p = idp->top;
-	l = idp->layers;
-	pa[l--] = NULL;
+	l = p->layer;
 	while (1) {
 		/*
 		 * We run around this while until we reach the leaf node...
@@ -155,8 +154,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			oid = id;
 			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
 
-			/* if already at the top layer, we need to grow */
-			if (!(p = pa[l])) {
+			/* did id go over the limit? */
+			if (id >= (1 << (idp->layers * IDR_BITS))) {
 				*starting_id = id;
 				return IDR_NEED_TO_GROW;
 			}
-- 
cgit v1.2.2


From 24551f64d47af9539a7f324343bffeea09d9dcfa Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Tue, 12 Jan 2010 21:25:24 +0000
Subject: lmb: Add lmb_free()

We can free memory allocated with lmb_alloc() by removing it from the
list of reserved LMBs. Rework lmb_remove() to allow that possibility
and add lmb_free() which exploits it.

BenH: Removed some useless parentheses

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 lib/lmb.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/lmb.c b/lib/lmb.c
index 9cee17142b2c..b1fc52606524 100644
--- a/lib/lmb.c
+++ b/lib/lmb.c
@@ -205,9 +205,8 @@ long lmb_add(u64 base, u64 size)
 
 }
 
-long lmb_remove(u64 base, u64 size)
+static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size)
 {
-	struct lmb_region *rgn = &(lmb.memory);
 	u64 rgnbegin, rgnend;
 	u64 end = base + size;
 	int i;
@@ -254,6 +253,16 @@ long lmb_remove(u64 base, u64 size)
 	return lmb_add_region(rgn, end, rgnend - end);
 }
 
+long lmb_remove(u64 base, u64 size)
+{
+	return __lmb_remove(&lmb.memory, base, size);
+}
+
+long __init lmb_free(u64 base, u64 size)
+{
+	return __lmb_remove(&lmb.reserved, base, size);
+}
+
 long __init lmb_reserve(u64 base, u64 size)
 {
 	struct lmb_region *_rgn = &lmb.reserved;
-- 
cgit v1.2.2


From 6f14a668f1a8b715a6e855f4e32705e54a6e86a1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 4 Feb 2010 17:57:37 +0900
Subject: idr: revert misallocation bug fix

Commit 859ddf09743a8cc680af33f7259ccd0fd36bfe9d tried to fix
misallocation bug but broke full bit marking by not clearing
pa[idp->layers] and also is causing X failures due to lookup failure
in drm code.  The cause of the latter hasn't been found yet.  Revert
the fix for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index ba7d37cf7847..1cac726c44bc 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -140,7 +140,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 	id = *starting_id;
  restart:
 	p = idp->top;
-	l = p->layer;
+	l = idp->layers;
+	pa[l--] = NULL;
 	while (1) {
 		/*
 		 * We run around this while until we reach the leaf node...
@@ -154,8 +155,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			oid = id;
 			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
 
-			/* did id go over the limit? */
-			if (id >= (1 << (idp->layers * IDR_BITS))) {
+			/* if already at the top layer, we need to grow */
+			if (!(p = pa[l])) {
 				*starting_id = id;
 				return IDR_NEED_TO_GROW;
 			}
-- 
cgit v1.2.2


From 17d9ddc72fb8bba0d4f67868c9c612e472a594a9 Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Wed, 10 Feb 2010 15:23:44 -0800
Subject: rbtree: Add support for augmented rbtrees

Add support for augmented rbtrees in core rbtree code.

This will be used in subsequent patches, in x86 PAT code, which needs
interval trees to efficiently keep track of PAT ranges.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
LKML-Reference: <20100210232343.GA11465@linux-os.sc.intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/rbtree.c | 48 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/rbtree.c b/lib/rbtree.c
index e2aa3be29858..15e10b1afdd2 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,6 +44,11 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 	else
 		root->rb_node = right;
 	rb_set_parent(node, right);
+
+	if (root->augment_cb) {
+		root->augment_cb(node);
+		root->augment_cb(right);
+	}
 }
 
 static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
@@ -67,12 +72,20 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
 	else
 		root->rb_node = left;
 	rb_set_parent(node, left);
+
+	if (root->augment_cb) {
+		root->augment_cb(node);
+		root->augment_cb(left);
+	}
 }
 
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
 {
 	struct rb_node *parent, *gparent;
 
+	if (root->augment_cb)
+		root->augment_cb(node);
+
 	while ((parent = rb_parent(node)) && rb_is_red(parent))
 	{
 		gparent = rb_parent(parent);
@@ -227,12 +240,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 	else
 	{
 		struct rb_node *old = node, *left;
+		int old_parent_cb = 0;
+		int successor_parent_cb = 0;
 
 		node = node->rb_right;
 		while ((left = node->rb_left) != NULL)
 			node = left;
 
 		if (rb_parent(old)) {
+			old_parent_cb = 1;
 			if (rb_parent(old)->rb_left == old)
 				rb_parent(old)->rb_left = node;
 			else
@@ -247,8 +263,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 		if (parent == old) {
 			parent = node;
 		} else {
+			successor_parent_cb = 1;
 			if (child)
 				rb_set_parent(child, parent);
+
 			parent->rb_left = child;
 
 			node->rb_right = old->rb_right;
@@ -259,6 +277,24 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 		node->rb_left = old->rb_left;
 		rb_set_parent(old->rb_left, node);
 
+		if (root->augment_cb) {
+			/*
+			 * Here, three different nodes can have new children.
+			 * The parent of the successor node that was selected
+			 * to replace the node to be erased.
+			 * The node that is getting erased and is now replaced
+			 * by its successor.
+			 * The parent of the node getting erased-replaced.
+			 */
+			if (successor_parent_cb)
+				root->augment_cb(parent);
+
+			root->augment_cb(node);
+
+			if (old_parent_cb)
+				root->augment_cb(rb_parent(old));
+		}
+
 		goto color;
 	}
 
@@ -267,15 +303,19 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 
 	if (child)
 		rb_set_parent(child, parent);
-	if (parent)
-	{
+
+	if (parent) {
 		if (parent->rb_left == node)
 			parent->rb_left = child;
 		else
 			parent->rb_right = child;
-	}
-	else
+
+		if (root->augment_cb)
+			root->augment_cb(parent);
+
+	} else {
 		root->rb_node = child;
+	}
 
  color:
 	if (color == RB_BLACK)
-- 
cgit v1.2.2


From d2e7276b6b5e4bc2148891a056d5862c5314342d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 22 Feb 2010 12:44:19 -0800
Subject: idr: fix a critical misallocation bug, take#2

This is a retry of the reverted 859ddf09743a8cc680af33f7259ccd0fd36bfe9d
("idr: fix a critical misallocation bug"), which contained two bugs.

* pa[idp->layers] should be cleared even if it's not used by
  sub_alloc() because it's used by idr_mark_full().

* The original condition check also assigned pa[l] to p which the new
  code didn't do thus leaving p pointing at the wrong layer.

Both problems have been fixed and the idr code has received a good amount
of testing using a userland testing setup where a simple bitmap allocator
is run in parallel to verify the result of idr allocation.

The bug this patch fixes is caused by sub_alloc() optimization path
bypassing out-of-room condition check and restarting allocation loop
with starting value higher than maximum allowed value.  For detailed
description, please read commit message of 859ddf09.

Signed-off-by: Tejun Heo <tj@kernel.org>
Based-on-patch-from: Eric Paris <eparis@redhat.com>
Reported-by: Eric Paris <eparis@redhat.com>
Tested-by: Stefan Lippers-Hollmann <s.l-h@gmx.de>
Tested-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 1cac726c44bc..0dc782216d4b 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -156,10 +156,12 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
 
 			/* if already at the top layer, we need to grow */
-			if (!(p = pa[l])) {
+			if (id >= 1 << (idp->layers * IDR_BITS)) {
 				*starting_id = id;
 				return IDR_NEED_TO_GROW;
 			}
+			p = pa[l];
+			BUG_ON(!p);
 
 			/* If we need to go up one layer, continue the
 			 * loop; otherwise, restart from the top.
-- 
cgit v1.2.2


From 632ee200130899252508c478ad0e808222573fbc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:45 -0800
Subject: rcu: Introduce lockdep-based checking to RCU read-side primitives

Inspection is proving insufficient to catch all RCU misuses,
which is understandable given that rcu_dereference() might be
protected by any of four different flavors of RCU (RCU, RCU-bh,
RCU-sched, and SRCU), and might also/instead be protected by any
of a number of locking primitives. It is therefore time to
enlist the aid of lockdep.

This set of patches is inspired by earlier work by Peter
Zijlstra and Thomas Gleixner, and takes the following approach:

o	Set up separate lockdep classes for RCU, RCU-bh, and RCU-sched.

o	Set up separate lockdep classes for each instance of SRCU.

o	Create primitives that check for being in an RCU read-side
	critical section.  These return exact answers if lockdep is
	fully enabled, but if unsure, report being in an RCU read-side
	critical section.  (We want to avoid false positives!)
	The primitives are:

	For RCU: rcu_read_lock_held(void)

	For RCU-bh: rcu_read_lock_bh_held(void)

	For RCU-sched: rcu_read_lock_sched_held(void)

	For SRCU: srcu_read_lock_held(struct srcu_struct *sp)

o	Add rcu_dereference_check(), which takes a second argument
	in which one places a boolean expression based on the above
	primitives and/or lockdep_is_held().

o	A new kernel configuration parameter, CONFIG_PROVE_RCU, enables
	rcu_dereference_check().  This depends on CONFIG_PROVE_LOCKING,
	and should be quite helpful during the transition period while
	CONFIG_PROVE_RCU-unaware patches are in flight.

The existing rcu_dereference() primitive does no checking, but
upcoming patches will change that.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-1-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/Kconfig.debug | 12 ++++++++++++
 lib/debug_locks.c |  1 +
 2 files changed, 13 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6bf97d176326..6af20a8a0a54 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -499,6 +499,18 @@ config PROVE_LOCKING
 
 	 For more details, see Documentation/lockdep-design.txt.
 
+config PROVE_RCU
+	bool "RCU debugging: prove RCU correctness"
+	depends on PROVE_LOCKING
+	default n
+	help
+	 This feature enables lockdep extensions that check for correct
+	 use of RCU APIs.  This is currently under development.  Say Y
+	 if you want to debug RCU usage or help work on the PROVE_RCU
+	 feature.
+
+	 Say N if you are unsure.
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index bc3b11731b9c..5bf0020b9248 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -23,6 +23,7 @@
  * shut up after that.
  */
 int debug_locks = 1;
+EXPORT_SYMBOL_GPL(debug_locks);
 
 /*
  * The locking-testsuite uses <debug_locks_silent> to get a
-- 
cgit v1.2.2


From 2676a58c980b7ef076cc9bbff3fd8c9d2d5417ea Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:54 -0800
Subject: radix-tree: Disable RCU lockdep checking in radix tree

Because the radix tree is used with many different locking
designs, we cannot do any effective checking without changing
the radix-tree APIs. It might make sense to do this later, but
only if the RCU lockdep checking proves itself sufficiently
valuable.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-10-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/radix-tree.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'lib')

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 92cdd9936e3d..6b9670d6bbf9 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -364,7 +364,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 	unsigned int height, shift;
 	struct radix_tree_node *node, **slot;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (node == NULL)
 		return NULL;
 
@@ -384,7 +384,7 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root,
 	do {
 		slot = (struct radix_tree_node **)
 			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
-		node = rcu_dereference(*slot);
+		node = rcu_dereference_raw(*slot);
 		if (node == NULL)
 			return NULL;
 
@@ -568,7 +568,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 	if (!root_tag_get(root, tag))
 		return 0;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (node == NULL)
 		return 0;
 
@@ -602,7 +602,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 			BUG_ON(ret && saw_unset_tag);
 			return !!ret;
 		}
-		node = rcu_dereference(node->slots[offset]);
+		node = rcu_dereference_raw(node->slots[offset]);
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
@@ -711,7 +711,7 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 		}
 
 		shift -= RADIX_TREE_MAP_SHIFT;
-		slot = rcu_dereference(slot->slots[i]);
+		slot = rcu_dereference_raw(slot->slots[i]);
 		if (slot == NULL)
 			goto out;
 	}
@@ -758,7 +758,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 	unsigned long cur_index = first_index;
 	unsigned int ret;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
@@ -787,7 +787,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			slot = *(((void ***)results)[ret + i]);
 			if (!slot)
 				continue;
-			results[ret + nr_found] = rcu_dereference(slot);
+			results[ret + nr_found] = rcu_dereference_raw(slot);
 			nr_found++;
 		}
 		ret += nr_found;
@@ -826,7 +826,7 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
 	unsigned long cur_index = first_index;
 	unsigned int ret;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
@@ -915,7 +915,7 @@ __lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
 			}
 		}
 		shift -= RADIX_TREE_MAP_SHIFT;
-		slot = rcu_dereference(slot->slots[i]);
+		slot = rcu_dereference_raw(slot->slots[i]);
 		if (slot == NULL)
 			break;
 	}
@@ -951,7 +951,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 	if (!root_tag_get(root, tag))
 		return 0;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
@@ -980,7 +980,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 			slot = *(((void ***)results)[ret + i]);
 			if (!slot)
 				continue;
-			results[ret + nr_found] = rcu_dereference(slot);
+			results[ret + nr_found] = rcu_dereference_raw(slot);
 			nr_found++;
 		}
 		ret += nr_found;
@@ -1020,7 +1020,7 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
 	if (!root_tag_get(root, tag))
 		return 0;
 
-	node = rcu_dereference(root->rnode);
+	node = rcu_dereference_raw(root->rnode);
 	if (!node)
 		return 0;
 
-- 
cgit v1.2.2


From 96be753af91fc9d582450a84722f6a6721d218ad Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:04:55 -0800
Subject: idr: Apply lockdep-based diagnostics to rcu_dereference() uses

Because idr can be used with any of a number of locks or with
any flavor of RCU, just disable the lockdep-based diagnostics.
If idr needs diagnostics, the check expression will need to be
passed into the relevant idr primitives as an additional
argument.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-11-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/idr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 0dc782216d4b..2eb1dca03681 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -504,7 +504,7 @@ void *idr_find(struct idr *idp, int id)
 	int n;
 	struct idr_layer *p;
 
-	p = rcu_dereference(idp->top);
+	p = rcu_dereference_raw(idp->top);
 	if (!p)
 		return NULL;
 	n = (p->layer+1) * IDR_BITS;
@@ -519,7 +519,7 @@ void *idr_find(struct idr *idp, int id)
 	while (n > 0 && p) {
 		n -= IDR_BITS;
 		BUG_ON(n != p->layer*IDR_BITS);
-		p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+		p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
 	}
 	return((void *)p);
 }
@@ -552,7 +552,7 @@ int idr_for_each(struct idr *idp,
 	struct idr_layer **paa = &pa[0];
 
 	n = idp->layers * IDR_BITS;
-	p = rcu_dereference(idp->top);
+	p = rcu_dereference_raw(idp->top);
 	max = 1 << n;
 
 	id = 0;
@@ -560,7 +560,7 @@ int idr_for_each(struct idr *idp,
 		while (n > 0 && p) {
 			n -= IDR_BITS;
 			*paa++ = p;
-			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
 		}
 
 		if (p) {
-- 
cgit v1.2.2


From 1ed509a225008c9e8c0644fbd22168e09a7383a0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 22 Feb 2010 17:05:05 -0800
Subject: rcu: Add RCU_CPU_STALL_VERBOSE to dump detailed per-task information

When RCU detects a grace-period stall, it currently just prints
out the PID of any tasks doing the stalling.  This patch adds
RCU_CPU_STALL_VERBOSE, which enables the more-verbose reporting
from sched_show_task().

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
LKML-Reference: <1266887105-1528-21-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/Kconfig.debug | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 6af20a8a0a54..4cdab452bfe2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -781,6 +781,18 @@ config RCU_CPU_STALL_DETECTOR
 
 	  Say Y if you are unsure.
 
+config RCU_CPU_STALL_VERBOSE
+	bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
+	depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
+	default n
+	help
+	  This option causes RCU to printk detailed per-task information
+	  for any tasks that are stalling the current RCU grace period.
+
+	  Say N if you are unsure.
+
+	  Say Y if you want to enable such checks.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
-- 
cgit v1.2.2


From 4d1ee80f3a7df7fe9cdec26e651e6201c45b10d4 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 29 Jan 2010 20:59:17 +0000
Subject: idr: export idr_get_next()

idr_get_next() was accidentally not exported when added.  It is about
to be used by mtdcore, which may be built as a module.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 lib/idr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 1cac726c44bc..21f9266d1e41 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -621,7 +621,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 	}
 	return NULL;
 }
-
+EXPORT_SYMBOL(idr_get_next);
 
 
 /**
-- 
cgit v1.2.2


From 86a8938078a8bb518c5376de493e348c7490d506 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 24 Feb 2010 10:54:24 +0100
Subject: lib: Add self-test for atomic64_t

This patch adds a boot-time self-test for atomic64_t.

This has been used to test the later changes in this patchset.

Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267005265-27958-4-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/Kconfig.debug   |   7 +++
 lib/Makefile        |   2 +
 lib/atomic64_test.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 lib/atomic64_test.c

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..3676c517a073 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1054,6 +1054,13 @@ config DMA_API_DEBUG
 	  This option causes a performance degredation.  Use only if you want
 	  to debug device drivers. If unsure, say N.
 
+config ATOMIC64_SELFTEST
+	bool "Perform an atomic64_t self-test at boot"
+	help
+	  Enable this option to test the atomic64_t functions at boot.
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index 347ad8db29d3..4af4786fe281 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -99,6 +99,8 @@ obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
 
+obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
new file mode 100644
index 000000000000..4ff649e46bad
--- /dev/null
+++ b/lib/atomic64_test.c
@@ -0,0 +1,158 @@
+/*
+ * Testsuite for atomic64_t functions
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/init.h>
+#include <asm/atomic.h>
+
+#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
+static __init int test_atomic64(void)
+{
+	long long v0 = 0xaaa31337c001d00dLL;
+	long long v1 = 0xdeadbeefdeafcafeLL;
+	long long v2 = 0xfaceabadf00df001LL;
+	long long onestwos = 0x1111111122222222LL;
+	long long one = 1LL;
+
+	atomic64_t v = ATOMIC64_INIT(v0);
+	long long r = v0;
+	BUG_ON(v.counter != r);
+
+	atomic64_set(&v, v1);
+	r = v1;
+	BUG_ON(v.counter != r);
+	BUG_ON(atomic64_read(&v) != r);
+
+	INIT(v0);
+	atomic64_add(onestwos, &v);
+	r += onestwos;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_add(-one, &v);
+	r += -one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r += onestwos;
+	BUG_ON(atomic64_add_return(onestwos, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r += -one;
+	BUG_ON(atomic64_add_return(-one, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_sub(onestwos, &v);
+	r -= onestwos;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_sub(-one, &v);
+	r -= -one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r -= onestwos;
+	BUG_ON(atomic64_sub_return(onestwos, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r -= -one;
+	BUG_ON(atomic64_sub_return(-one, &v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_inc(&v);
+	r += one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r += one;
+	BUG_ON(atomic64_inc_return(&v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	atomic64_dec(&v);
+	r -= one;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	r -= one;
+	BUG_ON(atomic64_dec_return(&v) != r);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_xchg(&v, v1) != v0);
+	r = v1;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
+	r = v1;
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(!atomic64_add_unless(&v, one, v0));
+	BUG_ON(v.counter != r);
+
+	INIT(v0);
+	BUG_ON(atomic64_add_unless(&v, one, v1));
+	r += one;
+	BUG_ON(v.counter != r);
+
+	INIT(onestwos);
+	BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
+	r -= one;
+	BUG_ON(v.counter != r);
+
+	INIT(0);
+	BUG_ON(atomic64_dec_if_positive(&v) != -one);
+	BUG_ON(v.counter != r);
+
+	INIT(-one);
+	BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
+	BUG_ON(v.counter != r);
+
+	INIT(onestwos);
+	BUG_ON(atomic64_inc_not_zero(&v));
+	r += one;
+	BUG_ON(v.counter != r);
+
+	INIT(0);
+	BUG_ON(!atomic64_inc_not_zero(&v));
+	BUG_ON(v.counter != r);
+
+	INIT(-one);
+	BUG_ON(atomic64_inc_not_zero(&v));
+	r += one;
+	BUG_ON(v.counter != r);
+
+#ifdef CONFIG_X86
+	printk(KERN_INFO "atomic64 test passed for %s+ platform %s CX8 and %s SSE\n",
+#ifdef CONFIG_X86_CMPXCHG64
+			"586",
+#else
+			"386",
+#endif
+			boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
+			boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
+#else
+	printk(KERN_INFO "atomic64 test passed\n");
+#endif
+
+	return 0;
+}
+
+core_initcall(test_atomic64);
-- 
cgit v1.2.2


From 84c6f88fc8265d7a712d7d6ed8fc1a878dfc84d1 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Date: Thu, 4 Feb 2010 16:08:15 +0900
Subject: perf lock: Fix and add misc documentally things

I forgot to add the 'perf lock' line to command-list.txt,
so users of perf could not find perf lock when they typed 'perf'.

Fixing command-list.txt requires a document
(tools/perf/Documentation/perf-lock.txt).
But perf lock is still too much "under construction" to write a
stable document for, so this is something like a pseudo document for now.

I also wrote a description of perf lock in the help section of
CONFIG_LOCK_STAT; this will guide users of lock trace events.

Signed-off-by: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
LKML-Reference: <1265267295-8388-1-git-send-email-mitake@dcl.info.waseda.ac.jp>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 lib/Kconfig.debug | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..65f964e7fe78 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -520,6 +520,12 @@ config LOCK_STAT
 
 	 For more details, see Documentation/lockstat.txt
 
+	 You can analyze lock events with "perf lock", subcommand of perf.
+	 If you want to use "perf lock", you need to turn on CONFIG_EVENT_TRACING.
+
+	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
+ 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
+
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
-- 
cgit v1.2.2


From dd8b1cf681eab40bc5afb67bdd06b2ca341f5669 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 27 Feb 2010 17:10:39 +0100
Subject: perf: Remove pointless breakpoint union

Remove pointless union in the breakpoint field of hw_perf_event.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
---
 lib/Kconfig.debug | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 65f964e7fe78..4dc24cc13f5c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -520,11 +520,13 @@ config LOCK_STAT
 
 	 For more details, see Documentation/lockstat.txt
 
-	 You can analyze lock events with "perf lock", subcommand of perf.
-	 If you want to use "perf lock", you need to turn on CONFIG_EVENT_TRACING.
+	 This also enables lock events required by "perf lock",
+	 subcommand of perf.
+	 If you want to use "perf lock", you also need to turn on
+	 CONFIG_EVENT_TRACING.
 
 	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
- 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
+	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
-- 
cgit v1.2.2


From 8f4f202b335144bf5be5c9e5b1bc9477ecdae958 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Fri, 26 Feb 2010 12:22:40 +0100
Subject: lib: Only test atomic64_dec_if_positive on archs having it

Currently atomic64_dec_if_positive() is only supported by PowerPC,
MIPS and x86-32.

Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267183361-20775-1-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/atomic64_test.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib')

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 4ff649e46bad..0effcacbebda 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -112,6 +112,7 @@ static __init int test_atomic64(void)
 	r += one;
 	BUG_ON(v.counter != r);
 
+#if defined(CONFIG_X86_32) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H)
 	INIT(onestwos);
 	BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
 	r -= one;
@@ -124,6 +125,9 @@ static __init int test_atomic64(void)
 	INIT(-one);
 	BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
 	BUG_ON(v.counter != r);
+#else
+#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above
+#endif
 
 	INIT(onestwos);
 	BUG_ON(atomic64_inc_not_zero(&v));
-- 
cgit v1.2.2


From d7f6de1e9c4a12e11ba7186c70f0f40caa76f590 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Fri, 26 Feb 2010 12:22:41 +0100
Subject: x86: Implement atomic[64]_dec_if_positive()

Add support for atomic_dec_if_positive(), and
atomic64_dec_if_positive() for x86-64.

atomic64_dec_if_positive() for x86-32 was already implemented in a previous patch.

Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267183361-20775-2-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/atomic64_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 0effcacbebda..58efdabb3845 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -112,7 +112,7 @@ static __init int test_atomic64(void)
 	r += one;
 	BUG_ON(v.counter != r);
 
-#if defined(CONFIG_X86_32) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H)
+#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H)
 	INIT(onestwos);
 	BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
 	r -= one;
-- 
cgit v1.2.2


From 9efbcd590243045111670c171a951923b877b57d Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Mon, 1 Mar 2010 19:55:45 +0100
Subject: lib: Fix atomic64_add_unless test

atomic64_add_unless must return 1 if it performed the add and 0 otherwise.
The test assumed the opposite convention.

Reported-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267469749-11878-2-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/atomic64_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 58efdabb3845..ee8e6de8b413 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -104,11 +104,11 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 
 	INIT(v0);
-	BUG_ON(!atomic64_add_unless(&v, one, v0));
+	BUG_ON(atomic64_add_unless(&v, one, v0));
 	BUG_ON(v.counter != r);
 
 	INIT(v0);
-	BUG_ON(atomic64_add_unless(&v, one, v1));
+	BUG_ON(!atomic64_add_unless(&v, one, v1));
 	r += one;
 	BUG_ON(v.counter != r);
 
-- 
cgit v1.2.2


From 97577896f6b9c056fa0a5e9f6a608110cb3dcd33 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Mon, 1 Mar 2010 19:55:47 +0100
Subject: lib: Fix atomic64_add_unless return value convention

atomic64_add_unless must return 1 if it performed the add and 0 otherwise.
The generic implementation did the opposite thing.

Reported-by: H. Peter Anvin <hpa@zytor.com>
Confirmed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267469749-11878-4-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/atomic64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/atomic64.c b/lib/atomic64.c
index 8bee16ec7524..a21c12bc727c 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -162,12 +162,12 @@ int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
 	spinlock_t *lock = lock_addr(v);
-	int ret = 1;
+	int ret = 0;
 
 	spin_lock_irqsave(lock, flags);
 	if (v->counter != u) {
 		v->counter += a;
-		ret = 0;
+		ret = 1;
 	}
 	spin_unlock_irqrestore(lock, flags);
 	return ret;
-- 
cgit v1.2.2


From 25a304f277ad70166eeae25a4958d2049005c33a Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Mon, 1 Mar 2010 19:55:48 +0100
Subject: lib: Fix atomic64_inc_not_zero test

atomic64_inc_not_zero must return 1 if it performed the add and 0 otherwise.
The test assumed the opposite convention.

Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267469749-11878-5-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/atomic64_test.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index ee8e6de8b413..f7bb706c9c3a 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -130,16 +130,16 @@ static __init int test_atomic64(void)
 #endif
 
 	INIT(onestwos);
-	BUG_ON(atomic64_inc_not_zero(&v));
+	BUG_ON(!atomic64_inc_not_zero(&v));
 	r += one;
 	BUG_ON(v.counter != r);
 
 	INIT(0);
-	BUG_ON(!atomic64_inc_not_zero(&v));
+	BUG_ON(atomic64_inc_not_zero(&v));
 	BUG_ON(v.counter != r);
 
 	INIT(-one);
-	BUG_ON(atomic64_inc_not_zero(&v));
+	BUG_ON(!atomic64_inc_not_zero(&v));
 	r += one;
 	BUG_ON(v.counter != r);
 
-- 
cgit v1.2.2


From a5c9161f27c3e1ae6c0094d262f03a7e98262181 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 1 Mar 2010 11:49:23 -0800
Subject: x86, atomic64: In selftest, distinguish x86-64 from 586+

The x86-64 implementation of the atomics is totally different from the
i586+ implementation, which makes it quite confusing to call it
"586+".  Also fix indentation, and add "i" for "i386" and "i586" as
used elsewhere in the kernel.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267005265-27958-4-git-send-email-luca@luca-barbieri.com>
---
 lib/atomic64_test.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index f7bb706c9c3a..65e482caf5e9 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -144,14 +144,16 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 
 #ifdef CONFIG_X86
-	printk(KERN_INFO "atomic64 test passed for %s+ platform %s CX8 and %s SSE\n",
-#ifdef CONFIG_X86_CMPXCHG64
-			"586",
+	printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
+#ifdef CONFIG_X86_64
+	       "x86-64",
+#elif defined(CONFIG_X86_CMPXCHG64)
+	       "i586+",
 #else
-			"386",
+	       "i386+",
 #endif
-			boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
-			boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
+	       boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
+	       boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
 #else
 	printk(KERN_INFO "atomic64 test passed\n");
 #endif
-- 
cgit v1.2.2


From f047f4f3792344901e1ea18a180515d7d5349e02 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 5 Mar 2010 13:42:24 -0800
Subject: mm: use the same log level for show_mem()

Use the same log level for printk's in show_mem(), so that those messages
can be shown completely when using log level 6.

Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/show_mem.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/show_mem.c b/lib/show_mem.c
index 238e72a18ce1..fdc77c82f922 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -15,7 +15,7 @@ void show_mem(void)
 	unsigned long total = 0, reserved = 0, shared = 0,
 		nonshared = 0, highmem = 0;
 
-	printk(KERN_INFO "Mem-Info:\n");
+	printk("Mem-Info:\n");
 	show_free_areas();
 
 	for_each_online_pgdat(pgdat) {
@@ -49,15 +49,15 @@ void show_mem(void)
 		pgdat_resize_unlock(pgdat, &flags);
 	}
 
-	printk(KERN_INFO "%lu pages RAM\n", total);
+	printk("%lu pages RAM\n", total);
 #ifdef CONFIG_HIGHMEM
-	printk(KERN_INFO "%lu pages HighMem\n", highmem);
+	printk("%lu pages HighMem\n", highmem);
 #endif
-	printk(KERN_INFO "%lu pages reserved\n", reserved);
-	printk(KERN_INFO "%lu pages shared\n", shared);
-	printk(KERN_INFO "%lu pages non-shared\n", nonshared);
+	printk("%lu pages reserved\n", reserved);
+	printk("%lu pages shared\n", shared);
+	printk("%lu pages non-shared\n", nonshared);
 #ifdef CONFIG_QUICKLIST
-	printk(KERN_INFO "%lu pages in pagetable cache\n",
+	printk("%lu pages in pagetable cache\n",
 		quicklist_total_size());
 #endif
 }
-- 
cgit v1.2.2


From 0347af4ee3922220f6bfe74b87b526aa709a0365 Mon Sep 17 00:00:00 2001
From: Simon Kagstrom <simon.kagstrom@netinsight.net>
Date: Fri, 5 Mar 2010 13:42:49 -0800
Subject: lkdtm: add debugfs access and loosen KPROBE ties

Adds a debugfs interface and additional failure modes to LKDTM to
provide similar functionality to the provoke-crash driver submitted here:

  http://lwn.net/Articles/371208/

Crashes can now be induced either through module parameters (as before)
or through the debugfs interface as in provoke-crash.

The patch also provides a new "direct" interface, where KPROBES are not
used, i.e., the crash is invoked directly upon write to the debugfs
file. When built without KPROBES configured, only this mode is available.

Signed-off-by: Simon Kagstrom <simon.kagstrom@netinsight.net>
Cc: M. Mohan Kumar <mohan@in.ibm.com>
Cc: Americo Wang <xiyou.wangcong@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5e3407d997b2..b520ec1f33c5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -864,8 +864,7 @@ config DEBUG_FORCE_WEAK_PER_CPU
 
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
-	depends on DEBUG_KERNEL
-	depends on KPROBES
+	depends on DEBUG_FS
 	depends on BLOCK
 	default n
 	help
@@ -876,7 +875,7 @@ config LKDTM
 	called lkdtm.
 
 	Documentation on how to use the module can be found in
-	drivers/misc/lkdtm.c
+	Documentation/fault-injection/provoke-crashes.txt
 
 config FAULT_INJECTION
 	bool "Fault-injection framework"
-- 
cgit v1.2.2


From a11d2b64e1f2556953120d516241243ea365f0ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= <andre.goddard@gmail.com>
Date: Fri, 5 Mar 2010 13:43:11 -0800
Subject: lib/string.c: simplify stricmp()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes 32 bytes on core2 with gcc 4.4.1:
   text    data     bss     dec     hex filename
   3196       0       0    3196     c7c lib/string-BEFORE.o
   3164       0       0    3164     c5c lib/string-AFTER.o

Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/string.c | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

(limited to 'lib')

diff --git a/lib/string.c b/lib/string.c
index a1cdcfcc42d0..0f8624532082 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -36,25 +36,21 @@ int strnicmp(const char *s1, const char *s2, size_t len)
 	/* Yes, Virginia, it had better be unsigned */
 	unsigned char c1, c2;
 
-	c1 = c2 = 0;
-	if (len) {
-		do {
-			c1 = *s1;
-			c2 = *s2;
-			s1++;
-			s2++;
-			if (!c1)
-				break;
-			if (!c2)
-				break;
-			if (c1 == c2)
-				continue;
-			c1 = tolower(c1);
-			c2 = tolower(c2);
-			if (c1 != c2)
-				break;
-		} while (--len);
-	}
+	if (!len)
+		return 0;
+
+	do {
+		c1 = *s1++;
+		c2 = *s2++;
+		if (!c1 || !c2)
+			break;
+		if (c1 == c2)
+			continue;
+		c1 = tolower(c1);
+		c2 = tolower(c2);
+		if (c1 != c2)
+			break;
+	} while (--len);
 	return (int)c1 - (int)c2;
 }
 EXPORT_SYMBOL(strnicmp);
-- 
cgit v1.2.2


From d6a2eedfddcded92c8f9b0ac022a99c4134696b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= <andre.goddard@gmail.com>
Date: Fri, 5 Mar 2010 13:43:12 -0800
Subject: lib/string.c: simplify strnstr()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Joe Perches <joe@perches.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/string.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/string.c b/lib/string.c
index 0f8624532082..f71bead1be3e 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -689,13 +689,13 @@ EXPORT_SYMBOL(strstr);
  */
 char *strnstr(const char *s1, const char *s2, size_t len)
 {
-	size_t l1 = len, l2;
+	size_t l2;
 
 	l2 = strlen(s2);
 	if (!l2)
 		return (char *)s1;
-	while (l1 >= l2) {
-		l1--;
+	while (len >= l2) {
+		len--;
 		if (!memcmp(s1, s2, l2))
 			return (char *)s1;
 		s1++;
-- 
cgit v1.2.2


From 835cc0c8477fdbc59e0217891d6f11061b1ac4e2 Mon Sep 17 00:00:00 2001
From: Don Mullis <don.mullis@gmail.com>
Date: Fri, 5 Mar 2010 13:43:15 -0800
Subject: lib: more scalable list_sort()

XFS and UBIFS can pass long lists to list_sort(); this alternative
implementation scales better, reaching ~3x performance gain when list
length exceeds the L2 cache size.

Stand-alone program timings were run on a Core 2 duo L1=32KB L2=4MB,
gcc-4.4, with flags extracted from an Ubuntu kernel build.  Object size is
581 bytes compared to 455 for Mark J.  Roberts' code.

Worst case for either implementation is a list length just over a power of
two, and to roughly the same degree, so here are timing results for a
range of 2^N+1 lengths.  List elements were 16 bytes each including malloc
overhead; initial order was random.

                      time (msec)
                      Tatham-Roberts
                      |       generic-Mullis-v2
loop_count  length    |       |    ratio
4000000       2     206     294    1.427
2000000       3     176     227    1.289
1000000       5     199     172    0.864
 500000       9     235     178    0.757
 250000      17     243     182    0.748
 125000      33     261     196    0.750
  62500      65     277     209    0.754
  31250     129     292     219    0.75
  15625     257     317     235    0.741
   7812     513     340     252    0.741
   3906    1025     362     267    0.737
   1953    2049     388     283    0.729  ~ L1 size
    976    4097     556     323    0.580
    488    8193     678     361    0.532
    244   16385     773     395    0.510
    122   32769     844     418    0.495
     61   65537     917     454    0.495
     30  131073    1128     543    0.481
     15  262145    2355     869    0.369  ~ L2 size
      7  524289    5597    1714    0.306
      3 1048577    6218    2022    0.325

Mark's code does not actually implement the usual or generic mergesort,
but rather a variant from Simon Tatham described here:

    http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Simon's algorithm performs O(log N) passes over the entire input list,
doing merges of sublists that double in size on each pass.  The generic
algorithm instead merges pairs of equal length lists as early as possible,
in recursive order.  For either algorithm, the elements that extend the
list beyond power-of-two length are a special case, handled as nearly as
possible as a "rounding-up" to a full POT.

Some intuition for the locality of reference implications of merge order
may be gotten by watching this animation:

    http://www.sorting-algorithms.com/merge-sort

Simon's algorithm requires only O(1) extra space rather than the generic
algorithm's O(log N), but in my non-recursive implementation the actual
O(log N) data is merely a vector of ~20 pointers, which I've put on the
stack.

Long-running list_sort() calls: If the list passed in may be long, or the
client's cmp() callback function is slow, the client's cmp() may
periodically invoke cond_resched() to voluntarily yield the CPU.  All
inner loops of list_sort() call back to cmp().

Stability of the sort: distinct elements that compare equal emerge from
the sort in the same order as with Mark's code, for simple test cases.  A
boot-time test is provided to verify this and other correctness
requirements.

A kernel that uses drm.ko appears to run normally with this change; I have
no suitable hardware to similarly test the use by UBIFS.

[akpm@linux-foundation.org: style tweaks, fix comment, make list_sort_test __init]
Signed-off-by: Don Mullis <don.mullis@gmail.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Artem Bityutskiy <dedekind@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/list_sort.c | 252 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 183 insertions(+), 69 deletions(-)

(limited to 'lib')

diff --git a/lib/list_sort.c b/lib/list_sort.c
index 19d11e0bb958..362c10f1653f 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -4,99 +4,213 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 
+#define MAX_LIST_LENGTH_BITS 20
+
+/*
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+static struct list_head *merge(void *priv,
+				int (*cmp)(void *priv, struct list_head *a,
+					struct list_head *b),
+				struct list_head *a, struct list_head *b)
+{
+	struct list_head head, *tail = &head;
+
+	while (a && b) {
+		/* if equal, take 'a' -- important for sort stability */
+		if ((*cmp)(priv, a, b) <= 0) {
+			tail->next = a;
+			a = a->next;
+		} else {
+			tail->next = b;
+			b = b->next;
+		}
+		tail = tail->next;
+	}
+	tail->next = a?:b;
+	return head.next;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure.  This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ */
+static void merge_and_restore_back_links(void *priv,
+				int (*cmp)(void *priv, struct list_head *a,
+					struct list_head *b),
+				struct list_head *head,
+				struct list_head *a, struct list_head *b)
+{
+	struct list_head *tail = head;
+
+	while (a && b) {
+		/* if equal, take 'a' -- important for sort stability */
+		if ((*cmp)(priv, a, b) <= 0) {
+			tail->next = a;
+			a->prev = tail;
+			a = a->next;
+		} else {
+			tail->next = b;
+			b->prev = tail;
+			b = b->next;
+		}
+		tail = tail->next;
+	}
+	tail->next = a ? : b;
+
+	do {
+		/*
+		 * In worst cases this loop may run many iterations.
+		 * Continue callbacks to the client even though no
+		 * element comparison is needed, so the client's cmp()
+		 * routine can invoke cond_resched() periodically.
+		 */
+		(*cmp)(priv, tail, tail);
+
+		tail->next->prev = tail;
+		tail = tail->next;
+	} while (tail->next);
+
+	tail->next = head;
+	head->prev = tail;
+}
+
 /**
  * list_sort - sort a list.
  * @priv: private data, passed to @cmp
  * @head: the list to sort
  * @cmp: the elements comparison function
  *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * in ascending order.
+ * This function implements "merge sort" which has O(nlog(n)) complexity.
+ * The list is sorted in ascending order.
  *
  * The comparison function @cmp is supposed to return a negative value if @a is
  * less than @b, and a positive value if @a is greater than @b. If @a and @b
  * are equivalent, then it does not matter what this function returns.
  */
 void list_sort(void *priv, struct list_head *head,
-	       int (*cmp)(void *priv, struct list_head *a,
-			  struct list_head *b))
+		int (*cmp)(void *priv, struct list_head *a,
+			struct list_head *b))
 {
-	struct list_head *p, *q, *e, *list, *tail, *oldhead;
-	int insize, nmerges, psize, qsize, i;
+	struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
+						-- last slot is a sentinel */
+	int lev;  /* index into part[] */
+	int max_lev = 0;
+	struct list_head *list;
 
 	if (list_empty(head))
 		return;
 
+	memset(part, 0, sizeof(part));
+
+	head->prev->next = NULL;
 	list = head->next;
-	list_del(head);
-	insize = 1;
-	for (;;) {
-		p = oldhead = list;
-		list = tail = NULL;
-		nmerges = 0;
-
-		while (p) {
-			nmerges++;
-			q = p;
-			psize = 0;
-			for (i = 0; i < insize; i++) {
-				psize++;
-				q = q->next == oldhead ? NULL : q->next;
-				if (!q)
-					break;
-			}
 
-			qsize = insize;
-			while (psize > 0 || (qsize > 0 && q)) {
-				if (!psize) {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				} else if (!qsize || !q) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else if (cmp(priv, p, q) <= 0) {
-					e = p;
-					p = p->next;
-					psize--;
-					if (p == oldhead)
-						p = NULL;
-				} else {
-					e = q;
-					q = q->next;
-					qsize--;
-					if (q == oldhead)
-						q = NULL;
-				}
-				if (tail)
-					tail->next = e;
-				else
-					list = e;
-				e->prev = tail;
-				tail = e;
+	while (list) {
+		struct list_head *cur = list;
+		list = list->next;
+		cur->next = NULL;
+
+		for (lev = 0; part[lev]; lev++) {
+			cur = merge(priv, cmp, part[lev], cur);
+			part[lev] = NULL;
+		}
+		if (lev > max_lev) {
+			if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
+				printk_once(KERN_DEBUG "list passed to"
+					" list_sort() too long for"
+					" efficiency\n");
+				lev--;
 			}
-			p = q;
+			max_lev = lev;
 		}
+		part[lev] = cur;
+	}
 
-		tail->next = list;
-		list->prev = tail;
+	for (lev = 0; lev < max_lev; lev++)
+		if (part[lev])
+			list = merge(priv, cmp, part[lev], list);
 
-		if (nmerges <= 1)
-			break;
+	merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
+}
+EXPORT_SYMBOL(list_sort);
 
-		insize *= 2;
-	}
+#ifdef DEBUG_LIST_SORT
+struct debug_el {
+	struct list_head l_h;
+	int value;
+	unsigned serial;
+};
 
-	head->next = list;
-	head->prev = list->prev;
-	list->prev->next = head;
-	list->prev = head;
+static int cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	return container_of(a, struct debug_el, l_h)->value
+	     - container_of(b, struct debug_el, l_h)->value;
 }
 
-EXPORT_SYMBOL(list_sort);
+/*
+ * The pattern of set bits in the list length determines which cases
+ * are hit in list_sort().
+ */
+#define LIST_SORT_TEST_LENGTH (512+128+2) /* not including head */
+
+static int __init list_sort_test(void)
+{
+	int i, r = 1, count;
+	struct list_head *head = kmalloc(sizeof(*head), GFP_KERNEL);
+	struct list_head *cur;
+
+	printk(KERN_WARNING "testing list_sort()\n");
+
+	cur = head;
+	for (i = 0; i < LIST_SORT_TEST_LENGTH; i++) {
+		struct debug_el *el = kmalloc(sizeof(*el), GFP_KERNEL);
+		BUG_ON(!el);
+		 /* force some equivalencies */
+		el->value = (r = (r * 725861) % 6599) % (LIST_SORT_TEST_LENGTH/3);
+		el->serial = i;
+
+		el->l_h.prev = cur;
+		cur->next = &el->l_h;
+		cur = cur->next;
+	}
+	head->prev = cur;
+
+	list_sort(NULL, head, cmp);
+
+	count = 1;
+	for (cur = head->next; cur->next != head; cur = cur->next) {
+		struct debug_el *el = container_of(cur, struct debug_el, l_h);
+		int cmp_result = cmp(NULL, cur, cur->next);
+		if (cur->next->prev != cur) {
+			printk(KERN_EMERG "list_sort() returned "
+						"a corrupted list!\n");
+			return 1;
+		} else if (cmp_result > 0) {
+			printk(KERN_EMERG "list_sort() failed to sort!\n");
+			return 1;
+		} else if (cmp_result == 0 &&
+				el->serial >= container_of(cur->next,
+					struct debug_el, l_h)->serial) {
+			printk(KERN_EMERG "list_sort() failed to preserve order"
+						 " of equivalent elements!\n");
+			return 1;
+		}
+		kfree(cur->prev);
+		count++;
+	}
+	kfree(cur);
+	if (count != LIST_SORT_TEST_LENGTH) {
+		printk(KERN_EMERG "list_sort() returned list of"
+						"different length!\n");
+		return 1;
+	}
+	return 0;
+}
+module_init(list_sort_test);
+#endif
-- 
cgit v1.2.2


From 02b12b7a28faa2e9ed5a361cd08ea576ab1f1509 Mon Sep 17 00:00:00 2001
From: Don Mullis <don.mullis@gmail.com>
Date: Fri, 5 Mar 2010 13:43:15 -0800
Subject: lib: revise list_sort() header comment

Clarify and correct header comment of list_sort().

Signed-off-by: Don Mullis <don.mullis@gmail.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Artem Bityutskiy <dedekind@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/list_sort.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/list_sort.c b/lib/list_sort.c
index 362c10f1653f..4b5cb794c38b 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -81,17 +81,18 @@ static void merge_and_restore_back_links(void *priv,
 }
 
 /**
- * list_sort - sort a list.
- * @priv: private data, passed to @cmp
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
  * @head: the list to sort
  * @cmp: the elements comparison function
  *
- * This function implements "merge sort" which has O(nlog(n)) complexity.
- * The list is sorted in ascending order.
+ * This function implements "merge sort", which has O(nlog(n))
+ * complexity.
  *
- * The comparison function @cmp is supposed to return a negative value if @a is
- * less than @b, and a positive value if @a is greater than @b. If @a and @b
- * are equivalent, then it does not matter what this function returns.
+ * The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
  */
 void list_sort(void *priv, struct list_head *head,
 		int (*cmp)(void *priv, struct list_head *a,
-- 
cgit v1.2.2


From a069c266ae5fdfbf5b4aecf2c672413aa33b2504 Mon Sep 17 00:00:00 2001
From: Don Mullis <don.mullis@gmail.com>
Date: Fri, 5 Mar 2010 13:43:16 -0800
Subject: lib: build list_sort() only if needed

Build list_sort() only for configs that need it -- those that don't save
~581 bytes (i386).

Signed-off-by: Don Mullis <don.mullis@gmail.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Artem Bityutskiy <dedekind@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig  | 3 +++
 lib/Makefile | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig b/lib/Kconfig
index 97b136ff117e..8034c46327cb 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -160,6 +160,9 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
 	tristate
 
+config LIST_SORT
+	boolean
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index 3b0b4a696db9..e39c361b0be3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o list_sort.o
+	 string_helpers.o gcd.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -40,6 +40,7 @@ lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+obj-$(CONFIG_LIST_SORT) += list_sort.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
-- 
cgit v1.2.2


From 9a86e2bad0b9fbf3290ae496da6dab9536dd6bf7 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 5 Mar 2010 13:43:17 -0800
Subject: lib: fix first line of kernel-doc for a few functions

The function name must be followed by a space, hyphen, space, and a short
description.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/bitmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/bitmap.c b/lib/bitmap.c
index 11bf49750583..61998c5924fe 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -487,7 +487,7 @@ int __bitmap_parse(const char *buf, unsigned int buflen,
 EXPORT_SYMBOL(__bitmap_parse);
 
 /**
- * bitmap_parse_user()
+ * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
  *
  * @ubuf: pointer to user buffer containing string.
  * @ulen: buffer size in bytes.  If string is smaller than this
@@ -619,7 +619,7 @@ int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
 EXPORT_SYMBOL(bitmap_parselist);
 
 /**
- * bitmap_pos_to_ord(buf, pos, bits)
+ * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
  *	@buf: pointer to a bitmap
  *	@pos: a bit position in @buf (0 <= @pos < @bits)
  *	@bits: number of valid bit positions in @buf
@@ -655,7 +655,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
 }
 
 /**
- * bitmap_ord_to_pos(buf, ord, bits)
+ * bitmap_ord_to_pos - find position of n-th set bit in bitmap
  *	@buf: pointer to bitmap
  *	@ord: ordinal bit position (n-th set bit, n >= 0)
  *	@bits: number of valid bit positions in @buf
-- 
cgit v1.2.2


From 08564fb7ab9ead9226b6154439c3fecd17972eb0 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 5 Mar 2010 13:43:18 -0800
Subject: bitmap: use for_each_set_bit()

Replace open-coded loop with for_each_set_bit().

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/bitmap.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'lib')

diff --git a/lib/bitmap.c b/lib/bitmap.c
index 61998c5924fe..ffb78c916ccd 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -733,10 +733,9 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
 	bitmap_zero(dst, bits);
 
 	w = bitmap_weight(new, bits);
-	for (oldbit = find_first_bit(src, bits);
-	     oldbit < bits;
-	     oldbit = find_next_bit(src, bits, oldbit + 1)) {
+	for_each_set_bit(oldbit, src, bits) {
 	     	int n = bitmap_pos_to_ord(old, oldbit, bits);
+
 		if (n < 0 || w == 0)
 			set_bit(oldbit, dst);	/* identity map */
 		else
@@ -903,9 +902,7 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
 	 */
 
 	m = 0;
-	for (n = find_first_bit(relmap, bits);
-	     n < bits;
-	     n = find_next_bit(relmap, bits, n + 1)) {
+	for_each_set_bit(n, relmap, bits) {
 		/* m == bitmap_pos_to_ord(relmap, n, bits) */
 		if (test_bit(m, orig))
 			set_bit(n, dst);
@@ -934,9 +931,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
 		return;
 	bitmap_zero(dst, bits);
 
-	for (oldbit = find_first_bit(orig, bits);
-	     oldbit < bits;
-	     oldbit = find_next_bit(orig, bits, oldbit + 1))
+	for_each_set_bit(oldbit, orig, bits)
 		set_bit(oldbit % sz, dst);
 }
 EXPORT_SYMBOL(bitmap_fold);
-- 
cgit v1.2.2


From 4f2a9463d18517a9839401c3de6419ee1435875b Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Fri, 5 Mar 2010 13:43:55 -0800
Subject: crc32: some minor cleanups

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/crc32.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

(limited to 'lib')

diff --git a/lib/crc32.c b/lib/crc32.c
index 02e3b31b3a79..0f45fbff34cb 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -30,11 +30,15 @@
 #include <asm/atomic.h>
 #include "crc32defs.h"
 #if CRC_LE_BITS == 8
-#define tole(x) __constant_cpu_to_le32(x)
-#define tobe(x) __constant_cpu_to_be32(x)
+# define tole(x) __constant_cpu_to_le32(x)
 #else
-#define tole(x) (x)
-#define tobe(x) (x)
+# define tole(x) (x)
+#endif
+
+#if CRC_BE_BITS == 8
+# define tobe(x) __constant_cpu_to_be32(x)
+#else
+# define tobe(x) (x)
 #endif
 #include "crc32table.h"
 
@@ -52,20 +56,19 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
 # else
 #  define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
 # endif
-	const u32 *b = (const u32 *)buf;
+	const u32 *b;
 	size_t    rem_len;
 
 	/* Align it */
-	if (unlikely((long)b & 3 && len)) {
-		u8 *p = (u8 *)b;
+	if (unlikely((long)buf & 3 && len)) {
 		do {
-			DO_CRC(*p++);
-		} while ((--len) && ((long)p)&3);
-		b = (u32 *)p;
+			DO_CRC(*buf++);
+		} while ((--len) && ((long)buf)&3);
 	}
 	rem_len = len & 3;
 	/* load data 32 bits wide, xor data 32 bits wide. */
 	len = len >> 2;
+	b = (const u32 *)buf;
 	for (--b; len; --len) {
 		crc ^= *++b; /* use pre increment for speed */
 		DO_CRC(0);
@@ -82,6 +85,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
 		} while (--len);
 	}
 	return crc;
+#undef DO_CRC
 }
 #endif
 /**
@@ -119,9 +123,6 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 	crc = __cpu_to_le32(crc);
 	crc = crc32_body(crc, p, len, tab);
 	return __le32_to_cpu(crc);
-#undef ENDIAN_SHIFT
-#undef DO_CRC
-
 # elif CRC_LE_BITS == 4
 	while (len--) {
 		crc ^= *p++;
@@ -179,9 +180,6 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 	crc = __cpu_to_be32(crc);
 	crc = crc32_body(crc, p, len, tab);
 	return __be32_to_cpu(crc);
-#undef ENDIAN_SHIFT
-#undef DO_CRC
-
 # elif CRC_BE_BITS == 4
 	while (len--) {
 		crc ^= *p++ << 24;
-- 
cgit v1.2.2


From ef0658f3de484bf9b173639cd47544584e01efa5 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 6 Mar 2010 17:10:14 -0800
Subject: vsprintf.c: Reduce sizeof struct printf_spec from 24 to 8 bytes

Reducing the size of struct printf_spec is a good thing because multiple
instances are commonly passed on stack.

It's possible for type to be u8 and field_width to be s8, but this is
likely small enough for now.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index af4aaa6c36f3..e994cea385c8 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -408,12 +408,12 @@ enum format_type {
 };
 
 struct printf_spec {
-	enum format_type	type;
-	int			flags;		/* flags to number() */
-	int			field_width;	/* width of output field */
-	int			base;
-	int			precision;	/* # of digits/chars */
-	int			qualifier;
+	u16	type;
+	s16	field_width;	/* width of output field */
+	u8	flags;		/* flags to number() */
+	u8	base;
+	s8	precision;	/* # of digits/chars */
+	u8	qualifier;
 };
 
 static char *number(char *buf, char *end, unsigned long long num,
@@ -1333,7 +1333,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			break;
 
 		case FORMAT_TYPE_NRCHARS: {
-			int qualifier = spec.qualifier;
+			u8 qualifier = spec.qualifier;
 
 			if (qualifier == 'l') {
 				long *ip = va_arg(args, long *);
@@ -1619,7 +1619,7 @@ do {									\
 
 		case FORMAT_TYPE_NRCHARS: {
 			/* skip %n 's argument */
-			int qualifier = spec.qualifier;
+			u8 qualifier = spec.qualifier;
 			void *skip_arg;
 			if (qualifier == 'l')
 				skip_arg = va_arg(args, long *);
@@ -1885,7 +1885,9 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 	char *next;
 	char digit;
 	int num = 0;
-	int qualifier, base, field_width;
+	u8 qualifier;
+	u8 base;
+	s16 field_width;
 	bool is_sign;
 
 	while (*fmt && *str) {
@@ -1963,7 +1965,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 		{
 			char *s = (char *)va_arg(args, char *);
 			if (field_width == -1)
-				field_width = INT_MAX;
+				field_width = SHORT_MAX;
 			/* first, skip leading white space in buffer */
 			str = skip_spaces(str);
 
-- 
cgit v1.2.2


From b89dc5d6b0981c1096ccffbf8f4413c7bb1bcc0a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 5 Mar 2010 10:47:31 -0700
Subject: vsprintf: clarify comments for printf_spec flags

Add clues about what the SMALL and SPECIAL flags do.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index e994cea385c8..a900d136e643 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -381,8 +381,8 @@ static noinline char *put_dec(char *buf, unsigned long long num)
 #define PLUS	4		/* show plus */
 #define SPACE	8		/* space if plus */
 #define LEFT	16		/* left justified */
-#define SMALL	32		/* Must be 32 == 0x20 */
-#define SPECIAL	64		/* 0x */
+#define SMALL	32		/* use lowercase in hex (must be 32 == 0x20) */
+#define SPECIAL	64		/* prefix hex with "0x", octal with "0" */
 
 enum format_type {
 	FORMAT_TYPE_NONE, /* Just a string part */
-- 
cgit v1.2.2


From 4da0b66c6e9ea7ba78a19f9f186779826d89f8b0 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 5 Mar 2010 10:47:37 -0700
Subject: vsprintf: move %pR resource printf_specs off the stack

This adds separate I/O and memory specs, so we don't have to change the
field width in a shared spec, which then lets us make all the specs const
and static, since they never change.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index a900d136e643..0d461c7c14db 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -597,22 +597,29 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 #ifndef MEM_RSRC_PRINTK_SIZE
 #define MEM_RSRC_PRINTK_SIZE	10
 #endif
-	struct printf_spec hex_spec = {
+	static const struct printf_spec io_spec = {
 		.base = 16,
+		.field_width = IO_RSRC_PRINTK_SIZE,
 		.precision = -1,
 		.flags = SPECIAL | SMALL | ZEROPAD,
 	};
-	struct printf_spec dec_spec = {
+	static const struct printf_spec mem_spec = {
+		.base = 16,
+		.field_width = MEM_RSRC_PRINTK_SIZE,
+		.precision = -1,
+		.flags = SPECIAL | SMALL | ZEROPAD,
+	};
+	static const struct printf_spec dec_spec = {
 		.base = 10,
 		.precision = -1,
 		.flags = 0,
 	};
-	struct printf_spec str_spec = {
+	static const struct printf_spec str_spec = {
 		.field_width = -1,
 		.precision = 10,
 		.flags = LEFT,
 	};
-	struct printf_spec flag_spec = {
+	static const struct printf_spec flag_spec = {
 		.base = 16,
 		.precision = -1,
 		.flags = SPECIAL | SMALL,
@@ -628,35 +635,31 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 		     2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
 
 	char *p = sym, *pend = sym + sizeof(sym);
-	int size = -1, addr = 0;
 	int decode = (fmt[0] == 'R') ? 1 : 0;
-
-	if (res->flags & IORESOURCE_IO) {
-		size = IO_RSRC_PRINTK_SIZE;
-		addr = 1;
-	} else if (res->flags & IORESOURCE_MEM) {
-		size = MEM_RSRC_PRINTK_SIZE;
-		addr = 1;
-	}
+	const struct printf_spec *specp;
 
 	*p++ = '[';
-	if (res->flags & IORESOURCE_IO)
+	if (res->flags & IORESOURCE_IO) {
 		p = string(p, pend, "io  ", str_spec);
-	else if (res->flags & IORESOURCE_MEM)
+		specp = &io_spec;
+	} else if (res->flags & IORESOURCE_MEM) {
 		p = string(p, pend, "mem ", str_spec);
-	else if (res->flags & IORESOURCE_IRQ)
+		specp = &mem_spec;
+	} else if (res->flags & IORESOURCE_IRQ) {
 		p = string(p, pend, "irq ", str_spec);
-	else if (res->flags & IORESOURCE_DMA)
+		specp = &dec_spec;
+	} else if (res->flags & IORESOURCE_DMA) {
 		p = string(p, pend, "dma ", str_spec);
-	else {
+		specp = &dec_spec;
+	} else {
 		p = string(p, pend, "??? ", str_spec);
+		specp = &mem_spec;
 		decode = 0;
 	}
-	hex_spec.field_width = size;
-	p = number(p, pend, res->start, addr ? hex_spec : dec_spec);
+	p = number(p, pend, res->start, *specp);
 	if (res->start != res->end) {
 		*p++ = '-';
-		p = number(p, pend, res->end, addr ? hex_spec : dec_spec);
+		p = number(p, pend, res->end, *specp);
 	}
 	if (decode) {
 		if (res->flags & IORESOURCE_MEM_64)
-- 
cgit v1.2.2


From b8fa05719ba4349be80ce929237249b57886a203 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 7 Mar 2010 09:54:44 -0800
Subject: Revert "lib: build list_sort() only if needed"

This reverts commit a069c266ae5fdfbf5b4aecf2c672413aa33b2504.

It turns out that not only was it missing a case (XFS) that needed it,
but perhaps more importantly, people sometimes want to enable new
modules that they hadn't had enabled before, and if such a module uses
list_sort(), it can't easily be inserted any more.

So rather than add a "select LIST_SORT" to the XFS case, just leave it
compiled in.  It's not all _that_ big, after all, and the inconvenience
isn't worth it.

Requested-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Don Mullis <don.mullis@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig  | 3 ---
 lib/Makefile | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig b/lib/Kconfig
index 496d16e1fa2c..170d8ca901d8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -160,9 +160,6 @@ config TEXTSEARCH_BM
 config TEXTSEARCH_FSM
 	tristate
 
-config LIST_SORT
-	boolean
-
 config BTREE
 	boolean
 
diff --git a/lib/Makefile b/lib/Makefile
index 59e46a014bc6..2e152aed7198 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o
+	 string_helpers.o gcd.o list_sort.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
@@ -40,7 +40,6 @@ lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
-obj-$(CONFIG_LIST_SORT) += list_sort.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
-- 
cgit v1.2.2


From 9cd43611ccfb46632bfa7d19f688924ea93f1613 Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Thu, 31 Dec 2009 14:52:51 +0100
Subject: kobject: Constify struct kset_uevent_ops

Constify struct kset_uevent_ops.

This is part of the ops structure constification
effort started by Arjan van de Ven et al.

Benefits of this constification:

 * prevents modification of data that is shared
   (referenced) by many other structure instances
   at runtime

 * detects/prevents accidental (but not intentional)
   modification attempts on archs that enforce
   read-only kernel data at runtime

 * potentially better optimized code as the compiler
   can assume that the const data cannot be changed

 * the compiler/linker move const data into .rodata
   and therefore exclude them from false sharing

Signed-off-by: Emese Revfy <re.emese@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject.c        | 4 ++--
 lib/kobject_uevent.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/kobject.c b/lib/kobject.c
index b512b746d2af..cecf5a0ef6e1 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -789,7 +789,7 @@ static struct kobj_type kset_ktype = {
  * If the kset was not able to be created, NULL will be returned.
  */
 static struct kset *kset_create(const char *name,
-				struct kset_uevent_ops *uevent_ops,
+				const struct kset_uevent_ops *uevent_ops,
 				struct kobject *parent_kobj)
 {
 	struct kset *kset;
@@ -832,7 +832,7 @@ static struct kset *kset_create(const char *name,
  * If the kset was not able to be created, NULL will be returned.
  */
 struct kset *kset_create_and_add(const char *name,
-				 struct kset_uevent_ops *uevent_ops,
+				 const struct kset_uevent_ops *uevent_ops,
 				 struct kobject *parent_kobj)
 {
 	struct kset *kset;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 920a3ca6e259..c9d3a3e8405d 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -95,7 +95,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 	const char *subsystem;
 	struct kobject *top_kobj;
 	struct kset *kset;
-	struct kset_uevent_ops *uevent_ops;
+	const struct kset_uevent_ops *uevent_ops;
 	u64 seq;
 	int i = 0;
 	int retval = 0;
-- 
cgit v1.2.2


From 52cf25d0ab7f78eeecc59ac652ed5090f69b619e Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Tue, 19 Jan 2010 02:58:23 +0100
Subject: Driver core: Constify struct sysfs_ops in struct kobj_type

Constify struct sysfs_ops.

This is part of the ops structure constification
effort started by Arjan van de Ven et al.

Benefits of this constification:

 * prevents modification of data that is shared
   (referenced) by many other structure instances
   at runtime

 * detects/prevents accidental (but not intentional)
   modification attempts on archs that enforce
   read-only kernel data at runtime

 * potentially better optimized code as the compiler
   can assume that the const data cannot be changed

 * the compiler/linker move const data into .rodata
   and therefore exclude them from false sharing

Signed-off-by: Emese Revfy <re.emese@gmail.com>
Acked-by: David Teigland <teigland@redhat.com>
Acked-by: Matt Domsch <Matt_Domsch@dell.com>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Acked-by: Hans J. Koch <hjk@linutronix.de>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/kobject.c b/lib/kobject.c
index cecf5a0ef6e1..8115eb1bbf4d 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -700,7 +700,7 @@ static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
 	return ret;
 }
 
-struct sysfs_ops kobj_sysfs_ops = {
+const struct sysfs_ops kobj_sysfs_ops = {
 	.show	= kobj_attr_show,
 	.store	= kobj_attr_store,
 };
-- 
cgit v1.2.2


From e69eae65523b457a3ac4262a66cfff57f2c924a9 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Wed, 10 Mar 2010 15:23:55 -0800
Subject: zlib: make new optimized inflate endian independent

Commit 6846ee5ca68d81e6baccf0d56221d7a00c1be18b ("zlib: Fix build of
powerpc boot wrapper") made the new optimized inflate only available on
arch's that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.

This patch will again enable the optimization for all arch's by defining
our own endian independent version of unaligned access.  As an added
bonus, arch's that define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS do a
plain load instead.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/zlib_inflate/inffast.c | 70 ++++++++++++++++++++--------------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

(limited to 'lib')

diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index 215447c55261..fa62fc7a94f9 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -8,21 +8,6 @@
 #include "inflate.h"
 #include "inffast.h"
 
-/* Only do the unaligned "Faster" variant when
- * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set
- *
- * On powerpc, it won't be as we don't include autoconf.h
- * automatically for the boot wrapper, which is intended as
- * we run in an environment where we may not be able to deal
- * with (even rare) alignment faults. In addition, we do not
- * define __KERNEL__ for arch/powerpc/boot unlike x86
- */
-
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#include <asm/unaligned.h>
-#include <asm/byteorder.h>
-#endif
-
 #ifndef ASMINF
 
 /* Allow machine dependent optimization for post-increment or pre-increment.
@@ -36,14 +21,31 @@
    - Pentium III (Anderson)
    - M68060 (Nikl)
  */
+union uu {
+	unsigned short us;
+	unsigned char b[2];
+};
+
+/* Endian independent version: bytewise copy, so p need not be aligned */
+static inline unsigned short
+get_unaligned16(const unsigned short *p)
+{
+	union uu  mm;
+	unsigned char *b = (unsigned char *)p;
+
+	mm.b[0] = b[0];
+	mm.b[1] = b[1];
+	return mm.us;
+}
+
 #ifdef POSTINC
 #  define OFF 0
 #  define PUP(a) *(a)++
-#  define UP_UNALIGNED(a) get_unaligned((a)++)
+#  define UP_UNALIGNED(a) get_unaligned16((a)++)
 #else
 #  define OFF 1
 #  define PUP(a) *++(a)
-#  define UP_UNALIGNED(a) get_unaligned(++(a))
+#  define UP_UNALIGNED(a) get_unaligned16(++(a))
 #endif
 
 /*
@@ -256,7 +258,6 @@ void inflate_fast(z_streamp strm, unsigned start)
                     }
                 }
                 else {
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 		    unsigned short *sout;
 		    unsigned long loops;
 
@@ -274,7 +275,11 @@ void inflate_fast(z_streamp strm, unsigned start)
 			sfrom = (unsigned short *)(from - OFF);
 			loops = len >> 1;
 			do
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+			    PUP(sout) = PUP(sfrom);
+#else
 			    PUP(sout) = UP_UNALIGNED(sfrom);
+#endif
 			while (--loops);
 			out = (unsigned char *)sout + OFF;
 			from = (unsigned char *)sfrom + OFF;
@@ -282,14 +287,13 @@ void inflate_fast(z_streamp strm, unsigned start)
 			unsigned short pat16;
 
 			pat16 = *(sout-2+2*OFF);
-			if (dist == 1)
-#if defined(__BIG_ENDIAN)
-			    pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8);
-#elif defined(__LITTLE_ENDIAN)
-			    pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8);
-#else
-#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
-#endif
+			if (dist == 1) {
+				union uu mm;
+				/* copy one char pattern to both bytes */
+				mm.us = pat16;
+				mm.b[0] = mm.b[1];
+				pat16 = mm.us;
+			}
 			loops = len >> 1;
 			do
 			    PUP(sout) = pat16;
@@ -298,20 +302,6 @@ void inflate_fast(z_streamp strm, unsigned start)
 		    }
 		    if (len & 1)
 			PUP(out) = PUP(from);
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-                    from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-			 PUP(out) = PUP(from);
-			 PUP(out) = PUP(from);
-			 PUP(out) = PUP(from);
-			 len -= 3;
-                    } while (len > 2);
-                    if (len) {
-			 PUP(out) = PUP(from);
-			 if (len > 1)
-			     PUP(out) = PUP(from);
-                    }
-#endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
                 }
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
-- 
cgit v1.2.2


From 51ea3f6a4571e9283e2ff79b74bcedfc2986dbe2 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Wed, 10 Mar 2010 15:23:57 -0800
Subject: inflate_fast: sout is already a short so ptr arith was off by one.

inflate_fast() can do either POST INC or PRE INC on its pointers walking
the memory to decompress.  Default is PRE INC.

The sout pointer offset was miscalculated in one case as the calculation
assumed sout was a char * This breaks inflate_fast() iff configured to do
POST INC.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/zlib_inflate/inffast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
index fa62fc7a94f9..2c13ecc5bb2c 100644
--- a/lib/zlib_inflate/inffast.c
+++ b/lib/zlib_inflate/inffast.c
@@ -286,7 +286,7 @@ void inflate_fast(z_streamp strm, unsigned start)
 		    } else { /* dist == 1 or dist == 2 */
 			unsigned short pat16;
 
-			pat16 = *(sout-2+2*OFF);
+			pat16 = *(sout-1+OFF);
 			if (dist == 1) {
 				union uu mm;
 				/* copy one char pattern to both bytes */
-- 
cgit v1.2.2


From 0f4050c7d3ba0275e5f39513c0670a717d43048c Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 5 Mar 2010 10:47:42 -0700
Subject: resource: add bus number support

Add support for bus number resources.  This is for bridges with a range of
bus numbers behind them.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 lib/vsprintf.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0d461c7c14db..ebbecf90d5d7 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -609,6 +609,12 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 		.precision = -1,
 		.flags = SPECIAL | SMALL | ZEROPAD,
 	};
+	static const struct printf_spec bus_spec = {
+		.base = 16,
+		.field_width = 2,
+		.precision = -1,
+		.flags = SMALL | ZEROPAD,
+	};
 	static const struct printf_spec dec_spec = {
 		.base = 10,
 		.precision = -1,
@@ -651,6 +657,9 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 	} else if (res->flags & IORESOURCE_DMA) {
 		p = string(p, pend, "dma ", str_spec);
 		specp = &dec_spec;
+	} else if (res->flags & IORESOURCE_BUS) {
+		p = string(p, pend, "bus ", str_spec);
+		specp = &bus_spec;
 	} else {
 		p = string(p, pend, "??? ", str_spec);
 		specp = &mem_spec;
-- 
cgit v1.2.2


From 9d7cca04211d4eb104eaaa424b98f650bc29c730 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 5 Mar 2010 10:47:47 -0700
Subject: resource: add window support

Add support for resource windows.  This is for bridge resources, i.e.,
regions where a bridge forwards transactions from the primary to the
secondary side.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 lib/vsprintf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index ebbecf90d5d7..24112e5a5780 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -635,7 +635,7 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 	 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
 #define RSRC_BUF_SIZE		((2 * sizeof(resource_size_t)) + 4)
 #define FLAG_BUF_SIZE		(2 * sizeof(res->flags))
-#define DECODED_BUF_SIZE	sizeof("[mem - 64bit pref disabled]")
+#define DECODED_BUF_SIZE	sizeof("[mem - 64bit pref window disabled]")
 #define RAW_BUF_SIZE		sizeof("[mem - flags 0x]")
 	char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
 		     2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
@@ -675,6 +675,8 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 			p = string(p, pend, " 64bit", str_spec);
 		if (res->flags & IORESOURCE_PREFETCH)
 			p = string(p, pend, " pref", str_spec);
+		if (res->flags & IORESOURCE_WINDOW)
+			p = string(p, pend, " window", str_spec);
 		if (res->flags & IORESOURCE_DISABLED)
 			p = string(p, pend, " disabled", str_spec);
 	} else {
-- 
cgit v1.2.2


From 2cda2728aa1c8c006418a24f867b25e5eb7a32e2 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 15 Mar 2010 12:46:51 +0100
Subject: block: Fix overrun in lcm() and move it to lib

lcm() was defined to take integer-sized arguments.  The supplied
arguments are multiplied, however, causing us to overflow given
sufficiently large input.  That in turn led to incorrect optimal I/O
size reporting in some cases (RAID over RAID).

Switch lcm() over to unsigned long similar to gcd() and move the
function from blk-settings.c to lib.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 lib/Makefile |  2 +-
 lib/lcm.c    | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 lib/lcm.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 2e152aed7198..0d4015205c64 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o list_sort.o
+	 string_helpers.o gcd.o lcm.o list_sort.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 000000000000..157cd88a6ffc
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/* Lowest common multiple; if a or b is zero, the other one is returned */
+unsigned long lcm(unsigned long a, unsigned long b)
+{
+	if (a && b)
+		return (a * b) / gcd(a, b);
+	else if (b)
+		return b;
+
+	return a;
+}
+EXPORT_SYMBOL_GPL(lcm);
-- 
cgit v1.2.2


From 1d53661d26aa779dcd74f8a1c5a94e181cc101d8 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Tue, 23 Mar 2010 13:35:17 -0700
Subject: blackfin: enable DEBUG_SECTION_MISMATCH

We see only one section mismatch now after thousands of randconfigs, and a
bug has been filed about that one.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8e5ec5e1ab91..1fafb4b99c9b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -103,7 +103,8 @@ config HEADERS_CHECK
 
 config DEBUG_SECTION_MISMATCH
 	bool "Enable full Section mismatch analysis"
-	depends on UNDEFINED
+	depends on UNDEFINED || (BLACKFIN)
+	default y
 	# This option is on purpose disabled for now.
 	# It will be enabled when we are down to a reasonable number
 	# of section mismatch warnings (< 10 for an allyesconfig build)
-- 
cgit v1.2.2


From 1fb2f77c037624601fd214fb7c29faa84cd7bdd7 Mon Sep 17 00:00:00 2001
From: Henrik Kretzschmar <henne@nachtwindheim.de>
Date: Fri, 26 Mar 2010 20:38:35 +0100
Subject: debugobjects: Section mismatch cleanup

This patch marks two functions, which only get called at
initialization, as __init.

It is also interesting that modpost doesn't report the right function
name here.

WARNING: lib/built-in.o(.text+0x585f): Section mismatch in reference
from the function T.506() to the variable .init.data:obj
The function T.506() references the variable __initdata obj.
This is often because T.506 lacks a __initdata annotation or the
annotation of obj is wrong.

Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de>
LKML-Reference: <1269632315-19403-1-git-send-email-henne@nachtwindheim.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/debugobjects.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a9a8996d286a..c4ecd3ce7fd4 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -773,7 +773,7 @@ static int __init fixup_free(void *addr, enum debug_obj_state state)
 	}
 }
 
-static int
+static int __init
 check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
 {
 	struct debug_bucket *db;
@@ -916,7 +916,7 @@ void __init debug_objects_early_init(void)
 /*
  * Convert the statically allocated objects to dynamic ones:
  */
-static int debug_objects_replace_static_objects(void)
+static int __init debug_objects_replace_static_objects(void)
 {
 	struct debug_bucket *db = obj_hash;
 	struct hlist_node *node, *tmp;
-- 
cgit v1.2.2


From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Mar 2010 17:04:11 +0900
Subject: include cleanup: Update gfp.h and slab.h includes to prepare for
 breaking implicit slab.h inclusion from percpu.h

percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files.  percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.

percpu.h -> slab.h dependency is about to be removed.  Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability.  As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.

  http://userweb.kernel.org/~tj/misc/slabh-sweep.py

The script does the following.

* Scan files for gfp and slab usages and update includes such that
  only the necessary includes are there.  ie. if only gfp is used,
  gfp.h, if slab is used, slab.h.

* When the script inserts a new include, it looks at the include
  blocks and tries to put the new include such that its order conforms
  to its surrounding.  It's put in the include block which contains
  core kernel includes, in the same order that the rest are ordered -
  alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
  doesn't seem to be any matching order.

* If the script can't find a place to put a new include (mostly
  because the file doesn't have fitting include block), it prints out
  an error message indicating which .h file needs to be added to the
  file.

The conversion was done in the following steps.

1. The initial automatic conversion of all .c files updated slightly
   over 4000 files, deleting around 700 includes and adding ~480 gfp.h
   and ~3000 slab.h inclusions.  The script emitted errors for ~400
   files.

2. Each error was manually checked.  Some didn't need the inclusion,
   some needed manual addition while adding it to implementation .h or
   embedding .c file was more appropriate for others.  This step added
   inclusions to around 150 files.

3. The script was run again and the output was compared to the edits
   from #2 to make sure no file was left behind.

4. Several build tests were done and a couple of problems were fixed.
   e.g. lib/decompress_*.c used malloc/free() wrappers around slab
   APIs requiring slab.h to be added manually.

5. The script was run on all .h files but without automatically
   editing them as sprinkling gfp.h and slab.h inclusions around .h
   files could easily lead to inclusion dependency hell.  Most gfp.h
   inclusion directives were ignored as stuff from gfp.h was usually
   widely available and often used in preprocessor macros.  Each
   slab.h inclusion directive was examined and added manually as
   necessary.

6. percpu.h was updated not to include slab.h.

7. Build tests were done on the following configurations and failures
   were fixed.  CONFIG_GCOV_KERNEL was turned off for all tests (as my
   distributed build env didn't work with gcov compiles) and a few
   more options had to be turned off depending on archs to make things
   build (like ipr on powerpc/64 which failed due to missing writeq).

   * x86 and x86_64 UP and SMP allmodconfig and a custom test config.
   * powerpc and powerpc64 SMP allmodconfig
   * sparc and sparc64 SMP allmodconfig
   * ia64 SMP allmodconfig
   * s390 SMP allmodconfig
   * alpha SMP allmodconfig
   * um on x86_64 SMP allmodconfig

8. percpu.h modifications were reverted so that it could be applied as
   a separate patch and serve as bisection point.

Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
---
 lib/cpumask.c        | 1 +
 lib/crc32.c          | 1 -
 lib/debugobjects.c   | 1 +
 lib/devres.c         | 1 +
 lib/dynamic_debug.c  | 1 +
 lib/genalloc.c       | 1 +
 lib/inflate.c        | 1 +
 lib/kasprintf.c      | 1 +
 lib/kobject_uevent.c | 1 +
 lib/kref.c           | 1 +
 lib/radix-tree.c     | 1 -
 lib/scatterlist.c    | 1 +
 lib/swiotlb.c        | 1 +
 lib/textsearch.c     | 1 +
 14 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/cpumask.c b/lib/cpumask.c
index 7bb4142a502f..05d6aca7fc19 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -1,3 +1,4 @@
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/cpumask.h>
diff --git a/lib/crc32.c b/lib/crc32.c
index 0f45fbff34cb..bc5b936e9142 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -25,7 +25,6 @@
 #include <linux/module.h>
 #include <linux/compiler.h>
 #include <linux/types.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 #include <asm/atomic.h>
 #include "crc32defs.h"
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a9a8996d286a..b862b30369ff 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 #include <linux/hash.h>
 
 #define ODEBUG_HASH_BITS	14
diff --git a/lib/devres.c b/lib/devres.c
index 72c8909006da..49368608f988 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,5 +1,6 @@
 #include <linux/pci.h>
 #include <linux/io.h>
+#include <linux/gfp.h>
 #include <linux/module.h>
 
 void devm_ioremap_release(struct device *dev, void *res)
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index f93502915988..d6b8b9b1abfe 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -25,6 +25,7 @@
 #include <linux/uaccess.h>
 #include <linux/dynamic_debug.h>
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 
 extern struct _ddebug __start___verbose[];
 extern struct _ddebug __stop___verbose[];
diff --git a/lib/genalloc.c b/lib/genalloc.c
index e67f97495dd5..736c3b06398e 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -10,6 +10,7 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/bitmap.h>
 #include <linux/genalloc.h>
diff --git a/lib/inflate.c b/lib/inflate.c
index d10255973a9f..677b738c2204 100644
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -103,6 +103,7 @@
       the two sets of lengths.
  */
 #include <linux/compiler.h>
+#include <linux/slab.h>
 
 #ifdef RCSID
 static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
index c5ff1fd10030..9c4233b23783 100644
--- a/lib/kasprintf.c
+++ b/lib/kasprintf.c
@@ -6,6 +6,7 @@
 
 #include <stdarg.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/string.h>
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index c9d3a3e8405d..7b48d44ced6e 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -18,6 +18,7 @@
 #include <linux/string.h>
 #include <linux/kobject.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include <linux/socket.h>
 #include <linux/skbuff.h>
diff --git a/lib/kref.c b/lib/kref.c
index 9ecd6e865610..6d19f690380b 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -13,6 +13,7 @@
 
 #include <linux/kref.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 /**
  * kref_set - initialize object and set refcount to requested number.
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 6b9670d6bbf9..0871582aa29d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
-#include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 0d475d8167bf..9afa25b52a83 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -7,6 +7,7 @@
  * Version 2. See the file COPYING for more details.
  */
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/scatterlist.h>
 #include <linux/highmem.h>
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 437eedb5a53b..5fddf720da73 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
+#include <linux/gfp.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
diff --git a/lib/textsearch.c b/lib/textsearch.c
index 9fbcb44c554f..d608331b3e47 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -103,6 +103,7 @@
 #include <linux/rcupdate.h>
 #include <linux/err.h>
 #include <linux/textsearch.h>
+#include <linux/slab.h>
 
 static LIST_HEAD(ts_ops);
 static DEFINE_SPINLOCK(ts_mod_lock);
-- 
cgit v1.2.2


From 1527bc8b928dd1399c3d3467dd47d9ede210978a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 1 Feb 2010 15:03:07 +0100
Subject: bitops: Optimize hweight() by making use of compile-time evaluation

Rename the existing runtime hweight() implementations to
__arch_hweight(), rename the compile-time versions to __const_hweight()
and then have hweight() pick between them.

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100318111929.GB11152@aftab>
Acked-by: H. Peter Anvin <hpa@zytor.com>
LKML-Reference: <1265028224.24455.154.camel@laptop>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/hweight.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'lib')

diff --git a/lib/hweight.c b/lib/hweight.c
index 63ee4eb1228d..a6927e76840f 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,7 +9,7 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
-unsigned int hweight32(unsigned int w)
+unsigned int __arch_hweight32(unsigned int w)
 {
 #ifdef ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x55555555;
@@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w)
 	return (res + (res >> 16)) & 0x000000FF;
 #endif
 }
-EXPORT_SYMBOL(hweight32);
+EXPORT_SYMBOL(__arch_hweight32);
 
-unsigned int hweight16(unsigned int w)
+unsigned int __arch_hweight16(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x5555);
 	res = (res & 0x3333) + ((res >> 2) & 0x3333);
 	res = (res + (res >> 4)) & 0x0F0F;
 	return (res + (res >> 8)) & 0x00FF;
 }
-EXPORT_SYMBOL(hweight16);
+EXPORT_SYMBOL(__arch_hweight16);
 
-unsigned int hweight8(unsigned int w)
+unsigned int __arch_hweight8(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x55);
 	res = (res & 0x33) + ((res >> 2) & 0x33);
 	return (res + (res >> 4)) & 0x0F;
 }
-EXPORT_SYMBOL(hweight8);
+EXPORT_SYMBOL(__arch_hweight8);
 
-unsigned long hweight64(__u64 w)
+unsigned long __arch_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
-	return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+	return __arch_hweight32((unsigned int)(w >> 32)) +
+	       __arch_hweight32((unsigned int)w);
 #elif BITS_PER_LONG == 64
 #ifdef ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x5555555555555555ul;
@@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w)
 #endif
 #endif
 }
-EXPORT_SYMBOL(hweight64);
+EXPORT_SYMBOL(__arch_hweight64);
-- 
cgit v1.2.2


From d61931d89be506372d01a90d1755f6d0a9fafe2d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 5 Mar 2010 17:34:46 +0100
Subject: x86: Add optimized popcnt variants

Add support for the hardware version of the Hamming weight function,
popcnt, present in CPUs which advertize it under CPUID, Function
0x0000_0001_ECX[23]. On CPUs which don't support it, we fallback to the
default lib/hweight.c sw versions.

A synthetic benchmark comparing popcnt with __sw_hweight64 showed almost
a 3x speedup on a F10h machine.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20100318112015.GC11152@aftab>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 lib/Makefile  |  3 +++
 lib/hweight.c | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 2e152aed7198..abe63a8ad143 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -39,7 +39,10 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
+
+CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
diff --git a/lib/hweight.c b/lib/hweight.c
index a6927e76840f..3c79d50814cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,7 +9,7 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
-unsigned int __arch_hweight32(unsigned int w)
+unsigned int __sw_hweight32(unsigned int w)
 {
 #ifdef ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x55555555;
@@ -24,30 +24,30 @@ unsigned int __arch_hweight32(unsigned int w)
 	return (res + (res >> 16)) & 0x000000FF;
 #endif
 }
-EXPORT_SYMBOL(__arch_hweight32);
+EXPORT_SYMBOL(__sw_hweight32);
 
-unsigned int __arch_hweight16(unsigned int w)
+unsigned int __sw_hweight16(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x5555);
 	res = (res & 0x3333) + ((res >> 2) & 0x3333);
 	res = (res + (res >> 4)) & 0x0F0F;
 	return (res + (res >> 8)) & 0x00FF;
 }
-EXPORT_SYMBOL(__arch_hweight16);
+EXPORT_SYMBOL(__sw_hweight16);
 
-unsigned int __arch_hweight8(unsigned int w)
+unsigned int __sw_hweight8(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x55);
 	res = (res & 0x33) + ((res >> 2) & 0x33);
 	return (res + (res >> 4)) & 0x0F;
 }
-EXPORT_SYMBOL(__arch_hweight8);
+EXPORT_SYMBOL(__sw_hweight8);
 
-unsigned long __arch_hweight64(__u64 w)
+unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
-	return __arch_hweight32((unsigned int)(w >> 32)) +
-	       __arch_hweight32((unsigned int)w);
+	return __sw_hweight32((unsigned int)(w >> 32)) +
+	       __sw_hweight32((unsigned int)w);
 #elif BITS_PER_LONG == 64
 #ifdef ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x5555555555555555ul;
@@ -64,4 +64,4 @@ unsigned long __arch_hweight64(__u64 w)
 #endif
 #endif
 }
-EXPORT_SYMBOL(__arch_hweight64);
+EXPORT_SYMBOL(__sw_hweight64);
-- 
cgit v1.2.2


From 47c4c864af60d14926f4017d23968a8341b8ab9f Mon Sep 17 00:00:00 2001
From: Michal Simek <monstr@monstr.eu>
Date: Tue, 6 Apr 2010 10:19:30 +0200
Subject: microblaze: Enable memory leak detector

Enable DEBUG_KMEMLEAK for microblaze

Signed-off-by: Michal Simek <monstr@monstr.eu>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1fafb4b99c9b..ff017108700d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -356,7 +356,7 @@ config SLUB_STATS
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
 	depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
-		(X86 || ARM || PPC || S390 || SUPERH)
+		(X86 || ARM || PPC || S390 || SUPERH || MICROBLAZE)
 
 	select DEBUG_FS if SYSFS
 	select STACKTRACE if STACKTRACE_SUPPORT
-- 
cgit v1.2.2


From 39a37ce1cc5eef420604fa68b776ff5dab400340 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 6 Apr 2010 19:45:12 +0300
Subject: dma-debug: Cleanup for copy-loop in filter_write()

Earlier in this function we set the last byte of "buf" to NUL so we
always hit the break statement and "i" is never equal to NAME_MAX_LEN.
This patch doesn't change how the driver works but it silences a Smatch
warning and it makes it clearer that we don't write past the end of the
array.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 lib/dma-debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index ba8b67039d13..01e64270e246 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -570,7 +570,7 @@ static ssize_t filter_write(struct file *file, const char __user *userbuf,
 	 * Now parse out the first token and use it as the name for the
 	 * driver to filter for.
 	 */
-	for (i = 0; i < NAME_MAX_LEN; ++i) {
+	for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
 		current_driver_name[i] = buf[i];
 		if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
 			break;
-- 
cgit v1.2.2


From 2a7268abc48c8009b284bd88605d14fcb65772ec Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Tue, 6 Apr 2010 14:35:01 -0700
Subject: ratelimit: annotate ___ratelimit()

To prevent the return value from being used wrongly.

[akpm@linux-foundation.org: fix spello]
Signed-off-by: Yong Zhang <yong.zhang@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/ratelimit.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 09f5ce1810dc..b4cd24ff9b29 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -16,9 +16,14 @@
 /*
  * __ratelimit - rate limiting
  * @rs: ratelimit_state data
+ * @func: name of calling function
  *
- * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
- * in every @rs->ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->burst callbacks
+ * in every @rs->interval
+ *
+ * RETURNS:
+ * 0 means callbacks will be suppressed.
+ * 1 means go ahead and do it.
  */
 int ___ratelimit(struct ratelimit_state *rs, const char *func)
 {
-- 
cgit v1.2.2


From 57119c34e53bbb8d244c3ff1335ef5145768538f Mon Sep 17 00:00:00 2001
From: Yong Zhang <yong.zhang@windriver.com>
Date: Tue, 6 Apr 2010 14:35:03 -0700
Subject: ratelimit: fix the return value when __ratelimit() fails to acquire
 the lock

The log of commit edaac8e3167501cda336231d00611bf59c164346 ("ratelimit:
Fix/allow use in atomic contexts"), indicates that we want to suppress the
callback when the trylock fails.

Signed-off-by: Yong Zhang <yong.zhang@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/ratelimit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index b4cd24ff9b29..027a03f4c56d 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -40,7 +40,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
 	 * the entity that is holding the lock already:
 	 */
 	if (!spin_trylock_irqsave(&rs->lock, flags))
-		return 1;
+		return 0;
 
 	if (!rs->begin)
 		rs->begin = jiffies;
-- 
cgit v1.2.2


From 3eac4abaa69949af0e2f64e5c55ee8a22bbdd3e7 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@deeprootsystems.com>
Date: Wed, 7 Apr 2010 11:52:46 -0700
Subject: rwsem generic spinlock: use IRQ save/restore spinlocks

rwsems can be used with IRQs disabled, particularly in early boot
before IRQs are enabled.  Currently the spin_unlock_irq() usage in the
slow path will unconditionally enable interrupts and cause problems
since interrupts are not yet initialized or enabled.

This patch uses save/restore versions of IRQ spinlocks in the slowpath
to ensure interrupts are not unintentionally enabled.

Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/rwsem-spinlock.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index ccf95bff7984..ffc9fc7f3b05 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -143,13 +143,14 @@ void __sched __down_read(struct rw_semaphore *sem)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
+	unsigned long flags;
 
-	spin_lock_irq(&sem->wait_lock);
+	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
 		/* granted */
 		sem->activity++;
-		spin_unlock_irq(&sem->wait_lock);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
 		goto out;
 	}
 
@@ -164,7 +165,7 @@ void __sched __down_read(struct rw_semaphore *sem)
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we don't need to touch the semaphore struct anymore */
-	spin_unlock_irq(&sem->wait_lock);
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -209,13 +210,14 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
+	unsigned long flags;
 
-	spin_lock_irq(&sem->wait_lock);
+	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	if (sem->activity == 0 && list_empty(&sem->wait_list)) {
 		/* granted */
 		sem->activity = -1;
-		spin_unlock_irq(&sem->wait_lock);
+		spin_unlock_irqrestore(&sem->wait_lock, flags);
 		goto out;
 	}
 
@@ -230,7 +232,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we don't need to touch the semaphore struct anymore */
-	spin_unlock_irq(&sem->wait_lock);
+	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
 	/* wait to be given the lock */
 	for (;;) {
-- 
cgit v1.2.2


From ce82653d6cfcc95ba88c25908664878459fb1b8d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 6 Apr 2010 22:36:20 +0100
Subject: radix_tree_tag_get() is not as safe as the docs make out [ver #2]

radix_tree_tag_get() is not safe to use concurrently with radix_tree_tag_set()
or radix_tree_tag_clear().  The problem is that the double tag_get() in
radix_tree_tag_get():

		if (!tag_get(node, tag, offset))
			saw_unset_tag = 1;
		if (height == 1) {
			int ret = tag_get(node, tag, offset);

may see the value change due to the action of set/clear.  RCU is no protection
against this as no pointers are being changed, no nodes are being replaced
according to a COW protocol - set/clear alter the node directly.

The documentation in linux/radix-tree.h, however, says that
radix_tree_tag_get() is an exception to the rule that "any function modifying
the tree or tags (...) must exclude other modifications, and exclude any
functions reading the tree".

The problem is that the next statement in radix_tree_tag_get() checks that the
tag doesn't vary over time:

			BUG_ON(ret && saw_unset_tag);

This has been seen happening in FS-Cache:

	https://www.redhat.com/archives/linux-cachefs/2010-April/msg00013.html

To this end, remove the BUG_ON() from radix_tree_tag_get() and note in various
comments that the value of the tag may change whilst the RCU read lock is held,
and thus that the return value of radix_tree_tag_get() may not be relied upon
unless radix_tree_tag_set/clear() and radix_tree_delete() are excluded from
running concurrently with it.

Reported-by: Romain DEGEZ <romain.degez@smartjog.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/radix-tree.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 0871582aa29d..2a087e0f9863 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -555,6 +555,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
  *
  *  0: tag not present or not set
  *  1: tag set
+ *
+ * Note that the return value of this function may not be relied on, even if
+ * the RCU lock is held, unless tag modification and node deletion are excluded
+ * from concurrency.
  */
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag)
@@ -595,12 +599,8 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 		 */
 		if (!tag_get(node, tag, offset))
 			saw_unset_tag = 1;
-		if (height == 1) {
-			int ret = tag_get(node, tag, offset);
-
-			BUG_ON(ret && saw_unset_tag);
-			return !!ret;
-		}
+		if (height == 1)
+			return !!tag_get(node, tag, offset);
 		node = rcu_dereference_raw(node->slots[offset]);
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
-- 
cgit v1.2.2


From 8b8d8e2840a440d62e8dc0ef36ba433b26f70d32 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 9 Apr 2010 00:14:35 -0700
Subject: sparc64: Support kmemleak.

Only missing thing was an _sdata marker in vmlinux.lds.S

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1fafb4b99c9b..6c2be6089559 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -356,7 +356,7 @@ config SLUB_STATS
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
 	depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
-		(X86 || ARM || PPC || S390 || SUPERH)
+		(X86 || ARM || PPC || S390 || SPARC64 || SUPERH)
 
 	select DEBUG_FS if SYSFS
 	select STACKTRACE if STACKTRACE_SUPPORT
-- 
cgit v1.2.2


From 4e310fda91cb095915395f811d10b2c900c9589e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 14 Apr 2010 09:27:40 -0700
Subject: vsprintf: Change struct printf_spec.precision from s8 to s16

Commit ef0658f3de484bf9b173639cd47544584e01efa5 changed precision
from int to s8.

There is existing kernel code that uses a larger precision.

An example from the audit code:
	vsnprintf(...,..., " msg='%.1024s'", (char *)data);
which overflows precision and truncates to nothing.

Extending precision size fixes the audit system issue.

Other changes:

Change the size of the struct printf_spec.type from u16 to u8 so
sizeof(struct printf_spec) stays as small as possible.
Reorder the struct members so sizeof(struct printf_spec) remains 64 bits
without alignment holes.
Document the struct members a bit more.

Original-patch-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Joe Perches <joe@perches.com>
Tested-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 24112e5a5780..7376b7c55ffe 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -408,12 +408,12 @@ enum format_type {
 };
 
 struct printf_spec {
-	u16	type;
-	s16	field_width;	/* width of output field */
+	u8	type;		/* format_type enum */
 	u8	flags;		/* flags to number() */
-	u8	base;
-	s8	precision;	/* # of digits/chars */
-	u8	qualifier;
+	u8	base;		/* number base, 8, 10 or 16 only */
+	u8	qualifier;	/* number qualifier, one of 'hHlLtzZ' */
+	s16	field_width;	/* width of output field */
+	s16	precision;	/* # of digits/chars */
 };
 
 static char *number(char *buf, char *end, unsigned long long num,
-- 
cgit v1.2.2


From e59464c735db19619cde2aa331609adb02005f5b Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 23 Apr 2010 13:17:45 -0400
Subject: flex_array: fix the panic when calling flex_array_alloc() without
 __GFP_ZERO

memset() is called with the wrong address and the kernel panics.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Acked-by: David Rientjes <rientjes@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/flex_array.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/flex_array.c b/lib/flex_array.c
index 66eef2e4483e..41b1804fa728 100644
--- a/lib/flex_array.c
+++ b/lib/flex_array.c
@@ -99,7 +99,7 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total,
 	ret->element_size = element_size;
 	ret->total_nr_elements = total;
 	if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
-		memset(ret->parts[0], FLEX_ARRAY_FREE,
+		memset(&ret->parts[0], FLEX_ARRAY_FREE,
 						FLEX_ARRAY_BASE_BYTES_LEFT);
 	return ret;
 }
-- 
cgit v1.2.2


From ccdb40048b2972f10bdc944913c0e0ee26b5d1f2 Mon Sep 17 00:00:00 2001
From: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Date: Fri, 23 Apr 2010 13:17:58 -0400
Subject: lib: fix the use of LZO to decompress initramfs images

This patch fixes 2 issues with the LZO decompressor:

- It doesn't handle the case where a block isn't compressed at all.  In
  this case, calling lzo1x_decompress_safe will fail, so we need to just
  use memcpy() instead (the upstream LZO code does something similar)

- Since commit 54291362d2a5738e1b0495df2abcb9e6b0563a3f ("initramfs: add
  missing decompressor error check"), the decompressor return code is
  checked in init/initramfs.c.  The LZO decompressor didn't return the
  expected value, causing the initramfs code to falsely believe a
  decompression error occurred.

Signed-off-by: Albin Tonnerre <albin.tonnerre@free-electrons.com>
Tested-by: bert schulze <spambemyguest@googlemail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/decompress_unlzo.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index db521f45626e..bcb3a4bd68ff 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c
@@ -97,7 +97,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
 	u32 src_len, dst_len;
 	size_t tmp;
 	u8 *in_buf, *in_buf_save, *out_buf;
-	int obytes_processed = 0;
+	int ret = -1;
 
 	set_error_fn(error_fn);
 
@@ -174,15 +174,22 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
 
 		/* decompress */
 		tmp = dst_len;
-		r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
+
+		/* When the input data is not compressed at all,
+		 * lzo1x_decompress_safe will fail, so call memcpy()
+		 * instead */
+		if (unlikely(dst_len == src_len))
+			memcpy(out_buf, in_buf, src_len);
+		else {
+			r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
 						out_buf, &tmp);
 
-		if (r != LZO_E_OK || dst_len != tmp) {
-			error("Compressed data violation");
-			goto exit_2;
+			if (r != LZO_E_OK || dst_len != tmp) {
+				error("Compressed data violation");
+				goto exit_2;
+			}
 		}
 
-		obytes_processed += dst_len;
 		if (flush)
 			flush(out_buf, dst_len);
 		if (output)
@@ -196,6 +203,7 @@ STATIC inline int INIT unlzo(u8 *input, int in_len,
 			in_buf += src_len;
 	}
 
+	ret = 0;
 exit_2:
 	if (!input)
 		free(in_buf);
@@ -203,7 +211,7 @@ exit_1:
 	if (!output)
 		free(out_buf);
 exit:
-	return obytes_processed;
+	return ret;
 }
 
 #define decompress unlzo
-- 
cgit v1.2.2


From 98d5ce0d0044666fc85a01915a1d22407eb546fd Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 23 Apr 2010 13:18:04 -0400
Subject: lib/vsprintf.c: add missing EXPORT_SYMBOL(simple_strtoll)

Add a missing EXPORT_SYMBOL.

I must be the first person that wants to use this function :-)

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7376b7c55ffe..46d34b0b74a8 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -118,6 +118,7 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 
 	return simple_strtoull(cp, endp, base);
 }
+EXPORT_SYMBOL(simple_strtoll);
 
 /**
  * strict_strtoul - convert a string to an unsigned long strictly
-- 
cgit v1.2.2


From 2b3fc35f6919344e3cf722dde8308f47235c0b70 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 20 Apr 2010 16:23:07 +0800
Subject: rcu: optionally leave lockdep enabled after RCU lockdep splat

There is no need to disable lockdep after an RCU lockdep splat,
so remove the debug_lockdeps_off() from lockdep_rcu_dereference().
To avoid repeated lockdep splats, use a static variable in the inlined
rcu_dereference_check() and rcu_dereference_protected() macros so that
a given instance splats only once, but so that multiple instances can
be detected per boot.

This is controlled by a new config variable CONFIG_PROVE_RCU_REPEATEDLY,
which is disabled by default.  This provides the normal lockdep behavior
by default, but permits people who want to find multiple RCU-lockdep
splats per boot to easily do so.

Requested-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Tested-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 lib/Kconfig.debug | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 935248bdbc47..94090b4bb7d2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -512,6 +512,18 @@ config PROVE_RCU
 
 	 Say N if you are unsure.
 
+config PROVE_RCU_REPEATEDLY
+	bool "RCU debugging: don't disable PROVE_RCU on first splat"
+	depends on PROVE_RCU
+	default n
+	help
+	 By itself, PROVE_RCU will disable checking upon issuing the
+	 first warning (or "splat").  This feature prevents such
+	 disabling, allowing multiple RCU-lockdep warnings to be printed
+	 on a single reboot.
+
+	 Say N if you are unsure.
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
-- 
cgit v1.2.2


From 55ec936ff4e57cc626db336a7bf33b267390e9b4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 13 Apr 2010 12:22:33 -0700
Subject: rcu: enable CPU_STALL_VERBOSE by default

The CPU_STALL_VERBOSE kernel configuration parameter was added to
2.6.34 to identify any preempted/blocked tasks that were preventing
the current grace period from completing when running preemptible
RCU.  As is conventional for new configuration parameters, it was
disabled by default.  It is now time to enable it by default.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 94090b4bb7d2..930a9e5eae08 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -805,7 +805,7 @@ config RCU_CPU_STALL_DETECTOR
 config RCU_CPU_STALL_VERBOSE
 	bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
 	depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
-	default n
+	default y
 	help
 	  This option causes RCU to printk detailed per-task information
 	  for any tasks that are stalling the current RCU grace period.
-- 
cgit v1.2.2


From a5d8e467f83f6672104f276223a88e3b50cbd375 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 17 Apr 2010 08:48:38 -0400
Subject: Debugobjects transition check

Implement a basic state machine checker in the debugobjects.

This state machine checker detects races and inconsistencies within the "active"
life of a debugobject. The checker only keeps track of the current state; all
the state machine logic is kept at the object instance level.

The checker works by adding a supplementary "unsigned int astate" field to the
debug_obj structure. It keeps track of the current "active state" of the object.

The only constraints that are imposed on the states by the debugobjects system
is that:

- activation of an object sets the current active state to 0,
- deactivation of an object expects the current active state to be 0.

For the rest of the states, the state mapping is determined by the specific
object instance. Therefore, the logic keeping track of the state machine is
within the specialized instance, without any need to know about it at the
debugobject level.

The current object active state is changed by calling:

debug_object_active_state(addr, descr, expect, next)

where "expect" is the expected state and "next" is the next state to move to if
the expected state is found. A warning is generated if the expected state is
not found.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: David S. Miller <davem@davemloft.net>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: akpm@linux-foundation.org
CC: mingo@elte.hu
CC: laijs@cn.fujitsu.com
CC: dipankar@in.ibm.com
CC: josh@joshtriplett.org
CC: dvhltc@us.ibm.com
CC: niv@us.ibm.com
CC: peterz@infradead.org
CC: rostedt@goodmis.org
CC: Valdis.Kletnieks@vt.edu
CC: dhowells@redhat.com
CC: eric.dumazet@gmail.com
CC: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 lib/debugobjects.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index b862b30369ff..076464fd2072 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -141,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 		obj->object = addr;
 		obj->descr  = descr;
 		obj->state  = ODEBUG_STATE_NONE;
+		obj->astate = 0;
 		hlist_del(&obj->node);
 
 		hlist_add_head(&obj->node, &b->list);
@@ -252,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
 
 	if (limit < 5 && obj->descr != descr_test) {
 		limit++;
-		WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg,
-		       obj_states[obj->state], obj->descr->name);
+		WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
+				 "object type: %s\n",
+			msg, obj_states[obj->state], obj->astate,
+			obj->descr->name);
 	}
 	debug_objects_warnings++;
 }
@@ -447,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
 		case ODEBUG_STATE_INIT:
 		case ODEBUG_STATE_INACTIVE:
 		case ODEBUG_STATE_ACTIVE:
-			obj->state = ODEBUG_STATE_INACTIVE;
+			if (!obj->astate)
+				obj->state = ODEBUG_STATE_INACTIVE;
+			else
+				debug_print_object(obj, "deactivate");
 			break;
 
 		case ODEBUG_STATE_DESTROYED:
@@ -553,6 +559,53 @@ out_unlock:
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
 
+/**
+ * debug_object_active_state - debug checks object usage state machine
+ * @addr:	address of the object
+ * @descr:	pointer to an object specific debug description structure
+ * @expect:	expected state
+ * @next:	state to move to if expected state is found
+ */
+void
+debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+			  unsigned int expect, unsigned int next)
+{
+	struct debug_bucket *db;
+	struct debug_obj *obj;
+	unsigned long flags;
+
+	if (!debug_objects_enabled)
+		return;
+
+	db = get_bucket((unsigned long) addr);
+
+	raw_spin_lock_irqsave(&db->lock, flags);
+
+	obj = lookup_object(addr, db);
+	if (obj) {
+		switch (obj->state) {
+		case ODEBUG_STATE_ACTIVE:
+			if (obj->astate == expect)
+				obj->astate = next;
+			else
+				debug_print_object(obj, "active_state");
+			break;
+
+		default:
+			debug_print_object(obj, "active_state");
+			break;
+		}
+	} else {
+		struct debug_obj o = { .object = addr,
+				       .state = ODEBUG_STATE_NOTAVAILABLE,
+				       .descr = descr };
+
+		debug_print_object(&o, "active_state");
+	}
+
+	raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
 #ifdef CONFIG_DEBUG_OBJECTS_FREE
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
-- 
cgit v1.2.2


From 91af70814105f4c05e6e11b51c3269907b71794b Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Wed, 12 May 2010 11:38:45 +0100
Subject: rwsem: Test for no active locks in __rwsem_do_wake undo code

If there are no active threads using a semaphore, it is always correct
to unqueue blocked threads.  This seems to be what was intended in the
undo code.

What was done instead, was to look for a sem count of zero - this is an
impossible situation, given that at least one thread is known to be
queued on the semaphore.  The code might be correct as written, but it's
hard to reason about and it's not what was intended (otherwise the goto
out would have been unconditional).

Go for checking the active count - the alternative is not worth the
headache.

Signed-off-by: Michel Lespinasse <walken@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/rwsem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/rwsem.c b/lib/rwsem.c
index 3e3365e5665e..ceba8e28807a 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -136,9 +136,10 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
  out:
 	return sem;
 
-	/* undo the change to count, but check for a transition 1->0 */
+	/* undo the change to the active count, but check for a transition
+	 * 1->0 */
  undo:
-	if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0)
+	if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) & RWSEM_ACTIVE_MASK)
 		goto out;
 	goto try_again;
 }
-- 
cgit v1.2.2


From 43aa7ac736a4e21aae4882bd8f7c67403aed45b8 Mon Sep 17 00:00:00 2001
From: "kirjanov@gmail.com" <kirjanov@gmail.com>
Date: Sat, 15 May 2010 12:32:34 -0400
Subject: lib/btree: fix possible NULL pointer dereference

mempool_alloc() can return null in atomic case.

Signed-off-by: Denis Kirjanov <kirjanov@gmail.com>
Cc: Joern Engel <joern@logfs.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/btree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/btree.c b/lib/btree.c
index 41859a820218..c9c6f0351526 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -95,7 +95,8 @@ static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
 	unsigned long *node;
 
 	node = mempool_alloc(head->mempool, gfp);
-	memset(node, 0, NODESIZE);
+	if (likely(node))
+		memset(node, 0, NODESIZE);
 	return node;
 }
 
-- 
cgit v1.2.2


From b2be05273a1744d175bf4b67f6665637bb9ac7a8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 3 Apr 2010 19:34:56 +0100
Subject: panic: Allow warnings to set different taint flags

WARN() is used in some places to report firmware or hardware bugs that
are then worked-around.  These bugs do not affect the stability of the
kernel and should not set the flag for TAINT_WARN.  To allow for this,
add WARN_TAINT() and WARN_TAINT_ONCE() macros that take a taint number
as argument.

Architectures that implement warnings using trap instructions instead
of calls to warn_slowpath_*() now implement __WARN_TAINT(taint)
instead of __WARN().

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Acked-by: Helge Deller <deller@gmx.de>
Tested-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 lib/bug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/bug.c b/lib/bug.c
index 300e41afbf97..f13daf435211 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -165,7 +165,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 			       (void *)bugaddr);
 
 		show_regs(regs);
-		add_taint(TAINT_WARN);
+		add_taint(BUG_GET_TAINT(bug));
 		return BUG_TRAP_TYPE_WARN;
 	}
 
-- 
cgit v1.2.2


From fab1c23242528771a955c475ef23d99156a71a7f Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Tue, 18 May 2010 14:35:18 +0800
Subject: Unified UUID/GUID definition

There are many different UUID/GUID definitions in the kernel, such as
those in EFI, many file systems, some drivers, etc. Every kernel component
that needs a UUID/GUID has its own definition. This patch provides a
unified definition for UUID/GUID.

UUID is defined via typedef. This makes UUID look more like a
primitive type, and makes the data type explicit (compared with an
implicit "u8 uuid[16]").

The binary representation of UUID/GUID can be little-endian (used by
EFI, etc) or big-endian (defined by RFC4122), so both are defined.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 lib/Makefile |  2 +-
 lib/uuid.c   | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 lib/uuid.c

(limited to 'lib')

diff --git a/lib/Makefile b/lib/Makefile
index 0d4015205c64..f3eb6e8766be 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -21,7 +21,7 @@ lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
-	 string_helpers.o gcd.o lcm.o list_sort.o
+	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 000000000000..8fadd7cef46c
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
+/*
+ * Unified UUID/GUID definition
+ *
+ * Copyright (C) 2009, Intel Corp.
+ *	Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uuid.h>
+#include <linux/random.h>
+
+static void __uuid_gen_common(__u8 b[16])
+{
+	int i;
+	u32 r;
+
+	for (i = 0; i < 4; i++) {
+		r = random32();
+		memcpy(b + i * 4, &r, 4);
+	}
+	/* reversion 0b10 */
+	b[8] = (b[8] & 0x3F) | 0x80;
+}
+
+void uuid_le_gen(uuid_le *lu)
+{
+	__uuid_gen_common(lu->b);
+	/* version 4 : random generation */
+	lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_le_gen);
+
+void uuid_be_gen(uuid_be *bu)
+{
+	__uuid_gen_common(bu->b);
+	/* version 4 : random generation */
+	bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_be_gen);
-- 
cgit v1.2.2


From dcc7871128e99458ca86186b7bc8bf27ff0c47b5 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:21 -0500
Subject: kgdb: core changes to support kdb

These are the minimum changes to the kgdb core in order to enable an
API to connect a new front end (kdb) to the debug core.

This patch introduces the dbg_kdb_mode variable, which controls where
the user level I/O is routed.  It will be routed to the gdbstub (kgdb) or
to the kdb front end which is a simple shell available over the kgdboc
connection.

You can switch back and forth between kdb or the gdb stub mode of
operation dynamically.  From gdb stub mode you can blindly type
"$3#33", or from the kdb mode you can enter "kgdb" to switch to the
gdb stub.

The logic in the debug core depends on kdb to look for the typical gdb
connection sequences and return immediately with KGDB_PASS_EVENT if a
gdb serial command sequence is detected.  That should allow a
reasonably seamless transition between kdb -> gdb without leaving the
kernel exception state.  The two gdb serial queries that kdb is
responsible for detecting are the "?" and "qSupported" packets.

CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Martin Hicks <mort@sgi.com>
---
 lib/Kconfig.kgdb | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 9b5d1d7f2ef7..78de43a5e902 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB
 	bool
 
 menuconfig KGDB
-	bool "KGDB: kernel debugging with remote gdb"
+	bool "KGDB: kernel debugger"
 	depends on HAVE_ARCH_KGDB
 	depends on DEBUG_KERNEL && EXPERIMENTAL
 	help
@@ -57,4 +57,10 @@ config KGDB_TESTS_BOOT_STRING
 	  information about other strings you could use beyond the
 	  default of V1F100.
 
+config KGDB_KDB
+	bool "KGDB_KDB: include kdb frontend for kgdb"
+	default n
+	help
+	  KDB frontend for kernel
+
 endif # KGDB
-- 
cgit v1.2.2


From ada64e4c98eb5f04a9ca223c5ff9e7ac22ce6404 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:24 -0500
Subject: kgdboc,keyboard: Keyboard driver for kdb with kgdb

This patch adds in the kdb PS/2 keyboard driver.  This was mostly a
direct port from the original kdb where I cleaned up the code against
checkpatch.pl and added the glue to stitch it into kgdb.

This patch also enables early kdb debug via kgdbwait and the keyboard.

All the access to configure kdb using either a serial console or the
keyboard is done via kgdboc.

If you want to use only the keyboard and want to break in early you
would add to your kernel command arguments:

    kgdboc=kbd kgdbwait

If you want serial and/or keyboard access you could use:

    kgdboc=kbd,ttyS0

You can also configure kgdboc as a kernel module or at run time with
the sysfs where you can activate and deactivate kgdb.

Turn it on:
    echo kbd,ttyS0 > /sys/module/kgdboc/parameters/kgdboc

Turn it off:
    echo "" > /sys/module/kgdboc/parameters/kgdboc

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 lib/Kconfig.kgdb | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 78de43a5e902..ee8ae7132f20 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -63,4 +63,11 @@ config KGDB_KDB
 	help
 	  KDB frontend for kernel
 
+config KDB_KEYBOARD
+	bool "KGDB_KDB: keyboard as input device"
+	depends on VT && KGDB_KDB
+	default n
+	help
+	  KDB can use a PS/2 type keyboard for an input device
+
 endif # KGDB
-- 
cgit v1.2.2


From f503b5ae53cb557ac351a668fcac1baab1cef0db Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:25 -0500
Subject: x86,kgdb: Add low level debug hook

The only way the debugger can handle a trap inside rcu_lock,
notify_die, or atomic_notifier_call_chain without a triple fault is
to have a low level "first opportunity handler" in the int3 exception
handler.

Generally this will be something the vast majority of folks will not
need, but for those who need it, it is added as a kernel .config
option called KGDB_LOW_LEVEL_TRAP.

CC: Ingo Molnar <mingo@elte.hu>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: H. Peter Anvin <hpa@zytor.com>
CC: x86@kernel.org
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 lib/Kconfig.kgdb | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index ee8ae7132f20..c56ccb4ad292 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -57,6 +57,15 @@ config KGDB_TESTS_BOOT_STRING
 	  information about other strings you could use beyond the
 	  default of V1F100.
 
+config KGDB_LOW_LEVEL_TRAP
+       bool "KGDB: Allow debugging with traps in notifiers"
+       depends on X86
+       default n
+       help
+         This will add an extra call back to kgdb for the breakpoint
+         exception handler on which will will allow kgdb to step
+         through a notify handler.
+
 config KGDB_KDB
 	bool "KGDB_KDB: include kdb frontend for kgdb"
 	default n
-- 
cgit v1.2.2


From 5dd11d5d47d248850c58292513f0e164ba98b01e Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:26 -0500
Subject: mips,kgdb: kdb low level trap catch and stack trace

The only way the debugger can handle a trap inside rcu_lock,
notify_die, or atomic_notifier_call_chain without a recursive fault is
to have a low level "first opportunity handler" do_trap_or_bp() handler.

Generally this will be something the vast majority of folks will not
need, but for those who need it, it is added as a kernel .config
option called KGDB_LOW_LEVEL_TRAP.

Also added was a die notification for oops such that kdb can catch an
oops for analysis.

There appeared to be no obvious way to pass the struct pt_regs from
the original exception back to the stack back tracer, so a special
case was added to show_stack() for when kdb is active because you
generally want to look at the back trace of the original
exception.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
---
 lib/Kconfig.kgdb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index c56ccb4ad292..43cb93fa2651 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -59,7 +59,7 @@ config KGDB_TESTS_BOOT_STRING
 
 config KGDB_LOW_LEVEL_TRAP
        bool "KGDB: Allow debugging with traps in notifiers"
-       depends on X86
+       depends on X86 || MIPS
        default n
        help
          This will add an extra call back to kgdb for the breakpoint
-- 
cgit v1.2.2


From db1afffab0b5d9f6d31f8f4bea44c9cb3bc59351 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 16 Mar 2010 15:14:51 +1100
Subject: kref: remove kref_set

Of the three uses of kref_set in the kernel:

 One really should be kref_put as the code is letting go of a
    reference,
 Two really should be kref_init because the kref is being
    initialised.

This suggests that making kref_set available encourages bad code.
So fix the three uses and remove kref_set completely.

Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kref.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

(limited to 'lib')

diff --git a/lib/kref.c b/lib/kref.c
index 6d19f690380b..d3d227a08a4b 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -15,24 +15,14 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 
-/**
- * kref_set - initialize object and set refcount to requested number.
- * @kref: object in question.
- * @num: initial reference counter
- */
-void kref_set(struct kref *kref, int num)
-{
-	atomic_set(&kref->refcount, num);
-	smp_mb();
-}
-
 /**
  * kref_init - initialize object.
  * @kref: object in question.
  */
 void kref_init(struct kref *kref)
 {
-	kref_set(kref, 1);
+	atomic_set(&kref->refcount, 1);
+	smp_mb();
 }
 
 /**
@@ -72,7 +62,6 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 	return 0;
 }
 
-EXPORT_SYMBOL(kref_set);
 EXPORT_SYMBOL(kref_init);
 EXPORT_SYMBOL(kref_get);
 EXPORT_SYMBOL(kref_put);
-- 
cgit v1.2.2


From bc451f2058238013e1cdf4acd443c01734d332f0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 30 Mar 2010 11:31:25 -0700
Subject: kobj: Add basic infrastructure for dealing with namespaces.

Move complete knowledge of namespaces into the kobject layer
so we can use that information when reporting kobjects to
userspace.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

(limited to 'lib')

diff --git a/lib/kobject.c b/lib/kobject.c
index 8115eb1bbf4d..bbb2bb40ee1f 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -850,6 +850,109 @@ struct kset *kset_create_and_add(const char *name,
 }
 EXPORT_SYMBOL_GPL(kset_create_and_add);
 
+
+static DEFINE_SPINLOCK(kobj_ns_type_lock);
+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
+
+int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
+{
+	enum kobj_ns_type type = ops->type;
+	int error;
+
+	spin_lock(&kobj_ns_type_lock);
+
+	error = -EINVAL;
+	if (type >= KOBJ_NS_TYPES)
+		goto out;
+
+	error = -EINVAL;
+	if (type <= KOBJ_NS_TYPE_NONE)
+		goto out;
+
+	error = -EBUSY;
+	if (kobj_ns_ops_tbl[type])
+		goto out;
+
+	error = 0;
+	kobj_ns_ops_tbl[type] = ops;
+
+out:
+	spin_unlock(&kobj_ns_type_lock);
+	return error;
+}
+
+int kobj_ns_type_registered(enum kobj_ns_type type)
+{
+	int registered = 0;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES))
+		registered = kobj_ns_ops_tbl[type] != NULL;
+	spin_unlock(&kobj_ns_type_lock);
+
+	return registered;
+}
+
+const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
+{
+	const struct kobj_ns_type_operations *ops = NULL;
+
+	if (parent && parent->ktype->child_ns_type)
+		ops = parent->ktype->child_ns_type(parent);
+
+	return ops;
+}
+
+const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
+{
+	return kobj_child_ns_ops(kobj->parent);
+}
+
+
+const void *kobj_ns_current(enum kobj_ns_type type)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->current_ns();
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->netlink_ns(sk);
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+const void *kobj_ns_initial(enum kobj_ns_type type)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->initial_ns();
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
+{
+}
+
+
 EXPORT_SYMBOL(kobject_get);
 EXPORT_SYMBOL(kobject_put);
 EXPORT_SYMBOL(kobject_del);
-- 
cgit v1.2.2


From 3ff195b011d7decf501a4d55aeed312731094796 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 30 Mar 2010 11:31:26 -0700
Subject: sysfs: Implement sysfs tagged directory support.

The problem.  When implementing a network namespace I need to be able
to have multiple network devices with the same name.  Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.

What this patch does is to add an additional tag field to the
sysfs dirent structure.  For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible.  Effectively
this is the same as creating multiple distinct directories with
the same name but internally to sysfs the result is nicer.

I am calling the concept of a single directory that looks like multiple
directories all at the same path in the filesystem tagged directories.

For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded.  Which means I need
a simple race free way to setup those directories as tagged.

To achieve a race-free design, all tagged directories are created
and managed by sysfs itself.

Users of this interface:
- define a type in the sysfs_tag_type enumeration.
- call sysfs_register_ns_types with the type and its operations
- sysfs_exit_ns when an individual tag is no longer valid

- Implement mount_ns() which returns the ns of the calling process
  so we can attach it to a sysfs superblock.
- Implement ktype.namespace() which returns the ns of a sysfs kobject.

Everything else is left up to sysfs and the driver layer.

For the network namespace mount_ns and namespace() are essentially
one line functions, and look to remain that.

Tags are currently represented as const void * pointers, as that is
generic, provides enough information for equality comparisons,
and is trivial to create for current users, as it is just the
existing namespace pointer.

The work needed in sysfs is more extensive.  At each directory
or symlink creation I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent.  Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.

Currently only directories which hold kobjects, and
symlinks are supported.  There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on which makes it useless, and there are
no potential users which makes it an uninteresting problem
to solve.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/kobject.c b/lib/kobject.c
index bbb2bb40ee1f..b2c6d1f56e65 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -950,6 +950,7 @@ const void *kobj_ns_initial(enum kobj_ns_type type)
 
 void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
 {
+	sysfs_exit_ns(type, ns);
 }
 
 
-- 
cgit v1.2.2


From be867b194a3ae3c680c29521287ae49b4d44d420 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 3 May 2010 16:23:15 -0500
Subject: sysfs: Comment sysfs directory tagging logic

Add some in-line comments to explain the new infrastructure, which
was introduced to support sysfs directory tagging with namespaces.
I think an overall description someplace might be good too, but it
didn't really seem to fit into Documentation/filesystems/sysfs.txt,
which appears more geared toward users, rather than maintainers, of
sysfs.

(Tejun, please let me know if I can make anything clearer or failed
altogether to comment something that should be commented.)

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'lib')

diff --git a/lib/kobject.c b/lib/kobject.c
index b2c6d1f56e65..f07c57252e82 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -948,6 +948,17 @@ const void *kobj_ns_initial(enum kobj_ns_type type)
 	return ns;
 }
 
+/*
+ * kobj_ns_exit - invalidate a namespace tag
+ *
+ * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET)
+ * @ns: the actual namespace being invalidated
+ *
+ * This is called when a tag is no longer valid.  For instance,
+ * when a network namespace exits, it uses this helper to
+ * make sure no sb's sysfs_info points to the now-invalidated
+ * netns.
+ */
 void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
 {
 	sysfs_exit_ns(type, ns);
-- 
cgit v1.2.2


From 07e98962fa778b9782c8845dfcb06a84cc050744 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:44 -0700
Subject: kobject: Send hotplug events in all network namespaces

Open a copy of the uevent kernel socket in each network
namespace so we can send uevents in all network namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject_uevent.c | 68 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 60 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 7b48d44ced6e..9084f2550c2a 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -24,13 +24,19 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 
 
 u64 uevent_seqnum;
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 static DEFINE_SPINLOCK(sequence_lock);
-#if defined(CONFIG_NET)
-static struct sock *uevent_sock;
+#ifdef CONFIG_NET
+struct uevent_sock {
+	struct list_head list;
+	struct sock *sk;
+};
+static LIST_HEAD(uevent_sock_list);
+static DEFINE_MUTEX(uevent_sock_mutex);
 #endif
 
 /* the strings here must match the enum in include/linux/kobject.h */
@@ -100,6 +106,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 	u64 seq;
 	int i = 0;
 	int retval = 0;
+#ifdef CONFIG_NET
+	struct uevent_sock *ue_sk;
+#endif
 
 	pr_debug("kobject: '%s' (%p): %s\n",
 		 kobject_name(kobj), kobj, __func__);
@@ -211,7 +220,9 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 
 #if defined(CONFIG_NET)
 	/* send netlink message */
-	if (uevent_sock) {
+	mutex_lock(&uevent_sock_mutex);
+	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+		struct sock *uevent_sock = ue_sk->sk;
 		struct sk_buff *skb;
 		size_t len;
 
@@ -241,6 +252,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 		} else
 			retval = -ENOMEM;
 	}
+	mutex_unlock(&uevent_sock_mutex);
 #endif
 
 	/* call uevent_helper, usually only enabled during early boot */
@@ -320,18 +332,58 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
-static int __init kobject_uevent_init(void)
+static int uevent_net_init(struct net *net)
 {
-	uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT,
-					    1, NULL, NULL, THIS_MODULE);
-	if (!uevent_sock) {
+	struct uevent_sock *ue_sk;
+
+	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
+	if (!ue_sk)
+		return -ENOMEM;
+
+	ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
+					  1, NULL, NULL, THIS_MODULE);
+	if (!ue_sk->sk) {
 		printk(KERN_ERR
 		       "kobject_uevent: unable to create netlink socket!\n");
 		return -ENODEV;
 	}
-	netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+	mutex_lock(&uevent_sock_mutex);
+	list_add_tail(&ue_sk->list, &uevent_sock_list);
+	mutex_unlock(&uevent_sock_mutex);
 	return 0;
 }
 
+static void uevent_net_exit(struct net *net)
+{
+	struct uevent_sock *ue_sk;
+
+	mutex_lock(&uevent_sock_mutex);
+	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+		if (sock_net(ue_sk->sk) == net)
+			goto found;
+	}
+	mutex_unlock(&uevent_sock_mutex);
+	return;
+
+found:
+	list_del(&ue_sk->list);
+	mutex_unlock(&uevent_sock_mutex);
+
+	netlink_kernel_release(ue_sk->sk);
+	kfree(ue_sk);
+}
+
+static struct pernet_operations uevent_net_ops = {
+	.init	= uevent_net_init,
+	.exit	= uevent_net_exit,
+};
+
+static int __init kobject_uevent_init(void)
+{
+	netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+	return register_pernet_subsys(&uevent_net_ops);
+}
+
+
 postcore_initcall(kobject_uevent_init);
 #endif
-- 
cgit v1.2.2


From 5f71a29629b4717445f8b7f5fb8f50c2d262b68e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:47 -0700
Subject: kobj: Send hotplug events in the proper namespace.

Utilize netlink_broadcast_filtered to allow sending hotplug events
in the proper namespace.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject_uevent.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9084f2550c2a..239c8e83fc28 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -83,6 +83,22 @@ out:
 	return ret;
 }
 
+static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+	struct kobject *kobj = data;
+	const struct kobj_ns_type_operations *ops;
+
+	ops = kobj_ns_ops(kobj);
+	if (ops) {
+		const void *sock_ns, *ns;
+		ns = kobj->ktype->namespace(kobj);
+		sock_ns = ops->netlink_ns(dsk);
+		return sock_ns != ns;
+	}
+
+	return 0;
+}
+
 /**
  * kobject_uevent_env - send an uevent with environmental data
  *
@@ -244,8 +260,10 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 			}
 
 			NETLINK_CB(skb).dst_group = 1;
-			retval = netlink_broadcast(uevent_sock, skb, 0, 1,
-						   GFP_KERNEL);
+			retval = netlink_broadcast_filtered(uevent_sock, skb,
+							    0, 1, GFP_KERNEL,
+							    kobj_bcast_filter,
+							    kobj);
 			/* ENOBUFS should be handled in userspace */
 			if (retval == -ENOBUFS)
 				retval = 0;
-- 
cgit v1.2.2


From 417daa1e8f893fbac88fd395340ba7779fd3926c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:48 -0700
Subject: hotplug: netns aware uevent_helper

It only makes sense for uevent_helper to get events
in the initial namespaces.  Its invocation is not
per namespace and it is not clear how we could make
its invocation namespace aware.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject_uevent.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 239c8e83fc28..59c15511d58a 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -19,7 +19,7 @@
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-
+#include <linux/user_namespace.h>
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
@@ -99,6 +99,21 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 	return 0;
 }
 
+static int kobj_usermode_filter(struct kobject *kobj)
+{
+	const struct kobj_ns_type_operations *ops;
+
+	ops = kobj_ns_ops(kobj);
+	if (ops) {
+		const void *init_ns, *ns;
+		ns = kobj->ktype->namespace(kobj);
+		init_ns = ops->initial_ns();
+		return ns != init_ns;
+	}
+
+	return 0;
+}
+
 /**
  * kobject_uevent_env - send an uevent with environmental data
  *
@@ -274,7 +289,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 #endif
 
 	/* call uevent_helper, usually only enabled during early boot */
-	if (uevent_helper[0]) {
+	if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
 		char *argv [3];
 
 		argv [0] = uevent_helper;
-- 
cgit v1.2.2


From 0dbdd1bfe0b83798763540111118b78b70792f00 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Mon, 24 May 2010 12:13:20 -0700
Subject: lib/atomic64_test: fix missing include of linux/kernel.h

Fix a build-failure
(http://kisskb.ellerman.id.au/kisskb/buildresult/2601239/) by adding the
missing include file (linux/kernel.h) for printk and KERN_INFO.

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
LKML-Reference: <201005241913.o4OJDKdf010884@imap1.linux-foundation.org>
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 lib/atomic64_test.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 65e482caf5e9..9087d71537dd 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -9,6 +9,7 @@
  * (at your option) any later version.
  */
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <asm/atomic.h>
 
 #define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
-- 
cgit v1.2.2


From 4be929be34f9bdeffa40d815d32d7d60d2c7f03b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 24 May 2010 14:33:03 -0700
Subject: kernel-wide: replace USHORT_MAX, SHORT_MAX and SHORT_MIN with
 USHRT_MAX, SHRT_MAX and SHRT_MIN

- C99 knows about USHRT_MAX/SHRT_MAX/SHRT_MIN, not
  USHORT_MAX/SHORT_MAX/SHORT_MIN.

- Make SHRT_MIN of type s16, not int, for consistency.

[akpm@linux-foundation.org: fix drivers/dma/timb_dma.c]
[akpm@linux-foundation.org: fix security/keys/keyring.c]
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 46d34b0b74a8..20c95121d8a1 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1980,7 +1980,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 		{
 			char *s = (char *)va_arg(args, char *);
 			if (field_width == -1)
-				field_width = SHORT_MAX;
+				field_width = SHRT_MAX;
 			/* first, skip leading white space in buffer */
 			str = skip_spaces(str);
 
-- 
cgit v1.2.2


From cf3b429b03e827c718030f42e7e3ceaca980475e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 24 May 2010 14:33:16 -0700
Subject: vsprintf.c: use noinline_for_stack

Mark static functions with noinline_for_stack

Before:

  akpm:/usr/src/25> objdump -d lib/vsprintf.o | perl scripts/checkstack.pl
  0x00000e82 pointer [vsprintf.o]:                        344
  0x0000198c pointer [vsprintf.o]:                        344
  0x000025d6 scnprintf [vsprintf.o]:                      216
  0x00002648 scnprintf [vsprintf.o]:                      216
  0x00002565 snprintf [vsprintf.o]:                       208
  0x0000267c sprintf [vsprintf.o]:                        208
  0x000030a3 bprintf [vsprintf.o]:                        208
  0x00003b1e sscanf [vsprintf.o]:                         208
  0x00000608 number [vsprintf.o]:                         136
  0x00000937 number [vsprintf.o]:                         136

After:

  akpm:/usr/src/25> objdump -d lib/vsprintf.o | perl scripts/checkstack.pl
  0x00000a7c symbol_string [vsprintf.o]:                  248
  0x00000ae8 symbol_string [vsprintf.o]:                  248
  0x00002310 scnprintf [vsprintf.o]:                      216
  0x00002382 scnprintf [vsprintf.o]:                      216
  0x0000229f snprintf [vsprintf.o]:                       208
  0x000023b6 sprintf [vsprintf.o]:                        208
  0x00002ddd bprintf [vsprintf.o]:                        208
  0x00003858 sscanf [vsprintf.o]:                         208
  0x00000625 number [vsprintf.o]:                         136
  0x00000954 number [vsprintf.o]:                         136

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/vsprintf.c | 67 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 42 insertions(+), 25 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 20c95121d8a1..b8a2f549ab0e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -267,7 +267,8 @@ int strict_strtoll(const char *cp, unsigned int base, long long *res)
 }
 EXPORT_SYMBOL(strict_strtoll);
 
-static int skip_atoi(const char **s)
+static noinline_for_stack
+int skip_atoi(const char **s)
 {
 	int i = 0;
 
@@ -287,7 +288,8 @@ static int skip_atoi(const char **s)
 /* Formats correctly any integer in [0,99999].
  * Outputs from one to five digits depending on input.
  * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
-static char *put_dec_trunc(char *buf, unsigned q)
+static noinline_for_stack
+char *put_dec_trunc(char *buf, unsigned q)
 {
 	unsigned d3, d2, d1, d0;
 	d1 = (q>>4) & 0xf;
@@ -324,7 +326,8 @@ static char *put_dec_trunc(char *buf, unsigned q)
 	return buf;
 }
 /* Same with if's removed. Always emits five digits */
-static char *put_dec_full(char *buf, unsigned q)
+static noinline_for_stack
+char *put_dec_full(char *buf, unsigned q)
 {
 	/* BTW, if q is in [0,9999], 8-bit ints will be enough, */
 	/* but anyway, gcc produces better code with full-sized ints */
@@ -366,7 +369,8 @@ static char *put_dec_full(char *buf, unsigned q)
 	return buf;
 }
 /* No inlining helps gcc to use registers better */
-static noinline char *put_dec(char *buf, unsigned long long num)
+static noinline_for_stack
+char *put_dec(char *buf, unsigned long long num)
 {
 	while (1) {
 		unsigned rem;
@@ -417,8 +421,9 @@ struct printf_spec {
 	s16	precision;	/* # of digits/chars */
 };
 
-static char *number(char *buf, char *end, unsigned long long num,
-			struct printf_spec spec)
+static noinline_for_stack
+char *number(char *buf, char *end, unsigned long long num,
+	     struct printf_spec spec)
 {
 	/* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
 	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -537,7 +542,8 @@ static char *number(char *buf, char *end, unsigned long long num,
 	return buf;
 }
 
-static char *string(char *buf, char *end, const char *s, struct printf_spec spec)
+static noinline_for_stack
+char *string(char *buf, char *end, const char *s, struct printf_spec spec)
 {
 	int len, i;
 
@@ -567,8 +573,9 @@ static char *string(char *buf, char *end, const char *s, struct printf_spec spec
 	return buf;
 }
 
-static char *symbol_string(char *buf, char *end, void *ptr,
-				struct printf_spec spec, char ext)
+static noinline_for_stack
+char *symbol_string(char *buf, char *end, void *ptr,
+		    struct printf_spec spec, char ext)
 {
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
@@ -588,8 +595,9 @@ static char *symbol_string(char *buf, char *end, void *ptr,
 #endif
 }
 
-static char *resource_string(char *buf, char *end, struct resource *res,
-				struct printf_spec spec, const char *fmt)
+static noinline_for_stack
+char *resource_string(char *buf, char *end, struct resource *res,
+		      struct printf_spec spec, const char *fmt)
 {
 #ifndef IO_RSRC_PRINTK_SIZE
 #define IO_RSRC_PRINTK_SIZE	6
@@ -690,8 +698,9 @@ static char *resource_string(char *buf, char *end, struct resource *res,
 	return string(buf, end, sym, spec);
 }
 
-static char *mac_address_string(char *buf, char *end, u8 *addr,
-				struct printf_spec spec, const char *fmt)
+static noinline_for_stack
+char *mac_address_string(char *buf, char *end, u8 *addr,
+			 struct printf_spec spec, const char *fmt)
 {
 	char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
 	char *p = mac_addr;
@@ -714,7 +723,8 @@ static char *mac_address_string(char *buf, char *end, u8 *addr,
 	return string(buf, end, mac_addr, spec);
 }
 
-static char *ip4_string(char *p, const u8 *addr, const char *fmt)
+static noinline_for_stack
+char *ip4_string(char *p, const u8 *addr, const char *fmt)
 {
 	int i;
 	bool leading_zeros = (fmt[0] == 'i');
@@ -763,7 +773,8 @@ static char *ip4_string(char *p, const u8 *addr, const char *fmt)
 	return p;
 }
 
-static char *ip6_compressed_string(char *p, const char *addr)
+static noinline_for_stack
+char *ip6_compressed_string(char *p, const char *addr)
 {
 	int i, j, range;
 	unsigned char zerolength[8];
@@ -843,7 +854,8 @@ static char *ip6_compressed_string(char *p, const char *addr)
 	return p;
 }
 
-static char *ip6_string(char *p, const char *addr, const char *fmt)
+static noinline_for_stack
+char *ip6_string(char *p, const char *addr, const char *fmt)
 {
 	int i;
 
@@ -858,8 +870,9 @@ static char *ip6_string(char *p, const char *addr, const char *fmt)
 	return p;
 }
 
-static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
-			     struct printf_spec spec, const char *fmt)
+static noinline_for_stack
+char *ip6_addr_string(char *buf, char *end, const u8 *addr,
+		      struct printf_spec spec, const char *fmt)
 {
 	char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
 
@@ -871,8 +884,9 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, ip6_addr, spec);
 }
 
-static char *ip4_addr_string(char *buf, char *end, const u8 *addr,
-			     struct printf_spec spec, const char *fmt)
+static noinline_for_stack
+char *ip4_addr_string(char *buf, char *end, const u8 *addr,
+		      struct printf_spec spec, const char *fmt)
 {
 	char ip4_addr[sizeof("255.255.255.255")];
 
@@ -881,8 +895,9 @@ static char *ip4_addr_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, ip4_addr, spec);
 }
 
-static char *uuid_string(char *buf, char *end, const u8 *addr,
-			 struct printf_spec spec, const char *fmt)
+static noinline_for_stack
+char *uuid_string(char *buf, char *end, const u8 *addr,
+		  struct printf_spec spec, const char *fmt)
 {
 	char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
 	char *p = uuid;
@@ -970,8 +985,9 @@ static char *uuid_string(char *buf, char *end, const u8 *addr,
  * function pointers are really function descriptors, which contain a
  * pointer to the real address.
  */
-static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
-			struct printf_spec spec)
+static noinline_for_stack
+char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+	      struct printf_spec spec)
 {
 	if (!ptr)
 		return string(buf, end, "(null)", spec);
@@ -1040,7 +1056,8 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
  * @precision: precision of a number
  * @qualifier: qualifier of a number (long, size_t, ...)
  */
-static int format_decode(const char *fmt, struct printf_spec *spec)
+static noinline_for_stack
+int format_decode(const char *fmt, struct printf_spec *spec)
 {
 	const char *start = fmt;
 
-- 
cgit v1.2.2


From ea46c8f774f295c45fac48101d54be347d3d453b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 24 May 2010 14:33:21 -0700
Subject: dynamic_debug: small cleanup in ddebug_proc_write()

This doesn't change behavior at all.  In the original code, if nwords was
zero then ddebug_parse_query() would return -EINVAL, now we just do it
earlier.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/dynamic_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index d6b8b9b1abfe..3df8eb17a607 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -456,7 +456,7 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
 			__func__, (int)len);
 
 	nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS);
-	if (nwords < 0)
+	if (nwords <= 0)
 		return -EINVAL;
 	if (ddebug_parse_query(words, nwords-1, &query))
 		return -EINVAL;
-- 
cgit v1.2.2


From 2b2f68b5383ea107295d7f1483256866e2daa1e3 Mon Sep 17 00:00:00 2001
From: Florian Ragwitz <rafl@debian.org>
Date: Mon, 24 May 2010 14:33:21 -0700
Subject: DYNAMIC_DEBUG: fix documentation errors

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Florian Ragwitz <rafl@debian.org>
Cc: Jason Baron <jbaron@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d85be90d5888..231208948363 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1039,10 +1039,10 @@ config DYNAMIC_DEBUG
 
 	  Usage:
 
-	  Dynamic debugging is controlled via the 'dynamic_debug/ddebug' file,
+	  Dynamic debugging is controlled via the 'dynamic_debug/control' file,
 	  which is contained in the 'debugfs' filesystem. Thus, the debugfs
 	  filesystem must first be mounted before making use of this feature.
-	  We refer the control file as: <debugfs>/dynamic_debug/ddebug. This
+	  We refer the control file as: <debugfs>/dynamic_debug/control. This
 	  file contains a list of the debug statements that can be enabled. The
 	  format for each line of the file is:
 
@@ -1057,7 +1057,7 @@ config DYNAMIC_DEBUG
 
 	  From a live system:
 
-		nullarbor:~ # cat <debugfs>/dynamic_debug/ddebug
+		nullarbor:~ # cat <debugfs>/dynamic_debug/control
 		# filename:lineno [module]function flags format
 		fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012"
 		fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012"
@@ -1067,23 +1067,23 @@ config DYNAMIC_DEBUG
 
 		// enable the message at line 1603 of file svcsock.c
 		nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// enable all the messages in file svcsock.c
 		nullarbor:~ # echo -n 'file svcsock.c +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// enable all the messages in the NFS server module
 		nullarbor:~ # echo -n 'module nfsd +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// enable all 12 messages in the function svc_process()
 		nullarbor:~ # echo -n 'func svc_process +p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 		// disable all 12 messages in the function svc_process()
 		nullarbor:~ # echo -n 'func svc_process -p' >
-						<debugfs>/dynamic_debug/ddebug
+						<debugfs>/dynamic_debug/control
 
 	  See Documentation/dynamic-debug-howto.txt for additional information.
 
-- 
cgit v1.2.2


From db0fd97c270f1e80321f7ae55234643ca0978c54 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 24 May 2010 14:33:22 -0700
Subject: lib/hexdump.c: reduce stack variable size and cleanups

Reduce char linebuf[200] to the actual size required, which is 32 * 3 + 2
+ 32 + 1, ie: linebuf[131].

Change examples to use bool true not int 1.

Align multiline argument indentation to open parenthesis.

Use temporary for ptr[j] so the ternary expression fits on a single line.

Convert printk ptr from %*p, (int)(2 * sizeof(void *)) to %p as %p uses
the same calculation for size.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/hexdump.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

(limited to 'lib')

diff --git a/lib/hexdump.c b/lib/hexdump.c
index 39af2560f765..1bd6a9779774 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -34,7 +34,7 @@ EXPORT_SYMBOL(hex_asc);
  *
  * E.g.:
  *   hex_dump_to_buffer(frame->data, frame->len, 16, 1,
- *			linebuf, sizeof(linebuf), 1);
+ *			linebuf, sizeof(linebuf), true);
  *
  * example output buffer:
  * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f  @ABCDEFGHIJKLMNO
@@ -65,8 +65,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%s%16.16llx", j ? " " : "",
-				(unsigned long long)*(ptr8 + j));
+					"%s%16.16llx", j ? " " : "",
+					(unsigned long long)*(ptr8 + j));
 		ascii_column = 17 * ngroups + 2;
 		break;
 	}
@@ -77,7 +77,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%s%8.8x", j ? " " : "", *(ptr4 + j));
+					"%s%8.8x", j ? " " : "", *(ptr4 + j));
 		ascii_column = 9 * ngroups + 2;
 		break;
 	}
@@ -88,7 +88,7 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 		for (j = 0; j < ngroups; j++)
 			lx += scnprintf(linebuf + lx, linebuflen - lx,
-				"%s%4.4x", j ? " " : "", *(ptr2 + j));
+					"%s%4.4x", j ? " " : "", *(ptr2 + j));
 		ascii_column = 5 * ngroups + 2;
 		break;
 	}
@@ -111,9 +111,10 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 
 	while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
 		linebuf[lx++] = ' ';
-	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++)
-		linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j]
-				: '.';
+	for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
+		ch = ptr[j];
+		linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
+	}
 nil:
 	linebuf[lx++] = '\0';
 }
@@ -143,7 +144,7 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
  *
  * E.g.:
  *   print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS,
- *		16, 1, frame->data, frame->len, 1);
+ *		    16, 1, frame->data, frame->len, true);
  *
  * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode:
  * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f  @ABCDEFGHIJKLMNO
@@ -151,12 +152,12 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
  * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c  pqrstuvwxyz{|}~.
  */
 void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
-			int rowsize, int groupsize,
-			const void *buf, size_t len, bool ascii)
+		    int rowsize, int groupsize,
+		    const void *buf, size_t len, bool ascii)
 {
 	const u8 *ptr = buf;
 	int i, linelen, remaining = len;
-	unsigned char linebuf[200];
+	unsigned char linebuf[32 * 3 + 2 + 32 + 1];
 
 	if (rowsize != 16 && rowsize != 32)
 		rowsize = 16;
@@ -164,13 +165,14 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
 	for (i = 0; i < len; i += rowsize) {
 		linelen = min(remaining, rowsize);
 		remaining -= rowsize;
+
 		hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
-				linebuf, sizeof(linebuf), ascii);
+				   linebuf, sizeof(linebuf), ascii);
 
 		switch (prefix_type) {
 		case DUMP_PREFIX_ADDRESS:
-			printk("%s%s%*p: %s\n", level, prefix_str,
-				(int)(2 * sizeof(void *)), ptr + i, linebuf);
+			printk("%s%s%p: %s\n",
+			       level, prefix_str, ptr + i, linebuf);
 			break;
 		case DUMP_PREFIX_OFFSET:
 			printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
@@ -196,9 +198,9 @@ EXPORT_SYMBOL(print_hex_dump);
  * rowsize of 16, groupsize of 1, and ASCII output included.
  */
 void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-			const void *buf, size_t len)
+			  const void *buf, size_t len)
 {
 	print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
-			buf, len, 1);
+		       buf, len, true);
 }
 EXPORT_SYMBOL(print_hex_dump_bytes);
-- 
cgit v1.2.2


From 903788892ea0fc7fcaf7e8e5fac9a77379fc215b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <ext-andriy.shevchenko@nokia.com>
Date: Mon, 24 May 2010 14:33:23 -0700
Subject: lib: introduce common method to convert hex digits

hex_to_bin() is a little method which converts hex digit to its actual
value.  There are plenty of places where such functionality is needed.

[akpm@linux-foundation.org: use tolower(), saving 3 bytes, test the more common case first - it's quicker]
[akpm@linux-foundation.org: relocate tolower to make it even faster! (Joe)]
Signed-off-by: Andy Shevchenko <ext-andriy.shevchenko@nokia.com>
Cc: Tilman Schmidt <tilman@imap.cc>
Cc: Duncan Sands <duncan.sands@free.fr>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: "Richard Russon (FlatCap)" <ldm@flatcap.org>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: Len Brown <lenb@kernel.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/hexdump.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'lib')

diff --git a/lib/hexdump.c b/lib/hexdump.c
index 1bd6a9779774..5d7a4802c562 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -15,6 +15,24 @@
 const char hex_asc[] = "0123456789abcdef";
 EXPORT_SYMBOL(hex_asc);
 
+/**
+ * hex_to_bin - convert a hex digit to its real value
+ * @ch: ascii character represents hex digit
+ *
+ * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
+ * input.
+ */
+int hex_to_bin(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	ch = tolower(ch);
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	return -1;
+}
+EXPORT_SYMBOL(hex_to_bin);
+
 /**
  * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
  * @buf: data blob to dump
-- 
cgit v1.2.2


From 836e2af92503f1642dbc3c3281ec68ec1dd39d2e Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Mon, 24 May 2010 14:33:31 -0700
Subject: crc32: major optimization

Precompute more crc32 values (0xcc00, 0xcc0000 and 0xcc000000) into tables.
This increases the table size from 1KB to 4KB but the performance benefit
makes it worth it:

28% faster on MPC8321, 266 MHz
2x faster on Core 2 Duo, 3.1GHz

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/crc32.c          | 24 +++++++++++++++---------
 lib/gen_crc32table.c | 47 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 47 insertions(+), 24 deletions(-)

(limited to 'lib')

diff --git a/lib/crc32.c b/lib/crc32.c
index bc5b936e9142..4855995fcde9 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -48,12 +48,20 @@ MODULE_LICENSE("GPL");
 #if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
 
 static inline u32
-crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
+crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
 # ifdef __LITTLE_ENDIAN
-#  define DO_CRC(x) crc = tab[(crc ^ (x)) & 255 ] ^ (crc >> 8)
+#  define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
+#  define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
+		tab[2][(crc >> 8) & 255] ^ \
+		tab[1][(crc >> 16) & 255] ^ \
+		tab[0][(crc >> 24) & 255]
 # else
-#  define DO_CRC(x) crc = tab[((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+#  define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+#  define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
+		tab[1][(crc >> 8) & 255] ^ \
+		tab[2][(crc >> 16) & 255] ^ \
+		tab[3][(crc >> 24) & 255]
 # endif
 	const u32 *b;
 	size_t    rem_len;
@@ -70,10 +78,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
 	b = (const u32 *)buf;
 	for (--b; len; --len) {
 		crc ^= *++b; /* use pre increment for speed */
-		DO_CRC(0);
-		DO_CRC(0);
-		DO_CRC(0);
-		DO_CRC(0);
+		DO_CRC4;
 	}
 	len = rem_len;
 	/* And the last few bytes */
@@ -85,6 +90,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 *tab)
 	}
 	return crc;
 #undef DO_CRC
+#undef DO_CRC4
 }
 #endif
 /**
@@ -117,7 +123,7 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
 # if CRC_LE_BITS == 8
-	const u32      *tab = crc32table_le;
+	const u32      (*tab)[] = crc32table_le;
 
 	crc = __cpu_to_le32(crc);
 	crc = crc32_body(crc, p, len, tab);
@@ -174,7 +180,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 # if CRC_BE_BITS == 8
-	const u32      *tab = crc32table_be;
+	const u32      (*tab)[] = crc32table_be;
 
 	crc = __cpu_to_be32(crc);
 	crc = crc32_body(crc, p, len, tab);
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index bea5d97df991..85d0e412a04f 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -7,8 +7,8 @@
 #define LE_TABLE_SIZE (1 << CRC_LE_BITS)
 #define BE_TABLE_SIZE (1 << CRC_BE_BITS)
 
-static uint32_t crc32table_le[LE_TABLE_SIZE];
-static uint32_t crc32table_be[BE_TABLE_SIZE];
+static uint32_t crc32table_le[4][LE_TABLE_SIZE];
+static uint32_t crc32table_be[4][BE_TABLE_SIZE];
 
 /**
  * crc32init_le() - allocate and initialize LE table data
@@ -22,12 +22,19 @@ static void crc32init_le(void)
 	unsigned i, j;
 	uint32_t crc = 1;
 
-	crc32table_le[0] = 0;
+	crc32table_le[0][0] = 0;
 
 	for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
 		crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
 		for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
-			crc32table_le[i + j] = crc ^ crc32table_le[j];
+			crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
+	}
+	for (i = 0; i < LE_TABLE_SIZE; i++) {
+		crc = crc32table_le[0][i];
+		for (j = 1; j < 4; j++) {
+			crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
+			crc32table_le[j][i] = crc;
+		}
 	}
 }
 
@@ -39,25 +46,35 @@ static void crc32init_be(void)
 	unsigned i, j;
 	uint32_t crc = 0x80000000;
 
-	crc32table_be[0] = 0;
+	crc32table_be[0][0] = 0;
 
 	for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
 		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
 		for (j = 0; j < i; j++)
-			crc32table_be[i + j] = crc ^ crc32table_be[j];
+			crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
+	}
+	for (i = 0; i < BE_TABLE_SIZE; i++) {
+		crc = crc32table_be[0][i];
+		for (j = 1; j < 4; j++) {
+			crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
+			crc32table_be[j][i] = crc;
+		}
 	}
 }
 
-static void output_table(uint32_t table[], int len, char *trans)
+static void output_table(uint32_t table[4][256], int len, char *trans)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < len - 1; i++) {
-		if (i % ENTRIES_PER_LINE == 0)
-			printf("\n");
-		printf("%s(0x%8.8xL), ", trans, table[i]);
+	for (j = 0 ; j < 4; j++) {
+		printf("{");
+		for (i = 0; i < len - 1; i++) {
+			if (i % ENTRIES_PER_LINE == 0)
+				printf("\n");
+			printf("%s(0x%8.8xL), ", trans, table[j][i]);
+		}
+		printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
 	}
-	printf("%s(0x%8.8xL)\n", trans, table[len - 1]);
 }
 
 int main(int argc, char** argv)
@@ -66,14 +83,14 @@ int main(int argc, char** argv)
 
 	if (CRC_LE_BITS > 1) {
 		crc32init_le();
-		printf("static const u32 crc32table_le[] = {");
+		printf("static const u32 crc32table_le[4][256] = {");
 		output_table(crc32table_le, LE_TABLE_SIZE, "tole");
 		printf("};\n");
 	}
 
 	if (CRC_BE_BITS > 1) {
 		crc32init_be();
-		printf("static const u32 crc32table_be[] = {");
+		printf("static const u32 crc32table_be[4][256] = {");
 		output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
 		printf("};\n");
 	}
-- 
cgit v1.2.2


From 4762bbc1a3a1f22095278b74dd1b8cee04858641 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Mon, 24 May 2010 14:33:31 -0700
Subject: crc32: use __BYTE_ORDER macro for endian detection.

Since crc32.c contains a nifty test program that can be executed in user
space, make sure endian detection works reliably in user space too.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/crc32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/crc32.c b/lib/crc32.c
index 4855995fcde9..3087ed899ee3 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -50,7 +50,7 @@ MODULE_LICENSE("GPL");
 static inline u32
 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
-# ifdef __LITTLE_ENDIAN
+# if __BYTE_ORDER == __LITTLE_ENDIAN
 #  define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
 #  define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
 		tab[2][(crc >> 8) & 255] ^ \
-- 
cgit v1.2.2


From 0d2daf5cc858bd9305bae187310a1dabaad0a2db Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 25 May 2010 23:43:03 -0700
Subject: revert "crc32: use __BYTE_ORDER macro for endian detection"

It doesn't work on big-endian - those architectures don't define
__LITTLE_ENDIAN.

Cc: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/crc32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/crc32.c b/lib/crc32.c
index 3087ed899ee3..4855995fcde9 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -50,7 +50,7 @@ MODULE_LICENSE("GPL");
 static inline u32
 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
-# if __BYTE_ORDER == __LITTLE_ENDIAN
+# ifdef __LITTLE_ENDIAN
 #  define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
 #  define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
 		tab[2][(crc >> 8) & 255] ^ \
-- 
cgit v1.2.2


From 0ac0c0d0f837c499afd02a802f9cf52d3027fa3b Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Wed, 26 May 2010 14:42:51 -0700
Subject: cpusets: randomize node rotor used in cpuset_mem_spread_node()

Some workloads that create a large number of small files tend to assign
too many pages to node 0 (multi-node systems).  Part of the reason is that
the rotor (in cpuset_mem_spread_node()) used to assign nodes starts at
node 0 for newly created tasks.

This patch changes the rotor to be initialized to a random node number of
the cpuset.

[akpm@linux-foundation.org: fix layout]
[Lee.Schermerhorn@hp.com: Define stub numa_random() for !NUMA configuration]
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Paul Menage <menage@google.com>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/bitmap.c b/lib/bitmap.c
index ffb78c916ccd..d7137e7e06e8 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -672,7 +672,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
  *
  * The bit positions 0 through @bits are valid positions in @buf.
  */
-static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
+int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
 {
 	int pos = 0;
 
-- 
cgit v1.2.2


From c9d221f86e43d9fb16260fe18a8cd6767f36c8a5 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 26 May 2010 14:43:36 -0700
Subject: fault-injection: add CPU notifier error injection module

I used this module to test the series of modification to the cpu notifiers
code.

Example1: inject CPU offline error (-1 == -EPERM)

	# modprobe cpu-notifier-error-inject cpu_down_prepare_error=-1
	# echo 0 > /sys/devices/system/cpu/cpu1/online
	bash: echo: write error: Operation not permitted

Example2: inject CPU online error (-2 == -ENOENT)

	# modprobe cpu-notifier-error-inject cpu_up_prepare_error=-2
	# echo 1 > /sys/devices/system/cpu/cpu1/online
	bash: echo: write error: No such file or directory

[akpm@linux-foundation.org: fix Kconfig help text]
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug               | 12 ++++++++
 lib/Makefile                    |  1 +
 lib/cpu-notifier-error-inject.c | 63 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+)
 create mode 100644 lib/cpu-notifier-error-inject.c

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 231208948363..e722e9d62221 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -898,6 +898,18 @@ config LKDTM
 	Documentation on how to use the module can be found in
 	Documentation/fault-injection/provoke-crashes.txt
 
+config CPU_NOTIFIER_ERROR_INJECT
+	tristate "CPU notifier error injection module"
+	depends on HOTPLUG_CPU && DEBUG_KERNEL
+	help
+	  This option provides a kernel module that can be used to test
+	  the error handling of the cpu notifiers
+
+	  To compile this code as a module, choose M here: the module will
+	  be called cpu-notifier-error-inject.
+
+	  If unsure, say N.
+
 config FAULT_INJECTION
 	bool "Fault-injection framework"
 	depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 9e6d3c29d73a..c8567a59d316 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644
index 000000000000..4dc20321b0d5
--- /dev/null
+++ b/lib/cpu-notifier-error-inject.c
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+
+static int priority;
+static int cpu_up_prepare_error;
+static int cpu_down_prepare_error;
+
+module_param(priority, int, 0);
+MODULE_PARM_DESC(priority, "specify cpu notifier priority");
+
+module_param(cpu_up_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_up_prepare_error,
+		"specify error code to inject CPU_UP_PREPARE action");
+
+module_param(cpu_down_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_down_prepare_error,
+		"specify error code to inject CPU_DOWN_PREPARE action");
+
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+				unsigned long action, void *hcpu)
+{
+	int err = 0;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		err = cpu_up_prepare_error;
+		break;
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		err = cpu_down_prepare_error;
+		break;
+	}
+	if (err)
+		printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
+
+	return notifier_from_errno(err);
+}
+
+static struct notifier_block err_inject_cpu_notifier = {
+	.notifier_call = err_inject_cpu_callback,
+};
+
+static int err_inject_init(void)
+{
+	err_inject_cpu_notifier.priority = priority;
+
+	return register_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+static void err_inject_exit(void)
+{
+	unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_DESCRIPTION("CPU notifier error injection module");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
-- 
cgit v1.2.2


From 2dcb22b346be7b7b7e630a8970d69cf3f1111ec1 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Wed, 26 May 2010 14:43:38 -0700
Subject: idr: fix backtrack logic in idr_remove_all

Currently idr_remove_all will fail with a use after free error if
idr::layers is bigger than 2, which on 32 bit systems corresponds to more
than 1024 items.  This is due to stepping back too many levels during
backtracking.  For simplicity let's assume that IDR_BITS=1 -> we have 2
nodes at each level below the root node and each leaf node stores two IDs.
 (In reality for 32 bit systems IDR_BITS=5, with 32 nodes at each sub-root
level and 32 IDs in each leaf node).  The sequence of freeing the nodes at
the moment is as follows:

layer
1 ->                       a(7)
2 ->            b(3)                  c(5)
3 ->        d(1)   e(2)           f(4)    g(6)

Until step 4 things go fine, but then node c is freed, whereas node g
should be freed first.  Since node c contains the pointer to node g we'll
have a use after free error at step 6.

How many levels we step back after visiting the leaf nodes is currently
determined by the msb of the id we are currently visiting:

Step
1.          node d with IDs 0,1 is freed, current ID is advanced to 2.
            msb of the current ID is bit 1. This means we need to step back
            1 level to node b and take the next sibling, node e.
2-3.        node e with IDs 2,3 is freed, current ID is 4, msb is bit 2.
            This means we need to step back 2 levels to node a, freeing
            node b on the way.
4-5.        node f with IDs 4,5 is freed, current ID is 6, msb is still
            bit 2. This means we again need to step back 2 levels to node
            a and free c on the way.
6.          We should visit node g, but its pointer is not available as
            node c was freed.

The fix changes how we determine the number of levels to step back.
Instead of deducing this merely from the msb of the current ID, we should
really check if advancing the ID causes an overflow to a bit position
corresponding to a given layer.  In the above example overflow from bit 0
to bit 1 should mean stepping back 1 level.  Overflow from bit 1 to bit 2
should mean stepping back 2 levels and so on.

The fix was tested with IDs up to 1 << 20, which corresponds to 4 layers
on 32 bit systems.

Signed-off-by: Imre Deak <imre.deak@nokia.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: <stable@kernel.org>		[2.6.34.1]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/idr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index 422a9d5069cc..c1a206901761 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -445,6 +445,7 @@ EXPORT_SYMBOL(idr_remove);
 void idr_remove_all(struct idr *idp)
 {
 	int n, id, max;
+	int bt_mask;
 	struct idr_layer *p;
 	struct idr_layer *pa[MAX_LEVEL];
 	struct idr_layer **paa = &pa[0];
@@ -462,8 +463,10 @@ void idr_remove_all(struct idr *idp)
 			p = p->ary[(id >> n) & IDR_MASK];
 		}
 
+		bt_mask = id;
 		id += 1 << n;
-		while (n < fls(id)) {
+		/* Get the highest bit that the above add changed from 0->1. */
+		while (n < fls(id ^ bt_mask)) {
 			if (p)
 				free_layer(p);
 			n += IDR_BITS;
-- 
cgit v1.2.2


From 5960164fde9bc2f2a99e751d3393faea316e7e36 Mon Sep 17 00:00:00 2001
From: Joe Eykholt <jeykholt@cisco.com>
Date: Wed, 26 May 2010 14:44:13 -0700
Subject: lib/random32: export pseudo-random number generator for modules

This patch moves the definition of struct rnd_state and the inline
__seed() function to linux/random.h.  It renames the static __random32()
function to prandom32() and exports it for use in modules.

prandom32() is useful as a privately-seeded pseudo random number generator
that can give the same result every time it is initialized.

For FCoE FC-BB-6 VN2VN mode self-selected unique FC address generation, we
need a pseudo-random number generator seeded with the 64-bit world-wide
port name.  A truly random generator or one seeded with randomness won't
do because the same sequence of numbers should be generated each time we
boot or the link comes up.

A prandom32_seed() inline function is added to the header file.  It is
inlined not for speed, but so the function won't be expanded in the base
kernel, but only in the module that uses it.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/random32.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

(limited to 'lib')

diff --git a/lib/random32.c b/lib/random32.c
index 217d5c4b666d..870dc3fc0f0f 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -39,13 +39,16 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
 
-struct rnd_state {
-	u32 s1, s2, s3;
-};
-
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
 
-static u32 __random32(struct rnd_state *state)
+/**
+ *	prandom32 - seeded pseudo-random number generator.
+ *	@state: pointer to state structure holding seeded state.
+ *
+ *	This is used for pseudo-randomness with no outside seeding.
+ *	For more random results, use random32().
+ */
+u32 prandom32(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
@@ -55,14 +58,7 @@ static u32 __random32(struct rnd_state *state)
 
 	return (state->s1 ^ state->s2 ^ state->s3);
 }
-
-/*
- * Handle minimum values for seeds
- */
-static inline u32 __seed(u32 x, u32 m)
-{
-	return (x < m) ? x + m : x;
-}
+EXPORT_SYMBOL(prandom32);
 
 /**
  *	random32 - pseudo random number generator
@@ -75,7 +71,7 @@ u32 random32(void)
 {
 	unsigned long r;
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
-	r = __random32(state);
+	r = prandom32(state);
 	put_cpu_var(state);
 	return r;
 }
@@ -118,12 +114,12 @@ static int __init random32_init(void)
 		state->s3 = __seed(LCG(state->s2), 15);
 
 		/* "warm it up" */
-		__random32(state);
-		__random32(state);
-		__random32(state);
-		__random32(state);
-		__random32(state);
-		__random32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
+		prandom32(state);
 	}
 	return 0;
 }
@@ -147,7 +143,7 @@ static int __init random32_reseed(void)
 		state->s3 = __seed(seeds[2], 15);
 
 		/* mix it in */
-		__random32(state);
+		prandom32(state);
 	}
 	return 0;
 }
-- 
cgit v1.2.2


From 38388301b7b9d2921b58cfa1cd9b14c02d508c63 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 26 May 2010 14:44:18 -0700
Subject: swiotlb: remove unnecessary swiotlb_sync_single_range_*

swiotlb_sync_single_range_for_cpu and swiotlb_sync_single_range_for_device
are unnecessary because swiotlb_sync_single_for_cpu and
swiotlb_sync_single_for_device can be used instead.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/swiotlb.c | 31 -------------------------------
 1 file changed, 31 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 5fddf720da73..a009055140ec 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -756,37 +756,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
 }
 EXPORT_SYMBOL(swiotlb_sync_single_for_device);
 
-/*
- * Same as above, but for a sub-range of the mapping.
- */
-static void
-swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
-			  unsigned long offset, size_t size,
-			  int dir, int target)
-{
-	swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
-}
-
-void
-swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-				  unsigned long offset, size_t size,
-				  enum dma_data_direction dir)
-{
-	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-				  SYNC_FOR_CPU);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
-
-void
-swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
-				     unsigned long offset, size_t size,
-				     enum dma_data_direction dir)
-{
-	swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
-				  SYNC_FOR_DEVICE);
-}
-EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
-
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
  * This is the scatter-gather version of the above swiotlb_map_page
-- 
cgit v1.2.2


From edcd1d843adf09d1742d49ae04fa51bb63ddd1c3 Mon Sep 17 00:00:00 2001
From: Cesar Eduardo Barros <cesarb@cesarb.net>
Date: Wed, 26 May 2010 14:44:27 -0700
Subject: radix-tree: fix radix_tree_prev_hole() underflow case

radix_tree_prev_hole() used LONG_MAX to detect underflow; however,
ULONG_MAX is clearly what was intended, both here and by its only user
(count_history_pages at mm/readahead.c).

Reviewed-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Cesar Eduardo Barros <cesarb@cesarb.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/radix-tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 2a087e0f9863..05da38bcc298 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -656,7 +656,7 @@ EXPORT_SYMBOL(radix_tree_next_hole);
  *
  *	Returns: the index of the hole if found, otherwise returns an index
  *	outside of the set specified (in which case 'index - return >= max_scan'
- *	will be true). In rare cases of wrap-around, LONG_MAX will be returned.
+ *	will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
  *
  *	radix_tree_next_hole may be called under rcu_read_lock. However, like
  *	radix_tree_gang_lookup, this will not atomically search a snapshot of
@@ -674,7 +674,7 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 		if (!radix_tree_lookup(root, index))
 			break;
 		index--;
-		if (index == LONG_MAX)
+		if (index == ULONG_MAX)
 			break;
 	}
 
-- 
cgit v1.2.2


From 35926ff5fba8245bd1c6ac04155048f6f89232b1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 30 May 2010 09:00:03 -0700
Subject: Revert "cpusets: randomize node rotor used in
 cpuset_mem_spread_node()"

This reverts commit 0ac0c0d0f837c499afd02a802f9cf52d3027fa3b, which
caused cross-architecture build problems for all the wrong reasons.
IA64 already added its own version of __node_random(), but the fact is,
there is nothing architectural about the function, and the original
commit was just badly done. Revert it, since no fix is forthcoming.

Requested-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/bitmap.c b/lib/bitmap.c
index d7137e7e06e8..ffb78c916ccd 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -672,7 +672,7 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
  *
  * The bit positions 0 through @bits are valid positions in @buf.
  */
-int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
+static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
 {
 	int pos = 0;
 
-- 
cgit v1.2.2


From c842128607a50a670df5f9c75261db2e21db3c45 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 21 May 2010 15:05:21 -0700
Subject: lib/kobject_uevent.c: fix CONFIG_NET=n warning

lib/kobject_uevent.c:87: warning: 'kobj_bcast_filter' defined but not used

Repairs "hotplug: netns aware uevent_helper"

Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject_uevent.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'lib')

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 59c15511d58a..e2eb989d3223 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -83,6 +83,7 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_NET
 static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 {
 	struct kobject *kobj = data;
@@ -98,6 +99,7 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 
 	return 0;
 }
+#endif
 
 static int kobj_usermode_filter(struct kobject *kobj)
 {
-- 
cgit v1.2.2


From 743db2d903bc4e963a31496328d847d69f75047c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 25 May 2010 11:51:10 +0200
Subject: kobject: free memory if netlink_kernel_create() fails

There is a kfree(ue_sk) missing on the error path if
netlink_kernel_create() fails.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 lib/kobject_uevent.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib')

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index e2eb989d3223..b93579504dfa 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -380,6 +380,7 @@ static int uevent_net_init(struct net *net)
 	if (!ue_sk->sk) {
 		printk(KERN_ERR
 		       "kobject_uevent: unable to create netlink socket!\n");
+		kfree(ue_sk);
 		return -ENODEV;
 	}
 	mutex_lock(&uevent_sock_mutex);
-- 
cgit v1.2.2


From 007d08678eb87478b65b3f229960c81dd7c7b8f3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 4 Jun 2010 14:15:02 -0700
Subject: lib: add s390 to atomic64_dec_if_positive archs

Add s390 to list of architectures that have atomic64_dec_if_positive
implemented so we get rid of this warning:

lib/atomic64_test.c:129:2: warning: #warning Please implement
atomic64_dec_if_positive for your architecture, and add it to the IF above

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/atomic64_test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 9087d71537dd..250ed11d3ed2 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -113,7 +113,8 @@ static __init int test_atomic64(void)
 	r += one;
 	BUG_ON(v.counter != r);
 
-#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(_ASM_GENERIC_ATOMIC64_H)
+#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
+    defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H)
 	INIT(onestwos);
 	BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
 	r -= one;
-- 
cgit v1.2.2


From 94bfa3b6692c7a3f6f119596724204ec975d3ef0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 7 Jun 2010 17:09:45 -0700
Subject: idr: fix RCU lockdep splat in idr_get_next()

Convert to rcu_dereference_raw() given that many callers may have many
different locking models.

Located-by: Miles Lane <miles.lane@gmail.com>
Tested-by: Miles Lane <miles.lane@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 lib/idr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index c1a206901761..7f1a4f0acf50 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -602,7 +602,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 	/* find first ent */
 	n = idp->layers * IDR_BITS;
 	max = 1 << n;
-	p = rcu_dereference(idp->top);
+	p = rcu_dereference_raw(idp->top);
 	if (!p)
 		return NULL;
 
@@ -610,7 +610,7 @@ void *idr_get_next(struct idr *idp, int *nextidp)
 		while (n > 0 && p) {
 			n -= IDR_BITS;
 			*paa++ = p;
-			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
 		}
 
 		if (p) {
-- 
cgit v1.2.2


From e621ba9932aa0a90e47c12c958b3a3104915f3b9 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@nokia.com>
Date: Tue, 29 Jun 2010 15:05:15 -0700
Subject: genalloc: fix allocation from end of pool

bitmap_find_next_zero_area requires the size of the bitmap, we instead
passed the last suitable position.  This made it impossible to allocate
from the end of the pool.

Fixes a regression introduced by 243797f59b748f679ab88d456fcc4f92236d724b
("genalloc: use bitmap_find_next_zero_area").

Signed-off-by: Imre Deak <imre.deak@nokia.com>
Cc: Zygo Blaxell <zygo.blaxell@xandros.com>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/genalloc.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib')

diff --git a/lib/genalloc.c b/lib/genalloc.c
index 736c3b06398e..1923f1490e72 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -128,7 +128,6 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
 		chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
 
 		end_bit = (chunk->end_addr - chunk->start_addr) >> order;
-		end_bit -= nbits + 1;
 
 		spin_lock_irqsave(&chunk->lock, flags);
 		start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0,
-- 
cgit v1.2.2


From ff49d74ad383f54041378144ca1a229ee9aeaa59 Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@hq.newdream.net>
Date: Sat, 3 Jul 2010 13:07:35 +1000
Subject: module: initialize module dynamic debug later

We should initialize the module dynamic debug datastructures
only after determining that the module is not loaded yet. This
fixes a bug that was introduced in 2.6.35-rc2, where when trying
to load a module twice, we also load its dynamic printing data
twice which causes all sorts of nasty issues. Also handle
the dynamic debug cleanup later on failure.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (removed a #ifdef)
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/dynamic_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 3df8eb17a607..02afc2533728 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -692,7 +692,7 @@ static void ddebug_table_free(struct ddebug_table *dt)
  * Called in response to a module being unloaded.  Removes
  * any ddebug_table's which point at the module.
  */
-int ddebug_remove_module(char *mod_name)
+int ddebug_remove_module(const char *mod_name)
 {
 	struct ddebug_table *dt, *nextdt;
 	int ret = -ENOENT;
-- 
cgit v1.2.2


From b945d6b2554d550fe95caadc61e521c0ad71fb9c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 29 May 2010 15:31:43 +0200
Subject: rbtree: Undo augmented trees performance damage and regression

Reimplement augmented RB-trees without sprinkling extra branches
all over the RB-tree code (which lives in the scheduler hot path).

This approach is 'borrowed' from Fabio's BFQ implementation and
relies on traversing the rebalance path after the RB-tree-op to
correct the heap property for insertion/removal and make up for
the damage done by the tree rotations.

For insertion the rebalance path is trivially that from the new
node upwards to the root; for removal it is that from the deepest
node in the path from the to-be-removed node that will still
be around after the removal.

[ This patch also fixes a video driver regression reported by
  Ali Gholami Rudi - the memtype->subtree_max_end was updated
  incorrectly. ]

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Ali Gholami Rudi <ali@rudi.ir>
Cc: Fabio Checconi <fabio@gandalf.sssup.it>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1275414172.27810.27961.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/rbtree.c | 116 ++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 72 insertions(+), 44 deletions(-)

(limited to 'lib')

diff --git a/lib/rbtree.c b/lib/rbtree.c
index 15e10b1afdd2..4693f79195d3 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -44,11 +44,6 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
 	else
 		root->rb_node = right;
 	rb_set_parent(node, right);
-
-	if (root->augment_cb) {
-		root->augment_cb(node);
-		root->augment_cb(right);
-	}
 }
 
 static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
@@ -72,20 +67,12 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
 	else
 		root->rb_node = left;
 	rb_set_parent(node, left);
-
-	if (root->augment_cb) {
-		root->augment_cb(node);
-		root->augment_cb(left);
-	}
 }
 
 void rb_insert_color(struct rb_node *node, struct rb_root *root)
 {
 	struct rb_node *parent, *gparent;
 
-	if (root->augment_cb)
-		root->augment_cb(node);
-
 	while ((parent = rb_parent(node)) && rb_is_red(parent))
 	{
 		gparent = rb_parent(parent);
@@ -240,15 +227,12 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 	else
 	{
 		struct rb_node *old = node, *left;
-		int old_parent_cb = 0;
-		int successor_parent_cb = 0;
 
 		node = node->rb_right;
 		while ((left = node->rb_left) != NULL)
 			node = left;
 
 		if (rb_parent(old)) {
-			old_parent_cb = 1;
 			if (rb_parent(old)->rb_left == old)
 				rb_parent(old)->rb_left = node;
 			else
@@ -263,10 +247,8 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 		if (parent == old) {
 			parent = node;
 		} else {
-			successor_parent_cb = 1;
 			if (child)
 				rb_set_parent(child, parent);
-
 			parent->rb_left = child;
 
 			node->rb_right = old->rb_right;
@@ -277,24 +259,6 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 		node->rb_left = old->rb_left;
 		rb_set_parent(old->rb_left, node);
 
-		if (root->augment_cb) {
-			/*
-			 * Here, three different nodes can have new children.
-			 * The parent of the successor node that was selected
-			 * to replace the node to be erased.
-			 * The node that is getting erased and is now replaced
-			 * by its successor.
-			 * The parent of the node getting erased-replaced.
-			 */
-			if (successor_parent_cb)
-				root->augment_cb(parent);
-
-			root->augment_cb(node);
-
-			if (old_parent_cb)
-				root->augment_cb(rb_parent(old));
-		}
-
 		goto color;
 	}
 
@@ -303,19 +267,15 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 
 	if (child)
 		rb_set_parent(child, parent);
-
-	if (parent) {
+	if (parent)
+	{
 		if (parent->rb_left == node)
 			parent->rb_left = child;
 		else
 			parent->rb_right = child;
-
-		if (root->augment_cb)
-			root->augment_cb(parent);
-
-	} else {
-		root->rb_node = child;
 	}
+	else
+		root->rb_node = child;
 
  color:
 	if (color == RB_BLACK)
@@ -323,6 +283,74 @@ void rb_erase(struct rb_node *node, struct rb_root *root)
 }
 EXPORT_SYMBOL(rb_erase);
 
+static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
+{
+	struct rb_node *parent;
+
+up:
+	func(node, data);
+	parent = rb_parent(node);
+	if (!parent)
+		return;
+
+	if (node == parent->rb_left && parent->rb_right)
+		func(parent->rb_right, data);
+	else if (parent->rb_left)
+		func(parent->rb_left, data);
+
+	node = parent;
+	goto up;
+}
+
+/*
+ * after inserting @node into the tree, update the tree to account for
+ * both the new entry and any damage done by rebalance
+ */
+void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
+{
+	if (node->rb_left)
+		node = node->rb_left;
+	else if (node->rb_right)
+		node = node->rb_right;
+
+	rb_augment_path(node, func, data);
+}
+
+/*
+ * before removing the node, find the deepest node on the rebalance path
+ * that will still be there after @node gets removed
+ */
+struct rb_node *rb_augment_erase_begin(struct rb_node *node)
+{
+	struct rb_node *deepest;
+
+	if (!node->rb_right && !node->rb_left)
+		deepest = rb_parent(node);
+	else if (!node->rb_right)
+		deepest = node->rb_left;
+	else if (!node->rb_left)
+		deepest = node->rb_right;
+	else {
+		deepest = rb_next(node);
+		if (deepest->rb_right)
+			deepest = deepest->rb_right;
+		else if (rb_parent(deepest) != node)
+			deepest = rb_parent(deepest);
+	}
+
+	return deepest;
+}
+
+/*
+ * after removal, update the tree to account for the removed entry
+ * and any rebalance damage.
+ */
+void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
+{
+	if (node)
+		rb_augment_path(node, func, data);
+}
+
 /*
  * This function returns the first node (in sort order) of the tree.
  */
-- 
cgit v1.2.2


From 95f72d1ed41a66f1c1c29c24d479de81a0bea36f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 12 Jul 2010 14:36:09 +1000
Subject: lmb: rename to memblock

Done via the following script:

      FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')

      sed -i \
        -e 's/lmb/memblock/g' \
        -e 's/LMB/MEMBLOCK/g' \
        $FILES

      for N in $(find . -name lmb.[ch]); do
        M=$(echo $N | sed 's/lmb/memblock/g')
        mv $N $M
      done

and then undo the wrong replacements it made, such as lmbench and dlmb.

Also move memblock.c from lib/ to mm/.

Suggested-by: Ingo Molnar <mingo@elte.hu>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 lib/Kconfig  |   3 -
 lib/Makefile |   2 -
 lib/lmb.c    | 541 -----------------------------------------------------------
 3 files changed, 546 deletions(-)
 delete mode 100644 lib/lmb.c

(limited to 'lib')

diff --git a/lib/Kconfig b/lib/Kconfig
index 170d8ca901d8..5b916bc0fbae 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -181,9 +181,6 @@ config HAS_DMA
 config CHECK_SIGNATURE
 	bool
 
-config HAVE_LMB
-	boolean
-
 config CPUMASK_OFFSTACK
 	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 3f1062cbbff4..0bfabba1bb32 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -89,8 +89,6 @@ obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
-obj-$(CONFIG_HAVE_LMB) += lmb.o
-
 obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
 
 obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
diff --git a/lib/lmb.c b/lib/lmb.c
deleted file mode 100644
index b1fc52606524..000000000000
--- a/lib/lmb.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * Procedures for maintaining information about logical memory blocks.
- *
- * Peter Bergner, IBM Corp.	June 2001.
- * Copyright (C) 2001 Peter Bergner.
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <linux/lmb.h>
-
-#define LMB_ALLOC_ANYWHERE	0
-
-struct lmb lmb;
-
-static int lmb_debug;
-
-static int __init early_lmb(char *p)
-{
-	if (p && strstr(p, "debug"))
-		lmb_debug = 1;
-	return 0;
-}
-early_param("lmb", early_lmb);
-
-static void lmb_dump(struct lmb_region *region, char *name)
-{
-	unsigned long long base, size;
-	int i;
-
-	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt);
-
-	for (i = 0; i < region->cnt; i++) {
-		base = region->region[i].base;
-		size = region->region[i].size;
-
-		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
-		    name, i, base, base + size - 1, size);
-	}
-}
-
-void lmb_dump_all(void)
-{
-	if (!lmb_debug)
-		return;
-
-	pr_info("LMB configuration:\n");
-	pr_info(" rmo_size    = 0x%llx\n", (unsigned long long)lmb.rmo_size);
-	pr_info(" memory.size = 0x%llx\n", (unsigned long long)lmb.memory.size);
-
-	lmb_dump(&lmb.memory, "memory");
-	lmb_dump(&lmb.reserved, "reserved");
-}
-
-static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
-					u64 size2)
-{
-	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
-}
-
-static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
-{
-	if (base2 == base1 + size1)
-		return 1;
-	else if (base1 == base2 + size2)
-		return -1;
-
-	return 0;
-}
-
-static long lmb_regions_adjacent(struct lmb_region *rgn,
-		unsigned long r1, unsigned long r2)
-{
-	u64 base1 = rgn->region[r1].base;
-	u64 size1 = rgn->region[r1].size;
-	u64 base2 = rgn->region[r2].base;
-	u64 size2 = rgn->region[r2].size;
-
-	return lmb_addrs_adjacent(base1, size1, base2, size2);
-}
-
-static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
-{
-	unsigned long i;
-
-	for (i = r; i < rgn->cnt - 1; i++) {
-		rgn->region[i].base = rgn->region[i + 1].base;
-		rgn->region[i].size = rgn->region[i + 1].size;
-	}
-	rgn->cnt--;
-}
-
-/* Assumption: base addr of region 1 < base addr of region 2 */
-static void lmb_coalesce_regions(struct lmb_region *rgn,
-		unsigned long r1, unsigned long r2)
-{
-	rgn->region[r1].size += rgn->region[r2].size;
-	lmb_remove_region(rgn, r2);
-}
-
-void __init lmb_init(void)
-{
-	/* Create a dummy zero size LMB which will get coalesced away later.
-	 * This simplifies the lmb_add() code below...
-	 */
-	lmb.memory.region[0].base = 0;
-	lmb.memory.region[0].size = 0;
-	lmb.memory.cnt = 1;
-
-	/* Ditto. */
-	lmb.reserved.region[0].base = 0;
-	lmb.reserved.region[0].size = 0;
-	lmb.reserved.cnt = 1;
-}
-
-void __init lmb_analyze(void)
-{
-	int i;
-
-	lmb.memory.size = 0;
-
-	for (i = 0; i < lmb.memory.cnt; i++)
-		lmb.memory.size += lmb.memory.region[i].size;
-}
-
-static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-	unsigned long coalesced = 0;
-	long adjacent, i;
-
-	if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
-		rgn->region[0].base = base;
-		rgn->region[0].size = size;
-		return 0;
-	}
-
-	/* First try and coalesce this LMB with another. */
-	for (i = 0; i < rgn->cnt; i++) {
-		u64 rgnbase = rgn->region[i].base;
-		u64 rgnsize = rgn->region[i].size;
-
-		if ((rgnbase == base) && (rgnsize == size))
-			/* Already have this region, so we're done */
-			return 0;
-
-		adjacent = lmb_addrs_adjacent(base, size, rgnbase, rgnsize);
-		if (adjacent > 0) {
-			rgn->region[i].base -= size;
-			rgn->region[i].size += size;
-			coalesced++;
-			break;
-		} else if (adjacent < 0) {
-			rgn->region[i].size += size;
-			coalesced++;
-			break;
-		}
-	}
-
-	if ((i < rgn->cnt - 1) && lmb_regions_adjacent(rgn, i, i+1)) {
-		lmb_coalesce_regions(rgn, i, i+1);
-		coalesced++;
-	}
-
-	if (coalesced)
-		return coalesced;
-	if (rgn->cnt >= MAX_LMB_REGIONS)
-		return -1;
-
-	/* Couldn't coalesce the LMB, so add it to the sorted table. */
-	for (i = rgn->cnt - 1; i >= 0; i--) {
-		if (base < rgn->region[i].base) {
-			rgn->region[i+1].base = rgn->region[i].base;
-			rgn->region[i+1].size = rgn->region[i].size;
-		} else {
-			rgn->region[i+1].base = base;
-			rgn->region[i+1].size = size;
-			break;
-		}
-	}
-
-	if (base < rgn->region[0].base) {
-		rgn->region[0].base = base;
-		rgn->region[0].size = size;
-	}
-	rgn->cnt++;
-
-	return 0;
-}
-
-long lmb_add(u64 base, u64 size)
-{
-	struct lmb_region *_rgn = &lmb.memory;
-
-	/* On pSeries LPAR systems, the first LMB is our RMO region. */
-	if (base == 0)
-		lmb.rmo_size = size;
-
-	return lmb_add_region(_rgn, base, size);
-
-}
-
-static long __lmb_remove(struct lmb_region *rgn, u64 base, u64 size)
-{
-	u64 rgnbegin, rgnend;
-	u64 end = base + size;
-	int i;
-
-	rgnbegin = rgnend = 0; /* supress gcc warnings */
-
-	/* Find the region where (base, size) belongs to */
-	for (i=0; i < rgn->cnt; i++) {
-		rgnbegin = rgn->region[i].base;
-		rgnend = rgnbegin + rgn->region[i].size;
-
-		if ((rgnbegin <= base) && (end <= rgnend))
-			break;
-	}
-
-	/* Didn't find the region */
-	if (i == rgn->cnt)
-		return -1;
-
-	/* Check to see if we are removing entire region */
-	if ((rgnbegin == base) && (rgnend == end)) {
-		lmb_remove_region(rgn, i);
-		return 0;
-	}
-
-	/* Check to see if region is matching at the front */
-	if (rgnbegin == base) {
-		rgn->region[i].base = end;
-		rgn->region[i].size -= size;
-		return 0;
-	}
-
-	/* Check to see if the region is matching at the end */
-	if (rgnend == end) {
-		rgn->region[i].size -= size;
-		return 0;
-	}
-
-	/*
-	 * We need to split the entry -  adjust the current one to the
-	 * beginging of the hole and add the region after hole.
-	 */
-	rgn->region[i].size = base - rgn->region[i].base;
-	return lmb_add_region(rgn, end, rgnend - end);
-}
-
-long lmb_remove(u64 base, u64 size)
-{
-	return __lmb_remove(&lmb.memory, base, size);
-}
-
-long __init lmb_free(u64 base, u64 size)
-{
-	return __lmb_remove(&lmb.reserved, base, size);
-}
-
-long __init lmb_reserve(u64 base, u64 size)
-{
-	struct lmb_region *_rgn = &lmb.reserved;
-
-	BUG_ON(0 == size);
-
-	return lmb_add_region(_rgn, base, size);
-}
-
-long lmb_overlaps_region(struct lmb_region *rgn, u64 base, u64 size)
-{
-	unsigned long i;
-
-	for (i = 0; i < rgn->cnt; i++) {
-		u64 rgnbase = rgn->region[i].base;
-		u64 rgnsize = rgn->region[i].size;
-		if (lmb_addrs_overlap(base, size, rgnbase, rgnsize))
-			break;
-	}
-
-	return (i < rgn->cnt) ? i : -1;
-}
-
-static u64 lmb_align_down(u64 addr, u64 size)
-{
-	return addr & ~(size - 1);
-}
-
-static u64 lmb_align_up(u64 addr, u64 size)
-{
-	return (addr + (size - 1)) & ~(size - 1);
-}
-
-static u64 __init lmb_alloc_nid_unreserved(u64 start, u64 end,
-					   u64 size, u64 align)
-{
-	u64 base, res_base;
-	long j;
-
-	base = lmb_align_down((end - size), align);
-	while (start <= base) {
-		j = lmb_overlaps_region(&lmb.reserved, base, size);
-		if (j < 0) {
-			/* this area isn't reserved, take it */
-			if (lmb_add_region(&lmb.reserved, base, size) < 0)
-				base = ~(u64)0;
-			return base;
-		}
-		res_base = lmb.reserved.region[j].base;
-		if (res_base < size)
-			break;
-		base = lmb_align_down(res_base - size, align);
-	}
-
-	return ~(u64)0;
-}
-
-static u64 __init lmb_alloc_nid_region(struct lmb_property *mp,
-				       u64 (*nid_range)(u64, u64, int *),
-				       u64 size, u64 align, int nid)
-{
-	u64 start, end;
-
-	start = mp->base;
-	end = start + mp->size;
-
-	start = lmb_align_up(start, align);
-	while (start < end) {
-		u64 this_end;
-		int this_nid;
-
-		this_end = nid_range(start, end, &this_nid);
-		if (this_nid == nid) {
-			u64 ret = lmb_alloc_nid_unreserved(start, this_end,
-							   size, align);
-			if (ret != ~(u64)0)
-				return ret;
-		}
-		start = this_end;
-	}
-
-	return ~(u64)0;
-}
-
-u64 __init lmb_alloc_nid(u64 size, u64 align, int nid,
-			 u64 (*nid_range)(u64 start, u64 end, int *nid))
-{
-	struct lmb_region *mem = &lmb.memory;
-	int i;
-
-	BUG_ON(0 == size);
-
-	size = lmb_align_up(size, align);
-
-	for (i = 0; i < mem->cnt; i++) {
-		u64 ret = lmb_alloc_nid_region(&mem->region[i],
-					       nid_range,
-					       size, align, nid);
-		if (ret != ~(u64)0)
-			return ret;
-	}
-
-	return lmb_alloc(size, align);
-}
-
-u64 __init lmb_alloc(u64 size, u64 align)
-{
-	return lmb_alloc_base(size, align, LMB_ALLOC_ANYWHERE);
-}
-
-u64 __init lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-	u64 alloc;
-
-	alloc = __lmb_alloc_base(size, align, max_addr);
-
-	if (alloc == 0)
-		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
-		      (unsigned long long) size, (unsigned long long) max_addr);
-
-	return alloc;
-}
-
-u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr)
-{
-	long i, j;
-	u64 base = 0;
-	u64 res_base;
-
-	BUG_ON(0 == size);
-
-	size = lmb_align_up(size, align);
-
-	/* On some platforms, make sure we allocate lowmem */
-	/* Note that LMB_REAL_LIMIT may be LMB_ALLOC_ANYWHERE */
-	if (max_addr == LMB_ALLOC_ANYWHERE)
-		max_addr = LMB_REAL_LIMIT;
-
-	for (i = lmb.memory.cnt - 1; i >= 0; i--) {
-		u64 lmbbase = lmb.memory.region[i].base;
-		u64 lmbsize = lmb.memory.region[i].size;
-
-		if (lmbsize < size)
-			continue;
-		if (max_addr == LMB_ALLOC_ANYWHERE)
-			base = lmb_align_down(lmbbase + lmbsize - size, align);
-		else if (lmbbase < max_addr) {
-			base = min(lmbbase + lmbsize, max_addr);
-			base = lmb_align_down(base - size, align);
-		} else
-			continue;
-
-		while (base && lmbbase <= base) {
-			j = lmb_overlaps_region(&lmb.reserved, base, size);
-			if (j < 0) {
-				/* this area isn't reserved, take it */
-				if (lmb_add_region(&lmb.reserved, base, size) < 0)
-					return 0;
-				return base;
-			}
-			res_base = lmb.reserved.region[j].base;
-			if (res_base < size)
-				break;
-			base = lmb_align_down(res_base - size, align);
-		}
-	}
-	return 0;
-}
-
-/* You must call lmb_analyze() before this. */
-u64 __init lmb_phys_mem_size(void)
-{
-	return lmb.memory.size;
-}
-
-u64 lmb_end_of_DRAM(void)
-{
-	int idx = lmb.memory.cnt - 1;
-
-	return (lmb.memory.region[idx].base + lmb.memory.region[idx].size);
-}
-
-/* You must call lmb_analyze() after this. */
-void __init lmb_enforce_memory_limit(u64 memory_limit)
-{
-	unsigned long i;
-	u64 limit;
-	struct lmb_property *p;
-
-	if (!memory_limit)
-		return;
-
-	/* Truncate the lmb regions to satisfy the memory limit. */
-	limit = memory_limit;
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		if (limit > lmb.memory.region[i].size) {
-			limit -= lmb.memory.region[i].size;
-			continue;
-		}
-
-		lmb.memory.region[i].size = limit;
-		lmb.memory.cnt = i + 1;
-		break;
-	}
-
-	if (lmb.memory.region[0].size < lmb.rmo_size)
-		lmb.rmo_size = lmb.memory.region[0].size;
-
-	memory_limit = lmb_end_of_DRAM();
-
-	/* And truncate any reserves above the limit also. */
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		p = &lmb.reserved.region[i];
-
-		if (p->base > memory_limit)
-			p->size = 0;
-		else if ((p->base + p->size) > memory_limit)
-			p->size = memory_limit - p->base;
-
-		if (p->size == 0) {
-			lmb_remove_region(&lmb.reserved, i);
-			i--;
-		}
-	}
-}
-
-int __init lmb_is_reserved(u64 addr)
-{
-	int i;
-
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		u64 upper = lmb.reserved.region[i].base +
-			lmb.reserved.region[i].size - 1;
-		if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
-			return 1;
-	}
-	return 0;
-}
-
-int lmb_is_region_reserved(u64 base, u64 size)
-{
-	return lmb_overlaps_region(&lmb.reserved, base, size);
-}
-
-/*
- * Given a <base, len>, find which memory regions belong to this range.
- * Adjust the request and return a contiguous chunk.
- */
-int lmb_find(struct lmb_property *res)
-{
-	int i;
-	u64 rstart, rend;
-
-	rstart = res->base;
-	rend = rstart + res->size - 1;
-
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		u64 start = lmb.memory.region[i].base;
-		u64 end = start + lmb.memory.region[i].size - 1;
-
-		if (start > rend)
-			return -1;
-
-		if ((end >= rstart) && (start < rend)) {
-			/* adjust the request */
-			if (rstart < start)
-				rstart = start;
-			if (rend > end)
-				rend = end;
-			res->base = rstart;
-			res->size = rend - rstart + 1;
-			return 0;
-		}
-	}
-	return -1;
-}
-- 
cgit v1.2.2