From 5e1f8c9e20a92743eefc9a82c2db835213905e26 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 28 Oct 2008 13:21:55 -0400
Subject: ext3: Add support for non-native signed/unsigned htree hash
 algorithms

The original ext3 hash algorithms assumed that variables of type char
were signed, as God and K&R intended.  Unfortunately, this assumption
is not true on some architectures.  Userspace support for marking
filesystems with non-native signed/unsigned chars was added two years
ago, but the kernel-side support was never added (until now).

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/ext3_fs.h    | 28 +++++++++++++++++++++++++++-
 include/linux/ext3_fs_sb.h |  1 +
 2 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index d14f02918483..9004794a35fe 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -353,6 +353,13 @@ struct ext3_inode {
 #define	EXT3_ERROR_FS			0x0002	/* Errors detected */
 #define	EXT3_ORPHAN_FS			0x0004	/* Orphans being recovered */
 
+/*
+ * Misc. filesystem flags
+ */
+#define EXT2_FLAGS_SIGNED_HASH		0x0001  /* Signed dirhash in use */
+#define EXT2_FLAGS_UNSIGNED_HASH	0x0002  /* Unsigned dirhash in use */
+#define EXT2_FLAGS_TEST_FILESYS		0x0004	/* to test development code */
+
 /*
  * Mount flags
  */
@@ -489,7 +496,23 @@ struct ext3_super_block {
 	__u16	s_reserved_word_pad;
 	__le32	s_default_mount_opts;
 	__le32	s_first_meta_bg;	/* First metablock block group */
-	__u32	s_reserved[190];	/* Padding to the end of the block */
+	__le32	s_mkfs_time;		/* When the filesystem was created */
+	__le32	s_jnl_blocks[17];	/* Backup of the journal inode */
+	/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
+/*150*/	__le32	s_blocks_count_hi;	/* Blocks count */
+	__le32	s_r_blocks_count_hi;	/* Reserved blocks count */
+	__le32	s_free_blocks_count_hi;	/* Free blocks count */
+	__le16	s_min_extra_isize;	/* All inodes have at least # bytes */
+	__le16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
+	__le32	s_flags;		/* Miscellaneous flags */
+	__le16  s_raid_stride;		/* RAID stride */
+	__le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
+	__le64  s_mmp_block;            /* Block for multi-mount protection */
+	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+	__u8	s_log_groups_per_flex;  /* FLEX_BG group size */
+	__u8	s_reserved_char_pad2;
+	__le16  s_reserved_pad;
+	__u32   s_reserved[162];        /* Padding to the end of the block */
 };
 
 #ifdef __KERNEL__
@@ -694,6 +717,9 @@ static inline __le16 ext3_rec_len_to_disk(unsigned len)
 #define DX_HASH_LEGACY		0
 #define DX_HASH_HALF_MD4	1
 #define DX_HASH_TEA		2
+#define DX_HASH_LEGACY_UNSIGNED	3
+#define DX_HASH_HALF_MD4_UNSIGNED	4
+#define DX_HASH_TEA_UNSIGNED		5
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index e024e38248ff..a4e9216b3a6d 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -57,6 +57,7 @@ struct ext3_sb_info {
 	u32 s_next_generation;
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
+	int s_hash_unsigned;	/* 3 if hash should be signed, 0 if not */
 	struct percpu_counter s_freeblocks_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
-- 
cgit v1.2.2


From a20c7ab570ffdce1d6f67c7acf8c1c502a3b3839 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Thu, 16 Oct 2008 18:43:48 +0400
Subject: [MTD] sharpsl-nand: use platform_data for model-specific values

Add platform_data which holds all model-specific values, like badblocks
pattern, oobinfo, partitions.

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
---
 include/linux/mtd/sharpsl.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 include/linux/mtd/sharpsl.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h
new file mode 100644
index 000000000000..25f4d2a845c1
--- /dev/null
+++ b/include/linux/mtd/sharpsl.h
@@ -0,0 +1,20 @@
+/*
+ * SharpSL NAND support
+ *
+ * Copyright (C) 2008 Dmitry Baryshkov
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/mtd/nand.h>
+#include <linux/mtd/nand_ecc.h>
+#include <linux/mtd/partitions.h>
+
+struct sharpsl_nand_platform_data {
+	struct nand_bbt_descr	*badblock_pattern;
+	struct nand_ecclayout	*ecc_layout;
+	struct mtd_partition	*partitions;
+	unsigned int		nr_partitions;
+};
-- 
cgit v1.2.2


From 171bbfbeab7730031eec8025341401fabe540bd5 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Tue, 25 Nov 2008 17:42:31 -0500
Subject: jbd2: Add BH_JBDPrivateStart

Add this so that file systems using JBD2 can safely allocate unused b_state
bits.

In this case, we add it so that Ocfs2 can define a single bit for tracking
the validation state of a buffer.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index c7d106ef22e2..f36645745489 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -329,6 +329,7 @@ enum jbd_state_bits {
 	BH_State,		/* Pins most journal_head state */
 	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
+	BH_JBDPrivateStart,	/* First bit available for private use by FS */
 };
 
 BUFFER_FNS(JBD, jbd)
-- 
cgit v1.2.2


From e07f7183a486cf9783d1f8c9d2997b5b39eeb2d4 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@redhat.com>
Date: Wed, 26 Nov 2008 01:14:26 -0500
Subject: jbd2: improve jbd2 fsync batching

This patch removes the static sleep time in favor of a more self
optimizing approach where we measure the average amount of time it
takes to commit a transaction to disk and the ammount of time a
transaction has been running.  If somebody does a sync write or an
fsync() traditionally we would sleep for 1 jiffies, which depending on
the value of HZ could be a significant amount of time compared to how
long it takes to commit a transaction to the underlying storage.  With
this patch instead of sleeping for a jiffie, we check to see if the
amount of time this transaction has been running is less than the
average commit time, and if it is we sleep for the delta using
schedule_hrtimeout to give us a higher precision sleep time.  This
greatly benefits high end storage where you could end up sleeping for
longer than it takes to commit the transaction and therefore sitting
idle instead of allowing the transaction to be committed by keeping
the sleep time to a minimum so you are sure to always be doing
something.

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f36645745489..ab8cef130c28 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -637,6 +637,11 @@ struct transaction_s
 	 */
 	unsigned long		t_expires;
 
+	/*
+	 * When this transaction started, in nanoseconds [no locking]
+	 */
+	ktime_t			t_start_time;
+
 	/*
 	 * How many handles used this transaction? [t_handle_lock]
 	 */
@@ -939,8 +944,18 @@ struct journal_s
 	struct buffer_head	**j_wbuf;
 	int			j_wbufsize;
 
+	/*
+	 * this is the pid of hte last person to run a synchronous operation
+	 * through the journal
+	 */
 	pid_t			j_last_sync_writer;
 
+	/*
+	 * the average amount of time in nanoseconds it takes to commit a
+	 * transaction to disk. [j_state_lock]
+	 */
+	u64			j_average_commit_time;
+
 	/* This function is called when a transaction is closed */
 	void			(*j_commit_callback)(journal_t *,
 						     transaction_t *);
-- 
cgit v1.2.2


From 30773840c19cea60dcef39545960d541b1ac1cf8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 3 Jan 2009 20:27:38 -0500
Subject: ext4: add fsync batch tuning knobs

Add new mount options, min_batch_time and max_batch_time, which
controls how long the jbd2 layer should wait for additional filesystem
operations to get batched with a synchronous write transaction.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index ab8cef130c28..a3cd647ea1bc 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -956,6 +956,14 @@ struct journal_s
 	 */
 	u64			j_average_commit_time;
 
+	/*
+	 * minimum and maximum times that we should wait for
+	 * additional filesystem operations to get batched into a
+	 * synchronous handle in microseconds
+	 */
+	u32			j_min_batch_time;
+	u32			j_max_batch_time;
+
 	/* This function is called when a transaction is closed */
 	void			(*j_commit_callback)(journal_t *,
 						     transaction_t *);
-- 
cgit v1.2.2


From 1a0d3786dd57dbd74f340322054c3d618b999dcf Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 5 Nov 2008 00:09:22 -0500
Subject: jbd2: Remove a large array of bh's from the stack of the checkpoint
 routine

jbd2_log_do_checkpoint()n is one of the kernel's largest stack users.
Move the array of buffer head's from the stack of jbd2_log_do_checkpoint()
to the in-core journal structure.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a3cd647ea1bc..004c9a8d63ed 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -687,6 +687,8 @@ jbd2_time_diff(unsigned long start, unsigned long end)
 	return end + (MAX_JIFFY_OFFSET - start);
 }
 
+#define JBD2_NR_BATCH	64
+
 /**
  * struct journal_s - The journal_s type is the concrete type associated with
  *     journal_t.
@@ -830,6 +832,14 @@ struct journal_s
 	/* Semaphore for locking against concurrent checkpoints */
 	struct mutex		j_checkpoint_mutex;
 
+	/*
+	 * List of buffer heads used by the checkpoint routine.  This
+	 * was moved from jbd2_log_do_checkpoint() to reduce stack
+	 * usage.  Access to this array is controlled by the
+	 * j_checkpoint_mutex.  [j_checkpoint_mutex]
+	 */
+	struct buffer_head	*j_chkpt_bhs[JBD2_NR_BATCH];
+	
 	/*
 	 * Journal head: identifies the first unused block in the journal.
 	 * [j_state_lock]
-- 
cgit v1.2.2


From fb68407b0d9efba962c03f55009c797e22f024bc Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 6 Nov 2008 17:50:21 -0500
Subject: jbd2: Call journal commit callback without holding j_list_lock

Avoid freeing the transaction in __jbd2_journal_drop_transaction() so
the journal commit callback can run without holding j_list_lock, to
avoid lock contention on this spinlock.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 004c9a8d63ed..9d82084a1605 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1179,8 +1179,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
 
 void __jbd2_log_wait_for_space(journal_t *journal);
-extern void	__jbd2_journal_drop_transaction(journal_t *, transaction_t *);
-extern int	jbd2_cleanup_journal_tail(journal_t *);
+extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
+extern int jbd2_cleanup_journal_tail(journal_t *);
 
 /* Debugging code only: */
 
-- 
cgit v1.2.2


From 69423d99fc182a81f3c5db3eb5c140acc6fc64be Mon Sep 17 00:00:00 2001
From: Adrian Hunter <ext-adrian.hunter@nokia.com>
Date: Wed, 10 Dec 2008 13:37:21 +0000
Subject: [MTD] update internal API to support 64-bit device size

MTD internal API presently uses 32-bit values to represent
device size.  This patch updates them to 64-bits but leaves
the external API unchanged.  Extending the external API
is a separate issue for several reasons.  First, no one
needs it at the moment.  Secondly, whether the implementation
is done with IOCTLs, sysfs or both is still debated.  Thirdly
external API changes require the internal API to be accepted
first.

Note that although the MTD API will be able to support 64-bit
device sizes, existing drivers do not and are not required
to do so, although NAND base has been updated.

In general, changing from 32-bit to 64-bit values cause little
or no changes to the majority of the code with the following
exceptions:
    	- printk message formats
    	- division and modulus of 64-bit values
    	- NAND base support
	- 32-bit local variables used by mtdpart and mtdconcat
	- naughtily assuming one structure maps to another
	in MEMERASE ioctl

Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h        | 57 ++++++++++++++++++++++++++++++++++++------
 include/linux/mtd/nand.h       |  2 +-
 include/linux/mtd/partitions.h |  4 +--
 3 files changed, 52 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index eae26bb6430a..95e585ecc297 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -15,6 +15,8 @@
 #include <linux/mtd/compatmac.h>
 #include <mtd/mtd-abi.h>
 
+#include <asm/div64.h>
+
 #define MTD_CHAR_MAJOR 90
 #define MTD_BLOCK_MAJOR 31
 #define MAX_MTD_DEVICES 32
@@ -25,16 +27,16 @@
 #define MTD_ERASE_DONE          0x08
 #define MTD_ERASE_FAILED        0x10
 
-#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff
+#define MTD_FAIL_ADDR_UNKNOWN -1LL
 
 /* If the erase fails, fail_addr might indicate exactly which block failed.  If
    fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not
    specific to any particular block. */
 struct erase_info {
 	struct mtd_info *mtd;
-	u_int32_t addr;
-	u_int32_t len;
-	u_int32_t fail_addr;
+	uint64_t addr;
+	uint64_t len;
+	uint64_t fail_addr;
 	u_long time;
 	u_long retries;
 	u_int dev;
@@ -46,7 +48,7 @@ struct erase_info {
 };
 
 struct mtd_erase_region_info {
-	u_int32_t offset;			/* At which this region starts, from the beginning of the MTD */
+	uint64_t offset;			/* At which this region starts, from the beginning of the MTD */
 	u_int32_t erasesize;		/* For this region */
 	u_int32_t numblocks;		/* Number of blocks of erasesize in this region */
 	unsigned long *lockmap;		/* If keeping bitmap of locks */
@@ -101,7 +103,7 @@ struct mtd_oob_ops {
 struct mtd_info {
 	u_char type;
 	u_int32_t flags;
-	u_int32_t size;	 // Total size of the MTD
+	uint64_t size;	 // Total size of the MTD
 
 	/* "Major" erase size for the device. Naïve users may take this
 	 * to be the only erase size available, or may use the more detailed
@@ -120,6 +122,16 @@ struct mtd_info {
 	u_int32_t oobsize;   // Amount of OOB data per block (e.g. 16)
 	u_int32_t oobavail;  // Available OOB bytes per block
 
+	/*
+	 * If erasesize is a power of 2 then the shift is stored in
+	 * erasesize_shift otherwise erasesize_shift is zero. Ditto writesize.
+	 */
+	unsigned int erasesize_shift;
+	unsigned int writesize_shift;
+	/* Masks based on erasesize_shift and writesize_shift */
+	unsigned int erasesize_mask;
+	unsigned int writesize_mask;
+
 	// Kernel-only stuff starts here.
 	const char *name;
 	int index;
@@ -190,8 +202,8 @@ struct mtd_info {
 	void (*sync) (struct mtd_info *mtd);
 
 	/* Chip-supported device locking */
-	int (*lock) (struct mtd_info *mtd, loff_t ofs, size_t len);
-	int (*unlock) (struct mtd_info *mtd, loff_t ofs, size_t len);
+	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Power Management functions */
 	int (*suspend) (struct mtd_info *mtd);
@@ -221,6 +233,35 @@ struct mtd_info {
 	void (*put_device) (struct mtd_info *mtd);
 };
 
+static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
+{
+	if (mtd->erasesize_shift)
+		return sz >> mtd->erasesize_shift;
+	do_div(sz, mtd->erasesize);
+	return sz;
+}
+
+static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd)
+{
+	if (mtd->erasesize_shift)
+		return sz & mtd->erasesize_mask;
+	return do_div(sz, mtd->erasesize);
+}
+
+static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd)
+{
+	if (mtd->writesize_shift)
+		return sz >> mtd->writesize_shift;
+	do_div(sz, mtd->writesize);
+	return sz;
+}
+
+static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd)
+{
+	if (mtd->writesize_shift)
+		return sz & mtd->writesize_mask;
+	return do_div(sz, mtd->writesize);
+}
 
 	/* Kernel-side ioctl definitions */
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 733d3f3b4eb8..c0677b8082be 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -399,7 +399,7 @@ struct nand_chip {
 	int		bbt_erase_shift;
 	int		chip_shift;
 	int		numchips;
-	unsigned long	chipsize;
+	uint64_t	chipsize;
 	int		pagemask;
 	int		pagebuf;
 	int		subpagesize;
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index c92b4d439609..164c7d78687d 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -36,8 +36,8 @@
 
 struct mtd_partition {
 	char *name;			/* identifier string */
-	u_int32_t size;			/* partition size */
-	u_int32_t offset;		/* offset within the master MTD space */
+	uint64_t size;			/* partition size */
+	uint64_t offset;		/* offset within the master MTD space */
 	u_int32_t mask_flags;		/* master MTD flags to mask out for this partition */
 	struct nand_ecclayout *ecclayout;	/* out of band layout for this partition (NAND only)*/
 	struct mtd_info **mtdp;		/* pointer to store the MTD object */
-- 
cgit v1.2.2


From 3854be7712f7b4bdcaed14664fc7c7124b3fef0d Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Wed, 10 Dec 2008 14:06:42 +0000
Subject: [MTD] Remove strange u_int32_t types from FTL

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/ftl.h | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/ftl.h b/include/linux/mtd/ftl.h
index 0be442f881dd..0555f7a0b9ed 100644
--- a/include/linux/mtd/ftl.h
+++ b/include/linux/mtd/ftl.h
@@ -32,25 +32,25 @@
 #define _LINUX_FTL_H
 
 typedef struct erase_unit_header_t {
-    u_int8_t	LinkTargetTuple[5];
-    u_int8_t	DataOrgTuple[10];
-    u_int8_t	NumTransferUnits;
-    u_int32_t	EraseCount;
-    u_int16_t	LogicalEUN;
-    u_int8_t	BlockSize;
-    u_int8_t	EraseUnitSize;
-    u_int16_t	FirstPhysicalEUN;
-    u_int16_t	NumEraseUnits;
-    u_int32_t	FormattedSize;
-    u_int32_t	FirstVMAddress;
-    u_int16_t	NumVMPages;
-    u_int8_t	Flags;
-    u_int8_t	Code;
-    u_int32_t	SerialNumber;
-    u_int32_t	AltEUHOffset;
-    u_int32_t	BAMOffset;
-    u_int8_t	Reserved[12];
-    u_int8_t	EndTuple[2];
+    uint8_t	LinkTargetTuple[5];
+    uint8_t	DataOrgTuple[10];
+    uint8_t	NumTransferUnits;
+    uint32_t	EraseCount;
+    uint16_t	LogicalEUN;
+    uint8_t	BlockSize;
+    uint8_t	EraseUnitSize;
+    uint16_t	FirstPhysicalEUN;
+    uint16_t	NumEraseUnits;
+    uint32_t	FormattedSize;
+    uint32_t	FirstVMAddress;
+    uint16_t	NumVMPages;
+    uint8_t	Flags;
+    uint8_t	Code;
+    uint32_t	SerialNumber;
+    uint32_t	AltEUHOffset;
+    uint32_t	BAMOffset;
+    uint8_t	Reserved[12];
+    uint8_t	EndTuple[2];
 } erase_unit_header_t;
 
 /* Flags in erase_unit_header_t */
-- 
cgit v1.2.2


From 26cdb67c74aedc22367e6d0271f7f955220cca65 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Wed, 10 Dec 2008 14:08:12 +0000
Subject: [MTD] Remove more strange u_intxx_t types

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h        | 26 +++++++++++++-------------
 include/linux/mtd/partitions.h |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 95e585ecc297..adef674855f3 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -39,8 +39,8 @@ struct erase_info {
 	uint64_t fail_addr;
 	u_long time;
 	u_long retries;
-	u_int dev;
-	u_int cell;
+	unsigned dev;
+	unsigned cell;
 	void (*callback) (struct erase_info *self);
 	u_long priv;
 	u_char state;
@@ -49,8 +49,8 @@ struct erase_info {
 
 struct mtd_erase_region_info {
 	uint64_t offset;			/* At which this region starts, from the beginning of the MTD */
-	u_int32_t erasesize;		/* For this region */
-	u_int32_t numblocks;		/* Number of blocks of erasesize in this region */
+	uint32_t erasesize;		/* For this region */
+	uint32_t numblocks;		/* Number of blocks of erasesize in this region */
 	unsigned long *lockmap;		/* If keeping bitmap of locks */
 };
 
@@ -102,14 +102,14 @@ struct mtd_oob_ops {
 
 struct mtd_info {
 	u_char type;
-	u_int32_t flags;
+	uint32_t flags;
 	uint64_t size;	 // Total size of the MTD
 
 	/* "Major" erase size for the device. Naïve users may take this
 	 * to be the only erase size available, or may use the more detailed
 	 * information below if they desire
 	 */
-	u_int32_t erasesize;
+	uint32_t erasesize;
 	/* Minimal writable flash unit size. In case of NOR flash it is 1 (even
 	 * though individual bits can be cleared), in case of NAND flash it is
 	 * one NAND page (or half, or one-fourths of it), in case of ECC-ed NOR
@@ -117,10 +117,10 @@ struct mtd_info {
 	 * Any driver registering a struct mtd_info must ensure a writesize of
 	 * 1 or larger.
 	 */
-	u_int32_t writesize;
+	uint32_t writesize;
 
-	u_int32_t oobsize;   // Amount of OOB data per block (e.g. 16)
-	u_int32_t oobavail;  // Available OOB bytes per block
+	uint32_t oobsize;   // Amount of OOB data per block (e.g. 16)
+	uint32_t oobavail;  // Available OOB bytes per block
 
 	/*
 	 * If erasesize is a power of 2 then the shift is stored in
@@ -233,7 +233,7 @@ struct mtd_info {
 	void (*put_device) (struct mtd_info *mtd);
 };
 
-static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
+static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
 {
 	if (mtd->erasesize_shift)
 		return sz >> mtd->erasesize_shift;
@@ -241,14 +241,14 @@ static inline u_int32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
 	return sz;
 }
 
-static inline u_int32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd)
+static inline uint32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd)
 {
 	if (mtd->erasesize_shift)
 		return sz & mtd->erasesize_mask;
 	return do_div(sz, mtd->erasesize);
 }
 
-static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd)
+static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd)
 {
 	if (mtd->writesize_shift)
 		return sz >> mtd->writesize_shift;
@@ -256,7 +256,7 @@ static inline u_int32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd)
 	return sz;
 }
 
-static inline u_int32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd)
+static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd)
 {
 	if (mtd->writesize_shift)
 		return sz & mtd->writesize_mask;
diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h
index 164c7d78687d..a45dd831b3f8 100644
--- a/include/linux/mtd/partitions.h
+++ b/include/linux/mtd/partitions.h
@@ -38,7 +38,7 @@ struct mtd_partition {
 	char *name;			/* identifier string */
 	uint64_t size;			/* partition size */
 	uint64_t offset;		/* offset within the master MTD space */
-	u_int32_t mask_flags;		/* master MTD flags to mask out for this partition */
+	uint32_t mask_flags;		/* master MTD flags to mask out for this partition */
 	struct nand_ecclayout *ecclayout;	/* out of band layout for this partition (NAND only)*/
 	struct mtd_info **mtdp;		/* pointer to store the MTD object */
 };
-- 
cgit v1.2.2


From d3af0f048c114dd53713d5920c54f6d5b6b12139 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 1 Dec 2008 14:23:38 -0800
Subject: [MTD] [NAND] remove excess kernel-doc notation

Delete extra kernel-doc notation for struct fields and function
parameters that don't exist:

Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'wq' description in 'nand_chip'
Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'datbuf' description in 'nand_chip'
Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'oobbuf' description in 'nand_chip'
Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'oobdirty' description in 'nand_chip'
Warning(include/linux/mtd/nand.h:428): Excess struct/union/enum/typedef member 'data_poi' description in 'nand_chip'
Warning(drivers/mtd/nand/nand_base.c:2527): Excess function parameter 'maxchips' description in 'nand_scan_tail'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/nand.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c0677b8082be..db5b63da2a7e 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -335,17 +335,12 @@ struct nand_buffers {
  * @erase_cmd:		[INTERN] erase command write function, selectable due to AND support
  * @scan_bbt:		[REPLACEABLE] function to scan bad block table
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR)
- * @wq:			[INTERN] wait queue to sleep on if a NAND operation is in progress
  * @state:		[INTERN] the current state of the NAND device
  * @oob_poi:		poison value buffer
  * @page_shift:		[INTERN] number of address bits in a page (column address bits)
  * @phys_erase_shift:	[INTERN] number of address bits in a physical eraseblock
  * @bbt_erase_shift:	[INTERN] number of address bits in a bbt entry
  * @chip_shift:		[INTERN] number of address bits in one chip
- * @datbuf:		[INTERN] internal buffer for one page + oob
- * @oobbuf:		[INTERN] oob buffer for one eraseblock
- * @oobdirty:		[INTERN] indicates that oob_buf must be reinitialized
- * @data_poi:		[INTERN] pointer to a data buffer
  * @options:		[BOARDSPECIFIC] various chip options. They can partly be set to inform nand_scan about
  *			special functionality. See the defines for further explanation
  * @badblockpos:	[INTERN] position of the bad block marker in the oob area
-- 
cgit v1.2.2


From 3f4b0ef7f2899c91b1d6958779f084b44dd59d32 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 26 Oct 2008 20:52:15 +0100
Subject: ACPI hibernate: Add a mechanism to save/restore ACPI NVS memory

According to the ACPI Specification 3.0b, Section 15.3.2,
"OSPM will call the _PTS control method some time before entering a
sleeping state, to allow the platform's AML code to update this
memory image before entering the sleeping state. After the system
awakes from an S4 state, OSPM will restore this memory area and call
the _WAK control method to enable the BIOS to reclaim its memory
image."  For this reason, implement a mechanism allowing us to save
the NVS memory during hibernation and to restore it during the
subsequent resume.

Based on a patch by Zhang Rui.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Nigel Cunningham <nigel@tuxonice.net>
Cc: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/suspend.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 2ce8207686e2..2b409c44db83 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -232,6 +232,11 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 
 extern void hibernation_set_ops(struct platform_hibernation_ops *ops);
 extern int hibernate(void);
+extern int hibernate_nvs_register(unsigned long start, unsigned long size);
+extern int hibernate_nvs_alloc(void);
+extern void hibernate_nvs_free(void);
+extern void hibernate_nvs_save(void);
+extern void hibernate_nvs_restore(void);
 #else /* CONFIG_HIBERNATION */
 static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
 static inline void swsusp_set_page_free(struct page *p) {}
@@ -239,6 +244,14 @@ static inline void swsusp_unset_page_free(struct page *p) {}
 
 static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {}
 static inline int hibernate(void) { return -ENOSYS; }
+static inline int hibernate_nvs_register(unsigned long a, unsigned long b)
+{
+	return 0;
+}
+static inline int hibernate_nvs_alloc(void) { return 0; }
+static inline void hibernate_nvs_free(void) {}
+static inline void hibernate_nvs_save(void) {}
+static inline void hibernate_nvs_restore(void) {}
 #endif /* CONFIG_HIBERNATION */
 
 #ifdef CONFIG_PM_SLEEP
-- 
cgit v1.2.2


From ba84ed9546e91348fdf3ff2bff859b0ee53b407a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 26 Oct 2008 20:56:30 +0100
Subject: ACPI hibernate: Introduce new kernel parameter acpi_sleep=s4_nonvs

On some machines it may be necessary to disable the saving/restoring
of the ACPI NVS memory region during hibernation/resume.  For this
purpose, introduce new ACPI kernel command line option
acpi_sleep=s4_nonvs.

Based on a patch by Zhang Rui.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Nigel Cunningham <nigel@tuxonice.net>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fba8051fb297..dfa0a5356c53 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -270,6 +270,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n,
 #ifdef CONFIG_PM_SLEEP
 void __init acpi_no_s4_hw_signature(void);
 void __init acpi_old_suspend_ordering(void);
+void __init acpi_s4_no_nvs(void);
 #endif /* CONFIG_PM_SLEEP */
 #else	/* CONFIG_ACPI */
 
-- 
cgit v1.2.2


From 160bbab3000dafccbe43688e48208cecf4deb879 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Tue, 23 Dec 2008 10:00:14 +0000
Subject: [MTD] struct device - replace bus_id with dev_name(), dev_set_name()

Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/concat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/concat.h b/include/linux/mtd/concat.h
index c02f3d264ecf..e80c674daeb3 100644
--- a/include/linux/mtd/concat.h
+++ b/include/linux/mtd/concat.h
@@ -13,7 +13,7 @@
 struct mtd_info *mtd_concat_create(
     struct mtd_info *subdev[],  /* subdevices to concatenate */
     int num_devs,               /* number of subdevices      */
-    char *name);                /* name for the new device   */
+    const char *name);          /* name for the new device   */
 
 void mtd_concat_destroy(struct mtd_info *mtd);
 
-- 
cgit v1.2.2


From f748bafa3ca1fb056e63afdeecacc1c68d8104df Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 8 Dec 2008 21:30:31 -0700
Subject: ACPI: PCI: move struct acpi_prt_entry declaration out of public
 header file

The struct acpi_prt_entry is used only in pci_irq.c, so there's no
need for the declaration to be public.  This patch moves it into
pci_irq.c.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/acpi.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fba8051fb297..813f937b3ab4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -131,22 +131,6 @@ extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity);
  */
 void acpi_unregister_gsi (u32 gsi);
 
-struct acpi_prt_entry {
-	struct list_head	node;
-	struct acpi_pci_id	id;
-	u8			pin;
-	struct {
-		acpi_handle		handle;
-		u32			index;
-	}			link;
-	u32			irq;
-};
-
-struct acpi_prt_list {
-	int			count;
-	struct list_head	entries;
-};
-
 struct pci_dev;
 
 int acpi_pci_irq_enable (struct pci_dev *dev);
-- 
cgit v1.2.2


From ea7e96e0f2277107d9ea14c3f16c86ba82b2e560 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Tue, 16 Dec 2008 16:28:17 +0800
Subject: ACPI: remove private acpica headers from driver files

External driver files should not include any private acpica headers.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 include/linux/pci_hotplug.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index a00bd1a0f156..c2d1a7d1886a 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -223,7 +223,6 @@ struct hotplug_params {
 #ifdef CONFIG_ACPI
 #include <acpi/acpi.h>
 #include <acpi/acpi_bus.h>
-#include <acpi/actypes.h>
 extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
 				struct hotplug_params *hpp);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
-- 
cgit v1.2.2


From 56c451f4b583ccdf80c9e676179c9cb49de86745 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 18 Dec 2008 14:49:37 +0900
Subject: [SCSI] block: fix the partial mappings with struct rq_map_data

This fixes bio_copy_user_iov to properly handle the partial mappings
with struct rq_map_data (which only sg uses for now but st and osst
will shortly). It adds the offset member to struct rq_map_data and
changes blk_rq_map_user to update it so that bio_copy_user_iov can add
an appropriate page frame via bio_add_pc_page().

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7035cec583b6..811e5342c452 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -690,6 +690,7 @@ struct rq_map_data {
 	struct page **pages;
 	int page_order;
 	int nr_entries;
+	unsigned long offset;
 };
 
 struct req_iterator {
-- 
cgit v1.2.2


From 97ae77a1cd332c7b011d71315c8faabce6840c72 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 18 Dec 2008 14:49:38 +0900
Subject: [SCSI] block: make blk_rq_map_user take a NULL user-space buffer for
 WRITE

The commit 818827669d85b84241696ffef2de485db46b0b5e (block: make
blk_rq_map_user take a NULL user-space buffer) extended
blk_rq_map_user to accept a NULL user-space buffer with a READ
command. It was necessary to convert sg to use the block layer mapping
API.

This patch extends blk_rq_map_user again for a WRITE command. It is
necessary to convert st and osst drivers to use the block layer
apping API.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/linux/blkdev.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 811e5342c452..044467ef7b11 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -691,6 +691,7 @@ struct rq_map_data {
 	int page_order;
 	int nr_entries;
 	unsigned long offset;
+	int null_mapped;
 };
 
 struct req_iterator {
-- 
cgit v1.2.2


From 87d8fe1ee6b8d2f95076142d58c440dba4e7bdc2 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 3 Jan 2009 09:47:09 -0500
Subject: add releasepage hooks to block devices which can be used by file
 systems

Implement blkdev_releasepage() to release the buffer_heads and pages
after we release private data belonging to a mounted filesystem.

Cc: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/fs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f2a3010140e3..0f54ae0f0ccd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -565,6 +565,7 @@ struct address_space {
 struct block_device {
 	dev_t			bd_dev;  /* not a kdev_t - it's a search key */
 	struct inode *		bd_inode;	/* will die */
+	struct super_block *	bd_super;
 	int			bd_openers;
 	struct mutex		bd_mutex;	/* open/close mutex */
 	struct semaphore	bd_mount_sem;
@@ -1385,6 +1386,7 @@ struct super_operations {
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
+	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
 };
 
 /*
-- 
cgit v1.2.2


From c31910672376dfb8d020e32afa7249763bcd924a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 6 Jan 2009 11:14:25 -0500
Subject: ext4: Remove code to create the journal inode

This code has been obsolete in quite some time, since the supported
method for adding a journal inode is to use tune2fs (or to creating
new filesystem with a journal via mke2fs or mkfs.ext4).

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 9d82084a1605..adef1c9940d3 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1104,7 +1104,6 @@ extern int	   jbd2_journal_set_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
 extern void	   jbd2_journal_clear_features
 		   (journal_t *, unsigned long, unsigned long, unsigned long);
-extern int	   jbd2_journal_create     (journal_t *);
 extern int	   jbd2_journal_load       (journal_t *journal);
 extern int	   jbd2_journal_destroy    (journal_t *);
 extern int	   jbd2_journal_recover    (journal_t *journal);
-- 
cgit v1.2.2


From 922ab535bbe73975ce62f71ab9bf8ec9bce71c29 Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@infradead.org>
Date: Tue, 16 Dec 2008 18:13:58 +0000
Subject: [MTD] LPDDR QINFO records definitions

There are declaraton of structures and macros definitions
necessary for operations with QINFO in this patch.

Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Acked-by: Jared Hulbert <jaredeh@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/qinfo.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 include/linux/mtd/qinfo.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h
new file mode 100644
index 000000000000..7b3d487d8b3f
--- /dev/null
+++ b/include/linux/mtd/qinfo.h
@@ -0,0 +1,91 @@
+#ifndef __LINUX_MTD_QINFO_H
+#define __LINUX_MTD_QINFO_H
+
+#include <linux/mtd/map.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/flashchip.h>
+#include <linux/mtd/partitions.h>
+
+/* lpddr_private describes lpddr flash chip in memory map
+ * @ManufactId - Chip Manufacture ID
+ * @DevId - Chip Device ID
+ * @qinfo - pointer to qinfo records describing the chip
+ * @numchips - number of chips including virual RWW partitions
+ * @chipshift - Chip/partiton size 2^chipshift
+ * @chips - per-chip data structure
+ */
+struct lpddr_private {
+	uint16_t ManufactId;
+	uint16_t DevId;
+	struct qinfo_chip *qinfo;
+	int numchips;
+	unsigned long chipshift;
+	struct flchip chips[0];
+};
+
+/* qinfo_query_info structure contains request information for
+ * each qinfo record
+ * @major - major number of qinfo record
+ * @major - minor number of qinfo record
+ * @id_str - descriptive string to access the record
+ * @desc - detailed description for the qinfo record
+ */
+struct qinfo_query_info {
+	uint8_t	major;
+	uint8_t	minor;
+	char *id_str;
+	char *desc;
+};
+
+/*
+ * qinfo_chip structure contains necessary qinfo records data
+ * @DevSizeShift - Device size 2^n bytes
+ * @BufSizeShift - Program buffer size 2^n bytes
+ * @TotalBlocksNum - Total number of blocks
+ * @UniformBlockSizeShift - Uniform block size 2^UniformBlockSizeShift bytes
+ * @HWPartsNum - Number of hardware partitions
+ * @SuspEraseSupp - Suspend erase supported
+ * @SingleWordProgTime - Single word program 2^SingleWordProgTime u-sec
+ * @ProgBufferTime - Program buffer write 2^ProgBufferTime u-sec
+ * @BlockEraseTime - Block erase 2^BlockEraseTime m-sec
+ */
+struct qinfo_chip {
+	/* General device info */
+	uint16_t DevSizeShift;
+	uint16_t BufSizeShift;
+	/* Erase block information */
+	uint16_t TotalBlocksNum;
+	uint16_t UniformBlockSizeShift;
+	/* Partition information */
+	uint16_t HWPartsNum;
+	/* Optional features */
+	uint16_t SuspEraseSupp;
+	/* Operation typical time */
+	uint16_t SingleWordProgTime;
+	uint16_t ProgBufferTime;
+	uint16_t BlockEraseTime;
+};
+
+/* defines for fixup usage */
+#define LPDDR_MFR_ANY		0xffff
+#define LPDDR_ID_ANY		0xffff
+#define NUMONYX_MFGR_ID		0x0089
+#define R18_DEVICE_ID_1G	0x893c
+
+static inline map_word lpddr_build_cmd(u_long cmd, struct map_info *map)
+{
+	map_word val = { {0} };
+	val.x[0] = cmd;
+	return val;
+}
+
+#define CMD(x) lpddr_build_cmd(x, map)
+#define CMDVAL(cmd) cmd.x[0]
+
+struct mtd_info *lpddr_cmdset(struct map_info *);
+
+#endif
+
-- 
cgit v1.2.2


From eb3db27507f74b99241abfa11824d8b6d92b84ef Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@infradead.org>
Date: Tue, 16 Dec 2008 18:15:33 +0000
Subject: [MTD] LPDDR PFOW definition

LPDDR chips use PFOW window for sending commands, reading status and
capabilites requesting.
This pfow.h - contains definitions for PFOW window fileds, possible commands,
error flags and some common macro function to avoid code duplications.

Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Acked-by: Jared Hulbert <jaredeh@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/pfow.h | 159 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 include/linux/mtd/pfow.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
new file mode 100644
index 000000000000..b730d4f84655
--- /dev/null
+++ b/include/linux/mtd/pfow.h
@@ -0,0 +1,159 @@
+/* Primary function overlay window definitions
+ * and service functions used by LPDDR chips
+ */
+#ifndef __LINUX_MTD_PFOW_H
+#define __LINUX_MTD_PFOW_H
+
+#include <linux/mtd/qinfo.h>
+
+/* PFOW registers addressing */
+/* Address of symbol "P" */
+#define PFOW_QUERY_STRING_P			0x0000
+/* Address of symbol "F" */
+#define PFOW_QUERY_STRING_F			0x0002
+/* Address of symbol "O" */
+#define PFOW_QUERY_STRING_O			0x0004
+/* Address of symbol "W" */
+#define PFOW_QUERY_STRING_W			0x0006
+/* Identification info for LPDDR chip */
+#define PFOW_MANUFACTURER_ID			0x0020
+#define PFOW_DEVICE_ID				0x0022
+/* Address in PFOW where prog buffer can can be found */
+#define PFOW_PROGRAM_BUFFER_OFFSET		0x0040
+/* Size of program buffer in words */
+#define PFOW_PROGRAM_BUFFER_SIZE		0x0042
+/* Address command code register */
+#define PFOW_COMMAND_CODE			0x0080
+/* command data register */
+#define PFOW_COMMAND_DATA			0x0084
+/* command address register lower address bits */
+#define PFOW_COMMAND_ADDRESS_L			0x0088
+/* command address register upper address bits */
+#define PFOW_COMMAND_ADDRESS_H			0x008a
+/* number of bytes to be proggrammed lower address bits */
+#define PFOW_DATA_COUNT_L			0x0090
+/* number of bytes to be proggrammed higher address bits */
+#define PFOW_DATA_COUNT_H			0x0092
+/* command execution register, the only possible value is 0x01 */
+#define PFOW_COMMAND_EXECUTE			0x00c0
+/* 0x01 should be written at this address to clear buffer */
+#define PFOW_CLEAR_PROGRAM_BUFFER		0x00c4
+/* device program/erase suspend register */
+#define PFOW_PROGRAM_ERASE_SUSPEND		0x00c8
+/* device status register */
+#define PFOW_DSR				0x00cc
+
+/* LPDDR memory device command codes */
+/* They are possible values of PFOW command code register */
+#define LPDDR_WORD_PROGRAM		0x0041
+#define LPDDR_BUFF_PROGRAM		0x00E9
+#define LPDDR_BLOCK_ERASE		0x0020
+#define LPDDR_LOCK_BLOCK		0x0061
+#define LPDDR_UNLOCK_BLOCK		0x0062
+#define LPDDR_READ_BLOCK_LOCK_STATUS	0x0065
+#define LPDDR_INFO_QUERY		0x0098
+#define LPDDR_READ_OTP			0x0097
+#define LPDDR_PROG_OTP			0x00C0
+#define LPDDR_RESUME			0x00D0
+
+/* Defines possible value of PFOW command execution register */
+#define LPDDR_START_EXECUTION			0x0001
+
+/* Defines possible value of PFOW program/erase suspend register */
+#define LPDDR_SUSPEND				0x0001
+
+/* Possible values of PFOW device status register */
+/* access R - read; RC read & clearable */
+#define DSR_DPS			(1<<1) /* RC; device protect status
+					* 0 - not protected 1 - locked */
+#define DSR_PSS			(1<<2) /* R; program suspend status;
+					* 0-prog in progress/completed,
+					* 1- prog suspended */
+#define DSR_VPPS		(1<<3) /* RC; 0-Vpp OK, * 1-Vpp low */
+#define DSR_PROGRAM_STATUS	(1<<4) /* RC; 0-successful, 1-error */
+#define DSR_ERASE_STATUS	(1<<5) /* RC; erase or blank check status;
+					* 0-success erase/blank check,
+					* 1 blank check error */
+#define DSR_ESS			(1<<6) /* R; erase suspend status;
+					* 0-erase in progress/complete,
+					* 1 erase suspended */
+#define DSR_READY_STATUS	(1<<7) /* R; Device status
+					* 0-busy,
+					* 1-ready */
+#define DSR_RPS			(0x3<<8) /* RC;  region program status
+					* 00 - Success,
+					* 01-re-program attempt in region with
+					* object mode data,
+					* 10-object mode program w attempt in
+					* region with control mode data
+					* 11-attempt to program invalid half
+					* with 0x41 command */
+#define DSR_AOS			(1<<12) /* RC; 1- AO related failure */
+#define DSR_AVAILABLE		(1<<15) /* R; Device availbility
+					* 1 - Device available
+					* 0 - not available */
+
+/* The superset of all possible error bits in DSR */
+#define DSR_ERR			0x133A
+
+static inline void send_pfow_command(struct map_info *map,
+				unsigned long cmd_code, unsigned long adr,
+				unsigned long len, map_word *datum)
+{
+	int bits_per_chip = map_bankwidth(map) * 8;
+	int chipnum;
+	struct lpddr_private *lpddr = map->fldrv_priv;
+	chipnum = adr >> lpddr->chipshift;
+
+	map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE);
+	map_write(map, CMD(adr & ((1<<bits_per_chip) - 1)),
+				map->pfow_base + PFOW_COMMAND_ADDRESS_L);
+	map_write(map, CMD(adr>>bits_per_chip),
+				map->pfow_base + PFOW_COMMAND_ADDRESS_H);
+	if (len) {
+		map_write(map, CMD(len & ((1<<bits_per_chip) - 1)),
+					map->pfow_base + PFOW_DATA_COUNT_L);
+		map_write(map, CMD(len>>bits_per_chip),
+					map->pfow_base + PFOW_DATA_COUNT_H);
+	}
+	if (datum)
+		map_write(map, *datum, map->pfow_base + PFOW_COMMAND_DATA);
+
+	/* Command execution start */
+	map_write(map, CMD(LPDDR_START_EXECUTION),
+			map->pfow_base + PFOW_COMMAND_EXECUTE);
+}
+
+static inline void print_drs_error(unsigned dsr)
+{
+	int prog_status = (dsr & DSR_RPS) >> 8;
+
+	if (!(dsr & DSR_AVAILABLE))
+		printk(KERN_NOTICE"DSR.15: (0) Device not Available\n");
+	if (prog_status & 0x03)
+		printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid "
+						"half with 41h command\n");
+	else if (prog_status & 0x02)
+		printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt "
+					"in region with Control Mode data\n");
+	else if (prog_status &  0x01)
+		printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region "
+						"with Object Mode data\n");
+	if (!(dsr & DSR_READY_STATUS))
+		printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n");
+	if (dsr & DSR_ESS)
+		printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n");
+	if (dsr & DSR_ERASE_STATUS)
+		printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n");
+	if (dsr & DSR_PROGRAM_STATUS)
+		printk(KERN_NOTICE"DSR.4: (1) Program Error\n");
+	if (dsr & DSR_VPPS)
+		printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation "
+					"aborted\n");
+	if (dsr & DSR_PSS)
+		printk(KERN_NOTICE"DSR.2: (1) Program suspended\n");
+	if (dsr & DSR_DPS)
+		printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt "
+					"on locked block\n");
+}
+#endif /* __LINUX_MTD_PFOW_H */
-- 
cgit v1.2.2


From d13e51e747fee301b404dffcf4a7e1bdc558969b Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@infradead.org>
Date: Tue, 16 Dec 2008 18:21:10 +0000
Subject: [MTD] LPDDR added new pfow_base parameter

We need to supply additional parameter to mapping driver and tell
LPDDR drivers where PFOW window is in chip mapping.
It leads to necessity of map_info structure extendoing.

Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Acked-by: Jared Hulbert <jaredeh@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/map.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index aa30244492c6..b981b8772217 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -223,6 +223,7 @@ struct map_info {
 	   must leave it enabled. */
 	void (*set_vpp)(struct map_info *, int);
 
+	unsigned long pfow_base;
 	unsigned long map_priv_1;
 	unsigned long map_priv_2;
 	void *fldrv_priv;
-- 
cgit v1.2.2


From d81408304b06a71c28417445202af9cd6673168d Mon Sep 17 00:00:00 2001
From: Alexey Korolev <akorolev@infradead.org>
Date: Tue, 16 Dec 2008 18:22:39 +0000
Subject: [MTD] LPDDR extended physmap driver to support LPDDR flash

Physmap is a generic map driver for different platforms and flash types.
We added support of LPDDR to physmap.
All changes here are related to introduction of new pfow_base parameter.
This parameter is valid in case of LPDDR chips only.

Signed-off-by: Alexey Korolev <akorolev@infradead.org>
Acked-by: Jared Hulbert <jaredeh@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/physmap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h
index c8e63a5ee72e..76f7cabf07d3 100644
--- a/include/linux/mtd/physmap.h
+++ b/include/linux/mtd/physmap.h
@@ -24,6 +24,7 @@ struct physmap_flash_data {
 	unsigned int		width;
 	void			(*set_vpp)(struct map_info *, int);
 	unsigned int		nr_parts;
+	unsigned int		pfow_base;
 	struct mtd_partition	*parts;
 };
 
-- 
cgit v1.2.2


From 07f2211e4fbce6990722d78c4f04225da9c0e9cf Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 5 Jan 2009 17:14:31 -0700
Subject: dmaengine: remove dependency on async_tx

async_tx.ko is a consumer of dma channels.  A circular dependency arises
if modules in drivers/dma rely on common code in async_tx.ko.  It
prevents either module from being unloaded.

Move dma_wait_for_async_tx and async_tx_run_dependencies to dmaeninge.o
where they should have been from the beginning.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/async_tx.h  | 15 ---------------
 include/linux/dmaengine.h |  9 +++++++++
 2 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 0f50d4cc4360..1c816775f135 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -60,8 +60,6 @@ enum async_tx_flags {
 
 #ifdef CONFIG_DMA_ENGINE
 void async_tx_issue_pending_all(void);
-enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
-void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
 #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 #include <asm/async_tx.h>
 #else
@@ -77,19 +75,6 @@ static inline void async_tx_issue_pending_all(void)
 	do { } while (0);
 }
 
-static inline enum dma_status
-dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
-{
-	return DMA_SUCCESS;
-}
-
-static inline void
-async_tx_run_dependencies(struct dma_async_tx_descriptor *tx,
-	struct dma_chan *host_chan)
-{
-	do { } while (0);
-}
-
 static inline struct dma_chan *
 async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
 	enum dma_transaction_type tx_type, struct page **dst, int dst_count,
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index adb0b084eb5a..e4ec7e7b8056 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -475,11 +475,20 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
 }
 
 enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
+#ifdef CONFIG_DMA_ENGINE
+enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
+#else
+static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
+{
+	return DMA_SUCCESS;
+}
+#endif
 
 /* --- DMA device --- */
 
 int dma_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
+void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 
 /* --- Helper iov-locking functions --- */
 
-- 
cgit v1.2.2


From b3881f74b31b7d47d0f1c4d89ac3e7f0b9c05e3e Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 5 Jan 2009 22:46:26 -0500
Subject: ext4: Add mount option to set kjournald's I/O priority

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/ioprio.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index f98a656b17e5..76dad4808847 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -86,4 +86,6 @@ static inline int task_nice_ioclass(struct task_struct *task)
  */
 extern int ioprio_best(unsigned short aprio, unsigned short bprio);
 
+extern int set_task_ioprio(struct task_struct *task, int ioprio);
+
 #endif
-- 
cgit v1.2.2


From 835481d9bcd65720b473db6b38746a74a3964218 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sun, 4 Jan 2009 05:18:06 -0800
Subject: cpumask: convert struct cpufreq_policy to cpumask_var_t

Impact: use new cpumask API to reduce memory usage

This is part of an effort to reduce structure sizes for machines
configured with large NR_CPUS.  cpumask_t gets replaced by
cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or
struct cpumask * (large NR_CPUS).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
Acked-by: Dave Jones <davej@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cpufreq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 484b3abf61bb..384b38d3e8e2 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -80,8 +80,8 @@ struct cpufreq_real_policy {
 };
 
 struct cpufreq_policy {
-	cpumask_t		cpus;	/* CPUs requiring sw coordination */
-	cpumask_t		related_cpus; /* CPUs with any coordination */
+	cpumask_var_t		cpus;	/* CPUs requiring sw coordination */
+	cpumask_var_t		related_cpus; /* CPUs with any coordination */
 	unsigned int		shared_type; /* ANY or ALL affected CPUs
 						should set cpufreq */
 	unsigned int		cpu;    /* cpu nr of registered CPU */
-- 
cgit v1.2.2


From 548eaca46b3cf4419b6c2be839a106d8641ffb70 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 20 Oct 2008 17:48:43 -0400
Subject: nfsd: document new filehandle fsid types

Descriptions taken from mountd code (in nfs-utils/utils/mountd/cache.c).

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsfh.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index d1941cb965e9..b2e093870bc6 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -68,6 +68,10 @@ struct nfs_fhbase_old {
  *     1  - 4 byte user specified identifier
  *     2  - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
  *     3  - 4 byte device id, encoded for user-space, 4 byte inode number
+ *     4  - 4 byte inode number and 4 byte uuid
+ *     5  - 8 byte uuid
+ *     6  - 16 byte uuid
+ *     7  - 8 byte inode number and 16 byte uuid
  *
  * The fileid_type identified how the file within the filesystem is encoded.
  * This is (will be) passed to, and set by, the underlying filesystem if it supports
-- 
cgit v1.2.2


From c9233eb7b0b11ef176d4bf68da2ce85464b6ec39 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 20 Oct 2008 11:51:57 -0400
Subject: sunrpc: add sv_maxconn field to svc_serv (try #3)

svc_check_conn_limits() attempts to prevent denial of service attacks
by having the service close old connections once it reaches a
threshold. This threshold is based on the number of threads in the
service:

	(serv->sv_nrthreads + 3) * 20

Once we reach this, we drop the oldest connections and a printk pops
to warn the admin that they should increase the number of threads.

Increasing the number of threads isn't an option however for services
like lockd. We don't want to eliminate this check entirely for such
services but we need some way to increase this limit.

This patch adds a sv_maxconn field to the svc_serv struct. When it's
set to 0, we use the current method to calculate the max number of
connections. RPC services can then set this on an as-needed basis.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/sunrpc/svc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 3afe7fb403b2..3435d24bfe55 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -58,10 +58,13 @@ struct svc_serv {
 	struct svc_stat *	sv_stats;	/* RPC statistics */
 	spinlock_t		sv_lock;
 	unsigned int		sv_nrthreads;	/* # of server threads */
+	unsigned int		sv_maxconn;	/* max connections allowed or
+						 * '0' causing max to be based
+						 * on number of threads. */
+
 	unsigned int		sv_max_payload;	/* datagram payload size */
 	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
-
 	struct list_head	sv_permsocks;	/* all permanent sockets */
 	struct list_head	sv_tempsocks;	/* all temporary sockets */
 	int			sv_tmpcnt;	/* count of temporary sockets */
-- 
cgit v1.2.2


From 7538ce1eb656a1477bedd5b1c202226e7abf5e7b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:19:45 -0500
Subject: NLM: Use modern style for pointer fields in nlm_host

Clean up: I'm about to add another "char *" field to the nlm_host
structure.  The h_name field, for example, uses an older style of
declaring a "char *" field.  If I match that style for the new field,
checkpatch.pl will complain.

So, fix pointer fields to use the new style.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 23da3fa69efa..3dbdd353156c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -43,8 +43,8 @@ struct nlm_host {
 	struct sockaddr_storage	h_addr;		/* peer address */
 	size_t			h_addrlen;
 	struct sockaddr_storage	h_srcaddr;	/* our address (optional) */
-	struct rpc_clnt	*	h_rpcclnt;	/* RPC client to talk to peer */
-	char *			h_name;		/* remote hostname */
+	struct rpc_clnt		*h_rpcclnt;	/* RPC client to talk to peer */
+	char			*h_name;		/* remote hostname */
 	u32			h_version;	/* interface version */
 	unsigned short		h_proto;	/* transport proto */
 	unsigned short		h_reclaiming : 1,
@@ -64,7 +64,7 @@ struct nlm_host {
 	spinlock_t		h_lock;
 	struct list_head	h_granted;	/* Locks in GRANTED state */
 	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
-	struct nsm_handle *	h_nsmhandle;	/* NSM status handle */
+	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
 
 	char			h_addrbuf[48],	/* address eyecatchers */
 				h_srcaddrbuf[48];
-- 
cgit v1.2.2


From 1df40b609ad5a622904eb652109c287fe9c93ec5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:19:53 -0500
Subject: NLM: Remove address eye-catcher buffers from nlm_host

The h_name field in struct nlm_host is a just copy of
h_nsmhandle->sm_name.  Likewise, the contents of the h_addrbuf field
should be identical to the sm_addrbuf field.

The h_srcaddrbuf field is used only in one place for debugging.  We can
live without this until we get %pI formatting for printk().

Currently these buffers are 48 bytes, but we need to support scope IDs
in IPv6 presentation addresses, which means making the buffers even
larger.  Instead, let's find ways to eliminate them to save space.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 3dbdd353156c..dae22cb4c38d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -65,9 +65,7 @@ struct nlm_host {
 	struct list_head	h_granted;	/* Locks in GRANTED state */
 	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
 	struct nsm_handle	*h_nsmhandle;	/* NSM status handle */
-
-	char			h_addrbuf[48],	/* address eyecatchers */
-				h_srcaddrbuf[48];
+	char			*h_addrbuf;	/* address eyecatcher */
 };
 
 struct nsm_handle {
-- 
cgit v1.2.2


From bc995801a09d1fead0bec1356bfd836911c8eed7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:20:08 -0500
Subject: NLM: Support IPv6 scope IDs in nlm_display_address()

Scope ID support is needed since the kernel's NSM implementation is
about to use these displayed addresses as a mon_name in some cases.

When nsm_use_hostnames is zero, without scope ID support NSM will fail
to handle peers that contact us via a link-local address.  Link-local
addresses do not work without an interface ID, which is stored in the
sockaddr's sin6_scope_id field.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index dae22cb4c38d..80a0a2cff2b8 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -68,6 +68,14 @@ struct nlm_host {
 	char			*h_addrbuf;	/* address eyecatcher */
 };
 
+/*
+ * The largest string sm_addrbuf should hold is a full-size IPv6 address
+ * (no "::" anywhere) with a scope ID.  The buffer size is computed to
+ * hold eight groups of colon-separated four-hex-digit numbers, a
+ * percent sign, a scope id (at most 32 bits, in decimal), and NUL.
+ */
+#define NSM_ADDRBUF		((8 * 4 + 7) + (1 + 10) + 1)
+
 struct nsm_handle {
 	struct list_head	sm_link;
 	atomic_t		sm_count;
@@ -76,7 +84,7 @@ struct nsm_handle {
 	size_t			sm_addrlen;
 	unsigned int		sm_monitored : 1,
 				sm_sticky : 1;	/* don't unmonitor */
-	char			sm_addrbuf[48];	/* address eyecatcher */
+	char			sm_addrbuf[NSM_ADDRBUF];
 };
 
 /*
-- 
cgit v1.2.2


From f47534f7f0ac7727e05ec4274b764b181df2cf7f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:20:38 -0500
Subject: NSM: Use modern style for sm_name field in nsm_handle

Clean up: I'm about to add another "char *" field to the nsm_handle
structure.  The sm_name field uses an older style of declaring a
"char *" field.  If I match that style for the new field, checkpatch.pl
will complain.

So, fix the sm_name field to use the new style.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 80a0a2cff2b8..54dbb458e73c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -79,7 +79,7 @@ struct nlm_host {
 struct nsm_handle {
 	struct list_head	sm_link;
 	atomic_t		sm_count;
-	char *			sm_name;
+	char			*sm_name;
 	struct sockaddr_storage	sm_addr;
 	size_t			sm_addrlen;
 	unsigned int		sm_monitored : 1,
-- 
cgit v1.2.2


From 29ed1407ed81086b778ebf12145b048ac3f7e10e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:20:46 -0500
Subject: NSM: Support IPv6 version of mon_name

The "mon_name" argument of the NSMPROC_MON and NSMPROC_UNMON upcalls
is a string that contains the hostname or IP address of the remote peer
to be notified when this host has rebooted.  The sm-notify command uses
this identifier to contact the peer when we reboot, so it must be
either a well-qualified DNS hostname or a presentation format IP
address string.

When the "nsm_use_hostnames" sysctl is set to zero, the kernel's NSM
provides a presentation format IP address in the "mon_name" argument.
Otherwise, the "caller_name" argument from NLM requests is used,
which is usually just the DNS hostname of the peer.

To support IPv6 addresses for the mon_name argument, we use the
nsm_handle's address eye-catcher, which already contains an appropriate
presentation format address string.  Using the eye-catcher string
obviates the need to use a large buffer on the stack to form the
presentation address string for the upcall.

This patch also addresses a subtle bug.

An NSMPROC_MON request and the subsequent NSMPROC_UNMON request for the
same peer are required to use the same value for the "mon_name"
argument.  Otherwise, rpc.statd's NSMPROC_UNMON processing cannot
locate the database entry for that peer and remove it.

If the setting of nsm_use_hostnames is changed between the time the
kernel sends an NSMPROC_MON request and the time it sends the
NSMPROC_UNMON request for the same peer, the "mon_name" argument for
these two requests may not be the same.  This is because the value of
"mon_name" is currently chosen at the moment the call is made based on
the setting of nsm_use_hostnames

To ensure both requests pass identical contents in the "mon_name"
argument, we now select which string to use for the argument in the
nsm_monitor() function.  A pointer to this string is saved in the
nsm_handle so it can be used for a subsequent NSMPROC_UNMON upcall.

NB: There are other potential problems, such as how nlm_host_rebooted()
might behave if nsm_use_hostnames were changed while hosts are still
being monitored.  This patch does not attempt to address those
problems.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 54dbb458e73c..d3c7247d23e8 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -79,6 +79,7 @@ struct nlm_host {
 struct nsm_handle {
 	struct list_head	sm_link;
 	atomic_t		sm_count;
+	char			*sm_mon_name;
 	char			*sm_name;
 	struct sockaddr_storage	sm_addr;
 	size_t			sm_addrlen;
-- 
cgit v1.2.2


From 1e49323c4ab044d05bbc68cf13cadcbd4372468c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:21:24 -0500
Subject: NLM: Move the public declaration of nsm_monitor() to lockd.h

Clean up.

Make the nlm_host argument "const," and move the public declaration to
lockd.h with other NSM public function (nsm_release, eg) and global
variable declarations.

Add a documenting comment.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h    | 4 ++++
 include/linux/lockd/sm_inter.h | 1 -
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index d3c7247d23e8..f15a4f5ccbfb 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -242,6 +242,10 @@ extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
 					unsigned int, u32);
 void		  nsm_release(struct nsm_handle *);
 
+/*
+ * Host monitoring
+ */
+int		  nsm_monitor(const struct nlm_host *host);
 
 /*
  * This is used in garbage collection and resource reclaim
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 5a5448bdb17d..546b6102b0d7 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -41,7 +41,6 @@ struct nsm_res {
 	u32		state;
 };
 
-int		nsm_monitor(struct nlm_host *);
 int		nsm_unmonitor(struct nlm_host *);
 extern int	nsm_local_state;
 
-- 
cgit v1.2.2


From c8c23c423dec49cb439697d3dc714e1500ff1610 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:21:31 -0500
Subject: NSM: Release nsmhandle in nlm_destroy_host

The nsm_handle's reference count is bumped in nlm_lookup_host().  It
should be decremented in nlm_destroy_host() to make it easier to see
the balance of these two operations.

Move the nsm_release() call to fs/lockd/host.c.

The h_nsmhandle pointer is set in nlm_lookup_host(), and never cleared.
The nlm_destroy_host() function is never called for the same nlm_host
twice, so h_nsmhandle won't ever be NULL when nsm_unmonitor() is
called.

All references to the nlm_host are gone before it is freed.  We can
skip making h_nsmhandle NULL just before the nlm_host is deallocated.

It's also likely we can remove the h_nsmhandle NULL check in
nlmsvc_is_client() as well, but we can do that later when rearchitect-
ing the nlm_host cache.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f15a4f5ccbfb..30a6a9c1ce42 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -240,7 +240,6 @@ void		  nlm_release_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
 extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
 					unsigned int, u32);
-void		  nsm_release(struct nsm_handle *);
 
 /*
  * Host monitoring
-- 
cgit v1.2.2


From 356c3eb466fd1a12afd6448d90fba3922836e5f1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 4 Dec 2008 14:21:38 -0500
Subject: NLM: Move the public declaration of nsm_unmonitor() to lockd.h

Clean up.

Make the nlm_host argument "const," and move the public declaration to
lockd.h.  Add a documenting comment.

Bruce observed that nsm_unmonitor()'s only caller doesn't care about
its return code, so make nsm_unmonitor() return void.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h    | 1 +
 include/linux/lockd/sm_inter.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 30a6a9c1ce42..38344bfb814a 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -245,6 +245,7 @@ extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
  * Host monitoring
  */
 int		  nsm_monitor(const struct nlm_host *host);
+void		  nsm_unmonitor(const struct nlm_host *host);
 
 /*
  * This is used in garbage collection and resource reclaim
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 546b6102b0d7..896a5e303323 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -41,7 +41,6 @@ struct nsm_res {
 	u32		state;
 };
 
-int		nsm_unmonitor(struct nlm_host *);
 extern int	nsm_local_state;
 
 #endif /* LINUX_LOCKD_SM_INTER_H */
-- 
cgit v1.2.2


From 9c1bfd037f7ff8badaecb47418f109148d88bf45 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:01:59 -0500
Subject: NSM: Move NSM-related XDR data structures to lockd's xdr.h

Clean up: NSM's XDR data structures are used only in fs/lockd/mon.c,
so move them there.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/sm_inter.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 896a5e303323..dd9d8a5bb316 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -21,26 +21,6 @@
 #define SM_MAXSTRLEN	1024
 #define SM_PRIV_SIZE	16
 
-/*
- * Arguments for all calls to statd
- */
-struct nsm_args {
-	__be32		addr;		/* remote address */
-	u32		prog;		/* RPC callback info */
-	u32		vers;
-	u32		proc;
-
-	char *		mon_name;
-};
-
-/*
- * Result returned by statd
- */
-struct nsm_res {
-	u32		status;
-	u32		state;
-};
-
 extern int	nsm_local_state;
 
 #endif /* LINUX_LOCKD_SM_INTER_H */
-- 
cgit v1.2.2


From 36e8e668d3e6a61848a8921ddeb663b417299fa5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:02:07 -0500
Subject: NSM: Move NSM program and procedure numbers to fs/lockd/mon.c

Clean up: Move the RPC program and procedure numbers for NSM into the
one source file that needs them: fs/lockd/mon.c.

And, as with NLM, NFS, and rpcbind calls, use NSMPROC_FOO instead of
SM_FOO for NSM procedure numbers.

Finally, make a couple of comments more precise: what is referred to
here as SM_NOTIFY is really the NLM (lockd) NLMPROC_SM_NOTIFY downcall,
not NSMPROC_NOTIFY.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/sm_inter.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index dd9d8a5bb316..116bf38535a0 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -9,15 +9,6 @@
 #ifndef LINUX_LOCKD_SM_INTER_H
 #define LINUX_LOCKD_SM_INTER_H
 
-#define SM_PROGRAM	100024
-#define SM_VERSION	1
-#define SM_STAT		1
-#define SM_MON		2
-#define SM_UNMON	3
-#define SM_UNMON_ALL	4
-#define SM_SIMU_CRASH	5
-#define SM_NOTIFY	6
-
 #define SM_MAXSTRLEN	1024
 #define SM_PRIV_SIZE	16
 
-- 
cgit v1.2.2


From 67c6d107a689243979a2b5f15244b5261634a924 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:02:45 -0500
Subject: NSM: Move nsm_find() to fs/lockd/mon.c

The nsm_find() function sets up fresh nsm_handle entries.  This is
where we will store the "priv" cookie used to lookup nsm_handles during
reboot recovery.  The cookie will be constructed when nsm_find()
creates a new nsm_handle.

As much as possible, I would like to keep everything that handles a
"priv" cookie in fs/lockd/mon.c so that all the smarts are in one
source file.  That organization should make it pretty simple to see how
all this works.

To me, it makes more sense than the current arrangement to keep
nsm_find() with nsm_monitor() and nsm_unmonitor().

So, start reorganizing by moving nsm_find() into fs/lockd/mon.c.  The
nsm_release() function comes along too, since it shares the nsm_lock
global variable.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 38344bfb814a..8d715363c6ac 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -247,6 +247,12 @@ extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
 int		  nsm_monitor(const struct nlm_host *host);
 void		  nsm_unmonitor(const struct nlm_host *host);
 
+struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
+					const char *hostname,
+					const size_t hostname_len,
+					const int create);
+void		  nsm_release(struct nsm_handle *nsm);
+
 /*
  * This is used in garbage collection and resource reclaim
  * A return value != 0 means destroy the lock/block/share
-- 
cgit v1.2.2


From 7e44d3bea21fbb9494930d1cd35ca92a9a4a3279 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:16 -0500
Subject: NSM: Generate NSMPROC_MON's "priv" argument when nsm_handle is
 created

Introduce a new data type, used by both the in-kernel NLM and NSM
implementations, that is used to manage the opaque "priv" argument
for the NSMPROC_MON and NLMPROC_SM_NOTIFY calls.

Construct the "priv" cookie when the nsm_handle is created.

The nsm_init_private() function may look a little strange, but it is
roughly equivalent to how the XDR encoder formed the "priv" argument.
It's going to go away soon.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h    | 1 +
 include/linux/lockd/sm_inter.h | 1 -
 include/linux/lockd/xdr.h      | 6 ++++++
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 8d715363c6ac..194fa8a66398 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -85,6 +85,7 @@ struct nsm_handle {
 	size_t			sm_addrlen;
 	unsigned int		sm_monitored : 1,
 				sm_sticky : 1;	/* don't unmonitor */
+	struct nsm_private	sm_priv;
 	char			sm_addrbuf[NSM_ADDRBUF];
 };
 
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 116bf38535a0..5cef5a79dd94 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -10,7 +10,6 @@
 #define LINUX_LOCKD_SM_INTER_H
 
 #define SM_MAXSTRLEN	1024
-#define SM_PRIV_SIZE	16
 
 extern int	nsm_local_state;
 
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d6b3a802c046..6b5199263858 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -13,6 +13,12 @@
 #include <linux/nfs.h>
 #include <linux/sunrpc/xdr.h>
 
+#define SM_PRIV_SIZE		16
+
+struct nsm_private {
+	unsigned char		data[SM_PRIV_SIZE];
+};
+
 struct svc_rqst;
 
 #define NLM_MAXCOOKIELEN    	32
-- 
cgit v1.2.2


From 7fefc9cb9d5f129c238d93166f705c96ca2e7e51 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:31 -0500
Subject: NLM: Change nlm_host_rebooted() to take a single nlm_reboot argument

Pass the nlm_reboot data structure directly from the NLMPROC_SM_NOTIFY
XDR decoders to nlm_host_rebooted().  This eliminates some packing and
unpacking of the NLMPROC_SM_NOTIFY results, and prepares for passing
these results, including the "priv" cookie, directly to a lookup
routine in fs/lockd/mon.c.

This patch changes code organization but should not cause any
behavioral change.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 194fa8a66398..2a3533ea38dd 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -239,8 +239,7 @@ void		  nlm_rebind_host(struct nlm_host *);
 struct nlm_host * nlm_get_host(struct nlm_host *);
 void		  nlm_release_host(struct nlm_host *);
 void		  nlm_shutdown_hosts(void);
-extern void	  nlm_host_rebooted(const struct sockaddr_in *, const char *,
-					unsigned int, u32);
+void		  nlm_host_rebooted(const struct nlm_reboot *);
 
 /*
  * Host monitoring
-- 
cgit v1.2.2


From 576df4634e37e46b441fefb91915184edb13bb94 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:39 -0500
Subject: NLM: Decode "priv" argument of NLMPROC_SM_NOTIFY as an opaque

The NLM XDR decoders for the NLMPROC_SM_NOTIFY procedure should treat
their "priv" argument truly as an opaque, as defined by the protocol,
and let the upper layers figure out what is in it.

This will make it easier to modify the contents and interpretation of
the "priv" argument, and keep knowledge about what's in "priv" local
to fs/lockd/mon.c.

For now, the NLM and NSM implementations should behave exactly as they
did before.

The formation of the address of the rebooted host in
nlm_host_rebooted() may look a little strange, but it is the inverse
of how nsm_init_private() forms the private cookie.  Plus, it's
going away soon anyway.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/xdr.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 6b5199263858..6338866222a8 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -83,10 +83,10 @@ struct nlm_res {
  * statd callback when client has rebooted
  */
 struct nlm_reboot {
-	char *		mon;
-	unsigned int	len;
-	u32		state;
-	__be32		addr;
+	char			*mon;
+	unsigned int		len;
+	u32			state;
+	struct nsm_private	priv;
 };
 
 /*
-- 
cgit v1.2.2


From 3420a8c4359a189f7d854ed7075d151257415447 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:03:46 -0500
Subject: NSM: Add nsm_lookup() function

Introduce a new API to fs/lockd/mon.c that allows nlm_host_rebooted()
to lookup up nsm_handles via the contents of an nlm_reboot struct.

The new function is equivalent to calling nsm_find() with @create set
to zero, but it takes a struct nlm_reboot instead of separate
arguments.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 2a3533ea38dd..5e3ad926de89 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -251,6 +251,7 @@ struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
 					const char *hostname,
 					const size_t hostname_len,
 					const int create);
+struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info);
 void		  nsm_release(struct nsm_handle *nsm);
 
 /*
-- 
cgit v1.2.2


From 92fd91b998a5216a6d6606704e71d541a180216c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 5 Dec 2008 19:04:01 -0500
Subject: NLM: Remove "create" argument from nsm_find()

Clean up: nsm_find() now has only one caller, and that caller
unconditionally sets the @create argument. Thus the @create
argument is no longer needed.

Since nsm_find() now has a more specific purpose, pick a more
appropriate name for it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 5e3ad926de89..1ccd49e97a7f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -247,10 +247,10 @@ void		  nlm_host_rebooted(const struct nlm_reboot *);
 int		  nsm_monitor(const struct nlm_host *host);
 void		  nsm_unmonitor(const struct nlm_host *host);
 
-struct nsm_handle *nsm_find(const struct sockaddr *sap, const size_t salen,
+struct nsm_handle *nsm_get_handle(const struct sockaddr *sap,
+					const size_t salen,
 					const char *hostname,
-					const size_t hostname_len,
-					const int create);
+					const size_t hostname_len);
 struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info);
 void		  nsm_release(struct nsm_handle *nsm);
 
-- 
cgit v1.2.2


From e6765b83977f07983c7a10e6bbb19d6c7bbfc3a4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:14 -0500
Subject: NSM: Remove include/linux/lockd/sm_inter.h

Clean up: The include/linux/lockd/sm_inter.h header is nearly empty
now.  Remove it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h    |  1 +
 include/linux/lockd/sm_inter.h | 16 ----------------
 include/linux/lockd/xdr.h      |  1 +
 3 files changed, 2 insertions(+), 16 deletions(-)
 delete mode 100644 include/linux/lockd/sm_inter.h

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 1ccd49e97a7f..8b57467375cc 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -205,6 +205,7 @@ extern struct svc_procedure	nlmsvc_procedures4[];
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
 extern int			nsm_use_hostnames;
+extern int			nsm_local_state;
 
 /*
  * Lockd client functions
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
deleted file mode 100644
index 5cef5a79dd94..000000000000
--- a/include/linux/lockd/sm_inter.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * linux/include/linux/lockd/sm_inter.h
- *
- * Declarations for the kernel statd client.
- *
- * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_SM_INTER_H
-#define LINUX_LOCKD_SM_INTER_H
-
-#define SM_MAXSTRLEN	1024
-
-extern int	nsm_local_state;
-
-#endif /* LINUX_LOCKD_SM_INTER_H */
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 6338866222a8..7dc5b6cb44cd 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -13,6 +13,7 @@
 #include <linux/nfs.h>
 #include <linux/sunrpc/xdr.h>
 
+#define SM_MAXSTRLEN		1024
 #define SM_PRIV_SIZE		16
 
 struct nsm_private {
-- 
cgit v1.2.2


From 8529bc51d30b8f001734b29b21a51b579c260f5b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:22 -0500
Subject: NSM: Move nsm_addr() to fs/lockd/mon.c

Clean up: nsm_addr_in() is no longer used, and nsm_addr() is used only in
fs/lockd/mon.c, so move it there.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 8b57467375cc..6ab0449bc828 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -112,16 +112,6 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
 	return (struct sockaddr *)&host->h_srcaddr;
 }
 
-static inline struct sockaddr_in *nsm_addr_in(const struct nsm_handle *handle)
-{
-	return (struct sockaddr_in *)&handle->sm_addr;
-}
-
-static inline struct sockaddr *nsm_addr(const struct nsm_handle *handle)
-{
-	return (struct sockaddr *)&handle->sm_addr;
-}
-
 /*
  * Map an fl_owner_t into a unique 32-bit "pid"
  */
-- 
cgit v1.2.2


From d1208f70738c91f13b4eadb1b7a694082e439da2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:44 -0500
Subject: NLM: nlm_privileged_requester() doesn't recognize mapped loopback
 address

Commit b85e4676 added the nlm_privileged_requester() helper to check
whether an RPC request was sent from a local privileged caller.  It
recognizes IPv4 privileged callers (from "127.0.0.1"), and IPv6
privileged callers (from "::1").

However, IPV6_ADDR_LOOPBACK is not set for the mapped IPv4 loopback
address (::ffff:7f00:0001), so the test breaks when the kernel's RPC
service is IPv6-enabled but user space is calling via the IPv4
loopback address.  This is actually the most common case for IPv6-
enabled RPC services on Linux.

Rewrite the IPv6 check to handle the mapped IPv4 loopback address as
well as a normal IPv6 loopback address.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 6ab0449bc828..80d7e8a8257d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -299,8 +299,14 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap)
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 {
 	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
-	return (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK) &&
-			(ntohs(sin6->sin6_port) < 1024);
+
+	if (ntohs(sin6->sin6_port) > 1023)
+		return 0;
+
+	if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED)
+		return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]);
+
+	return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK;
 }
 #else	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
-- 
cgit v1.2.2


From 57ef692588bc225853ca3267ca5b7cea2b07e058 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 11 Dec 2008 17:56:52 -0500
Subject: NLM: Rewrite IPv4 privileged requester's check

Clean up.

For consistency, rewrite the IPv4 check to match the same style as the
new IPv6 check.  Note that ipv4_is_loopback() is somewhat broader in
its interpretation of what is a loopback address than simply
"127.0.0.1".

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/lockd/lockd.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 80d7e8a8257d..aa6fe7026de7 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -291,8 +291,11 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
 {
 	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-	return (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) &&
-			(ntohs(sin->sin_port) < 1024);
+
+	if (ntohs(sin->sin_port) > 1023)
+		return 0;
+
+	return ipv4_is_loopback(sin->sin_addr.s_addr);
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-- 
cgit v1.2.2


From 6f49a57aa5a0c6d4e4e27c85f7af6c83325a12d1 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:14 -0700
Subject: dmaengine: up-level reference counting to the module level

Simply, if a client wants any dmaengine channel then prevent all dmaengine
modules from being removed.  Once the clients are done re-enable module
removal.

Why?, beyond reducing complication:
1/ Tracking reference counts per-transaction in an efficient manner, as
   is currently done, requires a complicated scheme to avoid cache-line
   bouncing effects.
2/ Per-transaction ref-counting gives the false impression that a
   dma-driver can be gracefully removed ahead of its user (net, md, or
   dma-slave)
3/ None of the in-tree dma-drivers talk to hot pluggable hardware, but
   if such an engine were built one day we still would not need to notify
   clients of remove events.  The driver can simply return NULL to a
   ->prep() request, something that is much easier for a client to handle.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index e4ec7e7b8056..d18d37d1015d 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -165,7 +165,6 @@ struct dma_slave {
  */
 
 struct dma_chan_percpu {
-	local_t refcount;
 	/* stats */
 	unsigned long memcpy_count;
 	unsigned long bytes_transferred;
@@ -205,26 +204,6 @@ struct dma_chan {
 
 void dma_chan_cleanup(struct kref *kref);
 
-static inline void dma_chan_get(struct dma_chan *chan)
-{
-	if (unlikely(chan->slow_ref))
-		kref_get(&chan->refcount);
-	else {
-		local_inc(&(per_cpu_ptr(chan->local, get_cpu())->refcount));
-		put_cpu();
-	}
-}
-
-static inline void dma_chan_put(struct dma_chan *chan)
-{
-	if (unlikely(chan->slow_ref))
-		kref_put(&chan->refcount, dma_chan_cleanup);
-	else {
-		local_dec(&(per_cpu_ptr(chan->local, get_cpu())->refcount));
-		put_cpu();
-	}
-}
-
 /*
  * typedef dma_event_callback - function pointer to a DMA event callback
  * For each channel added to the system this routine is called for each client.
-- 
cgit v1.2.2


From bec085134e446577a983f17f57d642a88d1af53b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:14 -0700
Subject: dmaengine: centralize channel allocation, introduce dma_find_channel

Allowing multiple clients to each define their own channel allocation
scheme quickly leads to a pathological situation.  For memory-to-memory
offload all clients can share a central allocator.

This simply moves the existing async_tx allocator to dmaengine with
minimal fixups:
* async_tx.c:get_chan_ref_by_cap --> dmaengine.c:nth_chan
* async_tx.c:async_tx_rebalance --> dmaengine.c:dma_channel_rebalance
* split out common code from async_tx.c:__async_tx_find_channel -->
  dma_find_channel

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index d18d37d1015d..b466f02e2433 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -182,6 +182,7 @@ struct dma_chan_percpu {
  * @device_node: used to add this to the device chan list
  * @local: per-cpu pointer to a struct dma_chan_percpu
  * @client-count: how many clients are using this channel
+ * @table_count: number of appearances in the mem-to-mem allocation table
  */
 struct dma_chan {
 	struct dma_device *device;
@@ -198,6 +199,7 @@ struct dma_chan {
 	struct list_head device_node;
 	struct dma_chan_percpu *local;
 	int client_count;
+	int table_count;
 };
 
 #define to_dma_chan(p) container_of(p, struct dma_chan, dev)
@@ -468,6 +470,7 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
 int dma_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
 
 /* --- Helper iov-locking functions --- */
 
-- 
cgit v1.2.2


From 2ba05622b8b143b0c95968ba59bddfbd6d2f2559 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:14 -0700
Subject: dmaengine: provide a common 'issue_pending_all' implementation

async_tx and net_dma each have open-coded versions of issue_pending_all,
so provide a common routine in dmaengine.

The implementation needs to walk the global device list, so implement
rcu to allow dma_issue_pending_all to run lockless.  Clients protect
themselves from channel removal events by holding a dmaengine reference.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/async_tx.h  | 2 +-
 include/linux/dmaengine.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 1c816775f135..45f6297821bd 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -59,7 +59,7 @@ enum async_tx_flags {
 };
 
 #ifdef CONFIG_DMA_ENGINE
-void async_tx_issue_pending_all(void);
+#define async_tx_issue_pending_all dma_issue_pending_all
 #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 #include <asm/async_tx.h>
 #else
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index b466f02e2433..57a43adfc39e 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -471,6 +471,7 @@ int dma_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
+void dma_issue_pending_all(void);
 
 /* --- Helper iov-locking functions --- */
 
-- 
cgit v1.2.2


From f67b45999205164958de4ec0658d51fa4bee066d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:15 -0700
Subject: net_dma: convert to dma_find_channel

Use the general-purpose channel allocation provided by dmaengine.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/netdevice.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 41e1224651cf..bac2c458d9b8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1113,9 +1113,6 @@ struct softnet_data
 	struct sk_buff		*completion_queue;
 
 	struct napi_struct	backlog;
-#ifdef CONFIG_NET_DMA
-	struct dma_chan		*net_dma;
-#endif
 };
 
 DECLARE_PER_CPU(struct softnet_data,softnet_data);
-- 
cgit v1.2.2


From 59b5ec21446b9239d706ab237fb261d525b75e81 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:15 -0700
Subject: dmaengine: introduce dma_request_channel and private channels

This interface is primarily for device-to-memory clients which need to
search for dma channels with platform-specific characteristics.  The
prototype is:

struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
                                     dma_filter_fn filter_fn,
                                     void *filter_param);

When the optional 'filter_fn' parameter is set to NULL
dma_request_channel simply returns the first channel that satisfies the
capability mask.  Otherwise, when the mask parameter is insufficient for
specifying the necessary channel, the filter_fn routine can be used to
disposition the available channels in the system. The filter_fn routine
is called once for each free channel in the system.  Upon seeing a
suitable channel filter_fn returns DMA_ACK which flags that channel to
be the return value from dma_request_channel.  A channel allocated via
this interface is exclusive to the caller, until dma_release_channel()
is called.

To ensure that all channels are not consumed by the general-purpose
allocator the DMA_PRIVATE capability is provided to exclude a dma_device
from general-purpose (memory-to-memory) consideration.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 57a43adfc39e..fe40bc020af6 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -89,6 +89,7 @@ enum dma_transaction_type {
 	DMA_MEMSET,
 	DMA_MEMCPY_CRC32C,
 	DMA_INTERRUPT,
+	DMA_PRIVATE,
 	DMA_SLAVE,
 };
 
@@ -223,6 +224,18 @@ struct dma_client;
 typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
 		struct dma_chan *chan, enum dma_state state);
 
+/**
+ * typedef dma_filter_fn - callback filter for dma_request_channel
+ * @chan: channel to be reviewed
+ * @filter_param: opaque parameter passed through dma_request_channel
+ *
+ * When this optional parameter is specified in a call to dma_request_channel a
+ * suitable channel is passed to this routine for further dispositioning before
+ * being returned.  Where 'suitable' indicates a non-busy channel that
+ * satisfies the given capability mask.
+ */
+typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+
 /**
  * struct dma_client - info on the entity making use of DMA services
  * @event_callback: func ptr to call when something happens
@@ -472,6 +485,9 @@ void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
 void dma_issue_pending_all(void);
+#define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
+struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
+void dma_release_channel(struct dma_chan *chan);
 
 /* --- Helper iov-locking functions --- */
 
-- 
cgit v1.2.2


From 33df8ca068123457db56c316946a3c0e4ef787d6 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:15 -0700
Subject: dmatest: convert to dma_request_channel

Replace the client registration infrastructure with a custom loop to
poll for channels.  Once dma_request_channel returns NULL stop asking
for channels.  A userspace side effect of this change if that loading
the dmatest module before loading a dma driver will result in no
channels being found, previously dmatest would get a callback.  To
facilitate testing in the built-in case dmatest_init is marked as a
late_initcall.  Another side effect is that channels under test can not
be used for any other purpose.

Cc: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index fe40bc020af6..6f2d070ac7f3 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -400,6 +400,12 @@ __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
 	set_bit(tx_type, dstp->bits);
 }
 
+#define dma_cap_zero(mask) __dma_cap_zero(&(mask))
+static inline void __dma_cap_zero(dma_cap_mask_t *dstp)
+{
+	bitmap_zero(dstp->bits, DMA_TX_TYPE_END);
+}
+
 #define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask))
 static inline int
 __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
-- 
cgit v1.2.2


From 74465b4ff9ac1da503025c0a0042e023bfa6505c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:16 -0700
Subject: atmel-mci: convert to dma_request_channel and down-level dma_slave

dma_request_channel provides an exclusive channel, so we no longer need to
pass slave data through dmaengine.

Cc: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 38 --------------------------------------
 include/linux/dw_dmac.h   | 31 +++++++++++++++++++++++--------
 2 files changed, 23 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 6f2d070ac7f3..d63544cf8a1a 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -96,17 +96,6 @@ enum dma_transaction_type {
 /* last transaction type for creation of the capabilities mask */
 #define DMA_TX_TYPE_END (DMA_SLAVE + 1)
 
-/**
- * enum dma_slave_width - DMA slave register access width.
- * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses
- * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses
- * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses
- */
-enum dma_slave_width {
-	DMA_SLAVE_WIDTH_8BIT,
-	DMA_SLAVE_WIDTH_16BIT,
-	DMA_SLAVE_WIDTH_32BIT,
-};
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
@@ -132,32 +121,6 @@ enum dma_ctrl_flags {
  */
 typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
 
-/**
- * struct dma_slave - Information about a DMA slave
- * @dev: device acting as DMA slave
- * @dma_dev: required DMA master device. If non-NULL, the client can not be
- *	bound to other masters than this.
- * @tx_reg: physical address of data register used for
- *	memory-to-peripheral transfers
- * @rx_reg: physical address of data register used for
- *	peripheral-to-memory transfers
- * @reg_width: peripheral register width
- *
- * If dma_dev is non-NULL, the client can not be bound to other DMA
- * masters than the one corresponding to this device. The DMA master
- * driver may use this to determine if there is controller-specific
- * data wrapped around this struct. Drivers of platform code that sets
- * the dma_dev field must therefore make sure to use an appropriate
- * controller-specific dma slave structure wrapping this struct.
- */
-struct dma_slave {
-	struct device		*dev;
-	struct device		*dma_dev;
-	dma_addr_t		tx_reg;
-	dma_addr_t		rx_reg;
-	enum dma_slave_width	reg_width;
-};
-
 /**
  * struct dma_chan_percpu - the per-CPU part of struct dma_chan
  * @refcount: local_t used for open-coded "bigref" counting
@@ -248,7 +211,6 @@ typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filt
 struct dma_client {
 	dma_event_callback	event_callback;
 	dma_cap_mask_t		cap_mask;
-	struct dma_slave	*slave;
 	struct list_head	global_node;
 };
 
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 04d217b442bf..d797dde247f7 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -21,15 +21,35 @@ struct dw_dma_platform_data {
 	unsigned int	nr_channels;
 };
 
+/**
+ * enum dw_dma_slave_width - DMA slave register access width.
+ * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses
+ * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses
+ * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses
+ */
+enum dw_dma_slave_width {
+	DW_DMA_SLAVE_WIDTH_8BIT,
+	DW_DMA_SLAVE_WIDTH_16BIT,
+	DW_DMA_SLAVE_WIDTH_32BIT,
+};
+
 /**
  * struct dw_dma_slave - Controller-specific information about a slave
- * @slave: Generic information about the slave
- * @ctl_lo: Platform-specific initializer for the CTL_LO register
+ *
+ * @dma_dev: required DMA master device
+ * @tx_reg: physical address of data register used for
+ *	memory-to-peripheral transfers
+ * @rx_reg: physical address of data register used for
+ *	peripheral-to-memory transfers
+ * @reg_width: peripheral register width
  * @cfg_hi: Platform-specific initializer for the CFG_HI register
  * @cfg_lo: Platform-specific initializer for the CFG_LO register
  */
 struct dw_dma_slave {
-	struct dma_slave	slave;
+	struct device		*dma_dev;
+	dma_addr_t		tx_reg;
+	dma_addr_t		rx_reg;
+	enum dw_dma_slave_width	reg_width;
 	u32			cfg_hi;
 	u32			cfg_lo;
 };
@@ -54,9 +74,4 @@ struct dw_dma_slave {
 #define DWC_CFGL_HS_DST_POL	(1 << 18)	/* dst handshake active low */
 #define DWC_CFGL_HS_SRC_POL	(1 << 19)	/* src handshake active low */
 
-static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave)
-{
-	return container_of(slave, struct dw_dma_slave, slave);
-}
-
 #endif /* DW_DMAC_H */
-- 
cgit v1.2.2


From 209b84a88fe81341b4d8d465acc4a67cb7c3feb3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:17 -0700
Subject: dmaengine: replace dma_async_client_register with dmaengine_get

Now that clients no longer need to be notified of channel arrival
dma_async_client_register can simply increment the dmaengine_ref_count.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index d63544cf8a1a..37d95db156d3 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -318,8 +318,8 @@ struct dma_device {
 
 /* --- public DMA engine API --- */
 
-void dma_async_client_register(struct dma_client *client);
-void dma_async_client_unregister(struct dma_client *client);
+void dmaengine_get(void);
+void dmaengine_put(void);
 void dma_async_client_chan_request(struct dma_client *client);
 dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
 	void *dest, void *src, size_t len);
-- 
cgit v1.2.2


From aa1e6f1a385eb2b04171ec841f3b760091e4a8ee Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:17 -0700
Subject: dmaengine: kill struct dma_client and supporting infrastructure

All users have been converted to either the general-purpose allocator,
dma_find_channel, or dma_request_channel.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 50 +----------------------------------------------
 1 file changed, 1 insertion(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 37d95db156d3..db050e97d2b4 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -28,20 +28,6 @@
 #include <linux/rcupdate.h>
 #include <linux/dma-mapping.h>
 
-/**
- * enum dma_state - resource PNP/power management state
- * @DMA_RESOURCE_SUSPEND: DMA device going into low power state
- * @DMA_RESOURCE_RESUME: DMA device returning to full power
- * @DMA_RESOURCE_AVAILABLE: DMA device available to the system
- * @DMA_RESOURCE_REMOVED: DMA device removed from the system
- */
-enum dma_state {
-	DMA_RESOURCE_SUSPEND,
-	DMA_RESOURCE_RESUME,
-	DMA_RESOURCE_AVAILABLE,
-	DMA_RESOURCE_REMOVED,
-};
-
 /**
  * enum dma_state_client - state of the channel in the client
  * @DMA_ACK: client would like to use, or was using this channel
@@ -170,23 +156,6 @@ struct dma_chan {
 
 void dma_chan_cleanup(struct kref *kref);
 
-/*
- * typedef dma_event_callback - function pointer to a DMA event callback
- * For each channel added to the system this routine is called for each client.
- * If the client would like to use the channel it returns '1' to signal (ack)
- * the dmaengine core to take out a reference on the channel and its
- * corresponding device.  A client must not 'ack' an available channel more
- * than once.  When a channel is removed all clients are notified.  If a client
- * is using the channel it must 'ack' the removal.  A client must not 'ack' a
- * removed channel more than once.
- * @client - 'this' pointer for the client context
- * @chan - channel to be acted upon
- * @state - available or removed
- */
-struct dma_client;
-typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
-		struct dma_chan *chan, enum dma_state state);
-
 /**
  * typedef dma_filter_fn - callback filter for dma_request_channel
  * @chan: channel to be reviewed
@@ -199,21 +168,6 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
  */
 typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
 
-/**
- * struct dma_client - info on the entity making use of DMA services
- * @event_callback: func ptr to call when something happens
- * @cap_mask: only return channels that satisfy the requested capabilities
- *  a value of zero corresponds to any capability
- * @slave: data for preparing slave transfer. Must be non-NULL iff the
- *  DMA_SLAVE capability is requested.
- * @global_node: list_head for global dma_client_list
- */
-struct dma_client {
-	dma_event_callback	event_callback;
-	dma_cap_mask_t		cap_mask;
-	struct list_head	global_node;
-};
-
 typedef void (*dma_async_tx_callback)(void *dma_async_param);
 /**
  * struct dma_async_tx_descriptor - async transaction descriptor
@@ -285,8 +239,7 @@ struct dma_device {
 	int dev_id;
 	struct device *dev;
 
-	int (*device_alloc_chan_resources)(struct dma_chan *chan,
-			struct dma_client *client);
+	int (*device_alloc_chan_resources)(struct dma_chan *chan);
 	void (*device_free_chan_resources)(struct dma_chan *chan);
 
 	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
@@ -320,7 +273,6 @@ struct dma_device {
 
 void dmaengine_get(void);
 void dmaengine_put(void);
-void dma_async_client_chan_request(struct dma_client *client);
 dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
 	void *dest, void *src, size_t len);
 dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
-- 
cgit v1.2.2


From f27c580c3628d79b17f38976d842a6d7f3616e2e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:18 -0700
Subject: dmaengine: remove 'bigref' infrastructure

Reference counting is done at the module level so clients need not worry
that a channel will leave while they are actively using dmaengine.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index db050e97d2b4..bca2fc758894 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -142,10 +142,6 @@ struct dma_chan {
 	int chan_id;
 	struct device dev;
 
-	struct kref refcount;
-	int slow_ref;
-	struct rcu_head rcu;
-
 	struct list_head device_node;
 	struct dma_chan_percpu *local;
 	int client_count;
@@ -233,9 +229,6 @@ struct dma_device {
 	dma_cap_mask_t  cap_mask;
 	int max_xor;
 
-	struct kref refcount;
-	struct completion done;
-
 	int dev_id;
 	struct device *dev;
 
-- 
cgit v1.2.2


From 7dd602510128d7a64b11ff3b7d4f30ac8e3946ce Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:19 -0700
Subject: dmaengine: kill enum dma_state_client

DMA_NAK is now useless.  We can just use a bool instead.

Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index bca2fc758894..1419a5094478 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -28,18 +28,6 @@
 #include <linux/rcupdate.h>
 #include <linux/dma-mapping.h>
 
-/**
- * enum dma_state_client - state of the channel in the client
- * @DMA_ACK: client would like to use, or was using this channel
- * @DMA_DUP: client has already seen this channel, or is not using this channel
- * @DMA_NAK: client does not want to see any more channels
- */
-enum dma_state_client {
-	DMA_ACK,
-	DMA_DUP,
-	DMA_NAK,
-};
-
 /**
  * typedef dma_cookie_t - an opaque DMA cookie
  *
@@ -160,9 +148,10 @@ void dma_chan_cleanup(struct kref *kref);
  * When this optional parameter is specified in a call to dma_request_channel a
  * suitable channel is passed to this routine for further dispositioning before
  * being returned.  Where 'suitable' indicates a non-busy channel that
- * satisfies the given capability mask.
+ * satisfies the given capability mask.  It returns 'true' to indicate that the
+ * channel is suitable.
  */
-typedef enum dma_state_client (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
 
 typedef void (*dma_async_tx_callback)(void *dma_async_param);
 /**
-- 
cgit v1.2.2


From 41d5e59c1299f27983977bcfe3b360600996051c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:21 -0700
Subject: dmaengine: add a release for dma class devices and dependent
 infrastructure

Resolves:
WARNING: at drivers/base/core.c:122 device_release+0x4d/0x52()
Device 'dma0chan0' does not have a release() function, it is broken and must be fixed.

The dma_chan_dev object is introduced to gear-match sysfs kobject and
dmaengine channel lifetimes.  When a channel is removed access to the
sysfs entries return -ENODEV until the kobject can be released.

The bulk of the change is updates to existing code to handle the extra
layer of indirection between a dma_chan and its struct device.

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Cc: Haavard Skinnemoen <haavard.skinnemoen@atmel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 1419a5094478..d6b6bff355f4 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -113,7 +113,7 @@ struct dma_chan_percpu {
  * @device: ptr to the dma device who supplies this channel, always !%NULL
  * @cookie: last cookie value returned to client
  * @chan_id: channel ID for sysfs
- * @class_dev: class device for sysfs
+ * @dev: class device for sysfs
  * @refcount: kref, used in "bigref" slow-mode
  * @slow_ref: indicates that the DMA channel is free
  * @rcu: the DMA channel's RCU head
@@ -128,7 +128,7 @@ struct dma_chan {
 
 	/* sysfs */
 	int chan_id;
-	struct device dev;
+	struct dma_chan_dev *dev;
 
 	struct list_head device_node;
 	struct dma_chan_percpu *local;
@@ -136,7 +136,20 @@ struct dma_chan {
 	int table_count;
 };
 
-#define to_dma_chan(p) container_of(p, struct dma_chan, dev)
+/**
+ * struct dma_chan_dev - relate sysfs device node to backing channel device
+ * @chan - driver channel device
+ * @device - sysfs device
+ */
+struct dma_chan_dev {
+	struct dma_chan *chan;
+	struct device device;
+};
+
+static inline const char *dma_chan_name(struct dma_chan *chan)
+{
+	return dev_name(&chan->dev->device);
+}
 
 void dma_chan_cleanup(struct kref *kref);
 
-- 
cgit v1.2.2


From 864498aaa9fef69ee166da023d12413a7776342d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 6 Jan 2009 11:38:21 -0700
Subject: dmaengine: use idr for registering dma device numbers

This brings some predictability to dma device numbers, i.e. an rmmod/insmod
cycle may now result in /sys/class/dma/dma0chan0 being restored rather than
/sys/class/dma/dma1chan0 appearing.

Cc: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index d6b6bff355f4..64dea2ab326c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -140,10 +140,14 @@ struct dma_chan {
  * struct dma_chan_dev - relate sysfs device node to backing channel device
  * @chan - driver channel device
  * @device - sysfs device
+ * @dev_id - parent dma_device dev_id
+ * @idr_ref - reference count to gate release of dma_device dev_id
  */
 struct dma_chan_dev {
 	struct dma_chan *chan;
 	struct device device;
+	int dev_id;
+	atomic_t *idr_ref;
 };
 
 static inline const char *dma_chan_name(struct dma_chan *chan)
-- 
cgit v1.2.2


From 96e93eab20337d063c70d537bd7bc70d90e04fa3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 6 Jan 2009 10:49:34 -0800
Subject: gro: Add internal interfaces for VLAN

Previously GRO's only entry point from the outside is through
napi_gro_receive and napi_gro_frags.  These interfaces are for
device drivers.

This patch rearranges things to provide a new set of interfaces
for VLANs.  These interfaces are for internal use only.  The
VLAN code itself can then provide a set of entry points for
device drivers.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c28bbba3c23d..114091be8872 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1373,8 +1373,14 @@ extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
 extern void		napi_gro_flush(struct napi_struct *napi);
+extern int		dev_gro_receive(struct napi_struct *napi,
+					struct sk_buff *skb);
 extern int		napi_gro_receive(struct napi_struct *napi,
 					 struct sk_buff *skb);
+extern void		napi_reuse_skb(struct napi_struct *napi,
+				       struct sk_buff *skb);
+extern struct sk_buff *	napi_fraginfo_skb(struct napi_struct *napi,
+					  struct napi_gro_fraginfo *info);
 extern int		napi_gro_frags(struct napi_struct *napi,
 				       struct napi_gro_fraginfo *info);
 extern void		netif_nit_deliver(struct sk_buff *skb);
-- 
cgit v1.2.2


From e1c096e251e52773afeffbbcb74d0a072be47ea3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 6 Jan 2009 10:50:09 -0800
Subject: vlan: Add GRO interfaces

This patch adds GRO interfaces for hardware-assisted VLAN reception.
With this in place we're now at parity with LRO as far as the
interface is concerned.  That is, you can now take any LRO driver
and convert it over to GRO.

As the CB memory clashes with GRO's use of CB, I've removed it
entirely by storing dev in skb->dev.  This is OK because VLAN
gets called first thing in netif_receive_skb and skb->dev is
not used in between us calling netif_rx and netif_receive_skb
getting called.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index a5cb0c3f6dcf..f8ff918c208f 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -115,6 +115,11 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 			     u16 vlan_tci, int polling);
 extern int vlan_hwaccel_do_receive(struct sk_buff *skb);
+extern int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
+			    unsigned int vlan_tci, struct sk_buff *skb);
+extern int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
+			  unsigned int vlan_tci,
+			  struct napi_gro_fraginfo *info);
 
 #else
 static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -140,6 +145,20 @@ static inline int vlan_hwaccel_do_receive(struct sk_buff *skb)
 {
 	return 0;
 }
+
+static inline int vlan_gro_receive(struct napi_struct *napi,
+				   struct vlan_group *grp,
+				   unsigned int vlan_tci, struct sk_buff *skb)
+{
+	return NET_RX_DROP;
+}
+
+static inline int vlan_gro_frags(struct napi_struct *napi,
+				 struct vlan_group *grp, unsigned int vlan_tci,
+				 struct napi_gro_fraginfo *info)
+{
+	return NET_RX_DROP;
+}
 #endif
 
 /**
-- 
cgit v1.2.2


From 1fa17d4ba43d7e5aab5e90777b07da06524f6748 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <oliver@hartkopp.net>
Date: Tue, 6 Jan 2009 11:07:54 -0800
Subject: can: omit unneeded skb_clone() calls

The AF_CAN core delivered always cloned sk_buffs to the AF_CAN
protocols, although this was _only_ needed by the can-raw protocol.
With this (additionally documented) change, the AF_CAN core calls the
callback functions of the registered AF_CAN protocols with the original
(uncloned) sk_buff pointer and let's the can-raw protocol do the
skb_clone() itself which omits all unneeded skb_clone() calls for other
AF_CAN protocols.

Signed-off-by: Oliver Hartkopp <oliver@hartkopp.net>
Signed-off-by: Urs Thuermann <urs.thuermann@volkswagen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/can/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index f50785ad4781..25085cbadcfc 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -19,7 +19,7 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
-#define CAN_VERSION "20081130"
+#define CAN_VERSION "20090105"
 
 /* increment this number each time you change some user-space interface */
 #define CAN_ABI_VERSION "8"
-- 
cgit v1.2.2


From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 7 Jan 2009 08:45:46 -0800
Subject: async: Asynchronous function calls to speed up kernel boot

Right now, most of the kernel boot is strictly synchronous, such that
various hardware delays are done sequentially.

In order to make the kernel boot faster, this patch introduces
infrastructure to allow doing some of the initialization steps
asynchronously, which will hide significant portions of the hardware delays
in practice.

In order to not change device order and other similar observables, this
patch does NOT do full parallel initialization.

Rather, it operates more in the way an out of order CPU does; the work may
be done out of order and asynchronous, but the observable effects
(instruction retiring for the CPU) are still done in the original sequence.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 include/linux/async.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 include/linux/async.h

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
new file mode 100644
index 000000000000..c4ecacd0b327
--- /dev/null
+++ b/include/linux/async.h
@@ -0,0 +1,25 @@
+/*
+ * async.h: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+typedef u64 async_cookie_t;
+typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
+
+extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data);
+extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list);
+extern void async_synchronize_full(void);
+extern void async_synchronize_full_special(struct list_head *list);
+extern void async_synchronize_cookie(async_cookie_t cookie);
+extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list);
+
-- 
cgit v1.2.2


From efaee192063a54749c56b7383803e16fe553630e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Tue, 6 Jan 2009 07:20:54 -0800
Subject: async: make the final inode deletion an asynchronous event

this makes "rm -rf" on a (names cached) kernel tree go from
11.6 to 8.6 seconds on an ext3 filesystem

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 include/linux/fs.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7eba77f666e..e38a64d71eff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1184,6 +1184,11 @@ struct super_block {
 	 * generic_show_options()
 	 */
 	char *s_options;
+
+	/*
+	 * storage for asynchronous operations
+	 */
+	struct list_head s_async_list;
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
-- 
cgit v1.2.2


From b92a78e582b1a45649143dc86e526f5824092478 Mon Sep 17 00:00:00 2001
From: Rodolfo Giometti <giometti@linux.it>
Date: Thu, 23 Oct 2008 10:08:07 +0200
Subject: usb host: Oxford OXU210HP HCD driver.

This driver implements the support for Oxford OXU210HP USB high-speed host,
no peripheral nor OTG.

Signed-off-by: Rodolfo Giometti <giometti@linux.it>
Cc: Kan Liu <kan.k.liu@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/oxu210hp.h | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 include/linux/oxu210hp.h

(limited to 'include/linux')

diff --git a/include/linux/oxu210hp.h b/include/linux/oxu210hp.h
new file mode 100644
index 000000000000..0bf96eae5389
--- /dev/null
+++ b/include/linux/oxu210hp.h
@@ -0,0 +1,7 @@
+/* platform data for the OXU210HP HCD */
+
+struct oxu210hp_platform_data {
+	unsigned int bus16:1;
+	unsigned int use_hcd_otg:1;
+	unsigned int use_hcd_sph:1;
+};
-- 
cgit v1.2.2


From d767d888750a8e15656b7ee15d68f90a151b8936 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 6 Nov 2008 22:32:15 -0800
Subject: USB: wusb: annotate association types withe proper endianness

Also a trivial annotation in rh.c for:
drivers/usb/wusbcore/rh.c:366:9: warning: incorrect type in assignment (different base types)
drivers/usb/wusbcore/rh.c:366:9:    expected unsigned short [unsigned] [short] [usertype] <noident>
drivers/usb/wusbcore/rh.c:366:9:    got restricted __le16 [usertype] <noident>
drivers/usb/wusbcore/rh.c:367:9: warning: incorrect type in assignment (different base types)
drivers/usb/wusbcore/rh.c:367:9:    expected unsigned short [unsigned] [short] [usertype] <noident>
drivers/usb/wusbcore/rh.c:367:9:    got restricted __le16 [usertype] <noident>

Association types annotation fixes piles of warnings similar to:
drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types)
drivers/usb/wusbcore/cbaf.c:238:30:    expected restricted __le16 [usertype] id
drivers/usb/wusbcore/cbaf.c:238:30:    got int
drivers/usb/wusbcore/cbaf.c:238:30: warning: incorrect type in initializer (different base types)
drivers/usb/wusbcore/cbaf.c:238:30:    expected restricted __le16 [usertype] len
drivers/usb/wusbcore/cbaf.c:238:30:    got int

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: David Vrabel <david.vrabel@csr.com>
Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/association.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h
index 07c5e3cf5898..0a4a18b3c1bb 100644
--- a/include/linux/usb/association.h
+++ b/include/linux/usb/association.h
@@ -28,17 +28,17 @@ struct wusb_am_attr {
 };
 
 /* Different fields defined by the spec */
-#define WUSB_AR_AssociationTypeId	{ .id = 0x0000, .len =  2 }
-#define WUSB_AR_AssociationSubTypeId	{ .id = 0x0001, .len =  2 }
-#define WUSB_AR_Length			{ .id = 0x0002, .len =  4 }
-#define WUSB_AR_AssociationStatus	{ .id = 0x0004, .len =  4 }
-#define WUSB_AR_LangID			{ .id = 0x0008, .len =  2 }
-#define WUSB_AR_DeviceFriendlyName	{ .id = 0x000b, .len = 64 } /* max */
-#define WUSB_AR_HostFriendlyName	{ .id = 0x000c, .len = 64 } /* max */
-#define WUSB_AR_CHID			{ .id = 0x1000, .len = 16 }
-#define WUSB_AR_CDID			{ .id = 0x1001, .len = 16 }
-#define WUSB_AR_ConnectionContext	{ .id = 0x1002, .len = 48 }
-#define WUSB_AR_BandGroups		{ .id = 0x1004, .len =  2 }
+#define WUSB_AR_AssociationTypeId	{ .id = cpu_to_le16(0x0000), .len = cpu_to_le16(2) }
+#define WUSB_AR_AssociationSubTypeId	{ .id = cpu_to_le16(0x0001), .len = cpu_to_le16(2) }
+#define WUSB_AR_Length			{ .id = cpu_to_le16(0x0002), .len = cpu_to_le16(4) }
+#define WUSB_AR_AssociationStatus	{ .id = cpu_to_le16(0x0004), .len = cpu_to_le16(4) }
+#define WUSB_AR_LangID			{ .id = cpu_to_le16(0x0008), .len = cpu_to_le16(2) }
+#define WUSB_AR_DeviceFriendlyName	{ .id = cpu_to_le16(0x000b), .len = cpu_to_le16(64) } /* max */
+#define WUSB_AR_HostFriendlyName	{ .id = cpu_to_le16(0x000c), .len = cpu_to_le16(64) } /* max */
+#define WUSB_AR_CHID			{ .id = cpu_to_le16(0x1000), .len = cpu_to_le16(16) }
+#define WUSB_AR_CDID			{ .id = cpu_to_le16(0x1001), .len = cpu_to_le16(16) }
+#define WUSB_AR_ConnectionContext	{ .id = cpu_to_le16(0x1002), .len = cpu_to_le16(48) }
+#define WUSB_AR_BandGroups		{ .id = cpu_to_le16(0x1004), .len = cpu_to_le16(2) }
 
 /* CBAF Control Requests (AMS1.0[T4-1] */
 enum {
-- 
cgit v1.2.2


From 9ac39f28b5237a629e41ccfc1f73d3a55723045c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 12 Nov 2008 16:19:49 -0500
Subject: USB: add asynchronous autosuspend/autoresume support

This patch (as1160b) adds support routines for asynchronous autosuspend
and autoresume, with accompanying documentation updates.  There
already are several potential users of this interface, and others are
likely to arise as autosuspend support becomes more widespread.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index f72aa51f7bcd..859a88e6ce9c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -398,6 +398,7 @@ struct usb_tt;
  * @urbnum: number of URBs submitted for the whole device
  * @active_duration: total time device is not suspended
  * @autosuspend: for delayed autosuspends
+ * @autoresume: for autoresumes requested while in_interrupt
  * @pm_mutex: protects PM operations
  * @last_busy: time of last use
  * @autosuspend_delay: in jiffies
@@ -476,6 +477,7 @@ struct usb_device {
 
 #ifdef CONFIG_PM
 	struct delayed_work autosuspend;
+	struct work_struct autoresume;
 	struct mutex pm_mutex;
 
 	unsigned long last_busy;
@@ -513,6 +515,8 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 extern int usb_autopm_set_interface(struct usb_interface *intf);
 extern int usb_autopm_get_interface(struct usb_interface *intf);
 extern void usb_autopm_put_interface(struct usb_interface *intf);
+extern int usb_autopm_get_interface_async(struct usb_interface *intf);
+extern void usb_autopm_put_interface_async(struct usb_interface *intf);
 
 static inline void usb_autopm_enable(struct usb_interface *intf)
 {
@@ -539,8 +543,13 @@ static inline int usb_autopm_set_interface(struct usb_interface *intf)
 static inline int usb_autopm_get_interface(struct usb_interface *intf)
 { return 0; }
 
+static inline int usb_autopm_get_interface_async(struct usb_interface *intf)
+{ return 0; }
+
 static inline void usb_autopm_put_interface(struct usb_interface *intf)
 { }
+static inline void usb_autopm_put_interface_async(struct usb_interface *intf)
+{ }
 static inline void usb_autopm_enable(struct usb_interface *intf)
 { }
 static inline void usb_autopm_disable(struct usb_interface *intf)
-- 
cgit v1.2.2


From dc023dceec861c60bc1d1a17a2c6496ddac26ee7 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Thu, 13 Nov 2008 10:31:35 -0800
Subject: USB: Introduce usb_queue_reset() to do resets from atomic contexts

This patch introduces a new call to be able to do a USB reset from an
atomic contect. This is quite helpful in USB callbacks to handle
errors (when the only thing that can be done is to do a device
reset).

It is done queuing a work struct that will do the actual reset. The
struct is "attached" to an interface so pending requests from an
interface are removed when said interface is unbound from the driver.

The call flow then becomes:

usb_queue_reset_device()
  __usb_queue_reset_device() [workqueue]
    usb_reset_device()

usb_probe_interface()
  usb_cancel_queue_reset()      [error path]

usb_unbind_interface()
  usb_cancel_queue_reset()

usb_driver_release_interface()
  usb_cancel_queue_reset()

Note usb_cancel_queue_reset() needs smarts to try not to unqueue when
it is actually being executed. This happens when we run the reset from
the workqueue: usb_reset_device() is called and on interface unbind
time, usb_cancel_queue_reset() would be called. That would deadlock on
cancel_work_sync(). To avoid that, we set (before running
usb_reset_device()) usb_intf->reset_running and clear it inmediately
after returning.

Patch is against 2.6.28-rc2 and depends on
http://marc.info/?l=linux-usb&m=122581634925308&w=2 (as submitted by
Alan Stern).

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 859a88e6ce9c..c8e55aa979de 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -120,6 +120,11 @@ enum usb_interface_condition {
  *	to the sysfs representation for that device.
  * @pm_usage_cnt: PM usage counter for this interface; autosuspend is not
  *	allowed unless the counter is 0.
+ * @reset_ws: Used for scheduling resets from atomic context.
+ * @reset_running: set to 1 if the interface is currently running a
+ *      queued reset so that usb_cancel_queued_reset() doesn't try to
+ *      remove from the workqueue when running inside the worker
+ *      thread. See __usb_queue_reset_device().
  *
  * USB device drivers attach to interfaces on a physical device.  Each
  * interface encapsulates a single high level function, such as feeding
@@ -168,10 +173,12 @@ struct usb_interface {
 	unsigned needs_remote_wakeup:1;	/* driver requires remote wakeup */
 	unsigned needs_altsetting0:1;	/* switch to altsetting 0 is pending */
 	unsigned needs_binding:1;	/* needs delayed unbind/rebind */
+	unsigned reset_running:1;
 
 	struct device dev;		/* interface specific device info */
 	struct device *usb_dev;
 	int pm_usage_cnt;		/* usage counter for autosuspend */
+	struct work_struct reset_ws;	/* for resets in atomic context */
 };
 #define	to_usb_interface(d) container_of(d, struct usb_interface, dev)
 #define	interface_to_usbdev(intf) \
@@ -507,6 +514,7 @@ extern int usb_lock_device_for_reset(struct usb_device *udev,
 
 /* USB port reset for device reinitialization */
 extern int usb_reset_device(struct usb_device *dev);
+extern void usb_queue_reset_device(struct usb_interface *dev);
 
 extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
-- 
cgit v1.2.2


From f150fa1afbf69a87f54752579ff2bb769aad88b3 Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Thu, 13 Nov 2008 21:31:21 -0700
Subject: USB: Allow usbmon as a module even if usbcore is builtin

usbmon can only be built as a module if usbcore is a module too. Trivial
changes to the relevant Kconfig and Makefile (and a few trivial changes
elsewhere) allow usbmon to be built as a module even if usbcore is
builtin.

This is verified to work in all 9 permutations (3 correctly prohibited
by Kconfig, 6 build a suitable result).

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index c8e55aa979de..8bc81bffc195 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -336,7 +336,7 @@ struct usb_bus {
 #endif
 	struct device *dev;		/* device for this bus */
 
-#if defined(CONFIG_USB_MON)
+#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 	struct mon_bus *mon_bus;	/* non-null when associated */
 	int monitored;			/* non-zero when monitored */
 #endif
-- 
cgit v1.2.2


From 1537e0ad944acf3a4c2b311a646d7993b89499f7 Mon Sep 17 00:00:00 2001
From: Ben Efros <ben@pc-doctor.com>
Date: Tue, 18 Nov 2008 13:31:13 -0800
Subject: USB: storage devices and SAT

Add the SANE SENSE flag to indicate that a device is capable of handling
more than 18-bytes of sense data.  This functionality is required for
USB-ATA bridges implementing SAT.  A future patch will actually enable this
function for several devices.

The logic behind this is that we can detect support for SANE_SENSE in a few ways:
 1) ATA PASS THROUGH (12) or (16) execute successfully
 2) SPC-3 or higher is in use
 3) A previous CHECK CONDITION occurred with sense format 70-73 and had
    a length greater than 18-bytes total

Signed-off-by: Ben Efros <ben@pc-doctor.com>
Signed-off-by: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb_usual.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index d9a3bbe38e6b..998e5cbbf29e 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -52,8 +52,9 @@
 	US_FLAG(MAX_SECTORS_MIN,0x00002000)			\
 		/* Sets max_sectors to arch min */		\
 	US_FLAG(BULK_IGNORE_TAG,0x00004000)			\
-		/* Ignore tag mismatch in bulk operations */
-
+		/* Ignore tag mismatch in bulk operations */    \
+	US_FLAG(SANE_SENSE,     0x00008000)
+		/* Sane Sense (> 18 bytes) */
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.2


From 6084f1bf0c51a99cbba612ee90a4607cffb8b042 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Mon, 24 Nov 2008 12:00:01 -0800
Subject: USB: otg: gpio_vbus transceiver stub

gpio_vbus provides simple GPIO VBUS sensing for peripheral
controllers with an internal transceiver.
Optionally, a second GPIO can be used to control D+ pullup.

It also interfaces with the regulator framework to limit charging
currents when powered via USB. gpio_vbus requests the regulator
supplying "vbus_draw" and can enable/disable it or limit its
current depending on USB state.

[dbrownell@users.sourceforge.net: use drivers/otg, cleanups ]

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/gpio_vbus.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 include/linux/usb/gpio_vbus.h

(limited to 'include/linux')

diff --git a/include/linux/usb/gpio_vbus.h b/include/linux/usb/gpio_vbus.h
new file mode 100644
index 000000000000..d9f03ccc2d60
--- /dev/null
+++ b/include/linux/usb/gpio_vbus.h
@@ -0,0 +1,30 @@
+/*
+ * A simple GPIO VBUS sensing driver for B peripheral only devices
+ * with internal transceivers.
+ * Optionally D+ pullup can be controlled by a second GPIO.
+ *
+ * Copyright (c) 2008 Philipp Zabel <philipp.zabel@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/**
+ * struct gpio_vbus_mach_info - configuration for gpio_vbus
+ * @gpio_vbus: VBUS sensing GPIO
+ * @gpio_pullup: optional D+ or D- pullup GPIO (else negative/invalid)
+ * @gpio_vbus_inverted: true if gpio_vbus is active low
+ * @gpio_pullup_inverted: true if gpio_pullup is active low
+ *
+ * The VBUS sensing GPIO should have a pulldown, which will normally be
+ * part of a resistor ladder turning a 4.0V-5.25V level on VBUS into a
+ * value the GPIO detects as active.  Some systems will use comparators.
+ */
+struct gpio_vbus_mach_info {
+	int gpio_vbus;
+	int gpio_pullup;
+	bool gpio_vbus_inverted;
+	bool gpio_pullup_inverted;
+};
-- 
cgit v1.2.2


From 68144e0cc92125f41157ede7b060f83367bc4fe7 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <philipp.zabel@gmail.com>
Date: Mon, 24 Nov 2008 12:01:17 -0800
Subject: USB: otg: add otg_put_transceiver()

As Russell King points out, calling put_device(otg_transceiver->dev)
directly in driver cleanup paths makes assumptions about otg_transceiver
internals.

Signed-off-by: Philipp Zabel <philipp.zabel@gmail.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/otg.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h
index 1db25d152ad8..94df4fe6c6c0 100644
--- a/include/linux/usb/otg.h
+++ b/include/linux/usb/otg.h
@@ -84,6 +84,7 @@ extern int otg_set_transceiver(struct otg_transceiver *);
 
 /* for usb host and peripheral controller drivers */
 extern struct otg_transceiver *otg_get_transceiver(void);
+extern void otg_put_transceiver(struct otg_transceiver *);
 
 static inline int
 otg_start_hnp(struct otg_transceiver *otg)
-- 
cgit v1.2.2


From 65bfd2967c906ca322a4bb69a285fe0de8916ac6 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 25 Nov 2008 16:39:18 -0500
Subject: USB: Enhance usage of pm_message_t

This patch (as1177) modifies the USB core suspend and resume
routines.  The resume functions now will take a pm_message_t argument,
so they will know what sort of resume is occurring.  The new argument
is also passed to the port suspend/resume and bus suspend/resume
routines (although they don't use it for anything but debugging).

In addition, special pm_message_t values are used for user-initiated,
device-initiated (i.e., remote wakeup), and automatic suspend/resume.
By testing these values, drivers can tell whether or not a particular
suspend was an autosuspend.  Unfortunately, they can't do the same for
resumes -- not until the pm_message_t argument is also passed to the
drivers' resume methods.  That will require a bigger change.

IMO, the whole Power Management framework should have been set up this
way in the first place.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 8bc81bffc195..74d0b9990c73 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1067,7 +1067,7 @@ struct usb_device_driver {
 	void (*disconnect) (struct usb_device *udev);
 
 	int (*suspend) (struct usb_device *udev, pm_message_t message);
-	int (*resume) (struct usb_device *udev);
+	int (*resume) (struct usb_device *udev, pm_message_t message);
 	struct usbdrv_wrap drvwrap;
 	unsigned int supports_autosuspend:1;
 };
-- 
cgit v1.2.2


From 2ffcdb3bdadaf8260986e96384df26c94a6ad42c Mon Sep 17 00:00:00 2001
From: Bryan Wu <cooloney@kernel.org>
Date: Tue, 2 Dec 2008 21:33:43 +0200
Subject: USB: musb: use new platform data interface of musb to replace old one

Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/musb.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 630962c04ca4..d6aad0ea6033 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -47,6 +47,11 @@ struct musb_hdrc_config {
 	u8		ram_bits;	/* ram address size */
 
 	struct musb_hdrc_eps_bits *eps_bits;
+#ifdef CONFIG_BLACKFIN
+        /* A GPIO controlling VRSEL in Blackfin */
+        unsigned int    gpio_vrsel;
+#endif
+
 };
 
 struct musb_hdrc_platform_data {
-- 
cgit v1.2.2


From 3b23dd6f8a718e5339de4f7d86ce76a078b5f771 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 5 Dec 2008 14:10:34 -0500
Subject: USB: utilize the bus notifiers

This patch (as1185) makes usbcore take advantage of the bus
notifications sent out by the driver core.  Now we can create all our
device and interface attribute files before the device or interface
uevent is broadcast.

A side effect is that we no longer create the endpoint "pseudo"
devices at the same time as a device or interface is registered -- it
seems like a bad idea to try registering an endpoint before the
registration of its parent is complete.  So the routines for creating
and removing endpoint devices have been split out and renamed, and
they are called explicitly when needed.  A new bitflag is used for
keeping track of whether or not the interface's endpoint devices have
been created, since (just as with the interface attributes) they vary
with the altsetting and hence can be changed at random times.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 74d0b9990c73..e9d63562325a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -108,6 +108,7 @@ enum usb_interface_condition {
  *	(in probe()), bound to a driver, or unbinding (in disconnect())
  * @is_active: flag set when the interface is bound and not suspended.
  * @sysfs_files_created: sysfs attributes exist
+ * @ep_devs_created: endpoint child pseudo-devices exist
  * @unregistering: flag set when the interface is being unregistered
  * @needs_remote_wakeup: flag set when the driver requires remote-wakeup
  *	capability during autosuspend.
@@ -169,6 +170,7 @@ struct usb_interface {
 	enum usb_interface_condition condition;		/* state of binding */
 	unsigned is_active:1;		/* the interface is not suspended */
 	unsigned sysfs_files_created:1;	/* the sysfs attributes exist */
+	unsigned ep_devs_created:1;	/* endpoint "devices" exist */
 	unsigned unregistering:1;	/* unregistration is in progress */
 	unsigned needs_remote_wakeup:1;	/* driver requires remote wakeup */
 	unsigned needs_altsetting0:1;	/* switch to altsetting 0 is pending */
-- 
cgit v1.2.2


From 49367d8f1d9f26482cf7089489e90f0afd0a942c Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Fri, 12 Dec 2008 21:38:45 +0800
Subject: USB: mark "reject" field of struct urb as atomic_t

It is enough to protect accesses to reject field of urb
by marking it as atomic_t,also it is the only reason of
existence of usb_reject_lock,so remove the lock to make
code more clean.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Acked-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index e9d63562325a..4e8654a18250 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1340,7 +1340,7 @@ struct urb {
 	struct kref kref;		/* reference count of the URB */
 	void *hcpriv;			/* private data for host controller */
 	atomic_t use_count;		/* concurrent submissions counter */
-	u8 reject;			/* submissions will fail */
+	atomic_t reject;		/* submissions will fail */
 	int unlinked;			/* unlink error code */
 
 	/* public: documented fields in the urb that can be used by drivers */
-- 
cgit v1.2.2


From 856395d6e137b4e7194972cb7765f3de6a72ba61 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Thu, 18 Dec 2008 09:17:49 +0100
Subject: USB: extension of anchor API to unpoison an anchor

This extension allows unpoisoning an anchor allowing drivers that
resubmit URBs to reuse an anchor for methods like resume()

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4e8654a18250..e89639896508 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1485,6 +1485,7 @@ extern void usb_poison_urb(struct urb *urb);
 extern void usb_unpoison_urb(struct urb *urb);
 extern void usb_kill_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_poison_anchored_urbs(struct usb_anchor *anchor);
+extern void usb_unpoison_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_unlink_anchored_urbs(struct usb_anchor *anchor);
 extern void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor);
 extern void usb_unanchor_urb(struct urb *urb);
-- 
cgit v1.2.2


From 25ff1c316f6a763f1eefe7f8984b2d8c03888432 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 15 Dec 2008 12:43:41 -0500
Subject: USB: storage: add last-sector hacks

This patch (as1189b) adds some hacks to usb-storage for dealing with
the growing problems involving bad capacity values and last-sector
accesses:

	A new flag, US_FL_CAPACITY_OK, is created to indicate that
	the device is known to report its capacity correctly.  An
	unusual_devs entry for Linux's own File-backed Storage Gadget
	is added with this flag set, since g_file_storage always
	reports the correct capacity and since the capacity need
	not be even (it is determined by the size of the backing
	file).

	An entry in unusual_devs.h which has only the CAPACITY_OK
	flag set shouldn't prejudice libusual, since the device will
	work perfectly well with either usb-storage or ub.  So a
	new macro, COMPLIANT_DEV, is added to let libusual know
	about these entries.

	When a last-sector access succeeds and the total number of
	sectors is odd (the unexpected case, in which guessing that
	the number is even might cause trouble), a WARN is triggered.
	The kerneloops.org project will collect these warnings,
	allowing us to add CAPACITY_OK flags for the devices in
	question before implementing the default-to-even heuristic.
	If users want to prevent the stack dump produced by the WARN,
	they can disable the hack by adding an unusual_devs entry
	for their device with the CAPACITY_OK flag.

	When a last-sector access fails three times in a row and
	neither the FIX_CAPACITY nor the CAPACITY_OK flag is set,
	we assume the last-sector bug is present.  We replace the
	existing status and sense data with values that will cause
	the SCSI core to fail the access immediately rather than
	retry indefinitely.  This should fix the difficulties
	people have been having with Nokia phones.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb_usual.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 998e5cbbf29e..1eea1ab68dc4 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -53,8 +53,10 @@
 		/* Sets max_sectors to arch min */		\
 	US_FLAG(BULK_IGNORE_TAG,0x00004000)			\
 		/* Ignore tag mismatch in bulk operations */    \
-	US_FLAG(SANE_SENSE,     0x00008000)
-		/* Sane Sense (> 18 bytes) */
+	US_FLAG(SANE_SENSE,     0x00008000)			\
+		/* Sane Sense (> 18 bytes) */			\
+	US_FLAG(CAPACITY_OK,	0x00010000)			\
+		/* READ CAPACITY response is correct */
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
-- 
cgit v1.2.2


From 338b67b0c1a97ca705023a8189cf41aa0828d294 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 14 Aug 2008 09:37:34 -0700
Subject: USB: remove warn() macro from usb.h

USB should not be having it's own printk macros, so remove warn() and
use the system-wide standard of dev_warn() wherever possible.  In the
few places that will not work out, use a basic printk().

Now that all in-tree users are gone, remove the macro.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index e89639896508..28f68f587793 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1744,8 +1744,6 @@ extern void usb_unregister_notify(struct notifier_block *nb);
 	format "\n" , ## arg)
 #define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \
 	format "\n" , ## arg)
-#define warn(format, arg...) printk(KERN_WARNING KBUILD_MODNAME ": " \
-	format "\n" , ## arg)
 
 #endif  /* __KERNEL__ */
 
-- 
cgit v1.2.2


From 34c65d82e02147331701c7795e3144d511adf4e9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 18 Aug 2008 13:21:04 -0700
Subject: USB: remove info() macro from usb.h

USB should not be having it's own printk macros, so remove info() and
use the system-wide standard of dev_info() wherever possible.

No one in the tree is using the macro, so it can now be removed.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 28f68f587793..85ee9be9361e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1742,8 +1742,6 @@ extern void usb_unregister_notify(struct notifier_block *nb);
 
 #define err(format, arg...) printk(KERN_ERR KBUILD_MODNAME ": " \
 	format "\n" , ## arg)
-#define info(format, arg...) printk(KERN_INFO KBUILD_MODNAME ": " \
-	format "\n" , ## arg)
 
 #endif  /* __KERNEL__ */
 
-- 
cgit v1.2.2


From 5e07878787ad07361571150230cc3a8d522ae046 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:39 -0800
Subject: debugfs: add helpers for exporting a size_t simple value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the same spirit as debugfs_create_*(), introduce helpers for
exporting size_t values over debugfs.

The only trick done is that the format verifier is kept at %llu
instead of %zu; otherwise type warnings would pop up:

format ‘%zu’ expects type ‘size_t’, but argument 2 has type ‘long long unsigned int’

There is no real way to fix this one--however, we can consider %llu
and %zu to be compatible if we consider that we are using the same for
validating in debugfs_create_{x,u}{8,16,32}().

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/debugfs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index e1a6c046cea3..23936b16426b 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -63,6 +63,8 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode,
 				  struct dentry *parent, u16 *value);
 struct dentry *debugfs_create_x32(const char *name, mode_t mode,
 				  struct dentry *parent, u32 *value);
+struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+				     struct dentry *parent, size_t *value);
 struct dentry *debugfs_create_bool(const char *name, mode_t mode,
 				  struct dentry *parent, u32 *value);
 
-- 
cgit v1.2.2


From ace22f0881e1333d0c55ddf484e5352fe03a806a Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:33 -0800
Subject: wimax: headers for kernel API and user space interaction

Definitions for the user/kernel API protocol through generic
netlink. User space can copy it verbatim and use it.

Kernel API definition declares the main data types and calls for the
drivers to integrate into the WiMAX stack. Provides usage
documentation.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/wimax.h | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 include/linux/wimax.h

(limited to 'include/linux')

diff --git a/include/linux/wimax.h b/include/linux/wimax.h
new file mode 100644
index 000000000000..c89de7f4e5b9
--- /dev/null
+++ b/include/linux/wimax.h
@@ -0,0 +1,234 @@
+/*
+ * Linux WiMax
+ * API for user space
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * This file declares the user/kernel protocol that is spoken over
+ * Generic Netlink, as well as any type declaration that is to be used
+ * by kernel and user space.
+ *
+ * It is intended for user space to clone it verbatim to use it as a
+ * primary reference for definitions.
+ *
+ * Stuff intended for kernel usage as well as full protocol and stack
+ * documentation is rooted in include/net/wimax.h.
+ */
+
+#ifndef __LINUX__WIMAX_H__
+#define __LINUX__WIMAX_H__
+
+#include <linux/types.h>
+
+enum {
+	/**
+	 * Version of the interface (unsigned decimal, MMm, max 25.5)
+	 * M - Major: change if removing or modifying an existing call.
+	 * m - minor: change when adding a new call
+	 */
+	WIMAX_GNL_VERSION = 00,
+	/* Generic NetLink attributes */
+	WIMAX_GNL_ATTR_INVALID = 0x00,
+	WIMAX_GNL_ATTR_MAX = 10,
+};
+
+
+/*
+ * Generic NetLink operations
+ *
+ * Most of these map to an API call; _OP_ stands for operation, _RP_
+ * for reply and _RE_ for report (aka: signal).
+ */
+enum {
+	WIMAX_GNL_OP_MSG_FROM_USER,	/* User to kernel message */
+	WIMAX_GNL_OP_MSG_TO_USER,	/* Kernel to user message */
+	WIMAX_GNL_OP_RFKILL,	/* Run wimax_rfkill() */
+	WIMAX_GNL_OP_RESET,	/* Run wimax_rfkill() */
+	WIMAX_GNL_RE_STATE_CHANGE,	/* Report: status change */
+};
+
+
+/* Message from user / to user */
+enum {
+	WIMAX_GNL_MSG_IFIDX = 1,
+	WIMAX_GNL_MSG_PIPE_NAME,
+	WIMAX_GNL_MSG_DATA,
+};
+
+
+/*
+ * wimax_rfkill()
+ *
+ * The state of the radio (ON/OFF) is mapped to the rfkill subsystem's
+ * switch state (DISABLED/ENABLED).
+ */
+enum wimax_rf_state {
+	WIMAX_RF_OFF = 0,	/* Radio is off, rfkill on/enabled */
+	WIMAX_RF_ON = 1,	/* Radio is on, rfkill off/disabled */
+	WIMAX_RF_QUERY = 2,
+};
+
+/* Attributes */
+enum {
+	WIMAX_GNL_RFKILL_IFIDX = 1,
+	WIMAX_GNL_RFKILL_STATE,
+};
+
+
+/* Attributes for wimax_reset() */
+enum {
+	WIMAX_GNL_RESET_IFIDX = 1,
+};
+
+
+/*
+ * Attributes for the Report State Change
+ *
+ * For now we just have the old and new states; new attributes might
+ * be added later on.
+ */
+enum {
+	WIMAX_GNL_STCH_IFIDX = 1,
+	WIMAX_GNL_STCH_STATE_OLD,
+	WIMAX_GNL_STCH_STATE_NEW,
+};
+
+
+/**
+ * enum wimax_st - The different states of a WiMAX device
+ * @__WIMAX_ST_NULL: The device structure has been allocated and zeroed,
+ *     but still wimax_dev_add() hasn't been called. There is no state.
+ *
+ * @WIMAX_ST_DOWN: The device has been registered with the WiMAX and
+ *     networking stacks, but it is not initialized (normally that is
+ *     done with 'ifconfig DEV up' [or equivalent], which can upload
+ *     firmware and enable communications with the device).
+ *     In this state, the device is powered down and using as less
+ *     power as possible.
+ *     This state is the default after a call to wimax_dev_add(). It
+ *     is ok to have drivers move directly to %WIMAX_ST_UNINITIALIZED
+ *     or %WIMAX_ST_RADIO_OFF in _probe() after the call to
+ *     wimax_dev_add().
+ *     It is recommended that the driver leaves this state when
+ *     calling 'ifconfig DEV up' and enters it back on 'ifconfig DEV
+ *     down'.
+ *
+ * @__WIMAX_ST_QUIESCING: The device is being torn down, so no API
+ *     operations are allowed to proceed except the ones needed to
+ *     complete the device clean up process.
+ *
+ * @WIMAX_ST_UNINITIALIZED: [optional] Communication with the device
+ *     is setup, but the device still requires some configuration
+ *     before being operational.
+ *     Some WiMAX API calls might work.
+ *
+ * @WIMAX_ST_RADIO_OFF: The device is fully up; radio is off (wether
+ *     by hardware or software switches).
+ *     It is recommended to always leave the device in this state
+ *     after initialization.
+ *
+ * @WIMAX_ST_READY: The device is fully up and radio is on.
+ *
+ * @WIMAX_ST_SCANNING: [optional] The device has been instructed to
+ *     scan. In this state, the device cannot be actively connected to
+ *     a network.
+ *
+ * @WIMAX_ST_CONNECTING: The device is connecting to a network. This
+ *     state exists because in some devices, the connect process can
+ *     include a number of negotiations between user space, kernel
+ *     space and the device. User space needs to know what the device
+ *     is doing. If the connect sequence in a device is atomic and
+ *     fast, the device can transition directly to CONNECTED
+ *
+ * @WIMAX_ST_CONNECTED: The device is connected to a network.
+ *
+ * @__WIMAX_ST_INVALID: This is an invalid state used to mark the
+ *     maximum numeric value of states.
+ *
+ * Description:
+ *
+ * Transitions from one state to another one are atomic and can only
+ * be caused in kernel space with wimax_state_change(). To read the
+ * state, use wimax_state_get().
+ *
+ * States starting with __ are internal and shall not be used or
+ * referred to by drivers or userspace. They look ugly, but that's the
+ * point -- if any use is made non-internal to the stack, it is easier
+ * to catch on review.
+ *
+ * All API operations [with well defined exceptions] will take the
+ * device mutex before starting and then check the state. If the state
+ * is %__WIMAX_ST_NULL, %WIMAX_ST_DOWN, %WIMAX_ST_UNINITIALIZED or
+ * %__WIMAX_ST_QUIESCING, it will drop the lock and quit with
+ * -%EINVAL, -%ENOMEDIUM, -%ENOTCONN or -%ESHUTDOWN.
+ *
+ * The order of the definitions is important, so we can do numerical
+ * comparisons (eg: < %WIMAX_ST_RADIO_OFF means the device is not ready
+ * to operate).
+ */
+/*
+ * The allowed state transitions are described in the table below
+ * (states in rows can go to states in columns where there is an X):
+ *
+ *                                  UNINI   RADIO READY SCAN CONNEC CONNEC
+ *             NULL DOWN QUIESCING TIALIZED  OFF        NING  TING   TED
+ * NULL         -    x
+ * DOWN              -      x        x       x
+ * QUIESCING         x      -
+ * UNINITIALIZED            x        -       x
+ * RADIO_OFF                x                -     x
+ * READY                    x                x     -     x     x      x
+ * SCANNING                 x                x     x     -     x      x
+ * CONNECTING               x                x     x     x     -      x
+ * CONNECTED                x                x     x                  -
+ *
+ * This table not available in kernel-doc because the formatting messes it up.
+ */
+ enum wimax_st {
+	__WIMAX_ST_NULL = 0,
+	WIMAX_ST_DOWN,
+	__WIMAX_ST_QUIESCING,
+	WIMAX_ST_UNINITIALIZED,
+	WIMAX_ST_RADIO_OFF,
+	WIMAX_ST_READY,
+	WIMAX_ST_SCANNING,
+	WIMAX_ST_CONNECTING,
+	WIMAX_ST_CONNECTED,
+	__WIMAX_ST_INVALID			/* Always keep last */
+};
+
+
+#endif /* #ifndef __LINUX__WIMAX_H__ */
-- 
cgit v1.2.2


From ea912f4e7f264981faf8665cfb63d46d7f948117 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:35 -0800
Subject: wimax: debug macros and debug settings for the WiMAX stack

This file contains a simple debug framework that is used in the stack;
it allows the debug level to be controlled at compile-time (so the
debug code is optimized out) and at run-time (for what wasn't compiled
out).

This is eventually going to be moved to use dynamic_printk(). Just
need to find time to do it.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/wimax/debug.h | 453 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 453 insertions(+)
 create mode 100644 include/linux/wimax/debug.h

(limited to 'include/linux')

diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h
new file mode 100644
index 000000000000..ba0c49399a83
--- /dev/null
+++ b/include/linux/wimax/debug.h
@@ -0,0 +1,453 @@
+/*
+ * Linux WiMAX
+ * Collection of tools to manage debug operations.
+ *
+ *
+ * Copyright (C) 2005-2007 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * Don't #include this file directly, read on!
+ *
+ *
+ * EXECUTING DEBUGGING ACTIONS OR NOT
+ *
+ * The main thing this framework provides is decission power to take a
+ * debug action (like printing a message) if the current debug level
+ * allows it.
+ *
+ * The decission power is at two levels: at compile-time (what does
+ * not make it is compiled out) and at run-time. The run-time
+ * selection is done per-submodule (as they are declared by the user
+ * of the framework).
+ *
+ * A call to d_test(L) (L being the target debug level) returns true
+ * if the action should be taken because the current debug levels
+ * allow it (both compile and run time).
+ *
+ * It follows that a call to d_test() that can be determined to be
+ * always false at compile time will get the code depending on it
+ * compiled out by optimization.
+ *
+ *
+ * DEBUG LEVELS
+ *
+ * It is up to the caller to define how much a debugging level is.
+ *
+ * Convention sets 0 as "no debug" (so an action marked as debug level 0
+ * will always be taken). The increasing debug levels are used for
+ * increased verbosity.
+ *
+ *
+ * USAGE
+ *
+ * Group the code in modules and submodules inside each module [which
+ * in most cases maps to Linux modules and .c files that compose
+ * those].
+ *
+ *
+ * For each module, there is:
+ *
+ *  - a MODULENAME (single word, legal C identifier)
+ *
+ *  - a debug-levels.h header file that declares the list of
+ *    submodules and that is included by all .c files that use
+ *    the debugging tools. The file name can be anything.
+ *
+ *  - some (optional) .c code to manipulate the runtime debug levels
+ *    through debugfs.
+ *
+ * The debug-levels.h file would look like:
+ *
+ *     #ifndef __debug_levels__h__
+ *     #define __debug_levels__h__
+ *
+ *     #define D_MODULENAME modulename
+ *     #define D_MASTER 10
+ *
+ *     #include <linux/wimax/debug.h>
+ *
+ *     enum d_module {
+ *             D_SUBMODULE_DECLARE(submodule_1),
+ *             D_SUBMODULE_DECLARE(submodule_2),
+ *             ...
+ *             D_SUBMODULE_DECLARE(submodule_N)
+ *     };
+ *
+ *     #endif
+ *
+ * D_MASTER is the maximum compile-time debug level; any debug actions
+ * above this will be out. D_MODULENAME is the module name (legal C
+ * identifier), which has to be unique for each module (to avoid
+ * namespace collisions during linkage). Note those #defines need to
+ * be done before #including debug.h
+ *
+ * We declare N different submodules whose debug level can be
+ * independently controlled during runtime.
+ *
+ * In a .c file of the module (and only in one of them), define the
+ * following code:
+ *
+ *     struct d_level D_LEVEL[] = {
+ *             D_SUBMODULE_DEFINE(submodule_1),
+ *             D_SUBMODULE_DEFINE(submodule_2),
+ *             ...
+ *             D_SUBMODULE_DEFINE(submodule_N),
+ *     };
+ *     size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
+ *
+ * Externs for d_level_MODULENAME and d_level_size_MODULENAME are used
+ * and declared in this file using the D_LEVEL and D_LEVEL_SIZE macros
+ * #defined also in this file.
+ *
+ * To manipulate from user space the levels, create a debugfs dentry
+ * and then register each submodule with:
+ *
+ *     result = d_level_register_debugfs("PREFIX_", submodule_X, parent);
+ *     if (result < 0)
+ *            goto error;
+ *
+ * Where PREFIX_ is a name of your chosing. This will create debugfs
+ * file with a single numeric value that can be use to tweak it. To
+ * remove the entires, just use debugfs_remove_recursive() on 'parent'.
+ *
+ * NOTE: remember that even if this will show attached to some
+ *     particular instance of a device, the settings are *global*.
+ *
+ *
+ * On each submodule (for example, .c files), the debug infrastructure
+ * should be included like this:
+ *
+ *     #define D_SUBMODULE submodule_x     // matches one in debug-levels.h
+ *     #include "debug-levels.h"
+ *
+ * after #including all your include files.
+ *
+ *
+ * Now you can use the d_*() macros below [d_test(), d_fnstart(),
+ * d_fnend(), d_printf(), d_dump()].
+ *
+ * If their debug level is greater than D_MASTER, they will be
+ * compiled out.
+ *
+ * If their debug level is lower or equal than D_MASTER but greater
+ * than the current debug level of their submodule, they'll be
+ * ignored.
+ *
+ * Otherwise, the action will be performed.
+ */
+#ifndef __debug__h__
+#define __debug__h__
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+
+/* Backend stuff */
+
+/*
+ * Debug backend: generate a message header from a 'struct device'
+ *
+ * @head: buffer where to place the header
+ * @head_size: length of @head
+ * @dev: pointer to device used to generate a header from. If NULL,
+ *     an empty ("") header is generated.
+ */
+static inline
+void __d_head(char *head, size_t head_size,
+	      struct device *dev)
+{
+	if (dev == NULL)
+		head[0] = 0;
+	else if ((unsigned long)dev < 4096) {
+		printk(KERN_ERR "E: Corrupt dev %p\n", dev);
+		WARN_ON(1);
+	} else
+		snprintf(head, head_size, "%s %s: ",
+			 dev_driver_string(dev), dev->bus_id);
+}
+
+
+/*
+ * Debug backend: log some message if debugging is enabled
+ *
+ * @l: intended debug level
+ * @tag: tag to prefix the message with
+ * @dev: 'struct device' associated to this message
+ * @f: printf-like format and arguments
+ *
+ * Note this is optimized out if it doesn't pass the compile-time
+ * check; however, it is *always* compiled. This is useful to make
+ * sure the printf-like formats and variables are always checked and
+ * they don't get bit rot if you have all the debugging disabled.
+ */
+#define _d_printf(l, tag, dev, f, a...)					\
+do {									\
+	char head[64];							\
+	if (!d_test(l))							\
+		break;							\
+	__d_head(head, sizeof(head), dev);				\
+	printk(KERN_ERR "%s%s%s: " f, head, __func__, tag, ##a);	\
+} while (0)
+
+
+/*
+ * CPP sintatic sugar to generate A_B like symbol names when one of
+ * the arguments is a a preprocessor #define.
+ */
+#define __D_PASTE__(varname, modulename) varname##_##modulename
+#define __D_PASTE(varname, modulename) (__D_PASTE__(varname, modulename))
+#define _D_SUBMODULE_INDEX(_name) (D_SUBMODULE_DECLARE(_name))
+
+
+/*
+ * Store a submodule's runtime debug level and name
+ */
+struct d_level {
+	u8 level;
+	const char *name;
+};
+
+
+/*
+ * List of available submodules and their debug levels
+ *
+ * We call them d_level_MODULENAME and d_level_size_MODULENAME; the
+ * macros D_LEVEL and D_LEVEL_SIZE contain the name already for
+ * convenience.
+ *
+ * This array and the size are defined on some .c file that is part of
+ * the current module.
+ */
+#define D_LEVEL __D_PASTE(d_level, D_MODULENAME)
+#define D_LEVEL_SIZE __D_PASTE(d_level_size, D_MODULENAME)
+
+extern struct d_level D_LEVEL[];
+extern size_t D_LEVEL_SIZE;
+
+
+/*
+ * Frontend stuff
+ *
+ *
+ * Stuff you need to declare prior to using the actual "debug" actions
+ * (defined below).
+ */
+
+#ifndef D_MODULENAME
+#error D_MODULENAME is not defined in your debug-levels.h file
+/**
+ * D_MODULE - Name of the current module
+ *
+ * #define in your module's debug-levels.h, making sure it is
+ * unique. This has to be a legal C identifier.
+ */
+#define D_MODULENAME undefined_modulename
+#endif
+
+
+#ifndef D_MASTER
+#warning D_MASTER not defined, but debug.h included! [see docs]
+/**
+ * D_MASTER - Compile time maximum debug level
+ *
+ * #define in your debug-levels.h file to the maximum debug level the
+ * runtime code will be allowed to have. This allows you to provide a
+ * main knob.
+ *
+ * Anything above that level will be optimized out of the compile.
+ *
+ * Defaults to zero (no debug code compiled in).
+ *
+ * Maximum one definition per module (at the debug-levels.h file).
+ */
+#define D_MASTER 0
+#endif
+
+#ifndef D_SUBMODULE
+#error D_SUBMODULE not defined, but debug.h included! [see docs]
+/**
+ * D_SUBMODULE - Name of the current submodule
+ *
+ * #define in your submodule .c file before #including debug-levels.h
+ * to the name of the current submodule as previously declared and
+ * defined with D_SUBMODULE_DECLARE() (in your module's
+ * debug-levels.h) and D_SUBMODULE_DEFINE().
+ *
+ * This is used to provide runtime-control over the debug levels.
+ *
+ * Maximum one per .c file! Can be shared among different .c files
+ * (meaning they belong to the same submodule categorization).
+ */
+#define D_SUBMODULE undefined_module
+#endif
+
+
+/**
+ * D_SUBMODULE_DECLARE - Declare a submodule for runtime debug level control
+ *
+ * @_name: name of the submodule, restricted to the chars that make up a
+ *     valid C identifier ([a-zA-Z0-9_]).
+ *
+ * Declare in the module's debug-levels.h header file as:
+ *
+ * enum d_module {
+ *         D_SUBMODULE_DECLARE(submodule_1),
+ *         D_SUBMODULE_DECLARE(submodule_2),
+ *         D_SUBMODULE_DECLARE(submodule_3),
+ * };
+ *
+ * Some corresponding .c file needs to have a matching
+ * D_SUBMODULE_DEFINE().
+ */
+#define D_SUBMODULE_DECLARE(_name) __D_SUBMODULE_##_name
+
+
+/**
+ * D_SUBMODULE_DEFINE - Define a submodule for runtime debug level control
+ *
+ * @_name: name of the submodule, restricted to the chars that make up a
+ *     valid C identifier ([a-zA-Z0-9_]).
+ *
+ * Use once per module (in some .c file) as:
+ *
+ * static
+ * struct d_level d_level_SUBMODULENAME[] = {
+ *         D_SUBMODULE_DEFINE(submodule_1),
+ *         D_SUBMODULE_DEFINE(submodule_2),
+ *         D_SUBMODULE_DEFINE(submodule_3),
+ * };
+ * size_t d_level_size_SUBDMODULENAME = ARRAY_SIZE(d_level_SUBDMODULENAME);
+ *
+ * Matching D_SUBMODULE_DECLARE()s have to be present in a
+ * debug-levels.h header file.
+ */
+#define D_SUBMODULE_DEFINE(_name)		\
+[__D_SUBMODULE_##_name] = {			\
+	.level = 0,				\
+	.name = #_name				\
+}
+
+
+
+/* The actual "debug" operations */
+
+
+/**
+ * d_test - Returns true if debugging should be enabled
+ *
+ * @l: intended debug level (unsigned)
+ *
+ * If the master debug switch is enabled and the current settings are
+ * higher or equal to the requested level, then debugging
+ * output/actions should be enabled.
+ *
+ * NOTE:
+ *
+ * This needs to be coded so that it can be evaluated in compile
+ * time; this is why the ugly BUG_ON() is placed in there, so the
+ * D_MASTER evaluation compiles all out if it is compile-time false.
+ */
+#define d_test(l)							\
+({									\
+	unsigned __l = l;	/* type enforcer */			\
+	(D_MASTER) >= __l						\
+	&& ({								\
+		BUG_ON(_D_SUBMODULE_INDEX(D_SUBMODULE) >= D_LEVEL_SIZE);\
+		D_LEVEL[_D_SUBMODULE_INDEX(D_SUBMODULE)].level >= __l;	\
+	});								\
+})
+
+
+/**
+ * d_fnstart - log message at function start if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_fnstart(l, _dev, f, a...) _d_printf(l, " FNSTART", _dev, f, ## a)
+
+
+/**
+ * d_fnend - log message at function end if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_fnend(l, _dev, f, a...) _d_printf(l, " FNEND", _dev, f, ## a)
+
+
+/**
+ * d_printf - log message if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_printf(l, _dev, f, a...) _d_printf(l, "", _dev, f, ## a)
+
+
+/**
+ * d_dump - log buffer hex dump if debugging enabled
+ *
+ * @l: intended debug level
+ * @_dev: 'struct device' pointer, NULL if none (for context)
+ * @f: printf-like format and arguments
+ */
+#define d_dump(l, dev, ptr, size)			\
+do {							\
+	char head[64];					\
+	if (!d_test(l))					\
+		break;					\
+	__d_head(head, sizeof(head), dev);		\
+	print_hex_dump(KERN_ERR, head, 0, 16, 1,	\
+		       ((void *) ptr), (size), 0);	\
+} while (0)
+
+
+/**
+ * Export a submodule's debug level over debugfs as PREFIXSUBMODULE
+ *
+ * @prefix: string to prefix the name with
+ * @submodule: name of submodule (not a string, just the name)
+ * @dentry: debugfs parent dentry
+ *
+ * Returns: 0 if ok, < 0 errno on error.
+ *
+ * For removing, just use debugfs_remove_recursive() on the parent.
+ */
+#define d_level_register_debugfs(prefix, name, parent)			\
+({									\
+	int rc;								\
+	struct dentry *fd;						\
+	struct dentry *verify_parent_type = parent;			\
+	fd = debugfs_create_u8(						\
+		prefix #name, 0600, verify_parent_type,			\
+		&(D_LEVEL[__D_SUBMODULE_ ## name].level));		\
+	rc = PTR_ERR(fd);						\
+	if (IS_ERR(fd) && rc != -ENODEV)				\
+		printk(KERN_ERR "%s: Can't create debugfs entry %s: "	\
+		       "%d\n", __func__, prefix #name, rc);		\
+	else								\
+		rc = 0;							\
+	rc;								\
+})
+
+
+#endif /* #ifndef __debug__h__ */
-- 
cgit v1.2.2


From ea24652d253eabfb83e955e55ce032228d9d99b9 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:43 -0800
Subject: i2400m: host/device procotol and core driver definitions

The wimax/i2400m.h defines the structures and constants for the
host-device protocols:

 - boot / firmware upload protocol

 - general data transport protocol

 - control protocol

It is done in such a way that can also be used verbatim by user space.

drivers/net/wimax/i2400m.h defines all the APIs used by the core,
bus-generic driver (i2400m) and the bus specific drivers
(i2400m-BUSNAME). It also gives a roadmap to the driver
implementation.

debug-levels.h adds the core driver's debug settings.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/wimax/i2400m.h | 512 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 512 insertions(+)
 create mode 100644 include/linux/wimax/i2400m.h

(limited to 'include/linux')

diff --git a/include/linux/wimax/i2400m.h b/include/linux/wimax/i2400m.h
new file mode 100644
index 000000000000..74198f5bb4dc
--- /dev/null
+++ b/include/linux/wimax/i2400m.h
@@ -0,0 +1,512 @@
+/*
+ * Intel Wireless WiMax Connection 2400m
+ * Host-Device protocol interface definitions
+ *
+ *
+ * Copyright (C) 2007-2008 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Intel Corporation <linux-wimax@intel.com>
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *  - Initial implementation
+ *
+ *
+ * This header defines the data structures and constants used to
+ * communicate with the device.
+ *
+ * BOOTMODE/BOOTROM/FIRMWARE UPLOAD PROTOCOL
+ *
+ * The firmware upload protocol is quite simple and only requires a
+ * handful of commands. See drivers/net/wimax/i2400m/fw.c for more
+ * details.
+ *
+ * The BCF data structure is for the firmware file header.
+ *
+ *
+ * THE DATA / CONTROL PROTOCOL
+ *
+ * This is the normal protocol spoken with the device once the
+ * firmware is uploaded. It transports data payloads and control
+ * messages back and forth.
+ *
+ * It consists 'messages' that pack one or more payloads each. The
+ * format is described in detail in drivers/net/wimax/i2400m/rx.c and
+ * tx.c.
+ *
+ *
+ * THE L3L4 PROTOCOL
+ *
+ * The term L3L4 refers to Layer 3 (the device), Layer 4 (the
+ * driver/host software).
+ *
+ * This is the control protocol used by the host to control the i2400m
+ * device (scan, connect, disconnect...). This is sent to / received
+ * as control frames. These frames consist of a header and zero or
+ * more TLVs with information. We call each control frame a "message".
+ *
+ * Each message is composed of:
+ *
+ * HEADER
+ * [TLV0 + PAYLOAD0]
+ * [TLV1 + PAYLOAD1]
+ * [...]
+ * [TLVN + PAYLOADN]
+ *
+ * The HEADER is defined by 'struct i2400m_l3l4_hdr'. The payloads are
+ * defined by a TLV structure (Type Length Value) which is a 'header'
+ * (struct i2400m_tlv_hdr) and then the payload.
+ *
+ * All integers are represented as Little Endian.
+ *
+ * - REQUESTS AND EVENTS
+ *
+ * The requests can be clasified as follows:
+ *
+ *   COMMAND:  implies a request from the host to the device requesting
+ *             an action being performed. The device will reply with a
+ *             message (with the same type as the command), status and
+ *             no (TLV) payload. Execution of a command might cause
+ *             events (of different type) to be sent later on as
+ *             device's state changes.
+ *
+ *   GET/SET:  similar to COMMAND, but will not cause other
+ *             EVENTs. The reply, in the case of GET, will contain
+ *             TLVs with the requested information.
+ *
+ *   EVENT:    asynchronous messages sent from the device, maybe as a
+ *             consequence of previous COMMANDs but disassociated from
+ *             them.
+ *
+ * Only one request might be pending at the same time (ie: don't
+ * parallelize nor post another GET request before the previous
+ * COMMAND has been acknowledged with it's corresponding reply by the
+ * device).
+ *
+ * The different requests and their formats are described below:
+ *
+ *  I2400M_MT_*   Message types
+ *  I2400M_MS_*   Message status (for replies, events)
+ *  i2400m_tlv_*  TLVs
+ *
+ * data types are named 'struct i2400m_msg_OPNAME', OPNAME matching the
+ * operation.
+ */
+
+#ifndef __LINUX__WIMAX__I2400M_H__
+#define __LINUX__WIMAX__I2400M_H__
+
+#include <linux/types.h>
+
+
+/*
+ * Host Device Interface (HDI) common to all busses
+ */
+
+/* Boot-mode (firmware upload mode) commands */
+
+/* Header for the firmware file */
+struct i2400m_bcf_hdr {
+	__le32 module_type;
+	__le32 header_len;
+	__le32 header_version;
+	__le32 module_id;
+	__le32 module_vendor;
+	__le32 date;		/* BCD YYYMMDD */
+	__le32 size;
+	__le32 key_size;	/* in dwords */
+	__le32 modulus_size;	/* in dwords */
+	__le32 exponent_size;	/* in dwords */
+	__u8 reserved[88];
+} __attribute__ ((packed));
+
+/* Boot mode opcodes */
+enum i2400m_brh_opcode {
+	I2400M_BRH_READ = 1,
+	I2400M_BRH_WRITE = 2,
+	I2400M_BRH_JUMP = 3,
+	I2400M_BRH_SIGNED_JUMP = 8,
+	I2400M_BRH_HASH_PAYLOAD_ONLY = 9,
+};
+
+/* Boot mode command masks and stuff */
+enum i2400m_brh {
+	I2400M_BRH_SIGNATURE = 0xcbbc0000,
+	I2400M_BRH_SIGNATURE_MASK = 0xffff0000,
+	I2400M_BRH_SIGNATURE_SHIFT = 16,
+	I2400M_BRH_OPCODE_MASK = 0x0000000f,
+	I2400M_BRH_RESPONSE_MASK = 0x000000f0,
+	I2400M_BRH_RESPONSE_SHIFT = 4,
+	I2400M_BRH_DIRECT_ACCESS = 0x00000400,
+	I2400M_BRH_RESPONSE_REQUIRED = 0x00000200,
+	I2400M_BRH_USE_CHECKSUM = 0x00000100,
+};
+
+
+/* Constants for bcf->module_id */
+enum i2400m_bcf_mod_id {
+	/* Firmware file carries its own pokes -- pokes are a set of
+	 * magical values that have to be written in certain memory
+	 * addresses to get the device up and ready for firmware
+	 * download when it is in non-signed boot mode. */
+	I2400M_BCF_MOD_ID_POKES = 0x000000001,
+};
+
+
+/**
+ * i2400m_bootrom_header - Header for a boot-mode command
+ *
+ * @cmd: the above command descriptor
+ * @target_addr: where on the device memory should the action be performed.
+ * @data_size: for read/write, amount of data to be read/written
+ * @block_checksum: checksum value (if applicable)
+ * @payload: the beginning of data attached to this header
+ */
+struct i2400m_bootrom_header {
+	__le32 command;		/* Compose with enum i2400_brh */
+	__le32 target_addr;
+	__le32 data_size;
+	__le32 block_checksum;
+	char payload[0];
+} __attribute__ ((packed));
+
+
+/*
+ * Data / control protocol
+ */
+
+/* Packet types for the host-device interface */
+enum i2400m_pt {
+	I2400M_PT_DATA = 0,
+	I2400M_PT_CTRL,
+	I2400M_PT_TRACE,	/* For device debug */
+	I2400M_PT_RESET_WARM,	/* device reset */
+	I2400M_PT_RESET_COLD,	/* USB[transport] reset, like reconnect */
+	I2400M_PT_ILLEGAL
+};
+
+
+/*
+ * Payload for a data packet
+ *
+ * This is prefixed to each and every outgoing DATA type.
+ */
+struct i2400m_pl_data_hdr {
+	__le32 reserved;
+} __attribute__((packed));
+
+
+/* Misc constants */
+enum {
+	I2400M_PL_PAD = 16,	/* Payload data size alignment */
+	I2400M_PL_SIZE_MAX = 0x3EFF,
+	I2400M_MAX_PLS_IN_MSG = 60,
+	/* protocol barkers: sync sequences; for notifications they
+	 * are sent in groups of four. */
+	I2400M_H2D_PREVIEW_BARKER = 0xcafe900d,
+	I2400M_COLD_RESET_BARKER = 0xc01dc01d,
+	I2400M_WARM_RESET_BARKER = 0x50f750f7,
+	I2400M_NBOOT_BARKER = 0xdeadbeef,
+	I2400M_SBOOT_BARKER = 0x0ff1c1a1,
+	I2400M_ACK_BARKER = 0xfeedbabe,
+	I2400M_D2H_MSG_BARKER = 0xbeefbabe,
+};
+
+
+/*
+ * Hardware payload descriptor
+ *
+ * Bitfields encoded in a struct to enforce typing semantics.
+ *
+ * Look in rx.c and tx.c for a full description of the format.
+ */
+struct i2400m_pld {
+	__le32 val;
+} __attribute__ ((packed));
+
+#define I2400M_PLD_SIZE_MASK 0x00003fff
+#define I2400M_PLD_TYPE_SHIFT 16
+#define I2400M_PLD_TYPE_MASK 0x000f0000
+
+/*
+ * Header for a TX message or RX message
+ *
+ * @barker: preamble
+ * @size: used for management of the FIFO queue buffer; before
+ *     sending, this is converted to be a real preamble. This
+ *     indicates the real size of the TX message that starts at this
+ *     point. If the highest bit is set, then this message is to be
+ *     skipped.
+ * @sequence: sequence number of this message
+ * @offset: offset where the message itself starts -- see the comments
+ *     in the file header about message header and payload descriptor
+ *     alignment.
+ * @num_pls: number of payloads in this message
+ * @padding: amount of padding bytes at the end of the message to make
+ *           it be of block-size aligned
+ *
+ * Look in rx.c and tx.c for a full description of the format.
+ */
+struct i2400m_msg_hdr {
+	union {
+		__le32 barker;
+		__u32 size;	/* same size type as barker!! */
+	};
+	union {
+		__le32 sequence;
+		__u32 offset;	/* same size type as barker!! */
+	};
+	__le16 num_pls;
+	__le16 rsv1;
+	__le16 padding;
+	__le16 rsv2;
+	struct i2400m_pld pld[0];
+} __attribute__ ((packed));
+
+
+
+/*
+ * L3/L4 control protocol
+ */
+
+enum {
+	/* Interface version */
+	I2400M_L3L4_VERSION             = 0x0100,
+};
+
+/* Message types */
+enum i2400m_mt {
+	I2400M_MT_RESERVED              = 0x0000,
+	I2400M_MT_INVALID               = 0xffff,
+	I2400M_MT_REPORT_MASK		= 0x8000,
+
+	I2400M_MT_GET_SCAN_RESULT  	= 0x4202,
+	I2400M_MT_SET_SCAN_PARAM   	= 0x4402,
+	I2400M_MT_CMD_RF_CONTROL   	= 0x4602,
+	I2400M_MT_CMD_SCAN         	= 0x4603,
+	I2400M_MT_CMD_CONNECT      	= 0x4604,
+	I2400M_MT_CMD_DISCONNECT   	= 0x4605,
+	I2400M_MT_CMD_EXIT_IDLE   	= 0x4606,
+	I2400M_MT_GET_LM_VERSION   	= 0x5201,
+	I2400M_MT_GET_DEVICE_INFO  	= 0x5202,
+	I2400M_MT_GET_LINK_STATUS  	= 0x5203,
+	I2400M_MT_GET_STATISTICS   	= 0x5204,
+	I2400M_MT_GET_STATE        	= 0x5205,
+	I2400M_MT_GET_MEDIA_STATUS	= 0x5206,
+	I2400M_MT_SET_INIT_CONFIG	= 0x5404,
+	I2400M_MT_CMD_INIT	        = 0x5601,
+	I2400M_MT_CMD_TERMINATE		= 0x5602,
+	I2400M_MT_CMD_MODE_OF_OP	= 0x5603,
+	I2400M_MT_CMD_RESET_DEVICE	= 0x5604,
+	I2400M_MT_CMD_MONITOR_CONTROL   = 0x5605,
+	I2400M_MT_CMD_ENTER_POWERSAVE   = 0x5606,
+	I2400M_MT_GET_TLS_OPERATION_RESULT = 0x6201,
+	I2400M_MT_SET_EAP_SUCCESS       = 0x6402,
+	I2400M_MT_SET_EAP_FAIL          = 0x6403,
+	I2400M_MT_SET_EAP_KEY          	= 0x6404,
+	I2400M_MT_CMD_SEND_EAP_RESPONSE = 0x6602,
+	I2400M_MT_REPORT_SCAN_RESULT    = 0xc002,
+	I2400M_MT_REPORT_STATE		= 0xd002,
+	I2400M_MT_REPORT_POWERSAVE_READY = 0xd005,
+	I2400M_MT_REPORT_EAP_REQUEST    = 0xe002,
+	I2400M_MT_REPORT_EAP_RESTART    = 0xe003,
+	I2400M_MT_REPORT_ALT_ACCEPT    	= 0xe004,
+	I2400M_MT_REPORT_KEY_REQUEST 	= 0xe005,
+};
+
+
+/*
+ * Message Ack Status codes
+ *
+ * When a message is replied-to, this status is reported.
+ */
+enum i2400m_ms {
+	I2400M_MS_DONE_OK                  = 0,
+	I2400M_MS_DONE_IN_PROGRESS         = 1,
+	I2400M_MS_INVALID_OP               = 2,
+	I2400M_MS_BAD_STATE                = 3,
+	I2400M_MS_ILLEGAL_VALUE            = 4,
+	I2400M_MS_MISSING_PARAMS           = 5,
+	I2400M_MS_VERSION_ERROR            = 6,
+	I2400M_MS_ACCESSIBILITY_ERROR      = 7,
+	I2400M_MS_BUSY                     = 8,
+	I2400M_MS_CORRUPTED_TLV            = 9,
+	I2400M_MS_UNINITIALIZED            = 10,
+	I2400M_MS_UNKNOWN_ERROR            = 11,
+	I2400M_MS_PRODUCTION_ERROR         = 12,
+	I2400M_MS_NO_RF                    = 13,
+	I2400M_MS_NOT_READY_FOR_POWERSAVE  = 14,
+	I2400M_MS_THERMAL_CRITICAL         = 15,
+	I2400M_MS_MAX
+};
+
+
+/**
+ * i2400m_tlv - enumeration of the different types of TLVs
+ *
+ * TLVs stand for type-length-value and are the header for a payload
+ * composed of almost anything. Each payload has a type assigned
+ * and a length.
+ */
+enum i2400m_tlv {
+	I2400M_TLV_L4_MESSAGE_VERSIONS = 129,
+	I2400M_TLV_SYSTEM_STATE = 141,
+	I2400M_TLV_MEDIA_STATUS = 161,
+	I2400M_TLV_RF_OPERATION = 162,
+	I2400M_TLV_RF_STATUS = 163,
+	I2400M_TLV_DEVICE_RESET_TYPE = 132,
+	I2400M_TLV_CONFIG_IDLE_PARAMETERS = 601,
+};
+
+
+struct i2400m_tlv_hdr {
+	__le16 type;
+	__le16 length;		/* payload's */
+	__u8   pl[0];
+} __attribute__((packed));
+
+
+struct i2400m_l3l4_hdr {
+	__le16 type;
+	__le16 length;		/* payload's */
+	__le16 version;
+	__le16 resv1;
+	__le16 status;
+	__le16 resv2;
+	struct i2400m_tlv_hdr pl[0];
+} __attribute__((packed));
+
+
+/**
+ * i2400m_system_state - different states of the device
+ */
+enum i2400m_system_state {
+	I2400M_SS_UNINITIALIZED = 1,
+	I2400M_SS_INIT,
+	I2400M_SS_READY,
+	I2400M_SS_SCAN,
+	I2400M_SS_STANDBY,
+	I2400M_SS_CONNECTING,
+	I2400M_SS_WIMAX_CONNECTED,
+	I2400M_SS_DATA_PATH_CONNECTED,
+	I2400M_SS_IDLE,
+	I2400M_SS_DISCONNECTING,
+	I2400M_SS_OUT_OF_ZONE,
+	I2400M_SS_SLEEPACTIVE,
+	I2400M_SS_PRODUCTION,
+	I2400M_SS_CONFIG,
+	I2400M_SS_RF_OFF,
+	I2400M_SS_RF_SHUTDOWN,
+	I2400M_SS_DEVICE_DISCONNECT,
+	I2400M_SS_MAX,
+};
+
+
+/**
+ * i2400m_tlv_system_state - report on the state of the system
+ *
+ * @state: see enum i2400m_system_state
+ */
+struct i2400m_tlv_system_state {
+	struct i2400m_tlv_hdr hdr;
+	__le32 state;
+} __attribute__((packed));
+
+
+struct i2400m_tlv_l4_message_versions {
+	struct i2400m_tlv_hdr hdr;
+	__le16 major;
+	__le16 minor;
+	__le16 branch;
+	__le16 reserved;
+} __attribute__((packed));
+
+
+struct i2400m_tlv_detailed_device_info {
+	struct i2400m_tlv_hdr hdr;
+	__u8 reserved1[400];
+	__u8 mac_address[6];
+	__u8 reserved2[2];
+} __attribute__((packed));
+
+
+enum i2400m_rf_switch_status {
+	I2400M_RF_SWITCH_ON = 1,
+	I2400M_RF_SWITCH_OFF = 2,
+};
+
+struct i2400m_tlv_rf_switches_status {
+	struct i2400m_tlv_hdr hdr;
+	__u8 sw_rf_switch;	/* 1 ON, 2 OFF */
+	__u8 hw_rf_switch;	/* 1 ON, 2 OFF */
+	__u8 reserved[2];
+} __attribute__((packed));
+
+
+enum {
+	i2400m_rf_operation_on = 1,
+	i2400m_rf_operation_off = 2
+};
+
+struct i2400m_tlv_rf_operation {
+	struct i2400m_tlv_hdr hdr;
+	__le32 status;	/* 1 ON, 2 OFF */
+} __attribute__((packed));
+
+
+enum i2400m_tlv_reset_type {
+	I2400M_RESET_TYPE_COLD = 1,
+	I2400M_RESET_TYPE_WARM
+};
+
+struct i2400m_tlv_device_reset_type {
+	struct i2400m_tlv_hdr hdr;
+	__le32 reset_type;
+} __attribute__((packed));
+
+
+struct i2400m_tlv_config_idle_parameters {
+	struct i2400m_tlv_hdr hdr;
+	__le32 idle_timeout;	/* 100 to 300000 ms [5min], 100 increments
+				 * 0 disabled */
+	__le32 idle_paging_interval;	/* frames */
+} __attribute__((packed));
+
+
+enum i2400m_media_status {
+	I2400M_MEDIA_STATUS_LINK_UP = 1,
+	I2400M_MEDIA_STATUS_LINK_DOWN,
+	I2400M_MEDIA_STATUS_LINK_RENEW,
+};
+
+struct i2400m_tlv_media_status {
+	struct i2400m_tlv_hdr hdr;
+	__le32 media_status;
+} __attribute__((packed));
+
+#endif /* #ifndef __LINUX__WIMAX__I2400M_H__ */
-- 
cgit v1.2.2


From e30698743419d20dce03d033761f203b4d847ab0 Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Sat, 20 Dec 2008 16:57:59 -0800
Subject: wimax: export linux/wimax.h and linux/wimax/i2400m.h with
 headers_install

These two files are what user space can use to establish communication
with the WiMAX kernel API and to speak the Intel 2400m Wireless WiMAX
connection's control protocol.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/Kbuild       | 2 ++
 include/linux/wimax/Kbuild | 1 +
 2 files changed, 3 insertions(+)
 create mode 100644 include/linux/wimax/Kbuild

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index a3323f337e4d..12e9a2957caf 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -371,3 +371,5 @@ unifdef-y += xattr.h
 unifdef-y += xfrm.h
 
 objhdr-y += version.h
+header-y += wimax.h
+header-y += wimax/
diff --git a/include/linux/wimax/Kbuild b/include/linux/wimax/Kbuild
new file mode 100644
index 000000000000..3cb4f269bb09
--- /dev/null
+++ b/include/linux/wimax/Kbuild
@@ -0,0 +1 @@
+header-y += i2400m.h
-- 
cgit v1.2.2


From f7b7baae6b30ff04124259ff8d7c0c0d281320e6 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 11 Nov 2008 17:17:46 +0800
Subject: PCI: add PCI Advanced Feature Capability defines

PCI Advanced Features Capability is introduced by "Conventional PCI
Advanced Caps ECN" (can be downloaded in pcisig.com).  Add defines for
the various AF capabilities, including function level reset (FLR).

Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index e5effd47ed74..7766488470e4 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -210,6 +210,7 @@
 #define  PCI_CAP_ID_AGP3	0x0E	/* AGP Target PCI-PCI bridge */
 #define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
 #define  PCI_CAP_ID_MSIX	0x11	/* MSI-X */
+#define  PCI_CAP_ID_AF		0x13	/* PCI Advanced Features */
 #define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
 #define PCI_CAP_FLAGS		2	/* Capability defined flags (16 bits) */
 #define PCI_CAP_SIZEOF		4
@@ -316,6 +317,17 @@
 #define  PCI_CHSWP_EXT		0x40	/* ENUM# status - extraction */
 #define  PCI_CHSWP_INS		0x80	/* ENUM# status - insertion */
 
+/* PCI Advanced Feature registers */
+
+#define PCI_AF_LENGTH		2
+#define PCI_AF_CAP		3
+#define  PCI_AF_CAP_TP		0x01
+#define  PCI_AF_CAP_FLR		0x02
+#define PCI_AF_CTRL		4
+#define  PCI_AF_CTRL_FLR	0x01
+#define PCI_AF_STATUS		5
+#define  PCI_AF_STATUS_TP	0x01
+
 /* PCI-X registers */
 
 #define PCI_X_CMD		2	/* Modes & Features */
-- 
cgit v1.2.2


From 8b62091e20215730be1b94b7cd135a78a3e692ca Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:40 -0700
Subject: ACPI/PCI: include missing acpi.h file in pci-acpi.h.

The pci-acpi.h file will not compile without including linux/acpi.h.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-acpi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 8837928fbf33..a9e4c34e9389 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -8,6 +8,8 @@
 #ifndef _PCI_ACPI_H_
 #define _PCI_ACPI_H_
 
+#include <linux/acpi.h>
+
 #define OSC_QUERY_TYPE			0
 #define OSC_SUPPORT_TYPE 		1
 #define OSC_CONTROL_TYPE		2
-- 
cgit v1.2.2


From 990a7ac5645883a833a11b900bb6f25b65dea65b Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:45 -0700
Subject: ACPI/PCI: call _OSC support during root bridge discovery

Add pci_acpi_osc_support() and call it when a PCI bridge is added.  This
allows us to avoid having every individual PCI root bridge driver call
_OSC support for every root bridge in their probe functions, a
significant savings in boot time.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-acpi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index a9e4c34e9389..424f06f84cab 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -51,6 +51,7 @@
 #ifdef CONFIG_ACPI
 extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags);
 extern acpi_status __pci_osc_support_set(u32 flags, const char *hid);
+int pci_acpi_osc_support(acpi_handle handle, u32 flags);
 static inline acpi_status pci_osc_support_set(u32 flags)
 {
 	return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING);
-- 
cgit v1.2.2


From 0ef5f8f6159e44b4faa997be08d1a3bcbf44ad08 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:50 -0700
Subject: ACPI/PCI: PCI extended config _OSC support called when root bridge
 added

The _OSC capability OSC_EXT_PCI_CONFIG_SUPPORT is set when the root
bridge is added with pci_acpi_osc_support() if we can access PCI
extended config space.

This adds the function pci_ext_cfg_avail which returns true if we can
access PCI extended config space (offset greater than 0xff). It
currently only returns false if arch=x86 and raw_pci_ext_ops is not set
(which might happen if pci=nommcfg is set on the kernel command-line).

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4bb156ba854a..6fd47654ca4e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1140,6 +1140,8 @@ static inline void pci_mmcfg_early_init(void) { }
 static inline void pci_mmcfg_late_init(void) { }
 #endif
 
+int pci_ext_cfg_avail(struct pci_dev *dev);
+
 #ifdef CONFIG_HAS_IOMEM
 static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
 {
-- 
cgit v1.2.2


From 3e1b16002af29758b6bc9c38939d43838d9335bc Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:30:55 -0700
Subject: ACPI/PCI: PCIe ASPM _OSC support capabilities called when root bridge
 added

The _OSC capabilities OSC_ACTIVE_STATE_PWR_SUPPORT and
OSC_CLOCK_PWR_CAPABILITY_SUPPORT are set when the root bridge is added
with pci_acpi_osc_support(), so we no longer need to do it in the ASPM
driver.  Also add the function pcie_aspm_enabled, which returns true if
pcie_aspm=off is not on the kernel command-line.

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6fd47654ca4e..eae97a2bf603 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -791,6 +791,15 @@ extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
 extern void pci_restore_msi_state(struct pci_dev *dev);
 #endif
 
+#ifndef CONFIG_PCIEASPM
+static inline int pcie_aspm_enabled(void)
+{
+	return 0;
+}
+#else
+extern int pcie_aspm_enabled(void);
+#endif
+
 #ifdef CONFIG_HT_IRQ
 /* The functions a driver should call */
 int  ht_create_irq(struct pci_dev *dev, int idx);
-- 
cgit v1.2.2


From 07ae95f988a34465bdcb384bfa73c03424fe2312 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:31:05 -0700
Subject: ACPI/PCI: PCI MSI _OSC support capabilities called when root bridge
 added

The _OSC capability OSC_MSI_SUPPORT is set when the root bridge is added
with pci_acpi_osc_support(), so we no longer need to do it in the PCI
MSI driver.  Also adds the function pci_msi_enabled, which returns true
if pci=nomsi is not on the kernel command-line.

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index eae97a2bf603..59a3dc2059d3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -779,6 +779,10 @@ static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev)
 
 static inline void pci_restore_msi_state(struct pci_dev *dev)
 { }
+static inline int pci_msi_enabled(void)
+{
+	return 0;
+}
 #else
 extern int pci_enable_msi(struct pci_dev *dev);
 extern void pci_msi_shutdown(struct pci_dev *dev);
@@ -789,6 +793,7 @@ extern void pci_msix_shutdown(struct pci_dev *dev);
 extern void pci_disable_msix(struct pci_dev *dev);
 extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
 extern void pci_restore_msi_state(struct pci_dev *dev);
+extern int pci_msi_enabled(void);
 #endif
 
 #ifndef CONFIG_PCIEASPM
-- 
cgit v1.2.2


From 23616941914917cf25b94789856b5326b68d8ee8 Mon Sep 17 00:00:00 2001
From: Andrew Patterson <andrew.patterson@hp.com>
Date: Mon, 10 Nov 2008 15:31:10 -0700
Subject: ACPI/PCI: remove obsolete _OSC capability support functions

The acpi_query_osc, __pci_osc_support_set, pci_osc_support_set, and
pcie_osc_support_set functions have been obsoleted in favor of setting
these capabilities during root bridge discovery with
pci_acpi_osc_support.  There are no longer any callers of these
functions, so remove them.

Signed-off-by: Andrew Patterson <andrew.patterson@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-acpi.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 424f06f84cab..871e096e0fbc 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -50,16 +50,7 @@
 
 #ifdef CONFIG_ACPI
 extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags);
-extern acpi_status __pci_osc_support_set(u32 flags, const char *hid);
 int pci_acpi_osc_support(acpi_handle handle, u32 flags);
-static inline acpi_status pci_osc_support_set(u32 flags)
-{
-	return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING);
-}
-static inline acpi_status pcie_osc_support_set(u32 flags)
-{
-	return __pci_osc_support_set(flags, PCI_EXPRESS_ROOT_HID_STRING);
-}
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 {
 	/* Find root host bridge */
@@ -76,8 +67,6 @@ typedef u32 		acpi_status;
 #endif    
 static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags)
 {return AE_ERROR;}
-static inline acpi_status pci_osc_support_set(u32 flags) {return AE_ERROR;} 
-static inline acpi_status pcie_osc_support_set(u32 flags) {return AE_ERROR;}
 static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 { return NULL; }
 #endif
-- 
cgit v1.2.2


From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 22 Oct 2008 19:55:31 -0700
Subject: resource: allow MMIO exclusivity for device drivers

Device drivers that use pci_request_regions() (and similar APIs) have a
reasonable expectation that they are the only ones accessing their device.
As part of the e1000e hunt, we were afraid that some userland (X or some
bootsplash stuff) was mapping the MMIO region that the driver thought it
had exclusively via /dev/mem or via various sysfs resource mappings.

This patch adds the option for device drivers to cause their reserved
regions to the "banned from /dev/mem use" list, so now both kernel memory
and device-exclusive MMIO regions are banned.
NOTE: This is only active when CONFIG_STRICT_DEVMEM is set.

In addition to the config option, a kernel parameter iomem=relaxed is
provided for the cases where developers want to diagnose, in the field,
drivers issues from userspace.

Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/ioport.h | 11 ++++++++---
 include/linux/pci.h    |  3 +++
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 041e95aac2bf..f6bb2ca8e3ba 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -49,6 +49,7 @@ struct resource_list {
 #define IORESOURCE_SIZEALIGN	0x00020000	/* size indicates alignment */
 #define IORESOURCE_STARTALIGN	0x00040000	/* start field is alignment */
 
+#define IORESOURCE_EXCLUSIVE	0x08000000	/* Userland may not map this resource */
 #define IORESOURCE_DISABLED	0x10000000
 #define IORESOURCE_UNSET	0x20000000
 #define IORESOURCE_AUTO		0x40000000
@@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res)
 }
 
 /* Convenience shorthand with allocation */
-#define request_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name))
-#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name))
+#define request_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name), 0)
+#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl)
+#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0)
+#define request_mem_region_exclusive(start,n,name) \
+	__request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE)
 #define rename_region(region, newname) do { (region)->name = (newname); } while (0)
 
 extern struct resource * __request_region(struct resource *,
 					resource_size_t start,
-					resource_size_t n, const char *name);
+					resource_size_t n, const char *name, int relaxed);
 
 /* Compatibility cruft */
 #define release_region(start,n)	__release_region(&ioport_resource, (start), (n))
@@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev,
 extern void __devm_release_region(struct device *dev, struct resource *parent,
 				  resource_size_t start, resource_size_t n);
 extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size);
+extern int iomem_is_exclusive(u64 addr);
 
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 59a3dc2059d3..bfcb39ca8879 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
 		    int (*)(struct pci_dev *, u8, u8));
 #define HAVE_PCI_REQ_REGIONS	2
 int __must_check pci_request_regions(struct pci_dev *, const char *);
+int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *);
 void pci_release_regions(struct pci_dev *);
 int __must_check pci_request_region(struct pci_dev *, int, const char *);
+int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *);
 void pci_release_region(struct pci_dev *, int);
 int pci_request_selected_regions(struct pci_dev *, int, const char *);
+int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
-- 
cgit v1.2.2


From 57c2cf71c12318b72ebaa5720d210476b6bac4d4 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 11 Dec 2008 11:24:23 -0700
Subject: PCI: add pci_swizzle_interrupt_pin()

This patch adds pci_swizzle_interrupt_pin(), which implements the
INTx swizzling algorithm specified in Table 9-1 of the "PCI-to-PCI
Bridge Architecture Specification," revision 1.2.

There are many architecture-specific implementations of this
swizzle that can be replaced by this common one.

Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index bfcb39ca8879..58357d14f94c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -532,6 +532,7 @@ int __must_check pci_bus_add_device(struct pci_dev *dev);
 void pci_read_bridge_bases(struct pci_bus *child);
 struct resource *pci_find_parent_resource(const struct pci_dev *dev,
 					  struct resource *res);
+u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin);
 int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
 extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
-- 
cgit v1.2.2


From 1684f5ddd4c0c754f52c78eaa2c5c69ad09fb18c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 1 Dec 2008 14:30:30 -0800
Subject: PCI: uninline pci_ioremap_bar()

It's too large to be inlined.

Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 58357d14f94c..0d8bc920c2e5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1160,20 +1160,7 @@ static inline void pci_mmcfg_late_init(void) { }
 
 int pci_ext_cfg_avail(struct pci_dev *dev);
 
-#ifdef CONFIG_HAS_IOMEM
-static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar)
-{
-	/*
-	 * Make sure the BAR is actually a memory resource, not an IO resource
-	 */
-	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
-		WARN_ON(1);
-		return NULL;
-	}
-	return ioremap_nocache(pci_resource_start(pdev, bar),
-				     pci_resource_len(pdev, bar));
-}
-#endif
+void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar);
 
 #endif /* __KERNEL__ */
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.2


From 14add80b5120966fe0659d61815b9e9b4b68fdc5 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Sat, 22 Nov 2008 02:38:52 +0800
Subject: PCI: remove unnecessary arg of pci_update_resource()

This cleanup removes unnecessary argument 'struct resource *res' in
pci_update_resource(), so it takes same arguments as other companion
functions (pci_assign_resource(), etc.).

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0d8bc920c2e5..c5e02f324e13 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -648,7 +648,7 @@ int pcie_get_readrq(struct pci_dev *dev);
 int pcie_set_readrq(struct pci_dev *dev, int rq);
 int pci_reset_function(struct pci_dev *dev);
 int pci_execute_reset_function(struct pci_dev *dev);
-void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno);
+void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int pci_select_bars(struct pci_dev *dev, unsigned long flags);
 
-- 
cgit v1.2.2


From fde09c6d8f92de0c9f75698a75f0989f2234c517 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Sat, 22 Nov 2008 02:39:32 +0800
Subject: PCI: define PCI resource names in an 'enum'

This patch moves all definitions of the PCI resource names to an 'enum',
and also replaces some hard-coded resource variables with symbol
names. This change eases introduction of device specific resources.

Reviewed-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index c5e02f324e13..da1c22bab40e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -82,7 +82,30 @@ enum pci_mmap_state {
 #define PCI_DMA_FROMDEVICE	2
 #define PCI_DMA_NONE		3
 
-#define DEVICE_COUNT_RESOURCE	12
+/*
+ *  For PCI devices, the region numbers are assigned this way:
+ */
+enum {
+	/* #0-5: standard PCI resources */
+	PCI_STD_RESOURCES,
+	PCI_STD_RESOURCE_END = 5,
+
+	/* #6: expansion ROM resource */
+	PCI_ROM_RESOURCE,
+
+	/* resources assigned to buses behind the bridge */
+#define PCI_BRIDGE_RESOURCE_NUM 4
+
+	PCI_BRIDGE_RESOURCES,
+	PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES +
+				  PCI_BRIDGE_RESOURCE_NUM - 1,
+
+	/* total resources associated with a PCI device */
+	PCI_NUM_RESOURCES,
+
+	/* preserve this for compatibility */
+	DEVICE_COUNT_RESOURCE
+};
 
 typedef int __bitwise pci_power_t;
 
@@ -274,18 +297,6 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev,
 	hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
 }
 
-/*
- *  For PCI devices, the region numbers are assigned this way:
- *
- *	0-5	standard PCI regions
- *	6	expansion ROM
- *	7-10	bridges: address space assigned to buses behind the bridge
- */
-
-#define PCI_ROM_RESOURCE	6
-#define PCI_BRIDGE_RESOURCES	7
-#define PCI_NUM_RESOURCES	11
-
 #ifndef PCI_BUS_NUM_RESOURCES
 #define PCI_BUS_NUM_RESOURCES	16
 #endif
-- 
cgit v1.2.2


From e8c331e963c58b83db24b7d0e39e8c07f687dbc6 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Wed, 17 Dec 2008 12:09:12 +0900
Subject: PCI hotplug: introduce functions for ACPI slot detection

Some ACPI related PCI hotplug code can be shared among PCI hotplug
drivers. This patch introduces the following functions in
drivers/pci/hotplug/acpi_pcihp.c to share the code, and changes
acpiphp and pciehp to use them.

- int acpi_pci_detect_ejectable(struct pci_bus *pbus)
  This checks if the specified PCI bus has ejectable slots.

- int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle)
  This checks if the specified handle is ejectable ACPI PCI slot. The
  'pbus' parameter is needed to check if 'handle' is PCI related ACPI
  object.

This patch also introduces the following inline function in
include/linux/pci-acpi.h, which is useful to get ACPI handle of the
PCI bridge from struct pci_bus of the bridge's secondary bus.

- static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
  This returns ACPI handle of the PCI bridge which generates PCI bus
  specified by 'pbus'.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci-acpi.h    | 9 +++++++++
 include/linux/pci_hotplug.h | 2 ++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index 871e096e0fbc..042c166f65d5 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -60,6 +60,15 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev)
 	return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus),
 			pdev->bus->number);
 }
+
+static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
+{
+	int seg = pci_domain_nr(pbus), busnr = pbus->number;
+	struct pci_dev *bridge = pbus->self;
+	if (bridge)
+		return DEVICE_ACPI_HANDLE(&(bridge->dev));
+	return acpi_get_pci_rootbridge_handle(seg, busnr);
+}
 #else
 #if !defined(AE_ERROR)
 typedef u32 		acpi_status;
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index a00bd1a0f156..f7cc204fab07 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -228,6 +228,8 @@ extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus,
 				struct hotplug_params *hpp);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags);
 int acpi_root_bridge(acpi_handle handle);
+int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
+int acpi_pci_detect_ejectable(struct pci_bus *pbus);
 #endif
 #endif
 
-- 
cgit v1.2.2


From 68feac87de15edfc2c700d2d81b814288c93d003 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Tue, 16 Dec 2008 21:36:55 -0700
Subject: PCI: add pci_common_swizzle() for INTx swizzling

This patch adds pci_common_swizzle(), which swizzles INTx values all the
way up to a root bridge.

This common implementation can replace several architecture-specific
ones.  This should someday be combined with pci_get_interrupt_pin(),
but I left it separate for now to make reviewing easier.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index da1c22bab40e..170f9ae2d8a0 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -545,6 +545,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev,
 					  struct resource *res);
 u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin);
 int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge);
+u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp);
 extern struct pci_dev *pci_dev_get(struct pci_dev *dev);
 extern void pci_dev_put(struct pci_dev *dev);
 extern void pci_remove_bus(struct pci_bus *b);
-- 
cgit v1.2.2


From 287d19ce2e67c15e79a187b3bdcbbea1a0a51a7d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 18 Dec 2008 09:17:16 -0800
Subject: PCI: revise VPD access interface

Change PCI VPD API which was only used by sysfs to something usable
in drivers.
   * move iteration over multiple words to the low level
   * use conventional types for arguments
   * add exportable wrapper

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 170f9ae2d8a0..76079e106895 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -687,6 +687,10 @@ int pci_back_from_sleep(struct pci_dev *dev);
 /* Functions for PCI Hotplug drivers to use */
 int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 
+/* Vital product data routines */
+ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
+ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
+
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
 void pci_bus_assign_resources(struct pci_bus *bus);
 void pci_bus_size_bridges(struct pci_bus *bus);
-- 
cgit v1.2.2


From db5679437a2b938c9127480a3923633721583a4f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 18 Dec 2008 09:17:16 -0800
Subject: PCI: add interface to set visible size of VPD

The VPD on all devices may not be 32K. Unfortunately, there is no
generic way to find the size, so this adds a simple API hook
to reset it.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 76079e106895..7cbecef19bb6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -690,6 +690,7 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap);
 /* Vital product data routines */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
 ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
+int pci_vpd_truncate(struct pci_dev *dev, size_t size);
 
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
 void pci_bus_assign_resources(struct pci_bus *bus);
-- 
cgit v1.2.2


From 322162a71bd9fc4edb1b11236e7bc8aa27ccac22 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Fri, 19 Dec 2008 15:19:02 +0900
Subject: PCI: pciehp: cleanup register and field definitions

Clean up register definitions related to PCI Express Hot plug.

  - Add register definitions into include/linux/pci_regs.h, and use
    them instead of pciehp's locally definied register definitions.
  - Remove pciehp's locally defined register definitions
  - Remove unused register definitions in pciehp.
  - Some minor cleanups.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 64 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 7766488470e4..027815b4635e 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -411,20 +411,70 @@
 #define  PCI_EXP_DEVSTA_AUXPD	0x10	/* AUX Power Detected */
 #define  PCI_EXP_DEVSTA_TRPND	0x20	/* Transactions Pending */
 #define PCI_EXP_LNKCAP		12	/* Link Capabilities */
-#define  PCI_EXP_LNKCAP_ASPMS	0xc00	/* ASPM Support */
-#define  PCI_EXP_LNKCAP_L0SEL	0x7000	/* L0s Exit Latency */
-#define  PCI_EXP_LNKCAP_L1EL	0x38000	/* L1 Exit Latency */
-#define  PCI_EXP_LNKCAP_CLKPM	0x40000	/* L1 Clock Power Management */
+#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
+#define  PCI_EXP_LNKCAP_MLW	0x000003f0 /* Maximum Link Width */
+#define  PCI_EXP_LNKCAP_ASPMS	0x00000c00 /* ASPM Support */
+#define  PCI_EXP_LNKCAP_L0SEL	0x00007000 /* L0s Exit Latency */
+#define  PCI_EXP_LNKCAP_L1EL	0x00038000 /* L1 Exit Latency */
+#define  PCI_EXP_LNKCAP_CLKPM	0x00040000 /* L1 Clock Power Management */
+#define  PCI_EXP_LNKCAP_SDERC	0x00080000 /* Suprise Down Error Reporting Capable */
+#define  PCI_EXP_LNKCAP_DLLLARC	0x00100000 /* Data Link Layer Link Active Reporting Capable */
+#define  PCI_EXP_LNKCAP_LBNC	0x00200000 /* Link Bandwidth Notification Capability */
+#define  PCI_EXP_LNKCAP_PN	0xff000000 /* Port Number */
 #define PCI_EXP_LNKCTL		16	/* Link Control */
-#define  PCI_EXP_LNKCTL_RL	0x20	/* Retrain Link */
-#define  PCI_EXP_LNKCTL_CCC	0x40	/* Common Clock COnfiguration */
+#define  PCI_EXP_LNKCTL_ASPMC	0x0003	/* ASPM Control */
+#define  PCI_EXP_LNKCTL_RCB	0x0008	/* Read Completion Boundary */
+#define  PCI_EXP_LNKCTL_LD	0x0010	/* Link Disable */
+#define  PCI_EXP_LNKCTL_RL	0x0020	/* Retrain Link */
+#define  PCI_EXP_LNKCTL_CCC	0x0040	/* Common Clock Configuration */
+#define  PCI_EXP_LNKCTL_ES	0x0080	/* Extended Synch */
 #define  PCI_EXP_LNKCTL_CLKREQ_EN 0x100	/* Enable clkreq */
+#define  PCI_EXP_LNKCTL_HAWD	0x0200	/* Hardware Autonomous Width Disable */
+#define  PCI_EXP_LNKCTL_LBMIE	0x0400	/* Link Bandwidth Management Interrupt Enable */
+#define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Lnk Autonomous Bandwidth Interrupt Enable */
 #define PCI_EXP_LNKSTA		18	/* Link Status */
-#define  PCI_EXP_LNKSTA_LT	0x800	/* Link Training */
+#define  PCI_EXP_LNKSTA_CLS	0x000f	/* Current Link Speed */
+#define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Nogotiated Link Width */
+#define  PCI_EXP_LNKSTA_LT	0x0800	/* Link Training */
 #define  PCI_EXP_LNKSTA_SLC	0x1000	/* Slot Clock Configuration */
+#define  PCI_EXP_LNKSTA_DLLLA	0x2000	/* Data Link Layer Link Active */
+#define  PCI_EXP_LNKSTA_LBMS	0x4000	/* Link Bandwidth Management Status */
+#define  PCI_EXP_LNKSTA_LABS	0x8000	/* Link Autonomous Bandwidth Status */
 #define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
+#define  PCI_EXP_SLTCAP_ABP	0x00000001 /* Attention Button Present */
+#define  PCI_EXP_SLTCAP_PCP	0x00000002 /* Power Controller Present */
+#define  PCI_EXP_SLTCAP_MRLSP	0x00000004 /* MRL Sensor Present */
+#define  PCI_EXP_SLTCAP_AIP	0x00000008 /* Attention Indicator Present */
+#define  PCI_EXP_SLTCAP_PIP	0x00000010 /* Power Indicator Present */
+#define  PCI_EXP_SLTCAP_HPS	0x00000020 /* Hot-Plug Surprise */
+#define  PCI_EXP_SLTCAP_HPC	0x00000040 /* Hot-Plug Capable */
+#define  PCI_EXP_SLTCAP_SPLV	0x00007f80 /* Slot Power Limit Value */
+#define  PCI_EXP_SLTCAP_SPLS	0x00018000 /* Slot Power Limit Scale */
+#define  PCI_EXP_SLTCAP_EIP	0x00020000 /* Electromechanical Interlock Present */
+#define  PCI_EXP_SLTCAP_NCCS	0x00040000 /* No Command Completed Support */
+#define  PCI_EXP_SLTCAP_PSN	0xfff80000 /* Physical Slot Number */
 #define PCI_EXP_SLTCTL		24	/* Slot Control */
+#define  PCI_EXP_SLTCTL_ABPE	0x0001	/* Attention Button Pressed Enable */
+#define  PCI_EXP_SLTCTL_PFDE	0x0002	/* Power Fault Detected Enable */
+#define  PCI_EXP_SLTCTL_MRLSCE	0x0004	/* MRL Sensor Changed Enable */
+#define  PCI_EXP_SLTCTL_PDCE	0x0008	/* Presence Detect Changed Enable */
+#define  PCI_EXP_SLTCTL_CCIE	0x0010	/* Command Completed Interrupt Enable */
+#define  PCI_EXP_SLTCTL_HPIE	0x0020	/* Hot-Plug Interrupt Enable */
+#define  PCI_EXP_SLTCTL_AIC	0x00c0	/* Attention Indicator Control */
+#define  PCI_EXP_SLTCTL_PIC	0x0300	/* Power Indicator Control */
+#define  PCI_EXP_SLTCTL_PCC	0x0400	/* Power Controller Control */
+#define  PCI_EXP_SLTCTL_EIC	0x0800	/* Electromechanical Interlock Control */
+#define  PCI_EXP_SLTCTL_DLLSCE	0x1000	/* Data Link Layer State Changed Enable */
 #define PCI_EXP_SLTSTA		26	/* Slot Status */
+#define  PCI_EXP_SLTSTA_ABP	0x0001	/* Attention Button Pressed */
+#define  PCI_EXP_SLTSTA_PFD	0x0002	/* Power Fault Detected */
+#define  PCI_EXP_SLTSTA_MRLSC	0x0004	/* MRL Sensor Changed */
+#define  PCI_EXP_SLTSTA_PDC	0x0008	/* Presence Detect Changed */
+#define  PCI_EXP_SLTSTA_CC	0x0010	/* Command Completed */
+#define  PCI_EXP_SLTSTA_MRLSS	0x0020	/* MRL Sensor State */
+#define  PCI_EXP_SLTSTA_PDS	0x0040	/* Presence Detect State */
+#define  PCI_EXP_SLTSTA_EIS	0x0080	/* Electromechanical Interlock Status */
+#define  PCI_EXP_SLTSTA_DLLSC	0x0100	/* Data Link Layer State Changed */
 #define PCI_EXP_RTCTL		28	/* Root Control */
 #define  PCI_EXP_RTCTL_SECEE	0x01	/* System Error on Correctable Error */
 #define  PCI_EXP_RTCTL_SENFEE	0x02	/* System Error on Non-Fatal Error */
-- 
cgit v1.2.2


From 6a479079c07211bf348ac8a79754f26bea258f26 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 23 Dec 2008 03:08:29 +0000
Subject: PCI: Add pci_clear_master() as opposite of pci_set_master()

During an online device reset it may be useful to disable bus-mastering.
pci_disable_device() does that, and far more besides, so is not suitable
for an online reset.

Add pci_clear_master() which does just this.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cbecef19bb6..0f6d2bb1df9c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -642,6 +642,7 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 
 void pci_disable_device(struct pci_dev *dev);
 void pci_set_master(struct pci_dev *dev);
+void pci_clear_master(struct pci_dev *dev);
 int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state);
 #define HAVE_PCI_SET_MWI
 int __must_check pci_set_mwi(struct pci_dev *dev);
-- 
cgit v1.2.2


From 16cf0ebc35dd63f72628ba1246132a6fd17bced2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 5 Jan 2009 14:50:27 +0100
Subject: x86/PCI: Do not use interrupt links for devices using MSI-X

pcibios_enable_device() and pcibios_disable_device() don't handle
IRQs for devices that have MSI enabled and it should treat the
devices with MSI-X enabled in the same way.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0f6d2bb1df9c..80f8b8b65fde 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -336,6 +336,15 @@ struct pci_bus {
 #define pci_bus_b(n)	list_entry(n, struct pci_bus, node)
 #define to_pci_bus(n)	container_of(n, struct pci_bus, dev)
 
+#ifdef CONFIG_PCI_MSI
+static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
+{
+	return pci_dev->msi_enabled || pci_dev->msix_enabled;
+}
+#else
+static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; }
+#endif
+
 /*
  * Error values that may be returned by PCI functions.
  */
-- 
cgit v1.2.2


From 8d1a0a13edecfdcb47fee3238ed4a2af2a2867f9 Mon Sep 17 00:00:00 2001
From: Anders Larsen <al@alarsen.net>
Date: Thu, 1 Jan 2009 17:17:35 +0100
Subject: qnx: include <linux/types.h> for definitions of __[us]{8,16,32,64}
 types

On 2008-12-30 11:32:33, Sam Ravnborg wrote:
> We have added a few additional validation checks of the userspace headers:
...
> 3) We should include <linux/types.h> and not <asm/types.h>
> 4) If we use a __[us]{8,16,32,64} type then we must include <linux/types.h>

Satisfy these requirements for the linux/qnx*.h headers.

Signed-off-by: Anders Larsen <al@alarsen.net>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/linux/qnx4_fs.h  | 4 +---
 include/linux/qnxtypes.h | 5 ++---
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index 34a196ee7941..787d19ea9f46 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -2,14 +2,12 @@
  *  Name                         : qnx4_fs.h
  *  Author                       : Richard Frowijn
  *  Function                     : qnx4 global filesystem definitions
- *  Version                      : 1.0.2
- *  Last modified                : 2000-01-31
- *
  *  History                      : 23-03-1998 created
  */
 #ifndef _LINUX_QNX4_FS_H
 #define _LINUX_QNX4_FS_H
 
+#include <linux/types.h>
 #include <linux/qnxtypes.h>
 #include <linux/magic.h>
 
diff --git a/include/linux/qnxtypes.h b/include/linux/qnxtypes.h
index a3eb1137857b..bebbe5cc4fb8 100644
--- a/include/linux/qnxtypes.h
+++ b/include/linux/qnxtypes.h
@@ -2,9 +2,6 @@
  *  Name                         : qnxtypes.h
  *  Author                       : Richard Frowijn
  *  Function                     : standard qnx types
- *  Version                      : 1.0.2
- *  Last modified                : 2000-01-06
- *
  *  History                      : 22-03-1998 created
  *
  */
@@ -12,6 +9,8 @@
 #ifndef _QNX4TYPES_H
 #define _QNX4TYPES_H
 
+#include <linux/types.h>
+
 typedef __le16 qnx4_nxtnt_t;
 typedef __u8  qnx4_ftype_t;
 
-- 
cgit v1.2.2


From 14f0ca8eaea42a5b5a69cfcb699665dd2618db5f Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 7 Jan 2009 21:50:22 +0100
Subject: oprofile: make new cpu buffer functions part of the api

This patch creates the new functions

 oprofile_write_reserve()
 oprofile_add_data()
 oprofile_write_commit()

and makes them part of the oprofile api.

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 include/linux/oprofile.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
index 1ce9fe572e51..1d9518bc4c58 100644
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -164,4 +164,22 @@ void oprofile_put_buff(unsigned long *buf, unsigned int start,
 unsigned long oprofile_get_cpu_buffer_size(void);
 void oprofile_cpu_buffer_inc_smpl_lost(void);
  
+/* cpu buffer functions */
+
+struct op_sample;
+
+struct op_entry {
+	struct ring_buffer_event *event;
+	struct op_sample *sample;
+	unsigned long irq_flags;
+	unsigned long size;
+	unsigned long *data;
+};
+
+void oprofile_write_reserve(struct op_entry *entry,
+			    struct pt_regs * const regs,
+			    unsigned long pc, int code, int size);
+int oprofile_add_data(struct op_entry *entry, unsigned long val);
+int oprofile_write_commit(struct op_entry *entry);
+
 #endif /* OPROFILE_H */
-- 
cgit v1.2.2


From 87df4de8073f922a1f643b9fa6ba0412d5529ecf Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 15 Dec 2008 19:42:03 +0200
Subject: nfsd: last_byte_offset

refactor the nfs4 server lock code to use last_byte_offset
to compute the last byte covered by the lock.  Check for overflow
so that the last byte is set to NFS4_MAX_UINT64 if offset + len
wraps around.

Also, use NFS4_MAX_UINT64 for ~(u64)0 where appropriate.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfs4.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index ea0366769484..b912311a56b1 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -88,6 +88,8 @@
 #define NFS4_ACE_GENERIC_EXECUTE              0x001200A0
 #define NFS4_ACE_MASK_ALL                     0x001F01FF
 
+#define NFS4_MAX_UINT64	(~(u64)0)
+
 enum nfs4_acl_whotype {
 	NFS4_ACL_WHO_NAMED = 0,
 	NFS4_ACL_WHO_OWNER,
-- 
cgit v1.2.2


From db43910cb42285a99f45f7e0a0a32e32d0b61dcf Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 15 Dec 2008 19:42:24 +0200
Subject: nfsd: get rid of NFSD_VERSION

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsd.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 21269405ffe2..e19f45991b2e 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -23,7 +23,6 @@
 /*
  * nfsd version
  */
-#define NFSD_VERSION		"0.5"
 #define NFSD_SUPPORTED_MINOR_VERSION	0
 
 /*
-- 
cgit v1.2.2


From 5886188dc7ba9a76babcd37452f44079a9a77f71 Mon Sep 17 00:00:00 2001
From: Benjamin Krill <ben@codiert.org>
Date: Wed, 7 Jan 2009 10:32:38 +0100
Subject: serial: Add driver for the Cell Network Processor serial port NWP
 device

Add support for the nwp serial device which is connected to a DCR bus. It
uses the of_serial device driver to determine necessary properties from
the device tree.  The supported device is added as serial port number 85.

NWP stands for network processor and it is part of the QPACE - Quantum
Chromodynamics Parallel Computing on the Cell Broadband Engine project.
The implementation is a lightweight uart implementation with the focus
to consume as little resources as possible and it is connected to a
DCR bus.

Signed-off-by: Benjamin Krill <ben@codiert.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 include/linux/nwpserial.h   | 18 ++++++++++++++++++
 include/linux/serial_core.h |  3 +++
 2 files changed, 21 insertions(+)
 create mode 100644 include/linux/nwpserial.h

(limited to 'include/linux')

diff --git a/include/linux/nwpserial.h b/include/linux/nwpserial.h
new file mode 100644
index 000000000000..9acb21572eaf
--- /dev/null
+++ b/include/linux/nwpserial.h
@@ -0,0 +1,18 @@
+/*
+ *  Serial Port driver for a NWP uart device
+ *
+ *    Copyright (C) 2008 IBM Corp., Benjamin Krill <ben@codiert.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#ifndef _NWPSERIAL_H
+#define _NWPSERIAL_H
+
+int nwpserial_register_port(struct uart_port *port);
+void nwpserial_unregister_port(int line);
+
+#endif /* _NWPSERIAL_H */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index b4199841f1fc..90bbbf0b1161 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -161,6 +161,9 @@
 
 #define PORT_S3C6400	84
 
+/* NWPSERIAL */
+#define PORT_NWPSERIAL	85
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.2


From 8feae13110d60cc6287afabc2887366b0eb226c2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 8 Jan 2009 12:04:47 +0000
Subject: NOMMU: Make VMAs per MM as for MMU-mode linux

Make VMAs per mm_struct as for MMU-mode linux.  This solves two problems:

 (1) In SYSV SHM where nattch for a segment does not reflect the number of
     shmat's (and forks) done.

 (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an
     exec'ing process when VM_EXECUTABLE is specified, regardless of the fact
     that a VMA might be shared and already have its vm_mm assigned to another
     process or a dead process.

A new struct (vm_region) is introduced to track a mapped region and to remember
the circumstances under which it may be shared and the vm_list_struct structure
is discarded as it's no longer required.

This patch makes the following additional changes:

 (1) Regions are now allocated with alloc_pages() rather than kmalloc() and
     with no recourse to __GFP_COMP, so the pages are not composite.  Instead,
     each page has a reference on it held by the region.  Anything else that is
     interested in such a page will have to get a reference on it to retain it.
     When the pages are released due to unmapping, each page is passed to
     put_page() and will be freed when the page usage count reaches zero.

 (2) Excess pages are trimmed after an allocation as the allocation must be
     made as a power-of-2 quantity of pages.

 (3) VMAs are added to the parent MM's R/B tree and mmap lists.  As an MM may
     end up with overlapping VMAs within the tree, the VMA struct address is
     appended to the sort key.

 (4) Non-anonymous VMAs are now added to the backing inode's prio list.

 (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of
     the backing region.  The VMA and region structs will be split if
     necessary.

 (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory
     segment instead of all the attachments at that addresss.  Multiple
     shmat()'s return the same address under NOMMU-mode instead of different
     virtual addresses as under MMU-mode.

 (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode.

 (8) /proc/maps is now the global list of mapped regions, and may list bits
     that aren't actually mapped anywhere.

 (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount
     of RAM currently allocated by mmap to hold mappable regions that can't be
     mapped directly.  These are copies of the backing device or file if not
     anonymous.

These changes make NOMMU mode more similar to MMU mode.  The downside is that
NOMMU mode requires some extra memory to track things over NOMMU without this
patch (VMAs are no longer shared, and there are now region structs).

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Mike Frysinger <vapier.adi@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/mm.h       | 18 ++++++------------
 include/linux/mm_types.h | 18 +++++++++++++++++-
 2 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4a3d28c86443..b91a73fd1bcc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr;
 
 extern struct kmem_cache *vm_area_cachep;
 
-/*
- * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
- * disabled, then there's a single shared list of VMAs maintained by the
- * system, and mm's subscribe to these individually
- */
-struct vm_list_struct {
-	struct vm_list_struct	*next;
-	struct vm_area_struct	*vma;
-};
-
 #ifndef CONFIG_MMU
-extern struct rb_root nommu_vma_tree;
-extern struct rw_semaphore nommu_vma_sem;
+extern struct rb_root nommu_region_tree;
+extern struct rw_semaphore nommu_region_sem;
 
 extern unsigned int kobjsize(const void *objp);
 #endif
@@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
 extern void setup_per_zone_pages_min(void);
 extern void mem_init(void);
+extern void __init mmap_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void);
 static inline void setup_per_cpu_pageset(void) {}
 #endif
 
+/* nommu.c */
+extern atomic_t mmap_pages_allocated;
+
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
 void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9cfc9b627fdd..1c1e0d3a1714 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -96,6 +96,22 @@ struct page {
 #endif /* WANT_PAGE_VIRTUAL */
 };
 
+/*
+ * A region containing a mapping of a non-memory backed file under NOMMU
+ * conditions.  These are held in a global tree and are pinned by the VMAs that
+ * map parts of them.
+ */
+struct vm_region {
+	struct rb_node	vm_rb;		/* link in global region tree */
+	unsigned long	vm_flags;	/* VMA vm_flags */
+	unsigned long	vm_start;	/* start address of region */
+	unsigned long	vm_end;		/* region initialised to here */
+	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
+	struct file	*vm_file;	/* the backing file or NULL */
+
+	atomic_t	vm_usage;	/* region usage count */
+};
+
 /*
  * This struct defines a memory VMM memory area. There is one of these
  * per VM-area/task.  A VM area is any part of the process virtual memory
@@ -152,7 +168,7 @@ struct vm_area_struct {
 	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
 
 #ifndef CONFIG_MMU
-	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
+	struct vm_region *vm_region;	/* NOMMU mapping region */
 #endif
 #ifdef CONFIG_NUMA
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
-- 
cgit v1.2.2


From dd8632a12e500a684478fea0951f380478d56fed Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 8 Jan 2009 12:04:47 +0000
Subject: NOMMU: Make mmap allocation page trimming behaviour configurable.

NOMMU mmap allocates a piece of memory for an mmap that's rounded up in size to
the nearest power-of-2 number of pages.  Currently it then discards the excess
pages back to the page allocator, making that memory available for use by other
things.  This can, however, cause greater amount of fragmentation.

To counter this, a sysctl is added in order to fine-tune the trimming
behaviour.  The default behaviour remains to trim pages aggressively, while
this can either be disabled completely or set to a higher page-granular
watermark in order to have finer-grained control.

vm region vm_top bits taken from an earlier patch by David Howells.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Mike Frysinger <vapier.adi@gmail.com>
---
 include/linux/mm_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1c1e0d3a1714..92915e81443f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -106,6 +106,7 @@ struct vm_region {
 	unsigned long	vm_flags;	/* VMA vm_flags */
 	unsigned long	vm_start;	/* start address of region */
 	unsigned long	vm_end;		/* region initialised to here */
+	unsigned long	vm_top;		/* region allocated to here */
 	unsigned long	vm_pgoff;	/* the offset in vm_file corresponding to vm_start */
 	struct file	*vm_file;	/* the backing file or NULL */
 
-- 
cgit v1.2.2


From e2387d6c20752ccdb2895ba5de664fa39652f4cc Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Mon, 17 Nov 2008 14:35:44 +0000
Subject: leds: Make header variable naming consistent

There is one place where the struct led_classdev as the function
argument is named differently. Fix it.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/leds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index d3a73f5a48c3..3c1a8ce6a5ea 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -62,7 +62,7 @@ struct led_classdev {
 
 extern int led_classdev_register(struct device *parent,
 				 struct led_classdev *led_cdev);
-extern void led_classdev_unregister(struct led_classdev *lcd);
+extern void led_classdev_unregister(struct led_classdev *led_cdev);
 extern void led_classdev_suspend(struct led_classdev *led_cdev);
 extern void led_classdev_resume(struct led_classdev *led_cdev);
 
-- 
cgit v1.2.2


From 934cd3f979a1daacbd403398f2c7a8f6720c33aa Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@iki.fi>
Date: Wed, 3 Dec 2008 08:21:36 +0000
Subject: leds: leds-pcs9532 - Move i2c work to a workqueque

Apparently these might be called under atomic context,
and i2c operations may sleep. BUG found by
Ross Burton <ross@burtonini.com>

Signed-off-by: Riku Voipio <riku.voipio@iki.fi>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/leds-pca9532.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h
index 81b4207deb95..96eea90f01a8 100644
--- a/include/linux/leds-pca9532.h
+++ b/include/linux/leds-pca9532.h
@@ -15,6 +15,7 @@
 #define __LINUX_PCA9532_H
 
 #include <linux/leds.h>
+#include <linux/workqueue.h>
 
 enum pca9532_state {
 	PCA9532_OFF  = 0x0,
@@ -31,6 +32,7 @@ struct pca9532_led {
 	struct i2c_client *client;
 	char *name;
 	struct led_classdev ldev;
+       struct work_struct work;
 	enum pca9532_type type;
 	enum pca9532_state state;
 };
-- 
cgit v1.2.2


From 0081e8020ebd814a99e45720a10e869a54ee08a6 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 4 Dec 2008 16:52:33 +0000
Subject: leds: Add WM8350 LED driver

The voltage and current regulators on the WM8350 AudioPlus PMIC can be
used in concert to provide a power efficient LED driver.  This driver
implements support for this within the standard LED class.

Platform initialisation code should configure the LED hardware in the
init callback provided by the WM8350 core driver.  The callback should
use wm8350_isink_set_flash(), wm8350_dcdc25_set_mode() and
wm8350_dcdc_set_slot() to configure the operating parameters of the
regulators for their hardware and then then use wm8350_register_led() to
instantiate the LED driver.

This driver was originally written by Liam Girdwood, though it has been
extensively modified since then.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/mfd/wm8350/pmic.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h
index 96acbfc8aa12..be3264e286e0 100644
--- a/include/linux/mfd/wm8350/pmic.h
+++ b/include/linux/mfd/wm8350/pmic.h
@@ -13,6 +13,10 @@
 #ifndef __LINUX_MFD_WM8350_PMIC_H
 #define __LINUX_MFD_WM8350_PMIC_H
 
+#include <linux/platform_device.h>
+#include <linux/leds.h>
+#include <linux/regulator/machine.h>
+
 /*
  * Register values.
  */
@@ -700,6 +704,33 @@ struct wm8350;
 struct platform_device;
 struct regulator_init_data;
 
+/*
+ * WM8350 LED platform data
+ */
+struct wm8350_led_platform_data {
+	const char *name;
+	const char *default_trigger;
+	int max_uA;
+};
+
+struct wm8350_led {
+	struct platform_device *pdev;
+	struct mutex mutex;
+	struct work_struct work;
+	spinlock_t value_lock;
+	enum led_brightness value;
+	struct led_classdev cdev;
+	int max_uA_index;
+	int enabled;
+
+	struct regulator *isink;
+	struct regulator_consumer_supply isink_consumer;
+	struct regulator_init_data isink_init;
+	struct regulator *dcdc;
+	struct regulator_consumer_supply dcdc_consumer;
+	struct regulator_init_data dcdc_init;
+};
+
 struct wm8350_pmic {
 	/* Number of regulators of each type on this device */
 	int max_dcdc;
@@ -717,10 +748,15 @@ struct wm8350_pmic {
 
 	/* regulator devices */
 	struct platform_device *pdev[NUM_WM8350_REGULATORS];
+
+	/* LED devices */
+	struct wm8350_led led[2];
 };
 
 int wm8350_register_regulator(struct wm8350 *wm8350, int reg,
 			      struct regulator_init_data *initdata);
+int wm8350_register_led(struct wm8350 *wm8350, int lednum, int dcdc, int isink,
+			struct wm8350_led_platform_data *pdata);
 
 /*
  * Additional DCDC control not supported via regulator API
-- 
cgit v1.2.2


From c835ee7f4154992e6cf0674d7ee136f5d36247a4 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@linux.intel.com>
Date: Tue, 6 Jan 2009 21:00:19 +0000
Subject: backlight: Add suspend/resume support to the backlight core

Add suspend/resume support to the backlight core and enable use of it
by appropriate drivers.

Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/backlight.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 1ee9488ca2e4..79ca2da81c87 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -31,6 +31,10 @@ struct backlight_device;
 struct fb_info;
 
 struct backlight_ops {
+	unsigned int options;
+
+#define BL_CORE_SUSPENDRESUME	(1 << 0)
+
 	/* Notify the backlight driver some property has changed */
 	int (*update_status)(struct backlight_device *);
 	/* Return the current backlight brightness (accounting for power,
@@ -51,7 +55,19 @@ struct backlight_properties {
 	   modes; 4: full off), see FB_BLANK_XXX */
 	int power;
 	/* FB Blanking active? (values as for power) */
+	/* Due to be removed, please use (state & BL_CORE_FBBLANK) */
 	int fb_blank;
+	/* Flags used to signal drivers of state changes */
+	/* Upper 4 bits are reserved for driver internal use */
+	unsigned int state;
+
+#define BL_CORE_SUSPENDED	(1 << 0)	/* backlight is suspended */
+#define BL_CORE_FBBLANK		(1 << 1)	/* backlight is under an fb blank event */
+#define BL_CORE_DRIVER4		(1 << 28)	/* reserved for driver specific use */
+#define BL_CORE_DRIVER3		(1 << 29)	/* reserved for driver specific use */
+#define BL_CORE_DRIVER2		(1 << 30)	/* reserved for driver specific use */
+#define BL_CORE_DRIVER1		(1 << 31)	/* reserved for driver specific use */
+
 };
 
 struct backlight_device {
-- 
cgit v1.2.2


From 1107ba885e46964316c083d441d5dd185b6c9e49 Mon Sep 17 00:00:00 2001
From: Alex Zeffertt <alex.zeffertt@eu.citrix.com>
Date: Wed, 7 Jan 2009 18:07:11 -0800
Subject: xen: add xenfs to allow usermode <-> Xen interaction

The xenfs filesystem exports various interfaces to usermode.  Initially
this exports a file to allow usermode to interact with xenbus/xenstore.

Traditionally this appeared in /proc/xen.  Rather than extending procfs,
this patch adds a backward-compat mountpoint on /proc/xen, and provides
a xenfs filesystem which can be mounted there.

Signed-off-by: Alex Zeffertt <alex.zeffertt@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/magic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/magic.h b/include/linux/magic.h
index f7f3fdddbef0..439f6f3cb0c4 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -13,6 +13,7 @@
 #define EFS_SUPER_MAGIC		0x414A53
 #define EXT2_SUPER_MAGIC	0xEF53
 #define EXT3_SUPER_MAGIC	0xEF53
+#define XENFS_SUPER_MAGIC	0xabba1974
 #define EXT4_SUPER_MAGIC	0xEF53
 #define HPFS_SUPER_MAGIC	0xf995e849
 #define ISOFS_SUPER_MAGIC	0x9660
-- 
cgit v1.2.2


From 18a82eb9f980b5e02cea651e4ecda26265d98933 Mon Sep 17 00:00:00 2001
From: Pekka J Enberg <penberg@cs.helsinki.fi>
Date: Wed, 7 Jan 2009 18:07:19 -0800
Subject: ext2: allocate ->s_blockgroup_lock separately

As spotted by kmemtrace, struct ext2_sb_info is 17024 bytes on 64-bit
which makes it a very bad fit for SLAB allocators.  The culprit of the
wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when
NR_CPUS >= 32.

To fix that, allocate ->s_blockgroup_lock, which fits nicely in a order 2
page in the worst case, separately.  This shinks down struct ext2_sb_info
enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of
32 KB saving 15 KB of memory.

Acked-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext2_fs_sb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
index dc541f3653d1..1cdb66367c98 100644
--- a/include/linux/ext2_fs_sb.h
+++ b/include/linux/ext2_fs_sb.h
@@ -101,7 +101,7 @@ struct ext2_sb_info {
 	struct percpu_counter s_freeblocks_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
-	struct blockgroup_lock s_blockgroup_lock;
+	struct blockgroup_lock *s_blockgroup_lock;
 	/* root of the per fs reservation window tree */
 	spinlock_t s_rsv_window_lock;
 	struct rb_root s_rsv_window_root;
@@ -111,7 +111,7 @@ struct ext2_sb_info {
 static inline spinlock_t *
 sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group)
 {
-	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
 }
 
 #endif	/* _LINUX_EXT2_FS_SB */
-- 
cgit v1.2.2


From 0e090f1e05a563cc9acdda442767176bf1616001 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Wed, 7 Jan 2009 18:07:20 -0800
Subject: ext2: don't inherit inappropriate inode flags from parent

At present BTREE/INDEX is the only flag that new ext2 inodes do NOT
inherit from their parent.  In addition prevent the flags DIRTY, ECOMPR,
INDEX, IMAGIC and TOPDIR from being inherited.  List inheritable flags
explicitly to prevent future flags from accidentally being inherited.

This fixes the TOPDIR flag inheritance bug reported at
http://bugzilla.kernel.org/show_bug.cgi?id=9866.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Acked-by: Andreas Dilger <adilger@sun.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext2_fs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 78c775a83f7c..c3a051819363 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -194,6 +194,13 @@ struct ext2_group_desc
 #define EXT2_FL_USER_VISIBLE		FS_FL_USER_VISIBLE	/* User visible flags */
 #define EXT2_FL_USER_MODIFIABLE		FS_FL_USER_MODIFIABLE	/* User modifiable flags */
 
+/* Flags that should be inherited by new inodes from their parent. */
+#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\
+			   EXT2_SYNC_FL | EXT2_IMMUTABLE_FL | EXT2_APPEND_FL |\
+			   EXT2_NODUMP_FL | EXT2_NOATIME_FL | EXT2_COMPRBLK_FL|\
+			   EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
+			   EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
+
 /*
  * ioctl commands
  */
-- 
cgit v1.2.2


From ef8b646183868b2d042fa6cde0eef2a31263ff85 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Wed, 7 Jan 2009 18:07:21 -0800
Subject: ext2: tighten restrictions on inode flags

At the moment there are few restrictions on which flags may be set on
which inodes.  Specifically DIRSYNC may only be set on directories and
IMMUTABLE and APPEND may not be set on links.  Tighten that to disallow
TOPDIR being set on non-directories and only NODUMP and NOATIME to be set
on non-regular file, non-directories.

Introduces a flags masking function which masks flags based on mode and
use it during inode creation and when flags are set via the ioctl to
facilitate future consistency.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Acked-by: Andreas Dilger <adilger@sun.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext2_fs.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index c3a051819363..121720d74e15 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -201,6 +201,23 @@ struct ext2_group_desc
 			   EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\
 			   EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL)
 
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL)
+
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags)
+{
+	if (S_ISDIR(mode))
+		return flags;
+	else if (S_ISREG(mode))
+		return flags & EXT2_REG_FLMASK;
+	else
+		return flags & EXT2_OTHER_FLMASK;
+}
+
 /*
  * ioctl commands
  */
-- 
cgit v1.2.2


From f420d4dc4272fd223986762df2ad06056ddebada Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@redhat.com>
Date: Wed, 7 Jan 2009 18:07:24 -0800
Subject: jbd: improve fsync batching

There is a flaw with the way jbd handles fsync batching.  If we fsync() a
file and we were not the last person to run fsync() on this fs then we
automatically sleep for 1 jiffie in order to wait for new writers to join
into the transaction before forcing the commit.  The problem with this is
that with really fast storage (ie a Clariion) the time it takes to commit
a transaction to disk is way faster than 1 jiffie in most cases, so
sleeping means waiting longer with nothing to do than if we just committed
the transaction and kept going.  Ric Wheeler noticed this when using
fs_mark with more than 1 thread, the throughput would plummet as he added
more threads.

This patch attempts to fix this problem by recording the average time in
nanoseconds that it takes to commit a transaction to disk, and what time
we started the transaction.  If we run an fsync() and we have been running
for less time than it takes to commit the transaction to disk, we sleep
for the delta amount of time and then commit to disk.  We acheive
sub-jiffie sleeping using schedule_hrtimeout.  This means that the wait
time is auto-tuned to the speed of the underlying disk, instead of having
this static timeout.  I weighted the average according to somebody's
comments (Andreas Dilger I think) in order to help normalize random
outliers where we take way longer or way less time to commit than the
average.  I also have a min() check in there to make sure we don't sleep
longer than a jiffie in case our storage is super slow, this was requested
by Andrew.

I unfortunately do not have access to a Clariion, so I had to use a
ramdisk to represent a super fast array.  I tested with a SATA drive with
barrier=1 to make sure there was no regression with local disks, I tested
with a 4 way multipathed Apple Xserve RAID array and of course the
ramdisk.  I ran the following command

fs_mark -d /mnt/ext3-test -s 4096 -n 2000 -D 64 -t $i

where $i was 2, 4, 8, 16 and 32.  I mkfs'ed the fs each time.  Here are my
results

type	threads		with patch	without patch
sata	2		24.6		26.3
sata	4		49.2		48.1
sata	8		70.1		67.0
sata	16		104.0		94.1
sata	32		153.6		142.7

xserve	2		246.4		222.0
xserve	4		480.0		440.8
xserve	8		829.5		730.8
xserve	16		1172.7		1026.9
xserve	32		1816.3		1650.5

ramdisk	2		2538.3		1745.6
ramdisk	4		2942.3		661.9
ramdisk	8		2882.5		999.8
ramdisk	16		2738.7		1801.9
ramdisk	32		2541.9		2394.0

Signed-off-by: Josef Bacik <jbacik@redhat.com>
Cc: Andreas Dilger <adilger@sun.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Ric Wheeler <rwheeler@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/jbd.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 346e2b80be7d..6384b19efe64 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -542,6 +542,11 @@ struct transaction_s
 	 */
 	unsigned long		t_expires;
 
+	/*
+	 * When this transaction started, in nanoseconds [no locking]
+	 */
+	ktime_t			t_start_time;
+
 	/*
 	 * How many handles used this transaction? [t_handle_lock]
 	 */
@@ -798,8 +803,18 @@ struct journal_s
 	struct buffer_head	**j_wbuf;
 	int			j_wbufsize;
 
+	/*
+	 * this is the pid of the last person to run a synchronous operation
+	 * through the journal.
+	 */
 	pid_t			j_last_sync_writer;
 
+	/*
+	 * the average amount of time in nanoseconds it takes to commit a
+	 * transaction to the disk.  [j_state_lock]
+	 */
+	u64			j_average_commit_time;
+
 	/*
 	 * An opaque pointer to fs-private information.  ext3 puts its
 	 * superblock pointer here
-- 
cgit v1.2.2


From 5df096d67ec2b6578518caed7d57317a4b807aa1 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 7 Jan 2009 18:07:25 -0800
Subject: ext3: allocate ->s_blockgroup_lock separately

As spotted by kmemtrace, struct ext3_sb_info is 17152 bytes on 64-bit
which makes it a very bad fit for SLAB allocators.  The culprit of the
wasted memory is ->s_blockgroup_lock which can be as big as 16 KB when
NR_CPUS >= 32.

To fix that, allocate ->s_blockgroup_lock, which fits nicely in a order 2
page in the worst case, separately.  This shinks down struct ext3_sb_info
enough to fit a 1 KB slab cache so now we allocate 16 KB + 1 KB instead of
32 KB saving 15 KB of memory.

Acked-by: Andreas Dilger <adilger@sun.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext3_fs_sb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index e024e38248ff..76fdc0f4b028 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -60,7 +60,7 @@ struct ext3_sb_info {
 	struct percpu_counter s_freeblocks_counter;
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
-	struct blockgroup_lock s_blockgroup_lock;
+	struct blockgroup_lock *s_blockgroup_lock;
 
 	/* root of the per fs reservation window tree */
 	spinlock_t s_rsv_window_lock;
@@ -86,7 +86,7 @@ struct ext3_sb_info {
 static inline spinlock_t *
 sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
 {
-	return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
+	return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
 }
 
 #endif	/* _LINUX_EXT3_FS_SB */
-- 
cgit v1.2.2


From 2e8671cb566da993425d324fc355af31edc6e7f1 Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Wed, 7 Jan 2009 18:07:26 -0800
Subject: ext3: don't inherit inappropriate inode flags from parent

At present INDEX is the only flag that new ext3 inodes do NOT inherit from
their parent.  In addition prevent the flags DIRTY, ECOMPR, IMAGIC and
TOPDIR from being inherited.  List inheritable flags explicitly to prevent
future flags from accidentally being inherited.

This fixes the TOPDIR flag inheritance bug reported at
http://bugzilla.kernel.org/show_bug.cgi?id=9866.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Acked-by: Andreas Dilger <adilger@sun.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext3_fs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index d14f02918483..b745619a9b8e 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -178,6 +178,13 @@ struct ext3_group_desc
 #define EXT3_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
 #define EXT3_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
 
+/* Flags that should be inherited by new inodes from their parent. */
+#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
+			   EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
+			   EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
+			   EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
+			   EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
+
 /*
  * Inode dynamic state flags
  */
-- 
cgit v1.2.2


From 04143e2fb9d512c21e1dcfb561dbb0445dcfdc8c Mon Sep 17 00:00:00 2001
From: Duane Griffin <duaneg@dghda.com>
Date: Wed, 7 Jan 2009 18:07:26 -0800
Subject: ext3: tighten restrictions on inode flags

At the moment there are few restrictions on which flags may be set on
which inodes.  Specifically DIRSYNC may only be set on directories and
IMMUTABLE and APPEND may not be set on links.  Tighten that to disallow
TOPDIR being set on non-directories and only NODUMP and NOATIME to be set
on non-regular file, non-directories.

Introduces a flags masking function which masks flags based on mode and
use it during inode creation and when flags are set via the ioctl to
facilitate future consistency.

Signed-off-by: Duane Griffin <duaneg@dghda.com>
Acked-by: Andreas Dilger <adilger@sun.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ext3_fs.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index b745619a9b8e..d76800f6ecf0 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -185,6 +185,23 @@ struct ext3_group_desc
 			   EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
 			   EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
 
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
+
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
+{
+	if (S_ISDIR(mode))
+		return flags;
+	else if (S_ISREG(mode))
+		return flags & EXT3_REG_FLMASK;
+	else
+		return flags & EXT3_OTHER_FLMASK;
+}
+
 /*
  * Inode dynamic state flags
  */
-- 
cgit v1.2.2


From b2aa30f7bb381e04c93eed106089ba55553955f1 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 7 Jan 2009 18:07:37 -0800
Subject: cgroups: don't put struct cgroupfs_root protected by RCU

We don't access struct cgroupfs_root in fast path, so we should not put
struct cgroupfs_root protected by RCU

But the comment in struct cgroup_subsys.root confuse us.

struct cgroup_subsys.root is used in these places:

1 find_css_set(): if (ss->root->subsys_list.next == &ss->sibling)
2 rebind_subsystems(): if (ss->root != &rootnode)
                       rcu_assign_pointer(ss->root, root);
                       rcu_assign_pointer(subsys[i]->root, &rootnode);
3 cgroup_has_css_refs(): if (ss->root != cgrp->root)
4 cgroup_init_subsys(): ss->root = &rootnode;
5 proc_cgroupstats_show(): ss->name, ss->root->subsys_bits,
                           ss->root->number_of_cgroups, !ss->disabled);
6 cgroup_clone(): root = subsys->root;
                  if ((root != subsys->root) ||

All these place we have held cgroup_lock() or we don't dereference to
struct cgroupfs_root.  It's means wo don't need RCU when use struct
cgroup_subsys.root, and we should not put struct cgroupfs_root protected
by RCU.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 08b78c09b09a..f68dfd8dd53a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -337,7 +337,6 @@ struct cgroup_subsys {
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
 
-	/* Protected by RCU */
 	struct cgroupfs_root *root;
 
 	struct list_head sibling;
-- 
cgit v1.2.2


From a47295e6bc42ad35f9c15ac66f598aa24debd4e2 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Wed, 7 Jan 2009 18:07:44 -0800
Subject: cgroups: make cgroup_path() RCU-safe

Fix races between /proc/sched_debug by freeing cgroup objects via an RCU
callback.  Thus any cgroup reference obtained from an RCU-safe source will
remain valid during the RCU section.  Since dentries are also RCU-safe,
this allows us to traverse up the tree safely.

Additionally, make cgroup_path() check for a NULL cgrp->dentry to avoid
trying to report a path for a partially-created cgroup.

[lizf@cn.fujitsu.com: call deactive_super() in cgroup_diput()]
Signed-off-by: Paul Menage <menage@google.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Tested-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f68dfd8dd53a..73d1c730c3c4 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -116,7 +116,7 @@ struct cgroup {
 	struct list_head children;	/* my children */
 
 	struct cgroup *parent;	/* my parent */
-	struct dentry *dentry;	  	/* cgroup fs entry */
+	struct dentry *dentry;	  	/* cgroup fs entry, RCU protected */
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
@@ -145,6 +145,9 @@ struct cgroup {
 	int pids_use_count;
 	/* Length of the current tasks_pids array */
 	int pids_length;
+
+	/* For RCU-protected deletion */
+	struct rcu_head rcu_head;
 };
 
 /* A css_set is a structure holding pointers to a set of
-- 
cgit v1.2.2


From 7a81b88cb53e335ff7d019e6398c95792c817d93 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:07:48 -0800
Subject: memcg: introduce charge-commit-cancel style of functions

There is a small race in do_swap_page().  When the page swapped-in is
charged, the mapcount can be greater than 0.  But, at the same time some
process (shares it ) call unmap and make mapcount 1->0 and the page is
uncharged.

      CPUA 			CPUB
       mapcount == 1.
   (1) charge if mapcount==0     zap_pte_range()
                                (2) mapcount 1 => 0.
			        (3) uncharge(). (success)
   (4) set page's rmap()
       mapcount 0=>1

Then, this swap page's account is leaked.

For fixing this, I added a new interface.
  - charge
   account to res_counter by PAGE_SIZE and try to free pages if necessary.
  - commit
   register page_cgroup and add to LRU if necessary.
  - cancel
   uncharge PAGE_SIZE because of do_swap_page failure.

     CPUA
  (1) charge (always)
  (2) set page's rmap (mapcount > 0)
  (3) commit charge was necessary or not after set_pte().

This protocol uses PCG_USED bit on page_cgroup for avoiding over accounting.
Usual mem_cgroup_charge_common() does charge -> commit at a time.

And this patch also adds following function to clarify all charges.

  - mem_cgroup_newpage_charge() ....replacement for mem_cgroup_charge()
	called against newly allocated anon pages.

  - mem_cgroup_charge_migrate_fixup()
        called only from remove_migration_ptes().
	we'll have to rewrite this later.(this patch just keeps old behavior)
	This function will be removed by additional patch to make migration
	clearer.

Good for clarifying "what we do"

Then, we have 4 following charge points.
  - newpage
  - swap-in
  - add-to-cache.
  - migration.

[akpm@linux-foundation.org: add missing inline directives to stubs]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1fbe14d39521..c592f315cd02 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,8 +27,17 @@ struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
+extern int mem_cgroup_charge_migrate_fixup(struct page *page,
+				struct mm_struct *mm, gfp_t gfp_mask);
+/* for swap handling */
+extern int mem_cgroup_try_charge(struct mm_struct *mm,
+		gfp_t gfp_mask, struct mem_cgroup **ptr);
+extern void mem_cgroup_commit_charge_swapin(struct page *page,
+					struct mem_cgroup *ptr);
+extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
+
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
 extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
@@ -71,7 +80,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 
 
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
-static inline int mem_cgroup_charge(struct page *page,
+struct mem_cgroup;
+
+static inline int mem_cgroup_newpage_charge(struct page *page,
 					struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return 0;
@@ -83,6 +94,27 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
+static inline int mem_cgroup_charge_migrate_fixup(struct page *page,
+					struct mm_struct *mm, gfp_t gfp_mask)
+{
+	return 0;
+}
+
+static inline int mem_cgroup_try_charge(struct mm_struct *mm,
+				gfp_t gfp_mask, struct mem_cgroup **ptr)
+{
+	return 0;
+}
+
+static inline void mem_cgroup_commit_charge_swapin(struct page *page,
+					  struct mem_cgroup *ptr)
+{
+}
+
+static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
+{
+}
+
 static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
-- 
cgit v1.2.2


From 01b1ae63c2270cbacfd43fea94578c17950eb548 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:07:50 -0800
Subject: memcg: simple migration handling

Now, management of "charge" under page migration is done under following
manner. (Assume migrate page contents from oldpage to newpage)

 before
  - "newpage" is charged before migration.
 at success.
  - "oldpage" is uncharged at somewhere(unmap, radix-tree-replace)
 at failure
  - "newpage" is uncharged.
  - "oldpage" is charged if necessary (*1)

But (*1) is not reliable....because of GFP_ATOMIC.

This patch tries to change behavior as following by charge/commit/cancel ops.

 before
  - charge PAGE_SIZE (no target page)
 success
  - commit charge against "newpage".
 failure
  - commit charge against "oldpage".
    (PCG_USED bit works effectively to avoid double-counting)
  - if "oldpage" is obsolete, cancel charge of PAGE_SIZE.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c592f315cd02..b095f5f6ecf7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,8 +29,6 @@ struct mm_struct;
 
 extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
-extern int mem_cgroup_charge_migrate_fixup(struct page *page,
-				struct mm_struct *mm, gfp_t gfp_mask);
 /* for swap handling */
 extern int mem_cgroup_try_charge(struct mm_struct *mm,
 		gfp_t gfp_mask, struct mem_cgroup **ptr);
@@ -60,8 +58,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 	((cgroup) == mem_cgroup_from_task((mm)->owner))
 
 extern int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
-extern void mem_cgroup_end_migration(struct page *page);
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
+extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
+	struct page *oldpage, struct page *newpage);
 
 /*
  * For memory reclaim.
@@ -94,12 +93,6 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
-static inline int mem_cgroup_charge_migrate_fixup(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
 static inline int mem_cgroup_try_charge(struct mm_struct *mm,
 				gfp_t gfp_mask, struct mem_cgroup **ptr)
 {
@@ -144,12 +137,14 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
 }
 
 static inline int
-mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
+mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 {
 	return 0;
 }
 
-static inline void mem_cgroup_end_migration(struct page *page)
+static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
+					struct page *oldpage,
+					struct page *newpage)
 {
 }
 
-- 
cgit v1.2.2


From d13d144309d2e5a3e6ad978b16c1d0226ddc9231 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:07:56 -0800
Subject: memcg: handle swap caches

SwapCache support for memory resource controller (memcg)

Before mem+swap controller, memcg itself should handle SwapCache in proper
way.  This is cut-out from it.

In current memcg, SwapCache is just leaked and the user can create tons of
SwapCache.  This is a leak of account and should be handled.

SwapCache accounting is done as following.

  charge (anon)
	- charged when it's mapped.
	  (because of readahead, charge at add_to_swap_cache() is not sane)
  uncharge (anon)
	- uncharged when it's dropped from swapcache and fully unmapped.
	  means it's not uncharged at unmap.
	  Note: delete from swap cache at swap-in is done after rmap information
	        is established.
  charge (shmem)
	- charged at swap-in. this prevents charge at add_to_page_cache().

  uncharge (shmem)
	- uncharged when it's dropped from swapcache and not on shmem's
	  radix-tree.

  at migration, check against 'old page' is modified to handle shmem.

Comparing to the old version discussed (and caused troubles), we have
advantages of
  - PCG_USED bit.
  - simple migrating handling.

So, situation is much easier than several months ago, maybe.

[hugh@veritas.com: memcg: handle swap caches build fix]
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 91dee50fe260..f8f3907533f0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -333,6 +333,22 @@ static inline void disable_swap_token(void)
 	put_swap_token(swap_token_mm);
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+extern int mem_cgroup_cache_charge_swapin(struct page *page,
+				struct mm_struct *mm, gfp_t mask, bool locked);
+extern void mem_cgroup_uncharge_swapcache(struct page *page);
+#else
+static inline
+int mem_cgroup_cache_charge_swapin(struct page *page,
+				struct mm_struct *mm, gfp_t mask, bool locked)
+{
+	return 0;
+}
+static inline void mem_cgroup_uncharge_swapcache(struct page *page)
+{
+}
+#endif
+
 #else /* CONFIG_SWAP */
 
 #define nr_swap_pages				0L
@@ -409,6 +425,12 @@ static inline swp_entry_t get_swap_page(void)
 #define has_swap_token(x) 0
 #define disable_swap_token() do { } while(0)
 
+static inline int mem_cgroup_cache_charge_swapin(struct page *page,
+			struct mm_struct *mm, gfp_t mask, bool locked)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
-- 
cgit v1.2.2


From c077719be8e9e6b55702117513d1b5f41d80404a Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:07:57 -0800
Subject: memcg: mem+swap controller Kconfig

Config and control variable for mem+swap controller.

This patch adds CONFIG_CGROUP_MEM_RES_CTLR_SWAP
(memory resource controller swap extension.)

For accounting swap, it's obvious that we have to use additional memory to
remember "who uses swap".  This adds more overhead.  So, it's better to
offer "choice" to users.  This patch adds 2 choices.

This patch adds 2 parameters to enable swap extension or not.
  - CONFIG
  - boot option

Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b095f5f6ecf7..41b46cc9d1f1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -77,6 +77,9 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 					int priority, enum lru_list lru);
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+extern int do_swap_account;
+#endif
 
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
-- 
cgit v1.2.2


From 27a7faa0779dd13729196c1a818c294f44bbd1ee Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:07:58 -0800
Subject: memcg: swap cgroup for remembering usage

For accounting swap, we need a record per swap entry, at least.

This patch adds following function.
  - swap_cgroup_swapon() .... called from swapon
  - swap_cgroup_swapoff() ... called at the end of swapoff.

  - swap_cgroup_record() .... record information of swap entry.
  - swap_cgroup_lookup() .... lookup information of swap entry.

This patch just implements "how to record information".  No actual method
for limit the usage of swap.  These routine uses flat table to record and
lookup.  "wise" lookup system like radix-tree requires requires memory
allocation at new records but swap-out is usually called under memory
shortage (or memcg hits limit.) So, I used static allocation.  (maybe
dynamic allocation is not very hard but it adds additional memory
allocation in memory shortage path.)

Note1: In this, we use pointer to record information and this means
      8bytes per swap entry. I think we can reduce this when we
      create "id of cgroup" in the range of 0-65535 or 0-255.

Reported-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reported-by: Hugh Dickins <hugh@veritas.com>
Reported-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 1e6d34bfa094..d754b2dfbf2d 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -104,5 +104,40 @@ static inline void page_cgroup_init(void)
 {
 }
 
+#endif
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+#include <linux/swap.h>
+extern struct mem_cgroup *
+swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
+extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
+extern int swap_cgroup_swapon(int type, unsigned long max_pages);
+extern void swap_cgroup_swapoff(int type);
+#else
+#include <linux/swap.h>
+
+static inline
+struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
+{
+	return NULL;
+}
+
+static inline
+struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
+{
+	return NULL;
+}
+
+static inline int
+swap_cgroup_swapon(int type, unsigned long max_pages)
+{
+	return 0;
+}
+
+static inline void swap_cgroup_swapoff(int type)
+{
+	return;
+}
+
 #endif
 #endif
-- 
cgit v1.2.2


From 8c7c6e34a1256a5082d38c8e9bd1474476912715 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:00 -0800
Subject: memcg: mem+swap controller core

This patch implements per cgroup limit for usage of memory+swap.  However
there are SwapCache, double counting of swap-cache and swap-entry is
avoided.

Mem+Swap controller works as following.
  - memory usage is limited by memory.limit_in_bytes.
  - memory + swap usage is limited by memory.memsw_limit_in_bytes.

This has following benefits.
  - A user can limit total resource usage of mem+swap.

    Without this, because memory resource controller doesn't take care of
    usage of swap, a process can exhaust all the swap (by memory leak.)
    We can avoid this case.

    And Swap is shared resource but it cannot be reclaimed (goes back to memory)
    until it's used. This characteristic can be trouble when the memory
    is divided into some parts by cpuset or memcg.
    Assume group A and group B.
    After some application executes, the system can be..

    Group A -- very large free memory space but occupy 99% of swap.
    Group B -- under memory shortage but cannot use swap...it's nearly full.

    Ability to set appropriate swap limit for each group is required.

Maybe someone wonder "why not swap but mem+swap ?"

  - The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
    to move account from memory to swap...there is no change in usage of
    mem+swap.

    In other words, when we want to limit the usage of swap without affecting
    global LRU, mem+swap limit is better than just limiting swap.

Accounting target information is stored in swap_cgroup which is
per swap entry record.

Charge is done as following.
  map
    - charge  page and memsw.

  unmap
    - uncharge page/memsw if not SwapCache.

  swap-out (__delete_from_swap_cache)
    - uncharge page
    - record mem_cgroup information to swap_cgroup.

  swap-in (do_swap_page)
    - charged as page and memsw.
      record in swap_cgroup is cleared.
      memsw accounting is decremented.

  swap-free (swap_free())
    - if swap entry is freed, memsw is uncharged by PAGE_SIZE.

There are people work under never-swap environments and consider swap as
something bad. For such people, this mem+swap controller extension is just an
overhead.  This overhead is avoided by config or boot option.
(see Kconfig. detail is not in this patch.)

TODO:
 - maybe more optimization can be don in swap-in path. (but not very safe.)
   But we just do simple accounting at this stage.

[nishimura@mxp.nes.nec.co.jp: make resize limit hold mutex]
[hugh@veritas.com: memswap controller core swapcache fixes]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 11 +++++++++--
 include/linux/swap.h       | 14 +++++++++++---
 2 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 41b46cc9d1f1..ca51ac72d6c0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,6 +32,8 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 /* for swap handling */
 extern int mem_cgroup_try_charge(struct mm_struct *mm,
 		gfp_t gfp_mask, struct mem_cgroup **ptr);
+extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+		struct page *page, gfp_t mask, struct mem_cgroup **ptr);
 extern void mem_cgroup_commit_charge_swapin(struct page *page,
 					struct mem_cgroup *ptr);
 extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
@@ -80,7 +82,6 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
-
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -97,7 +98,13 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 }
 
 static inline int mem_cgroup_try_charge(struct mm_struct *mm,
-				gfp_t gfp_mask, struct mem_cgroup **ptr)
+			gfp_t gfp_mask, struct mem_cgroup **ptr)
+{
+	return 0;
+}
+
+static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
+		struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
 {
 	return 0;
 }
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f8f3907533f0..be938ce4895a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,7 +214,7 @@ static inline void lru_cache_add_active_file(struct page *page)
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
-							gfp_t gfp_mask);
+						gfp_t gfp_mask, bool noswap);
 extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
@@ -336,7 +336,7 @@ static inline void disable_swap_token(void)
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 extern int mem_cgroup_cache_charge_swapin(struct page *page,
 				struct mm_struct *mm, gfp_t mask, bool locked);
-extern void mem_cgroup_uncharge_swapcache(struct page *page);
+extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent);
 #else
 static inline
 int mem_cgroup_cache_charge_swapin(struct page *page,
@@ -344,7 +344,15 @@ int mem_cgroup_cache_charge_swapin(struct page *page,
 {
 	return 0;
 }
-static inline void mem_cgroup_uncharge_swapcache(struct page *page)
+static inline void
+mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
+{
+}
+#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
+extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
+#else
+static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
 {
 }
 #endif
-- 
cgit v1.2.2


From 08e552c69c6930d64722de3ec18c51844d06ee28 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:01 -0800
Subject: memcg: synchronized LRU

A big patch for changing memcg's LRU semantics.

Now,
  - page_cgroup is linked to mem_cgroup's its own LRU (per zone).

  - LRU of page_cgroup is not synchronous with global LRU.

  - page and page_cgroup is one-to-one and statically allocated.

  - To find page_cgroup is on what LRU, you have to check pc->mem_cgroup as
    - lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);

  - SwapCache is handled.

And, when we handle LRU list of page_cgroup, we do following.

	pc = lookup_page_cgroup(page);
	lock_page_cgroup(pc); .....................(1)
	mz = page_cgroup_zoneinfo(pc);
	spin_lock(&mz->lru_lock);
	.....add to LRU
	spin_unlock(&mz->lru_lock);
	unlock_page_cgroup(pc);

But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.

This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as

        spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
	mem_cgroup_add/remove/etc_lru() {
		pc = lookup_page_cgroup(page);
		mz = page_cgroup_zoneinfo(pc);
		if (PageCgroupUsed(pc)) {
			....add to LRU
		}
        spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU

This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
    1. When pc->mem_cgroup can be modified.
       - at charge.
       - at account_move().
    2. at charge
       the PCG_USED bit is not set before pc->mem_cgroup is fixed.
    3. at account_move()
       the page is isolated and not on LRU.

Pros.
  - easy for maintenance.
  - memcg can make use of laziness of pagevec.
  - we don't have to duplicated LRU/Active/Unevictable bit in page_cgroup.
  - LRU status of memcg will be synchronized with global LRU's one.
  - # of locks are reduced.
  - account_move() is simplified very much.
Cons.
  - may increase cost of LRU rotation.
    (no impact if memcg is not configured.)

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h  | 29 +++++++++++++++++++++++++++--
 include/linux/mm_inline.h   |  3 +++
 include/linux/page_cgroup.h | 17 -----------------
 3 files changed, 30 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ca51ac72d6c0..32c07b1852d6 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,7 +40,12 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
 
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
-extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
+extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
+extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
+extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
+extern void mem_cgroup_del_lru(struct page *page);
+extern void mem_cgroup_move_lists(struct page *page,
+				  enum lru_list from, enum lru_list to);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
 extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
@@ -131,7 +136,27 @@ static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 	return 0;
 }
 
-static inline void mem_cgroup_move_lists(struct page *page, bool active)
+static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
+{
+}
+
+static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
+{
+	return ;
+}
+
+static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
+{
+	return ;
+}
+
+static inline void mem_cgroup_del_lru(struct page *page)
+{
+	return ;
+}
+
+static inline void
+mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
 {
 }
 
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c948350c378e..37ef13d0f01e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -28,6 +28,7 @@ add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
 	list_add(&page->lru, &zone->lru[l].list);
 	__inc_zone_state(zone, NR_LRU_BASE + l);
+	mem_cgroup_add_lru_list(page, l);
 }
 
 static inline void
@@ -35,6 +36,7 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
 	list_del(&page->lru);
 	__dec_zone_state(zone, NR_LRU_BASE + l);
+	mem_cgroup_del_lru_list(page, l);
 }
 
 static inline void
@@ -54,6 +56,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
 		l += page_is_file_cache(page);
 	}
 	__dec_zone_state(zone, NR_LRU_BASE + l);
+	mem_cgroup_del_lru_list(page, l);
 }
 
 /**
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index d754b2dfbf2d..602cc1fdee90 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -26,10 +26,6 @@ enum {
 	PCG_LOCK,  /* page cgroup is locked */
 	PCG_CACHE, /* charged as cache */
 	PCG_USED, /* this object is in use. */
-	/* flags for LRU placement */
-	PCG_ACTIVE, /* page is active in this cgroup */
-	PCG_FILE, /* page is file system backed */
-	PCG_UNEVICTABLE, /* page is unevictableable */
 };
 
 #define TESTPCGFLAG(uname, lname)			\
@@ -50,19 +46,6 @@ TESTPCGFLAG(Cache, CACHE)
 TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
 
-/* LRU management flags (from global-lru definition) */
-TESTPCGFLAG(File, FILE)
-SETPCGFLAG(File, FILE)
-CLEARPCGFLAG(File, FILE)
-
-TESTPCGFLAG(Active, ACTIVE)
-SETPCGFLAG(Active, ACTIVE)
-CLEARPCGFLAG(Active, ACTIVE)
-
-TESTPCGFLAG(Unevictable, UNEVICTABLE)
-SETPCGFLAG(Unevictable, UNEVICTABLE)
-CLEARPCGFLAG(Unevictable, UNEVICTABLE)
-
 static inline int page_cgroup_nid(struct page_cgroup *pc)
 {
 	return page_to_nid(pc->page);
-- 
cgit v1.2.2


From f8d665422603ee1b8ed04dcad4242f14d623c941 Mon Sep 17 00:00:00 2001
From: Hirokazu Takahashi <taka@valinux.co.jp>
Date: Wed, 7 Jan 2009 18:08:02 -0800
Subject: memcg: add mem_cgroup_disabled()

We check mem_cgroup is disabled or not by checking
mem_cgroup_subsys.disabled.  I think it has more references than expected,
now.

replacing
   if (mem_cgroup_subsys.disabled)
with
   if (mem_cgroup_disabled())

give us good look, I think.

[kamezawa.hiroyu@jp.fujitsu.com: fix typo]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 32c07b1852d6..472efd09118c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -19,7 +19,7 @@
 
 #ifndef _LINUX_MEMCONTROL_H
 #define _LINUX_MEMCONTROL_H
-
+#include <linux/cgroup.h>
 struct mem_cgroup;
 struct page_cgroup;
 struct page;
@@ -87,6 +87,14 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
+
+static inline bool mem_cgroup_disabled(void)
+{
+	if (mem_cgroup_subsys.disabled)
+		return true;
+	return false;
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -214,6 +222,11 @@ static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
 {
 	return 0;
 }
+
+static inline bool mem_cgroup_disabled(void)
+{
+	return true;
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.2.2


From 28dbc4b6a01fb579a9441c7b81e3d3413dc452df Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Wed, 7 Jan 2009 18:08:05 -0800
Subject: memcg: memory cgroup resource counters for hierarchy

Add support for building hierarchies in resource counters.  Cgroups allows
us to build a deep hierarchy, but we currently don't link the resource
counters belonging to the memory controller control groups, in the same
fashion as the corresponding cgroup entries in the cgroup hierarchy.  This
patch provides the infrastructure for resource counters that have the same
hiearchy as their cgroup counter parts.

These set of patches are based on the resource counter hiearchy patches
posted by Pavel Emelianov.

NOTE: Building hiearchies is expensive, deeper hierarchies imply charging
the all the way up to the root.  It is known that hiearchies are
expensive, so the user needs to be careful and aware of the trade-offs
before creating very deep ones.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/res_counter.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 271c1c2c9f6f..dede0a2cfc45 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -43,6 +43,10 @@ struct res_counter {
 	 * the routines below consider this to be IRQ-safe
 	 */
 	spinlock_t lock;
+	/*
+	 * Parent counter, used for hierarchial resource accounting
+	 */
+	struct res_counter *parent;
 };
 
 /**
@@ -87,7 +91,7 @@ enum {
  * helpers for accounting
  */
 
-void res_counter_init(struct res_counter *counter);
+void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 
 /*
  * charge - try to consume more resource.
@@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter);
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-		unsigned long val);
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
-- 
cgit v1.2.2


From 2e4d40915fb85207fe48cfc31201824ec6d7426e Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:07 -0800
Subject: memcontrol: rcu_read_lock() to protect mm_match_cgroup()

mm_match_cgroup() calls cgroup_subsys_state().

We must use rcu_read_lock() to protect cgroup_subsys_state().

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 472efd09118c..2de6504e01fb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -61,8 +61,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
-#define mm_match_cgroup(mm, cgroup)	\
-	((cgroup) == mem_cgroup_from_task((mm)->owner))
+static inline
+int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
+{
+	struct mem_cgroup *mem;
+	rcu_read_lock();
+	mem = mem_cgroup_from_task((mm)->owner);
+	rcu_read_unlock();
+	return cgroup == mem;
+}
 
 extern int
 mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr);
-- 
cgit v1.2.2


From a636b327f731143ccc544b966cfd8de6cb6d72c6 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:08 -0800
Subject: memcg: avoid unnecessary system-wide-oom-killer

Current mmtom has new oom function as pagefault_out_of_memory().  It's
added for select bad process rathar than killing current.

When memcg hit limit and calls OOM at page_fault, this handler called and
system-wide-oom handling happens.  (means kernel panics if panic_on_oom is
true....)

To avoid overkill, check memcg's recent behavior before starting
system-wide-oom.

And this patch also fixes to guarantee "don't accnout against process with
TIF_MEMDIE".  This is necessary for smooth OOM.

[akpm@linux-foundation.org: build fix]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Jan Blunck <jblunck@suse.de>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2de6504e01fb..2fdd1380bf0a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -102,6 +102,8 @@ static inline bool mem_cgroup_disabled(void)
 	return false;
 }
 
+extern bool mem_cgroup_oom_called(struct task_struct *task);
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -234,6 +236,11 @@ static inline bool mem_cgroup_disabled(void)
 {
 	return true;
 }
+
+static inline bool mem_cgroup_oom_called(struct task_struct *task)
+{
+	return false;
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.2.2


From 2c26fdd70c3094fa3e84caf9ef434911933d5477 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:10 -0800
Subject: memcg: revert gfp mask fix

My patch, memcg-fix-gfp_mask-of-callers-of-charge.patch changed gfp_mask
of callers of charge to be GFP_HIGHUSER_MOVABLE for showing what will
happen at memory reclaim.

But in recent discussion, it's NACKed because it sounds ugly.

This patch is for reverting it and add some clean up to gfp_mask of
callers of charge.  No behavior change but need review before generating
HUNK in deep queue.

This patch also adds explanation to meaning of gfp_mask passed to charge
functions in memcontrol.h.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2fdd1380bf0a..59ac95a64508 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -26,6 +26,16 @@ struct page;
 struct mm_struct;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/*
+ * All "charge" functions with gfp_mask should use GFP_KERNEL or
+ * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
+ * alloc memory but reclaims memory from all available zones. So, "where I want
+ * memory from" bits of gfp_mask has no meaning. So any bits of that field is
+ * available but adding a rule is better. charge functions' gfp_mask should
+ * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
+ * codes.
+ * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
+ */
 
 extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
-- 
cgit v1.2.2


From f89eb90e33fd4e4e0cc1a6d20afd63c5a561885a Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:14 -0800
Subject: inactive_anon_is_low: move to vmscan

The inactive_anon_is_low() is called only vmscan.  Then it can move to
vmscan.c

This patch doesn't have any functional change.

Reviewd-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 37ef13d0f01e..7fbb97267556 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -81,23 +81,4 @@ static inline enum lru_list page_lru(struct page *page)
 	return lru;
 }
 
-/**
- * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- *
- * Returns true if the zone does not have enough inactive anon pages,
- * meaning some active anon pages need to be deactivated.
- */
-static inline int inactive_anon_is_low(struct zone *zone)
-{
-	unsigned long active, inactive;
-
-	active = zone_page_state(zone, NR_ACTIVE_ANON);
-	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
-
-	if (inactive * zone->inactive_ratio < active)
-		return 1;
-
-	return 0;
-}
 #endif
-- 
cgit v1.2.2


From 6e9015716ae9b59e9635d692fddfcfb9582c146c Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:15 -0800
Subject: mm: introduce zone_reclaim struct

Add zone_reclam_stat struct for later enhancement.

A later patch uses this.  This patch doesn't any behavior change (yet).

Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 35a7b5e19465..09c14e213b63 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -263,6 +263,19 @@ enum zone_type {
 #error ZONES_SHIFT -- too many zones configured adjust calculation
 #endif
 
+struct zone_reclaim_stat {
+	/*
+	 * The pageout code in vmscan.c keeps track of how many of the
+	 * mem/swap backed and file backed pages are refeferenced.
+	 * The higher the rotated/scanned ratio, the more valuable
+	 * that cache is.
+	 *
+	 * The anon LRU stats live in [0], file LRU stats in [1]
+	 */
+	unsigned long		recent_rotated[2];
+	unsigned long		recent_scanned[2];
+};
+
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 	unsigned long		pages_min, pages_low, pages_high;
@@ -315,16 +328,7 @@ struct zone {
 		unsigned long nr_scan;
 	} lru[NR_LRU_LISTS];
 
-	/*
-	 * The pageout code in vmscan.c keeps track of how many of the
-	 * mem/swap backed and file backed pages are refeferenced.
-	 * The higher the rotated/scanned ratio, the more valuable
-	 * that cache is.
-	 *
-	 * The anon LRU stats live in [0], file LRU stats in [1]
-	 */
-	unsigned long		recent_rotated[2];
-	unsigned long		recent_scanned[2];
+	struct zone_reclaim_stat reclaim_stat;
 
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	unsigned long		flags;		   /* zone flags, see below */
-- 
cgit v1.2.2


From 14797e2363c2b2f1ce139fd1c5a215e4e05aa1d9 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:18 -0800
Subject: memcg: add inactive_anon_is_low()

The inactive_anon_is_low() is key component of active/inactive anon
balancing on reclaim.  However current inactive_anon_is_low() function
only consider global reclaim.

Therefore, we need following ugly scan_global_lru() condition.

	if (lru == LRU_ACTIVE_ANON &&
	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
		shrink_active_list(nr_to_scan, zone, sc, priority, file);
		return 0;

it cause that memcg reclaim always deactivate pages when shrink_list() is
called.  To make mem_cgroup_inactive_anon_is_low() improve active/inactive
anon balancing of memcgroup.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: "Pekka Enberg" <penberg@cs.helsinki.fi>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 59ac95a64508..aad9377c9828 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 
 extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 					int priority, enum lru_list lru);
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
+				    struct zone *zone);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
@@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task)
 {
 	return false;
 }
+
+static inline int
+mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+{
+	return 1;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.2.2


From a3d8e0549d913e30968fa02e505dfe02c0a23e0d Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:19 -0800
Subject: memcg: add mem_cgroup_zone_nr_pages()

Introduce mem_cgroup_zone_nr_pages().  It is called by zone_nr_pages()
helper function.

This patch doesn't have any behavior change.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index aad9377c9828..b1defd6a2783 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -102,6 +102,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
 					int priority, enum lru_list lru);
 int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
 				    struct zone *zone);
+unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
+				       struct zone *zone,
+				       enum lru_list lru);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
@@ -260,6 +263,14 @@ mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 	return 1;
 }
 
+static inline unsigned long
+mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
+			 enum lru_list lru)
+{
+	return 0;
+}
+
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.2.2


From 3e2f41f1f64744f7942980d93cc93dd3e5924560 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:20 -0800
Subject: memcg: add zone_reclaim_stat

Introduce mem_cgroup_per_zone::reclaim_stat member and its statics
collecting function.

Now, get_scan_ratio() can calculate correct value on memcg reclaim.

[hugh@veritas.com: avoid reclaim_stat oops when disabled]
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b1defd6a2783..36b8ebb39b82 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -105,6 +105,10 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
 unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
 				       struct zone *zone,
 				       enum lru_list lru);
+struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
+						      struct zone *zone);
+struct zone_reclaim_stat*
+mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
@@ -271,6 +275,18 @@ mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg, struct zone *zone,
 }
 
 
+static inline struct zone_reclaim_stat*
+mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone)
+{
+	return NULL;
+}
+
+static inline struct zone_reclaim_stat*
+mem_cgroup_get_reclaim_stat_from_page(struct page *page)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
-- 
cgit v1.2.2


From 9439c1c95b5c25b8031b2a7eb7e1590eb84be7f5 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:21 -0800
Subject: memcg: remove mem_cgroup_cal_reclaim()

Now, get_scan_ratio() return correct value although memcg reclaim.  Then,
mem_cgroup_calc_reclaim() can be removed.

So, memcg reclaim get the same capability of anon/file reclaim balancing
as global reclaim now.

Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 36b8ebb39b82..8752052da8df 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -97,9 +97,6 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
-
-extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
-					int priority, enum lru_list lru);
 int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
 				    struct zone *zone);
 unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
@@ -244,13 +241,6 @@ static inline void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 {
 }
 
-static inline long mem_cgroup_calc_reclaim(struct mem_cgroup *mem,
-					struct zone *zone, int priority,
-					enum lru_list lru)
-{
-	return 0;
-}
-
 static inline bool mem_cgroup_disabled(void)
 {
 	return true;
-- 
cgit v1.2.2


From a7885eb8ad465ec9db99ac5b5e6680f0ca8e11c8 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:24 -0800
Subject: memcg: swappiness

Currently, /proc/sys/vm/swappiness can change swappiness ratio for global
reclaim.  However, memcg reclaim doesn't have tuning parameter for itself.

In general, the optimal swappiness depend on workload.  (e.g.  hpc
workload need to low swappiness than the others.)

Then, per cgroup swappiness improve administrator tunability.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index be938ce4895a..4ccca25d0f05 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,7 +214,8 @@ static inline void lru_cache_add_active_file(struct page *page)
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
-						gfp_t gfp_mask, bool noswap);
+						  gfp_t gfp_mask, bool noswap,
+						  unsigned int swappiness);
 extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
-- 
cgit v1.2.2


From c772be939e078afd2505ede7d596a30f8f61de95 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:25 -0800
Subject: memcg: fix calculation of active_ratio

Currently, inactive_ratio of memcg is calculated at setting limit.
because page_alloc.c does so and current implementation is straightforward
porting.

However, memcg introduced hierarchy feature recently.  In hierarchy
restriction, memory limit is not only decided memory.limit_in_bytes of
current cgroup, but also parent limit and sibling memory usage.

Then, The optimal inactive_ratio is changed frequently.  So, everytime
calculation is better.

Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8752052da8df..056cf82c0e86 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -97,8 +97,7 @@ extern void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
 extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
 							int priority);
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
-				    struct zone *zone);
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,
 				       struct zone *zone,
 				       enum lru_list lru);
@@ -252,7 +251,7 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task)
 }
 
 static inline int
-mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
 {
 	return 1;
 }
-- 
cgit v1.2.2


From a5e924f5f8abf97944e625d74967cc9452cfbce8 Mon Sep 17 00:00:00 2001
From: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Date: Wed, 7 Jan 2009 18:08:28 -0800
Subject: memcg: remove mem_cgroup_try_charge

After previous patch, mem_cgroup_try_charge is not used by anyone, so we
can remove it.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 056cf82c0e86..8ae6ece8c962 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,8 +40,6 @@ struct mm_struct;
 extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 /* for swap handling */
-extern int mem_cgroup_try_charge(struct mm_struct *mm,
-		gfp_t gfp_mask, struct mem_cgroup **ptr);
 extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 		struct page *page, gfp_t mask, struct mem_cgroup **ptr);
 extern void mem_cgroup_commit_charge_swapin(struct page *page,
@@ -134,12 +132,6 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
-static inline int mem_cgroup_try_charge(struct mm_struct *mm,
-			gfp_t gfp_mask, struct mem_cgroup **ptr)
-{
-	return 0;
-}
-
 static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 		struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
 {
-- 
cgit v1.2.2


From b5a84319a4343a0db753436fd8147e61eaafa7ea Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:35 -0800
Subject: memcg: fix shmem's swap accounting

Now, you can see following even when swap accounting is enabled.

 1. Create Group 01, and 02.
 2. allocate a "file" on tmpfs by a task under 01.
 3. swap out the "file" (by memory pressure)
 4. Read "file" from a task in group 02.
 5. the charge of "file" is moved to group 02.

This is not ideal behavior. This is because SwapCache which was loaded
by read-ahead is not taken into account..

This is a patch to fix shmem's swapcache behavior.
  - remove mem_cgroup_cache_charge_swapin().
  - Add SwapCache handler routine to mem_cgroup_cache_charge().
    By this, shmem's file cache is charged at add_to_page_cache()
    with GFP_NOWAIT.
  - pass the page of swapcache to shrink_mem_cgroup.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 6 ++++--
 include/linux/swap.h       | 8 --------
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8ae6ece8c962..326f45c86530 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -56,7 +56,8 @@ extern void mem_cgroup_move_lists(struct page *page,
 				  enum lru_list from, enum lru_list to);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
+extern int mem_cgroup_shrink_usage(struct page *page,
+			struct mm_struct *mm, gfp_t gfp_mask);
 
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
@@ -155,7 +156,8 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
-static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+static inline int mem_cgroup_shrink_usage(struct page *page,
+			struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return 0;
 }
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4ccca25d0f05..d30215578877 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -335,16 +335,8 @@ static inline void disable_swap_token(void)
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-extern int mem_cgroup_cache_charge_swapin(struct page *page,
-				struct mm_struct *mm, gfp_t mask, bool locked);
 extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent);
 #else
-static inline
-int mem_cgroup_cache_charge_swapin(struct page *page,
-				struct mm_struct *mm, gfp_t mask, bool locked)
-{
-	return 0;
-}
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 {
-- 
cgit v1.2.2


From 999cd8a450f8f93701669a61cac4d3b19eca07e8 Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Wed, 7 Jan 2009 18:08:36 -0800
Subject: cgroups: add a per-subsystem hierarchy_mutex

These patches introduce new locking/refcount support for cgroups to
reduce the need for subsystems to call cgroup_lock(). This will
ultimately allow the atomicity of cgroup_rmdir() (which was removed
recently) to be restored.

These three patches give:

1/3 - introduce a per-subsystem hierarchy_mutex which a subsystem can
     use to prevent changes to its own cgroup tree

2/3 - use hierarchy_mutex in place of calling cgroup_lock() in the
     memory controller

3/3 - introduce a css_tryget() function similar to the one recently
      proposed by Kamezawa, but avoiding spurious refcount failures in
      the event of a race between a css_tryget() and an unsuccessful
      cgroup_rmdir()

Future patches will likely involve:

- using hierarchy mutex in place of cgroup_lock() in more subsystems
 where appropriate

- restoring the atomicity of cgroup_rmdir() with respect to cgroup_create()

This patch:

Add a hierarchy_mutex to the cgroup_subsys object that protects changes to
the hierarchy observed by that subsystem.  It is taken by the cgroup
subsystem (in addition to cgroup_mutex) for the following operations:

- linking a cgroup into that subsystem's cgroup tree
- unlinking a cgroup from that subsystem's cgroup tree
- moving the subsystem to/from a hierarchy (including across the
  bind() callback)

Thus if the subsystem holds its own hierarchy_mutex, it can safely
traverse its own hierarchy.

Signed-off-by: Paul Menage <menage@google.com>
Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 73d1c730c3c4..ce1c1f34c30c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -340,8 +340,23 @@ struct cgroup_subsys {
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
 
-	struct cgroupfs_root *root;
+	/*
+	 * Protects sibling/children links of cgroups in this
+	 * hierarchy, plus protects which hierarchy (or none) the
+	 * subsystem is a part of (i.e. root/sibling).  To avoid
+	 * potential deadlocks, the following operations should not be
+	 * undertaken while holding any hierarchy_mutex:
+	 *
+	 * - allocating memory
+	 * - initiating hotplug events
+	 */
+	struct mutex hierarchy_mutex;
 
+	/*
+	 * Link to parent, and list entry in parent's children.
+	 * Protected by this->hierarchy_mutex and cgroup_lock()
+	 */
+	struct cgroupfs_root *root;
 	struct list_head sibling;
 };
 
-- 
cgit v1.2.2


From e7c5ec9193d32b9559a3bb8893ceedbda85201ff Mon Sep 17 00:00:00 2001
From: Paul Menage <menage@google.com>
Date: Wed, 7 Jan 2009 18:08:38 -0800
Subject: cgroups: add css_tryget()

Add css_tryget(), that obtains a counted reference on a CSS.  It is used
in situations where the caller has a "weak" reference to the CSS, i.e.
one that does not protect the cgroup from removal via a reference count,
but would instead be cleaned up by a destroy() callback.

css_tryget() will return true on success, or false if the cgroup is being
removed.

This is similar to Kamezawa Hiroyuki's patch from a week or two ago, but
with the difference that in the event of css_tryget() racing with a
cgroup_rmdir(), css_tryget() will only return false if the cgroup really
does get removed.

This implementation is done by biasing css->refcnt, so that a refcnt of 1
means "releasable" and 0 means "released or releasing".  In the event of a
race, css_tryget() distinguishes between "released" and "releasing" by
checking for the CSS_REMOVED flag in css->flags.

Signed-off-by: Paul Menage <menage@google.com>
Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ce1c1f34c30c..e267e62827bb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -52,9 +52,9 @@ struct cgroup_subsys_state {
 	 * hierarchy structure */
 	struct cgroup *cgroup;
 
-	/* State maintained by the cgroup system to allow
-	 * subsystems to be "busy". Should be accessed via css_get()
-	 * and css_put() */
+	/* State maintained by the cgroup system to allow subsystems
+	 * to be "busy". Should be accessed via css_get(),
+	 * css_tryget() and and css_put(). */
 
 	atomic_t refcnt;
 
@@ -64,11 +64,14 @@ struct cgroup_subsys_state {
 /* bits in struct cgroup_subsys_state flags field */
 enum {
 	CSS_ROOT, /* This CSS is the root of the subsystem */
+	CSS_REMOVED, /* This CSS is dead */
 };
 
 /*
- * Call css_get() to hold a reference on the cgroup;
- *
+ * Call css_get() to hold a reference on the css; it can be used
+ * for a reference obtained via:
+ * - an existing ref-counted reference to the css
+ * - task->cgroups for a locked task
  */
 
 static inline void css_get(struct cgroup_subsys_state *css)
@@ -77,9 +80,32 @@ static inline void css_get(struct cgroup_subsys_state *css)
 	if (!test_bit(CSS_ROOT, &css->flags))
 		atomic_inc(&css->refcnt);
 }
+
+static inline bool css_is_removed(struct cgroup_subsys_state *css)
+{
+	return test_bit(CSS_REMOVED, &css->flags);
+}
+
+/*
+ * Call css_tryget() to take a reference on a css if your existing
+ * (known-valid) reference isn't already ref-counted. Returns false if
+ * the css has been destroyed.
+ */
+
+static inline bool css_tryget(struct cgroup_subsys_state *css)
+{
+	if (test_bit(CSS_ROOT, &css->flags))
+		return true;
+	while (!atomic_inc_not_zero(&css->refcnt)) {
+		if (test_bit(CSS_REMOVED, &css->flags))
+			return false;
+	}
+	return true;
+}
+
 /*
  * css_put() should be called to release a reference taken by
- * css_get()
+ * css_get() or css_tryget()
  */
 
 extern void __css_put(struct cgroup_subsys_state *css);
-- 
cgit v1.2.2


From 6af866af34a96fed24a55979a78b6f73bd4e8e87 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 7 Jan 2009 18:08:45 -0800
Subject: cpuset: remove remaining pointers to cpumask_t

Impact: cleanups, use new cpumask API

Final trivial cleanups: mainly s/cpumask_t/struct cpumask

Note there is a FIXME in generate_sched_domains(). A future patch will
change struct cpumask *doms to struct cpumask *doms[].
(I suppose Rusty will do this.)

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Mike Travis <travis@sgi.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 51ea2bdea0f9..90c6074a36ca 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -20,8 +20,9 @@ extern int number_of_cpusets;	/* How many cpusets are defined in system? */
 extern int cpuset_init_early(void);
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
-extern void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask);
-extern void cpuset_cpus_allowed_locked(struct task_struct *p, cpumask_t *mask);
+extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+extern void cpuset_cpus_allowed_locked(struct task_struct *p,
+				       struct cpumask *mask);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -86,12 +87,13 @@ static inline int cpuset_init_early(void) { return 0; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
-static inline void cpuset_cpus_allowed(struct task_struct *p, cpumask_t *mask)
+static inline void cpuset_cpus_allowed(struct task_struct *p,
+				       struct cpumask *mask)
 {
 	*mask = cpu_possible_map;
 }
 static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
-								cpumask_t *mask)
+					      struct cpumask *mask)
 {
 	*mask = cpu_possible_map;
 }
-- 
cgit v1.2.2


From f9fb860f67b9542cd78d1558dec7058092b57d8e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 7 Jan 2009 18:08:46 -0800
Subject: pid: implement ns_of_pid

A current problem with the pid namespace is that it is easy to do pid
related work after exit_task_namespaces which drops the nsproxy pointer.

However if we are doing pid namespace related work we are always operating
on some struct pid which retains the pid_namespace pointer of the pid
namespace it was allocated in.

So provide ns_of_pid which allows us to find the pid namespace a pid was
allocated in.

Using this we have the needed infrastructure to do pid namespace related
work at anytime we have a struct pid, removing the chance of accidentally
having a NULL pointer dereference when accessing current->nsproxy.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Bastian Blank <bastian@waldi.eu.org>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index bb206c56d1f0..49f1c2f66e95 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -122,6 +122,24 @@ int next_pidmap(struct pid_namespace *pid_ns, int last);
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
 
+/*
+ * ns_of_pid() returns the pid namespace in which the specified pid was
+ * allocated.
+ *
+ * NOTE:
+ * 	ns_of_pid() is expected to be called for a process (task) that has
+ * 	an attached 'struct pid' (see attach_pid(), detach_pid()) i.e @pid
+ * 	is expected to be non-NULL. If @pid is NULL, caller should handle
+ * 	the resulting NULL pid-ns.
+ */
+static inline struct pid_namespace *ns_of_pid(struct pid *pid)
+{
+	struct pid_namespace *ns = NULL;
+	if (pid)
+		ns = pid->numbers[pid->level].ns;
+	return ns;
+}
+
 /*
  * the helpers to get the pid's id seen from different namespaces
  *
-- 
cgit v1.2.2


From 61bce0f1371cfff497fe85594fd39d1a0b15ebe1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 7 Jan 2009 18:08:49 -0800
Subject: pid: generalize task_active_pid_ns

Currently task_active_pid_ns is not safe to call after a task becomes a
zombie and exit_task_namespaces is called, as nsproxy becomes NULL.  By
reading the pid namespace from the pid of the task we can trivially solve
this problem at the cost of one extra memory read in what should be the
same cacheline as we read the namespace from.

When moving things around I have made task_active_pid_ns out of line
because keeping it in pid_namespace.h would require adding includes of
pid.h and sched.h that I don't think we want.

This change does make task_active_pid_ns unsafe to call during
copy_process until we attach a pid on the task_struct which seems to be a
reasonable trade off.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Bastian Blank <bastian@waldi.eu.org>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Nadia Derbey <Nadia.Derbey@bull.net>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid_namespace.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index d82fe825d62f..38d10326246a 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -79,11 +79,7 @@ static inline void zap_pid_ns_processes(struct pid_namespace *ns)
 }
 #endif /* CONFIG_PID_NS */
 
-static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
-{
-	return tsk->nsproxy->pid_ns;
-}
-
+extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
 void pidhash_init(void);
 void pidmap_init(void);
 
-- 
cgit v1.2.2


From f06295b44c296c8fb08823a3118468ae343b60f2 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees.cook@canonical.com>
Date: Wed, 7 Jan 2009 18:08:52 -0800
Subject: ELF: implement AT_RANDOM for glibc PRNG seeding

While discussing[1] the need for glibc to have access to random bytes
during program load, it seems that an earlier attempt to implement
AT_RANDOM got stalled.  This implements a random 16 byte string, available
to every ELF program via a new auxv AT_RANDOM vector.

[1] http://sourceware.org/ml/libc-alpha/2008-10/msg00006.html

Ulrich said:

glibc needs right after startup a bit of random data for internal
protections (stack canary etc).  What is now in upstream glibc is that we
always unconditionally open /dev/urandom, read some data, and use it.  For
every process startup.  That's slow.

...

The solution is to provide a limited amount of random data to the
starting process in the aux vector.  I suggested 16 bytes and this is
what the patch implements.  If we need only 16 bytes or less we use the
data directly.  If we need more we'll use the 16 bytes to see a PRNG.
This avoids the costly /dev/urandom use and it allows the kernel to use
the most adequate source of random data for this purpose.  It might not
be the same pool as that for /dev/urandom.

Concerns were expressed about the depletion of the randomness pool.  But
this patch doesn't make the situation worse, it doesn't deplete entropy
more than happens now.

Signed-off-by: Kees Cook <kees.cook@canonical.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/auxvec.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h
index d7afa9dd6635..f3b5d4e3a2ac 100644
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -23,16 +23,16 @@
 #define AT_PLATFORM 15  /* string identifying CPU for optimizations */
 #define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
 #define AT_CLKTCK 17	/* frequency at which times() increments */
-
+/* AT_* values 18 through 22 are reserved */
 #define AT_SECURE 23   /* secure mode boolean */
-
 #define AT_BASE_PLATFORM 24	/* string identifying real platform, may
 				 * differ from AT_PLATFORM. */
+#define AT_RANDOM 25	/* address of 16 random bytes */
 
 #define AT_EXECFN  31	/* filename of program */
 
 #ifdef __KERNEL__
-#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 19 /* NEW_AUX_ENT entries in auxiliary table */
   /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
 #endif
 
-- 
cgit v1.2.2


From 91f68b7359144aa40bb9668124543d15284750b4 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Wed, 7 Jan 2009 18:09:12 -0800
Subject: generic swap(): introduce global macro swap(a, b)

There have been some local definitions of swap(), it's time to replace
them all with a uniform one.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 6b8e2027165e..343df9ef2412 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -476,6 +476,12 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
 	__val = __val < __min ? __min: __val;	\
 	__val > __max ? __max: __val; })
 
+
+/*
+ * swap - swap value of @a and @b
+ */
+#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; })
+
 /**
  * container_of - cast a member of a structure out to the containing structure
  * @ptr:	the pointer to the member.
-- 
cgit v1.2.2


From 859cb7f2a4244ea6da206d3fe9cc8a6810947a68 Mon Sep 17 00:00:00 2001
From: Richard Purdie <rpurdie@linux.intel.com>
Date: Thu, 8 Jan 2009 17:55:03 +0000
Subject: leds: Add suspend/resume to the core class

Add suspend/resume to the core class and remove all the now unneeded
code from various drivers. Originally the class code couldn't support
suspend/resume but since class_device can there is no reason for
each driver doing its own suspend/resume anymore.
---
 include/linux/leds.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 3c1a8ce6a5ea..24489da701e3 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -32,7 +32,10 @@ struct led_classdev {
 	int			 brightness;
 	int			 flags;
 
+	/* Lower 16 bits reflect status */
 #define LED_SUSPENDED		(1 << 0)
+	/* Upper 16 bits reflect control information */
+#define LED_CORE_SUSPENDRESUME	(1 << 16)
 
 	/* Set LED brightness level */
 	/* Must not sleep, use a workqueue if needed */
-- 
cgit v1.2.2


From 69279fb9a95051971ac03e558c4d46e7ba84ab3a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 31 Dec 2008 12:52:41 +0000
Subject: regulator: Clean up kerneldoc warnings

Remove kerneldoc warnings that don't relate to missing documentation,
mostly by renaming parameters in the documentation to match their
actual names.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/linux/regulator/consumer.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index afdc4558bb94..801bf77ff4e2 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -104,10 +104,10 @@ struct regulator;
 /**
  * struct regulator_bulk_data - Data used for bulk regulator operations.
  *
- * @supply   The name of the supply.  Initialised by the user before
- *           using the bulk regulator APIs.
- * @consumer The regulator consumer for the supply.  This will be managed
- *           by the bulk API.
+ * @supply:   The name of the supply.  Initialised by the user before
+ *            using the bulk regulator APIs.
+ * @consumer: The regulator consumer for the supply.  This will be managed
+ *            by the bulk API.
  *
  * The regulator APIs provide a series of regulator_bulk_() API calls as
  * a convenience to consumers which require multiple supplies.  This
-- 
cgit v1.2.2


From c8e7e4640facbe99d10a6e262523b25be129b9b9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 31 Dec 2008 12:52:42 +0000
Subject: regulator: Add missing kerneldoc

This is only the documentation that the kerneldoc system warns about.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/linux/regulator/driver.h  | 40 +++++++++++++++++++++++++++++++++++++-
 include/linux/regulator/machine.h | 41 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index e37d80561985..84c3737c2d26 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -24,7 +24,37 @@ struct regulator_init_data;
 /**
  * struct regulator_ops - regulator operations.
  *
- * This struct describes regulator operations.
+ * This struct describes regulator operations which can be implemented by
+ * regulator chip drivers.
+ *
+ * @enable: Enable the regulator.
+ * @disable: Disable the regulator.
+ * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise.
+ *
+ * @set_voltage: Set the voltage for the regulator within the range specified.
+ *               The driver should select the voltage closest to min_uV.
+ * @get_voltage: Return the currently configured voltage for the regulator.
+ *
+ * @set_current: Set the current for the regulator within the range specified.
+ *               The driver should select the current closest to min_uA.
+ * @get_current: Return the currently configured current for the regulator.
+ *
+ * @set_current_limit: Configure a limit for a current-limited regulator.
+ * @get_current_limit: Get the limit for a current-limited regulator.
+ *
+ * @set_mode: Set the operating mode for the regulator.
+ * @get_mode: Get the current operating mode for the regulator.
+ * @get_optimum_mode: Get the most efficient operating mode for the regulator
+ *                    when running with the specified parameters.
+ *
+ * @set_suspend_voltage: Set the voltage for the regulator when the system
+ *                       is suspended.
+ * @set_suspend_enable: Mark the regulator as enabled when the system is
+ *                      suspended.
+ * @set_suspend_disable: Mark the regulator as disabled when the system is
+ *                       suspended.
+ * @set_suspend_mode: Set the operating mode for the regulator when the
+ *                    system is suspended.
  */
 struct regulator_ops {
 
@@ -75,6 +105,14 @@ enum regulator_type {
 /**
  * struct regulator_desc - Regulator descriptor
  *
+ * Each regulator registered with the core is described with a structure of
+ * this type.
+ *
+ * @name: Identifying name for the regulator.
+ * @id: Numerical identifier for the regulator.
+ * @ops: Regulator operations table.
+ * @type: Indicates if the regulator is a voltage or current regulator.
+ * @owner: Module providing the regulator, used for refcounting.
  */
 struct regulator_desc {
 	const char *name;
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index c6d69331a81e..3794773b23d2 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -44,6 +44,10 @@ struct regulator;
  * struct regulator_state - regulator state during low power syatem states
  *
  * This describes a regulators state during a system wide low power state.
+ *
+ * @uV: Operating voltage during suspend.
+ * @mode: Operating mode during suspend.
+ * @enabled: Enabled during suspend.
  */
 struct regulator_state {
 	int uV;	/* suspend voltage */
@@ -55,6 +59,30 @@ struct regulator_state {
  * struct regulation_constraints - regulator operating constraints.
  *
  * This struct describes regulator and board/machine specific constraints.
+ *
+ * @name: Descriptive name for the constraints, used for display purposes.
+ *
+ * @min_uV: Smallest voltage consumers may set.
+ * @max_uV: Largest voltage consumers may set.
+ *
+ * @min_uA: Smallest consumers consumers may set.
+ * @max_uA: Largest current consumers may set.
+ *
+ * @valid_modes_mask: Mask of modes which may be configured by consumers.
+ * @valid_ops_mask: Operations which may be performed by consumers.
+ *
+ * @always_on: Set if the regulator should never be disabled.
+ * @boot_on: Set if the regulator is enabled when the system is initially
+ *           started.
+ * @apply_uV: Apply the voltage constraint when initialising.
+ *
+ * @input_uV: Input voltage for regulator when supplied by another regulator.
+ *
+ * @state_disk: State for regulator when system is suspended in disk mode.
+ * @state_mem: State for regulator when system is suspended in mem mode.
+ * @state_standby: State for regulator when system is suspended in standby
+ *                 mode.
+ * @initial_state: Suspend state to set by default.
  */
 struct regulation_constraints {
 
@@ -93,6 +121,9 @@ struct regulation_constraints {
  * struct regulator_consumer_supply - supply -> device mapping
  *
  * This maps a supply name to a device.
+ *
+ * @dev: Device structure for the consumer.
+ * @supply: Name for the supply.
  */
 struct regulator_consumer_supply {
 	struct device *dev;	/* consumer */
@@ -103,6 +134,16 @@ struct regulator_consumer_supply {
  * struct regulator_init_data - regulator platform initialisation data.
  *
  * Initialisation constraints, our supply and consumers supplies.
+ *
+ * @supply_regulator_dev: Parent regulator (if any).
+ *
+ * @constraints: Constraints.  These must be specified for the regulator to
+ *               be usable.
+ * @num_consumer_supplies: Number of consumer device supplies.
+ * @consumer_supplies: Consumer device supply configuration.
+ *
+ * @regulator_init: Callback invoked when the regulator has been registered.
+ * @driver_data: Data passed to regulator_init.
  */
 struct regulator_init_data {
 	struct device *supply_regulator_dev; /* or NULL for LINE */
-- 
cgit v1.2.2


From 0ba4887c6329043d6cee5b5b477cfe50c2b57674 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 8 Jan 2009 11:50:23 -0800
Subject: regulator: fix kernel-doc warnings

Fix kernel-doc warnings in regulator/driver.h:

Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'set_current' description in 'regulator_ops'
Warning(linux-next-20090108//include/linux/regulator/driver.h:95): Excess struct/union/enum/typedef member 'get_current' description in 'regulator_ops'
Warning(linux-next-20090108//include/linux/regulator/driver.h:124): No description found for parameter 'irq'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
cc: Liam Girdwood <lrg@slimlogic.co.uk>
cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/linux/regulator/driver.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 84c3737c2d26..2dae05705f13 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -29,16 +29,12 @@ struct regulator_init_data;
  *
  * @enable: Enable the regulator.
  * @disable: Disable the regulator.
- * @is_enabled: Return 1 if the reguator is enabled, 0 otherwise.
+ * @is_enabled: Return 1 if the regulator is enabled, 0 otherwise.
  *
  * @set_voltage: Set the voltage for the regulator within the range specified.
  *               The driver should select the voltage closest to min_uV.
  * @get_voltage: Return the currently configured voltage for the regulator.
  *
- * @set_current: Set the current for the regulator within the range specified.
- *               The driver should select the current closest to min_uA.
- * @get_current: Return the currently configured current for the regulator.
- *
  * @set_current_limit: Configure a limit for a current-limited regulator.
  * @get_current_limit: Get the limit for a current-limited regulator.
  *
@@ -111,6 +107,7 @@ enum regulator_type {
  * @name: Identifying name for the regulator.
  * @id: Numerical identifier for the regulator.
  * @ops: Regulator operations table.
+ * @irq: Interrupt number for the regulator.
  * @type: Indicates if the regulator is a voltage or current regulator.
  * @owner: Module providing the regulator, used for refcounting.
  */
-- 
cgit v1.2.2


From f4f6bda00fc6bf995a35d8246db45aacaa9b3f09 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <mike@compulab.co.il>
Date: Wed, 3 Dec 2008 08:48:52 +0000
Subject: backlight: add support for Toppoly TDO35S series to tdo24m lcd driver

Signed-off-by: Mike Rapoport <mike@compulab.co.il>
Acked-by: Eric Miao <eric.miao@marvell.com>
Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
---
 include/linux/spi/tdo24m.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 include/linux/spi/tdo24m.h

(limited to 'include/linux')

diff --git a/include/linux/spi/tdo24m.h b/include/linux/spi/tdo24m.h
new file mode 100644
index 000000000000..7572d4e1fe76
--- /dev/null
+++ b/include/linux/spi/tdo24m.h
@@ -0,0 +1,13 @@
+#ifndef __TDO24M_H__
+#define __TDO24M_H__
+
+enum tdo24m_model {
+	TDO24M,
+	TDO35S,
+};
+
+struct tdo24m_platform_data {
+	enum tdo24m_model model;
+};
+
+#endif /* __TDO24M_H__ */
-- 
cgit v1.2.2


From 0c3573f19d135d718264e38c46597295bd6154b7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 9 Jan 2009 08:31:05 +1100
Subject: md: use sysfs_notify_dirent to notify changes to md/sync_action.

There is no compelling need for this, but sysfs_notify_dirent is a
nicer interface and the change is good for consistency.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 8fc909ef6787..663803eaf0de 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -244,6 +244,7 @@ struct mddev_s
 	struct sysfs_dirent		*sysfs_state;	/* handle for 'array_state'
 							 * file in sysfs.
 							 */
+	struct sysfs_dirent		*sysfs_action;  /* handle for 'sync_action' */
 
 	spinlock_t			write_lock;
 	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
-- 
cgit v1.2.2


From 019c4e2f3e02aac4b44003913b54ca4b332e4371 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Fri, 9 Jan 2009 08:31:06 +1100
Subject: md: raid0: Represent device offset in sectors.

Rename zone->dev_offset to zone->dev_start to make sure all users
have been converted.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid0.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h
index 1b2dda035f8e..61c3d29dc158 100644
--- a/include/linux/raid/raid0.h
+++ b/include/linux/raid/raid0.h
@@ -6,7 +6,7 @@
 struct strip_zone
 {
 	sector_t zone_offset;	/* Zone offset in md_dev */
-	sector_t dev_offset;	/* Zone offset in real dev */
+	sector_t dev_start;	/* Zone offset in real dev (in sectors) */
 	sector_t size;		/* Zone size */
 	int nb_dev;		/* # of devices attached to the zone */
 	mdk_rdev_t **dev;	/* Devices attached to the zone */
-- 
cgit v1.2.2


From 6199d3db0fc34f8ada895879d04a353a6ae632bc Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Fri, 9 Jan 2009 08:31:07 +1100
Subject: md: raid0: Represent zone->zone_offset in sectors.

For the same reason as in the previous patch, rename it from zone_offset
to zone_start.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid0.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h
index 61c3d29dc158..eaf4f6ac55f6 100644
--- a/include/linux/raid/raid0.h
+++ b/include/linux/raid/raid0.h
@@ -5,7 +5,7 @@
 
 struct strip_zone
 {
-	sector_t zone_offset;	/* Zone offset in md_dev */
+	sector_t zone_start;	/* Zone offset in md_dev (in sectors) */
 	sector_t dev_start;	/* Zone offset in real dev (in sectors) */
 	sector_t size;		/* Zone size */
 	int nb_dev;		/* # of devices attached to the zone */
-- 
cgit v1.2.2


From 83838ed87898e0a8ff8dbf001e54e6c017f0a011 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Fri, 9 Jan 2009 08:31:07 +1100
Subject: md: raid0: Represent the size of strip zones in sectors.

This completes the block -> sector conversion of struct strip_zone.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid0.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h
index eaf4f6ac55f6..c12521d027e2 100644
--- a/include/linux/raid/raid0.h
+++ b/include/linux/raid/raid0.h
@@ -7,7 +7,7 @@ struct strip_zone
 {
 	sector_t zone_start;	/* Zone offset in md_dev (in sectors) */
 	sector_t dev_start;	/* Zone offset in real dev (in sectors) */
-	sector_t size;		/* Zone size */
+	sector_t sectors;	/* Zone size in sectors */
 	int nb_dev;		/* # of devices attached to the zone */
 	mdk_rdev_t **dev;	/* Devices attached to the zone */
 };
-- 
cgit v1.2.2


From ccacc7d2cf03114a24ab903f710118e9e5d43273 Mon Sep 17 00:00:00 2001
From: Andre Noll <maan@systemlinux.org>
Date: Fri, 9 Jan 2009 08:31:08 +1100
Subject: md: raid0: make hash_spacing and preshift sector-based.

This patch renames the hash_spacing and preshift members of struct
raid0_private_data to spacing and sector_shift respectively and
changes the semantics as follows:

We always have spacing = 2 * hash_spacing. In case
sizeof(sector_t) > sizeof(u32) we also have sector_shift = preshift + 1
while sector_shift = preshift = 0 otherwise.

Note that the values of nb_zone and zone are unaffected by these changes
because in the sector_div() preceeding the assignement of these two
variables both arguments double.

Signed-off-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/raid0.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid0.h b/include/linux/raid/raid0.h
index c12521d027e2..fd42aa87c391 100644
--- a/include/linux/raid/raid0.h
+++ b/include/linux/raid/raid0.h
@@ -19,8 +19,8 @@ struct raid0_private_data
 	mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
 	int nr_strip_zones;
 
-	sector_t hash_spacing;
-	int preshift;			/* shift this before divide by hash_spacing */
+	sector_t spacing;
+	int sector_shift; /* shift this before divide by spacing */
 };
 
 typedef struct raid0_private_data raid0_conf_t;
-- 
cgit v1.2.2


From 159ec1fc060ab22b157a62364045f5e98749c4d3 Mon Sep 17 00:00:00 2001
From: Cheng Renquan <crquan@gmail.com>
Date: Fri, 9 Jan 2009 08:31:08 +1100
Subject: md: use list_for_each_entry macro directly

The rdev_for_each macro defined in <linux/raid/md_k.h> is identical to
list_for_each_entry_safe, from <linux/list.h>, it should be defined to
use list_for_each_entry_safe, instead of reinventing the wheel.

But some calls to each_entry_safe don't really need a safe version,
just a direct list_for_each_entry is enough, this could save a temp
variable (tmp) in every function that used rdev_for_each.

In this patch, most rdev_for_each loops are replaced by list_for_each_entry,
totally save many tmp vars; and only in the other situations that will call
list_del to delete an entry, the safe version is used.

Signed-off-by: Cheng Renquan <crquan@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 663803eaf0de..8f9a54c1fb0e 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -335,17 +335,14 @@ static inline char * mdname (mddev_t * mddev)
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
  */
-#define rdev_for_each_list(rdev, tmp, list)				\
-									\
-	for ((tmp) = (list).next;					\
-		(rdev) = (list_entry((tmp), mdk_rdev_t, same_set)),	\
-			(tmp) = (tmp)->next, (tmp)->prev != &(list)	\
-		; )
+#define rdev_for_each_list(rdev, tmp, head)				\
+	list_for_each_entry_safe(rdev, tmp, head, same_set)
+
 /*
  * iterates through the 'same array disks' ringlist
  */
 #define rdev_for_each(rdev, tmp, mddev)				\
-	rdev_for_each_list(rdev, tmp, (mddev)->disks)
+	list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)
 
 #define rdev_for_each_rcu(rdev, mddev)				\
 	list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
-- 
cgit v1.2.2


From cd2ac9321c26dc7a76455cd2a4df89123fa2b73e Mon Sep 17 00:00:00 2001
From: Cheng Renquan <crquan@gmail.com>
Date: Fri, 9 Jan 2009 08:31:08 +1100
Subject: md: need another print_sb for mdp_superblock_1

md_print_devices is called in two code path: MD_BUG(...), and md_ioctl
with PRINT_RAID_DEBUG.  it will dump out all in use md devices
information;

However, it wrongly processed two types of superblock in one:

The header file <linux/raid/md_p.h> has defined two types of superblock,
struct mdp_superblock_s (typedefed with mdp_super_t) according to md with
metadata 0.90, and struct mdp_superblock_1 according to md with metadata
1.0 and later,

These two types of superblock are very different,

The md_print_devices code processed them both in mdp_super_t, that would
lead to wrong informaton dump like:

	[ 6742.345877]
	[ 6742.345887] md:	**********************************
	[ 6742.345890] md:	* <COMPLETE RAID STATE PRINTOUT> *
	[ 6742.345892] md:	**********************************
	[ 6742.345896] md1: <ram7><ram6><ram5><ram4>
	[ 6742.345907] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3
	[ 6742.345909] md: rdev superblock:
	[ 6742.345914] md:  SB: (V:0.90.0) ID:<42ef13c7.598c059a.5f9f1645.801e9ee6> CT:4919856d
	[ 6742.345918] md:     L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536
	[ 6742.345922] md:     UT:4919856d ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:b7992907 E:00000001
	[ 6742.345924]      D  0:  DISK<N:0,(1,8),R:0,S:6>
	[ 6742.345930]      D  1:  DISK<N:1,(1,10),R:1,S:6>
	[ 6742.345933]      D  2:  DISK<N:2,(1,12),R:2,S:6>
	[ 6742.345937]      D  3:  DISK<N:3,(1,14),R:3,S:6>
	[ 6742.345942] md:     THIS:  DISK<N:3,(1,14),R:3,S:6>
	...
	[ 6742.346058] md0: <ram3><ram2><ram1><ram0>
	[ 6742.346067] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3
	[ 6742.346070] md: rdev superblock:
	[ 6742.346073] md:  SB: (V:1.0.0) ID:<369aad81.00000000.00000000.00000000> CT:9a322a9c
	[ 6742.346077] md:     L-1507699579 S976570180 ND:48 RD:0 md0 LO:65536 CS:196610
	[ 6742.346081] md:     UT:00000018 ST:0 AD:131048 WD:0 FD:8 SD:0 CSUM:00000000 E:00000000
	[ 6742.346084]      D  0:  DISK<N:-1,(-1,-1),R:-1,S:-1>
	[ 6742.346089]      D  1:  DISK<N:-1,(-1,-1),R:-1,S:-1>
	[ 6742.346092]      D  2:  DISK<N:-1,(-1,-1),R:-1,S:-1>
	[ 6742.346096]      D  3:  DISK<N:-1,(-1,-1),R:-1,S:-1>
	[ 6742.346102] md:     THIS:  DISK<N:0,(0,0),R:0,S:0>
	...
	[ 6742.346219] md:	**********************************
	[ 6742.346221]

Here md1 is metadata 0.90.0, and md0 is metadata 1.2

After some more code to distinguish these two types of superblock, in this patch,

it will generate dump information like:

	[ 7906.755790]
	[ 7906.755799] md:	**********************************
	[ 7906.755802] md:	* <COMPLETE RAID STATE PRINTOUT> *
	[ 7906.755804] md:	**********************************
	[ 7906.755808] md1: <ram7><ram6><ram5><ram4>
	[ 7906.755819] md: rdev ram7, SZ:00065472 F:0 S:1 DN:3
	[ 7906.755821] md: rdev superblock (MJ:0):
	[ 7906.755826] md:  SB: (V:0.90.0) ID:<3fca7a0d.a612bfed.5f9f1645.801e9ee6> CT:491989f3
	[ 7906.755830] md:     L5 S00065472 ND:4 RD:4 md1 LO:2 CS:65536
	[ 7906.755834] md:     UT:491989f3 ST:1 AD:4 WD:4 FD:0 SD:0 CSUM:00fb52ad E:00000001
	[ 7906.755836]      D  0:  DISK<N:0,(1,8),R:0,S:6>
	[ 7906.755842]      D  1:  DISK<N:1,(1,10),R:1,S:6>
	[ 7906.755845]      D  2:  DISK<N:2,(1,12),R:2,S:6>
	[ 7906.755849]      D  3:  DISK<N:3,(1,14),R:3,S:6>
	[ 7906.755855] md:     THIS:  DISK<N:3,(1,14),R:3,S:6>
	...
	[ 7906.755972] md0: <ram3><ram2><ram1><ram0>
	[ 7906.755981] md: rdev ram3, SZ:00065472 F:0 S:1 DN:3
	[ 7906.755984] md: rdev superblock (MJ:1):
	[ 7906.755989] md:  SB: (V:1) (F:0) Array-ID:<5fbcf158:55aa:5fbe:9a79:1e939880dcbd>
	[ 7906.755990] md:    Name: "DG5:0" CT:1226410480
	[ 7906.755998] md:       L5 SZ130944 RD:4 LO:2 CS:128 DO:24 DS:131048 SO:8 RO:0
	[ 7906.755999] md:     Dev:00000003 UUID: 9194d744:87f7:a448:85f2:7497b84ce30a
	[ 7906.756001] md:       (F:0) UT:1226410480 Events:0 ResyncOffset:-1 CSUM:0dbcd829
	[ 7906.756003] md:         (MaxDev:384)
	...
	[ 7906.756113] md:	**********************************
	[ 7906.756116]

this md0 (metadata 1.2) information dumping is exactly according to struct
mdp_superblock_1.

Signed-off-by: Cheng Renquan <crquan@gmail.com>
Cc: Neil Brown <neilb@suse.de>
Cc: Dan Williams <dan.j.williams@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_p.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 8b4de4a41ff1..9491026afe66 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -194,6 +194,8 @@ static inline __u64 md_event(mdp_super_t *sb) {
 	return (ev<<32)| sb->events_lo;
 }
 
+#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1)
+
 /*
  * The version-1 superblock :
  * All numeric fields are little-endian.
-- 
cgit v1.2.2


From d3374825ce57ba2214d375023979f6197ccc1385 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 9 Jan 2009 08:31:10 +1100
Subject: md: make devices disappear when they are no longer needed.

Currently md devices, once created, never disappear until the module
is unloaded.  This is essentially because the gendisk holds a
reference to the mddev, and the mddev holds a reference to the
gendisk, this a circular reference.

If we drop the reference from mddev to gendisk, then we need to ensure
that the mddev is destroyed when the gendisk is destroyed.  However it
is not possible to hook into the gendisk destruction process to enable
this.

So we drop the reference from the gendisk to the mddev and destroy the
gendisk when the mddev gets destroyed.  However this has a
complication.
Between the call
   __blkdev_get->get_gendisk->kobj_lookup->md_probe
and the call
   __blkdev_get->md_open

there is no obvious way to hold a reference on the mddev any more, so
unless something is done, it will disappear and gendisk will be
destroyed prematurely.

Also, once we decide to destroy the mddev, there will be an unlockable
moment before the gendisk is unlinked (blk_unregister_region) during
which a new reference to the gendisk can be created.  We need to
ensure that this reference can not be used.  i.e. the ->open must
fail.

So:
 1/  in md_probe we set a flag in the mddev (hold_active) which
     indicates that the array should be treated as active, even
     though there are no references, and no appearance of activity.
     This is cleared by md_release when the device is closed if it
     is no longer needed.
     This ensures that the gendisk will survive between md_probe and
     md_open.

 2/  In md_open we check if the mddev we expect to open matches
     the gendisk that we did open.
     If there is a mismatch we return -ERESTARTSYS and modify
     __blkdev_get to retry from the top in that case.
     In the -ERESTARTSYS sys case we make sure to wait until
     the old gendisk (that we succeeded in opening) is really gone so
     we loop at most once.

Some udev configurations will always open an md device when it first
appears.   If we allow an md device that was just created by an open
to disappear on an immediate close, then this can race with such udev
configurations and result in an infinite loop the device being opened
and closed, then re-open due to the 'ADD' even from the first open,
and then close and so on.
So we make sure an md device, once created by an open, remains active
at least until some md 'ioctl' has been made on it.  This means that
all normal usage of md devices will allow them to disappear promptly
when not needed, but the worst that an incorrect usage will do it
cause an inactive md device to be left in existence (it can easily be
removed).

As an array can be stopped by writing to a sysfs attribute
  echo clear > /sys/block/mdXXX/md/array_state
we need to use scheduled work for deleting the gendisk and other
kobjects.  This allows us to wait for any pending gendisk deletion to
complete by simply calling flush_scheduled_work().


Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 8f9a54c1fb0e..e3d17c7f954e 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -137,6 +137,8 @@ struct mddev_s
 	struct gendisk			*gendisk;
 
 	struct kobject			kobj;
+	int				hold_active;
+#define	UNTIL_IOCTL	1
 
 	/* Superblock information */
 	int				major_version,
@@ -246,6 +248,8 @@ struct mddev_s
 							 */
 	struct sysfs_dirent		*sysfs_action;  /* handle for 'sync_action' */
 
+	struct work_struct del_work;	/* used for delayed sysfs removal */
+
 	spinlock_t			write_lock;
 	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
 	atomic_t			pending_writes;	/* number of active superblock writes */
-- 
cgit v1.2.2


From efeb53c0e57213e843b7ef3cc6ebcdea7d6186ac Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 9 Jan 2009 08:31:10 +1100
Subject: md: Allow md devices to be created by name.

Using sequential numbers to identify md devices is somewhat artificial.
Using names can be a lot more user-friendly.

Also, creating md devices by opening the device special file is a bit
awkward.

So this patch provides a new option for creating and naming devices.

Writing a name such as "md_home" to
    /sys/modules/md_mod/parameters/new_array
will cause an array with that name to be created.  It will appear in
/sys/block/ /proc/partitions and /proc/mdstat as 'md_home'.
It will have an arbitrary minor number allocated.

md devices that a created by an open are destroyed on the last
close when the device is inactive.
For named md devices, they will not be destroyed until the array
is explicitly stopped, either with the STOP_ARRAY ioctl or by
writing 'clear' to /sys/block/md_XXXX/md/array_state.

The name of the array must start 'md_' to avoid conflict with
other devices.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index e3d17c7f954e..dac4217194b8 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -139,6 +139,7 @@ struct mddev_s
 	struct kobject			kobj;
 	int				hold_active;
 #define	UNTIL_IOCTL	1
+#define	UNTIL_STOP	2
 
 	/* Superblock information */
 	int				major_version,
-- 
cgit v1.2.2


From 4044ba58dd15cb01797c4fd034f39ef4a75f7cc3 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 9 Jan 2009 08:31:11 +1100
Subject: md: don't retry recovery of raid1 that fails due to error on source
 drive.

If a raid1 has only one working drive and it has a sector which
gives an error on read, then an attempt to recover onto a spare will
fail, but as the single remaining drive is not removed from the
array, the recovery will be immediately re-attempted, resulting
in an infinite recovery loop.

So detect this situation and don't retry recovery once an error
on the lone remaining drive is detected.

Allow recovery to be retried once every time a spare is added
in case the problem wasn't actually a media error.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/md_k.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index dac4217194b8..9743e4dbc918 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -218,6 +218,9 @@ struct mddev_s
 #define	MD_RECOVERY_FROZEN	9
 
 	unsigned long			recovery;
+	int				recovery_disabled; /* if we detect that recovery
+							    * will always fail, set this
+							    * so we don't loop trying */
 
 	int				in_sync;	/* know to not need resync */
 	struct mutex			reconfig_mutex;
-- 
cgit v1.2.2


From 871af1210f13966ab911ed2166e4ab2ce775b99d Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@redhat.com>
Date: Mon, 5 Jan 2009 14:16:39 +0000
Subject: libata: Add 32bit PIO support

This matters for some controllers and in one or two cases almost doubles
PIO performance. Add a bmdma32 operations set we can inherit and activate
it for some controllers

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3449de597eff..4f7c8fb4d3fe 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1518,6 +1518,7 @@ extern void sata_pmp_error_handler(struct ata_port *ap);
 
 extern const struct ata_port_operations ata_sff_port_ops;
 extern const struct ata_port_operations ata_bmdma_port_ops;
+extern const struct ata_port_operations ata_bmdma32_port_ops;
 
 /* PIO only, sg_tablesize and dma_boundary limits can be removed */
 #define ATA_PIO_SHT(drv_name)					\
@@ -1545,6 +1546,8 @@ extern void ata_sff_exec_command(struct ata_port *ap,
 				 const struct ata_taskfile *tf);
 extern unsigned int ata_sff_data_xfer(struct ata_device *dev,
 			unsigned char *buf, unsigned int buflen, int rw);
+extern unsigned int ata_sff_data_xfer32(struct ata_device *dev,
+			unsigned char *buf, unsigned int buflen, int rw);
 extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev,
 			unsigned char *buf, unsigned int buflen, int rw);
 extern u8 ata_sff_irq_on(struct ata_port *ap);
-- 
cgit v1.2.2


From fefae48bf8caab7d56ee4f8181f06602cf73d29e Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Thu, 8 Jan 2009 19:21:27 +0100
Subject: [MTD] CFI: remove major/minor version check for command set 0x0002

The NOR Flash memory K8P2815UQB from Samsung uses the major version
number '0'. Add a quirk to cope with it.

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/cfi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 00e2b575021f..88d3d8fbf9f2 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -520,6 +520,7 @@ struct cfi_fixup {
 
 #define CFI_MFR_AMD 0x0001
 #define CFI_MFR_ATMEL 0x001F
+#define CFI_MFR_SAMSUNG 0x00EC
 #define CFI_MFR_ST  0x0020 	/* STMicroelectronics */
 
 void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
-- 
cgit v1.2.2


From 8dd2f36f317569665e454268a2677cfba3e848f1 Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Sat, 2 Aug 2008 22:51:52 +0200
Subject: mISDN: Add feature via MISDN_CTRL_FILL_EMPTY to fill fifo if empty

This prevents underrun of fifo when filled and in case of an underrun it
prevents subsequent underruns due to jitter.
Improve dsp, so buffers are kept filled with a certain delay, so moderate
jitter will not cause underrun all the time -> the audio quality is highly
improved. tones are not interrupted by gaps anymore, except when CPU is
stalling or in high load.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNhw.h | 25 +++++++++++++------------
 include/linux/mISDNif.h |  1 +
 2 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h
index e794dfb87504..9384b92dfc65 100644
--- a/include/linux/mISDNhw.h
+++ b/include/linux/mISDNhw.h
@@ -57,20 +57,21 @@
 #define FLG_L2DATA		14	/* channel use L2 DATA primitivs */
 #define FLG_ORIGIN		15	/* channel is on origin site */
 /* channel specific stuff */
+#define FLG_FILLEMPTY		16	/* fill fifo on first frame (empty) */
 /* arcofi specific */
-#define FLG_ARCOFI_TIMER	16
-#define FLG_ARCOFI_ERROR	17
+#define FLG_ARCOFI_TIMER	17
+#define FLG_ARCOFI_ERROR	18
 /* isar specific */
-#define FLG_INITIALIZED		16
-#define FLG_DLEETX		17
-#define FLG_LASTDLE		18
-#define FLG_FIRST		19
-#define FLG_LASTDATA		20
-#define FLG_NMD_DATA		21
-#define FLG_FTI_RUN		22
-#define FLG_LL_OK		23
-#define FLG_LL_CONN		24
-#define FLG_DTMFSEND		25
+#define FLG_INITIALIZED		17
+#define FLG_DLEETX		18
+#define FLG_LASTDLE		19
+#define FLG_FIRST		20
+#define FLG_LASTDATA		21
+#define FLG_NMD_DATA		22
+#define FLG_FTI_RUN		23
+#define FLG_LL_OK		24
+#define FLG_LL_CONN		25
+#define FLG_DTMFSEND		26
 
 /* workq events */
 #define FLG_RECVQUEUE		30
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 8f2d60da04e7..74c903cd7a0a 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -312,6 +312,7 @@ clear_channelmap(u_int nr, u_char *map)
 #define MISDN_CTRL_SETPEER		0x0040
 #define MISDN_CTRL_UNSETPEER		0x0080
 #define MISDN_CTRL_RX_OFF		0x0100
+#define MISDN_CTRL_FILL_EMPTY		0x0200
 #define MISDN_CTRL_HW_FEATURES_OP	0x2000
 #define MISDN_CTRL_HW_FEATURES		0x2001
 #define MISDN_CTRL_HFC_OP		0x4000
-- 
cgit v1.2.2


From 8b6015f736125050722dbe59c4f943e78cd626f0 Mon Sep 17 00:00:00 2001
From: Matthias Urlichs <smurf@smurf.noris.de>
Date: Tue, 12 Aug 2008 10:12:09 +0200
Subject: mISDN: Added an ioctl to change the device name

To get persistent device names with hotplug we need to rename devices
sometime.

Signed-off-by: Matthias Urlichs <matthias@urlichs.de>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNif.h | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 74c903cd7a0a..be09476ed854 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -36,8 +36,8 @@
  *              - should be incremented on every checkin
  */
 #define	MISDN_MAJOR_VERSION	1
-#define	MISDN_MINOR_VERSION	0
-#define MISDN_RELEASE		19
+#define	MISDN_MINOR_VERSION	1
+#define MISDN_RELEASE		20
 
 /* primitives for information exchange
  * generell format
@@ -255,16 +255,6 @@ struct sockaddr_mISDN {
 	unsigned char	tei;
 };
 
-/* timer device ioctl */
-#define IMADDTIMER	_IOR('I', 64, int)
-#define IMDELTIMER	_IOR('I', 65, int)
-/* socket ioctls */
-#define	IMGETVERSION	_IOR('I', 66, int)
-#define	IMGETCOUNT	_IOR('I', 67, int)
-#define IMGETDEVINFO	_IOR('I', 68, int)
-#define IMCTRLREQ	_IOR('I', 69, int)
-#define IMCLEAR_L2	_IOR('I', 70, int)
-
 struct mISDNversion {
 	unsigned char	major;
 	unsigned char	minor;
@@ -281,6 +271,23 @@ struct mISDN_devinfo {
 	char			name[MISDN_MAX_IDLEN];
 };
 
+struct mISDN_devrename {
+	u_int			id;
+	char			name[MISDN_MAX_IDLEN]; /* new name */
+};
+
+/* timer device ioctl */
+#define IMADDTIMER	_IOR('I', 64, int)
+#define IMDELTIMER	_IOR('I', 65, int)
+
+/* socket ioctls */
+#define	IMGETVERSION	_IOR('I', 66, int)
+#define	IMGETCOUNT	_IOR('I', 67, int)
+#define IMGETDEVINFO	_IOR('I', 68, int)
+#define IMCTRLREQ	_IOR('I', 69, int)
+#define IMCLEAR_L2	_IOR('I', 70, int)
+#define IMSETDEVNAME	_IOR('I', 71, struct mISDN_devrename)
+
 static inline int
 test_channelmap(u_int nr, u_char *map)
 {
-- 
cgit v1.2.2


From 837468d135dcc49cdabc9fa92fc9550479f60704 Mon Sep 17 00:00:00 2001
From: Matthias Urlichs <smurf@smurf.noris.de>
Date: Sat, 16 Aug 2008 00:04:33 +0200
Subject: mISDN: Use struct device name field

struct device already has a 'name' member, use it.

Signed-off-by: Matthias Urlichs <matthias@urlichs.de>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNif.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index be09476ed854..a59febb6143a 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -431,7 +431,6 @@ struct mISDN_sock {
 struct mISDNdevice {
 	struct mISDNchannel	D;
 	u_int			id;
-	char			name[MISDN_MAX_IDLEN];
 	u_int			Dprotocols;
 	u_int			Bprotocols;
 	u_int			nrbchan;
-- 
cgit v1.2.2


From 1f28fa19d34c0d9186f274e61e4b3dcfc6428c5c Mon Sep 17 00:00:00 2001
From: Martin Bachem <m.bachem@gmx.de>
Date: Wed, 3 Sep 2008 15:17:45 +0200
Subject: mISDN: Add E-Channel logging features

New prim PH_DATA_E_IND.

 - all E-ch frames are indicated by recv_Echannel(), which pushes E-Channel
   frames into dch's rqueue
 - if dchannel is opened with channel nr 0, no E-Channel logging
   is requested
 - if dchannel is opened with channel nr 1, E-Channel logging
   is requested. if layer1 does not support that, -EINVAL
   in return is appropriate

Signed-off-by: Martin Bachem <m.bachem@gmx.de>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNhw.h | 1 +
 include/linux/mISDNif.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h
index 9384b92dfc65..97ffdc1d3442 100644
--- a/include/linux/mISDNhw.h
+++ b/include/linux/mISDNhw.h
@@ -184,6 +184,7 @@ extern void	queue_ch_frame(struct mISDNchannel *, u_int,
 extern int	dchannel_senddata(struct dchannel *, struct sk_buff *);
 extern int	bchannel_senddata(struct bchannel *, struct sk_buff *);
 extern void	recv_Dchannel(struct dchannel *);
+extern void	recv_Echannel(struct dchannel *, struct dchannel *);
 extern void	recv_Bchannel(struct bchannel *);
 extern void	recv_Dchannel_skb(struct dchannel *, struct sk_buff *);
 extern void	recv_Bchannel_skb(struct bchannel *, struct sk_buff *);
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index a59febb6143a..f75d596c5316 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -80,6 +80,7 @@
 #define PH_DEACTIVATE_IND	0x0202
 #define PH_DEACTIVATE_CNF	0x4202
 #define PH_DATA_IND		0x2002
+#define PH_DATA_E_IND		0x3002
 #define MPH_ACTIVATE_IND	0x0502
 #define MPH_DEACTIVATE_IND	0x0602
 #define MPH_INFORMATION_IND	0x0702
-- 
cgit v1.2.2


From 3bd69ad197a4a3d0085a5dc3b5796111bf176b12 Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Sat, 6 Sep 2008 09:03:46 +0200
Subject: mISDN: Add ISDN sample clock API to mISDN core

Add ISDN sample clock API to mISDN core (new file clock.c)
hfcmulti and mISDNdsp use clock API.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNif.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index f75d596c5316..364f1018f0d1 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -371,6 +371,7 @@ struct mISDN_ctrl_req {
 #define DEBUG_L2_TEI		0x00100000
 #define DEBUG_L2_TEIFSM		0x00200000
 #define DEBUG_TIMER		0x01000000
+#define DEBUG_CLOCK		0x02000000
 
 #define mISDN_HEAD_P(s)		((struct mISDNhead *)&s->cb[0])
 #define mISDN_HEAD_PRIM(s)	(((struct mISDNhead *)&s->cb[0])->prim)
@@ -384,6 +385,7 @@ struct mISDN_ctrl_req {
 struct mISDNchannel;
 struct mISDNdevice;
 struct mISDNstack;
+struct mISDNclock;
 
 struct channel_req {
 	u_int			protocol;
@@ -460,6 +462,16 @@ struct mISDNstack {
 #endif
 };
 
+typedef	int	(clockctl_func_t)(void *, int);
+
+struct	mISDNclock {
+	struct list_head	list;
+	char			name[64];
+	int			pri;
+	clockctl_func_t		*ctl;
+	void			*priv;
+};
+
 /* global alloc/queue functions */
 
 static inline struct sk_buff *
@@ -510,8 +522,13 @@ extern int	mISDN_register_device(struct mISDNdevice *, char *name);
 extern void	mISDN_unregister_device(struct mISDNdevice *);
 extern int	mISDN_register_Bprotocol(struct Bprotocol *);
 extern void	mISDN_unregister_Bprotocol(struct Bprotocol *);
+extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *,
+						void *);
+extern void	mISDN_unregister_clock(struct mISDNclock *);
 
 extern void	set_channel_address(struct mISDNchannel *, u_int, u_int);
+extern void	mISDN_clock_update(struct mISDNclock *, int, struct timeval *);
+extern unsigned short mISDN_clock_get(void);
 
 #endif /* __KERNEL__ */
 #endif /* mISDNIF_H */
-- 
cgit v1.2.2


From 02282eee56b75a35e6bbc42cc34c9005eb1653f4 Mon Sep 17 00:00:00 2001
From: Martin Bachem <m.bachem@gmx.de>
Date: Mon, 8 Sep 2008 15:57:48 +0200
Subject: mISDN: Add ISDN_P_TE_UP0 / ISDN_P_NT_UP0

- new layer1 protocols for UP0 bus
- helper #defines to test for TE/NT/S0/E1/UP0

Signed-off-by: Martin Bachem <m.bachem@gmx.de>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNif.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 364f1018f0d1..7f65aa0c1cc5 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -200,6 +200,18 @@
 #define ISDN_P_NT_S0  		0x02
 #define ISDN_P_TE_E1		0x03
 #define ISDN_P_NT_E1  		0x04
+#define ISDN_P_TE_UP0		0x05
+#define ISDN_P_NT_UP0		0x06
+
+#define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \
+				(p == ISDN_P_TE_UP0))
+#define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \
+				(p == ISDN_P_NT_UP0))
+#define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0))
+#define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1))
+#define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0))
+
+
 #define ISDN_P_LAPD_TE		0x10
 #define	ISDN_P_LAPD_NT		0x11
 
-- 
cgit v1.2.2


From 1b4d33121f1d991f6ae226cc3333428ff87627bb Mon Sep 17 00:00:00 2001
From: Andreas Eversberg <andreas@eversberg.eu>
Date: Sun, 14 Sep 2008 12:30:18 +0200
Subject: mISDN: Fix deactivation, if peer IP is removed from l1oip instance.

 Added GETPEER operation.
 Socket now checks if device is already busy at a differen mode.

Signed-off-by: Andreas Eversberg <andreas@eversberg.eu>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNif.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 7f65aa0c1cc5..3f9988849f32 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -204,9 +204,9 @@
 #define ISDN_P_NT_UP0		0x06
 
 #define IS_ISDN_P_TE(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_TE_E1) || \
-				(p == ISDN_P_TE_UP0))
+				(p == ISDN_P_TE_UP0) || (p == ISDN_P_LAPD_TE))
 #define IS_ISDN_P_NT(p) ((p == ISDN_P_NT_S0) || (p == ISDN_P_NT_E1) || \
-				(p == ISDN_P_NT_UP0))
+				(p == ISDN_P_NT_UP0) || (p == ISDN_P_LAPD_NT))
 #define IS_ISDN_P_S0(p) ((p == ISDN_P_TE_S0) || (p == ISDN_P_NT_S0))
 #define IS_ISDN_P_E1(p) ((p == ISDN_P_TE_E1) || (p == ISDN_P_NT_E1))
 #define IS_ISDN_P_UP0(p) ((p == ISDN_P_TE_UP0) || (p == ISDN_P_NT_UP0))
@@ -333,6 +333,7 @@ clear_channelmap(u_int nr, u_char *map)
 #define MISDN_CTRL_UNSETPEER		0x0080
 #define MISDN_CTRL_RX_OFF		0x0100
 #define MISDN_CTRL_FILL_EMPTY		0x0200
+#define MISDN_CTRL_GETPEER		0x0400
 #define MISDN_CTRL_HW_FEATURES_OP	0x2000
 #define MISDN_CTRL_HW_FEATURES		0x2001
 #define MISDN_CTRL_HFC_OP		0x4000
-- 
cgit v1.2.2


From b36b654a7e82308cea063cdf909a7f246105c2a3 Mon Sep 17 00:00:00 2001
From: Matthias Urlichs <smurf@smurf.noris.de>
Date: Sat, 16 Aug 2008 00:09:24 +0200
Subject: mISDN: Create /sys/class/mISDN

Create /sys/class/mISDN and implement functions to handle
device renames.

Signed-Off-By: Matthias Urlichs <matthias@urlichs.de>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNif.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 3f9988849f32..d4229aebf648 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -531,7 +531,8 @@ _queue_data(struct mISDNchannel *ch, u_int prim,
 
 /* global register/unregister functions */
 
-extern int	mISDN_register_device(struct mISDNdevice *, char *name);
+extern int	mISDN_register_device(struct mISDNdevice *,
+					struct device *parent, char *name);
 extern void	mISDN_unregister_device(struct mISDNdevice *);
 extern int	mISDN_register_Bprotocol(struct Bprotocol *);
 extern void	mISDN_unregister_Bprotocol(struct Bprotocol *);
@@ -539,6 +540,11 @@ extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *,
 						void *);
 extern void	mISDN_unregister_clock(struct mISDNclock *);
 
+static inline struct mISDNdevice *dev_to_mISDN(struct device *dev)
+{
+	return dev_get_drvdata(dev);
+}
+
 extern void	set_channel_address(struct mISDNchannel *, u_int, u_int);
 extern void	mISDN_clock_update(struct mISDNclock *, int, struct timeval *);
 extern unsigned short mISDN_clock_get(void);
-- 
cgit v1.2.2


From 3f75e84a6a697c5cffb78ee15e79498a35473e05 Mon Sep 17 00:00:00 2001
From: Martin Bachem <m.bachem@gmx.de>
Date: Tue, 4 Nov 2008 14:11:22 +0100
Subject: mISDN: Add layer1 prim MPH_INFORMATION_REQ

MPH_INFORMATION provides full D- and B-Channel status overview

- new layer1 primitive: MPF_INFORMATON_REQ
- layer1 replies with MPH_INFORMATION_IND containing
   - dch->[state,Flags,nrbchan]
   - bch[]->[protocol,Flags]
- hardware driver should send MPH_INFORMATION_IND
  on all ph state changes and BChannel state changes to MISDN_ID_ANY

Signed-off-by: Martin Bachem <m.bachem@gmx.de>
Signed-off-by: Karsten Keil <kkeil@suse.de>
---
 include/linux/mISDNif.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index d4229aebf648..557477ac3d5b 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -289,6 +289,23 @@ struct mISDN_devrename {
 	char			name[MISDN_MAX_IDLEN]; /* new name */
 };
 
+/* MPH_INFORMATION_REQ payload */
+struct ph_info_ch {
+        __u32 protocol;
+        __u64 Flags;
+};
+
+struct ph_info_dch {
+        struct ph_info_ch ch;
+        __u16 state;
+        __u16 num_bch;
+};
+
+struct ph_info {
+        struct ph_info_dch dch;
+        struct ph_info_ch  bch[];
+};
+
 /* timer device ioctl */
 #define IMADDTIMER	_IOR('I', 64, int)
 #define IMDELTIMER	_IOR('I', 65, int)
-- 
cgit v1.2.2


From 4db8e282f2d1dfa43d51ce2a4817901312c9134d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 9 Jan 2009 14:32:46 -0800
Subject: Revert "driver core: move knode_bus into private structure"

This reverts commit b9daa99ee533578e3f88231e7a16784dcb44ec42.

Turns out that device_initialize shouldn't fail silently.
This series needs to be reworked in order to get into proper
shape.

Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 7d9da4b4993f..8987f4776064 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -366,6 +366,7 @@ struct device_dma_parameters {
 };
 
 struct device {
+	struct klist_node	knode_bus;
 	struct device		*parent;
 
 	struct device_private	*p;
-- 
cgit v1.2.2


From cda5e83fdea476dce9c0a9b1152cd6ca46832cc4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 9 Jan 2009 14:44:18 -0800
Subject: Revert "driver core: move knode_driver into private structure"

This reverts commit 93e746db183b3bdbbda67900f79b5835f9cb388f.

Turns out that device_initialize shouldn't fail silently.
This series needs to be reworked in order to get into proper
shape.

Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 8987f4776064..c66ceb15acd8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -366,6 +366,7 @@ struct device_dma_parameters {
 };
 
 struct device {
+	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device		*parent;
 
-- 
cgit v1.2.2


From e2d4077678c7ec7661003c268120582adc544897 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 9 Jan 2009 14:55:37 -0800
Subject: Revert "driver core: move klist_children into private structure"

This reverts commit 11c3b5c3e08f4d855cbef52883c266b9ab9df879.

Turns out that device_initialize shouldn't fail silently.
This series needs to be reworked in order to get into proper
shape.

Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index c66ceb15acd8..2975351635d3 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -366,6 +366,8 @@ struct device_dma_parameters {
 };
 
 struct device {
+	struct klist		klist_children;
+	struct klist_node	knode_parent;	/* node in sibling list */
 	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device		*parent;
-- 
cgit v1.2.2


From 926beadb3dfaddccb3348a5b9e6c2a1f8290a220 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 9 Jan 2009 15:06:12 -0800
Subject: Revert "driver core: create a private portion of struct device"

This reverts commit 2831fe6f9cc4e16c103504ee09a47a084297c0f3.

Turns out that device_initialize shouldn't fail silently.
This series needs to be reworked in order to get into proper
shape.

Reported-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 2975351635d3..45e5b1921fbb 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -28,7 +28,6 @@
 #define BUS_ID_SIZE		20
 
 struct device;
-struct device_private;
 struct device_driver;
 struct driver_private;
 struct class;
@@ -372,8 +371,6 @@ struct device {
 	struct klist_node	knode_bus;
 	struct device		*parent;
 
-	struct device_private	*p;
-
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
 	unsigned		uevent_suppress:1;
-- 
cgit v1.2.2


From 85c210edc46d602a1562aeea0fc74919349c8cf0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Fri, 9 Jan 2009 16:40:53 -0800
Subject: compiler-gcc.h: add more comments to RELOC_HIDE

Requested by C. Lameter

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Mike Travis <travis@sgi.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index af40f8eb86f0..1514d534deeb 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -11,9 +11,19 @@
 /* The "volatile" is due to gcc bugs */
 #define barrier() __asm__ __volatile__("": : :"memory")
 
-/* This macro obfuscates arithmetic on a variable address so that gcc
-   shouldn't recognize the original var, and make assumptions about it */
 /*
+ * This macro obfuscates arithmetic on a variable address so that gcc
+ * shouldn't recognize the original var, and make assumptions about it.
+ *
+ * This is needed because the C standard makes it undefined to do
+ * pointer arithmetic on "objects" outside their boundaries and the
+ * gcc optimizers assume this is the case. In particular they
+ * assume such arithmetic does not wrap.
+ *
+ * A miscompilation has been observed because of this on PPC.
+ * To work around it we hide the relationship of the pointer and the object
+ * using this macro.
+ *
  * Versions of the ppc64 compiler before 4.1 had a bug where use of
  * RELOC_HIDE could trash r30. The bug can be worked around by changing
  * the inline assembly constraint from =g to =r, in this particular
-- 
cgit v1.2.2


From 69347a236b22c3962ea812511495e502dedfd50c Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 9 Jan 2009 16:40:56 -0800
Subject: memstick: annotate endianness of attribute structs

The code was shifting the endianness appropriately everywhere, annotate
the structs to avoid the sparse warnings when assigning the endian types
to the struct members, or passing them to be[16|32]_to_cpu:

drivers/memstick/core/mspro_block.c:331:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:333:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:335:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:337:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:341:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:347:4: warning: cast to restricted __be32
drivers/memstick/core/mspro_block.c:356:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:358:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:364:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:367:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:369:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:371:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:377:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:478:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:480:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:482:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:484:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:486:4: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:689:22:    expected unsigned int [unsigned] [assigned] data_address
drivers/memstick/core/mspro_block.c:689:22:    got restricted __be32 [usertype] <noident>
drivers/memstick/core/mspro_block.c:697:3: warning: cast to restricted __be32
drivers/memstick/core/mspro_block.c:960:17: warning: incorrect type in initializer (different base types)
drivers/memstick/core/mspro_block.c:960:17:    expected unsigned short [unsigned] data_count
drivers/memstick/core/mspro_block.c:960:17:    got restricted __be16 [usertype] <noident>
drivers/memstick/core/mspro_block.c:993:6: warning: cast to restricted __be16
drivers/memstick/core/mspro_block.c:995:28: warning: cast to restricted __be16

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: Alex Dubov <oakad@yahoo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memstick.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memstick.h b/include/linux/memstick.h
index d0c37e682234..690c35a9d4cc 100644
--- a/include/linux/memstick.h
+++ b/include/linux/memstick.h
@@ -100,8 +100,8 @@ struct mspro_param_register {
 #define MEMSTICK_SYS_PAR8   0x40
 #define MEMSTICK_SYS_SERIAL 0x80
 
-	unsigned short data_count;
-	unsigned int   data_address;
+	__be16 data_count;
+	__be32 data_address;
 	unsigned char  tpc_param;
 } __attribute__((packed));
 
-- 
cgit v1.2.2


From c4be0c1dc4cdc37b175579be1460f15ac6495e9a Mon Sep 17 00:00:00 2001
From: Takashi Sato <t-sato@yk.jp.nec.com>
Date: Fri, 9 Jan 2009 16:40:58 -0800
Subject: filesystem freeze: add error handling of write_super_lockfs/unlockfs

Currently, ext3 in mainline Linux doesn't have the freeze feature which
suspends write requests.  So, we cannot take a backup which keeps the
filesystem's consistency with the storage device's features (snapshot and
replication) while it is mounted.

In many case, a commercial filesystem (e.g.  VxFS) has the freeze feature
and it would be used to get the consistent backup.

If Linux's standard filesystem ext3 has the freeze feature, we can do it
without a commercial filesystem.

So I have implemented the ioctls of the freeze feature.
I think we can take the consistent backup with the following steps.
1. Freeze the filesystem with the freeze ioctl.
2. Separate the replication volume or create the snapshot
   with the storage device's feature.
3. Unfreeze the filesystem with the unfreeze ioctl.
4. Take the backup from the separated replication volume
   or the snapshot.

This patch:

VFS:
Changed the type of write_super_lockfs and unlockfs from "void"
to "int" so that they can return an error.
Rename write_super_lockfs and unlockfs of the super block operation
freeze_fs and unfreeze_fs to avoid a confusion.

ext3, ext4, xfs, gfs2, jfs:
Changed the type of write_super_lockfs and unlockfs from "void"
to "int" so that write_super_lockfs returns an error if needed,
and unlockfs always returns 0.

reiserfs:
Changed the type of write_super_lockfs and unlockfs from "void"
to "int" so that they always return 0 (success) to keep a current behavior.

Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
Cc: <xfs-masters@oss.sgi.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0b87b29f4797..3e59182de9df 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1377,8 +1377,8 @@ struct super_operations {
 	void (*put_super) (struct super_block *);
 	void (*write_super) (struct super_block *);
 	int (*sync_fs)(struct super_block *sb, int wait);
-	void (*write_super_lockfs) (struct super_block *);
-	void (*unlockfs) (struct super_block *);
+	int (*freeze_fs) (struct super_block *);
+	int (*unfreeze_fs) (struct super_block *);
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *);
 	void (*clear_inode) (struct inode *);
-- 
cgit v1.2.2


From fcccf502540e3d752d33b2d8e976034dee81f9f7 Mon Sep 17 00:00:00 2001
From: Takashi Sato <t-sato@yk.jp.nec.com>
Date: Fri, 9 Jan 2009 16:40:59 -0800
Subject: filesystem freeze: implement generic freeze feature

The ioctls for the generic freeze feature are below.
o Freeze the filesystem
  int ioctl(int fd, int FIFREEZE, arg)
    fd: The file descriptor of the mountpoint
    FIFREEZE: request code for the freeze
    arg: Ignored
    Return value: 0 if the operation succeeds. Otherwise, -1

o Unfreeze the filesystem
  int ioctl(int fd, int FITHAW, arg)
    fd: The file descriptor of the mountpoint
    FITHAW: request code for unfreeze
    arg: Ignored
    Return value: 0 if the operation succeeds. Otherwise, -1
    Error number: If the filesystem has already been unfrozen,
                  errno is set to EINVAL.

[akpm@linux-foundation.org: fix CONFIG_BLOCK=n]
Signed-off-by: Takashi Sato <t-sato@yk.jp.nec.com>
Signed-off-by: Masayuki Hamaguchi <m-hamaguchi@ys.jp.nec.com>
Cc: <xfs-masters@oss.sgi.com>
Cc: <linux-ext4@vger.kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/buffer_head.h | 11 ++++++++++-
 include/linux/fs.h          |  7 +++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 8605f8a74df9..bd7ac793be19 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -171,7 +171,7 @@ void __wait_on_buffer(struct buffer_head *);
 wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
 int fsync_bdev(struct block_device *);
 struct super_block *freeze_bdev(struct block_device *);
-void thaw_bdev(struct block_device *, struct super_block *);
+int thaw_bdev(struct block_device *, struct super_block *);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
@@ -346,6 +346,15 @@ static inline int remove_inode_buffers(struct inode *inode) { return 1; }
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
 static inline void invalidate_bdev(struct block_device *bdev) {}
 
+static inline struct super_block *freeze_bdev(struct block_device *sb)
+{
+	return NULL;
+}
+
+static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+	return 0;
+}
 
 #endif /* CONFIG_BLOCK */
 #endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3e59182de9df..6022f44043f2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -234,6 +234,8 @@ struct inodes_stat_t {
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
+#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
+#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
 
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
@@ -591,6 +593,11 @@ struct block_device {
 	 * care to not mess up bd_private for that case.
 	 */
 	unsigned long		bd_private;
+
+	/* The counter of freeze processes */
+	int			bd_fsfreeze_count;
+	/* Mutex for freeze */
+	struct mutex		bd_fsfreeze_mutex;
 };
 
 /*
-- 
cgit v1.2.2


From f4b477c47332367d35686bd2b808c2156b96d7c7 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sat, 10 Jan 2009 11:12:09 +0000
Subject: rbtree: add const qualifier to some functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 'rb_first()', 'rb_last()', 'rb_next()' and 'rb_prev()' calls
take a pointer to an RB node or RB root. They do not change the
pointed objects, so add a 'const' qualifier in order to make life
of the users of these functions easier.

Indeed, if I have my own constant pointer &const struct my_type *p,
and I call 'rb_next(&p->rb)', I get a GCC warning:

warning: passing argument 1 of ‘rb_next’ discards qualifiers from pointer target type

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 344bc3495ddb..9c295411d01f 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -140,10 +140,10 @@ extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
 
 /* Find logical next and previous nodes in a tree */
-extern struct rb_node *rb_next(struct rb_node *);
-extern struct rb_node *rb_prev(struct rb_node *);
-extern struct rb_node *rb_first(struct rb_root *);
-extern struct rb_node *rb_last(struct rb_root *);
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
 
 /* Fast replacement of a single node without remove/rebalance/add/rebalance */
 extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 
-- 
cgit v1.2.2


From 886ad09fc83342aa1c5a02a0b6d3298b78a8067f Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 9 Jan 2009 15:54:07 -0800
Subject: libata: Add a per-host flag to opt-in into parallel port probes

This patch adds a per host flag that allows drivers to opt in into
having its busses scanned in parallel.

Drivers that do not set this flag get their ports scanned in
the "original" sequence.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 4f7c8fb4d3fe..b6b8a7f3ec66 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -239,6 +239,7 @@ enum {
 	/* host set flags */
 	ATA_HOST_SIMPLEX	= (1 << 0),	/* Host is simplex, one DMA channel per host only */
 	ATA_HOST_STARTED	= (1 << 1),	/* Host started */
+	ATA_HOST_PARALLEL_SCAN	= (1 << 2),	/* Ports on this host can be scanned in parallel */
 
 	/* bits 24:31 of host->flags are reserved for LLD specific flags */
 
-- 
cgit v1.2.2