From c2f0c7c356dc9ae15419f00c725a2fcc58eeff58 Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Fri, 6 May 2005 12:38:39 +0100
Subject: The attached patch addresses the problem with getting the audit
 daemon shutdown credential information. It creates a new message type
 AUDIT_TERM_INFO, which is used by the audit daemon to query who issued the
 shutdown.

It requires the placement of a hook function that gathers the information. The
hook is after the DAC & MAC checks and before the function returns. Racing
threads could overwrite the uid & pid - but they would have to be root and
have policy that allows signalling the audit daemon. That should be a
manageable risk.

The userspace component will be released later in audit 0.7.2. When it
receives the TERM signal, it queries the kernel for shutdown information.
When it receives it, it writes the message and exits. The message looks
like this:

type=DAEMON msg=auditd(1114551182.000) auditd normal halt, sending pid=2650
uid=525, auditd pid=1685

Signed-off-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 19f04b049798..baa80760824c 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -28,14 +28,16 @@
 #include <linux/elf.h>
 
 /* Request and reply types */
-#define AUDIT_GET      1000	/* Get status */
-#define AUDIT_SET      1001	/* Set status (enable/disable/auditd) */
-#define AUDIT_LIST     1002	/* List filtering rules */
-#define AUDIT_ADD      1003	/* Add filtering rule */
-#define AUDIT_DEL      1004	/* Delete filtering rule */
-#define AUDIT_USER     1005	/* Send a message from user-space */
-#define AUDIT_LOGIN    1006     /* Define the login id and informaiton */
-#define AUDIT_KERNEL   2000	/* Asynchronous audit record. NOT A REQUEST. */
+#define AUDIT_GET		1000	/* Get status */
+#define AUDIT_SET		1001	/* Set status (enable/disable/auditd) */
+#define AUDIT_LIST		1002	/* List filtering rules */
+#define AUDIT_ADD		1003	/* Add filtering rule */
+#define AUDIT_DEL		1004	/* Delete filtering rule */
+#define AUDIT_USER		1005	/* Send a message from user-space */
+#define AUDIT_LOGIN		1006	/* Define the login id and information */
+#define AUDIT_SIGNAL_INFO	1010	/* Get information about sender of signal*/
+
+#define AUDIT_KERNEL		2000	/* Asynchronous audit record. NOT A REQUEST. */
 
 /* Rule flags */
 #define AUDIT_PER_TASK 0x01	/* Apply rule at task creation (not syscall) */
@@ -161,6 +163,11 @@ struct audit_rule {		/* for AUDIT_LIST, AUDIT_ADD, and AUDIT_DEL */
 
 #ifdef __KERNEL__
 
+struct audit_sig_info {
+	uid_t		uid;
+	pid_t		pid;
+};
+
 struct audit_buffer;
 struct audit_context;
 struct inode;
@@ -190,6 +197,7 @@ extern void audit_get_stamp(struct audit_context *ctx,
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
 extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
+extern void audit_signal_info(int sig, struct task_struct *t);
 #else
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
@@ -200,6 +208,7 @@ extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mo
 #define audit_inode(n,i) do { ; } while (0)
 #define audit_get_loginuid(c) ({ -1; })
 #define audit_ipc_perms(q,u,g,m) ({ 0; })
+#define audit_signal_info(s,t) do { ; } while (0)
 #endif
 
 #ifdef CONFIG_AUDIT
-- 
cgit v1.2.2


From 197c69c6afd2deb7eec44040ff533d90d26c6161 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@osdl.org>
Date: Wed, 11 May 2005 10:54:05 +0100
Subject: Move ifdef CONFIG_AUDITSYSCALL to header

Remove code conditionally dependent on CONFIG_AUDITSYSCALL from audit.c.
Move these dependencies to audit.h with the rest.

Signed-off-by: Chris Wright <chrisw@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index baa80760824c..58c5589b531f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -192,7 +192,7 @@ extern void audit_inode(const char *name, const struct inode *inode);
 				/* Private API (for audit.c only) */
 extern int  audit_receive_filter(int type, int pid, int uid, int seq,
 				 void *data, uid_t loginuid);
-extern void audit_get_stamp(struct audit_context *ctx,
+extern int audit_get_stamp(struct audit_context *ctx,
 			    struct timespec *t, unsigned int *serial);
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
@@ -206,6 +206,8 @@ extern void audit_signal_info(int sig, struct task_struct *t);
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
 #define audit_inode(n,i) do { ; } while (0)
+#define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; })
+#define audit_get_stamp(c,t,s) ({ 0; })
 #define audit_get_loginuid(c) ({ -1; })
 #define audit_ipc_perms(q,u,g,m) ({ 0; })
 #define audit_signal_info(s,t) do { ; } while (0)
-- 
cgit v1.2.2


From c1b773d87eadc3972d697444127e89a7291769a2 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@osdl.org>
Date: Wed, 11 May 2005 10:55:10 +0100
Subject: Add audit_log_type

Add audit_log_type to allow callers to specify type and pid when logging.
Convert audit_log to wrapper around audit_log_type.  Could have
converted all audit_log callers directly, but common case is default
of type AUDIT_KERNEL and pid 0.  Update audit_log_start to take type
and pid values when creating a new audit_buffer.  Move sequences that
did audit_log_start, audit_log_format, audit_set_type, audit_log_end,
to simply call audit_log_type directly.  This obsoletes audit_set_type
and audit_set_pid, so remove them.

Signed-off-by: Chris Wright <chrisw@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 58c5589b531f..405332ebf3c6 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -216,11 +216,14 @@ extern void audit_signal_info(int sig, struct task_struct *t);
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
 				/* Public API */
-extern void		    audit_log(struct audit_context *ctx,
-				      const char *fmt, ...)
-			    __attribute__((format(printf,2,3)));
+#define audit_log(ctx, fmt, args...) \
+	audit_log_type(ctx, AUDIT_KERNEL, 0, fmt, ##args)
+extern void		    audit_log_type(struct audit_context *ctx, int type,
+				      int pid, const char *fmt, ...)
+			    __attribute__((format(printf,4,5)));
 
-extern struct audit_buffer *audit_log_start(struct audit_context *ctx);
+extern struct audit_buffer *audit_log_start(struct audit_context *ctx, int type,
+					    int pid);
 extern void		    audit_log_format(struct audit_buffer *ab,
 					     const char *fmt, ...)
 			    __attribute__((format(printf,2,3)));
@@ -240,8 +243,9 @@ extern void		    audit_send_reply(int pid, int seq, int type,
 					     void *payload, int size);
 extern void		    audit_log_lost(const char *message);
 #else
-#define audit_log(t,f,...) do { ; } while (0)
-#define audit_log_start(t) ({ NULL; })
+#define audit_log(c,f,...) do { ; } while (0)
+#define audit_log_type(c,t,p,f,...) do { ; } while (0)
+#define audit_log_start(c,t,p) ({ NULL; })
 #define audit_log_vformat(b,f,a) do { ; } while (0)
 #define audit_log_format(b,f,...) do { ; } while (0)
 #define audit_log_end(b) do { ; } while (0)
-- 
cgit v1.2.2


From 6f2f38128170814e151cfedf79532e19cd179567 Mon Sep 17 00:00:00 2001
From: Brad Campbell <brad@wasp.net.au>
Date: Thu, 12 May 2005 15:07:47 -0400
Subject: [PATCH] libata basic detection and errata for PATA->SATA bridges

This patch works around an issue with WD drives (and possibly others)
over SiL PATA->SATA Bridges on SATA controllers locking up with
transfers > 200 sectors.

Signed-off-by: Brad Campbell <brad@wasp.net.au>
---
 include/linux/ata.h    | 1 +
 include/linux/libata.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index f178894edd04..ca5fcadf9981 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -224,6 +224,7 @@ struct ata_taskfile {
 };
 
 #define ata_id_is_ata(id)	(((id)[0] & (1 << 15)) == 0)
+#define ata_id_is_sata(id)	((id)[93] == 0)
 #define ata_id_rahead_enabled(id) ((id)[85] & (1 << 6))
 #define ata_id_wcache_enabled(id) ((id)[85] & (1 << 5))
 #define ata_id_has_flush(id) ((id)[83] & (1 << 12))
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 505160ab472b..d33e70361a7d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -420,6 +420,7 @@ extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
 extern unsigned int ata_dev_classify(struct ata_taskfile *tf);
 extern void ata_dev_id_string(u16 *id, unsigned char *s,
 			      unsigned int ofs, unsigned int len);
+extern void ata_dev_config(struct ata_port *ap, unsigned int i);
 extern void ata_bmdma_setup (struct ata_queued_cmd *qc);
 extern void ata_bmdma_start (struct ata_queued_cmd *qc);
 extern void ata_bmdma_stop(struct ata_port *ap);
-- 
cgit v1.2.2


From 7d17c1d606f6e89778f05554ddea43791d5c92a0 Mon Sep 17 00:00:00 2001
From:  <tgraf@suug.ch>
Date: Thu, 12 May 2005 19:45:25 -0400
Subject: [netdrvrs] Use netif_carrier_* instead of IFF_RUNNING

---
 include/linux/if.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if.h b/include/linux/if.h
index d73a9d62f208..ce627d9092ef 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -33,7 +33,7 @@
 #define	IFF_LOOPBACK	0x8		/* is a loopback net		*/
 #define	IFF_POINTOPOINT	0x10		/* interface is has p-p link	*/
 #define	IFF_NOTRAILERS	0x20		/* avoid use of trailers	*/
-#define	IFF_RUNNING	0x40		/* resources allocated		*/
+#define	IFF_RUNNING	0x40		/* interface running and carrier ok */
 #define	IFF_NOARP	0x80		/* no ARP protocol		*/
 #define	IFF_PROMISC	0x100		/* receive all packets		*/
 #define	IFF_ALLMULTI	0x200		/* receive all multicast packets*/
-- 
cgit v1.2.2


From c04049939f88b29e235d2da217bce6e8ead44f32 Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Fri, 13 May 2005 18:17:42 +0100
Subject: AUDIT: Add message types to audit records

This patch adds more messages types to the audit subsystem so that audit
analysis is quicker, intuitive, and more useful.

Signed-off-by: Steve Grubb <sgrubb@redhat.com>
---
I forgot one type in the big patch. I need to add one for user space
originating SE Linux avc messages. This is used by dbus and nscd.

-Steve
---
Updated to 2.6.12-rc4-mm1.
-dwmw2

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 66 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 405332ebf3c6..1a15ba38c660 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -27,15 +27,53 @@
 #include <linux/sched.h>
 #include <linux/elf.h>
 
-/* Request and reply types */
+/* The netlink messages for the audit system is divided into blocks:
+ * 1000 - 1099 are for commanding the audit system
+ * 1100 - 1199 user space trusted application messages
+ * 1200 - 1299 messages internal to the audit daemon
+ * 1300 - 1399 audit event messages
+ * 1400 - 1499 SE Linux use
+ * 1500 - 1999 future use
+ * 2000 is for otherwise unclassified kernel audit messages
+ *
+ * Messages from 1000-1199 are bi-directional. 1200-1299 are exclusively user
+ * space. Anything over that is kernel --> user space communication.
+ */
 #define AUDIT_GET		1000	/* Get status */
 #define AUDIT_SET		1001	/* Set status (enable/disable/auditd) */
-#define AUDIT_LIST		1002	/* List filtering rules */
-#define AUDIT_ADD		1003	/* Add filtering rule */
-#define AUDIT_DEL		1004	/* Delete filtering rule */
-#define AUDIT_USER		1005	/* Send a message from user-space */
+#define AUDIT_LIST		1002	/* List syscall filtering rules */
+#define AUDIT_ADD		1003	/* Add syscall filtering rule */
+#define AUDIT_DEL		1004	/* Delete syscall filtering rule */
+#define AUDIT_USER		1005	/* Message from userspace -- deprecated */
 #define AUDIT_LOGIN		1006	/* Define the login id and information */
-#define AUDIT_SIGNAL_INFO	1010	/* Get information about sender of signal*/
+#define AUDIT_WATCH_INS		1007	/* Insert file/dir watch entry */
+#define AUDIT_WATCH_REM		1008	/* Remove file/dir watch entry */
+#define AUDIT_WATCH_LIST	1009	/* List all file/dir watches */
+#define AUDIT_SIGNAL_INFO	1010	/* Get info about sender of signal to auditd */
+
+#define AUDIT_USER_AUTH		1100	/* User space authentication */
+#define AUDIT_USER_ACCT		1101	/* User space acct change */
+#define AUDIT_USER_MGMT		1102	/* User space acct management */
+#define AUDIT_CRED_ACQ		1103	/* User space credential acquired */
+#define AUDIT_CRED_DISP		1104	/* User space credential disposed */
+#define AUDIT_USER_START	1105	/* User space session start */ 
+#define AUDIT_USER_END		1106	/* User space session end */
+#define AUDIT_USER_AVC		1107	/* User space avc message */
+ 
+#define AUDIT_DAEMON_START      1200    /* Daemon startup record */
+#define AUDIT_DAEMON_END        1201    /* Daemon normal stop record */
+#define AUDIT_DAEMON_ABORT      1202    /* Daemon error stop record */
+#define AUDIT_DAEMON_CONFIG     1203    /* Daemon config change */
+
+#define AUDIT_SYSCALL		1300	/* Syscall event */
+#define AUDIT_FS_WATCH		1301	/* Filesystem watch event */
+#define AUDIT_PATH		1302	/* Filname path information */
+#define AUDIT_IPC		1303	/* IPC record */
+#define AUDIT_SOCKET		1304	/* Socket record */
+#define AUDIT_CONFIG_CHANGE	1305	/* Audit system configuration change */
+
+#define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
+#define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
 
 #define AUDIT_KERNEL		2000	/* Asynchronous audit record. NOT A REQUEST. */
 
@@ -216,14 +254,11 @@ extern void audit_signal_info(int sig, struct task_struct *t);
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
 				/* Public API */
-#define audit_log(ctx, fmt, args...) \
-	audit_log_type(ctx, AUDIT_KERNEL, 0, fmt, ##args)
-extern void		    audit_log_type(struct audit_context *ctx, int type,
-				      int pid, const char *fmt, ...)
-			    __attribute__((format(printf,4,5)));
+extern void		    audit_log(struct audit_context *ctx, int type,
+				      const char *fmt, ...)
+			    __attribute__((format(printf,3,4)));
 
-extern struct audit_buffer *audit_log_start(struct audit_context *ctx, int type,
-					    int pid);
+extern struct audit_buffer *audit_log_start(struct audit_context *ctx,int type);
 extern void		    audit_log_format(struct audit_buffer *ab,
 					     const char *fmt, ...)
 			    __attribute__((format(printf,2,3)));
@@ -243,9 +278,8 @@ extern void		    audit_send_reply(int pid, int seq, int type,
 					     void *payload, int size);
 extern void		    audit_log_lost(const char *message);
 #else
-#define audit_log(c,f,...) do { ; } while (0)
-#define audit_log_type(c,t,p,f,...) do { ; } while (0)
-#define audit_log_start(c,t,p) ({ NULL; })
+#define audit_log(c,t,f,...) do { ; } while (0)
+#define audit_log_start(c,t) ({ NULL; })
 #define audit_log_vformat(b,f,a) do { ; } while (0)
 #define audit_log_format(b,f,...) do { ; } while (0)
 #define audit_log_end(b) do { ; } while (0)
-- 
cgit v1.2.2


From 23f32d18aa589e228c5a9e12e0d0c67c9b5bcdce Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Fri, 13 May 2005 18:35:15 +0100
Subject: AUDIT: Fix some spelling errors

I'm going through the kernel code and have a patch that corrects
several spelling errors in comments.

From: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 1a15ba38c660..51e5879af7fc 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -181,7 +181,7 @@ struct audit_message {
 
 struct audit_status {
 	__u32		mask;		/* Bit mask for valid entries */
-	__u32		enabled;	/* 1 = enabled, 0 = disbaled */
+	__u32		enabled;	/* 1 = enabled, 0 = disabled */
 	__u32		failure;	/* Failure-to-log action */
 	__u32		pid;		/* pid of auditd process */
 	__u32		rate_limit;	/* messages rate limit (per second) */
-- 
cgit v1.2.2


From a1365275e745bb0a173c918a52bcdfa6ce122f7e Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Thu, 5 May 2005 15:14:15 -0700
Subject: [PATCH] DM9000 network driver

This patch adds support for the davicom dm9000 network driver.  The dm9000
is found on some embedded arm boards such as the pimx1 or the scb9328.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>

diff -puN /dev/null drivers/net/dm9000.c
---
 include/linux/dm9000.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 include/linux/dm9000.h

(limited to 'include/linux')

diff --git a/include/linux/dm9000.h b/include/linux/dm9000.h
new file mode 100644
index 000000000000..0008e2ad0c9f
--- /dev/null
+++ b/include/linux/dm9000.h
@@ -0,0 +1,36 @@
+/* include/linux/dm9000.h
+ *
+ * Copyright (c) 2004 Simtec Electronics
+ *   Ben Dooks <ben@simtec.co.uk>
+ *
+ * Header file for dm9000 platform data
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+*/
+
+#ifndef __DM9000_PLATFORM_DATA
+#define __DM9000_PLATFORM_DATA __FILE__
+
+/* IO control flags */
+
+#define DM9000_PLATF_8BITONLY	(0x0001)
+#define DM9000_PLATF_16BITONLY	(0x0002)
+#define DM9000_PLATF_32BITONLY	(0x0004)
+
+/* platfrom data for platfrom device structure's platfrom_data field */
+
+struct dm9000_plat_data {
+	unsigned int	flags;
+
+	/* allow replacement IO routines */
+
+	void	(*inblk)(void __iomem *reg, void *data, int len);
+	void	(*outblk)(void __iomem *reg, void *data, int len);
+	void	(*dumpblk)(void __iomem *reg, int len);
+};
+
+#endif /* __DM9000_PLATFORM_DATA */
+
-- 
cgit v1.2.2


From b3dd65f958354226275522b5a64157834bdc5415 Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khc@pm.waw.pl>
Date: Thu, 21 Apr 2005 15:57:25 +0200
Subject: [PATCH] Generic HDLC update

The attached patch updates generic HDLC to version 1.18.
FR Cisco LMI production-tested. Please apply to Linux 2.6. Thanks.

Changes:
- doc updates
- added Cisco LMI support to Frame-Relay code
- cleaned hdlc_fr.c a bit, removed some orphaned #defines etc.
- fixed a problem with non-functional LMI in FR DCE mode.
- changed diagnostic messages to better conform to FR standards
- all protocols: information about carrier changes (DCD line) is now
  printed to kernel logs.

Signed-Off-By: Krzysztof Halasa <khc@pm.waw.pl>
---
 include/linux/hdlc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index 503194e62fe1..ed2927ef1ff7 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -1,7 +1,7 @@
 /*
  * Generic HDLC support routines for Linux
  *
- * Copyright (C) 1999-2003 Krzysztof Halasa <khc@pm.waw.pl>
+ * Copyright (C) 1999-2005 Krzysztof Halasa <khc@pm.waw.pl>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License
@@ -41,6 +41,7 @@
 #define LMI_NONE		1 /* No LMI, all PVCs are static */
 #define LMI_ANSI		2 /* ANSI Annex D */
 #define LMI_CCITT		3 /* ITU-T Annex A */
+#define LMI_CISCO		4 /* The "original" LMI, aka Gang of Four */
 
 #define HDLC_MAX_MTU 1500	/* Ethernet 1500 bytes */
 #define HDLC_MAX_MRU (HDLC_MAX_MTU + 10 + 14 + 4) /* for ETH+VLAN over FR */
@@ -89,6 +90,7 @@ typedef struct pvc_device_struct {
 		unsigned int deleted: 1;
 		unsigned int fecn: 1;
 		unsigned int becn: 1;
+		unsigned int bandwidth;	/* Cisco LMI reporting only */
 	}state;
 }pvc_device;
 
-- 
cgit v1.2.2


From 3ec3b2fba526ead2fa3f3d7c91924f39a0733749 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@shinybook.infradead.org>
Date: Tue, 17 May 2005 12:08:48 +0100
Subject: AUDIT: Capture sys_socketcall arguments and sockaddrs

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 51e5879af7fc..2f5dc60f8bbd 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -69,8 +69,9 @@
 #define AUDIT_FS_WATCH		1301	/* Filesystem watch event */
 #define AUDIT_PATH		1302	/* Filname path information */
 #define AUDIT_IPC		1303	/* IPC record */
-#define AUDIT_SOCKET		1304	/* Socket record */
+#define AUDIT_SOCKETCALL	1304	/* sys_socketcall arguments */
 #define AUDIT_CONFIG_CHANGE	1305	/* Audit system configuration change */
+#define AUDIT_SOCKADDR		1306	/* sockaddr copied as syscall arg */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
@@ -235,6 +236,8 @@ extern int audit_get_stamp(struct audit_context *ctx,
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
 extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
+extern int audit_socketcall(int nargs, unsigned long *args);
+extern int audit_sockaddr(int len, void *addr);
 extern void audit_signal_info(int sig, struct task_struct *t);
 #else
 #define audit_alloc(t) ({ 0; })
@@ -248,6 +251,8 @@ extern void audit_signal_info(int sig, struct task_struct *t);
 #define audit_get_stamp(c,t,s) ({ 0; })
 #define audit_get_loginuid(c) ({ -1; })
 #define audit_ipc_perms(q,u,g,m) ({ 0; })
+#define audit_socketcall(n,a) ({ 0; })
+#define audit_sockaddr(len, addr) ({ 0; })
 #define audit_signal_info(s,t) do { ; } while (0)
 #endif
 
-- 
cgit v1.2.2


From 209aba03243ee42a22f8df8d08aa9963f62aec64 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@shinybook.infradead.org>
Date: Wed, 18 May 2005 10:21:07 +0100
Subject: AUDIT: Treat all user messages identically.

It's silly to have to add explicit entries for new userspace messages
as we invent them. Just treat all messages in the user range the same.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 2f5dc60f8bbd..17ea5d522d81 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -51,14 +51,8 @@
 #define AUDIT_WATCH_LIST	1009	/* List all file/dir watches */
 #define AUDIT_SIGNAL_INFO	1010	/* Get info about sender of signal to auditd */
 
-#define AUDIT_USER_AUTH		1100	/* User space authentication */
-#define AUDIT_USER_ACCT		1101	/* User space acct change */
-#define AUDIT_USER_MGMT		1102	/* User space acct management */
-#define AUDIT_CRED_ACQ		1103	/* User space credential acquired */
-#define AUDIT_CRED_DISP		1104	/* User space credential disposed */
-#define AUDIT_USER_START	1105	/* User space session start */ 
-#define AUDIT_USER_END		1106	/* User space session end */
-#define AUDIT_USER_AVC		1107	/* User space avc message */
+#define AUDIT_FIRST_USER_MSG	1100	/* Userspace messages uninteresting to kernel */
+#define AUDIT_LAST_USER_MSG	1199
  
 #define AUDIT_DAEMON_START      1200    /* Daemon startup record */
 #define AUDIT_DAEMON_END        1201    /* Daemon normal stop record */
@@ -173,13 +167,6 @@
 #define AUDIT_ARCH_V850		(EM_V850|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 
-#ifndef __KERNEL__
-struct audit_message {
-	struct nlmsghdr nlh;
-	char		data[1200];
-};
-#endif
-
 struct audit_status {
 	__u32		mask;		/* Bit mask for valid entries */
 	__u32		enabled;	/* 1 = enabled, 0 = disabled */
-- 
cgit v1.2.2


From 867d1191fca388a79e4bb500dd85a9e871c96b99 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Sun, 24 Apr 2005 02:06:05 -0500
Subject: [SCSI] remove requeue feature from blk_insert_request()

blk_insert_request() has a unobivous feature of requeuing a
request setting REQ_SPECIAL|REQ_SOFTBARRIER.  SCSI midlayer
was the only user and as previous patches removed the usage,
remove the feature from blk_insert_request().  Only special
requests should be queued with blk_insert_request().  All
requeueing should go through blk_requeue_request().

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ef1afc178c0a..4a99b76c5a33 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -544,7 +544,7 @@ extern void blk_end_sync_rq(struct request *rq);
 extern void blk_attempt_remerge(request_queue_t *, struct request *);
 extern void __blk_attempt_remerge(request_queue_t *, struct request *);
 extern struct request *blk_get_request(request_queue_t *, int, int);
-extern void blk_insert_request(request_queue_t *, struct request *, int, void *, int);
+extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
 extern void blk_requeue_request(request_queue_t *, struct request *);
 extern void blk_plug_device(request_queue_t *);
 extern int blk_remove_plug(request_queue_t *);
-- 
cgit v1.2.2


From daa6eda65a53e5addf86c6bc829129ff51b08bda Mon Sep 17 00:00:00 2001
From: Gerd Knorr <kraxel@bytesex.org>
Date: Tue, 10 May 2005 10:59:13 +0200
Subject: [SCSI] add scsi changer driver

This patch adds a device driver for scsi media changer devices.

Signed-off-by: Gerd Knorr <kraxel@bytesex.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/chio.h  | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/major.h |   1 +
 2 files changed, 169 insertions(+)
 create mode 100644 include/linux/chio.h

(limited to 'include/linux')

diff --git a/include/linux/chio.h b/include/linux/chio.h
new file mode 100644
index 000000000000..63035ae67e63
--- /dev/null
+++ b/include/linux/chio.h
@@ -0,0 +1,168 @@
+/*
+ * ioctl interface for the scsi media changer driver
+ */
+
+/* changer element types */
+#define CHET_MT   0	/* media transport element (robot) */
+#define CHET_ST   1	/* storage element (media slots) */
+#define CHET_IE   2	/* import/export element */
+#define CHET_DT   3	/* data transfer element (tape/cdrom/whatever) */
+#define CHET_V1   4	/* vendor specific #1 */
+#define CHET_V2   5	/* vendor specific #2 */
+#define CHET_V3   6	/* vendor specific #3 */
+#define CHET_V4   7	/* vendor specific #4 */
+
+
+/*
+ * CHIOGPARAMS
+ *    query changer properties
+ *
+ * CHIOVGPARAMS
+ *    query vendor-specific element types
+ *
+ *    accessing elements works by specifing type and unit of the element.
+ *    for eample, storage elements are addressed with type = CHET_ST and
+ *    unit = 0 .. cp_nslots-1
+ *
+ */
+struct changer_params {
+	int cp_curpicker;  /* current transport element */
+	int cp_npickers;   /* number of transport elements      (CHET_MT) */
+	int cp_nslots;     /* number of storage elements        (CHET_ST) */
+	int cp_nportals;   /* number of import/export elements  (CHET_IE) */
+	int cp_ndrives;    /* number of data transfer elements  (CHET_DT) */
+};
+struct changer_vendor_params {
+	int  cvp_n1;       /* number of vendor specific elems   (CHET_V1) */
+	char cvp_label1[16];
+	int  cvp_n2;       /* number of vendor specific elems   (CHET_V2) */
+	char cvp_label2[16];
+	int  cvp_n3;       /* number of vendor specific elems   (CHET_V3) */
+	char cvp_label3[16];
+	int  cvp_n4;       /* number of vendor specific elems   (CHET_V4) */
+	char cvp_label4[16];
+	int  reserved[8];
+};
+
+
+/*
+ * CHIOMOVE
+ *    move a medium from one element to another
+ */
+struct changer_move {
+	int cm_fromtype;	/* type/unit of source element */
+	int cm_fromunit;	
+	int cm_totype;	/* type/unit of destination element */
+	int cm_tounit;
+	int cm_flags;
+};
+#define CM_INVERT   1   /* flag: rotate media (for double-sided like MOD) */
+
+
+/*
+ * CHIOEXCHANGE
+ *    move one medium from element #1 to element #2,
+ *    and another one from element #2 to element #3.
+ *    element #1 and #3 are allowed to be identical.
+ */
+struct changer_exchange {
+	int ce_srctype;	    /* type/unit of element #1 */
+	int ce_srcunit;
+	int ce_fdsttype;    /* type/unit of element #2 */
+	int ce_fdstunit;
+	int ce_sdsttype;    /* type/unit of element #3 */
+	int ce_sdstunit;
+	int ce_flags;
+};
+#define CE_INVERT1   1
+#define CE_INVERT2   2
+
+
+/*
+ * CHIOPOSITION
+ *    move the transport element (robot arm) to a specific element.
+ */
+struct changer_position {
+	int cp_type;
+	int cp_unit;
+	int cp_flags;
+};
+#define CP_INVERT   1
+
+
+/*
+ * CHIOGSTATUS
+ *    get element status for all elements of a specific type
+ */
+struct changer_element_status {
+	int             ces_type;
+	unsigned char   *ces_data;
+};
+#define CESTATUS_FULL     0x01 /* full */
+#define CESTATUS_IMPEXP   0x02	/* media was imported (inserted by sysop) */
+#define CESTATUS_EXCEPT   0x04	/* error condition */
+#define CESTATUS_ACCESS   0x08	/* access allowed */
+#define CESTATUS_EXENAB   0x10	/* element can export media */
+#define CESTATUS_INENAB   0x20	/* element can import media */
+
+
+/*
+ * CHIOGELEM
+ *    get more detailed status informtion for a single element
+ */
+struct changer_get_element {
+	int	cge_type;	 /* type/unit */
+	int	cge_unit;
+	int	cge_status;      /* status */
+	int     cge_errno;       /* errno */
+	int     cge_srctype;     /* source element of the last move/exchange */
+	int     cge_srcunit;
+	int     cge_id;          /* scsi id  (for data transfer elements) */
+	int     cge_lun;         /* scsi lun (for data transfer elements) */
+	char    cge_pvoltag[36]; /* primary volume tag */
+	char    cge_avoltag[36]; /* alternate volume tag */
+	int     cge_flags;
+};
+/* flags */
+#define CGE_ERRNO     0x01       /* errno available       */
+#define CGE_INVERT    0x02       /* media inverted        */
+#define CGE_SRC       0x04       /* media src available   */
+#define CGE_IDLUN     0x08       /* ID+LUN available      */
+#define CGE_PVOLTAG   0x10       /* primary volume tag available */
+#define CGE_AVOLTAG   0x20       /* alternate volume tag available */
+
+
+/*
+ * CHIOSVOLTAG
+ *    set volume tag
+ */
+struct changer_set_voltag {
+	int	csv_type;	 /* type/unit */
+	int	csv_unit;
+	char    csv_voltag[36];  /* volume tag */
+	int     csv_flags;
+};
+#define CSV_PVOLTAG   0x01       /* primary volume tag */
+#define CSV_AVOLTAG   0x02       /* alternate volume tag */
+#define CSV_CLEARTAG  0x04       /* clear volume tag */
+
+/* ioctls */
+#define CHIOMOVE       _IOW('c', 1,struct changer_move)
+#define CHIOEXCHANGE   _IOW('c', 2,struct changer_exchange)
+#define CHIOPOSITION   _IOW('c', 3,struct changer_position)
+#define CHIOGPICKER    _IOR('c', 4,int)                        /* not impl. */
+#define CHIOSPICKER    _IOW('c', 5,int)                        /* not impl. */
+#define CHIOGPARAMS    _IOR('c', 6,struct changer_params)
+#define CHIOGSTATUS    _IOW('c', 8,struct changer_element_status)
+#define CHIOGELEM      _IOW('c',16,struct changer_get_element)
+#define CHIOINITELEM   _IO('c',17)
+#define CHIOSVOLTAG    _IOW('c',18,struct changer_set_voltag)
+#define CHIOGVPARAMS   _IOR('c',19,struct changer_vendor_params)
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/include/linux/major.h b/include/linux/major.h
index 4b62c42b842c..e36a46702d94 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -100,6 +100,7 @@
 #define I2O_MAJOR		80	/* 80->87 */
 
 #define SHMIQ_MAJOR		85   /* Linux/mips, SGI /dev/shmiq */
+#define SCSI_CHANGER_MAJOR      86
 
 #define IDE6_MAJOR		88
 #define IDE7_MAJOR		89
-- 
cgit v1.2.2


From 011161051bbc25f7f8b7df059dbd934c534443f0 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Sat, 21 May 2005 00:15:52 +0100
Subject: AUDIT: Avoid sleeping function in SElinux AVC audit.

This patch changes the SELinux AVC to defer logging of paths to the audit
framework upon syscall exit, by saving a reference to the (dentry,vfsmount)
pair in an auxiliary audit item on the current audit context for processing
by audit_log_exit.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 17ea5d522d81..4b7caf0c6e10 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -69,6 +69,7 @@
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
+#define AUDIT_AVC_PATH		1402	/* dentry, vfsmount pair from avc */
 
 #define AUDIT_KERNEL		2000	/* Asynchronous audit record. NOT A REQUEST. */
 
@@ -225,6 +226,7 @@ extern uid_t audit_get_loginuid(struct audit_context *ctx);
 extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
+extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt);
 extern void audit_signal_info(int sig, struct task_struct *t);
 #else
 #define audit_alloc(t) ({ 0; })
@@ -240,6 +242,7 @@ extern void audit_signal_info(int sig, struct task_struct *t);
 #define audit_ipc_perms(q,u,g,m) ({ 0; })
 #define audit_socketcall(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
+#define audit_avc_path(dentry, mnt) ({ 0; })
 #define audit_signal_info(s,t) do { ; } while (0)
 #endif
 
-- 
cgit v1.2.2


From bfb4496e7239c9132d732a65cdcf3d6a7431ad1a Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@shinybook.infradead.org>
Date: Sat, 21 May 2005 21:08:09 +0100
Subject: AUDIT: Assign serial number to non-syscall messages

Move audit_serial() into audit.c and use it to generate serial numbers
on messages even when there is no audit context from syscall auditing.
This allows us to disambiguate audit records when more than one is
generated in the same millisecond.

Based on a patch by Steve Grubb after he observed the problem.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4b7caf0c6e10..3278ddf41ce6 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -219,8 +219,9 @@ extern void audit_inode(const char *name, const struct inode *inode);
 				/* Private API (for audit.c only) */
 extern int  audit_receive_filter(int type, int pid, int uid, int seq,
 				 void *data, uid_t loginuid);
-extern int audit_get_stamp(struct audit_context *ctx,
-			    struct timespec *t, unsigned int *serial);
+extern unsigned int audit_serial(void);
+extern void auditsc_get_stamp(struct audit_context *ctx,
+			      struct timespec *t, unsigned int *serial);
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
 extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
@@ -237,7 +238,7 @@ extern void audit_signal_info(int sig, struct task_struct *t);
 #define audit_putname(n) do { ; } while (0)
 #define audit_inode(n,i) do { ; } while (0)
 #define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; })
-#define audit_get_stamp(c,t,s) ({ 0; })
+#define auditsc_get_stamp(c,t,s) do { BUG(); } while (0)
 #define audit_get_loginuid(c) ({ -1; })
 #define audit_ipc_perms(q,u,g,m) ({ 0; })
 #define audit_socketcall(n,a) ({ 0; })
-- 
cgit v1.2.2


From aa8f0dc6c3dbf1cf3ff58f3e945c981be134814d Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Thu, 26 May 2005 21:54:27 -0400
Subject: libata: Fix use-after-iounmap

Jens Axboe pointed out that the iounmap() call in libata was occurring
too early, and some drivers (ahci, probably others) were using ioremap'd
memory after it had been unmapped.

The patch should address that problem by way of improving the libata
driver API:

* move ->host_stop() call after all ->port_stop() calls have occurred.

* create default helper function ata_host_stop(), and move iounmap()
call there.

* add ->host_stop_prewalk() hook, use it in sata_qstor.c (hi Mark).
sata_qstor appears to require the host-stop-before-port-stop ordering
that existed prior to applying the attached patch.
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1f7e2039a04e..e74f301e9bae 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -410,6 +410,7 @@ extern u8 ata_chk_err(struct ata_port *ap);
 extern void ata_exec_command(struct ata_port *ap, struct ata_taskfile *tf);
 extern int ata_port_start (struct ata_port *ap);
 extern void ata_port_stop (struct ata_port *ap);
+extern void ata_host_stop (struct ata_host_set *host_set);
 extern irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs);
 extern void ata_qc_prep(struct ata_queued_cmd *qc);
 extern int ata_qc_issue_prot(struct ata_queued_cmd *qc);
-- 
cgit v1.2.2


From 8f37d47c9bf74cb48692691086b482e315d07f40 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@shinybook.infradead.org>
Date: Fri, 27 May 2005 12:17:28 +0100
Subject: AUDIT: Record working directory when syscall arguments are pathnames

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 include/linux/audit.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3278ddf41ce6..bf2ad3ba72eb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -61,11 +61,12 @@
 
 #define AUDIT_SYSCALL		1300	/* Syscall event */
 #define AUDIT_FS_WATCH		1301	/* Filesystem watch event */
-#define AUDIT_PATH		1302	/* Filname path information */
+#define AUDIT_PATH		1302	/* Filename path information */
 #define AUDIT_IPC		1303	/* IPC record */
 #define AUDIT_SOCKETCALL	1304	/* sys_socketcall arguments */
 #define AUDIT_CONFIG_CHANGE	1305	/* Audit system configuration change */
 #define AUDIT_SOCKADDR		1306	/* sockaddr copied as syscall arg */
+#define AUDIT_CWD		1307	/* Current working directory */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
-- 
cgit v1.2.2


From 668d1e6093110f7534e661e2ff43d54c74659b6d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 28 May 2005 02:11:12 -0500
Subject: Input: This patch adds dummy gameport_register_port,
 gameport_unregister_port and gameport_set_phys functions to gameport.h for
 the case when a driver can't use gameport.

This fixes the compilation of some OSS drivers with GAMEPORT=n without
the need to #if inside every single driver.

This patch also removes the non-working and now obsolete SOUND_GAMEPORT.

This patch is also an alternative solution for ALSA drivers with similar
problems (but #if's inside the drivers might have the advantage of
saving some more bytes of gameport is not available).

The only user-visible change is that for GAMEPORT=m the affected OSS
drivers are now allowed to be built statically (but they won't have
gameport support).

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/gameport.h | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gameport.h b/include/linux/gameport.h
index b1272f822cfa..cd623eccdbea 100644
--- a/include/linux/gameport.h
+++ b/include/linux/gameport.h
@@ -67,6 +67,8 @@ int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mo
 void gameport_close(struct gameport *gameport);
 void gameport_rescan(struct gameport *gameport);
 
+#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE))
+
 void __gameport_register_port(struct gameport *gameport, struct module *owner);
 static inline void gameport_register_port(struct gameport *gameport)
 {
@@ -75,6 +77,29 @@ static inline void gameport_register_port(struct gameport *gameport)
 
 void gameport_unregister_port(struct gameport *gameport);
 
+void gameport_set_phys(struct gameport *gameport, const char *fmt, ...)
+	__attribute__ ((format (printf, 2, 3)));
+
+#else
+
+static inline void gameport_register_port(struct gameport *gameport)
+{
+	return;
+}
+
+static inline void gameport_unregister_port(struct gameport *gameport)
+{
+	return;
+}
+
+static inline void gameport_set_phys(struct gameport *gameport,
+				     const char *fmt, ...)
+{
+	return;
+}
+
+#endif
+
 static inline struct gameport *gameport_allocate_port(void)
 {
 	struct gameport *gameport = kcalloc(1, sizeof(struct gameport), GFP_KERNEL);
@@ -92,9 +117,6 @@ static inline void gameport_set_name(struct gameport *gameport, const char *name
 	strlcpy(gameport->name, name, sizeof(gameport->name));
 }
 
-void gameport_set_phys(struct gameport *gameport, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-
 /*
  * Use the following fucntions to manipulate gameport's per-port
  * driver-specific data.
-- 
cgit v1.2.2


From 8f28e8fa46625310102aea06fac61ba04c8b5b88 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Sat, 28 May 2005 15:52:02 -0700
Subject: [PATCH] irq code: Add coherence test for PREEMPT_ACTIVE

After porting this fixlet to UML:

http://linux.bkbits.net:8080/linux-2.5/cset@41791ab52lfMuF2i3V-eTIGRBbDYKQ

, I've also added a warning which should refuse compilation with insane values
for PREEMPT_ACTIVE...  maybe we should simply move PREEMPT_ACTIVE out of
architectures using GENERIC_IRQS.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hardirq.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index ebc712e91066..8336dba18971 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -43,13 +43,17 @@
 #define __IRQ_MASK(x)	((1UL << (x))-1)
 
 #define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
-#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
 #define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
 
 #define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)
 #define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
 #define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
 
+#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS))
+#error PREEMPT_ACTIVE is too low!
+#endif
+
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
 #define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
-- 
cgit v1.2.2


From d8a33ac435c43a1a404b2ec560ef1d1536710c36 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Sun, 29 May 2005 14:13:47 -0700
Subject: [BRIDGE]: features change notification

Resend of earlier patch (no changes) from Catalin used to provide
device feature change notification.

Signed-off-by: Catalin BOIE <catab at umbrella.ro>
Acked-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 include/linux/notifier.h  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b25bd02720d3..d8c65ecef9d9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -913,6 +913,7 @@ extern void		dev_mc_discard(struct net_device *dev);
 extern void		dev_set_promiscuity(struct net_device *dev, int inc);
 extern void		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
+extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(const char *name);
 extern void		dev_mcast_init(void);
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9303a003e9ab..5937dd6053c3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -56,6 +56,7 @@ extern int notifier_call_chain(struct notifier_block **n, unsigned long val, voi
 #define NETDEV_CHANGEADDR	0x0008
 #define NETDEV_GOING_DOWN	0x0009
 #define NETDEV_CHANGENAME	0x000A
+#define NETDEV_FEAT_CHANGE	0x000B
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
-- 
cgit v1.2.2


From 81e8157583c559c27aac75c708d40a35f563d734 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Sun, 29 May 2005 14:14:35 -0700
Subject: [BRIDGE]: make dev->features unsigned

The features field in netdevice is really a bitmask, and bitmask's should
be unsigned.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d8c65ecef9d9..470af8c1a4a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -401,7 +401,7 @@ struct net_device
 	} reg_state;
 
 	/* Net device features */
-	int			features;
+	unsigned long		features;
 #define NETIF_F_SG		1	/* Scatter/gather IO. */
 #define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
 #define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
-- 
cgit v1.2.2


From 8f937c6099858eee15fae14009dcbd05177fa91d Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Sun, 29 May 2005 20:23:46 -0700
Subject: [IPV4]: Primary and secondary addresses

Add an option to make secondary IP addresses get promoted
when primary IP addresses are removed from the device.
It defaults to off to preserve existing behavior.

Signed-off-by: Harald Welte <laforge@gnumonks.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 2 ++
 include/linux/sysctl.h     | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 6fafb27877a7..7e1e15f934f3 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -29,6 +29,7 @@ struct ipv4_devconf
 	int	no_xfrm;
 	int	no_policy;
 	int	force_igmp_version;
+	int	promote_secondaries;
 	void	*sysctl;
 };
 
@@ -71,6 +72,7 @@ struct in_device
 #define IN_DEV_SEC_REDIRECTS(in_dev)	(ipv4_devconf.secure_redirects || (in_dev)->cnf.secure_redirects)
 #define IN_DEV_IDTAG(in_dev)		((in_dev)->cnf.tag)
 #define IN_DEV_MEDIUM_ID(in_dev)	((in_dev)->cnf.medium_id)
+#define IN_DEV_PROMOTE_SECONDARIES(in_dev)	(ipv4_devconf.promote_secondaries || (in_dev)->cnf.promote_secondaries)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 772998147e3e..23032d9d6071 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -399,6 +399,7 @@ enum
 	NET_IPV4_CONF_FORCE_IGMP_VERSION=17,
 	NET_IPV4_CONF_ARP_ANNOUNCE=18,
 	NET_IPV4_CONF_ARP_IGNORE=19,
+	NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
 	__NET_IPV4_CONF_MAX
 };
 
-- 
cgit v1.2.2


From 79165121bc09c209451487d977df910c4ff6fc94 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Sun, 29 May 2005 20:24:30 -0700
Subject: [NET]: Add is_multicast_ether_addr() in include/linux/etherdevice.h

This patch adds is_multicast_ether_addr() to go along with
is_valid_ether_addr() and friends. It then changes
is_valid_ether_addr() to use the new macro, and fixes up the comment
on that function to move implementation details out of the API doco.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 220748b7abea..a1478258d002 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -55,19 +55,33 @@ static inline int is_zero_ether_addr(const u8 *addr)
 	return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
 }
 
+/**
+ * is_multicast_ether_addr - Determine if the given Ethernet address is a
+ * multicast address.
+ *
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Return true if the address is a multicast address.
+ */
+static inline int is_multicast_ether_addr(const u8 *addr)
+{
+	return addr[0] & 0x01;
+}
+
 /**
  * is_valid_ether_addr - Determine if the given Ethernet address is valid
  * @addr: Pointer to a six-byte array containing the Ethernet address
  *
  * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not
- * a multicast address, and is not FF:FF:FF:FF:FF:FF.  The multicast
- * and FF:FF:... tests are combined into the single test "!(addr[0]&1)".
+ * a multicast address, and is not FF:FF:FF:FF:FF:FF.
  *
  * Return true if the address is valid.
  */
 static inline int is_valid_ether_addr(const u8 *addr)
 {
-	return !(addr[0]&1) && !is_zero_ether_addr(addr);
+	/* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to
+	 * explicitly check for it here. */
+	return !is_multicast_ether_addr(addr) && !is_zero_ether_addr(addr);
 }
 
 /**
@@ -83,6 +97,6 @@ static inline void random_ether_addr(u8 *addr)
 	addr [0] &= 0xfe;	/* clear multicast bit */
 	addr [0] |= 0x02;	/* set local assignment bit (IEEE802) */
 }
-#endif
+#endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_ETHERDEVICE_H */
-- 
cgit v1.2.2


From 69f6a0fafcdf0bfe85af182695d6d38f80f9d549 Mon Sep 17 00:00:00 2001
From: Jon Mason <jdmason@us.ibm.com>
Date: Sun, 29 May 2005 20:27:24 -0700
Subject: [NET]: Add ethtool support for NETIF_F_HW_CSUM.

Signed-off-by: Jon Mason <jdmason@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c85b210490ea..a0ab26aab450 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -256,6 +256,7 @@ struct net_device;
 u32 ethtool_op_get_link(struct net_device *dev);
 u32 ethtool_op_get_tx_csum(struct net_device *dev);
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data);
+int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data);
 u32 ethtool_op_get_sg(struct net_device *dev);
 int ethtool_op_set_sg(struct net_device *dev, u32 data);
 u32 ethtool_op_get_tso(struct net_device *dev);
-- 
cgit v1.2.2


From af00f9811e0ccbd3db84ddc4cffb0da942653393 Mon Sep 17 00:00:00 2001
From: Andy Currid <acurrid@nvidia.com>
Date: Mon, 23 May 2005 08:55:45 -0700
Subject: [PATCH] PCI: amd74xx patch for new NVIDIA device IDs

Here's the 2.6 amd74xx patch for NVIDIA MCP51.

Signed-off-by: Andy Currid <acurrid@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/pci_ids.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 7ccbc2e4272c..b0d6134e1ee6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1230,6 +1230,12 @@
 #define PCI_DEVICE_ID_NVIDIA_QUADRO4_900XGL	0x0258
 #define PCI_DEVICE_ID_NVIDIA_QUADRO4_750XGL	0x0259
 #define PCI_DEVICE_ID_NVIDIA_QUADRO4_700XGL	0x025B
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE	0x0265
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA	0x0266
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2	0x0267
+#define PCI_DEVICE_ID_NVIDIA_NVENET_12		0x0268
+#define PCI_DEVICE_ID_NVIDIA_NVENET_13		0x0269
+#define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO	0x026B
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800	0x0280
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800_8X    0x0281
 #define PCI_DEVICE_ID_NVIDIA_GEFORCE4_TI_4800SE     0x0282
-- 
cgit v1.2.2


From b53cc6ead046093477ec7a3354d620337101ea5b Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Tue, 31 May 2005 19:03:47 -0700
Subject: [CPUFREQ] fix up comment in cpufreq.h

Fix up comment in cpufreq.h stating transition latency should be passed
in microseconds -- it was decided long ago to switch to nanoseconds.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 include/linux/cpufreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index f21af067d015..927daa86c9b3 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -49,7 +49,7 @@ int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list);
 /* Frequency values here are CPU kHz so that hardware which doesn't run 
  * with some frequencies can complain without having to guess what per 
  * cent / per mille means. 
- * Maximum transition latency is in microseconds - if it's unknown,
+ * Maximum transition latency is in nanoseconds - if it's unknown,
  * CPUFREQ_ETERNAL shall be used.
  */
 
-- 
cgit v1.2.2


From 0baab86b00cdf9785ac2bb2ce1ab63995b3866ca Mon Sep 17 00:00:00 2001
From: Edward Falk <efalk@google.com>
Date: Thu, 2 Jun 2005 18:17:13 -0400
Subject: libata: update inline source docs

---
 include/linux/libata.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1f7e2039a04e..ad410590664f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -466,12 +466,34 @@ static inline u8 ata_chk_status(struct ata_port *ap)
 	return ap->ops->check_status(ap);
 }
 
+
+/**
+ *	ata_pause - Flush writes and pause 400 nanoseconds.
+ *	@ap: Port to wait for.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
 static inline void ata_pause(struct ata_port *ap)
 {
 	ata_altstatus(ap);
 	ndelay(400);
 }
 
+
+/**
+ *	ata_busy_wait - Wait for a port status register
+ *	@ap: Port to wait for.
+ *
+ *	Waits up to max*10 microseconds for the selected bits in the port's
+ *	status register to be cleared.
+ *	Returns final value of status register.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
 static inline u8 ata_busy_wait(struct ata_port *ap, unsigned int bits,
 			       unsigned int max)
 {
@@ -486,6 +508,18 @@ static inline u8 ata_busy_wait(struct ata_port *ap, unsigned int bits,
 	return status;
 }
 
+
+/**
+ *	ata_wait_idle - Wait for a port to be idle.
+ *	@ap: Port to wait for.
+ *
+ *	Waits up to 10ms for port's BUSY and DRQ signals to clear.
+ *	Returns final value of status register.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
 static inline u8 ata_wait_idle(struct ata_port *ap)
 {
 	u8 status = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
@@ -524,6 +558,18 @@ static inline void ata_tf_init(struct ata_port *ap, struct ata_taskfile *tf, uns
 		tf->device = ATA_DEVICE_OBS | ATA_DEV1;
 }
 
+
+/**
+ *	ata_irq_on - Enable interrupts on a port.
+ *	@ap: Port on which interrupts are enabled.
+ *
+ *	Enable interrupts on a legacy IDE device using MMIO or PIO,
+ *	wait for idle, clear any pending interrupts.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
 static inline u8 ata_irq_on(struct ata_port *ap)
 {
 	struct ata_ioports *ioaddr = &ap->ioaddr;
@@ -543,6 +589,18 @@ static inline u8 ata_irq_on(struct ata_port *ap)
 	return tmp;
 }
 
+
+/**
+ *	ata_irq_ack - Acknowledge a device interrupt.
+ *	@ap: Port on which interrupts are enabled.
+ *
+ *	Wait up to 10 ms for legacy IDE device to become idle (BUSY
+ *	or BUSY+DRQ clear).  Obtain dma status and port status from
+ *	device.  Clear the interrupt.  Return port status.
+ *
+ *	LOCKING:
+ */
+
 static inline u8 ata_irq_ack(struct ata_port *ap, unsigned int chk_drq)
 {
 	unsigned int bits = chk_drq ? ATA_BUSY | ATA_DRQ : ATA_BUSY;
-- 
cgit v1.2.2


From b597ef4712c05c962640a655386a7f06cc1a1fbc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 2 Jun 2005 16:36:00 -0700
Subject: [NET]: Fix locking in shaper driver.

 o use a semaphore instead of an opencoded and racy lock
 o move locking out of shaper_kick and into the callers - most just
   released the lock before calling shaper_kick
 o remove in_interrupt() tests.  from ->close we can always block, from
   ->hard_start_xmit and timer context never

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_shaper.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h
index 0485b256d043..004e6f09a6e2 100644
--- a/include/linux/if_shaper.h
+++ b/include/linux/if_shaper.h
@@ -23,7 +23,7 @@ struct shaper
 	__u32 shapeclock;
 	unsigned long recovery;	/* Time we can next clock a packet out on
 				   an empty queue */
-        unsigned long locked;
+	struct semaphore sem;
         struct net_device_stats stats;
 	struct net_device *dev;
 	int  (*hard_start_xmit) (struct sk_buff *skb,
@@ -38,7 +38,6 @@ struct shaper
 	int (*hard_header_cache)(struct neighbour *neigh, struct hh_cache *hh);
 	void (*header_cache_update)(struct hh_cache *hh, struct net_device *dev, unsigned char *  haddr);
 	struct net_device_stats* (*get_stats)(struct net_device *dev);
-	wait_queue_head_t  wait_queue;
 	struct timer_list timer;
 };
 
-- 
cgit v1.2.2


From 5ba0eac6e0b7e2889649a1105d97c600595e2bb1 Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@suse.cz>
Date: Thu, 2 Jun 2005 16:48:05 -0700
Subject: [NET]: Fix HH_DATA_OFF.

When the hardware header size is a multiple of HH_DATA_MOD, HH_DATA_OFF()
incorrectly returns HH_DATA_MOD (instead of 0). This affects ieee80211 layer
as 802.11 header is 32 bytes long.

Signed-off-by: Jiri Benc <jbenc@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 470af8c1a4a0..ba5d1236aa17 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -204,7 +204,7 @@ struct hh_cache
 	/* cached hardware header; allow for machine alignment needs.        */
 #define HH_DATA_MOD	16
 #define HH_DATA_OFF(__len) \
-	(HH_DATA_MOD - ((__len) & (HH_DATA_MOD - 1)))
+	(HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1))
 #define HH_DATA_ALIGN(__len) \
 	(((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1))
 	unsigned long	hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
-- 
cgit v1.2.2


From 719df469cb51199316ae2a11c75a8046be34b899 Mon Sep 17 00:00:00 2001
From: Roman Kagan <rkagan@mail.ru>
Date: Fri, 6 May 2005 00:55:56 +0400
Subject: [PATCH] USB: update urb documentation

On Wed, May 04, 2005 at 01:37:30PM -0700, David Brownell wrote:
> On Wednesday 04 May 2005 12:19 pm, Roman Kagan wrote:
> > struct urb {
> > 	/* private, usb core and host controller only fields in the urb */
> > 	...
> > 	struct list_head urb_list;	/* list pointer to all active urbs */
> > 	...
> > };
> >
> > Is it safe to use it for driver's purposes when the driver owns the urb,
> > that is, starting from the completion routine until the urb is submitted
> > with usb_submit_urb()?
>
> Right now, it should be.

Great!  FWIW I've briefly tested a modified version of usbatm using
the list head in struct urb instead of creating a wrapper struct, and I
haven't seen any failures yet.  So I tend to believe that your "should
be" actually means "is" :)

> > If it is, can it be guaranteed in future, e.g.
> > by moving the list head into the public section of struct urb?
>
> In fact I'm not sure why it ever got called "private" to usbcore/hcds.
> I thought the idea was that it should be like urb->status, reserved for
> whoever controls the URB.

OK then how about the following (essentially documentation) patch?

Signed-off-by: Roman Kagan <rkagan@mail.ru>
Acked-by: David Brownell <david-b@pacbell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 41d1a644c9d4..2d1ac5058534 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -796,6 +796,10 @@ typedef void (*usb_complete_t)(struct urb *, struct pt_regs *);
  * of the iso_frame_desc array, and the number of errors is reported in
  * error_count.  Completion callbacks for ISO transfers will normally
  * (re)submit URBs to ensure a constant transfer rate.
+ *
+ * Note that even fields marked "public" should not be touched by the driver
+ * when the urb is owned by the hcd, that is, since the call to
+ * usb_submit_urb() till the entry into the completion routine.
  */
 struct urb
 {
@@ -803,12 +807,12 @@ struct urb
 	struct kref kref;		/* reference count of the URB */
 	spinlock_t lock;		/* lock for the URB */
 	void *hcpriv;			/* private data for host controller */
-	struct list_head urb_list;	/* list pointer to all active urbs */
 	int bandwidth;			/* bandwidth for INT/ISO request */
 	atomic_t use_count;		/* concurrent submissions counter */
 	u8 reject;			/* submissions will fail */
 
 	/* public, documented fields in the urb that can be used by drivers */
+	struct list_head urb_list;	/* list head for use by the urb owner */
 	struct usb_device *dev; 	/* (in) pointer to associated device */
 	unsigned int pipe;		/* (in) pipe information */
 	int status;			/* (return) non-ISO status */
-- 
cgit v1.2.2


From 3f5948fa2cbbda1261eec9a39ef3004b3caf73fb Mon Sep 17 00:00:00 2001
From: David Mosberger <davidm@napali.hpl.hp.com>
Date: Mon, 6 Jun 2005 15:50:09 -0700
Subject: [PATCH] Include <linux/config.h> before testing CONFIG_ACPI

I'm not sure why this issue is suddenly showing, but without this
patchlet, the zx1 config won't compile anymore (e.g., to see the
compilation-error, look for "***" in [1]).

[1] http://www.gelato.unsw.edu.au/kerncomp/results//2005-06-06-17-00/zx1_defconfig-log.html

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/acpi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d5a55bdb9c3c..b123cc08773d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -25,6 +25,8 @@
 #ifndef _LINUX_ACPI_H
 #define _LINUX_ACPI_H
 
+#include <linux/config.h>
+
 #ifdef	CONFIG_ACPI
 
 #ifndef _LINUX
-- 
cgit v1.2.2


From d0de98fa16169562bd74913c6c9b3857f9065c79 Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@fairlite.demon.co.uk>
Date: Tue, 31 May 2005 19:50:49 +0100
Subject: [PATCH] i945G patch for agpgart

Attached is a small patch for i945G support against 2.6.11.11.

From: Alan Hourihane <alanh@fairlite.demon.co.uk>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b0d6134e1ee6..18f734ec9181 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2382,6 +2382,8 @@
 #define PCI_DEVICE_ID_INTEL_82915G_IG	0x2582
 #define PCI_DEVICE_ID_INTEL_82915GM_HB	0x2590
 #define PCI_DEVICE_ID_INTEL_82915GM_IG	0x2592
+#define PCI_DEVICE_ID_INTEL_82945G_HB	0x2770
+#define PCI_DEVICE_ID_INTEL_82945G_IG	0x2772
 #define PCI_DEVICE_ID_INTEL_ICH6_0	0x2640
 #define PCI_DEVICE_ID_INTEL_ICH6_1	0x2641
 #define PCI_DEVICE_ID_INTEL_ICH6_2	0x2642
-- 
cgit v1.2.2


From 6d1cfbab4de64f2d0c5b0f81177ade0d75b69288 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Wed, 8 Jun 2005 14:13:14 -0700
Subject: [TG3]: Fix 5700/5701 DMA corruption on Apple G4.

Fix 5700/5701 DMA write corruption on Apple G4 by detecting the Apple
UniNorth PCI 1.5 chipset and adjusting the DMA write boundary to 16. DMA
test fails to detect the problem with this chipset.

Thanks to Manuel Perez Ayala for reporting the problem and helping to
debug it.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 18f734ec9181..b8b4ebf9abf1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -874,6 +874,7 @@
 #define PCI_DEVICE_ID_APPLE_KL_USB_P	0x0026
 #define PCI_DEVICE_ID_APPLE_UNI_N_AGP_P	0x0027
 #define PCI_DEVICE_ID_APPLE_UNI_N_AGP15	0x002d
+#define PCI_DEVICE_ID_APPLE_UNI_N_PCI15	0x002e
 #define PCI_DEVICE_ID_APPLE_UNI_N_FW2	0x0030
 #define PCI_DEVICE_ID_APPLE_UNI_N_GMAC2	0x0032
 #define PCI_DEVIEC_ID_APPLE_UNI_N_ATA	0x0033
-- 
cgit v1.2.2


From 4890062960cbc4d3cebdbd8261a68bc85efcf5d4 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 8 Jun 2005 15:10:48 -0700
Subject: [PKT_SCHED]: Allow socket attributes to be matched on via meta ematch

Adds meta collectors for all socket attributes that make sense
to be filtered upon. Some of them are only useful for debugging
but having them doesn't hurt.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tc_ematch/tc_em_meta.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h
index aa6b48bb4dcd..a6b2cc530af5 100644
--- a/include/linux/tc_ematch/tc_em_meta.h
+++ b/include/linux/tc_ematch/tc_em_meta.h
@@ -56,6 +56,36 @@ enum
 	TCF_META_ID_TCCLASSID,
 	TCF_META_ID_RTCLASSID,
 	TCF_META_ID_RTIIF,
+	TCF_META_ID_SK_FAMILY,
+	TCF_META_ID_SK_STATE,
+	TCF_META_ID_SK_REUSE,
+	TCF_META_ID_SK_BOUND_IF,
+	TCF_META_ID_SK_REFCNT,
+	TCF_META_ID_SK_SHUTDOWN,
+	TCF_META_ID_SK_PROTO,
+	TCF_META_ID_SK_TYPE,
+	TCF_META_ID_SK_RCVBUF,
+	TCF_META_ID_SK_RMEM_ALLOC,
+	TCF_META_ID_SK_WMEM_ALLOC,
+	TCF_META_ID_SK_OMEM_ALLOC,
+	TCF_META_ID_SK_WMEM_QUEUED,
+	TCF_META_ID_SK_RCV_QLEN,
+	TCF_META_ID_SK_SND_QLEN,
+ 	TCF_META_ID_SK_ERR_QLEN,
+	TCF_META_ID_SK_FORWARD_ALLOCS,
+	TCF_META_ID_SK_SNDBUF,
+ 	TCF_META_ID_SK_ALLOCS,
+ 	TCF_META_ID_SK_ROUTE_CAPS,
+ 	TCF_META_ID_SK_HASHENT,
+ 	TCF_META_ID_SK_LINGERTIME,
+ 	TCF_META_ID_SK_ACK_BACKLOG,
+ 	TCF_META_ID_SK_MAX_ACK_BACKLOG,
+ 	TCF_META_ID_SK_PRIO,
+ 	TCF_META_ID_SK_RCVLOWAT,
+ 	TCF_META_ID_SK_RCVTIMEO,
+ 	TCF_META_ID_SK_SNDTIMEO,
+ 	TCF_META_ID_SK_SENDMSG_OFF,
+ 	TCF_META_ID_SK_WRITE_PENDING,
 	__TCF_META_ID_MAX
 };
 #define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1)
-- 
cgit v1.2.2


From a58e76f25432dc5e3e84d04c27bec03347ca365b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 12 Jun 2005 10:56:26 +0200
Subject: [PATCH] Remove obsolete HAVE_ARCH_GET_SIGNAL_TO_DELIVER?

Now m68k no longer sets HAVE_ARCH_GET_SIGNAL_TO_DELIVER, can it be removed
completely? Or may ARM26 still need it? Note that its usage was removed from
kernel/signal.c about 2 months ago.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/signal.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 0a98f5ec5cae..7be18b5e2fb4 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -231,10 +231,8 @@ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_sigpending(void __user *, unsigned long);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 
-#ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER
 struct pt_regs;
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
-#endif
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.2


From 03722adce90a248d0bea77d390decbd05991e2d2 Mon Sep 17 00:00:00 2001
From: Tom Rini <trini@kernel.crashing.org>
Date: Mon, 13 Jun 2005 13:57:10 -0700
Subject: [NET]: linux/if_tr.h needs asm/byteorder.h

<linux/if_tr.h> uses __be16, but does not directly include
<asm/byteorder.h>.  Add this in, so that dhcp/net-tools token ring code
can compile again.

Signed-off-by: Tom Rini <trini@kernel.crashing.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h
index 6688b414c529..3fba9e2f5427 100644
--- a/include/linux/if_tr.h
+++ b/include/linux/if_tr.h
@@ -19,6 +19,8 @@
 #ifndef _LINUX_IF_TR_H
 #define _LINUX_IF_TR_H
 
+#include <asm/byteorder.h>	/* For __be16 */
+
 /* IEEE 802.5 Token-Ring magic constants.  The frame sizes omit the preamble
    and FCS/CRC (frame check sequence). */
 #define TR_ALEN		6		/* Octets in one token-ring addr */
-- 
cgit v1.2.2


From 1c2fb7f93cb20621772bf304f3dba0849942e5db Mon Sep 17 00:00:00 2001
From: "J. Simonetti" <jeroen@simonetti.nl>
Date: Mon, 13 Jun 2005 15:19:03 -0700
Subject: [IPV4]: Sysctl configurable icmp error source address.

This patch alows you to change the source address of icmp error
messages. It applies cleanly to 2.6.11.11 and retains the default
behaviour.

In the old (default) behaviour icmp error messages are sent with the ip
of the exiting interface.

The new behaviour (when the sysctl variable is toggled on), it will send
the message with the ip of the interface that received the packet that
caused the icmp error. This is the behaviour network administrators will
expect from a router. It makes debugging complicated network layouts
much easier. Also, all 'vendor routers' I know of have the later
behaviour.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 23032d9d6071..a17745c80a91 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -346,6 +346,7 @@ enum
 	NET_TCP_MODERATE_RCVBUF=106,
 	NET_TCP_TSO_WIN_DIVISOR=107,
 	NET_TCP_BIC_BETA=108,
+	NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
 };
 
 enum {
-- 
cgit v1.2.2


From 986a80d5c154808cc78170584670324a22fd8219 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Thu, 16 Jun 2005 15:14:00 -0700
Subject: [PATCH] avoid signed vs unsigned comparison in efi_range_is_wc()

warning when building with gcc -W :

 include/linux/efi.h: In function `efi_range_is_wc':
 include/linux/efi.h:320: warning: comparison between signed and unsigned

It looks to me like a significantly large 'len' passed in could cause the
loop to never end. Isn't it safer to make 'i' an unsigned long as well?
Like this little patch below (which of course also kills the warning) :

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/efi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 047e7222df7a..73781ec165b4 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -315,7 +315,7 @@ extern struct efi_memory_map memmap;
  */
 static inline int efi_range_is_wc(unsigned long start, unsigned long len)
 {
-	int i;
+	unsigned long i;
 
 	for (i = 0; i < len; i += (1UL << EFI_PAGE_SHIFT)) {
 		unsigned long paddr = __pa(start + i);
-- 
cgit v1.2.2


From b8112df71cae7d6a86158caeb19d215f56c4f9ab Mon Sep 17 00:00:00 2001
From: Lee Revell <rlrevell@joe-job.com>
Date: Wed, 15 Jun 2005 14:19:03 -0400
Subject: [SCSI] Add DMA mask constants other than 32 and 64 bit

Signed-Off-By: Lee Revell <rlrevell@joe-job.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/dma-mapping.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 806c305332c1..2d80cc761a15 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -14,7 +14,12 @@ enum dma_data_direction {
 };
 
 #define DMA_64BIT_MASK	0xffffffffffffffffULL
+#define DMA_40BIT_MASK	0x000000ffffffffffULL
+#define DMA_39BIT_MASK	0x0000007fffffffffULL
 #define DMA_32BIT_MASK	0x00000000ffffffffULL
+#define DMA_31BIT_MASK	0x000000007fffffffULL
+#define DMA_30BIT_MASK	0x000000003fffffffULL
+#define DMA_29BIT_MASK	0x000000001fffffffULL
 
 #include <asm/dma-mapping.h>
 
-- 
cgit v1.2.2


From 26b15dad9f1c19d6d4f7b999b07eaa6d98e4b375 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Sat, 18 Jun 2005 22:42:13 -0700
Subject: [IPSEC] Add complete xfrm event notification

Heres the final patch.
What this patch provides

- netlink xfrm events
- ability to have events generated by netlink propagated to pfkey
  and vice versa.
- fixes the acquire lets-be-happy-with-one-success issue

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/xfrm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index fd2ef742a9fd..03bc600516ea 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -257,5 +257,7 @@ struct xfrm_usersa_flush {
 
 #define XFRMGRP_ACQUIRE		1
 #define XFRMGRP_EXPIRE		2
+#define XFRMGRP_SA		4
+#define XFRMGRP_POLICY		8
 
 #endif /* _LINUX_XFRM_H */
-- 
cgit v1.2.2


From 1944972d3bb651474a5021c9da8d0166ae19f1eb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 18 Jun 2005 22:46:19 -0700
Subject: [SLAB] Introduce kmem_cache_name

This is for use with slab users that pass a dynamically allocated slab name in
kmem_cache_create, so that before destroying the slab one can retrieve the name
and free its memory.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/slab.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7d66385ae750..76cf7e60216c 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -64,6 +64,7 @@ extern int kmem_cache_shrink(kmem_cache_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
+extern const char *kmem_cache_name(kmem_cache_t *);
 extern kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags);
 
 /* Size description struct for general caches. */
-- 
cgit v1.2.2


From 2e6599cb899ba4b133f42cbf9d2b1883d2dc583a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 18 Jun 2005 22:46:52 -0700
Subject: [NET] Generalise TCP's struct open_request minisock infrastructure

Kept this first changeset minimal, without changing existing names to
ease peer review.

Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn
has two new members:

->slab, that replaces tcp_openreq_cachep
->obj_size, to inform the size of the openreq descendant for
  a specific protocol

The protocol specific fields in struct open_request were moved to a
class hierarchy, with the things that are common to all connection
oriented PF_INET protocols in struct inet_request_sock, the TCP ones
in tcp_request_sock, that is an inet_request_sock, that is an
open_request.

I.e. this uses the same approach used for the struct sock class
hierarchy, with sk_prot indicating if the protocol wants to use the
open_request infrastructure by filling in sk_prot->rsk_prot with an
or_calltable.

Results? Performance is improved and TCP v4 now uses only 64 bytes per
open request minisock, down from 96 without this patch :-)

Next changeset will rename some of the structs, fields and functions
mentioned above, struct or_calltable is way unclear, better name it
struct request_sock_ops, s/struct open_request/struct request_sock/g,
etc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h   | 21 +++++++++++++++++++++
 include/linux/ipv6.h | 13 +++++++++++++
 include/linux/tcp.h  | 11 +++++++++++
 3 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 8438c68591f9..d5b7c907204e 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -81,6 +81,7 @@
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <linux/types.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <linux/igmp.h>
 #include <net/flow.h>
@@ -107,6 +108,26 @@ struct ip_options {
 
 #define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
 
+struct inet_request_sock {
+	struct open_request	req;
+	u32			loc_addr;
+	u32			rmt_addr;
+	u16			rmt_port;
+	u16			snd_wscale : 4, 
+				rcv_wscale : 4, 
+				tstamp_ok  : 1,
+				sack_ok	   : 1,
+				wscale_ok  : 1,
+				ecn_ok	   : 1,
+				acked	   : 1;
+	struct ip_options	*opt;
+};
+
+static inline struct inet_request_sock *inet_rsk(const struct open_request *sk)
+{
+	return (struct inet_request_sock *)sk;
+}
+
 struct ipv6_pinfo;
 
 struct inet_sock {
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ab0d0efbf240..98acdbf3d446 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -193,6 +193,19 @@ struct inet6_skb_parm {
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
 
+struct tcp6_request_sock {
+	struct tcp_request_sock	req;
+	struct in6_addr		loc_addr;
+	struct in6_addr		rmt_addr;
+	struct sk_buff		*pktopts;
+	int			iif;
+};
+
+static inline struct tcp6_request_sock *tcp6_rsk(const struct open_request *sk)
+{
+	return (struct tcp6_request_sock *)sk;
+}
+
 /**
  * struct ipv6_pinfo - ipv6 private area
  *
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 14a55e3e3a50..86771b37b80d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -230,6 +230,17 @@ struct tcp_options_received {
 	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
+struct tcp_request_sock {
+	struct inet_request_sock req;
+	__u32			 rcv_isn;
+	__u32			 snt_isn;
+};
+
+static inline struct tcp_request_sock *tcp_rsk(const struct open_request *req)
+{
+	return (struct tcp_request_sock *)req;
+}
+
 struct tcp_sock {
 	/* inet_sock has to be the first member of tcp_sock */
 	struct inet_sock	inet;
-- 
cgit v1.2.2


From 60236fdd08b2169045a3bbfc5ffe1576e6c3c17b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 18 Jun 2005 22:47:21 -0700
Subject: [NET] Rename open_request to request_sock

Ok, this one just renames some stuff to have a better namespace and to
dissassociate it from TCP:

struct open_request  -> struct request_sock
tcp_openreq_alloc    -> reqsk_alloc
tcp_openreq_free     -> reqsk_free
tcp_openreq_fastfree -> __reqsk_free

With this most of the infrastructure closely resembles a struct
sock methods subset.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h   | 4 ++--
 include/linux/ipv6.h | 2 +-
 include/linux/tcp.h  | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ip.h b/include/linux/ip.h
index d5b7c907204e..31e7cedd9f84 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -109,7 +109,7 @@ struct ip_options {
 #define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
 
 struct inet_request_sock {
-	struct open_request	req;
+	struct request_sock	req;
 	u32			loc_addr;
 	u32			rmt_addr;
 	u16			rmt_port;
@@ -123,7 +123,7 @@ struct inet_request_sock {
 	struct ip_options	*opt;
 };
 
-static inline struct inet_request_sock *inet_rsk(const struct open_request *sk)
+static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 {
 	return (struct inet_request_sock *)sk;
 }
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 98acdbf3d446..6fcd6a0ade24 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -201,7 +201,7 @@ struct tcp6_request_sock {
 	int			iif;
 };
 
-static inline struct tcp6_request_sock *tcp6_rsk(const struct open_request *sk)
+static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk)
 {
 	return (struct tcp6_request_sock *)sk;
 }
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 86771b37b80d..fb54292a15aa 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -236,7 +236,7 @@ struct tcp_request_sock {
 	__u32			 snt_isn;
 };
 
-static inline struct tcp_request_sock *tcp_rsk(const struct open_request *req)
+static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 {
 	return (struct tcp_request_sock *)req;
 }
@@ -393,8 +393,8 @@ struct tcp_sock {
 	struct tcp_listen_opt	*listen_opt;
 
 	/* FIFO of established children */
-	struct open_request	*accept_queue;
-	struct open_request	*accept_queue_tail;
+	struct request_sock	*accept_queue;
+	struct request_sock	*accept_queue_tail;
 
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
-- 
cgit v1.2.2


From 0e87506fcc734647c7b2497eee4eb81e785c857a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 18 Jun 2005 22:47:59 -0700
Subject: [NET] Generalise tcp_listen_opt

This chunks out the accept_queue and tcp_listen_opt code and moves
them to net/core/request_sock.c and include/net/request_sock.h, to
make it useful for other transport protocols, DCCP being the first one
to use it.

Next patches will rename tcp_listen_opt to accept_sock and remove the
inline tcp functions that just call a reqsk_queue_ function.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fb54292a15aa..97a7c9e03df5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -379,22 +379,7 @@ struct tcp_sock {
 
 	__u32	total_retrans;	/* Total retransmits for entire connection */
 
-	/* The syn_wait_lock is necessary only to avoid proc interface having
-	 * to grab the main lock sock while browsing the listening hash
-	 * (otherwise it's deadlock prone).
-	 * This lock is acquired in read mode only from listening_get_next()
-	 * and it's acquired in write mode _only_ from code that is actively
-	 * changing the syn_wait_queue. All readers that are holding
-	 * the master sock lock don't need to grab this lock in read mode
-	 * too as the syn_wait_queue writes are always protected from
-	 * the main sock lock.
-	 */
-	rwlock_t		syn_wait_lock;
-	struct tcp_listen_opt	*listen_opt;
-
-	/* FIFO of established children */
-	struct request_sock	*accept_queue;
-	struct request_sock	*accept_queue_tail;
+	struct request_sock_queue accept_queue; /* FIFO of established children */
 
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
-- 
cgit v1.2.2


From f88a10d65620d97b6d0a7e352a3493c1b7e7409b Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:50:12 -0700
Subject: [NETLINK]: New message building macros

 NLMSG_PUT_ANSWER(skb, nlcb, type, length)
   Start a new netlink message as answer to a request,
   returns the message header.

 NLMSG_END(skb, nlh)
   End a netlink message, fixes total message length,
   returns skb->len.

 NLMSG_CANCEL(skb, nlh)
   Cancel the building process and trim whole message
   from skb again, returns -1.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b2738ac8bc99..8d1cb419a930 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -171,8 +171,21 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len)
 }
 
 #define NLMSG_PUT(skb, pid, seq, type, len) \
-({ if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) goto nlmsg_failure; \
-   __nlmsg_put(skb, pid, seq, type, len); })
+({	if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \
+		goto nlmsg_failure; \
+	__nlmsg_put(skb, pid, seq, type, len); })
+
+#define NLMSG_PUT_ANSWER(skb, cb, type, len) \
+	NLMSG_PUT(skb, NETLINK_CB((cb)->skb).pid, \
+		  (cb)->nlh->nlmsg_seq, type, len)
+
+#define NLMSG_END(skb, nlh) \
+({	(nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \
+	(skb)->len; })
+
+#define NLMSG_CANCEL(skb, nlh) \
+({	skb_trim(skb, (unsigned char *) (nlh) - (skb)->data); \
+	-1; })
 
 extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 			      struct nlmsghdr *nlh,
-- 
cgit v1.2.2


From 00768244923f66801958a8d2d103f7b65608c9b6 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:50:38 -0700
Subject: [NETLINK] Routing attribute related shortcuts

 RTA_GET_U(32|64)(tlv)
   Assumes TLV is a u32/u64 field and returns its value.

 RTA_GET_[M]SECS(tlv)
   Assumes TLV is a u64 and transports jiffies converted
   to seconds or milliseconds and returns its value.

 RTA_PUT_U(32|64)(skb, type, value)
   Appends %value as fixed u32/u64 to %skb as TLV %type.

 RTA_PUT_[M]SECS(skb, type, jiffies)
   Converts %jiffies to secs/msecs and appends it as u64
   to %skb as TLV %type.

 RTA_PUT_STRING(skb, type, string)
   Appends %NUL terminated %string to %skb as TLV %type.

 RTA_NEST(skb, type)
   Starts a nested TLV %type and returns the nesting handle.

 RTA_NEST_END(skb, nesting_handle)
   Finishes the nested TLV %nesting_handle, must be called
   symmetric to RTA_NEST(). Returns skb->len

 RTA_NEST_CANCEL(skb, nesting_handle)
   Cancel the nested TLV %nesting_handle and trim nested TLV
   from skb again, returns -1.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 91ac97c20777..a09b5d42babf 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -789,6 +789,51 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 ({	if (unlikely(skb_tailroom(skb) < (int)(attrlen))) \
 		goto rtattr_failure; \
 	memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); })
+
+#define RTA_PUT_U32(skb, attrtype, value) \
+({	u32 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u32), &_tmp); })
+
+#define RTA_PUT_U64(skb, attrtype, value) \
+({	u64 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u64), &_tmp); })
+
+#define RTA_PUT_SECS(skb, attrtype, value) \
+	RTA_PUT_U64(skb, attrtype, (value) / HZ)
+
+#define RTA_PUT_MSECS(skb, attrtype, value) \
+	RTA_PUT_U64(skb, attrtype, jiffies_to_msecs(value))
+
+#define RTA_PUT_STRING(skb, attrtype, value) \
+	RTA_PUT(skb, attrtype, strlen(value) + 1, value)
+
+#define RTA_NEST(skb, type) \
+({	struct rtattr *__start = (struct rtattr *) (skb)->tail; \
+	RTA_PUT(skb, type, 0, NULL); \
+	__start;  })
+
+#define RTA_NEST_END(skb, start) \
+({	(start)->rta_len = ((skb)->tail - (unsigned char *) (start)); \
+	(skb)->len; })
+
+#define RTA_NEST_CANCEL(skb, start) \
+({	skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
+	-1; })
+
+#define RTA_GET_U32(rta) \
+({	if (!rta || RTA_PAYLOAD(rta) < sizeof(u32)) \
+		goto rtattr_failure; \
+	*(u32 *) RTA_DATA(rta); })
+
+#define RTA_GET_U64(rta) \
+({	u64 _tmp; \
+	if (!rta || RTA_PAYLOAD(rta) < sizeof(u64)) \
+		goto rtattr_failure; \
+	memcpy(&_tmp, RTA_DATA(rta), sizeof(_tmp)); \
+	_tmp; })
+
+#define RTA_GET_SECS(rta) ((unsigned long) RTA_GET_U64(rta) * HZ)
+#define RTA_GET_MSECS(rta) (msecs_to_jiffies((unsigned long) RTA_GET_U64(rta)))
 		
 static inline struct rtattr *
 __rta_reserve(struct sk_buff *skb, int attrtype, int attrlen)
-- 
cgit v1.2.2


From c7fb64db001f83ece669c76a02d8ec2fdb1dd307 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:50:55 -0700
Subject: [NETLINK]: Neighbour table configuration and statistics via rtnetlink

To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
NLM_F_DUMP flag set. Every neighbour table configuration is
spread over multiple messages to avoid running into message
size limits on systems with many interfaces. The first message
in the sequence transports all not device specific data such as
statistics, configuration, and the default parameter set.
This message is followed by 0..n messages carrying device
specific parameter sets.

Although the ordering should be sufficient, NDTA_NAME can be
used to identify sequences. The initial message can be identified
by checking for NDTA_CONFIG. The device specific messages do
not contain this TLV but have NDTPA_IFINDEX set to the
corresponding interface index.

To change neighbour table attributes, send RTM_SETNEIGHTBL
with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
otherwise. Device specific parameter sets can be changed by
setting NDTPA_IFINDEX to the interface index of the corresponding
device.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 107 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index a09b5d42babf..5a5cda160267 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -89,6 +89,13 @@ enum {
 	RTM_GETANYCAST	= 62,
 #define RTM_GETANYCAST	RTM_GETANYCAST
 
+	RTM_NEWNEIGHTBL	= 64,
+#define RTM_NEWNEIGHTBL	RTM_NEWNEIGHTBL
+	RTM_GETNEIGHTBL	= 66,
+#define RTM_GETNEIGHTBL	RTM_GETNEIGHTBL
+	RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL	RTM_SETNEIGHTBL
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
@@ -493,6 +500,106 @@ struct nda_cacheinfo
 	__u32		ndm_refcnt;
 };
 
+
+/*****************************************************************
+ *		Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats
+{
+	__u64		ndts_allocs;
+	__u64		ndts_destroys;
+	__u64		ndts_hash_grows;
+	__u64		ndts_res_failed;
+	__u64		ndts_lookups;
+	__u64		ndts_hits;
+	__u64		ndts_rcv_probes_mcast;
+	__u64		ndts_rcv_probes_ucast;
+	__u64		ndts_periodic_gc_runs;
+	__u64		ndts_forced_gc_runs;
+};
+
+enum {
+	NDTPA_UNSPEC,
+	NDTPA_IFINDEX,			/* u32, unchangeable */
+	NDTPA_REFCNT,			/* u32, read-only */
+	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
+	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
+	NDTPA_RETRANS_TIME,		/* u64, msecs */
+	NDTPA_GC_STALETIME,		/* u64, msecs */
+	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
+	NDTPA_QUEUE_LEN,		/* u32 */
+	NDTPA_APP_PROBES,		/* u32 */
+	NDTPA_UCAST_PROBES,		/* u32 */
+	NDTPA_MCAST_PROBES,		/* u32 */
+	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_QLEN,		/* u32 */
+	NDTPA_LOCKTIME,			/* u64, msecs */
+	__NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+	__u8		ndtm_family;
+	__u8		ndtm_pad1;
+	__u16		ndtm_pad2;
+};
+
+struct ndt_config
+{
+	__u16		ndtc_key_len;
+	__u16		ndtc_entry_size;
+	__u32		ndtc_entries;
+	__u32		ndtc_last_flush;	/* delta to now in msecs */
+	__u32		ndtc_last_rand;		/* delta to now in msecs */
+	__u32		ndtc_hash_rnd;
+	__u32		ndtc_hash_mask;
+	__u32		ndtc_hash_chain_gc;
+	__u32		ndtc_proxy_qlen;
+};
+
+enum {
+	NDTA_UNSPEC,
+	NDTA_NAME,			/* char *, unchangeable */
+	NDTA_THRESH1,			/* u32 */
+	NDTA_THRESH2,			/* u32 */
+	NDTA_THRESH3,			/* u32 */
+	NDTA_CONFIG,			/* struct ndt_config, read-only */
+	NDTA_PARMS,			/* nested TLV NDTPA_* */
+	NDTA_STATS,			/* struct ndt_stats, read-only */
+	NDTA_GC_INTERVAL,		/* u64, msecs */
+	__NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
+		     NLMSG_ALIGN(sizeof(struct ndtmsg))))
+#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
+
+
 /****
  *		General form of address family dependent message.
  ****/
-- 
cgit v1.2.2


From c52a3f89f882b84fc422000655c023fe73e701cf Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:51:26 -0700
Subject: [NETLINK]: Fix RTA_NEST_CANCEL().

Only skb_trim() if 'start' is non-NULL.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5a5cda160267..6fff8c4c99c7 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -924,7 +924,8 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 	(skb)->len; })
 
 #define RTA_NEST_CANCEL(skb, start) \
-({	skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
+({	if (start) \
+		skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
 	-1; })
 
 #define RTA_GET_U32(rta) \
-- 
cgit v1.2.2


From 8f48bcd4ef11a69add178fc3111a77e7ee95bacd Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:52:36 -0700
Subject: [RTNETLINK]: Add RTA_(PUT|GET) shortcuts for u8, u16, and flag

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 6fff8c4c99c7..e68dbf0bf579 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -897,6 +897,14 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 		goto rtattr_failure; \
 	memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); })
 
+#define RTA_PUT_U8(skb, attrtype, value) \
+({	u8 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u8), &_tmp); })
+
+#define RTA_PUT_U16(skb, attrtype, value) \
+({	u16 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u16), &_tmp); })
+
 #define RTA_PUT_U32(skb, attrtype, value) \
 ({	u32 _tmp = (value); \
 	RTA_PUT(skb, attrtype, sizeof(u32), &_tmp); })
@@ -914,6 +922,9 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 #define RTA_PUT_STRING(skb, attrtype, value) \
 	RTA_PUT(skb, attrtype, strlen(value) + 1, value)
 
+#define RTA_PUT_FLAG(skb, attrtype) \
+	RTA_PUT(skb, attrtype, 0, NULL);
+
 #define RTA_NEST(skb, type) \
 ({	struct rtattr *__start = (struct rtattr *) (skb)->tail; \
 	RTA_PUT(skb, type, 0, NULL); \
@@ -928,6 +939,16 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 		skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
 	-1; })
 
+#define RTA_GET_U8(rta) \
+({	if (!rta || RTA_PAYLOAD(rta) < sizeof(u8)) \
+		goto rtattr_failure; \
+	*(u8 *) RTA_DATA(rta); })
+
+#define RTA_GET_U16(rta) \
+({	if (!rta || RTA_PAYLOAD(rta) < sizeof(u16)) \
+		goto rtattr_failure; \
+	*(u16 *) RTA_DATA(rta); })
+
 #define RTA_GET_U32(rta) \
 ({	if (!rta || RTA_PAYLOAD(rta) < sizeof(u32)) \
 		goto rtattr_failure; \
@@ -940,6 +961,8 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 	memcpy(&_tmp, RTA_DATA(rta), sizeof(_tmp)); \
 	_tmp; })
 
+#define RTA_GET_FLAG(rta) (!!(rta))
+
 #define RTA_GET_SECS(rta) ((unsigned long) RTA_GET_U64(rta) * HZ)
 #define RTA_GET_MSECS(rta) (msecs_to_jiffies((unsigned long) RTA_GET_U64(rta)))
 		
-- 
cgit v1.2.2


From 1797754ea7ee5e0d859b0a32506ff999f8d5fb71 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 18 Jun 2005 22:53:48 -0700
Subject: [NETLINK]: Introduce NLMSG_NEW macro to better handle netlink flags

Introduces a new macro NLMSG_NEW which extends NLMSG_PUT but takes
a flags argument. NLMSG_PUT stays there for compatibility but now
calls NLMSG_NEW with flags == 0. NLMSG_PUT_ANSWER is renamed to
NLMSG_NEW_ANSWER which now also takes a flags argument.

Also converts the users of NLMSG_PUT_ANSWER to use NLMSG_NEW_ANSWER
and fixes the two direct users of __nlmsg_put to either provide
the flags or use NLMSG_NEW(_ANSWER).

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 8d1cb419a930..e38407a23d04 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -156,7 +156,7 @@ struct netlink_notify
 };
 
 static __inline__ struct nlmsghdr *
-__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len)
+__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 {
 	struct nlmsghdr *nlh;
 	int size = NLMSG_LENGTH(len);
@@ -164,20 +164,23 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len)
 	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
 	nlh->nlmsg_type = type;
 	nlh->nlmsg_len = size;
-	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_flags = flags;
 	nlh->nlmsg_pid = pid;
 	nlh->nlmsg_seq = seq;
 	return nlh;
 }
 
-#define NLMSG_PUT(skb, pid, seq, type, len) \
+#define NLMSG_NEW(skb, pid, seq, type, len, flags) \
 ({	if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \
 		goto nlmsg_failure; \
-	__nlmsg_put(skb, pid, seq, type, len); })
+	__nlmsg_put(skb, pid, seq, type, len, flags); })
+
+#define NLMSG_PUT(skb, pid, seq, type, len) \
+	NLMSG_NEW(skb, pid, seq, type, len, 0)
 
-#define NLMSG_PUT_ANSWER(skb, cb, type, len) \
-	NLMSG_PUT(skb, NETLINK_CB((cb)->skb).pid, \
-		  (cb)->nlh->nlmsg_seq, type, len)
+#define NLMSG_NEW_ANSWER(skb, cb, type, len, flags) \
+	NLMSG_NEW(skb, NETLINK_CB((cb)->skb).pid, \
+		  (cb)->nlh->nlmsg_seq, type, len, flags)
 
 #define NLMSG_END(skb, nlh) \
 ({	(nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \
-- 
cgit v1.2.2


From 0603eac0d6b77acac5924a2734228cbaf072f993 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 18 Jun 2005 22:54:36 -0700
Subject: [IPSEC]: Add XFRMA_SA/XFRMA_POLICY for delete notification

This patch changes the format of the XFRM_MSG_DELSA and
XFRM_MSG_DELPOLICY notification so that the main message
sent is of the same format as that received by the kernel
if the original message was via netlink.  This also means
that we won't lose the byid information carried in km_event.

Since this user interface is introduced by Jamal's patch
we can still afford to change it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 03bc600516ea..d68391a9b9f3 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -174,6 +174,8 @@ enum xfrm_attr_type_t {
 	XFRMA_ALG_COMP,		/* struct xfrm_algo */
 	XFRMA_ENCAP,		/* struct xfrm_algo + struct xfrm_encap_tmpl */
 	XFRMA_TMPL,		/* 1 or more struct xfrm_user_tmpl */
+	XFRMA_SA,
+	XFRMA_POLICY,
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
-- 
cgit v1.2.2


From dd87147eed934eaff92869f3d158697c7239d1d2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 20 Jun 2005 13:21:43 -0700
Subject: [IPSEC]: Add XFRM_STATE_NOPMTUDISC flag

This patch adds the flag XFRM_STATE_NOPMTUDISC for xfrm states.  It is
similar to the nopmtudisc on IPIP/GRE tunnels.  It only has an effect
on IPv4 tunnel mode states.  For these states, it will ensure that the
DF flag is always cleared.

This is primarily useful to work around ICMP blackholes.

In future this flag could also allow a larger MTU to be set within the
tunnel just like IPIP/GRE tunnels.  This could be useful for short haul
tunnels where temporary fragmentation outside the tunnel is desired over
smaller fragments inside the tunnel.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: James Morris <jmorris@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pfkeyv2.h | 1 +
 include/linux/xfrm.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index e6b519220245..724066778aff 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -245,6 +245,7 @@ struct sadb_x_nat_t_port {
 
 /* Security Association flags */
 #define SADB_SAFLAGS_PFS	1
+#define SADB_SAFLAGS_NOPMTUDISC	0x20000000
 #define SADB_SAFLAGS_DECAP_DSCP	0x40000000
 #define SADB_SAFLAGS_NOECN	0x80000000
 
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index d68391a9b9f3..f0d423300d84 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -196,6 +196,7 @@ struct xfrm_usersa_info {
 	__u8				flags;
 #define XFRM_STATE_NOECN	1
 #define XFRM_STATE_DECAP_DSCP	2
+#define XFRM_STATE_NOPMTUDISC	4
 };
 
 struct xfrm_usersa_id {
-- 
cgit v1.2.2


From f6e276ee67c0ac9efafd24bc6f7a84aa359656df Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 20 Jun 2005 13:32:05 -0700
Subject: [ATALK]: endian annotations

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 31d3fc25ccbd..09a1451c1159 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -20,7 +20,7 @@
 #define SIOCATALKDIFADDR       (SIOCPROTOPRIVATE + 0)
 
 struct atalk_addr {
-	__u16	s_net;
+	__be16	s_net;
 	__u8	s_node;
 };
 
@@ -33,8 +33,8 @@ struct sockaddr_at {
 
 struct atalk_netrange {
 	__u8	nr_phase;
-	__u16	nr_firstnet;
-	__u16	nr_lastnet;
+	__be16	nr_firstnet;
+	__be16	nr_lastnet;
 };
 
 #ifdef __KERNEL__
@@ -70,8 +70,8 @@ struct atalk_iface {
 struct atalk_sock {
 	/* struct sock has to be the first member of atalk_sock */
 	struct sock	sk;
-	unsigned short	dest_net;
-	unsigned short	src_net;
+	__be16		dest_net;
+	__be16		src_net;
 	unsigned char	dest_node;
 	unsigned char	src_node;
 	unsigned char	dest_port;
@@ -95,9 +95,9 @@ struct ddpehdr {
 		deh_hops:4,
 		deh_len:10;
 #endif
-	__u16	deh_sum;
-	__u16	deh_dnet;
-	__u16	deh_snet;
+	__be16	deh_sum;
+	__be16	deh_dnet;
+	__be16	deh_snet;
 	__u8	deh_dnode;
 	__u8	deh_snode;
 	__u8	deh_dport;
@@ -142,24 +142,24 @@ struct ddpshdr {
 
 /* AppleTalk AARP headers */
 struct elapaarp {
-	__u16	hw_type;
+	__be16	hw_type;
 #define AARP_HW_TYPE_ETHERNET		1
 #define AARP_HW_TYPE_TOKENRING		2
-	__u16	pa_type;
+	__be16	pa_type;
 	__u8	hw_len;
 	__u8	pa_len;
 #define AARP_PA_ALEN			4
-	__u16	function;
+	__be16	function;
 #define AARP_REQUEST			1
 #define AARP_REPLY			2
 #define AARP_PROBE			3
 	__u8	hw_src[ETH_ALEN]	__attribute__ ((packed));
 	__u8	pa_src_zero		__attribute__ ((packed));
-	__u16	pa_src_net		__attribute__ ((packed));
+	__be16	pa_src_net		__attribute__ ((packed));
 	__u8	pa_src_node		__attribute__ ((packed));
 	__u8	hw_dst[ETH_ALEN]	__attribute__ ((packed));
 	__u8	pa_dst_zero		__attribute__ ((packed));
-	__u16	pa_dst_net		__attribute__ ((packed));
+	__be16	pa_dst_net		__attribute__ ((packed));
 	__u8	pa_dst_node		__attribute__ ((packed));	
 };
 
-- 
cgit v1.2.2


From 246955fe4c38bd706ae30e37c64892c94213775d Mon Sep 17 00:00:00 2001
From: Robert Olsson <Robert.Olsson@data.slu.se>
Date: Mon, 20 Jun 2005 13:36:39 -0700
Subject: [NETLINK]: fib_lookup() via netlink

Below is a more generic patch to do fib_lookup via netlink. For others
we should say that we discussed this as a way to verify route selection.
It's also possible there are others uses for this.

In short the fist half of struct fib_result_nl is filled in by caller
and netlink call fills in the other half and returns it.

In case anyone is interested there is a corresponding user app to compare
the full routing table this was used to test implementation of the LC-trie.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index e38407a23d04..561d4dc75836 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -14,6 +14,7 @@
 #define NETLINK_SELINUX		7	/* SELinux event notifications */
 #define NETLINK_ARPD		8
 #define NETLINK_AUDIT		9	/* auditing */
+#define NETLINK_FIB_LOOKUP	10	
 #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
 #define NETLINK_IP6_FW		13
 #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
-- 
cgit v1.2.2


From e3a15db2415579d5136b9ba9b52fe27c66da8780 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:31:08 -0500
Subject: [PATCH] sysfs_{create|remove}_link should take const char *

sysfs: make sysfs_{create|remove}_link to take const char * name.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 38b58b30814a..931b5aaaf380 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -105,11 +105,11 @@ sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode);
 extern void
 sysfs_remove_file(struct kobject *, const struct attribute *);
 
-extern int 
-sysfs_create_link(struct kobject * kobj, struct kobject * target, char * name);
+extern int
+sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name);
 
 extern void
-sysfs_remove_link(struct kobject *, char * name);
+sysfs_remove_link(struct kobject *, const char * name);
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
 int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr);
@@ -153,12 +153,12 @@ static inline void sysfs_remove_file(struct kobject * k, const struct attribute
 	;
 }
 
-static inline int sysfs_create_link(struct kobject * k, struct kobject * t, char * n)
+static inline int sysfs_create_link(struct kobject * k, struct kobject * t, const char * n)
 {
 	return 0;
 }
 
-static inline void sysfs_remove_link(struct kobject * k, char * name)
+static inline void sysfs_remove_link(struct kobject * k, const char * name)
 {
 	;
 }
-- 
cgit v1.2.2


From f3b4f3c6dec04c6c8261fe22645f07b39976595a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:32:00 -0500
Subject: [PATCH] Make kobject's name be const char *

kobject: make kobject's name const char * since users should not
	 attempt to change it (except by calling kobject_rename).

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 765d660d3bea..76dc67245c0c 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -33,7 +33,7 @@
 extern u64 hotplug_seqnum;
 
 struct kobject {
-	char			* k_name;
+	const char		* k_name;
 	char			name[KOBJ_NAME_LEN];
 	struct kref		kref;
 	struct list_head	entry;
@@ -46,7 +46,7 @@ struct kobject {
 extern int kobject_set_name(struct kobject *, const char *, ...)
 	__attribute__((format(printf,2,3)));
 
-static inline char * kobject_name(struct kobject * kobj)
+static inline const char * kobject_name(const struct kobject * kobj)
 {
 	return kobj->k_name;
 }
@@ -57,7 +57,7 @@ extern void kobject_cleanup(struct kobject *);
 extern int kobject_add(struct kobject *);
 extern void kobject_del(struct kobject *);
 
-extern int kobject_rename(struct kobject *, char *new_name);
+extern int kobject_rename(struct kobject *, const char *new_name);
 
 extern int kobject_register(struct kobject *);
 extern void kobject_unregister(struct kobject *);
-- 
cgit v1.2.2


From 419cab3fc69588ebe35b845cc3a584ae172463de Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:32:54 -0500
Subject: [PATCH] kset_hotplug_ops->name shoudl return const char *

kobject: change name() method in kset_hotplug_ops return const char *
	 since users shoudl not try to modify returned data.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 76dc67245c0c..3b22304f12fd 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -94,7 +94,7 @@ struct kobj_type {
  */
 struct kset_hotplug_ops {
 	int (*filter)(struct kset *kset, struct kobject *kobj);
-	char *(*name)(struct kset *kset, struct kobject *kobj);
+	const char *(*name)(struct kset *kset, struct kobject *kobj);
 	int (*hotplug)(struct kset *kset, struct kobject *kobj, char **envp,
 			int num_envp, char *buffer, int buffer_size);
 };
-- 
cgit v1.2.2


From 8d790d74085833ba2a3e84b5bcd683be4981c29a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Tue, 26 Apr 2005 02:34:05 -0500
Subject: [PATCH] make driver's name be const char *

Driver core:
  change driver's, bus's, class's and platform device's names
  to be const char * so one can use
            const char *drv_name = "asdfg";
  when initializing structures.
  Also kill couple of whitespaces.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index df94c0de53f2..fa9e6ca08f5a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -47,7 +47,7 @@ struct class_device;
 struct class_simple;
 
 struct bus_type {
-	char			* name;
+	const char		* name;
 
 	struct subsystem	subsys;
 	struct kset		drivers;
@@ -98,17 +98,17 @@ extern int bus_create_file(struct bus_type *, struct bus_attribute *);
 extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
 
 struct device_driver {
-	char			* name;
+	const char		* name;
 	struct bus_type		* bus;
 
 	struct completion	unloaded;
 	struct kobject		kobj;
 	struct list_head	devices;
 
-	struct module 		* owner;
+	struct module		* owner;
 
 	int	(*probe)	(struct device * dev);
-	int 	(*remove)	(struct device * dev);
+	int	(*remove)	(struct device * dev);
 	void	(*shutdown)	(struct device * dev);
 	int	(*suspend)	(struct device * dev, pm_message_t state, u32 level);
 	int	(*resume)	(struct device * dev, u32 level);
@@ -142,7 +142,7 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute *
  * device classes
  */
 struct class {
-	char			* name;
+	const char		* name;
 
 	struct subsystem	subsys;
 	struct list_head	children;
@@ -366,7 +366,7 @@ extern struct device *device_find(const char *name, struct bus_type *bus);
 /* drivers/base/platform.c */
 
 struct platform_device {
-	char		* name;
+	const char	* name;
 	u32		id;
 	struct device	dev;
 	u32		num_resources;
-- 
cgit v1.2.2


From d48593bf208e0d046c35fb0707ae5b23fef8c4ff Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Fri, 29 Apr 2005 00:58:46 -0500
Subject: [PATCH] Make attributes names const char *

sysfs: make attributes and attribute_group's names const char *

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 931b5aaaf380..d9cd2d31d377 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -16,13 +16,13 @@ struct kobject;
 struct module;
 
 struct attribute {
-	char			* name;
+	const char		* name;
 	struct module 		* owner;
 	mode_t			mode;
 };
 
 struct attribute_group {
-	char			* name;
+	const char		* name;
 	struct attribute	** attrs;
 };
 
-- 
cgit v1.2.2


From e9ba6365fd4f0d9e7d022c883bd044fbaa48257f Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Tue, 15 Mar 2005 11:54:21 -0800
Subject: [PATCH] CLASS: move a "simple" class logic into the class core.

One step on improving the class api so that it can not be used incorrectly.
This also fixes the module owner issue with the dev files that happened when
the devt logic moved to the class core.

Based on a patch originally written by Kay Sievers <kay.sievers@vrfy.org>

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index fa9e6ca08f5a..73250d01c01f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -143,6 +143,7 @@ extern void driver_remove_file(struct device_driver *, struct driver_attribute *
  */
 struct class {
 	const char		* name;
+	struct module		* owner;
 
 	struct subsystem	subsys;
 	struct list_head	children;
@@ -185,6 +186,7 @@ struct class_device {
 	struct kobject		kobj;
 	struct class		* class;	/* required */
 	dev_t			devt;		/* dev_t, creates the sysfs "dev" */
+	struct class_device_attribute *devt_attr;
 	struct device		* dev;		/* not necessary, but nice to have */
 	void			* class_data;	/* class-specific data */
 
@@ -245,6 +247,13 @@ struct class_interface {
 extern int class_interface_register(struct class_interface *);
 extern void class_interface_unregister(struct class_interface *);
 
+extern struct class *class_create(struct module *owner, char *name);
+extern void class_destroy(struct class *cls);
+extern struct class_device *class_device_create(struct class *cls, dev_t devt,
+						struct device *device, char *fmt, ...)
+					__attribute__((format(printf,4,5)));
+extern void class_device_destroy(struct class *cls, dev_t devt);
+
 /* interface for class simple stuff */
 extern struct class_simple *class_simple_create(struct module *owner, char *name);
 extern void class_simple_destroy(struct class_simple *cs);
-- 
cgit v1.2.2


From 1235686f6e67cf30c460eb77d90a6cb4be57b92f Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Tue, 15 Mar 2005 14:26:30 -0800
Subject: [PATCH] INPUT: move to use the new class code, instead of
 class_simple

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/input.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 72731d7d189e..9d9598ed760d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -1015,7 +1015,7 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min
 	dev->absbit[LONG(axis)] |= BIT(axis);
 }
 
-extern struct class_simple *input_class;
+extern struct class *input_class;
 
 #endif
 #endif
-- 
cgit v1.2.2


From 8561b10f6e7ef0a085709ffc844f74130a067abe Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Tue, 15 Mar 2005 15:10:13 -0800
Subject: [PATCH] USB: move the usb hcd code to use the new class code.

This moves a kref into the main hcd structure, which detaches it from
the class device structure.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 2d1ac5058534..3d508bf08402 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -287,15 +287,14 @@ struct usb_bus {
 
 	struct dentry *usbfs_dentry;	/* usbfs dentry entry for the bus */
 
-	struct class_device class_dev;	/* class device for this bus */
+	struct class_device *class_dev;	/* class device for this bus */
+	struct kref kref;		/* handles reference counting this bus */
 	void (*release)(struct usb_bus *bus);	/* function to destroy this bus's memory */
 #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 	struct mon_bus *mon_bus;	/* non-null when associated */
 	int monitored;			/* non-zero when monitored */
 #endif
 };
-#define	to_usb_bus(d) container_of(d, struct usb_bus, class_dev)
-
 
 /* -------------------------------------------------------------------------- */
 
-- 
cgit v1.2.2


From cd987d38cc59053e0bab8150ffaca33b109067f3 Mon Sep 17 00:00:00 2001
From: "gregkh@suse.de" <gregkh@suse.de>
Date: Wed, 23 Mar 2005 11:12:38 -0800
Subject: [PATCH] class: remove class_simple code, as no one in the tree is
 using it anymore.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 73250d01c01f..68a95358013d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -44,7 +44,6 @@ struct device;
 struct device_driver;
 struct class;
 struct class_device;
-struct class_simple;
 
 struct bus_type {
 	const char		* name;
@@ -254,15 +253,6 @@ extern struct class_device *class_device_create(struct class *cls, dev_t devt,
 					__attribute__((format(printf,4,5)));
 extern void class_device_destroy(struct class *cls, dev_t devt);
 
-/* interface for class simple stuff */
-extern struct class_simple *class_simple_create(struct module *owner, char *name);
-extern void class_simple_destroy(struct class_simple *cs);
-extern struct class_device *class_simple_device_add(struct class_simple *cs, dev_t dev, struct device *device, const char *fmt, ...)
-	__attribute__((format(printf,4,5)));
-extern int class_simple_set_hotplug(struct class_simple *, 
-	int (*hotplug)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size));
-extern void class_simple_device_remove(dev_t dev);
-
 
 struct device {
 	struct list_head node;		/* node in sibling list */
-- 
cgit v1.2.2


From af70316af182f4716cc5eec7e0d27fc731d164bd Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 10:41:04 -0800
Subject: [PATCH] Add a semaphore to struct device to synchronize calls to its
 driver.

This adds a per-device semaphore that is taken before every call from the core to a
driver method. This prevents e.g. simultaneous calls to the ->suspend() or ->resume()
and ->probe() or ->release(), potentially saving a whole lot of headaches.

It also moves us a step closer to removing the bus rwsem, since it protects the fields
in struct device that are modified by the core.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 68a95358013d..195b327f3e19 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -264,6 +264,10 @@ struct device {
 	struct kobject kobj;
 	char	bus_id[BUS_ID_SIZE];	/* position on parent bus */
 
+	struct semaphore	sem;	/* semaphore to synchronize calls to
+					 * its driver.
+					 */
+
 	struct bus_type	* bus;		/* type of bus device is on */
 	struct device_driver *driver;	/* which driver has allocated this
 					   device */
-- 
cgit v1.2.2


From fae3cd00255e3e51ffd59fedb1bdb91ec96be395 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 10:59:56 -0800
Subject: [PATCH] Add driver_for_each_device().

Now there's an iterator for accessing each device bound to a driver.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

Index: linux-2.6.12-rc2/drivers/base/driver.c
===================================================================
---
 include/linux/device.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 195b327f3e19..10f5aa20e041 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -136,6 +136,9 @@ struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store)
 extern int driver_create_file(struct device_driver *, struct driver_attribute *);
 extern void driver_remove_file(struct device_driver *, struct driver_attribute *);
 
+extern int driver_for_each_device(struct device_driver * drv, struct device * start,
+				  void * data, int (*fn)(struct device *, void *));
+
 
 /*
  * device classes
-- 
cgit v1.2.2


From 9a19fea43616066561e221359596ce532e631395 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 11:45:16 -0800
Subject: [PATCH] Add initial implementation of klist helpers.

This klist interface provides a couple of structures that wrap around
struct list_head to provide explicit list "head" (struct klist) and
list "node" (struct klist_node) objects. For struct klist, a spinlock
is included that protects access to the actual list itself. struct
klist_node provides a pointer to the klist that owns it and a kref
reference count that indicates the number of current users of that node
in the list.

The entire point is to provide an interface for iterating over a list
that is safe and allows for modification of the list during the
iteration (e.g. insertion and removal), including modification of the
current node on the list.

It works using a 3rd object type - struct klist_iter - that is declared
and initialized before an iteration. klist_next() is used to acquire the
next element in the list. It returns NULL if there are no more items.
This klist interface provides a couple of structures that wrap around
struct list_head to provide explicit list "head" (struct klist) and
list "node" (struct klist_node) objects. For struct klist, a spinlock
is included that protects access to the actual list itself. struct
klist_node provides a pointer to the klist that owns it and a kref
reference count that indicates the number of current users of that node
in the list.

The entire point is to provide an interface for iterating over a list
that is safe and allows for modification of the list during the
iteration (e.g. insertion and removal), including modification of the
current node on the list.

It works using a 3rd object type - struct klist_iter - that is declared
and initialized before an iteration. klist_next() is used to acquire the
next element in the list. It returns NULL if there are no more items.
Internally, that routine takes the klist's lock, decrements the reference
count of the previous klist_node and increments the count of the next
klist_node. It then drops the lock and returns.

There are primitives for adding and removing nodes to/from a klist.
When deleting, klist_del() will simply decrement the reference count.
Only when the count goes to 0 is the node removed from the list.
klist_remove() will try to delete the node from the list and block
until it is actually removed. This is useful for objects (like devices)
that have been removed from the system and must be freed (but must wait
until all accessors have finished).

Internally, that routine takes the klist's lock, decrements the reference
count of the previous klist_node and increments the count of the next
klist_node. It then drops the lock and returns.

There are primitives for adding and removing nodes to/from a klist.
When deleting, klist_del() will simply decrement the reference count.
Only when the count goes to 0 is the node removed from the list.
klist_remove() will try to delete the node from the list and block
until it is actually removed. This is useful for objects (like devices)
that have been removed from the system and must be freed (but must wait
until all accessors have finished).

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff -Nru a/include/linux/klist.h b/include/linux/klist.h
---
 include/linux/klist.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 include/linux/klist.h

(limited to 'include/linux')

diff --git a/include/linux/klist.h b/include/linux/klist.h
new file mode 100644
index 000000000000..fb52f9d9d611
--- /dev/null
+++ b/include/linux/klist.h
@@ -0,0 +1,53 @@
+/*
+ *	klist.h - Some generic list helpers, extending struct list_head a bit.
+ *
+ *	Implementations are found in lib/klist.c
+ *
+ *
+ *	Copyright (C) 2005 Patrick Mochel
+ *
+ *	This file is rleased under the GPL v2.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+
+
+struct klist {
+	spinlock_t		k_lock;
+	struct list_head	k_list;
+};
+
+
+extern void klist_init(struct klist * k);
+
+
+struct klist_node {
+	struct klist		* n_klist;
+	struct list_head	n_node;
+	struct kref		n_ref;
+	struct completion	n_removed;
+};
+
+extern void klist_add_tail(struct klist * k, struct klist_node * n);
+extern void klist_add_head(struct klist * k, struct klist_node * n);
+
+extern void klist_del(struct klist_node * n);
+extern void klist_remove(struct klist_node * n);
+
+
+struct klist_iter {
+	struct klist		* i_klist;
+	struct list_head	* i_head;
+	struct klist_node	* i_cur;
+};
+
+
+extern void klist_iter_init(struct klist * k, struct klist_iter * i);
+extern void klist_iter_init_node(struct klist * k, struct klist_iter * i, 
+				 struct klist_node * n);
+extern void klist_iter_exit(struct klist_iter * i);
+extern struct klist_node * klist_next(struct klist_iter * i);
+
-- 
cgit v1.2.2


From 465c7a3a3a5aabcedd2e43612cac5a12f23da19a Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 11:49:14 -0800
Subject: [PATCH] Add a klist to struct bus_type for its devices.

- Use it for bus_for_each_dev().
- Use the klist spinlock instead of the bus rwsem.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 10f5aa20e041..e36953cf7f14 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -14,6 +14,7 @@
 #include <linux/config.h>
 #include <linux/ioport.h>
 #include <linux/kobject.h>
+#include <linux/klist.h>
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/module.h>
@@ -51,6 +52,7 @@ struct bus_type {
 	struct subsystem	subsys;
 	struct kset		drivers;
 	struct kset		devices;
+	struct klist		klist_devices;
 
 	struct bus_attribute	* bus_attrs;
 	struct device_attribute	* dev_attrs;
@@ -262,6 +264,7 @@ struct device {
 	struct list_head bus_list;	/* node in bus's list */
 	struct list_head driver_list;
 	struct list_head children;
+	struct klist_node	knode_bus;
 	struct device 	* parent;
 
 	struct kobject kobj;
-- 
cgit v1.2.2


From 38fdac3cdce276554b4484a41f8ec2daf81cb2ff Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 12:00:18 -0800
Subject: [PATCH] Add a klist to struct bus_type for its drivers.

- Use it in bus_for_each_drv().
- Use the klist spinlock instead of the bus rwsem.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index e36953cf7f14..ea9ab33dfe71 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -53,6 +53,7 @@ struct bus_type {
 	struct kset		drivers;
 	struct kset		devices;
 	struct klist		klist_devices;
+	struct klist		klist_drivers;
 
 	struct bus_attribute	* bus_attrs;
 	struct device_attribute	* dev_attrs;
@@ -105,6 +106,7 @@ struct device_driver {
 	struct completion	unloaded;
 	struct kobject		kobj;
 	struct list_head	devices;
+	struct klist_node	knode_bus;
 
 	struct module		* owner;
 
-- 
cgit v1.2.2


From 94e7b1c5ff2055571703e38b059afffe17658432 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Mon, 21 Mar 2005 12:25:36 -0800
Subject: [PATCH] Add a klist to struct device_driver for the devices bound to
 it.

- Use it in driver_for_each_device() instead of the regular list_head and stop using
  the bus's rwsem for protection.
- Use driver_for_each_device() in driver_detach() so we don't deadlock on the
  bus's rwsem.
- Remove ->devices.
- Move klist access and sysfs link access out from under device's semaphore, since
  they're synchronized through other means.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index ea9ab33dfe71..96c71b59fdef 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -105,7 +105,7 @@ struct device_driver {
 
 	struct completion	unloaded;
 	struct kobject		kobj;
-	struct list_head	devices;
+	struct klist		klist_devices;
 	struct klist_node	knode_bus;
 
 	struct module		* owner;
@@ -266,6 +266,7 @@ struct device {
 	struct list_head bus_list;	/* node in bus's list */
 	struct list_head driver_list;
 	struct list_head children;
+	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device 	* parent;
 
-- 
cgit v1.2.2


From cb85b6f1cc811ecb9ed4b950206d8941ba710e68 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 10:48:35 -0800
Subject: [PATCH] Remove the unused device_find().

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 96c71b59fdef..f1c38d869ac9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -372,7 +372,6 @@ extern int (*platform_notify_remove)(struct device * dev);
  */
 extern struct device * get_device(struct device * dev);
 extern void put_device(struct device * dev);
-extern struct device *device_find(const char *name, struct bus_type *bus);
 
 
 /* drivers/base/platform.c */
-- 
cgit v1.2.2


From 8b0c250be489dcbf1a3a33bb4ec4c7f33735a365 Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 12:58:57 -0800
Subject: [PATCH] add klist_node_attached() to determine if a node is on a list
 or not.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff -Nru a/include/linux/klist.h b/include/linux/klist.h
---
 include/linux/klist.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/klist.h b/include/linux/klist.h
index fb52f9d9d611..eebf5e5696ec 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -37,6 +37,8 @@ extern void klist_add_head(struct klist * k, struct klist_node * n);
 extern void klist_del(struct klist_node * n);
 extern void klist_remove(struct klist_node * n);
 
+extern int klist_node_attached(struct klist_node * n);
+
 
 struct klist_iter {
 	struct klist		* i_klist;
-- 
cgit v1.2.2


From 7dc35cdf69149a7f2b5216ada9bafe359746ac1c Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 13:03:35 -0800
Subject: [PATCH] Remove struct device::bus_list.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index f1c38d869ac9..13f65853e583 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -263,7 +263,6 @@ extern void class_device_destroy(struct class *cls, dev_t devt);
 
 struct device {
 	struct list_head node;		/* node in sibling list */
-	struct list_head bus_list;	/* node in bus's list */
 	struct list_head driver_list;
 	struct list_head children;
 	struct klist_node	knode_driver;
-- 
cgit v1.2.2


From 63c4f204ffc8219696bda88eb20c9873d007a2fc Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 13:08:05 -0800
Subject: [PATCH] Remove struct device::driver_list.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 13f65853e583..d2434934d091 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -263,7 +263,6 @@ extern void class_device_destroy(struct class *cls, dev_t devt);
 
 struct device {
 	struct list_head node;		/* node in sibling list */
-	struct list_head driver_list;
 	struct list_head children;
 	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
-- 
cgit v1.2.2


From 36239577cfb6b9a7c111209536b54200b0252ebf Mon Sep 17 00:00:00 2001
From: "mochel@digitalimplant.org" <mochel@digitalimplant.org>
Date: Thu, 24 Mar 2005 19:08:30 -0800
Subject: [PATCH] Use a klist for device child lists.

- Use klist iterator in device_for_each_child(), making it safe to use for
  removing devices.
- Remove unused list_to_dev() function.
- Kills all usage of devices_subsys.rwsem.

Signed-off-by: Patrick Mochel <mochel@digitalimplant.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index d2434934d091..43249260cd1c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -262,8 +262,8 @@ extern void class_device_destroy(struct class *cls, dev_t devt);
 
 
 struct device {
-	struct list_head node;		/* node in sibling list */
-	struct list_head children;
+	struct klist		klist_children;
+	struct klist_node	knode_parent;		/* node in sibling list */
 	struct klist_node	knode_driver;
 	struct klist_node	knode_bus;
 	struct device 	* parent;
@@ -298,12 +298,6 @@ struct device {
 	void	(*release)(struct device * dev);
 };
 
-static inline struct device *
-list_to_dev(struct list_head *node)
-{
-	return list_entry(node, struct device, node);
-}
-
 static inline void *
 dev_get_drvdata (struct device *dev)
 {
-- 
cgit v1.2.2


From 0d3e5a2e39b6ba2974e9e7c2a429018c45de8e76 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@digitalimplant.org>
Date: Tue, 5 Apr 2005 23:46:33 -0700
Subject: [PATCH] Driver Core: fix bk-driver-core kills ppc64

There's no check to see if the device is already bound to a driver, which
could do bad things.  The first thing to go wrong is that it will try to match
a driver with a device already bound to one.  In some cases (it appears with
USB with drivers/usb/core/usb.c::usb_match_id()), some drivers will match a
device based on the class type, so it would be common (especially for HID
devices) to match a device that is already bound.

The fun comes when ->probe() is called, it fails, then
driver_probe_device() does this:

	dev->driver = NULL;

Later on, that pointer could be be dereferenced without checking and cause
hell to break loose.

This problem could be nasty. It's very hardware dependent, since some
devices could have a different set of matching qualifiers than others.

Now, I don't quite see exactly where/how you were getting that crash.
You're dereferencing bad memory, but I'm not sure which pointer was bad
and where it came from, but it could have come from a couple of different
places.

The patch below will hopefully fix it all up for you. It's against
2.6.12-rc2-mm1, and does the following:

- Move logic to driver_probe_device() and comments uncommon returns:
  1 - If device is bound
  0 - If device not bound, and no error
  error - If there was an error.

- Move locking to caller of that function, since we want to lock a
  device for the entire time we're trying to bind it to a driver (to
  prevent against a driver being loaded at the same time).

- Update __device_attach() and __driver_attach() to do that locking.

- Check if device is already bound in __driver_attach()

- Update the converse device_release_driver() so it locks the device
  around all of the operations.

- Mark driver_probe_device() as static and remove export. It's an
  internal function, it should stay that way, and there are no other
  callers. If there is ever a need to export it, we can audit it as
  necessary.

Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 include/linux/device.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 43249260cd1c..91aac349b9a7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -325,7 +325,6 @@ extern int device_for_each_child(struct device *, void *,
  * Manual binding of a device to driver. See drivers/base/bus.c
  * for information on use.
  */
-extern int  driver_probe_device(struct device_driver * drv, struct device * dev);
 extern void device_bind_driver(struct device * dev);
 extern void device_release_driver(struct device * dev);
 extern int  device_attach(struct device * dev);
-- 
cgit v1.2.2


From 4b45099b75832434c5113b9aed1499f8a69d13d5 Mon Sep 17 00:00:00 2001
From: Keiichiro Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Date: Sun, 8 May 2005 21:28:53 +0900
Subject: [PATCH] Driver core: unregister_node() for hotplug use

This adds a generic function 'unregister_node()'.
It is used to remove objects of a node going away
for hotplug.  All the devices on the node must be
unregistered before calling this function.

Signed-off-by: Keiichiro Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff -puN drivers/base/node.c~numa_hp_base drivers/base/node.c
---
 include/linux/node.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/node.h b/include/linux/node.h
index 6e0a697e594e..254dc3de650b 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -27,6 +27,7 @@ struct node {
 };
 
 extern int register_node(struct node *, int, struct node *);
+extern void unregister_node(struct node *node);
 
 #define to_node(sys_device) container_of(sys_device, struct node, sysdev)
 
-- 
cgit v1.2.2


From acaefc25d21f850e47ecc5098d1e0bc442c526be Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 May 2005 14:40:59 +0200
Subject: [PATCH] libfs: add simple attribute files

Based on the discussion about spufs attributes, this is my suggestion
for a more generic attribute file support that can be used by both
debugfs and spufs.

Simple attribute files behave similarly to sequential files from
a kernel programmers perspective in that a standard set of file
operations is provided and only an open operation needs to
be written that registers file specific get() and set() functions.

These operations are defined as

void foo_set(void *data, u64 val); and
u64 foo_get(void *data);

where data is the inode->u.generic_ip pointer of the file and the
operations just need to make send of that pointer. The infrastructure
makes sure this works correctly with concurrent access and partial
read calls.

A macro named DEFINE_SIMPLE_ATTRIBUTE is provided to further simplify
using the attributes.

This patch already contains the changes for debugfs to use attributes
for its internal file operations.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/fs.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0180102dace1..9b8b696d4f15 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1657,6 +1657,52 @@ static inline void simple_transaction_set(struct file *file, size_t n)
 	ar->size = n;
 }
 
+/*
+ * simple attribute files
+ *
+ * These attributes behave similar to those in sysfs:
+ *
+ * Writing to an attribute immediately sets a value, an open file can be
+ * written to multiple times.
+ *
+ * Reading from an attribute creates a buffer from the value that might get
+ * read with multiple read calls. When the attribute has been read
+ * completely, no further read calls are possible until the file is opened
+ * again.
+ *
+ * All attributes contain a text representation of a numeric value
+ * that are accessed with the get() and set() functions.
+ */
+#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
+static int __fops ## _open(struct inode *inode, struct file *file)	\
+{									\
+	__simple_attr_check_format(__fmt, 0ull);			\
+	return simple_attr_open(inode, file, __get, __set, __fmt);	\
+}									\
+static struct file_operations __fops = {				\
+	.owner	 = THIS_MODULE,						\
+	.open	 = __fops ## _open,					\
+	.release = simple_attr_close,					\
+	.read	 = simple_attr_read,					\
+	.write	 = simple_attr_write,					\
+};
+
+static inline void __attribute__((format(printf, 1, 2)))
+__simple_attr_check_format(const char *fmt, ...)
+{
+	/* don't do anything, just let the compiler check the arguments; */
+}
+
+int simple_attr_open(struct inode *inode, struct file *file,
+		     u64 (*get)(void *), void (*set)(void *, u64),
+		     const char *fmt);
+int simple_attr_close(struct inode *inode, struct file *file);
+ssize_t simple_attr_read(struct file *file, char __user *buf,
+			 size_t len, loff_t *ppos);
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
+			  size_t len, loff_t *ppos);
+
+
 #ifdef CONFIG_SECURITY
 static inline char *alloc_secdata(void)
 {
-- 
cgit v1.2.2


From 54b6f35c99974e99e64c05c2895718355123c55f Mon Sep 17 00:00:00 2001
From: Yani Ioannou <yani.ioannou@gmail.com>
Date: Tue, 17 May 2005 06:39:34 -0400
Subject: [PATCH] Driver core: change device_attribute callbacks

This patch adds the device_attribute paramerter to the
device_attribute store and show sysfs callback functions, and passes a
reference to the attribute when the callbacks are called.

Signed-off-by: Yani Ioannou <yani.ioannou@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 91aac349b9a7..7b781a72b293 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -335,8 +335,10 @@ extern void driver_attach(struct device_driver * drv);
 
 struct device_attribute {
 	struct attribute	attr;
-	ssize_t (*show)(struct device * dev, char * buf);
-	ssize_t (*store)(struct device * dev, const char * buf, size_t count);
+	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
 };
 
 #define DEVICE_ATTR(_name,_mode,_show,_store) \
-- 
cgit v1.2.2


From 0a3e7eeabc9f76b7496488aad2d11626ff6a2a4f Mon Sep 17 00:00:00 2001
From: Yani Ioannou <yani.ioannou@gmail.com>
Date: Tue, 17 May 2005 22:59:05 -0400
Subject: [PATCH] I2C: add i2c sensor_device_attribute and macros

This patch creates a new header with a potential standard i2c sensor
attribute type (which simply includes an int representing the sensor
number/index) and the associated macros, SENSOR_DEVICE_ATTR to define
a static attribute and to_sensor_dev_attr to get a
sensor_device_attribute reference from an embedded device_attribute
reference.

Signed-off-by: Yani Ioannou <yani.ioannou@gmail.com>
---
 include/linux/i2c-sysfs.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 include/linux/i2c-sysfs.h

(limited to 'include/linux')

diff --git a/include/linux/i2c-sysfs.h b/include/linux/i2c-sysfs.h
new file mode 100644
index 000000000000..d7bf6ce11679
--- /dev/null
+++ b/include/linux/i2c-sysfs.h
@@ -0,0 +1,36 @@
+/*
+ *  i2c-sysfs.h - i2c chip driver sysfs defines
+ *
+ *  Copyright (C) 2005 Yani Ioannou <yani.ioannou@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_I2C_SYSFS_H
+#define _LINUX_I2C_SYSFS_H
+
+struct sensor_device_attribute{
+	struct device_attribute dev_attr;
+	int index;
+};
+#define to_sensor_dev_attr(_dev_attr) \
+	container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
+
+#define SENSOR_DEVICE_ATTR(_name,_mode,_show,_store,_index)	\
+struct sensor_device_attribute sensor_dev_attr_##_name = {	\
+	.dev_attr =	__ATTR(_name,_mode,_show,_store),	\
+	.index =	_index,					\
+}
+
+#endif /* _LINUX_I2C_SYSFS_H */
-- 
cgit v1.2.2


From 988d186de5b6966a71a8cc52e6cb4895fd2f7799 Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Tue, 31 May 2005 10:39:14 +0530
Subject: [PATCH] sysfs-iattr: add sysfs_setattr

o This adds ->i_op->setattr VFS method for sysfs inodes. The changed
  attribues are saved in the persistent sysfs_dirent structure as a pointer
  to struct iattr. The struct iattr is allocated only for those sysfs_dirent's
  for which default attributes are getting changed. Thanks to Jon Smirl for
  this suggestion.

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index d9cd2d31d377..392da5a6dacb 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -73,6 +73,7 @@ struct sysfs_dirent {
 	int			s_type;
 	umode_t			s_mode;
 	struct dentry		* s_dentry;
+	struct iattr		* s_iattr;
 };
 
 #define SYSFS_ROOT		0x0001
-- 
cgit v1.2.2


From 18b504e25fd617bee8830d2cdcaff7fb7b5931bb Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Tue, 21 Jun 2005 12:38:48 -0700
Subject: [NETLINK]: netlink_callback structure needs 5 args not 4

net/ipv4/tcp_diag.c uses up to ->args[4]

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 561d4dc75836..3029cad63a01 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -147,7 +147,7 @@ struct netlink_callback
 	int		(*dump)(struct sk_buff * skb, struct netlink_callback *cb);
 	int		(*done)(struct netlink_callback *cb);
 	int		family;
-	long		args[4];
+	long		args[5];
 };
 
 struct netlink_notify
-- 
cgit v1.2.2


From e45b1be8bcb3643808975a426fa3e201a2588e87 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 21 Jun 2005 14:01:30 -0700
Subject: [NETFILTER]: Kill lockhelp.h

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_conntrack_core.h |   3 +-
 include/linux/netfilter_ipv4/ip_nat.h            |   3 +-
 include/linux/netfilter_ipv4/listhelp.h          |   1 -
 include/linux/netfilter_ipv4/lockhelp.h          | 129 -----------------------
 4 files changed, 2 insertions(+), 134 deletions(-)
 delete mode 100644 include/linux/netfilter_ipv4/lockhelp.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index d84be02cb4fc..694aec9b4784 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -1,7 +1,6 @@
 #ifndef _IP_CONNTRACK_CORE_H
 #define _IP_CONNTRACK_CORE_H
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/lockhelp.h>
 
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
@@ -47,6 +46,6 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb)
 
 extern struct list_head *ip_conntrack_hash;
 extern struct list_head ip_conntrack_expect_list;
-DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
+extern rwlock_t ip_conntrack_lock;
 #endif /* _IP_CONNTRACK_CORE_H */
 
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index 2b72b86176f0..e201ec6e9905 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -50,10 +50,9 @@ struct ip_nat_multi_range_compat
 
 #ifdef __KERNEL__
 #include <linux/list.h>
-#include <linux/netfilter_ipv4/lockhelp.h>
 
 /* Protects NAT hash tables, and NAT-private part of conntracks. */
-DECLARE_RWLOCK_EXTERN(ip_nat_lock);
+extern rwlock_t ip_nat_lock;
 
 /* The structure embedded in the conntrack structure. */
 struct ip_nat_info
diff --git a/include/linux/netfilter_ipv4/listhelp.h b/include/linux/netfilter_ipv4/listhelp.h
index f2ae7c5e57bb..360429f48737 100644
--- a/include/linux/netfilter_ipv4/listhelp.h
+++ b/include/linux/netfilter_ipv4/listhelp.h
@@ -2,7 +2,6 @@
 #define _LISTHELP_H
 #include <linux/config.h>
 #include <linux/list.h>
-#include <linux/netfilter_ipv4/lockhelp.h>
 
 /* Header to do more comprehensive job than linux/list.h; assume list
    is first entry in structure. */
diff --git a/include/linux/netfilter_ipv4/lockhelp.h b/include/linux/netfilter_ipv4/lockhelp.h
deleted file mode 100644
index a3288633ab46..000000000000
--- a/include/linux/netfilter_ipv4/lockhelp.h
+++ /dev/null
@@ -1,129 +0,0 @@
-#ifndef _LOCKHELP_H
-#define _LOCKHELP_H
-#include <linux/config.h>
-
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <linux/interrupt.h>
-#include <linux/smp.h>
-
-/* Header to do help in lock debugging. */
-
-#ifdef CONFIG_NETFILTER_DEBUG
-struct spinlock_debug
-{
-	spinlock_t l;
-	atomic_t locked_by;
-};
-
-struct rwlock_debug
-{
-	rwlock_t l;
-	long read_locked_map;
-	long write_locked_map;
-};
-
-#define DECLARE_LOCK(l) 						\
-struct spinlock_debug l = { SPIN_LOCK_UNLOCKED, ATOMIC_INIT(-1) }
-#define DECLARE_LOCK_EXTERN(l) 			\
-extern struct spinlock_debug l
-#define DECLARE_RWLOCK(l)				\
-struct rwlock_debug l = { RW_LOCK_UNLOCKED, 0, 0 }
-#define DECLARE_RWLOCK_EXTERN(l)		\
-extern struct rwlock_debug l
-
-#define MUST_BE_LOCKED(l)						\
-do { if (atomic_read(&(l)->locked_by) != smp_processor_id())		\
-	printk("ASSERT %s:%u %s unlocked\n", __FILE__, __LINE__, #l);	\
-} while(0)
-
-#define MUST_BE_UNLOCKED(l)						\
-do { if (atomic_read(&(l)->locked_by) == smp_processor_id())		\
-	printk("ASSERT %s:%u %s locked\n", __FILE__, __LINE__, #l);	\
-} while(0)
-
-/* Write locked OK as well. */
-#define MUST_BE_READ_LOCKED(l)						    \
-do { if (!((l)->read_locked_map & (1UL << smp_processor_id()))		    \
-	 && !((l)->write_locked_map & (1UL << smp_processor_id())))	    \
-	printk("ASSERT %s:%u %s not readlocked\n", __FILE__, __LINE__, #l); \
-} while(0)
-
-#define MUST_BE_WRITE_LOCKED(l)						     \
-do { if (!((l)->write_locked_map & (1UL << smp_processor_id())))	     \
-	printk("ASSERT %s:%u %s not writelocked\n", __FILE__, __LINE__, #l); \
-} while(0)
-
-#define MUST_BE_READ_WRITE_UNLOCKED(l)					  \
-do { if ((l)->read_locked_map & (1UL << smp_processor_id()))		  \
-	printk("ASSERT %s:%u %s readlocked\n", __FILE__, __LINE__, #l);	  \
- else if ((l)->write_locked_map & (1UL << smp_processor_id()))		  \
-	 printk("ASSERT %s:%u %s writelocked\n", __FILE__, __LINE__, #l); \
-} while(0)
-
-#define LOCK_BH(lk)						\
-do {								\
-	MUST_BE_UNLOCKED(lk);					\
-	spin_lock_bh(&(lk)->l);					\
-	atomic_set(&(lk)->locked_by, smp_processor_id());	\
-} while(0)
-
-#define UNLOCK_BH(lk)				\
-do {						\
-	MUST_BE_LOCKED(lk);			\
-	atomic_set(&(lk)->locked_by, -1);	\
-	spin_unlock_bh(&(lk)->l);		\
-} while(0)
-
-#define READ_LOCK(lk) 						\
-do {								\
-	MUST_BE_READ_WRITE_UNLOCKED(lk);			\
-	read_lock_bh(&(lk)->l);					\
-	set_bit(smp_processor_id(), &(lk)->read_locked_map);	\
-} while(0)
-
-#define WRITE_LOCK(lk)							  \
-do {									  \
-	MUST_BE_READ_WRITE_UNLOCKED(lk);				  \
-	write_lock_bh(&(lk)->l);					  \
-	set_bit(smp_processor_id(), &(lk)->write_locked_map);		  \
-} while(0)
-
-#define READ_UNLOCK(lk)							\
-do {									\
-	if (!((lk)->read_locked_map & (1UL << smp_processor_id())))	\
-		printk("ASSERT: %s:%u %s not readlocked\n", 		\
-		       __FILE__, __LINE__, #lk);			\
-	clear_bit(smp_processor_id(), &(lk)->read_locked_map);		\
-	read_unlock_bh(&(lk)->l);					\
-} while(0)
-
-#define WRITE_UNLOCK(lk)					\
-do {								\
-	MUST_BE_WRITE_LOCKED(lk);				\
-	clear_bit(smp_processor_id(), &(lk)->write_locked_map);	\
-	write_unlock_bh(&(lk)->l);				\
-} while(0)
-
-#else
-#define DECLARE_LOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
-#define DECLARE_LOCK_EXTERN(l) extern spinlock_t l
-#define DECLARE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED
-#define DECLARE_RWLOCK_EXTERN(l) extern rwlock_t l
-
-#define MUST_BE_LOCKED(l)
-#define MUST_BE_UNLOCKED(l)
-#define MUST_BE_READ_LOCKED(l)
-#define MUST_BE_WRITE_LOCKED(l)
-#define MUST_BE_READ_WRITE_UNLOCKED(l)
-
-#define LOCK_BH(l) spin_lock_bh(l)
-#define UNLOCK_BH(l) spin_unlock_bh(l)
-
-#define READ_LOCK(l) read_lock_bh(l)
-#define WRITE_LOCK(l) write_lock_bh(l)
-#define READ_UNLOCK(l) read_unlock_bh(l)
-#define WRITE_UNLOCK(l) write_unlock_bh(l)
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
-#endif /* _LOCKHELP_H */
-- 
cgit v1.2.2


From 18b8afc771102b1b6af97962808291a7d27f52af Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 21 Jun 2005 14:01:57 -0700
Subject: [NETFILTER]: Kill nf_debug

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4.h |  6 ------
 include/linux/skbuff.h         | 13 -------------
 2 files changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 9e5750079e09..3ebc36afae1a 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -75,12 +75,6 @@ enum nf_ip_hook_priorities {
 #define SO_ORIGINAL_DST 80
 
 #ifdef __KERNEL__
-#ifdef CONFIG_NETFILTER_DEBUG
-void nf_debug_ip_local_deliver(struct sk_buff *skb);
-void nf_debug_ip_loopback_xmit(struct sk_buff *newskb);
-void nf_debug_ip_finish_output2(struct sk_buff *skb);
-#endif /*CONFIG_NETFILTER_DEBUG*/
-
 extern int ip_route_me_harder(struct sk_buff **pskb);
 
 /* Call this before modifying an existing IP packet: ensures it is
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc04f5cd2286..d7c839a21842 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -193,7 +193,6 @@ struct skb_shared_info {
  *	@nfcache: Cache info
  *	@nfct: Associated connection, if any
  *	@nfctinfo: Relationship of this skb to the connection
- *	@nf_debug: Netfilter debugging
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *      @private: Data which is private to the HIPPI implementation
  *	@tc_index: Traffic control index
@@ -264,9 +263,6 @@ struct sk_buff {
 	__u32			nfcache;
 	__u32			nfctinfo;
 	struct nf_conntrack	*nfct;
-#ifdef CONFIG_NETFILTER_DEBUG
-        unsigned int		nf_debug;
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
@@ -1219,15 +1215,6 @@ static inline void nf_reset(struct sk_buff *skb)
 {
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug = 0;
-#endif
-}
-static inline void nf_reset_debug(struct sk_buff *skb)
-{
-#ifdef CONFIG_NETFILTER_DEBUG
-	skb->nf_debug = 0;
-#endif
 }
 
 #ifdef CONFIG_BRIDGE_NETFILTER
-- 
cgit v1.2.2


From 39c715b71740c4a78ba4769fb54826929bac03cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 21 Jun 2005 17:14:34 -0700
Subject: [PATCH] smp_processor_id() cleanup

This patch implements a number of smp_processor_id() cleanup ideas that
Arjan van de Ven and I came up with.

The previous __smp_processor_id/_smp_processor_id/smp_processor_id API
spaghetti was hard to follow both on the implementational and on the
usage side.

Some of the complexity arose from picking wrong names, some of the
complexity comes from the fact that not all architectures defined
__smp_processor_id.

In the new code, there are two externally visible symbols:

 - smp_processor_id(): debug variant.

 - raw_smp_processor_id(): nondebug variant. Replaces all existing
   uses of _smp_processor_id() and __smp_processor_id(). Defined
   by every SMP architecture in include/asm-*/smp.h.

There is one new internal symbol, dependent on DEBUG_PREEMPT:

 - debug_smp_processor_id(): internal debug variant, mapped to
                             smp_processor_id().

Also, i moved debug_smp_processor_id() from lib/kernel_lock.c into a new
lib/smp_processor_id.c file.  All related comments got updated and/or
clarified.

I have build/boot tested the following 8 .config combinations on x86:

 {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT}

I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT.  (Other
architectures are untested, but should work just fine.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h |  2 +-
 include/linux/smp.h    | 40 ++++++++++++++++------------------------
 2 files changed, 17 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e530c6c092f1..beacd931b606 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -381,7 +381,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
-#define numa_node_id()		(cpu_to_node(_smp_processor_id()))
+#define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
 
 #ifndef CONFIG_DISCONTIGMEM
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index dcf1db3b35d3..9dfa3ee769ae 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -92,10 +92,7 @@ void smp_prepare_boot_cpu(void);
 /*
  *	These macros fold the SMP functionality into a single CPU system
  */
-
-#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
-# define smp_processor_id()			0
-#endif
+#define raw_smp_processor_id()			0
 #define hard_smp_processor_id()			0
 #define smp_call_function(func,info,retry,wait)	({ 0; })
 #define on_each_cpu(func,info,retry,wait)	({ func(info); 0; })
@@ -106,30 +103,25 @@ static inline void smp_send_reschedule(int cpu) { }
 #endif /* !SMP */
 
 /*
- * DEBUG_PREEMPT support: check whether smp_processor_id() is being
- * used in a preemption-safe way.
+ * smp_processor_id(): get the current CPU ID.
  *
- * An architecture has to enable this debugging code explicitly.
- * It can do so by renaming the smp_processor_id() macro to
- * __smp_processor_id().  This should only be done after some minimal
- * testing, because usually there are a number of false positives
- * that an architecture will trigger.
+ * if DEBUG_PREEMPT is enabled the we check whether it is
+ * used in a preemption-safe way. (smp_processor_id() is safe
+ * if it's used in a preemption-off critical section, or in
+ * a thread that is bound to the current CPU.)
  *
- * To fix a false positive (i.e. smp_processor_id() use that the
- * debugging code reports but which use for some reason is legal),
- * change the smp_processor_id() reference to _smp_processor_id(),
- * which is the nondebug variant.  NOTE: don't use this to hack around
- * real bugs.
+ * NOTE: raw_smp_processor_id() is for internal use only
+ * (smp_processor_id() is the preferred variant), but in rare
+ * instances it might also be used to turn off false positives
+ * (i.e. smp_processor_id() use that the debugging code reports but
+ * which use for some reason is legal). Don't use this to hack around
+ * the warning message, as your code might not work under PREEMPT.
  */
-#ifdef __smp_processor_id
-# if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
-   extern unsigned int smp_processor_id(void);
-# else
-#  define smp_processor_id() __smp_processor_id()
-# endif
-# define _smp_processor_id() __smp_processor_id()
+#ifdef CONFIG_DEBUG_PREEMPT
+  extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
 #else
-# define _smp_processor_id() smp_processor_id()
+# define smp_processor_id() raw_smp_processor_id()
 #endif
 
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
-- 
cgit v1.2.2


From 753ee728964e5afb80c17659cc6c3a6fd0a42fe0 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Tue, 21 Jun 2005 17:14:41 -0700
Subject: [PATCH] VM: early zone reclaim

This is the core of the (much simplified) early reclaim.  The goal of this
patch is to reclaim some easily-freed pages from a zone before falling back
onto another zone.

One of the major uses of this is NUMA machines.  With the default allocator
behavior the allocator would look for memory in another zone, which might be
off-node, before trying to reclaim from the current zone.

This adds a zone tuneable to enable early zone reclaim.  It is selected on a
per-zone basis and is turned on/off via syscall.

Adding some extra throttling on the reclaim was also required (patch
4/4).  Without the machine would grind to a crawl when doing a "make -j"
kernel build.  Even with this patch the System Time is higher on
average, but it seems tolerable.  Here are some numbers for kernbench
runs on a 2-node, 4cpu, 8Gig RAM Altix in the "make -j" run:

			wall  user   sys   %cpu  ctx sw.  sleeps
			----  ----   ---   ----   ------  ------
No patch		1009  1384   847   258   298170   504402
w/patch, no reclaim     880   1376   667   288   254064   396745
w/patch & reclaim       1079  1385   926   252   291625   548873

These numbers are the average of 2 runs of 3 "make -j" runs done right
after system boot.  Run-to-run variability for "make -j" is huge, so
these numbers aren't terribly useful except to seee that with reclaim
the benchmark still finishes in a reasonable amount of time.

I also looked at the NUMA hit/miss stats for the "make -j" runs and the
reclaim doesn't make any difference when the machine is thrashing away.

Doing a "make -j8" on a single node that is filled with page cache pages
takes 700 seconds with reclaim turned on and 735 seconds without reclaim
(due to remote memory accesses).

The simple zone_reclaim syscall program is at
http://www.bork.org/~mort/sgi/zone_reclaim.c

Signed-off-by: Martin Hicks <mort@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 6 ++++++
 include/linux/swap.h   | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index beacd931b606..dfc2452ccb10 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -144,6 +144,12 @@ struct zone {
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
 
+	/*
+	 * Does the allocator try to reclaim pages from the zone as soon
+	 * as it fails a watermark_ok() in __alloc_pages?
+	 */
+	int			reclaim_pages;
+
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
 	 * defined as the scanning priority at which we achieved our reclaim
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3bbc41be9bd0..0d21e682d99d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -173,6 +173,7 @@ extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
+extern int zone_reclaim(struct zone *, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
 
-- 
cgit v1.2.2


From 0c35bbadc59f5ed105c34471143eceb4c0dd9c95 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Tue, 21 Jun 2005 17:14:42 -0700
Subject: [PATCH] VM: add __GFP_NORECLAIM

When using the early zone reclaim, it was noticed that allocating new pages
that should be spread across the whole system caused eviction of local pages.

This adds a new GFP flag to prevent early reclaim from happening during
certain allocation attempts.  The example that is implemented here is for page
cache pages.  We want page cache pages to be spread across the whole system,
and we don't want page cache pages to evict other pages to get local memory.

Signed-off-by:  Martin Hicks <mort@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h     | 3 ++-
 include/linux/pagemap.h | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index af7407e8cfc5..208535fd4832 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,6 +39,7 @@ struct vm_area_struct;
 #define __GFP_COMP	0x4000u	/* Add compound page metadata */
 #define __GFP_ZERO	0x8000u	/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
+#define __GFP_NORECLAIM  0x20000u /* No realy zone reclaim during allocation */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
@@ -47,7 +48,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC)
+			__GFP_NOMEMALLOC|__GFP_NORECLAIM)
 
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0422031161ba..d9a25647a295 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -52,12 +52,12 @@ void release_pages(struct page **pages, int nr, int cold);
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x), 0);
+	return alloc_pages(mapping_gfp_mask(x)|__GFP_NORECLAIM, 0);
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
 {
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
+	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD|__GFP_NORECLAIM, 0);
 }
 
 typedef int filler_t(void *, struct page *);
-- 
cgit v1.2.2


From 1e7e5a9048b30c57ba1ddaa6cdf59b21b65cde99 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@sgi.com>
Date: Tue, 21 Jun 2005 17:14:43 -0700
Subject: [PATCH] VM: rate limit early reclaim

When early zone reclaim is turned on the LRU is scanned more frequently when a
zone is low on memory.  This limits when the zone reclaim can be called by
skipping the scan if another thread (either via kswapd or sync reclaim) is
already reclaiming from the zone.

Signed-off-by: Martin Hicks <mort@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dfc2452ccb10..18fed8b67943 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -149,6 +149,8 @@ struct zone {
 	 * as it fails a watermark_ok() in __alloc_pages?
 	 */
 	int			reclaim_pages;
+	/* A count of how many reclaimers are scanning this zone */
+	atomic_t		reclaim_in_progress;
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
-- 
cgit v1.2.2


From 63551ae0feaaa23807ebea60de1901564bbef32e Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Tue, 21 Jun 2005 17:14:44 -0700
Subject: [PATCH] Hugepage consolidation

A lot of the code in arch/*/mm/hugetlbpage.c is quite similar.  This patch
attempts to consolidate a lot of the code across the arch's, putting the
combined version in mm/hugetlb.c.  There are a couple of uglyish hacks in
order to covert all the hugepage archs, but the result is a very large
reduction in the total amount of code.  It also means things like hugepage
lazy allocation could be implemented in one place, instead of six.

Tested, at least a little, on ppc64, i386 and x86_64.

Notes:
	- this patch changes the meaning of set_huge_pte() to be more
	  analagous to set_pte()
	- does SH4 need s special huge_ptep_get_and_clear()??

Acked-by: William Lee Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hugetlb.h | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6af1ae4a8211..f529d1442815 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -4,6 +4,7 @@
 #ifdef CONFIG_HUGETLB_PAGE
 
 #include <linux/mempolicy.h>
+#include <asm/tlbflush.h>
 
 struct ctl_table;
 
@@ -22,12 +23,6 @@ int hugetlb_report_meminfo(char *);
 int hugetlb_report_node_meminfo(int, char *);
 int is_hugepage_mem_enough(size_t);
 unsigned long hugetlb_total_pages(void);
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write);
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-				pmd_t *pmd, int write);
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
-int pmd_huge(pmd_t pmd);
 struct page *alloc_huge_page(void);
 void free_huge_page(struct page *);
 
@@ -35,6 +30,17 @@ extern unsigned long max_huge_pages;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 
+/* arch callbacks */
+
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
+struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
+			      int write);
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+				pmd_t *pmd, int write);
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
+int pmd_huge(pmd_t pmd);
+
 #ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
@@ -48,6 +54,28 @@ extern int sysctl_hugetlb_shm_group;
 int prepare_hugepage_range(unsigned long addr, unsigned long len);
 #endif
 
+#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE
+#define set_huge_pte_at(mm, addr, ptep, pte)	set_pte_at(mm, addr, ptep, pte)
+#define huge_ptep_get_and_clear(mm, addr, ptep) ptep_get_and_clear(mm, addr, ptep)
+#else
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep);
+#endif
+
+#ifndef ARCH_HAS_HUGETLB_PREFAULT_HOOK
+#define hugetlb_prefault_arch_hook(mm)		do { } while (0)
+#else
+void hugetlb_prefault_arch_hook(struct mm_struct *mm);
+#endif
+
+#ifndef ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE
+#define hugetlb_clean_stale_pgtable(pte)	BUG()
+#else
+void hugetlb_clean_stale_pgtable(pte_t *pte);
+#endif
+
 #else /* !CONFIG_HUGETLB_PAGE */
 
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
-- 
cgit v1.2.2


From e7c8d5c9955a4d2e88e36b640563f5d6d5aba48a Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@lameter.com>
Date: Tue, 21 Jun 2005 17:14:47 -0700
Subject: [PATCH] node local per-cpu-pages

This patch modifies the way pagesets in struct zone are managed.

Each zone has a per-cpu array of pagesets.  So any particular CPU has some
memory in each zone structure which belongs to itself.  Even if that CPU is
not local to that zone.

So the patch relocates the pagesets for each cpu to the node that is nearest
to the cpu instead of allocating the pagesets in the (possibly remote) target
zone.  This means that the operations to manage pages on remote zone can be
done with information available locally.

We play a macro trick so that non-NUMA pmachines avoid the additional
pointer chase on the page allocator fastpath.

AIM7 benchmark on a 32 CPU SGI Altix

w/o patches:
Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      484.68  100       484.6769     12.01      1.97   Fri Mar 25 11:01:42 2005
  100    27140.46   89       271.4046     21.44    148.71   Fri Mar 25 11:02:04 2005
  200    30792.02   82       153.9601     37.80    296.72   Fri Mar 25 11:02:42 2005
  300    32209.27   81       107.3642     54.21    451.34   Fri Mar 25 11:03:37 2005
  400    34962.83   78        87.4071     66.59    588.97   Fri Mar 25 11:04:44 2005
  500    31676.92   75        63.3538     91.87    742.71   Fri Mar 25 11:06:16 2005
  600    36032.69   73        60.0545     96.91    885.44   Fri Mar 25 11:07:54 2005
  700    35540.43   77        50.7720    114.63   1024.28   Fri Mar 25 11:09:49 2005
  800    33906.70   74        42.3834    137.32   1181.65   Fri Mar 25 11:12:06 2005
  900    34120.67   73        37.9119    153.51   1325.26   Fri Mar 25 11:14:41 2005
 1000    34802.37   74        34.8024    167.23   1465.26   Fri Mar 25 11:17:28 2005

with slab API changes and pageset patch:

Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      485.00  100       485.0000     12.00      1.96   Fri Mar 25 11:46:18 2005
  100    28000.96   89       280.0096     20.79    150.45   Fri Mar 25 11:46:39 2005
  200    32285.80   79       161.4290     36.05    293.37   Fri Mar 25 11:47:16 2005
  300    40424.15   84       134.7472     43.19    438.42   Fri Mar 25 11:47:59 2005
  400    39155.01   79        97.8875     59.46    590.05   Fri Mar 25 11:48:59 2005
  500    37881.25   82        75.7625     76.82    730.19   Fri Mar 25 11:50:16 2005
  600    39083.14   78        65.1386     89.35    872.79   Fri Mar 25 11:51:46 2005
  700    38627.83   77        55.1826    105.47   1022.46   Fri Mar 25 11:53:32 2005
  800    39631.94   78        49.5399    117.48   1169.94   Fri Mar 25 11:55:30 2005
  900    36903.70   79        41.0041    141.94   1310.78   Fri Mar 25 11:57:53 2005
 1000    36201.23   77        36.2012    160.77   1458.31   Fri Mar 25 12:00:34 2005

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Shobhit Dayal <shobhit@calsoftinc.com>
Signed-off-by: Shai Fultheim <Shai@Scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     |  6 ++++++
 include/linux/mmzone.h | 11 ++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 17518fe0b311..1813b162b0a8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -691,6 +691,12 @@ extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 
+#ifdef CONFIG_NUMA
+extern void setup_per_cpu_pageset(void);
+#else
+static inline void setup_per_cpu_pageset(void) {}
+#endif
+
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
 void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 18fed8b67943..4733d35d8223 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -63,6 +63,12 @@ struct per_cpu_pageset {
 #endif
 } ____cacheline_aligned_in_smp;
 
+#ifdef CONFIG_NUMA
+#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
+#else
+#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
+#endif
+
 #define ZONE_DMA		0
 #define ZONE_NORMAL		1
 #define ZONE_HIGHMEM		2
@@ -122,8 +128,11 @@ struct zone {
 	 */
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
+#ifdef CONFIG_NUMA
+	struct per_cpu_pageset	*pageset[NR_CPUS];
+#else
 	struct per_cpu_pageset	pageset[NR_CPUS];
-
+#endif
 	/*
 	 * free areas of different sizes
 	 */
-- 
cgit v1.2.2


From 1363c3cd8603a913a27e2995dccbd70d5312d8e6 Mon Sep 17 00:00:00 2001
From: Wolfgang Wander <wwc@rentec.com>
Date: Tue, 21 Jun 2005 17:14:49 -0700
Subject: [PATCH] Avoiding mmap fragmentation

Ingo recently introduced a great speedup for allocating new mmaps using the
free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and
causes huge performance increases in thread creation.

The downside of this patch is that it does lead to fragmentation in the
mmap-ed areas (visible via /proc/self/maps), such that some applications
that work fine under 2.4 kernels quickly run out of memory on any 2.6
kernel.

The problem is twofold:

  1) the free_area_cache is used to continue a search for memory where
     the last search ended.  Before the change new areas were always
     searched from the base address on.

     So now new small areas are cluttering holes of all sizes
     throughout the whole mmap-able region whereas before small holes
     tended to close holes near the base leaving holes far from the base
     large and available for larger requests.

  2) the free_area_cache also is set to the location of the last
     munmap-ed area so in scenarios where we allocate e.g.  five regions of
     1K each, then free regions 4 2 3 in this order the next request for 1K
     will be placed in the position of the old region 3, whereas before we
     appended it to the still active region 1, placing it at the location
     of the old region 2.  Before we had 1 free region of 2K, now we only
     get two free regions of 1K -> fragmentation.

The patch addresses thes issues by introducing yet another cache descriptor
cached_hole_size that contains the largest known hole size below the
current free_area_cache.  If a new request comes in the size is compared
against the cached_hole_size and if the request can be filled with a hole
below free_area_cache the search is started from the base instead.

The results look promising: Whereas 2.6.12-rc4 fragments quickly and my
(earlier posted) leakme.c test program terminates after 50000+ iterations
with 96 distinct and fragmented maps in /proc/self/maps it performs nicely
(as expected) with thread creation, Ingo's test_str02 with 20000 threads
requires 0.7s system time.

Taking out Ingo's patch (un-patch available per request) by basically
deleting all mentions of free_area_cache from the kernel and starting the
search for new memory always at the respective bases we observe: leakme
terminates successfully with 11 distinctive hardly fragmented areas in
/proc/self/maps but thread creating is gringdingly slow: 30+s(!) system
time for Ingo's test_str02 with 20000 threads.

Now - drumroll ;-) the appended patch works fine with leakme: it ends with
only 7 distinct areas in /proc/self/maps and also thread creation seems
sufficiently fast with 0.71s for 20000 threads.

Signed-off-by: Wolfgang Wander <wwc@rentec.com>
Credit-to: "Richard Purdie" <rpurdie@rpsys.net>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu> (partly)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4dbb109022f3..b58afd97a180 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -201,8 +201,8 @@ extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
 			  unsigned long flags);
-extern void arch_unmap_area(struct vm_area_struct *area);
-extern void arch_unmap_area_topdown(struct vm_area_struct *area);
+extern void arch_unmap_area(struct mm_struct *, unsigned long);
+extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
 #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
 #define get_mm_counter(mm, member) ((mm)->_##member)
@@ -218,9 +218,10 @@ struct mm_struct {
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
-	void (*unmap_area) (struct vm_area_struct *area);
-	unsigned long mmap_base;		/* base of mmap area */
-	unsigned long free_area_cache;		/* first hole */
+	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+        unsigned long mmap_base;		/* base of mmap area */
+        unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
+	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
-- 
cgit v1.2.2


From cbe37d093707762fc0abb280781e6a82a9d8d568 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Tue, 21 Jun 2005 17:14:52 -0700
Subject: [PATCH] mm: remove PG_highmem

Remove PG_highmem, to save a page flag.  Use is_highmem() instead.  It'll
generate a little more code, but we don't use PageHigheMem() in many places.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 39ab8c6b5652..df313891db10 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -61,21 +61,20 @@
 #define PG_active		 6
 #define PG_slab			 7	/* slab debug (Suparna wants this) */
 
-#define PG_highmem		 8
-#define PG_checked		 9	/* kill me in 2.5.<early>. */
-#define PG_arch_1		10
-#define PG_reserved		11
-
-#define PG_private		12	/* Has something at ->private */
-#define PG_writeback		13	/* Page is under writeback */
-#define PG_nosave		14	/* Used for system suspend/resume */
-#define PG_compound		15	/* Part of a compound page */
-
-#define PG_swapcache		16	/* Swap page: swp_entry_t in private */
-#define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
-#define PG_reclaim		18	/* To be reclaimed asap */
-#define PG_nosave_free		19	/* Free, should not be written */
-#define PG_uncached		20	/* Page has been mapped as uncached */
+#define PG_checked		 8	/* kill me in 2.5.<early>. */
+#define PG_arch_1		 9
+#define PG_reserved		10
+#define PG_private		11	/* Has something at ->private */
+
+#define PG_writeback		12	/* Page is under writeback */
+#define PG_nosave		13	/* Used for system suspend/resume */
+#define PG_compound		14	/* Part of a compound page */
+#define PG_swapcache		15	/* Swap page: swp_entry_t in private */
+
+#define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
+#define PG_reclaim		17	/* To be reclaimed asap */
+#define PG_nosave_free		18	/* Free, should not be written */
+#define PG_uncached		19	/* Page has been mapped as uncached */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -215,7 +214,7 @@ extern void __mod_page_state(unsigned offset, unsigned long delta);
 #define TestSetPageSlab(page)	test_and_set_bit(PG_slab, &(page)->flags)
 
 #ifdef CONFIG_HIGHMEM
-#define PageHighMem(page)	test_bit(PG_highmem, &(page)->flags)
+#define PageHighMem(page)	is_highmem(page_zone(page))
 #else
 #define PageHighMem(page)	0 /* needed to optimize away at compile time */
 #endif
-- 
cgit v1.2.2


From 1ad539b2bd89bf2e129123eb24d5bcc4484a35de Mon Sep 17 00:00:00 2001
From: Darren Hart <dvhltc@us.ibm.com>
Date: Tue, 21 Jun 2005 17:14:53 -0700
Subject: [PATCH] vm: try_to_free_pages unused argument

try_to_free_pages accepts a third argument, order, but hasn't used it since
before 2.6.0.  The following patch removes the argument and updates all the
calls to try_to_free_pages.

Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0d21e682d99d..2343f999e6e1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -172,7 +172,7 @@ extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
+extern int try_to_free_pages(struct zone **, unsigned int);
 extern int zone_reclaim(struct zone *, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
-- 
cgit v1.2.2


From 83e5d8f7253cb7b14472385a6d57df1e9f848e8e Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Tue, 21 Jun 2005 17:14:54 -0700
Subject: [PATCH] __mod_page_state(): pass unsigned long instead of unsigned

By making the offset argument of __mod_page_state an unsigned long instead
of unsigned, we can avoid forcing the compiler to sign extend a usually
constant argument.  This saves 1 instruction on x86-64.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index df313891db10..f2ee9b2332e3 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -136,7 +136,7 @@ struct page_state {
 extern void get_page_state(struct page_state *ret);
 extern void get_full_page_state(struct page_state *ret);
 extern unsigned long __read_page_state(unsigned offset);
-extern void __mod_page_state(unsigned offset, unsigned long delta);
+extern void __mod_page_state(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
 	__read_page_state(offsetof(struct page_state, member))
-- 
cgit v1.2.2


From c2f29ea111e3344ed48257c2a142c3db514e1529 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Tue, 21 Jun 2005 17:14:55 -0700
Subject: [PATCH] __read_page_state(): pass unsigned long instead of unsigned

By making the offset argument of __read_page_state an unsigned long instead of
unsigned, we can avoid forcing the compiler to sign extend a usually constant
argument.  This saves 1 instruction on x86-64.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f2ee9b2332e3..f5a6695d4d21 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -135,7 +135,7 @@ struct page_state {
 
 extern void get_page_state(struct page_state *ret);
 extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned offset);
+extern unsigned long __read_page_state(unsigned long offset);
 extern void __mod_page_state(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
-- 
cgit v1.2.2


From 4ae7c03943fca73f23bc0cdb938070f41b98101f Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Tue, 21 Jun 2005 17:14:57 -0700
Subject: [PATCH] Periodically drain non local pagesets

The pageset array can potentially acquire a huge amount of memory on large
NUMA systems.  F.e.  on a system with 512 processors and 256 nodes there
will be 256*512 pagesets.  If each pageset only holds 5 pages then we are
talking about 655360 pages.With a 16K page size on IA64 this results in
potentially 10 Gigabytes of memory being trapped in pagesets.  The typical
cases are much less for smaller systems but there is still the potential of
memory being trapped in off node pagesets.  Off node memory may be rarely
used if local memory is available and so we may potentially have memory in
seldom used pagesets without this patch.

The slab allocator flushes its per cpu caches every 2 seconds.  The
following patch flushes the off node pageset caches in the same way by
tying into the slab flush.

The patch also changes /proc/zoneinfo to include the number of pages
currently in each pageset.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/gfp.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 208535fd4832..8d6bf608b199 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -133,5 +133,10 @@ extern void FASTCALL(free_cold_page(struct page *page));
 #define free_page(addr) free_pages((addr),0)
 
 void page_alloc_init(void);
+#ifdef CONFIG_NUMA
+void drain_remote_pages(void);
+#else
+static inline void drain_remote_pages(void) { };
+#endif
 
 #endif /* __LINUX_GFP_H */
-- 
cgit v1.2.2


From f14f75b81187cdbe10cc53a521bf9fdf97b59f8c Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@wildopensource.com>
Date: Tue, 21 Jun 2005 17:15:02 -0700
Subject: [PATCH] ia64 uncached alloc

This patch contains the ia64 uncached page allocator and the generic
allocator (genalloc).  The uncached allocator was formerly part of the SN2
mspec driver but there are several other users of it so it has been split
off from the driver.

The generic allocator can be used by device driver to manage special memory
etc.  The generic allocator is based on the allocator from the sym53c8xx_2
driver.

Various users on ia64 needs uncached memory.  The SGI SN architecture requires
it for inter-partition communication between partitions within a large NUMA
cluster.  The specific user for this is the XPC code.  Another application is
large MPI style applications which use it for synchronization, on SN this can
be done using special 'fetchop' operations but it also benefits non SN
hardware which may use regular uncached memory for this purpose.  Performance
of doing this through uncached vs cached memory is pretty substantial.  This
is handled by the mspec driver which I will push out in a seperate patch.

Rather than creating a specific allocator for just uncached memory I came up
with genalloc which is a generic purpose allocator that can be used by device
drivers and other subsystems as they please.  For instance to handle onboard
device memory.  It was derived from the sym53c7xx_2 driver's allocator which
is also an example of a potential user (I am refraining from modifying sym2
right now as it seems to have been under fairly heavy development recently).

On ia64 memory has various properties within a granule, ie.  it isn't safe to
access memory as uncached within the same granule as currently has memory
accessed in cached mode.  The regular system therefore doesn't utilize memory
in the lower granules which is mixed in with device PAL code etc.  The
uncached driver walks the EFI memmap and pulls out the spill uncached pages
and sticks them into the uncached pool.  Only after these chunks have been
utilized, will it start converting regular cached memory into uncached memory.
Hence the reason for the EFI related code additions.

Signed-off-by: Jes Sorensen <jes@wildopensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/genalloc.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 include/linux/genalloc.h

(limited to 'include/linux')

diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
new file mode 100644
index 000000000000..7fd0576a4454
--- /dev/null
+++ b/include/linux/genalloc.h
@@ -0,0 +1,40 @@
+/*
+ * Basic general purpose allocator for managing special purpose memory
+ * not managed by the regular kmalloc/kfree interface.
+ * Uses for this includes on-device special memory, uncached memory
+ * etc.
+ *
+ * This code is based on the buddy allocator found in the sym53c8xx_2
+ * driver, adapted for general purpose use.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/spinlock.h>
+
+#define ALLOC_MIN_SHIFT		5 /* 32 bytes minimum */
+/*
+ *  Link between free memory chunks of a given size.
+ */
+struct gen_pool_link {
+	struct gen_pool_link *next;
+};
+
+/*
+ *  Memory pool descriptor.
+ */
+struct gen_pool {
+	spinlock_t lock;
+	unsigned long (*get_new_chunk)(struct gen_pool *);
+	struct gen_pool *next;
+	struct gen_pool_link *h;
+	unsigned long private;
+	int max_chunk_shift;
+};
+
+unsigned long gen_pool_alloc(struct gen_pool *poolp, int size);
+void gen_pool_free(struct gen_pool *mp, unsigned long ptr, int size);
+struct gen_pool *gen_pool_create(int nr_chunks, int max_chunk_shift,
+				 unsigned long (*fp)(struct gen_pool *),
+				 unsigned long data);
-- 
cgit v1.2.2


From 5b37b700f7c491a9320f4e29472bbaf23dded8fd Mon Sep 17 00:00:00 2001
From: Kumar Gala <galak@freescale.com>
Date: Tue, 21 Jun 2005 17:15:18 -0700
Subject: [PATCH] ppc32: Added support for new MPC8548 family of PowerQUICC III
 processors

Added descriptions of the new MPC8548 family processors, e500 core and
peripherals.

Signed-off-by: Kumar Gala <kumar.gala@freescale.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fsl_devices.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index faaff4c64559..70f54af87b9f 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -51,6 +51,7 @@ struct gianfar_platform_data {
 
 	/* board specific information */
 	u32 board_flags;
+	u32 phy_flags;
 	u32 phyid;
 	u32 interruptPHY;
 	u8 mac_addr[6];
@@ -61,9 +62,14 @@ struct gianfar_platform_data {
 #define FSL_GIANFAR_DEV_HAS_COALESCE		0x00000002
 #define FSL_GIANFAR_DEV_HAS_RMON		0x00000004
 #define FSL_GIANFAR_DEV_HAS_MULTI_INTR		0x00000008
+#define FSL_GIANFAR_DEV_HAS_CSUM		0x00000010
+#define FSL_GIANFAR_DEV_HAS_VLAN		0x00000020
+#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH	0x00000040
+#define FSL_GIANFAR_DEV_HAS_PADDING		0x00000080
 
 /* Flags in gianfar_platform_data */
-#define FSL_GIANFAR_BRD_HAS_PHY_INTR	0x00000001	/* if not set use a timer */
+#define FSL_GIANFAR_BRD_HAS_PHY_INTR	0x00000001 /* set or use a timer */
+#define FSL_GIANFAR_BRD_IS_REDUCED	0x00000002 /* Set if RGMII, RMII */
 
 struct fsl_i2c_platform_data {
 	/* device specific information */
-- 
cgit v1.2.2


From 22329b511a97557b293583194037d1f4c71e1504 Mon Sep 17 00:00:00 2001
From: Brent Casavant <bcasavan@sgi.com>
Date: Tue, 21 Jun 2005 17:15:59 -0700
Subject: [PATCH] ioc4: Core driver rewrite

This series of patches reworks the configuration and internal structure
of the SGI IOC4 I/O controller device drivers.

These changes are motivated by several factors:

- The IOC4 chip PCI resources are of mixed use between functions (i.e.
  multiple functions are handled in the same address range, sometimes
  within the same register), muddling resource ownership and initialization
  issues.  Centralizing this ownership in a core driver is desirable.

- The IOC4 chip implements multiple functions (serial, IDE, others not
  yet implemented in the mainline kernel) but is not a multifunction
  PCI device.  In order to properly handle device addition and removal
  as well as module insertion and deletion, an intermediary IOC4-specific
  driver layer is needed to handle these operations cleanly.

- All IOC4 drivers are currently enabled by a single CONFIG value.  As
  not all systems need all IOC4 functions, it is desireable to enable
  these drivers independently.

- The current IOC4 core driver will trigger loading of all function-level
  drivers, as it makes direct calls to them.  This situation should be
  reversed (i.e. function-level drivers cause loading of core driver)
  in order to maintain a clear and least-surprise driver loading model.

- IOC4 hardware design necessitates some driver-level dependency on
  the PCI bus clock speed.  Current code assumes a 66MHz bus, but the
  speed should be autodetected and appropriate compensation taken.

This patch series effects the above changes by a newly and better designed
IOC4 core driver with which the function-level drivers can register and
deregister themselves upon module insertion/removal.  By tracking these
modules, device addition/removal is also handled properly.  PCI resource
management and ownership issues are centralized in this core driver, and
IOC4-wide configuration actions such as bus speed detection are also
handled in this core driver.

This patch:

The SGI IOC4 I/O controller chip implements multiple functions, though it is
not a multi-function PCI device.  Additionally, various PCI resources of the
IOC4 are shared by multiple hardware functions, and thus resource ownership by
driver is not clearly delineated.  Due to the current driver design, all core
and subordinate drivers must be loaded, or none, which is undesirable if not
all IOC4 hardware features are being used.

This patch reorganizes the IOC4 drivers so that the core driver provides a
subdriver registration service.  Through appropriate callbacks the subdrivers
can now handle device addition and removal, as well as module insertion and
deletion (though the IOC4 IDE driver requires further work before module
deletion will work).  The core driver now takes care of allocating PCI
resources and data which must be shared between subdrivers, to clearly
delineate module ownership of these items.

Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Acked-by: Pat Gefre <pfg@sgi.com
Acked-by: Jeremy Higdon <jeremy@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ioc4.h        | 156 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ioc4_common.h |  21 ------
 2 files changed, 156 insertions(+), 21 deletions(-)
 create mode 100644 include/linux/ioc4.h
 delete mode 100644 include/linux/ioc4_common.h

(limited to 'include/linux')

diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h
new file mode 100644
index 000000000000..729bfa4c4ac5
--- /dev/null
+++ b/include/linux/ioc4.h
@@ -0,0 +1,156 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#ifndef _LINUX_IOC4_H
+#define _LINUX_IOC4_H
+
+#include <linux/interrupt.h>
+
+/***********************************
+ * Structures needed by subdrivers *
+ ***********************************/
+
+/* This structure fully describes the IOC4 miscellaneous registers which
+ * appear at bar[0]+0x00000 through bar[0]+0x0005c.  The corresponding
+ * PCI resource is managed by the main IOC4 driver because it contains
+ * registers of interest to many different IOC4 subdrivers.
+ */
+struct ioc4_misc_regs {
+	/* Miscellaneous IOC4 registers */
+	union ioc4_pci_err_addr_l {
+		uint32_t raw;
+		struct {
+			uint32_t valid:1;	/* Address captured */
+			uint32_t master_id:4;	/* Unit causing error
+						 * 0/1: Serial port 0 TX/RX
+						 * 2/3: Serial port 1 TX/RX
+						 * 4/5: Serial port 2 TX/RX
+						 * 6/7: Serial port 3 TX/RX
+						 * 8: ATA/ATAPI
+						 * 9-15: Undefined
+						 */
+			uint32_t mul_err:1;	/* Multiple errors occurred */
+			uint32_t addr:26;	/* Bits 31-6 of error addr */
+		} fields;
+	} pci_err_addr_l;
+	uint32_t pci_err_addr_h;	/* Bits 63-32 of error addr */
+	union ioc4_sio_int {
+		uint32_t raw;
+		struct {
+			uint8_t tx_mt:1;	/* TX ring buffer empty */
+			uint8_t rx_full:1;	/* RX ring buffer full */
+			uint8_t rx_high:1;	/* RX high-water exceeded */
+			uint8_t rx_timer:1;	/* RX timer has triggered */
+			uint8_t delta_dcd:1;	/* DELTA_DCD seen */
+			uint8_t delta_cts:1;	/* DELTA_CTS seen */
+			uint8_t intr_pass:1;	/* Interrupt pass-through */
+			uint8_t tx_explicit:1;	/* TX, MCW, or delay complete */
+		} fields[4];
+	} sio_ir;		/* Serial interrupt state */
+	union ioc4_other_int {
+		uint32_t raw;
+		struct {
+			uint32_t ata_int:1;	/* ATA port passthru */
+			uint32_t ata_memerr:1;	/* ATA halted by mem error */
+			uint32_t memerr:4;	/* Serial halted by mem err */
+			uint32_t kbd_int:1;	/* kbd/mouse intr asserted */
+			uint32_t reserved:16;	/* zero */
+			uint32_t rt_int:1;	/* INT_OUT section latch */
+			uint32_t gen_int:8;	/* Intr. from generic pins */
+		} fields;
+	} other_ir;		/* Other interrupt state */
+	union ioc4_sio_int sio_ies;	/* Serial interrupt enable set */
+	union ioc4_other_int other_ies;	/* Other interrupt enable set */
+	union ioc4_sio_int sio_iec;	/* Serial interrupt enable clear */
+	union ioc4_other_int other_iec;	/* Other interrupt enable clear */
+	union ioc4_sio_cr {
+		uint32_t raw;
+		struct {
+			uint32_t cmd_pulse:4;	/* Bytebus strobe width */
+			uint32_t arb_diag:3;	/* PCI bus requester */
+			uint32_t sio_diag_idle:1;	/* Active ser req? */
+			uint32_t ata_diag_idle:1;	/* Active ATA req? */
+			uint32_t ata_diag_active:1;	/* ATA req is winner */
+			uint32_t reserved:22;	/* zero */
+		} fields;
+	} sio_cr;
+	uint32_t unused1;
+	union ioc4_int_out {
+		uint32_t raw;
+		struct {
+			uint32_t count:16;	/* Period control */
+			uint32_t mode:3;	/* Output signal shape */
+			uint32_t reserved:11;	/* zero */
+			uint32_t diag:1;	/* Timebase control */
+			uint32_t int_out:1;	/* Current value */
+		} fields;
+	} int_out;		/* External interrupt output control */
+	uint32_t unused2;
+	union ioc4_gpcr {
+		uint32_t raw;
+		struct {
+			uint32_t dir:8;	/* Pin direction */
+			uint32_t edge:8;	/* Edge/level mode */
+			uint32_t reserved1:4;	/* zero */
+			uint32_t int_out_en:1;	/* INT_OUT enable */
+			uint32_t reserved2:11;	/* zero */
+		} fields;
+	} gpcr_s;		/* Generic PIO control set */
+	union ioc4_gpcr gpcr_c;	/* Generic PIO control clear */
+	union ioc4_gpdr {
+		uint32_t raw;
+		struct {
+			uint32_t gen_pin:8;	/* State of pins */
+			uint32_t reserved:24;
+		} fields;
+	} gpdr;			/* Generic PIO data */
+	uint32_t unused3;
+	union ioc4_gppr {
+		uint32_t raw;
+		struct {
+			uint32_t gen_pin:1;	/* Single pin state */
+			uint32_t reserved:31;
+		} fields;
+	} gppr[8];		/* Generic PIO pins */
+};
+
+/* One of these per IOC4
+ *
+ * The idd_serial_data field is present here, even though it's used
+ * solely by the serial subdriver, because the main IOC4 module
+ * properly owns pci_{get,set}_drvdata functionality.  This field
+ * allows that subdriver to stash its own drvdata somewhere.
+ */
+struct ioc4_driver_data {
+	struct list_head idd_list;
+	unsigned long idd_bar0;
+	struct pci_dev *idd_pdev;
+	const struct pci_device_id *idd_pci_id;
+	struct __iomem ioc4_misc_regs *idd_misc_regs;
+	void *idd_serial_data;
+};
+
+/* One per submodule */
+struct ioc4_submodule {
+	struct list_head is_list;
+	char *is_name;
+	struct module *is_owner;
+	int (*is_probe) (struct ioc4_driver_data *);
+	int (*is_remove) (struct ioc4_driver_data *);
+};
+
+#define IOC4_NUM_CARDS		8	/* max cards per partition */
+
+/**********************************
+ * Functions needed by submodules *
+ **********************************/
+
+extern int ioc4_register_submodule(struct ioc4_submodule *);
+extern void ioc4_unregister_submodule(struct ioc4_submodule *);
+
+#endif				/* _LINUX_IOC4_H */
diff --git a/include/linux/ioc4_common.h b/include/linux/ioc4_common.h
deleted file mode 100644
index b03bcc46df55..000000000000
--- a/include/linux/ioc4_common.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-#ifndef _LINUX_IOC4_COMMON_H
-#define _LINUX_IOC4_COMMON_H
-
-/* prototypes */
-
-int ioc4_serial_init(void);
-
-int ioc4_serial_attach_one(struct pci_dev *pdev, const struct
-				pci_device_id *pci_id);
-int ioc4_ide_attach_one(struct pci_dev *pdev, const struct
-				pci_device_id *pci_id);
-
-#endif	/* _LINUX_IOC4_COMMON_H */
-- 
cgit v1.2.2


From d4c477ca5448f19afaaf6c0cfd655009ea9e614d Mon Sep 17 00:00:00 2001
From: Brent Casavant <bcasavan@sgi.com>
Date: Tue, 21 Jun 2005 17:16:01 -0700
Subject: [PATCH] ioc4: PCI bus speed detection

Several hardware features of SGI's IOC4 I/O controller chip require
timing-related driver calculations dependent upon the PCI bus speed.  This
patch enables the core IOC4 driver code to detect the actual bus speed and
store a value that can later be used by the IOC4 subdrivers as needed.

Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Acked-by: Pat Gefre <pfg@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ioc4.h | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h
index 729bfa4c4ac5..3dd18b785ebd 100644
--- a/include/linux/ioc4.h
+++ b/include/linux/ioc4.h
@@ -11,6 +11,14 @@
 
 #include <linux/interrupt.h>
 
+/***************
+ * Definitions *
+ ***************/
+
+/* Miscellaneous values inherent to hardware */
+
+#define IOC4_EXTINT_COUNT_DIVISOR 520	/* PCI clocks per COUNT tick */
+
 /***********************************
  * Structures needed by subdrivers *
  ***********************************/
@@ -119,19 +127,34 @@ struct ioc4_misc_regs {
 	} gppr[8];		/* Generic PIO pins */
 };
 
-/* One of these per IOC4
- *
- * The idd_serial_data field is present here, even though it's used
- * solely by the serial subdriver, because the main IOC4 module
- * properly owns pci_{get,set}_drvdata functionality.  This field
- * allows that subdriver to stash its own drvdata somewhere.
- */
+/* Masks for GPCR DIR pins */
+#define IOC4_GPCR_DIR_0 0x01	/* External interrupt output */
+#define IOC4_GPCR_DIR_1 0x02	/* External interrupt input */
+#define IOC4_GPCR_DIR_2 0x04
+#define IOC4_GPCR_DIR_3 0x08	/* Keyboard/mouse presence */
+#define IOC4_GPCR_DIR_4 0x10	/* Ser. port 0 xcvr select (0=232, 1=422) */
+#define IOC4_GPCR_DIR_5 0x20	/* Ser. port 1 xcvr select (0=232, 1=422) */
+#define IOC4_GPCR_DIR_6 0x40	/* Ser. port 2 xcvr select (0=232, 1=422) */
+#define IOC4_GPCR_DIR_7 0x80	/* Ser. port 3 xcvr select (0=232, 1=422) */
+
+/* Masks for GPCR EDGE pins */
+#define IOC4_GPCR_EDGE_0 0x01
+#define IOC4_GPCR_EDGE_1 0x02	/* External interrupt input */
+#define IOC4_GPCR_EDGE_2 0x04
+#define IOC4_GPCR_EDGE_3 0x08
+#define IOC4_GPCR_EDGE_4 0x10
+#define IOC4_GPCR_EDGE_5 0x20
+#define IOC4_GPCR_EDGE_6 0x40
+#define IOC4_GPCR_EDGE_7 0x80
+
+/* One of these per IOC4 */
 struct ioc4_driver_data {
 	struct list_head idd_list;
 	unsigned long idd_bar0;
 	struct pci_dev *idd_pdev;
 	const struct pci_device_id *idd_pci_id;
 	struct __iomem ioc4_misc_regs *idd_misc_regs;
+	unsigned long count_period;
 	void *idd_serial_data;
 };
 
-- 
cgit v1.2.2


From dbce706e2550253c5ab6043f4f5dfde0cd02470f Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Tue, 21 Jun 2005 17:16:19 -0700
Subject: [PATCH] uml: add and use generic hw_controller_type->release

With Chris Wedgwood <cw@f00f.org>

Currently UML must explicitly call the UML-specific
free_irq_by_irq_and_dev() for each free_irq call it's done.

This is needed because ->shutdown and/or ->disable are only called when the
last "action" for that irq is removed.

Instead, for UML shared IRQs (UML IRQs are very often, if not always,
shared), for each dev_id some setup is done, which must be cleared on the
release of that fd.  For instance, for each open console a new instance
(i.e.  new dev_id) of the same IRQ is requested().

Exactly, a fd is stored in an array (pollfds), which is after read by a
host thread and passed to poll().  Each event registered by poll() triggers
an interrupt.  So, for each free_irq() we must remove the corresponding
host fd from the table, which we do via this -release() method.

In this patch we add an appropriate hook for this, and remove all uses of
it by pointing the hook to the said procedure; this is safe to do since the
said procedure.

Also some cosmetic improvements are included.

This is heavily based on some work by Chris Wedgwood, which however didn't
get the patch merged for something I'd call a "misunderstanding" (the need
for this patch wasn't cleanly explained, thus adding the generic hook was
felt as undesirable).

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
CC: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index c3ff4d101667..b68ad80e2464 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -47,6 +47,7 @@ struct hw_interrupt_type {
 	void (*ack)(unsigned int irq);
 	void (*end)(unsigned int irq);
 	void (*set_affinity)(unsigned int irq, cpumask_t dest);
+	void (*release)(unsigned int irq, void *dev_id);
 };
 
 typedef struct hw_interrupt_type  hw_irq_controller;
-- 
cgit v1.2.2


From b77d6adc922b8bbf8b16b67f567958c42962cf88 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Tue, 21 Jun 2005 17:16:24 -0700
Subject: [PATCH] uml: make hw_controller_type->release exist only for archs
 needing it

With Chris Wedgwood <cw@f00f.org>

As suggested by Chris, we can make the "just added" method ->release
conditional to UML only (better: to archs requesting it, i.e.  only UML
currently), so that other archs don't get this unneeded crud, and if UML
won't need it any more we can kill this.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
CC: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index b68ad80e2464..7fc1022be9ee 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -47,7 +47,10 @@ struct hw_interrupt_type {
 	void (*ack)(unsigned int irq);
 	void (*end)(unsigned int irq);
 	void (*set_affinity)(unsigned int irq, cpumask_t dest);
+	/* Currently used only by UML, might disappear one day.*/
+#ifdef CONFIG_IRQ_RELEASE_METHOD
 	void (*release)(unsigned int irq, void *dev_id);
+#endif
 };
 
 typedef struct hw_interrupt_type  hw_irq_controller;
-- 
cgit v1.2.2


From 8a96619145840c6eb50d85759f72d9337db411f7 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 21 Jun 2005 17:16:41 -0700
Subject: [PATCH] autofs4: subversion bump to identify these changes

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/auto_fs4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/auto_fs4.h b/include/linux/auto_fs4.h
index a1657fb99516..9343c89d843c 100644
--- a/include/linux/auto_fs4.h
+++ b/include/linux/auto_fs4.h
@@ -23,7 +23,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	3
 #define AUTOFS_MAX_PROTO_VERSION	4
 
-#define AUTOFS_PROTO_SUBVERSION		6
+#define AUTOFS_PROTO_SUBVERSION		7
 
 /* Mask for expire behaviour */
 #define AUTOFS_EXP_IMMEDIATE		1
-- 
cgit v1.2.2


From f5a9951c94e7a285a3d00648e3d790a7f016bd11 Mon Sep 17 00:00:00 2001
From: James Simmons <jsimmons@pentafluge.infradead.org>
Date: Tue, 21 Jun 2005 17:16:58 -0700
Subject: [PATCH] fbdev: iomove removal

Since no one is using the inbuf, outbuf of struct fb_pixmap I removed their
use in the framebuffer console.  The idea is instead move the pixmap
functionality below the accelerated functions intead of on top as the way
it is now.  If there is no objection please apply.  This is against Linus
latestr GIT tree.  Thank you.

Signed-off-by: James Simmons <jsimmons@www.infradead.org>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fb.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index b468bf496547..e14942805dbd 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -524,11 +524,11 @@ struct fb_pixmap {
 	u32 offset;		/* current offset to buffer		*/
 	u32 buf_align;		/* byte alignment of each bitmap	*/
 	u32 scan_align;		/* alignment per scanline		*/
-	u32 access_align;	/* alignment per read/write		*/
+	u32 access_align;	/* alignment per read/write (bits)	*/
 	u32 flags;		/* see FB_PIXMAP_*			*/
 	/* access methods */
-	void (*outbuf)(struct fb_info *info, u8 *addr, u8 *src, unsigned int size);
-	u8   (*inbuf) (struct fb_info *info, u8 *addr);
+	void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size);
+	void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size);
 };
 
 
@@ -816,12 +816,6 @@ extern int unregister_framebuffer(struct fb_info *fb_info);
 extern int fb_prepare_logo(struct fb_info *fb_info);
 extern int fb_show_logo(struct fb_info *fb_info);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-extern void fb_iomove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 idx,
-				u32 height, u32 shift_high, u32 shift_low, u32 mod);
-extern void fb_iomove_buf_aligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch,
-				u32 height);
 extern void fb_sysmove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf,
 				u8 *dst, u32 d_pitch, u8 *src, u32 idx,
 				u32 height, u32 shift_high, u32 shift_low, u32 mod);
-- 
cgit v1.2.2


From 1154ea7dcd8eed758fb5ec47393a79d5a1f0bc43 Mon Sep 17 00:00:00 2001
From: Jaya Kumar <jayalk@intworks.biz>
Date: Tue, 21 Jun 2005 17:17:04 -0700
Subject: [PATCH] Framebuffer driver for Arc LCD board

Add support for the Arc monochrome LCD board.

The board uses KS108 controllers to drive individual 64x64 LCD matrices.
The board can be paneled in a variety of setups such as 2x1=128x64,
4x4=256x256 and so on.  The board/host interface is through GPIO.

Signed-off-by: Jaya Kumar <jayalk@intworks.biz>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: <linux-fbdev-devel@lists.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/arcfb.h | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 include/linux/arcfb.h

(limited to 'include/linux')

diff --git a/include/linux/arcfb.h b/include/linux/arcfb.h
new file mode 100644
index 000000000000..721e7654daeb
--- /dev/null
+++ b/include/linux/arcfb.h
@@ -0,0 +1,8 @@
+#ifndef __LINUX_ARCFB_H__
+#define __LINUX_ARCFB_H__
+
+#define FBIO_WAITEVENT		_IO('F', 0x88)
+#define FBIO_GETCONTROL2	_IOR('F', 0x89, size_t)
+
+#endif
+
-- 
cgit v1.2.2


From d5881eb4883ef7dd28a4dcea237714817c4b2f8e Mon Sep 17 00:00:00 2001
From: James Simmons <jsimmons@www.infradead.org>
Date: Tue, 21 Jun 2005 17:17:05 -0700
Subject: [PATCH] fbdev: new pci id for chipsfb

Patch adds pci ID for CT 69000 chipset.

Signed-off-by: James Simmons <jsimmons@www.infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b8b4ebf9abf1..63e89e47b8e9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -575,6 +575,7 @@
 #define PCI_DEVICE_ID_CT_65550		0x00e0
 #define PCI_DEVICE_ID_CT_65554		0x00e4
 #define PCI_DEVICE_ID_CT_65555		0x00e5
+#define PCI_DEVICE_ID_CT_69000		0x00c0
 
 #define PCI_VENDOR_ID_MIRO		0x1031
 #define PCI_DEVICE_ID_MIRO_36050	0x5601
-- 
cgit v1.2.2


From 303b86d9913eca0cbfc3c5cb41e7006f6e13b755 Mon Sep 17 00:00:00 2001
From: Jurriaan <thunder7@xs4all.nl>
Date: Tue, 21 Jun 2005 17:17:06 -0700
Subject: [PATCH] New framebuffer fonts + updated 12x22 font available

Improve the fonts for use with the framebuffer.

I've added all the characters marked 'FIXME' in the sun12x22 font and
created a 10x18 font (based on the sun12x22 font) and a 7x14 font (based
on the vga8x16 font).

This patch is non-intrusive, no options are enabled by default so most
users won't notice a thing.

I am placing my changes under the GPL, however, I've not seen any copyright
notices on the sun12x22 font and the vga8x16 font which I derived my new
fonts from so I don't know what the copyright status is.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/font.h | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/font.h b/include/linux/font.h
index fc2d690c9d5f..8fc80a7d78ac 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -25,19 +25,23 @@ struct font_desc {
 #define VGA8x16_IDX	1
 #define PEARL8x8_IDX	2
 #define VGA6x11_IDX	3
-#define SUN8x16_IDX	4
-#define SUN12x22_IDX	5
-#define ACORN8x8_IDX	6
-#define	MINI4x6_IDX	7
+#define FONT7x14_IDX	4
+#define	FONT10x18_IDX	5
+#define SUN8x16_IDX	6
+#define SUN12x22_IDX	7
+#define ACORN8x8_IDX	8
+#define	MINI4x6_IDX	9
 
 extern struct font_desc	font_vga_8x8,
-				font_vga_8x16,
-				font_pearl_8x8,
-				font_vga_6x11,
-				font_sun_8x16,
-				font_sun_12x22,
-				font_acorn_8x8,
-				font_mini_4x6;
+			font_vga_8x16,
+			font_pearl_8x8,
+			font_vga_6x11,
+			font_7x14,
+			font_10x18,
+			font_sun_8x16,
+			font_sun_12x22,
+			font_acorn_8x8,
+			font_mini_4x6;
 
 /* Find a font with a specific name */
 
-- 
cgit v1.2.2


From f1ab5dac251bb4514607918b0019a3b3f5f5fb48 Mon Sep 17 00:00:00 2001
From: James Simmons <jsimmons@pentafluge.infradead.org>
Date: Tue, 21 Jun 2005 17:17:07 -0700
Subject: [PATCH] fbdev: stack reduction

Shrink the stack when calling the drawing alignment functions.

Signed-off-by: James Simmons <jsimmons@www.infradead.org>
Cc: "Antonino A. Daplas" <adaplas@hotpop.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fb.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index e14942805dbd..bc24beeed971 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -816,12 +816,9 @@ extern int unregister_framebuffer(struct fb_info *fb_info);
 extern int fb_prepare_logo(struct fb_info *fb_info);
 extern int fb_show_logo(struct fb_info *fb_info);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-extern void fb_sysmove_buf_unaligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 idx,
+extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx,
 				u32 height, u32 shift_high, u32 shift_low, u32 mod);
-extern void fb_sysmove_buf_aligned(struct fb_info *info, struct fb_pixmap *buf,
-				u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch,
-				u32 height);
+extern void fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height);
 extern void fb_set_suspend(struct fb_info *info, int state);
 extern int fb_get_color_depth(struct fb_var_screeninfo *var);
 extern int fb_get_options(char *name, char **option);
-- 
cgit v1.2.2


From 06d91a5fe0b50c9060e70bdf7786f8a3c66249db Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:12 -0700
Subject: [PATCH] md: improve locking on 'safemode' and move superblock writes

When md marks the superblock dirty before a write, it calls
generic_make_request (to write the superblock) from within
generic_make_request (to write the first dirty block), which could cause
problems later.

With this patch, the superblock write is always done by the helper thread, and
write request are delayed until that write completes.

Also, the locking around marking the array dirty and writing the superblock is
improved to avoid possible races.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md.h   | 2 +-
 include/linux/raid/md_k.h | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index a6a67d102bfa..cfde8f497d6d 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
 extern void md_unregister_thread (mdk_thread_t *thread);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
-extern void md_write_start(mddev_t *mddev);
+extern int md_write_start(mddev_t *mddev, struct bio *bi);
 extern void md_write_end(mddev_t *mddev);
 extern void md_handle_safemode(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index c9a0d4013be7..d92db54255a3 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -15,6 +15,9 @@
 #ifndef _MD_K_H
 #define _MD_K_H
 
+/* and dm-bio-list.h is not under include/linux because.... ??? */
+#include "../../../drivers/md/dm-bio-list.h"
+
 #define MD_RESERVED       0UL
 #define LINEAR            1UL
 #define RAID0             2UL
@@ -252,6 +255,10 @@ struct mddev_s
 	atomic_t			recovery_active; /* blocks scheduled, but not written */
 	wait_queue_head_t		recovery_wait;
 	sector_t			recovery_cp;
+
+	spinlock_t			write_lock;
+	struct bio_list			write_list;
+
 	unsigned int			safemode;	/* if set, update "clean" superblock
 							 * when no writes pending.
 							 */ 
-- 
cgit v1.2.2


From 57afd89f98a990747445f01c458ecae64263b2f8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:13 -0700
Subject: [PATCH] md: improve the interface to sync_request

1/ change the return value (which is number-of-sectors synced)
 from 'int' to 'sector_t'.
 The number of sectors is usually easily small enough to fit
 in an int, but if resync needs to abort, it may want to return
 the total number of remaining sectors, which could be large.
 Also errors cannot be returned as negative numbers now, so use
 0 instead
2/ Add a 'skipped' return parameter to allow the array to report
 that it skipped the sectors.  This allows md to take this into account
 in the speed calculations.
 Currently there is no important skipping, but the bitmap-based-resync
 that is coming will use this.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_k.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index d92db54255a3..bce0032decff 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -298,7 +298,7 @@ struct mdk_personality_s
 	int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
 	int (*hot_remove_disk) (mddev_t *mddev, int number);
 	int (*spare_active) (mddev_t *mddev);
-	int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
+	sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
 	int (*resize) (mddev_t *mddev, sector_t sectors);
 	int (*reshape) (mddev_t *mddev, int raid_disks);
 	int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
-- 
cgit v1.2.2


From 32a7627cf3a35396a8e834faf34e38ae9f3b1309 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:14 -0700
Subject: [PATCH] md: optimised resync using Bitmap based intent logging

With this patch, the intent to write to some block in the array can be logged
to a bitmap file.  Each bit represents some number of sectors and is set
before any update happens, and only cleared when all writes relating to all
sectors are complete.

After an unclean shutdown, information in this bitmap can be used to optimise
resync - only sectors which could be out-of-sync need to be updated.

Also if a drive is removed and then added back into an array, the recovery can
make use of the bitmap to optimise reconstruction.  This is not implemented in
this patch.

Currently the bitmap is stored in a file which must (obviously) be stored on a
separate device.

The patch only provided infrastructure.  It does not update any personalities
to bitmap intent logging.

Md arrays can still be used with no bitmap file.  This patch has minimal
impact on such arrays.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/bitmap.h | 280 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/raid/md_k.h   |   4 +
 include/linux/raid/md_u.h   |   7 ++
 3 files changed, 291 insertions(+)
 create mode 100644 include/linux/raid/bitmap.h

(limited to 'include/linux')

diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
new file mode 100644
index 000000000000..f785cf26cbad
--- /dev/null
+++ b/include/linux/raid/bitmap.h
@@ -0,0 +1,280 @@
+/*
+ * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
+ *
+ * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
+ */
+#ifndef BITMAP_H
+#define BITMAP_H 1
+
+#define BITMAP_MAJOR 3
+#define BITMAP_MINOR 38
+
+/*
+ * in-memory bitmap:
+ *
+ * Use 16 bit block counters to track pending writes to each "chunk".
+ * The 2 high order bits are special-purpose, the first is a flag indicating
+ * whether a resync is needed.  The second is a flag indicating whether a
+ * resync is active.
+ * This means that the counter is actually 14 bits:
+ *
+ * +--------+--------+------------------------------------------------+
+ * | resync | resync |               counter                          |
+ * | needed | active |                                                |
+ * |  (0-1) |  (0-1) |              (0-16383)                         |
+ * +--------+--------+------------------------------------------------+
+ *
+ * The "resync needed" bit is set when:
+ *    a '1' bit is read from storage at startup.
+ *    a write request fails on some drives
+ *    a resync is aborted on a chunk with 'resync active' set
+ * It is cleared (and resync-active set) when a resync starts across all drives
+ * of the chunk.
+ *
+ *
+ * The "resync active" bit is set when:
+ *    a resync is started on all drives, and resync_needed is set.
+ *       resync_needed will be cleared (as long as resync_active wasn't already set).
+ * It is cleared when a resync completes.
+ *
+ * The counter counts pending write requests, plus the on-disk bit.
+ * When the counter is '1' and the resync bits are clear, the on-disk
+ * bit can be cleared aswell, thus setting the counter to 0.
+ * When we set a bit, or in the counter (to start a write), if the fields is
+ * 0, we first set the disk bit and set the counter to 1.
+ *
+ * If the counter is 0, the on-disk bit is clear and the stipe is clean
+ * Anything that dirties the stipe pushes the counter to 2 (at least)
+ * and sets the on-disk bit (lazily).
+ * If a periodic sweep find the counter at 2, it is decremented to 1.
+ * If the sweep find the counter at 1, the on-disk bit is cleared and the
+ * counter goes to zero.
+ *
+ * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
+ * counters as a fallback when "page" memory cannot be allocated:
+ *
+ * Normal case (page memory allocated):
+ *
+ *     page pointer (32-bit)
+ *
+ *     [ ] ------+
+ *               |
+ *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
+ *                          c1   c2    c2048
+ *
+ * Hijacked case (page memory allocation failed):
+ *
+ *     hijacked page pointer (32-bit)
+ *
+ *     [		  ][		  ] (no page memory allocated)
+ *      counter #1 (16-bit) counter #2 (16-bit)
+ *
+ */
+
+#ifdef __KERNEL__
+
+#define PAGE_BITS (PAGE_SIZE << 3)
+#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
+
+typedef __u16 bitmap_counter_t;
+#define COUNTER_BITS 16
+#define COUNTER_BIT_SHIFT 4
+#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
+#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
+
+#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
+#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
+#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
+#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
+#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
+#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
+
+/* how many counters per page? */
+#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
+/* same, except a shift value for more efficient bitops */
+#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
+/* same, except a mask value for more efficient bitops */
+#define PAGE_COUNTER_MASK  (PAGE_COUNTER_RATIO - 1)
+
+#define BITMAP_BLOCK_SIZE 512
+#define BITMAP_BLOCK_SHIFT 9
+
+/* how many blocks per chunk? (this is variable) */
+#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
+
+/* when hijacked, the counters and bits represent even larger "chunks" */
+/* there will be 1024 chunks represented by each counter in the page pointers */
+#define PAGEPTR_BLOCK_RATIO(bitmap) \
+			(CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
+#define PAGEPTR_BLOCK_SHIFT(bitmap) \
+			(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
+#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
+
+/*
+ * on-disk bitmap:
+ *
+ * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
+ * file a page at a time. There's a superblock at the start of the file.
+ */
+
+/* map chunks (bits) to file pages - offset by the size of the superblock */
+#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
+
+#endif
+
+/*
+ * bitmap structures:
+ */
+
+#define BITMAP_MAGIC 0x6d746962
+
+/* use these for bitmap->flags and bitmap->sb->state bit-fields */
+enum bitmap_state {
+	BITMAP_ACTIVE = 0x001, /* the bitmap is in use */
+	BITMAP_STALE  = 0x002  /* the bitmap file is out of date or had -EIO */
+};
+
+/* the superblock at the front of the bitmap file -- little endian */
+typedef struct bitmap_super_s {
+	__u32 magic;        /*  0  BITMAP_MAGIC */
+	__u32 version;      /*  4  the bitmap major for now, could change... */
+	__u8  uuid[16];     /*  8  128 bit uuid - must match md device uuid */
+	__u64 events;       /* 24  event counter for the bitmap (1)*/
+	__u64 events_cleared;/*32  event counter when last bit cleared (2) */
+	__u64 sync_size;    /* 40  the size of the md device's sync range(3) */
+	__u32 state;        /* 48  bitmap state information */
+	__u32 chunksize;    /* 52  the bitmap chunk size in bytes */
+	__u32 daemon_sleep; /* 56  seconds between disk flushes */
+
+	__u8  pad[256 - 60]; /* set to zero */
+} bitmap_super_t;
+
+/* notes:
+ * (1) This event counter is updated before the eventcounter in the md superblock
+ *    When a bitmap is loaded, it is only accepted if this event counter is equal
+ *    to, or one greater than, the event counter in the superblock.
+ * (2) This event counter is updated when the other one is *if*and*only*if* the
+ *    array is not degraded.  As bits are not cleared when the array is degraded,
+ *    this represents the last time that any bits were cleared.
+ *    If a device is being added that has an event count with this value or
+ *    higher, it is accepted as conforming to the bitmap.
+ * (3)This is the number of sectors represented by the bitmap, and is the range that
+ *    resync happens across.  For raid1 and raid5/6 it is the size of individual
+ *    devices.  For raid10 it is the size of the array.
+ */
+
+#ifdef __KERNEL__
+
+/* the in-memory bitmap is represented by bitmap_pages */
+struct bitmap_page {
+	/*
+	 * map points to the actual memory page
+	 */
+	char *map;
+	/*
+	 * in emergencies (when map cannot be alloced), hijack the map
+	 * pointer and use it as two counters itself
+	 */
+	unsigned int hijacked:1;
+	/*
+	 * count of dirty bits on the page
+	 */
+	unsigned int  count:31;
+};
+
+/* keep track of bitmap file pages that have pending writes on them */
+struct page_list {
+	struct list_head list;
+	struct page *page;
+};
+
+/* the main bitmap structure - one per mddev */
+struct bitmap {
+	struct bitmap_page *bp;
+	unsigned long pages; /* total number of pages in the bitmap */
+	unsigned long missing_pages; /* number of pages not yet allocated */
+
+	mddev_t *mddev; /* the md device that the bitmap is for */
+
+	int counter_bits; /* how many bits per block counter */
+
+	/* bitmap chunksize -- how much data does each bit represent? */
+	unsigned long chunksize;
+	unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
+	unsigned long chunks; /* total number of data chunks for the array */
+
+	/* We hold a count on the chunk currently being synced, and drop
+	 * it when the last block is started.  If the resync is aborted
+	 * midway, we need to be able to drop that count, so we remember
+	 * the counted chunk..
+	 */
+	unsigned long syncchunk;
+
+	__u64	events_cleared;
+
+	/* bitmap spinlock */
+	spinlock_t lock;
+
+	struct file *file; /* backing disk file */
+	struct page *sb_page; /* cached copy of the bitmap file superblock */
+	struct page **filemap; /* list of cache pages for the file */
+	unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
+	unsigned long file_pages; /* number of pages in the file */
+
+	unsigned long flags;
+
+	/*
+	 * the bitmap daemon - periodically wakes up and sweeps the bitmap
+	 * file, cleaning up bits and flushing out pages to disk as necessary
+	 */
+	unsigned long daemon_lastrun; /* jiffies of last run */
+	unsigned long daemon_sleep; /* how many seconds between updates? */
+
+	/*
+	 * bitmap write daemon - this daemon performs writes to the bitmap file
+	 * this thread is only needed because of a limitation in ext3 (jbd)
+	 * that does not allow a task to have two journal transactions ongoing
+	 * simultaneously (even if the transactions are for two different
+	 * filesystems) -- in the case of bitmap, that would be the filesystem
+	 * that the bitmap file resides on and the filesystem that is mounted
+	 * on the md device -- see current->journal_info in jbd/transaction.c
+	 */
+	mdk_thread_t *writeback_daemon;
+	spinlock_t write_lock;
+	struct semaphore write_ready;
+	struct semaphore write_done;
+	unsigned long writes_pending;
+	wait_queue_head_t write_wait;
+	struct list_head write_pages;
+	struct list_head complete_pages;
+	mempool_t *write_pool;
+};
+
+/* the bitmap API */
+
+/* these are used only by md/bitmap */
+int  bitmap_create(mddev_t *mddev);
+void bitmap_destroy(mddev_t *mddev);
+int  bitmap_active(struct bitmap *bitmap);
+
+char *file_path(struct file *file, char *buf, int count);
+void bitmap_print_sb(struct bitmap *bitmap);
+int bitmap_update_sb(struct bitmap *bitmap);
+
+int  bitmap_setallbits(struct bitmap *bitmap);
+
+/* these are exported */
+int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
+void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
+		     int success);
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks);
+void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
+void bitmap_close_sync(struct bitmap *bitmap);
+
+int bitmap_unplug(struct bitmap *bitmap);
+int bitmap_daemon_work(struct bitmap *bitmap);
+#endif
+
+#endif
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index bce0032decff..16e94a9f0f8c 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -267,6 +267,9 @@ struct mddev_s
 	atomic_t			writes_pending; 
 	request_queue_t			*queue;	/* for plugging ... */
 
+	struct bitmap                   *bitmap; /* the bitmap for the device */
+	struct file			*bitmap_file; /* the bitmap file */
+
 	struct list_head		all_mddevs;
 };
 
@@ -341,6 +344,7 @@ typedef struct mdk_thread_s {
 	unsigned long           flags;
 	struct completion	*event;
 	struct task_struct	*tsk;
+	unsigned long		timeout;
 	const char		*name;
 } mdk_thread_t;
 
diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h
index a2df5c2a42af..81da20ccec4d 100644
--- a/include/linux/raid/md_u.h
+++ b/include/linux/raid/md_u.h
@@ -23,6 +23,7 @@
 #define GET_DISK_INFO		_IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
 #define PRINT_RAID_DEBUG	_IO (MD_MAJOR, 0x13)
 #define RAID_AUTORUN		_IO (MD_MAJOR, 0x14)
+#define GET_BITMAP_FILE		_IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t)
 
 /* configuration */
 #define CLEAR_ARRAY		_IO (MD_MAJOR, 0x20)
@@ -36,6 +37,7 @@
 #define HOT_ADD_DISK		_IO (MD_MAJOR, 0x28)
 #define SET_DISK_FAULTY		_IO (MD_MAJOR, 0x29)
 #define HOT_GENERATE_ERROR	_IO (MD_MAJOR, 0x2a)
+#define SET_BITMAP_FILE		_IOW (MD_MAJOR, 0x2b, int)
 
 /* usage */
 #define RUN_ARRAY		_IOW (MD_MAJOR, 0x30, mdu_param_t)
@@ -106,6 +108,11 @@ typedef struct mdu_start_info_s {
 
 } mdu_start_info_t;
 
+typedef struct mdu_bitmap_file_s
+{
+	char pathname[4096];
+} mdu_bitmap_file_t;
+
 typedef struct mdu_param_s
 {
 	int			personality;	/* 1,2,3,4 */
-- 
cgit v1.2.2


From 77ad4bc706fe6c52ab953f31c287a6af712d080c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:21 -0700
Subject: [PATCH] md: enable the bitmap write-back daemon and wait for it.

Currently we don't wait for updates to the bitmap to be flushed to disk
properly.  The infrastructure all there, but it isn't being used....

A separate kernel thread (bitmap_writeback_daemon) is needed to wait for each
page as we cannot get callbacks when a page write completes.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/bitmap.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index f785cf26cbad..cfe60cfc8f3d 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -233,21 +233,12 @@ struct bitmap {
 	unsigned long daemon_sleep; /* how many seconds between updates? */
 
 	/*
-	 * bitmap write daemon - this daemon performs writes to the bitmap file
-	 * this thread is only needed because of a limitation in ext3 (jbd)
-	 * that does not allow a task to have two journal transactions ongoing
-	 * simultaneously (even if the transactions are for two different
-	 * filesystems) -- in the case of bitmap, that would be the filesystem
-	 * that the bitmap file resides on and the filesystem that is mounted
-	 * on the md device -- see current->journal_info in jbd/transaction.c
+	 * bitmap_writeback_daemon waits for file-pages that have been written,
+	 * as there is no way to get a call-back when a page write completes.
 	 */
 	mdk_thread_t *writeback_daemon;
 	spinlock_t write_lock;
-	struct semaphore write_ready;
-	struct semaphore write_done;
-	unsigned long writes_pending;
 	wait_queue_head_t write_wait;
-	struct list_head write_pages;
 	struct list_head complete_pages;
 	mempool_t *write_pool;
 };
-- 
cgit v1.2.2


From 191ea9b2c7cc3ebbe0678834ab710d7d95ad3f9a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:23 -0700
Subject: [PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/raid1.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index abbfdd9afe1e..9d93cf12e890 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -36,12 +36,21 @@ struct r1_private_data_s {
 	spinlock_t		device_lock;
 
 	struct list_head	retry_list;
+	/* queue pending writes and submit them on unplug */
+	struct bio_list		pending_bio_list;
+	/* queue of writes that have been unplugged */
+	struct bio_list		flushing_bio_list;
+
 	/* for use when syncing mirrors: */
 
 	spinlock_t		resync_lock;
-	int nr_pending;
-	int barrier;
+	int			nr_pending;
+	int			barrier;
 	sector_t		next_resync;
+	int			fullsync;  /* set to 1 if a full sync is needed,
+					    * (fresh device added).
+					    * Cleared when a sync completes.
+					    */
 
 	wait_queue_head_t	wait_idle;
 	wait_queue_head_t	wait_resume;
@@ -85,14 +94,17 @@ struct r1bio_s {
 	int			read_disk;
 
 	struct list_head	retry_list;
+	struct bitmap_update	*bitmap_update;
 	/*
 	 * if the IO is in WRITE direction, then multiple bios are used.
 	 * We choose the number when they are allocated.
 	 */
 	struct bio		*bios[0];
+	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
 };
 
 /* bits for r1bio.state */
 #define	R1BIO_Uptodate	0
 #define	R1BIO_IsSync	1
+#define	R1BIO_Degraded	2
 #endif
-- 
cgit v1.2.2


From 41158c7eb22312cfaa256744e1553bb4042ff085 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:25 -0700
Subject: [PATCH] md: optimise reconstruction when re-adding a recently failed
 drive.

When an array is degraded, bit in the intent-bitmap are never cleared.  So if
a recently failed drive is re-added, we only need to reconstruct the block
that are still reflected in the bitmap.

This patch adds support for this re-adding.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_k.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 16e94a9f0f8c..6cdcb4434c6c 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -183,6 +183,10 @@ struct mdk_rdev_s
 
 	int desc_nr;			/* descriptor index in the superblock */
 	int raid_disk;			/* role of device in array */
+	int saved_raid_disk;		/* role that device used to have in the
+					 * array and could again if we did a partial
+					 * resync from the bitmap
+					 */
 
 	atomic_t	nr_pending;	/* number of pending requests.
 					 * only maintained for arrays that
-- 
cgit v1.2.2


From 3d310eb7b3df1252e8595d059d982b0a9825a137 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:26 -0700
Subject: [PATCH] md: fix deadlock due to md thread processing delayed
 requests.

Before completing a 'write' the md superblock might need to be updated.
This is best done by the md_thread.

The current code schedules this up and queues the write request for later
handling by the md_thread.

However some personalities (Raid5/raid6) will deadlock if the md_thread
tries to submit requests to its own array.

So this patch changes things so the processes submitting the request waits
for the superblock to be written and then submits the request itself.

This fixes a recently-created deadlock in raid5/raid6

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md.h   | 2 +-
 include/linux/raid/md_k.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index cfde8f497d6d..75f41d8faed2 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
 extern void md_unregister_thread (mdk_thread_t *thread);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_check_recovery(mddev_t *mddev);
-extern int md_write_start(mddev_t *mddev, struct bio *bi);
+extern void md_write_start(mddev_t *mddev, struct bio *bi);
 extern void md_write_end(mddev_t *mddev);
 extern void md_handle_safemode(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 6cdcb4434c6c..3e977025cf43 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -261,7 +261,7 @@ struct mddev_s
 	sector_t			recovery_cp;
 
 	spinlock_t			write_lock;
-	struct bio_list			write_list;
+	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
 
 	unsigned int			safemode;	/* if set, update "clean" superblock
 							 * when no writes pending.
-- 
cgit v1.2.2


From a654b9d8f851f4ca02649d5825cbe6c608adb10c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:27 -0700
Subject: [PATCH] md: allow md intent bitmap to be stored near the superblock.

This provides an alternate to storing the bitmap in a separate file.  The
bitmap can be stored at a given offset from the superblock.  Obviously the
creator of the array must make sure this doesn't intersect with data....
After is good for version-0.90 superblocks.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/bitmap.h |  2 ++
 include/linux/raid/md.h     | 15 ++++++++++++++-
 include/linux/raid/md_k.h   |  4 ++++
 include/linux/raid/md_p.h   |  7 ++++++-
 4 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index cfe60cfc8f3d..e24b74b11150 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -217,6 +217,7 @@ struct bitmap {
 	/* bitmap spinlock */
 	spinlock_t lock;
 
+	long offset; /* offset from superblock if file is NULL */
 	struct file *file; /* backing disk file */
 	struct page *sb_page; /* cached copy of the bitmap file superblock */
 	struct page **filemap; /* list of cache pages for the file */
@@ -255,6 +256,7 @@ void bitmap_print_sb(struct bitmap *bitmap);
 int bitmap_update_sb(struct bitmap *bitmap);
 
 int  bitmap_setallbits(struct bitmap *bitmap);
+void bitmap_write_all(struct bitmap *bitmap);
 
 /* these are exported */
 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 75f41d8faed2..ffa316ce4dc8 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -60,7 +60,14 @@
  */
 #define MD_MAJOR_VERSION                0
 #define MD_MINOR_VERSION                90
-#define MD_PATCHLEVEL_VERSION           1
+/*
+ * MD_PATCHLEVEL_VERSION indicates kernel functionality.
+ * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
+ *     and major_version/minor_version accordingly
+ * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
+ *     in the super status byte
+ */
+#define MD_PATCHLEVEL_VERSION           2
 
 extern int register_md_personality (int p_num, mdk_personality_t *p);
 extern int unregister_md_personality (int p_num);
@@ -78,6 +85,12 @@ extern void md_unplug_mddev(mddev_t *mddev);
 
 extern void md_print_devices (void);
 
+extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
+			   sector_t sector, int size, struct page *page);
+extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+			struct page *page, int rw);
+
+
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
 #endif 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 3e977025cf43..a3725b57fb7d 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -273,6 +273,10 @@ struct mddev_s
 
 	struct bitmap                   *bitmap; /* the bitmap for the device */
 	struct file			*bitmap_file; /* the bitmap file */
+	long				bitmap_offset; /* offset from superblock of
+							* start of bitmap. May be
+							* negative, but not '0'
+							*/
 
 	struct list_head		all_mddevs;
 };
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 8ba95d67329f..8e592a25a8b5 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -96,6 +96,7 @@ typedef struct mdp_device_descriptor_s {
 #define MD_SB_CLEAN		0
 #define MD_SB_ERRORS		1
 
+#define	MD_SB_BITMAP_PRESENT	8 /* bitmap may be present nearby */
 typedef struct mdp_superblock_s {
 	/*
 	 * Constant generic information
@@ -184,7 +185,7 @@ struct mdp_superblock_1 {
 	/* constant array information - 128 bytes */
 	__u32	magic;		/* MD_SB_MAGIC: 0xa92b4efc - little endian */
 	__u32	major_version;	/* 1 */
-	__u32	feature_map;	/* 0 for now */
+	__u32	feature_map;	/* bit 0 set if 'bitmap_offset' is meaningful */
 	__u32	pad0;		/* always set to 0 when writing */
 
 	__u8	set_uuid[16];	/* user-space generated. */
@@ -197,6 +198,10 @@ struct mdp_superblock_1 {
 
 	__u32	chunksize;	/* in 512byte sectors */
 	__u32	raid_disks;
+	__u32	bitmap_offset;	/* sectors after start of superblock that bitmap starts
+				 * NOTE: signed, so bitmap can be before superblock
+				 * only meaningful of feature_map[0] is set.
+				 */
 	__u8	pad1[128-96];	/* set to 0 when written */
 
 	/* constant this-device information - 64 bytes */
-- 
cgit v1.2.2


From 7bfa19f2748000d646dbdf8f48258cfe1d257b52 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:28 -0700
Subject: [PATCH] md: allow md to update multiple superblocks in parallel.

currently, md updates all superblocks (one on each device) in series.  It
waits for one write to complete before starting the next.  This isn't a big
problem as superblock updates don't happen that often.

However it is neater to do it in parallel, and if the drives in the array have
gone to "sleep" after a period of idleness, then waking them is parallel is
faster (and someone else should be worrying about power drain).

Futher, we will need parallel superblock updates for a future patch which
keeps the intent-logging bitmap near the superblock.

Also remove the silly code that retired superblock updates 100 times.  This
simply never made sense.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_k.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index a3725b57fb7d..8c14ba565a45 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -262,6 +262,7 @@ struct mddev_s
 
 	spinlock_t			write_lock;
 	wait_queue_head_t		sb_wait;	/* for waiting on superblock updates */
+	atomic_t			pending_writes;	/* number of active superblock writes */
 
 	unsigned int			safemode;	/* if set, update "clean" superblock
 							 * when no writes pending.
-- 
cgit v1.2.2


From 39730960d94306d7be414e8d54f4e5c071af1278 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Tue, 21 Jun 2005 17:17:28 -0700
Subject: [PATCH] Two small fixes for md verion-1 superblocks.

1/ Must typecast int to (sector_t) before inverting or we
 might not invert enough bits.

2/ When "bitmap_offset" was added to mdp_superblock_1, we didn't increase
   the count of words-used (96 to 100).

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/raid/md_p.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index 8e592a25a8b5..dc65cd435494 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -202,7 +202,7 @@ struct mdp_superblock_1 {
 				 * NOTE: signed, so bitmap can be before superblock
 				 * only meaningful of feature_map[0] is set.
 				 */
-	__u8	pad1[128-96];	/* set to 0 when written */
+	__u8	pad1[128-100];	/* set to 0 when written */
 
 	/* constant this-device information - 64 bytes */
 	__u64	data_offset;	/* sector start of data, often 0 */
-- 
cgit v1.2.2


From b3d5496ea5915fa4848fe307af9f7097f312e932 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 2 Apr 2005 20:31:02 +0200
Subject: [PATCH] I2C: Kill address ranges in non-sensors i2c chip drivers

Some months ago, you killed the address ranges mechanism from all
sensors i2c chip drivers (both the module parameters and the in-code
address lists). I think it was a very good move, as the ranges can
easily be replaced by individual addresses, and this allowed for
significant cleanups in the i2c core (let alone the impressive size
shrink for all these drivers).

Unfortunately you did not do the same for non-sensors i2c chip drivers.
These need the address ranges even less, so we could get rid of the
ranges here as well for another significant i2c core cleanup. Here comes
a patch which does just that. Since the process is exactly the same as
what you did for the other drivers set already, I did not split this one
in parts.

A documentation update is included.

The change saves 308 bytes in the i2c core, and an average 1382 bytes
for chip drivers which use I2C_CLIENT_INSMOD, 126 bytes for those which
do not.

This change is required if we want to merge the sensors and non-sensors
i2c code (and we want to do this).

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

Index: gregkh-2.6/Documentation/i2c/writing-clients
===================================================================
---
 include/linux/i2c.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index ebcd745f4cd6..be837b13f297 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -290,11 +290,8 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
  */
 struct i2c_client_address_data {
 	unsigned short *normal_i2c;
-	unsigned short *normal_i2c_range;
 	unsigned short *probe;
-	unsigned short *probe_range;
 	unsigned short *ignore;
-	unsigned short *ignore_range;
 	unsigned short *force;
 };
 
@@ -563,24 +560,15 @@ union i2c_smbus_data {
 #define I2C_CLIENT_INSMOD \
   I2C_CLIENT_MODULE_PARM(probe, \
                       "List of adapter,address pairs to scan additionally"); \
-  I2C_CLIENT_MODULE_PARM(probe_range, \
-                      "List of adapter,start-addr,end-addr triples to scan " \
-                      "additionally"); \
   I2C_CLIENT_MODULE_PARM(ignore, \
                       "List of adapter,address pairs not to scan"); \
-  I2C_CLIENT_MODULE_PARM(ignore_range, \
-                      "List of adapter,start-addr,end-addr triples not to " \
-                      "scan"); \
   I2C_CLIENT_MODULE_PARM(force, \
                       "List of adapter,address pairs to boldly assume " \
                       "to be present"); \
 	static struct i2c_client_address_data addr_data = {		\
 			.normal_i2c = 		normal_i2c,		\
-			.normal_i2c_range =	normal_i2c_range,	\
 			.probe =		probe,			\
-			.probe_range =		probe_range,		\
 			.ignore =		ignore,			\
-			.ignore_range =		ignore_range,		\
 			.force =		force,			\
 		}
 
-- 
cgit v1.2.2


From 3886246a257e828248ce1e72ced00408a3557f0d Mon Sep 17 00:00:00 2001
From: Sebastian Witt <se.witt@gmx.net>
Date: Wed, 13 Apr 2005 22:25:39 +0200
Subject: [PATCH] I2C: i2c-vid.h: Support for VID to reg conversion

Adds conversion from VID (mV) to register value. Used by the atxp1 I2C module.
Removed uneeded switch case.

Signed-off-by: Sebastian Witt <se.witt@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-vid.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-vid.h b/include/linux/i2c-vid.h
index 974835e3530f..41d0635e0ba9 100644
--- a/include/linux/i2c-vid.h
+++ b/include/linux/i2c-vid.h
@@ -97,3 +97,15 @@ static inline int vid_from_reg(int val, int vrm)
 		                     2050 - (val) * 50);
 	}
 }
+
+static inline int vid_to_reg(int val, int vrm)
+{
+	switch (vrm) {
+	case 91:		/* VRM 9.1 */
+	case 90:		/* VRM 9.0 */
+		return ((val >= 1100) && (val <= 1850) ?
+			((18499 - val * 10) / 25 + 5) / 10 : -1);
+	default:
+		return -1;
+	}
+}
-- 
cgit v1.2.2


From 10c08f8100ee2c4d27b862635574cdf4ef439e67 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Mon, 6 Jun 2005 19:34:45 +0200
Subject: [PATCH] I2C: rename i2c-sysfs.h to hwmon-sysfs.h

This patch renames the new linux/i2c-sysfs.h header file to
linux/hwmon-sysfs.h. This names seems to be more appropriate since this
file defines macros and structures not related to i2c but to hardware
monitoring drivers. The patch also updates the five hardware monitoring
driver which include that header file already.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/hwmon-sysfs.h | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/i2c-sysfs.h   | 36 ------------------------------------
 2 files changed, 36 insertions(+), 36 deletions(-)
 create mode 100644 include/linux/hwmon-sysfs.h
 delete mode 100644 include/linux/i2c-sysfs.h

(limited to 'include/linux')

diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h
new file mode 100644
index 000000000000..1b5018a965f5
--- /dev/null
+++ b/include/linux/hwmon-sysfs.h
@@ -0,0 +1,36 @@
+/*
+ *  hwmon-sysfs.h - hardware monitoring chip driver sysfs defines
+ *
+ *  Copyright (C) 2005 Yani Ioannou <yani.ioannou@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_HWMON_SYSFS_H
+#define _LINUX_HWMON_SYSFS_H
+
+struct sensor_device_attribute{
+	struct device_attribute dev_attr;
+	int index;
+};
+#define to_sensor_dev_attr(_dev_attr) \
+	container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
+
+#define SENSOR_DEVICE_ATTR(_name,_mode,_show,_store,_index)	\
+struct sensor_device_attribute sensor_dev_attr_##_name = {	\
+	.dev_attr =	__ATTR(_name,_mode,_show,_store),	\
+	.index =	_index,					\
+}
+
+#endif /* _LINUX_HWMON_SYSFS_H */
diff --git a/include/linux/i2c-sysfs.h b/include/linux/i2c-sysfs.h
deleted file mode 100644
index d7bf6ce11679..000000000000
--- a/include/linux/i2c-sysfs.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- *  i2c-sysfs.h - i2c chip driver sysfs defines
- *
- *  Copyright (C) 2005 Yani Ioannou <yani.ioannou@gmail.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#ifndef _LINUX_I2C_SYSFS_H
-#define _LINUX_I2C_SYSFS_H
-
-struct sensor_device_attribute{
-	struct device_attribute dev_attr;
-	int index;
-};
-#define to_sensor_dev_attr(_dev_attr) \
-	container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
-
-#define SENSOR_DEVICE_ATTR(_name,_mode,_show,_store,_index)	\
-struct sensor_device_attribute sensor_dev_attr_##_name = {	\
-	.dev_attr =	__ATTR(_name,_mode,_show,_store),	\
-	.index =	_index,					\
-}
-
-#endif /* _LINUX_I2C_SYSFS_H */
-- 
cgit v1.2.2


From c124a78d8c7475ecc43f385f34112b638c4228d9 Mon Sep 17 00:00:00 2001
From: Randy Vinson <rvinson@mvista.com>
Date: Fri, 3 Jun 2005 14:36:06 -0700
Subject: [PATCH] I2C: Add support for Maxim/Dallas DS1374 Real-Time Clock Chip
 (1/2)

Add support for Maxim/Dallas DS1374 Real-Time Clock Chip

This change adds support for the Maxim/Dallas DS1374 RTC chip. This chip
is an I2C-based RTC that maintains a simple 32-bit binary seconds count
with battery backup support.

Signed-off-by: Randy Vinson <rvinson@mvista.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-id.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index 89270ce51470..33f08258f22b 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -108,6 +108,7 @@
 #define I2C_DRIVERID_TDA7313	62	/* TDA7313 audio processor	*/
 #define I2C_DRIVERID_MAX6900	63	/* MAX6900 real-time clock	*/
 #define I2C_DRIVERID_SAA7114H	64	/* video decoder		*/
+#define I2C_DRIVERID_DS1374	65	/* DS1374 real time clock	*/
 
 
 #define I2C_DRIVERID_EXP0	0xF0	/* experimental use id's	*/
-- 
cgit v1.2.2


From dd7f0b80926befc8c70a873b5b0c0c7b5fd1e7b9 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@netfilter.org>
Date: Wed, 22 Jun 2005 12:38:33 -0700
Subject: [NETFILTER]: Fix "iptables -D" rule deletion with ipt_CLUSTERIP
 target.

The patch just changes the order of structure members.

Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
index baa83e757156..d9bceedfb3dc 100644
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -18,7 +18,6 @@ struct clusterip_config;
 struct ipt_clusterip_tgt_info {
 
 	u_int32_t flags;
-	struct clusterip_config *config;
 	
 	/* only relevant for new ones */
 	u_int8_t clustermac[6];
@@ -27,6 +26,8 @@ struct ipt_clusterip_tgt_info {
 	u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
 	enum clusterip_hashmode hash_mode;
 	u_int32_t hash_initval;
+
+	struct clusterip_config *config;
 };
 
 #endif /*_IPT_CLUSTERIP_H_target*/
-- 
cgit v1.2.2


From 5ee0ed7d3ab620a764740fb018f469d45f561931 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:20 +0000
Subject: [PATCH] RPC: Make rpc_create_client() probe server for RPC
 program+version support

 Ensure that we don't create an RPC client without checking that the server
 does indeed support the RPC program + version that we are trying to set up.

 This enables us to immediately return an error to "mount" if it turns out
 that the server is only supporting NFSv2, when we requested NFSv3 or NFSv4.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 2709caf4d128..d25e80f77ff5 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -111,6 +111,9 @@ struct rpc_procinfo {
 struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
 				struct rpc_program *info,
 				u32 version, rpc_authflavor_t authflavor);
+struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
+				struct rpc_program *info,
+				u32 version, rpc_authflavor_t authflavor);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
@@ -129,6 +132,7 @@ void		rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
+int		rpc_ping(struct rpc_clnt *clnt, int flags);
 
 static __inline__
 int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-- 
cgit v1.2.2


From 4ce79717ce32a9f88c1ddce4b9658556cb59d37a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: Header file cleanup...

 - Move NFSv4 state definitions into a private header file.
 - Clean up gunk in nfs_fs.h

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 241 -------------------------------------------------
 1 file changed, 241 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index dbac7f363e5d..fb33e7655cfa 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -15,7 +15,6 @@
 #include <linux/pagemap.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
-#include <linux/uio.h>
 
 #include <linux/nfs_fs_sb.h>
 
@@ -29,7 +28,6 @@
 #include <linux/nfs4.h>
 #include <linux/nfs_xdr.h>
 #include <linux/rwsem.h>
-#include <linux/workqueue.h>
 #include <linux/mempool.h>
 
 /*
@@ -43,13 +41,6 @@
 #define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
 #define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
 
-/*
- * The upper limit on timeouts for the exponential backoff algorithm.
- */
-#define NFS_WRITEBACK_DELAY		(5*HZ)
-#define NFS_WRITEBACK_LOCKDELAY		(60*HZ)
-#define NFS_COMMIT_DELAY		(5*HZ)
-
 /*
  * superblock magic number for NFS
  */
@@ -60,9 +51,6 @@
  */
 #define NFS_RPC_SWAPFLAGS		(RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
 
-#define NFS_RW_SYNC		0x0001	/* O_SYNC handling */
-#define NFS_RW_SWAP		0x0002	/* This is a swap request */
-
 /*
  * When flushing a cluster of dirty pages, there can be different
  * strategies:
@@ -434,11 +422,6 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
 	mempool_free(p, nfs_wdata_mempool);
 }
 
-/* Hack for future NFS swap support */
-#ifndef IS_SWAPFILE
-# define IS_SWAPFILE(inode)	(0)
-#endif
-
 /*
  * linux/fs/nfs/read.c
  */
@@ -515,230 +498,6 @@ extern void * nfs_root_data(void);
 
 #define NFS_JUKEBOX_RETRY_TIME (5 * HZ)
 
-#ifdef CONFIG_NFS_V4
-
-struct idmap;
-
-/*
- * In a seqid-mutating op, this macro controls which error return
- * values trigger incrementation of the seqid.
- *
- * from rfc 3010:
- * The client MUST monotonically increment the sequence number for the
- * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE
- * operations.  This is true even in the event that the previous
- * operation that used the sequence number received an error.  The only
- * exception to this rule is if the previous operation received one of
- * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID,
- * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR,
- * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE.
- *
- */
-#define seqid_mutating_err(err)       \
-(((err) != NFSERR_STALE_CLIENTID) &&  \
- ((err) != NFSERR_STALE_STATEID)  &&  \
- ((err) != NFSERR_BAD_STATEID)    &&  \
- ((err) != NFSERR_BAD_SEQID)      &&  \
- ((err) != NFSERR_BAD_XDR)        &&  \
- ((err) != NFSERR_RESOURCE)       &&  \
- ((err) != NFSERR_NOFILEHANDLE))
-
-enum nfs4_client_state {
-	NFS4CLNT_OK  = 0,
-};
-
-/*
- * The nfs4_client identifies our client state to the server.
- */
-struct nfs4_client {
-	struct list_head	cl_servers;	/* Global list of servers */
-	struct in_addr		cl_addr;	/* Server identifier */
-	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
-	unsigned long		cl_state;
-
-	u32			cl_lockowner_id;
-
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
-	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
-	spinlock_t		cl_lock;
-	atomic_t		cl_count;
-
-	struct rpc_clnt *	cl_rpcclient;
-	struct rpc_cred *	cl_cred;
-
-	struct list_head	cl_superblocks;	/* List of nfs_server structs */
-
-	unsigned long		cl_lease_time;
-	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
-
-	wait_queue_head_t	cl_waitq;
-	struct rpc_wait_queue	cl_rpcwaitq;
-
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
-	/* idmapper */
-	struct idmap *		cl_idmap;
-
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			cl_ipaddr[16];
-	unsigned char		cl_id_uniquifier;
-};
-
-/*
- * NFS4 state_owners and lock_owners are simply labels for ordered
- * sequences of RPC calls. Their sole purpose is to provide once-only
- * semantics by allowing the server to identify replayed requests.
- *
- * The ->so_sema is held during all state_owner seqid-mutating operations:
- * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
- * so_seqid.
- */
-struct nfs4_state_owner {
-	struct list_head     so_list;	 /* per-clientid list of state_owners */
-	struct nfs4_client   *so_client;
-	u32                  so_id;      /* 32-bit identifier, unique */
-	struct semaphore     so_sema;
-	u32                  so_seqid;   /* protected by so_sema */
-	atomic_t	     so_count;
-
-	struct rpc_cred	     *so_cred;	 /* Associated cred */
-	struct list_head     so_states;
-	struct list_head     so_delegations;
-};
-
-/*
- * struct nfs4_state maintains the client-side state for a given
- * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
- *
- * OPEN:
- * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server,
- * we need to know how many files are open for reading or writing on a
- * given inode. This information too is stored here.
- *
- * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
- */
-
-struct nfs4_lock_state {
-	struct list_head	ls_locks;	/* Other lock stateids */
-	fl_owner_t		ls_owner;	/* POSIX lock owner */
-#define NFS_LOCK_INITIALIZED 1
-	int			ls_flags;
-	u32			ls_seqid;
-	u32			ls_id;
-	nfs4_stateid		ls_stateid;
-	atomic_t		ls_count;
-};
-
-/* bits for nfs4_state->flags */
-enum {
-	LK_STATE_IN_USE,
-	NFS_DELEGATED_STATE,
-};
-
-struct nfs4_state {
-	struct list_head open_states;	/* List of states for the same state_owner */
-	struct list_head inode_states;	/* List of states for the same inode */
-	struct list_head lock_states;	/* List of subservient lock stateids */
-
-	struct nfs4_state_owner *owner;	/* Pointer to the open owner */
-	struct inode *inode;		/* Pointer to the inode */
-
-	unsigned long flags;		/* Do we hold any locks? */
-	struct semaphore lock_sema;	/* Serializes file locking operations */
-	rwlock_t state_lock;		/* Protects the lock_states list */
-
-	nfs4_stateid stateid;
-
-	unsigned int nreaders;
-	unsigned int nwriters;
-	int state;			/* State on the server (R,W, or RW) */
-	atomic_t count;
-};
-
-
-struct nfs4_exception {
-	long timeout;
-	int retry;
-};
-
-struct nfs4_state_recovery_ops {
-	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
-	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
-};
-
-extern struct dentry_operations nfs4_dentry_operations;
-extern struct inode_operations nfs4_dir_inode_operations;
-
-/* nfs4proc.c */
-extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
-extern int nfs4_proc_async_renew(struct nfs4_client *);
-extern int nfs4_proc_renew(struct nfs4_client *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
-extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
-extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
-
-extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
-extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
-
-/* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
-extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
-
-/* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern int nfs4_init_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
-
-extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
-extern void nfs4_put_state_owner(struct nfs4_state_owner *);
-extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
-extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
-extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
-extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
-extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
-extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t);
-extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t);
-extern void nfs4_put_lock_state(struct nfs4_lock_state *state);
-extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
-extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *);
-extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
-
-
-
-struct nfs4_mount_data;
-#else
-#define init_nfsv4_state(server)  do { } while (0)
-#define destroy_nfsv4_state(server)       do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
-#define nfs4_close_state(a, b) do { } while (0)
-#define nfs4_renewd_prepare_shutdown(server) do { } while (0)
-#endif
-
 #endif /* __KERNEL__ */
 
 /*
-- 
cgit v1.2.2


From a656db998785324a818005bcf71bae6dcbbb3cf5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: Remove unused NFS inode field readdir_timestamp.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fb33e7655cfa..68d5aae89972 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -128,7 +128,6 @@ struct nfs_inode {
 	 *
 	 *	mtime != read_cache_mtime
 	 */
-	unsigned long		readdir_timestamp;
 	unsigned long		read_cache_jiffies;
 	unsigned long		attrtimeo;
 	unsigned long		attrtimeo_timestamp;
-- 
cgit v1.2.2


From 96651ab341cde0fee940ec837f323d711cbfa7d5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] RPC: Shrink struct rpc_task by switching to wait_on_bit()

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 99d17ed7cebb..4d77e90d0b30 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -31,7 +31,6 @@ struct rpc_wait_queue;
 struct rpc_wait {
 	struct list_head	list;		/* wait queue links */
 	struct list_head	links;		/* Links to related tasks */
-	wait_queue_head_t	waitq;		/* sync: sleep on this q */
 	struct rpc_wait_queue *	rpc_waitq;	/* RPC wait queue we're on */
 };
 
-- 
cgit v1.2.2


From 464a98bd70bae8c559cfc82af799faf44824ce64 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:21 +0000
Subject: [PATCH] NFS: cleanup: shrink struct nfs_open_context

 Remove the wait queue, and replace the functions that depended on it
 with wait_on_bit().

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 68d5aae89972..0b01b96337f8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -84,7 +84,6 @@ struct nfs_open_context {
 	int error;
 
 	struct list_head list;
-	wait_queue_head_t waitq;
 };
 
 /*
-- 
cgit v1.2.2


From 92cfc62cb8412c9563860b1bf70cd4701f03092e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFS: Allow NFS versions to support different sets of inode
 operations.

 ACL support will require supporting additional inode operations in v4
 (getxattr, setxattr, listxattr).  This patch allows different protocol versions
 to support different inode operations by adding a file_inode_ops to the
 nfs_rpc_ops (to match the existing dir_inode_ops).

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 47037d9521cb..5b45bafd9db5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -667,6 +667,7 @@ struct nfs_rpc_ops {
 	int	version;		/* Protocol version */
 	struct dentry_operations *dentry_ops;
 	struct inode_operations *dir_inode_ops;
+	struct inode_operations *file_inode_ops;
 
 	int	(*getroot) (struct nfs_server *, struct nfs_fh *,
 			    struct nfs_fsinfo *);
-- 
cgit v1.2.2


From ada70d9425bcc5e376fef8591e4e76e204c0834c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFS: Add hooks to allow common NFS attribute code to clear
 cached acls

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h  | 1 +
 include/linux/nfs_xdr.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0b01b96337f8..140bdf489f71 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -189,6 +189,7 @@ struct nfs_inode {
 #define NFS_INO_INVALID_DATA	0x0010		/* cached data is invalid */
 #define NFS_INO_INVALID_ATIME	0x0020		/* cached atime is invalid */
 #define NFS_INO_INVALID_ACCESS	0x0040		/* cached access cred invalid */
+#define NFS_INO_INVALID_ACL	0x0080		/* cached acls are invalid */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5b45bafd9db5..cf38db59f347 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -714,6 +714,7 @@ struct nfs_rpc_ops {
 	int	(*file_open)   (struct inode *, struct file *);
 	int	(*file_release) (struct inode *, struct file *);
 	int	(*lock)(struct file *, int, struct file_lock *);
+	void	(*clear_acl_cache)(struct inode *);
 };
 
 /*
-- 
cgit v1.2.2


From 029d105e66e5a90850d5a09dad76815d0bcfcaa3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: Client-side xdr for reading NFSv4 acls

 Client-side support for NFSv4 acls: xdr encoding and decoding routines for
 reading acls

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4.h    | 1 +
 include/linux/nfs_xdr.h | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 5ca8a8d8ccdf..6ee7e2585af5 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -382,6 +382,7 @@ enum {
 	NFSPROC4_CLNT_READDIR,
 	NFSPROC4_CLNT_SERVER_CAPS,
 	NFSPROC4_CLNT_DELEGRETURN,
+	NFSPROC4_CLNT_GETACL,
 };
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index cf38db59f347..9f5e1d407c7b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -326,6 +326,13 @@ struct nfs_setattrargs {
 	const u32 *			bitmask;
 };
 
+struct nfs_getaclargs {
+	struct nfs_fh *			fh;
+	size_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
 struct nfs_setattrres {
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
-- 
cgit v1.2.2


From 23ec6965c20db96bc8ea7af0ec178f074dd31c40 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:22 +0000
Subject: [PATCH] NFSv4: Client-side xdr for writing NFSv4 acls

 Client-side support for NFSv4 acls: xdr encoding and decoding routines for
 writing acls

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs4.h    | 1 +
 include/linux/nfs_xdr.h | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 6ee7e2585af5..5bb5b2fd7ba2 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -383,6 +383,7 @@ enum {
 	NFSPROC4_CLNT_SERVER_CAPS,
 	NFSPROC4_CLNT_DELEGRETURN,
 	NFSPROC4_CLNT_GETACL,
+	NFSPROC4_CLNT_SETACL,
 };
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9f5e1d407c7b..46b206b460c0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -326,6 +326,13 @@ struct nfs_setattrargs {
 	const u32 *			bitmask;
 };
 
+struct nfs_setaclargs {
+	struct nfs_fh *			fh;
+	size_t				acl_len;
+	unsigned int			acl_pgbase;
+	struct page **			acl_pages;
+};
+
 struct nfs_getaclargs {
 	struct nfs_fh *			fh;
 	size_t				acl_len;
-- 
cgit v1.2.2


From e50a1c2e1f816c81eed6a589019052cb44189267 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 22 Jun 2005 17:16:23 +0000
Subject: [PATCH] NFSv4: client-side caching NFSv4 ACLs

 Add nfs4_acl field to the nfs_inode, and use it to cache acls.  Only cache
 acls of size up to a page.  Also prepare for up to a page of acl data even
 when the user doesn't pass in a buffer, as when they want to get the acl
 length to decide what size buffer to allocate.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 140bdf489f71..d2b5d7e0e85a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -169,13 +169,13 @@ struct nfs_inode {
 	wait_queue_head_t	nfs_i_wait;
 
 #ifdef CONFIG_NFS_V4
+	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
 	struct list_head	open_states;
 	struct nfs_delegation	*delegation;
 	int			 delegation_state;
 	struct rw_semaphore	rwsem;
 #endif /* CONFIG_NFS_V4*/
-
 	struct inode		vfs_inode;
 };
 
-- 
cgit v1.2.2


From 007e251f2b2760f738c92adc8c80cbae0bed3ce5 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:23 +0000
Subject: [PATCH] RPC: Allow multiple RPC client programs to share the same
 transport

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index d25e80f77ff5..ab151bbb66df 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -114,6 +114,8 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname,
 struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname,
 				struct rpc_program *info,
 				u32 version, rpc_authflavor_t authflavor);
+struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
+				struct rpc_program *, int);
 struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
 int		rpc_shutdown_client(struct rpc_clnt *);
 int		rpc_destroy_client(struct rpc_clnt *);
-- 
cgit v1.2.2


From e053d1ab62c8ef0eff3dd4c95448cad3c6d2fbf4 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <okir@suse.de>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: Lazy RPC receive buffer allocation

 Signed-off-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 541dcf838abf..0f5b7a5a7432 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -160,7 +160,7 @@ typedef struct {
 
 typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
 
-extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
+extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
 		skb_reader_t *, skb_read_actor_t);
 
 struct socket;
-- 
cgit v1.2.2


From 7e06b53d796a3740307b54aa2799077f8a0c84e7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: fix accounting bug in the case of a truncated RPC
 message

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 0f5b7a5a7432..5d1eed2b58a1 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -160,7 +160,7 @@ typedef struct {
 
 typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
 
-extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
+extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
 		skb_reader_t *, skb_read_actor_t);
 
 struct socket;
-- 
cgit v1.2.2


From bd8100e7eda87507649c6ba4cb32173b34e49986 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: Encode and decode arbitrary XDR arrays

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 5d1eed2b58a1..34ec3e8d99b3 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -146,7 +146,8 @@ extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int);
-extern int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len);
+extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int);
+extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int);
 
 /*
  * Helper structure for copying from an sk_buff.
@@ -168,6 +169,22 @@ struct sockaddr;
 extern int xdr_sendpages(struct socket *, struct sockaddr *, int,
 		struct xdr_buf *, unsigned int, int);
 
+extern int xdr_encode_word(struct xdr_buf *, int, u32);
+extern int xdr_decode_word(struct xdr_buf *, int, u32 *);
+
+struct xdr_array2_desc;
+typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
+struct xdr_array2_desc {
+	unsigned int elem_size;
+	unsigned int array_len;
+	xdr_xcode_elem_t xcode;
+};
+
+extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
+                             struct xdr_array2_desc *desc);
+extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
+			     struct xdr_array2_desc *desc);
+
 /*
  * Provide some simple tools for XDR buffer overflow-checking etc.
  */
-- 
cgit v1.2.2


From 9ba02638e4be28dd4ff724202a640264427c62d1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:24 +0000
Subject: [PATCH] RPC: Allow the sunrpc server to multiplex serveral programs
 on a single port

 The NFS and NFSACL programs run on the same RPC transport.  This patch adds
 support for this by converting svc_program into a chained list of programs
 (server-side).

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Signed-off-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 37003970cf2e..facb94488bb1 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -240,9 +240,10 @@ struct svc_deferred_req {
 };
 
 /*
- * RPC program
+ * List of RPC programs on the same transport endpoint
  */
 struct svc_program {
+	struct svc_program *	pg_next;	/* other programs (same xprt) */
 	u32			pg_prog;	/* program number */
 	unsigned int		pg_lovers;	/* lowest version */
 	unsigned int		pg_hivers;	/* lowest version */
-- 
cgit v1.2.2


From a257cdd0e2179630d3201c32ba14d7fcb3c3a055 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:26 +0000
Subject: [PATCH] NFSD: Add server support for NFSv3 ACLs.

 This adds functions for encoding and decoding POSIX ACLs for the NFSACL
 protocol extension, and the GETACL and SETACL RPCs.  The implementation is
 compatible with NFSACL in Solaris.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfsacl.h     | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/nfsd.h  | 16 +++++++++++++
 include/linux/nfsd/xdr.h   |  4 ++++
 include/linux/nfsd/xdr3.h  | 26 +++++++++++++++++++++
 include/linux/sunrpc/svc.h | 11 +++++++++
 5 files changed, 115 insertions(+)
 create mode 100644 include/linux/nfsacl.h

(limited to 'include/linux')

diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h
new file mode 100644
index 000000000000..54487a99beb8
--- /dev/null
+++ b/include/linux/nfsacl.h
@@ -0,0 +1,58 @@
+/*
+ * File: linux/nfsacl.h
+ *
+ * (C) 2003 Andreas Gruenbacher <agruen@suse.de>
+ */
+#ifndef __LINUX_NFSACL_H
+#define __LINUX_NFSACL_H
+
+#define NFS_ACL_PROGRAM	100227
+
+#define ACLPROC2_GETACL		1
+#define ACLPROC2_SETACL		2
+#define ACLPROC2_GETATTR	3
+#define ACLPROC2_ACCESS		4
+
+#define ACLPROC3_GETACL		1
+#define ACLPROC3_SETACL		2
+
+
+/* Flags for the getacl/setacl mode */
+#define NFS_ACL			0x0001
+#define NFS_ACLCNT		0x0002
+#define NFS_DFACL		0x0004
+#define NFS_DFACLCNT		0x0008
+
+/* Flag for Default ACL entries */
+#define NFS_ACL_DEFAULT		0x1000
+
+#ifdef __KERNEL__
+
+#include <linux/posix_acl.h>
+
+/* Maximum number of ACL entries over NFS */
+#define NFS_ACL_MAX_ENTRIES	1024
+
+#define NFSACL_MAXWORDS		(2*(2+3*NFS_ACL_MAX_ENTRIES))
+#define NFSACL_MAXPAGES		((2*(8+12*NFS_ACL_MAX_ENTRIES) + PAGE_SIZE-1) \
+				 >> PAGE_SHIFT)
+
+static inline unsigned int
+nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default)
+{
+	unsigned int w = 16;
+	w += max(acl_access ? (int)acl_access->a_count : 3, 4) * 12;
+	if (acl_default)
+		w += max((int)acl_default->a_count, 4) * 12;
+	return w;
+}
+
+extern unsigned int
+nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
+	      struct posix_acl *acl, int encode_entries, int typeflag);
+extern unsigned int
+nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
+	      struct posix_acl **pacl);
+
+#endif /* __KERNEL__ */
+#endif  /* __LINUX_NFSACL_H */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 8f85d9a59607..4bf931d5ff56 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -15,6 +15,7 @@
 #include <linux/unistd.h>
 #include <linux/dirent.h>
 #include <linux/fs.h>
+#include <linux/posix_acl.h>
 #include <linux/mount.h>
 
 #include <linux/nfsd/debug.h>
@@ -124,6 +125,21 @@ int		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 int		nfsd_notify_change(struct inode *, struct iattr *);
 int		nfsd_permission(struct svc_export *, struct dentry *, int);
 
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+#ifdef CONFIG_NFSD_V2_ACL
+extern struct svc_version nfsd_acl_version2;
+#else
+#define nfsd_acl_version2 NULL
+#endif
+#ifdef CONFIG_NFSD_V3_ACL
+extern struct svc_version nfsd_acl_version3;
+#else
+#define nfsd_acl_version3 NULL
+#endif
+struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
+int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
+#endif
+
 
 /* 
  * NFSv4 State
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h
index ecccef777dae..130d4f588a37 100644
--- a/include/linux/nfsd/xdr.h
+++ b/include/linux/nfsd/xdr.h
@@ -169,4 +169,8 @@ int nfssvc_encode_entry(struct readdir_cd *, const char *name,
 
 int nfssvc_release_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *);
 
+/* Helper functions for NFSv2 ACL code */
+u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp);
+u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp);
+
 #endif /* LINUX_NFSD_H */
diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h
index 0ae9e0ef5f68..21e18ce7ca63 100644
--- a/include/linux/nfsd/xdr3.h
+++ b/include/linux/nfsd/xdr3.h
@@ -110,6 +110,19 @@ struct nfsd3_commitargs {
 	__u32			count;
 };
 
+struct nfsd3_getaclargs {
+	struct svc_fh		fh;
+	int			mask;
+};
+
+struct posix_acl;
+struct nfsd3_setaclargs {
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
 struct nfsd3_attrstat {
 	__u32			status;
 	struct svc_fh		fh;
@@ -209,6 +222,14 @@ struct nfsd3_commitres {
 	struct svc_fh		fh;
 };
 
+struct nfsd3_getaclres {
+	__u32			status;
+	struct svc_fh		fh;
+	int			mask;
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+};
+
 /* dummy type for release */
 struct nfsd3_fhandle_pair {
 	__u32			dummy;
@@ -241,6 +262,7 @@ union nfsd3_xdrstore {
 	struct nfsd3_fsinfores		fsinfores;
 	struct nfsd3_pathconfres	pathconfres;
 	struct nfsd3_commitres		commitres;
+	struct nfsd3_getaclres		getaclres;
 };
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
@@ -316,6 +338,10 @@ int nfs3svc_encode_entry(struct readdir_cd *, const char *name,
 int nfs3svc_encode_entry_plus(struct readdir_cd *, const char *name,
 				int namlen, loff_t offset, ino_t ino,
 				unsigned int);
+/* Helper functions for NFSv3 ACL code */
+u32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p,
+				struct svc_fh *fhp);
+u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp);
 
 
 #endif /* _LINUX_NFSD_XDR3_H */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index facb94488bb1..5af8800e0ce3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -185,6 +185,17 @@ xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
 	return vec->iov_len <= PAGE_SIZE;
 }
 
+static inline struct page *
+svc_take_res_page(struct svc_rqst *rqstp)
+{
+	if (rqstp->rq_arghi <= rqstp->rq_argused)
+		return NULL;
+	rqstp->rq_arghi--;
+	rqstp->rq_respages[rqstp->rq_resused] =
+		rqstp->rq_argpages[rqstp->rq_arghi];
+	return rqstp->rq_respages[rqstp->rq_resused++];
+}
+
 static inline int svc_take_page(struct svc_rqst *rqstp)
 {
 	if (rqstp->rq_arghi <= rqstp->rq_argused)
-- 
cgit v1.2.2


From b7fa0554cf1ba6d6895cd0a5b02989a26e0bc704 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Add support for NFSv3 ACLs

 This adds acl support fo nfs clients via the NFSACL protocol extension, by
 implementing the getxattr, listxattr, setxattr, and removexattr iops for the
 system.posix_acl_access and system.posix_acl_default attributes.  This patch
 implements a dumb version that uses no caching (and thus adds some overhead).
 (Another patch in this patchset adds caching as well.)

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h    | 31 +++++++++++++++++++++++++++++++
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_mount.h |  1 +
 include/linux/nfs_xdr.h   | 27 +++++++++++++++++++++++++++
 4 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d2b5d7e0e85a..3a5e442ac776 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -301,6 +301,9 @@ extern u32 root_nfs_parse_addr(char *name); /*__init*/
  * linux/fs/nfs/file.c
  */
 extern struct inode_operations nfs_file_inode_operations;
+#ifdef CONFIG_NFS_V3
+extern struct inode_operations nfs3_file_inode_operations;
+#endif /* CONFIG_NFS_V3 */
 extern struct file_operations nfs_file_operations;
 extern struct address_space_operations nfs_file_aops;
 
@@ -315,6 +318,22 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file)
 	return NULL;
 }
 
+/*
+ * linux/fs/nfs/xattr.c
+ */
+#ifdef CONFIG_NFS_V3_ACL
+extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
+extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t);
+extern int nfs3_setxattr(struct dentry *, const char *,
+			const void *, size_t, int);
+extern int nfs3_removexattr (struct dentry *, const char *name);
+#else
+# define nfs3_listxattr NULL
+# define nfs3_getxattr NULL
+# define nfs3_setxattr NULL
+# define nfs3_removexattr NULL
+#endif
+
 /*
  * linux/fs/nfs/direct.c
  */
@@ -329,6 +348,9 @@ extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf,
  * linux/fs/nfs/dir.c
  */
 extern struct inode_operations nfs_dir_inode_operations;
+#ifdef CONFIG_NFS_V3
+extern struct inode_operations nfs3_dir_inode_operations;
+#endif /* CONFIG_NFS_V3 */
 extern struct file_operations nfs_dir_operations;
 extern struct dentry_operations nfs_dentry_operations;
 
@@ -449,6 +471,15 @@ static inline void nfs_readdata_free(struct nfs_read_data *p)
 
 extern void  nfs_readdata_release(struct rpc_task *task);
 
+/*
+ * linux/fs/nfs3proc.c
+ */
+#ifdef CONFIG_NFS_V3_ACL
+extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type);
+extern int nfs3_proc_setacl(struct inode *inode, int type,
+			    struct posix_acl *acl);
+#endif /* CONFIG_NFS_V3_ACL */
+
 /*
  * linux/fs/mount_clnt.c
  * (Used only by nfsroot module)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fc51645d61ee..3d3a305488cf 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -10,6 +10,7 @@
 struct nfs_server {
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
+	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
 	struct backing_dev_info	backing_dev_info;
 	int			flags;		/* various flags */
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index 0071428231f9..659c75438454 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -58,6 +58,7 @@ struct nfs_mount_data {
 #define NFS_MOUNT_KERBEROS	0x0100	/* 3 */
 #define NFS_MOUNT_NONLM		0x0200	/* 3 */
 #define NFS_MOUNT_BROKEN_SUID	0x0400	/* 4 */
+#define NFS_MOUNT_NOACL		0x0800	/* 4 */
 #define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
 #define NFS_MOUNT_SECFLAVOUR	0x2000	/* 5 */
 #define NFS_MOUNT_FLAGMASK	0xFFFF
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 46b206b460c0..a2bf6914ff1b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -2,6 +2,7 @@
 #define _LINUX_NFS_XDR_H
 
 #include <linux/sunrpc/xprt.h>
+#include <linux/nfsacl.h>
 
 struct nfs4_fsid {
 	__u64 major;
@@ -368,6 +369,20 @@ struct nfs_readdirargs {
 	struct page **		pages;
 };
 
+struct nfs3_getaclargs {
+	struct nfs_fh *		fh;
+	int			mask;
+	struct page **		pages;
+};
+
+struct nfs3_setaclargs {
+	struct inode *		inode;
+	int			mask;
+	struct posix_acl *	acl_access;
+	struct posix_acl *	acl_default;
+	struct page **		pages;
+};
+
 struct nfs_diropok {
 	struct nfs_fh *		fh;
 	struct nfs_fattr *	fattr;
@@ -491,6 +506,15 @@ struct nfs3_readdirres {
 	int			plus;
 };
 
+struct nfs3_getaclres {
+	struct nfs_fattr *	fattr;
+	int			mask;
+	unsigned int		acl_access_count;
+	unsigned int		acl_default_count;
+	struct posix_acl *	acl_access;
+	struct posix_acl *	acl_default;
+};
+
 #ifdef CONFIG_NFS_V4
 
 typedef u64 clientid4;
@@ -748,4 +772,7 @@ extern struct rpc_version	nfs_version2;
 extern struct rpc_version	nfs_version3;
 extern struct rpc_version	nfs_version4;
 
+extern struct rpc_version	nfsacl_version3;
+extern struct rpc_program	nfsacl_program;
+
 #endif
-- 
cgit v1.2.2


From 055ffbea0596942579b0dae71d5dab78de8135f6 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Fix handling of the umask when an NFSv3 default acl is
 present.

 NFSv3 has no concept of a umask on the server side: The client applies
 the umask locally, and sends the effective permissions to the server.
 This behavior is wrong when files are created in a directory that has a
 default ACL.  In this case, the umask is supposed to be ignored, and
 only the default ACL determines the file's effective permissions.

 Usually its the server's task to conditionally apply the umask.  But
 since the server knows nothing about the umask, we have to do it on the
 client side.  This patch tries to fetch the parent directory's default
 ACL before creating a new file, computes the appropriate create mode to
 send to the server, and finally sets the new file's access and default
 acl appropriately.

 Many thanks to Buck Huppmann <buchk@pobox.com> for sending the initial
 version of this patch, as well as for arguing why we need this change.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 3a5e442ac776..7662c5131b47 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -478,6 +478,15 @@ extern void  nfs_readdata_release(struct rpc_task *task);
 extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type);
 extern int nfs3_proc_setacl(struct inode *inode, int type,
 			    struct posix_acl *acl);
+extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
+		mode_t mode);
+#else
+static inline int nfs3_proc_set_default_acl(struct inode *dir,
+					    struct inode *inode,
+					    mode_t mode)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V3_ACL */
 
 /*
-- 
cgit v1.2.2


From 5c6a9f7d92291c832d47e792ed1fafa44acb066e Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 22 Jun 2005 17:16:27 +0000
Subject: [PATCH] NFS: Cache the NFSv3 acls.

 Attach acls to inodes in the icache to avoid unnecessary GETACL RPC
 round-trips.  As long as the client doesn't retrieve any acls itself, only the
 default acls of exiting directories and the default and access acls of new
 directories will end up in the cache, which preserves some memory compared to
 always caching the access and default acl of all files.

 Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
 Acked-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7662c5131b47..4ceac9ddac93 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -91,6 +91,8 @@ struct nfs_open_context {
  */
 struct nfs_delegation;
 
+struct posix_acl;
+
 /*
  * nfs fs inode data in memory
  */
@@ -144,6 +146,10 @@ struct nfs_inode {
 	atomic_t		data_updates;
 
 	struct nfs_access_entry	cache_access;
+#ifdef CONFIG_NFS_V3_ACL
+	struct posix_acl	*acl_access;
+	struct posix_acl	*acl_default;
+#endif
 
 	/*
 	 * This is the cookie verifier used for NFSv3 readdir
@@ -480,6 +486,7 @@ extern int nfs3_proc_setacl(struct inode *inode, int type,
 			    struct posix_acl *acl);
 extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
 		mode_t mode);
+extern void nfs3_forget_cached_acls(struct inode *inode);
 #else
 static inline int nfs3_proc_set_default_acl(struct inode *dir,
 					    struct inode *inode,
@@ -487,6 +494,10 @@ static inline int nfs3_proc_set_default_acl(struct inode *dir,
 {
 	return 0;
 }
+
+static inline void nfs3_forget_cached_acls(struct inode *inode)
+{
+}
 #endif /* CONFIG_NFS_V3_ACL */
 
 /*
-- 
cgit v1.2.2


From 00a926422765064cb28e218d4837411c88bf6a3e Mon Sep 17 00:00:00 2001
From: Olivier Galibert <galibert@pobox.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFS: Hide NFS server-generated readdir cookies from userland

 NFSv3 currently returns the unsigned 64-bit cookie directly to
 userspace. The following patch causes the kernel to generate
 loff_t offsets for the benefit of userland.
 The current server-generated READDIR cookie is cached in the
 nfs_open_context instead of in filp->f_pos, so we still end up work
 correctly under directory insertions/deletion.

 Signed-off-by: Olivier Galibert <galibert@pobox.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4ceac9ddac93..f810195ef7ad 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -84,6 +84,9 @@ struct nfs_open_context {
 	int error;
 
 	struct list_head list;
+
+	int dir_pos;		/* Directory cookie cache */
+	__u64 dir_cookie;
 };
 
 /*
-- 
cgit v1.2.2


From f0dd2136da6d2070e12bfa6d199b136318e666c7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:29 +0000
Subject: [PATCH] NFS: Clean up readdir changes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f810195ef7ad..c90313bfa435 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -85,7 +85,6 @@ struct nfs_open_context {
 
 	struct list_head list;
 
-	int dir_pos;		/* Directory cookie cache */
 	__u64 dir_cookie;
 };
 
-- 
cgit v1.2.2


From 951a143b3fcf15cfa9d38250b7462f821db241db Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Fix the file size revalidation

 Instead of looking at whether or not the file is open for writes before
 we accept to update the length using the server value, we should rather
 be looking at whether or not we are currently caching any writes.

 Failure to do so means in particular that we're not updating the file
 length correctly after obtaining a POSIX or BSD lock.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c90313bfa435..211266c56ce5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -294,7 +294,6 @@ extern void nfs_begin_attr_update(struct inode *);
 extern void nfs_end_attr_update(struct inode *);
 extern void nfs_begin_data_update(struct inode *);
 extern void nfs_end_data_update(struct inode *);
-extern void nfs_end_data_update_defer(struct inode *);
 extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
-- 
cgit v1.2.2


From 7d52e86274e09fce8ac8f963e3605a84d0a305a7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Cleanup of caching code, and slight optimization of
 writes.

 Unless we're doing O_APPEND writes, we really don't care about revalidating
 the file length. Just make sure that we catch any page cache invalidations.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 211266c56ce5..443103c13e53 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -289,6 +289,7 @@ extern int nfs_release(struct inode *, struct file *);
 extern int nfs_attribute_timeout(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
+extern void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_setattr(struct dentry *, struct iattr *);
 extern void nfs_begin_attr_update(struct inode *);
 extern void nfs_end_attr_update(struct inode *);
-- 
cgit v1.2.2


From fe51beecc55d0b0dce289e4758e7c529a642f63e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Ensure that fstat() always returns the correct mtime

 Even if the file is open for writes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 443103c13e53..2954e44ed498 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -198,6 +198,7 @@ struct nfs_inode {
 #define NFS_INO_INVALID_ATIME	0x0020		/* cached atime is invalid */
 #define NFS_INO_INVALID_ACCESS	0x0040		/* cached access cred invalid */
 #define NFS_INO_INVALID_ACL	0x0080		/* cached acls are invalid */
+#define NFS_INO_REVAL_PAGECACHE	0x1000		/* must revalidate pagecache */
 
 static inline struct nfs_inode *NFS_I(struct inode *inode)
 {
-- 
cgit v1.2.2


From c6a556b88adfacd2af90be84357c8165d716c27d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:30 +0000
Subject: [PATCH] NFS: Make searching and waiting on busy writeback requests
 more efficient.

 Basically copies the VFS's method for tracking writebacks and applies
 it to the struct nfs_page.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_page.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 39e4895bcdb4..db40e4590ba2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -19,6 +19,11 @@
 
 #include <asm/atomic.h>
 
+/*
+ * Valid flags for the radix tree
+ */
+#define NFS_PAGE_TAG_WRITEBACK	1
+
 /*
  * Valid flags for a dirty buffer
  */
@@ -62,6 +67,9 @@ extern	int nfs_coalesce_requests(struct list_head *, struct list_head *,
 				  unsigned int);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
+extern  int nfs_set_page_writeback_locked(struct nfs_page *req);
+extern  void nfs_clear_page_writeback(struct nfs_page *req);
+
 
 /*
  * Lock the page of an asynchronous request without incrementing the wb_count
@@ -96,10 +104,6 @@ nfs_list_remove_request(struct nfs_page *req)
 {
 	if (list_empty(&req->wb_list))
 		return;
-	if (!NFS_WBACK_BUSY(req)) {
-		printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n");
-		BUG();
-	}
 	list_del_init(&req->wb_list);
 	req->wb_list_head = NULL;
 }
-- 
cgit v1.2.2


From 3da28eb1c6545fe73263a24eba0996217490e1eb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] NFS: Replace nfs_page insertion sort with a radix sort

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs.h   |  4 ++--
 include/linux/nfs_page.h | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2954e44ed498..8ea249110fb0 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -395,10 +395,10 @@ extern void nfs_commit_done(struct rpc_task *);
  */
 extern int  nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, unsigned long, unsigned int, int);
+extern int  nfs_commit_inode(struct inode *, int);
 #else
 static inline int
-nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how)
+nfs_commit_inode(struct inode *inode, int how)
 {
 	return 0;
 }
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index db40e4590ba2..da2e077b65e2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -22,6 +22,7 @@
 /*
  * Valid flags for the radix tree
  */
+#define NFS_PAGE_TAG_DIRTY	0
 #define NFS_PAGE_TAG_WRITEBACK	1
 
 /*
@@ -31,6 +32,7 @@
 #define PG_NEED_COMMIT		1
 #define PG_NEED_RESCHED		2
 
+struct nfs_inode;
 struct nfs_page {
 	struct list_head	wb_list,	/* Defines state of page: */
 				*wb_list_head;	/*      read/write/commit */
@@ -59,8 +61,8 @@ extern	void nfs_clear_request(struct nfs_page *req);
 extern	void nfs_release_request(struct nfs_page *req);
 
 
-extern	void nfs_list_add_request(struct nfs_page *, struct list_head *);
-
+extern  int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
+				unsigned long idx_start, unsigned int npages);
 extern	int nfs_scan_list(struct list_head *, struct list_head *,
 			  unsigned long, unsigned int);
 extern	int nfs_coalesce_requests(struct list_head *, struct list_head *,
@@ -94,6 +96,18 @@ nfs_lock_request(struct nfs_page *req)
 	return 1;
 }
 
+/**
+ * nfs_list_add_request - Insert a request into a list
+ * @req: request
+ * @head: head of list into which to insert the request.
+ */
+static inline void
+nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+{
+	list_add_tail(&req->wb_list, head);
+	req->wb_list_head = head;
+}
+
 
 /**
  * nfs_list_remove_request - Remove a request from its wb_list
-- 
cgit v1.2.2


From ecdbf769b2cb8903e07cd482334c714d89fd1146 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:31 +0000
Subject: [PATCH] NLM: fix a client-side race on blocking locks.

 If the lock blocks, the server may send us a GRANTED message that
 races with the reply to our LOCK request. Make sure that we catch
 the GRANTED by queueing up our request on the nlm_blocked list
 before we send off the first LOCK rpc call.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/lockd/lockd.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0d9d22578212..16d4e5a08e1d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -72,6 +72,8 @@ struct nlm_lockowner {
 	uint32_t pid;
 };
 
+struct nlm_wait;
+
 /*
  * Memory chunk for NLM client RPC request.
  */
@@ -81,6 +83,7 @@ struct nlm_rqst {
 	struct nlm_host *	a_host;		/* host handle */
 	struct nlm_args		a_args;		/* arguments */
 	struct nlm_res		a_res;		/* result */
+	struct nlm_wait *	a_block;
 	char			a_owner[NLMCLNT_OHSIZE];
 };
 
@@ -142,7 +145,9 @@ extern unsigned long		nlmsvc_timeout;
  * Lockd client functions
  */
 struct nlm_rqst * nlmclnt_alloc_call(void);
-int		  nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *);
+int		  nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
+void		  nlmclnt_finish_block(struct nlm_rqst *req);
+long		  nlmclnt_block(struct nlm_rqst *req, long timeout);
 int		  nlmclnt_cancel(struct nlm_host *, struct file_lock *);
 u32		  nlmclnt_grant(struct nlm_lock *);
 void		  nlmclnt_recovery(struct nlm_host *, u32);
-- 
cgit v1.2.2


From 8d0a8a9d0ec790086c64d210af413ac351d89e35 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 22 Jun 2005 17:16:32 +0000
Subject: [PATCH] NFSv4: Clean up nfs4 lock state accounting

 Ensure that lock owner structures are not released prematurely.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/fs.h       | 1 +
 include/linux/nfs_fs_i.h | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9b8b696d4f15..e5a8db00df29 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -674,6 +674,7 @@ struct file_lock {
 	struct lock_manager_operations *fl_lmops;	/* Callbacks for lockmanagers */
 	union {
 		struct nfs_lock_info	nfs_fl;
+		struct nfs4_lock_info	nfs4_fl;
 	} fl_u;
 };
 
diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h
index e9a749588a7b..e2c18dabff86 100644
--- a/include/linux/nfs_fs_i.h
+++ b/include/linux/nfs_fs_i.h
@@ -16,6 +16,11 @@ struct nfs_lock_info {
 	struct nlm_lockowner *owner;
 };
 
+struct nfs4_lock_state;
+struct nfs4_lock_info {
+	struct nfs4_lock_state *owner;
+};
+
 /*
  * Lock flag values
  */
-- 
cgit v1.2.2


From 6ca4f65e6b390d09e1de7280cf9fd4f5d8e4b48b Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 22 Jun 2005 22:04:55 -0700
Subject: [NETPOLL]: Set poll_owner to -1 before unlocking in
 netpoll_poll_unlock()

This trivial patch moves the assignment of poll_owner to -1 inside of
the lock.  This fixes a potential SMP race in the code.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index c0d8b90c5202..449a4fde6587 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -53,8 +53,8 @@ static inline void netpoll_poll_lock(struct net_device *dev)
 static inline void netpoll_poll_unlock(struct net_device *dev)
 {
 	if (dev->np) {
-		spin_unlock(&dev->np->poll_lock);
 		dev->np->poll_owner = -1;
+		spin_unlock(&dev->np->poll_lock);
 	}
 }
 
-- 
cgit v1.2.2


From 115c1d6e61b70851d9a363328c3b8d4c2559a1d3 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 22 Jun 2005 22:05:31 -0700
Subject: [NETPOLL]: Introduce a netpoll_info struct

This patch introduces a netpoll_info structure, which the struct net_device
will now point to instead of pointing to a struct netpoll.  The reason for
this is two-fold: 1) fields such as the rx_flags, poll_owner, and poll_lock
should be maintained per net_device, not per netpoll;  and 2) this is a first
step in providing support for multiple netpoll clients to register against the
same net_device.

The struct netpoll is now pointed to by the netpoll_info structure.  As
such, the previous behaviour of the code is preserved.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 include/linux/netpoll.h   | 25 +++++++++++++++++--------
 2 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ba5d1236aa17..d6afd440cf7b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -41,7 +41,7 @@
 struct divert_blk;
 struct vlan_group;
 struct ethtool_ops;
-struct netpoll;
+struct netpoll_info;
 					/* source back-compat hooks */
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
@@ -468,7 +468,7 @@ struct net_device
 						     unsigned char *haddr);
 	int			(*neigh_setup)(struct net_device *dev, struct neigh_parms *);
 #ifdef CONFIG_NETPOLL
-	struct netpoll		*np;
+	struct netpoll_info	*npinfo;
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*poll_controller)(struct net_device *dev);
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 449a4fde6587..388cd91bc7a6 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -16,14 +16,18 @@ struct netpoll;
 struct netpoll {
 	struct net_device *dev;
 	char dev_name[16], *name;
-	int rx_flags;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
 	void (*drop)(struct sk_buff *skb);
 	u32 local_ip, remote_ip;
 	u16 local_port, remote_port;
 	unsigned char local_mac[6], remote_mac[6];
+};
+
+struct netpoll_info {
 	spinlock_t poll_lock;
 	int poll_owner;
+	int rx_flags;
+	struct netpoll *np;
 };
 
 void netpoll_poll(struct netpoll *np);
@@ -39,22 +43,27 @@ void netpoll_queue(struct sk_buff *skb);
 #ifdef CONFIG_NETPOLL
 static inline int netpoll_rx(struct sk_buff *skb)
 {
-	return skb->dev->np && skb->dev->np->rx_flags && __netpoll_rx(skb);
+	struct netpoll_info *npinfo = skb->dev->npinfo;
+
+	if (!npinfo || !npinfo->rx_flags)
+		return 0;
+
+	return npinfo->np && __netpoll_rx(skb);
 }
 
 static inline void netpoll_poll_lock(struct net_device *dev)
 {
-	if (dev->np) {
-		spin_lock(&dev->np->poll_lock);
-		dev->np->poll_owner = smp_processor_id();
+	if (dev->npinfo) {
+		spin_lock(&dev->npinfo->poll_lock);
+		dev->npinfo->poll_owner = smp_processor_id();
 	}
 }
 
 static inline void netpoll_poll_unlock(struct net_device *dev)
 {
-	if (dev->np) {
-		dev->np->poll_owner = -1;
-		spin_unlock(&dev->np->poll_lock);
+	if (dev->npinfo) {
+		dev->npinfo->poll_owner = -1;
+		spin_unlock(&dev->npinfo->poll_lock);
 	}
 }
 
-- 
cgit v1.2.2


From fbeec2e1552949002065435c9829dc244ad85407 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 22 Jun 2005 22:05:59 -0700
Subject: [NETPOLL]: allow multiple netpoll_clients to register against one
 interface

This patch provides support for registering multiple netpoll clients to the
same network device.  Only one of these clients may register an rx_hook,
however.  In practice, this restriction has not been problematic.  It is
worth mentioning, though, that the current design can be easily extended to
allow for the registration of multiple rx_hooks.

The basic idea of the patch is that the rx_np pointer in the netpoll_info
structure points to the struct netpoll that has rx_hook filled in.  Aside
from this one case, there is no need for a pointer from the struct
net_device to an individual struct netpoll.

A lock is introduced to protect the setting and clearing of the np_rx
pointer.  The pointer will only be cleared upon netpoll client module
removal, and the lock should be uncontested.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 388cd91bc7a6..bcd0ac33f592 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -27,7 +27,8 @@ struct netpoll_info {
 	spinlock_t poll_lock;
 	int poll_owner;
 	int rx_flags;
-	struct netpoll *np;
+	spinlock_t rx_lock;
+	struct netpoll *rx_np; /* netpoll that registered an rx_hook */
 };
 
 void netpoll_poll(struct netpoll *np);
@@ -44,11 +45,19 @@ void netpoll_queue(struct sk_buff *skb);
 static inline int netpoll_rx(struct sk_buff *skb)
 {
 	struct netpoll_info *npinfo = skb->dev->npinfo;
+	unsigned long flags;
+	int ret = 0;
 
-	if (!npinfo || !npinfo->rx_flags)
+	if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags))
 		return 0;
 
-	return npinfo->np && __netpoll_rx(skb);
+	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	/* check rx_flags again with the lock held */
+	if (npinfo->rx_flags && __netpoll_rx(skb))
+		ret = 1;
+	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+
+	return ret;
 }
 
 static inline void netpoll_poll_lock(struct net_device *dev)
-- 
cgit v1.2.2


From cb65d506c34c86df5bcef939ce5a8666a451bd8b Mon Sep 17 00:00:00 2001
From: Shaun Pereira <spereira@tusc.com.au>
Date: Wed, 22 Jun 2005 22:15:01 -0700
Subject: [X25]: Selective sub-address matching with call user data.

From: Shaun Pereira <spereira@tusc.com.au>

This is the first (independent of the second) patch of two that I am
working on with x25 on linux (tested with xot on a cisco router).  Details
are as follows.

Current state of module:

A server using the current implementation (2.6.11.7) of the x25 module will
accept a call request/ incoming call packet at the listening x.25 address,
from all callers to that address, as long as NO call user data is present
in the packet header.

If the server needs to choose to accept a particular call request/ incoming
call packet arriving at its listening x25 address, then the kernel has to
allow a match of call user data present in the call request packet with its
own.  This is required when multiple servers listen at the same x25 address
and device interface.  The kernel currently matches ALL call user data, if
present.

Current Changes:

This patch is a follow up to the patch submitted previously by Andrew
Hendry, and allows the user to selectively control the number of octets of
call user data in the call request packet, that the kernel will match.  By
default no call user data is matched, even if call user data is present.
To allow call user data matching, a cudmatchlength > 0 has to be passed
into the kernel after which the passed number of octets will be matched.
Otherwise the kernel behavior is exactly as the original implementation.

This patch also ensures that as is normally the case, no call user data
will be present in the Call accepted / call connected packet sent back to
the caller

Future Changes on next patch:

There are cases however when call user data may be present in the call
accepted packet.  According to the X.25 recommendation (ITU-T 10/96)
section 5.2.3.2 call user data may be present in the call accepted packet
provided the fast select facility is used.  My next patch will include this
fast select utility and the ability to send up to 128 octets call user data
in the call accepted packet provided the fast select facility is used.  I
am currently testing this, again with xot on linux and cisco.

Signed-off-by: Shaun Pereira <spereira@tusc.com.au>

(With a fix from Alexey Dobriyan <adobriyan@gmail.com>)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/x25.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/x25.h b/include/linux/x25.h
index 7531cfed5885..6f43b3d20248 100644
--- a/include/linux/x25.h
+++ b/include/linux/x25.h
@@ -4,6 +4,8 @@
  * 	History
  *	mar/20/00	Daniela Squassoni Disabling/enabling of facilities 
  *					  negotiation.
+ *	apr/02/05	Shaun Pereira Selective sub address matching with
+ *					call user data
  */
 
 #ifndef	X25_KERNEL_H
@@ -16,6 +18,7 @@
 #define	SIOCX25GCALLUSERDATA	(SIOCPROTOPRIVATE + 4)
 #define	SIOCX25SCALLUSERDATA	(SIOCPROTOPRIVATE + 5)
 #define	SIOCX25GCAUSEDIAG	(SIOCPROTOPRIVATE + 6)
+#define SIOCX25SCUDMATCHLEN	(SIOCPROTOPRIVATE + 7)
 
 /*
  *	Values for {get,set}sockopt.
@@ -109,4 +112,11 @@ struct x25_causediag {
 	unsigned char	diagnostic;
 };
 
+/*
+ *	Further optional call user data match length selection
+ */
+struct x25_subaddr {
+	unsigned int cudmatchlength;
+};
+
 #endif
-- 
cgit v1.2.2


From ebc3f64b864fc16a594c2e63bf55a55c7d42084b Mon Sep 17 00:00:00 2001
From: Shaun Pereira <spereira@tusc.com.au>
Date: Wed, 22 Jun 2005 22:16:17 -0700
Subject: [X25]: Fast select with no restriction on response

This patch is a follow up to patch 1 regarding "Selective Sub Address
matching with call user data".  It allows use of the Fast-Select-Acceptance
optional user facility for X.25.

This patch just implements fast select with no restriction on response
(NRR).  What this means (according to ITU-T Recomendation 10/96 section
6.16) is that if in an incoming call packet, the relevant facility bits are
set for fast-select-NRR, then the called DTE can issue a direct response to
the incoming packet using a call-accepted packet that contains
call-user-data.  This patch allows such a response.

The called DTE can also respond with a clear-request packet that contains
call-user-data.  However, this feature is currently not implemented by the
patch.

How is Fast Select Acceptance used?
By default, the system does not allow fast select acceptance (as before).
To enable a response to fast select acceptance,
After a listen socket in created and bound as follows
	socket(AF_X25, SOCK_SEQPACKET, 0);
	bind(call_soc, (struct sockaddr *)&locl_addr, sizeof(locl_addr));
but before a listen system call is made, the following ioctl should be used.
	ioctl(call_soc,SIOCX25CALLACCPTAPPRV);
Now the listen system call can be made
	listen(call_soc, 4);
After this, an incoming-call packet will be accepted, but no call-accepted
packet will be sent back until the following system call is made on the socket
that accepts the call
	ioctl(vc_soc,SIOCX25SENDCALLACCPT);
The network (or cisco xot router used for testing here) will allow the
application server's call-user-data in the call-accepted packet,
provided the call-request was made with Fast-select NRR.

Signed-off-by: Shaun Pereira <spereira@tusc.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/x25.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/x25.h b/include/linux/x25.h
index 6f43b3d20248..16d44931afa0 100644
--- a/include/linux/x25.h
+++ b/include/linux/x25.h
@@ -19,6 +19,8 @@
 #define	SIOCX25SCALLUSERDATA	(SIOCPROTOPRIVATE + 5)
 #define	SIOCX25GCAUSEDIAG	(SIOCPROTOPRIVATE + 6)
 #define SIOCX25SCUDMATCHLEN	(SIOCPROTOPRIVATE + 7)
+#define SIOCX25CALLACCPTAPPRV   (SIOCPROTOPRIVATE + 8)
+#define SIOCX25SENDCALLACCPT    (SIOCPROTOPRIVATE + 9)
 
 /*
  *	Values for {get,set}sockopt.
-- 
cgit v1.2.2


From 408fde81c1bff15c875a3618481e93a01dcc79ea Mon Sep 17 00:00:00 2001
From: Dave Hansen <haveblue@us.ibm.com>
Date: Thu, 23 Jun 2005 00:07:37 -0700
Subject: [PATCH] remove non-DISCONTIG use of pgdat->node_mem_map

This patch effectively eliminates direct use of pgdat->node_mem_map outside
of the DISCONTIG code.  On a flat memory system, these fields aren't
currently used, neither are they on a sparsemem system.

There was also a node_mem_map(nid) macro on many architectures.  Its use
along with the use of ->node_mem_map itself was not consistent.  It has
been removed in favor of two new, more explicit, arch-independent macros:

	pgdat_page_nr(pgdat, pagenr)
	nid_page_nr(nid, pagenr)

I called them "pgdat" and "nid" because we overload the term "node" to mean
"NUMA node", "DISCONTIG node" or "pg_data_t" in very confusing ways.  I
believe the newer names are much clearer.

These macros can be overridden in the sparsemem case with a theoretically
slower operation using node_start_pfn and pfn_to_page(), instead.  We could
make this the only behavior if people want, but I don't want to change too
much at once.  One thing at a time.

This patch removes more code than it adds.

Compile tested on alpha, alpha discontig, arm, arm-discontig, i386, i386
generic, NUMAQ, Summit, ppc64, ppc64 discontig, and x86_64.  Full list
here: http://sr71.net/patches/2.6.12/2.6.12-rc1-mhp2/configs/

Boot tested on NUMAQ, x86 SMP and ppc64 power4/5 LPARs.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Martin J. Bligh <mbligh@aracnet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4733d35d8223..b79633d3a97b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -284,6 +284,8 @@ typedef struct pglist_data {
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
 #define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
+#define pgdat_page_nr(pgdat, pagenr)	((pgdat)->node_mem_map + (pagenr))
+#define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
 extern struct pglist_data *pgdat_list;
 
-- 
cgit v1.2.2


From 6f167ec721108c9282d54424516a12c805e3c306 Mon Sep 17 00:00:00 2001
From: Dave Hansen <haveblue@us.ibm.com>
Date: Thu, 23 Jun 2005 00:07:39 -0700
Subject: [PATCH] sparsemem base: simple NUMA remap space allocator

Introduce a simple allocator for the NUMA remap space.  This space is very
scarce, used for structures which are best allocated node local.

This mechanism is also used on non-NUMA ia64 systems with a vmem_map to keep
the pgdat->node_mem_map initialized in a consistent place for all
architectures.

Issues:
o alloc_remap takes a node_id where we might expect a pgdat which was intended
  to allow us to allocate the pgdat's using this mechanism; which we do not yet
  do.  Could have alloc_remap_node() and alloc_remap_nid() for this purpose.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 0dd8ca1a3d5a..500f451ce0c0 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size,
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
+extern void *alloc_remap(int nid, unsigned long size);
+#else
+static inline void *alloc_remap(int nid, unsigned long size)
+{
+	return NULL;
+}
+#endif
+
 extern unsigned long __initdata nr_kernel_pages;
 extern unsigned long __initdata nr_all_pages;
 
-- 
cgit v1.2.2


From 348f8b6c4837a07304d2f72b11ce8d96588065e0 Mon Sep 17 00:00:00 2001
From: Dave Hansen <haveblue@us.ibm.com>
Date: Thu, 23 Jun 2005 00:07:40 -0700
Subject: [PATCH] sparsemem base: reorganize page->flags bit operations

Generify the value fields in the page_flags.  The aim is to allow the location
and size of these fields to be varied.  Additionally we want to move away from
fixed allocations per field whilst still enforcing the overall bit utilisation
limits.  We rely on the compiler to spot and optimise the accessor functions.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     | 53 +++++++++++++++++++++++++++++++++++++++++---------
 include/linux/mmzone.h | 19 +++++++-----------
 2 files changed, 51 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1813b162b0a8..57b2ead51dba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -395,19 +395,41 @@ static inline void put_page(struct page *page)
 /*
  * The zone field is never updated after free_area_init_core()
  * sets it, so none of the operations on it need to be atomic.
- * We'll have up to (MAX_NUMNODES * MAX_NR_ZONES) zones total,
- * so we use (MAX_NODES_SHIFT + MAX_ZONES_SHIFT) here to get enough bits.
  */
-#define NODEZONE_SHIFT (sizeof(page_flags_t)*8 - MAX_NODES_SHIFT - MAX_ZONES_SHIFT)
+
+/* Page flags: | NODE | ZONE | ... | FLAGS | */
+#define NODES_PGOFF		((sizeof(page_flags_t)*8) - NODES_SHIFT)
+#define ZONES_PGOFF		(NODES_PGOFF - ZONES_SHIFT)
+
+/*
+ * Define the bit shifts to access each section.  For non-existant
+ * sections we define the shift as 0; that plus a 0 mask ensures
+ * the compiler will optimise away reference to them.
+ */
+#define NODES_PGSHIFT		(NODES_PGOFF * (NODES_SHIFT != 0))
+#define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_SHIFT != 0))
+
+/* NODE:ZONE is used to lookup the zone from a page. */
+#define ZONETABLE_SHIFT		(NODES_SHIFT + ZONES_SHIFT)
+#define ZONETABLE_PGSHIFT	ZONES_PGSHIFT
+
+#if NODES_SHIFT+ZONES_SHIFT > FLAGS_RESERVED
+#error NODES_SHIFT+ZONES_SHIFT > FLAGS_RESERVED
+#endif
+
 #define NODEZONE(node, zone)	((node << ZONES_SHIFT) | zone)
 
+#define ZONES_MASK		((1UL << ZONES_SHIFT) - 1)
+#define NODES_MASK		((1UL << NODES_SHIFT) - 1)
+#define ZONETABLE_MASK		((1UL << ZONETABLE_SHIFT) - 1)
+
 static inline unsigned long page_zonenum(struct page *page)
 {
-	return (page->flags >> NODEZONE_SHIFT) & (~(~0UL << ZONES_SHIFT));
+	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
 static inline unsigned long page_to_nid(struct page *page)
 {
-	return (page->flags >> (NODEZONE_SHIFT + ZONES_SHIFT));
+	return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
 }
 
 struct zone;
@@ -415,13 +437,26 @@ extern struct zone *zone_table[];
 
 static inline struct zone *page_zone(struct page *page)
 {
-	return zone_table[page->flags >> NODEZONE_SHIFT];
+	return zone_table[(page->flags >> ZONETABLE_PGSHIFT) &
+			ZONETABLE_MASK];
+}
+
+static inline void set_page_zone(struct page *page, unsigned long zone)
+{
+	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
+	page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
+}
+static inline void set_page_node(struct page *page, unsigned long node)
+{
+	page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
+	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
 }
 
-static inline void set_page_zone(struct page *page, unsigned long nodezone_num)
+static inline void set_page_links(struct page *page, unsigned long zone,
+	unsigned long node)
 {
-	page->flags &= ~(~0UL << NODEZONE_SHIFT);
-	page->flags |= nodezone_num << NODEZONE_SHIFT;
+	set_page_zone(page, zone);
+	set_page_node(page, node);
 }
 
 #ifndef CONFIG_DISCONTIGMEM
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b79633d3a97b..39e912708e2a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -414,30 +414,25 @@ extern struct pglist_data contig_page_data;
 
 #include <asm/mmzone.h>
 
+#endif /* !CONFIG_DISCONTIGMEM */
+
 #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
 /*
  * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
  * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
  */
-#define MAX_NODES_SHIFT		6
+#define FLAGS_RESERVED		8
+
 #elif BITS_PER_LONG == 64
 /*
  * with 64 bit flags field, there's plenty of room.
  */
-#define MAX_NODES_SHIFT		10
-#endif
+#define FLAGS_RESERVED		32
 
-#endif /* !CONFIG_DISCONTIGMEM */
-
-#if NODES_SHIFT > MAX_NODES_SHIFT
-#error NODES_SHIFT > MAX_NODES_SHIFT
-#endif
+#else
 
-/* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */
-#define MAX_ZONES_SHIFT		2
+#error BITS_PER_LONG not defined
 
-#if ZONES_SHIFT > MAX_ZONES_SHIFT
-#error ZONES_SHIFT > MAX_ZONES_SHIFT
 #endif
 
 #endif /* !__ASSEMBLY__ */
-- 
cgit v1.2.2


From 93b7504e3e6c1d98586854806e51bea329ea3aa9 Mon Sep 17 00:00:00 2001
From: Dave Hansen <haveblue@us.ibm.com>
Date: Thu, 23 Jun 2005 00:07:47 -0700
Subject: [PATCH] Introduce new Kconfig option for NUMA or DISCONTIG

There is some confusion that arose when working on SPARSEMEM patch between
what is needed for DISCONTIG vs. NUMA.

Multiple pg_data_t's are needed for DISCONTIGMEM or NUMA, independently.
All of the current NUMA implementations require an implementation of
DISCONTIG.  Because of this, quite a lot of code which is really needed for
NUMA is actually under DISCONTIG #ifdefs.  For SPARSEMEM, we changed some
of these #ifdefs to CONFIG_NUMA, but that broke the DISCONTIG=y and NUMA=n
case.

Introducing this new NEED_MULTIPLE_NODES config option allows code that is
needed for both NUMA or DISCONTIG to be separated out from code that is
specific to DISCONTIG.

One great advantage of this approach is that it doesn't require every
architecture to be converted over.  All of the current implementations
should "just work", only the ones implementing SPARSEMEM will have to be
fixed up.

The change to free_area_init() makes it work inside, or out of the new
config option.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 39e912708e2a..95f4a780ea66 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -402,7 +402,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 /* Returns the number of the current Node. */
 #define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
 
 extern struct pglist_data contig_page_data;
 #define NODE_DATA(nid)		(&contig_page_data)
@@ -410,11 +410,11 @@ extern struct pglist_data contig_page_data;
 #define MAX_NODES_SHIFT		1
 #define pfn_to_nid(pfn)		(0)
 
-#else /* CONFIG_DISCONTIGMEM */
+#else /* CONFIG_NEED_MULTIPLE_NODES */
 
 #include <asm/mmzone.h>
 
-#endif /* !CONFIG_DISCONTIGMEM */
+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
 #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
 /*
-- 
cgit v1.2.2


From b159d43fbf7eaaac6ecc647f51cf4257332db47b Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Thu, 23 Jun 2005 00:07:52 -0700
Subject: [PATCH] generify early_pfn_to_nid

Provide a default implementation for early_pfn_to_nid returning node 0.  Allow
architectures to override this with their own implementation out of
asm/mmzone.h.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Martin Bligh <mbligh@aracnet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 95f4a780ea66..6ef07de98d69 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -435,6 +435,10 @@ extern struct pglist_data contig_page_data;
 
 #endif
 
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#define early_pfn_to_nid(nid)  (0UL)
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MMZONE_H */
-- 
cgit v1.2.2


From d41dee369bff3b9dcb6328d4d822926c28cc2594 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Thu, 23 Jun 2005 00:07:54 -0700
Subject: [PATCH] sparsemem memory model

Sparsemem abstracts the use of discontiguous mem_maps[].  This kind of
mem_map[] is needed by discontiguous memory machines (like in the old
CONFIG_DISCONTIGMEM case) as well as memory hotplug systems.  Sparsemem
replaces DISCONTIGMEM when enabled, and it is hoped that it can eventually
become a complete replacement.

A significant advantage over DISCONTIGMEM is that it's completely separated
from CONFIG_NUMA.  When producing this patch, it became apparent in that NUMA
and DISCONTIG are often confused.

Another advantage is that sparse doesn't require each NUMA node's ranges to be
contiguous.  It can handle overlapping ranges between nodes with no problems,
where DISCONTIGMEM currently throws away that memory.

Sparsemem uses an array to provide different pfn_to_page() translations for
each SECTION_SIZE area of physical memory.  This is what allows the mem_map[]
to be chopped up.

In order to do quick pfn_to_page() operations, the section number of the page
is encoded in page->flags.  Part of the sparsemem infrastructure enables
sharing of these bits more dynamically (at compile-time) between the
page_zone() and sparsemem operations.  However, on 32-bit architectures, the
number of bits is quite limited, and may require growing the size of the
page->flags type in certain conditions.  Several things might force this to
occur: a decrease in the SECTION_SIZE (if you want to hotplug smaller areas of
memory), an increase in the physical address space, or an increase in the
number of used page->flags.

One thing to note is that, once sparsemem is present, the NUMA node
information no longer needs to be stored in the page->flags.  It might provide
speed increases on certain platforms and will be stored there if there is
room.  But, if out of room, an alternate (theoretically slower) mechanism is
used.

This patch introduces CONFIG_FLATMEM.  It is used in almost all cases where
there used to be an #ifndef DISCONTIG, because SPARSEMEM and DISCONTIGMEM
often have to compile out the same areas of code.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Martin Bligh <mbligh@aracnet.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h     | 92 ++++++++++++++++++++++++++++++++++++++---------
 include/linux/mmzone.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/numa.h   |  2 +-
 3 files changed, 172 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 57b2ead51dba..6eb7f48317f8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -397,40 +397,80 @@ static inline void put_page(struct page *page)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-/* Page flags: | NODE | ZONE | ... | FLAGS | */
-#define NODES_PGOFF		((sizeof(page_flags_t)*8) - NODES_SHIFT)
-#define ZONES_PGOFF		(NODES_PGOFF - ZONES_SHIFT)
+
+/*
+ * page->flags layout:
+ *
+ * There are three possibilities for how page->flags get
+ * laid out.  The first is for the normal case, without
+ * sparsemem.  The second is for sparsemem when there is
+ * plenty of space for node and section.  The last is when
+ * we have run out of space and have to fall back to an
+ * alternate (slower) way of determining the node.
+ *
+ *        No sparsemem: |       NODE     | ZONE | ... | FLAGS |
+ * with space for node: | SECTION | NODE | ZONE | ... | FLAGS |
+ *   no space for node: | SECTION |     ZONE    | ... | FLAGS |
+ */
+#ifdef CONFIG_SPARSEMEM
+#define SECTIONS_WIDTH		SECTIONS_SHIFT
+#else
+#define SECTIONS_WIDTH		0
+#endif
+
+#define ZONES_WIDTH		ZONES_SHIFT
+
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED
+#define NODES_WIDTH		NODES_SHIFT
+#else
+#define NODES_WIDTH		0
+#endif
+
+/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
+#define SECTIONS_PGOFF		((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)
+#define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
+#define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
+
+/*
+ * We are going to use the flags for the page to node mapping if its in
+ * there.  This includes the case where there is no node, so it is implicit.
+ */
+#define FLAGS_HAS_NODE		(NODES_WIDTH > 0 || NODES_SHIFT == 0)
+
+#ifndef PFN_SECTION_SHIFT
+#define PFN_SECTION_SHIFT 0
+#endif
 
 /*
  * Define the bit shifts to access each section.  For non-existant
  * sections we define the shift as 0; that plus a 0 mask ensures
  * the compiler will optimise away reference to them.
  */
-#define NODES_PGSHIFT		(NODES_PGOFF * (NODES_SHIFT != 0))
-#define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_SHIFT != 0))
+#define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
+#define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
+#define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
 
-/* NODE:ZONE is used to lookup the zone from a page. */
+/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */
+#if FLAGS_HAS_NODE
 #define ZONETABLE_SHIFT		(NODES_SHIFT + ZONES_SHIFT)
+#else
+#define ZONETABLE_SHIFT		(SECTIONS_SHIFT + ZONES_SHIFT)
+#endif
 #define ZONETABLE_PGSHIFT	ZONES_PGSHIFT
 
-#if NODES_SHIFT+ZONES_SHIFT > FLAGS_RESERVED
-#error NODES_SHIFT+ZONES_SHIFT > FLAGS_RESERVED
+#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
+#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
 #endif
 
-#define NODEZONE(node, zone)	((node << ZONES_SHIFT) | zone)
-
-#define ZONES_MASK		((1UL << ZONES_SHIFT) - 1)
-#define NODES_MASK		((1UL << NODES_SHIFT) - 1)
+#define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
+#define NODES_MASK		((1UL << NODES_WIDTH) - 1)
+#define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
 #define ZONETABLE_MASK		((1UL << ZONETABLE_SHIFT) - 1)
 
 static inline unsigned long page_zonenum(struct page *page)
 {
 	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
-static inline unsigned long page_to_nid(struct page *page)
-{
-	return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
-}
 
 struct zone;
 extern struct zone *zone_table[];
@@ -441,6 +481,18 @@ static inline struct zone *page_zone(struct page *page)
 			ZONETABLE_MASK];
 }
 
+static inline unsigned long page_to_nid(struct page *page)
+{
+	if (FLAGS_HAS_NODE)
+		return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
+	else
+		return page_zone(page)->zone_pgdat->node_id;
+}
+static inline unsigned long page_to_section(struct page *page)
+{
+	return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
+}
+
 static inline void set_page_zone(struct page *page, unsigned long zone)
 {
 	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
@@ -451,12 +503,18 @@ static inline void set_page_node(struct page *page, unsigned long node)
 	page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
 	page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
 }
+static inline void set_page_section(struct page *page, unsigned long section)
+{
+	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
+	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
+}
 
 static inline void set_page_links(struct page *page, unsigned long zone,
-	unsigned long node)
+	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
 	set_page_node(page, node);
+	set_page_section(page, pfn_to_section_nr(pfn));
 }
 
 #ifndef CONFIG_DISCONTIGMEM
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6ef07de98d69..19860d317ec2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -269,7 +269,9 @@ typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
 	struct zonelist node_zonelists[GFP_ZONETYPES];
 	int nr_zones;
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
 	struct page *node_mem_map;
+#endif
 	struct bootmem_data *bdata;
 	unsigned long node_start_pfn;
 	unsigned long node_present_pages; /* total number of physical pages */
@@ -284,7 +286,11 @@ typedef struct pglist_data {
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
 #define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
 #define pgdat_page_nr(pgdat, pagenr)	((pgdat)->node_mem_map + (pagenr))
+#else
+#define pgdat_page_nr(pgdat, pagenr)	pfn_to_page((pgdat)->node_start_pfn + (pagenr))
+#endif
 #define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
 extern struct pglist_data *pgdat_list;
@@ -416,6 +422,10 @@ extern struct pglist_data contig_page_data;
 
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
+#ifdef CONFIG_SPARSEMEM
+#include <asm/sparsemem.h>
+#endif
+
 #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED)
 /*
  * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
@@ -439,6 +449,92 @@ extern struct pglist_data contig_page_data;
 #define early_pfn_to_nid(nid)  (0UL)
 #endif
 
+#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
+#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+
+#ifdef CONFIG_SPARSEMEM
+
+/*
+ * SECTION_SHIFT    		#bits space required to store a section #
+ *
+ * PA_SECTION_SHIFT		physical address to/from section number
+ * PFN_SECTION_SHIFT		pfn to/from section number
+ */
+#define SECTIONS_SHIFT		(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
+
+#define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
+#define PFN_SECTION_SHIFT	(SECTION_SIZE_BITS - PAGE_SHIFT)
+
+#define NR_MEM_SECTIONS		(1UL << SECTIONS_SHIFT)
+
+#define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
+#define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION-1))
+
+#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
+#error Allocator MAX_ORDER exceeds SECTION_SIZE
+#endif
+
+struct page;
+struct mem_section {
+	struct page *section_mem_map;
+};
+
+extern struct mem_section mem_section[NR_MEM_SECTIONS];
+
+/*
+ * Given a kernel address, find the home node of the underlying memory.
+ */
+#define kvaddr_to_nid(kaddr)	pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT)
+
+static inline struct mem_section *__pfn_to_section(unsigned long pfn)
+{
+	return &mem_section[pfn_to_section_nr(pfn)];
+}
+
+#define pfn_to_page(pfn) 						\
+({ 									\
+	unsigned long __pfn = (pfn);					\
+	__pfn_to_section(__pfn)->section_mem_map + __pfn;		\
+})
+#define page_to_pfn(page)						\
+({									\
+	page - mem_section[page_to_section(page)].section_mem_map;	\
+})
+
+static inline int pfn_valid(unsigned long pfn)
+{
+	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
+		return 0;
+	return mem_section[pfn_to_section_nr(pfn)].section_mem_map != 0;
+}
+
+/*
+ * These are _only_ used during initialisation, therefore they
+ * can use __initdata ...  They could have names to indicate
+ * this restriction.
+ */
+#ifdef CONFIG_NUMA
+#define pfn_to_nid		early_pfn_to_nid
+#endif
+
+#define pfn_to_pgdat(pfn)						\
+({									\
+	NODE_DATA(pfn_to_nid(pfn));					\
+})
+
+#define early_pfn_valid(pfn)	pfn_valid(pfn)
+void sparse_init(void);
+#else
+#define sparse_init()	do {} while (0)
+#endif /* CONFIG_SPARSEMEM */
+
+#ifndef early_pfn_valid
+#define early_pfn_valid(pfn)	(1)
+#endif
+
+void memory_present(int nid, unsigned long start, unsigned long end);
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MMZONE_H */
diff --git a/include/linux/numa.h b/include/linux/numa.h
index bd0c8c4e9a95..f0c539bd3cfc 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -3,7 +3,7 @@
 
 #include <linux/config.h>
 
-#ifdef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_FLATMEM
 #include <asm/numnodes.h>
 #endif
 
-- 
cgit v1.2.2


From 641c767389b19859a45e6de46d8e18cd935bdb60 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Thu, 23 Jun 2005 00:07:59 -0700
Subject: [PATCH] sparsemem swiss cheese numa layouts

The part of the sparsemem patch which modifies memmap_init_zone() has recently
become a problem.  It changes behavior so that there is a call to
pfn_to_page() for each individual page inside of a node's range:
node_start_pfn through node_end_pfn.  It used to simply do this once, at the
beginning of the node, but having sparsemem's non-contiguous mem_map[]s inside
of a node made it necessary to change.

Mike Kravetz recently wrote a patch which made the NUMA code accept some new
kinds of layouts.  The system's memory was laid out like this, with node 0's
memory in two pieces: one before and one after node 1's memory:

	Node 0: +++++     +++++
	Node 1:      +++++

Previous behavior before Mike's patch was to assign nodes like this:

	Node 0: 00000     XXXXX
	Node 1:      11111

Where the 'X' areas were simply thrown away.  The new behavior was to make the
pg_data_t span node 0 across all of its areas, including areas that are really
node 1's: Node 0: 000000000000000 Node 1: 11111

This wastes a little bit of mem_map space, but ends up being OK, and more
fully utilizes the system's memory.  memmap_init_zone() initializes all of the
"struct page"s for node 0, even for the "hole", but those never get used,
because there is no pfn_to_page() that resolves to those pages.  However, only
calling pfn_to_page() once, memmap_init_zone() always uses the pages that were
allocated for node0->node_mem_map because:

	struct page *start = pfn_to_page(start_pfn);
	// effectively start = &node->node_mem_map[0]
	for (page = start; page < (start + size); page++) {
		init_page_here();...
		page++;
	}

Slow, and wasteful, but generally harmless.

But, modify that to call pfn_to_page() for each loop iteration (like sparsemem
does):

	for (pfn = start_pfn; pfn < < (start_pfn + size); pfn++++) {
		page = pfn_to_page(pfn);
	}

And you end up trying to initialize node 1's pages too early, along with bogus
data from node 0.  This patch checks for those weird layouts and declines to
touch the pages, making the more frequent pfn_to_page() calls OK to do.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 19860d317ec2..746b57e3d370 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -528,6 +528,12 @@ void sparse_init(void);
 #define sparse_init()	do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+#define early_pfn_in_nid(pfn, nid)	(early_pfn_to_nid(pfn) == (nid))
+#else
+#define early_pfn_in_nid(pfn, nid)	(1)
+#endif
+
 #ifndef early_pfn_valid
 #define early_pfn_valid(pfn)	(1)
 #endif
-- 
cgit v1.2.2


From 29751f6991e845f7d002a6ae520bf996b38c8dcd Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Thu, 23 Jun 2005 00:08:00 -0700
Subject: [PATCH] sparsemem hotplug base

Make sparse's initalization be accessible at runtime.  This allows sparse
mappings to be created after boot in a hotplug situation.

This patch is separated from the previous one just to give an indication how
much of the sparse infrastructure is *just* for hotplug memory.

The section_mem_map doesn't really store a pointer.  It stores something that
is convenient to do some math against to get a pointer.  It isn't valid to
just do *section_mem_map, so I don't think it should be stored as a pointer.

There are a couple of things I'd like to store about a section.  First of all,
the fact that it is !NULL does not mean that it is present.  There could be
such a combination where section_mem_map *is* NULL, but the math gets you
properly to a real mem_map.  So, I don't think that check is safe.

Since we're storing 32-bit-aligned structures, we have a few bits in the
bottom of the pointer to play with.  Use one bit to encode whether there's
really a mem_map there, and the other one to tell whether there's a valid
section there.  We need to distinguish between the two because sometimes
there's a gap between when a section is discovered to be present and when we
can get the mem_map for it.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 56 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 51 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 746b57e3d370..6c90461ed99f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -476,11 +476,56 @@ extern struct pglist_data contig_page_data;
 
 struct page;
 struct mem_section {
-	struct page *section_mem_map;
+	/*
+	 * This is, logically, a pointer to an array of struct
+	 * pages.  However, it is stored with some other magic.
+	 * (see sparse.c::sparse_init_one_section())
+	 *
+	 * Making it a UL at least makes someone do a cast
+	 * before using it wrong.
+	 */
+	unsigned long section_mem_map;
 };
 
 extern struct mem_section mem_section[NR_MEM_SECTIONS];
 
+static inline struct mem_section *__nr_to_section(unsigned long nr)
+{
+	return &mem_section[nr];
+}
+
+/*
+ * We use the lower bits of the mem_map pointer to store
+ * a little bit of information.  There should be at least
+ * 3 bits here due to 32-bit alignment.
+ */
+#define	SECTION_MARKED_PRESENT	(1UL<<0)
+#define SECTION_HAS_MEM_MAP	(1UL<<1)
+#define SECTION_MAP_LAST_BIT	(1UL<<2)
+#define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
+
+static inline struct page *__section_mem_map_addr(struct mem_section *section)
+{
+	unsigned long map = section->section_mem_map;
+	map &= SECTION_MAP_MASK;
+	return (struct page *)map;
+}
+
+static inline int valid_section(struct mem_section *section)
+{
+	return (section->section_mem_map & SECTION_MARKED_PRESENT);
+}
+
+static inline int section_has_mem_map(struct mem_section *section)
+{
+	return (section->section_mem_map & SECTION_HAS_MEM_MAP);
+}
+
+static inline int valid_section_nr(unsigned long nr)
+{
+	return valid_section(__nr_to_section(nr));
+}
+
 /*
  * Given a kernel address, find the home node of the underlying memory.
  */
@@ -488,24 +533,25 @@ extern struct mem_section mem_section[NR_MEM_SECTIONS];
 
 static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 {
-	return &mem_section[pfn_to_section_nr(pfn)];
+	return __nr_to_section(pfn_to_section_nr(pfn));
 }
 
 #define pfn_to_page(pfn) 						\
 ({ 									\
 	unsigned long __pfn = (pfn);					\
-	__pfn_to_section(__pfn)->section_mem_map + __pfn;		\
+	__section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn;	\
 })
 #define page_to_pfn(page)						\
 ({									\
-	page - mem_section[page_to_section(page)].section_mem_map;	\
+	page - __section_mem_map_addr(__nr_to_section(			\
+		page_to_section(page)));				\
 })
 
 static inline int pfn_valid(unsigned long pfn)
 {
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
-	return mem_section[pfn_to_section_nr(pfn)].section_mem_map != 0;
+	return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
 }
 
 /*
-- 
cgit v1.2.2


From 1946089a109251655c5438d92c539bd2930e71ea Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@lameter.com>
Date: Thu, 23 Jun 2005 00:08:19 -0700
Subject: [PATCH] NUMA aware block device control structure allocation

Patch to allocate the control structures for for ide devices on the node of
the device itself (for NUMA systems).  The patch depends on the Slab API
change patch by Manfred and me (in mm) and the pcidev_to_node patch that I
posted today.

Does some realignment too.

Signed-off-by: Justin M. Forbes <jmforbes@linuxtx.org>
Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Pravin Shelar <pravin@calsoftinc.com>
Signed-off-by: Shobhit Dayal <shobhit@calsoftinc.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/blkdev.h  |  6 +++++-
 include/linux/genhd.h   |  1 +
 include/linux/ide.h     |  2 +-
 include/linux/mempool.h | 11 ++++++++---
 4 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4a99b76c5a33..235c3414d268 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -396,6 +396,7 @@ struct request_queue
 	 */
 	unsigned int		sg_timeout;
 	unsigned int		sg_reserved_size;
+	int			node;
 
 	struct list_head	drain_list;
 
@@ -615,6 +616,8 @@ static inline void blkdev_dequeue_request(struct request *req)
 /*
  * Access functions for manipulating queue properties
  */
+extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn,
+					spinlock_t *lock, int node_id);
 extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
@@ -646,7 +649,8 @@ extern void blk_wait_queue_drained(request_queue_t *, int);
 extern void blk_finish_queue_drain(request_queue_t *);
 
 int blk_get_queue(request_queue_t *);
-request_queue_t *blk_alloc_queue(int);
+request_queue_t *blk_alloc_queue(int gfp_mask);
+request_queue_t *blk_alloc_queue_node(int,int);
 #define blk_put_queue(q) blk_cleanup_queue((q))
 
 /*
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 47dedaf971d6..af26dc718ef6 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -403,6 +403,7 @@ extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
 extern void add_partition(struct gendisk *, int, sector_t, sector_t);
 extern void delete_partition(struct gendisk *, int);
 
+extern struct gendisk *alloc_disk_node(int minors, int node_id);
 extern struct gendisk *alloc_disk(int minors);
 extern struct kobject *get_disk(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 336d6e509f59..92129078d4f3 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -917,7 +917,7 @@ typedef struct hwif_s {
 	unsigned dma;
 
 	void (*led_act)(void *data, int rw);
-} ide_hwif_t;
+} ____cacheline_maxaligned_in_smp ide_hwif_t;
 
 /*
  *  internal ide interrupt handler type
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 4a36edf1c974..796220ce47cc 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -20,9 +20,14 @@ typedef struct mempool_s {
 	mempool_free_t *free;
 	wait_queue_head_t wait;
 } mempool_t;
-extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
-				 mempool_free_t *free_fn, void *pool_data);
-extern int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask);
+
+extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+			mempool_free_t *free_fn, void *pool_data);
+extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
+			mempool_free_t *free_fn, void *pool_data, int nid);
+
+extern int mempool_resize(mempool_t *pool, int new_min_nr,
+			unsigned int __nocast gfp_mask);
 extern void mempool_destroy(mempool_t *pool);
 extern void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask);
 extern void mempool_free(void *element, mempool_t *pool);
-- 
cgit v1.2.2


From fa72b903f75e4f0f0b2c2feed093005167da4023 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 23 Jun 2005 00:08:49 -0700
Subject: [PATCH] blk: remove blk_queue_tag->real_max_depth optimization

blk_queue_tag->real_max_depth was used to optimize out unnecessary
allocations/frees on tag resize.  However, the whole thing was very broken -
tag_map was never allocated to real_max_depth resulting in access beyond the
end of the map, bits in [max_depth..real_max_depth] were set when initializing
a map and copied when resizing resulting in pre-occupied tags.

As the gain of the optimization is very small, well, almost nill, remove the
whole thing.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 235c3414d268..8d7e2f4151d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -294,7 +294,6 @@ struct blk_queue_tag {
 	struct list_head busy_list;	/* fifo list of busy tags */
 	int busy;			/* current depth */
 	int max_depth;			/* what we will send to device */
-	int real_max_depth;		/* what the array can hold */
 	atomic_t refcnt;		/* map can be shared */
 };
 
-- 
cgit v1.2.2


From f7d37d028dfba90b1b747f8ac685bf0959aeda8b Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 23 Jun 2005 00:08:50 -0700
Subject: [PATCH] blk: remove BLK_TAGS_{PER_LONG|MASK}

Replace BLK_TAGS_PER_LONG with BITS_PER_LONG and remove unused BLK_TAGS_MASK.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/blkdev.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8d7e2f4151d0..60272141ff19 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -285,9 +285,6 @@ enum blk_queue_state {
 	Queue_up,
 };
 
-#define BLK_TAGS_PER_LONG	(sizeof(unsigned long) * 8)
-#define BLK_TAGS_MASK		(BLK_TAGS_PER_LONG - 1)
-
 struct blk_queue_tag {
 	struct request **tag_index;	/* map of busy tags */
 	unsigned long *tag_map;		/* bit map of free/busy tags */
-- 
cgit v1.2.2


From 55c888d6d09a0df236adfaf8ccf06ff5d0646775 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 23 Jun 2005 00:08:56 -0700
Subject: [PATCH] timers fixes/improvements

This patch tries to solve following problems:

1. del_timer_sync() is racy. The timer can be fired again after
   del_timer_sync have checked all cpus and before it will recheck
   timer_pending().

2. It has scalability problems. All cpus are scanned to determine
   if the timer is running on that cpu.

   With this patch del_timer_sync is O(1) and no slower than plain
   del_timer(pending_timer), unless it has to actually wait for
   completion of the currently running timer.

   The only restriction is that the recurring timer should not use
   add_timer_on().

3. The timers are not serialized wrt to itself.

   If CPU_0 does mod_timer(jiffies+1) while the timer is currently
   running on CPU 1, it is quite possible that local interrupt on
   CPU_0 will start that timer before it finished on CPU_1.

4. The timers locking is suboptimal. __mod_timer() takes 3 locks
   at once and still requires wmb() in del_timer/run_timers.

   The new implementation takes 2 locks sequentially and does not
   need memory barriers.

Currently ->base != NULL means that the timer is pending. In that case
->base.lock is used to lock the timer. __mod_timer also takes timer->lock
because ->base can be == NULL.

This patch uses timer->entry.next != NULL as indication that the timer is
pending. So it does __list_del(), entry->next = NULL instead of list_del()
when the timer is deleted.

The ->base field is used for hashed locking only, it is initialized
in init_timer() which sets ->base = per_cpu(tvec_bases). When the
tvec_bases.lock is locked, it means that all timers which are tied
to this base via timer->base are locked, and the base itself is locked
too.

So __run_timers/migrate_timers can safely modify all timers which could
be found on ->tvX lists (pending timers).

When the timer's base is locked, and the timer removed from ->entry list
(which means that _run_timers/migrate_timers can't see this timer), it is
possible to set timer->base = NULL and drop the lock: the timer remains
locked.

This patch adds lock_timer_base() helper, which waits for ->base != NULL,
locks the ->base, and checks it is still the same.

__mod_timer() schedules the timer on the local CPU and changes it's base.
However, it does not lock both old and new bases at once. It locks the
timer via lock_timer_base(), deletes the timer, sets ->base = NULL, and
unlocks old base. Then __mod_timer() locks new_base, sets ->base = new_base,
and adds this timer. This simplifies the code, because AB-BA deadlock is not
possible. __mod_timer() also ensures that the timer's base is not changed
while the timer's handler is running on the old base.

__run_timers(), del_timer() do not change ->base anymore, they only clear
pending flag.

So del_timer_sync() can test timer->base->running_timer == timer to detect
whether it is running or not.

We don't need timer_list->lock anymore, this patch kills it.

We also don't need barriers. del_timer() and __run_timers() used smp_wmb()
before clearing timer's pending flag. It was needed because __mod_timer()
did not lock old_base if the timer is not pending, so __mod_timer()->list_add()
could race with del_timer()->list_del(). With this patch these functions are
serialized through base->lock.

One problem. TIMER_INITIALIZER can't use per_cpu(tvec_bases). So this patch
adds global

        struct timer_base_s {
                spinlock_t lock;
                struct timer_list *running_timer;
        } __init_timer_base;

which is used by TIMER_INITIALIZER. The corresponding fields in tvec_t_base_s
struct are replaced by struct timer_base_s t_base.

It is indeed ugly. But this can't have scalability problems. The global
__init_timer_base.lock is used only when __mod_timer() is called for the first
time AND the timer was compile time initialized. After that the timer migrates
to the local CPU.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Renaud Lienhart <renaud.lienhart@free.fr>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/timer.h | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 90db1cc62ddd..2e78fedfc069 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -6,45 +6,33 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 
-struct tvec_t_base_s;
+struct timer_base_s;
 
 struct timer_list {
 	struct list_head entry;
 	unsigned long expires;
 
-	spinlock_t lock;
 	unsigned long magic;
 
 	void (*function)(unsigned long);
 	unsigned long data;
 
-	struct tvec_t_base_s *base;
+	struct timer_base_s *base;
 };
 
 #define TIMER_MAGIC	0x4b87ad6e
 
+extern struct timer_base_s __init_timer_base;
+
 #define TIMER_INITIALIZER(_function, _expires, _data) {		\
 		.function = (_function),			\
 		.expires = (_expires),				\
 		.data = (_data),				\
-		.base = NULL,					\
+		.base = &__init_timer_base,			\
 		.magic = TIMER_MAGIC,				\
-		.lock = SPIN_LOCK_UNLOCKED,			\
 	}
 
-/***
- * init_timer - initialize a timer.
- * @timer: the timer to be initialized
- *
- * init_timer() must be done to a timer prior calling *any* of the
- * other timer functions.
- */
-static inline void init_timer(struct timer_list * timer)
-{
-	timer->base = NULL;
-	timer->magic = TIMER_MAGIC;
-	spin_lock_init(&timer->lock);
-}
+void fastcall init_timer(struct timer_list * timer);
 
 /***
  * timer_pending - is a timer pending?
@@ -58,7 +46,7 @@ static inline void init_timer(struct timer_list * timer)
  */
 static inline int timer_pending(const struct timer_list * timer)
 {
-	return timer->base != NULL;
+	return timer->entry.next != NULL;
 }
 
 extern void add_timer_on(struct timer_list *timer, int cpu);
@@ -89,12 +77,12 @@ static inline void add_timer(struct timer_list * timer)
 
 #ifdef CONFIG_SMP
   extern int del_timer_sync(struct timer_list *timer);
-  extern int del_singleshot_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t) del_timer(t)
-# define del_singleshot_timer_sync(t) del_timer(t)
 #endif
 
+#define del_singleshot_timer_sync(t) del_timer_sync(t)
+
 extern void init_timers(void);
 extern void run_local_timers(void);
 extern void it_real_fn(unsigned long);
-- 
cgit v1.2.2


From fd450b7318b75343fd76b3d95416853e34e72c95 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 23 Jun 2005 00:08:59 -0700
Subject: [PATCH] timers: introduce try_to_del_timer_sync()

This patch splits del_timer_sync() into 2 functions.  The new one,
try_to_del_timer_sync(), returns -1 when it hits executing timer.

It can be used in interrupt context, or when the caller hold locks which
can prevent completion of the timer's handler.

NOTE.  Currently it can't be used in interrupt context in UP case, because
->running_timer is used only with CONFIG_SMP.

Should the need arise, it is possible to kill #ifdef CONFIG_SMP in
set_running_timer(), it is cheap.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/timer.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 2e78fedfc069..221f81ac2002 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -76,9 +76,11 @@ static inline void add_timer(struct timer_list * timer)
 }
 
 #ifdef CONFIG_SMP
+  extern int try_to_del_timer_sync(struct timer_list *timer);
   extern int del_timer_sync(struct timer_list *timer);
 #else
-# define del_timer_sync(t) del_timer(t)
+# define try_to_del_timer_sync(t)	del_timer(t)
+# define del_timer_sync(t)		del_timer(t)
 #endif
 
 #define del_singleshot_timer_sync(t) del_timer_sync(t)
-- 
cgit v1.2.2


From 991114c6fa6a21d1fa4d544abe78592352860c82 Mon Sep 17 00:00:00 2001
From: Alexander Viro <aviro@redhat.com>
Date: Thu, 23 Jun 2005 00:09:01 -0700
Subject: [PATCH] fix for prune_icache()/forced final iput() races

Based on analysis and a patch from Russ Weight <rweight@us.ibm.com>

There is a race condition that can occur if an inode is allocated and then
released (using iput) during the ->fill_super functions.  The race
condition is between kswapd and mount.

For most filesystems this can only happen in an error path when kswapd is
running concurrently.  For isofs, however, the error can occur in a more
common code path (which is how the bug was found).

The logic here is "we want final iput() to free inode *now* instead of
letting it sit in cache if fs is going down or had not quite come up".  The
problem is with kswapd seeing such inodes in the middle of being killed and
happily taking over.

The clean solution would be to tell kswapd to leave those inodes alone and
let our final iput deal with them.  I.e.  add a new flag
(I_FORCED_FREEING), set it before write_inode_now() there and make
prune_icache() leave those alone.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5a8db00df29..3622e952e98c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1025,6 +1025,7 @@ struct super_operations {
 #define I_FREEING		16
 #define I_CLEAR			32
 #define I_NEW			64
+#define I_WILL_FREE		128
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
 
-- 
cgit v1.2.2


From 543537bd922692bc978e2e356fcd8bfc9c2ee7d5 Mon Sep 17 00:00:00 2001
From: Paulo Marques <pmarques@grupopie.com>
Date: Thu, 23 Jun 2005 00:09:02 -0700
Subject: [PATCH] create a kstrdup library function

This patch creates a new kstrdup library function and changes the "local"
implementations in several places to use this function.

Most of the changes come from the sound and net subsystems.  The sound part
had already been acknowledged by Takashi Iwai and the net part by David S.
Miller.

I left UML alone for now because I would need more time to read the code
carefully before making changes there.

Signed-off-by: Paulo Marques <pmarques@grupopie.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/netdevice.h | 4 ----
 include/linux/string.h    | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d6afd440cf7b..d89816ad642f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -925,10 +925,6 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward);
 extern void		net_enable_timestamp(void);
 extern void		net_disable_timestamp(void);
 
-#ifdef CONFIG_SYSCTL
-extern char *net_sysctl_strdup(const char *s);
-#endif
-
 #endif /* __KERNEL__ */
 
 #endif	/* _LINUX_DEV_H */
diff --git a/include/linux/string.h b/include/linux/string.h
index b9fc59469956..93994c613095 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -88,6 +88,8 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
 
+extern char *kstrdup(const char *s, int gfp);
+
 #ifdef __cplusplus
 }
 #endif
-- 
cgit v1.2.2


From 35a82d1a53e1a9ad54efafcc940f9335beaed5c3 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Thu, 23 Jun 2005 00:09:06 -0700
Subject: [PATCH] optimise loop driver a bit

Looks like locking can be optimised quite a lot.  Increase lock widths
slightly so lo_lock is taken fewer times per request.  Also it was quite
trivial to cover lo_pending with that lock, and remove the atomic
requirement.  This also makes memory ordering explicitly correct, which is
nice (not that I particularly saw any mem ordering bugs).

Test was reading 4 250MB files in parallel on ext2-on-tmpfs filesystem (1K
block size, 4K page size).  System is 2 socket Xeon with HT (4 thread).

intel:/home/npiggin# umount /dev/loop0 ; mount /dev/loop0 /mnt/loop ; /usr/bin/time ./mtloop.sh

Before:
0.24user 5.51system 0:02.84elapsed 202%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.52system 0:02.88elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.57system 0:02.89elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.22user 5.51system 0:02.90elapsed 197%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.44system 0:02.91elapsed 193%CPU (0avgtext+0avgdata 0maxresident)k

After:
0.07user 2.34system 0:01.68elapsed 143%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.37system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.39system 0:01.68elapsed 145%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.36system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.42system 0:01.68elapsed 147%CPU (0avgtext+0avgdata 0maxresident)k

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/loop.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/loop.h b/include/linux/loop.h
index 8220d9c9da00..53fa51595443 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -61,7 +61,7 @@ struct loop_device {
 	struct semaphore	lo_sem;
 	struct semaphore	lo_ctl_mutex;
 	struct semaphore	lo_bh_mutex;
-	atomic_t		lo_pending;
+	int			lo_pending;
 
 	request_queue_t		*lo_queue;
 };
-- 
cgit v1.2.2


From ac20427ef6aa63da663bdc88b71d16f7394f5e23 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@redhat.com>
Date: Thu, 23 Jun 2005 00:09:11 -0700
Subject: [PATCH] add check to /proc/devices read routines

Patch to add check to get_chrdev_list and get_blkdev_list to prevent reads
of /proc/devices from spilling over the provided page if more than 4096
bytes of string data are generated from all the registered character and
block devices in a system

Signed-off-by: Neil Horman <nhorman@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/genhd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index af26dc718ef6..01796c41c951 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -224,7 +224,7 @@ static inline void free_disk_stats(struct gendisk *disk)
 extern void disk_round_stats(struct gendisk *disk);
 
 /* drivers/block/genhd.c */
-extern int get_blkdev_list(char *);
+extern int get_blkdev_list(char *, int);
 extern void add_disk(struct gendisk *disk);
 extern void del_gendisk(struct gendisk *gp);
 extern void unlink_gendisk(struct gendisk *gp);
-- 
cgit v1.2.2


From 84de856ed30c568c2bb7b9ac0679772bd2737d9b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:16 -0700
Subject: [PATCH] quota: consolidate code surrounding vfs_quota_on_mount

Move some code duplicated in both callers into vfs_quota_on_mount

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/quotaops.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e57baa85e744..d211507ab246 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -39,7 +39,8 @@ extern int dquot_commit_info(struct super_block *sb, int type);
 extern int dquot_mark_dquot_dirty(struct dquot *dquot);
 
 extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path);
-extern int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry);
+extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+		int format_id, int type);
 extern int vfs_quota_off(struct super_block *sb, int type);
 #define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type)
 extern int vfs_quota_sync(struct super_block *sb, int type);
-- 
cgit v1.2.2


From b94cce926b2b902b79380ccba370d6f9f2980de0 Mon Sep 17 00:00:00 2001
From: Hien Nguyen <hien@us.ibm.com>
Date: Thu, 23 Jun 2005 00:09:19 -0700
Subject: [PATCH] kprobes: function-return probes

This patch adds function-return probes to kprobes for the i386
architecture.  This enables you to establish a handler to be run when a
function returns.

1. API

Two new functions are added to kprobes:

	int register_kretprobe(struct kretprobe *rp);
	void unregister_kretprobe(struct kretprobe *rp);

2. Registration and unregistration

2.1 Register

  To register a function-return probe, the user populates the following
  fields in a kretprobe object and calls register_kretprobe() with the
  kretprobe address as an argument:

  kp.addr - the function's address

  handler - this function is run after the ret instruction executes, but
  before control returns to the return address in the caller.

  maxactive - The maximum number of instances of the probed function that
  can be active concurrently.  For example, if the function is non-
  recursive and is called with a spinlock or mutex held, maxactive = 1
  should be enough.  If the function is non-recursive and can never
  relinquish the CPU (e.g., via a semaphore or preemption), NR_CPUS should
  be enough.  maxactive is used to determine how many kretprobe_instance
  objects to allocate for this particular probed function.  If maxactive <=
  0, it is set to a default value (if CONFIG_PREEMPT maxactive=max(10, 2 *
  NR_CPUS) else maxactive=NR_CPUS)

  For example:

    struct kretprobe rp;
    rp.kp.addr = /* entrypoint address */
    rp.handler = /*return probe handler */
    rp.maxactive = /* e.g., 1 or NR_CPUS or 0, see the above explanation */
    register_kretprobe(&rp);

  The following field may also be of interest:

  nmissed - Initialized to zero when the function-return probe is
  registered, and incremented every time the probed function is entered but
  there is no kretprobe_instance object available for establishing the
  function-return probe (i.e., because maxactive was set too low).

2.2 Unregister

  To unregiter a function-return probe, the user calls
  unregister_kretprobe() with the same kretprobe object as registered
  previously.  If a probed function is running when the return probe is
  unregistered, the function will return as expected, but the handler won't
  be run.

3. Limitations

3.1 This patch supports only the i386 architecture, but patches for
    x86_64 and ppc64 are anticipated soon.

3.2 Return probes operates by replacing the return address in the stack
    (or in a known register, such as the lr register for ppc).  This may
    cause __builtin_return_address(0), when invoked from the return-probed
    function, to return the address of the return-probes trampoline.

3.3 This implementation uses the "Multiprobes at an address" feature in
    2.6.12-rc3-mm3.

3.4 Due to a limitation in multi-probes, you cannot currently establish
    a return probe and a jprobe on the same function.  A patch to remove
    this limitation is being tested.

This feature is required by SystemTap (http://sourceware.org/systemtap),
and reflects ideas contributed by several SystemTap developers, including
Will Cohen and Ananth Mavinakayanahalli.

Signed-off-by: Hien Nguyen <hien@us.ibm.com>
Signed-off-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@laposte.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kprobes.h | 90 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 88 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 99ddba5a4e00..fba39f87efec 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -25,21 +25,31 @@
  *		Rusty Russell).
  * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
  *		interface to access function arguments.
+ * 2005-May	Hien Nguyen <hien@us.ibm.com> and Jim Keniston
+ *		<jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
+ *		<prasanna@in.ibm.com> added function-return probes.
  */
 #include <linux/config.h>
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
+#include <linux/spinlock.h>
+
 #include <asm/kprobes.h>
 
 struct kprobe;
 struct pt_regs;
+struct kretprobe;
+struct kretprobe_instance;
 typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *);
 typedef int (*kprobe_break_handler_t) (struct kprobe *, struct pt_regs *);
 typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *,
 				       unsigned long flags);
 typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *,
 				       int trapnr);
+typedef int (*kretprobe_handler_t) (struct kretprobe_instance *,
+				    struct pt_regs *);
+
 struct kprobe {
 	struct hlist_node hlist;
 
@@ -85,6 +95,62 @@ struct jprobe {
 	kprobe_opcode_t *entry;	/* probe handling code to jump to */
 };
 
+#ifdef ARCH_SUPPORTS_KRETPROBES
+extern int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs);
+extern void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
+							unsigned long flags);
+extern struct task_struct *arch_get_kprobe_task(void *ptr);
+extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs);
+extern void arch_kprobe_flush_task(struct task_struct *tk, spinlock_t *kp_lock);
+#else /* ARCH_SUPPORTS_KRETPROBES */
+static inline void kretprobe_trampoline(void)
+{
+}
+static inline int trampoline_probe_handler(struct kprobe *p,
+						struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void trampoline_post_handler(struct kprobe *p,
+				struct pt_regs *regs, unsigned long flags)
+{
+}
+static inline void arch_prepare_kretprobe(struct kretprobe *rp,
+					struct pt_regs *regs)
+{
+}
+static inline void arch_kprobe_flush_task(struct task_struct *tk)
+{
+}
+#define arch_get_kprobe_task(ptr) ((struct task_struct *)NULL)
+#endif /* ARCH_SUPPORTS_KRETPROBES */
+/*
+ * Function-return probe -
+ * Note:
+ * User needs to provide a handler function, and initialize maxactive.
+ * maxactive - The maximum number of instances of the probed function that
+ * can be active concurrently.
+ * nmissed - tracks the number of times the probed function's return was
+ * ignored, due to maxactive being too low.
+ *
+ */
+struct kretprobe {
+	struct kprobe kp;
+	kretprobe_handler_t handler;
+	int maxactive;
+	int nmissed;
+	struct hlist_head free_instances;
+	struct hlist_head used_instances;
+};
+
+struct kretprobe_instance {
+	struct hlist_node uflist; /* either on free list or used list */
+	struct hlist_node hlist;
+	struct kretprobe *rp;
+	void *ret_addr;
+	void *stack_addr;
+};
+
 #ifdef CONFIG_KPROBES
 /* Locks kprobe: irq must be disabled */
 void lock_kprobes(void);
@@ -104,6 +170,7 @@ extern void show_registers(struct pt_regs *regs);
 
 /* Get the kprobe at this addr (if any).  Must have called lock_kprobes */
 struct kprobe *get_kprobe(void *addr);
+struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
 
 int register_kprobe(struct kprobe *p);
 void unregister_kprobe(struct kprobe *p);
@@ -113,7 +180,16 @@ int register_jprobe(struct jprobe *p);
 void unregister_jprobe(struct jprobe *p);
 void jprobe_return(void);
 
-#else
+int register_kretprobe(struct kretprobe *rp);
+void unregister_kretprobe(struct kretprobe *rp);
+
+struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp);
+struct kretprobe_instance *get_rp_inst(void *sara);
+struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk);
+void add_rp_inst(struct kretprobe_instance *ri);
+void kprobe_flush_task(struct task_struct *tk);
+void recycle_rp_inst(struct kretprobe_instance *ri);
+#else /* CONFIG_KPROBES */
 static inline int kprobe_running(void)
 {
 	return 0;
@@ -135,5 +211,15 @@ static inline void unregister_jprobe(struct jprobe *p)
 static inline void jprobe_return(void)
 {
 }
-#endif
+static inline int register_kretprobe(struct kretprobe *rp)
+{
+	return -ENOSYS;
+}
+static inline void unregister_kretprobe(struct kretprobe *rp)
+{
+}
+static inline void kprobe_flush_task(struct task_struct *tk)
+{
+}
+#endif				/* CONFIG_KPROBES */
 #endif				/* _LINUX_KPROBES_H */
-- 
cgit v1.2.2


From 7e1048b11c5afe79aac46a42e3ccec86b8365c6d Mon Sep 17 00:00:00 2001
From: Rusty Lynch <rusty.lynch@intel.com>
Date: Thu, 23 Jun 2005 00:09:25 -0700
Subject: [PATCH] Move kprobe [dis]arming into arch specific code

The architecture independent code of the current kprobes implementation is
arming and disarming kprobes at registration time.  The problem is that the
code is assuming that arming and disarming is a just done by a simple write
of some magic value to an address.  This is problematic for ia64 where our
instructions look more like structures, and we can not insert break points
by just doing something like:

*p->addr = BREAKPOINT_INSTRUCTION;

The following patch to 2.6.12-rc4-mm2 adds two new architecture dependent
functions:

     * void arch_arm_kprobe(struct kprobe *p)
     * void arch_disarm_kprobe(struct kprobe *p)

and then adds the new functions for each of the architectures that already
implement kprobes (spar64/ppc64/i386/x86_64).

I thought arch_[dis]arm_kprobe was the most descriptive of what was really
happening, but each of the architectures already had a disarm_kprobe()
function that was really a "disarm and do some other clean-up items as
needed when you stumble across a recursive kprobe." So...  I took the
liberty of changing the code that was calling disarm_kprobe() to call
arch_disarm_kprobe(), and then do the cleanup in the block of code dealing
with the recursive kprobe case.

So far this patch as been tested on i386, x86_64, and ppc64, but still
needs to be tested in sparc64.

Signed-off-by: Rusty Lynch <rusty.lynch@intel.com>
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kprobes.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index fba39f87efec..0f90466fb8b0 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -165,6 +165,8 @@ static inline int kprobe_running(void)
 
 extern int arch_prepare_kprobe(struct kprobe *p);
 extern void arch_copy_kprobe(struct kprobe *p);
+extern void arch_arm_kprobe(struct kprobe *p);
+extern void arch_disarm_kprobe(struct kprobe *p);
 extern void arch_remove_kprobe(struct kprobe *p);
 extern void show_registers(struct pt_regs *regs);
 
-- 
cgit v1.2.2


From 0aa55e4d7db822059fe8132fe9f2b7773c48216c Mon Sep 17 00:00:00 2001
From: Hien Nguyen <hien@us.ibm.com>
Date: Thu, 23 Jun 2005 00:09:26 -0700
Subject: [PATCH] kprobes: moves lock-unlock to non-arch kprobe_flush_task

This patch moves the lock/unlock of the arch specific kprobe_flush_task()
to the non-arch specific kprobe_flusk_task().

Signed-off-by: Hien Nguyen <hien@us.ibm.com>
Acked-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kprobes.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0f90466fb8b0..461391decc46 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -33,7 +33,6 @@
 #include <linux/list.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
-#include <linux/spinlock.h>
 
 #include <asm/kprobes.h>
 
@@ -101,7 +100,7 @@ extern void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs,
 							unsigned long flags);
 extern struct task_struct *arch_get_kprobe_task(void *ptr);
 extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs);
-extern void arch_kprobe_flush_task(struct task_struct *tk, spinlock_t *kp_lock);
+extern void arch_kprobe_flush_task(struct task_struct *tk);
 #else /* ARCH_SUPPORTS_KRETPROBES */
 static inline void kretprobe_trampoline(void)
 {
-- 
cgit v1.2.2


From ea32c65cc2d2294c04e9f81d0578a6f51febfdbf Mon Sep 17 00:00:00 2001
From: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Date: Thu, 23 Jun 2005 00:09:36 -0700
Subject: [PATCH] kprobes: Temporary disarming of reentrant probe

In situations where a kprobes handler calls a routine which has a probe on it,
then kprobes_handler() disarms the new probe forever.  This patch removes the
above limitation by temporarily disarming the new probe.  When the another
probe hits while handling the old probe, the kprobes_handler() saves previous
kprobes state and handles the new probe without calling the new kprobes
registered handlers.  kprobe_post_handler() restores back the previous kprobes
state and the normal execution continues.

However on x86_64 architecture, re-rentrancy is provided only through
pre_handler().  If a routine having probe is referenced through
post_handler(), then the probes on that routine are disarmed forever, since
the exception stack is gets changed after the processor single steps the
instruction of the new probe.

This patch includes generic changes to support temporary disarming on
reentrancy of probes.

Signed-of-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kprobes.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 461391decc46..5e1a7b0d7b3f 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -36,6 +36,12 @@
 
 #include <asm/kprobes.h>
 
+/* kprobe_status settings */
+#define KPROBE_HIT_ACTIVE	0x00000001
+#define KPROBE_HIT_SS		0x00000002
+#define KPROBE_REENTER		0x00000004
+#define KPROBE_HIT_SSDONE	0x00000008
+
 struct kprobe;
 struct pt_regs;
 struct kretprobe;
@@ -55,6 +61,9 @@ struct kprobe {
 	/* list of kprobes for multi-handler support */
 	struct list_head list;
 
+	/*count the number of times this probe was temporarily disarmed */
+	unsigned long nmissed;
+
 	/* location of the probe point */
 	kprobe_opcode_t *addr;
 
-- 
cgit v1.2.2


From d6e711448137ca3301512cec41a2c2ce852b3d0a Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 23 Jun 2005 00:09:43 -0700
Subject: [PATCH] setuid core dump

Add a new `suid_dumpable' sysctl:

This value can be used to query and set the core dump mode for setuid
or otherwise protected/tainted binaries. The modes are

0 - (default) - traditional behaviour.  Any process which has changed
    privilege levels or is execute only will not be dumped

1 - (debug) - all processes dump core when possible.  The core dump is
    owned by the current user and no security is applied.  This is intended
    for system debugging situations only.  Ptrace is unchecked.

2 - (suidsafe) - any binary which normally would not be dumped is dumped
    readable by root only.  This allows the end user to remove such a dump but
    not access it directly.  For security reasons core dumps in this mode will
    not overwrite one another or other files.  This mode is appropriate when
    adminstrators are attempting to debug problems in a normal environment.

(akpm:

> > +EXPORT_SYMBOL(suid_dumpable);
>
> EXPORT_SYMBOL_GPL?

No problem to me.

> >  	if (current->euid == current->uid && current->egid == current->gid)
> >  		current->mm->dumpable = 1;
>
> Should this be SUID_DUMP_USER?

Actually the feedback I had from last time was that the SUID_ defines
should go because its clearer to follow the numbers. They can go
everywhere (and there are lots of places where dumpable is tested/used
as a bool in untouched code)

> Maybe this should be renamed to `dump_policy' or something.  Doing that
> would help us catch any code which isn't using the #defines, too.

Fair comment. The patch was designed to be easy to maintain for Red Hat
rather than for merging. Changing that field would create a gigantic
diff because it is used all over the place.

)

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/binfmts.h | 5 +++++
 include/linux/sched.h   | 2 +-
 include/linux/sysctl.h  | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 7e736e201c46..c1e82c514443 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -69,6 +69,11 @@ extern void remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *,struct pt_regs *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 
+extern int suid_dumpable;
+#define SUID_DUMP_DISABLE	0	/* No setuid dumping */
+#define SUID_DUMP_USER		1	/* Dump as user of process */
+#define SUID_DUMP_ROOT		2	/* Dump as root */
+
 /* Stack area protections */
 #define EXSTACK_DEFAULT   0	/* Whatever the arch defaults to */
 #define EXSTACK_DISABLE_X 1	/* Disable executable stacks */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b58afd97a180..901742f92389 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -246,7 +246,7 @@ struct mm_struct {
 
 	unsigned long saved_auxv[42]; /* for /proc/PID/auxv */
 
-	unsigned dumpable:1;
+	unsigned dumpable:2;
 	cpumask_t cpu_vm_mask;
 
 	/* Architecture-specific MM context */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index a17745c80a91..614e939c78a4 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -136,6 +136,7 @@ enum
 	KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */
 	KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */
 	KERN_RANDOMIZE=68, /* int: randomize virtual address space */
+	KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
 };
 
 
-- 
cgit v1.2.2


From ef3daeda7b58f046f94b26637d500354038d39f4 Mon Sep 17 00:00:00 2001
From: Yoav Zach <yoav_zach@yahoo.com>
Date: Thu, 23 Jun 2005 00:09:58 -0700
Subject: [PATCH] Don't force O_LARGEFILE for 32 bit processes on ia64

In ia64 kernel, the O_LARGEFILE flag is forced when opening a file.  This
is problematic for execution of 32 bit processes, which are not largefile
aware, either by SW emulation or by HW execution.

For such processes, the problem is two-fold:

1) When trying to open a file that is larger than 4G
   the operation should fail, but it's not
2) Writing to offset larger than 4G should fail, but
   it's not

The proposed patch takes advantage of the way 32 bit processes are
identified in ia64 systems.  Such processes have PER_LINUX32 for their
personality.  With the patch, the ia64 kernel will not enforce the
O_LARGEFILE flag if the current process has PER_LINUX32 set.  The behavior
for all other architectures remains unchanged.

Signed-off-by: Yoav Zach <yoav.zach@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fcntl.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 704fb76b6334..8a7c82151de9 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -25,6 +25,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef force_o_largefile
+#define force_o_largefile() (BITS_PER_LONG != 32)
+#endif
+
 #if BITS_PER_LONG == 32
 #define IS_GETLK32(cmd)		((cmd) == F_GETLK)
 #define IS_SETLK32(cmd)		((cmd) == F_SETLK)
-- 
cgit v1.2.2


From 46c271bedd2c8444b1d05bc44928beec0c07debc Mon Sep 17 00:00:00 2001
From: Peter Osterlund <petero2@telia.com>
Date: Thu, 23 Jun 2005 00:10:02 -0700
Subject: [PATCH] Improve CD/DVD packet driver write performance

This patch improves write performance for the CD/DVD packet writing driver.
 The logic for switching between reading and writing has been changed so
that streaming writes are no longer interrupted by read requests.

Signed-off-by: Peter Osterlund <petero2@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pktcdvd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 4e2d2a942ecb..4b32bce9a289 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -159,7 +159,7 @@ struct packet_iosched
 	struct bio		*read_queue_tail;
 	struct bio		*write_queue;
 	struct bio		*write_queue_tail;
-	int			high_prio_read;	/* An important read request has been queued */
+	sector_t		last_write;	/* The sector where the last write ended */
 	int			successive_reads;
 };
 
-- 
cgit v1.2.2


From bb93e3a52f8db7210258a1a2134cced0b78a46e1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 23 Jun 2005 00:10:15 -0700
Subject: [PATCH] block: add unlocked_ioctl support for block devices

This patch allows block device drivers to convert their ioctl functions to
unlocked_ioctl() like character devices and other subsystems.  All
functions that were called with the BKL held before are still used that
way, but I would not be surprised if it could be removed from the ioctl
functions in drivers/block/ioctl.c themselves.

As a side note, I found that compat_blkdev_ioctl() acquires the BKL as
well, which looks like a bug.  I have checked that every user of
disk->fops->compat_ioctl() in the current git tree gets the BKL itself, so
it could easily be removed from compat_blkdev_ioctl().

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3622e952e98c..9b1278e21279 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -884,6 +884,7 @@ struct block_device_operations {
 	int (*open) (struct inode *, struct file *);
 	int (*release) (struct inode *, struct file *);
 	int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
+	long (*unlocked_ioctl) (struct file *, unsigned, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned, unsigned long);
 	int (*media_changed) (struct gendisk *);
 	int (*revalidate_disk) (struct gendisk *);
-- 
cgit v1.2.2


From 45778ca819accab1a4a3378b3566cab0f189164f Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@graphe.net>
Date: Thu, 23 Jun 2005 00:10:17 -0700
Subject: [PATCH] Remove f_error field from struct file

The following patch removes the f_error field and all checks of f_error.

Trond said:

  f_error was introduced for NFS, and made sense when we were guaranteed
  always to have a file pointer around when write errors occurred.  Since
  then, we have (for various reasons) had to introduce the nfs_open_context in
  order to track the file read/write state, and it made sense to move our
  f_error tracking there too.

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9b1278e21279..517bf4966bf5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -581,7 +581,6 @@ struct file {
 	atomic_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
-	int			f_error;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
-- 
cgit v1.2.2


From f9fd27a253d5e0b23531d12ce7ad15b6535d4486 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:10:19 -0700
Subject: [PATCH] acl endianess annotations

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/posix_acl_xattr.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index 5efd0a6dad94..fe271c1947b2 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -23,13 +23,13 @@
 #define ACL_UNDEFINED_ID	(-1)
 
 typedef struct {
-	__u16			e_tag;
-	__u16			e_perm;
-	__u32			e_id;
+	__le16			e_tag;
+	__le16			e_perm;
+	__le32			e_id;
 } posix_acl_xattr_entry;
 
 typedef struct {
-	__u32			a_version;
+	__le32			a_version;
 	posix_acl_xattr_entry	a_entries[0];
 } posix_acl_xattr_header;
 
-- 
cgit v1.2.2


From 9a59f452abe11f569e13ec16c51e6d61c54b9838 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:10:19 -0700
Subject: [PATCH] remove <linux/xattr_acl.h>

This file duplicates <linux/posix_acl_xattr.h>, using slightly different
names.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/posix_acl_xattr.h | 3 +++
 include/linux/reiserfs_acl.h    | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h
index fe271c1947b2..6e53c34035cd 100644
--- a/include/linux/posix_acl_xattr.h
+++ b/include/linux/posix_acl_xattr.h
@@ -52,4 +52,7 @@ posix_acl_xattr_count(size_t size)
 	return size / sizeof(posix_acl_xattr_entry);
 }
 
+struct posix_acl *posix_acl_from_xattr(const void *value, size_t size);
+int posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size);
+
 #endif	/* _POSIX_ACL_XATTR_H */
diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h
index 2aef9c3f5ce8..0760507a545b 100644
--- a/include/linux/reiserfs_acl.h
+++ b/include/linux/reiserfs_acl.h
@@ -1,6 +1,5 @@
 #include <linux/init.h>
 #include <linux/posix_acl.h>
-#include <linux/xattr_acl.h>
 
 #define REISERFS_ACL_VERSION	0x0001
 
-- 
cgit v1.2.2


From c43dc2fd885b5658cfd7cedb7bcca20910c517a4 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Thu, 23 Jun 2005 00:10:27 -0700
Subject: [PATCH] aio: make wait_queue ->task ->private

In the upcoming aio_down patch, it is useful to store a private data
pointer in the kiocb's wait_queue.  Since we provide our own wake up
function and do not require the task_struct pointer, it makes sense to
convert the task pointer into a generic private pointer.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/wait.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index c9486c3efb4a..d38c9fecdc36 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -33,7 +33,7 @@ int default_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key
 struct __wait_queue {
 	unsigned int flags;
 #define WQ_FLAG_EXCLUSIVE	0x01
-	struct task_struct * task;
+	void *private;
 	wait_queue_func_t func;
 	struct list_head task_list;
 };
@@ -60,7 +60,7 @@ typedef struct __wait_queue_head wait_queue_head_t;
  */
 
 #define __WAITQUEUE_INITIALIZER(name, tsk) {				\
-	.task		= tsk,						\
+	.private	= tsk,						\
 	.func		= default_wake_function,			\
 	.task_list	= { NULL, NULL } }
 
@@ -86,7 +86,7 @@ static inline void init_waitqueue_head(wait_queue_head_t *q)
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 {
 	q->flags = 0;
-	q->task = p;
+	q->private = p;
 	q->func = default_wake_function;
 }
 
@@ -94,7 +94,7 @@ static inline void init_waitqueue_func_entry(wait_queue_t *q,
 					wait_queue_func_t func)
 {
 	q->flags = 0;
-	q->task = NULL;
+	q->private = NULL;
 	q->func = func;
 }
 
@@ -110,7 +110,7 @@ static inline int waitqueue_active(wait_queue_head_t *q)
  * aio specifies a wait queue entry with an async notification
  * callback routine, not associated with any task.
  */
-#define is_sync_wait(wait)	(!(wait) || ((wait)->task))
+#define is_sync_wait(wait)	(!(wait) || ((wait)->private))
 
 extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
 extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
@@ -384,7 +384,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
 #define DEFINE_WAIT(name)						\
 	wait_queue_t name = {						\
-		.task		= current,				\
+		.private	= current,				\
 		.func		= autoremove_wake_function,		\
 		.task_list	= LIST_HEAD_INIT((name).task_list),	\
 	}
@@ -393,7 +393,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 	struct wait_bit_queue name = {					\
 		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
 		.wait	= {						\
-			.task		= current,			\
+			.private	= current,			\
 			.func		= wake_bit_function,		\
 			.task_list	=				\
 				LIST_HEAD_INIT((name).wait.task_list),	\
@@ -402,7 +402,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
 #define init_wait(wait)							\
 	do {								\
-		(wait)->task = current;					\
+		(wait)->private = current;				\
 		(wait)->func = autoremove_wake_function;		\
 		INIT_LIST_HEAD(&(wait)->task_list);			\
 	} while (0)
-- 
cgit v1.2.2


From bfb07599da289881d3bcbb601a110e997fc7444b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 23 Jun 2005 00:10:32 -0700
Subject: [PATCH] Introduce tty_unregister_ldisc()

It's a bit strange to see tty_register_ldisc call in modules' exit
functions.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/tty.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1b76106272d3..59ff42c629ec 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -345,6 +345,7 @@ extern int tty_check_change(struct tty_struct * tty);
 extern void stop_tty(struct tty_struct * tty);
 extern void start_tty(struct tty_struct * tty);
 extern int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc);
+extern int tty_unregister_ldisc(int disc);
 extern int tty_register_driver(struct tty_driver *driver);
 extern int tty_unregister_driver(struct tty_driver *driver);
 extern void tty_register_device(struct tty_driver *driver, unsigned index, struct device *dev);
-- 
cgit v1.2.2


From 4749f32da939d4e4160541b2cadc22492bb507ec Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 23 Jun 2005 11:36:56 +0200
Subject: [PATCH] better USB_MON dependencies

This makes the USB_MON less confusing.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3d508bf08402..eb282b581546 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -290,7 +290,7 @@ struct usb_bus {
 	struct class_device *class_dev;	/* class device for this bus */
 	struct kref kref;		/* handles reference counting this bus */
 	void (*release)(struct usb_bus *bus);	/* function to destroy this bus's memory */
-#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
+#if defined(CONFIG_USB_MON)
 	struct mon_bus *mon_bus;	/* non-null when associated */
 	int monitored;			/* non-zero when monitored */
 #endif
-- 
cgit v1.2.2


From 317a76f9a44b437d6301718f4e5d08bd93f98da7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 23 Jun 2005 12:19:55 -0700
Subject: [TCP]: Add pluggable congestion control algorithm infrastructure.

Allow TCP to have multiple pluggable congestion control algorithms.
Algorithms are defined by a set of operations and can be built in
or modules.  The legacy "new RENO" algorithm is used as a starting
point and fallback.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h |  9 +--------
 include/linux/tcp.h    | 49 ++++++++++---------------------------------------
 2 files changed, 11 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 614e939c78a4..72965bfe6cfb 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -333,21 +333,14 @@ enum
 	NET_TCP_FRTO=92,
 	NET_TCP_LOW_LATENCY=93,
 	NET_IPV4_IPFRAG_SECRET_INTERVAL=94,
-	NET_TCP_WESTWOOD=95,
 	NET_IPV4_IGMP_MAX_MSF=96,
 	NET_TCP_NO_METRICS_SAVE=97,
-	NET_TCP_VEGAS=98,
-	NET_TCP_VEGAS_ALPHA=99,
-	NET_TCP_VEGAS_BETA=100,
-	NET_TCP_VEGAS_GAMMA=101,
- 	NET_TCP_BIC=102,
- 	NET_TCP_BIC_FAST_CONVERGENCE=103,
-	NET_TCP_BIC_LOW_WINDOW=104,
 	NET_TCP_DEFAULT_WIN_SCALE=105,
 	NET_TCP_MODERATE_RCVBUF=106,
 	NET_TCP_TSO_WIN_DIVISOR=107,
 	NET_TCP_BIC_BETA=108,
 	NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
+	NET_TCP_CONG_CONTROL=110,
 };
 
 enum {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 97a7c9e03df5..3ea75dd6640a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -203,13 +203,6 @@ struct tcp_sack_block {
 	__u32	end_seq;
 };
 
-enum tcp_congestion_algo {
-	TCP_RENO=0,
-	TCP_VEGAS,
-	TCP_WESTWOOD,
-	TCP_BIC,
-};
-
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
 	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
@@ -305,7 +298,7 @@ struct tcp_sock {
 	__u8	reordering;	/* Packet reordering metric.		*/
 	__u8	frto_counter;	/* Number of new acks after RTO */
 
-	__u8	adv_cong;	/* Using Vegas, Westwood, or BIC */
+	__u8	unused;
 	__u8	defer_accept;	/* User waits for some data after accept() */
 
 /* RTT measurement */
@@ -401,37 +394,10 @@ struct tcp_sock {
 		__u32	time;
 	} rcvq_space;
 
-/* TCP Westwood structure */
-        struct {
-                __u32    bw_ns_est;        /* first bandwidth estimation..not too smoothed 8) */
-                __u32    bw_est;           /* bandwidth estimate */
-                __u32    rtt_win_sx;       /* here starts a new evaluation... */
-                __u32    bk;
-                __u32    snd_una;          /* used for evaluating the number of acked bytes */
-                __u32    cumul_ack;
-                __u32    accounted;
-                __u32    rtt;
-                __u32    rtt_min;          /* minimum observed RTT */
-        } westwood;
-
-/* Vegas variables */
-	struct {
-		__u32	beg_snd_nxt;	/* right edge during last RTT */
-		__u32	beg_snd_una;	/* left edge  during last RTT */
-		__u32	beg_snd_cwnd;	/* saves the size of the cwnd */
-		__u8	doing_vegas_now;/* if true, do vegas for this RTT */
-		__u16	cntRTT;		/* # of RTTs measured within last RTT */
-		__u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
-		__u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
-	} vegas;
-
-	/* BI TCP Parameters */
-	struct {
-		__u32	cnt;		/* increase cwnd by 1 after this number of ACKs */
-		__u32 	last_max_cwnd;	/* last maximium snd_cwnd */
-		__u32	last_cwnd;	/* the last snd_cwnd */
-		__u32   last_stamp;     /* time when updated last_cwnd */
-	} bictcp;
+	/* Pluggable TCP congestion control hook */
+	struct tcp_congestion_ops *ca_ops;
+	u32	ca_priv[16];
+#define TCP_CA_PRIV_SIZE	(16*sizeof(u32))
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -439,6 +405,11 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
 	return (struct tcp_sock *)sk;
 }
 
+static inline void *tcp_ca(const struct tcp_sock *tp)
+{
+	return (void *) tp->ca_priv;
+}
+
 #endif
 
 #endif	/* _LINUX_TCP_H */
-- 
cgit v1.2.2


From 056ede6cface66b400cd3b8e60ed077cc5b85c18 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 23 Jun 2005 12:21:28 -0700
Subject: [TCP]: Report congestion control algorithm in tcp_diag.

Enhancement to the tcp_diag interface used by the iproute2 ss command
to report the tcp congestion control being used by a socket.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp_diag.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h
index ceee962e1d15..7a5996743946 100644
--- a/include/linux/tcp_diag.h
+++ b/include/linux/tcp_diag.h
@@ -99,9 +99,10 @@ enum
 	TCPDIAG_MEMINFO,
 	TCPDIAG_INFO,
 	TCPDIAG_VEGASINFO,
+	TCPDIAG_CONG,
 };
 
-#define TCPDIAG_MAX TCPDIAG_VEGASINFO
+#define TCPDIAG_MAX TCPDIAG_CONG
 
 
 /* TCPDIAG_MEM */
@@ -123,5 +124,4 @@ struct tcpvegas_info {
 	__u32	tcpv_minrtt;
 };
 
-
 #endif /* _TCP_DIAG_H_ */
-- 
cgit v1.2.2


From c1ebcdb8c422cd73f54bcd2b9953e443a47667e5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 23 Jun 2005 20:08:59 -0700
Subject: [NET]: Remove obsolete fastroute stats.

Remove last vestiages of fastroute code that is no longer used.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d89816ad642f..c2e15e381a58 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -165,11 +165,6 @@ struct netif_rx_stats
 	unsigned dropped;
 	unsigned time_squeeze;
 	unsigned throttled;
-	unsigned fastroute_hit;
-	unsigned fastroute_success;
-	unsigned fastroute_defer;
-	unsigned fastroute_deferred_out;
-	unsigned fastroute_latency_reduction;
 	unsigned cpu_collision;
 };
 
-- 
cgit v1.2.2


From 34008d8c631d067caffa136313260525f3ae48a2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 23 Jun 2005 20:10:00 -0700
Subject: [NET]: Remove obsolete netif_rx congestion sensing mechanism.

Remove the congestion sensing mechanism from netif_rx, and always
return either full or empty.  Almost no driver checks the return value
from netif_rx, and those that do only use it for debug messages.

The original design of netif_rx was to do flow control based on the
receive queue, but NAPI has supplanted this and no driver uses the
feedback.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c2e15e381a58..718ad579c65c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -558,8 +558,6 @@ static inline int unregister_gifconf(unsigned int family)
 struct softnet_data
 {
 	int			throttle;
-	int			cng_level;
-	int			avg_blog;
 	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
 	struct net_device	*output_queue;
-- 
cgit v1.2.2


From 31aa02c53c84658f6694f319f09e232ede27be5a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 23 Jun 2005 20:12:48 -0700
Subject: [NET]: Eliminate netif_rx massive packet drops.

Eliminate the throttling behaviour when the netif receive queue fills
because it behaves badly when using high speed networks under load.
The throttling cause multiple packet drops that cause TCP to go into
slow start mode. The same effective patch has been part of BIC TCP and
H-TCP as well as part of Web100.

The existing code drops 100's of packets when the queue fills;
this changes it to individual packet drop-tail.

Signed-off-by: Stephen Hemmminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 718ad579c65c..3a0ed7f9e801 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -164,7 +164,6 @@ struct netif_rx_stats
 	unsigned total;
 	unsigned dropped;
 	unsigned time_squeeze;
-	unsigned throttled;
 	unsigned cpu_collision;
 };
 
@@ -557,10 +556,9 @@ static inline int unregister_gifconf(unsigned int family)
 
 struct softnet_data
 {
-	int			throttle;
+	struct net_device	*output_queue;
 	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
-	struct net_device	*output_queue;
 	struct sk_buff		*completion_queue;
 
 	struct net_device	backlog_dev;	/* Sorry. 8) */
-- 
cgit v1.2.2


From 51b0bdedb8e784d0d969a6b77151911130812400 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 23 Jun 2005 20:14:40 -0700
Subject: [NET]: Separate two usages of netdev_max_backlog.

Separate out the two uses of netdev_max_backlog. One controls the
upper bound on packets processed per softirq, the new name for this is
netdev_budget; the other controls the limit on packets queued via
netif_rx.

Increase the max_backlog default to account for faster processors.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 72965bfe6cfb..ebfe1250f0a4 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -243,6 +243,7 @@ enum
 	NET_CORE_MOD_CONG=16,
 	NET_CORE_DEV_WEIGHT=17,
 	NET_CORE_SOMAXCONN=18,
+	NET_CORE_BUDGET=19,
 };
 
 /* /proc/sys/net/ethernet */
-- 
cgit v1.2.2


From 5f8ef48d240963093451bcf83df89f1a1364f51d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Thu, 23 Jun 2005 20:37:36 -0700
Subject: [TCP]: Allow choosing TCP congestion control via sockopt.

Allow using setsockopt to set TCP congestion control to use on a per
socket basis.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3ea75dd6640a..dfd93d03f5d2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -127,6 +127,7 @@ enum {
 #define TCP_WINDOW_CLAMP	10	/* Bound advertised window */
 #define TCP_INFO		11	/* Information about this connection. */
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
+#define TCP_CONGESTION		13	/* Congestion control algorithm */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
-- 
cgit v1.2.2


From 2de4ff7bd658c97fb357efa3095a509674dacb5a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 23 Jun 2005 20:49:30 -0700
Subject: [LIB]: Textsearch infrastructure.

The textsearch infrastructure provides text searching
facitilies for both linear and non-linear data.
Individual search algorithms are implemented in modules
and chosen by the user.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/textsearch.h | 180 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 180 insertions(+)
 create mode 100644 include/linux/textsearch.h

(limited to 'include/linux')

diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h
new file mode 100644
index 000000000000..941f45ac117a
--- /dev/null
+++ b/include/linux/textsearch.h
@@ -0,0 +1,180 @@
+#ifndef __LINUX_TEXTSEARCH_H
+#define __LINUX_TEXTSEARCH_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/err.h>
+
+struct ts_config;
+
+/**
+ * TS_AUTOLOAD - Automatically load textsearch modules when needed
+ */
+#define TS_AUTOLOAD	1
+
+/**
+ * struct ts_state - search state
+ * @offset: offset for next match
+ * @cb: control buffer, for persistant variables of get_next_block()
+ */
+struct ts_state
+{
+	unsigned int		offset;
+	char			cb[40];
+};
+
+/**
+ * struct ts_ops - search module operations
+ * @name: name of search algorithm
+ * @init: initialization function to prepare a search
+ * @find: find the next occurrence of the pattern
+ * @destroy: destroy algorithm specific parts of a search configuration
+ * @get_pattern: return head of pattern
+ * @get_pattern_len: return length of pattern
+ * @owner: module reference to algorithm
+ */
+struct ts_ops
+{
+	const char		*name;
+	struct ts_config *	(*init)(const void *, unsigned int, int);
+	unsigned int		(*find)(struct ts_config *,
+					struct ts_state *);
+	void			(*destroy)(struct ts_config *);
+	void *			(*get_pattern)(struct ts_config *);
+	unsigned int		(*get_pattern_len)(struct ts_config *);
+	struct module		*owner;
+	struct list_head	list;
+};
+
+/**
+ * struct ts_config - search configuration
+ * @ops: operations of chosen algorithm
+ * @get_next_block: callback to fetch the next block to search in
+ * @finish: callback to finalize a search
+ */
+struct ts_config
+{
+	struct ts_ops		*ops;
+
+	/**
+	 * get_next_block - fetch next block of data
+	 * @consumed: number of bytes consumed by the caller
+	 * @dst: destination buffer
+	 * @conf: search configuration
+	 * @state: search state
+	 *
+	 * Called repeatedly until 0 is returned. Must assign the
+	 * head of the next block of data to &*dst and return the length
+	 * of the block or 0 if at the end. consumed == 0 indicates
+	 * a new search. May store/read persistant values in state->cb.
+	 */
+	unsigned int		(*get_next_block)(unsigned int consumed,
+						  const u8 **dst,
+						  struct ts_config *conf,
+						  struct ts_state *state);
+
+	/**
+	 * finish - finalize/clean a series of get_next_block() calls
+	 * @conf: search configuration
+	 * @state: search state
+	 *
+	 * Called after the last use of get_next_block(), may be used
+	 * to cleanup any leftovers.
+	 */
+	void			(*finish)(struct ts_config *conf,
+					  struct ts_state *state);
+};
+
+/**
+ * textsearch_next - continue searching for a pattern
+ * @conf: search configuration
+ * @state: search state
+ *
+ * Continues a search looking for more occurrences of the pattern.
+ * textsearch_find() must be called to find the first occurrence
+ * in order to reset the state.
+ *
+ * Returns the position of the next occurrence of the pattern or
+ * UINT_MAX if not match was found.
+ */ 
+static inline unsigned int textsearch_next(struct ts_config *conf,
+					   struct ts_state *state)
+{
+	unsigned int ret = conf->ops->find(conf, state);
+
+	if (conf->finish)
+		conf->finish(conf, state);
+
+	return ret;
+}
+
+/**
+ * textsearch_find - start searching for a pattern
+ * @conf: search configuration
+ * @state: search state
+ *
+ * Returns the position of first occurrence of the pattern or
+ * UINT_MAX if no match was found.
+ */ 
+static inline unsigned int textsearch_find(struct ts_config *conf,
+					   struct ts_state *state)
+{
+	state->offset = 0;
+	return textsearch_next(conf, state);
+}
+
+/**
+ * textsearch_get_pattern - return head of the pattern
+ * @conf: search configuration
+ */
+static inline void *textsearch_get_pattern(struct ts_config *conf)
+{
+	return conf->ops->get_pattern(conf);
+}
+
+/**
+ * textsearch_get_pattern_len - return length of the pattern
+ * @conf: search configuration
+ */
+static inline unsigned int textsearch_get_pattern_len(struct ts_config *conf)
+{
+	return conf->ops->get_pattern_len(conf);
+}
+
+extern int textsearch_register(struct ts_ops *);
+extern int textsearch_unregister(struct ts_ops *);
+extern struct ts_config *textsearch_prepare(const char *, const void *,
+					    unsigned int, int, int);
+extern void textsearch_destroy(struct ts_config *conf);
+extern unsigned int textsearch_find_continuous(struct ts_config *,
+					       struct ts_state *,
+					       const void *, unsigned int);
+
+
+#define TS_PRIV_ALIGNTO	8
+#define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1))
+
+static inline struct ts_config *alloc_ts_config(size_t payload, int gfp_mask)
+{
+	struct ts_config *conf;
+
+	conf = kmalloc(TS_PRIV_ALIGN(sizeof(*conf)) + payload, gfp_mask);
+	if (conf == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	memset(conf, 0, TS_PRIV_ALIGN(sizeof(*conf)) + payload);
+	return conf;
+}
+
+static inline void *ts_config_priv(struct ts_config *conf)
+{
+	return ((u8 *) conf + TS_PRIV_ALIGN(sizeof(struct ts_config)));
+}
+
+#endif /* __KERNEL__ */
+
+#endif
-- 
cgit v1.2.2


From 6408f79cce401e1bfecf923e7156f84f96e021e3 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 23 Jun 2005 20:59:16 -0700
Subject: [LIB]: Naive finite state machine based textsearch

A finite state machine consists of n states (struct ts_fsm_token)
representing the pattern as a finite automation. The data is read
sequentially on a octet basis. Every state token specifies the number
of recurrences and the type of value accepted which can be either a
specific character or ctype based set of characters. The available
type of recurrences include 1, (0|1), [0 n], and [1 n].

The algorithm differs between strict/non-strict mode specyfing
whether the pattern has to start at the first octect. Strict mode
is enabled by default and can be disabled by inserting
TS_FSM_HEAD_IGNORE as the first token in the chain.

The runtime performance of the algorithm should be around O(n),
however while in strict mode the average runtime can be better.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/textsearch_fsm.h | 48 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 include/linux/textsearch_fsm.h

(limited to 'include/linux')

diff --git a/include/linux/textsearch_fsm.h b/include/linux/textsearch_fsm.h
new file mode 100644
index 000000000000..fdfa078c66e5
--- /dev/null
+++ b/include/linux/textsearch_fsm.h
@@ -0,0 +1,48 @@
+#ifndef __LINUX_TEXTSEARCH_FSM_H
+#define __LINUX_TEXTSEARCH_FSM_H
+
+#include <linux/types.h>
+
+enum {
+	TS_FSM_SPECIFIC,	/* specific character */
+	TS_FSM_WILDCARD,	/* any character */
+	TS_FSM_DIGIT,		/* isdigit() */
+	TS_FSM_XDIGIT,		/* isxdigit() */
+	TS_FSM_PRINT,		/* isprint() */
+	TS_FSM_ALPHA,		/* isalpha() */
+	TS_FSM_ALNUM,		/* isalnum() */
+	TS_FSM_ASCII,		/* isascii() */
+	TS_FSM_CNTRL,		/* iscntrl() */
+	TS_FSM_GRAPH,		/* isgraph() */
+	TS_FSM_LOWER,		/* islower() */
+	TS_FSM_UPPER,		/* isupper() */
+	TS_FSM_PUNCT,		/* ispunct() */
+	TS_FSM_SPACE,		/* isspace() */
+	__TS_FSM_TYPE_MAX,
+};
+#define TS_FSM_TYPE_MAX (__TS_FSM_TYPE_MAX - 1)
+
+enum {
+	TS_FSM_SINGLE,		/* 1 occurrence */
+	TS_FSM_PERHAPS,		/* 1 or 0 occurrence */
+	TS_FSM_ANY,		/* 0..n occurrences */
+	TS_FSM_MULTI,		/* 1..n occurrences */
+	TS_FSM_HEAD_IGNORE,	/* 0..n ignored occurrences at head */
+	__TS_FSM_RECUR_MAX,
+};
+#define TS_FSM_RECUR_MAX (__TS_FSM_RECUR_MAX - 1)
+
+/**
+ * struct ts_fsm_token - state machine token (state)
+ * @type: type of token
+ * @recur: number of recurrences
+ * @value: character value for TS_FSM_SPECIFIC
+ */
+struct ts_fsm_token
+{
+	__u16		type;
+	__u8		recur;
+	__u8		value;
+};
+
+#endif
-- 
cgit v1.2.2


From 677e90eda3bd8cfde0b748daaa46476162a03950 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 23 Jun 2005 20:59:51 -0700
Subject: [NET]: Zerocopy sequential reading of skb data

Implements sequential reading for both linear and non-linear
skb data at zerocopy cost. The data is returned in chunks of
arbitary length, therefore random access is not possible.

Usage:
	from	 := 0
	to	 := 128
	state	 := undef
	data	 := undef
	len	 := undef
	consumed := 0

	skb_prepare_seq_read(skb, from, to, &state)
	while (len = skb_seq_read(consumed, &data, &state)) != 0 do
		/* do something with 'data' of length 'len' */
		if abort then
			/* abort read if we don't wait for
			 * skb_seq_read() to return 0 */
			skb_abort_seq_read(&state)
			return
		endif
		/* not necessary to consume all of 'len' */
		consumed += len
	done

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d7c839a21842..171a37dff83a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -321,6 +321,24 @@ extern void	      skb_over_panic(struct sk_buff *skb, int len,
 extern void	      skb_under_panic(struct sk_buff *skb, int len,
 				      void *here);
 
+struct skb_seq_state
+{
+	__u32		lower_offset;
+	__u32		upper_offset;
+	__u32		frag_idx;
+	__u32		stepped_offset;
+	struct sk_buff	*root_skb;
+	struct sk_buff	*cur_skb;
+	__u8		*frag_data;
+};
+
+extern void	      skb_prepare_seq_read(struct sk_buff *skb,
+					   unsigned int from, unsigned int to,
+					   struct skb_seq_state *st);
+extern unsigned int   skb_seq_read(unsigned int consumed, const u8 **data,
+				   struct skb_seq_state *st);
+extern void	      skb_abort_seq_read(struct skb_seq_state *st);
+
 /* Internal */
 #define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
 
-- 
cgit v1.2.2


From 3fc7e8a6d842f72d16d2623b1022814a635ab961 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 23 Jun 2005 21:00:17 -0700
Subject: [NET]: skb_find_text() - Find a text pattern in skb data

Finds a pattern in the skb data according to the specified
textsearch configuration. Use textsearch_next() to retrieve
subsequent occurrences of the pattern. Returns the offset
to the first occurrence or UINT_MAX if no match was found.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 171a37dff83a..416a2e4024b2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -27,6 +27,7 @@
 #include <linux/highmem.h>
 #include <linux/poll.h>
 #include <linux/net.h>
+#include <linux/textsearch.h>
 #include <net/checksum.h>
 
 #define HAVE_ALLOC_SKB		/* For the drivers to know */
@@ -339,6 +340,10 @@ extern unsigned int   skb_seq_read(unsigned int consumed, const u8 **data,
 				   struct skb_seq_state *st);
 extern void	      skb_abort_seq_read(struct skb_seq_state *st);
 
+extern unsigned int   skb_find_text(struct sk_buff *skb, unsigned int from,
+				    unsigned int to, struct ts_config *config,
+				    struct ts_state *state);
+
 /* Internal */
 #define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
 
-- 
cgit v1.2.2


From d675c989ed2d4ba23dff615330b04371aea83534 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 23 Jun 2005 21:00:58 -0700
Subject: [PKT_SCHED]: Packet classification based on textsearch (ematch)

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_cls.h              |  1 +
 include/linux/rtnetlink.h            |  7 +++++--
 include/linux/tc_ematch/tc_em_text.h | 19 +++++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/tc_ematch/tc_em_text.h

(limited to 'include/linux')

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index d2aa214d6803..25d2d67c1faf 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -408,6 +408,7 @@ enum
 	TCF_EM_NBYTE,
 	TCF_EM_U32,
 	TCF_EM_META,
+	TCF_EM_TEXT,
 	__TCF_EM_MAX
 };
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index e68dbf0bf579..d021888b58f1 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -892,10 +892,13 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 		 goto rtattr_failure; \
    	__rta_fill(skb, attrtype, attrlen, data); }) 
 
-#define RTA_PUT_NOHDR(skb, attrlen, data) \
+#define RTA_APPEND(skb, attrlen, data) \
 ({	if (unlikely(skb_tailroom(skb) < (int)(attrlen))) \
 		goto rtattr_failure; \
-	memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); })
+	memcpy(skb_put(skb, attrlen), data, attrlen); })
+
+#define RTA_PUT_NOHDR(skb, attrlen, data) \
+	RTA_APPEND(skb, RTA_ALIGN(attrlen), data)
 
 #define RTA_PUT_U8(skb, attrtype, value) \
 ({	u8 _tmp = (value); \
diff --git a/include/linux/tc_ematch/tc_em_text.h b/include/linux/tc_ematch/tc_em_text.h
new file mode 100644
index 000000000000..7cd43e99c7f5
--- /dev/null
+++ b/include/linux/tc_ematch/tc_em_text.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_TC_EM_TEXT_H
+#define __LINUX_TC_EM_TEXT_H
+
+#include <linux/pkt_cls.h>
+
+#define TC_EM_TEXT_ALGOSIZ	16
+
+struct tcf_em_text
+{
+	char		algo[TC_EM_TEXT_ALGOSIZ];
+	__u16		from_offset;
+	__u16		to_offset;
+	__u16		pattern_len;
+	__u8		from_layer:4;
+	__u8		to_layer:4;
+	__u8		pad;
+};
+
+#endif
-- 
cgit v1.2.2


From 76d8aeabfeb1c42641a81c44280177b9a08670d8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 23 Jun 2005 22:00:49 -0700
Subject: [PATCH] keys: Discard key spinlock and use RCU for key payload

The attached patch changes the key implementation in a number of ways:

 (1) It removes the spinlock from the key structure.

 (2) The key flags are now accessed using atomic bitops instead of
     write-locking the key spinlock and using C bitwise operators.

     The three instantiation flags are dealt with with the construction
     semaphore held during the request_key/instantiate/negate sequence, thus
     rendering the spinlock superfluous.

     The key flags are also now bit numbers not bit masks.

 (3) The key payload is now accessed using RCU. This permits the recursive
     keyring search algorithm to be simplified greatly since no locks need be
     taken other than the usual RCU preemption disablement. Searching now does
     not require any locks or semaphores to be held; merely that the starting
     keyring be pinned.

 (4) The keyring payload now includes an RCU head so that it can be disposed
     of by call_rcu(). This requires that the payload be copied on unlink to
     prevent introducing races in copy-down vs search-up.

 (5) The user key payload is now a structure with the data following it. It
     includes an RCU head like the keyring payload and for the same reason. It
     also contains a data length because the data length in the key may be
     changed on another CPU whilst an RCU protected read is in progress on the
     payload. This would then see the supposed RCU payload and the on-key data
     length getting out of sync.

     I'm tempted to drop the key's datalen entirely, except that it's used in
     conjunction with quota management and so is a little tricky to get rid
     of.

 (6) Update the keys documentation.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/key-ui.h |  6 ++++--
 include/linux/key.h    | 25 +++++++++++++++----------
 2 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h
index 60cc7b762e78..159ca8d54e9a 100644
--- a/include/linux/key-ui.h
+++ b/include/linux/key-ui.h
@@ -31,8 +31,10 @@ extern spinlock_t key_serial_lock;
  * subscribed
  */
 struct keyring_list {
-	unsigned	maxkeys;	/* max keys this list can hold */
-	unsigned	nkeys;		/* number of keys currently held */
+	struct rcu_head	rcu;		/* RCU deletion hook */
+	unsigned short	maxkeys;	/* max keys this list can hold */
+	unsigned short	nkeys;		/* number of keys currently held */
+	unsigned short	delkey;		/* key to be unlinked by RCU */
 	struct key	*keys[0];
 };
 
diff --git a/include/linux/key.h b/include/linux/key.h
index 6aa46d0e812f..2c24ffaca86f 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -18,7 +18,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
-#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
 #include <asm/atomic.h>
 
 #ifdef __KERNEL__
@@ -78,7 +78,6 @@ struct key {
 	key_serial_t		serial;		/* key serial number */
 	struct rb_node		serial_node;
 	struct key_type		*type;		/* type of key */
-	rwlock_t		lock;		/* examination vs change lock */
 	struct rw_semaphore	sem;		/* change vs change sem */
 	struct key_user		*user;		/* owner of this key */
 	time_t			expiry;		/* time at which key expires (or 0) */
@@ -86,14 +85,10 @@ struct key {
 	gid_t			gid;
 	key_perm_t		perm;		/* access permissions */
 	unsigned short		quotalen;	/* length added to quota */
-	unsigned short		datalen;	/* payload data length */
-	unsigned short		flags;		/* status flags (change with lock writelocked) */
-#define KEY_FLAG_INSTANTIATED	0x00000001	/* set if key has been instantiated */
-#define KEY_FLAG_DEAD		0x00000002	/* set if key type has been deleted */
-#define KEY_FLAG_REVOKED	0x00000004	/* set if key had been revoked */
-#define KEY_FLAG_IN_QUOTA	0x00000008	/* set if key consumes quota */
-#define KEY_FLAG_USER_CONSTRUCT	0x00000010	/* set if key is being constructed in userspace */
-#define KEY_FLAG_NEGATIVE	0x00000020	/* set if key is negative */
+	unsigned short		datalen;	/* payload data length
+						 * - may not match RCU dereferenced payload
+						 * - payload should contain own length
+						 */
 
 #ifdef KEY_DEBUGGING
 	unsigned		magic;
@@ -101,6 +96,14 @@ struct key {
 #define KEY_DEBUG_MAGIC_X	0xf8e9dacbu
 #endif
 
+	unsigned long		flags;		/* status flags (change with bitops) */
+#define KEY_FLAG_INSTANTIATED	0	/* set if key has been instantiated */
+#define KEY_FLAG_DEAD		1	/* set if key type has been deleted */
+#define KEY_FLAG_REVOKED	2	/* set if key had been revoked */
+#define KEY_FLAG_IN_QUOTA	3	/* set if key consumes quota */
+#define KEY_FLAG_USER_CONSTRUCT	4	/* set if key is being constructed in userspace */
+#define KEY_FLAG_NEGATIVE	5	/* set if key is negative */
+
 	/* the description string
 	 * - this is used to match a key against search criteria
 	 * - this should be a printable string
@@ -250,6 +253,8 @@ extern int keyring_add_key(struct key *keyring,
 
 extern struct key *key_lookup(key_serial_t id);
 
+extern void keyring_replace_payload(struct key *key, void *replacement);
+
 #define key_serial(key) ((key) ? (key)->serial : 0)
 
 /*
-- 
cgit v1.2.2


From 7888e7ff4ee579442128d7d12a9c9dbf2cf7de6a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 23 Jun 2005 22:00:51 -0700
Subject: [PATCH] Keys: Pass session keyring to call_usermodehelper()

The attached patch makes it possible to pass a session keyring through to the
process spawned by call_usermodehelper().  This allows patch 3/3 to pass an
authorisation key through to /sbin/request-key, thus permitting better access
controls when doing just-in-time key creation.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/key.h  | 10 +++++++++-
 include/linux/kmod.h | 13 ++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 2c24ffaca86f..2bfbf88d2740 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -273,14 +273,22 @@ extern void key_fsuid_changed(struct task_struct *tsk);
 extern void key_fsgid_changed(struct task_struct *tsk);
 extern void key_init(void);
 
+#define __install_session_keyring(tsk, keyring)			\
+({								\
+	struct key *old_session = tsk->signal->session_keyring;	\
+	tsk->signal->session_keyring = keyring;			\
+	old_session;						\
+})
+
 #else /* CONFIG_KEYS */
 
 #define key_validate(k)			0
 #define key_serial(k)			0
-#define key_get(k) 			NULL
+#define key_get(k) 			({ NULL; })
 #define key_put(k)			do { } while(0)
 #define alloc_uid_keyring(u)		0
 #define switch_uid_keyring(u)		do { } while(0)
+#define __install_session_keyring(t, k)	({ NULL; })
 #define copy_keys(f,t)			0
 #define copy_thread_group_keys(t)	0
 #define exit_keys(t)			do { } while(0)
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 95d0e4b0814d..e4a231549407 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -19,6 +19,7 @@
  *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/stddef.h>
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/compiler.h>
@@ -34,7 +35,17 @@ static inline int request_module(const char * name, ...) { return -ENOSYS; }
 #endif
 
 #define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x)))
-extern int call_usermodehelper(char *path, char *argv[], char *envp[], int wait);
+
+struct key;
+extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[],
+				    struct key *session_keyring, int wait);
+
+static inline int
+call_usermodehelper(char *path, char **argv, char **envp, int wait)
+{
+	return call_usermodehelper_keys(path, argv, envp, NULL, wait);
+}
+
 extern void usermodehelper_init(void);
 
 #endif /* __LINUX_KMOD_H__ */
-- 
cgit v1.2.2


From 3e30148c3d524a9c1c63ca28261bc24c457eb07a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 23 Jun 2005 22:00:56 -0700
Subject: [PATCH] Keys: Make request-key create an authorisation key

The attached patch makes the following changes:

 (1) There's a new special key type called ".request_key_auth".

     This is an authorisation key for when one process requests a key and
     another process is started to construct it. This type of key cannot be
     created by the user; nor can it be requested by kernel services.

     Authorisation keys hold two references:

     (a) Each refers to a key being constructed. When the key being
     	 constructed is instantiated the authorisation key is revoked,
     	 rendering it of no further use.

     (b) The "authorising process". This is either:

     	 (i) the process that called request_key(), or:

     	 (ii) if the process that called request_key() itself had an
     	      authorisation key in its session keyring, then the authorising
     	      process referred to by that authorisation key will also be
     	      referred to by the new authorisation key.

	 This means that the process that initiated a chain of key requests
	 will authorise the lot of them, and will, by default, wind up with
	 the keys obtained from them in its keyrings.

 (2) request_key() creates an authorisation key which is then passed to
     /sbin/request-key in as part of a new session keyring.

 (3) When request_key() is searching for a key to hand back to the caller, if
     it comes across an authorisation key in the session keyring of the
     calling process, it will also search the keyrings of the process
     specified therein and it will use the specified process's credentials
     (fsuid, fsgid, groups) to do that rather than the calling process's
     credentials.

     This allows a process started by /sbin/request-key to find keys belonging
     to the authorising process.

 (4) A key can be read, even if the process executing KEYCTL_READ doesn't have
     direct read or search permission if that key is contained within the
     keyrings of a process specified by an authorisation key found within the
     calling process's session keyring, and is searchable using the
     credentials of the authorising process.

     This allows a process started by /sbin/request-key to read keys belonging
     to the authorising process.

 (5) The magic KEY_SPEC_*_KEYRING key IDs when passed to KEYCTL_INSTANTIATE or
     KEYCTL_NEGATE will specify a keyring of the authorising process, rather
     than the process doing the instantiation.

 (6) One of the process keyrings can be nominated as the default to which
     request_key() should attach new keys if not otherwise specified. This is
     done with KEYCTL_SET_REQKEY_KEYRING and one of the KEY_REQKEY_DEFL_*
     constants. The current setting can also be read using this call.

 (7) request_key() is partially interruptible. If it is waiting for another
     process to finish constructing a key, it can be interrupted. This permits
     a request-key cycle to be broken without recourse to rebooting.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-Off-By: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/key-ui.h | 41 +++++++++++++++++++++++++++++++++++++++--
 include/linux/key.h    |  9 ++++-----
 include/linux/keyctl.h | 11 +++++++++++
 include/linux/sched.h  |  8 +++++---
 4 files changed, 59 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h
index 159ca8d54e9a..cc326174a808 100644
--- a/include/linux/key-ui.h
+++ b/include/linux/key-ui.h
@@ -1,4 +1,4 @@
-/* key-ui.h: key userspace interface stuff for use by keyfs
+/* key-ui.h: key userspace interface stuff
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -84,8 +84,45 @@ static inline int key_any_permission(const struct key *key, key_perm_t perm)
 	return kperm != 0;
 }
 
+static inline int key_task_groups_search(struct task_struct *tsk, gid_t gid)
+{
+	int ret;
+
+	task_lock(tsk);
+	ret = groups_search(tsk->group_info, gid);
+	task_unlock(tsk);
+	return ret;
+}
+
+static inline int key_task_permission(const struct key *key,
+				      struct task_struct *context,
+				      key_perm_t perm)
+{
+	key_perm_t kperm;
+
+	if (key->uid == context->fsuid) {
+		kperm = key->perm >> 16;
+	}
+	else if (key->gid != -1 &&
+		 key->perm & KEY_GRP_ALL && (
+			 key->gid == context->fsgid ||
+			 key_task_groups_search(context, key->gid)
+			 )
+		 ) {
+		kperm = key->perm >> 8;
+	}
+	else {
+		kperm = key->perm;
+	}
+
+	kperm = kperm & perm & KEY_ALL;
+
+	return kperm == perm;
+
+}
 
-extern struct key *lookup_user_key(key_serial_t id, int create, int part,
+extern struct key *lookup_user_key(struct task_struct *context,
+				   key_serial_t id, int create, int partial,
 				   key_perm_t perm);
 
 extern long join_session_keyring(const char *name);
diff --git a/include/linux/key.h b/include/linux/key.h
index 2bfbf88d2740..970bbd916cf4 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -199,10 +199,12 @@ extern int key_payload_reserve(struct key *key, size_t datalen);
 extern int key_instantiate_and_link(struct key *key,
 				    const void *data,
 				    size_t datalen,
-				    struct key *keyring);
+				    struct key *keyring,
+				    struct key *instkey);
 extern int key_negate_and_link(struct key *key,
 			       unsigned timeout,
-			       struct key *keyring);
+			       struct key *keyring,
+			       struct key *instkey);
 extern void key_revoke(struct key *key);
 extern void key_put(struct key *key);
 
@@ -245,9 +247,6 @@ extern struct key *keyring_search(struct key *keyring,
 				  struct key_type *type,
 				  const char *description);
 
-extern struct key *search_process_keyrings(struct key_type *type,
-					   const char *description);
-
 extern int keyring_add_key(struct key *keyring,
 			   struct key *key);
 
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
index 381dedc370a3..8d7c59a29e09 100644
--- a/include/linux/keyctl.h
+++ b/include/linux/keyctl.h
@@ -20,6 +20,16 @@
 #define KEY_SPEC_USER_SESSION_KEYRING	-5	/* - key ID for UID-session keyring */
 #define KEY_SPEC_GROUP_KEYRING		-6	/* - key ID for GID-specific keyring */
 
+/* request-key default keyrings */
+#define KEY_REQKEY_DEFL_NO_CHANGE		-1
+#define KEY_REQKEY_DEFL_DEFAULT			0
+#define KEY_REQKEY_DEFL_THREAD_KEYRING		1
+#define KEY_REQKEY_DEFL_PROCESS_KEYRING		2
+#define KEY_REQKEY_DEFL_SESSION_KEYRING		3
+#define KEY_REQKEY_DEFL_USER_KEYRING		4
+#define KEY_REQKEY_DEFL_USER_SESSION_KEYRING	5
+#define KEY_REQKEY_DEFL_GROUP_KEYRING		6
+
 /* keyctl commands */
 #define KEYCTL_GET_KEYRING_ID		0	/* ask for a keyring's ID */
 #define KEYCTL_JOIN_SESSION_KEYRING	1	/* join or start named session keyring */
@@ -35,5 +45,6 @@
 #define KEYCTL_READ			11	/* read a key or keyring's contents */
 #define KEYCTL_INSTANTIATE		12	/* instantiate a partially constructed key */
 #define KEYCTL_NEGATE			13	/* negate a partially constructed key */
+#define KEYCTL_SET_REQKEY_KEYRING	14	/* set default request-key keyring */
 
 #endif /*  _LINUX_KEYCTL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 901742f92389..2c69682b0444 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -561,9 +561,10 @@ struct group_info {
 		groups_free(group_info); \
 } while (0)
 
-struct group_info *groups_alloc(int gidsetsize);
-void groups_free(struct group_info *group_info);
-int set_current_groups(struct group_info *group_info);
+extern struct group_info *groups_alloc(int gidsetsize);
+extern void groups_free(struct group_info *group_info);
+extern int set_current_groups(struct group_info *group_info);
+extern int groups_search(struct group_info *group_info, gid_t grp);
 /* access the groups "array" with this macro */
 #define GROUP_AT(gi, i) \
     ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
@@ -660,6 +661,7 @@ struct task_struct {
 	struct user_struct *user;
 #ifdef CONFIG_KEYS
 	struct key *thread_keyring;	/* keyring private to this thread */
+	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 #endif
 	int oomkilladj; /* OOM kill score adjustment (bit shift). */
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
-- 
cgit v1.2.2


From 92198f7eaa5df3479341dd8fa20c2c81aa3b1e25 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 22:00:59 -0700
Subject: [PATCH] pass iocb to dio_iodone_t

XFS will have to look at iocb->private to fix aio+dio.  No other filesystem
is using the blockdev_direct_IO* end_io callback.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 517bf4966bf5..83857d8070d3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -220,6 +220,7 @@ extern int dir_notify_enable;
 
 struct iovec;
 struct nameidata;
+struct kiocb;
 struct pipe_inode_info;
 struct poll_table_struct;
 struct kstatfs;
@@ -240,7 +241,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 typedef int (get_blocks_t)(struct inode *inode, sector_t iblock,
 			unsigned long max_blocks,
 			struct buffer_head *bh_result, int create);
-typedef void (dio_iodone_t)(struct inode *inode, loff_t offset,
+typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 			ssize_t bytes, void *private);
 
 /*
@@ -302,7 +303,6 @@ struct iattr {
 struct page;
 struct address_space;
 struct writeback_control;
-struct kiocb;
 
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
-- 
cgit v1.2.2


From 4e5117ba0af4582b6ec9164874f719d7f3f1eb2b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:03 -0700
Subject: [PATCH] quota: improve credits estimates

Improve estimates on the number of needed credits for quota transaction.
Now we distinguish blocks that might need to be allocated and blocks that
only need to be rewritten.  Also we distinguish deleting of a quota
structure and creating of a new one.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/dqblk_v1.h | 6 ++++++
 include/linux/dqblk_v2.h | 6 ++++++
 include/linux/quota.h    | 7 +++++--
 3 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h
index 42fbf4797156..57f1250d5a52 100644
--- a/include/linux/dqblk_v1.h
+++ b/include/linux/dqblk_v1.h
@@ -11,6 +11,12 @@
 /* Root squash turned on */
 #define V1_DQF_RSQUASH 1
 
+/* Numbers of blocks needed for updates */
+#define V1_INIT_ALLOC 1
+#define V1_INIT_REWRITE 1
+#define V1_DEL_ALLOC 0
+#define V1_DEL_REWRITE 2
+
 /* Special information about quotafile */
 struct v1_mem_dqinfo {
 };
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index 4a6c5f6867bb..4f853322cb7f 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -10,6 +10,12 @@
 /* id numbers of quota format */
 #define QFMT_VFS_V0 2
 
+/* Numbers of blocks needed for updates */
+#define V2_INIT_ALLOC 4
+#define V2_INIT_REWRITE 2
+#define V2_DEL_ALLOC 0
+#define V2_DEL_REWRITE 6
+
 /* Inmemory copy of version specific information */
 struct v2_mem_dqinfo {
 	unsigned int dqi_blocks;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index ac5b90f4f256..700ead45084f 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -138,8 +138,11 @@ struct if_dqinfo {
 #include <linux/dqblk_v2.h>
 
 /* Maximal numbers of writes for quota operation (insert/delete/update)
- * (over all formats) - info block, 4 pointer blocks, data block */
-#define DQUOT_MAX_WRITES	6
+ * (over VFS all formats) */
+#define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, V2_INIT_ALLOC)
+#define DQUOT_INIT_REWRITE max(V1_INIT_REWRITE, V2_INIT_REWRITE)
+#define DQUOT_DEL_ALLOC max(V1_DEL_ALLOC, V2_DEL_ALLOC)
+#define DQUOT_DEL_REWRITE max(V1_DEL_REWRITE, V2_DEL_REWRITE)
 
 /*
  * Data for one user/group kept in memory
-- 
cgit v1.2.2


From 1f54587bea84a35125c95e19b98c2f464c50871b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:04 -0700
Subject: [PATCH] quota: ext3: Improve quota credit estimates

Use improved credits estimates for quota operations.  Also reserve a space
for a quota operation in a transaction only if filesystem was mounted with
some quota options.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ext3_fs.h  |  1 +
 include/linux/ext3_jbd.h | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 74ad31781e3e..4b6e1ab216a5 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -358,6 +358,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_RESERVATION		0x10000	/* Preallocation */
 #define EXT3_MOUNT_BARRIER		0x20000 /* Use block barriers */
 #define EXT3_MOUNT_NOBH			0x40000 /* No bufferheads */
+#define EXT3_MOUNT_QUOTA		0x80000 /* Some quota option set */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index e8292af9033b..c8307c02dd07 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -42,15 +42,15 @@
  * superblock only gets updated once, of course, so don't bother
  * counting that again for the quota updates. */
 
-#define EXT3_DATA_TRANS_BLOCKS		(EXT3_SINGLEDATA_TRANS_BLOCKS + \
+#define EXT3_DATA_TRANS_BLOCKS(sb)	(EXT3_SINGLEDATA_TRANS_BLOCKS + \
 					 EXT3_XATTR_TRANS_BLOCKS - 2 + \
-					 2*EXT3_QUOTA_TRANS_BLOCKS)
+					 2*EXT3_QUOTA_TRANS_BLOCKS(sb))
 
 /* Delete operations potentially hit one directory's namespace plus an
  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
  * generous.  We can grow the delete transaction later if necessary. */
 
-#define EXT3_DELETE_TRANS_BLOCKS	(2 * EXT3_DATA_TRANS_BLOCKS + 64)
+#define EXT3_DELETE_TRANS_BLOCKS(sb)	(2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64)
 
 /* Define an arbitrary limit for the amount of data we will anticipate
  * writing to any given transaction.  For unbounded transactions such as
@@ -74,14 +74,17 @@
 #ifdef CONFIG_QUOTA
 /* Amount of blocks needed for quota update - we know that the structure was
  * allocated so we need to update only inode+data */
-#define EXT3_QUOTA_TRANS_BLOCKS 2
+#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
 /* Amount of blocks needed for quota insert/delete - we do some block writes
  * but inode, sb and group updates are done only once */
-#define EXT3_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*\
-				(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3)
+#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+		(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
+#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+		(EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
 #else
-#define EXT3_QUOTA_TRANS_BLOCKS 0
-#define EXT3_QUOTA_INIT_BLOCKS 0
+#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
+#define EXT3_QUOTA_INIT_BLOCKS(sb) 0
+#define EXT3_QUOTA_DEL_BLOCKS(sb) 0
 #endif
 
 int
-- 
cgit v1.2.2


From 556a2a45bce1740f035befaa7201e4ad836c7257 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:06 -0700
Subject: [PATCH] quota: reiserfs: improve quota credit estimates

Use improved credits estimates for quota operations.  Also reserve space
for a quota operation in a transaction only if filesystem was mounted with
some quota option.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/reiserfs_fs.h    | 15 +++++++++++----
 include/linux/reiserfs_fs_sb.h |  2 ++
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 32148625fc2f..4c7c5689ad93 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1644,11 +1644,18 @@ struct reiserfs_journal_header {
 #define JOURNAL_MAX_TRANS_AGE 30
 #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
 #ifdef CONFIG_QUOTA
-#define REISERFS_QUOTA_TRANS_BLOCKS 2	/* We need to update data and inode (atime) */
-#define REISERFS_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*(JOURNAL_PER_BALANCE_CNT+2)+1)	/* 1 balancing, 1 bitmap, 1 data per write + stat data update */
+/* We need to update data and inode (atime) */
+#define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? 2 : 0)
+/* 1 balancing, 1 bitmap, 1 data per write + stat data update */
+#define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? \
+(DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0)
+/* same as with INIT */
+#define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? \
+(DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0)
 #else
-#define REISERFS_QUOTA_TRANS_BLOCKS 0
-#define REISERFS_QUOTA_INIT_BLOCKS 0
+#define REISERFS_QUOTA_TRANS_BLOCKS(s) 0
+#define REISERFS_QUOTA_INIT_BLOCKS(s) 0
+#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
 #endif
 
 /* both of these can be as low as 1, or as high as you want.  The min is the
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index 37a3a7afbec7..31c709d0fe18 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -467,6 +467,8 @@ enum reiserfs_mount_options {
     REISERFS_ERROR_RO,
     REISERFS_ERROR_CONTINUE,
 
+    REISERFS_QUOTA,		/* Some quota option specified */
+
     REISERFS_TEST1,
     REISERFS_TEST2,
     REISERFS_TEST3,
-- 
cgit v1.2.2


From 3b6259432dee81f928c22c48c080d5f6325ed92e Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Thu, 23 Jun 2005 22:01:42 -0700
Subject: [PATCH] ipmi: add power cycle capability

This patch to adds "power cycle" functionality to the IPMI power off module
ipmi_poweroff.  It also contains changes to support procfs control of the
feature.

The power cycle action is considered an optional chassis control in the IPMI
specification.  However, it is definitely useful when the hardware supports
it.  A power cycle is usually required in order to reset a firmware in a bad
state.  This action is critical to allow remote management of servers.

The implementation adds power cycle as optional to the ipmi_poweroff module.
It can be modified dynamically through the proc entry mentioned above.  During
a power down and enabled, the power cycle command is sent to the BMC firmware.
 If it fails either due to non-support or some error, it will retry to send
the command as power off.

Signed-off-by: Christopher A. Poblete <Chris_Poblete@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ipmi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 2ec265e1045f..596ca6130159 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -209,6 +209,11 @@ struct kernel_ipmi_msg
 #include <linux/list.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+extern struct proc_dir_entry *proc_ipmi_root;
+#endif /* CONFIG_PROC_FS */
+
 /* Opaque type for a IPMI message user.  One of these is needed to
    send and receive messages. */
 typedef struct ipmi_user *ipmi_user_t;
-- 
cgit v1.2.2


From a6df7da8f7ee99e6fd1995fad852bacb978a6447 Mon Sep 17 00:00:00 2001
From: Kylene Hall <kjhall@us.ibm.com>
Date: Thu, 23 Jun 2005 22:02:04 -0700
Subject: [PATCH] tpm: TPMs on additional LPC bus

Add support for TPMs on additional LPC buses.

Signed-off-by: Kylene Hall <kjhall@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 63e89e47b8e9..bf608808a60c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1568,6 +1568,7 @@
 #define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220
 #define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB
 #define PCI_DEVICE_ID_SERVERWORKS_CSB6USB 0x0221
+#define PCI_DEVICE_ID_SERVERWORKS_CSB6LPC 0x0227
 #define PCI_DEVICE_ID_SERVERWORKS_GCLE    0x0225
 #define PCI_DEVICE_ID_SERVERWORKS_GCLE2   0x0227
 #define PCI_DEVICE_ID_SERVERWORKS_CSB5ISA 0x0230
-- 
cgit v1.2.2


From 61fbfa8129c1771061a0e9f47747854293081c5b Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Thu, 23 Jun 2005 22:02:11 -0700
Subject: [PATCH] I2O: bugfixes and compability enhancements

Changes:

 - Fixed sysfs bug where user and parent links where added to the I2O
   device itself
 - Fixed bug when calculating TID for the event handler and cleaned up the
   workflow of i2o_driver_dispatch()
 - Fixed oops when no I2O device could be found for an event delivered to
   Exec-OSM
 - Fixed initialization of spinlock in Exec-OSM
 - Fixed memory leak in i2o_cfg_passthru() and i2o_cfg_passthru()
 - Removed MTRR support
 - Added PCI ID of Promise SX6000 with firmware >= 1.20.x.x
 - Turn of caching for ioremapped memory of in_queue
 - Added initialization sequence for Promise controllers
 - Moved definition of u8 / u16 / u32 for raidutils before first use

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/i2o-dev.h | 16 ++++++++--------
 include/linux/i2o.h     |  5 -----
 2 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o-dev.h b/include/linux/i2o-dev.h
index ef7f644dd873..3414325bdcfd 100644
--- a/include/linux/i2o-dev.h
+++ b/include/linux/i2o-dev.h
@@ -24,6 +24,14 @@
 #define MAX_I2O_CONTROLLERS	32
 
 //#include <linux/ioctl.h>
+#ifndef __KERNEL__
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+
+#endif				/* __KERNEL__ */
+
 
 /*
  * I2O Control IOCTLs and structures
@@ -126,14 +134,6 @@ struct i2o_evt_get {
 #define I2O_BUS_CARDBUS 7
 #define I2O_BUS_UNKNOWN 0x80
 
-#ifndef __KERNEL__
-
-typedef unsigned char u8;
-typedef unsigned short u16;
-typedef unsigned int u32;
-
-#endif				/* __KERNEL__ */
-
 typedef struct _i2o_pci_bus {
 	u8 PciFunctionNumber;
 	u8 PciDeviceNumber;
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index ea9a3ad4b67f..40e45a83d3fb 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -152,11 +152,6 @@ struct i2o_controller {
 	unsigned int raptor:1;		/* split bar */
 	unsigned int promise:1;		/* Promise controller */
 
-#ifdef CONFIG_MTRR
-	int mtrr_reg0;
-	int mtrr_reg1;
-#endif
-
 	struct list_head devices;	/* list of I2O devices */
 
 	struct notifier_block *event_notifer;	/* Events */
-- 
cgit v1.2.2


From f88e119c4b824a5017456fa094950d0f4092d96c Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Thu, 23 Jun 2005 22:02:14 -0700
Subject: [PATCH] I2O: first code cleanup of spare warnings and unused
 functions

Changes:

 - Removed unnecessary checking of NULL before calling kfree()
 - Make some functions static
 - Changed pr_debug() into osm_debug()
 - Use i2o_msg_in_to_virt() for getting a pointer to the message frame
 - Cleaned up some comments
 - Changed some le32_to_cpu() into readl() where necessary
 - Make error messages of OSM's look the same
 - Cleaned up error handling in i2o_block_end_request()
 - Removed unused error handling of failed messages in Block-OSM, which
   are not allowed by the I2O spec
 - Corrected the blocksize detection in i2o_block
 - Added hrt and lct sysfs-attribute to controller
 - Call done() function in SCSI-OSM after freeing DMA buffers
 - Removed unneeded variable for message size calculation in
   i2o_scsi_queuecommand()
 - Make some changes to remove sparse warnings
 - Reordered some functions
 - Cleaned up controller initialization
 - Replaced some magic numbers by defines
 - Removed unnecessary dma_sync_single_for_cpu() call on coherent DMA
 - Removed some unused fields in i2o_controller and removed some unused
   functions

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/i2o.h | 74 ++++++++++++-----------------------------------------
 1 file changed, 16 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 40e45a83d3fb..e8cd11290010 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -153,12 +153,10 @@ struct i2o_controller {
 	unsigned int promise:1;		/* Promise controller */
 
 	struct list_head devices;	/* list of I2O devices */
-
-	struct notifier_block *event_notifer;	/* Events */
-	atomic_t users;
 	struct list_head list;	/* Controller list */
-	void __iomem *post_port;	/* Inbout port address */
-	void __iomem *reply_port;	/* Outbound port address */
+
+	void __iomem *in_port;	/* Inbout port address */
+	void __iomem *out_port;	/* Outbound port address */
 	void __iomem *irq_mask;		/* Interrupt register address */
 
 	/* Dynamic LCT related data */
@@ -182,9 +180,6 @@ struct i2o_controller {
 	struct resource io_resource;	/* I/O resource allocated to the IOP */
 	struct resource mem_resource;	/* Mem resource allocated to the IOP */
 
-	struct proc_dir_entry *proc_entry;	/* /proc dir */
-
-	struct list_head bus_list;	/* list of busses on IOP */
 	struct device device;
 	struct i2o_device *exec;	/* Executive */
 #if BITS_PER_LONG == 64
@@ -380,49 +375,10 @@ extern int i2o_device_claim_release(struct i2o_device *);
 /* Exec OSM functions */
 extern int i2o_exec_lct_get(struct i2o_controller *);
 
-/* device to i2o_device and driver to i2o_driver convertion functions */
+/* device / driver conversion functions */
 #define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver)
 #define to_i2o_device(dev) container_of(dev, struct i2o_device, device)
-
-/*
- *	Messenger inlines
- */
-static inline u32 I2O_POST_READ32(struct i2o_controller *c)
-{
-	rmb();
-	return readl(c->post_port);
-};
-
-static inline void I2O_POST_WRITE32(struct i2o_controller *c, u32 val)
-{
-	wmb();
-	writel(val, c->post_port);
-};
-
-static inline u32 I2O_REPLY_READ32(struct i2o_controller *c)
-{
-	rmb();
-	return readl(c->reply_port);
-};
-
-static inline void I2O_REPLY_WRITE32(struct i2o_controller *c, u32 val)
-{
-	wmb();
-	writel(val, c->reply_port);
-};
-
-static inline u32 I2O_IRQ_READ32(struct i2o_controller *c)
-{
-	rmb();
-	return readl(c->irq_mask);
-};
-
-static inline void I2O_IRQ_WRITE32(struct i2o_controller *c, u32 val)
-{
-	wmb();
-	writel(val, c->irq_mask);
-	wmb();
-};
+#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device)
 
 /**
  *	i2o_msg_get - obtain an I2O message from the IOP
@@ -440,10 +396,12 @@ static inline void I2O_IRQ_WRITE32(struct i2o_controller *c, u32 val)
 static inline u32 i2o_msg_get(struct i2o_controller *c,
 			      struct i2o_message __iomem **msg)
 {
-	u32 m;
+	u32 m = readl(c->in_port);
 
-	if ((m = I2O_POST_READ32(c)) != I2O_QUEUE_EMPTY)
+	if (m != I2O_QUEUE_EMPTY) {
 		*msg = c->in_queue.virt + m;
+		rmb();
+	}
 
 	return m;
 };
@@ -457,7 +415,8 @@ static inline u32 i2o_msg_get(struct i2o_controller *c,
  */
 static inline void i2o_msg_post(struct i2o_controller *c, u32 m)
 {
-	I2O_POST_WRITE32(c, m);
+	wmb();
+	writel(m, c->in_port);
 };
 
 /**
@@ -486,12 +445,10 @@ static inline int i2o_msg_post_wait(struct i2o_controller *c, u32 m,
  *	The I2O controller must be informed that the reply message is not needed
  *	anymore. If you forget to flush the reply, the message frame can't be
  *	used by the controller anymore and is therefore lost.
- *
- *	FIXME: is there a timeout after which the controller reuse the message?
  */
 static inline void i2o_flush_reply(struct i2o_controller *c, u32 m)
 {
-	I2O_REPLY_WRITE32(c, m);
+	writel(m, c->out_port);
 };
 
 /**
@@ -505,8 +462,9 @@ static inline void i2o_flush_reply(struct i2o_controller *c, u32 m)
  *	work for sender side messages as they are ioremap objects
  *	provided by the I2O controller.
  */
-static inline struct i2o_message *i2o_msg_out_to_virt(struct i2o_controller *c,
-						      u32 m)
+static inline struct i2o_message __iomem *i2o_msg_out_to_virt(struct
+							      i2o_controller *c,
+							      u32 m)
 {
 	BUG_ON(m < c->out_queue.phys
 	       || m >= c->out_queue.phys + c->out_queue.len);
@@ -917,7 +875,7 @@ extern void i2o_debug_state(struct i2o_controller *c);
 #define I2OVER15	0x0001
 #define I2OVER20	0x0002
 
-/* Default is 1.5, FIXME: Need support for both 1.5 and 2.0 */
+/* Default is 1.5 */
 #define I2OVERSION	I2OVER15
 
 #define SGL_OFFSET_0    I2OVERSION
-- 
cgit v1.2.2


From f10378fff658f61307496e0ae00095041725cf07 Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Thu, 23 Jun 2005 22:02:16 -0700
Subject: [PATCH] I2O: new sysfs attributes and Adaptec specific block device
 access and 64-bit DMA support

Changes:
 - Added Bus-OSM which could be used by user space programs to reset a
   channel on the controller
 - Make ioctl's in Config-OSM obsolete in prefer for sysfs attributes and
   move those to its own file
 - Added sysfs attribute for firmware read and write access for I2O
   controllers
 - Added special handling of firmware read and write access for Adaptec
   controllers
 - Added vendor id and product id as sysfs-attribute to Executive classes
 - Added automatic notification of LCT change handling to Exec-OSM
 - Added flushing function to Block-OSM for later barrier implementation
 - Use PRIVATE messages for Block access on Adaptec controllers, which are
   faster then BLOCK class access
 - Cleaned up support for Promise controller
 - New messages are now detected using the IRQ status register as
   suggested by the I2O spec
 - Added i2o_dma_high() and i2o_dma_low() functions
 - Added facility for SG tablesize calculation when using 32-bit and
   64-bit DMA addresses
 - Added i2o_dma_map_single() and i2o_dma_map_sg() which could build the
   SG list for 32-bit as well as 64-bit DMA addresses

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/i2o-dev.h |   6 +-
 include/linux/i2o.h     | 321 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 270 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o-dev.h b/include/linux/i2o-dev.h
index 3414325bdcfd..90c984ecd521 100644
--- a/include/linux/i2o-dev.h
+++ b/include/linux/i2o-dev.h
@@ -32,6 +32,10 @@ typedef unsigned int u32;
 
 #endif				/* __KERNEL__ */
 
+/*
+ *	Vendors
+ */
+#define I2O_VENDOR_DPT				0x001b
 
 /*
  * I2O Control IOCTLs and structures
@@ -333,7 +337,7 @@ typedef struct _i2o_status_block {
 #define I2O_CLASS_ATE_PERIPHERAL		0x061
 #define I2O_CLASS_FLOPPY_CONTROLLER		0x070
 #define I2O_CLASS_FLOPPY_DEVICE 		0x071
-#define I2O_CLASS_BUS_ADAPTER_PORT		0x080
+#define I2O_CLASS_BUS_ADAPTER			0x080
 #define I2O_CLASS_PEER_TRANSPORT_AGENT		0x090
 #define I2O_CLASS_PEER_TRANSPORT		0x091
 #define	I2O_CLASS_END				0xfff
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index e8cd11290010..497ea574f96b 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -157,7 +157,8 @@ struct i2o_controller {
 
 	void __iomem *in_port;	/* Inbout port address */
 	void __iomem *out_port;	/* Outbound port address */
-	void __iomem *irq_mask;		/* Interrupt register address */
+	void __iomem *irq_status;	/* Interrupt status register address */
+	void __iomem *irq_mask;	/* Interrupt mask register address */
 
 	/* Dynamic LCT related data */
 
@@ -242,15 +243,6 @@ extern int i2o_msg_post_wait_mem(struct i2o_controller *, u32, unsigned long,
 extern void i2o_msg_nop(struct i2o_controller *, u32);
 static inline void i2o_flush_reply(struct i2o_controller *, u32);
 
-/* DMA handling functions */
-static inline int i2o_dma_alloc(struct device *, struct i2o_dma *, size_t,
-				unsigned int);
-static inline void i2o_dma_free(struct device *, struct i2o_dma *);
-int i2o_dma_realloc(struct device *, struct i2o_dma *, size_t, unsigned int);
-
-static inline int i2o_dma_map(struct device *, struct i2o_dma *);
-static inline void i2o_dma_unmap(struct device *, struct i2o_dma *);
-
 /* IOP functions */
 extern int i2o_status_get(struct i2o_controller *);
 
@@ -275,6 +267,16 @@ static inline u32 i2o_ptr_high(void *ptr)
 {
 	return (u32) ((u64) ptr >> 32);
 };
+
+static inline u32 i2o_dma_low(dma_addr_t dma_addr)
+{
+	return (u32) (u64) dma_addr;
+};
+
+static inline u32 i2o_dma_high(dma_addr_t dma_addr)
+{
+	return (u32) ((u64) dma_addr >> 32);
+};
 #else
 static inline u32 i2o_cntxt_list_add(struct i2o_controller *c, void *ptr)
 {
@@ -305,8 +307,246 @@ static inline u32 i2o_ptr_high(void *ptr)
 {
 	return 0;
 };
+
+static inline u32 i2o_dma_low(dma_addr_t dma_addr)
+{
+	return (u32) dma_addr;
+};
+
+static inline u32 i2o_dma_high(dma_addr_t dma_addr)
+{
+	return 0;
+};
+#endif
+
+/**
+ *	i2o_sg_tablesize - Calculate the maximum number of elements in a SGL
+ *	@c: I2O controller for which the calculation should be done
+ *	@body_size: maximum body size used for message in 32-bit words.
+ *
+ *	Return the maximum number of SG elements in a SG list.
+ */
+static inline u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size)
+{
+	i2o_status_block *sb = c->status_block.virt;
+	u16 sg_count =
+	    (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) -
+	    body_size;
+
+	if (c->pae_support) {
+		/*
+		 * for 64-bit a SG attribute element must be added and each
+		 * SG element needs 12 bytes instead of 8.
+		 */
+		sg_count -= 2;
+		sg_count /= 3;
+	} else
+		sg_count /= 2;
+
+	if (c->short_req && (sg_count > 8))
+		sg_count = 8;
+
+	return sg_count;
+};
+
+/**
+ *	i2o_dma_map_single - Map pointer to controller and fill in I2O message.
+ *	@c: I2O controller
+ *	@ptr: pointer to the data which should be mapped
+ *	@size: size of data in bytes
+ *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
+ *	@sg_ptr: pointer to the SG list inside the I2O message
+ *
+ *	This function does all necessary DMA handling and also writes the I2O
+ *	SGL elements into the I2O message. For details on DMA handling see also
+ *	dma_map_single(). The pointer sg_ptr will only be set to the end of the
+ *	SG list if the allocation was successful.
+ *
+ *	Returns DMA address which must be checked for failures using
+ *	dma_mapping_error().
+ */
+static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr,
+					    size_t size,
+					    enum dma_data_direction direction,
+					    u32 __iomem ** sg_ptr)
+{
+	u32 sg_flags;
+	u32 __iomem *mptr = *sg_ptr;
+	dma_addr_t dma_addr;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		sg_flags = 0xd4000000;
+		break;
+	case DMA_FROM_DEVICE:
+		sg_flags = 0xd0000000;
+		break;
+	default:
+		return 0;
+	}
+
+	dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction);
+	if (!dma_mapping_error(dma_addr)) {
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
+			writel(0x7C020002, mptr++);
+			writel(PAGE_SIZE, mptr++);
+		}
+#endif
+
+		writel(sg_flags | size, mptr++);
+		writel(i2o_dma_low(dma_addr), mptr++);
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
+			writel(i2o_dma_high(dma_addr), mptr++);
+#endif
+		*sg_ptr = mptr;
+	}
+	return dma_addr;
+};
+
+/**
+ *	i2o_dma_map_sg - Map a SG List to controller and fill in I2O message.
+ *	@c: I2O controller
+ *	@sg: SG list to be mapped
+ *	@sg_count: number of elements in the SG list
+ *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
+ *	@sg_ptr: pointer to the SG list inside the I2O message
+ *
+ *	This function does all necessary DMA handling and also writes the I2O
+ *	SGL elements into the I2O message. For details on DMA handling see also
+ *	dma_map_sg(). The pointer sg_ptr will only be set to the end of the SG
+ *	list if the allocation was successful.
+ *
+ *	Returns 0 on failure or 1 on success.
+ */
+static inline int i2o_dma_map_sg(struct i2o_controller *c,
+				 struct scatterlist *sg, int sg_count,
+				 enum dma_data_direction direction,
+				 u32 __iomem ** sg_ptr)
+{
+	u32 sg_flags;
+	u32 __iomem *mptr = *sg_ptr;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		sg_flags = 0x14000000;
+		break;
+	case DMA_FROM_DEVICE:
+		sg_flags = 0x10000000;
+		break;
+	default:
+		return 0;
+	}
+
+	sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction);
+	if (!sg_count)
+		return 0;
+
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+	if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
+		writel(0x7C020002, mptr++);
+		writel(PAGE_SIZE, mptr++);
+	}
 #endif
 
+	while (sg_count-- > 0) {
+		if (!sg_count)
+			sg_flags |= 0xC0000000;
+		writel(sg_flags | sg_dma_len(sg), mptr++);
+		writel(i2o_dma_low(sg_dma_address(sg)), mptr++);
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
+			writel(i2o_dma_high(sg_dma_address(sg)), mptr++);
+#endif
+		sg++;
+	}
+	*sg_ptr = mptr;
+
+	return 1;
+};
+
+/**
+ *	i2o_dma_alloc - Allocate DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: i2o_dma struct which should get the DMA buffer
+ *	@len: length of the new DMA memory
+ *	@gfp_mask: GFP mask
+ *
+ *	Allocate a coherent DMA memory and write the pointers into addr.
+ *
+ *	Returns 0 on success or -ENOMEM on failure.
+ */
+static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
+				size_t len, unsigned int gfp_mask)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int dma_64 = 0;
+
+	if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) {
+		dma_64 = 1;
+		if (pci_set_dma_mask(pdev, DMA_32BIT_MASK))
+			return -ENOMEM;
+	}
+
+	addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask);
+
+	if ((sizeof(dma_addr_t) > 4) && dma_64)
+		if (pci_set_dma_mask(pdev, DMA_64BIT_MASK))
+			printk(KERN_WARNING "i2o: unable to set 64-bit DMA");
+
+	if (!addr->virt)
+		return -ENOMEM;
+
+	memset(addr->virt, 0, len);
+	addr->len = len;
+
+	return 0;
+};
+
+/**
+ *	i2o_dma_free - Free DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: i2o_dma struct which contains the DMA buffer
+ *
+ *	Free a coherent DMA memory and set virtual address of addr to NULL.
+ */
+static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
+{
+	if (addr->virt) {
+		if (addr->phys)
+			dma_free_coherent(dev, addr->len, addr->virt,
+					  addr->phys);
+		else
+			kfree(addr->virt);
+		addr->virt = NULL;
+	}
+};
+
+/**
+ *	i2o_dma_realloc - Realloc DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: pointer to a i2o_dma struct DMA buffer
+ *	@len: new length of memory
+ *	@gfp_mask: GFP mask
+ *
+ *	If there was something allocated in the addr, free it first. If len > 0
+ *	than try to allocate it and write the addresses back to the addr
+ *	structure. If len == 0 set the virtual address to NULL.
+ *
+ *	Returns the 0 on success or negative error code on failure.
+ */
+static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr,
+				  size_t len, unsigned int gfp_mask)
+{
+	i2o_dma_free(dev, addr);
+
+	if (len)
+		return i2o_dma_alloc(dev, addr, len, gfp_mask);
+
+	return 0;
+};
+
 /* I2O driver (OSM) functions */
 extern int i2o_driver_register(struct i2o_driver *);
 extern void i2o_driver_unregister(struct i2o_driver *);
@@ -375,10 +615,11 @@ extern int i2o_device_claim_release(struct i2o_device *);
 /* Exec OSM functions */
 extern int i2o_exec_lct_get(struct i2o_controller *);
 
-/* device / driver conversion functions */
+/* device / driver / kobject conversion functions */
 #define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver)
 #define to_i2o_device(dev) container_of(dev, struct i2o_device, device)
 #define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device)
+#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj))
 
 /**
  *	i2o_msg_get - obtain an I2O message from the IOP
@@ -466,8 +707,10 @@ static inline struct i2o_message __iomem *i2o_msg_out_to_virt(struct
 							      i2o_controller *c,
 							      u32 m)
 {
-	BUG_ON(m < c->out_queue.phys
-	       || m >= c->out_queue.phys + c->out_queue.len);
+	if (unlikely
+	    (m < c->out_queue.phys
+	     || m >= c->out_queue.phys + c->out_queue.len))
+		return NULL;
 
 	return c->out_queue.virt + (m - c->out_queue.phys);
 };
@@ -532,48 +775,6 @@ static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
 	}
 };
 
-/**
- *	i2o_dma_map - Map the memory to DMA
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which should be mapped
- *
- *	Map the memory in addr->virt to coherent DMA memory and write the
- *	physical address into addr->phys.
- *
- *	Returns 0 on success or -ENOMEM on failure.
- */
-static inline int i2o_dma_map(struct device *dev, struct i2o_dma *addr)
-{
-	if (!addr->virt)
-		return -EFAULT;
-
-	if (!addr->phys)
-		addr->phys = dma_map_single(dev, addr->virt, addr->len,
-					    DMA_BIDIRECTIONAL);
-	if (!addr->phys)
-		return -ENOMEM;
-
-	return 0;
-};
-
-/**
- *	i2o_dma_unmap - Unmap the DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which should be unmapped
- *
- *	Unmap the memory in addr->virt from DMA memory.
- */
-static inline void i2o_dma_unmap(struct device *dev, struct i2o_dma *addr)
-{
-	if (!addr->virt)
-		return;
-
-	if (addr->phys) {
-		dma_unmap_single(dev, addr->phys, addr->len, DMA_BIDIRECTIONAL);
-		addr->phys = 0;
-	}
-};
-
 /*
  *	Endian handling wrapped into the macro - keeps the core code
  *	cleaner.
@@ -725,6 +926,14 @@ extern void i2o_debug_state(struct i2o_controller *c);
 #define I2O_CMD_SCSI_ABORT		0x83
 #define I2O_CMD_SCSI_BUSRESET		0x27
 
+/*
+ * Bus Adapter Class
+ */
+#define I2O_CMD_BUS_ADAPTER_RESET	0x85
+#define I2O_CMD_BUS_RESET		0x87
+#define I2O_CMD_BUS_SCAN		0x89
+#define I2O_CMD_BUS_QUIESCE		0x8b
+
 /*
  * Random Block Storage Class
  */
@@ -948,7 +1157,7 @@ extern void i2o_debug_state(struct i2o_controller *c);
 
 /* request queue sizes */
 #define I2O_MAX_SECTORS			1024
-#define I2O_MAX_SEGMENTS		128
+#define I2O_MAX_PHYS_SEGMENTS		MAX_PHYS_SEGMENTS
 
 #define I2O_REQ_MEMPOOL_SIZE		32
 
-- 
cgit v1.2.2


From b2aaee33fbb354a2f08121aa1c1be55841102761 Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Thu, 23 Jun 2005 22:02:19 -0700
Subject: [PATCH] I2O: Adaptec specific SG_IO access, firmware access through
 sysfs and 2400A workaround

Changes:
 - Provide SG_IO access to BLOCK and EXECUTIVE class on Adaptec
   controllers
 - Use PRIVATE messages in SCSI-OSM because on some controllers normal
   SCSI class commands like READ or READ CAPACITY cause errors
 - Use new DMA and SG list creation function
 - Added workaround to limit sectors per request for Adaptec 2400A
   controllers

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/i2o-dev.h | 22 ++++++++++++++++++++++
 include/linux/i2o.h     | 37 ++++++++++++++++++++++++++-----------
 2 files changed, 48 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o-dev.h b/include/linux/i2o-dev.h
index 90c984ecd521..d4a08d29e36d 100644
--- a/include/linux/i2o-dev.h
+++ b/include/linux/i2o-dev.h
@@ -32,6 +32,13 @@ typedef unsigned int u32;
 
 #endif				/* __KERNEL__ */
 
+/*
+ *	Software module types
+ */
+#define I2O_SOFTWARE_MODULE_IRTOS		0x11
+#define I2O_SOFTWARE_MODULE_IOP_PRIVATE		0x22
+#define I2O_SOFTWARE_MODULE_IOP_CONFIG		0x23
+
 /*
  *	Vendors
  */
@@ -125,6 +132,10 @@ struct i2o_evt_get {
 	int lost;
 };
 
+typedef struct i2o_sg_io_hdr {
+	unsigned int flags;	/* see I2O_DPT_SG_IO_FLAGS */
+} i2o_sg_io_hdr_t;
+
 /**************************************************************************
  * HRT related constants and structures
  **************************************************************************/
@@ -403,4 +414,15 @@ typedef struct _i2o_status_block {
 #define ADAPTER_STATE_FAILED			0x10
 #define ADAPTER_STATE_FAULTED			0x11
 
+
+/*
+ * DPT / Adaptec specific values for i2o_sg_io_hdr flags.
+ */
+#define I2O_DPT_SG_FLAG_INTERPRET		0x00010000
+#define I2O_DPT_SG_FLAG_PHYSICAL		0x00020000
+
+#define I2O_DPT_FLASH_FRAG_SIZE			0x10000
+#define I2O_DPT_FLASH_READ			0x0101
+#define I2O_DPT_FLASH_WRITE			0x0102
+
 #endif				/* _I2O_DEV_H */
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 497ea574f96b..2039a87c2b91 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -147,10 +147,13 @@ struct i2o_controller {
 
 	struct pci_dev *pdev;	/* PCI device */
 
-	unsigned int short_req:1;	/* use small block sizes */
-	unsigned int no_quiesce:1;	/* dont quiesce before reset */
-	unsigned int raptor:1;		/* split bar */
 	unsigned int promise:1;		/* Promise controller */
+	unsigned int adaptec:1;		/* DPT / Adaptec controller */
+	unsigned int raptor:1;	/* split bar */
+	unsigned int no_quiesce:1;	/* dont quiesce before reset */
+	unsigned int short_req:1;	/* use small block sizes */
+	unsigned int limit_sectors:1;	/* limit number of sectors / request */
+	unsigned int pae_support:1;	/* controller has 64-bit SGL support */
 
 	struct list_head devices;	/* list of I2O devices */
 	struct list_head list;	/* Controller list */
@@ -746,7 +749,21 @@ static inline struct i2o_message __iomem *i2o_msg_in_to_virt(struct i2o_controll
 static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
 				size_t len, unsigned int gfp_mask)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int dma_64 = 0;
+
+	if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) {
+			dma_64 = 1;
+			if(pci_set_dma_mask(pdev, DMA_32BIT_MASK))
+				return -ENOMEM;
+	}
+
 	addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask);
+
+	if ((sizeof(dma_addr_t) > 4) && dma_64)
+		if(pci_set_dma_mask(pdev, DMA_64BIT_MASK))
+			printk(KERN_WARNING "i2o: unable to set 64-bit DMA");
+
 	if (!addr->virt)
 		return -ENOMEM;
 
@@ -946,7 +963,7 @@ extern void i2o_debug_state(struct i2o_controller *c);
 #define I2O_CMD_BLOCK_MEJECT		0x43
 #define I2O_CMD_BLOCK_POWER		0x70
 
-#define I2O_PRIVATE_MSG			0xFF
+#define I2O_CMD_PRIVATE			0xFF
 
 /* Command status values  */
 
@@ -1095,9 +1112,9 @@ extern void i2o_debug_state(struct i2o_controller *c);
 #define SGL_OFFSET_8    (0x0080 | I2OVERSION)
 #define SGL_OFFSET_9    (0x0090 | I2OVERSION)
 #define SGL_OFFSET_10   (0x00A0 | I2OVERSION)
-
-#define TRL_OFFSET_5    (0x0050 | I2OVERSION)
-#define TRL_OFFSET_6    (0x0060 | I2OVERSION)
+#define SGL_OFFSET_11   (0x00B0 | I2OVERSION)
+#define SGL_OFFSET_12   (0x00C0 | I2OVERSION)
+#define SGL_OFFSET(x)   (((x)<<4) | I2OVERSION)
 
 /* Transaction Reply Lists (TRL) Control Word structure */
 #define TRL_SINGLE_FIXED_LENGTH		0x00
@@ -1130,7 +1147,6 @@ extern void i2o_debug_state(struct i2o_controller *c);
 #define HOST_TID		1
 
 #define MSG_FRAME_SIZE		128	/* i2o_scsi assumes >= 32 */
-#define REPLY_FRAME_SIZE	17
 #define SG_TABLESIZE		30
 #define NMBR_MSG_FRAMES		128
 
@@ -1155,11 +1171,10 @@ extern void i2o_debug_state(struct i2o_controller *c);
 #define I2O_HRT_GET_TRIES		3
 #define I2O_LCT_GET_TRIES		3
 
-/* request queue sizes */
+/* defines for max_sectors and max_phys_segments */
 #define I2O_MAX_SECTORS			1024
+#define I2O_MAX_SECTORS_LIMITED		256
 #define I2O_MAX_PHYS_SEGMENTS		MAX_PHYS_SEGMENTS
 
-#define I2O_REQ_MEMPOOL_SIZE		32
-
 #endif				/* __KERNEL__ */
 #endif				/* _I2O_H */
-- 
cgit v1.2.2


From 9e87545f06930c1d294423a8091d1077e7444a47 Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Thu, 23 Jun 2005 22:02:21 -0700
Subject: [PATCH] I2O: second code cleanup of sparse warnings and unneeded
 syncronization

Changes:
 - Added header "core.h" for i2o_core.ko internal definitions
 - More sparse fixes
 - Changed display of TID's in sysfs attributes from XXX to 0xXXX
 - Use the right functions for accessing I/O and normal memory
 - Removed error handling of SCSI device errors and let the SCSI layer
   take care of it
 - Added new device / removed device handling to SCSI-OSM
 - Make status access volatile
 - Cleaned up activation of I2O controller
 - Removed unnecessary wmb() and rmb() calls
 - Use own struct i2o_io for I/O memory instead of struct i2o_dma

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/i2o-dev.h |  23 +++++-----
 include/linux/i2o.h     | 116 +++++++++++++-----------------------------------
 2 files changed, 42 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o-dev.h b/include/linux/i2o-dev.h
index d4a08d29e36d..36fd18cdad28 100644
--- a/include/linux/i2o-dev.h
+++ b/include/linux/i2o-dev.h
@@ -32,18 +32,6 @@ typedef unsigned int u32;
 
 #endif				/* __KERNEL__ */
 
-/*
- *	Software module types
- */
-#define I2O_SOFTWARE_MODULE_IRTOS		0x11
-#define I2O_SOFTWARE_MODULE_IOP_PRIVATE		0x22
-#define I2O_SOFTWARE_MODULE_IOP_CONFIG		0x23
-
-/*
- *	Vendors
- */
-#define I2O_VENDOR_DPT				0x001b
-
 /*
  * I2O Control IOCTLs and structures
  */
@@ -414,6 +402,17 @@ typedef struct _i2o_status_block {
 #define ADAPTER_STATE_FAILED			0x10
 #define ADAPTER_STATE_FAULTED			0x11
 
+/*
+ *	Software module types
+ */
+#define I2O_SOFTWARE_MODULE_IRTOS		0x11
+#define I2O_SOFTWARE_MODULE_IOP_PRIVATE		0x22
+#define I2O_SOFTWARE_MODULE_IOP_CONFIG		0x23
+
+/*
+ *	Vendors
+ */
+#define I2O_VENDOR_DPT				0x001b
 
 /*
  * DPT / Adaptec specific values for i2o_sg_io_hdr flags.
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 2039a87c2b91..be937d0372a7 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -119,12 +119,21 @@ struct i2o_driver {
 };
 
 /*
- *	Contains all information which are necessary for DMA operations
+ *	Contains DMA mapped address information
  */
 struct i2o_dma {
 	void *virt;
 	dma_addr_t phys;
-	u32 len;
+	size_t len;
+};
+
+/*
+ *	Contains IO mapped address information
+ */
+struct i2o_io {
+	void __iomem *virt;
+	unsigned long phys;
+	unsigned long len;
 };
 
 /*
@@ -173,8 +182,8 @@ struct i2o_controller {
 	struct semaphore lct_lock;	/* Lock for LCT updates */
 	struct i2o_dma status_block;	/* IOP status block */
 
-	struct i2o_dma base;	/* controller messaging unit */
-	struct i2o_dma in_queue;	/* inbound message queue Host->IOP */
+	struct i2o_io base;	/* controller messaging unit */
+	struct i2o_io in_queue;	/* inbound message queue Host->IOP */
 	struct i2o_dma out_queue;	/* outbound message queue IOP->Host */
 
 	unsigned int battery:1;		/* Has a battery backup */
@@ -185,6 +194,7 @@ struct i2o_controller {
 	struct resource mem_resource;	/* Mem resource allocated to the IOP */
 
 	struct device device;
+	struct class_device classdev;	/* I2O controller class */
 	struct i2o_device *exec;	/* Executive */
 #if BITS_PER_LONG == 64
 	spinlock_t context_list_lock;	/* lock for context_list */
@@ -235,9 +245,10 @@ struct i2o_sys_tbl {
 extern struct list_head i2o_controllers;
 
 /* Message functions */
-static inline u32 i2o_msg_get(struct i2o_controller *, struct i2o_message __iomem **);
-extern u32 i2o_msg_get_wait(struct i2o_controller *, struct i2o_message __iomem **,
-			    int);
+static inline u32 i2o_msg_get(struct i2o_controller *,
+			      struct i2o_message __iomem **);
+extern u32 i2o_msg_get_wait(struct i2o_controller *,
+			    struct i2o_message __iomem **, int);
 static inline void i2o_msg_post(struct i2o_controller *, u32);
 static inline int i2o_msg_post_wait(struct i2o_controller *, u32,
 				    unsigned long);
@@ -638,14 +649,12 @@ extern int i2o_exec_lct_get(struct i2o_controller *);
  *	available returns I2O_QUEUE_EMPTY and msg is leaved untouched.
  */
 static inline u32 i2o_msg_get(struct i2o_controller *c,
-			      struct i2o_message __iomem **msg)
+			      struct i2o_message __iomem ** msg)
 {
 	u32 m = readl(c->in_port);
 
-	if (m != I2O_QUEUE_EMPTY) {
+	if (m != I2O_QUEUE_EMPTY)
 		*msg = c->in_queue.virt + m;
-		rmb();
-	}
 
 	return m;
 };
@@ -659,7 +668,6 @@ static inline u32 i2o_msg_get(struct i2o_controller *c,
  */
 static inline void i2o_msg_post(struct i2o_controller *c, u32 m)
 {
-	wmb();
 	writel(m, c->in_port);
 };
 
@@ -706,14 +714,11 @@ static inline void i2o_flush_reply(struct i2o_controller *c, u32 m)
  *	work for sender side messages as they are ioremap objects
  *	provided by the I2O controller.
  */
-static inline struct i2o_message __iomem *i2o_msg_out_to_virt(struct
-							      i2o_controller *c,
-							      u32 m)
+static inline struct i2o_message *i2o_msg_out_to_virt(struct i2o_controller *c,
+						      u32 m)
 {
-	if (unlikely
-	    (m < c->out_queue.phys
-	     || m >= c->out_queue.phys + c->out_queue.len))
-		return NULL;
+	BUG_ON(m < c->out_queue.phys
+	       || m >= c->out_queue.phys + c->out_queue.len);
 
 	return c->out_queue.virt + (m - c->out_queue.phys);
 };
@@ -729,69 +734,13 @@ static inline struct i2o_message __iomem *i2o_msg_out_to_virt(struct
  *	work for receive side messages as they are kmalloc objects
  *	in a different pool.
  */
-static inline struct i2o_message __iomem *i2o_msg_in_to_virt(struct i2o_controller *c,
-						     u32 m)
+static inline struct i2o_message __iomem *i2o_msg_in_to_virt(struct
+							     i2o_controller *c,
+							     u32 m)
 {
 	return c->in_queue.virt + m;
 };
 
-/**
- *	i2o_dma_alloc - Allocate DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which should get the DMA buffer
- *	@len: length of the new DMA memory
- *	@gfp_mask: GFP mask
- *
- *	Allocate a coherent DMA memory and write the pointers into addr.
- *
- *	Returns 0 on success or -ENOMEM on failure.
- */
-static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
-				size_t len, unsigned int gfp_mask)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	int dma_64 = 0;
-
-	if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) {
-			dma_64 = 1;
-			if(pci_set_dma_mask(pdev, DMA_32BIT_MASK))
-				return -ENOMEM;
-	}
-
-	addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask);
-
-	if ((sizeof(dma_addr_t) > 4) && dma_64)
-		if(pci_set_dma_mask(pdev, DMA_64BIT_MASK))
-			printk(KERN_WARNING "i2o: unable to set 64-bit DMA");
-
-	if (!addr->virt)
-		return -ENOMEM;
-
-	memset(addr->virt, 0, len);
-	addr->len = len;
-
-	return 0;
-};
-
-/**
- *	i2o_dma_free - Free DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which contains the DMA buffer
- *
- *	Free a coherent DMA memory and set virtual address of addr to NULL.
- */
-static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
-{
-	if (addr->virt) {
-		if (addr->phys)
-			dma_free_coherent(dev, addr->len, addr->virt,
-					  addr->phys);
-		else
-			kfree(addr->virt);
-		addr->virt = NULL;
-	}
-};
-
 /*
  *	Endian handling wrapped into the macro - keeps the core code
  *	cleaner.
@@ -1141,16 +1090,13 @@ extern void i2o_debug_state(struct i2o_controller *c);
 #define ELEVEN_WORD_MSG_SIZE	0x000B0000
 #define I2O_MESSAGE_SIZE(x)	((x)<<16)
 
-/* Special TID Assignments */
-
+/* special TID assignments */
 #define ADAPTER_TID		0
 #define HOST_TID		1
 
-#define MSG_FRAME_SIZE		128	/* i2o_scsi assumes >= 32 */
-#define SG_TABLESIZE		30
-#define NMBR_MSG_FRAMES		128
-
-#define MSG_POOL_SIZE		(MSG_FRAME_SIZE*NMBR_MSG_FRAMES*sizeof(u32))
+/* outbound queue defines */
+#define I2O_MAX_OUTBOUND_MSG_FRAMES	128
+#define I2O_OUTBOUND_MSG_FRAME_SIZE	128	/* in 32-bit words */
 
 #define I2O_POST_WAIT_OK	0
 #define I2O_POST_WAIT_TIMEOUT	-ETIMEDOUT
-- 
cgit v1.2.2


From f33213ecf49c98da4e85121b592c3bea8057c2e6 Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Thu, 23 Jun 2005 22:02:23 -0700
Subject: [PATCH] I2O: Lindent run and replacement of printk through osm
 printing functions

Lindent run and replaced printk() through the corresponding osm_*() function

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/i2o.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index be937d0372a7..bdc286ec947c 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -156,8 +156,8 @@ struct i2o_controller {
 
 	struct pci_dev *pdev;	/* PCI device */
 
-	unsigned int promise:1;		/* Promise controller */
-	unsigned int adaptec:1;		/* DPT / Adaptec controller */
+	unsigned int promise:1;	/* Promise controller */
+	unsigned int adaptec:1;	/* DPT / Adaptec controller */
 	unsigned int raptor:1;	/* split bar */
 	unsigned int no_quiesce:1;	/* dont quiesce before reset */
 	unsigned int short_req:1;	/* use small block sizes */
@@ -174,7 +174,7 @@ struct i2o_controller {
 
 	/* Dynamic LCT related data */
 
-	struct i2o_dma status;	/* status of IOP */
+	struct i2o_dma status;	/* IOP status block */
 
 	struct i2o_dma hrt;	/* HW Resource Table */
 	i2o_lct *lct;		/* Logical Config Table */
@@ -186,7 +186,7 @@ struct i2o_controller {
 	struct i2o_io in_queue;	/* inbound message queue Host->IOP */
 	struct i2o_dma out_queue;	/* outbound message queue IOP->Host */
 
-	unsigned int battery:1;		/* Has a battery backup */
+	unsigned int battery:1;	/* Has a battery backup */
 	unsigned int io_alloc:1;	/* An I/O resource was allocated */
 	unsigned int mem_alloc:1;	/* A memory resource was allocated */
 
-- 
cgit v1.2.2


From 496400014f22c4dbdbc1e89249a2feba46939708 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:58 -0700
Subject: [PATCH] nfsd4: fix fh_expire_type

We're returning NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME for the
fh_expire_type attribute.  This is incorrect:
	1. The spec actually only allows NOEXPIRE_WITH_OPEN when
	   VOLATILE_ANY is also set.
	2. Filehandles for open files can expire, if the file is removed
	   and there is a reboot.
	3. Filehandles are only volatile on rename in the nosubtree check
	   case.

Unfortunately, there's no way to indicate that we only expire on remove.  So
our only choice is FH4_VOLATILE_ANY.  Although it's redundant, we also set
FH4_VOL_RENAME in the subtree check case, since subtreecheck does actually
cause problems in practice and it seems possibly useful to give clients some
way to distinguish that case.

Fix a mispelled #define while we're at it.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfs4.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 5bb5b2fd7ba2..0c1c306cdaec 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -28,7 +28,7 @@
 #define NFS4_ACCESS_DELETE      0x0010
 #define NFS4_ACCESS_EXECUTE     0x0020
 
-#define NFS4_FH_PERISTENT		0x0000
+#define NFS4_FH_PERSISTENT		0x0000
 #define NFS4_FH_NOEXPIRE_WITH_OPEN	0x0001
 #define NFS4_FH_VOLATILE_ANY		0x0002
 #define NFS4_FH_VOL_MIGRATION		0x0004
-- 
cgit v1.2.2


From 8beefa249371f55432394ac96864c83b0b309c28 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:08 -0700
Subject: [PATCH] nfsd4: rename nfs4_file fields

Trivial renaming patch:

I can never remember, while looking at various lists relating the nfsd4 state
structures, which are the "heads" and which are items on other lists, or which
structures are actually on the various lists.  The following convention helps
me: given structures foo and bar, with foo containing the head of a list of
bars, use "bars" for the name of the head of the list contained in the struct
foo, and use "per_foo" for the entries in the struct bars.

Go ahead and do this for struct nfs4_file.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index b6b2fe1e7c63..2c3b42674a4c 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -218,8 +218,8 @@ struct nfs4_stateowner {
 */
 struct nfs4_file {
 	struct list_head        fi_hash;    /* hash by "struct inode *" */
-	struct list_head        fi_perfile; /* list: nfs4_stateid */
-	struct list_head	fi_del_perfile; /* list: nfs4_delegation */
+	struct list_head        fi_stateids;
+	struct list_head	fi_delegations;
 	struct inode		*fi_inode;
 	u32                     fi_id;      /* used with stateowner->so_id 
 					     * for stateid_hashtbl hash */
-- 
cgit v1.2.2


From 13cd21845d6a9729ca95e36ae6e8c669623fbfd4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:10 -0700
Subject: [PATCH] nfsd4: reference count struct nfs4_file

Add a struct kref to each nfs4_file and take a reference to it from each
stateid and delegation that refers to it.  The atomicity guarantees are
overkill given that all this stuff is done under the single nfsd4 state lock,
but a) we'd like finer-grained locking some day, and b) this simplifies the
cleanup of the structures a bit, something that has previously been a bit
complicated and bug-prone.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 2c3b42674a4c..296e6429fc3b 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -217,6 +217,7 @@ struct nfs4_stateowner {
 *      share_acces, share_deny on the file.
 */
 struct nfs4_file {
+	struct kref		fi_ref;
 	struct list_head        fi_hash;    /* hash by "struct inode *" */
 	struct list_head        fi_stateids;
 	struct list_head	fi_delegations;
-- 
cgit v1.2.2


From 7b190fecfa33d72bcf74c9473134c2ad14ae9545 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:23 -0700
Subject: [PATCH] knfsd: nfsd4: delegation recovery

Allow recovery of delegations after reboot.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/xdr4.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index a1f5ad0be1bf..4d24d65c0e88 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -210,6 +210,7 @@ struct nfsd4_open {
 	u32		op_share_access;    /* request */
 	u32		op_share_deny;      /* request */
 	stateid_t	op_stateid;         /* response */
+	u32		op_recall;          /* recall */
 	struct nfsd4_change_info  op_cinfo; /* response */
 	u32		op_rflags;          /* response */
 	int		op_truncate;        /* used during processing */
-- 
cgit v1.2.2


From 76a3550ec50ed86885a10a767ebaebb7c9104721 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:26 -0700
Subject: [PATCH] knfsd: nfsd4: rename nfs4_state_init

Somewhat gratuitous rename to simplify following patch.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/nfsd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 4bf931d5ff56..3855fdc5af77 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -145,12 +145,12 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
  * NFSv4 State
  */
 #ifdef CONFIG_NFSD_V4
-int nfs4_state_init(void);
+int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
 time_t nfs4_lease_time(void);
 void nfs4_reset_lease(time_t leasetime);
 #else
-static inline int nfs4_state_init(void){return 0;}
+static inline int nfs4_state_start(void){return 0;}
 static inline void nfs4_state_shutdown(void){}
 static inline time_t nfs4_lease_time(void){return 0;}
 static inline void nfs4_reset_lease(time_t leasetime){}
-- 
cgit v1.2.2


From ac4d8ff2a57179de3ef7834c6ab3fac430b0a05d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:30 -0700
Subject: [PATCH] knfsd: nfsd4: clean up state initialization

Separate out stuff that needs initialization on startup from stuff that only
needs initialization on module init from static data.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/nfsd.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 3855fdc5af77..21c6e9d86e4f 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -145,11 +145,13 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
  * NFSv4 State
  */
 #ifdef CONFIG_NFSD_V4
+void nfs4_state_init(void);
 int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
 time_t nfs4_lease_time(void);
 void nfs4_reset_lease(time_t leasetime);
 #else
+static inline void nfs4_state_init(void){};
 static inline int nfs4_state_start(void){return 0;}
 static inline void nfs4_state_shutdown(void){}
 static inline time_t nfs4_lease_time(void){return 0;}
-- 
cgit v1.2.2


From bd0b1e954e3ba3e5d2cab941458cf98206471bd2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:35 -0700
Subject: [PATCH] knfsd: nfsd4: idmap initialization

Adopt standard kernel style by defining a no-op function instead of putting
ifdef's in the code where the function is called.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd_idmap.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd_idmap.h b/include/linux/nfsd_idmap.h
index 9bb7f30e923b..e82746fcad14 100644
--- a/include/linux/nfsd_idmap.h
+++ b/include/linux/nfsd_idmap.h
@@ -43,8 +43,13 @@
 /* XXX from linux/nfs_idmap.h */
 #define IDMAP_NAMESZ 128
 
+#ifdef CONFIG_NFSD_V4
 void nfsd_idmap_init(void);
 void nfsd_idmap_shutdown(void);
+#else
+static inline void nfsd_idmap_init(void) {};
+static inline void nfsd_idmap_shutdown(void) {};
+#endif
 
 int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *);
 int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *);
-- 
cgit v1.2.2


From a55370a3c0106106a975c5a09cee800611d0cf50 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:52 -0700
Subject: [PATCH] knfsd: nfsd4: reboot hash

For the purposes of reboot recovery we keep a directory with subdirectories
each having a name that is the ascii hex representation of the md5 sum of a
client identifier for an active client.

This adds the code to calculate that name.  We also use it for the purposes of
comparing clients, so if someone ever manages to find two client names that
are md5 collisions, then we'll return clid_inuse to the second.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 296e6429fc3b..fdaa84addadb 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -109,6 +109,8 @@ struct nfs4_callback {
 	struct rpc_clnt *       cb_client;
 };
 
+#define HEXDIR_LEN     33 /* hex version of 16 byte md5 of cl_name plus '\0' */
+
 /*
  * struct nfs4_client - one per client.  Clientids live here.
  * 	o Each nfs4_client is hashed by clientid.
@@ -126,6 +128,7 @@ struct nfs4_client {
 	struct list_head	cl_del_perclnt; /* list: delegations */
 	struct list_head        cl_lru;         /* tail queue */
 	struct xdr_netobj	cl_name; 	/* id generated by client */
+	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
 	u32			cl_addr; 	/* client ipaddress */
@@ -143,7 +146,7 @@ struct nfs4_client {
  */
 struct nfs4_client_reclaim {
 	struct list_head	cr_strhash;	/* hash by cr_name */
-	struct xdr_netobj 	cr_name; 	/* id generated by client */
+	char			cr_recdir[HEXDIR_LEN]; /* recover dir */
 };
 
 static inline void
@@ -283,6 +286,7 @@ extern void nfs4_free_stateowner(struct kref *kref);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+extern int nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
 
 static inline void
 nfs4_put_stateowner(struct nfs4_stateowner *so)
-- 
cgit v1.2.2


From fd39ca9a808c6026989bc2188868a0574eb37108 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:03 -0700
Subject: [PATCH] knfsd: nfsd4: make needlessly global code static

This patch contains the following possible cleanups:

- make needlessly global code static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index fdaa84addadb..0e18ae22127d 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -61,11 +61,6 @@ typedef struct {
 #define si_stateownerid   si_opaque.so_stateownerid
 #define si_fileid         si_opaque.so_fileid
 
-extern stateid_t zerostateid;
-extern stateid_t onestateid;
-
-#define ZERO_STATEID(stateid)       (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
-#define ONE_STATEID(stateid)        (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
 
 struct nfs4_cb_recall {
 	u32			cbr_ident;
@@ -271,12 +266,9 @@ struct nfs4_stateid {
 	((err) != nfserr_stale_stateid) &&      \
 	((err) != nfserr_bad_stateid))
 
-extern time_t nfs4_laundromat(void);
 extern int nfsd4_renew(clientid_t *clid);
 extern int nfs4_preprocess_stateid_op(struct svc_fh *current_fh, 
 		stateid_t *stateid, int flags, struct file **filp);
-extern int nfs4_share_conflict(struct svc_fh *current_fh, 
-		unsigned int deny_type);
 extern void nfs4_lock_state(void);
 extern void nfs4_unlock_state(void);
 extern int nfs4_in_grace(void);
-- 
cgit v1.2.2


From ea1da636e956ad1591a74904f23d98bbc26a644b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:17 -0700
Subject: [PATCH] knfsd: nfsd4: rename state list fields

Trivial renaming patch:

I can never remember, while looking at various lists relating the nfsd4 state
structures, which are the "heads" and which are items on other lists, or which
structures are actually on the various lists.  The following convention helps
me: given structures foo and bar, with foo containing the head of a list of
bars, use "bars" for the name of the head of the list contained in the struct
foo, and use "per_foo" for the entries in the struct bars.

Already done for struct nfs4_file; go ahead and do it for the other nfsd4
state structures.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 0e18ae22127d..f4f27b76ee64 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -72,8 +72,8 @@ struct nfs4_cb_recall {
 };
 
 struct nfs4_delegation {
-	struct list_head	dl_del_perfile; /* nfs4_file->fi_del_perfile */
-	struct list_head	dl_del_perclnt; /* nfs4_client->cl_del_perclnt*/
+	struct list_head	dl_perfile;
+	struct list_head	dl_perclnt;
 	struct list_head	dl_recall_lru;  /* delegation recalled */
 	atomic_t		dl_count;       /* ref count */
 	struct nfs4_client	*dl_client;
@@ -119,8 +119,8 @@ struct nfs4_callback {
 struct nfs4_client {
 	struct list_head	cl_idhash; 	/* hash by cl_clientid.id */
 	struct list_head	cl_strhash; 	/* hash by cl_name */
-	struct list_head	cl_perclient; 	/* list: stateowners */
-	struct list_head	cl_del_perclnt; /* list: delegations */
+	struct list_head	cl_openowners;
+	struct list_head	cl_delegations;
 	struct list_head        cl_lru;         /* tail queue */
 	struct xdr_netobj	cl_name; 	/* id generated by client */
 	char                    cl_recdir[HEXDIR_LEN]; /* recovery dir */
@@ -195,9 +195,9 @@ struct nfs4_stateowner {
 	struct kref		so_ref;
 	struct list_head        so_idhash;   /* hash by so_id */
 	struct list_head        so_strhash;   /* hash by op_name */
-	struct list_head        so_perclient; /* nfs4_client->cl_perclient */
-	struct list_head        so_perfilestate; /* list: nfs4_stateid */
-	struct list_head        so_perlockowner; /* nfs4_stateid->st_perlockowner */
+	struct list_head        so_perclient;
+	struct list_head        so_stateids;
+	struct list_head        so_perstateid; /* for lockowners only */
 	struct list_head	so_close_lru; /* tail queue */
 	time_t			so_time; /* time of placement on so_close_lru */
 	int			so_is_open_owner; /* 1=openowner,0=lockowner */
@@ -240,8 +240,8 @@ struct nfs4_file {
 struct nfs4_stateid {
 	struct list_head              st_hash; 
 	struct list_head              st_perfile;
-	struct list_head              st_perfilestate; 
-	struct list_head              st_perlockowner;
+	struct list_head              st_perstateowner;
+	struct list_head              st_lockowners;
 	struct nfs4_stateowner      * st_stateowner;
 	struct nfs4_file            * st_file;
 	stateid_t                     st_stateid;
-- 
cgit v1.2.2


From cb36d6345752fa24827044c68e15f6708a40d9f6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:23 -0700
Subject: [PATCH] knfsd: nfsd4: remove cb_parsed

The cb_parsed field is only used by probe_callback, to determine whether the
callback information has been filled in by setclientid.  But there is no way
that probe_callback() can be called without that having already happened, so
that check is superfluous, as is cb_parsed.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index f4f27b76ee64..83d29ec03a58 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -92,7 +92,6 @@ struct nfs4_delegation {
 /* client delegation callback info */
 struct nfs4_callback {
 	/* SETCLIENTID info */
-	u32			cb_parsed;  /* addr parsed */
 	u32                     cb_addr;
 	unsigned short          cb_port;
 	u32                     cb_prog;
-- 
cgit v1.2.2


From 190e4fbf96037e5e526ba3210f2bcc2a3b6fe964 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:25 -0700
Subject: [PATCH] knfsd: nfsd4: initialize recovery directory

NFSv4 clients are required to know what state they have on the server so that
they can reclaim it on server reboot.  However, it is possible for
pathalogical combinations of server reboots and network partitions to leave a
client in a state where it cannot know whether it has lost its state on the
server.

For this reason, rfc3530 requires that we store some information about clients
to stable storage.

So we maintain a directory /var/lib/nfs/v4recovery with a subdirectory for
each client with active state.  We leave open the possibility of including
files underneath each such subdirectory with information about the client, but
for now the subdirectories are empty.

We create a client subdirectory whenever a client makes its first non-reclaim
open_confirm.

We remove a client subdirectory whenever either
        a) its lease expires, or
	b) the grace period ends without it reclaiming anything.
When handling reclaims, we allow the reclaim if and only if the client doing
the reclaim has a subdirectory.

This patch adds just the code to scan the recovery directory on nfsd startup.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 83d29ec03a58..19481ab122df 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -278,6 +278,10 @@ extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
 extern void nfs4_put_delegation(struct nfs4_delegation *dp);
 extern int nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
+extern void nfsd4_init_recdir(char *recdir_name);
+extern int nfsd4_recdir_load(void);
+extern void nfsd4_shutdown_recdir(void);
+extern int nfs4_client_to_reclaim(const char *name);
 
 static inline void
 nfs4_put_stateowner(struct nfs4_stateowner *so)
-- 
cgit v1.2.2


From c7b9a45927e74c81d6562153f7fde9d32da00159 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:30 -0700
Subject: [PATCH] knfsd: nfsd4: reboot recovery

This patch adds the code to create and remove client subdirectories from the
recovery directory, as described in the previous patch comment.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/state.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 19481ab122df..a84a3fa99be1 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -131,6 +131,7 @@ struct nfs4_client {
 	nfs4_verifier		cl_confirm;	/* generated by server */
 	struct nfs4_callback	cl_callback;    /* callback info */
 	atomic_t		cl_count;	/* ref count */
+	u32			cl_firststate;	/* recovery dir creation */
 };
 
 /* struct nfs4_client_reset
@@ -282,6 +283,10 @@ extern void nfsd4_init_recdir(char *recdir_name);
 extern int nfsd4_recdir_load(void);
 extern void nfsd4_shutdown_recdir(void);
 extern int nfs4_client_to_reclaim(const char *name);
+extern int nfs4_has_reclaimed_state(const char *name);
+extern void nfsd4_recdir_purge_old(void);
+extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
 
 static inline void
 nfs4_put_stateowner(struct nfs4_stateowner *so)
-- 
cgit v1.2.2


From 0964a3d3f1aa96468091924f6b0c391a46dc6d0b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:32 -0700
Subject: [PATCH] knfsd: nfsd4 reboot dirname fix

Set the recovery directory via /proc/fs/nfsd/nfs4recoverydir.

It may be changed any time, but is used only on startup.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nfsd/nfsd.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 21c6e9d86e4f..5791dfd30dd0 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -150,12 +150,14 @@ int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
 time_t nfs4_lease_time(void);
 void nfs4_reset_lease(time_t leasetime);
+int nfs4_reset_recoverydir(char *recdir);
 #else
 static inline void nfs4_state_init(void){};
 static inline int nfs4_state_start(void){return 0;}
 static inline void nfs4_state_shutdown(void){}
 static inline time_t nfs4_lease_time(void){return 0;}
 static inline void nfs4_reset_lease(time_t leasetime){}
+static inline int nfs4_reset_recoverydir(char *recdir) {return 0;}
 #endif
 
 /*
-- 
cgit v1.2.2


From c988d2b2845495373f666a381d354a7f80981d62 Mon Sep 17 00:00:00 2001
From: Matt Domsch <Matt_Domsch@dell.com>
Date: Thu, 23 Jun 2005 22:05:15 -0700
Subject: [PATCH] modules: add version and srcversion to sysfs

This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).

/sys/module/e1000
|-- srcversion
`-- version

This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.

Why put this in sysfs?

a) Tools like DKMS, which deal with changing out individual kernel
   modules without replacing the whole kernel, can behave smarter if they
   can tell the version of a given module.  The autoinstaller feature, for
   example, which determines if your system has a "good" version of a
   driver (i.e.  if the one provided by DKMS has a newer verson than that
   provided by the kernel package installed), and to automatically compile
   and install a newer version if DKMS has it but your kernel doesn't yet
   have that version.

b) Because sysadmins manually, or with tools like DKMS, can switch out
   modules on the file system, you can't count on 'modinfo foo.ko', which
   looks at /lib/modules/${kernelver}/...  actually matching what is loaded
   into the kernel already.  Hence asking sysfs for this.

c) as the unbind-driver-from-device work takes shape, it will be
   possible to rebind a driver that's built-in (no .ko to modinfo for the
   version) to a newly loaded module.  sysfs will have the
   currently-built-in version info, for comparison.

d) tech support scripts can then easily grab the version info for what's
   running presently - a question I get often.

There has been renewed interest in this patch on linux-scsi by driver
authors.

As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new.  Compiled and run on
x86 and x86_64.

From: Matthew Dobson <colpatch@us.ibm.com>

      build fix

From: Thierry Vignaud <tvignaud@mandriva.com>

      build fix

From: Matthew Dobson <colpatch@us.ibm.com>

      warning fix

Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/module.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 0e432a0f4aee..f05372b7fe77 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -51,6 +51,9 @@ struct module_attribute {
         ssize_t (*show)(struct module_attribute *, struct module *, char *);
         ssize_t (*store)(struct module_attribute *, struct module *,
 			 const char *, size_t count);
+	void (*setup)(struct module *, const char *);
+	int (*test)(struct module *);
+	void (*free)(struct module *);
 };
 
 struct module_kobject
@@ -239,6 +242,8 @@ struct module
 	/* Sysfs stuff. */
 	struct module_kobject mkobj;
 	struct module_param_attrs *param_attrs;
+	const char *version;
+	const char *srcversion;
 
 	/* Exported symbols */
 	const struct kernel_symbol *syms;
-- 
cgit v1.2.2


From 420edbcc09008342c7b2665453f6b370739aadb0 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:23 -0700
Subject: [PATCH] xip: bdev: execute in place

This is the block device related part.  The block device operation
direct_access now has a struct block_device as first parameter.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 83857d8070d3..929bf8d20c87 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -885,6 +885,7 @@ struct block_device_operations {
 	int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
 	long (*unlocked_ioctl) (struct file *, unsigned, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned, unsigned long);
+	int (*direct_access) (struct block_device *, sector_t, unsigned long *);
 	int (*media_changed) (struct gendisk *);
 	int (*revalidate_disk) (struct gendisk *);
 	struct module *owner;
-- 
cgit v1.2.2


From ceffc078528befc008c6f2c2c4decda79eabd534 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:25 -0700
Subject: [PATCH] xip: fs/mm: execute in place

- generic_file* file operations do no longer have a xip/non-xip split
- filemap_xip.c implements a new set of fops that require get_xip_page
  aop to work proper. all new fops are exported GPL-only (don't like to
  see whatever code use those except GPL modules)
- __xip_unmap now uses page_check_address, which is no longer static
  in rmap.c, and defined in linux/rmap.h
- mm/filemap.h is now much more clean, plainly having just Linus'
  inline funcs moved here from filemap.c
- fix includes in filemap_xip to make it build cleanly on i386

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h   | 18 ++++++++++++++++++
 include/linux/rmap.h |  6 ++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 929bf8d20c87..79c0fafc0211 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -330,6 +330,8 @@ struct address_space_operations {
 	int (*releasepage) (struct page *, int);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
+	struct page* (*get_xip_page)(struct address_space *, sector_t,
+			int);
 };
 
 struct backing_dev_info;
@@ -1497,6 +1499,22 @@ extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 
+#ifdef CONFIG_FS_XIP
+extern ssize_t xip_file_aio_read(struct kiocb *iocb, char __user *buf,
+				 size_t count, loff_t pos);
+extern ssize_t xip_file_readv(struct file *filp, const struct iovec *iov,
+			      unsigned long nr_segs, loff_t *ppos);
+extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos,
+				 size_t count, read_actor_t actor,
+				 void *target);
+extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
+extern ssize_t xip_file_aio_write(struct kiocb *iocb, const char __user *buf,
+				  size_t count, loff_t pos);
+extern ssize_t xip_file_writev(struct file *file, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t *ppos);
+extern int xip_truncate_page(struct address_space *mapping, loff_t from);
+#endif
+
 static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
 					read_descriptor_t * desc,
 					read_actor_t actor)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 11b484e37ac9..e80fb7ee6efd 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -92,6 +92,12 @@ static inline void page_dup_rmap(struct page *page)
 int page_referenced(struct page *, int is_locked, int ignore_token);
 int try_to_unmap(struct page *);
 
+/*
+ * Called from mm/filemap_xip.c to unmap empty zero page
+ */
+pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long);
+
+
 /*
  * Used by swapoff to help locate where page is expected in vma.
  */
-- 
cgit v1.2.2


From 6d79125bba55ee82701f1c7d4ebbc1aa20ecbe4e Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:26 -0700
Subject: [PATCH] xip: ext2: execute in place

These are the ext2 related parts.  Ext2 now uses the xip_* file operations
along with the get_xip_page aop when mounted with -o xip.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ext2_fs.h | 25 +++++++++++++------------
 include/linux/fs.h      |  5 +++++
 2 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index fab43527e597..a657130ba03a 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -300,18 +300,19 @@ struct ext2_inode {
 /*
  * Mount flags
  */
-#define EXT2_MOUNT_CHECK		0x0001	/* Do mount-time checks */
-#define EXT2_MOUNT_OLDALLOC		0x0002  /* Don't use the new Orlov allocator */
-#define EXT2_MOUNT_GRPID		0x0004	/* Create files with directory's group */
-#define EXT2_MOUNT_DEBUG		0x0008	/* Some debugging messages */
-#define EXT2_MOUNT_ERRORS_CONT		0x0010	/* Continue on errors */
-#define EXT2_MOUNT_ERRORS_RO		0x0020	/* Remount fs ro on errors */
-#define EXT2_MOUNT_ERRORS_PANIC		0x0040	/* Panic on errors */
-#define EXT2_MOUNT_MINIX_DF		0x0080	/* Mimics the Minix statfs */
-#define EXT2_MOUNT_NOBH			0x0100	/* No buffer_heads */
-#define EXT2_MOUNT_NO_UID32		0x0200  /* Disable 32-bit UIDs */
-#define EXT2_MOUNT_XATTR_USER		0x4000	/* Extended user attributes */
-#define EXT2_MOUNT_POSIX_ACL		0x8000	/* POSIX Access Control Lists */
+#define EXT2_MOUNT_CHECK		0x000001  /* Do mount-time checks */
+#define EXT2_MOUNT_OLDALLOC		0x000002  /* Don't use the new Orlov allocator */
+#define EXT2_MOUNT_GRPID		0x000004  /* Create files with directory's group */
+#define EXT2_MOUNT_DEBUG		0x000008  /* Some debugging messages */
+#define EXT2_MOUNT_ERRORS_CONT		0x000010  /* Continue on errors */
+#define EXT2_MOUNT_ERRORS_RO		0x000020  /* Remount fs ro on errors */
+#define EXT2_MOUNT_ERRORS_PANIC		0x000040  /* Panic on errors */
+#define EXT2_MOUNT_MINIX_DF		0x000080  /* Mimics the Minix statfs */
+#define EXT2_MOUNT_NOBH			0x000100  /* No buffer_heads */
+#define EXT2_MOUNT_NO_UID32		0x000200  /* Disable 32-bit UIDs */
+#define EXT2_MOUNT_XATTR_USER		0x004000  /* Extended user attributes */
+#define EXT2_MOUNT_POSIX_ACL		0x008000  /* POSIX Access Control Lists */
+#define EXT2_MOUNT_XIP			0x010000  /* Execute in place */
 
 #define clear_opt(o, opt)		o &= ~EXT2_MOUNT_##opt
 #define set_opt(o, opt)			o |= EXT2_MOUNT_##opt
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 79c0fafc0211..7e0501895f35 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1513,6 +1513,11 @@ extern ssize_t xip_file_aio_write(struct kiocb *iocb, const char __user *buf,
 extern ssize_t xip_file_writev(struct file *file, const struct iovec *iov,
 			       unsigned long nr_segs, loff_t *ppos);
 extern int xip_truncate_page(struct address_space *mapping, loff_t from);
+#else
+static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
+{
+	return 0;
+}
 #endif
 
 static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
-- 
cgit v1.2.2


From eb6fe0c388e43b02e261f0fdee60e42f6298d7f7 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:28 -0700
Subject: [PATCH] xip: reduce code duplication

This patch reworks filemap_xip.c with the goal to reduce code duplication
from mm/filemap.c.  It applies agains 2.6.12-rc6-mm1.  Instead of
implementing the aio functions, this one implements the synchronous
read/write functions only.  For readv and writev, the generic fallback is
used.  For aio, we rely on the application doing the fallback.  Since our
"synchronous" function does memcpy immediately anyway, there is no
performance difference between using the fallbacks or implementing each
operation.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e0501895f35..3ae8e37bdfc8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1500,18 +1500,14 @@ extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 
 #ifdef CONFIG_FS_XIP
-extern ssize_t xip_file_aio_read(struct kiocb *iocb, char __user *buf,
-				 size_t count, loff_t pos);
-extern ssize_t xip_file_readv(struct file *filp, const struct iovec *iov,
-			      unsigned long nr_segs, loff_t *ppos);
+extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
+			     loff_t *ppos);
 extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos,
 				 size_t count, read_actor_t actor,
 				 void *target);
 extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
-extern ssize_t xip_file_aio_write(struct kiocb *iocb, const char __user *buf,
-				  size_t count, loff_t pos);
-extern ssize_t xip_file_writev(struct file *file, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t *ppos);
+extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
+			      size_t len, loff_t *ppos);
 extern int xip_truncate_page(struct address_space *mapping, loff_t from);
 #else
 static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
-- 
cgit v1.2.2


From 52c1da39534fb382c061de58b65f678ad74b59f5 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 23 Jun 2005 22:05:33 -0700
Subject: [PATCH] make various thing static

Another rollup of patches which give various symbols static scope

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h       | 1 -
 include/linux/namespace.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7fc1022be9ee..12277799c007 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -88,7 +88,6 @@ extern fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
 				       struct irqaction *action);
 extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
 extern void note_interrupt(unsigned int irq, irq_desc_t *desc, int action_ret);
-extern void report_bad_irq(unsigned int irq, irq_desc_t *desc, int action_ret);
 extern int can_request_irq(unsigned int irq, unsigned long irqflags);
 
 extern void init_irq_proc(void);
diff --git a/include/linux/namespace.h b/include/linux/namespace.h
index 9eca1558d72f..697991b69f9b 100644
--- a/include/linux/namespace.h
+++ b/include/linux/namespace.h
@@ -12,7 +12,6 @@ struct namespace {
 	struct rw_semaphore	sem;
 };
 
-extern void umount_tree(struct vfsmount *);
 extern int copy_namespace(int, struct task_struct *);
 extern void __put_namespace(struct namespace *namespace);
 
-- 
cgit v1.2.2


From 75043cb5b386e5a01fd03b88f647dd992de02f97 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@mail.ru>
Date: Fri, 24 Jun 2005 20:52:52 +0000
Subject: [PATCH] fs/qnx4/*: fix sparse warnings

This patch fixes sparse warnings in the qnx4fs (and might even make
qnx4fs work on big-endian boxes)

Signed-off-by: Alexey Dobriyan <adobriyan@mail.ru>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Anders Larsen <al@alarsen.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/qnx4_fs.h  | 18 +++++++++---------
 include/linux/qnxtypes.h | 16 ++++++++--------
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h
index 22ba580b0ae8..fc610bb0f733 100644
--- a/include/linux/qnx4_fs.h
+++ b/include/linux/qnx4_fs.h
@@ -46,11 +46,11 @@ struct qnx4_inode_entry {
 	char		di_fname[QNX4_SHORT_NAME_MAX];
 	qnx4_off_t	di_size;
 	qnx4_xtnt_t	di_first_xtnt;
-	__u32		di_xblk;
-	__s32		di_ftime;
-	__s32		di_mtime;
-	__s32		di_atime;
-	__s32		di_ctime;
+	__le32		di_xblk;
+	__le32		di_ftime;
+	__le32		di_mtime;
+	__le32		di_atime;
+	__le32		di_ctime;
 	qnx4_nxtnt_t	di_num_xtnts;
 	qnx4_mode_t	di_mode;
 	qnx4_muid_t	di_uid;
@@ -63,18 +63,18 @@ struct qnx4_inode_entry {
 
 struct qnx4_link_info {
 	char		dl_fname[QNX4_NAME_MAX];
-	__u32		dl_inode_blk;
+	__le32		dl_inode_blk;
 	__u8		dl_inode_ndx;
 	__u8		dl_spare[10];
 	__u8		dl_status;
 };
 
 struct qnx4_xblk {
-	__u32		xblk_next_xblk;
-	__u32		xblk_prev_xblk;
+	__le32		xblk_next_xblk;
+	__le32		xblk_prev_xblk;
 	__u8		xblk_num_xtnts;
 	__u8		xblk_spare[3];
-	__s32		xblk_num_blocks;
+	__le32		xblk_num_blocks;
 	qnx4_xtnt_t	xblk_xtnts[QNX4_MAX_XTNTS_PER_XBLK];
 	char		xblk_signature[8];
 	qnx4_xtnt_t	xblk_first_xtnt;
diff --git a/include/linux/qnxtypes.h b/include/linux/qnxtypes.h
index fb518e318c7c..a3eb1137857b 100644
--- a/include/linux/qnxtypes.h
+++ b/include/linux/qnxtypes.h
@@ -12,18 +12,18 @@
 #ifndef _QNX4TYPES_H
 #define _QNX4TYPES_H
 
-typedef __u16 qnx4_nxtnt_t;
+typedef __le16 qnx4_nxtnt_t;
 typedef __u8  qnx4_ftype_t;
 
 typedef struct {
-	__u32 xtnt_blk;
-	__u32 xtnt_size;
+	__le32 xtnt_blk;
+	__le32 xtnt_size;
 } qnx4_xtnt_t;
 
-typedef __u16 qnx4_mode_t;
-typedef __u16 qnx4_muid_t;
-typedef __u16 qnx4_mgid_t;
-typedef __u32 qnx4_off_t;
-typedef __u16 qnx4_nlink_t;
+typedef __le16 qnx4_mode_t;
+typedef __le16 qnx4_muid_t;
+typedef __le16 qnx4_mgid_t;
+typedef __le32 qnx4_off_t;
+typedef __le16 qnx4_nlink_t;
 
 #endif
-- 
cgit v1.2.2


From e70c9d5e61c6cb2272c866fc1303e62975006752 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Sat, 25 Jun 2005 14:54:25 -0700
Subject: [PATCH] I8K: use standard DMI interface

I8K: Change to use stock dmi infrastructure instead of homegrown
     parsing code. The driver now requires box's DMI data to match
     list of supported models so driver can be safely compiled-in
     by default without fear of it poking into random SMM BIOS
     code. DMI checks can be ignored with i8k.ignore_dmi option.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/dmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index d2bcf556088b..5e93e6dce9a4 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -9,6 +9,7 @@ enum dmi_field {
 	DMI_SYS_VENDOR,
 	DMI_PRODUCT_NAME,
 	DMI_PRODUCT_VERSION,
+	DMI_PRODUCT_SERIAL,
 	DMI_BOARD_VENDOR,
 	DMI_BOARD_NAME,
 	DMI_BOARD_VERSION,
-- 
cgit v1.2.2


From 52a119feaad92d44a0e97d01b22afbcbaf3fc079 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 25 Jun 2005 14:54:57 -0700
Subject: [PATCH] make smp_prepare_cpu to a weak function

I really wish smp_prepare_cpu() would disappear eventually.  In the interim
this is ideally a weak function, so we dont end up changing several places
to define this dummy in headers.

Today since the dummy declaration is done only in drivers/base/cpu.c but
the function is called in kernel/power/smp.c i get undefined reference in
my cpu hotplug code for x86_64 under development.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/cpu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index fe0298e5dae1..e8904c0da686 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,6 +69,7 @@ extern struct semaphore cpucontrol;
 	register_cpu_notifier(&fn##_nb);			\
 }
 int cpu_down(unsigned int cpu);
+extern int __attribute__((weak)) smp_prepare_cpu(int cpu);
 #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
 #else
 #define lock_cpu_hotplug()	do { } while (0)
-- 
cgit v1.2.2


From e6982c671c560da4a0bc5c908cbcbec12bd5991d Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Sat, 25 Jun 2005 14:54:58 -0700
Subject: [PATCH] x86_64: Change init sections for CPU hotplug support

This patch adds __cpuinit and __cpuinitdata sections that need to exist past
boot to support cpu hotplug.

Caveat: This is done *only* for EM64T CPU Hotplug support, on request from
Andi Kleen.  Much of the generic hotplug code in kernel, and none of the other
archs that support CPU hotplug today, i386, ia64, ppc64, s390 and parisc dont
mark sections with __cpuinit, but only mark them as __devinit, and
__devinitdata.

If someone is motivated to change generic code, we need to make sure all
existing hotplug code does not break, on other arch's that dont use __cpuinit,
and __cpudevinit.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/init.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 05c83e0521ca..59008c3826cf 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -229,6 +229,18 @@ void __init parse_early_param(void);
 #define __devexitdata __exitdata
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+#define __cpuinit
+#define __cpuinitdata
+#define __cpuexit
+#define __cpuexitdata
+#else
+#define __cpuinit	__init
+#define __cpuinitdata __initdata
+#define __cpuexit __exit
+#define __cpuexitdata	__exitdata
+#endif
+
 /* Functions marked as __devexit may be discarded at kernel link time, depending
    on config options.  Newer versions of binutils detect references from
    retained sections to discarded sections and flag an error.  Pointers to
-- 
cgit v1.2.2


From 5a72e04df5470df0ec646029d31e5528167ab1a7 Mon Sep 17 00:00:00 2001
From: Li Shaohua <shaohua.li@intel.com>
Date: Sat, 25 Jun 2005 14:55:06 -0700
Subject: [PATCH] suspend/resume SMP support

Using CPU hotplug to support suspend/resume SMP.  Both S3 and S4 use
disable/enable_nonboot_cpus API.  The S4 part is based on Pavel's original S4
SMP patch.

Signed-off-by: Li Shaohua<shaohua.li@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 2bf0d5fabcdb..f2e96fdfaae0 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -58,7 +58,7 @@ static inline int software_suspend(void)
 }
 #endif
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SUSPEND_SMP
 extern void disable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
 #else
-- 
cgit v1.2.2


From 620b03276488c3cf103caf1e326bd21f00d3df84 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Sat, 25 Jun 2005 14:55:11 -0700
Subject: [PATCH] properly stop devices before poweroff

Without this patch, Linux provokes emergency disk shutdowns and
similar nastiness. It was in SuSE kernels for some time, IIRC.

Signed-off-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pm.h | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index ed2b76e75199..14479325e3f3 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -103,7 +103,8 @@ extern int pm_active;
 /*
  * Register a device with power management
  */
-struct pm_dev __deprecated *pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
+struct pm_dev __deprecated *
+pm_register(pm_dev_t type, unsigned long id, pm_callback callback);
 
 /*
  * Unregister a device with power management
@@ -190,17 +191,18 @@ typedef u32 __bitwise pm_message_t;
 /*
  * There are 4 important states driver can be in:
  * ON     -- driver is working
- * FREEZE -- stop operations and apply whatever policy is applicable to a suspended driver
- *           of that class, freeze queues for block like IDE does, drop packets for
- *           ethernet, etc... stop DMA engine too etc... so a consistent image can be
- *           saved; but do not power any hardware down.
- * SUSPEND - like FREEZE, but hardware is doing as much powersaving as possible. Roughly
- *           pci D3.
+ * FREEZE -- stop operations and apply whatever policy is applicable to a
+ *           suspended driver of that class, freeze queues for block like IDE
+ *           does, drop packets for ethernet, etc... stop DMA engine too etc...
+ *           so a consistent image can be saved; but do not power any hardware
+ *           down.
+ * SUSPEND - like FREEZE, but hardware is doing as much powersaving as
+ *           possible. Roughly pci D3.
  *
- * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 (SUSPEND).
- * We'll need to fix the drivers. So yes, putting 3 to all diferent defines is intentional,
- * and will go away as soon as drivers are fixed. Also note that typedef is neccessary,
- * we'll probably want to switch to
+ * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3
+ * (SUSPEND).  We'll need to fix the drivers. So yes, putting 3 to all different
+ * defines is intentional, and will go away as soon as drivers are fixed.  Also
+ * note that typedef is neccessary, we'll probably want to switch to
  *   typedef struct pm_message_t { int event; int flags; } pm_message_t
  * or something similar soon.
  */
@@ -222,11 +224,18 @@ struct dev_pm_info {
 
 extern void device_pm_set_parent(struct device * dev, struct device * parent);
 
-extern int device_suspend(pm_message_t state);
 extern int device_power_down(pm_message_t state);
 extern void device_power_up(void);
 extern void device_resume(void);
 
+#ifdef CONFIG_PM
+extern int device_suspend(pm_message_t state);
+#else
+static inline int device_suspend(pm_message_t state)
+{
+	return 0;
+}
+#endif
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.2


From b2b18660066997420b716c1881a6be8b82700d97 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@us.ibm.com>
Date: Sat, 25 Jun 2005 14:55:38 -0700
Subject: [PATCH] RCU: clean up a few remaining synchronize_kernel() calls

2.6.12-rc6-mm1 has a few remaining synchronize_kernel()s, some (but not
all) in comments.  This patch changes these synchronize_kernel() calls (and
comments) to synchronize_rcu() or synchronize_sched() as follows:

- arch/x86_64/kernel/mce.c mce_read(): change to synchronize_sched() to
  handle races with machine-check exceptions (synchronize_rcu() would not cut
  it given RCU implementations intended for hardcore realtime use.

- drivers/input/serio/i8042.c i8042_stop(): change to synchronize_sched() to
  handle races with i8042_interrupt() interrupt handler.  Again,
  synchronize_rcu() would not cut it given RCU implementations intended for
  hardcore realtime use.

- include/*/kdebug.h comments: change to synchronize_sched() to handle races
  with NMIs.  As before, synchronize_rcu() would not cut it...

- include/linux/list.h comment: change to synchronize_rcu(), since this
  comment is for list_del_rcu().

- security/keys/key.c unregister_key_type(): change to synchronize_rcu(),
  since this is interacting with RCU read side.

- security/keys/process_keys.c install_session_keyring(): change to
  synchronize_rcu(), since this is interacting with RCU read side.

Signed-off-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/list.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 399b51d17218..aab2db21b013 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -185,7 +185,7 @@ static inline void list_del(struct list_head *entry)
  * list_for_each_entry_rcu().
  *
  * Note that the caller is not permitted to immediately free
- * the newly deleted entry.  Instead, either synchronize_kernel()
+ * the newly deleted entry.  Instead, either synchronize_rcu()
  * or call_rcu() must be used to defer freeing until an RCU
  * grace period has elapsed.
  */
-- 
cgit v1.2.2


From 7897986bad8f6cd50d6149345aca7f6480f49464 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:13 -0700
Subject: [PATCH] sched: balance timers

Do CPU load averaging over a number of different intervals.  Allow each
interval to be chosen by sending a parameter to source_load and target_load.
0 is instantaneous, idx > 0 returns a decaying average with the most recent
sample weighted at 2^(idx-1).  To a maximum of 3 (could be easily increased).

So generally a higher number will result in more conservative balancing.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h    | 4 ++++
 include/linux/topology.h | 8 ++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c69682b0444..664981ac1fb6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -488,6 +488,10 @@ struct sched_domain {
 	unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
 	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
 	unsigned int per_cpu_gain;	/* CPU % gained by adding domain cpus */
+	unsigned int busy_idx;
+	unsigned int idle_idx;
+	unsigned int newidle_idx;
+	unsigned int wake_idx;
 	int flags;			/* See SD_* */
 
 	/* Runtime fields. */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index d70e8972c67f..ae9c2216dfa6 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -89,6 +89,10 @@
 	.cache_hot_time		= 0,			\
 	.cache_nice_tries	= 0,			\
 	.per_cpu_gain		= 25,			\
+	.busy_idx		= 0,			\
+	.idle_idx		= 0,			\
+	.newidle_idx		= 0,			\
+	.wake_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
@@ -115,6 +119,10 @@
 	.cache_hot_time		= (5*1000000/2),	\
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
+	.busy_idx		= 2,			\
+	.idle_idx		= 0,			\
+	.newidle_idx		= 1,			\
+	.wake_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-- 
cgit v1.2.2


From cafb20c1f9976a70d633bb1e1c8c24eab00e4e80 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:17 -0700
Subject: [PATCH] sched: no aggressive idle balancing

Remove the very aggressive idle stuff that has recently gone into 2.6 - it is
going against the direction we are trying to go.  Hopefully we can regain
performance through other methods.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/topology.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index ae9c2216dfa6..b23ec64df7f1 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -127,7 +127,6 @@
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_AFFINE	\
-				| SD_WAKE_IDLE		\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
-- 
cgit v1.2.2


From 147cbb4bbe991452698f0772d8292f22825710ba Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:19 -0700
Subject: [PATCH] sched: balance on fork

Reimplement the balance on exec balancing to be sched-domains aware.  Use this
to also do balance on fork balancing.  Make x86_64 do balance on fork over the
NUMA domain.

The problem that the non sched domains aware blancing became apparent on dual
core, multi socket opterons.  What we want is for the new tasks to be sent to
a different socket, but more often than not, we would first load up our
sibling core, or fill two cores of a single remote socket before selecting a
new one.

This gives large improvements to STREAM on such systems.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h    | 10 ++++++----
 include/linux/topology.h |  2 ++
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 664981ac1fb6..613491d3a875 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -460,10 +460,11 @@ enum idle_type
 #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
 #define SD_BALANCE_NEWIDLE	2	/* Balance when about to become idle */
 #define SD_BALANCE_EXEC		4	/* Balance on exec */
-#define SD_WAKE_IDLE		8	/* Wake to idle CPU on task wakeup */
-#define SD_WAKE_AFFINE		16	/* Wake task to waking CPU */
-#define SD_WAKE_BALANCE		32	/* Perform balancing at task wakeup */
-#define SD_SHARE_CPUPOWER	64	/* Domain members share cpu power */
+#define SD_BALANCE_FORK		8	/* Balance on fork, clone */
+#define SD_WAKE_IDLE		16	/* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
+#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
@@ -492,6 +493,7 @@ struct sched_domain {
 	unsigned int idle_idx;
 	unsigned int newidle_idx;
 	unsigned int wake_idx;
+	unsigned int forkexec_idx;
 	int flags;			/* See SD_* */
 
 	/* Runtime fields. */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index b23ec64df7f1..665597207def 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -93,6 +93,7 @@
 	.idle_idx		= 0,			\
 	.newidle_idx		= 0,			\
 	.wake_idx		= 0,			\
+	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
@@ -123,6 +124,7 @@
 	.idle_idx		= 0,			\
 	.newidle_idx		= 1,			\
 	.wake_idx		= 1,			\
+	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-- 
cgit v1.2.2


From 68767a0ae428801649d510d9a65bb71feed44dd1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:20 -0700
Subject: [PATCH] sched: schedstats update for balance on fork

Add SCHEDSTAT statistics for sched-balance-fork.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 613491d3a875..36a10781c3f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -517,10 +517,16 @@ struct sched_domain {
 	unsigned long alb_failed;
 	unsigned long alb_pushed;
 
-	/* sched_balance_exec() stats */
-	unsigned long sbe_attempts;
+	/* SD_BALANCE_EXEC stats */
+	unsigned long sbe_cnt;
+	unsigned long sbe_balanced;
 	unsigned long sbe_pushed;
 
+	/* SD_BALANCE_FORK stats */
+	unsigned long sbf_cnt;
+	unsigned long sbf_balanced;
+	unsigned long sbf_pushed;
+
 	/* try_to_wake_up() stats */
 	unsigned long ttwu_wake_remote;
 	unsigned long ttwu_move_affine;
-- 
cgit v1.2.2


From 687f1661d302bc70ce906594a6d3f615ef075a50 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:21 -0700
Subject: [PATCH] sched: sched tuning

Do some basic initial tuning.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/topology.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 665597207def..0320225e96da 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -91,7 +91,7 @@
 	.per_cpu_gain		= 25,			\
 	.busy_idx		= 0,			\
 	.idle_idx		= 0,			\
-	.newidle_idx		= 0,			\
+	.newidle_idx		= 1,			\
 	.wake_idx		= 0,			\
 	.forkexec_idx		= 0,			\
 	.flags			= SD_LOAD_BALANCE	\
@@ -121,15 +121,14 @@
 	.cache_nice_tries	= 1,			\
 	.per_cpu_gain		= 100,			\
 	.busy_idx		= 2,			\
-	.idle_idx		= 0,			\
-	.newidle_idx		= 1,			\
+	.idle_idx		= 1,			\
+	.newidle_idx		= 2,			\
 	.wake_idx		= 1,			\
-	.forkexec_idx		= 0,			\
+	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
 				| SD_BALANCE_NEWIDLE	\
 				| SD_BALANCE_EXEC	\
-				| SD_WAKE_AFFINE	\
-				| SD_WAKE_BALANCE,	\
+				| SD_WAKE_AFFINE,	\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
-- 
cgit v1.2.2


From 4866cde064afbb6c2a488c265e696879de616daa Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:23 -0700
Subject: [PATCH] sched: cleanup context switch locking

Instead of requiring architecture code to interact with the scheduler's
locking implementation, provide a couple of defines that can be used by the
architecture to request runqueue unlocked context switches, and ask for
interrupts to be enabled over the context switch.

Also replaces the "switch_lock" used by these architectures with an oncpu
flag (note, not a potentially slow bitflag).  This eliminates one bus
locked memory operation when context switching, and simplifies the
task_running function.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/init_task.h |  1 -
 include/linux/sched.h     | 10 ++++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a6a8c1a38d5e..03206a425d7a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -108,7 +108,6 @@ extern struct group_info init_groups;
 	.blocked	= {{0}},					\
 	.alloc_lock	= SPIN_LOCK_UNLOCKED,				\
 	.proc_lock	= SPIN_LOCK_UNLOCKED,				\
-	.switch_lock	= SPIN_LOCK_UNLOCKED,				\
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 36a10781c3f3..d27be9337425 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -368,6 +368,11 @@ struct signal_struct {
 #endif
 };
 
+/* Context switch must be unlocked if interrupts are to be enabled */
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+# define __ARCH_WANT_UNLOCKED_CTXSW
+#endif
+
 /*
  * Bits in flags field of signal_struct.
  */
@@ -594,6 +599,9 @@ struct task_struct {
 
 	int lock_depth;		/* BKL lock depth */
 
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	int oncpu;
+#endif
 	int prio, static_prio;
 	struct list_head run_list;
 	prio_array_t *array;
@@ -716,8 +724,6 @@ struct task_struct {
 	spinlock_t alloc_lock;
 /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
 	spinlock_t proc_lock;
-/* context-switch lock */
-	spinlock_t switch_lock;
 
 /* journalling filesystem info */
 	void *journal_info;
-- 
cgit v1.2.2


From 476d139c218e44e045e4bc6d4cc02b010b343939 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sat, 25 Jun 2005 14:57:29 -0700
Subject: [PATCH] sched: consolidate sbe sbf

Consolidate balance-on-exec with balance-on-fork.  This is made easy by the
sched-domains RCU patches.

As well as the general goodness of code reduction, this allows the runqueues
to be unlocked during balance-on-fork.

schedstats is a problem.  Maybe just have balance-on-event instead of
distinguishing fork and exec?

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d27be9337425..edb2c69a8873 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -930,7 +930,7 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void FASTCALL(sched_fork(task_t * p));
+extern void FASTCALL(sched_fork(task_t * p, int clone_flags));
 extern void FASTCALL(sched_exit(task_t * p));
 
 extern int in_group_p(gid_t);
-- 
cgit v1.2.2


From 1a20ff27ef75d866730ee796acd811a925af762f Mon Sep 17 00:00:00 2001
From: Dinakar Guniguntala <dino@in.ibm.com>
Date: Sat, 25 Jun 2005 14:57:33 -0700
Subject: [PATCH] Dynamic sched domains: sched changes

The following patches add dynamic sched domains functionality that was
extensively discussed on lkml and lse-tech.  I would like to see this added to
-mm

o The main advantage with this feature is that it ensures that the scheduler
  load balacing code only balances against the cpus that are in the sched
  domain as defined by an exclusive cpuset and not all of the cpus in the
  system. This removes any overhead due to load balancing code trying to
  pull tasks outside of the cpu exclusive cpuset only to be prevented by
  the tasks' cpus_allowed mask.
o cpu exclusive cpusets are useful for servers running orthogonal
  workloads such as RT applications requiring low latency and HPC
  applications that are throughput sensitive

o It provides a new API partition_sched_domains in sched.c
  that makes dynamic sched domains possible.
o cpu_exclusive cpusets sets are now associated with a sched domain.
  Which means that the users can dynamically modify the sched domains
  through the cpuset file system interface
o ia64 sched domain code has been updated to support this feature as well
o Currently, this does not support hotplug. (However some of my tests
  indicate hotplug+preempt is currently broken)
o I have tested it extensively on x86.
o This should have very minimal impact on performance as none of
  the fast paths are affected

Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com>
Acked-by: Paul Jackson <pj@sgi.com>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Acked-by: Matthew Dobson <colpatch@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index edb2c69a8873..98c109e4f43d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -539,6 +539,8 @@ struct sched_domain {
 #endif
 };
 
+extern void partition_sched_domains(cpumask_t *partition1,
+				    cpumask_t *partition2);
 #ifdef ARCH_HAS_SCHED_DOMAIN
 /* Useful helpers that arch setup code may use. Defined in kernel/sched.c */
 extern cpumask_t cpu_isolated_map;
-- 
cgit v1.2.2


From f8cbd99bd3a023db8d6356d19a5f6f539d367327 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 25 Jun 2005 14:57:39 -0700
Subject: [PATCH] sched: voluntary kernel preemption

This patch adds a new preemption model: 'Voluntary Kernel Preemption'.  The
3 models can be selected from a new menu:

            (X) No Forced Preemption (Server)
            ( ) Voluntary Kernel Preemption (Desktop)
            ( ) Preemptible Kernel (Low-Latency Desktop)

we still default to the stock (Server) preemption model.

Voluntary preemption works by adding a cond_resched()
(reschedule-if-needed) call to every might_sleep() check.  It is lighter
than CONFIG_PREEMPT - at the cost of not having as tight latencies.  It
represents a different latency/complexity/overhead tradeoff.

It has no runtime impact at all if disabled.  Here are size stats that show
how the various preemption models impact the kernel's size:

    text    data     bss     dec     hex filename
 3618774  547184  179896 4345854  424ffe vmlinux.stock
 3626406  547184  179896 4353486  426dce vmlinux.voluntary   +0.2%
 3748414  548640  179896 4476950  445016 vmlinux.preempt     +3.5%

voluntary-preempt is +0.2% of .text, preempt is +3.5%.

This feature has been tested for many months by lots of people (and it's
also included in the RHEL4 distribution and earlier variants were in Fedora
as well), and it's intended for users and distributions who dont want to
use full-blown CONFIG_PREEMPT for one reason or another.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kernel.h | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e25b97062ce1..687ba8c9973d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -58,15 +58,23 @@ struct completion;
  * be biten later when the calling function happens to sleep when it is not
  * supposed to.
  */
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+extern int cond_resched(void);
+# define might_resched() cond_resched()
+#else
+# define might_resched() do { } while (0)
+#endif
+
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-#define might_sleep() __might_sleep(__FILE__, __LINE__)
-#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0)
-void __might_sleep(char *file, int line);
+  void __might_sleep(char *file, int line);
+# define might_sleep() \
+	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
 #else
-#define might_sleep() do {} while(0)
-#define might_sleep_if(cond) do {} while (0)
+# define might_sleep() do { might_resched(); } while (0)
 #endif
 
+#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0)
+
 #define abs(x) ({				\
 		int __x = (x);			\
 		(__x < 0) ? -__x : __x;		\
-- 
cgit v1.2.2


From dc009d92435f99498cbc579ce76bf28e837e2c14 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 25 Jun 2005 14:57:52 -0700
Subject: [PATCH] kexec: add kexec syscalls

This patch introduces the architecture independent implementation the
sys_kexec_load, the compat_sys_kexec_load system calls.

Kexec on panic support has been integrated into the core patch and is
relatively clean.

In addition the hopefully architecture independent option
crashkernel=size@location has been docuemented.  It's purpose is to reserve
space for the panic kernel to live, and where no DMA transfer will ever be
setup to access.

Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Alexander Nyberg <alexn@telia.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h    | 127 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/reboot.h   |   3 ++
 include/linux/syscalls.h |   5 +-
 3 files changed, 133 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/kexec.h

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
new file mode 100644
index 000000000000..e3fc35f4e35f
--- /dev/null
+++ b/include/linux/kexec.h
@@ -0,0 +1,127 @@
+#ifndef LINUX_KEXEC_H
+#define LINUX_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/linkage.h>
+#include <linux/compat.h>
+#include <asm/kexec.h>
+
+/* Verify architecture specific macros are defined */
+
+#ifndef KEXEC_SOURCE_MEMORY_LIMIT
+#error KEXEC_SOURCE_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_DESTINATION_MEMORY_LIMIT
+#error KEXEC_DESTINATION_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_CONTROL_MEMORY_LIMIT
+#error KEXEC_CONTROL_MEMORY_LIMIT not defined
+#endif
+
+#ifndef KEXEC_CONTROL_CODE_SIZE
+#error KEXEC_CONTROL_CODE_SIZE not defined
+#endif
+
+#ifndef KEXEC_ARCH
+#error KEXEC_ARCH not defined
+#endif
+
+/*
+ * This structure is used to hold the arguments that are used when loading
+ * kernel binaries.
+ */
+
+typedef unsigned long kimage_entry_t;
+#define IND_DESTINATION  0x1
+#define IND_INDIRECTION  0x2
+#define IND_DONE         0x4
+#define IND_SOURCE       0x8
+
+#define KEXEC_SEGMENT_MAX 8
+struct kexec_segment {
+	void __user *buf;
+	size_t bufsz;
+	unsigned long mem;	/* User space sees this as a (void *) ... */
+	size_t memsz;
+};
+
+#ifdef CONFIG_COMPAT
+struct compat_kexec_segment {
+	compat_uptr_t buf;
+	compat_size_t bufsz;
+	compat_ulong_t mem;	/* User space sees this as a (void *) ... */
+	compat_size_t memsz;
+};
+#endif
+
+struct kimage {
+	kimage_entry_t head;
+	kimage_entry_t *entry;
+	kimage_entry_t *last_entry;
+
+	unsigned long destination;
+
+	unsigned long start;
+	struct page *control_code_page;
+
+	unsigned long nr_segments;
+	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
+
+	struct list_head control_pages;
+	struct list_head dest_pages;
+	struct list_head unuseable_pages;
+
+	/* Address of next control page to allocate for crash kernels. */
+	unsigned long control_page;
+
+	/* Flags to indicate special processing */
+	unsigned int type : 1;
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH   1
+};
+
+
+
+/* kexec interface functions */
+extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
+extern int machine_kexec_prepare(struct kimage *image);
+extern void machine_kexec_cleanup(struct kimage *image);
+extern asmlinkage long sys_kexec_load(unsigned long entry,
+	unsigned long nr_segments, struct kexec_segment __user *segments,
+	unsigned long flags);
+#ifdef CONFIG_COMPAT
+extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
+	unsigned long nr_segments, struct compat_kexec_segment __user *segments,
+	unsigned long flags);
+#endif
+extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
+extern void crash_kexec(void);
+extern struct kimage *kexec_image;
+
+#define KEXEC_ON_CRASH  0x00000001
+#define KEXEC_ARCH_MASK 0xffff0000
+
+/* These values match the ELF architecture values.
+ * Unless there is a good reason that should continue to be the case.
+ */
+#define KEXEC_ARCH_DEFAULT ( 0 << 16)
+#define KEXEC_ARCH_386     ( 3 << 16)
+#define KEXEC_ARCH_X86_64  (62 << 16)
+#define KEXEC_ARCH_PPC     (20 << 16)
+#define KEXEC_ARCH_PPC64   (21 << 16)
+#define KEXEC_ARCH_IA_64   (50 << 16)
+
+#define KEXEC_FLAGS    (KEXEC_ON_CRASH)  /* List of defined/legal kexec flags */
+
+/* Location of a reserved region to hold the crash kernel.
+ */
+extern struct resource crashk_res;
+
+#else /* !CONFIG_KEXEC */
+static inline void crash_kexec(void) { }
+#endif /* CONFIG_KEXEC */
+#endif /* LINUX_KEXEC_H */
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index d60fafc8bdc5..c5a05e16edb2 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -51,6 +51,9 @@ extern void machine_restart(char *cmd);
 extern void machine_halt(void);
 extern void machine_power_off(void);
 
+extern void machine_shutdown(void);
+extern void machine_crash_shutdown(void);
+
 #endif
 
 #endif /* _LINUX_REBOOT_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c39f6f72cbbc..7ba8f8f747aa 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -159,8 +159,9 @@ asmlinkage long sys_shutdown(int, int);
 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd,
 				void __user *arg);
 asmlinkage long sys_restart_syscall(void);
-asmlinkage long sys_kexec_load(void *entry, unsigned long nr_segments,
-			struct kexec_segment *segments, unsigned long flags);
+asmlinkage long sys_kexec_load(unsigned long entry,
+	unsigned long nr_segments, struct kexec_segment __user *segments,
+	unsigned long flags);
 
 asmlinkage long sys_exit(int error_code);
 asmlinkage void sys_exit_group(int error_code);
-- 
cgit v1.2.2


From cf13f0eaffa31bf6a145c53c589654b11c72ddc7 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 25 Jun 2005 14:58:11 -0700
Subject: [PATCH] kexec: s390 support

Add kexec support for s390 architecture.

From: Milton Miller <miltonm@bga.com>

- Fix passing of first argument to relocate_kernel assembly.
- Fix Kconfig description.
- Remove wrong comment and comments that describe obvious things.
- Allow only KEXEC_TYPE_DEFAULT as image type -> dump not supported.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index e3fc35f4e35f..0653a27c3d72 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -114,6 +114,7 @@ extern struct kimage *kexec_image;
 #define KEXEC_ARCH_PPC     (20 << 16)
 #define KEXEC_ARCH_PPC64   (21 << 16)
 #define KEXEC_ARCH_IA_64   (50 << 16)
+#define KEXEC_ARCH_S390    (22 << 16)
 
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH)  /* List of defined/legal kexec flags */
 
-- 
cgit v1.2.2


From 92aa63a5a1bf2e7b0c79e6716d24b76dbbdcf951 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:18 -0700
Subject: [PATCH] kdump: Retrieve saved max pfn

This patch retrieves the max_pfn being used by previous kernel and stores it
in a safe location (saved_max_pfn) before it is overwritten due to user
defined memory map.  This pfn is used to make sure that user does not try to
read the physical memory beyond saved_max_pfn.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 500f451ce0c0..82bd8842d11c 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -22,6 +22,10 @@ extern unsigned long min_low_pfn;
  */
 extern unsigned long max_pfn;
 
+#ifdef CONFIG_CRASH_DUMP
+extern unsigned long saved_max_pfn;
+#endif
+
 /*
  * node_bootmem_map is a map pointer - the bits represent all physical 
  * memory pages (including holes) on the node.
-- 
cgit v1.2.2


From 60e64d46a58236e3c718074372cab6a5b56a3b15 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:19 -0700
Subject: [PATCH] kdump: Routines for copying dump pages

This patch provides the interfaces necessary to read the dump contents,
treating it as a high memory device.

Signed off by Hariprasad Nellitheertha <hari@in.ibm.com>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/crash_dump.h | 13 +++++++++++++
 include/linux/highmem.h    |  1 +
 2 files changed, 14 insertions(+)
 create mode 100644 include/linux/crash_dump.h

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
new file mode 100644
index 000000000000..7d983b817429
--- /dev/null
+++ b/include/linux/crash_dump.h
@@ -0,0 +1,13 @@
+#ifndef LINUX_CRASH_DUMP_H
+#define LINUX_CRASH_DUMP_H
+
+#ifdef CONFIG_CRASH_DUMP
+#include <linux/kexec.h>
+#include <linux/smp_lock.h>
+#include <linux/device.h>
+#include <linux/proc_fs.h>
+
+extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
+						unsigned long, int);
+#endif /* CONFIG_CRASH_DUMP */
+#endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 2a7e6c65c882..6bece9280eb7 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -28,6 +28,7 @@ static inline void *kmap(struct page *page)
 
 #define kmap_atomic(page, idx)		page_address(page)
 #define kunmap_atomic(addr, idx)	do { } while (0)
+#define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
 #define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
 
 #endif /* CONFIG_HIGHMEM */
-- 
cgit v1.2.2


From 2030eae52b416a9a9f0ffda74c982b7f1e19496d Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:20 -0700
Subject: [PATCH] Retrieve elfcorehdr address from command line

This patch adds support for retrieving the address of elf core header if one
is passed in command line.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/crash_dump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 7d983b817429..3f25fd1eaa4b 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -7,6 +7,7 @@
 #include <linux/device.h>
 #include <linux/proc_fs.h>
 
+extern unsigned long long elfcorehdr_addr;
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
 #endif /* CONFIG_CRASH_DUMP */
-- 
cgit v1.2.2


From 666bfddbe8b8fd4fd44617d6c55193d5ac7edb29 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:21 -0700
Subject: [PATCH] kdump: Access dump file in elf format (/proc/vmcore)

From: "Vivek Goyal" <vgoyal@in.ibm.com>

o Support for /proc/vmcore interface. This interface exports elf core image
  either in ELF32 or ELF64 format, depending on the format in which elf headers
  have been stored by crashed kernel.
o Added support for CONFIG_VMCORE config option.
o Removed the dependency on /proc/kcore.

From: "Eric W. Biederman" <ebiederm@xmission.com>

This patch has been refactored to more closely match the prevailing style in
the affected files.  And to clearly indicate the dependency between
/proc/kcore and proc/vmcore.c

From: Hariprasad Nellitheertha <hari@in.ibm.com>

This patch contains the code that provides an ELF format interface to the
previous kernel's memory post kexec reboot.

Signed off by Hariprasad Nellitheertha <hari@in.ibm.com>
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/crash_dump.h | 4 ++++
 include/linux/proc_fs.h    | 7 +++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 3f25fd1eaa4b..534d750d922d 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -7,8 +7,12 @@
 #include <linux/device.h>
 #include <linux/proc_fs.h>
 
+#define ELFCORE_ADDR_MAX	(-1ULL)
 extern unsigned long long elfcorehdr_addr;
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
+extern struct file_operations proc_vmcore_operations;
+extern struct proc_dir_entry *proc_vmcore;
+
 #endif /* CONFIG_CRASH_DUMP */
 #endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 59e505261fd6..0563581e3a02 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -74,6 +74,13 @@ struct kcore_list {
 	size_t size;
 };
 
+struct vmcore {
+	struct list_head list;
+	unsigned long long paddr;
+	unsigned long size;
+	loff_t offset;
+};
+
 #ifdef CONFIG_PROC_FS
 
 extern struct proc_dir_entry proc_root;
-- 
cgit v1.2.2


From 6e274d144302068a00794ec22e73520c0615cb6f Mon Sep 17 00:00:00 2001
From: Alexander Nyberg <alexn@telia.com>
Date: Sat, 25 Jun 2005 14:58:26 -0700
Subject: [PATCH] kdump: Use real pt_regs from exception

Makes kexec_crashdump() take a pt_regs * as an argument.  This allows to
get exact register state at the point of the crash.  If we come from direct
panic assertion NULL will be passed and the current registers saved before
crashdump.

This hooks into two places:
die(): check the conditions under which we will panic when calling
do_exit and go there directly with the pt_regs that caused the fatal
fault.

die_nmi(): If we receive an NMI lockup while in the kernel use the
pt_regs and go directly to crash_kexec(). We're probably nested up badly
at this point so this might be the only chance to escape with proper
information.

Signed-off-by: Alexander Nyberg <alexn@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h  | 8 ++++++--
 include/linux/reboot.h | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0653a27c3d72..7383173a3a9c 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -99,7 +99,8 @@ extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
 	unsigned long flags);
 #endif
 extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
-extern void crash_kexec(void);
+extern void crash_kexec(struct pt_regs *);
+int kexec_should_crash(struct task_struct *);
 extern struct kimage *kexec_image;
 
 #define KEXEC_ON_CRASH  0x00000001
@@ -123,6 +124,9 @@ extern struct kimage *kexec_image;
 extern struct resource crashk_res;
 
 #else /* !CONFIG_KEXEC */
-static inline void crash_kexec(void) { }
+struct pt_regs;
+struct task_struct;
+static inline void crash_kexec(struct pt_regs *regs) { }
+static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 #endif /* CONFIG_KEXEC */
 #endif /* LINUX_KEXEC_H */
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index c5a05e16edb2..2d4dd23168dd 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -52,7 +52,8 @@ extern void machine_halt(void);
 extern void machine_power_off(void);
 
 extern void machine_shutdown(void);
-extern void machine_crash_shutdown(void);
+struct pt_regs;
+extern void machine_crash_shutdown(struct pt_regs *);
 
 #endif
 
-- 
cgit v1.2.2


From 72414d3f1d22fc3e311b162fca95c430048d38ce Mon Sep 17 00:00:00 2001
From: Maneesh Soni <maneesh@in.ibm.com>
Date: Sat, 25 Jun 2005 14:58:28 -0700
Subject: [PATCH] kexec code cleanup

o Following patch provides purely cosmetic changes and corrects CodingStyle
  guide lines related certain issues like below in kexec related files

  o braces for one line "if" statements, "for" loops,
  o more than 80 column wide lines,
  o No space after "while", "for" and "switch" key words

o Changes:
  o take-2: Removed the extra tab before "case" key words.
  o take-3: Put operator at the end of line and space before "*/"

Signed-off-by: Maneesh Soni <maneesh@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kexec.h    | 13 ++++++++-----
 include/linux/syscalls.h |  6 +++---
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 7383173a3a9c..c8468472aec0 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -91,14 +91,17 @@ extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
 extern int machine_kexec_prepare(struct kimage *image);
 extern void machine_kexec_cleanup(struct kimage *image);
 extern asmlinkage long sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment __user *segments,
-	unsigned long flags);
+					unsigned long nr_segments,
+					struct kexec_segment __user *segments,
+					unsigned long flags);
 #ifdef CONFIG_COMPAT
 extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct compat_kexec_segment __user *segments,
-	unsigned long flags);
+				unsigned long nr_segments,
+				struct compat_kexec_segment __user *segments,
+				unsigned long flags);
 #endif
-extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
+extern struct page *kimage_alloc_control_pages(struct kimage *image,
+						unsigned int order);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
 extern struct kimage *kexec_image;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7ba8f8f747aa..52830b6d94e5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -159,9 +159,9 @@ asmlinkage long sys_shutdown(int, int);
 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd,
 				void __user *arg);
 asmlinkage long sys_restart_syscall(void);
-asmlinkage long sys_kexec_load(unsigned long entry,
-	unsigned long nr_segments, struct kexec_segment __user *segments,
-	unsigned long flags);
+asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
+				struct kexec_segment __user *segments,
+				unsigned long flags);
 
 asmlinkage long sys_exit(int error_code);
 asmlinkage void sys_exit_group(int error_code);
-- 
cgit v1.2.2


From 8c0e33c133021ee241e9d51255b9fb18eb34ef0e Mon Sep 17 00:00:00 2001
From: Nick Wilson <njw@osdl.org>
Date: Sat, 25 Jun 2005 14:59:00 -0700
Subject: [PATCH] Use ALIGN to remove duplicate code

This patch makes use of ALIGN() to remove duplicate round-up code.

Signed-off-by: Nick Wilson <njw@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/a.out.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/a.out.h b/include/linux/a.out.h
index af8a1dfa5c32..f913cc3e1b0d 100644
--- a/include/linux/a.out.h
+++ b/include/linux/a.out.h
@@ -138,7 +138,7 @@ enum machine_type {
 #endif
 #endif
 
-#define _N_SEGMENT_ROUND(x) (((x) + SEGMENT_SIZE - 1) & ~(SEGMENT_SIZE - 1))
+#define _N_SEGMENT_ROUND(x) ALIGN(x, SEGMENT_SIZE)
 
 #define _N_TXTENDADDR(x) (N_TXTADDR(x)+(x).a_text)
 
-- 
cgit v1.2.2


From 681ea4b930768444e9d88651c1362b0bf6d2a42b Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Jun 2005 14:59:04 -0700
Subject: [PATCH] drivers/char/nvram.c: possible cleanups

This patch contains the following possible cleanups:
- make the needlessly global function __nvram_set_checksum static
- #if 0 the unused global function nvram_set_checksum
- remove the EXPORT_SYMBOL's for both functions

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/nvram.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index b031e41b5e0d..9189829c131c 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -20,8 +20,6 @@ extern void __nvram_write_byte(unsigned char c, int i);
 extern void nvram_write_byte(unsigned char c, int i);
 extern int __nvram_check_checksum(void);
 extern int nvram_check_checksum(void);
-extern void __nvram_set_checksum(void);
-extern void nvram_set_checksum(void);
 #endif
 
 #endif  /* _LINUX_NVRAM_H */
-- 
cgit v1.2.2


From 93d17d3d84b7147e8f07aeeb15ec01aa92c6b564 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sat, 25 Jun 2005 14:59:10 -0700
Subject: [PATCH] drivers/block/ll_rw_blk.c: cleanups

This patch contains the following cleanups:
- make needlessly global code static
- remove the following unused global functions:
  - blkdev_scsi_issue_flush_fn
  - __blk_attempt_remerge
- remove the following unused EXPORT_SYMBOL's:
  - blk_phys_contig_segment
  - blk_hw_contig_segment
  - blkdev_scsi_issue_flush_fn
  - __blk_attempt_remerge

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/blkdev.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 60272141ff19..b54a0348a890 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -539,15 +539,12 @@ extern void generic_make_request(struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void blk_end_sync_rq(struct request *rq);
 extern void blk_attempt_remerge(request_queue_t *, struct request *);
-extern void __blk_attempt_remerge(request_queue_t *, struct request *);
 extern struct request *blk_get_request(request_queue_t *, int, int);
 extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
 extern void blk_requeue_request(request_queue_t *, struct request *);
 extern void blk_plug_device(request_queue_t *);
 extern int blk_remove_plug(request_queue_t *);
 extern void blk_recount_segments(request_queue_t *, struct bio *);
-extern int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
-extern int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
 extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
@@ -631,7 +628,6 @@ extern void blk_queue_dma_alignment(request_queue_t *, int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern void blk_queue_ordered(request_queue_t *, int);
 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
-extern int blkdev_scsi_issue_flush_fn(request_queue_t *, struct gendisk *, sector_t *);
 extern struct request *blk_start_pre_flush(request_queue_t *,struct request *);
 extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int);
 extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int);
@@ -675,8 +671,6 @@ extern int blkdev_issue_flush(struct block_device *, sector_t *);
 
 #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
 
-extern void drive_stat_acct(struct request *, int, int);
-
 static inline int queue_hardsect_size(request_queue_t *q)
 {
 	int retval = 512;
-- 
cgit v1.2.2


From 3e1d1d28d99dabe63c64f7f40f1ca1d646de1f73 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@lameter.com>
Date: Fri, 24 Jun 2005 23:13:50 -0700
Subject: [PATCH] Cleanup patch for process freezing

1. Establish a simple API for process freezing defined in linux/include/sched.h:

   frozen(process)		Check for frozen process
   freezing(process)		Check if a process is being frozen
   freeze(process)		Tell a process to freeze (go to refrigerator)
   thaw_process(process)	Restart process
   frozen_process(process)	Process is frozen now

2. Remove all references to PF_FREEZE and PF_FROZEN from all
   kernel sources except sched.h

3. Fix numerous locations where try_to_freeze is manually done by a driver

4. Remove the argument that is no longer necessary from two function calls.

5. Some whitespace cleanup

6. Clear potential race in refrigerator (provides an open window of PF_FREEZE
   cleared before setting PF_FROZEN, recalc_sigpending does not check
   PF_FROZEN).

This patch does not address the problem of freeze_processes() violating the rule
that a task may only modify its own flags by setting PF_FREEZE. This is not clean
in an SMP environment. freeze(process) is therefore not SMP safe!

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 73 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c69682b0444..e7fd09b0557f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1245,33 +1245,78 @@ extern void normalize_rt_tasks(void);
 
 #endif
 
-/* try_to_freeze
- *
- * Checks whether we need to enter the refrigerator
- * and returns 1 if we did so.
- */
 #ifdef CONFIG_PM
-extern void refrigerator(unsigned long);
+/*
+ * Check if a process has been frozen
+ */
+static inline int frozen(struct task_struct *p)
+{
+	return p->flags & PF_FROZEN;
+}
+
+/*
+ * Check if there is a request to freeze a process
+ */
+static inline int freezing(struct task_struct *p)
+{
+	return p->flags & PF_FREEZE;
+}
+
+/*
+ * Request that a process be frozen
+ * FIXME: SMP problem. We may not modify other process' flags!
+ */
+static inline void freeze(struct task_struct *p)
+{
+	p->flags |= PF_FREEZE;
+}
+
+/*
+ * Wake up a frozen process
+ */
+static inline int thaw_process(struct task_struct *p)
+{
+	if (frozen(p)) {
+		p->flags &= ~PF_FROZEN;
+		wake_up_process(p);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * freezing is complete, mark process as frozen
+ */
+static inline void frozen_process(struct task_struct *p)
+{
+	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+}
+
+extern void refrigerator(void);
 extern int freeze_processes(void);
 extern void thaw_processes(void);
 
-static inline int try_to_freeze(unsigned long refrigerator_flags)
+static inline int try_to_freeze(void)
 {
-	if (unlikely(current->flags & PF_FREEZE)) {
-		refrigerator(refrigerator_flags);
+	if (freezing(current)) {
+		refrigerator();
 		return 1;
 	} else
 		return 0;
 }
 #else
-static inline void refrigerator(unsigned long flag) {}
+static inline int frozen(struct task_struct *p) { return 0; }
+static inline int freezing(struct task_struct *p) { return 0; }
+static inline void freeze(struct task_struct *p) { BUG(); }
+static inline int thaw_process(struct task_struct *p) { return 1; }
+static inline void frozen_process(struct task_struct *p) { BUG(); }
+
+static inline void refrigerator(void) {}
 static inline int freeze_processes(void) { BUG(); return 0; }
 static inline void thaw_processes(void) {}
 
-static inline int try_to_freeze(unsigned long refrigerator_flags)
-{
-	return 0;
-}
+static inline int try_to_freeze(void) { return 0; }
+
 #endif /* CONFIG_PM */
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.2


From 169a3e66637c667b43dab7c319ffd5c99804cad8 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Sun, 26 Jun 2005 17:54:11 -0400
Subject: bonding: xor/802.3ad improved slave hash

Add support for alternate slave selection algorithms to bonding
balance-xor and 802.3ad modes.  Default mode (what we have now: xor of
MAC addresses) is "layer2", new choice is "layer3+4", using IP and port
information for hashing to select peer.

Originally submitted by Jason Gabler for balance-xor mode;
modified by Jay Vosburgh to additionally support 802.3ad mode.  Jason's
original comment is as follows:

The attached patch to the Linux Etherchannel Bonding driver modifies the
driver's "balance-xor" mode as follows:

      - alternate hashing policy support for mode 2
        * Added kernel parameter "xmit_policy" to allow the specification
          of different hashing policies for mode 2.  The original mode 2
          policy is the default, now found in xmit_hash_policy_layer2().
        * Added xmit_hash_policy_layer34()

This patch was inspired by hashing policies implemented by Cisco,
Foundry and IBM, which are explained in
Foundry documentation found at:
http://www.foundrynet.com/services/documentation/sribcg/Trunking.html#112750

Signed-off-by: Jason Gabler <jygabler@lbl.gov>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
---
 include/linux/if_bonding.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 57024ce2c74f..84598fa2e9de 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -35,6 +35,9 @@
  *
  * 2003/12/01 - Shmulik Hen <shmulik.hen at intel dot com>
  *	- Code cleanup and style changes
+ *
+ * 2005/05/05 - Jason Gabler <jygabler at lbl dot gov>
+ *      - added definitions for various XOR hashing policies
  */
 
 #ifndef _LINUX_IF_BONDING_H
@@ -80,6 +83,10 @@
 
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
+/* hashing types */
+#define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
+#define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ MAC) */
+
 typedef struct ifbond {
 	__s32 bond_mode;
 	__s32 num_slaves;
-- 
cgit v1.2.2


From 32e9e25ef20789c24ffa1f41489a13932cf82c77 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 26 Jun 2005 15:28:10 -0700
Subject: [ATALK]: Include asm/byteorder.h in linux/atalk.h

We're using __be16 in userland visible types, so we
have to include asm/byteorder.h so that works.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 09a1451c1159..911c09cb9bf9 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_ATALK_H__
 #define __LINUX_ATALK_H__
 
+#include <asm/byteorder.h>
+
 /*
  * AppleTalk networking structures
  *
-- 
cgit v1.2.2