From 4cf808eb443ead42777a0230b73aec0cee7fb298 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Fri, 17 Feb 2006 20:38:21 +0100
Subject: [PATCH] Handle holes in node mask in node fallback list setup

Change the find_next_best_node algorithm to correctly skip
over holes in the node online mask. Previously it would not handle
missing nodes correctly and cause crashes at boot.

[Written by Linus, tested by AK]

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 62c122528587..208812b25597 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1541,29 +1541,29 @@ static int __initdata node_load[MAX_NUMNODES];
  */
 static int __init find_next_best_node(int node, nodemask_t *used_node_mask)
 {
-	int i, n, val;
+	int n, val;
 	int min_val = INT_MAX;
 	int best_node = -1;
 
-	for_each_online_node(i) {
-		cpumask_t tmp;
+	/* Use the local node if we haven't already */
+	if (!node_isset(node, *used_node_mask)) {
+		node_set(node, *used_node_mask);
+		return node;
+	}
 
-		/* Start from local node */
-		n = (node+i) % num_online_nodes();
+	for_each_online_node(n) {
+		cpumask_t tmp;
 
 		/* Don't want a node to appear more than once */
 		if (node_isset(n, *used_node_mask))
 			continue;
 
-		/* Use the local node if we haven't already */
-		if (!node_isset(node, *used_node_mask)) {
-			best_node = node;
-			break;
-		}
-
 		/* Use the distance array to find the distance */
 		val = node_distance(node, n);
 
+		/* Penalize nodes under us ("prefer the next node") */
+		val += (n < node);
+
 		/* Give preference to headless and unused nodes */
 		tmp = node_to_cpumask(n);
 		if (!cpus_empty(tmp))
-- 
cgit v1.2.2


From 2ae5b30ff08cee422c7f6388a759f743633c7542 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 14 Dec 2005 13:10:49 -0700
Subject: [PATCH] Necessary evil to get sata_vsc to initialize with Intel
 iq3124h hba

* libata does not care about error interrupts, so handle them locally
* the interrupts that are ignored only appear to happen at init time

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/scsi/sata_vsc.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c
index 2e2c3b7acb0c..e484e8db6810 100644
--- a/drivers/scsi/sata_vsc.c
+++ b/drivers/scsi/sata_vsc.c
@@ -81,6 +81,19 @@
 /* Port stride */
 #define VSC_SATA_PORT_OFFSET		0x200
 
+/* Error interrupt status bit offsets */
+#define VSC_SATA_INT_ERROR_E_OFFSET	2
+#define VSC_SATA_INT_ERROR_P_OFFSET	4
+#define VSC_SATA_INT_ERROR_T_OFFSET	5
+#define VSC_SATA_INT_ERROR_M_OFFSET	1
+#define is_vsc_sata_int_err(port_idx, int_status) \
+	 (int_status & ((1 << (VSC_SATA_INT_ERROR_E_OFFSET + (8 * port_idx))) | \
+		        (1 << (VSC_SATA_INT_ERROR_P_OFFSET + (8 * port_idx))) | \
+		        (1 << (VSC_SATA_INT_ERROR_T_OFFSET + (8 * port_idx))) | \
+		        (1 << (VSC_SATA_INT_ERROR_M_OFFSET + (8 * port_idx)))   \
+		       )\
+ 	 )
+
 
 static u32 vsc_sata_scr_read (struct ata_port *ap, unsigned int sc_reg)
 {
@@ -201,13 +214,28 @@ static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance,
 			struct ata_port *ap;
 
 			ap = host_set->ports[i];
+
+			if (is_vsc_sata_int_err(i, int_status)) {
+				u32 err_status;
+				printk(KERN_DEBUG "%s: ignoring interrupt(s)\n", __FUNCTION__);
+				err_status = ap ? vsc_sata_scr_read(ap, SCR_ERROR) : 0;
+				vsc_sata_scr_write(ap, SCR_ERROR, err_status);
+				handled++;
+			}
+
 			if (ap && !(ap->flags &
 				    (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) {
 				struct ata_queued_cmd *qc;
 
 				qc = ata_qc_from_tag(ap, ap->active_tag);
-				if (qc && (!(qc->tf.ctl & ATA_NIEN)))
+				if (qc && (!(qc->tf.ctl & ATA_NIEN))) {
 					handled += ata_host_intr(ap, qc);
+				} else {
+					printk(KERN_DEBUG "%s: ignoring interrupt(s)\n", __FUNCTION__);
+					ata_chk_status(ap);
+					handled++;
+				}
+
 			}
 		}
 	}
-- 
cgit v1.2.2


From 0565c26de7b2c069add553106f210b3128b67785 Mon Sep 17 00:00:00 2001
From: Albert Lee <albertcc@tw.ibm.com>
Date: Mon, 13 Feb 2006 18:55:25 +0800
Subject: [PATCH] libata: minor fix for 2.6.16-rc3

 - Fix the array index value in ata_rwcmd_protocol() for the added FUA commands.
 - Filter out ATAPI packet command error messages in ata_pio_error()

Signed-off-by: Albert Lee <albertcc@tw.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/scsi/libata-core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 46c4cdbaee86..7ddd5a69352a 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -614,7 +614,7 @@ int ata_rwcmd_protocol(struct ata_queued_cmd *qc)
 	} else if (lba48 && (qc->ap->flags & ATA_FLAG_PIO_LBA48)) {
 		/* Unable to use DMA due to host limitation */
 		tf->protocol = ATA_PROT_PIO;
-		index = dev->multi_count ? 0 : 4;
+		index = dev->multi_count ? 0 : 8;
 	} else {
 		tf->protocol = ATA_PROT_DMA;
 		index = 16;
@@ -3357,11 +3357,12 @@ static void ata_pio_error(struct ata_port *ap)
 {
 	struct ata_queued_cmd *qc;
 
-	printk(KERN_WARNING "ata%u: PIO error\n", ap->id);
-
 	qc = ata_qc_from_tag(ap, ap->active_tag);
 	assert(qc != NULL);
 
+	if (qc->tf.command != ATA_CMD_PACKET)
+		printk(KERN_WARNING "ata%u: PIO error\n", ap->id);
+
 	/* make sure qc->err_mask is available to 
 	 * know what's wrong and recover
 	 */
-- 
cgit v1.2.2


From c15d85c8f3f73b5f20aae7928e25b6996f16b328 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 15 Feb 2006 15:59:25 +0100
Subject: [PATCH] Add missing FUA write to sata_mv dma command list

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/scsi/sata_mv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index 6fddf17a3b70..2770005324b4 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -997,6 +997,7 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
 	case ATA_CMD_READ_EXT:
 	case ATA_CMD_WRITE:
 	case ATA_CMD_WRITE_EXT:
+	case ATA_CMD_WRITE_FUA_EXT:
 		mv_crqb_pack_cmd(cw++, tf->hob_nsect, ATA_REG_NSECT, 0);
 		break;
 #ifdef LIBATA_NCQ		/* FIXME: remove this line when NCQ added */
-- 
cgit v1.2.2


From b2f49033d80c952a0ffc2d5647bc1a0b8a09c1b3 Mon Sep 17 00:00:00 2001
From: Peter Staubach <staubach@redhat.com>
Date: Fri, 17 Feb 2006 13:52:36 -0800
Subject: [PATCH] fix deadlock in ext2

Fix a deadlock possible in the ext2 file system implementation.  This
deadlock occurs when a file is removed from an ext2 file system which was
mounted with the "sync" mount option.

The problem is that ext2_xattr_delete_inode() was invoking the routine,
sync_dirty_buffer(), using a buffer head which was previously locked via
lock_buffer().  The first thing that sync_dirty_buffer() does is to lock
the buffer head that it was passed.  It does this via lock_buffer().  Oops.

The solution is to unlock the buffer head in ext2_xattr_delete_inode()
before invoking sync_dirty_buffer().  This makes the code in
ext2_xattr_delete_inode() obey the same locking rules as all other callers
of sync_dirty_buffer() in the ext2 file system implementation.

Signed-off-by: Peter Staubach <staubach@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/xattr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index a2ca3107d475..86ae8e93adb9 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -792,18 +792,20 @@ ext2_xattr_delete_inode(struct inode *inode)
 		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
 		get_bh(bh);
 		bforget(bh);
+		unlock_buffer(bh);
 	} else {
 		HDR(bh)->h_refcount = cpu_to_le32(
 			le32_to_cpu(HDR(bh)->h_refcount) - 1);
 		if (ce)
 			mb_cache_entry_release(ce);
+		ea_bdebug(bh, "refcount now=%d",
+			le32_to_cpu(HDR(bh)->h_refcount));
+		unlock_buffer(bh);
 		mark_buffer_dirty(bh);
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
 		DQUOT_FREE_BLOCK(inode, 1);
 	}
-	ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
-	unlock_buffer(bh);
 	EXT2_I(inode)->i_file_acl = 0;
 
 cleanup:
-- 
cgit v1.2.2


From 8c9e877949d953e80d0d400bc4d1d1195a2028a4 Mon Sep 17 00:00:00 2001
From: Marcel Selhorst <selhorst@crypto.rub.de>
Date: Fri, 17 Feb 2006 13:52:41 -0800
Subject: [PATCH] Infineon TPM: IO-port leakage fix, WTX-bugfix

Fix IO-port leakage from request_region in case of error during TPM
initialization, adds more pnp-verification and fixes a WTX-bug.

Signed-off-by: Marcel Selhorst <selhorst@crypto.rub.de>
Acked-by: Kylene Jo Hall <kjhall@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/tpm/tpm_infineon.c | 48 +++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index ec7590951af5..24095f6ee6da 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -33,6 +33,7 @@
 static int TPM_INF_DATA;
 static int TPM_INF_ADDR;
 static int TPM_INF_BASE;
+static int TPM_INF_ADDR_LEN;
 static int TPM_INF_PORT_LEN;
 
 /* TPM header definitions */
@@ -195,6 +196,7 @@ static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count)
 	int i;
 	int ret;
 	u32 size = 0;
+	number_of_wtx = 0;
 
 recv_begin:
 	/* start receiving header */
@@ -378,24 +380,35 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev,
 	if (pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) &&
 	    !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) {
 		TPM_INF_ADDR = pnp_port_start(dev, 0);
+		TPM_INF_ADDR_LEN = pnp_port_len(dev, 0);
 		TPM_INF_DATA = (TPM_INF_ADDR + 1);
 		TPM_INF_BASE = pnp_port_start(dev, 1);
 		TPM_INF_PORT_LEN = pnp_port_len(dev, 1);
-		if (!TPM_INF_PORT_LEN)
-			return -EINVAL;
+		if ((TPM_INF_PORT_LEN < 4) || (TPM_INF_ADDR_LEN < 2)) {
+			rc = -EINVAL;
+			goto err_last;
+		}
 		dev_info(&dev->dev, "Found %s with ID %s\n",
 			 dev->name, dev_id->id);
-		if (!((TPM_INF_BASE >> 8) & 0xff))
-			return -EINVAL;
+		if (!((TPM_INF_BASE >> 8) & 0xff)) {
+			rc = -EINVAL;
+			goto err_last;
+		}
 		/* publish my base address and request region */
 		tpm_inf.base = TPM_INF_BASE;
 		if (request_region
 		    (tpm_inf.base, TPM_INF_PORT_LEN, "tpm_infineon0") == NULL) {
-			release_region(tpm_inf.base, TPM_INF_PORT_LEN);
-			return -EINVAL;
+			rc = -EINVAL;
+			goto err_last;
+		}
+		if (request_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN,
+				"tpm_infineon0") == NULL) {
+			rc = -EINVAL;
+			goto err_last;
 		}
 	} else {
-		return -EINVAL;
+		rc = -EINVAL;
+		goto err_last;
 	}
 
 	/* query chip for its vendor, its version number a.s.o. */
@@ -443,8 +456,8 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev,
 			dev_err(&dev->dev,
 				"Could not set IO-ports to 0x%lx\n",
 				tpm_inf.base);
-			release_region(tpm_inf.base, TPM_INF_PORT_LEN);
-			return -EIO;
+			rc = -EIO;
+			goto err_release_region;
 		}
 
 		/* activate register */
@@ -471,14 +484,21 @@ static int __devinit tpm_inf_pnp_probe(struct pnp_dev *dev,
 
 		rc = tpm_register_hardware(&dev->dev, &tpm_inf);
 		if (rc < 0) {
-			release_region(tpm_inf.base, TPM_INF_PORT_LEN);
-			return -ENODEV;
+			rc = -ENODEV;
+			goto err_release_region;
 		}
 		return 0;
 	} else {
-		dev_info(&dev->dev, "No Infineon TPM found!\n");
-		return -ENODEV;
+		rc = -ENODEV;
+		goto err_release_region;
 	}
+
+err_release_region:
+	release_region(tpm_inf.base, TPM_INF_PORT_LEN);
+	release_region(TPM_INF_ADDR, TPM_INF_ADDR_LEN);
+
+err_last:
+	return rc;
 }
 
 static __devexit void tpm_inf_pnp_remove(struct pnp_dev *dev)
@@ -518,5 +538,5 @@ module_exit(cleanup_inf);
 
 MODULE_AUTHOR("Marcel Selhorst <selhorst@crypto.rub.de>");
 MODULE_DESCRIPTION("Driver for Infineon TPM SLD 9630 TT 1.1 / SLB 9635 TT 1.2");
-MODULE_VERSION("1.6");
+MODULE_VERSION("1.7");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.2


From 05efc67d100ff6c3364604b72729addf1a86fdab Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 17 Feb 2006 13:52:42 -0800
Subject: [PATCH] arch/sh/Kconfig: fix the ISA_DMA_API dependencies

Jean-Luc Leger <reiga@dspnet.fr.eu.org> found this obvious typo.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/sh/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 504d56f8ca7f..e73621d03a28 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -446,7 +446,7 @@ endmenu
 
 config ISA_DMA_API
 	bool
-	depends on MPC1211
+	depends on SH_MPC1211
 	default y
 
 menu "Kernel features"
-- 
cgit v1.2.2


From 4bbf39c29bc3409d6454faf0dfa1b3b0aa2ac2af Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Feb 2006 13:52:44 -0800
Subject: [PATCH] Introduce CONFIG_DEFAULT_MIGRATION_COST

Heiko Carstens <heiko.carstens@de.ibm.com> wrote:

  The boot sequence on s390 sometimes takes ages and we spend a very long
  time (up to one or two minutes) in calibrate_migration_costs.  The time
  spent there differs from boot to boot.  Also the calculated costs differ
  a lot.  I've seen differences by up to a factor of 15 (yes, factor not
  percent).  Also I doubt that making these measurements make much sense on
  a completely virtualized architecture where you cannot tell how much cpu
  time you will get anyway.

So introduce the CONFIG_DEFAULT_MIGRATION_COST method for an architecture
to set the scheduler migration costs.  This turns off automatic detection
of migration costs.  Makes sense on virtual platforms, where migration
costs are hard to measure accurately.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/Kconfig |  4 ++++
 kernel/sched.c    | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b66602ad7b33..b7ca5bf9acfc 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -80,6 +80,10 @@ config HOTPLUG_CPU
 	  can be controlled through /sys/devices/system/cpu/cpu#.
 	  Say N if you want to disable CPU hotplug.
 
+config DEFAULT_MIGRATION_COST
+	int
+	default "1000000"
+
 config MATHEMU
 	bool "IEEE FPU emulation"
 	depends on MARCH_G5
diff --git a/kernel/sched.c b/kernel/sched.c
index 66d957227de9..12d291bf3379 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5058,7 +5058,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 #define MAX_DOMAIN_DISTANCE 32
 
 static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
-		{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL };
+		{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
+/*
+ * Architectures may override the migration cost and thus avoid
+ * boot-time calibration. Unit is nanoseconds. Mostly useful for
+ * virtualized hardware:
+ */
+#ifdef CONFIG_DEFAULT_MIGRATION_COST
+			CONFIG_DEFAULT_MIGRATION_COST
+#else
+			-1LL
+#endif
+};
 
 /*
  * Allow override of migration cost - in units of microseconds.
-- 
cgit v1.2.2


From 6d751c43b29deb1d990fb9644c13ca941c9d1305 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:45 -0800
Subject: [PATCH] s390: ccw device disbanding

If __ccw_device_disband_start() fails to initiate disbanding, it should finish
with ccw_device_disband_done() (which leaves the device in offline state)
instead of ccw_device_verify_done() (which leaves the device in online state).

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/s390/cio/device_pgid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index d2a5b04d7cba..85b1020a1fcc 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -405,7 +405,7 @@ __ccw_device_disband_start(struct ccw_device *cdev)
 		cdev->private->iretry = 5;
 		cdev->private->imask >>= 1;
 	}
-	ccw_device_verify_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV);
+	ccw_device_disband_done(cdev, (sch->lpm != 0) ? 0 : -ENODEV);
 }
 
 /*
-- 
cgit v1.2.2


From 1fca251f36fac3fae7d9cf10de69c2c93f6c0000 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:46 -0800
Subject: [PATCH] s390: fix preempt_count of idle thread with cpu hotplug

Set preempt_count of idle_thread to zero before switching off cpu.  Otherwise
the preempt_count will be wrong if the cpu is switched on again since the
thread will be reused.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/kernel/process.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 008c74526fd3..da6fbae8df91 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -128,8 +128,10 @@ void default_idle(void)
 	__ctl_set_bit(8, 15);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	if (cpu_is_offline(cpu))
+	if (cpu_is_offline(cpu)) {
+		preempt_enable_no_resched();
 		cpu_die();
+	}
 #endif
 
 	local_mcck_disable();
-- 
cgit v1.2.2


From 255acee706b333b79f593dd366f16e1f107cccc3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:46 -0800
Subject: [PATCH] s390: additional_cpus parameter

Introduce additional_cpus command line option.  By default no additional cpu
can be attached to the system anymore.  Only the cpus present at IPL time can
be switched on/off.  If it is desired that additional cpus can be attached to
the system the maximum number of additional cpus needs to be specified with
this option.

This change is necessary in order to limit the waste of per_cpu data
structures.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/cpu-hotplug.txt | 10 ++++----
 arch/s390/kernel/setup.c      |  2 ++
 arch/s390/kernel/smp.c        | 58 +++++++++++++++++++++++++++++--------------
 include/asm-s390/smp.h        |  2 ++
 4 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index e05278087ffa..4d3355da0e26 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -11,6 +11,8 @@
 			Joel Schopp <jschopp@austin.ibm.com>
 		ia64/x86_64:
 			Ashok Raj <ashok.raj@intel.com>
+		s390:
+			Heiko Carstens <heiko.carstens@de.ibm.com>
 
 Authors: Ashok Raj <ashok.raj@intel.com>
 Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
@@ -44,11 +46,9 @@ maxcpus=n    Restrict boot time cpus to n. Say if you have 4 cpus, using
              maxcpus=2 will only boot 2. You can choose to bring the
              other cpus later online, read FAQ's for more info.
 
-additional_cpus*=n	Use this to limit hotpluggable cpus. This option sets
-			cpu_possible_map = cpu_present_map + additional_cpus
-
-(*) Option valid only for following architectures
-- x86_64, ia64
+additional_cpus=n	[x86_64, s390 only] use this to limit hotpluggable cpus.
+                          This option sets
+  			cpu_possible_map = cpu_present_map + additional_cpus
 
 ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
 to determine the number of potentially hot-pluggable cpus. The implementation
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index de8784267473..24f62f16c0e5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -600,6 +600,7 @@ setup_arch(char **cmdline_p)
 	init_mm.brk = (unsigned long) &_end;
 
 	parse_cmdline_early(cmdline_p);
+	parse_early_param();
 
 	setup_memory();
 	setup_resources();
@@ -607,6 +608,7 @@ setup_arch(char **cmdline_p)
 
         cpu_init();
         __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
+	smp_setup_cpu_possible_map();
 
 	/*
 	 * Create kernel page tables and switch to virtual addressing.
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0d1ad5dbe2b1..53291e94ac7b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -1,8 +1,7 @@
 /*
  *  arch/s390/kernel/smp.c
  *
- *  S390 version
- *    Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *    Copyright (C) IBM Corp. 1999,2006
  *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
  *               Heiko Carstens (heiko.carstens@de.ibm.com)
@@ -41,8 +40,6 @@
 #include <asm/cpcmd.h>
 #include <asm/tlbflush.h>
 
-/* prototypes */
-
 extern volatile int __cpu_logical_map[];
 
 /*
@@ -51,13 +48,11 @@ extern volatile int __cpu_logical_map[];
 
 struct _lowcore *lowcore_ptr[NR_CPUS];
 
-cpumask_t cpu_online_map;
-cpumask_t cpu_possible_map = CPU_MASK_ALL;
+cpumask_t cpu_online_map = CPU_MASK_NONE;
+cpumask_t cpu_possible_map = CPU_MASK_NONE;
 
 static struct task_struct *current_set[NR_CPUS];
 
-EXPORT_SYMBOL(cpu_online_map);
-
 /*
  * Reboot, halt and power_off routines for SMP.
  */
@@ -490,10 +485,10 @@ void smp_ctl_clear_bit(int cr, int bit) {
  * Lets check how many CPUs we have.
  */
 
-void
-__init smp_check_cpus(unsigned int max_cpus)
+static unsigned int
+__init smp_count_cpus(void)
 {
-	int cpu, num_cpus;
+	unsigned int cpu, num_cpus;
 	__u16 boot_cpu_addr;
 
 	/*
@@ -503,19 +498,20 @@ __init smp_check_cpus(unsigned int max_cpus)
 	boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr;
 	current_thread_info()->cpu = 0;
 	num_cpus = 1;
-	for (cpu = 0; cpu <= 65535 && num_cpus < max_cpus; cpu++) {
+	for (cpu = 0; cpu <= 65535; cpu++) {
 		if ((__u16) cpu == boot_cpu_addr)
 			continue;
-		__cpu_logical_map[num_cpus] = (__u16) cpu;
-		if (signal_processor(num_cpus, sigp_sense) ==
+		__cpu_logical_map[1] = (__u16) cpu;
+		if (signal_processor(1, sigp_sense) ==
 		    sigp_not_operational)
 			continue;
-		cpu_set(num_cpus, cpu_present_map);
 		num_cpus++;
 	}
 
 	printk("Detected %d CPU's\n",(int) num_cpus);
 	printk("Boot cpu address %2X\n", boot_cpu_addr);
+
+	return num_cpus;
 }
 
 /*
@@ -676,6 +672,32 @@ __cpu_up(unsigned int cpu)
 	return 0;
 }
 
+static unsigned int __initdata additional_cpus;
+
+void __init smp_setup_cpu_possible_map(void)
+{
+	unsigned int pcpus, cpu;
+
+	pcpus = smp_count_cpus() + additional_cpus;
+
+	if (pcpus > NR_CPUS)
+		pcpus = NR_CPUS;
+
+	for (cpu = 0; cpu < pcpus; cpu++)
+		cpu_set(cpu, cpu_possible_map);
+
+	cpu_present_map = cpu_possible_map;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static int __init setup_additional_cpus(char *s)
+{
+	additional_cpus = simple_strtoul(s, NULL, 0);
+	return 0;
+}
+early_param("additional_cpus", setup_additional_cpus);
+
 int
 __cpu_disable(void)
 {
@@ -744,6 +766,8 @@ cpu_die(void)
 	for(;;);
 }
 
+#endif /* CONFIG_HOTPLUG_CPU */
+
 /*
  *	Cycle through the processors and setup structures.
  */
@@ -757,7 +781,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
         /* request the 0x1201 emergency signal external interrupt */
         if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
                 panic("Couldn't request external interrupt 0x1201");
-        smp_check_cpus(max_cpus);
         memset(lowcore_ptr,0,sizeof(lowcore_ptr));  
         /*
          *  Initialize prefix pages and stacks for all possible cpus
@@ -806,14 +829,12 @@ void __devinit smp_prepare_boot_cpu(void)
 	BUG_ON(smp_processor_id() != 0);
 
 	cpu_set(0, cpu_online_map);
-	cpu_set(0, cpu_present_map);
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	current_set[0] = current;
 }
 
 void smp_cpus_done(unsigned int max_cpus)
 {
-	cpu_present_map = cpu_possible_map;
 }
 
 /*
@@ -845,6 +866,7 @@ static int __init topology_init(void)
 
 subsys_initcall(topology_init);
 
+EXPORT_SYMBOL(cpu_online_map);
 EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(lowcore_ptr);
 EXPORT_SYMBOL(smp_ctl_set_bit);
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 9c6e9c300eb9..444dae5912e6 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -31,6 +31,7 @@ typedef struct
 	__u16      cpu;
 } sigp_info;
 
+extern void smp_setup_cpu_possible_map(void);
 extern int smp_call_function_on(void (*func) (void *info), void *info,
 				int nonatomic, int wait, int cpu);
 #define NO_PROC_ID		0xFF		/* No processor magic marker */
@@ -104,6 +105,7 @@ smp_call_function_on(void (*func) (void *info), void *info,
 #define smp_cpu_not_running(cpu)	1
 #define smp_get_cpu(cpu) ({ 0; })
 #define smp_put_cpu(cpu) ({ 0; })
+#define smp_setup_cpu_possible_map()
 #endif
 
 #endif
-- 
cgit v1.2.2


From 37a3302618a51520e2056494715ea6b4776dd8ab Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:47 -0800
Subject: [PATCH] s390: possible_cpus parameter

Introduce possible_cpus command line option.  Hard sets the number of bits set
in cpu_possible_map.  Unlike the additional_cpus parameter this one guarantees
that num_possible_cpus() will stay constant even if the system gets rebooted
and a different number of cpus are present at startup.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/cpu-hotplug.txt |  6 ++++++
 arch/s390/kernel/smp.c        | 14 +++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 4d3355da0e26..e71bc6cbbc5e 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -58,6 +58,12 @@ mark such hot-pluggable cpus as disabled entries, one could use this
 parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map.
 
 
+possible_cpus=n		[s390 only] use this to set hotpluggable cpus.
+			This option sets possible_cpus bits in
+			cpu_possible_map. Thus keeping the numbers of bits set
+			constant even if the machine gets rebooted.
+			This option overrides additional_cpus.
+
 CPU maps and such
 -----------------
 [More on cpumaps and primitive to manipulate, please check
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 53291e94ac7b..d0a2745aec7f 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -673,15 +673,16 @@ __cpu_up(unsigned int cpu)
 }
 
 static unsigned int __initdata additional_cpus;
+static unsigned int __initdata possible_cpus;
 
 void __init smp_setup_cpu_possible_map(void)
 {
 	unsigned int pcpus, cpu;
 
-	pcpus = smp_count_cpus() + additional_cpus;
+	pcpus = min(smp_count_cpus() + additional_cpus, (unsigned int) NR_CPUS);
 
-	if (pcpus > NR_CPUS)
-		pcpus = NR_CPUS;
+	if (possible_cpus)
+		pcpus = min(possible_cpus, (unsigned int) NR_CPUS);
 
 	for (cpu = 0; cpu < pcpus; cpu++)
 		cpu_set(cpu, cpu_possible_map);
@@ -698,6 +699,13 @@ static int __init setup_additional_cpus(char *s)
 }
 early_param("additional_cpus", setup_additional_cpus);
 
+static int __init setup_possible_cpus(char *s)
+{
+	possible_cpus = simple_strtoul(s, NULL, 0);
+	return 0;
+}
+early_param("possible_cpus", setup_possible_cpus);
+
 int
 __cpu_disable(void)
 {
-- 
cgit v1.2.2


From 54330456b2e3398743586254f6d7695061ea0d49 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:48 -0800
Subject: [PATCH] s390: smp initialization speed

The last changes that introduced the additional_cpus command line parameter
also introduced a regression regarding smp initialization speed.  In
smp_setup_cpu_possible_map() cpu_present_map is set to the same value as
cpu_possible_map.  Especially that means that bits in the present map will be
set for cpus that are not present.  This will cause a slow down in the initial
cpu_up() loop in smp_init() since trying to take cpus online that aren't
present takes a while.

Fix this by setting only bits for present cpus in cpu_present_map and set
cpu_present_map to cpu_possible_map in smp_cpus_done().

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/kernel/smp.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index d0a2745aec7f..7dbe00c76c6b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -677,17 +677,21 @@ static unsigned int __initdata possible_cpus;
 
 void __init smp_setup_cpu_possible_map(void)
 {
-	unsigned int pcpus, cpu;
+	unsigned int phy_cpus, pos_cpus, cpu;
 
-	pcpus = min(smp_count_cpus() + additional_cpus, (unsigned int) NR_CPUS);
+	phy_cpus = smp_count_cpus();
+	pos_cpus = min(phy_cpus + additional_cpus, (unsigned int) NR_CPUS);
 
 	if (possible_cpus)
-		pcpus = min(possible_cpus, (unsigned int) NR_CPUS);
+		pos_cpus = min(possible_cpus, (unsigned int) NR_CPUS);
 
-	for (cpu = 0; cpu < pcpus; cpu++)
+	for (cpu = 0; cpu < pos_cpus; cpu++)
 		cpu_set(cpu, cpu_possible_map);
 
-	cpu_present_map = cpu_possible_map;
+	phy_cpus = min(phy_cpus, pos_cpus);
+
+	for (cpu = 0; cpu < phy_cpus; cpu++)
+		cpu_set(cpu, cpu_present_map);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -843,6 +847,7 @@ void __devinit smp_prepare_boot_cpu(void)
 
 void smp_cpus_done(unsigned int max_cpus)
 {
+	cpu_present_map = cpu_possible_map;
 }
 
 /*
-- 
cgit v1.2.2


From 6cadb78b3bec0a439a99db8fb550dc568e924ae6 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:49 -0800
Subject: [PATCH] s390: fix assignment instead of check in
 ccw_device_set_online()

Fix assignment instead of check in ccw_device_set_online().  Also remove
unneeded assignment in ccw_device_do_sense().

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/s390/cio/device.c        | 2 +-
 drivers/s390/cio/device_status.c | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 062fb100d94c..afc4e88551ad 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -359,7 +359,7 @@ ccw_device_set_online(struct ccw_device *cdev)
 	else 
 		pr_debug("ccw_device_offline returned %d, device %s\n",
 			 ret, cdev->dev.bus_id);
-	return (ret = 0) ? -ENODEV : ret;
+	return (ret == 0) ? -ENODEV : ret;
 }
 
 static ssize_t
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index dad4dd9887c9..6c762b43f921 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -317,7 +317,6 @@ ccw_device_do_sense(struct ccw_device *cdev, struct irb *irb)
 	/*
 	 * We have ending status but no sense information. Do a basic sense.
 	 */
-	sch = to_subchannel(cdev->dev.parent);
 	sch->sense_ccw.cmd_code = CCW_CMD_BASIC_SENSE;
 	sch->sense_ccw.cda = (__u32) __pa(cdev->private->irb.ecw);
 	sch->sense_ccw.count = SENSE_MAX_COUNT;
-- 
cgit v1.2.2


From ed3d021b823336a2e0c5090a91d083243f756e6c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 17 Feb 2006 13:52:50 -0800
Subject: [PATCH] s390: sys32_fstatat -> sys32_fstatat64

Just rename the compat system call to keep the name consistent with all the
other *64 compat system calls.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/kernel/compat_linux.c   | 4 ++--
 arch/s390/kernel/compat_wrapper.S | 6 +++---
 arch/s390/kernel/syscalls.S       | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 2d021626c1a6..cc058dc3bc8b 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -905,8 +905,8 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta
 	return ret;
 }
 
-asmlinkage long sys32_fstatat(unsigned int dfd, char __user *filename,
-			      struct stat64_emu31 __user* statbuf, int flag)
+asmlinkage long sys32_fstatat64(unsigned int dfd, char __user *filename,
+				struct stat64_emu31 __user* statbuf, int flag)
 {
 	struct kstat stat;
 	int error = -EINVAL;
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index dd2d6c3e8df8..615964cca15f 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1523,13 +1523,13 @@ compat_sys_futimesat_wrapper:
 	llgtr	%r4,%r4			# struct timeval *
 	jg	compat_sys_futimesat
 
-	.globl sys32_fstatat_wrapper
-sys32_fstatat_wrapper:
+	.globl sys32_fstatat64_wrapper
+sys32_fstatat64_wrapper:
 	llgfr	%r2,%r2			# unsigned int
 	llgtr	%r3,%r3			# char *
 	llgtr	%r4,%r4			# struct stat64 *
 	lgfr	%r5,%r5			# int
-	jg	sys32_fstatat
+	jg	sys32_fstatat64
 
 	.globl sys_unlinkat_wrapper
 sys_unlinkat_wrapper:
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 84921fe8d266..7c88d85c3597 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -301,7 +301,7 @@ SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper)
 SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper)	/* 290 */
 SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper)
 SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper)
-SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat_wrapper)
+SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper)
 SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper)
 SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper)	/* 295 */
 SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper)
-- 
cgit v1.2.2


From a8534adb74e23374889b84b3d97eb18da542a1b5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 17 Feb 2006 13:52:51 -0800
Subject: [PATCH] swsusp: fix breakage with swap on LVM

Restore the compatibility with the older code and make it possible to
suspend if the kernel command line doesn't contain the "resume=" argument

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/swsusp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 4e90905f0e87..2d9d08f72f76 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
 {
 	int i;
 
-	if (!swsusp_resume_device)
-		return -ENODEV;
 	spin_lock(&swap_lock);
 	for (i = 0; i < MAX_SWAPFILES; i++) {
 		if (!(swap_info[i].flags & SWP_WRITEOK))
 			continue;
-		if (is_resume_device(swap_info + i)) {
+		if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
 			spin_unlock(&swap_lock);
 			root_swap = i;
 			return 0;
-- 
cgit v1.2.2


From 77e7f250f88cd62844e24c42aff4d0e95969c746 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 17 Feb 2006 13:52:52 -0800
Subject: [PATCH] fuse: fix bug in aborted fuse_release_end()

There's a rather theoretical case of the BUG triggering in
fuse_reset_request():

  - iget() fails because of OOM after a successful CREATE_OPEN request
  - during IO on the resulting RELEASE request the connection is aborted

Fix and add warning to fuse_reset_request().

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dev.c  |  6 ++++++
 fs/fuse/file.c | 11 ++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f556a0d5c0d3..0c9a2ee54c91 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -66,6 +66,12 @@ static void restore_sigs(sigset_t *oldset)
 	sigprocmask(SIG_SETMASK, oldset, NULL);
 }
 
+/*
+ * Reset request, so that it can be reused
+ *
+ * The caller must be _very_ careful to make sure, that it is holding
+ * the only reference to req
+ */
 void fuse_reset_request(struct fuse_req *req)
 {
 	int preallocated = req->preallocated;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 296351615b00..6f05379b0a0d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -116,9 +116,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 /* Special case for failed iget in CREATE */
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	u64 nodeid = req->in.h.nodeid;
-	fuse_reset_request(req);
-	fuse_send_forget(fc, req, nodeid, 1);
+	/* If called from end_io_requests(), req has more than one
+	   reference and fuse_reset_request() cannot work */
+	if (fc->connected) {
+		u64 nodeid = req->in.h.nodeid;
+		fuse_reset_request(req);
+		fuse_send_forget(fc, req, nodeid, 1);
+	} else
+		fuse_put_request(fc, req);
 }
 
 void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
-- 
cgit v1.2.2


From 9127dd1aace4e89acb48fbcafd0ed27d3869847b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 17 Feb 2006 13:52:54 -0800
Subject: [PATCH] allow windfarm_pm112 module to load

The windfarm_pm112 module relies on smu_sat_get_sdb_partition which is in
windfarm_smu_sat.c but is not exported to modules, so despite Kconfig
having the option to build the pm112 as modules, this can never be loaded.

This patch fixes that by exporting smu_sat_get_sdb_partition with
EXPORT_SYMBOL_GPL

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/macintosh/windfarm_smu_sat.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index 3a32c59494f2..24e51d5e97fc 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -151,6 +151,7 @@ struct smu_sdbp_header *smu_sat_get_sdb_partition(unsigned int sat_id, int id,
 	kfree(buf);
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(smu_sat_get_sdb_partition);
 
 /* refresh the cache */
 static int wf_sat_read_cache(struct wf_sat *sat)
-- 
cgit v1.2.2


From 9ff4ced4676d3cd1f28b14d93a339f263ca304b0 Mon Sep 17 00:00:00 2001
From: Tim Hockin <thockin@google.com>
Date: Fri, 17 Feb 2006 13:52:54 -0800
Subject: [PATCH] Remove KERN_INFO from middle of printk line

Don't print KERN_INFO in the middle of a printk line.
	printk(KERN_INFO "OEM ID: %s ",str);
is just above this. This is already fixed up in i386 copy.

Signed-off-by: Martin J. Bligh <mbligh@google.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/mpparse.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index dc49bfb6db0a..9013a90b5c2e 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -288,9 +288,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
 
 	memcpy(str,mpc->mpc_productid,12);
 	str[12]=0;
-	printk(KERN_INFO "Product ID: %s ",str);
+	printk("Product ID: %s ",str);
 
-	printk(KERN_INFO "APIC at: 0x%X\n",mpc->mpc_lapic);
+	printk("APIC at: 0x%X\n",mpc->mpc_lapic);
 
 	/* save the local APIC address, it might be non-default */
 	if (!acpi_lapic)
-- 
cgit v1.2.2


From 200a4552af34b9a32e1f68a881a9ed5c7ec699cc Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Fri, 17 Feb 2006 13:52:56 -0800
Subject: [PATCH] powerpc: Fix accidentally-working typo in __pud_free_tlb

One of the parameters to the __pud_free_tlb() macro for powerpc is
incorrect (see patch) .  We get away with it by accident, because the one
place the macro is called, the second parameter is a variable named "pud".

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-powerpc/pgalloc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h
index 9f5b052784a5..a00ee002cd11 100644
--- a/include/asm-powerpc/pgalloc.h
+++ b/include/asm-powerpc/pgalloc.h
@@ -146,7 +146,7 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
 	pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
 		PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
 #ifndef CONFIG_PPC_64K_PAGES
-#define __pud_free_tlb(tlb, pmd)	\
+#define __pud_free_tlb(tlb, pud)	\
 	pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
 		PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
 #endif /* CONFIG_PPC_64K_PAGES */
-- 
cgit v1.2.2


From 74910e6c7dc7471b286a883c1a7af70483ffd2ba Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 17 Feb 2006 13:52:58 -0800
Subject: [PATCH] select: time comparison fixes

I got all of these backwards.  We want to return

	min(input timeout, new timeout)

to userspace to prevent increasing the time-remaining value.

Thanks to Ernst Herzberg <earny@net4u.de> for reporting and diagnosing.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 6 +++---
 fs/select.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index a2ba78bdf7f7..5333c7d7427f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1757,7 +1757,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 			goto sticky;
 		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
 		rtv.tv_sec = timeout;
-		if (compat_timeval_compare(&rtv, &tv) < 0)
+		if (compat_timeval_compare(&rtv, &tv) >= 0)
 			rtv = tv;
 		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
 sticky:
@@ -1834,7 +1834,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 			rts.tv_sec++;
 			rts.tv_nsec -= NSEC_PER_SEC;
 		}
-		if (compat_timespec_compare(&rts, &ts) < 0)
+		if (compat_timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
 		copy_to_user(tsp, &rts, sizeof(rts));
 	}
@@ -1934,7 +1934,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
 					1000;
 		rts.tv_sec = timeout;
-		if (compat_timespec_compare(&rts, &ts) < 0)
+		if (compat_timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
 		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 sticky:
diff --git a/fs/select.c b/fs/select.c
index 6ce68a9c8976..1815a57d2255 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -404,7 +404,7 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			goto sticky;
 		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
 		rtv.tv_sec = timeout;
-		if (timeval_compare(&rtv, &tv) < 0)
+		if (timeval_compare(&rtv, &tv) >= 0)
 			rtv = tv;
 		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
 sticky:
@@ -471,7 +471,7 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
 		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
 						1000;
 		rts.tv_sec = timeout;
-		if (timespec_compare(&rts, &ts) < 0)
+		if (timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
 		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 sticky:
@@ -775,7 +775,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
 						1000;
 		rts.tv_sec = timeout;
-		if (timespec_compare(&rts, &ts) < 0)
+		if (timespec_compare(&rts, &ts) >= 0)
 			rts = ts;
 		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 		sticky:
-- 
cgit v1.2.2


From 636f13c174dd7c84a437d3c3e8fa66f03f7fda63 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Fri, 17 Feb 2006 13:59:36 -0800
Subject: [PATCH] sys_mbind sanity checking

Make sure maxnodes is safe size before calculating nlongs in
get_nodes().

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/mempolicy.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 323fdcf128c4..bedfa4f09c80 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -808,6 +808,8 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
 	nodes_clear(*nodes);
 	if (maxnode == 0 || !nmask)
 		return 0;
+	if (maxnode > PAGE_SIZE)
+		return -EINVAL;
 
 	nlongs = BITS_TO_LONGS(maxnode);
 	if ((maxnode % BITS_PER_LONG) == 0)
-- 
cgit v1.2.2


From 35b73ceb9a7d10c81bd9e79e8485f7079ef2b40e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Fri, 17 Feb 2006 13:59:50 -0800
Subject: [PATCH] ACPI: fix vendor resource length computation

acpi_rs_get_list_length() needs to account for all the vendor-defined data
bytes.  Failing to include these causes buffers to be sized too small,
which causes slab corruption when we later convert AML to resources and run
off the end of the buffer.

This causes slab corruption on machines that use ACPI vendor-defined
resources.  All HP ia64 machines do, and I'm told that some NEC machines
may as well.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/acpi/resources/rscalc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/resources/rscalc.c b/drivers/acpi/resources/rscalc.c
index 7d6481d9fbec..4038dbfa63a0 100644
--- a/drivers/acpi/resources/rscalc.c
+++ b/drivers/acpi/resources/rscalc.c
@@ -391,8 +391,7 @@ acpi_rs_get_list_length(u8 * aml_buffer,
 			 * Ensure a 32-bit boundary for the structure
 			 */
 			extra_struct_bytes =
-			    ACPI_ROUND_UP_to_32_bITS(resource_length) -
-			    resource_length;
+			    ACPI_ROUND_UP_to_32_bITS(resource_length);
 			break;
 
 		case ACPI_RESOURCE_NAME_END_TAG:
@@ -408,8 +407,7 @@ acpi_rs_get_list_length(u8 * aml_buffer,
 			 * Add vendor data and ensure a 32-bit boundary for the structure
 			 */
 			extra_struct_bytes =
-			    ACPI_ROUND_UP_to_32_bITS(resource_length) -
-			    resource_length;
+			    ACPI_ROUND_UP_to_32_bITS(resource_length);
 			break;
 
 		case ACPI_RESOURCE_NAME_ADDRESS32:
-- 
cgit v1.2.2


From bd71c2b17468a2531fb4c81ec1d73520845e97e1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 17 Feb 2006 14:23:45 -0800
Subject: Linux v2.6.16-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 48d569d46ae1..77a448c8e776 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 16
-EXTRAVERSION =-rc3
+EXTRAVERSION =-rc4
 NAME=Sliding Snow Leopard
 
 # *DOCUMENTATION*
-- 
cgit v1.2.2


From ee407b02f3f1992bc746876c26f8175c8783562b Mon Sep 17 00:00:00 2001
From: Ayaz Abdulla <aabdulla@nvidia.com>
Date: Sat, 4 Feb 2006 13:13:17 -0500
Subject: [PATCH] forcedeth: Add vlan support

This forcedeth patch adds support for vlan stripping/inserting in hardware.

Signed-off-By: Ayaz Abdulla <aabdulla@nvidia.com>

Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/net/forcedeth.c | 76 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 5 deletions(-)

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 3682ec61e8a8..0fbe342c2ac9 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -102,6 +102,7 @@
  *	0.47: 26 Oct 2005: Add phyaddr 0 in phy scan.
  *	0.48: 24 Dec 2005: Disable TSO, bugfix for pci_map_single
  *	0.49: 10 Dec 2005: Fix tso for large buffers.
+ *	0.50: 20 Jan 2006: Add 8021pq tagging support.
  *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
@@ -113,7 +114,7 @@
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION		"0.49"
+#define FORCEDETH_VERSION		"0.50"
 #define DRV_NAME			"forcedeth"
 
 #include <linux/module.h>
@@ -153,6 +154,7 @@
 #define DEV_HAS_LARGEDESC	0x0004	/* device supports jumbo frames and needs packet format 2 */
 #define DEV_HAS_HIGH_DMA        0x0008  /* device supports 64bit dma */
 #define DEV_HAS_CHECKSUM        0x0010  /* device supports tx and rx checksum offloads */
+#define DEV_HAS_VLAN            0x0020  /* device supports vlan tagging and striping */
 
 enum {
 	NvRegIrqStatus = 0x000,
@@ -254,6 +256,8 @@ enum {
 #define NVREG_TXRXCTL_DESC_1	0
 #define NVREG_TXRXCTL_DESC_2	0x02100
 #define NVREG_TXRXCTL_DESC_3	0x02200
+#define NVREG_TXRXCTL_VLANSTRIP 0x00040
+#define NVREG_TXRXCTL_VLANINS	0x00080
 	NvRegMIIStatus = 0x180,
 #define NVREG_MIISTAT_ERROR		0x0001
 #define NVREG_MIISTAT_LINKCHANGE	0x0008
@@ -303,6 +307,8 @@ enum {
 #define NVREG_POWERSTATE_D1		0x0001
 #define NVREG_POWERSTATE_D2		0x0002
 #define NVREG_POWERSTATE_D3		0x0003
+	NvRegVlanControl = 0x300,
+#define NVREG_VLANCONTROL_ENABLE	0x2000
 };
 
 /* Big endian: should work, but is untested */
@@ -314,7 +320,7 @@ struct ring_desc {
 struct ring_desc_ex {
 	u32 PacketBufferHigh;
 	u32 PacketBufferLow;
-	u32 Reserved;
+	u32 TxVlan;
 	u32 FlagLen;
 };
 
@@ -355,6 +361,8 @@ typedef union _ring_type {
 #define NV_TX2_CHECKSUM_L3	(1<<27)
 #define NV_TX2_CHECKSUM_L4	(1<<26)
 
+#define NV_TX3_VLAN_TAG_PRESENT (1<<18)
+
 #define NV_RX_DESCRIPTORVALID	(1<<16)
 #define NV_RX_MISSEDFRAME	(1<<17)
 #define NV_RX_SUBSTRACT1	(1<<18)
@@ -385,6 +393,9 @@ typedef union _ring_type {
 #define NV_RX2_ERROR		(1<<30)
 #define NV_RX2_AVAIL		(1<<31)
 
+#define NV_RX3_VLAN_TAG_PRESENT (1<<16)
+#define NV_RX3_VLAN_TAG_MASK	(0x0000FFFF)
+
 /* Miscelaneous hardware related defines: */
 #define NV_PCI_REGSZ		0x270
 
@@ -511,6 +522,7 @@ struct fe_priv {
 	u32 irqmask;
 	u32 desc_ver;
 	u32 txrxctl_bits;
+	u32 vlanctl_bits;
 
 	void __iomem *base;
 
@@ -540,6 +552,9 @@ struct fe_priv {
 	dma_addr_t tx_dma[TX_RING];
 	unsigned int tx_dma_len[TX_RING];
 	u32 tx_flags;
+
+	/* vlan fields */
+	struct vlan_group *vlangrp;
 };
 
 /*
@@ -1031,6 +1046,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	u32 bcnt;
 	u32 size = skb->len-skb->data_len;
 	u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+	u32 tx_flags_vlan = 0;
 
 	/* add fragments to entries count */
 	for (i = 0; i < fragments; i++) {
@@ -1111,10 +1127,16 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 #endif
 	tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
 
+	/* vlan tag */
+	if (np->vlangrp && vlan_tx_tag_present(skb)) {
+		tx_flags_vlan = NV_TX3_VLAN_TAG_PRESENT | vlan_tx_tag_get(skb);
+	}
+
 	/* set tx flags */
 	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
 		np->tx_ring.orig[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra);
 	} else {
+		np->tx_ring.ex[start_nr].TxVlan = cpu_to_le32(tx_flags_vlan);
 		np->tx_ring.ex[start_nr].FlagLen |= cpu_to_le32(tx_flags | tx_flags_extra);
 	}	
 
@@ -1342,6 +1364,8 @@ static void nv_rx_process(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u32 Flags;
+	u32 vlanflags = 0;
+
 
 	for (;;) {
 		struct sk_buff *skb;
@@ -1357,6 +1381,7 @@ static void nv_rx_process(struct net_device *dev)
 		} else {
 			Flags = le32_to_cpu(np->rx_ring.ex[i].FlagLen);
 			len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver);
+			vlanflags = le32_to_cpu(np->rx_ring.ex[i].PacketBufferLow);
 		}
 
 		dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, Flags 0x%x.\n",
@@ -1474,7 +1499,11 @@ static void nv_rx_process(struct net_device *dev)
 		skb->protocol = eth_type_trans(skb, dev);
 		dprintk(KERN_DEBUG "%s: nv_rx_process: packet %d with %d bytes, proto %d accepted.\n",
 					dev->name, np->cur_rx, len, skb->protocol);
-		netif_rx(skb);
+		if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT)) {
+			vlan_hwaccel_rx(skb, np->vlangrp, vlanflags & NV_RX3_VLAN_TAG_MASK);
+		} else {
+			netif_rx(skb);
+		}
 		dev->last_rx = jiffies;
 		np->stats.rx_packets++;
 		np->stats.rx_bytes += len;
@@ -2217,6 +2246,34 @@ static struct ethtool_ops ops = {
 	.get_perm_addr = ethtool_op_get_perm_addr,
 };
 
+static void nv_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+
+	spin_lock_irq(&np->lock);
+
+	/* save vlan group */
+	np->vlangrp = grp;
+
+	if (grp) {
+		/* enable vlan on MAC */
+		np->txrxctl_bits |= NVREG_TXRXCTL_VLANSTRIP | NVREG_TXRXCTL_VLANINS;
+	} else {
+		/* disable vlan on MAC */
+		np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANSTRIP;
+		np->txrxctl_bits &= ~NVREG_TXRXCTL_VLANINS;
+	}
+
+	writel(np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
+
+	spin_unlock_irq(&np->lock);
+};
+
+static void nv_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+{
+	/* nothing to do */
+};
+
 static int nv_open(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
@@ -2265,6 +2322,7 @@ static int nv_open(struct net_device *dev)
 	writel(np->linkspeed, base + NvRegLinkSpeed);
 	writel(NVREG_UNKSETUP3_VAL1, base + NvRegUnknownSetupReg3);
 	writel(np->txrxctl_bits, base + NvRegTxRxControl);
+	writel(np->vlanctl_bits, base + NvRegVlanControl);
 	pci_push(base);
 	writel(NVREG_TXRXCTL_BIT1|np->txrxctl_bits, base + NvRegTxRxControl);
 	reg_delay(dev, NvRegUnknownSetupReg5, NVREG_UNKSETUP5_BIT31, NVREG_UNKSETUP5_BIT31,
@@ -2496,6 +2554,14 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 #endif
  	}
 
+	np->vlanctl_bits = 0;
+	if (id->driver_data & DEV_HAS_VLAN) {
+		np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE;
+		dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX;
+		dev->vlan_rx_register = nv_vlan_rx_register;
+		dev->vlan_rx_kill_vid = nv_vlan_rx_kill_vid;
+	}
+
 	err = -ENOMEM;
 	np->base = ioremap(addr, NV_PCI_REGSZ);
 	if (!np->base)
@@ -2737,11 +2803,11 @@ static struct pci_device_id pci_tbl[] = {
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN,
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN,
 	},
 	{0,},
 };
-- 
cgit v1.2.2


From 0832b25a75d128e4f9724156380ba071c4f3f20d Mon Sep 17 00:00:00 2001
From: Ayaz Abdulla <aabdulla@nvidia.com>
Date: Sat, 4 Feb 2006 13:13:26 -0500
Subject: [PATCH] forcedeth: Add support for 64bit rings

This forcedeth patch adds high dma support for tx/rx rings.

Signed-off-By: Ayaz Abdulla <aabdulla@nvidia.com>

Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/net/forcedeth.c | 58 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 0fbe342c2ac9..870613bf3fd6 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -103,6 +103,7 @@
  *	0.48: 24 Dec 2005: Disable TSO, bugfix for pci_map_single
  *	0.49: 10 Dec 2005: Fix tso for large buffers.
  *	0.50: 20 Jan 2006: Add 8021pq tagging support.
+ *	0.51: 20 Jan 2006: Add 64bit consistent memory allocation for rings.
  *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
@@ -114,7 +115,7 @@
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION		"0.50"
+#define FORCEDETH_VERSION		"0.51"
 #define DRV_NAME			"forcedeth"
 
 #include <linux/module.h>
@@ -258,6 +259,8 @@ enum {
 #define NVREG_TXRXCTL_DESC_3	0x02200
 #define NVREG_TXRXCTL_VLANSTRIP 0x00040
 #define NVREG_TXRXCTL_VLANINS	0x00080
+	NvRegTxRingPhysAddrHigh = 0x148,
+	NvRegRxRingPhysAddrHigh = 0x14C,
 	NvRegMIIStatus = 0x180,
 #define NVREG_MIISTAT_ERROR		0x0001
 #define NVREG_MIISTAT_LINKCHANGE	0x0008
@@ -627,6 +630,33 @@ static int reg_delay(struct net_device *dev, int offset, u32 mask, u32 target,
 	return 0;
 }
 
+#define NV_SETUP_RX_RING 0x01
+#define NV_SETUP_TX_RING 0x02
+
+static void setup_hw_rings(struct net_device *dev, int rxtx_flags)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+	u8 __iomem *base = get_hwbase(dev);
+
+	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+		if (rxtx_flags & NV_SETUP_RX_RING) {
+			writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr);
+		}
+		if (rxtx_flags & NV_SETUP_TX_RING) {
+			writel((u32) cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
+		}
+	} else {
+		if (rxtx_flags & NV_SETUP_RX_RING) {
+			writel((u32) cpu_to_le64(np->ring_addr), base + NvRegRxRingPhysAddr);
+			writel((u32) (cpu_to_le64(np->ring_addr) >> 32), base + NvRegRxRingPhysAddrHigh);
+		}
+		if (rxtx_flags & NV_SETUP_TX_RING) {
+			writel((u32) cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
+			writel((u32) (cpu_to_le64(np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)) >> 32), base + NvRegTxRingPhysAddrHigh);
+		}
+	}
+}
+
 #define MII_READ	(-1)
 /* mii_rw: read/write a register on the PHY.
  *
@@ -1295,10 +1325,7 @@ static void nv_tx_timeout(struct net_device *dev)
 		printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name);
 		nv_drain_tx(dev);
 		np->next_tx = np->nic_tx = 0;
-		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-			writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
-		else
-			writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
+		setup_hw_rings(dev, NV_SETUP_TX_RING);
 		netif_wake_queue(dev);
 	}
 
@@ -1573,11 +1600,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		}
 		/* reinit nic view of the rx queue */
 		writel(np->rx_buf_sz, base + NvRegOffloadConfig);
-		writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr);
-		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-			writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
-		else
-			writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
+		setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING);
 		writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT),
 			base + NvRegRingSizes);
 		pci_push(base);
@@ -2310,11 +2333,7 @@ static int nv_open(struct net_device *dev)
 	nv_copy_mac_to_hw(dev);
 
 	/* 4) give hw rings */
-	writel((u32) np->ring_addr, base + NvRegRxRingPhysAddr);
-	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-		writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc)), base + NvRegTxRingPhysAddr);
-	else
-		writel((u32) (np->ring_addr + RX_RING*sizeof(struct ring_desc_ex)), base + NvRegTxRingPhysAddr);
+	setup_hw_rings(dev, NV_SETUP_RX_RING | NV_SETUP_TX_RING);
 	writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT),
 		base + NvRegRingSizes);
 
@@ -2529,7 +2548,14 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 			printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n",
 					pci_name(pci_dev));
 		} else {
-			dev->features |= NETIF_F_HIGHDMA;
+			if (pci_set_consistent_dma_mask(pci_dev, 0x0000007fffffffffULL)) {
+				printk(KERN_INFO "forcedeth: 64-bit DMA (consistent) failed for device %s.\n",
+					pci_name(pci_dev));
+				goto out_relreg;
+			} else {
+				dev->features |= NETIF_F_HIGHDMA;
+				printk(KERN_INFO "forcedeth: using HIGHDMA\n");
+			}
 		}
 		np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
 	} else if (id->driver_data & DEV_HAS_LARGEDESC) {
-- 
cgit v1.2.2


From d33a73c81241e3d9ab8da2d0558429bdd5b4ef9a Mon Sep 17 00:00:00 2001
From: Ayaz Abdulla <aabdulla@nvidia.com>
Date: Sat, 4 Feb 2006 13:13:31 -0500
Subject: [PATCH] forcedeth: Add support for MSI/MSIX

This forcedeth patch adds support for MSI/MSIX interrupts.

Signed-off-By: Ayaz Abdulla <aabdulla@nvidia.com>

Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/net/forcedeth.c | 467 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 436 insertions(+), 31 deletions(-)

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 870613bf3fd6..e7fc28b07e5a 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -104,6 +104,7 @@
  *	0.49: 10 Dec 2005: Fix tso for large buffers.
  *	0.50: 20 Jan 2006: Add 8021pq tagging support.
  *	0.51: 20 Jan 2006: Add 64bit consistent memory allocation for rings.
+ *	0.52: 20 Jan 2006: Add MSI/MSIX support.
  *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
@@ -115,7 +116,7 @@
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION		"0.51"
+#define FORCEDETH_VERSION		"0.52"
 #define DRV_NAME			"forcedeth"
 
 #include <linux/module.h>
@@ -156,6 +157,8 @@
 #define DEV_HAS_HIGH_DMA        0x0008  /* device supports 64bit dma */
 #define DEV_HAS_CHECKSUM        0x0010  /* device supports tx and rx checksum offloads */
 #define DEV_HAS_VLAN            0x0020  /* device supports vlan tagging and striping */
+#define DEV_HAS_MSI             0x0040  /* device supports MSI */
+#define DEV_HAS_MSI_X           0x0080  /* device supports MSI-X */
 
 enum {
 	NvRegIrqStatus = 0x000,
@@ -169,14 +172,17 @@ enum {
 #define NVREG_IRQ_TX_OK			0x0010
 #define NVREG_IRQ_TIMER			0x0020
 #define NVREG_IRQ_LINK			0x0040
-#define NVREG_IRQ_TX_ERROR		0x0080
-#define NVREG_IRQ_TX1			0x0100
+#define NVREG_IRQ_RX_FORCED		0x0080
+#define NVREG_IRQ_TX_FORCED		0x0100
 #define NVREG_IRQMASK_THROUGHPUT	0x00df
 #define NVREG_IRQMASK_CPU		0x0040
+#define NVREG_IRQ_TX_ALL		(NVREG_IRQ_TX_ERR|NVREG_IRQ_TX_OK|NVREG_IRQ_TX_FORCED)
+#define NVREG_IRQ_RX_ALL		(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_RX_FORCED)
+#define NVREG_IRQ_OTHER			(NVREG_IRQ_TIMER|NVREG_IRQ_LINK)
 
 #define NVREG_IRQ_UNKNOWN	(~(NVREG_IRQ_RX_ERROR|NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|NVREG_IRQ_TX_ERR| \
-					NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_TX_ERROR| \
-					NVREG_IRQ_TX1))
+					NVREG_IRQ_TX_OK|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|NVREG_IRQ_RX_FORCED| \
+					NVREG_IRQ_TX_FORCED))
 
 	NvRegUnknownSetupReg6 = 0x008,
 #define NVREG_UNKSETUP6_VAL		3
@@ -188,6 +194,10 @@ enum {
 	NvRegPollingInterval = 0x00c,
 #define NVREG_POLL_DEFAULT_THROUGHPUT	970
 #define NVREG_POLL_DEFAULT_CPU	13
+	NvRegMSIMap0 = 0x020,
+	NvRegMSIMap1 = 0x024,
+	NvRegMSIIrqMask = 0x030,
+#define NVREG_MSI_VECTOR_0_ENABLED 0x01
 	NvRegMisc1 = 0x080,
 #define NVREG_MISC1_HD		0x02
 #define NVREG_MISC1_FORCE	0x3b0f3c
@@ -312,6 +322,9 @@ enum {
 #define NVREG_POWERSTATE_D3		0x0003
 	NvRegVlanControl = 0x300,
 #define NVREG_VLANCONTROL_ENABLE	0x2000
+	NvRegMSIXMap0 = 0x3e0,
+	NvRegMSIXMap1 = 0x3e4,
+	NvRegMSIXIrqStatus = 0x3f0,
 };
 
 /* Big endian: should work, but is untested */
@@ -489,6 +502,18 @@ typedef union _ring_type {
 #define LPA_1000FULL	0x0800
 #define LPA_1000HALF	0x0400
 
+/* MSI/MSI-X defines */
+#define NV_MSI_X_MAX_VECTORS  8
+#define NV_MSI_X_VECTORS_MASK 0x000f
+#define NV_MSI_CAPABLE        0x0010
+#define NV_MSI_X_CAPABLE      0x0020
+#define NV_MSI_ENABLED        0x0040
+#define NV_MSI_X_ENABLED      0x0080
+
+#define NV_MSI_X_VECTOR_ALL   0x0
+#define NV_MSI_X_VECTOR_RX    0x0
+#define NV_MSI_X_VECTOR_TX    0x1
+#define NV_MSI_X_VECTOR_OTHER 0x2
 
 /*
  * SMP locking:
@@ -540,6 +565,7 @@ struct fe_priv {
 	unsigned int pkt_limit;
 	struct timer_list oom_kick;
 	struct timer_list nic_poll;
+	u32 nic_poll_irq;
 
 	/* media detection workaround.
 	 * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
@@ -558,6 +584,10 @@ struct fe_priv {
 
 	/* vlan fields */
 	struct vlan_group *vlangrp;
+
+	/* msi/msi-x fields */
+	u32 msi_flags;
+	struct msix_entry msi_x_entry[NV_MSI_X_MAX_VECTORS];
 };
 
 /*
@@ -585,6 +615,16 @@ static int optimization_mode = NV_OPTIMIZATION_MODE_THROUGHPUT;
  */
 static int poll_interval = -1;
 
+/*
+ * Disable MSI interrupts
+ */
+static int disable_msi = 0;
+
+/*
+ * Disable MSIX interrupts
+ */
+static int disable_msix = 0;
+
 static inline struct fe_priv *get_nvpriv(struct net_device *dev)
 {
 	return netdev_priv(dev);
@@ -948,14 +988,27 @@ static void nv_do_rx_refill(unsigned long data)
 	struct net_device *dev = (struct net_device *) data;
 	struct fe_priv *np = netdev_priv(dev);
 
-	disable_irq(dev->irq);
+
+	if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
+	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
+	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
+		disable_irq(dev->irq);
+	} else {
+		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+	}
 	if (nv_alloc_rx(dev)) {
 		spin_lock(&np->lock);
 		if (!np->in_shutdown)
 			mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
 		spin_unlock(&np->lock);
 	}
-	enable_irq(dev->irq);
+	if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
+	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
+	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
+		enable_irq(dev->irq);
+	} else {
+		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+	}
 }
 
 static void nv_init_rx(struct net_device *dev) 
@@ -1010,7 +1063,7 @@ static int nv_release_txskb(struct net_device *dev, unsigned int skbnr)
 	}
 
 	if (np->tx_skbuff[skbnr]) {
-		dev_kfree_skb_irq(np->tx_skbuff[skbnr]);
+		dev_kfree_skb_any(np->tx_skbuff[skbnr]);
 		np->tx_skbuff[skbnr] = NULL;
 		return 1;
 	} else {
@@ -1261,9 +1314,14 @@ static void nv_tx_timeout(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
+	u32 status;
+
+	if (np->msi_flags & NV_MSI_X_ENABLED)
+		status = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK;
+	else
+		status = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK;
 
-	printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name,
-			readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK);
+	printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name, status);
 
 	{
 		int i;
@@ -1579,7 +1637,15 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		 * guessed, there is probably a simpler approach.
 		 * Changing the MTU is a rare event, it shouldn't matter.
 		 */
-		disable_irq(dev->irq);
+		if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
+		    ((np->msi_flags & NV_MSI_X_ENABLED) && 
+		     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
+			disable_irq(dev->irq);
+		} else {
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+		}
 		spin_lock_bh(&dev->xmit_lock);
 		spin_lock(&np->lock);
 		/* stop engines */
@@ -1612,7 +1678,15 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		nv_start_tx(dev);
 		spin_unlock(&np->lock);
 		spin_unlock_bh(&dev->xmit_lock);
-		enable_irq(dev->irq);
+		if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
+		    ((np->msi_flags & NV_MSI_X_ENABLED) && 
+		     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
+			enable_irq(dev->irq);
+		} else {
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+		}
 	}
 	return 0;
 }
@@ -1918,8 +1992,13 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
 	dprintk(KERN_DEBUG "%s: nv_nic_irq\n", dev->name);
 
 	for (i=0; ; i++) {
-		events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK;
-		writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
+		if (!(np->msi_flags & NV_MSI_X_ENABLED)) {
+			events = readl(base + NvRegIrqStatus) & NVREG_IRQSTAT_MASK;
+			writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
+		} else {
+			events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQSTAT_MASK;
+			writel(NVREG_IRQSTAT_MASK, base + NvRegMSIXIrqStatus);
+		}
 		pci_push(base);
 		dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events);
 		if (!(events & np->irqmask))
@@ -1959,11 +2038,16 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
 		if (i > max_interrupt_work) {
 			spin_lock(&np->lock);
 			/* disable interrupts on the nic */
-			writel(0, base + NvRegIrqMask);
+			if (!(np->msi_flags & NV_MSI_X_ENABLED))
+				writel(0, base + NvRegIrqMask);
+			else
+				writel(np->irqmask, base + NvRegIrqMask);
 			pci_push(base);
 
-			if (!np->in_shutdown)
+			if (!np->in_shutdown) {
+				np->nic_poll_irq = np->irqmask;
 				mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
+			}
 			printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq.\n", dev->name, i);
 			spin_unlock(&np->lock);
 			break;
@@ -1975,22 +2059,212 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
 	return IRQ_RETVAL(i);
 }
 
+static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs)
+{
+	struct net_device *dev = (struct net_device *) data;
+	struct fe_priv *np = netdev_priv(dev);
+	u8 __iomem *base = get_hwbase(dev);
+	u32 events;
+	int i;
+
+	dprintk(KERN_DEBUG "%s: nv_nic_irq_tx\n", dev->name);
+
+	for (i=0; ; i++) {
+		events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_TX_ALL;
+		writel(NVREG_IRQ_TX_ALL, base + NvRegMSIXIrqStatus);
+		pci_push(base);
+		dprintk(KERN_DEBUG "%s: tx irq: %08x\n", dev->name, events);
+		if (!(events & np->irqmask))
+			break;
+
+		spin_lock(&np->lock);
+		nv_tx_done(dev);
+		spin_unlock(&np->lock);
+		
+		if (events & (NVREG_IRQ_TX_ERR)) {
+			dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n",
+						dev->name, events);
+		}
+		if (i > max_interrupt_work) {
+			spin_lock(&np->lock);
+			/* disable interrupts on the nic */
+			writel(NVREG_IRQ_TX_ALL, base + NvRegIrqMask);
+			pci_push(base);
+
+			if (!np->in_shutdown) {
+				np->nic_poll_irq |= NVREG_IRQ_TX_ALL;
+				mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
+			}
+			printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_tx.\n", dev->name, i);
+			spin_unlock(&np->lock);
+			break;
+		}
+
+	}
+	dprintk(KERN_DEBUG "%s: nv_nic_irq_tx completed\n", dev->name);
+
+	return IRQ_RETVAL(i);
+}
+
+static irqreturn_t nv_nic_irq_rx(int foo, void *data, struct pt_regs *regs)
+{
+	struct net_device *dev = (struct net_device *) data;
+	struct fe_priv *np = netdev_priv(dev);
+	u8 __iomem *base = get_hwbase(dev);
+	u32 events;
+	int i;
+
+	dprintk(KERN_DEBUG "%s: nv_nic_irq_rx\n", dev->name);
+
+	for (i=0; ; i++) {
+		events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_RX_ALL;
+		writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus);
+		pci_push(base);
+		dprintk(KERN_DEBUG "%s: rx irq: %08x\n", dev->name, events);
+		if (!(events & np->irqmask))
+			break;
+		
+		nv_rx_process(dev);
+		if (nv_alloc_rx(dev)) {
+			spin_lock(&np->lock);
+			if (!np->in_shutdown)
+				mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
+			spin_unlock(&np->lock);
+		}
+		
+		if (i > max_interrupt_work) {
+			spin_lock(&np->lock);
+			/* disable interrupts on the nic */
+			writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
+			pci_push(base);
+
+			if (!np->in_shutdown) {
+				np->nic_poll_irq |= NVREG_IRQ_RX_ALL;
+				mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
+			}
+			printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_rx.\n", dev->name, i);
+			spin_unlock(&np->lock);
+			break;
+		}
+
+	}
+	dprintk(KERN_DEBUG "%s: nv_nic_irq_rx completed\n", dev->name);
+
+	return IRQ_RETVAL(i);
+}
+
+static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs)
+{
+	struct net_device *dev = (struct net_device *) data;
+	struct fe_priv *np = netdev_priv(dev);
+	u8 __iomem *base = get_hwbase(dev);
+	u32 events;
+	int i;
+
+	dprintk(KERN_DEBUG "%s: nv_nic_irq_other\n", dev->name);
+
+	for (i=0; ; i++) {
+		events = readl(base + NvRegMSIXIrqStatus) & NVREG_IRQ_OTHER;
+		writel(NVREG_IRQ_OTHER, base + NvRegMSIXIrqStatus);
+		pci_push(base);
+		dprintk(KERN_DEBUG "%s: irq: %08x\n", dev->name, events);
+		if (!(events & np->irqmask))
+			break;
+		
+		if (events & NVREG_IRQ_LINK) {
+			spin_lock(&np->lock);
+			nv_link_irq(dev);
+			spin_unlock(&np->lock);
+		}
+		if (np->need_linktimer && time_after(jiffies, np->link_timeout)) {
+			spin_lock(&np->lock);
+			nv_linkchange(dev);
+			spin_unlock(&np->lock);
+			np->link_timeout = jiffies + LINK_TIMEOUT;
+		}
+		if (events & (NVREG_IRQ_UNKNOWN)) {
+			printk(KERN_DEBUG "%s: received irq with unknown events 0x%x. Please report\n",
+						dev->name, events);
+		}
+		if (i > max_interrupt_work) {
+			spin_lock(&np->lock);
+			/* disable interrupts on the nic */
+			writel(NVREG_IRQ_OTHER, base + NvRegIrqMask);
+			pci_push(base);
+
+			if (!np->in_shutdown) {
+				np->nic_poll_irq |= NVREG_IRQ_OTHER;
+				mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
+			}
+			printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i);
+			spin_unlock(&np->lock);
+			break;
+		}
+
+	}
+	dprintk(KERN_DEBUG "%s: nv_nic_irq_other completed\n", dev->name);
+
+	return IRQ_RETVAL(i);
+}
+
 static void nv_do_nic_poll(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *) data;
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
+	u32 mask = 0;
 
-	disable_irq(dev->irq);
-	/* FIXME: Do we need synchronize_irq(dev->irq) here? */
 	/*
+	 * First disable irq(s) and then
 	 * reenable interrupts on the nic, we have to do this before calling
 	 * nv_nic_irq because that may decide to do otherwise
 	 */
-	writel(np->irqmask, base + NvRegIrqMask);
+
+	if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
+	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
+	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
+		disable_irq(dev->irq);
+		mask = np->irqmask;
+	} else {
+		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+			mask |= NVREG_IRQ_RX_ALL;
+		}
+		if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+			mask |= NVREG_IRQ_TX_ALL;
+		}
+		if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+			mask |= NVREG_IRQ_OTHER;
+		}
+	}
+	np->nic_poll_irq = 0;
+
+	/* FIXME: Do we need synchronize_irq(dev->irq) here? */
+	
+	writel(mask, base + NvRegIrqMask);
 	pci_push(base);
-	nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL);
-	enable_irq(dev->irq);
+
+	if (!(np->msi_flags & NV_MSI_X_ENABLED) || 
+	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
+	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
+		nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL);
+		enable_irq(dev->irq);
+	} else {
+		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
+			nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL);
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+		}
+		if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
+			nv_nic_irq_tx((int) 0, (void *) data, (struct pt_regs *) NULL);
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+		}
+		if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
+			nv_nic_irq_other((int) 0, (void *) data, (struct pt_regs *) NULL);
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+		}
+	}
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2297,11 +2571,38 @@ static void nv_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	/* nothing to do */
 };
 
+static void set_msix_vector_map(struct net_device *dev, u32 vector, u32 irqmask)
+{
+	u8 __iomem *base = get_hwbase(dev);
+	int i;
+	u32 msixmap = 0;
+
+	/* Each interrupt bit can be mapped to a MSIX vector (4 bits).
+	 * MSIXMap0 represents the first 8 interrupts and MSIXMap1 represents
+	 * the remaining 8 interrupts.
+	 */
+	for (i = 0; i < 8; i++) {
+		if ((irqmask >> i) & 0x1) {
+			msixmap |= vector << (i << 2);
+		}
+	}
+	writel(readl(base + NvRegMSIXMap0) | msixmap, base + NvRegMSIXMap0);
+
+	msixmap = 0;
+	for (i = 0; i < 8; i++) {
+		if ((irqmask >> (i + 8)) & 0x1) {
+			msixmap |= vector << (i << 2);
+		}
+	}
+	writel(readl(base + NvRegMSIXMap1) | msixmap, base + NvRegMSIXMap1);
+}
+
 static int nv_open(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
-	int ret, oom, i;
+	int ret = 1;
+	int oom, i;
 
 	dprintk(KERN_DEBUG "nv_open: begin\n");
 
@@ -2392,9 +2693,77 @@ static int nv_open(struct net_device *dev)
 	writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
 	pci_push(base);
 
-	ret = request_irq(dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev);
-	if (ret)
-		goto out_drain;
+	if (np->msi_flags & NV_MSI_X_CAPABLE) {
+		for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
+			np->msi_x_entry[i].entry = i;
+		}
+		if ((ret = pci_enable_msix(np->pci_dev, np->msi_x_entry, (np->msi_flags & NV_MSI_X_VECTORS_MASK))) == 0) {
+			np->msi_flags |= NV_MSI_X_ENABLED;
+			if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) {
+				/* Request irq for rx handling */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector, &nv_nic_irq_rx, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed for rx %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_drain;
+				}
+				/* Request irq for tx handling */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector, &nv_nic_irq_tx, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed for tx %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_drain;
+				}
+				/* Request irq for link and timer handling */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector, &nv_nic_irq_other, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed for link %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_drain;
+				}
+
+				/* map interrupts to their respective vector */
+				writel(0, base + NvRegMSIXMap0);
+				writel(0, base + NvRegMSIXMap1);
+				set_msix_vector_map(dev, NV_MSI_X_VECTOR_RX, NVREG_IRQ_RX_ALL);
+				set_msix_vector_map(dev, NV_MSI_X_VECTOR_TX, NVREG_IRQ_TX_ALL);
+				set_msix_vector_map(dev, NV_MSI_X_VECTOR_OTHER, NVREG_IRQ_OTHER);
+			} else {
+				/* Request irq for all interrupts */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_drain;
+				}
+
+				/* map interrupts to vector 0 */
+				writel(0, base + NvRegMSIXMap0);
+				writel(0, base + NvRegMSIXMap1);
+			}
+		}
+	}
+	if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) {
+		if ((ret = pci_enable_msi(np->pci_dev)) == 0) {
+			np->msi_flags |= NV_MSI_ENABLED;
+			if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) {
+				printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
+				pci_disable_msi(np->pci_dev);
+				np->msi_flags &= ~NV_MSI_ENABLED;
+				goto out_drain;
+			}
+
+			/* map interrupts to vector 0 */
+			writel(0, base + NvRegMSIMap0);
+			writel(0, base + NvRegMSIMap1);
+			/* enable msi vector 0 */
+			writel(NVREG_MSI_VECTOR_0_ENABLED, base + NvRegMSIIrqMask);
+		}
+	}
+	if (ret != 0) {
+		if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0)
+			goto out_drain;
+	}
 
 	/* ask for interrupts */
 	writel(np->irqmask, base + NvRegIrqMask);
@@ -2441,6 +2810,7 @@ static int nv_close(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base;
+	int i;
 
 	spin_lock_irq(&np->lock);
 	np->in_shutdown = 1;
@@ -2458,13 +2828,31 @@ static int nv_close(struct net_device *dev)
 
 	/* disable interrupts on the nic or we will lock up */
 	base = get_hwbase(dev);
-	writel(0, base + NvRegIrqMask);
+	if (np->msi_flags & NV_MSI_X_ENABLED) {
+		writel(np->irqmask, base + NvRegIrqMask);
+	} else {
+		if (np->msi_flags & NV_MSI_ENABLED)
+			writel(0, base + NvRegMSIIrqMask);
+		writel(0, base + NvRegIrqMask);
+	}
 	pci_push(base);
 	dprintk(KERN_INFO "%s: Irqmask is zero again\n", dev->name);
 
 	spin_unlock_irq(&np->lock);
 
-	free_irq(dev->irq, dev);
+	if (np->msi_flags & NV_MSI_X_ENABLED) {
+		for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
+			free_irq(np->msi_x_entry[i].vector, dev);
+		}
+		pci_disable_msix(np->pci_dev);
+		np->msi_flags &= ~NV_MSI_X_ENABLED;
+	} else {
+		free_irq(np->pci_dev->irq, dev);
+		if (np->msi_flags & NV_MSI_ENABLED) {
+			pci_disable_msi(np->pci_dev);
+			np->msi_flags &= ~NV_MSI_ENABLED;
+		}
+	}
 
 	drain_ring(dev);
 
@@ -2588,6 +2976,14 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 		dev->vlan_rx_kill_vid = nv_vlan_rx_kill_vid;
 	}
 
+	np->msi_flags = 0;
+	if ((id->driver_data & DEV_HAS_MSI) && !disable_msi) {
+		np->msi_flags |= NV_MSI_CAPABLE;
+	}
+	if ((id->driver_data & DEV_HAS_MSI_X) && !disable_msix) {
+		np->msi_flags |= NV_MSI_X_CAPABLE;
+	}
+
 	err = -ENOMEM;
 	np->base = ioremap(addr, NV_PCI_REGSZ);
 	if (!np->base)
@@ -2670,10 +3066,15 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	} else {
 		np->tx_flags = NV_TX2_VALID;
 	}
-	if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT)
+	if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) {
 		np->irqmask = NVREG_IRQMASK_THROUGHPUT;
-	else
+		if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */
+			np->msi_flags |= 0x0003;
+	} else {
 		np->irqmask = NVREG_IRQMASK_CPU;
+		if (np->msi_flags & NV_MSI_X_CAPABLE) /* set number of vectors */
+			np->msi_flags |= 0x0001;
+	}
 
 	if (id->driver_data & DEV_NEED_TIMERIRQ)
 		np->irqmask |= NVREG_IRQ_TIMER;
@@ -2829,11 +3230,11 @@ static struct pci_device_id pci_tbl[] = {
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X,
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA|DEV_HAS_VLAN|DEV_HAS_MSI|DEV_HAS_MSI_X,
 	},
 	{0,},
 };
@@ -2863,6 +3264,10 @@ module_param(optimization_mode, int, 0);
 MODULE_PARM_DESC(optimization_mode, "In throughput mode (0), every tx & rx packet will generate an interrupt. In CPU mode (1), interrupts are controlled by a timer.");
 module_param(poll_interval, int, 0);
 MODULE_PARM_DESC(poll_interval, "Interval determines how frequent timer interrupt is generated by [(time_in_micro_secs * 100) / (2^10)]. Min is 0 and Max is 65535.");
+module_param(disable_msi, int, 0);
+MODULE_PARM_DESC(disable_msi, "Disable MSI interrupts by setting to 1.");
+module_param(disable_msix, int, 0);
+MODULE_PARM_DESC(disable_msix, "Disable MSIX interrupts by setting to 1.");
 
 MODULE_AUTHOR("Manfred Spraul <manfred@colorfullife.com>");
 MODULE_DESCRIPTION("Reverse Engineered nForce ethernet driver");
-- 
cgit v1.2.2