From 859361a228258edf4821d9f5635825033eca78e8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 2 Aug 2012 14:05:59 -0600 Subject: NVMe: Free cmdid on nvme_submit_bio error nvme_map_bio() is called after the cmdid is allocated, so we have to free the cmdid before returning from nvme_submit_bio() if nvme_map_bio() returned an error. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 931769e133e5..954a61018dc2 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -237,7 +237,8 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, *fn = special_completion; return CMD_CTX_INVALID; } - *fn = info[cmdid].fn; + if (fn) + *fn = info[cmdid].fn; ctx = info[cmdid].ctx; info[cmdid].fn = special_completion; info[cmdid].ctx = CMD_CTX_COMPLETED; @@ -589,7 +590,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs); if (result < 0) - goto free_iod; + goto free_cmdid; length = result; cmnd->rw.command_id = cmdid; @@ -609,6 +610,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; + free_cmdid: + free_cmdid(nvmeq, cmdid, NULL); free_iod: nvme_free_iod(nvmeq->dev, iod); nomem: -- cgit v1.2.2 From 3295874b6074d749516d6decd43afad7bf6e38ff Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 20 Aug 2012 14:57:49 -0600 Subject: NVMe: End queued bio requests when freeing queue If the queue has bios queued on it when it is freed, bio_endio() must be called for them first. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 954a61018dc2..af88635e44e4 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -909,6 +909,10 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) spin_lock_irq(&nvmeq->q_lock); nvme_cancel_ios(nvmeq, false); + while (bio_list_peek(&nvmeq->sq_cong)) { + struct bio *bio = bio_list_pop(&nvmeq->sq_cong); + bio_endio(bio, -EIO); + } spin_unlock_irq(&nvmeq->q_lock); irq_set_affinity_hint(vector, NULL); -- cgit v1.2.2 From f4f117f64baf8840d22266d518227b2a186d294b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 21 Sep 2012 10:49:05 -0600 Subject: NVMe: Set result from user admin command The ioctl data structure includes space for the 'result' of the admin command to be returned; it just wasn't filled in. 
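For illustration, a minimal userspace sketch of what this enables, assuming the NVME_IOCTL_ADMIN_CMD passthrough ioctl and struct nvme_admin_cmd from the include/linux/nvme.h of this era, issued against a namespace node such as /dev/nvme0n1 (hypothetical device name):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/nvme.h>    /* struct nvme_admin_cmd, NVME_IOCTL_ADMIN_CMD */

    int main(void)
    {
            struct nvme_admin_cmd cmd;
            int fd = open("/dev/nvme0n1", O_RDONLY);

            if (fd < 0)
                    return 1;
            memset(&cmd, 0, sizeof(cmd));
            cmd.opcode = 0x0a;      /* Get Features (NVMe spec admin opcode) */
            cmd.cdw10 = 0x07;       /* FID 0x07: Number of Queues */
            if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) < 0)
                    return 1;
            /* Before this patch, cmd.result was never written back. */
            printf("result dword: 0x%08x\n", cmd.result);
            return 0;
    }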
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index af88635e44e4..47c860454289 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1237,12 +1237,17 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, if (length != cmd.data_len) status = -ENOMEM; else - status = nvme_submit_admin_cmd(dev, &c, NULL); + status = nvme_submit_admin_cmd(dev, &c, &cmd.result); if (cmd.data_len) { nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); nvme_free_iod(dev, iod); } + + if (!status && copy_to_user(&ucmd->result, &cmd.result, + sizeof(cmd.result))) + status = -EFAULT; + return status; } -- cgit v1.2.2 From 08df1e05657fc6712e520e7c09cc6c86160ceb35 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 21 Sep 2012 10:52:13 -0600 Subject: NVMe: Add result to nvme_get_features nvme_get_features() was not returning the result. Add a parameter to return the result in (similar to nvme_set_features()) and change all callers. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 47c860454289..c1d5444f0cb3 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -838,8 +838,8 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, return nvme_submit_admin_cmd(dev, &c, NULL); } -static int nvme_get_features(struct nvme_dev *dev, unsigned fid, - unsigned nsid, dma_addr_t dma_addr) +static int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result) { struct nvme_command c; @@ -849,7 +849,7 @@ static int nvme_get_features(struct nvme_dev *dev, unsigned fid, c.features.prp1 = cpu_to_le64(dma_addr); c.features.fid = cpu_to_le32(fid); - return nvme_submit_admin_cmd(dev, &c, NULL); + return nvme_submit_admin_cmd(dev, &c, result); } static int nvme_set_features(struct nvme_dev *dev, unsigned fid, @@ -1535,7 +1535,7 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) continue; res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, - dma_addr + 4096); + dma_addr + 4096, NULL); if (res) continue; -- cgit v1.2.2 From 6ecec74520d8a357726e6c12f99080dbe7b347dd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 26 Sep 2012 12:49:27 -0600 Subject: NVMe: Define SMART log This data structure is defined in the NVMe specification. It's not used by the kernel, but is available for use by userspace software. 
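A hedged sketch of such userspace use, building on the passthrough example above (fd opened as before) and assuming the NVMe-spec Get Log Page opcode (0x02) with the SMART/health log identifier (0x02):

    struct nvme_smart_log smart;
    struct nvme_admin_cmd cmd;

    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = 0x02;                      /* Get Log Page */
    cmd.nsid = 0xffffffff;                  /* controller-wide log */
    cmd.addr = (unsigned long)&smart;
    cmd.data_len = sizeof(smart);           /* 512 bytes */
    /* cdw10: log id in bits 7:0, number of dwords - 1 in bits 27:16 */
    cmd.cdw10 = 0x02 | ((sizeof(smart) / 4 - 1) << 16);

    if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) == 0 &&
        (smart.critical_warning & NVME_SMART_CRIT_MEDIA))
            fprintf(stderr, "drive reports media errors\n");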
Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 1 + include/linux/nvme.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index c1d5444f0cb3..270805cf8d42 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -135,6 +135,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); + BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); } typedef void (*nvme_completion_fn)(struct nvme_dev *, void *, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c25cccaa555a..4fa3b0b9b071 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -137,6 +137,34 @@ enum { NVME_LBAF_RP_DEGRADED = 3, }; +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __u8 rsvd192[320]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + struct nvme_lba_range_type { __u8 type; __u8 attributes; -- cgit v1.2.2 From 2b1960341576bf51c01b12fefeb1cc53820923e7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 Nov 2012 11:59:23 -0700 Subject: NVMe: Initialize iod nents to 0 For commands that do not map a scatter list, we need to initialize the iod's number of sg entries (nents) to 0 and not unmap in this case. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 270805cf8d42..993c014d195a 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -337,6 +337,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) iod->offset = offsetof(struct nvme_iod, sg[nseg]); iod->npages = -1; iod->length = nbytes; + iod->nents = 0; } return iod; @@ -377,7 +378,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; - dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, + if (iod->nents) + dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, bio_data_dir(bio) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE); nvme_free_iod(dev, iod); if (status) { -- cgit v1.2.2 From 79461681692a337442c6a5cf44deba120a57186a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 10 Nov 2012 08:54:53 -0500 Subject: MAINTAINERS: Add entry for the NVMe driver Signed-off-by: Matthew Wilcox --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 59203e77ce9e..d0bf2252e781 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5160,6 +5160,14 @@ S: Maintained F: drivers/video/riva/ F: drivers/video/nvidia/ +NVM EXPRESS DRIVER +M: Matthew Wilcox +L: linux-nvme@lists.infradead.org +T: git git://git.infradead.org/users/willy/linux-nvme.git +S: Supported +F: drivers/block/nvme.c +F: include/linux/nvme.h + OMAP SUPPORT M: Tony Lindgren L: linux-omap@vger.kernel.org -- cgit v1.2.2 From 3f63c340a72f2872a9362245cb2e03f3d2bb73a6 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Tue, 19 Feb 2013 11:54:16 -0500 Subject: Bluetooth: Add support for atheros 04ca:3004 device to ath3k Yet another version of the atheros bluetooth chipset T: Bus=01 Lev=02 Prnt=02 Port=03 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=3004 Rev=00.01 S: Manufacturer=Atheros Communications S: Product=Bluetooth USB Host Controller S: SerialNumber=Alaska Day 2006 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb This resolves https://bugzilla.redhat.com/show_bug.cgi?id=844750 Reported-by: niktr@mail.ru Signed-off-by: Josh Boyer Signed-off-by: Gustavo Padovan --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 33c9a44a9678..b9908dd84529 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -76,6 +76,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0x3004) }, { USB_DEVICE(0x0CF3, 0x311D) }, { USB_DEVICE(0x13d3, 0x3375) }, + { USB_DEVICE(0x04CA, 0x3004) }, { USB_DEVICE(0x04CA, 0x3005) }, { USB_DEVICE(0x04CA, 0x3006) }, { USB_DEVICE(0x04CA, 0x3008) }, @@ -108,6 +109,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7e351e345476..59cde8e882ff 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -134,6 +134,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, -- cgit v1.2.2 From 734907e82d21a75a514b80164185427a832a00c0 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 8 Feb 2013 09:30:23 
-0800 Subject: openvswitch: Fix ovs_vport_cmd_del return value on success If the pointer does not represent an error then the PTR_ERR macro may still return a nonzero value. The fix is the same as in ovs_vport_cmd_set. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f996db343247..5e275b903f3c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1772,6 +1772,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(reply)) goto exit_unlock; + err = 0; ovs_dp_detach_port(vport); genl_notify(reply, genl_info_net(info), info->snd_portid, -- cgit v1.2.2 From cb7c5bdffb727a3d4dea5247d9d1d52238b01d90 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 8 Feb 2013 13:18:01 -0800 Subject: openvswitch: Fix ovs_vport_cmd_new return value on success If the pointer does not represent an error then the PTR_ERR macro may still return a nonzero value. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 5e275b903f3c..a2cd3e6d03a2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1691,6 +1691,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; + err = 0; reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { -- cgit v1.2.2 From a15ff76c955d17cf58313097e4a24124da022b1d Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Fri, 15 Feb 2013 11:07:43 -0800 Subject: openvswitch: Call genlmsg_end in queue_userspace_packet Without genlmsg_end the upcall message ends (according to nlmsg_len) after the struct ovs_header. Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a2cd3e6d03a2..cae1062f94ba 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -395,6 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); + genlmsg_end(user_skb, upcall); err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: -- cgit v1.2.2 From 17b682a04841233f827073b327c6533e478dfcd4 Mon Sep 17 00:00:00 2001 From: Rich Lane Date: Tue, 19 Feb 2013 11:10:30 -0800 Subject: openvswitch: Fix parsing invalid LLC/SNAP ethertypes Before this patch, if an LLC/SNAP packet with OUI 00:00:00 had an ethertype less than 1536 the flow key given to userspace in the upcall would contain the invalid ethertype (for example, 3). If userspace attempted to insert a kernel flow for this key it would be rejected by ovs_flow_from_nlattrs. This patch allows OVS to pass the OFTest pktact.DirectBadLlcPackets. 
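The boundary being enforced is the IEEE 802.3 length/type rule; a minimal standalone sketch of the check, for illustration:

    #include <stdbool.h>
    #include <linux/types.h>
    #include <arpa/inet.h>  /* ntohs */

    /* Values below 1536 (0x0600) in the type field are 802.3 lengths,
     * not ethertypes, so a SNAP header carrying one must not be used
     * as the flow key's ethertype. */
    static bool snap_ethertype_is_valid(__be16 ethertype)
    {
            return ntohs(ethertype) >= 1536;
    }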
Signed-off-by: Rich Lane Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c3294cebc4f2..0c98d406124b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -484,7 +484,11 @@ static __be16 parse_ethertype(struct sk_buff *skb) return htons(ETH_P_802_2); __skb_pull(skb, sizeof(struct llc_snap_hdr)); - return llc->ethertype; + + if (ntohs(llc->ethertype) >= 1536) + return llc->ethertype; + + return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, -- cgit v1.2.2 From 7b024082b2b279af58e24ebd46e81777723d58da Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Feb 2013 17:32:26 +0800 Subject: openvswitch: fix the calculation of checksum for vlan header In vlan_insert_tag(), we insert a 4-byte VLAN header _after_ mac header: memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); ... veth->h_vlan_proto = htons(ETH_P_8021Q); ... veth->h_vlan_TCI = htons(vlan_tci); so after it, we should recompute the checksum to include these 4 bytes. skb->data still points to the mac header, therefore VLAN header is at (2 * ETH_ALEN = 12) bytes after it, not (ETH_HLEN = 14) bytes. This can also be observed via tcpdump: 0x0000: ffff ffff ffff 5254 005d 6f6e 8100 000a 0x0010: 0806 0001 0800 0604 0001 5254 005d 6f6e 0x0020: c0a8 026e 0000 0000 0000 c0a8 0282 Similar for __pop_vlan_tci(), the vlan header we remove is the one overwritten in: memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); Therefore the VLAN_HLEN = 4 bytes after 2 * ETH_ALEN is the part we want to sub from checksum. Cc: David S. Miller Cc: Jesse Gross Signed-off-by: Cong Wang Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index ac2defeeba83..d4d5363c7ba7 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -58,7 +58,7 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *current_tci = vhdr->h_vlan_TCI; @@ -115,7 +115,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data - + ETH_HLEN, VLAN_HLEN, 0)); + + (2 * ETH_ALEN), VLAN_HLEN, 0)); } __vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); -- cgit v1.2.2 From d176ca2a48ff2b5d7becfacdcbd1d72c73bd22d1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Feb 2013 19:41:26 +0800 Subject: openvswitch: remove some useless comments These comments are useless in upstream kernel. Cc: David S. Miller Cc: Jesse Gross Signed-off-by: Cong Wang Signed-off-by: Jesse Gross --- net/openvswitch/vport-netdev.c | 3 +-- net/openvswitch/vport.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 670cbc3518de..2130d61c384a 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -43,8 +43,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) /* Make our own copy of the packet. Otherwise we will mangle the * packet for anyone who came before us (e.g. 
tcpdump via AF_PACKET). - * (No one comes after us, since we tell handle_bridge() that we took - * the packet.) */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 70af0bedbac4..6255e48e64c4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -326,8 +326,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * @skb: skb that was received * * Must be called with rcu_read_lock. The packet cannot be shared and - * skb->data should point to the Ethernet header. The caller must have already - * called compute_ip_summed() to initialize the checksumming fields. + * skb->data should point to the Ethernet header. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { -- cgit v1.2.2 From a72d9002f80bffd7e4c7d60e5a9caa0cddffe894 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 28 Feb 2013 10:34:23 +0800 Subject: xen/xen-blkback: preq.dev is used without initialized If the call to xen_vbd_translate() fails, preq.dev will not be initialized. Use blkif->vbd.pdevice instead (still better to print the relevant info). Note that before commit 01c681d4c70d64cb72142a2823f27c4146a02e63 (xen/blkback: Don't trust the handle from the frontend.) the value was bogus, as it was the guest-provided value from req->u.rw.handle rather than the actual device. Signed-off-by: Chen Gang Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index de1f319f7bd7..6d1cc3df2ac6 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -904,7 +904,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? "read" : "write", preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); + preq.sector_number + preq.nr_sects, + blkif->vbd.pdevice); goto fail_response; } -- cgit v1.2.2 From cb29529ea0030e60ef1bbbf8399a43d397a51526 Mon Sep 17 00:00:00 2001 From: Tkhai Kirill Date: Sat, 23 Feb 2013 23:01:15 +0000 Subject: sunsu: Fix panic in case of nonexistent port at "console=ttySY" cmdline option If a machine has X (X < 4) sunsu ports and the cmdline option "console=ttySY" is passed, where X < Y <= 4, then the following panic happens: Unable to handle kernel NULL pointer dereference TPC: RPC: I7: Call Trace: [0000000000453a38] register_console+0x378/0x3e0 [0000000000576fa0] uart_add_one_port+0x2e0/0x340 [000000000057af40] su_probe+0x160/0x2e0 [00000000005b8a4c] platform_drv_probe+0xc/0x20 [00000000005b6c2c] driver_probe_device+0x12c/0x220 [00000000005b6da8] __driver_attach+0x88/0xa0 [00000000005b4df4] bus_for_each_dev+0x54/0xa0 [00000000005b5a54] bus_add_driver+0x154/0x260 [00000000005b7190] driver_register+0x50/0x180 [00000000006d250c] sunsu_init+0x18c/0x1e0 [00000000006c2668] do_one_initcall+0xe8/0x160 [00000000006c282c] kernel_init_freeable+0x12c/0x1e0 [0000000000603764] kernel_init+0x4/0x100 [0000000000405f64] ret_from_syscall+0x1c/0x2c [0000000000000000] (null) 1) Fix the panic; 2) Increment the registered port number on every successful probe. Signed-off-by: Kirill Tkhai CC: David Miller Signed-off-by: David S.
Miller --- drivers/tty/serial/sunsu.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index e343d6670854..451687cb9685 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -968,6 +968,7 @@ static struct uart_ops sunsu_pops = { #define UART_NR 4 static struct uart_sunsu_port sunsu_ports[UART_NR]; +static int nr_inst; /* Number of already registered ports */ #ifdef CONFIG_SERIO @@ -1337,13 +1338,8 @@ static int __init sunsu_console_setup(struct console *co, char *options) printk("Console: ttyS%d (SU)\n", (sunsu_reg.minor - 64) + co->index); - /* - * Check whether an invalid uart number has been specified, and - * if so, search for the first available port that does have - * console support. - */ - if (co->index >= UART_NR) - co->index = 0; + if (co->index > nr_inst) + return -ENODEV; port = &sunsu_ports[co->index].port; /* @@ -1408,7 +1404,6 @@ static enum su_type su_get_type(struct device_node *dp) static int su_probe(struct platform_device *op) { - static int inst; struct device_node *dp = op->dev.of_node; struct uart_sunsu_port *up; struct resource *rp; @@ -1418,16 +1413,16 @@ static int su_probe(struct platform_device *op) type = su_get_type(dp); if (type == SU_PORT_PORT) { - if (inst >= UART_NR) + if (nr_inst >= UART_NR) return -EINVAL; - up = &sunsu_ports[inst]; + up = &sunsu_ports[nr_inst]; } else { up = kzalloc(sizeof(*up), GFP_KERNEL); if (!up) return -ENOMEM; } - up->port.line = inst; + up->port.line = nr_inst; spin_lock_init(&up->port.lock); @@ -1461,6 +1456,8 @@ static int su_probe(struct platform_device *op) } dev_set_drvdata(&op->dev, up); + nr_inst++; + return 0; } @@ -1488,7 +1485,7 @@ static int su_probe(struct platform_device *op) dev_set_drvdata(&op->dev, up); - inst++; + nr_inst++; return 0; -- cgit v1.2.2 From a29564289973a519dae0d8936d2e4c414416e2e0 Mon Sep 17 00:00:00 2001 From: Daniel Hellstrom Date: Thu, 28 Feb 2013 04:31:55 +0000 Subject: sparc,leon: fix GRPCI2 device0 PCI config space access bus=0 slot=0 (device0) was used internally by the PCI host driver to access the PCI host controller itself, however that had the effect that PCI device0 was never accessible, which is wrong when the motherboard has connected PCI AD16 signal to a slot. A special case for accessing the PCI host controller itself is added with this patch, by setting bus to TGT. Signed-off-by: Daniel Hellstrom Signed-off-by: David S. 
Miller --- arch/sparc/kernel/leon_pci_grpci2.c | 41 +++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index fc4320886a3a..4d1487138d26 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -186,6 +186,8 @@ struct grpci2_cap_first { #define CAP9_IOMAP_OFS 0x20 #define CAP9_BARSIZE_OFS 0x24 +#define TGT 256 + struct grpci2_priv { struct leon_pci_info info; /* must be on top of this structure */ struct grpci2_regs *regs; @@ -237,8 +239,12 @@ static int grpci2_cfg_r32(struct grpci2_priv *priv, unsigned int bus, if (where & 0x3) return -EINVAL; - if (bus == 0 && PCI_SLOT(devfn) != 0) - devfn += (0x8 * 6); + if (bus == 0) { + devfn += (0x8 * 6); /* start at AD16=Device0 */ + } else if (bus == TGT) { + bus = 0; + devfn = 0; /* special case: bridge controller itself */ + } /* Select bus */ spin_lock_irqsave(&grpci2_dev_lock, flags); @@ -303,8 +309,12 @@ static int grpci2_cfg_w32(struct grpci2_priv *priv, unsigned int bus, if (where & 0x3) return -EINVAL; - if (bus == 0 && PCI_SLOT(devfn) != 0) - devfn += (0x8 * 6); + if (bus == 0) { + devfn += (0x8 * 6); /* start at AD16=Device0 */ + } else if (bus == TGT) { + bus = 0; + devfn = 0; /* special case: bridge controller itself */ + } /* Select bus */ spin_lock_irqsave(&grpci2_dev_lock, flags); @@ -368,7 +378,7 @@ static int grpci2_read_config(struct pci_bus *bus, unsigned int devfn, unsigned int busno = bus->number; int ret; - if (PCI_SLOT(devfn) > 15 || (PCI_SLOT(devfn) == 0 && busno == 0)) { + if (PCI_SLOT(devfn) > 15 || busno > 255) { *val = ~0; return 0; } @@ -406,7 +416,7 @@ static int grpci2_write_config(struct pci_bus *bus, unsigned int devfn, struct grpci2_priv *priv = grpci2priv; unsigned int busno = bus->number; - if (PCI_SLOT(devfn) > 15 || (PCI_SLOT(devfn) == 0 && busno == 0)) + if (PCI_SLOT(devfn) > 15 || busno > 255) return 0; #ifdef GRPCI2_DEBUG_CFGACCESS @@ -578,15 +588,15 @@ void grpci2_hw_init(struct grpci2_priv *priv) REGSTORE(regs->ahbmst_map[i], priv->pci_area); /* Get the GRPCI2 Host PCI ID */ - grpci2_cfg_r32(priv, 0, 0, PCI_VENDOR_ID, &priv->pciid); + grpci2_cfg_r32(priv, TGT, 0, PCI_VENDOR_ID, &priv->pciid); /* Get address to first (always defined) capability structure */ - grpci2_cfg_r8(priv, 0, 0, PCI_CAPABILITY_LIST, &capptr); + grpci2_cfg_r8(priv, TGT, 0, PCI_CAPABILITY_LIST, &capptr); /* Enable/Disable Byte twisting */ - grpci2_cfg_r32(priv, 0, 0, capptr+CAP9_IOMAP_OFS, &io_map); + grpci2_cfg_r32(priv, TGT, 0, capptr+CAP9_IOMAP_OFS, &io_map); io_map = (io_map & ~0x1) | (priv->bt_enabled ? 1 : 0); - grpci2_cfg_w32(priv, 0, 0, capptr+CAP9_IOMAP_OFS, io_map); + grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_IOMAP_OFS, io_map); /* Setup the Host's PCI Target BARs for other peripherals to access, * and do DMA to the host's memory. 
The target BARs can be sized and @@ -617,17 +627,18 @@ void grpci2_hw_init(struct grpci2_priv *priv) pciadr = 0; } } - grpci2_cfg_w32(priv, 0, 0, capptr+CAP9_BARSIZE_OFS+i*4, bar_sz); - grpci2_cfg_w32(priv, 0, 0, PCI_BASE_ADDRESS_0+i*4, pciadr); - grpci2_cfg_w32(priv, 0, 0, capptr+CAP9_BAR_OFS+i*4, ahbadr); + grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_BARSIZE_OFS+i*4, + bar_sz); + grpci2_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_0+i*4, pciadr); + grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_BAR_OFS+i*4, ahbadr); printk(KERN_INFO " TGT BAR[%d]: 0x%08x (PCI)-> 0x%08x\n", i, pciadr, ahbadr); } /* set as bus master and enable pci memory responses */ - grpci2_cfg_r32(priv, 0, 0, PCI_COMMAND, &data); + grpci2_cfg_r32(priv, TGT, 0, PCI_COMMAND, &data); data |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); - grpci2_cfg_w32(priv, 0, 0, PCI_COMMAND, data); + grpci2_cfg_w32(priv, TGT, 0, PCI_COMMAND, data); /* Enable Error respone (CPU-TRAP) on illegal memory access. */ REGSTORE(regs->ctrl, CTRL_ER | CTRL_PE); -- cgit v1.2.2 From 357b66fdc8ad4cea6e6336956a70742f961f0a4d Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 4 Mar 2013 00:34:34 -0500 Subject: ext4: ext4_split_extent should take care of extent zeroout When ext4_split_extent_at() ends up doing zeroout & conversion to initialized instead of split & conversion, ext4_split_extent() gets confused and can wrongly mark the extent back as uninitialized, resulting in the end IO code getting confused by large unwritten extents, which may result in data loss. An example of the problematic behavior is: lblk len lblk len ext4_split_extent() (ex=[1000,30,uninit], map=[1010,10]) ext4_split_extent_at() (split [1000,30,uninit] at 1020) ext4_ext_insert_extent() -> ENOSPC ext4_ext_zeroout() -> extent [1000,30] is now initialized ext4_split_extent_at() (split [1000,30,init] at 1010, MARK_UNINIT1 | MARK_UNINIT2) -> extent is split and parts marked as uninitialized Fix the problem by rechecking the extent type after the first ext4_split_extent_at() returns. None of the split_flags can be applied to an initialized extent, so this patch also adds a BUG_ON to prevent similar issues in the future. TESTCASE: https://github.com/dmonakhov/xfstests/commit/b8a55eb5ce28c6ff29e620ab090902fcd5833597 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/extents.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 372b2cbee07e..bef194a14437 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2943,6 +2943,10 @@ static int ext4_split_extent_at(handle_t *handle, newblock = split - ee_block + ext4_ext_pblock(ex); BUG_ON(split < ee_block || split >= (ee_block + ee_len)); + BUG_ON(!ext4_ext_is_uninitialized(ex) && + split_flag & (EXT4_EXT_MAY_ZEROOUT | + EXT4_EXT_MARK_UNINIT1 | + EXT4_EXT_MARK_UNINIT2)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3061,19 +3065,26 @@ static int ext4_split_extent(handle_t *handle, if (err) goto out; } - + /* + * Update path is required because previous ext4_split_extent_at() may + * result in split of original leaf or extent zeroout.
+ */ ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, map->m_lblk, path); if (IS_ERR(path)) return PTR_ERR(path); + depth = ext_depth(inode); + ex = path[depth].p_ext; + uninitialized = ext4_ext_is_uninitialized(ex); + split_flag1 = 0; if (map->m_lblk >= ee_block) { - split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_DATA_VALID2); - if (uninitialized) + split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; + if (uninitialized) { split_flag1 |= EXT4_EXT_MARK_UNINIT1; - if (split_flag & EXT4_EXT_MARK_UNINIT2) - split_flag1 |= EXT4_EXT_MARK_UNINIT2; + split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | + EXT4_EXT_MARK_UNINIT2); + } err = ext4_split_extent_at(handle, inode, path, map->m_lblk, split_flag1, flags); if (err) -- cgit v1.2.2 From ec22ba8edb507395c95fbc617eea26a6b2d98797 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 4 Mar 2013 00:36:06 -0500 Subject: ext4: disable merging of uninitialized extents Derived from Jan's patch: http://permalink.gmane.org/gmane.comp.file-systems.ext4/36470 Merging of uninitialized extents creates all sorts of interesting race possibilities when writeback / DIO races with fallocate. Thus ext4_convert_unwritten_extents_endio() has to deal with a case where the extent to be converted needs to be split out first. That isn't nice for two reasons: 1) It may need allocation of an extent tree block, so ENOSPC is possible. 2) It complicates the end_io handling code. So we disable merging of uninitialized extents, which allows us to simplify the code. Extents will get merged after they are converted to initialized ones. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/extents.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bef194a14437..60818ed1f6a9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, unsigned short ext1_ee_len, ext2_ee_len, max_len; /* - * Make sure that either both extents are uninitialized, or - * both are _not_. + * Make sure that both extents are initialized. We don't merge + * uninitialized extents so that we can be sure that end_io code has + * the extent that was written properly split out and conversion to + * initialized is trivial. */ - if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) + if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) return 0; if (ext4_ext_is_uninitialized(ex1)) -- cgit v1.2.2 From ff95ec22cd7faa0d8b58dcc4207f21502df7b00b Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 4 Mar 2013 00:41:05 -0500 Subject: ext4: add warning to ext4_convert_unwritten_extents_endio Splitting extents inside endio is a bad thing, but unfortunately it is still possible. In fact we are pretty close to the moment when all related issues will be fixed. Let's warn the developer if this is still the case.
Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/extents.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 60818ed1f6a9..265cb0e50c51 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3387,8 +3387,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, "block %llu, max_blocks %u\n", inode->i_ino, (unsigned long long)ee_block, ee_len); - /* If extent is larger than requested then split is required */ + /* If extent is larger than requested it is a clear sign that we still + * have some extent state machine issues left. So extent_split is still + * required. + * TODO: Once all related issues will be fixed this situation should be + * illegal. + */ if (ee_block != map->m_lblk || ee_len > map->m_len) { +#ifdef EXT4_DEBUG + ext4_warning("Inode (%ld) finished: extent logical block %llu," + " len %u; IO logical block %llu, len %u\n", + inode->i_ino, (unsigned long long)ee_block, ee_len, + (unsigned long long)map->m_lblk, map->m_len); +#endif err = ext4_split_unwritten_extents(handle, inode, map, path, EXT4_GET_BLOCKS_CONVERT); if (err < 0) -- cgit v1.2.2 From de99fcce1da7933a90198b80a2e896754ea3bdc8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 4 Mar 2013 00:43:32 -0500 Subject: ext4: remove unnecessary wait for extent conversion in ext4_fallocate() Now that we don't merge uninitialized extents anymore, ext4_fallocate() is free to operate on the inode while there are still some extent conversions pending - it won't disturb them in any way. Reviewed-by: Zheng Liu Reviewed-by: Dmitry Monakhov Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 265cb0e50c51..25c86aaa38d6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4392,8 +4392,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (len <= EXT_UNINIT_MAX_LEN << blkbits) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; - /* Prevent race condition between unwritten */ - ext4_flush_unwritten_io(inode); retry: while (ret >= 0 && ret < max_blocks) { map.m_lblk = map.m_lblk + ret; -- cgit v1.2.2 From 6ca470d7b5e7639b7925b3202e796282703b6d5d Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 4 Mar 2013 00:50:47 -0500 Subject: ext4: invalidate extent status tree during extent migration mext_replace_branches() will change inode's extents layout so we have to drop corresponding cache. 
TESTCASE: The 301st xfstest has not yet been accepted into the official xfstests branch and can be found here: https://github.com/dmonakhov/xfstests/commit/7b7efeee30a41109201e2040034e71db9b66ddc0 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara --- fs/ext4/move_extent.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index d78c33eed7e5..c1f15b203e98 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -666,6 +666,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, int replaced_count = 0; int dext_alen; + *err = ext4_es_remove_extent(orig_inode, from, count); + if (*err) + goto out; + + *err = ext4_es_remove_extent(donor_inode, from, count); + if (*err) + goto out; + /* Get the original extent for the block "orig_off" */ *err = get_ext_path(orig_inode, orig_off, &orig_path); if (*err) -- cgit v1.2.2 From 45d9550a0e7e9230606ca3c4c6f4dc6297848b2f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 19 Feb 2013 12:17:01 -0800 Subject: workqueue: allow more off-queue flag space When a work item is off-queue, its work->data contains WORK_STRUCT_* and WORK_OFFQ_* flags. As WORK_OFFQ_* flags are used only while a work item is off-queue, it can occupy bits of work->data which aren't used while off-queue. WORK_OFFQ_* currently only use bits used by on-queue CWQ pointer. As color bits aren't used while off-queue, there's no reason to not use them. Lower WORK_OFFQ_FLAG_BASE from WORK_STRUCT_FLAG_BITS to WORK_STRUCT_COLOR_SHIFT thus giving 4 more bits to off-queue flag space which is also used to record worker_pool ID while off-queue. This doesn't introduce any visible behavior difference. tj: Rewrote the description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8afab27cdbc2..5bd030f630a9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -68,7 +68,7 @@ enum { WORK_STRUCT_COLOR_BITS, /* data contains off-queue information when !WORK_STRUCT_PWQ */ - WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), -- cgit v1.2.2 From f5faa0774e07eada85b0c55ec789b3f337d01412 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 19 Feb 2013 12:17:02 -0800 Subject: workqueue: use %current instead of worker->task in worker_maybe_bind_and_lock() worker_maybe_bind_and_lock() uses both @worker->task and @current at the same time. As worker_maybe_bind_and_lock() can only be called by the current worker task, they are always the same. Update worker_maybe_bind_and_lock() to use %current consistently. This doesn't introduce any functional change. tj: Massaged the description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 81f2457811eb..f456433cf535 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1512,7 +1512,7 @@ static void worker_leave_idle(struct worker *worker) * flushed from cpu callbacks while cpu is going down, they are * guaranteed to execute on the cpu.
* - * This function is to be used by rogue workers and rescuers to bind + * This function is to be used by unbound workers and rescuers to bind * themselves to the target cpu and may race with cpu going down or * coming online. kthread_bind() can't be used because it may put the * worker to already dead cpu and set_cpus_allowed_ptr() can't be used @@ -1537,7 +1537,6 @@ static bool worker_maybe_bind_and_lock(struct worker *worker) __acquires(&pool->lock) { struct worker_pool *pool = worker->pool; - struct task_struct *task = worker->task; while (true) { /* @@ -1547,12 +1546,12 @@ __acquires(&pool->lock) * against POOL_DISASSOCIATED. */ if (!(pool->flags & POOL_DISASSOCIATED)) - set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu)); + set_cpus_allowed_ptr(current, get_cpu_mask(pool->cpu)); spin_lock_irq(&pool->lock); if (pool->flags & POOL_DISASSOCIATED) return false; - if (task_cpu(task) == pool->cpu && + if (task_cpu(current) == pool->cpu && cpumask_equal(¤t->cpus_allowed, get_cpu_mask(pool->cpu))) return true; -- cgit v1.2.2 From f36dc67b27a689eeb3631b11ebef17bbff257fbb Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 19 Feb 2013 12:17:02 -0800 Subject: workqueue: change argument of worker_maybe_bind_and_lock() to @pool worker_maybe_bind_and_lock() currently takes @worker but only cares about @worker->pool. This patch updates worker_maybe_bind_and_lock() to take @pool instead of @worker. This will be used to better define synchronization rules regarding rescuer->pool updates. This doesn't introduce any functional change. tj: Updated the comments and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f456433cf535..09545d445a55 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1504,8 +1504,10 @@ static void worker_leave_idle(struct worker *worker) } /** - * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock pool - * @worker: self + * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it + * @pool: target worker_pool + * + * Bind %current to the cpu of @pool if it is associated and lock @pool. * * Works which are scheduled while the cpu is online must at least be * scheduled to a worker which is bound to the cpu so that if they are @@ -1533,11 +1535,9 @@ static void worker_leave_idle(struct worker *worker) * %true if the associated pool is online (@worker is successfully * bound), %false if offline. 
*/ -static bool worker_maybe_bind_and_lock(struct worker *worker) +static bool worker_maybe_bind_and_lock(struct worker_pool *pool) __acquires(&pool->lock) { - struct worker_pool *pool = worker->pool; - while (true) { /* * The following call may fail, succeed or succeed @@ -1575,7 +1575,7 @@ __acquires(&pool->lock) static void idle_worker_rebind(struct worker *worker) { /* CPU may go down again inbetween, clear UNBOUND only on success */ - if (worker_maybe_bind_and_lock(worker)) + if (worker_maybe_bind_and_lock(worker->pool)) worker_clr_flags(worker, WORKER_UNBOUND); /* rebind complete, become available again */ @@ -1593,7 +1593,7 @@ static void busy_worker_rebind_fn(struct work_struct *work) { struct worker *worker = container_of(work, struct worker, rebind_work); - if (worker_maybe_bind_and_lock(worker)) + if (worker_maybe_bind_and_lock(worker->pool)) worker_clr_flags(worker, WORKER_UNBOUND); spin_unlock_irq(&worker->pool->lock); @@ -2038,7 +2038,7 @@ static bool manage_workers(struct worker *worker) * on @pool's current state. Try it and adjust * %WORKER_UNBOUND accordingly. */ - if (worker_maybe_bind_and_lock(worker)) + if (worker_maybe_bind_and_lock(pool)) worker->flags &= ~WORKER_UNBOUND; else worker->flags |= WORKER_UNBOUND; @@ -2358,7 +2358,7 @@ repeat: /* migrate to the target cpu if possible */ rescuer->pool = pool; - worker_maybe_bind_and_lock(rescuer); + worker_maybe_bind_and_lock(pool); /* * Slurp in all works issued via this workqueue and -- cgit v1.2.2 From b31041042a8cdece67f925e4bae55b5f5fd754ca Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 19 Feb 2013 12:17:02 -0800 Subject: workqueue: better define synchronization rule around rescuer->pool updates Rescuers visit different worker_pools to process work items from pools under pressure. Currently, rescuer->pool is updated outside any locking and when an outsider looks at a rescuer, there's no way to tell when and whether rescuer->pool is gonna change. While this doesn't currently cause any problem, it is nasty. With recent worker_maybe_bind_and_lock() changes, we can move rescuer->pool updates inside pool locks such that if rescuer->pool equals a locked pool, it's guaranteed to stay that way until the pool is unlocked. Move rescuer->pool inside pool->lock. This patch doesn't introduce any visible behavior difference. tj: Updated the description. 
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 ++- kernel/workqueue_internal.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 09545d445a55..fd9a28a13afd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2357,8 +2357,8 @@ repeat: mayday_clear_cpu(cpu, wq->mayday_mask); /* migrate to the target cpu if possible */ - rescuer->pool = pool; worker_maybe_bind_and_lock(pool); + rescuer->pool = pool; /* * Slurp in all works issued via this workqueue and @@ -2379,6 +2379,7 @@ repeat: if (keep_working(pool)) wake_up_worker(pool); + rescuer->pool = NULL; spin_unlock_irq(&pool->lock); } diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 07650264ec15..f9c887731e2b 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -32,6 +32,7 @@ struct worker { struct list_head scheduled; /* L: scheduled works */ struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ + /* L: for rescuers */ /* 64 bytes boundary on 64bit, 32 on 32bit */ unsigned long last_active; /* L: last active timestamp */ unsigned int flags; /* X: flags */ -- cgit v1.2.2 From 79e7654cae5a6d6cee333f0366023ecc3ff8abe0 Mon Sep 17 00:00:00 2001 From: Andrew Brownfield Date: Thu, 21 Feb 2013 14:01:50 -0500 Subject: ata_piix: Add MODULE_PARM_DESC to prefer_ms_hyperv In reference to the commit cd006086fa5d91414d8ff9ff2b78fbb593878e3c "ata_piix: defer disks to the Hyper-V drivers by default", this trivial patch adds a description to prefer_ms_hyperv. [rvrbovsk@redhat.com: MODULE_PARM_DESC() string formatting modified] Signed-off-by: Andrew Brownfield Signed-off-by: Radomir Vrbovsky Signed-off-by: Jeff Garzik --- drivers/ata/ata_piix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index d2ba439cfe54..ffdd32d22602 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -1547,6 +1547,10 @@ static bool piix_broken_system_poweroff(struct pci_dev *pdev) static int prefer_ms_hyperv = 1; module_param(prefer_ms_hyperv, int, 0); +MODULE_PARM_DESC(prefer_ms_hyperv, + "Prefer Hyper-V paravirtualization drivers instead of ATA, " + "0 - Use ATA drivers, " + "1 (Default) - Use the paravirtualization drivers."); static void piix_ignore_devices_quirk(struct ata_host *host) { -- cgit v1.2.2 From efda332cb66d78d6fdf6f98e7b067480f43624f2 Mon Sep 17 00:00:00 2001 From: James Ralston Date: Thu, 21 Feb 2013 11:08:51 -0800 Subject: ahci: Add Device IDs for Intel Wellsburg PCH This patch adds the RAID-mode SATA Device IDs for the Intel Wellsburg PCH Signed-off-by: James Ralston Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index a99112cfd8b1..6a67b07de494 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -281,6 +281,8 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x1f37), board_ahci }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci }, /* Avoton RAID */ + { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg RAID */ + { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */ { PCI_VDEVICE(INTEL, 0x8d04), board_ahci }, /* Wellsburg RAID */ { PCI_VDEVICE(INTEL, 0x8d06), board_ahci }, /* Wellsburg RAID */ -- cgit v1.2.2 From 
c99cc9a2f19c29108ddb2e1ceb6f3baa536357d2 Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Mon, 25 Feb 2013 04:44:07 +0530 Subject: sata_fsl: Remove redundant NULL check before kfree kfree on NULL pointer is a no-op. Signed-off-by: Syam Sidhardhan Signed-off-by: Jeff Garzik --- drivers/ata/sata_fsl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 124b2c1d9c0b..608f82fed632 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1511,8 +1511,7 @@ error_exit_with_cleanup: if (hcr_base) iounmap(hcr_base); - if (host_priv) - kfree(host_priv); + kfree(host_priv); return retval; } -- cgit v1.2.2 From dfd573644ce4ba1c9fbd625512bcfccf8c5ce7ff Mon Sep 17 00:00:00 2001 From: Sander Eikelenboom Date: Fri, 1 Mar 2013 12:16:42 +0100 Subject: libata-acpi.c: fix copy and paste mistake in ata_acpi_register_power_resource Fix a copy and paste mistake introduced in: commit bc9b6407bd6df3ab7189e5622816bbc11ae9d2d8 "ACPI / PM: Rework the handling of devices depending on power resources" Signed-off-by: Sander Eikelenboom Signed-off-by: Jeff Garzik --- drivers/ata/libata-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 0ea1018280bd..cb3eab6d520f 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -1027,7 +1027,7 @@ static void ata_acpi_register_power_resource(struct ata_device *dev) handle = ata_dev_acpi_handle(dev); if (handle) - acpi_dev_pm_remove_dependent(handle, &sdev->sdev_gendev); + acpi_dev_pm_add_dependent(handle, &sdev->sdev_gendev); } static void ata_acpi_unregister_power_resource(struct ata_device *dev) -- cgit v1.2.2 From e189551bf74f098bde39cb8fb72a722bb7286f99 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Sat, 2 Mar 2013 13:00:37 +0800 Subject: [libata] Avoid specialized TLA's in ZPODD's Kconfig ODD is not a common TLA for non-ATA people so they will get confused by its meaning when they are configuring the kernel. This patch fixed this problem by using ODD only after stating what it is. Signed-off-by: Aaron Lu Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 3e751b74615e..a5a3ebcbdd2c 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -59,15 +59,16 @@ config ATA_ACPI option libata.noacpi=1 config SATA_ZPODD - bool "SATA Zero Power ODD Support" + bool "SATA Zero Power Optical Disc Drive (ZPODD) support" depends on ATA_ACPI default n help - This option adds support for SATA ZPODD. It requires both - ODD and the platform support, and if enabled, will automatically - power on/off the ODD when certain condition is satisfied. This - does not impact user's experience of the ODD, only power is saved - when ODD is not in use(i.e. no disc inside). + This option adds support for SATA Zero Power Optical Disc + Drive (ZPODD). It requires both the ODD and the platform + support, and if enabled, will automatically power on/off the + ODD when certain condition is satisfied. This does not impact + end user's experience of the ODD, only power is saved when + the ODD is not in use (i.e. no disc inside). If unsure, say N. 
-- cgit v1.2.2 From b186affe0c9d39e4d3152cd34bffea8fe1fa17f4 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 4 Mar 2013 17:34:42 +0900 Subject: pata_samsung_cf: use module_platform_driver_probe() This patch uses module_platform_driver_probe() macro which makes the code smaller and simpler. Signed-off-by: Jingoo Han Signed-off-by: Jeff Garzik --- drivers/ata/pata_samsung_cf.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c index 70b0e01372b3..6ef27e98c508 100644 --- a/drivers/ata/pata_samsung_cf.c +++ b/drivers/ata/pata_samsung_cf.c @@ -661,18 +661,7 @@ static struct platform_driver pata_s3c_driver = { }, }; -static int __init pata_s3c_init(void) -{ - return platform_driver_probe(&pata_s3c_driver, pata_s3c_probe); -} - -static void __exit pata_s3c_exit(void) -{ - platform_driver_unregister(&pata_s3c_driver); -} - -module_init(pata_s3c_init); -module_exit(pata_s3c_exit); +module_platform_driver_probe(pata_s3c_driver, pata_s3c_probe); MODULE_AUTHOR("Abhilash Kesavan, "); MODULE_DESCRIPTION("low-level driver for Samsung PATA controller"); -- cgit v1.2.2 From b83e831a3c2d358fea4bf8fa13f387405f2880b6 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 5 Mar 2013 11:08:47 +0900 Subject: ARM: S5PV210: Fix PL330 DMA controller clkdev entries Since the DMA controller clocks are managed at amba bus level, the PL330 device clocks handling has been removed from the driver in commit 7c71b8eb("DMA: PL330: Remove redundant runtime_suspend/ resume functions") However, this left the S5PV210 platform with only clkdev entries linking "apb_pclk" clock conn_id to a dummy clock, rather than to corresponding platform PL330 DMAC clock. As a result the DMA controller is now attempted to be used on S5PV210 with the clock disabled and the driver fails with an error: dma-pl330 dma-pl330.0: PERIPH_ID 0x0, PCELL_ID 0x0 ! dma-pl330: probe of dma-pl330.0 failed with error -22 dma-pl330 dma-pl330.1: PERIPH_ID 0x0, PCELL_ID 0x0 ! dma-pl330: probe of dma-pl330.1 failed with error -22 Fix this by adding "apb_pclk" clkdev entries for the Peripheral DMA controllers 0/1 and removing the dummy apb_pclk clock. 
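For context, these lookups satisfy the clk_get() that the AMBA bus core performs when probing a PrimeCell device; a hedged sketch of that consumer side:

    /* Roughly what the AMBA bus layer does per PrimeCell device; the
     * clkdev entries added below let this lookup resolve for
     * dma-pl330.0/1 to a real, gateable clock. */
    struct clk *pclk = clk_get(dev, "apb_pclk");    /* matched by dev_id + con_id */

    if (IS_ERR(pclk))
            return PTR_ERR(pclk);
    clk_prepare_enable(pclk);   /* with the old dummy clock, nothing was gated on */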
Reported-by: Lonsn Tested-by: Lonsn Cc: Inderpal Singh Cc: Boojin Kim Signed-off-by: Sylwester Nawrocki Cc: # v3.7+ Signed-off-by: Kukjin Kim --- arch/arm/mach-s5pv210/clock.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c index fcdf52dbcc49..f051f53e35b7 100644 --- a/arch/arm/mach-s5pv210/clock.c +++ b/arch/arm/mach-s5pv210/clock.c @@ -214,11 +214,6 @@ static struct clk clk_pcmcdclk2 = { .name = "pcmcdclk", }; -static struct clk dummy_apb_pclk = { - .name = "apb_pclk", - .id = -1, -}; - static struct clk *clkset_vpllsrc_list[] = { [0] = &clk_fin_vpll, [1] = &clk_sclk_hdmi27m, @@ -305,18 +300,6 @@ static struct clk_ops clk_fout_apll_ops = { static struct clk init_clocks_off[] = { { - .name = "dma", - .devname = "dma-pl330.0", - .parent = &clk_hclk_psys.clk, - .enable = s5pv210_clk_ip0_ctrl, - .ctrlbit = (1 << 3), - }, { - .name = "dma", - .devname = "dma-pl330.1", - .parent = &clk_hclk_psys.clk, - .enable = s5pv210_clk_ip0_ctrl, - .ctrlbit = (1 << 4), - }, { .name = "rot", .parent = &clk_hclk_dsys.clk, .enable = s5pv210_clk_ip0_ctrl, @@ -573,6 +556,20 @@ static struct clk clk_hsmmc3 = { .ctrlbit = (1<<19), }; +static struct clk clk_pdma0 = { + .name = "pdma0", + .parent = &clk_hclk_psys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 3), +}; + +static struct clk clk_pdma1 = { + .name = "pdma1", + .parent = &clk_hclk_psys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 4), +}; + static struct clk *clkset_uart_list[] = { [6] = &clk_mout_mpll.clk, [7] = &clk_mout_epll.clk, @@ -1075,6 +1072,8 @@ static struct clk *clk_cdev[] = { &clk_hsmmc1, &clk_hsmmc2, &clk_hsmmc3, + &clk_pdma0, + &clk_pdma1, }; /* Clock initialisation code */ @@ -1333,6 +1332,8 @@ static struct clk_lookup s5pv210_clk_lookup[] = { CLKDEV_INIT(NULL, "spi_busclk0", &clk_p), CLKDEV_INIT("s5pv210-spi.0", "spi_busclk1", &clk_sclk_spi0.clk), CLKDEV_INIT("s5pv210-spi.1", "spi_busclk1", &clk_sclk_spi1.clk), + CLKDEV_INIT("dma-pl330.0", "apb_pclk", &clk_pdma0), + CLKDEV_INIT("dma-pl330.1", "apb_pclk", &clk_pdma1), }; void __init s5pv210_register_clocks(void) @@ -1361,6 +1362,5 @@ void __init s5pv210_register_clocks(void) for (ptr = 0; ptr < ARRAY_SIZE(clk_cdev); ptr++) s3c_disable_clocks(clk_cdev[ptr], 1); - s3c24xx_register_clock(&dummy_apb_pclk); s3c_pwmclk_init(); } -- cgit v1.2.2 From 6551fbdfd8b85d1ab5822ac98abb4fb449bcfae0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 28 Feb 2013 16:28:41 +0100 Subject: s390: critical section cleanup vs. machine checks The current machine check code uses the registers stored by the machine in the lowcore at __LC_GPREGS_SAVE_AREA as the registers of the interrupted context. The registers 0-7 of a user process can get clobbered if a machine check interrupts the execution of a critical section in entry[64].S. The reason is that the critical section cleanup code may need to modify the PSW and the registers for the previous context to get to the end of a critical section. If registers 0-7 have to be replaced the relevant copy will be in the registers, which invalidates the copy in the lowcore. The machine check handler needs to explicitly store registers 0-7 to the stack.
Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 3 ++- arch/s390/kernel/entry64.S | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 550228523267..94feff7d6132 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -636,7 +636,8 @@ ENTRY(mcck_int_handler) UPDATE_VTIME %r14,%r15,__LC_MCCK_ENTER_TIMER mcck_skip: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+32,__LC_PANIC_STACK,PAGE_SHIFT - mvc __PT_R0(64,%r11),__LC_GPREGS_SAVE_AREA + stm %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32 stm %r8,%r9,__PT_PSW(%r11) xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) l %r1,BASED(.Ldo_machine_check) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9c837c101297..2e6d60c55f90 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -678,8 +678,9 @@ ENTRY(mcck_int_handler) UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER LAST_BREAK %r14 mcck_skip: - lghi %r14,__LC_GPREGS_SAVE_AREA - mvc __PT_R0(128,%r11),0(%r14) + lghi %r14,__LC_GPREGS_SAVE_AREA+64 + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs -- cgit v1.2.2 From a7bb1ae749e8051434e54936dcefd37ef1cfa753 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 28 Feb 2013 11:16:26 +0100 Subject: s390/mm: fix vmemmap size calculation The size of the vmemmap must be a multiple of PAGES_PER_SECTION, since the common code always initializes the vmemmap in such pieces. So we must round up in order to not have a too small vmemmap. Fixes an IPL crash on 31 bit with more than 1920MB. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a5360de85ec7..29268859d8ee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -571,6 +571,8 @@ static void __init setup_memory_end(void) /* Split remaining virtual space between 1:1 mapping & vmemmap array */ tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page)); + /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */ + tmp = SECTION_ALIGN_UP(tmp); tmp = VMALLOC_START - tmp * sizeof(struct page); tmp &= ~((vmax >> 11) - 1); /* align to page table level */ tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS); -- cgit v1.2.2 From f6a70a07079518280022286a1dceb797d12e1edf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 4 Mar 2013 14:14:11 +0100 Subject: s390/mm: fix flush_tlb_kernel_range() Our flush_tlb_kernel_range() implementation calls __tlb_flush_mm() with &init_mm as argument. __tlb_flush_mm() however will only flush tlbs for the passed in mm if its mm_cpumask is not empty. For the init_mm however its mm_cpumask has never any bits set. Which in turn means that our flush_tlb_kernel_range() implementation doesn't work at all. This can be easily verified with a vmalloc/vfree loop which allocates a page, writes to it and then frees the page again. A crash will follow almost instantly. To fix this remove the cpumask_empty() check in __tlb_flush_mm() since there shouldn't be too many mms with a zero mm_cpumask, besides the init_mm of course. 
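A sketch of the reproducer described above, as a throwaway test module (hypothetical, for illustration only):

    #include <linux/module.h>
    #include <linux/mm.h>
    #include <linux/string.h>
    #include <linux/vmalloc.h>

    /* Allocate, write, and free vmalloc pages in a tight loop; with a
     * broken flush_tlb_kernel_range() a stale translation for a recycled
     * page crashes almost instantly, as described above. */
    static int __init vm_loop_init(void)
    {
            int i;

            for (i = 0; i < 100000; i++) {
                    void *p = vmalloc(PAGE_SIZE);

                    if (!p)
                            return -ENOMEM;
                    memset(p, 0x5a, PAGE_SIZE);
                    vfree(p);
            }
            return 0;
    }
    module_init(vm_loop_init);
    MODULE_LICENSE("GPL");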
Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlbflush.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 1d8fe2b17ef6..6b32af30878c 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -74,8 +74,6 @@ static inline void __tlb_flush_idte(unsigned long asce) static inline void __tlb_flush_mm(struct mm_struct * mm) { - if (unlikely(cpumask_empty(mm_cpumask(mm)))) - return; /* * If the machine has IDTE we prefer to do a per mm flush * on all cpus instead of doing a local flush if the mm -- cgit v1.2.2 From fbe2d3616cee37418d832b30130811888c5aaf34 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 21 Feb 2013 21:44:17 -0800 Subject: EDAC: Make sysfs functions static Fixes lots of sparse warnings here. Signed-off-by: Stephen Hemminger Signed-off-by: Borislav Petkov --- drivers/edac/edac_mc_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 4f4b6137d74e..0cbf670efa23 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -143,7 +143,7 @@ static const char *edac_caps[] = { * and the per-dimm/per-rank one */ #define DEVICE_ATTR_LEGACY(_name, _mode, _show, _store) \ - struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) + static struct device_attribute dev_attr_legacy_##_name = __ATTR(_name, _mode, _show, _store) struct dev_ch_attribute { struct device_attribute attr; -- cgit v1.2.2 From 0a96d4d36978c8aeebf5fce1f3041c2d60f23ac0 Mon Sep 17 00:00:00 2001 From: Padmavathi Venna Date: Thu, 7 Mar 2013 10:33:07 +0900 Subject: ARM: EXYNOS: Add #dma-cells for generic dma binding support for PL330 This patch adds #dma-cells property to PL330 DMA controller nodes for supporting generic dma dt bindings on samsung exynos platforms. #dma-channels and #dma-requests are not required now but added in advance. 
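With #dma-cells = <1> in place, client nodes can request channels through the generic DMA bindings; a hypothetical client node (not part of this patch, request numbers invented for illustration) would look like:

	i2s_0: i2s@3830000 {
		/* ... */
		dmas = <&pdma0 10>, <&pdma0 9>;
		dma-names = "tx", "rx";
	};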
Signed-off-by: Padmavathi Venna Acked-by: Arnd Bergmann Signed-off-by: Kukjin Kim --- arch/arm/boot/dts/exynos4.dtsi | 9 +++++++++ arch/arm/boot/dts/exynos5440.dtsi | 6 ++++++ 2 files changed, 15 insertions(+) diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index e1347fceb5bc..1a62bcf18aa3 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -275,18 +275,27 @@ compatible = "arm,pl330", "arm,primecell"; reg = <0x12680000 0x1000>; interrupts = <0 35 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; pdma1: pdma@12690000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x12690000 0x1000>; interrupts = <0 36 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; mdma1: mdma@12850000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x12850000 0x1000>; interrupts = <0 34 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <1>; }; }; }; diff --git a/arch/arm/boot/dts/exynos5440.dtsi b/arch/arm/boot/dts/exynos5440.dtsi index 5f3562ad6746..9a99755920c0 100644 --- a/arch/arm/boot/dts/exynos5440.dtsi +++ b/arch/arm/boot/dts/exynos5440.dtsi @@ -142,12 +142,18 @@ compatible = "arm,pl330", "arm,primecell"; reg = <0x120000 0x1000>; interrupts = <0 34 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; pdma1: pdma@121B0000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x121000 0x1000>; interrupts = <0 35 0>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; }; -- cgit v1.2.2 From ad4e1a7caf937ad395ced585ca85a7d14395dc80 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Sun, 17 Feb 2013 19:42:48 +0800 Subject: gpio: fix wrong checking condition for gpio range Since index counts up from 0, the "while (index++)" condition evaluates to 0 (false) at the end of the first pass, so the do/while loop exits and no further ranges are checked. This is not the loop behaviour intended here. Replace it with an endless loop that keeps parsing the "gpio-ranges" property until it ends. Signed-off-by: Haojian Zhuang Reviewed-by: Linus Walleij Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index a71a54a3e3f7..5150df6cba08 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -193,7 +193,7 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) if (!np) return; - do { + for (;; index++) { ret = of_parse_phandle_with_args(np, "gpio-ranges", "#gpio-range-cells", index, &pinspec); if (ret) @@ -222,8 +222,7 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) if (ret) break; - - } while (index++); + } } #else -- cgit v1.2.2 From 8360cb5f389ebd36b708978e0f776a285a2deb5a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 28 Feb 2013 12:07:27 +0100 Subject: s390/scm_blk: fix request number accounting If a block device driver cannot fetch all requests from the block layer, it is its responsibility to call the request function again at a later time. Normally this would be done after the next irq for the underlying device is handled. However, in situations where we have no outstanding request, we have to schedule the request function for a later time. This is determined using an internal counter of requests issued to the hardware. In some cases where we gave a request back to the block layer unhandled, the number of queued requests was not adjusted.
Fix this class of failures by adjusting queued_requests in all functions used to give a request back to the block layer. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/block/scm_blk.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 9978ad4433cb..d9c7e940fa35 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -195,14 +195,18 @@ void scm_request_requeue(struct scm_request *scmrq) scm_release_cluster(scmrq); blk_requeue_request(bdev->rq, scmrq->request); + atomic_dec(&bdev->queued_reqs); scm_request_done(scmrq); scm_ensure_queue_restart(bdev); } void scm_request_finish(struct scm_request *scmrq) { + struct scm_blk_dev *bdev = scmrq->bdev; + scm_release_cluster(scmrq); blk_end_request_all(scmrq->request, scmrq->error); + atomic_dec(&bdev->queued_reqs); scm_request_done(scmrq); } @@ -231,11 +235,13 @@ static void scm_blk_request(struct request_queue *rq) return; } if (scm_need_cluster_request(scmrq)) { + atomic_inc(&bdev->queued_reqs); blk_start_request(req); scm_initiate_cluster_request(scmrq); return; } scm_request_prepare(scmrq); + atomic_inc(&bdev->queued_reqs); blk_start_request(req); ret = scm_start_aob(scmrq->aob); @@ -244,7 +250,6 @@ static void scm_blk_request(struct request_queue *rq) scm_request_requeue(scmrq); return; } - atomic_inc(&bdev->queued_reqs); } } @@ -310,7 +315,6 @@ static void scm_blk_tasklet(struct scm_blk_dev *bdev) } scm_request_finish(scmrq); - atomic_dec(&bdev->queued_reqs); spin_lock_irqsave(&bdev->lock, flags); } spin_unlock_irqrestore(&bdev->lock, flags); -- cgit v1.2.2 From 93481c90200c50c7874b6a773acc87095ee3907d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 28 Feb 2013 12:07:38 +0100 Subject: s390/scm_drv: extend notify callback Extend the notify callback of scm_driver by an event parameter to allow drivers to distinguish between different notifications.
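Existing scm drivers only need a mechanical update for the new callback signature; a minimal sketch of an adapted driver (hypothetical names, for illustration only):

	static void my_notify(struct scm_device *scmdev, enum scm_event event)
	{
		switch (event) {
		case SCM_CHANGE:
			dev_info(&scmdev->dev, "increment capabilities changed\n");
			break;
		}
	}

	static struct scm_driver my_scm_driver = {
		.drv	= { .name = "my_scm", .owner = THIS_MODULE, },
		.notify	= my_notify,
	};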
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/eadm.h | 4 +++- drivers/s390/block/scm_drv.c | 16 ++++++++++------ drivers/s390/cio/scm.c | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index 8d4847191ecc..a4a1ea49003e 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -96,11 +96,13 @@ struct scm_device { #define OP_STATE_TEMP_ERR 2 #define OP_STATE_PERM_ERR 3 +enum scm_event {SCM_CHANGE}; + struct scm_driver { struct device_driver drv; int (*probe) (struct scm_device *scmdev); int (*remove) (struct scm_device *scmdev); - void (*notify) (struct scm_device *scmdev); + void (*notify) (struct scm_device *scmdev, enum scm_event event); void (*handler) (struct scm_device *scmdev, void *data, int error); }; diff --git a/drivers/s390/block/scm_drv.c b/drivers/s390/block/scm_drv.c index 9fa0a908607b..ff8558c4fe25 100644 --- a/drivers/s390/block/scm_drv.c +++ b/drivers/s390/block/scm_drv.c @@ -13,12 +13,16 @@ #include #include "scm_blk.h" -static void notify(struct scm_device *scmdev) +static void scm_notify(struct scm_device *scmdev, enum scm_event event) { - pr_info("%lu: The capabilities of the SCM increment changed\n", - (unsigned long) scmdev->address); - SCM_LOG(2, "State changed"); - SCM_LOG_STATE(2, scmdev); + switch (event) { + case SCM_CHANGE: + pr_info("%lu: The capabilities of the SCM increment changed\n", + (unsigned long) scmdev->address); + SCM_LOG(2, "State changed"); + SCM_LOG_STATE(2, scmdev); + break; + } } static int scm_probe(struct scm_device *scmdev) @@ -64,7 +68,7 @@ static struct scm_driver scm_drv = { .name = "scm_block", .owner = THIS_MODULE, }, - .notify = notify, + .notify = scm_notify, .probe = scm_probe, .remove = scm_remove, .handler = scm_blk_irq, diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index bcf20f3aa51b..31ac26499979 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -211,7 +211,7 @@ static void scmdev_update(struct scm_device *scmdev, struct sale *sale) goto out; scmdrv = to_scm_drv(scmdev->dev.driver); if (changed && scmdrv->notify) - scmdrv->notify(scmdev); + scmdrv->notify(scmdev, SCM_CHANGE); out: device_unlock(&scmdev->dev); if (changed) -- cgit v1.2.2 From 4fa3c019640ef776e393345ed35d9ec5c51aa3c1 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 28 Feb 2013 12:07:48 +0100 Subject: s390/scm_blk: suspend writes Stop writing to scm after certain error conditions such as a concurrent firmware upgrade. Resume to normal state once scm_blk_set_available is called (due to an scm availability notification). 
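The suspend/resume logic amounts to a two-state machine; sketched here as a comment, using only the state and symbol names introduced by the patch below:

	/*
	 * SCM_OPER --(aob response.eqc == EQC_WR_PROHIBIT)--> SCM_WR_PROHIBIT
	 *    ^                                                      |
	 *    +--(scm_blk_set_available(), availability notification)+
	 *
	 * While in SCM_WR_PROHIBIT, scm_permit_request() rejects WRITE
	 * requests and the queue is restarted later; reads pass through
	 * unchanged.
	 */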
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/eadm.h | 2 ++ drivers/s390/block/scm_blk.c | 61 ++++++++++++++++++++++++++++++++++++++++---- drivers/s390/block/scm_blk.h | 2 ++ 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index a4a1ea49003e..78141be88b3d 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -34,6 +34,8 @@ struct arsb { u32 reserved[4]; } __packed; +#define EQC_WR_PROHIBIT 22 + struct msb { u8 fmt:4; u8 oc:4; diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index d9c7e940fa35..5ac9c935c151 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -135,6 +135,11 @@ static const struct block_device_operations scm_blk_devops = { .release = scm_release, }; +static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req) +{ + return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT; +} + static void scm_request_prepare(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; @@ -222,6 +227,10 @@ static void scm_blk_request(struct request_queue *rq) if (req->cmd_type != REQ_TYPE_FS) continue; + if (!scm_permit_request(bdev, req)) { + scm_ensure_queue_restart(bdev); + return; + } scmrq = scm_request_fetch(); if (!scmrq) { SCM_LOG(5, "no request"); @@ -285,6 +294,38 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, int error) tasklet_hi_schedule(&bdev->tasklet); } +static void scm_blk_handle_error(struct scm_request *scmrq) +{ + struct scm_blk_dev *bdev = scmrq->bdev; + unsigned long flags; + + if (scmrq->error != -EIO) + goto restart; + + /* For -EIO the response block is valid. */ + switch (scmrq->aob->response.eqc) { + case EQC_WR_PROHIBIT: + spin_lock_irqsave(&bdev->lock, flags); + if (bdev->state != SCM_WR_PROHIBIT) + pr_info("%lu: Write access to the SCM increment is suspended\n", + (unsigned long) bdev->scmdev->address); + bdev->state = SCM_WR_PROHIBIT; + spin_unlock_irqrestore(&bdev->lock, flags); + goto requeue; + default: + break; + } + +restart: + if (!scm_start_aob(scmrq->aob)) + return; + +requeue: + spin_lock_irqsave(&bdev->rq_lock, flags); + scm_request_requeue(scmrq); + spin_unlock_irqrestore(&bdev->rq_lock, flags); +} + static void scm_blk_tasklet(struct scm_blk_dev *bdev) { struct scm_request *scmrq; @@ -298,11 +339,8 @@ static void scm_blk_tasklet(struct scm_blk_dev *bdev) spin_unlock_irqrestore(&bdev->lock, flags); if (scmrq->error && scmrq->retries-- > 0) { - if (scm_start_aob(scmrq->aob)) { - spin_lock_irqsave(&bdev->rq_lock, flags); - scm_request_requeue(scmrq); - spin_unlock_irqrestore(&bdev->rq_lock, flags); - } + scm_blk_handle_error(scmrq); + /* Request restarted or requeued, handle next. 
*/ spin_lock_irqsave(&bdev->lock, flags); continue; @@ -336,6 +374,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) } bdev->scmdev = scmdev; + bdev->state = SCM_OPER; spin_lock_init(&bdev->rq_lock); spin_lock_init(&bdev->lock); INIT_LIST_HEAD(&bdev->finished_requests); @@ -400,6 +439,18 @@ void scm_blk_dev_cleanup(struct scm_blk_dev *bdev) put_disk(bdev->gendisk); } +void scm_blk_set_available(struct scm_blk_dev *bdev) +{ + unsigned long flags; + + spin_lock_irqsave(&bdev->lock, flags); + if (bdev->state == SCM_WR_PROHIBIT) + pr_info("%lu: Write access to the SCM increment is restored\n", + (unsigned long) bdev->scmdev->address); + bdev->state = SCM_OPER; + spin_unlock_irqrestore(&bdev->lock, flags); +} + static int __init scm_blk_init(void) { int ret = -EINVAL; diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h index 3c1ccf494647..8b387b32fd62 100644 --- a/drivers/s390/block/scm_blk.h +++ b/drivers/s390/block/scm_blk.h @@ -21,6 +21,7 @@ struct scm_blk_dev { spinlock_t rq_lock; /* guard the request queue */ spinlock_t lock; /* guard the rest of the blockdev */ atomic_t queued_reqs; + enum {SCM_OPER, SCM_WR_PROHIBIT} state; struct list_head finished_requests; #ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE struct list_head cluster_list; @@ -48,6 +49,7 @@ struct scm_request { int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *); void scm_blk_dev_cleanup(struct scm_blk_dev *); +void scm_blk_set_available(struct scm_blk_dev *); void scm_blk_irq(struct scm_device *, void *, int); void scm_request_finish(struct scm_request *); -- cgit v1.2.2 From aebfa669d9fe77876f120d3d9a28fee240fe5a8e Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 28 Feb 2013 12:07:55 +0100 Subject: s390/scm: process availability Let the bus code process scm availability information and notify scm device drivers about the new state. 
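Wired together with the scm_blk patch above, an availability event travels through the following call chain (assembled from the hunks below):

	chsc_process_sei_nt0()                      /* sei_area->cc == 14 */
	  -> chsc_process_sei_scm_avail()           /* checks sei_area->rs == 7 */
	    -> scm_process_availability_information()
	      -> bus_for_each_dev(&scm_bus_type, ..., scm_dev_avail)
	        -> scmdrv->notify(scmdev, SCM_AVAIL)
	          -> scm_blk_set_available(bdev)    /* resumes suspended writes */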
Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/eadm.h | 2 +- drivers/s390/block/scm_drv.c | 7 +++++++ drivers/s390/cio/chsc.c | 17 +++++++++++++++++ drivers/s390/cio/chsc.h | 2 ++ drivers/s390/cio/scm.c | 16 ++++++++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index 78141be88b3d..dc9200ca32ed 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -98,7 +98,7 @@ struct scm_device { #define OP_STATE_TEMP_ERR 2 #define OP_STATE_PERM_ERR 3 -enum scm_event {SCM_CHANGE}; +enum scm_event {SCM_CHANGE, SCM_AVAIL}; struct scm_driver { struct device_driver drv; diff --git a/drivers/s390/block/scm_drv.c b/drivers/s390/block/scm_drv.c index ff8558c4fe25..5f6180d6ff08 100644 --- a/drivers/s390/block/scm_drv.c +++ b/drivers/s390/block/scm_drv.c @@ -15,6 +15,8 @@ static void scm_notify(struct scm_device *scmdev, enum scm_event event) { + struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev); + switch (event) { case SCM_CHANGE: pr_info("%lu: The capabilities of the SCM increment changed\n", @@ -22,6 +24,11 @@ static void scm_notify(struct scm_device *scmdev, enum scm_event event) SCM_LOG(2, "State changed"); SCM_LOG_STATE(2, scmdev); break; + case SCM_AVAIL: + SCM_LOG(2, "Increment available"); + SCM_LOG_STATE(2, scmdev); + scm_blk_set_available(bdev); + break; } } diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 31ceef1beb8b..e16c553f6556 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -433,6 +433,20 @@ static void chsc_process_sei_scm_change(struct chsc_sei_nt0_area *sei_area) " failed (rc=%d).\n", ret); } +static void chsc_process_sei_scm_avail(struct chsc_sei_nt0_area *sei_area) +{ + int ret; + + CIO_CRW_EVENT(4, "chsc: scm available information\n"); + if (sei_area->rs != 7) + return; + + ret = scm_process_availability_information(); + if (ret) + CIO_CRW_EVENT(0, "chsc: process availability information" + " failed (rc=%d).\n", ret); +} + static void chsc_process_sei_nt2(struct chsc_sei_nt2_area *sei_area) { switch (sei_area->cc) { @@ -468,6 +482,9 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area) case 12: /* scm change notification */ chsc_process_sei_scm_change(sei_area); break; + case 14: /* scm available notification */ + chsc_process_sei_scm_avail(sei_area); + break; default: /* other stuff */ CIO_CRW_EVENT(2, "chsc: sei nt0 unhandled cc=%d\n", sei_area->cc); diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 227e05f674b3..349d5fc47196 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -156,8 +156,10 @@ int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token); #ifdef CONFIG_SCM_BUS int scm_update_information(void); +int scm_process_availability_information(void); #else /* CONFIG_SCM_BUS */ static inline int scm_update_information(void) { return 0; } +static inline int scm_process_availability_information(void) { return 0; } #endif /* CONFIG_SCM_BUS */ diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c index 31ac26499979..46ec25632e8b 100644 --- a/drivers/s390/cio/scm.c +++ b/drivers/s390/cio/scm.c @@ -297,6 +297,22 @@ int scm_update_information(void) return ret; } +static int scm_dev_avail(struct device *dev, void *unused) +{ + struct scm_driver *scmdrv = to_scm_drv(dev->driver); + struct scm_device *scmdev = to_scm_dev(dev); + + if (dev->driver && scmdrv->notify) + 
scmdrv->notify(scmdev, SCM_AVAIL); + + return 0; +} + +int scm_process_availability_information(void) +{ + return bus_for_each_dev(&scm_bus_type, NULL, NULL, scm_dev_avail); +} + static int __init scm_init(void) { int ret; -- cgit v1.2.2 From a7dc19b8652c862d5b7c4d2339bd3c428bd29c4a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 7 Mar 2013 15:09:24 +0000 Subject: clockevents: Don't allow dummy broadcast timers Currently tick_check_broadcast_device doesn't reject clock_event_devices with CLOCK_EVT_FEAT_DUMMY, and may select them in preference to real hardware if they have a higher rating value. In this situation, the dummy timer is responsible for broadcasting to itself, and the core clockevents code may attempt to call non-existent callbacks for programming the dummy, eventually leading to a panic. This patch makes tick_check_broadcast_device always reject dummy timers, preventing this problem. Signed-off-by: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: Jon Medhurst (Tixy) Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 2fb8cb88df8d..7f32fe0e52cd 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -67,7 +67,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) */ int tick_check_broadcast_device(struct clock_event_device *dev) { - if ((tick_broadcast_device.evtdev && + if ((dev->features & CLOCK_EVT_FEAT_DUMMY) || + (tick_broadcast_device.evtdev && tick_broadcast_device.evtdev->rating >= dev->rating) || (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; -- cgit v1.2.2 From 5ca1088f10d6179a610067ebedc56edc7d98b986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 7 Mar 2013 09:02:38 +0100 Subject: Revert "mtd: bcm47xxpart: improve probing of nvram partition" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit be3781b71ac03723b552dc156931620634ef1b22. Some CFE bootloaders have NVRAM at offset 0x1000. With that patch we were detecting such bootloaders' NVRAM as a standard NVRAM partition. Changing anything in this pseudo-NVRAM resulted in a corrupted bootloader and a bricked device! Signed-off-by: Rafał Miłecki Signed-off-by: David Woodhouse --- drivers/mtd/bcm47xxpart.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 63feb75cc8e0..4552afbad4d0 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -19,6 +19,12 @@ /* 10 parts were found on sflash on Netgear WNDR4500 */ #define BCM47XXPART_MAX_PARTS 12 +/* + * Amount of bytes we read when analyzing each block of flash memory. + * Set it big enough to allow detecting partition and reading important data.
+ */ +#define BCM47XXPART_BYTES_TO_READ 0x404 + /* Magics */ #define BOARD_DATA_MAGIC 0x5246504D /* MPFR */ #define POT_MAGIC1 0x54544f50 /* POTT */ @@ -57,17 +63,14 @@ static int bcm47xxpart_parse(struct mtd_info *master, struct trx_header *trx; int trx_part = -1; int last_trx_part = -1; - int max_bytes_to_read = 0x8004; if (blocksize <= 0x10000) blocksize = 0x10000; - if (blocksize == 0x20000) - max_bytes_to_read = 0x18004; /* Alloc */ parts = kzalloc(sizeof(struct mtd_partition) * BCM47XXPART_MAX_PARTS, GFP_KERNEL); - buf = kzalloc(max_bytes_to_read, GFP_KERNEL); + buf = kzalloc(BCM47XXPART_BYTES_TO_READ, GFP_KERNEL); /* Parse block by block looking for magics */ for (offset = 0; offset <= master->size - blocksize; @@ -82,7 +85,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Read beginning of the block */ - if (mtd_read(master, offset, max_bytes_to_read, + if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ, &bytes_read, (uint8_t *)buf) < 0) { pr_err("mtd_read error while parsing (offset: 0x%X)!\n", offset); @@ -97,16 +100,9 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Standard NVRAM */ - if (buf[0x000 / 4] == NVRAM_HEADER || - buf[0x1000 / 4] == NVRAM_HEADER || - buf[0x8000 / 4] == NVRAM_HEADER || - (blocksize == 0x20000 && ( - buf[0x10000 / 4] == NVRAM_HEADER || - buf[0x11000 / 4] == NVRAM_HEADER || - buf[0x18000 / 4] == NVRAM_HEADER))) { + if (buf[0x000 / 4] == NVRAM_HEADER) { bcm47xxpart_add_part(&parts[curr_part++], "nvram", offset, 0); - offset = rounddown(offset, blocksize); continue; } -- cgit v1.2.2 From 91d542f4dcc231749c36114ed8e26bb27d4521e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 7 Mar 2013 09:02:39 +0100 Subject: mtd: bcm47xxpart: look for NVRAM at the end of device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVRAM is always placed at the end of the device and does not have to start at the beginning of a block, so check a few possible offsets. Signed-off-by: Rafał Miłecki Signed-off-by: David Woodhouse --- drivers/mtd/bcm47xxpart.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 4552afbad4d0..9279a9174f84 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -63,6 +63,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, struct trx_header *trx; int trx_part = -1; int last_trx_part = -1; + int possible_nvram_sizes[] = { 0x8000, 0xF000, 0x10000, }; if (blocksize <= 0x10000) blocksize = 0x10000; @@ -99,13 +100,6 @@ static int bcm47xxpart_parse(struct mtd_info *master, continue; } - /* Standard NVRAM */ - if (buf[0x000 / 4] == NVRAM_HEADER) { - bcm47xxpart_add_part(&parts[curr_part++], "nvram", - offset, 0); - continue; - } - /* * board_data starts with board_id which differs across boards, * but we can use 'MPFR' (hopefully) magic at 0x100 @@ -174,6 +168,30 @@ static int bcm47xxpart_parse(struct mtd_info *master, continue; } } + + /* Look for NVRAM at the end of the last block.
*/ + for (i = 0; i < ARRAY_SIZE(possible_nvram_sizes); i++) { + if (curr_part > BCM47XXPART_MAX_PARTS) { + pr_warn("Reached maximum number of partitions, scanning stopped!\n"); + break; + } + + offset = master->size - possible_nvram_sizes[i]; + if (mtd_read(master, offset, 0x4, &bytes_read, + (uint8_t *)buf) < 0) { + pr_err("mtd_read error while reading at offset 0x%X!\n", + offset); + continue; + } + + /* Standard NVRAM */ + if (buf[0] == NVRAM_HEADER) { + bcm47xxpart_add_part(&parts[curr_part++], "nvram", + master->size - blocksize, 0); + break; + } + } + kfree(buf); /* -- cgit v1.2.2 From b141e811a0763bb107af4cd99d456193ccdb8053 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 11:04:45 +0100 Subject: NFC: llcp: Decrease socket ack log when accepting a connection This is really difficult to test with real NFC devices, but without this fix an LLCP server will eventually refuse new connections. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 5332751943a9..5c7cdf3f2a83 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -278,6 +278,8 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, pr_debug("Returning sk state %d\n", sk->sk_state); + sk_acceptq_removed(parent); + return sk; } -- cgit v1.2.2 From 3536da06db0baa675f32de608c0a4c0f5ef0e9ff Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 15:40:04 +0100 Subject: NFC: llcp: Clean local timers and works when removing a device Whenever an adapter is removed we must clean all the local structures, especially the timers and scheduled work. Otherwise those asynchronous threads will eventually try to access the freed nfc_dev pointer if an LLCP link is up. Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 7f8266dd14cb..77e1d97b3996 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -142,20 +142,25 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) return local; } -static void local_release(struct kref *ref) +static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - struct nfc_llcp_local *local; - - local = container_of(ref, struct nfc_llcp_local, ref); - - list_del(&local->list); - nfc_llcp_socket_release(local, false); + nfc_llcp_socket_release(local, listen); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); +} + +static void local_release(struct kref *ref) +{ + struct nfc_llcp_local *local; + + local = container_of(ref, struct nfc_llcp_local, ref); + + list_del(&local->list); + local_cleanup(local, false); kfree(local); } @@ -1427,6 +1432,8 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev) return; } + local_cleanup(local, false); + nfc_llcp_local_put(local); } -- cgit v1.2.2 From 44e9ac45754a182e8121bf137368452365d4cc4b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 13 Feb 2013 09:45:50 +0000 Subject: ARM: shmobile: marzen: Include mmc/host.h mmc/host.h provides MMC_CAP_SD_HIGHSPEED which is used in board-marzen.c This resolves a build problem observed when compiling with "mmc: tmio: remove unused and deprecated symbols" applied. 
Acked-by: Guennadi Liakhovetski Signed-off-by: Simon Horman --- arch/arm/mach-shmobile/board-marzen.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index cdcb799e802f..fec49ebc359a 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.2 From e6a3a4bb856a6fba551b43376c80f45836132710 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 16:33:30 +0100 Subject: NFC: llcp: Clean raw sockets from nfc_llcp_socket_release Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 77e1d97b3996..8a35423ead54 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -133,6 +133,35 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) } write_unlock(&local->sockets.lock); + + /* + * If we want to keep the listening sockets alive, + * we don't touch the RAW ones. + */ + if (listen == true) + return; + + write_lock(&local->raw_sockets.lock); + + sk_for_each_safe(sk, tmp, &local->raw_sockets.head) { + llcp_sock = nfc_llcp_sock(sk); + + bh_lock_sock(sk); + + nfc_llcp_socket_purge(llcp_sock); + + sk->sk_state = LLCP_CLOSED; + + sk->sk_state_change(sk); + + bh_unlock_sock(sk); + + sock_orphan(sk); + + sk_del_node_init(sk); + } + + write_unlock(&local->raw_sockets.lock); } struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) -- cgit v1.2.2 From 3bbc0ceb7ac2bf694d31362eea2c71a680e5deeb Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 21 Feb 2013 17:01:06 +0100 Subject: NFC: llcp: Report error to pending sockets when a device is removed Signed-off-by: Samuel Ortiz --- net/nfc/llcp/llcp.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 8a35423ead54..b530afadd76c 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -68,7 +68,8 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock) } } -static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) +static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, + int err) { struct sock *sk; struct hlist_node *tmp; @@ -100,7 +101,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_accept_unlink(accept_sk); + if (err) + accept_sk->sk_err = err; accept_sk->sk_state = LLCP_CLOSED; + accept_sk->sk_state_change(sk); bh_unlock_sock(accept_sk); @@ -123,7 +127,10 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) continue; } + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; + sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -150,8 +157,9 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen) nfc_llcp_socket_purge(llcp_sock); + if (err) + sk->sk_err = err; sk->sk_state = LLCP_CLOSED; - sk->sk_state_change(sk); bh_unlock_sock(sk); @@ -173,7 +181,7 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) static void local_cleanup(struct nfc_llcp_local *local, bool listen) { - nfc_llcp_socket_release(local, listen); + nfc_llcp_socket_release(local, listen, ENXIO); del_timer_sync(&local->link_timer); skb_queue_purge(&local->tx_queue); cancel_work_sync(&local->tx_work); @@ -1382,7 +1390,7 @@ void 
nfc_llcp_mac_is_down(struct nfc_dev *dev) return; /* Close and purge all existing sockets */ - nfc_llcp_socket_release(local, true); + nfc_llcp_socket_release(local, true, 0); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, -- cgit v1.2.2 From 5f0fabf84d7b52f979dcbafa3d3c530c60d9a92c Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Thu, 7 Mar 2013 20:00:16 -0800 Subject: mwifiex: fix potential out-of-boundary access to ibss rate table smatch found this error: CHECK drivers/net/wireless/mwifiex/join.c drivers/net/wireless/mwifiex/join.c:1121 mwifiex_cmd_802_11_ad_hoc_join() error: testing array offset 'i' after use. Cc: # 3.0+ Signed-off-by: Bing Zhao Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/join.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mwifiex/join.c b/drivers/net/wireless/mwifiex/join.c index 246aa62a4817..2fe0ceba4400 100644 --- a/drivers/net/wireless/mwifiex/join.c +++ b/drivers/net/wireless/mwifiex/join.c @@ -1117,10 +1117,9 @@ mwifiex_cmd_802_11_ad_hoc_join(struct mwifiex_private *priv, adhoc_join->bss_descriptor.bssid, adhoc_join->bss_descriptor.ssid); - for (i = 0; bss_desc->supported_rates[i] && - i < MWIFIEX_SUPPORTED_RATES; - i++) - ; + for (i = 0; i < MWIFIEX_SUPPORTED_RATES && + bss_desc->supported_rates[i]; i++) + ; rates_size = i; /* Copy Data Rates from the Rates recorded in scan response */ -- cgit v1.2.2 From 664899786cb49cb52f620e06ac19c0be524a7cfa Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 27 Feb 2013 14:10:30 -0600 Subject: rtlwifi: rtl8192cu: Fix schedule while atomic bug splat When run at debug 3 or higher, rtl8192cu reports a BUG as follows: BUG: scheduling while atomic: kworker/u:0/5281/0x00000002 INFO: lockdep is turned off. Modules linked in: rtl8192cu rtl8192c_common rtlwifi fuse af_packet bnep bluetooth b43 mac80211 cfg80211 ipv6 snd_hda_codec_conexant kvm_amd k vm snd_hda_intel snd_hda_codec bcma rng_core snd_pcm ssb mmc_core snd_seq snd_timer snd_seq_device snd i2c_nforce2 sr_mod pcmcia forcedeth i2c_core soundcore cdrom sg serio_raw k8temp hwmon joydev ac battery pcmcia_core snd_page_alloc video button wmi autofs4 ext4 mbcache jbd2 crc16 thermal processor scsi_dh_alua scsi_dh_hp_sw scsi_dh_rdac scsi_dh_emc scsi_dh ata_generic pata_acpi pata_amd [last unloaded: rtlwifi] Pid: 5281, comm: kworker/u:0 Tainted: G W 3.8.0-wl+ #119 Call Trace: [] __schedule_bug+0x62/0x70 [] __schedule+0x730/0xa30 [] ? usb_hcd_link_urb_to_ep+0x19/0xa0 [] schedule+0x24/0x70 [] schedule_timeout+0x18c/0x2f0 [] ? wait_for_common+0x40/0x180 [] ? ehci_urb_enqueue+0xf1/0xee0 [] ? trace_hardirqs_on+0xd/0x10 [] wait_for_common+0xe5/0x180 [] ? try_to_wake_up+0x2d0/0x2d0 [] wait_for_completion_timeout+0xe/0x10 [] usb_start_wait_urb+0x8c/0x100 [] usb_control_msg+0xd9/0x130 [] _usb_read_sync+0xcd/0x140 [rtlwifi] [] _usb_read32_sync+0xe/0x10 [rtlwifi] [] rtl92cu_update_hal_rate_table+0x1a5/0x1f0 [rtl8192cu] The cause is a synchronous read from routine rtl92cu_update_hal_rate_table(). The resulting output is not critical, thus the debug statement is deleted. Reported-by: Jussi Kivilinna Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. 
Linville --- drivers/net/wireless/rtlwifi/rtl8192cu/hw.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c index b1ccff474c79..3c6e18c38e30 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c @@ -2058,8 +2058,6 @@ void rtl92cu_update_hal_rate_table(struct ieee80211_hw *hw, (shortgi_rate << 4) | (shortgi_rate); } rtl_write_dword(rtlpriv, REG_ARFR0 + ratr_index * 4, ratr_value); - RT_TRACE(rtlpriv, COMP_RATR, DBG_DMESG, "%x\n", - rtl_read_dword(rtlpriv, REG_ARFR0)); } void rtl92cu_update_hal_rate_mask(struct ieee80211_hw *hw, u8 rssi_level) -- cgit v1.2.2 From eb2834285cf172856cd12f66892fc7467935ebed Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 8 Mar 2013 15:18:28 -0800 Subject: workqueue: fix possible pool stall bug in wq_unbind_fn() Since multiple pools per cpu have been introduced, wq_unbind_fn() has a subtle bug which may theoretically stall work item processing. The problem is two-fold. * wq_unbind_fn() depends on the worker executing wq_unbind_fn() itself to start unbound chain execution, which worked fine when there was only a single pool. With multiple pools, only the pool which is running wq_unbind_fn() - the highpri one - is guaranteed to have such kick-off. The other pool could stall when its busy workers block. * The current code is setting WORKER_UNBIND / POOL_DISASSOCIATED of the two pools in succession without initiating work execution in between. Because setting the flags requires grabbing assoc_mutex which is held while new workers are created, this could lead to stalls if a pool's manager is waiting for the previous pool's work items to release memory. This is almost purely theoretical though. Update wq_unbind_fn() such that it sets WORKER_UNBIND / POOL_DISASSOCIATED, goes over schedule() and explicitly kicks off execution for a pool and then moves on to the next one. tj: Updated comments and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- kernel/workqueue.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 81f2457811eb..604801b91cba 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3446,28 +3446,34 @@ static void wq_unbind_fn(struct work_struct *work) spin_unlock_irq(&pool->lock); mutex_unlock(&pool->assoc_mutex); - } - /* - * Call schedule() so that we cross rq->lock and thus can guarantee - * sched callbacks see the %WORKER_UNBOUND flag. This is necessary - * as scheduler callbacks may be invoked from other cpus. - */ - schedule(); + /* + * Call schedule() so that we cross rq->lock and thus can + * guarantee sched callbacks see the %WORKER_UNBOUND flag. + * This is necessary as scheduler callbacks may be invoked + * from other cpus. + */ + schedule(); - /* - * Sched callbacks are disabled now. Zap nr_running. After this, - * nr_running stays zero and need_more_worker() and keep_working() - * are always true as long as the worklist is not empty. Pools on - * @cpu now behave as unbound (in terms of concurrency management) - * pools which are served by workers tied to the CPU. - * - * On return from this function, the current worker would trigger - * unbound chain execution of pending work items if other workers - * didn't already. - */ - for_each_std_worker_pool(pool, cpu) + /* + * Sched callbacks are disabled now. Zap nr_running.
+ * After this, nr_running stays zero and need_more_worker() + * and keep_working() are always true as long as the + * worklist is not empty. This pool now behaves as an + * unbound (in terms of concurrency management) pool which + * are served by workers tied to the pool. + */ atomic_set(&pool->nr_running, 0); + + /* + * With concurrency management just turned off, a busy + * worker blocking could lead to lengthy stalls. Kick off + * unbound chain execution of currently pending work items. + */ + spin_lock_irq(&pool->lock); + wake_up_worker(pool); + spin_unlock_irq(&pool->lock); + } } /* -- cgit v1.2.2 From 6659a20a76e0213ad84c33ad35024278811ed010 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 7 Mar 2013 13:50:16 +0530 Subject: ARC: MAINTAINERS update for ARC * Remove the non-functional mailing list placeholder * Add entries for arc_uart/Documentation Signed-off-by: Vineet Gupta --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e95b1e944eb7..8e2ced44bfe4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7696,9 +7696,10 @@ F: include/linux/swiotlb.h SYNOPSYS ARC ARCHITECTURE M: Vineet Gupta -L: linux-snps-arc@vger.kernel.org S: Supported F: arch/arc/ +F: Documentation/devicetree/bindings/arc/ +F: drivers/tty/serial/arc-uart.c SYSV FILESYSTEM M: Christoph Hellwig -- cgit v1.2.2 From e2f1a3bd8cdc046ece133a9e9ee6bd94727225b1 Mon Sep 17 00:00:00 2001 From: Nikola Pajkovsky Date: Tue, 26 Feb 2013 16:12:05 +0100 Subject: amd_iommu_init: remove __init from amd_iommu_erratum_746_workaround commit 318fe78 ("IOMMU, AMD Family15h Model10-1Fh erratum 746 Workaround") added amd_iommu_erratum_746_workaround and it's marked as __init, which is wrong WARNING: drivers/iommu/built-in.o(.text+0x639c): Section mismatch in reference from the function iommu_init_pci() to the function .init.text:amd_iommu_erratum_746_workaround() The function iommu_init_pci() references the function __init amd_iommu_erratum_746_workaround(). This is often because iommu_init_pci lacks a __init annotation or the annotation of amd_iommu_erratum_746_workaround is wrong. Signed-off-by: Nikola Pajkovsky Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index b6ecddb63cd0..e3c2d74b7684 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -980,7 +980,7 @@ static void __init free_iommu_all(void) * BIOS should disable L2B micellaneous clock gating by setting * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b */ -static void __init amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) +static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) { u32 value; -- cgit v1.2.2 From ae1915892b4774655a5dc01039ce94efdff4b3fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Mar 2013 23:16:48 +0100 Subject: iommu: OMAP: build only on OMAP2+ The OMAP IOMMU driver intentionally fails to build on OMAP1 platforms, so we should not allow enabling it there. 
Signed-off-by: Arnd Bergmann Cc: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: Ohad Ben-Cohen Cc: Tony Lindgren Cc: Omar Ramirez Luna Acked-by: Tony Lindgren Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 5c514d0711d1..c332fb98480d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -130,7 +130,7 @@ config IRQ_REMAP # OMAP IOMMU support config OMAP_IOMMU bool "OMAP IOMMU Support" - depends on ARCH_OMAP + depends on ARCH_OMAP2PLUS select IOMMU_API config OMAP_IOVMM -- cgit v1.2.2 From bd384364c1185ecb01f3b8242c915ccb5921c60d Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 10 Mar 2013 20:48:59 -0400 Subject: ext4: avoid a potential overflow in ext4_es_can_be_merged() Check the length of an extent to avoid a potential overflow in ext4_es_can_be_merged(). Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Dmitry Monakhov --- fs/ext4/extents_status.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 95796a1b7522..37f9a2d8fd04 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) static int ext4_es_can_be_merged(struct extent_status *es1, struct extent_status *es2) { - if (es1->es_lblk + es1->es_len != es2->es_lblk) + if (ext4_es_status(es1) != ext4_es_status(es2)) return 0; - if (ext4_es_status(es1) != ext4_es_status(es2)) + if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) return 0; - if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && - (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2))) + if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) return 0; - return 1; + if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && + (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2))) + return 1; + + if (ext4_es_is_hole(es1)) + return 1; + + /* we need to check delayed extent is without unwritten status */ + if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1)) + return 1; + + return 0; } static struct extent_status * -- cgit v1.2.2 From 921f266bc6bfe6ebb599c559f10443af314c19ec Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 10 Mar 2013 21:01:03 -0400 Subject: ext4: add self-testing infrastructure to do a sanity check This commit adds a self-testing infrastructure, like the one the extent tree has, to do a sanity check of the extent status tree. Now that the status tree is used as an extent cache, we'd better make sure that it caches the right result. After applying this commit, we will get a lot of messages like the following when we run xfstests. ... kernel: ES len assertation failed for inode: 230 retval 1 != map->m_len 3 in ext4_map_blocks (allocation) ... kernel: ES cache assertation failed for inode: 230 es_cached ex [974/2/4781/20] != found ex [974/1/4781/1000] ... kernel: ES insert assertation failed for inode: 635 ex_status [0/45/21388/w] != es_status [44/1/21432/u] ...
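Note that the new checks ship disabled: the hunk in extents_status.h defines ES_AGGRESSIVE_TEST__ (with trailing underscores), so every #ifdef ES_AGGRESSIVE_TEST block compiles out by default. To actually arm the self-checks, a developer edits that define, e.g.:

	/* fs/ext4/extents_status.h -- arm the extent status self-checks */
	#define ES_AGGRESSIVE_TEST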
Signed-off-by: Dmitry Monakhov Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents_status.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/extents_status.h | 6 ++ fs/ext4/inode.c | 96 ++++++++++++++++++++++++++ 3 files changed, 277 insertions(+) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 37f9a2d8fd04..d2a8cb74676b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -399,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) return es; } +#ifdef ES_AGGRESSIVE_TEST +static void ext4_es_insert_extent_ext_check(struct inode *inode, + struct extent_status *es) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent *ex; + ext4_lblk_t ee_block; + ext4_fsblk_t ee_start; + unsigned short ee_len; + int depth, ee_status, es_status; + + path = ext4_ext_find_extent(inode, es->es_lblk, NULL); + if (IS_ERR(path)) + return; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + + if (ex) { + + ee_block = le32_to_cpu(ex->ee_block); + ee_start = ext4_ext_pblock(ex); + ee_len = ext4_ext_get_actual_len(ex); + + ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; + es_status = ext4_es_is_unwritten(es) ? 1 : 0; + + /* + * Make sure ex and es are not overlap when we try to insert + * a delayed/hole extent. + */ + if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { + if (in_range(es->es_lblk, ee_block, ee_len)) { + pr_warn("ES insert assertation failed for " + "inode: %lu we can find an extent " + "at block [%d/%d/%llu/%c], but we " + "want to add an delayed/hole extent " + "[%d/%d/%llu/%llx]\n", + inode->i_ino, ee_block, ee_len, + ee_start, ee_status ? 'u' : 'w', + es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + } + goto out; + } + + /* + * We don't check ee_block == es->es_lblk, etc. because es + * might be a part of whole extent, vice versa. + */ + if (es->es_lblk < ee_block || + ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { + pr_warn("ES insert assertation failed for inode: %lu " + "ex_status [%d/%d/%llu/%c] != " + "es_status [%d/%d/%llu/%c]\n", inode->i_ino, + ee_block, ee_len, ee_start, + ee_status ? 'u' : 'w', es->es_lblk, es->es_len, + ext4_es_pblock(es), es_status ? 'u' : 'w'); + goto out; + } + + if (ee_status ^ es_status) { + pr_warn("ES insert assertation failed for inode: %lu " + "ex_status [%d/%d/%llu/%c] != " + "es_status [%d/%d/%llu/%c]\n", inode->i_ino, + ee_block, ee_len, ee_start, + ee_status ? 'u' : 'w', es->es_lblk, es->es_len, + ext4_es_pblock(es), es_status ? 'u' : 'w'); + } + } else { + /* + * We can't find an extent on disk. So we need to make sure + * that we don't want to add an written/unwritten extent. + */ + if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { + pr_warn("ES insert assertation failed for inode: %lu " + "can't find an extent at block %d but we want " + "to add an written/unwritten extent " + "[%d/%d/%llu/%llx]\n", inode->i_ino, + es->es_lblk, es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + } + } +out: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } +} + +static void ext4_es_insert_extent_ind_check(struct inode *inode, + struct extent_status *es) +{ + struct ext4_map_blocks map; + int retval; + + /* + * Here we call ext4_ind_map_blocks to lookup a block mapping because + * 'Indirect' structure is defined in indirect.c. So we couldn't + * access direct/indirect tree from outside. It is too dirty to define + * this function in indirect.c file. 
+ */ + + map.m_lblk = es->es_lblk; + map.m_len = es->es_len; + + retval = ext4_ind_map_blocks(NULL, inode, &map, 0); + if (retval > 0) { + if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) { + /* + * We want to add a delayed/hole extent but this + * block has been allocated. + */ + pr_warn("ES insert assertation failed for inode: %lu " + "We can find blocks but we want to add a " + "delayed/hole extent [%d/%d/%llu/%llx]\n", + inode->i_ino, es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + return; + } else if (ext4_es_is_written(es)) { + if (retval != es->es_len) { + pr_warn("ES insert assertation failed for " + "inode: %lu retval %d != es_len %d\n", + inode->i_ino, retval, es->es_len); + return; + } + if (map.m_pblk != ext4_es_pblock(es)) { + pr_warn("ES insert assertation failed for " + "inode: %lu m_pblk %llu != " + "es_pblk %llu\n", + inode->i_ino, map.m_pblk, + ext4_es_pblock(es)); + return; + } + } else { + /* + * We don't need to check unwritten extent because + * indirect-based file doesn't have it. + */ + BUG_ON(1); + } + } else if (retval == 0) { + if (ext4_es_is_written(es)) { + pr_warn("ES insert assertation failed for inode: %lu " + "We can't find the block but we want to add " + "an written extent [%d/%d/%llu/%llx]\n", + inode->i_ino, es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + return; + } + } +} + +static inline void ext4_es_insert_extent_check(struct inode *inode, + struct extent_status *es) +{ + /* + * We don't need to worry about the race condition because + * caller takes i_data_sem locking. + */ + BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + ext4_es_insert_extent_ext_check(inode, es); + else + ext4_es_insert_extent_ind_check(inode, es); +} +#else +static inline void ext4_es_insert_extent_check(struct inode *inode, + struct extent_status *es) +{ +} +#endif + static int __es_insert_extent(struct inode *inode, struct extent_status *newes) { struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; @@ -481,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ext4_es_store_status(&newes, status); trace_ext4_es_insert_extent(inode, &newes); + ext4_es_insert_extent_check(inode, &newes); + write_lock(&EXT4_I(inode)->i_es_lock); err = __es_remove_extent(inode, lblk, end); if (err != 0) diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f190dfe969da..56140ad4150b 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -20,6 +20,12 @@ #define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif +/* + * With ES_AGGRESSIVE_TEST defined, the result of es caching will be + * checked with old map_block's result. + */ +#define ES_AGGRESSIVE_TEST__ + /* * These flags live in the high bits of extent_status.es_pblk */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 95a0c62c5683..3186a43fa4b0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -482,6 +482,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, return num; } +#ifdef ES_AGGRESSIVE_TEST +static void ext4_map_blocks_es_recheck(handle_t *handle, + struct inode *inode, + struct ext4_map_blocks *es_map, + struct ext4_map_blocks *map, + int flags) +{ + int retval; + + map->m_flags = 0; + /* + * There is a race window that the result is not the same. + * e.g. xfstests #223 when dioread_nolock enables. The reason + * is that we lookup a block mapping in extent status tree with + * out taking i_data_sem. 
So at the time the unwritten extent + * could be converted. + */ + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + down_read((&EXT4_I(inode)->i_data_sem)); + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { + retval = ext4_ext_map_blocks(handle, inode, map, flags & + EXT4_GET_BLOCKS_KEEP_SIZE); + } else { + retval = ext4_ind_map_blocks(handle, inode, map, flags & + EXT4_GET_BLOCKS_KEEP_SIZE); + } + if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) + up_read((&EXT4_I(inode)->i_data_sem)); + /* + * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag + * because it shouldn't be marked in es_map->m_flags. + */ + map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY); + + /* + * We don't check m_len because extent will be collpased in status + * tree. So the m_len might not equal. + */ + if (es_map->m_lblk != map->m_lblk || + es_map->m_flags != map->m_flags || + es_map->m_pblk != map->m_pblk) { + printk("ES cache assertation failed for inode: %lu " + "es_cached ex [%d/%d/%llu/%x] != " + "found ex [%d/%d/%llu/%x] retval %d flags %x\n", + inode->i_ino, es_map->m_lblk, es_map->m_len, + es_map->m_pblk, es_map->m_flags, map->m_lblk, + map->m_len, map->m_pblk, map->m_flags, + retval, flags); + } +} +#endif /* ES_AGGRESSIVE_TEST */ + /* * The ext4_map_blocks() function tries to look up the requested blocks, * and returns if the blocks are already mapped. @@ -509,6 +561,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, { struct extent_status es; int retval; +#ifdef ES_AGGRESSIVE_TEST + struct ext4_map_blocks orig_map; + + memcpy(&orig_map, map, sizeof(*map)); +#endif map->m_flags = 0; ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," @@ -531,6 +588,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, } else { BUG_ON(1); } +#ifdef ES_AGGRESSIVE_TEST + ext4_map_blocks_es_recheck(handle, inode, map, + &orig_map, flags); +#endif goto found; } @@ -551,6 +612,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, int ret; unsigned long long status; +#ifdef ES_AGGRESSIVE_TEST + if (retval != map->m_len) { + printk("ES len assertation failed for inode: %lu " + "retval %d != map->m_len %d " + "in %s (lookup)\n", inode->i_ino, retval, + map->m_len, __func__); + } +#endif + status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && @@ -643,6 +713,15 @@ found: int ret; unsigned long long status; +#ifdef ES_AGGRESSIVE_TEST + if (retval != map->m_len) { + printk("ES len assertation failed for inode: %lu " + "retval %d != map->m_len %d " + "in %s (allocation)\n", inode->i_ino, retval, + map->m_len, __func__); + } +#endif + status = map->m_flags & EXT4_MAP_UNWRITTEN ? 
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && @@ -1768,6 +1847,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, struct extent_status es; int retval; sector_t invalid_block = ~((sector_t) 0xffff); +#ifdef ES_AGGRESSIVE_TEST + struct ext4_map_blocks orig_map; + + memcpy(&orig_map, map, sizeof(*map)); +#endif if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) invalid_block = ~0; @@ -1809,6 +1893,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, else BUG_ON(1); +#ifdef ES_AGGRESSIVE_TEST + ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); +#endif return retval; } @@ -1873,6 +1960,15 @@ add_delayed: int ret; unsigned long long status; +#ifdef ES_AGGRESSIVE_TEST + if (retval != map->m_len) { + printk("ES len assertation failed for inode: %lu " + "retval %d != map->m_len %d " + "in %s (lookup)\n", inode->i_ino, retval, + map->m_len, __func__); + } +#endif + status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, -- cgit v1.2.2 From cdee78433c138c2f2018a6884673739af2634787 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 10 Mar 2013 21:08:52 -0400 Subject: ext4: fix wrong m_len value after unwritten extent conversion The ext4_ext_handle_uninitialized_extents() function was assuming the return value of ext4_ext_map_blocks() is equal to map->m_len. This incorrect assumption was harmless until we started to use the status tree as an extent cache, because we need to update the status tree according to the 'm_len' value. Meanwhile this commit sets the EXT4_MAP_MAPPED flag after unwritten extent conversion. The omission shouldn't have caused a bug because we update the status tree by checking the EXT4_MAP_UNWRITTEN flag. But it should be fixed. After applying this commit, the following error message from the self-testing infrastructure disappears. ... kernel: ES len assertation failed for inode: 230 retval 1 != map->m_len 3 in ext4_map_blocks (allocation) ... Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Dmitry Monakhov --- fs/ext4/extents.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 25c86aaa38d6..110e85a1f82a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3650,6 +3650,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, path, map->m_len); } else err = ret; + map->m_flags |= EXT4_MAP_MAPPED; + if (allocated > map->m_len) + allocated = map->m_len; + map->m_len = allocated; goto out2; } /* buffered IO case */ -- cgit v1.2.2 From adb2355104b2109e06ba5276485d187d023b2fd2 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 10 Mar 2013 21:13:05 -0400 Subject: ext4: update extent status tree after an extent is zeroed out When we try to split an extent, this extent could be zeroed out and marked as initialized. But we don't know this in ext4_map_blocks because it only returns the length of the allocated extent. Meanwhile we will mark this extent as uninitialized because we only check m_flags. This commit updates the extent status tree when we try to split an unwritten extent. We don't need to worry about the status of this extent because we always mark it as initialized.
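A concrete failure sequence before this fix, as a worked example (block numbers invented for illustration):

	/* status tree caches an unwritten extent: lblk 100, len 10 */
	ext4_split_extent_at(lblk = 104) fails with -ENOSPC
	  -> ext4_ext_zeroout() writes zeroes over the blocks and the
	     extent is marked initialized on disk
	/* on disk: written; in the status tree: still unwritten */

The new ext4_es_zeroout() helper closes this gap by re-inserting the zeroed range into the status tree with EXTENT_STATUS_WRITTEN.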
Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Dmitry Monakhov --- fs/ext4/extents.c | 35 +++++++++++++++++++++++++++++++---- fs/ext4/extents_status.c | 17 +++++++++++++++++ fs/ext4/extents_status.h | 3 +++ fs/ext4/inode.c | 10 ++++++++++ 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 110e85a1f82a..7e37018d1753 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2925,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle, { ext4_fsblk_t newblock; ext4_lblk_t ee_block; - struct ext4_extent *ex, newex, orig_ex; + struct ext4_extent *ex, newex, orig_ex, zero_ex; struct ext4_extent *ex2 = NULL; unsigned int ee_len, depth; int err = 0; @@ -2996,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { - if (split_flag & EXT4_EXT_DATA_VALID1) + if (split_flag & EXT4_EXT_DATA_VALID1) { err = ext4_ext_zeroout(inode, ex2); - else + zero_ex.ee_block = ex2->ee_block; + zero_ex.ee_len = ext4_ext_get_actual_len(ex2); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex2)); + } else { err = ext4_ext_zeroout(inode, ex); - } else + zero_ex.ee_block = ex->ee_block; + zero_ex.ee_len = ext4_ext_get_actual_len(ex); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(ex)); + } + } else { err = ext4_ext_zeroout(inode, &orig_ex); + zero_ex.ee_block = orig_ex.ee_block; + zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex); + ext4_ext_store_pblock(&zero_ex, + ext4_ext_pblock(&orig_ex)); + } if (err) goto fix_extent_len; @@ -3009,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle, ex->ee_len = cpu_to_le16(ee_len); ext4_ext_try_to_merge(handle, inode, path, ex); err = ext4_ext_dirty(handle, inode, path + path->p_depth); + if (err) + goto fix_extent_len; + + /* update extent status tree */ + err = ext4_es_zeroout(inode, &zero_ex); + goto out; } else if (err) goto fix_extent_len; @@ -3150,6 +3170,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (map->m_lblk - ee_block); + zero_ex.ee_len = 0; trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); @@ -3247,6 +3268,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = ext4_ext_zeroout(inode, ex); if (err) goto out; + zero_ex.ee_block = ex->ee_block; + zero_ex.ee_len = ext4_ext_get_actual_len(ex); + ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3305,6 +3329,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = allocated; out: + /* If we have gotten a failure, don't zero out status tree */ + if (!err) + err = ext4_es_zeroout(inode, &zero_ex); return err ? 
err : allocated; } diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index d2a8cb74676b..fe3337a85ede 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -854,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, return err; } +int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) +{ + ext4_lblk_t ee_block; + ext4_fsblk_t ee_pblock; + unsigned int ee_len; + + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + ee_pblock = ext4_ext_pblock(ex); + + if (ee_len == 0) + return 0; + + return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, + EXTENT_STATUS_WRITTEN); +} + static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) { struct ext4_sb_info *sbi = container_of(shrink, diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 56140ad4150b..d8e2d4dc311e 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -39,6 +39,8 @@ EXTENT_STATUS_DELAYED | \ EXTENT_STATUS_HOLE) +struct ext4_extent; + struct extent_status { struct rb_node rb_node; ext4_lblk_t es_lblk; /* first logical block extent covers */ @@ -64,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status *es); extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status *es); +extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex); static inline int ext4_es_is_written(struct extent_status *es) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3186a43fa4b0..4f1d54a88d8c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -722,6 +722,15 @@ found: } #endif + /* + * If the extent has been zeroed out, we don't need to update + * extent status tree. + */ + if ((flags & EXT4_GET_BLOCKS_PRE_IO) && + ext4_es_lookup_extent(inode, map->m_lblk, &es)) { + if (ext4_es_is_written(&es)) + goto has_zeroout; + } status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && @@ -734,6 +743,7 @@ found: retval = ret; } +has_zeroout: up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { int ret = check_block_validity(inode, map); -- cgit v1.2.2 From 3a2256702e47f68f921dfad41b1764d05c572329 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 10 Mar 2013 21:20:23 -0400 Subject: ext4: fix the wrong number of the allocated blocks in ext4_split_extent() This commit fixes the wrong return value for the number of allocated blocks in ext4_split_extent(). When the length of the blocks we want to allocate is greater than the length of the current extent, we return the wrong number. Let's see what happens in the following case when we call ext4_split_extent(). map: [48, 72] ex: [32, 64, u] 'ex' will be split into two parts: ex1: [32, 47, u] ex2: [48, 64, w] 'map->m_len' is returned from this function, and the value is 24. But the real length is 16. So it should be fixed. This commit also uses the right length of the allocated blocks when get_reserved_cluster_alloc() is called in ext4_ext_handle_uninitialized_extents().
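The arithmetic, as a minimal standalone sketch (hypothetical user-space code, assuming the half-open [start, end) reading of the ranges above):

#include <stdio.h>

int main(void)
{
	unsigned int ee_block = 32, ee_len = 32;	/* extent [32, 64) */
	unsigned int m_lblk = 48, m_len = 24;		/* map request [48, 72) */

	/* Blocks of the request that actually fall inside this extent. */
	unsigned int allocated = ee_len - (m_lblk - ee_block);

	printf("old return: %u, real length: %u\n", m_len, allocated);
	return 0;
}

This prints "old return: 24, real length: 16", matching the numbers above.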
Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Cc: Dmitry Monakhov Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7e37018d1753..69df02ff96aa 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3067,6 +3067,7 @@ static int ext4_split_extent(handle_t *handle, int err = 0; int uninitialized; int split_flag1, flags1; + int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3086,6 +3087,8 @@ static int ext4_split_extent(handle_t *handle, map->m_lblk + map->m_len, split_flag1, flags1); if (err) goto out; + } else { + allocated = ee_len - (map->m_lblk - ee_block); } /* * Update path is required because previous ext4_split_extent_at() may @@ -3115,7 +3118,7 @@ static int ext4_split_extent(handle_t *handle, ext4_ext_show_leaf(inode, path); out: - return err ? err : map->m_len; + return err ? err : allocated; } /* @@ -3730,6 +3733,7 @@ out: allocated - map->m_len); allocated = map->m_len; } + map->m_len = allocated; /* * If we have done fallocate with the offset that is already -- cgit v1.2.2 From e1c36595bedc2e1b4112f01256cb30f4d9f9ae46 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 10 Mar 2013 22:19:00 -0400 Subject: ext4: fix WARN_ON from ext4_releasepage() ext4_releasepage() warns when it is passed a page with PageChecked set. However this can correctly happen when invalidate_inode_pages2_range() invalidates pages - and we should fail the release in that case. Since the page was dirty anyway, it won't be discarded and no harm has happened but it's good to be safe. Also remove bogus page_has_buffers() check - we are guaranteed page has buffers in this function. Reported-by: Zheng Liu Tested-by: Zheng Liu Signed-off-by: "Theodore Ts'o" Signed-off-by: Jan Kara --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4f1d54a88d8c..117a9e7aa4a0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3018,8 +3018,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) trace_ext4_releasepage(page); - WARN_ON(PageChecked(page)); - if (!page_has_buffers(page)) + /* Page has dirty journalled data -> cannot release */ + if (PageChecked(page)) return 0; if (journal) return jbd2_journal_try_to_free_buffers(journal, page, wait); -- cgit v1.2.2 From e3d85c366089015805f175324bb1780249f44669 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sun, 10 Mar 2013 22:21:49 -0400 Subject: ext4: remove unused variable in ext4_free_blocks() Remove unused variable 'freed' in ext4_free_blocks(). 
Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7bb713a46fe4..75e05f3a730f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4464,7 +4464,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; struct ext4_group_desc *gdp; - unsigned long freed = 0; unsigned int overflow; ext4_grpblk_t bit; struct buffer_head *gd_bh; @@ -4672,8 +4671,6 @@ do_more: ext4_mb_unload_buddy(&e4b); - freed += count; - if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); -- cgit v1.2.2 From bb8b20ed94bc69120e31399c43cb336300dea109 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sun, 10 Mar 2013 22:28:09 -0400 Subject: ext4: do not use yield() Using yield() is strongly discouraged (see sched/core.c), especially since we can just use cond_resched(). Replace all uses of yield() with cond_resched(). Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- fs/ext4/mballoc.c | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 117a9e7aa4a0..48fc023ab0a2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1352,7 +1352,7 @@ repeat: ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - yield(); + cond_resched(); goto repeat; } dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 75e05f3a730f..8b2ea9f75004 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3692,11 +3692,7 @@ repeat: if (free < needed && busy) { busy = 0; ext4_unlock_group(sb, group); - /* - * Yield the CPU here so that we don't get soft lockup - * in non preempt case. - */ - yield(); + cond_resched(); goto repeat; } @@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { /* let others to free the space */ - yield(); + cond_resched(); ar->len = ar->len >> 1; } if (!ar->len) { -- cgit v1.2.2 From 232ec8720d4e45405e37144c67053042c6b886d3 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sun, 10 Mar 2013 22:46:30 -0400 Subject: ext4: update reserved space after the 'correction' Currently in ext4_ext_map_blocks(), in delayed allocation writeback we would update the reservation and after that check whether we claimed a cluster outside of the range of the allocation and, if so, give the block back to the reservation pool. However this also means that if the number of reserved data blocks dropped to zero before the correction, we would release all the metadata reservation as well, even though we might still need it because we're not done with the delayed allocation and there might be more blocks to come. This will result in error messages such as: EXT4-fs warning (device sdb): ext4_da_update_reserve_space:361: ino 12, allocated 1 with only 0 reserved metadata blocks (releasing 1 blocks with reserved 1 data blocks) This will only happen on a bigalloc file system and it can be easily reproduced using fiemap-tester from xfstests like this: ./src/fiemap-tester -m DHDHDHDHD -S -p0 /mnt/test/file Or using xfstests such as 225.
Fix this by doing the correction first and updating the reservation after that, so that we do not accidentally decrease i_reserved_data_blocks to zero. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 69df02ff96aa..bd69e906bd91 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4165,9 +4165,6 @@ got_allocated_blocks: } } else { BUG_ON(allocated_clusters < reserved_clusters); - /* We will claim quota for all newly allocated blocks.*/ - ext4_da_update_reserve_space(inode, allocated_clusters, - 1); if (reserved_clusters < allocated_clusters) { struct ext4_inode_info *ei = EXT4_I(inode); int reservation = allocated_clusters - @@ -4218,6 +4215,15 @@ got_allocated_blocks: ei->i_reserved_data_blocks += reservation; spin_unlock(&ei->i_block_reservation_lock); } + /* + * We will claim quota for all newly allocated blocks. + * We're updating the reserved space *after* the + * correction above so we do not accidentally free + * all the metadata reservation because we might + * actually need it later on. + */ + ext4_da_update_reserve_space(inode, allocated_clusters, + 1); } } -- cgit v1.2.2 From 386ad67c9ac043890121c066186883d1640348a4 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sun, 10 Mar 2013 22:50:00 -0400 Subject: ext4: reserve metadata block for every delayed write Currently we only reserve space (data+metadata) in delayed allocation if we're allocating from a new cluster (which is always the case on a non-bigalloc file system). This is OK for data blocks, because we reserve the whole cluster. However we have to reserve metadata for every delayed block we're going to write, because every block could potentially require a metadata block when we need to grow the extent tree. Signed-off-by: Lukas Czerner --- fs/ext4/inode.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 48fc023ab0a2..65bbc9339aca 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1304,6 +1304,55 @@ static int ext4_journalled_write_end(struct file *file, return ret ? ret : copied; } +/* + * Reserve a metadata for a single block located at lblock + */ +static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) +{ + int retries = 0; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int md_needed; + ext4_lblk_t save_last_lblock; + int save_len; + + /* + * recalculate the amount of metadata blocks to reserve + * in order to allocate nrblocks + * worse case is one extent per block + */ +repeat: + spin_lock(&ei->i_block_reservation_lock); + /* + * ext4_calc_metadata_amount() has side effects, which we have + * to be prepared undo if we fail to claim space. + */ + save_len = ei->i_da_metadata_calc_len; + save_last_lblock = ei->i_da_metadata_calc_last_lblock; + md_needed = EXT4_NUM_B2C(sbi, + ext4_calc_metadata_amount(inode, lblock)); + trace_ext4_da_reserve_space(inode, md_needed); + + /* + * We do still charge estimated metadata to the sb though; + * we cannot afford to run out of free blocks.
+ */ + if (ext4_claim_free_clusters(sbi, md_needed, 0)) { + ei->i_da_metadata_calc_len = save_len; + ei->i_da_metadata_calc_last_lblock = save_last_lblock; + spin_unlock(&ei->i_block_reservation_lock); + if (ext4_should_retry_alloc(inode->i_sb, &retries)) { + cond_resched(); + goto repeat; + } + return -ENOSPC; + } + ei->i_reserved_meta_blocks += md_needed; + spin_unlock(&ei->i_block_reservation_lock); + + return 0; /* success */ +} + /* * Reserve a single cluster located at lblock */ @@ -1940,8 +1989,11 @@ add_delayed: * XXX: __block_prepare_write() unmaps passed block, * is it OK? */ - /* If the block was allocated from previously allocated cluster, - * then we dont need to reserve it again. */ + /* + * If the block was allocated from previously allocated cluster, + * then we don't need to reserve it again. However we still need + * to reserve metadata for every block we're going to write. + */ if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { ret = ext4_da_reserve_space(inode, iblock); if (ret) { @@ -1949,6 +2001,13 @@ add_delayed: retval = ret; goto out_unlock; } + } else { + ret = ext4_da_reserve_metadata(inode, iblock); + if (ret) { + /* not enough space to reserve */ + retval = ret; + goto out_unlock; + } } ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, -- cgit v1.2.2 From aaaf9cf71c43f42285f2b1a5b54c9306f3cc3150 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 6 Feb 2013 17:23:50 +0100 Subject: drivers/i2c: remove !S390 dependency, add missing GENERIC_HARDIRQS dependencies Remove the !S390 dependency from the i2c Kconfig, since s390 now supports PCI, HAS_IOMEM and HAS_DMA. However, we need to add a couple of GENERIC_HARDIRQS dependencies to fix compile and link errors like these: ERROR: "devm_request_threaded_irq" [drivers/i2c/i2c-smbus.ko] undefined! ERROR: "devm_request_threaded_irq" [drivers/i2c/busses/i2c-ocores.ko] undefined! Cc: Wolfram Sang Cc: Jean Delvare Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/i2c/Kconfig | 2 +- drivers/i2c/busses/Kconfig | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index 46cde098c11c..e380c6eef3af 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -4,7 +4,6 @@ menuconfig I2C tristate "I2C support" - depends on !S390 select RT_MUTEXES ---help--- I2C (pronounce: I-squared-C) is a slow serial bus protocol used in @@ -76,6 +75,7 @@ config I2C_HELPER_AUTO config I2C_SMBUS tristate "SMBus-specific protocols" if !I2C_HELPER_AUTO + depends on GENERIC_HARDIRQS help Say Y here if you want support for SMBus extensions to the I2C specification. At the moment, the only supported extension is diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a3725de92384..adfee98486b1 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -114,7 +114,7 @@ config I2C_I801 config I2C_ISCH tristate "Intel SCH SMBus 1.0" - depends on PCI + depends on PCI && GENERIC_HARDIRQS select LPC_SCH help Say Y here if you want to use SMBus controller on the Intel SCH @@ -543,6 +543,7 @@ config I2C_NUC900 config I2C_OCORES tristate "OpenCores I2C Controller" + depends on GENERIC_HARDIRQS help If you say yes to this option, support will be included for the OpenCores I2C controller.
For details see @@ -777,7 +778,7 @@ config I2C_PARPORT tristate "Parallel port adapter" - depends on PARPORT + depends on PARPORT && GENERIC_HARDIRQS select I2C_ALGOBIT select I2C_SMBUS help @@ -802,6 +803,7 @@ config I2C_PARPORT_LIGHT tristate "Parallel port adapter (light)" + depends on GENERIC_HARDIRQS select I2C_ALGOBIT select I2C_SMBUS help -- cgit v1.2.2 From 52319b457cd78aa891f5947cf2237dd5f6a4c52d Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Fri, 8 Mar 2013 09:29:34 +0100 Subject: s390/kdump: Do not add standby memory for kdump Standby memory that is located outside [0,OLDMEM_SIZE] is currently used by the s390 memory detection. This leads to additional memory consumption due to the allocation of page structures. To fix this, we now do not add standby memory if the kernel is started in kdump mode. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_cmd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 30a2255389e5..cd798386b622 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -627,6 +627,8 @@ static int __init sclp_detect_standby_memory(void) struct read_storage_sccb *sccb; int i, id, assigned, rc; + if (OLDMEM_BASE) /* No standby memory in kdump mode */ + return 0; if (!early_read_info_sccb_valid) return 0; if ((sclp_facilities & 0xe00000000000ULL) != 0xe00000000000ULL) -- cgit v1.2.2 From 76950e6e54ccfc98a25b501dbb1bc879cce1aa29 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Tue, 5 Mar 2013 23:47:59 +0000 Subject: sparc64: correctly recognize SPARC64-X chips The following patch adds support for correctly recognizing SPARC64-X chips, which were previously reported as: cpu : Unknown SUN4V CPU fpu : Unknown SUN4V FPU pmu : Unknown SUN4V PMU Signed-off-by: Katayama Yoshihiro Signed-off-by: Allen Pais Signed-off-by: David S.
Miller --- arch/sparc/include/asm/spitfire.h | 1 + arch/sparc/kernel/cpu.c | 6 ++++++ arch/sparc/kernel/head_64.S | 25 +++++++++++++++++++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index d06a26601753..6b67e50fb9b4 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -45,6 +45,7 @@ #define SUN4V_CHIP_NIAGARA3 0x03 #define SUN4V_CHIP_NIAGARA4 0x04 #define SUN4V_CHIP_NIAGARA5 0x05 +#define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_UNKNOWN 0xff #ifndef __ASSEMBLY__ diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index a6c94a2bf9d4..5c5125895db8 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -493,6 +493,12 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara5"; break; + case SUN4V_CHIP_SPARC64X: + sparc_cpu_type = "SPARC64-X"; + sparc_fpu_type = "SPARC64-X integrated FPU"; + sparc_pmu_type = "sparc64-x"; + break; + default: printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", prom_cpu_compatible); diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 2feb15c35d9e..26b706a1867d 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -134,6 +134,8 @@ prom_niagara_prefix: .asciz "SUNW,UltraSPARC-T" prom_sparc_prefix: .asciz "SPARC-" +prom_sparc64x_prefix: + .asciz "SPARC64-X" .align 4 prom_root_compatible: .skip 64 @@ -412,7 +414,7 @@ sun4v_chip_type: cmp %g2, 'T' be,pt %xcc, 70f cmp %g2, 'M' - bne,pn %xcc, 4f + bne,pn %xcc, 49f nop 70: ldub [%g1 + 7], %g2 @@ -425,7 +427,7 @@ sun4v_chip_type: cmp %g2, '5' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 - ba,pt %xcc, 4f + ba,pt %xcc, 49f nop 91: sethi %hi(prom_cpu_compatible), %g1 @@ -439,6 +441,25 @@ sun4v_chip_type: mov SUN4V_CHIP_NIAGARA2, %g4 4: + /* Athena */ + sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 + sethi %hi(prom_sparc64x_prefix), %g7 + or %g7, %lo(prom_sparc64x_prefix), %g7 + mov 9, %g3 +41: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 + bne,pn %icc, 49f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 41b + add %g1, 1, %g1 + mov SUN4V_CHIP_SPARC64X, %g4 + ba,pt %xcc, 5f + nop + +49: mov SUN4V_CHIP_UNKNOWN, %g4 5: sethi %hi(sun4v_chip_type), %g2 or %g2, %lo(sun4v_chip_type), %g2 -- cgit v1.2.2 From 1540c85b176180e5e0b312dd98db7f438baf8a24 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 7 Mar 2013 16:47:23 +0530 Subject: ARC: fix allyesconfig build breakages This patch fixes the following allyesconfig build breakages: CC drivers/mmc/host/mmc_spi.o drivers/mmc/host/mmc_spi.c:118: error: redefinition of 'struct scratch' make[3]: *** [drivers/mmc/host/mmc_spi.o] Error 1 make[2]: *** [drivers/mmc/host] Error 2 make[1]: *** [drivers/mmc] Error 2 make: *** [drivers] Error 2 CC arch/arc/kernel/kgdb.o In file included from include/linux/kgdb.h:20, from arch/arc/kernel/kgdb.c:11: /home/vineetg/arc/k.org/arc-port/arch/arc/include/asm/kgdb.h:34: warning: 'struct pt_regs' declared inside parameter list /home/vineetg/arc/k.org/arc-port/arch/arc/include/asm/kgdb.h:34: warning: its scope is only this definition or declaration, which is probably not what you want arch/arc/kernel/kgdb.c:172: error: conflicting types for 'kgdb_trap' CC arch/arc/kernel/kgdb.o arch/arc/kernel/kgdb.c: In function 'pt_regs_to_gdb_regs': arch/arc/kernel/kgdb.c:62: error: dereferencing pointer to incomplete type Signed-off-by: Vineet Gupta --- arch/arc/include/asm/kgdb.h | 6 ++---- arch/arc/include/uapi/asm/ptrace.h | 4 ++-- arch/arc/kernel/kgdb.c | 1 + 3
files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h index f3c4934f0ca9..4930957ca3d3 100644 --- a/arch/arc/include/asm/kgdb.h +++ b/arch/arc/include/asm/kgdb.h @@ -13,7 +13,7 @@ #ifdef CONFIG_KGDB -#include +#include /* to ensure compatibility with Linux 2.6.35, we don't implement the get/set * register API yet */ @@ -53,9 +53,7 @@ enum arc700_linux_regnums { }; #else -static inline void kgdb_trap(struct pt_regs *regs, int param) -{ -} +#define kgdb_trap(regs, param) #endif #endif /* __ARC_KGDB_H__ */ diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 6afa4f702075..30333cec0fef 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -28,14 +28,14 @@ */ struct user_regs_struct { - struct scratch { + struct { long pad; long bta, lp_start, lp_end, lp_count; long status32, ret, blink, fp, gp; long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; long sp; } scratch; - struct callee { + struct { long pad; long r25, r24, r23, r22, r21, r20; long r19, r18, r17, r16, r15, r14, r13; diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index 2888ba5be47e..52bdc83c1495 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -9,6 +9,7 @@ */ #include +#include #include #include -- cgit v1.2.2 From 8ff14bbc6a2083e83c6d387d025fb67ba639807c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 Mar 2013 14:33:27 +0530 Subject: ARC: ABIv3: Print the correct ABI ver Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index dc0f968dae0a..2d95ac07df7b 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -232,10 +232,8 @@ char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "\n"); -#ifdef _ASM_GENERIC_UNISTD_H n += scnprintf(buf + n, len - n, - "OS ABI [v2]\t: asm-generic/{unistd,stat,fcntl}\n"); -#endif + "OS ABI [v3]\t: no-legacy-syscalls\n"); return buf; } -- cgit v1.2.2 From 180d406e4948faee6e63781f3e062f40ec7c6fc3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 4 Mar 2013 16:01:35 +0530 Subject: ARC: ABIv3: fork/vfork wrappers not needed in "no-legacy-syscall" ABI When switching to clone() only ABI - I missed out pruning the low level asm syscall wrappers Signed-off-by: Vineet Gupta --- arch/arc/include/asm/syscalls.h | 2 -- arch/arc/kernel/entry.S | 25 ------------------------- arch/arc/kernel/sys.c | 2 -- 3 files changed, 29 deletions(-) diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index e53a5340ba4f..dd785befe7fd 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -16,8 +16,6 @@ #include int sys_clone_wrapper(int, int, int, int, int); -int sys_fork_wrapper(void); -int sys_vfork_wrapper(void); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index ef6800ba2f03..b9d875a441cc 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -792,31 +792,6 @@ ARC_EXIT ret_from_fork ;################### Special Sys Call Wrappers ########################## -; TBD: call do_fork directly from here -ARC_ENTRY sys_fork_wrapper - SAVE_CALLEE_SAVED_USER - bl @sys_fork - DISCARD_CALLEE_SAVED_USER - - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys_exit - - b 
ret_from_system_call -ARC_EXIT sys_fork_wrapper - -ARC_ENTRY sys_vfork_wrapper - SAVE_CALLEE_SAVED_USER - bl @sys_vfork - DISCARD_CALLEE_SAVED_USER - - GET_CURR_THR_INFO_FLAGS r10 - btst r10, TIF_SYSCALL_TRACE - bnz tracesys_exit - - b ret_from_system_call -ARC_EXIT sys_vfork_wrapper - ARC_ENTRY sys_clone_wrapper SAVE_CALLEE_SAVED_USER bl @sys_clone diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c index f6bdd07583f3..9d6c1ca26af6 100644 --- a/arch/arc/kernel/sys.c +++ b/arch/arc/kernel/sys.c @@ -6,8 +6,6 @@ #include #define sys_clone sys_clone_wrapper -#define sys_fork sys_fork_wrapper -#define sys_vfork sys_vfork_wrapper #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), -- cgit v1.2.2 From f0e68fc3caf677e834f7bd0f601800e686b56c98 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Feb 2013 13:22:39 +0000 Subject: thermal: rcar: fix missing unlock on error in rcar_thermal_update_temp() Add the missing unlock before return from function rcar_thermal_update_temp() in the error handling case. Signed-off-by: Wei Yongjun Acked-by: Kuninori Morimoto Signed-off-by: Zhang Rui --- drivers/thermal/rcar_thermal.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 28f091994013..04a5566b1723 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -145,6 +145,7 @@ static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) struct device *dev = rcar_priv_to_dev(priv); int i; int ctemp, old, new; + int ret = -EINVAL; mutex_lock(&priv->lock); @@ -174,7 +175,7 @@ static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) if (!ctemp) { dev_err(dev, "thermal sensor was broken\n"); - return -EINVAL; + goto err_out_unlock; } /* @@ -192,10 +193,10 @@ static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) dev_dbg(dev, "thermal%d %d -> %d\n", priv->id, priv->ctemp, ctemp); priv->ctemp = ctemp; - + ret = 0; +err_out_unlock: mutex_unlock(&priv->lock); - - return 0; + return ret; } static int rcar_thermal_get_temp(struct thermal_zone_device *zone, -- cgit v1.2.2 From 6bc51b662241738ac292ffa021b345c2aa604230 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 4 Mar 2013 06:45:32 +0000 Subject: Thermal: dove: Convert to devm_ioremap_resource() Use the newly introduced devm_ioremap_resource() instead of devm_request_and_ioremap() which provides more consistent error handling. devm_ioremap_resource() provides its own error messages; so all explicit error messages can be removed from the failure code paths. 
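The conversion pattern, shown as a minimal hedged sketch (a hypothetical probe() fragment; pdev, res and priv stand in for the real driver's variables):

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	priv->base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(priv->base))
		return PTR_ERR(priv->base);	/* the helper already printed an error */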
Signed-off-by: Sachin Kamat Cc: Andrew Lunn Reviewed-by: Thierry Reding Signed-off-by: Zhang Rui --- drivers/thermal/dove_thermal.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c index 7b0bfa0e7a9c..3078c403b42d 100644 --- a/drivers/thermal/dove_thermal.c +++ b/drivers/thermal/dove_thermal.c @@ -143,22 +143,18 @@ static int dove_thermal_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->sensor = devm_request_and_ioremap(&pdev->dev, res); - if (!priv->sensor) { - dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); - return -EADDRNOTAVAIL; - } + priv->sensor = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->sensor)) + return PTR_ERR(priv->sensor); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (!res) { dev_err(&pdev->dev, "Failed to get platform resource\n"); return -ENODEV; } - priv->control = devm_request_and_ioremap(&pdev->dev, res); - if (!priv->control) { - dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); - return -EADDRNOTAVAIL; - } + priv->control = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->control)) + return PTR_ERR(priv->control); ret = dove_init_sensor(priv); if (ret) { -- cgit v1.2.2 From 5095526faf38472bf04af919797a1f01a0ccb558 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 4 Mar 2013 06:45:33 +0000 Subject: Thermal: rcar: Convert to devm_ioremap_resource() Use the newly introduced devm_ioremap_resource() instead of devm_request_and_ioremap() which provides more consistent error handling. devm_ioremap_resource() provides its own error messages; so all explicit error messages can be removed from the failure code paths. Signed-off-by: Sachin Kamat Cc: Kuninori Morimoto Reviewed-by: Thierry Reding Signed-off-by: Zhang Rui --- drivers/thermal/rcar_thermal.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 04a5566b1723..ab518140207d 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -400,11 +400,9 @@ static int rcar_thermal_probe(struct platform_device *pdev) /* * rcar_has_irq_support() will be enabled */ - common->base = devm_request_and_ioremap(dev, res); - if (!common->base) { - dev_err(dev, "Unable to ioremap thermal register\n"); - return -ENOMEM; - } + common->base = devm_ioremap_resource(dev, res); + if (IS_ERR(common->base)) + return PTR_ERR(common->base); /* enable temperature comparation */ rcar_thermal_common_write(common, ENR, 0x00030303); @@ -423,11 +421,9 @@ static int rcar_thermal_probe(struct platform_device *pdev) return -ENOMEM; } - priv->base = devm_request_and_ioremap(dev, res); - if (!priv->base) { - dev_err(dev, "Unable to ioremap priv register\n"); - return -ENOMEM; - } + priv->base = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); priv->common = common; priv->id = i; -- cgit v1.2.2 From aa3b5d222da5922ab1883eba3e9d8f92ad148155 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 4 Mar 2013 06:45:34 +0000 Subject: Thermal: kirkwood: Convert to devm_ioremap_resource() Use the newly introduced devm_ioremap_resource() instead of devm_request_and_ioremap() which provides more consistent error handling. devm_ioremap_resource() provides its own error messages; so all explicit error messages can be removed from the failure code paths. 
Signed-off-by: Sachin Kamat Cc: Nobuhiro Iwamatsu Reviewed-by: Thierry Reding Acked-by: Nobuhiro Iwamatsu Signed-off-by: Zhang Rui --- drivers/thermal/kirkwood_thermal.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c index 65cb4f09e8f6..e5500edb5285 100644 --- a/drivers/thermal/kirkwood_thermal.c +++ b/drivers/thermal/kirkwood_thermal.c @@ -85,11 +85,9 @@ static int kirkwood_thermal_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->sensor = devm_request_and_ioremap(&pdev->dev, res); - if (!priv->sensor) { - dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); - return -EADDRNOTAVAIL; - } + priv->sensor = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->sensor)) + return PTR_ERR(priv->sensor); thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0, priv, &ops, NULL, 0, 0); -- cgit v1.2.2 From fb84d9907f0ff0e3f7d70d55039ddf0f78d2a472 Mon Sep 17 00:00:00 2001 From: Devendra Naga Date: Mon, 4 Mar 2013 16:52:47 +0000 Subject: thermal: rcar_thermal: propagate return value of thermal_zone_device_register thermal_zone_device_register() returns an error code encoded in the pointer itself; use PTR_ERR() to extract it and return it at the end. Signed-off-by: Devendra Naga Signed-off-by: Zhang Rui --- drivers/thermal/rcar_thermal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index ab518140207d..2cc5b6115e3e 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -364,6 +364,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) struct resource *res, *irq; int mres = 0; int i; + int ret = -ENODEV; int idle = IDLE_INTERVAL; common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL); @@ -438,6 +439,7 @@ static int rcar_thermal_probe(struct platform_device *pdev) idle); if (IS_ERR(priv->zone)) { dev_err(dev, "can't register thermal zone\n"); + ret = PTR_ERR(priv->zone); goto error_unregister; } @@ -457,7 +459,7 @@ error_unregister: rcar_thermal_for_each_priv(priv, common) thermal_zone_device_unregister(priv->zone); - return -ENODEV; + return ret; } static int rcar_thermal_remove(struct platform_device *pdev) -- cgit v1.2.2 From 043e4652bf3378883e7c0db38fa47fa8e2558f9c Mon Sep 17 00:00:00 2001 From: Devendra Naga Date: Mon, 4 Mar 2013 16:52:48 +0000 Subject: thermal: exynos_thermal: return a proper error code when thermal_zone_device_register fails We were returning -EINVAL when the thermal_zone_device_register() function fails. Instead, we can use the return value from thermal_zone_device_register() by using PTR_ERR().
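A minimal sketch of the ERR_PTR()/PTR_ERR() convention this relies on (simplified; the register call is copied from the kirkwood probe above):

	struct thermal_zone_device *tz;

	tz = thermal_zone_device_register("kirkwood_thermal", 0, 0,
					  priv, &ops, NULL, 0, 0);
	if (IS_ERR(tz))			/* on failure the pointer encodes -errno */
		return PTR_ERR(tz);	/* propagate the real error code */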
Signed-off-by: Devendra Naga Signed-off-by: Zhang Rui --- drivers/thermal/exynos_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index e04ebd8671ac..46568c078dee 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -476,7 +476,7 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) if (IS_ERR(th_zone->therm_dev)) { pr_err("Failed to register thermal zone device\n"); - ret = -EINVAL; + ret = PTR_ERR(th_zone->therm_dev); goto err_unregister; } th_zone->mode = THERMAL_DEVICE_ENABLED; -- cgit v1.2.2 From ad56edad089b56300fd13bb9eeb7d0424d978239 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 11 Mar 2013 13:24:56 -0400 Subject: jbd2: fix use after free in jbd2_journal_dirty_metadata() jbd2_journal_dirty_metadata() didn't get a reference to journal_head it was working with. This is OK in most of the cases since the journal head should be attached to a transaction but in rare occasions when we are journalling data, __ext4_journalled_writepage() can race with jbd2_journal_invalidatepage() stripping buffers from a page and thus journal head can be freed under hands of jbd2_journal_dirty_metadata(). Fix the problem by getting own journal head reference in jbd2_journal_dirty_metadata() (and also in jbd2_journal_set_triggers() which can possibly have the same issue). Reported-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/jbd2/transaction.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index d6ee5aed56b1..325bc019ed88 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1065,9 +1065,12 @@ out: void jbd2_journal_set_triggers(struct buffer_head *bh, struct jbd2_buffer_trigger_type *type) { - struct journal_head *jh = bh2jh(bh); + struct journal_head *jh = jbd2_journal_grab_journal_head(bh); + if (WARN_ON(!jh)) + return; jh->b_triggers = type; + jbd2_journal_put_journal_head(jh); } void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, @@ -1119,17 +1122,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); + struct journal_head *jh; int ret = 0; - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); if (is_handle_aborted(handle)) goto out; - if (!buffer_jbd(bh)) { + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) { ret = -EUCLEAN; goto out; } + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); jbd_lock_bh_state(bh); @@ -1220,6 +1224,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); + jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); WARN_ON(ret); /* All errors are bugs, so dump the stack */ -- cgit v1.2.2 From 0e367ae46503cfe7791460c8ba8434a5d60b2bd5 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 7 Mar 2013 17:32:01 +0000 Subject: xen/blkback: correctly respond to unknown, non-native requests If the frontend is using a non-native protocol (e.g., a 64-bit frontend with a 32-bit backend) and it sent an unrecognized request, the request was not translated and the response would have the incorrect ID. 
This may cause the frontend driver to behave incorrectly or crash. Since the ID field in the request is always in the same place, regardless of the request type we can get the correct ID and make a valid response (which will report BLKIF_RSP_EOPNOTSUPP). This bug affected 64-bit SLES 11 guests when using a 32-bit backend. This guest does a BLKIF_OP_RESERVED_1 (BLKIF_OP_PACKET in the SLES source) and would crash in blkif_int() as the ID in the response would be invalid. Signed-off-by: David Vrabel Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 31 +++++++++++++++++++++++++++---- drivers/block/xen-blkback/common.h | 25 +++++++++++++++++++++++++ include/xen/interface/io/blkif.h | 10 ++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6d1cc3df2ac6..1a0faf6370ca 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -679,6 +679,16 @@ static int dispatch_discard_io(struct xen_blkif *blkif, return err; } +static int dispatch_other_io(struct xen_blkif *blkif, + struct blkif_request *req, + struct pending_req *pending_req) +{ + free_req(pending_req); + make_response(blkif, req->u.other.id, req->operation, + BLKIF_RSP_EOPNOTSUPP); + return -EIO; +} + static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); @@ -800,17 +810,30 @@ __do_block_io_op(struct xen_blkif *blkif) /* Apply all sanity checks to /private copy/ of request. */ barrier(); - if (unlikely(req.operation == BLKIF_OP_DISCARD)) { + + switch (req.operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + if (dispatch_rw_block_io(blkif, &req, pending_req)) + goto done; + break; + case BLKIF_OP_DISCARD: free_req(pending_req); if (dispatch_discard_io(blkif, &req)) - break; - } else if (dispatch_rw_block_io(blkif, &req, pending_req)) + goto done; + break; + default: + if (dispatch_other_io(blkif, &req, pending_req)) + goto done; break; + } /* Yield point for this unbounded loop. */ cond_resched(); } - +done: return more_to_do; } diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 6072390c7f57..195278ae993d 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -77,11 +77,18 @@ struct blkif_x86_32_request_discard { uint64_t nr_sectors; } __attribute__((__packed__)); +struct blkif_x86_32_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_x86_32_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_x86_32_request_rw rw; struct blkif_x86_32_request_discard discard; + struct blkif_x86_32_request_other other; } u; } __attribute__((__packed__)); @@ -113,11 +120,19 @@ struct blkif_x86_64_request_discard { uint64_t nr_sectors; } __attribute__((__packed__)); +struct blkif_x86_64_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; + uint32_t _pad3; /* offsetof(blkif_..,u.discard.id)==8 */ + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_x86_64_request { uint8_t operation; /* BLKIF_OP_??? 
*/ union { struct blkif_x86_64_request_rw rw; struct blkif_x86_64_request_discard discard; + struct blkif_x86_64_request_other other; } u; } __attribute__((__packed__)); @@ -278,6 +293,11 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. + */ + dst->u.other.id = src->u.other.id; break; } } @@ -309,6 +329,11 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, dst->u.discard.nr_sectors = src->u.discard.nr_sectors; break; default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. + */ + dst->u.other.id = src->u.other.id; break; } } diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 01c3d62436ef..ffd4652de91c 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -138,11 +138,21 @@ struct blkif_request_discard { uint8_t _pad3; } __attribute__((__packed__)); +struct blkif_request_other { + uint8_t _pad1; + blkif_vdev_t _pad2; /* only for read/write requests */ +#ifdef CONFIG_X86_64 + uint32_t _pad3; /* offsetof(blkif_req..,u.other.id)==8*/ +#endif + uint64_t id; /* private guest value, echoed in resp */ +} __attribute__((__packed__)); + struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ union { struct blkif_request_rw rw; struct blkif_request_discard discard; + struct blkif_request_other other; } u; } __attribute__((__packed__)); -- cgit v1.2.2 From 986cacbd26abe5d498be922cd6632f1ec376c271 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Mon, 11 Mar 2013 16:15:50 +0000 Subject: xen/blkback: Change statistics counter types to unsigned These values shouldn't be negative, but after an overflow their values can turn negative if they are signed. xentop can show bogus values in this case.
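The failure mode, as a standalone user-space sketch (hypothetical counters, not driver code; strictly speaking signed overflow is undefined behaviour in C, though it typically wraps):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	int narrow = INT_MAX;			/* e.g. st_rd_req as a signed int */
	unsigned long long wide = INT_MAX;	/* the new counter type */

	narrow++;	/* typically wraps to INT_MIN: xentop shows garbage */
	wide++;		/* well-defined: keeps counting upward */

	printf("%d vs %llu\n", narrow, wide);
	return 0;
}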
Signed-off-by: Zoltan Kiss Reported-by: Ichiro Ogino Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 4 ++-- drivers/block/xen-blkback/common.h | 14 +++++++------- drivers/block/xen-blkback/xenbus.c | 14 +++++++------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 1a0faf6370ca..eaccc222a1dc 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -381,8 +381,8 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" - " | ds %4d\n", + pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu" + " | ds %4llu\n", current->comm, blkif->st_oo_req, blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req, blkif->st_ds_req); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 195278ae993d..da78346487ae 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -223,13 +223,13 @@ struct xen_blkif { /* statistics */ unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_f_req; - int st_ds_req; - int st_rd_sect; - int st_wr_sect; + unsigned long long st_rd_req; + unsigned long long st_wr_req; + unsigned long long st_oo_req; + unsigned long long st_f_req; + unsigned long long st_ds_req; + unsigned long long st_rd_sect; + unsigned long long st_wr_sect; wait_queue_head_t waiting_to_free; }; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 5e237f630c47..8bfd1bcf95ec 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -230,13 +230,13 @@ int __init xen_blkif_interface_init(void) } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) -VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); -VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); -VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); -VBD_SHOW(f_req, "%d\n", be->blkif->st_f_req); -VBD_SHOW(ds_req, "%d\n", be->blkif->st_ds_req); -VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); -VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); +VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req); +VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req); +VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req); +VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect); static struct attribute *xen_vbdstat_attrs[] = { &dev_attr_oo_req.attr, -- cgit v1.2.2 From f37912039eb04979f269de0a7dc1a601702df51a Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Feb 2013 12:27:46 -0600 Subject: block: IBM RamSan 70/80 trivial changes. This patch includes trivial changes that were recommended by different members of the Linux Community. Changes include: o Removing the redundant wmb(). o Formatting o Various other little things. 
Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/config.c | 6 ++---- drivers/block/rsxx/core.c | 4 ++-- drivers/block/rsxx/cregs.c | 13 +++---------- drivers/block/rsxx/dma.c | 12 ------------ drivers/block/rsxx/rsxx.h | 6 ++++-- 5 files changed, 11 insertions(+), 30 deletions(-) diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c index a295e7e9ee41..0d8cb18284eb 100644 --- a/drivers/block/rsxx/config.c +++ b/drivers/block/rsxx/config.c @@ -29,10 +29,8 @@ #include "rsxx_priv.h" #include "rsxx_cfg.h" -static void initialize_config(void *config) +static void initialize_config(struct rsxx_card_cfg *cfg) { - struct rsxx_card_cfg *cfg = config; - cfg->hdr.version = RSXX_CFG_VERSION; cfg->data.block_size = RSXX_HW_BLK_SIZE; @@ -181,7 +179,7 @@ int rsxx_load_config(struct rsxx_cardinfo *card) } else { dev_info(CARD_TO_DEV(card), "Initializing card configuration.\n"); - initialize_config(card); + initialize_config(&card->config); st = rsxx_save_config(card); if (st) return st; diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index e5162487686a..edbae10e7f6f 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -161,9 +161,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) } /*----------------- Card Event Handler -------------------*/ -static char *rsxx_card_state_to_str(unsigned int state) +static const char * const rsxx_card_state_to_str(unsigned int state) { - static char *state_strings[] = { + static const char * const state_strings[] = { "Unknown", "Shutdown", "Starting", "Formatting", "Uninitialized", "Good", "Shutting Down", "Fault", "Read Only Fault", "dStroying" diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 80bbe639fccd..224156435261 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -126,13 +126,6 @@ static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) cmd->buf, cmd->stream); } - /* - * Data copy must complete before initiating the command. This is - * needed for weakly ordered processors (i.e. PowerPC), so that all - * neccessary registers are written before we kick the hardware. - */ - wmb(); - /* Setting the valid bit will kick off the command. */ iowrite32(cmd->op, card->regmap + CREG_CMD); } @@ -399,12 +392,12 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card, return st; /* - * This timeout is neccessary for unresponsive hardware. The additional + * This timeout is necessary for unresponsive hardware. The additional * 20 seconds to used to guarantee that each cregs requests has time to * complete. */ - timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * - card->creg_ctrl.q_depth) + 20000); + timeout = msecs_to_jiffies(CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth + 20000); /* * The creg interface is guaranteed to complete. It has a timeout diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 63176e67662f..7c3a57bed2cd 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -432,16 +432,6 @@ static void rsxx_issue_dmas(struct work_struct *work) /* Let HW know we've queued commands. */ if (cmds_pending) { - /* - * We must guarantee that the CPU writes to 'ctrl->cmd.buf' - * (which is in PCI-consistent system-memory) from the loop - * above make it into the coherency domain before the - * following PIO "trigger" updating the cmd.idx. A WMB is - * sufficient. We need not explicitly CPU cache-flush since - * the memory is a PCI-consistent (ie; coherent) mapping. 
- */ - wmb(); - atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); mod_timer(&ctrl->activity_timer, jiffies + DMA_ACTIVITY_TIMEOUT); @@ -798,8 +788,6 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); - wmb(); - return 0; } diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h index 2e50b65902b7..24ba3642bd89 100644 --- a/drivers/block/rsxx/rsxx.h +++ b/drivers/block/rsxx/rsxx.h @@ -27,15 +27,17 @@ /*----------------- IOCTL Definitions -------------------*/ +#define RSXX_MAX_DATA 8 + struct rsxx_reg_access { __u32 addr; __u32 cnt; __u32 stat; __u32 stream; - __u32 data[8]; + __u32 data[RSXX_MAX_DATA]; }; -#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) +#define RSXX_MAX_REG_CNT (RSXX_MAX_DATA * (sizeof(__u32))) #define RSXX_IOC_MAGIC 'r' -- cgit v1.2.2 From 03ac03a8971bd7e9f8c8b20a309b61beaf154d60 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Feb 2013 12:31:31 -0600 Subject: block: IBM RamSan 70/80 fixes inconsistent locking. This patch includes changes to the cregs locking scheme. Before, inconsistent locking would occur because of misuse of spin_lock, spin_lock_bh, and their counterparts. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/cregs.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 224156435261..0539a25877eb 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -99,22 +99,6 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, } } -static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) -{ - struct creg_cmd *cmd; - - /* - * Spin lock is needed because this can be called in atomic/interrupt - * context.
- */ - spin_lock_bh(&card->creg_ctrl.lock); - cmd = card->creg_ctrl.active_cmd; - card->creg_ctrl.active_cmd = NULL; - spin_unlock_bh(&card->creg_ctrl.lock); - - return cmd; -} - static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) { iowrite32(cmd->addr, card->regmap + CREG_ADD); @@ -189,11 +173,11 @@ static int creg_queue_cmd(struct rsxx_cardinfo *card, cmd->cb_private = cb_private; cmd->status = 0; - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_add_tail(&cmd->list, &card->creg_ctrl.queue); card->creg_ctrl.q_depth++; creg_kick_queue(card); - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); return 0; } @@ -203,7 +187,11 @@ static void creg_cmd_timed_out(unsigned long data) struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; struct creg_cmd *cmd; - cmd = pop_active_cmd(card); + spin_lock(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock(&card->creg_ctrl.lock); + if (cmd == NULL) { card->creg_ctrl.creg_stats.creg_timeout++; dev_warn(CARD_TO_DEV(card), @@ -240,7 +228,11 @@ static void creg_cmd_done(struct work_struct *work) if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) card->creg_ctrl.creg_stats.failed_cancel_timer++; - cmd = pop_active_cmd(card); + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + if (cmd == NULL) { dev_err(CARD_TO_DEV(card), "Spurious creg interrupt!\n"); @@ -289,10 +281,10 @@ creg_done: kmem_cache_free(creg_cmd_pool, cmd); - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); card->creg_ctrl.active = 0; creg_kick_queue(card); - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); } static void creg_reset(struct rsxx_cardinfo *card) @@ -317,7 +309,7 @@ static void creg_reset(struct rsxx_cardinfo *card) "Resetting creg interface for recovery\n"); /* Cancel outstanding commands */ - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { list_del(&cmd->list); card->creg_ctrl.q_depth--; @@ -338,7 +330,7 @@ static void creg_reset(struct rsxx_cardinfo *card) card->creg_ctrl.active = 0; } - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); card->creg_ctrl.reset = 0; spin_lock_irqsave(&card->irq_lock, flags); @@ -705,7 +697,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card) int cnt = 0; /* Cancel outstanding commands */ - spin_lock(&card->creg_ctrl.lock); + spin_lock_bh(&card->creg_ctrl.lock); list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { list_del(&cmd->list); if (cmd->cb) @@ -730,7 +722,7 @@ void rsxx_creg_destroy(struct rsxx_cardinfo *card) "Canceled active creg command\n"); kmem_cache_free(creg_cmd_pool, cmd); } - spin_unlock(&card->creg_ctrl.lock); + spin_unlock_bh(&card->creg_ctrl.lock); cancel_work_sync(&card->creg_ctrl.done_work); } -- cgit v1.2.2 From 9bb3c4469e317919b0fde8c0e0a3ebe7bd2cf167 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Wed, 27 Feb 2013 09:24:59 -0600 Subject: block: IBM RamSan 70/80 branding changes. This patch includes changing the hardware branding name from IBM RamSan to IBM FlashSystem. v2 Changes include: o Removing the unnecessary IBM Vendor ID #define v1 Changes include: o Changed all references of RamSan to FlashSystem. o Changed the vendor/device IDs for the product. o Changed driver version number. 
o Updated the MAINTAINERS file. o Various other little things. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- MAINTAINERS | 12 ++++++------ drivers/block/Kconfig | 4 ++-- drivers/block/rsxx/Makefile | 2 +- drivers/block/rsxx/config.c | 2 +- drivers/block/rsxx/core.c | 10 ++++------ drivers/block/rsxx/dma.c | 2 +- drivers/block/rsxx/rsxx_cfg.h | 2 +- drivers/block/rsxx/rsxx_priv.h | 9 +++------ 8 files changed, 19 insertions(+), 24 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 95616582c728..a00f0eaf0eda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3242,6 +3242,12 @@ F: Documentation/firmware_class/ F: drivers/base/firmware*.c F: include/linux/firmware.h +FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card) +M: Joshua Morris +M: Philip Kelleher +S: Maintained +F: drivers/block/rsxx/ + FLOPPY DRIVER M: Jiri Kosina T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/floppy.git @@ -6516,12 +6522,6 @@ S: Maintained F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c -RAMSAM DRIVER (IBM RamSan 70/80 PCI SSD Flash Card) -M: Joshua Morris -M: Philip Kelleher -S: Maintained -F: drivers/block/rsxx/ - RANDOM NUMBER DRIVER M: Theodore Ts'o" S: Maintained diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 5dc0daed8fac..b81ddfea1da0 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -532,11 +532,11 @@ config BLK_DEV_RBD If unsure, say N. config BLK_DEV_RSXX - tristate "RamSam PCIe Flash SSD Device Driver" + tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver" depends on PCI help Device driver for IBM's high speed PCIe SSD - storage devices: RamSan-70 and RamSan-80. + storage devices: FlashSystem-70 and FlashSystem-80. To compile this driver as a module, choose M here: the module will be called rsxx. 
diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile index f35cd0b71f7b..b1c53c0aa450 100644 --- a/drivers/block/rsxx/Makefile +++ b/drivers/block/rsxx/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o -rsxx-y := config.o core.o cregs.o dev.o dma.o +rsxx-objs := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c index 0d8cb18284eb..10cd530d3e10 100644 --- a/drivers/block/rsxx/config.c +++ b/drivers/block/rsxx/config.c @@ -35,7 +35,7 @@ static void initialize_config(struct rsxx_card_cfg *cfg) cfg->data.block_size = RSXX_HW_BLK_SIZE; cfg->data.stripe_size = RSXX_HW_BLK_SIZE; - cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.vendor_id = RSXX_VENDOR_ID_IBM; cfg->data.cache_order = (-1); cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; cfg->data.intr_coal.count = 0; diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index edbae10e7f6f..b82ee7baf0e8 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -39,8 +39,8 @@ #define NO_LEGACY 0 -MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); -MODULE_AUTHOR("IBM "); +MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver"); +MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); @@ -593,10 +593,8 @@ static void rsxx_pci_shutdown(struct pci_dev *dev) } static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, - {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)}, {0,}, }; diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 7c3a57bed2cd..efd75b55a670 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -28,7 +28,7 @@ struct rsxx_dma { struct list_head list; u8 cmd; - unsigned int laddr; /* Logical address on the ramsan */ + unsigned int laddr; /* Logical address */ struct { u32 off; u32 cnt; diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h index c025fe5fdb70..f384c943846d 100644 --- a/drivers/block/rsxx/rsxx_cfg.h +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -58,7 +58,7 @@ struct rsxx_card_cfg { }; /* Vendor ID Values */ -#define RSXX_VENDOR_ID_TMS_IBM 0 +#define RSXX_VENDOR_ID_IBM 0 #define RSXX_VENDOR_ID_DSI 1 #define RSXX_VENDOR_COUNT 2 diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index a1ac907d8f4c..f5a95f75bd57 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -45,16 +45,13 @@ struct proc_cmd; -#define PCI_VENDOR_ID_TMS_IBM 0x15B6 -#define PCI_DEVICE_ID_RS70_FLASH 0x0019 -#define PCI_DEVICE_ID_RS70D_FLASH 0x001A -#define PCI_DEVICE_ID_RS80_FLASH 0x001C -#define PCI_DEVICE_ID_RS81_FLASH 0x001E +#define PCI_DEVICE_ID_FS70_FLASH 0x04A9 +#define PCI_DEVICE_ID_FS80_FLASH 0x04AA #define RS70_PCI_REV_SUPPORTED 4 #define DRIVER_NAME "rsxx" -#define DRIVER_VERSION "3.7" +#define DRIVER_VERSION "4.0" /* Block size is 4096 */ #define RSXX_HW_BLK_SHIFT 12 -- cgit v1.2.2 From 1ebfd109822ea35b71aee4efe9ddc2e1b9ac0ed7 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Feb 2013 13:09:40 -0600 Subject: block: IBM RamSan 70/80 error message bug fix. 
This patch includes a simple change to the rsxx_pci_remove function, which was producing error messages because traffic was halted too early. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index b82ee7baf0e8..cbbdff113f46 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -538,9 +538,6 @@ static void rsxx_pci_remove(struct pci_dev *dev) rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); spin_unlock_irqrestore(&card->irq_lock, flags); - /* Prevent work_structs from re-queuing themselves. */ - card->halt = 1; - cancel_work_sync(&card->event_work); rsxx_destroy_dev(card); @@ -549,6 +546,10 @@ static void rsxx_pci_remove(struct pci_dev *dev) spin_lock_irqsave(&card->irq_lock, flags); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. */ + card->halt = 1; + free_irq(dev->irq, card); if (!force_legacy) -- cgit v1.2.2 From 94a32d10f47b637ae24b78b1ddc7ef0e8396fda4 Mon Sep 17 00:00:00 2001 From: Sunguk Lee Date: Tue, 12 Mar 2013 04:41:58 +0900 Subject: Bluetooth: Device 0cf3:3008 should map AR 3012 T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=3008 Rev= 0.01 S: Manufacturer=Atheros Communications S: Product=Bluetooth USB Host Controller S: SerialNumber=Alaska Day 2006 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Signed-off-by: Sunguk Lee Signed-off-by: Gustavo Padovan --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index b9908dd84529..3095d2e74f24 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -74,6 +74,7 @@ static struct usb_device_id ath3k_table[] = { /* Atheros AR3012 with sflash firmware*/ { USB_DEVICE(0x0CF3, 0x3004) }, + { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x04CA, 0x3004) }, @@ -107,6 +108,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { /* Atheros AR3012 with sflash firmware*/ { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D),
.driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 59cde8e882ff..e547851870e7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -132,6 +132,7 @@ static struct usb_device_id blacklist_table[] = { /* Atheros 3012 with sflash firmware */ { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, -- cgit v1.2.2 From b47506d91259c29b9c75c404737eb6525556f9b4 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Mon, 4 Mar 2013 10:39:49 +0800 Subject: batman-adv: verify tt len does not exceed packet len batadv_iv_ogm_process() accesses the packet using the tt_num_changes attribute regardless of the real packet len (assuming the length check was done before). Therefore a length check is needed to avoid reading random memory. Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a0b253ecadaf..a5bb0a769eb9 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1288,7 +1288,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; /* unpack the aggregated packets and process them one by one */ - do { + while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, + batadv_ogm_packet->tt_num_changes)) { tt_buff = packet_buff + buff_pos + BATADV_OGM_HLEN; batadv_iv_ogm_process(ethhdr, batadv_ogm_packet, tt_buff, @@ -1299,8 +1300,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, packet_pos = packet_buff + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; - } while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len, - batadv_ogm_packet->tt_num_changes)); + } kfree_skb(skb); return NET_RX_SUCCESS; -- cgit v1.2.2 From 90ba983f6889e65a3b506b30dc606aa9d1d46cd2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 11 Mar 2013 23:39:59 -0400 Subject: ext4: use atomic64_t for the per-flexbg free_clusters count A user who was using an 8TB+ file system with a very large flexbg size (> 65536) could cause the atomic_t used in the struct flex_groups to overflow. This was detected by PaX security patchset: http://forums.grsecurity.net/viewtopic.php?f=3&t=3289&p=12551#p12551 This bug was introduced in commit 9f24e4208f7e, so it's been around since 2.6.30. :-( Fix this by using an atomic64_t for struct orlov_stats's free_clusters.
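As a rough illustration of the overflow (an editorial sketch, assuming 4KiB blocks, i.e. 32768 clusters per group; exact limits depend on filesystem geometry), a standalone C check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t clusters_per_group = 32768;	/* 2^15 with 4KiB blocks */
	uint64_t flexbg_size = 65536;		/* the problematic threshold */
	uint64_t total = clusters_per_group * flexbg_size;	/* 2^31 */

	/* 2^31 is already one past INT32_MAX, the ceiling of a signed
	 * 32-bit atomic_t, so the per-flexbg counter wraps negative. */
	printf("free_clusters can reach %llu, atomic_t tops out at %d\n",
	       (unsigned long long)total, INT32_MAX);
	return 0;
}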
Signed-off-by: "Theodore Ts'o" Reviewed-by: Lukas Czerner Cc: stable@vger.kernel.org --- fs/ext4/ext4.h | 6 +++--- fs/ext4/ialloc.c | 4 ++-- fs/ext4/mballoc.c | 12 ++++++------ fs/ext4/resize.c | 4 ++-- fs/ext4/super.c | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4a01ba315262..167ff564bbfa 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -335,9 +335,9 @@ struct ext4_group_desc */ struct flex_groups { - atomic_t free_inodes; - atomic_t free_clusters; - atomic_t used_dirs; + atomic64_t free_clusters; + atomic_t free_inodes; + atomic_t used_dirs; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 32fd2b9075dd..6c5bb8d993fe 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -324,8 +324,8 @@ error_return: } struct orlov_stats { + __u64 free_clusters; __u32 free_inodes; - __u32 free_clusters; __u32 used_dirs; }; @@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, if (flex_size > 1) { stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic_read(&flex_group[g].free_clusters); + stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); stats->used_dirs = atomic_read(&flex_group[g].used_dirs); return; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8b2ea9f75004..ee6614bdb639 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - atomic_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4661,8 +4661,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(count_clusters, + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); @@ -4804,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b2c8ee56eb98..c169477a62c9 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb, sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, group_data[0].group); - atomic_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, &sbi->s_flex_groups[flex_group].free_inodes); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9379b7fbfd92..d1ee6a84338a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1923,8 +1923,8 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); atomic_add(ext4_free_inodes_count(sb, gdp), 
&sbi->s_flex_groups[flex_group].free_inodes); - atomic_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(ext4_free_group_clusters(sb, gdp), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(ext4_used_dirs_count(sb, gdp), &sbi->s_flex_groups[flex_group].used_dirs); } -- cgit v1.2.2 From 47ce9c4821fa41ef72c1004e1a362d08334cd717 Mon Sep 17 00:00:00 2001 From: Santosh Rastapur Date: Fri, 8 Mar 2013 03:35:29 +0000 Subject: cxgb4: Allow for backward compatibility with new VPD scheme. New scheme calls for 3rd party VPD at offset 0x0 and Chelsio VPD at offset 0x400 of the function. If no 3rd party VPD is present, then a copy of Chelsio's VPD will be at offset 0x0 to keep in line with PCI spec which requires the VPD to be present at offset 0x0. Signed-off-by: Santosh Rastapur Signed-off-by: Vipul Pandya Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 4ce62031f62f..8049268ce0f2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -497,8 +497,9 @@ int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, } #define EEPROM_STAT_ADDR 0x7bfc -#define VPD_BASE 0 #define VPD_LEN 512 +#define VPD_BASE 0x400 +#define VPD_BASE_OLD 0 /** * t4_seeprom_wp - enable/disable EEPROM write protection @@ -524,7 +525,7 @@ int t4_seeprom_wp(struct adapter *adapter, bool enable) int get_vpd_params(struct adapter *adapter, struct vpd_params *p) { u32 cclk_param, cclk_val; - int i, ret; + int i, ret, addr; int ec, sn; u8 *vpd, csum; unsigned int vpdr_len, kw_offset, id_len; @@ -533,7 +534,12 @@ int get_vpd_params(struct adapter *adapter, struct vpd_params *p) if (!vpd) return -ENOMEM; - ret = pci_read_vpd(adapter->pdev, VPD_BASE, VPD_LEN, vpd); + ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd); + if (ret < 0) + goto out; + addr = *vpd == 0x82 ? VPD_BASE : VPD_BASE_OLD; + + ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd); if (ret < 0) goto out; -- cgit v1.2.2 From 4660c7f498c07c43173142ea95145e9dac5a6d14 Mon Sep 17 00:00:00 2001 From: David Ward Date: Mon, 11 Mar 2013 10:43:39 +0000 Subject: net/ipv4: Ensure that location of timestamp option is stored This is needed in order to detect if the timestamp option appears more than once in a packet, to remove the option if the packet is fragmented, etc. My previous change neglected to store the option location when the router addresses were prespecified and Pointer > Length. But now the option location is also stored when Flag is an unrecognized value, to ensure these option handling behaviors are still performed. Signed-off-by: David Ward Signed-off-by: David S. 
Miller --- net/ipv4/ip_options.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 310a3647c83d..ec7264514a82 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -370,7 +370,6 @@ int ip_options_compile(struct net *net, } switch (optptr[3]&0xF) { case IPOPT_TS_TSONLY: - opt->ts = optptr - iph; if (skb) timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; @@ -381,7 +380,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; if (rt) { spec_dst_fill(&spec_dst, skb); memcpy(&optptr[optptr[2]-1], &spec_dst, 4); @@ -396,7 +394,6 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - opt->ts = optptr - iph; { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); @@ -429,12 +426,12 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 3; goto error; } - opt->ts = optptr - iph; if (skb) { optptr[3] = (optptr[3]&0xF)|((overflow+1)<<4); opt->is_changed = 1; } } + opt->ts = optptr - iph; break; case IPOPT_RA: if (optlen < 4) { -- cgit v1.2.2 From 3da889b616164bde76a37350cf28e0d17a94e979 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 11 Mar 2013 13:52:17 +0000 Subject: bridge: reserve space for IFLA_BRPORT_FAST_LEAVE When the bridge multicast fast leave feature was added, sufficient space was not reserved in the netlink message. This means the flag may be lost in netlink events and results of queries. Found by observation while looking up some netlink stuff for discussion with Vlad. Problem introduced by commit c2d3babfafbb9f6629cfb47139758e59a5eb0d80 Author: David S. Miller Date: Wed Dec 5 16:24:45 2012 -0500 bridge: implement multicast fast leave Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 27aa3ee517ce..db12a0fcfe50 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -29,6 +29,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + 0; } -- cgit v1.2.2 From 3f315bef23075ea8a98a6fe4221a83b83456d970 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Mon, 11 Mar 2013 00:21:48 +0000 Subject: netconsole: don't call __netpoll_cleanup() while atomic __netpoll_cleanup() is called in netconsole_netdev_event() while holding a spinlock. Release/acquire the spinlock before/after it and restart the loop. Also, disable the netconsole completely, because we won't have a chance after the restart of the loop, and might end up in a situation where nt->enabled == 1 and nt->np.dev == NULL. Signed-off-by: Veaceslav Falico Acked-by: Neil Horman Signed-off-by: David S.
Miller --- drivers/net/netconsole.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 37add21a3d7d..59ac143dec25 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -666,6 +666,7 @@ static int netconsole_netdev_event(struct notifier_block *this, goto done; spin_lock_irqsave(&target_list_lock, flags); +restart: list_for_each_entry(nt, &target_list, list) { netconsole_target_get(nt); if (nt->np.dev == dev) { @@ -678,15 +679,17 @@ static int netconsole_netdev_event(struct notifier_block *this, case NETDEV_UNREGISTER: /* * rtnl_lock already held + * we might sleep in __netpoll_cleanup() */ - if (nt->np.dev) { - __netpoll_cleanup(&nt->np); - dev_put(nt->np.dev); - nt->np.dev = NULL; - } + spin_unlock_irqrestore(&target_list_lock, flags); + __netpoll_cleanup(&nt->np); + spin_lock_irqsave(&target_list_lock, flags); + dev_put(nt->np.dev); + nt->np.dev = NULL; nt->enabled = 0; stopped = true; - break; + netconsole_target_put(nt); + goto restart; } } netconsole_target_put(nt); -- cgit v1.2.2 From 069552777a121eb39da29de4bc0383483dbe1f7e Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Tue, 5 Mar 2013 10:58:22 -0500 Subject: ARM: davinci: edma: fix dmaengine induced null pointer dereference on da830 This adds additional error checking to the private edma api implementation to catch the case where edma_alloc_slot() has an invalid controller parameter. The edma dmaengine wrapper driver relies on this condition being handled in order to avoid setting up a second edma dmaengine instance on DA830. Verified using a DA850 with the second EDMA controller platform instance removed to simulate a DA830 which only has a single EDMA controller. Reported-by: Tomas Novotny Signed-off-by: Matt Porter Cc: stable@vger.kernel.org # v3.7.x+ Tested-by: Tomas Novotny Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-davinci/dma.c b/arch/arm/mach-davinci/dma.c index a685e9706b7b..45b7c71d9cc1 100644 --- a/arch/arm/mach-davinci/dma.c +++ b/arch/arm/mach-davinci/dma.c @@ -743,6 +743,9 @@ EXPORT_SYMBOL(edma_free_channel); */ int edma_alloc_slot(unsigned ctlr, int slot) { + if (!edma_cc[ctlr]) + return -EINVAL; + if (slot >= 0) slot = EDMA_CHAN_SLOT(slot); -- cgit v1.2.2 From 418df63adac56841ef6b0f1fcf435bc64d4ed177 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 12 Mar 2013 13:00:42 +0100 Subject: ARM: 7670/1: fix the memset fix Commit 455bd4c430b0 ("ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2) optimizations") attempted to fix a compliance issue with the memset return value. However the memset itself became broken by that patch for misaligned pointers. This fixes the above by branching over the entry code from the misaligned fixup code to avoid reloading the original pointer. Also, because the function entry alignment is wrong in the Thumb mode compilation, that fixup code is moved to the end. While at it, the entry instructions are slightly reworked to help dual issue pipelines.
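For readers not fluent in ARM assembly, the fixed control flow corresponds roughly to the following C sketch (an editorial illustration only; memset_sketch is an invented name, and the real implementation is the hand-written assembly in arch/arm/lib/memset.S shown below):

#include <stddef.h>
#include <stdint.h>

void *memset_sketch(void *p, int c, size_t n)
{
	unsigned char *ip = p;	/* 'ip' mirrors the register used */

	/* Misaligned fixup: store single bytes until word-aligned,
	 * then fall through to the aligned loop. The original pointer
	 * p is never reloaded; it stays intact as the return value. */
	while (n && ((uintptr_t)ip & 3)) {
		*ip++ = (unsigned char)c;
		n--;
	}

	/* Aligned fill: replicate the byte into a word, as the
	 * 'orr r1, r1, r1, lsl #8/#16' instructions do. */
	uint32_t word = 0x01010101u * (unsigned char)c;
	while (n >= 4) {
		*(uint32_t *)ip = word;
		ip += 4;
		n -= 4;
	}

	while (n--)		/* trailing bytes */
		*ip++ = (unsigned char)c;
	return p;
}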
Signed-off-by: Nicolas Pitre Tested-by: Alexander Holler Signed-off-by: Russell King --- arch/arm/lib/memset.S | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index d912e7397ecc..94b0650ea98f 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -14,31 +14,15 @@ .text .align 5 - .word 0 - -1: subs r2, r2, #4 @ 1 do we have enough - blt 5f @ 1 bytes to align with? - cmp r3, #2 @ 1 - strltb r1, [ip], #1 @ 1 - strleb r1, [ip], #1 @ 1 - strb r1, [ip], #1 @ 1 - add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) -/* - * The pointer is now aligned and the length is adjusted. Try doing the - * memset again. - */ ENTRY(memset) -/* - * Preserve the contents of r0 for the return value. - */ - mov ip, r0 - ands r3, ip, #3 @ 1 unaligned? - bne 1b @ 1 + ands r3, r0, #3 @ 1 unaligned? + mov ip, r0 @ preserve r0 as return value + bne 6f @ 1 /* * we know that the pointer in ip is aligned to a word boundary. */ - orr r1, r1, r1, lsl #8 +1: orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 mov r3, r1 cmp r2, #16 @@ -127,4 +111,13 @@ ENTRY(memset) tst r2, #1 strneb r1, [ip], #1 mov pc, lr + +6: subs r2, r2, #4 @ 1 do we have enough + blt 5b @ 1 bytes to align with? + cmp r3, #2 @ 1 + strltb r1, [ip], #1 @ 1 + strleb r1, [ip], #1 @ 1 + strb r1, [ip], #1 @ 1 + add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) + b 1b ENDPROC(memset) -- cgit v1.2.2 From 45549a68a592dd1daed72aaf4df2295931b93138 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Sun, 10 Mar 2013 10:15:54 +0800 Subject: ARM: net: fix an issue for k, which is u32 and never < 0 k is u32, which is never < 0, so a type cast is needed; without it the check is always false and causes an issue. Signed-off-by: Chen Gang Acked-by: Russell King Acked-by: Mircea Gherzan Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6828ef6ce80e..a0bd8a755bdf 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -576,7 +576,7 @@ load_ind: /* x = ((*(frame + k)) & 0xf) << 2; */ ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL; /* the interpreter should deal with the negative K */ - if (k < 0) + if ((int)k < 0) return -1; /* offset in r1: we might have to take the slow path */ emit_mov_i(r_off, k, ctx); -- cgit v1.2.2 From c80a8512ee3a8e1f7c3704140ea55f21dc6bd651 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 11 Mar 2013 20:30:44 +0000 Subject: net/core: move vlan_depth out of while loop in skb_network_protocol() [ Bug added in commit 05e8ef4ab2d8087d (net: factor out skb_mac_gso_segment() from skb_gso_segment() ) ] Move vlan_depth out of the while loop; otherwise vlan_depth is always ETH_HLEN, cannot be increased, and leads to an infinite loop when a frame has two vlan headers. Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index dffbef70cd31..d540ced1f6c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2219,9 +2219,9 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; __be16 type = skb->protocol; + int vlan_depth = ETH_HLEN; while (type == htons(ETH_P_8021Q)) { - int vlan_depth = ETH_HLEN; struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) -- cgit v1.2.2 From 2721e72dd10f71a3ba90f59781becf02638aa0d9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 12 Mar 2013 11:32:32 -0400 Subject: tracing: Fix race in snapshot swapping Although the swap is wrapped with a spin_lock, the assignment of the temp buffer used to swap is not within that lock. It needs to be moved into that lock, otherwise two swaps happening on two different CPUs can end up using the wrong temp buffer to assign in the swap. Luckily, all current callers of the swap function appear to have their own locks. But in case something is added that allows two different callers to call the swap, then there's a chance that this race can trigger and corrupt the buffers. New code is coming soon that will allow for this race to trigger. I've Cc'd stable, so this bug will not show up if someone backports one of the changes that can trigger this bug. Cc: stable@vger.kernel.org Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1f835a83cb2c..53df2839bb93 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -704,7 +704,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct ring_buffer *buf = tr->buffer; + struct ring_buffer *buf; if (trace_stop_count) return; @@ -719,6 +719,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&ftrace_max_lock); + buf = tr->buffer; tr->buffer = max_tr.buffer; max_tr.buffer = buf; -- cgit v1.2.2 From 4f42f80a8f08d4c3f52c4267361241885d5dee3a Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 12 Mar 2013 12:40:04 -0400 Subject: ext4: use s_extent_max_zeroout_kb value as number of kb Currently, when converting an extent to initialized, we have to decide whether to zero out part or all of the uninitialized extent in order to avoid the extent tree growing rapidly. The decision is made by comparing the size of the extent with the configurable value s_extent_max_zeroout_kb, which is in kibibyte units. However, when converting it to a number of blocks, we currently use it as if it were in bytes. This is obviously a bug and it will result in ext4 _never_ zeroing out extents, but rather always splitting and converting parts to initialized while leaving the rest uninitialized in the default setting. Fix this by using s_extent_max_zeroout_kb as kibibytes.
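The fix reduces to a unit conversion; here is a small standalone sketch of the arithmetic (the values are made up for illustration):

#include <stdio.h>

int main(void)
{
	unsigned int max_zeroout_kb = 32;	/* tunable, in KiB */
	unsigned int blocksize_bits = 12;	/* 4096-byte blocks */

	/* Old code shifted by blocksize_bits, treating the KiB value
	 * as if it were bytes: 32 >> 12 == 0, so zeroout never runs. */
	unsigned int wrong = max_zeroout_kb >> blocksize_bits;

	/* KiB to blocks: shift by (blocksize_bits - 10), since
	 * 1 KiB == 2^10 bytes: 32 >> 2 == 8 blocks. */
	unsigned int right = max_zeroout_kb >> (blocksize_bits - 10);

	printf("wrong=%u blocks, right=%u blocks\n", wrong, right);
	return 0;
}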
Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bd69e906bd91..e2bb929bea93 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3264,7 +3264,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (EXT4_EXT_MAY_ZEROOUT & split_flag) max_zeroout = sbi->s_extent_max_zeroout_kb >> - inode->i_sb->s_blocksize_bits; + (inode->i_sb->s_blocksize_bits - 10); /* If extent is less than s_max_zeroout_kb, zeroout directly */ if (max_zeroout && (ee_len <= max_zeroout)) { -- cgit v1.2.2 From fae8563b25f73dc584a07bcda7a82750ff4f7672 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 27 Feb 2013 16:50:38 +0000 Subject: sfc: Only use TX push if a single descriptor is to be written Using TX push when notifying the NIC of multiple new descriptors in the ring will very occasionally cause the TX DMA engine to re-use an old descriptor. This can result in a duplicated or partly duplicated packet (new headers with old data), or an IOMMU page fault. This does not happen when the pushed descriptor is the only one written. TX push also provides little latency benefit when a packet requires more than one descriptor. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/nic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 0ad790cc473c..eaa8e874a3cb 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -376,7 +376,8 @@ efx_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count) return false; tx_queue->empty_read_count = 0; - return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; + return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0 + && tx_queue->write_count - write_count == 1; } /* For each entry inserted into the software descriptor ring, create a -- cgit v1.2.2 From 6183c009f6cd94b42e5812adcfd4ba6220a196e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:57 -0700 Subject: workqueue: make sanity checks less punishing using WARN_ON[_ONCE]()s Workqueue has been using mostly BUG_ON()s for sanity checks, which fail unnecessarily harshly when the assertion doesn't hold. Most assertions can be converted to be less drastic such that things can limp along instead of dying completely. Convert BUG_ON()s to WARN_ON[_ONCE]()s with softer failure behaviors - e.g. if an assertion check fails in destroy_worker(), trigger a WARN and silently ignore the destruction request. Most conversions are trivial. Note that sanity checks in destroy_workqueue() are moved above removal from the workqueues list so that it can bail out without side-effects if assertion checks fail. This patch doesn't introduce any visible behavior changes during normal operation.
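The conversion pattern, reduced to a runnable userspace sketch (WARN_ON here is a minimal stand-in for the kernel macro, and worker_busy fakes the invariant being checked):

#include <stdio.h>

#define WARN_ON(cond) \
	((cond) ? (fprintf(stderr, "WARNING at %s:%d: %s\n", \
			   __FILE__, __LINE__, #cond), 1) : 0)

static int worker_busy;	/* stand-in for worker->current_work etc. */

static void destroy_worker(void)
{
	/* Before: BUG_ON(worker_busy) killed the machine on a violated
	 * invariant. After: warn and ignore the request, so the system
	 * can limp along. */
	if (WARN_ON(worker_busy))
		return;
	printf("worker destroyed\n");
}

int main(void)
{
	worker_busy = 1;
	destroy_worker();	/* warns and bails out */
	worker_busy = 0;
	destroy_worker();	/* proceeds normally */
	return 0;
}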
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 85 +++++++++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fd9a28a13afd..c6e1bdb469ee 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -530,7 +530,7 @@ static int work_next_color(int color) static inline void set_work_data(struct work_struct *work, unsigned long data, unsigned long flags) { - BUG_ON(!work_pending(work)); + WARN_ON_ONCE(!work_pending(work)); atomic_long_set(&work->data, data | flags | work_static(work)); } @@ -785,7 +785,8 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, pool = worker->pool; /* this can only happen on the local cpu */ - BUG_ON(cpu != raw_smp_processor_id()); + if (WARN_ON_ONCE(cpu != raw_smp_processor_id())) + return NULL; /* * The counterpart of the following dec_and_test, implied mb, @@ -1458,9 +1459,10 @@ static void worker_enter_idle(struct worker *worker) { struct worker_pool *pool = worker->pool; - BUG_ON(worker->flags & WORKER_IDLE); - BUG_ON(!list_empty(&worker->entry) && - (worker->hentry.next || worker->hentry.pprev)); + if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) || + WARN_ON_ONCE(!list_empty(&worker->entry) && + (worker->hentry.next || worker->hentry.pprev))) + return; /* can't use worker_set_flags(), also called from start_worker() */ worker->flags |= WORKER_IDLE; @@ -1497,7 +1499,8 @@ static void worker_leave_idle(struct worker *worker) { struct worker_pool *pool = worker->pool; - BUG_ON(!(worker->flags & WORKER_IDLE)); + if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE))) + return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; list_del_init(&worker->entry); @@ -1793,8 +1796,9 @@ static void destroy_worker(struct worker *worker) int id = worker->id; /* sanity check frenzy */ - BUG_ON(worker->current_work); - BUG_ON(!list_empty(&worker->scheduled)); + if (WARN_ON(worker->current_work) || + WARN_ON(!list_empty(&worker->scheduled))) + return; if (worker->flags & WORKER_STARTED) pool->nr_workers--; @@ -1923,7 +1927,8 @@ restart: del_timer_sync(&pool->mayday_timer); spin_lock_irq(&pool->lock); start_worker(worker); - BUG_ON(need_to_create_worker(pool)); + if (WARN_ON_ONCE(need_to_create_worker(pool))) + goto restart; return true; } @@ -2256,7 +2261,7 @@ recheck: * preparing to process a work or actually processing it. * Make sure nobody diddled with it while I was sleeping. */ - BUG_ON(!list_empty(&worker->scheduled)); + WARN_ON_ONCE(!list_empty(&worker->scheduled)); /* * When control reaches this point, we're guaranteed to have @@ -2364,7 +2369,7 @@ repeat: * Slurp in all works issued via this workqueue and * process'em. 
*/ - BUG_ON(!list_empty(&rescuer->scheduled)); + WARN_ON_ONCE(!list_empty(&rescuer->scheduled)); list_for_each_entry_safe(work, n, &pool->worklist, entry) if (get_work_pwq(work) == pwq) move_linked_works(work, scheduled, &n); @@ -2499,7 +2504,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, unsigned int cpu; if (flush_color >= 0) { - BUG_ON(atomic_read(&wq->nr_pwqs_to_flush)); + WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); atomic_set(&wq->nr_pwqs_to_flush, 1); } @@ -2510,7 +2515,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, spin_lock_irq(&pool->lock); if (flush_color >= 0) { - BUG_ON(pwq->flush_color != -1); + WARN_ON_ONCE(pwq->flush_color != -1); if (pwq->nr_in_flight[flush_color]) { pwq->flush_color = flush_color; @@ -2520,7 +2525,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, } if (work_color >= 0) { - BUG_ON(work_color != work_next_color(pwq->work_color)); + WARN_ON_ONCE(work_color != work_next_color(pwq->work_color)); pwq->work_color = work_color; } @@ -2568,13 +2573,13 @@ void flush_workqueue(struct workqueue_struct *wq) * becomes our flush_color and work_color is advanced * by one. */ - BUG_ON(!list_empty(&wq->flusher_overflow)); + WARN_ON_ONCE(!list_empty(&wq->flusher_overflow)); this_flusher.flush_color = wq->work_color; wq->work_color = next_color; if (!wq->first_flusher) { /* no flush in progress, become the first flusher */ - BUG_ON(wq->flush_color != this_flusher.flush_color); + WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); wq->first_flusher = &this_flusher; @@ -2587,7 +2592,7 @@ void flush_workqueue(struct workqueue_struct *wq) } } else { /* wait in queue */ - BUG_ON(wq->flush_color == this_flusher.flush_color); + WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color); list_add_tail(&this_flusher.list, &wq->flusher_queue); flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } @@ -2621,8 +2626,8 @@ void flush_workqueue(struct workqueue_struct *wq) wq->first_flusher = NULL; - BUG_ON(!list_empty(&this_flusher.list)); - BUG_ON(wq->flush_color != this_flusher.flush_color); + WARN_ON_ONCE(!list_empty(&this_flusher.list)); + WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); while (true) { struct wq_flusher *next, *tmp; @@ -2635,8 +2640,8 @@ void flush_workqueue(struct workqueue_struct *wq) complete(&next->done); } - BUG_ON(!list_empty(&wq->flusher_overflow) && - wq->flush_color != work_next_color(wq->work_color)); + WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) && + wq->flush_color != work_next_color(wq->work_color)); /* this flush_color is finished, advance by one */ wq->flush_color = work_next_color(wq->flush_color); @@ -2660,7 +2665,7 @@ void flush_workqueue(struct workqueue_struct *wq) } if (list_empty(&wq->flusher_queue)) { - BUG_ON(wq->flush_color != wq->work_color); + WARN_ON_ONCE(wq->flush_color != wq->work_color); break; } @@ -2668,8 +2673,8 @@ void flush_workqueue(struct workqueue_struct *wq) * Need to flush more colors. Make the next flusher * the new first flusher and arm pwqs. 
*/ - BUG_ON(wq->flush_color == wq->work_color); - BUG_ON(wq->flush_color != next->flush_color); + WARN_ON_ONCE(wq->flush_color == wq->work_color); + WARN_ON_ONCE(wq->flush_color != next->flush_color); list_del_init(&next->list); wq->first_flusher = next; @@ -3263,6 +3268,19 @@ void destroy_workqueue(struct workqueue_struct *wq) /* drain it before proceeding with destruction */ drain_workqueue(wq); + /* sanity checks */ + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); + int i; + + for (i = 0; i < WORK_NR_COLORS; i++) + if (WARN_ON(pwq->nr_in_flight[i])) + return; + if (WARN_ON(pwq->nr_active) || + WARN_ON(!list_empty(&pwq->delayed_works))) + return; + } + /* * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. @@ -3271,17 +3289,6 @@ void destroy_workqueue(struct workqueue_struct *wq) list_del(&wq->list); spin_unlock(&workqueue_lock); - /* sanity check */ - for_each_pwq_cpu(cpu, wq) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); - int i; - - for (i = 0; i < WORK_NR_COLORS; i++) - BUG_ON(pwq->nr_in_flight[i]); - BUG_ON(pwq->nr_active); - BUG_ON(!list_empty(&pwq->delayed_works)); - } - if (wq->flags & WQ_RESCUER) { kthread_stop(wq->rescuer->task); free_mayday_mask(wq->mayday_mask); @@ -3424,7 +3431,7 @@ static void wq_unbind_fn(struct work_struct *work) int i; for_each_std_worker_pool(pool, cpu) { - BUG_ON(cpu != smp_processor_id()); + WARN_ON_ONCE(cpu != smp_processor_id()); mutex_lock(&pool->assoc_mutex); spin_lock_irq(&pool->lock); @@ -3594,7 +3601,7 @@ void freeze_workqueues_begin(void) spin_lock(&workqueue_lock); - BUG_ON(workqueue_freezing); + WARN_ON_ONCE(workqueue_freezing); workqueue_freezing = true; for_each_wq_cpu(cpu) { @@ -3642,7 +3649,7 @@ bool freeze_workqueues_busy(void) spin_lock(&workqueue_lock); - BUG_ON(!workqueue_freezing); + WARN_ON_ONCE(!workqueue_freezing); for_each_wq_cpu(cpu) { struct workqueue_struct *wq; @@ -3656,7 +3663,7 @@ bool freeze_workqueues_busy(void) if (!pwq || !(wq->flags & WQ_FREEZABLE)) continue; - BUG_ON(pwq->nr_active < 0); + WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; goto out_unlock; -- cgit v1.2.2 From e98d5b16cf4df992c40a7c83f1eae61db5bb03da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:57 -0700 Subject: workqueue: make workqueue_lock irq-safe workqueue_lock will be used to synchronize areas which require irq-safety and there isn't much benefit in keeping it not irq-safe. Make it irq-safe. This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c6e1bdb469ee..c585d0ebd353 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2715,10 +2715,10 @@ void drain_workqueue(struct workqueue_struct *wq) * hotter than drain_workqueue() and already looks at @wq->flags. * Use WQ_DRAINING so that queue doesn't have to check nr_drainers. 
*/ - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); if (!wq->nr_drainers++) wq->flags |= WQ_DRAINING; - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); reflush: flush_workqueue(wq); @@ -2740,10 +2740,10 @@ reflush: goto reflush; } - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); if (!--wq->nr_drainers) wq->flags &= ~WQ_DRAINING; - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); } EXPORT_SYMBOL_GPL(drain_workqueue); @@ -3233,7 +3233,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, * list. Grab it, set max_active accordingly and add the new * workqueue to workqueues list. */ - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); if (workqueue_freezing && wq->flags & WQ_FREEZABLE) for_each_pwq_cpu(cpu, wq) @@ -3241,7 +3241,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, list_add(&wq->list, &workqueues); - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); return wq; err: @@ -3285,9 +3285,9 @@ void destroy_workqueue(struct workqueue_struct *wq) * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. */ - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); list_del(&wq->list); - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); if (wq->flags & WQ_RESCUER) { kthread_stop(wq->rescuer->task); @@ -3336,7 +3336,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); wq->saved_max_active = max_active; @@ -3344,16 +3344,16 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) struct pool_workqueue *pwq = get_pwq(cpu, wq); struct worker_pool *pool = pwq->pool; - spin_lock_irq(&pool->lock); + spin_lock(&pool->lock); if (!(wq->flags & WQ_FREEZABLE) || !(pool->flags & POOL_FREEZING)) pwq_set_max_active(pwq, max_active); - spin_unlock_irq(&pool->lock); + spin_unlock(&pool->lock); } - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); } EXPORT_SYMBOL_GPL(workqueue_set_max_active); @@ -3599,7 +3599,7 @@ void freeze_workqueues_begin(void) { unsigned int cpu; - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); WARN_ON_ONCE(workqueue_freezing); workqueue_freezing = true; @@ -3609,7 +3609,7 @@ void freeze_workqueues_begin(void) struct workqueue_struct *wq; for_each_std_worker_pool(pool, cpu) { - spin_lock_irq(&pool->lock); + spin_lock(&pool->lock); WARN_ON_ONCE(pool->flags & POOL_FREEZING); pool->flags |= POOL_FREEZING; @@ -3622,11 +3622,11 @@ void freeze_workqueues_begin(void) pwq->max_active = 0; } - spin_unlock_irq(&pool->lock); + spin_unlock(&pool->lock); } } - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); } /** @@ -3647,7 +3647,7 @@ bool freeze_workqueues_busy(void) unsigned int cpu; bool busy = false; - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); WARN_ON_ONCE(!workqueue_freezing); @@ -3671,7 +3671,7 @@ bool freeze_workqueues_busy(void) } } out_unlock: - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); return busy; } @@ -3688,7 +3688,7 @@ void thaw_workqueues(void) { unsigned int cpu; - spin_lock(&workqueue_lock); + spin_lock_irq(&workqueue_lock); if (!workqueue_freezing) goto out_unlock; @@ -3698,7 +3698,7 @@ void thaw_workqueues(void) struct workqueue_struct *wq; for_each_std_worker_pool(pool, cpu) { - spin_lock_irq(&pool->lock); + 
spin_lock(&pool->lock); WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); pool->flags &= ~POOL_FREEZING; @@ -3716,13 +3716,13 @@ void thaw_workqueues(void) wake_up_worker(pool); - spin_unlock_irq(&pool->lock); + spin_unlock(&pool->lock); } } workqueue_freezing = false; out_unlock: - spin_unlock(&workqueue_lock); + spin_unlock_irq(&workqueue_lock); } #endif /* CONFIG_FREEZER */ -- cgit v1.2.2 From e904e6c2668bba78497c660aec812ca3f77f4ef9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:57 -0700 Subject: workqueue: introduce kmem_cache for pool_workqueues pool_workqueues need to be aligned to 1 << WORK_STRUCT_FLAG_BITS as the lower bits of work->data are used for flags when they're pointing to pool_workqueues. Due to historical reasons, unbound pool_workqueues are allocated using kzalloc() with sufficient buffer area for alignment and aligned manually. The original pointer is stored at the end which free_pwqs() retrieves when freeing it. There's no reason for this hackery anymore. Set alignment of struct pool_workqueue to 1 << WORK_STRUCT_FLAG_BITS, add kmem_cache for pool_workqueues with proper alignment and replace the hacky alloc and free implementation with plain kmem_cache_zalloc/free(). In case WORK_STRUCT_FLAG_BITS gets shrunk too much and makes fields of pool_workqueues misaligned, trigger WARN if the alignment of struct pool_workqueue becomes smaller than that of long long. Note that assertion on IS_ALIGNED() is removed from alloc_pwqs(). We already have another one in pwq init loop in __alloc_workqueue_key(). This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 43 ++++++++++++------------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c585d0ebd353..f9e2ad9a3205 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -169,7 +169,7 @@ struct pool_workqueue { int nr_active; /* L: nr of active works */ int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ -}; +} __aligned(1 << WORK_STRUCT_FLAG_BITS); /* * Structure used to wait for workqueue flush. @@ -233,6 +233,8 @@ struct workqueue_struct { char name[]; /* I: workqueue name */ }; +static struct kmem_cache *pwq_cache; + struct workqueue_struct *system_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); struct workqueue_struct *system_highpri_wq __read_mostly; @@ -3096,34 +3098,11 @@ int keventd_up(void) static int alloc_pwqs(struct workqueue_struct *wq) { - /* - * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. - * Make sure that the alignment isn't lower than that of - * unsigned long long. - */ - const size_t size = sizeof(struct pool_workqueue); - const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, - __alignof__(unsigned long long)); - if (!(wq->flags & WQ_UNBOUND)) - wq->pool_wq.pcpu = __alloc_percpu(size, align); - else { - void *ptr; - - /* - * Allocate enough room to align pwq and put an extra - * pointer at the end pointing back to the originally - * allocated pointer which will be used for free. 
- */ - ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); - if (ptr) { - wq->pool_wq.single = PTR_ALIGN(ptr, align); - *(void **)(wq->pool_wq.single + 1) = ptr; - } - } + wq->pool_wq.pcpu = alloc_percpu(struct pool_workqueue); + else + wq->pool_wq.single = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); - /* just in case, make sure it's actually aligned */ - BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align)); return wq->pool_wq.v ? 0 : -ENOMEM; } @@ -3131,10 +3110,8 @@ static void free_pwqs(struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) free_percpu(wq->pool_wq.pcpu); - else if (wq->pool_wq.single) { - /* the pointer to free is stored right after the pwq */ - kfree(*(void **)(wq->pool_wq.single + 1)); - } + else + kmem_cache_free(pwq_cache, wq->pool_wq.single); } static int wq_clamp_max_active(int max_active, unsigned int flags, @@ -3734,6 +3711,10 @@ static int __init init_workqueues(void) BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < WORK_CPU_END * NR_STD_WORKER_POOLS); + WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); + + pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); + cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); -- cgit v1.2.2 From 30cdf2496d8ac2ef94b9b85f1891cf069490c8c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:57 -0700 Subject: workqueue: add workqueue_struct->pwqs list Add workqueue_struct->pwqs list and chain all pool_workqueues belonging to a workqueue there. This will be used to implement generic pool_workqueue iteration and handle multiple pool_workqueues for the scheduled unbound pools with custom attributes. This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f9e2ad9a3205..8634fc9d52d2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -169,6 +169,7 @@ struct pool_workqueue { int nr_active; /* L: nr of active works */ int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ + struct list_head pwqs_node; /* I: node on wq->pwqs */ } __aligned(1 << WORK_STRUCT_FLAG_BITS); /* @@ -212,6 +213,7 @@ struct workqueue_struct { struct pool_workqueue *single; unsigned long v; } pool_wq; /* I: pwq's */ + struct list_head pwqs; /* I: all pwqs of this wq */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ @@ -3096,14 +3098,32 @@ int keventd_up(void) return system_wq != NULL; } -static int alloc_pwqs(struct workqueue_struct *wq) +static int alloc_and_link_pwqs(struct workqueue_struct *wq) { - if (!(wq->flags & WQ_UNBOUND)) + int cpu; + + if (!(wq->flags & WQ_UNBOUND)) { wq->pool_wq.pcpu = alloc_percpu(struct pool_workqueue); - else - wq->pool_wq.single = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); + if (!wq->pool_wq.pcpu) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); - return wq->pool_wq.v ? 
0 : -ENOMEM; + list_add_tail(&pwq->pwqs_node, &wq->pwqs); + } + } else { + struct pool_workqueue *pwq; + + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); + if (!pwq) + return -ENOMEM; + + wq->pool_wq.single = pwq; + list_add_tail(&pwq->pwqs_node, &wq->pwqs); + } + + return 0; } static void free_pwqs(struct workqueue_struct *wq) @@ -3165,13 +3185,14 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, wq->saved_max_active = max_active; mutex_init(&wq->flush_mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); + INIT_LIST_HEAD(&wq->pwqs); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); - if (alloc_pwqs(wq) < 0) + if (alloc_and_link_pwqs(wq) < 0) goto err; for_each_pwq_cpu(cpu, wq) { -- cgit v1.2.2 From 49e3cf44df0663a521aa71e7667c52a9dbd0fce9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:58 -0700 Subject: workqueue: replace for_each_pwq_cpu() with for_each_pwq() Introduce for_each_pwq() which iterates all pool_workqueues of a workqueue using the recently added workqueue->pwqs list and replace for_each_pwq_cpu() usages with it. This is primarily to remove the single unbound CPU assumption from pwq iteration for the scheduled unbound pools with custom attributes support which would introduce multiple unbound pwqs per workqueue; however, it also simplifies iterator users. Note that pwq->pool initialization is moved to alloc_and_link_pwqs() as that now is the only place which is explicitly handling the two pwq types. This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 53 ++++++++++++++++++++++------------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8634fc9d52d2..2db1532b09dc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -273,12 +273,6 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, return WORK_CPU_END; } -static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask, - struct workqueue_struct *wq) -{ - return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 
1 : 2); -} - /* * CPU iterators * @@ -289,8 +283,6 @@ static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask, * * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND - * for_each_pwq_cpu() : possible CPUs for bound workqueues, - * WORK_CPU_UNBOUND for unbound workqueues */ #define for_each_wq_cpu(cpu) \ for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \ @@ -302,10 +294,13 @@ static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask, (cpu) < WORK_CPU_END; \ (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) -#define for_each_pwq_cpu(cpu, wq) \ - for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \ - (cpu) < WORK_CPU_END; \ - (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq))) +/** + * for_each_pwq - iterate through all pool_workqueues of the specified workqueue + * @pwq: iteration cursor + * @wq: the target workqueue + */ +#define for_each_pwq(pwq, wq) \ + list_for_each_entry((pwq), &(wq)->pwqs, pwqs_node) #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -2505,15 +2500,14 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, int flush_color, int work_color) { bool wait = false; - unsigned int cpu; + struct pool_workqueue *pwq; if (flush_color >= 0) { WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); atomic_set(&wq->nr_pwqs_to_flush, 1); } - for_each_pwq_cpu(cpu, wq) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); + for_each_pwq(pwq, wq) { struct worker_pool *pool = pwq->pool; spin_lock_irq(&pool->lock); @@ -2712,7 +2706,7 @@ EXPORT_SYMBOL_GPL(flush_workqueue); void drain_workqueue(struct workqueue_struct *wq) { unsigned int flush_cnt = 0; - unsigned int cpu; + struct pool_workqueue *pwq; /* * __queue_work() needs to test whether there are drainers, is much @@ -2726,8 +2720,7 @@ void drain_workqueue(struct workqueue_struct *wq) reflush: flush_workqueue(wq); - for_each_pwq_cpu(cpu, wq) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); + for_each_pwq(pwq, wq) { bool drained; spin_lock_irq(&pwq->pool->lock); @@ -3100,6 +3093,7 @@ int keventd_up(void) static int alloc_and_link_pwqs(struct workqueue_struct *wq) { + bool highpri = wq->flags & WQ_HIGHPRI; int cpu; if (!(wq->flags & WQ_UNBOUND)) { @@ -3110,6 +3104,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) for_each_possible_cpu(cpu) { struct pool_workqueue *pwq = get_pwq(cpu, wq); + pwq->pool = get_std_worker_pool(cpu, highpri); list_add_tail(&pwq->pwqs_node, &wq->pwqs); } } else { @@ -3120,6 +3115,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) return -ENOMEM; wq->pool_wq.single = pwq; + pwq->pool = get_std_worker_pool(WORK_CPU_UNBOUND, highpri); list_add_tail(&pwq->pwqs_node, &wq->pwqs); } @@ -3154,7 +3150,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, { va_list args, args1; struct workqueue_struct *wq; - unsigned int cpu; + struct pool_workqueue *pwq; size_t namelen; /* determine namelen, allocate wq and format name */ @@ -3195,11 +3191,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, if (alloc_and_link_pwqs(wq) < 0) goto err; - for_each_pwq_cpu(cpu, wq) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); - + for_each_pwq(pwq, wq) { BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); - pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI); pwq->wq = wq; pwq->flush_color = -1; pwq->max_active = max_active; @@ -3234,8 +3227,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, spin_lock_irq(&workqueue_lock); if (workqueue_freezing && wq->flags & 
WQ_FREEZABLE) - for_each_pwq_cpu(cpu, wq) - get_pwq(cpu, wq)->max_active = 0; + for_each_pwq(pwq, wq) + pwq->max_active = 0; list_add(&wq->list, &workqueues); @@ -3261,14 +3254,13 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key); */ void destroy_workqueue(struct workqueue_struct *wq) { - unsigned int cpu; + struct pool_workqueue *pwq; /* drain it before proceeding with destruction */ drain_workqueue(wq); /* sanity checks */ - for_each_pwq_cpu(cpu, wq) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); + for_each_pwq(pwq, wq) { int i; for (i = 0; i < WORK_NR_COLORS; i++) @@ -3330,7 +3322,7 @@ static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active) */ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) { - unsigned int cpu; + struct pool_workqueue *pwq; max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); @@ -3338,8 +3330,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) wq->saved_max_active = max_active; - for_each_pwq_cpu(cpu, wq) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); + for_each_pwq(pwq, wq) { struct worker_pool *pool = pwq->pool; spin_lock(&pool->lock); -- cgit v1.2.2 From 171169695555831e8cc41dbc1783700868631ea5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:58 -0700 Subject: workqueue: introduce for_each_pool() With the scheduled unbound pools with custom attributes, there will be multiple unbound pools, so it wouldn't be able to use for_each_wq_cpu() + for_each_std_worker_pool() to iterate through all pools. Introduce for_each_pool() which iterates through all pools using worker_pool_idr and use it instead of for_each_wq_cpu() + for_each_std_worker_pool() combination in freeze_workqueues_begin(). Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2db1532b09dc..55494e3f9f3b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -294,6 +294,14 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, (cpu) < WORK_CPU_END; \ (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) +/** + * for_each_pool - iterate through all worker_pools in the system + * @pool: iteration cursor + * @id: integer used for iteration + */ +#define for_each_pool(pool, id) \ + idr_for_each_entry(&worker_pool_idr, pool, id) + /** * for_each_pwq - iterate through all pool_workqueues of the specified workqueue * @pwq: iteration cursor @@ -3586,33 +3594,31 @@ EXPORT_SYMBOL_GPL(work_on_cpu); */ void freeze_workqueues_begin(void) { - unsigned int cpu; + struct worker_pool *pool; + int id; spin_lock_irq(&workqueue_lock); WARN_ON_ONCE(workqueue_freezing); workqueue_freezing = true; - for_each_wq_cpu(cpu) { - struct worker_pool *pool; + for_each_pool(pool, id) { struct workqueue_struct *wq; - for_each_std_worker_pool(pool, cpu) { - spin_lock(&pool->lock); - - WARN_ON_ONCE(pool->flags & POOL_FREEZING); - pool->flags |= POOL_FREEZING; + spin_lock(&pool->lock); - list_for_each_entry(wq, &workqueues, list) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); + WARN_ON_ONCE(pool->flags & POOL_FREEZING); + pool->flags |= POOL_FREEZING; - if (pwq && pwq->pool == pool && - (wq->flags & WQ_FREEZABLE)) - pwq->max_active = 0; - } + list_for_each_entry(wq, &workqueues, list) { + struct pool_workqueue *pwq = get_pwq(pool->cpu, wq); - spin_unlock(&pool->lock); + if (pwq && pwq->pool == pool && + (wq->flags & WQ_FREEZABLE)) + 
pwq->max_active = 0; } + + spin_unlock(&pool->lock); } spin_unlock_irq(&workqueue_lock); -- cgit v1.2.2 From 24b8a84718ed28a51b452881612c267ba3f2b263 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:58 -0700 Subject: workqueue: restructure pool / pool_workqueue iterations in freeze/thaw functions The three freeze/thaw related functions - freeze_workqueues_begin(), freeze_workqueues_busy() and thaw_workqueues() - need to iterate through all pool_workqueues of all freezable workqueues. They did it by first iterating pools and then visiting all pwqs (pool_workqueues) of all workqueues and process it if its pwq->pool matches the current pool. This is rather backwards and done this way partly because workqueue didn't have fitting iteration helpers and partly to avoid the number of lock operations on pool->lock. Workqueue now has fitting iterators and the locking operation overhead isn't anything to worry about - those locks are unlikely to be contended and the same CPU visiting the same set of locks multiple times isn't expensive. Restructure the three functions such that the flow better matches the logical steps and pwq iteration is done using for_each_pwq() inside workqueue iteration. * freeze_workqueues_begin(): Setting of FREEZING is moved into a separate for_each_pool() iteration. pwq iteration for clearing max_active is updated as described above. * freeze_workqueues_busy(): pwq iteration updated as described above. * thaw_workqueues(): The single for_each_wq_cpu() iteration is broken into three discrete steps - clearing FREEZING, restoring max_active, and kicking workers. The first and last steps use for_each_pool() and the second step uses pwq iteration described above. This makes the code easier to understand and removes the use of for_each_wq_cpu() for walking pwqs, which can't support multiple unbound pwqs which will be needed to implement unbound workqueues with custom attributes. This patch doesn't introduce any visible behavior changes. 
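As a condensed illustration (not an additional hunk; it only restates what the diff below does), the freeze side now reads roughly:

	for_each_pool(pool, id) {
		spin_lock(&pool->lock);
		pool->flags |= POOL_FREEZING;	/* step 1: mark every pool */
		spin_unlock(&pool->lock);
	}

	list_for_each_entry(wq, &workqueues, list) {
		if (!(wq->flags & WQ_FREEZABLE))
			continue;
		for_each_pwq(pwq, wq) {
			spin_lock(&pwq->pool->lock);
			pwq->max_active = 0;	/* step 2: throttle each pwq */
			spin_unlock(&pwq->pool->lock);
		}
	}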
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 87 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 55494e3f9f3b..8942cc74d83b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3595,6 +3595,8 @@ EXPORT_SYMBOL_GPL(work_on_cpu); void freeze_workqueues_begin(void) { struct worker_pool *pool; + struct workqueue_struct *wq; + struct pool_workqueue *pwq; int id; spin_lock_irq(&workqueue_lock); @@ -3602,23 +3604,24 @@ void freeze_workqueues_begin(void) WARN_ON_ONCE(workqueue_freezing); workqueue_freezing = true; + /* set FREEZING */ for_each_pool(pool, id) { - struct workqueue_struct *wq; - spin_lock(&pool->lock); - WARN_ON_ONCE(pool->flags & POOL_FREEZING); pool->flags |= POOL_FREEZING; + spin_unlock(&pool->lock); + } - list_for_each_entry(wq, &workqueues, list) { - struct pool_workqueue *pwq = get_pwq(pool->cpu, wq); + /* suppress further executions by setting max_active to zero */ + list_for_each_entry(wq, &workqueues, list) { + if (!(wq->flags & WQ_FREEZABLE)) + continue; - if (pwq && pwq->pool == pool && - (wq->flags & WQ_FREEZABLE)) - pwq->max_active = 0; + for_each_pwq(pwq, wq) { + spin_lock(&pwq->pool->lock); + pwq->max_active = 0; + spin_unlock(&pwq->pool->lock); } - - spin_unlock(&pool->lock); } spin_unlock_irq(&workqueue_lock); @@ -3639,25 +3642,22 @@ void freeze_workqueues_begin(void) */ bool freeze_workqueues_busy(void) { - unsigned int cpu; bool busy = false; + struct workqueue_struct *wq; + struct pool_workqueue *pwq; spin_lock_irq(&workqueue_lock); WARN_ON_ONCE(!workqueue_freezing); - for_each_wq_cpu(cpu) { - struct workqueue_struct *wq; + list_for_each_entry(wq, &workqueues, list) { + if (!(wq->flags & WQ_FREEZABLE)) + continue; /* * nr_active is monotonically decreasing. It's safe * to peek without lock. 
*/ - list_for_each_entry(wq, &workqueues, list) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); - - if (!pwq || !(wq->flags & WQ_FREEZABLE)) - continue; - + for_each_pwq(pwq, wq) { WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; @@ -3681,40 +3681,43 @@ out_unlock: */ void thaw_workqueues(void) { - unsigned int cpu; + struct workqueue_struct *wq; + struct pool_workqueue *pwq; + struct worker_pool *pool; + int id; spin_lock_irq(&workqueue_lock); if (!workqueue_freezing) goto out_unlock; - for_each_wq_cpu(cpu) { - struct worker_pool *pool; - struct workqueue_struct *wq; - - for_each_std_worker_pool(pool, cpu) { - spin_lock(&pool->lock); - - WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); - pool->flags &= ~POOL_FREEZING; - - list_for_each_entry(wq, &workqueues, list) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); - - if (!pwq || pwq->pool != pool || - !(wq->flags & WQ_FREEZABLE)) - continue; - - /* restore max_active and repopulate worklist */ - pwq_set_max_active(pwq, wq->saved_max_active); - } + /* clear FREEZING */ + for_each_pool(pool, id) { + spin_lock(&pool->lock); + WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); + pool->flags &= ~POOL_FREEZING; + spin_unlock(&pool->lock); + } - wake_up_worker(pool); + /* restore max_active and repopulate worklist */ + list_for_each_entry(wq, &workqueues, list) { + if (!(wq->flags & WQ_FREEZABLE)) + continue; - spin_unlock(&pool->lock); + for_each_pwq(pwq, wq) { + spin_lock(&pwq->pool->lock); + pwq_set_max_active(pwq, wq->saved_max_active); + spin_unlock(&pwq->pool->lock); } } + /* kick workers */ + for_each_pool(pool, id) { + spin_lock(&pool->lock); + wake_up_worker(pool); + spin_unlock(&pool->lock); + } + workqueue_freezing = false; out_unlock: spin_unlock_irq(&workqueue_lock); -- cgit v1.2.2 From 493a1724fef9a3e931d9199f1a19e358e526a6e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:59 -0700 Subject: workqueue: add workqueue_struct->maydays list to replace mayday cpu iterators Similar to how pool_workqueue iteration used to be, raising and servicing mayday requests is based on CPU numbers. It's hairy because cpumask_t may not be able to handle WORK_CPU_UNBOUND and cpumasks are assumed to be always set on UP. This is ugly and can't handle the multiple unbound pools that will be added for unbound workqueues w/ custom attributes. Add workqueue_struct->maydays. When a pool_workqueue needs rescuing, it gets chained on the list through pool_workqueue->mayday_node and rescuer_thread() consumes the list until it's empty. This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 77 ++++++++++++++++++++---------------------------------- 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8942cc74d83b..26c67c76b6c5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -170,6 +170,7 @@ struct pool_workqueue { int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ struct list_head pwqs_node; /* I: node on wq->pwqs */ + struct list_head mayday_node; /* W: node on wq->maydays */ } __aligned(1 << WORK_STRUCT_FLAG_BITS); /* @@ -181,27 +182,6 @@ struct wq_flusher { struct completion done; /* flush completion */ }; -/* - * All cpumasks are assumed to be always set on UP and thus can't be - * used to determine whether there's something to be done.
- */ -#ifdef CONFIG_SMP -typedef cpumask_var_t mayday_mask_t; -#define mayday_test_and_set_cpu(cpu, mask) \ - cpumask_test_and_set_cpu((cpu), (mask)) -#define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask)) -#define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask)) -#define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp)) -#define free_mayday_mask(mask) free_cpumask_var((mask)) -#else -typedef unsigned long mayday_mask_t; -#define mayday_test_and_set_cpu(cpu, mask) test_and_set_bit(0, &(mask)) -#define mayday_clear_cpu(cpu, mask) clear_bit(0, &(mask)) -#define for_each_mayday_cpu(cpu, mask) if ((cpu) = 0, (mask)) -#define alloc_mayday_mask(maskp, gfp) true -#define free_mayday_mask(mask) do { } while (0) -#endif - /* * The externally visible workqueue abstraction is an array of * per-CPU workqueues: @@ -224,7 +204,7 @@ struct workqueue_struct { struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ - mayday_mask_t mayday_mask; /* cpus requesting rescue */ + struct list_head maydays; /* W: pwqs requesting rescue */ struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* W: drain in progress */ @@ -1850,23 +1830,21 @@ static void idle_worker_timeout(unsigned long __pool) spin_unlock_irq(&pool->lock); } -static bool send_mayday(struct work_struct *work) +static void send_mayday(struct work_struct *work) { struct pool_workqueue *pwq = get_work_pwq(work); struct workqueue_struct *wq = pwq->wq; - unsigned int cpu; + + lockdep_assert_held(&workqueue_lock); if (!(wq->flags & WQ_RESCUER)) - return false; + return; /* mayday mayday mayday */ - cpu = pwq->pool->cpu; - /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ - if (cpu == WORK_CPU_UNBOUND) - cpu = 0; - if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask)) + if (list_empty(&pwq->mayday_node)) { + list_add_tail(&pwq->mayday_node, &wq->maydays); wake_up_process(wq->rescuer->task); - return true; + } } static void pool_mayday_timeout(unsigned long __pool) @@ -1874,7 +1852,8 @@ static void pool_mayday_timeout(unsigned long __pool) struct worker_pool *pool = (void *)__pool; struct work_struct *work; - spin_lock_irq(&pool->lock); + spin_lock_irq(&workqueue_lock); /* for wq->maydays */ + spin_lock(&pool->lock); if (need_to_create_worker(pool)) { /* @@ -1887,7 +1866,8 @@ static void pool_mayday_timeout(unsigned long __pool) send_mayday(work); } - spin_unlock_irq(&pool->lock); + spin_unlock(&pool->lock); + spin_unlock_irq(&workqueue_lock); mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); } @@ -2336,8 +2316,6 @@ static int rescuer_thread(void *__rescuer) struct worker *rescuer = __rescuer; struct workqueue_struct *wq = rescuer->rescue_wq; struct list_head *scheduled = &rescuer->scheduled; - bool is_unbound = wq->flags & WQ_UNBOUND; - unsigned int cpu; set_user_nice(current, RESCUER_NICE_LEVEL); @@ -2355,18 +2333,19 @@ repeat: return 0; } - /* - * See whether any cpu is asking for help. Unbounded - * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND. - */ - for_each_mayday_cpu(cpu, wq->mayday_mask) { - unsigned int tcpu = is_unbound ? 
WORK_CPU_UNBOUND : cpu; - struct pool_workqueue *pwq = get_pwq(tcpu, wq); + /* see whether any pwq is asking for help */ + spin_lock_irq(&workqueue_lock); + + while (!list_empty(&wq->maydays)) { + struct pool_workqueue *pwq = list_first_entry(&wq->maydays, + struct pool_workqueue, mayday_node); struct worker_pool *pool = pwq->pool; struct work_struct *work, *n; __set_current_state(TASK_RUNNING); - mayday_clear_cpu(cpu, wq->mayday_mask); + list_del_init(&pwq->mayday_node); + + spin_unlock_irq(&workqueue_lock); /* migrate to the target cpu if possible */ worker_maybe_bind_and_lock(pool); @@ -2392,9 +2371,12 @@ repeat: wake_up_worker(pool); rescuer->pool = NULL; - spin_unlock_irq(&pool->lock); + spin_unlock(&pool->lock); + spin_lock(&workqueue_lock); } + spin_unlock_irq(&workqueue_lock); + /* rescuers should never participate in concurrency management */ WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); schedule(); @@ -3192,6 +3174,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, INIT_LIST_HEAD(&wq->pwqs); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); + INIT_LIST_HEAD(&wq->maydays); lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); @@ -3205,14 +3188,12 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, pwq->flush_color = -1; pwq->max_active = max_active; INIT_LIST_HEAD(&pwq->delayed_works); + INIT_LIST_HEAD(&pwq->mayday_node); } if (flags & WQ_RESCUER) { struct worker *rescuer; - if (!alloc_mayday_mask(&wq->mayday_mask, GFP_KERNEL)) - goto err; - wq->rescuer = rescuer = alloc_worker(); if (!rescuer) goto err; @@ -3246,7 +3227,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, err: if (wq) { free_pwqs(wq); - free_mayday_mask(wq->mayday_mask); kfree(wq->rescuer); kfree(wq); } @@ -3289,7 +3269,6 @@ void destroy_workqueue(struct workqueue_struct *wq) if (wq->flags & WQ_RESCUER) { kthread_stop(wq->rescuer->task); - free_mayday_mask(wq->mayday_mask); kfree(wq->rescuer); } -- cgit v1.2.2 From d84ff0512f1bfc0d8c864efadb4523fce68919cc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:59 -0700 Subject: workqueue: consistently use int for @cpu variables Workqueue is mixing unsigned int and int for @cpu variables. There's no point in using unsigned int for cpus - many of cpu related APIs take int anyway. Consistently use int for @cpu variables so that we can use negative values to mark special ones. This patch doesn't introduce any visible behavior changes. 
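One payoff, as an illustrative sketch: with a signed type, a negative pool->cpu can later serve as "not bound to any CPU". The following pattern is taken from a later patch in this series and is shown here only to motivate the change:

	if (pool->cpu >= 0)	/* bound pool: create the worker on its node */
		worker->task = kthread_create_on_node(worker_thread, worker,
					cpu_to_node(pool->cpu),
					"kworker/%d:%d%s", pool->cpu, id, pri);
	else			/* unbound pool: any CPU will do */
		worker->task = kthread_create(worker_thread, worker,
					      "kworker/u:%d%s", id, pri);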
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 6 +++--- kernel/workqueue.c | 24 +++++++++++------------- kernel/workqueue_internal.h | 5 ++--- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5bd030f630a9..899be6636d20 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -435,7 +435,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); -extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); +extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); /* @@ -466,12 +466,12 @@ static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwo } #ifndef CONFIG_SMP -static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } #else -long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); +long work_on_cpu(int cpu, long (*fn)(void *), void *arg); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 26c67c76b6c5..73c5f68065b5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -124,7 +124,7 @@ enum { struct worker_pool { spinlock_t lock; /* the pool lock */ - unsigned int cpu; /* I: the associated cpu */ + int cpu; /* I: the associated cpu */ int id; /* I: pool ID */ unsigned int flags; /* X: flags */ @@ -467,8 +467,7 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) return &pools[highpri]; } -static struct pool_workqueue *get_pwq(unsigned int cpu, - struct workqueue_struct *wq) +static struct pool_workqueue *get_pwq(int cpu, struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) { if (likely(cpu < nr_cpu_ids)) @@ -730,7 +729,7 @@ static void wake_up_worker(struct worker_pool *pool) * CONTEXT: * spin_lock_irq(rq->lock) */ -void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) +void wq_worker_waking_up(struct task_struct *task, int cpu) { struct worker *worker = kthread_data(task); @@ -755,8 +754,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) * RETURNS: * Worker task on @cpu to wake up, %NULL if none. */ -struct task_struct *wq_worker_sleeping(struct task_struct *task, - unsigned int cpu) +struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) { struct worker *worker = kthread_data(task), *to_wakeup = NULL; struct worker_pool *pool; @@ -1159,7 +1157,7 @@ static bool is_chained_work(struct workqueue_struct *wq) return worker && worker->current_pwq->wq == wq; } -static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, +static void __queue_work(int cpu, struct workqueue_struct *wq, struct work_struct *work) { struct pool_workqueue *pwq; @@ -1714,7 +1712,7 @@ static struct worker *create_worker(struct worker_pool *pool) if (pool->cpu != WORK_CPU_UNBOUND) worker->task = kthread_create_on_node(worker_thread, worker, cpu_to_node(pool->cpu), - "kworker/%u:%d%s", pool->cpu, id, pri); + "kworker/%d:%d%s", pool->cpu, id, pri); else worker->task = kthread_create(worker_thread, worker, "kworker/u:%d%s", id, pri); @@ -3345,7 +3343,7 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active); * RETURNS: * %true if congested, %false otherwise. 
*/ -bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) +bool workqueue_congested(int cpu, struct workqueue_struct *wq) { struct pool_workqueue *pwq = get_pwq(cpu, wq); @@ -3461,7 +3459,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; + int cpu = (unsigned long)hcpu; struct worker_pool *pool; switch (action & ~CPU_TASKS_FROZEN) { @@ -3507,7 +3505,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; + int cpu = (unsigned long)hcpu; struct work_struct unbind_work; switch (action & ~CPU_TASKS_FROZEN) { @@ -3547,7 +3545,7 @@ static void work_for_cpu_fn(struct work_struct *work) * It is up to the caller to ensure that the cpu doesn't go offline. * The caller must not hold any locks which would prevent @fn from completing. */ -long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { struct work_for_cpu wfc = { .fn = fn, .arg = arg }; @@ -3705,7 +3703,7 @@ out_unlock: static int __init init_workqueues(void) { - unsigned int cpu; + int cpu; /* make sure we have enough bits for OFFQ pool ID */ BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index f9c887731e2b..f116f071d919 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -59,8 +59,7 @@ static inline struct worker *current_wq_worker(void) * Scheduler hooks for concurrency managed workqueue. Only to be used from * sched.c and workqueue.c. */ -void wq_worker_waking_up(struct task_struct *task, unsigned int cpu); -struct task_struct *wq_worker_sleeping(struct task_struct *task, - unsigned int cpu); +void wq_worker_waking_up(struct task_struct *task, int cpu); +struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu); #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ -- cgit v1.2.2 From 420c0ddb1f205a3511b766d0dfee2cc87ed9dae0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:29:59 -0700 Subject: workqueue: remove workqueue_struct->pool_wq.single workqueue->pool_wq union is used to point either to percpu pwqs (pool_workqueues) or single unbound pwq. As the first pwq can be accessed via workqueue->pwqs list, there's no reason for the single pointer anymore. Use list_first_entry(workqueue->pwqs) to access the unbound pwq and drop workqueue->pool_wq.single pointer and the pool_wq union. It simplifies the code and eases implementing multiple unbound pools w/ custom attributes. This patch doesn't introduce any visible behavior changes. 
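Illustrative sketch of the replacement access (mirroring the hunks below): for an unbound workqueue the single pwq is simply the first entry on wq->pwqs.

	/* unbound wq: the one and only pwq heads wq->pwqs */
	struct pool_workqueue *pwq = list_first_entry(&wq->pwqs,
					struct pool_workqueue, pwqs_node);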
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 73c5f68065b5..acee7b525d51 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -188,11 +188,7 @@ struct wq_flusher { */ struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ - union { - struct pool_workqueue __percpu *pcpu; - struct pool_workqueue *single; - unsigned long v; - } pool_wq; /* I: pwq's */ + struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */ struct list_head pwqs; /* I: all pwqs of this wq */ struct list_head list; /* W: list of all workqueues */ @@ -471,9 +467,11 @@ static struct pool_workqueue *get_pwq(int cpu, struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) { if (likely(cpu < nr_cpu_ids)) - return per_cpu_ptr(wq->pool_wq.pcpu, cpu); - } else if (likely(cpu == WORK_CPU_UNBOUND)) - return wq->pool_wq.single; + return per_cpu_ptr(wq->cpu_pwqs, cpu); + } else if (likely(cpu == WORK_CPU_UNBOUND)) { + return list_first_entry(&wq->pwqs, struct pool_workqueue, + pwqs_node); + } return NULL; } @@ -3085,8 +3083,8 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) int cpu; if (!(wq->flags & WQ_UNBOUND)) { - wq->pool_wq.pcpu = alloc_percpu(struct pool_workqueue); - if (!wq->pool_wq.pcpu) + wq->cpu_pwqs = alloc_percpu(struct pool_workqueue); + if (!wq->cpu_pwqs) return -ENOMEM; for_each_possible_cpu(cpu) { @@ -3102,7 +3100,6 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) if (!pwq) return -ENOMEM; - wq->pool_wq.single = pwq; pwq->pool = get_std_worker_pool(WORK_CPU_UNBOUND, highpri); list_add_tail(&pwq->pwqs_node, &wq->pwqs); } @@ -3113,9 +3110,10 @@ static void free_pwqs(struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) - free_percpu(wq->pool_wq.pcpu); - else - kmem_cache_free(pwq_cache, wq->pool_wq.single); + free_percpu(wq->cpu_pwqs); + else if (!list_empty(&wq->pwqs)) + kmem_cache_free(pwq_cache, list_first_entry(&wq->pwqs, + struct pool_workqueue, pwqs_node)); } static int wq_clamp_max_active(int max_active, unsigned int flags, -- cgit v1.2.2 From 7fb98ea79cecb14fc1735544146be06fdb1944c3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:00 -0700 Subject: workqueue: replace get_pwq() with explicit per_cpu_ptr() accesses and first_pwq() get_pwq() takes @cpu, which can also be WORK_CPU_UNBOUND, and @wq and returns the matching pwq (pool_workqueue). We want to move away from using @cpu for identifying pools and pwqs for unbound pools with custom attributes and there is only one user - workqueue_congested() - which makes use of the WQ_UNBOUND conditional in get_pwq(). All other users already know whether they're dealing with a per-cpu or unbound workqueue. Replace get_pwq() with explicit per_cpu_ptr(wq->cpu_pwqs, cpu) for per-cpu workqueues and first_pwq() for unbound ones, and open-code WQ_UNBOUND conditional in workqueue_congested(). Note that this makes workqueue_congested() behave slightly differently when @cpu other than WORK_CPU_UNBOUND is specified. It ignores @cpu for unbound workqueues and always uses the first pwq instead of oopsing.
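Condensed from the workqueue_congested() hunk below, the caller-side convention becomes:

	if (!(wq->flags & WQ_UNBOUND))
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);	/* per-cpu workqueue */
	else
		pwq = first_pwq(wq);			/* unbound: @cpu is ignored */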
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index acee7b525d51..577ac719eaec 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -463,16 +463,9 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) return &pools[highpri]; } -static struct pool_workqueue *get_pwq(int cpu, struct workqueue_struct *wq) +static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) { - if (!(wq->flags & WQ_UNBOUND)) { - if (likely(cpu < nr_cpu_ids)) - return per_cpu_ptr(wq->cpu_pwqs, cpu); - } else if (likely(cpu == WORK_CPU_UNBOUND)) { - return list_first_entry(&wq->pwqs, struct pool_workqueue, - pwqs_node); - } - return NULL; + return list_first_entry(&wq->pwqs, struct pool_workqueue, pwqs_node); } static unsigned int work_color_to_flags(int color) @@ -1191,7 +1184,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * work needs to be queued on that cpu to guarantee * non-reentrancy. */ - pwq = get_pwq(cpu, wq); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); last_pool = get_work_pool(work); if (last_pool && last_pool != pwq->pool) { @@ -1202,7 +1195,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, worker = find_worker_executing_work(last_pool, work); if (worker && worker->current_pwq->wq == wq) { - pwq = get_pwq(last_pool->cpu, wq); + pwq = per_cpu_ptr(wq->cpu_pwqs, last_pool->cpu); } else { /* meh... not running there, queue here */ spin_unlock(&last_pool->lock); @@ -1212,7 +1205,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, spin_lock(&pwq->pool->lock); } } else { - pwq = get_pwq(WORK_CPU_UNBOUND, wq); + pwq = first_pwq(wq); spin_lock(&pwq->pool->lock); } @@ -1650,7 +1643,7 @@ static void rebind_workers(struct worker_pool *pool) else wq = system_wq; - insert_work(get_pwq(pool->cpu, wq), rebind_work, + insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work, worker->scheduled.next, work_color_to_flags(WORK_NO_COLOR)); } @@ -3088,7 +3081,8 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) return -ENOMEM; for_each_possible_cpu(cpu) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); + struct pool_workqueue *pwq = + per_cpu_ptr(wq->cpu_pwqs, cpu); pwq->pool = get_std_worker_pool(cpu, highpri); list_add_tail(&pwq->pwqs_node, &wq->pwqs); @@ -3343,7 +3337,12 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active); */ bool workqueue_congested(int cpu, struct workqueue_struct *wq) { - struct pool_workqueue *pwq = get_pwq(cpu, wq); + struct pool_workqueue *pwq; + + if (!(wq->flags & WQ_UNBOUND)) + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + else + pwq = first_pwq(wq); return !list_empty(&pwq->delayed_works); } -- cgit v1.2.2 From 76af4d936153afec176c53378e6ba8671e7e237d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:00 -0700 Subject: workqueue: update synchronization rules on workqueue->pwqs Make workqueue->pwqs protected by workqueue_lock for writes and sched-RCU protected for reads. Lockdep assertions are added to for_each_pwq() and first_pwq() and all their users are converted to either hold workqueue_lock or disable preemption/irq. alloc_and_link_pwqs() is updated to use list_add_tail_rcu() for consistency which isn't strictly necessary as the workqueue isn't visible. destroy_workqueue() isn't updated to sched-RCU release pwqs. This is okay as the workqueue should have no users left by that point. The locking is superfluous at this point.
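As an illustrative aside, the new read-side rule amounts to the sketch below; any context with preemption disabled qualifies as the sched-RCU read side, rcu_read_lock_sched() being the explicit form (the hunks that follow mostly use local_irq_disable()/preempt_disable() instead):

	rcu_read_lock_sched();
	for_each_pwq(pwq, wq) {
		/*
		 * pwq may be inspected here but must not be used beyond
		 * this critical section unless pinned by other means.
		 */
	}
	rcu_read_unlock_sched();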
This is to help implementation of unbound pools/pwqs with custom attributes. This patch doesn't introduce any behavior changes. v2: Updated for_each_pwq() use if/else for the hidden assertion statement instead of just if as suggested by Lai. This avoids confusing the following else clause. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 87 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 17 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 577ac719eaec..e060ff2bc20c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -118,6 +119,8 @@ enum { * F: wq->flush_mutex protected. * * W: workqueue_lock protected. + * + * R: workqueue_lock protected for writes. Sched-RCU protected for reads. */ /* struct worker is defined in workqueue_internal.h */ @@ -169,7 +172,7 @@ struct pool_workqueue { int nr_active; /* L: nr of active works */ int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ - struct list_head pwqs_node; /* I: node on wq->pwqs */ + struct list_head pwqs_node; /* R: node on wq->pwqs */ struct list_head mayday_node; /* W: node on wq->maydays */ } __aligned(1 << WORK_STRUCT_FLAG_BITS); @@ -189,7 +192,7 @@ struct wq_flusher { struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */ - struct list_head pwqs; /* I: all pwqs of this wq */ + struct list_head pwqs; /* R: all pwqs of this wq */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ @@ -227,6 +230,11 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); #define CREATE_TRACE_POINTS #include +#define assert_rcu_or_wq_lock() \ + rcu_lockdep_assert(rcu_read_lock_sched_held() || \ + lockdep_is_held(&workqueue_lock), \ + "sched RCU or workqueue lock should be held") + #define for_each_std_worker_pool(pool, cpu) \ for ((pool) = &std_worker_pools(cpu)[0]; \ (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) @@ -282,9 +290,18 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, * for_each_pwq - iterate through all pool_workqueues of the specified workqueue * @pwq: iteration cursor * @wq: the target workqueue + * + * This must be called either with workqueue_lock held or sched RCU read + * locked. If the pwq needs to be used beyond the locking in effect, the + * caller is responsible for guaranteeing that the pwq stays online. + * + * The if/else clause exists only for the lockdep assertion and can be + * ignored. */ #define for_each_pwq(pwq, wq) \ - list_for_each_entry((pwq), &(wq)->pwqs, pwqs_node) + list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \ + if (({ assert_rcu_or_wq_lock(); false; })) { } \ + else #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -463,9 +480,19 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) return &pools[highpri]; } +/** + * first_pwq - return the first pool_workqueue of the specified workqueue + * @wq: the target workqueue + * + * This must be called either with workqueue_lock held or sched RCU read + * locked. If the pwq needs to be used beyond the locking in effect, the + * caller is responsible for guaranteeing that the pwq stays online. 
+ */ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) { - return list_first_entry(&wq->pwqs, struct pool_workqueue, pwqs_node); + assert_rcu_or_wq_lock(); + return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue, + pwqs_node); } static unsigned int work_color_to_flags(int color) @@ -2486,10 +2513,12 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, atomic_set(&wq->nr_pwqs_to_flush, 1); } + local_irq_disable(); + for_each_pwq(pwq, wq) { struct worker_pool *pool = pwq->pool; - spin_lock_irq(&pool->lock); + spin_lock(&pool->lock); if (flush_color >= 0) { WARN_ON_ONCE(pwq->flush_color != -1); @@ -2506,9 +2535,11 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, pwq->work_color = work_color; } - spin_unlock_irq(&pool->lock); + spin_unlock(&pool->lock); } + local_irq_enable(); + if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) complete(&wq->first_flusher->done); @@ -2699,12 +2730,14 @@ void drain_workqueue(struct workqueue_struct *wq) reflush: flush_workqueue(wq); + local_irq_disable(); + for_each_pwq(pwq, wq) { bool drained; - spin_lock_irq(&pwq->pool->lock); + spin_lock(&pwq->pool->lock); drained = !pwq->nr_active && list_empty(&pwq->delayed_works); - spin_unlock_irq(&pwq->pool->lock); + spin_unlock(&pwq->pool->lock); if (drained) continue; @@ -2713,13 +2746,17 @@ reflush: (flush_cnt % 100 == 0 && flush_cnt <= 1000)) pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n", wq->name, flush_cnt); + + local_irq_enable(); goto reflush; } - spin_lock_irq(&workqueue_lock); + spin_lock(&workqueue_lock); if (!--wq->nr_drainers) wq->flags &= ~WQ_DRAINING; - spin_unlock_irq(&workqueue_lock); + spin_unlock(&workqueue_lock); + + local_irq_enable(); } EXPORT_SYMBOL_GPL(drain_workqueue); @@ -3085,7 +3122,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) per_cpu_ptr(wq->cpu_pwqs, cpu); pwq->pool = get_std_worker_pool(cpu, highpri); - list_add_tail(&pwq->pwqs_node, &wq->pwqs); + list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); } } else { struct pool_workqueue *pwq; @@ -3095,7 +3132,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) return -ENOMEM; pwq->pool = get_std_worker_pool(WORK_CPU_UNBOUND, highpri); - list_add_tail(&pwq->pwqs_node, &wq->pwqs); + list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); } return 0; @@ -3172,6 +3209,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, if (alloc_and_link_pwqs(wq) < 0) goto err; + local_irq_disable(); for_each_pwq(pwq, wq) { BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); pwq->wq = wq; @@ -3180,6 +3218,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, INIT_LIST_HEAD(&pwq->delayed_works); INIT_LIST_HEAD(&pwq->mayday_node); } + local_irq_enable(); if (flags & WQ_RESCUER) { struct worker *rescuer; @@ -3237,24 +3276,32 @@ void destroy_workqueue(struct workqueue_struct *wq) /* drain it before proceeding with destruction */ drain_workqueue(wq); + spin_lock_irq(&workqueue_lock); + /* sanity checks */ for_each_pwq(pwq, wq) { int i; - for (i = 0; i < WORK_NR_COLORS; i++) - if (WARN_ON(pwq->nr_in_flight[i])) + for (i = 0; i < WORK_NR_COLORS; i++) { + if (WARN_ON(pwq->nr_in_flight[i])) { + spin_unlock_irq(&workqueue_lock); return; + } + } + if (WARN_ON(pwq->nr_active) || - WARN_ON(!list_empty(&pwq->delayed_works))) + WARN_ON(!list_empty(&pwq->delayed_works))) { + spin_unlock_irq(&workqueue_lock); return; + } } /* * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze 
races us. */ - spin_lock_irq(&workqueue_lock); list_del(&wq->list); + spin_unlock_irq(&workqueue_lock); if (wq->flags & WQ_RESCUER) { @@ -3338,13 +3385,19 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active); bool workqueue_congested(int cpu, struct workqueue_struct *wq) { struct pool_workqueue *pwq; + bool ret; + + preempt_disable(); if (!(wq->flags & WQ_UNBOUND)) pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); else pwq = first_pwq(wq); - return !list_empty(&pwq->delayed_works); + ret = !list_empty(&pwq->delayed_works); + preempt_enable(); + + return ret; } EXPORT_SYMBOL_GPL(workqueue_congested); -- cgit v1.2.2 From fa1b54e69bc6c04674c9bb96a6cfa8b2c9f44771 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:00 -0700 Subject: workqueue: update synchronization rules on worker_pool_idr Make worker_pool_idr protected by workqueue_lock for writes and sched-RCU protected for reads. Lockdep assertions are added to for_each_pool() and get_work_pool() and all their users are converted to either hold workqueue_lock or disable preemption/irq. worker_pool_assign_id() is updated to hold workqueue_lock when allocating a pool ID. As idr_get_new() always performs RCU-safe assignment, this is enough on the writer side. As standard pools are never destroyed, there's nothing to do on that side. The locking is superfluous at this point. This is to help implementation of unbound pools/pwqs with custom attributes. This patch doesn't introduce any behavior changes. v2: Updated for_each_pwq() use if/else for the hidden assertion statement instead of just if as suggested by Lai. This avoids confusing the following else clause. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 71 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e060ff2bc20c..46381490f496 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -282,9 +282,18 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, * for_each_pool - iterate through all worker_pools in the system * @pool: iteration cursor * @id: integer used for iteration + * + * This must be called either with workqueue_lock held or sched RCU read + * locked. If the pool needs to be used beyond the locking in effect, the + * caller is responsible for guaranteeing that the pool stays online. + * + * The if/else clause exists only for the lockdep assertion and can be + * ignored. */ #define for_each_pool(pool, id) \ - idr_for_each_entry(&worker_pool_idr, pool, id) + idr_for_each_entry(&worker_pool_idr, pool, id) \ + if (({ assert_rcu_or_wq_lock(); false; })) { } \ + else /** * for_each_pwq - iterate through all pool_workqueues of the specified workqueue @@ -432,8 +441,10 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_std_worker_pools); static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS]; -/* idr of all pools */ -static DEFINE_MUTEX(worker_pool_idr_mutex); +/* + * idr of all pools. Modifications are protected by workqueue_lock. Read + * accesses are sched-RCU protected.
+ */ static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); @@ -456,21 +467,16 @@ static int worker_pool_assign_id(struct worker_pool *pool) { int ret; - mutex_lock(&worker_pool_idr_mutex); - idr_pre_get(&worker_pool_idr, GFP_KERNEL); - ret = idr_get_new(&worker_pool_idr, pool, &pool->id); - mutex_unlock(&worker_pool_idr_mutex); + do { + if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL)) + return -ENOMEM; - return ret; -} + spin_lock_irq(&workqueue_lock); + ret = idr_get_new(&worker_pool_idr, pool, &pool->id); + spin_unlock_irq(&workqueue_lock); + } while (ret == -EAGAIN); -/* - * Lookup worker_pool by id. The idr currently is built during boot and - * never modified. Don't worry about locking for now. - */ -static struct worker_pool *worker_pool_by_id(int pool_id) -{ - return idr_find(&worker_pool_idr, pool_id); + return ret; } static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) @@ -586,13 +592,23 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) * @work: the work item of interest * * Return the worker_pool @work was last associated with. %NULL if none. + * + * Pools are created and destroyed under workqueue_lock, and allows read + * access under sched-RCU read lock. As such, this function should be + * called under workqueue_lock or with preemption disabled. + * + * All fields of the returned pool are accessible as long as the above + * mentioned locking is in effect. If the returned pool needs to be used + * beyond the critical section, the caller is responsible for ensuring the + * returned pool is and stays online. */ static struct worker_pool *get_work_pool(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - struct worker_pool *pool; int pool_id; + assert_rcu_or_wq_lock(); + if (data & WORK_STRUCT_PWQ) return ((struct pool_workqueue *) (data & WORK_STRUCT_WQ_DATA_MASK))->pool; @@ -601,9 +617,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) if (pool_id == WORK_OFFQ_POOL_NONE) return NULL; - pool = worker_pool_by_id(pool_id); - WARN_ON_ONCE(!pool); - return pool; + return idr_find(&worker_pool_idr, pool_id); } /** @@ -2767,11 +2781,15 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) struct pool_workqueue *pwq; might_sleep(); + + local_irq_disable(); pool = get_work_pool(work); - if (!pool) + if (!pool) { + local_irq_enable(); return false; + } - spin_lock_irq(&pool->lock); + spin_lock(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { @@ -3414,19 +3432,22 @@ EXPORT_SYMBOL_GPL(workqueue_congested); */ unsigned int work_busy(struct work_struct *work) { - struct worker_pool *pool = get_work_pool(work); + struct worker_pool *pool; unsigned long flags; unsigned int ret = 0; if (work_pending(work)) ret |= WORK_BUSY_PENDING; + local_irq_save(flags); + pool = get_work_pool(work); if (pool) { - spin_lock_irqsave(&pool->lock, flags); + spin_lock(&pool->lock); if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; - spin_unlock_irqrestore(&pool->lock, flags); + spin_unlock(&pool->lock); } + local_irq_restore(flags); return ret; } -- cgit v1.2.2 From 34a06bd6b6fa92ccd9d3e6866b6cb91264c3cd20 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:00 -0700 Subject: workqueue: replace POOL_MANAGING_WORKERS flag with worker_pool->manager_arb POOL_MANAGING_WORKERS is used to synchronize the manager role. 
Synchronizing among workers doesn't need blocking and that's why it's implemented as a flag. It got converted to a mutex a while back to add blocking wait from CPU hotplug path - 6037315269 ("workqueue: use mutex for global_cwq manager exclusion"). Later it turned out that synchronization among workers and cpu hotplug need to be done separately. Eventually, POOL_MANAGING_WORKERS is restored and workqueue->manager_mutex got morphed into workqueue->assoc_mutex - 552a37e936 ("workqueue: restore POOL_MANAGING_WORKERS") and b2eb83d123 ("workqueue: rename manager_mutex to assoc_mutex"). Now, we're gonna need to be able to lock out managers from destroy_workqueue() to support multiple unbound pools with custom attributes making it again necessary to be able to block on the manager role. This patch replaces POOL_MANAGING_WORKERS with worker_pool->manager_arb. This patch doesn't introduce any behavior changes. v2: s/manager_mutex/manager_arb/ Signed-off-by: Tejun Heo --- kernel/workqueue.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 46381490f496..16f7f8d79d35 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -64,7 +64,6 @@ enum { * create_worker() is in progress. */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ - POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ POOL_FREEZING = 1 << 3, /* freeze in progress */ @@ -145,6 +144,7 @@ struct worker_pool { DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ + struct mutex manager_arb; /* manager arbitration */ struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ @@ -706,7 +706,7 @@ static bool need_to_manage_workers(struct worker_pool *pool) /* Do we have too many workers and should some go away? */ static bool too_many_workers(struct worker_pool *pool) { - bool managing = pool->flags & POOL_MANAGING_WORKERS; + bool managing = mutex_is_locked(&pool->manager_arb); int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; @@ -2029,19 +2029,17 @@ static bool manage_workers(struct worker *worker) struct worker_pool *pool = worker->pool; bool ret = false; - if (pool->flags & POOL_MANAGING_WORKERS) + if (!mutex_trylock(&pool->manager_arb)) return ret; - pool->flags |= POOL_MANAGING_WORKERS; - /* * To simplify both worker management and CPU hotplug, hold off * management while hotplug is in progress. CPU hotplug path can't - * grab %POOL_MANAGING_WORKERS to achieve this because that can - * lead to idle worker depletion (all become busy thinking someone - * else is managing) which in turn can result in deadlock under - * extreme circumstances. Use @pool->assoc_mutex to synchronize - * manager against CPU hotplug. + * grab @pool->manager_arb to achieve this because that can lead to + * idle worker depletion (all become busy thinking someone else is + * managing) which in turn can result in deadlock under extreme + * circumstances. Use @pool->assoc_mutex to synchronize manager + * against CPU hotplug. * * assoc_mutex would always be free unless CPU hotplug is in * progress. trylock first without dropping @pool->lock. 
@@ -2077,8 +2075,8 @@ static bool manage_workers(struct worker *worker) ret |= maybe_destroy_workers(pool); ret |= maybe_create_worker(pool); - pool->flags &= ~POOL_MANAGING_WORKERS; mutex_unlock(&pool->assoc_mutex); + mutex_unlock(&pool->manager_arb); return ret; } @@ -3806,6 +3804,7 @@ static int __init init_workqueues(void) setup_timer(&pool->mayday_timer, pool_mayday_timeout, (unsigned long)pool); + mutex_init(&pool->manager_arb); mutex_init(&pool->assoc_mutex); ida_init(&pool->worker_ida); -- cgit v1.2.2 From 4e1a1f9a051b4c9a2821a2a0f7f4a27c701fba51 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:00 -0700 Subject: workqueue: separate out init_worker_pool() from init_workqueues() This will be used to implement unbound pools with custom attributes. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 16f7f8d79d35..094f16668e1b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3123,6 +3123,26 @@ int keventd_up(void) return system_wq != NULL; } +static void init_worker_pool(struct worker_pool *pool) +{ + spin_lock_init(&pool->lock); + pool->flags |= POOL_DISASSOCIATED; + INIT_LIST_HEAD(&pool->worklist); + INIT_LIST_HEAD(&pool->idle_list); + hash_init(pool->busy_hash); + + init_timer_deferrable(&pool->idle_timer); + pool->idle_timer.function = idle_worker_timeout; + pool->idle_timer.data = (unsigned long)pool; + + setup_timer(&pool->mayday_timer, pool_mayday_timeout, + (unsigned long)pool); + + mutex_init(&pool->manager_arb); + mutex_init(&pool->assoc_mutex); + ida_init(&pool->worker_ida); +} + static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; @@ -3790,23 +3810,8 @@ static int __init init_workqueues(void) struct worker_pool *pool; for_each_std_worker_pool(pool, cpu) { - spin_lock_init(&pool->lock); + init_worker_pool(pool); pool->cpu = cpu; - pool->flags |= POOL_DISASSOCIATED; - INIT_LIST_HEAD(&pool->worklist); - INIT_LIST_HEAD(&pool->idle_list); - hash_init(pool->busy_hash); - - init_timer_deferrable(&pool->idle_timer); - pool->idle_timer.function = idle_worker_timeout; - pool->idle_timer.data = (unsigned long)pool; - - setup_timer(&pool->mayday_timer, pool_mayday_timeout, - (unsigned long)pool); - - mutex_init(&pool->manager_arb); - mutex_init(&pool->assoc_mutex); - ida_init(&pool->worker_ida); /* alloc pool ID */ BUG_ON(worker_pool_assign_id(pool)); -- cgit v1.2.2 From 7a4e344c5675eefbde93ed9a98ef45e0e4957bc2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:00 -0700 Subject: workqueue: introduce workqueue_attrs Introduce struct workqueue_attrs which carries worker attributes - currently the nice level and allowed cpumask along with helper routines alloc_workqueue_attrs() and free_workqueue_attrs(). Each worker_pool now carries ->attrs describing the attributes of its workers. All functions dealing with cpumask and nice level of workers are updated to follow worker_pool->attrs instead of determining them from other characteristics of the worker_pool, and init_workqueues() is updated to set worker_pool->attrs appropriately for all standard pools. Note that create_worker() is updated to always perform set_user_nice() and use set_cpus_allowed_ptr() combined with manual assertion of PF_THREAD_BOUND instead of kthread_bind(). 
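A hypothetical usage sketch of the new helpers follows; the nice level and cpumask values are examples only, not taken from the patch:

	struct workqueue_attrs *attrs;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	attrs->nice = HIGHPRI_NICE_LEVEL;		/* e.g. highpri workers */
	cpumask_copy(attrs->cpumask, cpumask_of(0));	/* e.g. restrict to CPU0 */

	/* ... hand attrs to whatever consumes them ... */

	free_workqueue_attrs(attrs);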
This simplifies handling random attributes without affecting the outcome. This patch doesn't introduce any behavior changes. v2: Missing cpumask_var_t definition caused build failure on some archs. linux/cpumask.h included. Signed-off-by: Tejun Heo Reported-by: kbuild test robot Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 13 ++++++ kernel/workqueue.c | 103 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 94 insertions(+), 22 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 899be6636d20..00c1b9ba8252 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -11,6 +11,7 @@ #include #include #include +#include struct workqueue_struct; @@ -115,6 +116,15 @@ struct delayed_work { int cpu; }; +/* + * A struct for workqueue attributes. This can be used to change + * attributes of an unbound workqueue. + */ +struct workqueue_attrs { + int nice; /* nice level */ + cpumask_var_t cpumask; /* allowed CPUs */ +}; + static inline struct delayed_work *to_delayed_work(struct work_struct *work) { return container_of(work, struct delayed_work, work); @@ -399,6 +409,9 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); +struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); +void free_workqueue_attrs(struct workqueue_attrs *attrs); + extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); extern bool queue_work(struct workqueue_struct *wq, struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 094f16668e1b..b0d3cbb83f63 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -148,6 +148,8 @@ struct worker_pool { struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ + struct workqueue_attrs *attrs; /* I: worker attributes */ + /* * The current concurrency level. As it's likely to be accessed * from other CPUs during try_to_wake_up(), put it in a separate @@ -1566,14 +1568,13 @@ __acquires(&pool->lock) * against POOL_DISASSOCIATED. */ if (!(pool->flags & POOL_DISASSOCIATED)) - set_cpus_allowed_ptr(current, get_cpu_mask(pool->cpu)); + set_cpus_allowed_ptr(current, pool->attrs->cpumask); spin_lock_irq(&pool->lock); if (pool->flags & POOL_DISASSOCIATED) return false; if (task_cpu(current) == pool->cpu && - cpumask_equal(¤t->cpus_allowed, - get_cpu_mask(pool->cpu))) + cpumask_equal(¤t->cpus_allowed, pool->attrs->cpumask)) return true; spin_unlock_irq(&pool->lock); @@ -1679,7 +1680,7 @@ static void rebind_workers(struct worker_pool *pool) * wq doesn't really matter but let's keep @worker->pool * and @pwq->pool consistent for sanity. */ - if (std_worker_pool_pri(worker->pool)) + if (worker->pool->attrs->nice < 0) wq = system_highpri_wq; else wq = system_wq; @@ -1721,7 +1722,7 @@ static struct worker *alloc_worker(void) */ static struct worker *create_worker(struct worker_pool *pool) { - const char *pri = std_worker_pool_pri(pool) ? "H" : ""; + const char *pri = pool->attrs->nice < 0 ? 
"H" : ""; struct worker *worker = NULL; int id = -1; @@ -1751,24 +1752,23 @@ static struct worker *create_worker(struct worker_pool *pool) if (IS_ERR(worker->task)) goto fail; - if (std_worker_pool_pri(pool)) - set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); + set_user_nice(worker->task, pool->attrs->nice); + set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); /* - * Determine CPU binding of the new worker depending on - * %POOL_DISASSOCIATED. The caller is responsible for ensuring the - * flag remains stable across this function. See the comments - * above the flag definition for details. - * - * As an unbound worker may later become a regular one if CPU comes - * online, make sure every worker has %PF_THREAD_BOUND set. + * %PF_THREAD_BOUND is used to prevent userland from meddling with + * cpumask of workqueue workers. This is an abuse. We need + * %PF_NO_SETAFFINITY. */ - if (!(pool->flags & POOL_DISASSOCIATED)) { - kthread_bind(worker->task, pool->cpu); - } else { - worker->task->flags |= PF_THREAD_BOUND; + worker->task->flags |= PF_THREAD_BOUND; + + /* + * The caller is responsible for ensuring %POOL_DISASSOCIATED + * remains stable across this function. See the comments above the + * flag definition for details. + */ + if (pool->flags & POOL_DISASSOCIATED) worker->flags |= WORKER_UNBOUND; - } return worker; fail: @@ -3123,7 +3123,52 @@ int keventd_up(void) return system_wq != NULL; } -static void init_worker_pool(struct worker_pool *pool) +/** + * free_workqueue_attrs - free a workqueue_attrs + * @attrs: workqueue_attrs to free + * + * Undo alloc_workqueue_attrs(). + */ +void free_workqueue_attrs(struct workqueue_attrs *attrs) +{ + if (attrs) { + free_cpumask_var(attrs->cpumask); + kfree(attrs); + } +} + +/** + * alloc_workqueue_attrs - allocate a workqueue_attrs + * @gfp_mask: allocation mask to use + * + * Allocate a new workqueue_attrs, initialize with default settings and + * return it. Returns NULL on failure. + */ +struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask) +{ + struct workqueue_attrs *attrs; + + attrs = kzalloc(sizeof(*attrs), gfp_mask); + if (!attrs) + goto fail; + if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask)) + goto fail; + + cpumask_setall(attrs->cpumask); + return attrs; +fail: + free_workqueue_attrs(attrs); + return NULL; +} + +/** + * init_worker_pool - initialize a newly zalloc'd worker_pool + * @pool: worker_pool to initialize + * + * Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs. + * Returns 0 on success, -errno on failure. 
+ */ +static int init_worker_pool(struct worker_pool *pool) { spin_lock_init(&pool->lock); pool->flags |= POOL_DISASSOCIATED; @@ -3141,6 +3186,11 @@ static void init_worker_pool(struct worker_pool *pool) mutex_init(&pool->manager_arb); mutex_init(&pool->assoc_mutex); ida_init(&pool->worker_ida); + + pool->attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!pool->attrs) + return -ENOMEM; + return 0; } static int alloc_and_link_pwqs(struct workqueue_struct *wq) @@ -3792,7 +3842,8 @@ out_unlock: static int __init init_workqueues(void) { - int cpu; + int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; + int i, cpu; /* make sure we have enough bits for OFFQ pool ID */ BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < @@ -3809,10 +3860,18 @@ static int __init init_workqueues(void) for_each_wq_cpu(cpu) { struct worker_pool *pool; + i = 0; for_each_std_worker_pool(pool, cpu) { - init_worker_pool(pool); + BUG_ON(init_worker_pool(pool)); pool->cpu = cpu; + if (cpu != WORK_CPU_UNBOUND) + cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); + else + cpumask_setall(pool->attrs->cpumask); + + pool->attrs->nice = std_nice[i++]; + /* alloc pool ID */ BUG_ON(worker_pool_assign_id(pool)); } -- cgit v1.2.2 From 29c91e9912bed7060df6116af90286500f5a700d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:03 -0700 Subject: workqueue: implement attribute-based unbound worker_pool management This patch makes unbound worker_pools reference counted and dynamically created and destroyed as workqueues needing them come and go. All unbound worker_pools are hashed on unbound_pool_hash which is keyed by the content of worker_pool->attrs. When an unbound workqueue is allocated, get_unbound_pool() is called with the attributes of the workqueue. If there already is a matching worker_pool, the reference count is bumped and the pool is returned. If not, a new worker_pool with matching attributes is created and returned. When an unbound workqueue is destroyed, put_unbound_pool() is called which decrements the reference count of the associated worker_pool. If the refcnt reaches zero, the worker_pool is destroyed in sched-RCU safe way. Note that the standard unbound worker_pools - normal and highpri ones with no specific cpumask affinity - are no longer created explicitly during init_workqueues(). init_workqueues() only initializes workqueue_attrs to be used for standard unbound pools - unbound_std_wq_attrs[]. The pools are spawned on demand as workqueues are created. v2: - Comment added to init_worker_pool() explaining that @pool should be in a condition which can be passed to put_unbound_pool() even on failure. - pool->refcnt reaching zero and the pool being removed from unbound_pool_hash should be dynamic. pool->refcnt is converted to int from atomic_t and now manipulated inside workqueue_lock. - Removed an incorrect sanity check on nr_idle in put_unbound_pool() which may trigger spuriously. All changes were suggested by Lai Jiangshan. 
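A rough sketch of the resulting pool lifecycle from a pwq's point of view (illustrative only; error handling beyond the NULL check elided):

	struct worker_pool *pool;

	pool = get_unbound_pool(attrs);	/* find or create, takes a reference */
	if (!pool)
		return -ENOMEM;

	pwq->pool = pool;		/* the pwq now owns one reference */

	/* ... on workqueue destruction ... */
	put_unbound_pool(pool);		/* refcnt 0 -> sched-RCU-safe free */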
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 237 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 224 insertions(+), 13 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b0d3cbb83f63..3fe2c79bf166 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -80,6 +81,7 @@ enum { NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */ + UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ @@ -149,6 +151,8 @@ struct worker_pool { struct ida worker_ida; /* L: for worker IDs */ struct workqueue_attrs *attrs; /* I: worker attributes */ + struct hlist_node hash_node; /* R: unbound_pool_hash node */ + int refcnt; /* refcnt for unbound pools */ /* * The current concurrency level. As it's likely to be accessed @@ -156,6 +160,12 @@ struct worker_pool { * cacheline. */ atomic_t nr_running ____cacheline_aligned_in_smp; + + /* + * Destruction of pool is sched-RCU protected to allow dereferences + * from get_work_pool(). + */ + struct rcu_head rcu; } ____cacheline_aligned_in_smp; /* @@ -218,6 +228,11 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; +/* hash of all unbound pools keyed by pool->attrs */ +static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); + +static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; + struct workqueue_struct *system_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); struct workqueue_struct *system_highpri_wq __read_mostly; @@ -1742,7 +1757,7 @@ static struct worker *create_worker(struct worker_pool *pool) worker->pool = pool; worker->id = id; - if (pool->cpu != WORK_CPU_UNBOUND) + if (pool->cpu >= 0) worker->task = kthread_create_on_node(worker_thread, worker, cpu_to_node(pool->cpu), "kworker/%d:%d%s", pool->cpu, id, pri); @@ -3161,16 +3176,68 @@ fail: return NULL; } +static void copy_workqueue_attrs(struct workqueue_attrs *to, + const struct workqueue_attrs *from) +{ + to->nice = from->nice; + cpumask_copy(to->cpumask, from->cpumask); +} + +/* + * Hacky implementation of jhash of bitmaps which only considers the + * specified number of bits. We probably want a proper implementation in + * include/linux/jhash.h. + */ +static u32 jhash_bitmap(const unsigned long *bitmap, int bits, u32 hash) +{ + int nr_longs = bits / BITS_PER_LONG; + int nr_leftover = bits % BITS_PER_LONG; + unsigned long leftover = 0; + + if (nr_longs) + hash = jhash(bitmap, nr_longs * sizeof(long), hash); + if (nr_leftover) { + bitmap_copy(&leftover, bitmap + nr_longs, nr_leftover); + hash = jhash(&leftover, sizeof(long), hash); + } + return hash; +} + +/* hash value of the content of @attr */ +static u32 wqattrs_hash(const struct workqueue_attrs *attrs) +{ + u32 hash = 0; + + hash = jhash_1word(attrs->nice, hash); + hash = jhash_bitmap(cpumask_bits(attrs->cpumask), nr_cpu_ids, hash); + return hash; +} + +/* content equality test */ +static bool wqattrs_equal(const struct workqueue_attrs *a, + const struct workqueue_attrs *b) +{ + if (a->nice != b->nice) + return false; + if (!cpumask_equal(a->cpumask, b->cpumask)) + return false; + return true; +} + /** * init_worker_pool - initialize a newly zalloc'd worker_pool * @pool: worker_pool to initialize * * Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs. - * Returns 0 on success, -errno on failure. + * Returns 0 on success, -errno on failure. 
Even on failure, all fields + * inside @pool proper are initialized and put_unbound_pool() can be called + * on @pool safely to release it. */ static int init_worker_pool(struct worker_pool *pool) { spin_lock_init(&pool->lock); + pool->id = -1; + pool->cpu = -1; pool->flags |= POOL_DISASSOCIATED; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); @@ -3187,12 +3254,136 @@ static int init_worker_pool(struct worker_pool *pool) mutex_init(&pool->assoc_mutex); ida_init(&pool->worker_ida); + INIT_HLIST_NODE(&pool->hash_node); + pool->refcnt = 1; + + /* shouldn't fail above this point */ pool->attrs = alloc_workqueue_attrs(GFP_KERNEL); if (!pool->attrs) return -ENOMEM; return 0; } +static void rcu_free_pool(struct rcu_head *rcu) +{ + struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); + + ida_destroy(&pool->worker_ida); + free_workqueue_attrs(pool->attrs); + kfree(pool); +} + +/** + * put_unbound_pool - put a worker_pool + * @pool: worker_pool to put + * + * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU + * safe manner. + */ +static void put_unbound_pool(struct worker_pool *pool) +{ + struct worker *worker; + + spin_lock_irq(&workqueue_lock); + if (--pool->refcnt) { + spin_unlock_irq(&workqueue_lock); + return; + } + + /* sanity checks */ + if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) || + WARN_ON(!list_empty(&pool->worklist))) { + spin_unlock_irq(&workqueue_lock); + return; + } + + /* release id and unhash */ + if (pool->id >= 0) + idr_remove(&worker_pool_idr, pool->id); + hash_del(&pool->hash_node); + + spin_unlock_irq(&workqueue_lock); + + /* lock out manager and destroy all workers */ + mutex_lock(&pool->manager_arb); + spin_lock_irq(&pool->lock); + + while ((worker = first_worker(pool))) + destroy_worker(worker); + WARN_ON(pool->nr_workers || pool->nr_idle); + + spin_unlock_irq(&pool->lock); + mutex_unlock(&pool->manager_arb); + + /* shut down the timers */ + del_timer_sync(&pool->idle_timer); + del_timer_sync(&pool->mayday_timer); + + /* sched-RCU protected to allow dereferences from get_work_pool() */ + call_rcu_sched(&pool->rcu, rcu_free_pool); +} + +/** + * get_unbound_pool - get a worker_pool with the specified attributes + * @attrs: the attributes of the worker_pool to get + * + * Obtain a worker_pool which has the same attributes as @attrs, bump the + * reference count and return it. If there already is a matching + * worker_pool, it will be used; otherwise, this function attempts to + * create a new one. On failure, returns NULL. + */ +static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) +{ + static DEFINE_MUTEX(create_mutex); + u32 hash = wqattrs_hash(attrs); + struct worker_pool *pool; + struct worker *worker; + + mutex_lock(&create_mutex); + + /* do we already have a matching pool? 
*/ + spin_lock_irq(&workqueue_lock); + hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { + if (wqattrs_equal(pool->attrs, attrs)) { + pool->refcnt++; + goto out_unlock; + } + } + spin_unlock_irq(&workqueue_lock); + + /* nope, create a new one */ + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool || init_worker_pool(pool) < 0) + goto fail; + + copy_workqueue_attrs(pool->attrs, attrs); + + if (worker_pool_assign_id(pool) < 0) + goto fail; + + /* create and start the initial worker */ + worker = create_worker(pool); + if (!worker) + goto fail; + + spin_lock_irq(&pool->lock); + start_worker(worker); + spin_unlock_irq(&pool->lock); + + /* install */ + spin_lock_irq(&workqueue_lock); + hash_add(unbound_pool_hash, &pool->hash_node, hash); +out_unlock: + spin_unlock_irq(&workqueue_lock); + mutex_unlock(&create_mutex); + return pool; +fail: + mutex_unlock(&create_mutex); + if (pool) + put_unbound_pool(pool); + return NULL; +} + static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; @@ -3217,7 +3408,12 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) if (!pwq) return -ENOMEM; - pwq->pool = get_std_worker_pool(WORK_CPU_UNBOUND, highpri); + pwq->pool = get_unbound_pool(unbound_std_wq_attrs[highpri]); + if (!pwq->pool) { + kmem_cache_free(pwq_cache, pwq); + return -ENOMEM; + } + list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); } @@ -3395,6 +3591,15 @@ void destroy_workqueue(struct workqueue_struct *wq) kfree(wq->rescuer); } + /* + * We're the sole accessor of @wq at this point. Directly access + * the first pwq and put its pool. + */ + if (wq->flags & WQ_UNBOUND) { + pwq = list_first_entry(&wq->pwqs, struct pool_workqueue, + pwqs_node); + put_unbound_pool(pwq->pool); + } free_pwqs(wq); kfree(wq); } @@ -3857,19 +4062,14 @@ static int __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); /* initialize CPU pools */ - for_each_wq_cpu(cpu) { + for_each_possible_cpu(cpu) { struct worker_pool *pool; i = 0; for_each_std_worker_pool(pool, cpu) { BUG_ON(init_worker_pool(pool)); pool->cpu = cpu; - - if (cpu != WORK_CPU_UNBOUND) - cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); - else - cpumask_setall(pool->attrs->cpumask); - + cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); pool->attrs->nice = std_nice[i++]; /* alloc pool ID */ @@ -3878,14 +4078,13 @@ static int __init init_workqueues(void) } /* create the initial worker */ - for_each_online_wq_cpu(cpu) { + for_each_online_cpu(cpu) { struct worker_pool *pool; for_each_std_worker_pool(pool, cpu) { struct worker *worker; - if (cpu != WORK_CPU_UNBOUND) - pool->flags &= ~POOL_DISASSOCIATED; + pool->flags &= ~POOL_DISASSOCIATED; worker = create_worker(pool); BUG_ON(!worker); @@ -3895,6 +4094,18 @@ static int __init init_workqueues(void) } } + /* create default unbound wq attrs */ + for (i = 0; i < NR_STD_WORKER_POOLS; i++) { + struct workqueue_attrs *attrs; + + BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); + + attrs->nice = std_nice[i]; + cpumask_setall(attrs->cpumask); + + unbound_std_wq_attrs[i] = attrs; + } + system_wq = alloc_workqueue("events", 0, 0); system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0); system_long_wq = alloc_workqueue("events_long", 0, 0); -- cgit v1.2.2 From 7a62c2c87e3bc174fe4b9e9720e148427510fcfb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:03 -0700 Subject: workqueue: remove unbound_std_worker_pools[] and related helpers Workqueue no longer makes use of 
unbound_std_worker_pools[]. All unbound worker_pools are created dynamically and there's nothing special about the standard ones. With unbound_std_worker_pools[] unused, workqueue no longer has places where it needs to treat the per-cpu pools-cpu and unbound pools together. Remove unbound_std_worker_pools[] and the helpers wrapping it to present unified per-cpu and unbound standard worker_pools. * for_each_std_worker_pool() now only walks through per-cpu pools. * for_each[_online]_wq_cpu() which don't have any users left are removed. * std_worker_pools() and std_worker_pool_pri() are unused and removed. * get_std_worker_pool() is removed. Its only user - alloc_and_link_pwqs() - only used it for per-cpu pools anyway. Open code per_cpu access in alloc_and_link_pwqs() instead. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 66 +++++------------------------------------------------- 1 file changed, 6 insertions(+), 60 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3fe2c79bf166..7642bb7b70ee 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -253,48 +253,13 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); "sched RCU or workqueue lock should be held") #define for_each_std_worker_pool(pool, cpu) \ - for ((pool) = &std_worker_pools(cpu)[0]; \ - (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) + for ((pool) = &per_cpu(cpu_std_worker_pools, cpu)[0]; \ + (pool) < &per_cpu(cpu_std_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ + (pool)++) #define for_each_busy_worker(worker, i, pool) \ hash_for_each(pool->busy_hash, i, worker, hentry) -static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, - unsigned int sw) -{ - if (cpu < nr_cpu_ids) { - if (sw & 1) { - cpu = cpumask_next(cpu, mask); - if (cpu < nr_cpu_ids) - return cpu; - } - if (sw & 2) - return WORK_CPU_UNBOUND; - } - return WORK_CPU_END; -} - -/* - * CPU iterators - * - * An extra cpu number is defined using an invalid cpu number - * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any - * specific CPU. The following iterators are similar to for_each_*_cpu() - * iterators but also considers the unbound CPU. - * - * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND - * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND - */ -#define for_each_wq_cpu(cpu) \ - for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \ - (cpu) < WORK_CPU_END; \ - (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3)) - -#define for_each_online_wq_cpu(cpu) \ - for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \ - (cpu) < WORK_CPU_END; \ - (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) - /** * for_each_pool - iterate through all worker_pools in the system * @pool: iteration cursor @@ -456,7 +421,6 @@ static bool workqueue_freezing; /* W: have wqs started freezing? */ */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_std_worker_pools); -static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS]; /* * idr of all pools. Modifications are protected by workqueue_lock. 
Read @@ -466,19 +430,6 @@ static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); -static struct worker_pool *std_worker_pools(int cpu) -{ - if (cpu != WORK_CPU_UNBOUND) - return per_cpu(cpu_std_worker_pools, cpu); - else - return unbound_std_worker_pools; -} - -static int std_worker_pool_pri(struct worker_pool *pool) -{ - return pool - std_worker_pools(pool->cpu); -} - /* allocate ID and assign it to @pool */ static int worker_pool_assign_id(struct worker_pool *pool) { @@ -496,13 +447,6 @@ static int worker_pool_assign_id(struct worker_pool *pool) return ret; } -static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) -{ - struct worker_pool *pools = std_worker_pools(cpu); - - return &pools[highpri]; -} - /** * first_pwq - return the first pool_workqueue of the specified workqueue * @wq: the target workqueue @@ -3397,8 +3341,10 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) for_each_possible_cpu(cpu) { struct pool_workqueue *pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + struct worker_pool *cpu_pools = + per_cpu(cpu_std_worker_pools, cpu); - pwq->pool = get_std_worker_pool(cpu, highpri); + pwq->pool = &cpu_pools[highpri]; list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); } } else { -- cgit v1.2.2 From f02ae73aaa4f285199683862ac59972877a11c5d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:03 -0700 Subject: workqueue: drop "std" from cpu_std_worker_pools and for_each_std_worker_pool() All per-cpu pools are standard, so there's no need to use both "cpu" and "std" and for_each_std_worker_pool() is confusing in that it can be used only for per-cpu pools. * s/cpu_std_worker_pools/cpu_worker_pools/ * s/for_each_std_worker_pool()/for_each_cpu_worker_pool()/ Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7642bb7b70ee..2c5073214774 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -252,9 +252,9 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); lockdep_is_held(&workqueue_lock), \ "sched RCU or workqueue lock should be held") -#define for_each_std_worker_pool(pool, cpu) \ - for ((pool) = &per_cpu(cpu_std_worker_pools, cpu)[0]; \ - (pool) < &per_cpu(cpu_std_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ +#define for_each_cpu_worker_pool(pool, cpu) \ + for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ + (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ (pool)++) #define for_each_busy_worker(worker, i, pool) \ @@ -420,7 +420,7 @@ static bool workqueue_freezing; /* W: have wqs started freezing? */ * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set. */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], - cpu_std_worker_pools); + cpu_worker_pools); /* * idr of all pools. Modifications are protected by workqueue_lock. 
Read @@ -3342,7 +3342,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) struct pool_workqueue *pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); struct worker_pool *cpu_pools = - per_cpu(cpu_std_worker_pools, cpu); + per_cpu(cpu_worker_pools, cpu); pwq->pool = &cpu_pools[highpri]; list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); @@ -3694,7 +3694,7 @@ static void wq_unbind_fn(struct work_struct *work) struct worker *worker; int i; - for_each_std_worker_pool(pool, cpu) { + for_each_cpu_worker_pool(pool, cpu) { WARN_ON_ONCE(cpu != smp_processor_id()); mutex_lock(&pool->assoc_mutex); @@ -3737,7 +3737,7 @@ static void wq_unbind_fn(struct work_struct *work) * unbound chain execution of pending work items if other workers * didn't already. */ - for_each_std_worker_pool(pool, cpu) + for_each_cpu_worker_pool(pool, cpu) atomic_set(&pool->nr_running, 0); } @@ -3754,7 +3754,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - for_each_std_worker_pool(pool, cpu) { + for_each_cpu_worker_pool(pool, cpu) { struct worker *worker; if (pool->nr_workers) @@ -3772,7 +3772,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: - for_each_std_worker_pool(pool, cpu) { + for_each_cpu_worker_pool(pool, cpu) { mutex_lock(&pool->assoc_mutex); spin_lock_irq(&pool->lock); @@ -4012,7 +4012,7 @@ static int __init init_workqueues(void) struct worker_pool *pool; i = 0; - for_each_std_worker_pool(pool, cpu) { + for_each_cpu_worker_pool(pool, cpu) { BUG_ON(init_worker_pool(pool)); pool->cpu = cpu; cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); @@ -4027,7 +4027,7 @@ static int __init init_workqueues(void) for_each_online_cpu(cpu) { struct worker_pool *pool; - for_each_std_worker_pool(pool, cpu) { + for_each_cpu_worker_pool(pool, cpu) { struct worker *worker; pool->flags &= ~POOL_DISASSOCIATED; -- cgit v1.2.2 From ac6104cdf87cc162b0a0d78280d1dcb9752e25bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:03 -0700 Subject: workqueue: add pool ID to the names of unbound kworkers There are gonna be multiple unbound pools. Include pool ID in the name of unbound kworkers. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2c5073214774..a8b86f7b6e34 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1707,7 +1707,8 @@ static struct worker *create_worker(struct worker_pool *pool) "kworker/%d:%d%s", pool->cpu, id, pri); else worker->task = kthread_create(worker_thread, worker, - "kworker/u:%d%s", id, pri); + "kworker/u%d:%d%s", + pool->id, id, pri); if (IS_ERR(worker->task)) goto fail; -- cgit v1.2.2 From 493008a8e475771a2126e0ce95a73e35b371d277 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:03 -0700 Subject: workqueue: drop WQ_RESCUER and test workqueue->rescuer for NULL instead WQ_RESCUER is superfluous. WQ_MEM_RECLAIM indicates that the user wants a rescuer and testing wq->rescuer for NULL can answer whether a given workqueue has a rescuer or not. Drop WQ_RESCUER and test wq->rescuer directly. This will help simplify the __alloc_workqueue_key() failure path by allowing it to use destroy_workqueue() on a partially constructed workqueue, which in turn will help implement dynamic management of pool_workqueues. While at it, clear wq->rescuer after freeing it in destroy_workqueue().
This is a precaution as scheduled changes will make destruction more complex. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 1 - kernel/workqueue.c | 22 ++++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 00c1b9ba8252..c270b4eedf16 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -295,7 +295,6 @@ enum { WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ WQ_DRAINING = 1 << 6, /* internal: workqueue is draining */ - WQ_RESCUER = 1 << 7, /* internal: workqueue has rescuer */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a8b86f7b6e34..7ff2b9c5cc3a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1827,7 +1827,7 @@ static void send_mayday(struct work_struct *work) lockdep_assert_held(&workqueue_lock); - if (!(wq->flags & WQ_RESCUER)) + if (!wq->rescuer) return; /* mayday mayday mayday */ @@ -2285,7 +2285,7 @@ sleep: * @__rescuer: self * * Workqueue rescuer thread function. There's one rescuer for each - * workqueue which has WQ_RESCUER set. + * workqueue which has WQ_MEM_RECLAIM set. * * Regular work processing on a pool may block trying to create a new * worker which uses GFP_KERNEL allocation which has slight chance of @@ -2769,7 +2769,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) * flusher is not running on the same workqueue by verifying write * access. */ - if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER) + if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) lock_map_acquire(&pwq->wq->lockdep_map); else lock_map_acquire_read(&pwq->wq->lockdep_map); @@ -3412,13 +3412,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, va_end(args); va_end(args1); - /* - * Workqueues which may be used during memory reclaim should - * have a rescuer to guarantee forward progress. - */ - if (flags & WQ_MEM_RECLAIM) - flags |= WQ_RESCUER; - max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, flags, wq->name); @@ -3449,7 +3442,11 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, } local_irq_enable(); - if (flags & WQ_RESCUER) { + /* + * Workqueues which may be used during memory reclaim should + * have a rescuer to guarantee forward progress. + */ + if (flags & WQ_MEM_RECLAIM) { struct worker *rescuer; wq->rescuer = rescuer = alloc_worker(); @@ -3533,9 +3530,10 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock_irq(&workqueue_lock); - if (wq->flags & WQ_RESCUER) { + if (wq->rescuer) { kthread_stop(wq->rescuer->task); kfree(wq->rescuer); + wq->rescuer = NULL; } /* -- cgit v1.2.2 From d2c1d40487bb1884be085c187233084f80df052d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: restructure __alloc_workqueue_key() * Move initialization and linking of pool_workqueues into init_and_link_pwq(). * Make the failure path use destroy_workqueue() once pool_workqueue initialization succeeds. These changes are to prepare for dynamic management of pool_workqueues and don't introduce any functional changes. While at it, convert list_del(&wq->list) to list_del_init() as a precaution as scheduled changes will make destruction more complex. 
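Condensed for readability (the full hunks follow), the restructured allocation failure path has roughly this shape; the elisions stand in for the rescuer setup and the remaining initialization:

	wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL);
	if (!wq)
		return NULL;			/* nothing else allocated yet */
	...
	if (alloc_and_link_pwqs(wq) < 0)
		goto err_free_wq;		/* no pwqs linked, plain kfree suffices */
	...
err_free_wq:
	kfree(wq);
	return NULL;
err_destroy:
	destroy_workqueue(wq);			/* full teardown of a partially constructed wq */
	return NULL;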
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 67 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7ff2b9c5cc3a..5ac846e0085e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3329,6 +3329,23 @@ fail: return NULL; } +/* initialize @pwq which interfaces with @pool for @wq and link it in */ +static void init_and_link_pwq(struct pool_workqueue *pwq, + struct workqueue_struct *wq, + struct worker_pool *pool) +{ + BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); + + pwq->pool = pool; + pwq->wq = wq; + pwq->flush_color = -1; + pwq->max_active = wq->saved_max_active; + INIT_LIST_HEAD(&pwq->delayed_works); + INIT_LIST_HEAD(&pwq->mayday_node); + + list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); +} + static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; @@ -3345,23 +3362,23 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) struct worker_pool *cpu_pools = per_cpu(cpu_worker_pools, cpu); - pwq->pool = &cpu_pools[highpri]; - list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); + init_and_link_pwq(pwq, wq, &cpu_pools[highpri]); } } else { struct pool_workqueue *pwq; + struct worker_pool *pool; pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); if (!pwq) return -ENOMEM; - pwq->pool = get_unbound_pool(unbound_std_wq_attrs[highpri]); - if (!pwq->pool) { + pool = get_unbound_pool(unbound_std_wq_attrs[highpri]); + if (!pool) { kmem_cache_free(pwq_cache, pwq); return -ENOMEM; } - list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); + init_and_link_pwq(pwq, wq, pool); } return 0; @@ -3406,7 +3423,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL); if (!wq) - goto err; + return NULL; vsnprintf(wq->name, namelen, fmt, args1); va_end(args); @@ -3429,18 +3446,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, INIT_LIST_HEAD(&wq->list); if (alloc_and_link_pwqs(wq) < 0) - goto err; - - local_irq_disable(); - for_each_pwq(pwq, wq) { - BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); - pwq->wq = wq; - pwq->flush_color = -1; - pwq->max_active = max_active; - INIT_LIST_HEAD(&pwq->delayed_works); - INIT_LIST_HEAD(&pwq->mayday_node); - } - local_irq_enable(); + goto err_free_wq; /* * Workqueues which may be used during memory reclaim should @@ -3449,16 +3455,19 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, if (flags & WQ_MEM_RECLAIM) { struct worker *rescuer; - wq->rescuer = rescuer = alloc_worker(); + rescuer = alloc_worker(); if (!rescuer) - goto err; + goto err_destroy; rescuer->rescue_wq = wq; rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name); - if (IS_ERR(rescuer->task)) - goto err; + if (IS_ERR(rescuer->task)) { + kfree(rescuer); + goto err_destroy; + } + wq->rescuer = rescuer; rescuer->task->flags |= PF_THREAD_BOUND; wake_up_process(rescuer->task); } @@ -3479,12 +3488,12 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, spin_unlock_irq(&workqueue_lock); return wq; -err: - if (wq) { - free_pwqs(wq); - kfree(wq->rescuer); - kfree(wq); - } + +err_free_wq: + kfree(wq); + return NULL; +err_destroy: + destroy_workqueue(wq); return NULL; } EXPORT_SYMBOL_GPL(__alloc_workqueue_key); @@ -3526,7 +3535,7 @@ void destroy_workqueue(struct workqueue_struct *wq) * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. 
*/ - list_del(&wq->list); + list_del_init(&wq->list); spin_unlock_irq(&workqueue_lock); -- cgit v1.2.2 From 8864b4e59f7945a636eeb27671f10486149be6e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: implement get/put_pwq() Add pool_workqueue->refcnt along with get/put_pwq(). Both per-cpu and unbound pwqs have refcnts and any work item inserted on a pwq increments the refcnt which is dropped when the work item finishes. For per-cpu pwqs the base ref is never dropped and destroy_workqueue() frees the pwqs as before. For unbound ones, destroy_workqueue() simply drops the base ref on the first pwq. When the refcnt reaches zero, pwq_unbound_release_workfn() is scheduled on system_wq, which unlinks the pwq, puts the associated pool and frees the pwq and wq as necessary. This needs to be done from a work item as put_pwq() needs to be protected by pool->lock but release can't happen with the lock held - e.g. put_unbound_pool() involves blocking operations. Unbound pool->locks are marked with lockdep subclass 1, as put_pwq() schedules the release work item on system_wq while holding the unbound pool's lock, which would otherwise trigger a spurious recursive locking warning. This will be used to implement dynamic creation and destruction of unbound pwqs. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 137 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 114 insertions(+), 23 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5ac846e0085e..7dd8e7bcec51 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -179,6 +179,7 @@ struct pool_workqueue { struct workqueue_struct *wq; /* I: the owning workqueue */ int work_color; /* L: current color */ int flush_color; /* L: flushing color */ + int refcnt; /* L: reference count */ int nr_in_flight[WORK_NR_COLORS]; /* L: nr of in_flight works */ int nr_active; /* L: nr of active works */ @@ -186,6 +187,15 @@ struct pool_workqueue { struct list_head delayed_works; /* L: delayed works */ struct list_head pwqs_node; /* R: node on wq->pwqs */ struct list_head mayday_node; /* W: node on wq->maydays */ + + /* + * Release of unbound pwq is punted to system_wq. See put_pwq() + * and pwq_unbound_release_workfn() for details. pool_workqueue + * itself is also sched-RCU protected so that the first pwq can be + * determined without grabbing workqueue_lock. + */ + struct work_struct unbound_release_work; + struct rcu_head rcu; } __aligned(1 << WORK_STRUCT_FLAG_BITS); /* @@ -939,6 +949,45 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } +/** + * get_pwq - get an extra reference on the specified pool_workqueue + * @pwq: pool_workqueue to get + * + * Obtain an extra reference on @pwq. The caller should guarantee that + * @pwq has positive refcnt and be holding the matching pool->lock. + */ +static void get_pwq(struct pool_workqueue *pwq) +{ + lockdep_assert_held(&pwq->pool->lock); + WARN_ON_ONCE(pwq->refcnt <= 0); + pwq->refcnt++; +} + +/** + * put_pwq - put a pool_workqueue reference + * @pwq: pool_workqueue to put + * + * Drop a reference of @pwq. If its refcnt reaches zero, schedule its + * destruction. The caller should be holding the matching pool->lock.
+ */ +static void put_pwq(struct pool_workqueue *pwq) +{ + lockdep_assert_held(&pwq->pool->lock); + if (likely(--pwq->refcnt)) + return; + if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND))) + return; + /* + * @pwq can't be released under pool->lock, bounce to + * pwq_unbound_release_workfn(). This never recurses on the same + * pool->lock as this path is taken only for unbound workqueues and + * the release work item is scheduled on a per-cpu workqueue. To + * avoid lockdep warning, unbound pool->locks are given lockdep + * subclass of 1 in get_unbound_pool(). + */ + schedule_work(&pwq->unbound_release_work); +} + static void pwq_activate_delayed_work(struct work_struct *work) { struct pool_workqueue *pwq = get_work_pwq(work); @@ -970,9 +1019,9 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq) */ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) { - /* ignore uncolored works */ + /* uncolored work items don't participate in flushing or nr_active */ if (color == WORK_NO_COLOR) - return; + goto out_put; pwq->nr_in_flight[color]--; @@ -985,11 +1034,11 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) /* is flush in progress and are we at the flushing tip? */ if (likely(pwq->flush_color != color)) - return; + goto out_put; /* are there still in-flight works? */ if (pwq->nr_in_flight[color]) - return; + goto out_put; /* this pwq is done, clear flush_color */ pwq->flush_color = -1; @@ -1000,6 +1049,8 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) */ if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) complete(&pwq->wq->first_flusher->done); +out_put: + put_pwq(pwq); } /** @@ -1122,6 +1173,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); + get_pwq(pwq); /* * Ensure either worker_sched_deactivated() sees the above @@ -3301,6 +3353,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) if (!pool || init_worker_pool(pool) < 0) goto fail; + lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ copy_workqueue_attrs(pool->attrs, attrs); if (worker_pool_assign_id(pool) < 0) @@ -3329,7 +3382,41 @@ fail: return NULL; } -/* initialize @pwq which interfaces with @pool for @wq and link it in */ +static void rcu_free_pwq(struct rcu_head *rcu) +{ + kmem_cache_free(pwq_cache, + container_of(rcu, struct pool_workqueue, rcu)); +} + +/* + * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt + * and needs to be destroyed. + */ +static void pwq_unbound_release_workfn(struct work_struct *work) +{ + struct pool_workqueue *pwq = container_of(work, struct pool_workqueue, + unbound_release_work); + struct workqueue_struct *wq = pwq->wq; + struct worker_pool *pool = pwq->pool; + + if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) + return; + + spin_lock_irq(&workqueue_lock); + list_del_rcu(&pwq->pwqs_node); + spin_unlock_irq(&workqueue_lock); + + put_unbound_pool(pool); + call_rcu_sched(&pwq->rcu, rcu_free_pwq); + + /* + * If we're the last pwq going away, @wq is already dead and no one + * is gonna access it anymore. Free it. 
+ */ + if (list_empty(&wq->pwqs)) + kfree(wq); +} + static void init_and_link_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, struct worker_pool *pool) @@ -3339,9 +3426,11 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, pwq->pool = pool; pwq->wq = wq; pwq->flush_color = -1; + pwq->refcnt = 1; pwq->max_active = wq->saved_max_active; INIT_LIST_HEAD(&pwq->delayed_works); INIT_LIST_HEAD(&pwq->mayday_node); + INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); } @@ -3384,15 +3473,6 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) return 0; } -static void free_pwqs(struct workqueue_struct *wq) -{ - if (!(wq->flags & WQ_UNBOUND)) - free_percpu(wq->cpu_pwqs); - else if (!list_empty(&wq->pwqs)) - kmem_cache_free(pwq_cache, list_first_entry(&wq->pwqs, - struct pool_workqueue, pwqs_node)); -} - static int wq_clamp_max_active(int max_active, unsigned int flags, const char *name) { @@ -3524,7 +3604,8 @@ void destroy_workqueue(struct workqueue_struct *wq) } } - if (WARN_ON(pwq->nr_active) || + if (WARN_ON(pwq->refcnt > 1) || + WARN_ON(pwq->nr_active) || WARN_ON(!list_empty(&pwq->delayed_works))) { spin_unlock_irq(&workqueue_lock); return; @@ -3545,17 +3626,27 @@ void destroy_workqueue(struct workqueue_struct *wq) wq->rescuer = NULL; } - /* - * We're the sole accessor of @wq at this point. Directly access - * the first pwq and put its pool. - */ - if (wq->flags & WQ_UNBOUND) { + if (!(wq->flags & WQ_UNBOUND)) { + /* + * The base ref is never dropped on per-cpu pwqs. Directly + * free the pwqs and wq. + */ + free_percpu(wq->cpu_pwqs); + kfree(wq); + } else { + /* + * We're the sole accessor of @wq at this point. Directly + * access the first pwq and put the base ref. As both pwqs + * and pools are sched-RCU protected, the lock operations + * are safe. @wq will be freed when the last pwq is + * released. + */ pwq = list_first_entry(&wq->pwqs, struct pool_workqueue, pwqs_node); - put_unbound_pool(pwq->pool); + spin_lock_irq(&pwq->pool->lock); + put_pwq(pwq); + spin_unlock_irq(&pwq->pool->lock); } - free_pwqs(wq); - kfree(wq); } EXPORT_SYMBOL_GPL(destroy_workqueue); -- cgit v1.2.2 From 75ccf5950f828d53aebfd3a852283a00abf2c5bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: prepare flush_workqueue() for dynamic creation and destruction of unbound pool_workqueues Unbound pwqs (pool_workqueues) will be dynamically created and destroyed with the scheduled unbound workqueue w/ custom attributes support. This patch synchronizes pwq linking and unlinking against flush_workqueue() so that its operation isn't disturbed by pwqs coming and going. Linking and unlinking a pwq in wq->pwqs is now also protected by wq->flush_mutex, and a new pwq's work_color is initialized to wq->work_color during linking. This ensures that pwq changes don't disturb flush_workqueue() in progress and the new pwq's work coloring stays in sync with the rest of the workqueue. Taking flush_mutex during unlinking isn't strictly necessary, but it's simpler to do it anyway. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7dd8e7bcec51..e933979678e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -122,6 +122,9 @@ enum { * W: workqueue_lock protected. * * R: workqueue_lock protected for writes. Sched-RCU protected for reads.
+ * + * FR: wq->flush_mutex and workqueue_lock protected for writes. Sched-RCU + * protected for reads. */ /* struct worker is defined in workqueue_internal.h */ @@ -185,7 +188,7 @@ struct pool_workqueue { int nr_active; /* L: nr of active works */ int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ - struct list_head pwqs_node; /* R: node on wq->pwqs */ + struct list_head pwqs_node; /* FR: node on wq->pwqs */ struct list_head mayday_node; /* W: node on wq->maydays */ /* @@ -214,7 +217,7 @@ struct wq_flusher { struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */ - struct list_head pwqs; /* R: all pwqs of this wq */ + struct list_head pwqs; /* FR: all pwqs of this wq */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ @@ -3402,9 +3405,16 @@ static void pwq_unbound_release_workfn(struct work_struct *work) if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) return; + /* + * Unlink @pwq. Synchronization against flush_mutex isn't strictly + * necessary on release but do it anyway. It's easier to verify + * and consistent with the linking path. + */ + mutex_lock(&wq->flush_mutex); spin_lock_irq(&workqueue_lock); list_del_rcu(&pwq->pwqs_node); spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq->flush_mutex); put_unbound_pool(pool); call_rcu_sched(&pwq->rcu, rcu_free_pwq); @@ -3432,7 +3442,18 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, INIT_LIST_HEAD(&pwq->mayday_node); INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); + /* + * Link @pwq and set the matching work_color. This is synchronized + * with flush_mutex to avoid confusing flush_workqueue(). + */ + mutex_lock(&wq->flush_mutex); + spin_lock_irq(&workqueue_lock); + + pwq->work_color = wq->work_color; list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); + + spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq->flush_mutex); } static int alloc_and_link_pwqs(struct workqueue_struct *wq) -- cgit v1.2.2 From c9178087acd71b4ea010ea48e147cf66952d2da9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: perform non-reentrancy test when queueing to unbound workqueues too Because per-cpu workqueues have multiple pwqs (pool_workqueues) to serve the CPUs, to guarantee that a single work item isn't queued on one pwq while still executing another, __queue_work() takes a look at the previous pool the target work item was on and if it's still executing there, queue the work item on that pool. To support changing workqueue_attrs on the fly, unbound workqueues too will have multiple pwqs and thus need non-reentrancy test when queueing. This patch modifies __queue_work() such that the reentrancy test is performed regardless of the workqueue type. per_cpu_ptr(wq->cpu_pwqs, cpu) used to be used to determine the matching pwq for the last pool. This can't be used for unbound workqueues and is replaced with worker->current_pwq which also happens to be simpler. 
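An illustrative scenario, not taken from the patch, of the guarantee the reentrancy test preserves; the work function and timeline are hypothetical:

/* A work item must never run concurrently with itself. */
static void my_work_fn(struct work_struct *work)	/* hypothetical */
{
	/* assumes exclusive access to its private data */
}

/*
 * CPU0: queue_work(wq, &my_work);   -> starts executing on pool A
 * CPU1: queue_work(wq, &my_work);   -> my_work_fn() still running on A;
 *       __queue_work() detects this via find_worker_executing_work()
 *       and worker->current_pwq, and queues on pool A again rather
 *       than on a second pool of the same workqueue.
 */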
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e933979678e5..16fb6747276a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1209,6 +1209,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, struct work_struct *work) { struct pool_workqueue *pwq; + struct worker_pool *last_pool; struct list_head *worklist; unsigned int work_flags; unsigned int req_cpu = cpu; @@ -1228,41 +1229,36 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; - /* determine the pwq to use */ + /* pwq which will be used unless @work is executing elsewhere */ if (!(wq->flags & WQ_UNBOUND)) { - struct worker_pool *last_pool; - if (cpu == WORK_CPU_UNBOUND) cpu = raw_smp_processor_id(); - - /* - * It's multi cpu. If @work was previously on a different - * cpu, it might still be running there, in which case the - * work needs to be queued on that cpu to guarantee - * non-reentrancy. - */ pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - last_pool = get_work_pool(work); + } else { + pwq = first_pwq(wq); + } - if (last_pool && last_pool != pwq->pool) { - struct worker *worker; + /* + * If @work was previously on a different pool, it might still be + * running there, in which case the work needs to be queued on that + * pool to guarantee non-reentrancy. + */ + last_pool = get_work_pool(work); + if (last_pool && last_pool != pwq->pool) { + struct worker *worker; - spin_lock(&last_pool->lock); + spin_lock(&last_pool->lock); - worker = find_worker_executing_work(last_pool, work); + worker = find_worker_executing_work(last_pool, work); - if (worker && worker->current_pwq->wq == wq) { - pwq = per_cpu_ptr(wq->cpu_pwqs, last_pool->cpu); - } else { - /* meh... not running there, queue here */ - spin_unlock(&last_pool->lock); - spin_lock(&pwq->pool->lock); - } + if (worker && worker->current_pwq->wq == wq) { + pwq = worker->current_pwq; } else { + /* meh... not running there, queue here */ + spin_unlock(&last_pool->lock); spin_lock(&pwq->pool->lock); } } else { - pwq = first_pwq(wq); spin_lock(&pwq->pool->lock); } -- cgit v1.2.2 From 9e8cd2f5898ab6710ad81f4583fada08bf8049a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: implement apply_workqueue_attrs() Implement apply_workqueue_attrs() which applies workqueue_attrs to the specified unbound workqueue by creating a new pwq (pool_workqueue) linked to worker_pool with the specified attributes. A new pwq is linked at the head of wq->pwqs instead of tail and __queue_work() verifies that the first unbound pwq has positive refcnt before choosing it for the actual queueing. This is to cover the case where creation of a new pwq races with queueing. As base ref on a pwq won't be dropped without making another pwq the first one, __queue_work() is guaranteed to make progress and not add work item to a dead pwq. init_and_link_pwq() is updated to return the last first pwq the new pwq replaced, which is put by apply_workqueue_attrs(). Note that apply_workqueue_attrs() is almost identical to unbound pwq part of alloc_and_link_pwqs(). The only difference is that there is no previous first pwq. apply_workqueue_attrs() is implemented to handle such cases and replaces unbound pwq handling in alloc_and_link_pwqs(). 
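A minimal usage sketch of the new interface; the workqueue name and attribute values are made up and error handling is simplified:

static struct workqueue_struct *example_wq;	/* hypothetical */

static int example_apply_attrs(void)
{
	struct workqueue_attrs *attrs;
	int ret;

	example_wq = alloc_workqueue("example", WQ_UNBOUND, 0);
	if (!example_wq)
		return -ENOMEM;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	attrs->nice = -5;			/* made-up values */
	cpumask_copy(attrs->cpumask, cpumask_of(0));

	/* a new pwq becomes the first pwq; old ones drain and go away */
	ret = apply_workqueue_attrs(example_wq, attrs);
	free_workqueue_attrs(attrs);		/* attrs are copied internally */
	return ret;
}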
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 91 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 73 insertions(+), 20 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c270b4eedf16..e152394fa7eb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -410,6 +410,8 @@ extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); void free_workqueue_attrs(struct workqueue_attrs *attrs); +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 16fb6747276a..2a67fbbd192c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1228,7 +1228,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (unlikely(wq->flags & WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; - +retry: /* pwq which will be used unless @work is executing elsewhere */ if (!(wq->flags & WQ_UNBOUND)) { if (cpu == WORK_CPU_UNBOUND) @@ -1262,6 +1262,25 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, spin_lock(&pwq->pool->lock); } + /* + * pwq is determined and locked. For unbound pools, we could have + * raced with pwq release and it could already be dead. If its + * refcnt is zero, repeat pwq selection. Note that pwqs never die + * without another pwq replacing it as the first pwq or while a + * work item is executing on it, so the retying is guaranteed to + * make forward-progress. + */ + if (unlikely(!pwq->refcnt)) { + if (wq->flags & WQ_UNBOUND) { + spin_unlock(&pwq->pool->lock); + cpu_relax(); + goto retry; + } + /* oops */ + WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt", + wq->name, cpu); + } + /* pwq determined, queue */ trace_workqueue_queue_work(req_cpu, pwq, work); @@ -3425,7 +3444,8 @@ static void pwq_unbound_release_workfn(struct work_struct *work) static void init_and_link_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, - struct worker_pool *pool) + struct worker_pool *pool, + struct pool_workqueue **p_last_pwq) { BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); @@ -3445,13 +3465,58 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, mutex_lock(&wq->flush_mutex); spin_lock_irq(&workqueue_lock); + if (p_last_pwq) + *p_last_pwq = first_pwq(wq); pwq->work_color = wq->work_color; - list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); + list_add_rcu(&pwq->pwqs_node, &wq->pwqs); spin_unlock_irq(&workqueue_lock); mutex_unlock(&wq->flush_mutex); } +/** + * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue + * @wq: the target workqueue + * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() + * + * Apply @attrs to an unbound workqueue @wq. If @attrs doesn't match the + * current attributes, a new pwq is created and made the first pwq which + * will serve all new work items. Older pwqs are released as in-flight + * work items finish. Note that a work item which repeatedly requeues + * itself back-to-back will stay on its current pwq. + * + * Performs GFP_KERNEL allocations. Returns 0 on success and -errno on + * failure. 
+ */ +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct pool_workqueue *pwq, *last_pwq; + struct worker_pool *pool; + + if (WARN_ON(!(wq->flags & WQ_UNBOUND))) + return -EINVAL; + + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); + if (!pwq) + return -ENOMEM; + + pool = get_unbound_pool(attrs); + if (!pool) { + kmem_cache_free(pwq_cache, pwq); + return -ENOMEM; + } + + init_and_link_pwq(pwq, wq, pool, &last_pwq); + if (last_pwq) { + spin_lock_irq(&last_pwq->pool->lock); + put_pwq(last_pwq); + spin_unlock_irq(&last_pwq->pool->lock); + } + + return 0; +} + static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; @@ -3468,26 +3533,12 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) struct worker_pool *cpu_pools = per_cpu(cpu_worker_pools, cpu); - init_and_link_pwq(pwq, wq, &cpu_pools[highpri]); + init_and_link_pwq(pwq, wq, &cpu_pools[highpri], NULL); } + return 0; } else { - struct pool_workqueue *pwq; - struct worker_pool *pool; - - pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); - if (!pwq) - return -ENOMEM; - - pool = get_unbound_pool(unbound_std_wq_attrs[highpri]); - if (!pool) { - kmem_cache_free(pwq_cache, pwq); - return -ENOMEM; - } - - init_and_link_pwq(pwq, wq, pool); + return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); } - - return 0; } static int wq_clamp_max_active(int max_active, unsigned int flags, -- cgit v1.2.2 From 618b01eb426dd2d73a4b5e5ebc6379e4eee3b123 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: make it clear that WQ_DRAINING is an internal flag We're gonna add another internal WQ flag. Let's make the distinction clear. Prefix WQ_DRAINING with __ and move it to bit 16. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 2 +- kernel/workqueue.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e152394fa7eb..1751ec4c47c9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -294,7 +294,7 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ - WQ_DRAINING = 1 << 6, /* internal: workqueue is draining */ + __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2a67fbbd192c..590f4d048ec7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1225,7 +1225,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, debug_work_activate(work); /* if dying, only works from the same workqueue are allowed */ - if (unlikely(wq->flags & WQ_DRAINING) && + if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; retry: @@ -2763,11 +2763,11 @@ void drain_workqueue(struct workqueue_struct *wq) /* * __queue_work() needs to test whether there are drainers, is much * hotter than drain_workqueue() and already looks at @wq->flags. - * Use WQ_DRAINING so that queue doesn't have to check nr_drainers. + * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers. 
*/ spin_lock_irq(&workqueue_lock); if (!wq->nr_drainers++) - wq->flags |= WQ_DRAINING; + wq->flags |= __WQ_DRAINING; spin_unlock_irq(&workqueue_lock); reflush: flush_workqueue(wq); @@ -2795,7 +2795,7 @@ reflush: spin_lock(&workqueue_lock); if (!--wq->nr_drainers) - wq->flags &= ~WQ_DRAINING; + wq->flags &= ~__WQ_DRAINING; spin_unlock(&workqueue_lock); local_irq_enable(); -- cgit v1.2.2 From 8719dceae2f98a578507c0f6b49c93f320bd729c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: workqueue: reject adjusting max_active or applying attrs to ordered workqueues Adjusting max_active of or applying new workqueue_attrs to an ordered workqueue breaks its ordering guarantee. The former is obvious. The latter is because applying attrs creates a new pwq (pool_workqueue) and there is no ordering constraint between the old and new pwqs. Make apply_workqueue_attrs() and workqueue_set_max_active() trigger WARN_ON() if those operations are requested on an ordered workqueue and fail / ignore respectively. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 3 ++- kernel/workqueue.c | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1751ec4c47c9..5668ab249af5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -295,6 +295,7 @@ enum { WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ + __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -397,7 +398,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...) \ - alloc_workqueue(fmt, WQ_UNBOUND | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue((name), WQ_MEM_RECLAIM, 1) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 590f4d048ec7..cecd4ffe2c40 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3494,9 +3494,14 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, struct pool_workqueue *pwq, *last_pwq; struct worker_pool *pool; + /* only unbound workqueues can change attributes */ if (WARN_ON(!(wq->flags & WQ_UNBOUND))) return -EINVAL; + /* creating multiple pwqs breaks ordering guarantee */ + if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) + return -EINVAL; + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); if (!pwq) return -ENOMEM; @@ -3752,6 +3757,10 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) { struct pool_workqueue *pwq; + /* disallow meddling with max_active for ordered workqueues */ + if (WARN_ON(wq->flags & __WQ_ORDERED)) + return; + max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); spin_lock_irq(&workqueue_lock); -- cgit v1.2.2 From ba630e4940924ad1962883c207a62890778ced63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:04 -0700 Subject: cpumask: implement cpumask_parse() We have cpulist_parse() but not cpumask_parse(). Implement it using bitmap_parse(). bitmap_parse() is weird in that it takes @len for a string in kernel-memory which also is inconsistent with bitmap_parselist(). 
Make cpumask_parse() calculate the length and don't expose the inconsistency to cpumask users. Maybe we can fix up bitmap_parse() later. This will be used to expose workqueue cpumask knobs to userland via sysfs. Signed-off-by: Tejun Heo Cc: Rusty Russell --- include/linux/cpumask.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 032560295fcb..d08e4d2a9b92 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -590,6 +590,21 @@ static inline int cpulist_scnprintf(char *buf, int len, nr_cpumask_bits); } +/** + * cpumask_parse - extract a cpumask from a string + * @buf: the buffer to extract from + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. + */ +static inline int cpumask_parse(const char *buf, struct cpumask *dstp) +{ + char *nl = strchr(buf, '\n'); + int len = nl ? nl - buf : strlen(buf); + + return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); +} + /** * cpulist_parse - extract a cpumask from a user string of ranges * @buf: the buffer to extract from -- cgit v1.2.2 From d73ce004225a7b2ed75f4340bb63721d55552265 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:05 -0700 Subject: driver/base: implement subsys_virtual_register() Kay tells me the most appropriate place to expose workqueues to userland would be /sys/devices/virtual/workqueues/WQ_NAME which is symlinked to /sys/bus/workqueue/devices/WQ_NAME and that we're lacking a way to do that outside of driver core as virtual_device_parent() isn't exported and there's no interface to conveniently create a virtual subsystem. This patch implements subsys_virtual_register() by factoring out subsys_register() from subsys_system_register() and using it with virtual_device_parent() as the origin directory. It's identical to subsys_system_register() other than the origin directory, but we aren't gonna restrict the device names which should be used under it. This will be used to expose workqueue attributes to userland. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Cc: Kay Sievers --- drivers/base/base.h | 2 ++ drivers/base/bus.c | 73 +++++++++++++++++++++++++++++++++++--------------- drivers/base/core.c | 2 +- include/linux/device.h | 2 ++ 4 files changed, 57 insertions(+), 22 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 6ee17bb391a9..b8bdfe61daa6 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -101,6 +101,8 @@ static inline int hypervisor_init(void) { return 0; } extern int platform_bus_init(void); extern void cpu_dev_init(void); +struct kobject *virtual_device_parent(struct device *dev); + extern int bus_add_device(struct device *dev); extern void bus_probe_device(struct device *dev); extern void bus_remove_device(struct device *dev); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 519865b53f76..2ae2d2f92b6b 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1205,26 +1205,10 @@ static void system_root_device_release(struct device *dev) { kfree(dev); } -/** - * subsys_system_register - register a subsystem at /sys/devices/system/ - * @subsys: system subsystem - * @groups: default attributes for the root device - * - * All 'system' subsystems have a /sys/devices/system/ root device - * with the name of the subsystem. The root device can carry subsystem- - * wide attributes. All registered devices are below this single root - * device and are named after the subsystem with a simple enumeration - * number appended.
The registered devices are not explicitely named; - * only 'id' in the device needs to be set. - * - * Do not use this interface for anything new, it exists for compatibility - * with bad ideas only. New subsystems should use plain subsystems; and - * add the subsystem-wide attributes should be added to the subsystem - * directory itself and not some create fake root-device placed in - * /sys/devices/system/. - */ -int subsys_system_register(struct bus_type *subsys, - const struct attribute_group **groups) + +static int subsys_register(struct bus_type *subsys, + const struct attribute_group **groups, + struct kobject *parent_of_root) { struct device *dev; int err; @@ -1243,7 +1227,7 @@ int subsys_system_register(struct bus_type *subsys, if (err < 0) goto err_name; - dev->kobj.parent = &system_kset->kobj; + dev->kobj.parent = parent_of_root; dev->groups = groups; dev->release = system_root_device_release; @@ -1263,8 +1247,55 @@ err_dev: bus_unregister(subsys); return err; } + +/** + * subsys_system_register - register a subsystem at /sys/devices/system/ + * @subsys: system subsystem + * @groups: default attributes for the root device + * + * All 'system' subsystems have a /sys/devices/system/ root device + * with the name of the subsystem. The root device can carry subsystem- + * wide attributes. All registered devices are below this single root + * device and are named after the subsystem with a simple enumeration + * number appended. The registered devices are not explicitely named; + * only 'id' in the device needs to be set. + * + * Do not use this interface for anything new, it exists for compatibility + * with bad ideas only. New subsystems should use plain subsystems; and + * add the subsystem-wide attributes should be added to the subsystem + * directory itself and not some create fake root-device placed in + * /sys/devices/system/. + */ +int subsys_system_register(struct bus_type *subsys, + const struct attribute_group **groups) +{ + return subsys_register(subsys, groups, &system_kset->kobj); +} EXPORT_SYMBOL_GPL(subsys_system_register); +/** + * subsys_virtual_register - register a subsystem at /sys/devices/virtual/ + * @subsys: virtual subsystem + * @groups: default attributes for the root device + * + * All 'virtual' subsystems have a /sys/devices/system/ root device + * with the name of the subystem. The root device can carry subsystem-wide + * attributes. All registered devices are below this single root device. + * There's no restriction on device naming. This is for kernel software + * constructs which need sysfs interface. 
+ */ +int subsys_virtual_register(struct bus_type *subsys, + const struct attribute_group **groups) +{ + struct kobject *virtual_dir; + + virtual_dir = virtual_device_parent(NULL); + if (!virtual_dir) + return -ENOMEM; + + return subsys_register(subsys, groups, virtual_dir); +} + int __init buses_init(void) { bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); diff --git a/drivers/base/core.c b/drivers/base/core.c index 56536f4b0f6b..f58084a86e8c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -690,7 +690,7 @@ void device_initialize(struct device *dev) set_dev_node(dev, -1); } -static struct kobject *virtual_device_parent(struct device *dev) +struct kobject *virtual_device_parent(struct device *dev) { static struct kobject *virtual_dir = NULL; diff --git a/include/linux/device.h b/include/linux/device.h index 9d6464ea99c6..ee10d4e7be1a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -302,6 +302,8 @@ void subsys_interface_unregister(struct subsys_interface *sif); int subsys_system_register(struct bus_type *subsys, const struct attribute_group **groups); +int subsys_virtual_register(struct bus_type *subsys, + const struct attribute_group **groups); /** * struct class - device classes -- cgit v1.2.2 From 226223ab3c4118ddd10688cc2c131135848371ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 11:30:05 -0700 Subject: workqueue: implement sysfs interface for workqueues There are cases where workqueue users want to expose control knobs to userland. e.g. Unbound workqueues with custom attributes are scheduled to be used for writeback workers and depending on configuration it can be useful to allow admins to tinker with the priority or allowed CPUs. This patch implements workqueue_sysfs_register(), which makes the workqueue visible under /sys/bus/workqueue/devices/WQ_NAME. There currently are two attributes common to both per-cpu and unbound pools and extra attributes for unbound pools including nice level and cpumask. If alloc_workqueue*() is called with WQ_SYSFS, workqueue_sysfs_register() is called automatically as part of workqueue creation. This is the preferred method unless the workqueue user wants to apply workqueue_attrs before making the workqueue visible to userland. v2: Disallow exposing ordered workqueues as ordered workqueues can't be tuned in any way. 
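As a usage sketch (the queue name and init function are hypothetical; WQ_SYSFS and the sysfs paths are from this patch), a driver opts in at allocation time and the knobs then appear under /sys/bus/workqueue/devices/<name>:

static struct workqueue_struct *example_wq;

static int __init example_init(void)
{
	/* Unbound so that the extra nice/cpumask attributes exist. */
	example_wq = alloc_workqueue("example_wq",
				     WQ_UNBOUND | WQ_SYSFS, 0);
	if (!example_wq)
		return -ENOMEM;

	/* /sys/bus/workqueue/devices/example_wq/{per_cpu,max_active,
	 * pool_id,nice,cpumask} are now available to userland.
	 */
	return 0;
}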
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 8 ++ kernel/workqueue.c | 288 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5668ab249af5..7f6d29a417c0 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -293,6 +293,7 @@ enum { WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ @@ -495,4 +496,11 @@ extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ +#ifdef CONFIG_SYSFS +int workqueue_sysfs_register(struct workqueue_struct *wq); +#else /* CONFIG_SYSFS */ +static inline int workqueue_sysfs_register(struct workqueue_struct *wq) +{ return 0; } +#endif /* CONFIG_SYSFS */ + #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cecd4ffe2c40..c82feac0a878 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -210,6 +210,8 @@ struct wq_flusher { struct completion done; /* flush completion */ }; +struct wq_device; + /* * The externally visible workqueue abstraction is an array of * per-CPU workqueues: @@ -233,6 +235,10 @@ struct workqueue_struct { int nr_drainers; /* W: drain in progress */ int saved_max_active; /* W: saved pwq max_active */ + +#ifdef CONFIG_SYSFS + struct wq_device *wq_dev; /* I: for sysfs interface */ +#endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -442,6 +448,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); +static void copy_workqueue_attrs(struct workqueue_attrs *to, + const struct workqueue_attrs *from); /* allocate ID and assign it to @pool */ static int worker_pool_assign_id(struct worker_pool *pool) @@ -3153,6 +3161,281 @@ int keventd_up(void) return system_wq != NULL; } +#ifdef CONFIG_SYSFS +/* + * Workqueues with WQ_SYSFS flag set is visible to userland via + * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the + * following attributes. + * + * per_cpu RO bool : whether the workqueue is per-cpu or unbound + * max_active RW int : maximum number of in-flight work items + * + * Unbound workqueues have the following extra attributes. 
+ * + * id RO int : the associated pool ID + * nice RW int : nice value of the workers + * cpumask RW mask : bitmask of allowed CPUs for the workers + */ +struct wq_device { + struct workqueue_struct *wq; + struct device dev; +}; + +static struct workqueue_struct *dev_to_wq(struct device *dev) +{ + struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + + return wq_dev->wq; +} + +static ssize_t wq_per_cpu_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); +} + +static ssize_t wq_max_active_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); +} + +static ssize_t wq_max_active_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int val; + + if (sscanf(buf, "%d", &val) != 1 || val <= 0) + return -EINVAL; + + workqueue_set_max_active(wq, val); + return count; +} + +static struct device_attribute wq_sysfs_attrs[] = { + __ATTR(per_cpu, 0444, wq_per_cpu_show, NULL), + __ATTR(max_active, 0644, wq_max_active_show, wq_max_active_store), + __ATTR_NULL, +}; + +static ssize_t wq_pool_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct worker_pool *pool; + int written; + + rcu_read_lock_sched(); + pool = first_pwq(wq)->pool; + written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id); + rcu_read_unlock_sched(); + + return written; +} + +static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + rcu_read_lock_sched(); + written = scnprintf(buf, PAGE_SIZE, "%d\n", + first_pwq(wq)->pool->attrs->nice); + rcu_read_unlock_sched(); + + return written; +} + +/* prepare workqueue_attrs for sysfs store operations */ +static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) +{ + struct workqueue_attrs *attrs; + + attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!attrs) + return NULL; + + rcu_read_lock_sched(); + copy_workqueue_attrs(attrs, first_pwq(wq)->pool->attrs); + rcu_read_unlock_sched(); + return attrs; +} + +static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int ret; + + attrs = wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + if (sscanf(buf, "%d", &attrs->nice) == 1 && + attrs->nice >= -20 && attrs->nice <= 19) + ret = apply_workqueue_attrs(wq, attrs); + else + ret = -EINVAL; + + free_workqueue_attrs(attrs); + return ret ?: count; +} + +static ssize_t wq_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + rcu_read_lock_sched(); + written = cpumask_scnprintf(buf, PAGE_SIZE, + first_pwq(wq)->pool->attrs->cpumask); + rcu_read_unlock_sched(); + + written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); + return written; +} + +static ssize_t wq_cpumask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int ret; + + attrs = 
wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + ret = cpumask_parse(buf, attrs->cpumask); + if (!ret) + ret = apply_workqueue_attrs(wq, attrs); + + free_workqueue_attrs(attrs); + return ret ?: count; +} + +static struct device_attribute wq_sysfs_unbound_attrs[] = { + __ATTR(pool_id, 0444, wq_pool_id_show, NULL), + __ATTR(nice, 0644, wq_nice_show, wq_nice_store), + __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), + __ATTR_NULL, +}; + +static struct bus_type wq_subsys = { + .name = "workqueue", + .dev_attrs = wq_sysfs_attrs, +}; + +static int __init wq_sysfs_init(void) +{ + return subsys_virtual_register(&wq_subsys, NULL); +} +core_initcall(wq_sysfs_init); + +static void wq_device_release(struct device *dev) +{ + struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + + kfree(wq_dev); +} + +/** + * workqueue_sysfs_register - make a workqueue visible in sysfs + * @wq: the workqueue to register + * + * Expose @wq in sysfs under /sys/bus/workqueue/devices. + * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set + * which is the preferred method. + * + * Workqueue user should use this function directly iff it wants to apply + * workqueue_attrs before making the workqueue visible in sysfs; otherwise, + * apply_workqueue_attrs() may race against userland updating the + * attributes. + * + * Returns 0 on success, -errno on failure. + */ +int workqueue_sysfs_register(struct workqueue_struct *wq) +{ + struct wq_device *wq_dev; + int ret; + + /* + * Adjusting max_active or creating new pwqs by applyting + * attributes breaks ordering guarantee. Disallow exposing ordered + * workqueues. + */ + if (WARN_ON(wq->flags & __WQ_ORDERED)) + return -EINVAL; + + wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); + if (!wq_dev) + return -ENOMEM; + + wq_dev->wq = wq; + wq_dev->dev.bus = &wq_subsys; + wq_dev->dev.init_name = wq->name; + wq_dev->dev.release = wq_device_release; + + /* + * unbound_attrs are created separately. Suppress uevent until + * everything is ready. + */ + dev_set_uevent_suppress(&wq_dev->dev, true); + + ret = device_register(&wq_dev->dev); + if (ret) { + kfree(wq_dev); + wq->wq_dev = NULL; + return ret; + } + + if (wq->flags & WQ_UNBOUND) { + struct device_attribute *attr; + + for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { + ret = device_create_file(&wq_dev->dev, attr); + if (ret) { + device_unregister(&wq_dev->dev); + wq->wq_dev = NULL; + return ret; + } + } + } + + kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); + return 0; +} + +/** + * workqueue_sysfs_unregister - undo workqueue_sysfs_register() + * @wq: the workqueue to unregister + * + * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. + */ +static void workqueue_sysfs_unregister(struct workqueue_struct *wq) +{ + struct wq_device *wq_dev = wq->wq_dev; + + if (!wq->wq_dev) + return; + + wq->wq_dev = NULL; + device_unregister(&wq_dev->dev); +} +#else /* CONFIG_SYSFS */ +static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } +#endif /* CONFIG_SYSFS */ + /** * free_workqueue_attrs - free a workqueue_attrs * @attrs: workqueue_attrs to free @@ -3625,6 +3908,9 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, wake_up_process(rescuer->task); } + if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) + goto err_destroy; + /* * workqueue_lock protects global freeze state and workqueues * list. 
Grab it, set max_active accordingly and add the new @@ -3693,6 +3979,8 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock_irq(&workqueue_lock); + workqueue_sysfs_unregister(wq); + if (wq->rescuer) { kthread_stop(wq->rescuer->task); kfree(wq->rescuer); -- cgit v1.2.2 From 13938117a57f88a22f0df9722a5db7271fda85cd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 13 Mar 2013 09:49:06 +1100 Subject: powerpc: Fix STAB initialization Commit f5339277eb8d3aed37f12a27988366f68ab68930 accidentally removed more than just iSeries bits and took out the call to stab_initialize() thus breaking support for POWER3 processors. Put it back. (Yes, nobody noticed until now ...) Signed-off-by: Benjamin Herrenschmidt CC: [v3.4+] --- arch/powerpc/mm/hash_utils_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1b6e1271719f..6ec6c1997b3a 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -759,6 +759,8 @@ void __init early_init_mmu(void) /* Initialize stab / SLB management */ if (mmu_has_feature(MMU_FTR_SLB)) slb_initialize(); + else + stab_initialize(get_paca()->stab_real); } #ifdef CONFIG_SMP -- cgit v1.2.2 From d63ac5f6cf31c8a83170a9509b350c1489a7262b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 13 Mar 2013 09:55:02 +1100 Subject: powerpc: Fix cputable entry for 970MP rev 1.0 Commit 44ae3ab3358e962039c36ad4ae461ae9fb29596c forgot to update the entry for the 970MP rev 1.0 processor when moving some CPU features bits to the MMU feature bit mask. This breaks booting on some rare G5 models using that chip revision. Reported-by: Phileas Fogg Signed-off-by: Benjamin Herrenschmidt CC: [v3.0+] --- arch/powerpc/kernel/cputable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 75a3d71b895d..19599ef352bc 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -275,7 +275,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, -- cgit v1.2.2 From ff2d7587c7b2a1b46abc7618f45b8cc3476d8716 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Mon, 11 Mar 2013 13:44:55 +0000 Subject: powerpc: Remove last traces of POWER4_ONLY The Kconfig symbol POWER4_ONLY got removed in commit 694caf0255dcab506d1e174c96a65ab65d96e108 ("powerpc: Remove CONFIG_POWER4_ONLY"). Remove its last traces. Signed-off-by: Paul Bolle Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/Kconfig.cputype | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index cea2f09c4241..18e3b76c78d7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -124,9 +124,8 @@ config 6xx select PPC_HAVE_PMU_SUPPORT config POWER3 - bool depends on PPC64 && PPC_BOOK3S - default y if !POWER4_ONLY + def_bool y config POWER4 depends on PPC64 && PPC_BOOK3S @@ -145,8 +144,7 @@ config TUNE_CELL but somewhat slower on other machines. This option only changes the scheduling of instructions, not the selection of instructions itself, so the resulting kernel will keep running on all other - machines. 
When building a kernel that is supposed to run only - on Cell, you should also select the POWER4_ONLY option. + machines. # this is temp to handle compat with arch=ppc config 8xx -- cgit v1.2.2 From 1674400aaee5b466c595a8fc310488263ce888c7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 12 Mar 2013 01:51:51 +0000 Subject: powerpc: Fix -mcmodel=medium breakage in prom_init.c Commit 5ac47f7a6efb (powerpc: Relocate prom_init.c on 64bit) made prom_init.c position independent by manually relocating its entries in the TOC. We get the address of the TOC entries with the __prom_init_toc_start linker symbol. If __prom_init_toc_start ends up as an entry in the TOC then we need to add an offset to get the current address. This is the case for older toolchains. On the other hand, if we have a newer toolchain that supports -mcmodel=medium then __prom_init_toc_start will be created by a relative offset from r2 (the TOC pointer). Since r2 has already been relocated, nothing more needs to be done. Adding an offset in this case is wrong and Aaro Koskinen and Alexander Graf have noticed G5 and OpenBIOS breakage. Alan Modra suggested we just use r2 to get at the TOC which is simpler and works with both old and new toolchains. Reported-by: Alexander Graf Signed-off-by: Anton Blanchard Tested-by: Aaro Koskinen Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 7f7fb7fd991b..13f8d168b3f1 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2832,11 +2832,13 @@ static void unreloc_toc(void) { } #else -static void __reloc_toc(void *tocstart, unsigned long offset, - unsigned long nr_entries) +static void __reloc_toc(unsigned long offset, unsigned long nr_entries) { unsigned long i; - unsigned long *toc_entry = (unsigned long *)tocstart; + unsigned long *toc_entry; + + /* Get the start of the TOC by using r2 directly. */ + asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry)); for (i = 0; i < nr_entries; i++) { *toc_entry = *toc_entry + offset; @@ -2850,8 +2852,7 @@ static void reloc_toc(void) unsigned long nr_entries = (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - /* Need to add offset to get at __prom_init_toc_start */ - __reloc_toc(__prom_init_toc_start + offset, offset, nr_entries); + __reloc_toc(offset, nr_entries); mb(); } @@ -2864,8 +2865,7 @@ static void unreloc_toc(void) mb(); - /* __prom_init_toc_start has been relocated, no need to add offset */ - __reloc_toc(__prom_init_toc_start, -offset, nr_entries); + __reloc_toc(-offset, nr_entries); } #endif #endif -- cgit v1.2.2 From e62676169118bc2d42e5008b3f8872646313f077 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Mar 2013 17:41:37 -0700 Subject: workqueue: implement current_is_workqueue_rescuer() Implement a function which queries whether the current task is running off a workqueue rescuer. This will be used to convert writeback to workqueue.
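A sketch of the intended use from a work function (hypothetical; the actual writeback conversion comes later):

static void example_work_fn(struct work_struct *work)
{
	if (current_is_workqueue_rescuer()) {
		/* Running off the rescuer under memory pressure: do
		 * only the minimum needed to guarantee forward
		 * progress.
		 */
		pr_debug("%s: running from rescuer\n", __func__);
	}

	/* ... normal processing ... */
}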
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7f6d29a417c0..df30763c8682 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -451,6 +451,7 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c82feac0a878..f5c8bbb9ada3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4071,6 +4071,19 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) } EXPORT_SYMBOL_GPL(workqueue_set_max_active); +/** + * current_is_workqueue_rescuer - is %current workqueue rescuer? + * + * Determine whether %current is a workqueue rescuer. Can be used from + * work functions to determine whether it's being run off the rescuer task. + */ +bool current_is_workqueue_rescuer(void) +{ + struct worker *worker = current_wq_worker(); + + return worker && worker == worker->current_pwq->wq->rescuer; +} + /** * workqueue_congested - test whether a workqueue is congested * @cpu: CPU in question -- cgit v1.2.2 From 810d601f07ce2481ff776e049c0733ded2abbcc1 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 18 Feb 2013 10:15:03 +0900 Subject: extcon: max8997: Check the pointer of platform data to protect null pointer error This patch check the pointer of platform data to protect kernel panic when platform data is not used and code clean. Signed-off-by: Chanwoo Choi Signed-off-by: Myungjoo Ham --- drivers/extcon/extcon-max8997.c | 56 +++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c index e636d950ad6c..69641bcae325 100644 --- a/drivers/extcon/extcon-max8997.c +++ b/drivers/extcon/extcon-max8997.c @@ -712,29 +712,45 @@ static int max8997_muic_probe(struct platform_device *pdev) goto err_irq; } - /* Initialize registers according to platform data */ if (pdata->muic_pdata) { - struct max8997_muic_platform_data *mdata = info->muic_pdata; - - for (i = 0; i < mdata->num_init_data; i++) { - max8997_write_reg(info->muic, mdata->init_data[i].addr, - mdata->init_data[i].data); + struct max8997_muic_platform_data *muic_pdata + = pdata->muic_pdata; + + /* Initialize registers according to platform data */ + for (i = 0; i < muic_pdata->num_init_data; i++) { + max8997_write_reg(info->muic, + muic_pdata->init_data[i].addr, + muic_pdata->init_data[i].data); } - } - /* - * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB - * h/w path of COMP2/COMN1 on CONTROL1 register. - */ - if (pdata->muic_pdata->path_uart) - info->path_uart = pdata->muic_pdata->path_uart; - else - info->path_uart = CONTROL1_SW_UART; + /* + * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB + * h/w path of COMP2/COMN1 on CONTROL1 register. 
+ */ + if (muic_pdata->path_uart) + info->path_uart = muic_pdata->path_uart; + else + info->path_uart = CONTROL1_SW_UART; - if (pdata->muic_pdata->path_usb) - info->path_usb = pdata->muic_pdata->path_usb; - else + if (muic_pdata->path_usb) + info->path_usb = muic_pdata->path_usb; + else + info->path_usb = CONTROL1_SW_USB; + + /* + * Default delay time for detecting cable state + * after certain time. + */ + if (muic_pdata->detcable_delay_ms) + delay_jiffies = + msecs_to_jiffies(muic_pdata->detcable_delay_ms); + else + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } else { + info->path_uart = CONTROL1_SW_UART; info->path_usb = CONTROL1_SW_USB; + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } /* Set initial path for UART */ max8997_muic_set_path(info, info->path_uart, true); @@ -751,10 +767,6 @@ static int max8997_muic_probe(struct platform_device *pdev) * driver should notify cable state to upper layer. */ INIT_DELAYED_WORK(&info->wq_detcable, max8997_muic_detect_cable_wq); - if (pdata->muic_pdata->detcable_delay_ms) - delay_jiffies = msecs_to_jiffies(pdata->muic_pdata->detcable_delay_ms); - else - delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); schedule_delayed_work(&info->wq_detcable, delay_jiffies); return 0; -- cgit v1.2.2 From 190d7cfc8632c10bfbfe756f882b6d9cfddfdf6a Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 18 Feb 2013 10:03:32 +0900 Subject: extcon: max77693: Fix bug of wrong pointer when platform data is not used This patch fix wrong pointer of platform data. If each machine set platform data for h/w path or delay time of workqueue, this driver happen kernel panic related to null pointer. Signed-off-by: Chanwoo Choi Signed-off-by: Myungjoo Ham --- drivers/extcon/extcon-max77693.c | 90 +++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index b70e3815c459..fea10624f3e5 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -1045,7 +1045,6 @@ static int max77693_muic_probe(struct platform_device *pdev) { struct max77693_dev *max77693 = dev_get_drvdata(pdev->dev.parent); struct max77693_platform_data *pdata = dev_get_platdata(max77693->dev); - struct max77693_muic_platform_data *muic_pdata = pdata->muic_data; struct max77693_muic_info *info; int delay_jiffies; int ret; @@ -1145,44 +1144,63 @@ static int max77693_muic_probe(struct platform_device *pdev) goto err_irq; } - /* Initialize MUIC register by using platform data */ - for (i = 0 ; i < muic_pdata->num_init_data ; i++) { - enum max77693_irq_source irq_src = MAX77693_IRQ_GROUP_NR; - - max77693_write_reg(info->max77693->regmap_muic, - muic_pdata->init_data[i].addr, - muic_pdata->init_data[i].data); - - switch (muic_pdata->init_data[i].addr) { - case MAX77693_MUIC_REG_INTMASK1: - irq_src = MUIC_INT1; - break; - case MAX77693_MUIC_REG_INTMASK2: - irq_src = MUIC_INT2; - break; - case MAX77693_MUIC_REG_INTMASK3: - irq_src = MUIC_INT3; - break; + if (pdata->muic_data) { + struct max77693_muic_platform_data *muic_pdata = pdata->muic_data; + + /* Initialize MUIC register by using platform data */ + for (i = 0 ; i < muic_pdata->num_init_data ; i++) { + enum max77693_irq_source irq_src + = MAX77693_IRQ_GROUP_NR; + + max77693_write_reg(info->max77693->regmap_muic, + muic_pdata->init_data[i].addr, + muic_pdata->init_data[i].data); + + switch (muic_pdata->init_data[i].addr) { + case MAX77693_MUIC_REG_INTMASK1: + irq_src = MUIC_INT1; + break; + case 
MAX77693_MUIC_REG_INTMASK2: + irq_src = MUIC_INT2; + break; + case MAX77693_MUIC_REG_INTMASK3: + irq_src = MUIC_INT3; + break; + } + + if (irq_src < MAX77693_IRQ_GROUP_NR) + info->max77693->irq_masks_cur[irq_src] + = muic_pdata->init_data[i].data; } - if (irq_src < MAX77693_IRQ_GROUP_NR) - info->max77693->irq_masks_cur[irq_src] - = muic_pdata->init_data[i].data; - } + /* + * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB + * h/w path of COMP2/COMN1 on CONTROL1 register. + */ + if (muic_pdata->path_uart) + info->path_uart = muic_pdata->path_uart; + else + info->path_uart = CONTROL1_SW_UART; - /* - * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB - * h/w path of COMP2/COMN1 on CONTROL1 register. - */ - if (muic_pdata->path_uart) - info->path_uart = muic_pdata->path_uart; - else - info->path_uart = CONTROL1_SW_UART; + if (muic_pdata->path_usb) + info->path_usb = muic_pdata->path_usb; + else + info->path_usb = CONTROL1_SW_USB; - if (muic_pdata->path_usb) - info->path_usb = muic_pdata->path_usb; - else + /* + * Default delay time for detecting cable state + * after certain time. + */ + if (muic_pdata->detcable_delay_ms) + delay_jiffies = + msecs_to_jiffies(muic_pdata->detcable_delay_ms); + else + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } else { info->path_usb = CONTROL1_SW_USB; + info->path_uart = CONTROL1_SW_UART; + delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); + } /* Set initial path for UART */ max77693_muic_set_path(info, info->path_uart, true); @@ -1208,10 +1226,6 @@ static int max77693_muic_probe(struct platform_device *pdev) * driver should notify cable state to upper layer. */ INIT_DELAYED_WORK(&info->wq_detcable, max77693_muic_detect_cable_wq); - if (muic_pdata->detcable_delay_ms) - delay_jiffies = msecs_to_jiffies(muic_pdata->detcable_delay_ms); - else - delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); schedule_delayed_work(&info->wq_detcable, delay_jiffies); return ret; -- cgit v1.2.2 From 0ec83bd2460ed6aed0e7f29f9e0633b054621c02 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 13 Mar 2013 17:38:57 +0900 Subject: extcon: max77693: Initialize register of MUIC device to bring up it without platform data This patch set default value of MUIC register to bring up MUIC device. If user don't set some initial value for MUIC device through platform data, extcon-max77693 driver use 'default_init_data' to bring up base operation of MAX77693 MUIC device. Signed-off-by: Chanwoo Choi Signed-off-by: Myungjoo Ham --- drivers/extcon/extcon-max77693.c | 93 ++++++++++++++++++++++++++---------- include/linux/mfd/max77693-private.h | 23 +++++++++ 2 files changed, 91 insertions(+), 25 deletions(-) diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index fea10624f3e5..8f3c947b0029 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -32,6 +32,38 @@ #define DEV_NAME "max77693-muic" #define DELAY_MS_DEFAULT 20000 /* unit: millisecond */ +/* + * Default value of MAX77693 register to bring up MUIC device. + * If user don't set some initial value for MUIC device through platform data, + * extcon-max77693 driver use 'default_init_data' to bring up base operation + * of MAX77693 MUIC device. 
+ */ +struct max77693_reg_data default_init_data[] = { + { + /* STATUS2 - [3]ChgDetRun */ + .addr = MAX77693_MUIC_REG_STATUS2, + .data = STATUS2_CHGDETRUN_MASK, + }, { + /* INTMASK1 - Unmask [3]ADC1KM,[0]ADCM */ + .addr = MAX77693_MUIC_REG_INTMASK1, + .data = INTMASK1_ADC1K_MASK + | INTMASK1_ADC_MASK, + }, { + /* INTMASK2 - Unmask [0]ChgTypM */ + .addr = MAX77693_MUIC_REG_INTMASK2, + .data = INTMASK2_CHGTYP_MASK, + }, { + /* INTMASK3 - Mask all of interrupts */ + .addr = MAX77693_MUIC_REG_INTMASK3, + .data = 0x0, + }, { + /* CDETCTRL2 */ + .addr = MAX77693_MUIC_REG_CDETCTRL2, + .data = CDETCTRL2_VIDRMEN_MASK + | CDETCTRL2_DXOVPEN_MASK, + }, +}; + enum max77693_muic_adc_debounce_time { ADC_DEBOUNCE_TIME_5MS = 0, ADC_DEBOUNCE_TIME_10MS, @@ -1046,6 +1078,8 @@ static int max77693_muic_probe(struct platform_device *pdev) struct max77693_dev *max77693 = dev_get_drvdata(pdev->dev.parent); struct max77693_platform_data *pdata = dev_get_platdata(max77693->dev); struct max77693_muic_info *info; + struct max77693_reg_data *init_data; + int num_init_data; int delay_jiffies; int ret; int i; @@ -1144,35 +1178,44 @@ static int max77693_muic_probe(struct platform_device *pdev) goto err_irq; } + + /* Initialize MUIC register by using platform data or default data */ if (pdata->muic_data) { - struct max77693_muic_platform_data *muic_pdata = pdata->muic_data; + init_data = pdata->muic_data->init_data; + num_init_data = pdata->muic_data->num_init_data; + } else { + init_data = default_init_data; + num_init_data = ARRAY_SIZE(default_init_data); + } + + for (i = 0 ; i < num_init_data ; i++) { + enum max77693_irq_source irq_src + = MAX77693_IRQ_GROUP_NR; - /* Initialize MUIC register by using platform data */ - for (i = 0 ; i < muic_pdata->num_init_data ; i++) { - enum max77693_irq_source irq_src - = MAX77693_IRQ_GROUP_NR; - - max77693_write_reg(info->max77693->regmap_muic, - muic_pdata->init_data[i].addr, - muic_pdata->init_data[i].data); - - switch (muic_pdata->init_data[i].addr) { - case MAX77693_MUIC_REG_INTMASK1: - irq_src = MUIC_INT1; - break; - case MAX77693_MUIC_REG_INTMASK2: - irq_src = MUIC_INT2; - break; - case MAX77693_MUIC_REG_INTMASK3: - irq_src = MUIC_INT3; - break; - } - - if (irq_src < MAX77693_IRQ_GROUP_NR) - info->max77693->irq_masks_cur[irq_src] - = muic_pdata->init_data[i].data; + max77693_write_reg(info->max77693->regmap_muic, + init_data[i].addr, + init_data[i].data); + + switch (init_data[i].addr) { + case MAX77693_MUIC_REG_INTMASK1: + irq_src = MUIC_INT1; + break; + case MAX77693_MUIC_REG_INTMASK2: + irq_src = MUIC_INT2; + break; + case MAX77693_MUIC_REG_INTMASK3: + irq_src = MUIC_INT3; + break; } + if (irq_src < MAX77693_IRQ_GROUP_NR) + info->max77693->irq_masks_cur[irq_src] + = init_data[i].data; + } + + if (pdata->muic_data) { + struct max77693_muic_platform_data *muic_pdata = pdata->muic_data; + /* * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB * h/w path of COMP2/COMN1 on CONTROL1 register. 
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 5b18ecde69b5..1aa4f13cdfa6 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -106,6 +106,29 @@ enum max77693_muic_reg { MAX77693_MUIC_REG_END, }; +/* MAX77693 INTMASK1~2 Register */ +#define INTMASK1_ADC1K_SHIFT 3 +#define INTMASK1_ADCERR_SHIFT 2 +#define INTMASK1_ADCLOW_SHIFT 1 +#define INTMASK1_ADC_SHIFT 0 +#define INTMASK1_ADC1K_MASK (1 << INTMASK1_ADC1K_SHIFT) +#define INTMASK1_ADCERR_MASK (1 << INTMASK1_ADCERR_SHIFT) +#define INTMASK1_ADCLOW_MASK (1 << INTMASK1_ADCLOW_SHIFT) +#define INTMASK1_ADC_MASK (1 << INTMASK1_ADC_SHIFT) + +#define INTMASK2_VIDRM_SHIFT 5 +#define INTMASK2_VBVOLT_SHIFT 4 +#define INTMASK2_DXOVP_SHIFT 3 +#define INTMASK2_DCDTMR_SHIFT 2 +#define INTMASK2_CHGDETRUN_SHIFT 1 +#define INTMASK2_CHGTYP_SHIFT 0 +#define INTMASK2_VIDRM_MASK (1 << INTMASK2_VIDRM_SHIFT) +#define INTMASK2_VBVOLT_MASK (1 << INTMASK2_VBVOLT_SHIFT) +#define INTMASK2_DXOVP_MASK (1 << INTMASK2_DXOVP_SHIFT) +#define INTMASK2_DCDTMR_MASK (1 << INTMASK2_DCDTMR_SHIFT) +#define INTMASK2_CHGDETRUN_MASK (1 << INTMASK2_CHGDETRUN_SHIFT) +#define INTMASK2_CHGTYP_MASK (1 << INTMASK2_CHGTYP_SHIFT) + /* MAX77693 MUIC - STATUS1~3 Register */ #define STATUS1_ADC_SHIFT (0) #define STATUS1_ADCLOW_SHIFT (5) -- cgit v1.2.2 From d35162f89b8f00537d7b240b76d2d0e8b8d29aa0 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 12 Mar 2013 06:31:19 +0000 Subject: net: ethernet: cpsw: fix usage of cpdma_check_free_tx_desc() Commit fae50823d0 ("net: ethernet: davinci_cpdma: Add boundary for rx and tx descriptors") introduced a function to check the current allocation state of tx packets. The return value is taken into account to stop the netqork queue on the adapter in case there are no free slots. However, cpdma_check_free_tx_desc() returns 'true' if there is room in the bitmap, not 'false', so the usage of the function is wrong. Signed-off-by: Daniel Mack Cc: Mugunthan V N Reported-by: Sven Neumann Reported-by: Andreas Fenkart Tested-by: Mugunthan V N Acked-by: Mugunthan V N Tested-by: Andreas Fenkart Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 01ffbc486982..75c48558e6fd 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, /* If there is no more tx desc left free then we need to * tell the kernel to stop sending us tx frames. */ - if (unlikely(cpdma_check_free_tx_desc(priv->txch))) + if (unlikely(!cpdma_check_free_tx_desc(priv->txch))) netif_stop_queue(ndev); return NETDEV_TX_OK; -- cgit v1.2.2 From 876254ae2758d50dcb08c7bd00caf6a806571178 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 12 Mar 2013 06:31:32 +0000 Subject: bonding: don't call update_speed_duplex() under spinlocks bond_update_speed_duplex() might sleep while calling underlying slave's routines. Move it out of atomic context in bond_enslave() and remove it from bond_miimon_commit() - it was introduced by commit 546add79, however when the slave interfaces go up/change state it's their responsibility to fire NETDEV_UP/NETDEV_CHANGE events so that bonding can properly update their speed. I've tested it on all combinations of ifup/ifdown, autoneg/speed/duplex changes, remote-controlled and local, on (not) MII-based cards. All changes are visible. 
Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8b4e96e01d6c..6bbd90e1123c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1746,6 +1746,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_compute_features(bond); + bond_update_speed_duplex(new_slave); + read_lock(&bond->lock); new_slave->last_arp_rx = jiffies - @@ -1798,8 +1800,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->link == BOND_LINK_DOWN ? "DOWN" : (new_slave->link == BOND_LINK_UP ? "UP" : "BACK")); - bond_update_speed_duplex(new_slave); - if (USES_PRIMARY(bond->params.mode) && bond->params.primary[0]) { /* if there is a primary slave, remember it */ if (strcmp(bond->params.primary, new_slave->dev->name) == 0) { @@ -2374,8 +2374,6 @@ static void bond_miimon_commit(struct bonding *bond) bond_set_backup_slave(slave); } - bond_update_speed_duplex(slave); - pr_info("%s: link status definitely up for interface %s, %u Mbps %s duplex.\n", bond->dev->name, slave->dev->name, slave->speed, slave->duplex ? "full" : "half"); -- cgit v1.2.2 From f04feec2501774fe20fc7c77b5a16b9e23b36f95 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Mon, 11 Mar 2013 15:12:39 +0100 Subject: ARM: at91: dt: at91sam9x5: correct NAND pins comments Comments on NAND pins were inverted. Signed-off-by: Richard Genoud Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91sam9x5.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index aa98e641931f..9b5d0480d7ee 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -238,8 +238,8 @@ nand { pinctrl_nand: nand-0 { atmel,pins = - <3 4 0x0 0x1 /* PD5 gpio RDY pin pull_up */ - 3 5 0x0 0x1>; /* PD4 gpio enable pin pull_up */ + <3 4 0x0 0x1 /* PD4 gpio Chip Enable pin pull_up */ + 3 5 0x0 0x1>; /* PD5 gpio RDY/BUSY pin pull_up */ }; }; -- cgit v1.2.2 From 7f06472f1c3281abceea36059f94e099bfe4698f Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Mon, 11 Mar 2013 15:12:40 +0100 Subject: ARM: at91: dt: at91sam9x5: complete NAND pinctrl There were only chip enable and ready/busy pins for the NAND controller. This adds the rest of the pins. pinctrl_nand_16bits contains the specific muxes for 16-bit NANDs.
Signed-off-by: Richard Genoud Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Nicolas Ferre --- arch/arm/boot/dts/at91sam9x5.dtsi | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 9b5d0480d7ee..a98c0d50fbbe 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -238,8 +238,32 @@ nand { pinctrl_nand: nand-0 { atmel,pins = - <3 4 0x0 0x1 /* PD4 gpio Chip Enable pin pull_up */ - 3 5 0x0 0x1>; /* PD5 gpio RDY/BUSY pin pull_up */ + <3 0 0x1 0x0 /* PD0 periph A Read Enable */ + 3 1 0x1 0x0 /* PD1 periph A Write Enable */ + 3 2 0x1 0x0 /* PD2 periph A Address Latch Enable */ + 3 3 0x1 0x0 /* PD3 periph A Command Latch Enable */ + 3 4 0x0 0x1 /* PD4 gpio Chip Enable pin pull_up */ + 3 5 0x0 0x1 /* PD5 gpio RDY/BUSY pin pull_up */ + 3 6 0x1 0x0 /* PD6 periph A Data bit 0 */ + 3 7 0x1 0x0 /* PD7 periph A Data bit 1 */ + 3 8 0x1 0x0 /* PD8 periph A Data bit 2 */ + 3 9 0x1 0x0 /* PD9 periph A Data bit 3 */ + 3 10 0x1 0x0 /* PD10 periph A Data bit 4 */ + 3 11 0x1 0x0 /* PD11 periph A Data bit 5 */ + 3 12 0x1 0x0 /* PD12 periph A Data bit 6 */ + 3 13 0x1 0x0>; /* PD13 periph A Data bit 7 */ + }; + + pinctrl_nand_16bits: nand_16bits-0 { + atmel,pins = + <3 14 0x1 0x0 /* PD14 periph A Data bit 8 */ + 3 15 0x1 0x0 /* PD15 periph A Data bit 9 */ + 3 16 0x1 0x0 /* PD16 periph A Data bit 10 */ + 3 17 0x1 0x0 /* PD17 periph A Data bit 11 */ + 3 18 0x1 0x0 /* PD18 periph A Data bit 12 */ + 3 19 0x1 0x0 /* PD19 periph A Data bit 13 */ + 3 20 0x1 0x0 /* PD20 periph A Data bit 14 */ + 3 21 0x1 0x0>; /* PD21 periph A Data bit 15 */ }; }; -- cgit v1.2.2 From a79eac7165ed62114e6ca197195aa5060a54f137 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 5 Feb 2013 14:35:11 +0100 Subject: atmel_lcdfb: fix 16-bpp modes on older SOCs Fix regression introduced by commit 787f9fd23283 ("atmel_lcdfb: support 16bit BGR:565 mode, remove unsupported 15bit modes") which broke 16-bpp modes for older SOCs which use IBGR:555 (msb is intensity) rather than BGR:565. Use SOC-type to determine the pixel layout. Tested on at91sam9263 and at91sam9g45. Cc: Acked-by: Peter Korsgaard Signed-off-by: Johan Hovold Signed-off-by: Nicolas Ferre --- drivers/video/atmel_lcdfb.c | 22 +++++++++++++++------- include/video/atmel_lcdc.h | 1 + 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c index 12cf5f31ee8f..025428e04c33 100644 --- a/drivers/video/atmel_lcdfb.c +++ b/drivers/video/atmel_lcdfb.c @@ -422,17 +422,22 @@ static int atmel_lcdfb_check_var(struct fb_var_screeninfo *var, = var->bits_per_pixel; break; case 16: + /* Older SOCs use IBGR:555 rather than BGR:565. 
*/ + if (sinfo->have_intensity_bit) + var->green.length = 5; + else + var->green.length = 6; + if (sinfo->lcd_wiring_mode == ATMEL_LCDC_WIRING_RGB) { - /* RGB:565 mode */ - var->red.offset = 11; + /* RGB:5X5 mode */ + var->red.offset = var->green.length + 5; var->blue.offset = 0; } else { - /* BGR:565 mode */ + /* BGR:5X5 mode */ var->red.offset = 0; - var->blue.offset = 11; + var->blue.offset = var->green.length + 5; } var->green.offset = 5; - var->green.length = 6; var->red.length = var->blue.length = 5; break; case 32: @@ -679,8 +684,7 @@ static int atmel_lcdfb_setcolreg(unsigned int regno, unsigned int red, case FB_VISUAL_PSEUDOCOLOR: if (regno < 256) { - if (cpu_is_at91sam9261() || cpu_is_at91sam9263() - || cpu_is_at91sam9rl()) { + if (sinfo->have_intensity_bit) { /* old style I+BGR:555 */ val = ((red >> 11) & 0x001f); val |= ((green >> 6) & 0x03e0); @@ -870,6 +874,10 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev) } sinfo->info = info; sinfo->pdev = pdev; + if (cpu_is_at91sam9261() || cpu_is_at91sam9263() || + cpu_is_at91sam9rl()) { + sinfo->have_intensity_bit = true; + } strcpy(info->fix.id, sinfo->pdev->name); info->flags = ATMEL_LCDFB_FBINFO_DEFAULT; diff --git a/include/video/atmel_lcdc.h b/include/video/atmel_lcdc.h index 28447f1594fa..5f0e234026c0 100644 --- a/include/video/atmel_lcdc.h +++ b/include/video/atmel_lcdc.h @@ -62,6 +62,7 @@ struct atmel_lcdfb_info { void (*atmel_lcdfb_power_control)(int on); struct fb_monspecs *default_monspecs; u32 pseudo_palette[16]; + bool have_intensity_bit; }; #define ATMEL_LCDC_DMABADDR1 0x00 -- cgit v1.2.2 From 67cf9c0a00bd88443adb7d6c3efa8b18d03f97c5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Mar 2013 10:56:13 +0100 Subject: ARM: at91: fix LCD-wiring mode Fix regression introduced by commit 787f9fd23283 ("atmel_lcdfb: support 16bit BGR:565 mode, remove unsupported 15bit modes") which broke 16-bpp modes for older SOCs which use IBGR:555 (msb is intensity) rather than BGR:565. The above commit removes the RGB:555-wiring hack by removing the no longer used ATMEL_LCDC_WIRING_RGB555 define. Acked-by: Peter Korsgaard Signed-off-by: Johan Hovold Signed-off-by: Nicolas Ferre --- include/video/atmel_lcdc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/video/atmel_lcdc.h b/include/video/atmel_lcdc.h index 5f0e234026c0..8deb22672ada 100644 --- a/include/video/atmel_lcdc.h +++ b/include/video/atmel_lcdc.h @@ -30,7 +30,6 @@ */ #define ATMEL_LCDC_WIRING_BGR 0 #define ATMEL_LCDC_WIRING_RGB 1 -#define ATMEL_LCDC_WIRING_RGB555 2 /* LCD Controller info data structure, stored in device platform_data */ -- cgit v1.2.2 From 7c6cdead7cc9a99650d15497aae47d7472217eb1 Mon Sep 17 00:00:00 2001 From: Nithin Sujir Date: Tue, 12 Mar 2013 15:32:48 +0000 Subject: tg3: 5715 does not link up when autoneg off Commit d13ba512cbba7de5d55d7a3b2aae7d83c8921457 ("tg3: Remove SPEED_UNKNOWN checks") cleaned up the autoneg advertisement by removing some dead code. One effect of this change was that the advertisement register would not be updated if autoneg is turned off. This exposed a bug on the 5715 device w.r.t linking. The 5715 defaults to advertise only 10Mb Full duplex. But with autoneg disabled, it needs the configured speed enabled in the advertisement register to link up. This patch adds the work around to advertise all speeds on the 5715 when autoneg is disabled. Reported-by: Marcin Miotk Reviewed-by: Benjamin Li Signed-off-by: Nithin Nayak Sujir Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 93729f942358..67d2663b3974 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4130,6 +4130,14 @@ static void tg3_phy_copper_begin(struct tg3 *tp) tp->link_config.active_speed = tp->link_config.speed; tp->link_config.active_duplex = tp->link_config.duplex; + if (tg3_asic_rev(tp) == ASIC_REV_5714) { + /* With autoneg disabled, 5715 only links up when the + * advertisement register has the configured speed + * enabled. + */ + tg3_writephy(tp, MII_ADVERTISE, ADVERTISE_ALL); + } + bmcr = 0; switch (tp->link_config.speed) { default: -- cgit v1.2.2 From f2815633504b442ca0b0605c16bf3d88a3a0fcea Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 12 Mar 2013 15:53:23 +0000 Subject: sctp: Use correct side-effect command in duplicate cookie handling When SCTP is done processing a duplicate cookie chunk, it tries to delete a newly created association. For that, it has to set the right association for the side-effect processing to work. However, when it uses the SCTP_CMD_NEW_ASOC command, that performs more work than really needed (like hashing the association and assigning it an id), and there is no point doing that only to delete the association as the next step. In fact, it also creates an impossible condition where an association may be found by the getsockopt() call, and that association is empty. This causes a crash in some sctp getsockopts. The solution is rather simple. We simply use the SCTP_CMD_SET_ASOC command, which doesn't have all the overhead and does exactly what we need. Reported-by: Karl Heiss Tested-by: Karl Heiss CC: Neil Horman Signed-off-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5131fcfedb03..de1a0138317f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2082,7 +2082,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, } /* Delete the tempory new association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); /* Restore association pointer to provide SCTP command interpeter -- cgit v1.2.2 From 2317f449af30073cfa6ec8352e4a65a89e357bdd Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Thu, 7 Mar 2013 21:39:37 +0000 Subject: sctp: don't break the loop while meeting the active_path so as to find the matched transport The sctp_assoc_lookup_tsn() function searches for the transport a certain TSN was sent on; if it is not found on the active_path transport, it goes on to search all the other transports in the peer's transport_addr_list. However, we should continue to the next entry rather than break the loop when we meet the active_path transport. Signed-off-by: Xufeng Zhang Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S.
Miller --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 43cd0dd9149d..d2709e2b7be6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1079,7 +1079,7 @@ struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc, transports) { if (transport == active) - break; + continue; list_for_each_entry(chunk, &transport->transmitted, transmitted_list) { if (key == chunk->subh.data_hdr->tsn) { -- cgit v1.2.2 From 5b9e12dbf92b441b37136ea71dac59f05f2673a9 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 13 Mar 2013 00:24:15 +0000 Subject: ipv4: fix definition of FIB_TABLE_HASHSZ A long time ago, commit 93456b6d7753def8760b423ac6b986eb9d5a4a95 (Author: Denis V. Lunev, Date: Thu Jan 10 03:23:38 2008 -0800, "[IPV4]: Unify access to the routing tables.") gave the definition of the FIB_TABLE_HASHSZ size a wrong dependency: it should depend upon CONFIG_IP_MULTIPLE_TABLES (as in the original code), but it instead depended on CONFIG_IP_ROUTE_MULTIPATH. This patch returns the situation to the original state. The problem was spotted by Tingwei Liu. Signed-off-by: Denis V. Lunev CC: Tingwei Liu CC: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9497be1ad4c0..e49db91593a9 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -152,18 +152,16 @@ struct fib_result_nl { }; #ifdef CONFIG_IP_ROUTE_MULTIPATH - #define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) - -#define FIB_TABLE_HASHSZ 2 - #else /* CONFIG_IP_ROUTE_MULTIPATH */ - #define FIB_RES_NH(res) ((res).fi->fib_nh[0]) +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +#ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 - -#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +#else +#define FIB_TABLE_HASHSZ 2 +#endif extern __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); -- cgit v1.2.2 From b701f16dd490d3f346724050f17d60beda094998 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 13 Mar 2013 02:25:17 +0000 Subject: net: qmi_wwan: set correct altsetting for Gobi 1K devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bd877e4 ("net: qmi_wwan: use a single bind function for all device types") made Gobi 1K devices fail probing. Using the number of endpoints in the default altsetting to decide whether the function uses one or two interfaces is wrong. Other altsettings may provide more endpoints. With Gobi 1K devices, USB interface #3's altsetting is 0 by default, but altsetting 0 only provides one interrupt endpoint and is not sufficient for QMI. Altsetting 1 provides all 3 endpoints required for qmi_wwan and works with QMI. Gobi 1K layout for intf#3 is: Interface Descriptor: 255/255/255 bInterfaceNumber 3 bAlternateSetting 0 Endpoint Descriptor: Interrupt IN Interface Descriptor: 255/255/255 bInterfaceNumber 3 bAlternateSetting 1 Endpoint Descriptor: Interrupt IN Endpoint Descriptor: Bulk IN Endpoint Descriptor: Bulk OUT Prior to commit bd877e4, we would call usbnet_get_endpoints before giving up finding enough endpoints. Removing the early endpoint number test and the strict functional descriptor requirement allows qmi_wwan_bind to continue until usbnet_get_endpoints has made the final attempt to collect endpoints.
This restores the behaviour from before commit bd877e4 without losing the added benefit of using a single bind function. The driver has always required a CDC Union functional descriptor for two-interface functions. Using the existence of this descriptor to detect two-interface functions is the logically correct method. Reported-by: Dan Williams Signed-off-by: Bjørn Mork Tested-by: Dan Williams Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 49 +++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index efb5c7c33a28..968d5d50751d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -139,16 +139,9 @@ static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct qmi_wwan_state))); - /* control and data is shared? */ - if (intf->cur_altsetting->desc.bNumEndpoints == 3) { - info->control = intf; - info->data = intf; - goto shared; - } - - /* else require a single interrupt status endpoint on control intf */ - if (intf->cur_altsetting->desc.bNumEndpoints != 1) - goto err; + /* set up initial state */ + info->control = intf; + info->data = intf; /* and a number of CDC descriptors */ while (len > 3) { @@ -207,25 +200,14 @@ next_desc: buf += h->bLength; } - /* did we find all the required ones? */ - if (!(found & (1 << USB_CDC_HEADER_TYPE)) || - !(found & (1 << USB_CDC_UNION_TYPE))) { - dev_err(&intf->dev, "CDC functional descriptors missing\n"); - goto err; - } - - /* verify CDC Union */ - if (desc->bInterfaceNumber != cdc_union->bMasterInterface0) { - dev_err(&intf->dev, "bogus CDC Union: master=%u\n", cdc_union->bMasterInterface0); - goto err; - } - - /* need to save these for unbind */ - info->control = intf; - info->data = usb_ifnum_to_if(dev->udev, cdc_union->bSlaveInterface0); - if (!info->data) { - dev_err(&intf->dev, "bogus CDC Union: slave=%u\n", cdc_union->bSlaveInterface0); - goto err; + /* Use separate control and data interfaces if we found a CDC Union */ + if (cdc_union) { + info->data = usb_ifnum_to_if(dev->udev, cdc_union->bSlaveInterface0); + if (desc->bInterfaceNumber != cdc_union->bMasterInterface0 || !info->data) { + dev_err(&intf->dev, "bogus CDC Union: master=%u, slave=%u\n", + cdc_union->bMasterInterface0, cdc_union->bSlaveInterface0); + goto err; + } } /* errors aren't fatal - we can live with the dynamic address */ @@ -235,11 +217,12 @@ next_desc: } /* claim data interface and set it up */ - status = usb_driver_claim_interface(driver, info->data, dev); - if (status < 0) - goto err; + if (info->control != info->data) { + status = usb_driver_claim_interface(driver, info->data, dev); + if (status < 0) + goto err; + } -shared: status = qmi_wwan_register_subdriver(dev); if (status < 0 && info->control != info->data) { usb_set_intfdata(info->data, NULL); -- cgit v1.2.2 From 6a40cdd5440d7b61a349bc04e85eed4fa7c24a3c Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 5 Mar 2013 14:58:53 +0800 Subject: pinctrl: abx500: Fix checking if pin use AlternateFunction register It's pointless to check "af.alt_bit1 == UNUSED" twice. This looks like a copy-paste bug, I think what we want is to check if *both* af.alt_bit1 and af.alt_bit2 are UNUSED. 
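As an aside (an observation, not part of the patch): duplicated operands in a logical expression like this can often be caught at build time; with a sufficiently recent GCC, the -Wlogical-op warning flags it. A minimal reproduction, assuming that flag is available:

/* gcc -Wlogical-op -c dup.c
 * expected: warning: logical 'and' of equal expressions
 */
int has_dup(int a)
{
	return (a == 0) && (a == 0);
}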
Signed-off-by: Axel Lin Acked-by: Patrice Chotard Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-abx500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c index caecdd373061..c542a97c82f3 100644 --- a/drivers/pinctrl/pinctrl-abx500.c +++ b/drivers/pinctrl/pinctrl-abx500.c @@ -422,7 +422,7 @@ static u8 abx500_get_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip, } /* check if pin use AlternateFunction register */ - if ((af.alt_bit1 == UNUSED) && (af.alt_bit1 == UNUSED)) + if ((af.alt_bit1 == UNUSED) && (af.alt_bit2 == UNUSED)) return mode; /* * if pin GPIOSEL bit is set and pin supports alternate function, -- cgit v1.2.2 From 53ded8191e81507da0786ac45152eebb68d25d0c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 13 Mar 2013 03:18:18 +0100 Subject: pinctrl: Print the correct information in debugfs pinconf-state file A bad copy&paste resulted in the debugfs pinconf-state file printing the pin name instead of the state name. Fix it. Signed-off-by: Laurent Pinchart Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c index ac8d382a79bb..d611ecfcbf70 100644 --- a/drivers/pinctrl/pinconf.c +++ b/drivers/pinctrl/pinconf.c @@ -622,7 +622,7 @@ static const struct file_operations pinconf_dbg_pinname_fops = { static int pinconf_dbg_state_print(struct seq_file *s, void *d) { if (strlen(dbg_state_name)) - seq_printf(s, "%s\n", dbg_pinname); + seq_printf(s, "%s\n", dbg_state_name); else seq_printf(s, "No pin state set\n"); return 0; -- cgit v1.2.2 From 5818a46a999ad9546e68e8765a3ca1d9d87f9b4a Mon Sep 17 00:00:00 2001 From: John Crispin Date: Wed, 13 Mar 2013 13:20:15 +0100 Subject: rt2x00: fix rt2x00 to work with the new ralink SoC config symbols Since v3.9-rc1 the kernel has basic support for Ralink WiSoC. The config symbols are named slightly different than before. Fix the rt2x00 to match the new symbols. The commit causing this breakage is: commit ae2b5bb6570481b50a7175c64176b82da0a81836 Author: John Crispin Date: Sun Jan 20 22:05:30 2013 +0100 MIPS: ralink: adds Kbuild files Signed-off-by: John Crispin Acked-by: Gertjan van Wingerde Signed-off-by: John W. 
Linville --- drivers/net/wireless/rt2x00/Kconfig | 4 ++-- drivers/net/wireless/rt2x00/rt2800pci.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/rt2x00/Kconfig b/drivers/net/wireless/rt2x00/Kconfig index 44d6ead43341..2bf4efa33186 100644 --- a/drivers/net/wireless/rt2x00/Kconfig +++ b/drivers/net/wireless/rt2x00/Kconfig @@ -55,10 +55,10 @@ config RT61PCI config RT2800PCI tristate "Ralink rt27xx/rt28xx/rt30xx (PCI/PCIe/PCMCIA) support" - depends on PCI || RALINK_RT288X || RALINK_RT305X + depends on PCI || SOC_RT288X || SOC_RT305X select RT2800_LIB select RT2X00_LIB_PCI if PCI - select RT2X00_LIB_SOC if RALINK_RT288X || RALINK_RT305X + select RT2X00_LIB_SOC if SOC_RT288X || SOC_RT305X select RT2X00_LIB_FIRMWARE select RT2X00_LIB_CRYPTO select CRC_CCITT diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c index 48a01aa21f1c..ded73da4de0b 100644 --- a/drivers/net/wireless/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/rt2x00/rt2800pci.c @@ -89,7 +89,7 @@ static void rt2800pci_mcu_status(struct rt2x00_dev *rt2x00dev, const u8 token) rt2x00pci_register_write(rt2x00dev, H2M_MAILBOX_CID, ~0); } -#if defined(CONFIG_RALINK_RT288X) || defined(CONFIG_RALINK_RT305X) +#if defined(CONFIG_SOC_RT288X) || defined(CONFIG_SOC_RT305X) static int rt2800pci_read_eeprom_soc(struct rt2x00_dev *rt2x00dev) { void __iomem *base_addr = ioremap(0x1F040000, EEPROM_SIZE); @@ -107,7 +107,7 @@ static inline int rt2800pci_read_eeprom_soc(struct rt2x00_dev *rt2x00dev) { return -ENOMEM; } -#endif /* CONFIG_RALINK_RT288X || CONFIG_RALINK_RT305X */ +#endif /* CONFIG_SOC_RT288X || CONFIG_SOC_RT305X */ #ifdef CONFIG_PCI static void rt2800pci_eepromregister_read(struct eeprom_93cx6 *eeprom) @@ -1177,7 +1177,7 @@ MODULE_DEVICE_TABLE(pci, rt2800pci_device_table); #endif /* CONFIG_PCI */ MODULE_LICENSE("GPL"); -#if defined(CONFIG_RALINK_RT288X) || defined(CONFIG_RALINK_RT305X) +#if defined(CONFIG_SOC_RT288X) || defined(CONFIG_SOC_RT305X) static int rt2800soc_probe(struct platform_device *pdev) { return rt2x00soc_probe(pdev, &rt2800pci_ops); @@ -1194,7 +1194,7 @@ static struct platform_driver rt2800soc_driver = { .suspend = rt2x00soc_suspend, .resume = rt2x00soc_resume, }; -#endif /* CONFIG_RALINK_RT288X || CONFIG_RALINK_RT305X */ +#endif /* CONFIG_SOC_RT288X || CONFIG_SOC_RT305X */ #ifdef CONFIG_PCI static int rt2800pci_probe(struct pci_dev *pci_dev, @@ -1217,7 +1217,7 @@ static int __init rt2800pci_init(void) { int ret = 0; -#if defined(CONFIG_RALINK_RT288X) || defined(CONFIG_RALINK_RT305X) +#if defined(CONFIG_SOC_RT288X) || defined(CONFIG_SOC_RT305X) ret = platform_driver_register(&rt2800soc_driver); if (ret) return ret; @@ -1225,7 +1225,7 @@ static int __init rt2800pci_init(void) #ifdef CONFIG_PCI ret = pci_register_driver(&rt2800pci_driver); if (ret) { -#if defined(CONFIG_RALINK_RT288X) || defined(CONFIG_RALINK_RT305X) +#if defined(CONFIG_SOC_RT288X) || defined(CONFIG_SOC_RT305X) platform_driver_unregister(&rt2800soc_driver); #endif return ret; @@ -1240,7 +1240,7 @@ static void __exit rt2800pci_exit(void) #ifdef CONFIG_PCI pci_unregister_driver(&rt2800pci_driver); #endif -#if defined(CONFIG_RALINK_RT288X) || defined(CONFIG_RALINK_RT305X) +#if defined(CONFIG_SOC_RT288X) || defined(CONFIG_SOC_RT305X) platform_driver_unregister(&rt2800soc_driver); #endif } -- cgit v1.2.2 From 9437a248e7cac427c898bdb11bd1ac6844a1ead4 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 13 Mar 2013 10:28:13 -0500 Subject: rtlwifi: rtl8192cu: Fix problem 
that prevents reassociation The driver was failing to clear the BSSID when a disconnect happened. That prevented a reconnection. This problem is reported at https://bugzilla.redhat.com/show_bug.cgi?id=789605, https://bugzilla.redhat.com/show_bug.cgi?id=866786, https://bugzilla.redhat.com/show_bug.cgi?id=906734, and https://bugzilla.kernel.org/show_bug.cgi?id=46171. Thanks to Jussi Kivilinna for making the critical observation that led to the solution. Reported-by: Jussi Kivilinna Tested-by: Jussi Kivilinna Tested-by: Alessandro Lannocca Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8192cu/hw.c | 87 ++++++++++++----------------- 1 file changed, 35 insertions(+), 52 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c index 3c6e18c38e30..c08d0f4c5f3d 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c @@ -1376,75 +1376,58 @@ void rtl92cu_card_disable(struct ieee80211_hw *hw) } void rtl92cu_set_check_bssid(struct ieee80211_hw *hw, bool check_bssid) -{ - /* dummy routine needed for callback from rtl_op_configure_filter() */ -} - -/*========================================================================== */ - -static void _rtl92cu_set_check_bssid(struct ieee80211_hw *hw, - enum nl80211_iftype type) { struct rtl_priv *rtlpriv = rtl_priv(hw); - u32 reg_rcr = rtl_read_dword(rtlpriv, REG_RCR); struct rtl_hal *rtlhal = rtl_hal(rtlpriv); - struct rtl_phy *rtlphy = &(rtlpriv->phy); - u8 filterout_non_associated_bssid = false; + u32 reg_rcr = rtl_read_dword(rtlpriv, REG_RCR); - switch (type) { - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_STATION: - filterout_non_associated_bssid = true; - break; - case NL80211_IFTYPE_UNSPECIFIED: - case NL80211_IFTYPE_AP: - default: - break; - } - if (filterout_non_associated_bssid) { + if (rtlpriv->psc.rfpwr_state != ERFON) + return; + + if (check_bssid) { + u8 tmp; if (IS_NORMAL_CHIP(rtlhal->version)) { - switch (rtlphy->current_io_type) { - case IO_CMD_RESUME_DM_BY_SCAN: - reg_rcr |= (RCR_CBSSID_DATA | RCR_CBSSID_BCN); - rtlpriv->cfg->ops->set_hw_reg(hw, - HW_VAR_RCR, (u8 *)(®_rcr)); - /* enable update TSF */ - _rtl92cu_set_bcn_ctrl_reg(hw, 0, BIT(4)); - break; - case IO_CMD_PAUSE_DM_BY_SCAN: - reg_rcr &= ~(RCR_CBSSID_DATA | RCR_CBSSID_BCN); - rtlpriv->cfg->ops->set_hw_reg(hw, - HW_VAR_RCR, (u8 *)(®_rcr)); - /* disable update TSF */ - _rtl92cu_set_bcn_ctrl_reg(hw, BIT(4), 0); - break; - } + reg_rcr |= (RCR_CBSSID_DATA | RCR_CBSSID_BCN); + tmp = BIT(4); } else { - reg_rcr |= (RCR_CBSSID); - rtlpriv->cfg->ops->set_hw_reg(hw, HW_VAR_RCR, - (u8 *)(®_rcr)); - _rtl92cu_set_bcn_ctrl_reg(hw, 0, (BIT(4)|BIT(5))); + reg_rcr |= RCR_CBSSID; + tmp = BIT(4) | BIT(5); } - } else if (filterout_non_associated_bssid == false) { + rtlpriv->cfg->ops->set_hw_reg(hw, HW_VAR_RCR, + (u8 *) (®_rcr)); + _rtl92cu_set_bcn_ctrl_reg(hw, 0, tmp); + } else { + u8 tmp; if (IS_NORMAL_CHIP(rtlhal->version)) { - reg_rcr &= (~(RCR_CBSSID_DATA | RCR_CBSSID_BCN)); - rtlpriv->cfg->ops->set_hw_reg(hw, HW_VAR_RCR, - (u8 *)(®_rcr)); - _rtl92cu_set_bcn_ctrl_reg(hw, BIT(4), 0); + reg_rcr &= ~(RCR_CBSSID_DATA | RCR_CBSSID_BCN); + tmp = BIT(4); } else { - reg_rcr &= (~RCR_CBSSID); - rtlpriv->cfg->ops->set_hw_reg(hw, HW_VAR_RCR, - (u8 *)(®_rcr)); - _rtl92cu_set_bcn_ctrl_reg(hw, (BIT(4)|BIT(5)), 0); + reg_rcr &= ~RCR_CBSSID; + tmp = BIT(4) | BIT(5); } + reg_rcr &= (~(RCR_CBSSID_DATA | RCR_CBSSID_BCN)); + 
rtlpriv->cfg->ops->set_hw_reg(hw, + HW_VAR_RCR, (u8 *) (®_rcr)); + _rtl92cu_set_bcn_ctrl_reg(hw, tmp, 0); } } +/*========================================================================== */ + int rtl92cu_set_network_type(struct ieee80211_hw *hw, enum nl80211_iftype type) { + struct rtl_priv *rtlpriv = rtl_priv(hw); + if (_rtl92cu_set_media_status(hw, type)) return -EOPNOTSUPP; - _rtl92cu_set_check_bssid(hw, type); + + if (rtlpriv->mac80211.link_state == MAC80211_LINKED) { + if (type != NL80211_IFTYPE_AP) + rtl92cu_set_check_bssid(hw, true); + } else { + rtl92cu_set_check_bssid(hw, false); + } + return 0; } -- cgit v1.2.2 From bf4d7be57ba9040347065f48a60f895a254f6e28 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Wed, 13 Mar 2013 17:13:46 +0530 Subject: pinctrl: generic: Fix compilation error The function definition of pinconf_generic_dump_config is defined under CONFIG_DEBUG_FS macro. Define the declaration too under this macro. Without this patch we get the following build error: drivers/built-in.o: In function `pcs_pinconf_config_dbg_show': drivers/pinctrl/pinctrl-single.c:726: undefined reference to `pinconf_generic_dump_config' Signed-off-by: Sachin Kamat Signed-off-by: Linus Walleij --- drivers/pinctrl/pinconf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinconf.h b/drivers/pinctrl/pinconf.h index e3ed8cb072a5..bfda73d64eed 100644 --- a/drivers/pinctrl/pinconf.h +++ b/drivers/pinctrl/pinconf.h @@ -90,7 +90,7 @@ static inline void pinconf_init_device_debugfs(struct dentry *devroot, * pin config. */ -#ifdef CONFIG_GENERIC_PINCONF +#if defined(CONFIG_GENERIC_PINCONF) && defined(CONFIG_DEBUG_FS) void pinconf_generic_dump_pin(struct pinctrl_dev *pctldev, struct seq_file *s, unsigned pin); -- cgit v1.2.2 From 47c78f4a70d791ff44cab3254b489605a52e3181 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Mon, 11 Mar 2013 13:08:49 +0000 Subject: cifs: map NT_STATUS_SHARING_VIOLATION to EBUSY instead of ETXTBSY NT_SHARING_VIOLATION errors are mapped to ETXTBSY which is unexpected for operations such as unlink where we can hit these errors. The patch maps the error NT_SHARING_VIOLATION to EBUSY instead. The patch also replaces all instances of ETXTBSY in cifs_rename_pending_delete() with EBUSY. 
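[ Illustration: the shape of the SMB-to-POSIX error mapping being adjusted, as a self-contained userspace sketch. The table layout mirrors mapping_table_ERRDOS; the numeric DOS codes here are assumptions for the demo, not copied from the kernel headers. ]

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct smb_to_posix_error {
	unsigned short smb_err;
	int posix_code;
};

static const struct smb_to_posix_error mapping_table[] = {
	{ 32, -EBUSY },		/* sharing violation; was -ETXTBSY before the fix */
	{ 19, -EROFS },		/* write-protected medium */
};

static int map_smb_error(unsigned short smb_err)
{
	for (size_t i = 0; i < sizeof(mapping_table) / sizeof(mapping_table[0]); i++)
		if (mapping_table[i].smb_err == smb_err)
			return mapping_table[i].posix_code;
	return -EIO;	/* default for unknown codes */
}

int main(void)
{
	printf("%d\n", map_smb_error(32));	/* -EBUSY */
	return 0;
}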
Signed-off-by: Sachin Prabhu Reviewed-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 10 ++++------ fs/cifs/netmisc.c | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 0079696305c9..20887bf63121 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1043,7 +1043,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc != 0) { - rc = -ETXTBSY; + rc = -EBUSY; goto undo_setattr; } @@ -1062,7 +1062,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, if (rc == -ENOENT) rc = 0; else if (rc != 0) { - rc = -ETXTBSY; + rc = -EBUSY; goto undo_rename; } cifsInode->delete_pending = true; @@ -1169,15 +1169,13 @@ psx_del_no_retry: cifs_drop_nlink(inode); } else if (rc == -ENOENT) { d_drop(dentry); - } else if (rc == -ETXTBSY) { + } else if (rc == -EBUSY) { if (server->ops->rename_pending_delete) { rc = server->ops->rename_pending_delete(full_path, dentry, xid); if (rc == 0) cifs_drop_nlink(inode); } - if (rc == -ETXTBSY) - rc = -EBUSY; } else if ((rc == -EACCES) && (dosattr == 0) && inode) { attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (attrs == NULL) { @@ -1518,7 +1516,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, * source. Note that cross directory moves do not work with * rename by filehandle to various Windows servers. */ - if (rc == 0 || rc != -ETXTBSY) + if (rc == 0 || rc != -EBUSY) goto do_rename_exit; /* open-file renames don't work across directories */ diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index a82bc51fdc82..c0b25b28be6c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -62,7 +62,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRdiffdevice, -EXDEV}, {ERRnofiles, -ENOENT}, {ERRwriteprot, -EROFS}, - {ERRbadshare, -ETXTBSY}, + {ERRbadshare, -EBUSY}, {ERRlock, -EACCES}, {ERRunsup, -EINVAL}, {ERRnosuchshare, -ENXIO}, -- cgit v1.2.2 From 24261fc23db950951760d00c188ba63cc756b932 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Fri, 8 Mar 2013 16:30:03 +0100 Subject: cifs: delay super block destruction until all cifsFileInfo objects are gone cifsFileInfo objects hold references to dentries and it is possible that these will still be around in workqueues when VFS decides to kill super block during unmount. This results in panics like this one: BUG: Dentry ffff88001f5e76c0{i=66b4a,n=1M-2} still in use (1) [unmount of cifs cifs] ------------[ cut here ]------------ kernel BUG at fs/dcache.c:943! [..] Process umount (pid: 1781, threadinfo ffff88003d6e8000, task ffff880035eeaec0) [..] Call Trace: [] shrink_dcache_for_umount+0x33/0x60 [] generic_shutdown_super+0x2c/0xe0 [] kill_anon_super+0x16/0x30 [] cifs_kill_sb+0x1a/0x30 [cifs] [] deactivate_locked_super+0x57/0x80 [] deactivate_super+0x4e/0x70 [] mntput_no_expire+0xd7/0x130 [] sys_umount+0x9c/0x3c0 [] system_call_fastpath+0x16/0x1b Fix this by making each cifsFileInfo object hold a reference to cifs super block, which implicitly keeps VFS super block around as well. 
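[ Illustration: the pinning scheme in miniature. A per-filesystem counter takes one reference on the VFS super block when it goes 0 -> 1 and drops it on the way back to 0, so open file objects keep the super block alive until the last of them is gone. This is a userspace sketch with invented types; the kernel side pairs atomic_inc_return()/atomic_dec_and_test() with s_active and deactivate_super(). ]

#include <stdatomic.h>
#include <stdio.h>

struct super_blk {
	atomic_int s_active;	/* stands in for sb->s_active */
};

struct cifs_sb_demo {
	struct super_blk *sb;
	atomic_int active;	/* one count per open file object */
};

static void sb_active(struct cifs_sb_demo *csb)
{
	if (atomic_fetch_add(&csb->active, 1) == 0)	/* 0 -> 1 */
		atomic_fetch_add(&csb->sb->s_active, 1);
}

static void sb_deactive(struct cifs_sb_demo *csb)
{
	if (atomic_fetch_sub(&csb->active, 1) == 1)	/* 1 -> 0 */
		atomic_fetch_sub(&csb->sb->s_active, 1);	/* kernel: deactivate_super() */
}

int main(void)
{
	struct super_blk sb = { .s_active = 1 };
	struct cifs_sb_demo csb = { .sb = &sb, .active = 0 };

	sb_active(&csb);	/* file opened: s_active 1 -> 2 */
	sb_deactive(&csb);	/* last put: s_active back to 1 */
	printf("%d\n", atomic_load(&sb.s_active));
	return 0;
}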
Signed-off-by: Mateusz Guzik Reviewed-by: Jeff Layton Cc: Reported-and-Tested-by: Ben Greear Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 24 ++++++++++++++++++++++++ fs/cifs/cifsfs.h | 4 ++++ fs/cifs/file.c | 6 +++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 1a052c0eee8e..054b90b682a7 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -91,6 +91,30 @@ struct workqueue_struct *cifsiod_wq; __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; #endif +/* + * Bumps refcount for cifs super block. + * Note that it should be only called if a referece to VFS super block is + * already held, e.g. in open-type syscalls context. Otherwise it can race with + * atomic_dec_and_test in deactivate_locked_super. + */ +void +cifs_sb_active(struct super_block *sb) +{ + struct cifs_sb_info *server = CIFS_SB(sb); + + if (atomic_inc_return(&server->active) == 1) + atomic_inc(&sb->s_active); +} + +void +cifs_sb_deactive(struct super_block *sb) +{ + struct cifs_sb_info *server = CIFS_SB(sb); + + if (atomic_dec_and_test(&server->active)) + deactivate_super(sb); +} + static int cifs_read_super(struct super_block *sb) { diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 7163419cecd9..0e32c3446ce9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -41,6 +41,10 @@ extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; +/* Functions related to super block operations */ +extern void cifs_sb_active(struct super_block *sb); +extern void cifs_sb_deactive(struct super_block *sb); + /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 8c0d85577314..7a0dd99e4507 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -300,6 +300,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_WORK(&cfile->oplock_break, cifs_oplock_break); mutex_init(&cfile->fh_mutex); + cifs_sb_active(inode->i_sb); + /* * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. @@ -349,7 +351,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); struct TCP_Server_Info *server = tcon->ses->server; struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct super_block *sb = inode->i_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct cifsLockInfo *li, *tmp; struct cifs_fid fid; struct cifs_pending_open open; @@ -414,6 +417,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) cifs_put_tlink(cifs_file->tlink); dput(cifs_file->dentry); + cifs_sb_deactive(sb); kfree(cifs_file); } -- cgit v1.2.2 From 1e825efb571a87d039cb3fe7ca833a25f11c7cb9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 26 Feb 2013 16:02:02 +1100 Subject: perf annotate: Fix build with NO_NEWT=1 Commit 18c9e5c "Make it to be able to skip unannotatable symbols" broke the build with NO_NEWT=1: CC builtin-annotate.o builtin-annotate.c: In function 'hists__find_annotations': builtin-annotate.c:161:4: error: duplicate case value builtin-annotate.c:154:4: error: previously used here make: *** [builtin-annotate.o] Error 1 This is because without NEWT support K_LEFT is #defined to -1 in utils/hist.h Fix it by shifting the K_LEFT/K_RIGHT #defines out of the likely range of error values. 
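[ Illustration: why moving the sentinels works. Error paths in the same switch use small negative values, so a sentinel of -1 collides with them at compile time ("duplicate case value"); values far outside the plausible error range cannot. A minimal compilable sketch: ]

#include <stdio.h>

#define K_LEFT	-1000
#define K_RIGHT	-2000

static const char *describe(int key)
{
	switch (key) {
	case -1:	/* an error return; a duplicate case if K_LEFT were -1 */
		return "error";
	case K_LEFT:
		return "left";
	case K_RIGHT:
		return "right";
	default:
		return "other";
	}
}

int main(void)
{
	printf("%s %s\n", describe(-1), describe(K_LEFT));
	return 0;
}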
Signed-off-by: Michael Ellerman Cc: Feng Tang Cc: Namhyung Kim Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1361854923-1814-1-git-send-email-michael@ellerman.id.au Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 38624686ee9a..609a115e35cf 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -208,8 +208,8 @@ static inline int script_browse(const char *script_opt __maybe_unused) return 0; } -#define K_LEFT -1 -#define K_RIGHT -2 +#define K_LEFT -1000 +#define K_RIGHT -2000 #endif #ifdef GTK2_SUPPORT -- cgit v1.2.2 From 5f7439e07822942f32b9e0a66f4a3cc9c3e29e67 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 26 Feb 2013 16:02:03 +1100 Subject: perf report: Fix build with NO_NEWT=1 Commit ad0de09 "Enable the runtime switching of perf data file" broke the build with NO_NEWT=1: CC builtin-report.o builtin-report.c: In function '__cmd_report': builtin-report.c:479:15: error: 'K_SWITCH_INPUT_DATA' undeclared (first use in this function) builtin-report.c:479:15: note: each undeclared identifier is reported only once for each function it appears in builtin-report.c: In function 'cmd_report': builtin-report.c:823:13: error: 'K_SWITCH_INPUT_DATA' undeclared (first use in this function) make: *** [builtin-report.o] Error 1 Fix it by adding a dummy definition of K_SWITCH_INPUT_DATA. Signed-off-by: Michael Ellerman Cc: Feng Tang Cc: Namhyung Kim Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1361854923-1814-2-git-send-email-michael@ellerman.id.au Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 609a115e35cf..226a4ae2f936 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -210,6 +210,7 @@ static inline int script_browse(const char *script_opt __maybe_unused) #define K_LEFT -1000 #define K_RIGHT -2000 +#define K_SWITCH_INPUT_DATA -3000 #endif #ifdef GTK2_SUPPORT -- cgit v1.2.2 From d2f32479e5526a1ab3b4e43910fcb279871524ce Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 17 Feb 2013 16:03:36 +0100 Subject: perf tools: check if -DFORTIFY_SOURCE=2 is allowed It seems gcc (4.7.2) defines _FORTIFY_SOURCE internally and becomes confused when it sees another definition in flags. For me, build failed like this: CHK glibc Makefile:548: *** No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static. Stop. 
and only with V=1 it printed: :0:0: error: "_FORTIFY_SOURCE" redefined [-Werror] :1:0: note: this is the location of the previous definition Signed-off-by: Marcin Slusarz Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1361113416-8662-1-git-send-email-marcin.slusarz@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index a2108ca1cc17..bb74c79cd16e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -95,7 +95,7 @@ ifeq ("$(origin DEBUG)", "command line") PERF_DEBUG = $(DEBUG) endif ifndef PERF_DEBUG - CFLAGS_OPTIMIZE = -O6 -D_FORTIFY_SOURCE=2 + CFLAGS_OPTIMIZE = -O6 endif ifdef PARSER_DEBUG @@ -180,6 +180,12 @@ ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -Wvolatile-register-var,-W CFLAGS := $(CFLAGS) -Wvolatile-register-var endif +ifndef PERF_DEBUG + ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -D_FORTIFY_SOURCE=2,-D_FORTIFY_SOURCE=2),y) + CFLAGS := $(CFLAGS) -D_FORTIFY_SOURCE=2 + endif +endif + ### --- END CONFIGURATION SECTION --- ifeq ($(srctree),) -- cgit v1.2.2 From e4dd45fe7add0de09e0e5b2b511732d9c86b6ee7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 25 Feb 2013 10:52:48 +0100 Subject: perf record: Fix -C option Currently the -C option does not work for record command, because of the targets mismatch when synthesizing threads. Fixing this by using proper target interface for the synthesize decision. Signed-off-by: Jiri Olsa Reported-by: Oleg Nesterov Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1361785972-7431-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 774c90713a53..f1a939ebc19c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -573,13 +573,15 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) perf_event__synthesize_guest_os, tool); } - if (!opts->target.system_wide) + if (perf_target__has_task(&opts->target)) err = perf_event__synthesize_thread_map(tool, evsel_list->threads, process_synthesized_event, machine); - else + else if (perf_target__has_cpu(&opts->target)) err = perf_event__synthesize_threads(tool, process_synthesized_event, machine); + else /* command specified */ + err = 0; if (err != 0) goto out_delete_session; -- cgit v1.2.2 From b9e8c37220c80e78289a1e87b50c09418eb59a7e Mon Sep 17 00:00:00 2001 From: Jack Mitchell Date: Fri, 8 Mar 2013 11:21:52 +0000 Subject: libtraceevent: Remove hard coded include to /usr/local/include in Makefile having /usr/local/include hardcoded into the makefile is not necessary as this is automatically included by GCC. It also infects cross-compile builds with the host systems includes. 
Signed-off-by: Jack Mitchell Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1362741712-21308-1-git-send-email-ml@communistcode.co.uk Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index a20e32033431..0b0a90787db6 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -122,7 +122,7 @@ export Q VERBOSE EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I/usr/local/include $(CONFIG_INCLUDES) +INCLUDES = -I. $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -Wall -- cgit v1.2.2 From 79146a69c8bc3f28e51c5267633abc6babf47a31 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Tue, 12 Mar 2013 14:32:17 +0530 Subject: perf probe: Fix segfault Fix segfault in perf probe due to a bug introduced by commit d8639f068 (perf tools: Stop using 'self' in strlist). Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Srikar Dronamraju Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20130312090217.GC4668@in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/strlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 55433aa42c8f..eabdce0a2daa 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -143,7 +143,7 @@ struct strlist *strlist__new(bool dupstr, const char *list) slist->rblist.node_delete = strlist__node_delete; slist->dupstr = dupstr; - if (slist && strlist__parse_list(slist, list) != 0) + if (list && strlist__parse_list(slist, list) != 0) goto out_error; } -- cgit v1.2.2 From 3bf7b07ece6e00747602938f68c1db8001b9925f Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 5 Mar 2013 21:48:26 -0800 Subject: perf/POWER7: Create a sysfs format entry for Power7 events Create a sysfs entry, '/sys/bus/event_source/devices/cpu/format/event' which describes the format of the POWER7 PMU events. This code is based on corresponding code in x86. Changelog[v4]: [Michael Ellerman, Paul Mackerras] The event format is different for other POWER cpus. So move the code to POWER7-specific, power7-pmu.c. Also, the POWER7 format uses bits 0-19 not 0-20. Changelog[v2]: [Jiri Olsa] Use PMU_FORMAT_ATTR rather than duplicating code. 
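[ Illustration: what the exported "config:0-19" format string buys userspace. Tools can learn, without hardcoding per-CPU knowledge, that the event code occupies the low 20 bits of perf_event_attr.config. The masking helper below is an assumed sketch, not kernel or perf-tool code. ]

#include <stdio.h>

static unsigned long long power7_event_code(unsigned long long config)
{
	return config & ((1ULL << 20) - 1);	/* keep bits 0-19 */
}

int main(void)
{
	/* higher bits, if any, are not part of the event code */
	printf("%#llx\n", power7_event_code(0xf00001001eULL));	/* 0x1001e */
	return 0;
}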
Signed-off-by: Sukadev Bhattiprolu Acked-by: Paul Mackerras Tested-by: Michael Ellerman Cc: Andi Kleen Cc: Anton Blanchard Cc: Ingo Molnar Cc: Jiri Olsa Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: benh@kernel.crashing.org Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20130306054826.GA14627@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/powerpc/perf/power7-pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index b554879bd31e..3c475d6267c7 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -420,7 +420,20 @@ static struct attribute_group power7_pmu_events_group = { .attrs = power7_events_attr, }; +PMU_FORMAT_ATTR(event, "config:0-19"); + +static struct attribute *power7_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +struct attribute_group power7_pmu_format_group = { + .name = "format", + .attrs = power7_pmu_format_attr, +}; + static const struct attribute_group *power7_pmu_attr_groups[] = { + &power7_pmu_format_group, &power7_pmu_events_group, NULL, }; -- cgit v1.2.2 From 2563a4524febe8f4a98e717e02436d1aaf672aa2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 11 Mar 2013 12:25:19 -0700 Subject: drm/i915: restrict kernel address leak in debugfs Masks kernel address info-leak in object dumps with the %pK suffix, so they cannot be used to target kernel memory corruption attacks if the kptr_restrict sysctl is set. Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aae31489c893..7299ea45dd03 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -103,7 +103,7 @@ static const char *cache_level_str(int type) static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { - seq_printf(m, "%p: %s%s %8zdKiB %02x %02x %d %d %d%s%s%s", + seq_printf(m, "%pK: %s%s %8zdKiB %02x %02x %d %d %d%s%s%s", &obj->base, get_pin_flag(obj), get_tiling_flag(obj), -- cgit v1.2.2 From 3118a4f652c7b12c752f3222af0447008f9b2368 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 11 Mar 2013 17:31:45 -0700 Subject: drm/i915: bounds check execbuffer relocation count It is possible to wrap the counter used to allocate the buffer for relocation copies. This could lead to heap writing overflows. 
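[ Illustration: the headroom-based overflow check as a standalone userspace sketch. Each per-buffer count is compared against the *remaining* headroom rather than the absolute maximum, so the running total can never wrap even when every individual count looks sane. Types and names are stand-ins for the GEM execbuffer structures. ]

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct reloc_entry {
	char payload[32];	/* stand-in for drm_i915_gem_relocation_entry */
};

static bool relocation_counts_ok(const int *counts, int nbufs)
{
	int relocs_total = 0;
	const int relocs_max = INT_MAX / (int)sizeof(struct reloc_entry);

	for (int i = 0; i < nbufs; i++) {
		if (counts[i] < 0 || counts[i] > relocs_max - relocs_total)
			return false;	/* total allocation would overflow */
		relocs_total += counts[i];
	}
	return true;
}

int main(void)
{
	/* each entry passes a naive per-entry check, but the sum wraps */
	int evil[] = { INT_MAX / 32, INT_MAX / 32 };

	printf("%s\n", relocation_counts_ok(evil, 2) ? "ok" : "rejected");
	return 0;
}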
CVE-2013-0913 v3: collapse test, improve comment v2: move check into validate_exec_list Signed-off-by: Kees Cook Reported-by: Pinkie Pie Cc: stable@vger.kernel.org Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2f2daebd0eef..3b11ab0fbc96 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -732,6 +732,8 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count) { int i; + int relocs_total = 0; + int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry); for (i = 0; i < count; i++) { char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr; @@ -740,10 +742,13 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec, if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) return -EINVAL; - /* First check for malicious input causing overflow */ - if (exec[i].relocation_count > - INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) + /* First check for malicious input causing overflow in + * the worst case where we need to allocate the entire + * relocation tree as a single array. + */ + if (exec[i].relocation_count > relocs_max - relocs_total) return -EINVAL; + relocs_total += exec[i].relocation_count; length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry); -- cgit v1.2.2 From 740466bc89ad8bd5afcc8de220f715f62b21e365 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 13 Mar 2013 11:15:19 -0400 Subject: tracing: Fix free of probe entry by calling call_rcu_sched() Because function tracing is very invasive, and can even trace calls to rcu_read_lock(), RCU access in function tracing is done with preempt_disable_notrace(). This requires a synchronize_sched() for updates and not a synchronize_rcu(). Function probes (traceon, traceoff, etc) must be freed after a synchronize_sched() after its entry has been removed from the hash. But call_rcu() is used. Fix this by using call_rcu_sched(). Also fix the usage to use hlist_del_rcu() instead of hlist_del(). Cc: stable@vger.kernel.org Cc: Paul McKenney Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 98ca94a41819..e6effd0c40a9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3108,8 +3108,8 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, continue; } - hlist_del(&entry->node); - call_rcu(&entry->rcu, ftrace_free_entry_rcu); + hlist_del_rcu(&entry->node); + call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu); } } __disable_ftrace_function_probe(); -- cgit v1.2.2 From 0fbd95aa8a056194933fba4ae78c50fc20f0704e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 16:51:35 -0700 Subject: workqueue: relocate pwq_set_max_active() pwq_set_max_active() is gonna be modified and used during pool_workqueue init. Move it above init_and_link_pwq(). This patch is pure code reorganization and doesn't introduce any functional changes. 
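[ Illustration: the behaviour of the function being moved, reduced to a userspace sketch with invented types. Raising max_active promotes queued (delayed) work items until the new limit is reached; lowering it simply leaves the backlog queued. ]

#include <stdio.h>

struct fake_pwq {
	int nr_active;
	int max_active;
	int nr_delayed;
};

static void activate_first_delayed(struct fake_pwq *pwq)
{
	pwq->nr_delayed--;
	pwq->nr_active++;
}

static void set_max_active(struct fake_pwq *pwq, int max_active)
{
	pwq->max_active = max_active;

	while (pwq->nr_delayed > 0 && pwq->nr_active < pwq->max_active)
		activate_first_delayed(pwq);
}

int main(void)
{
	struct fake_pwq pwq = { .nr_active = 2, .max_active = 2, .nr_delayed = 5 };

	set_max_active(&pwq, 4);
	printf("active=%d delayed=%d\n", pwq.nr_active, pwq.nr_delayed);	/* 4 and 3 */
	return 0;
}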
Signed-off-by: Tejun Heo --- kernel/workqueue.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f5c8bbb9ada3..d51928981615 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3725,6 +3725,26 @@ static void pwq_unbound_release_workfn(struct work_struct *work) kfree(wq); } +/** + * pwq_set_max_active - adjust max_active of a pwq + * @pwq: target pool_workqueue + * @max_active: new max_active value. + * + * Set @pwq->max_active to @max_active and activate delayed works if + * increased. + * + * CONTEXT: + * spin_lock_irq(pool->lock). + */ +static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active) +{ + pwq->max_active = max_active; + + while (!list_empty(&pwq->delayed_works) && + pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); +} + static void init_and_link_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, struct worker_pool *pool, @@ -4011,26 +4031,6 @@ void destroy_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(destroy_workqueue); -/** - * pwq_set_max_active - adjust max_active of a pwq - * @pwq: target pool_workqueue - * @max_active: new max_active value. - * - * Set @pwq->max_active to @max_active and activate delayed works if - * increased. - * - * CONTEXT: - * spin_lock_irq(pool->lock). - */ -static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active) -{ - pwq->max_active = max_active; - - while (!list_empty(&pwq->delayed_works) && - pwq->nr_active < pwq->max_active) - pwq_activate_first_delayed(pwq); -} - /** * workqueue_set_max_active - adjust max_active of a workqueue * @wq: target workqueue -- cgit v1.2.2 From 699ce097efe8f45bc5c055e4f12cb1e271c270d9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 16:51:35 -0700 Subject: workqueue: implement and use pwq_adjust_max_active() Rename pwq_set_max_active() to pwq_adjust_max_active() and move pool_workqueue->max_active synchronization and max_active determination logic into it. The new function should be called with workqueue_lock held for stable workqueue->saved_max_active, determines the current max_active value the target pool_workqueue should be using from @wq->saved_max_active and the state of the associated pool, and applies it with proper synchronization. The current two users - workqueue_set_max_active() and thaw_workqueues() - are updated accordingly. In addition, the manual freezing handling in __alloc_workqueue_key() and freeze_workqueues_begin() are replaced with calls to pwq_adjust_max_active(). This centralizes max_active handling so that it's less error-prone. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 83 +++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d51928981615..9e2ec4ca415a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3726,23 +3726,38 @@ static void pwq_unbound_release_workfn(struct work_struct *work) } /** - * pwq_set_max_active - adjust max_active of a pwq + * pwq_adjust_max_active - update a pwq's max_active to the current setting * @pwq: target pool_workqueue - * @max_active: new max_active value. - * - * Set @pwq->max_active to @max_active and activate delayed works if - * increased. * - * CONTEXT: - * spin_lock_irq(pool->lock). + * If @pwq isn't freezing, set @pwq->max_active to the associated + * workqueue's saved_max_active and activate delayed work items + * accordingly. 
If @pwq is freezing, clear @pwq->max_active to zero. */ -static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active) +static void pwq_adjust_max_active(struct pool_workqueue *pwq) { - pwq->max_active = max_active; + struct workqueue_struct *wq = pwq->wq; + bool freezable = wq->flags & WQ_FREEZABLE; + + /* for @wq->saved_max_active */ + lockdep_assert_held(&workqueue_lock); + + /* fast exit for non-freezable wqs */ + if (!freezable && pwq->max_active == wq->saved_max_active) + return; + + spin_lock(&pwq->pool->lock); + + if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) { + pwq->max_active = wq->saved_max_active; - while (!list_empty(&pwq->delayed_works) && - pwq->nr_active < pwq->max_active) - pwq_activate_first_delayed(pwq); + while (!list_empty(&pwq->delayed_works) && + pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); + } else { + pwq->max_active = 0; + } + + spin_unlock(&pwq->pool->lock); } static void init_and_link_pwq(struct pool_workqueue *pwq, @@ -3932,15 +3947,14 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, goto err_destroy; /* - * workqueue_lock protects global freeze state and workqueues - * list. Grab it, set max_active accordingly and add the new - * workqueue to workqueues list. + * workqueue_lock protects global freeze state and workqueues list. + * Grab it, adjust max_active and add the new workqueue to + * workqueues list. */ spin_lock_irq(&workqueue_lock); - if (workqueue_freezing && wq->flags & WQ_FREEZABLE) - for_each_pwq(pwq, wq) - pwq->max_active = 0; + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); list_add(&wq->list, &workqueues); @@ -4055,17 +4069,8 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) wq->saved_max_active = max_active; - for_each_pwq(pwq, wq) { - struct worker_pool *pool = pwq->pool; - - spin_lock(&pool->lock); - - if (!(wq->flags & WQ_FREEZABLE) || - !(pool->flags & POOL_FREEZING)) - pwq_set_max_active(pwq, max_active); - - spin_unlock(&pool->lock); - } + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); spin_unlock_irq(&workqueue_lock); } @@ -4358,14 +4363,8 @@ void freeze_workqueues_begin(void) /* suppress further executions by setting max_active to zero */ list_for_each_entry(wq, &workqueues, list) { - if (!(wq->flags & WQ_FREEZABLE)) - continue; - - for_each_pwq(pwq, wq) { - spin_lock(&pwq->pool->lock); - pwq->max_active = 0; - spin_unlock(&pwq->pool->lock); - } + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); } spin_unlock_irq(&workqueue_lock); @@ -4445,14 +4444,8 @@ void thaw_workqueues(void) /* restore max_active and repopulate worklist */ list_for_each_entry(wq, &workqueues, list) { - if (!(wq->flags & WQ_FREEZABLE)) - continue; - - for_each_pwq(pwq, wq) { - spin_lock(&pwq->pool->lock); - pwq_set_max_active(pwq, wq->saved_max_active); - spin_unlock(&pwq->pool->lock); - } + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); } /* kick workers */ -- cgit v1.2.2 From 983ca25e738ee0c9c5435a503a6bb0034d4552b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 16:51:35 -0700 Subject: workqueue: fix max_active handling in init_and_link_pwq() Since 9e8cd2f589 ("workqueue: implement apply_workqueue_attrs()"), init_and_link_pwq() may be called to initialize a new pool_workqueue for a workqueue which is already online, but the function was setting pwq->max_active to wq->saved_max_active without proper synchronization. Fix it by calling pwq_adjust_max_active() under proper locking instead of manually setting max_active. 
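[ Illustration: the decision that the patch centralizes, sketched with invented userspace types. One helper derives the effective max_active from the workqueue's saved value and the freeze state, so every caller, including pool_workqueue initialization, applies the same rule instead of copying wq->saved_max_active by hand. ]

#include <stdbool.h>
#include <stdio.h>

struct fake_wq {
	int saved_max_active;
	bool freezable;
};

struct fake_pwq {
	int max_active;
	bool pool_freezing;
};

/* in the kernel this runs under workqueue_lock plus the pool lock */
static void adjust_max_active(struct fake_pwq *pwq, const struct fake_wq *wq)
{
	if (!wq->freezable || !pwq->pool_freezing)
		pwq->max_active = wq->saved_max_active;
	else
		pwq->max_active = 0;	/* frozen: admit no new work items */
}

int main(void)
{
	struct fake_wq wq = { .saved_max_active = 16, .freezable = true };
	struct fake_pwq pwq = { .max_active = 16, .pool_freezing = true };

	adjust_max_active(&pwq, &wq);
	printf("%d\n", pwq.max_active);	/* 0 while freezing */
	return 0;
}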
Signed-off-by: Tejun Heo --- kernel/workqueue.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9e2ec4ca415a..756761480a1a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3771,21 +3771,25 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, pwq->wq = wq; pwq->flush_color = -1; pwq->refcnt = 1; - pwq->max_active = wq->saved_max_active; INIT_LIST_HEAD(&pwq->delayed_works); INIT_LIST_HEAD(&pwq->mayday_node); INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); - /* - * Link @pwq and set the matching work_color. This is synchronized - * with flush_mutex to avoid confusing flush_workqueue(). - */ mutex_lock(&wq->flush_mutex); spin_lock_irq(&workqueue_lock); + /* + * Set the matching work_color. This is synchronized with + * flush_mutex to avoid confusing flush_workqueue(). + */ if (p_last_pwq) *p_last_pwq = first_pwq(wq); pwq->work_color = wq->work_color; + + /* sync max_active to the current setting */ + pwq_adjust_max_active(pwq); + + /* link in @pwq */ list_add_rcu(&pwq->pwqs_node, &wq->pwqs); spin_unlock_irq(&workqueue_lock); -- cgit v1.2.2 From c5aa87bbf4b23f5e4f167489406daeb0ed275c47 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 16:51:36 -0700 Subject: workqueue: update comments and a warning message * Update incorrect and add missing synchronization labels. * Update incorrect or misleading comments. Add new comments where clarification is necessary. Reformat / rephrase some comments. * drain_workqueue() can be used separately from destroy_workqueue() but its warning message was incorrectly referring to destruction. Other than the warning message change, this patch doesn't make any functional changes. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 84 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 756761480a1a..248a1e95b577 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -145,7 +145,7 @@ struct worker_pool { struct timer_list idle_timer; /* L: worker idle timeout */ struct timer_list mayday_timer; /* L: SOS timer for workers */ - /* workers are chained either in busy_hash or idle_list */ + /* a workers is either on busy_hash or idle_list, or the manager */ DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ @@ -154,8 +154,8 @@ struct worker_pool { struct ida worker_ida; /* L: for worker IDs */ struct workqueue_attrs *attrs; /* I: worker attributes */ - struct hlist_node hash_node; /* R: unbound_pool_hash node */ - int refcnt; /* refcnt for unbound pools */ + struct hlist_node hash_node; /* W: unbound_pool_hash node */ + int refcnt; /* W: refcnt for unbound pools */ /* * The current concurrency level. As it's likely to be accessed @@ -213,8 +213,8 @@ struct wq_flusher { struct wq_device; /* - * The externally visible workqueue abstraction is an array of - * per-CPU workqueues: + * The externally visible workqueue. It relays the issued work items to + * the appropriate worker_pool through its pool_workqueues. 
*/ struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ @@ -247,9 +247,10 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; -/* hash of all unbound pools keyed by pool->attrs */ +/* W: hash of all unbound pools keyed by pool->attrs */ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); +/* I: attributes used when instantiating standard unbound pools on demand */ static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; struct workqueue_struct *system_wq __read_mostly; @@ -434,16 +435,13 @@ static DEFINE_SPINLOCK(workqueue_lock); static LIST_HEAD(workqueues); static bool workqueue_freezing; /* W: have wqs started freezing? */ -/* - * The CPU and unbound standard worker pools. The unbound ones have - * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set. - */ +/* the per-cpu worker pools */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools); /* - * idr of all pools. Modifications are protected by workqueue_lock. Read - * accesses are protected by sched-RCU protected. + * R: idr of all pools. Modifications are protected by workqueue_lock. + * Read accesses are protected by sched-RCU protected. */ static DEFINE_IDR(worker_pool_idr); @@ -890,13 +888,12 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) * recycled work item as currently executing and make it wait until the * current execution finishes, introducing an unwanted dependency. * - * This function checks the work item address, work function and workqueue - * to avoid false positives. Note that this isn't complete as one may - * construct a work function which can introduce dependency onto itself - * through a recycled work item. Well, if somebody wants to shoot oneself - * in the foot that badly, there's only so much we can do, and if such - * deadlock actually occurs, it should be easy to locate the culprit work - * function. + * This function checks the work item address and work function to avoid + * false positives. Note that this isn't complete as one may construct a + * work function which can introduce dependency onto itself through a + * recycled work item. Well, if somebody wants to shoot oneself in the + * foot that badly, there's only so much we can do, and if such deadlock + * actually occurs, it should be easy to locate the culprit work function. * * CONTEXT: * spin_lock_irq(pool->lock). @@ -1187,9 +1184,9 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, get_pwq(pwq); /* - * Ensure either worker_sched_deactivated() sees the above - * list_add_tail() or we see zero nr_running to avoid workers - * lying around lazily while there are works to be processed. + * Ensure either wq_worker_sleeping() sees the above + * list_add_tail() or we see zero nr_running to avoid workers lying + * around lazily while there are works to be processed. */ smp_mb(); @@ -1790,6 +1787,10 @@ static struct worker *create_worker(struct worker_pool *pool) if (IS_ERR(worker->task)) goto fail; + /* + * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any + * online CPUs. It'll be re-applied when any of the CPUs come up. + */ set_user_nice(worker->task, pool->attrs->nice); set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); @@ -1950,8 +1951,8 @@ static void pool_mayday_timeout(unsigned long __pool) * sent to all rescuers with works scheduled on @pool to resolve * possible allocation deadlock. 
* - * On return, need_to_create_worker() is guaranteed to be false and - * may_start_working() true. + * On return, need_to_create_worker() is guaranteed to be %false and + * may_start_working() %true. * * LOCKING: * spin_lock_irq(pool->lock) which may be released and regrabbed @@ -1959,7 +1960,7 @@ static void pool_mayday_timeout(unsigned long __pool) * manager. * * RETURNS: - * false if no action was taken and pool->lock stayed locked, true + * %false if no action was taken and pool->lock stayed locked, %true * otherwise. */ static bool maybe_create_worker(struct worker_pool *pool) @@ -2016,7 +2017,7 @@ restart: * multiple times. Called only from manager. * * RETURNS: - * false if no action was taken and pool->lock stayed locked, true + * %false if no action was taken and pool->lock stayed locked, %true * otherwise. */ static bool maybe_destroy_workers(struct worker_pool *pool) @@ -2268,11 +2269,11 @@ static void process_scheduled_works(struct worker *worker) * worker_thread - the worker thread function * @__worker: self * - * The worker thread function. There are NR_CPU_WORKER_POOLS dynamic pools - * of these per each cpu. These workers process all works regardless of - * their specific target workqueue. The only exception is works which - * belong to workqueues with a rescuer which will be explained in - * rescuer_thread(). + * The worker thread function. All workers belong to a worker_pool - + * either a per-cpu one or dynamic unbound one. These workers process all + * work items regardless of their specific target workqueue. The only + * exception is work items which belong to workqueues with a rescuer which + * will be explained in rescuer_thread(). */ static int worker_thread(void *__worker) { @@ -2600,11 +2601,8 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, * flush_workqueue - ensure that any scheduled work has run to completion. * @wq: workqueue to flush * - * Forces execution of the workqueue and blocks until its completion. - * This is typically used in driver shutdown handlers. - * - * We sleep until all works which were queued on entry have been handled, - * but we are not livelocked by new incoming ones. + * This function sleeps until all work items which were queued on entry + * have finished execution, but it is not livelocked by new incoming ones. */ void flush_workqueue(struct workqueue_struct *wq) { @@ -2794,7 +2792,7 @@ reflush: if (++flush_cnt == 10 || (flush_cnt % 100 == 0 && flush_cnt <= 1000)) - pr_warn("workqueue %s: flush on destruction isn't complete after %u tries\n", + pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n", wq->name, flush_cnt); local_irq_enable(); @@ -3576,7 +3574,9 @@ static void rcu_free_pool(struct rcu_head *rcu) * @pool: worker_pool to put * * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU - * safe manner. + * safe manner. get_unbound_pool() calls this function on its failure path + * and this function should be able to release pools which went through, + * successfully or not, init_worker_pool(). */ static void put_unbound_pool(struct worker_pool *pool) { @@ -3602,7 +3602,11 @@ static void put_unbound_pool(struct worker_pool *pool) spin_unlock_irq(&workqueue_lock); - /* lock out manager and destroy all workers */ + /* + * Become the manager and destroy all workers. Grabbing + * manager_arb prevents @pool's workers from blocking on + * manager_mutex. 
+ */ mutex_lock(&pool->manager_arb); spin_lock_irq(&pool->lock); @@ -4339,7 +4343,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu); * freeze_workqueues_begin - begin freezing workqueues * * Start freezing workqueues. After this function returns, all freezable - * workqueues will queue new works to their frozen_works list instead of + * workqueues will queue new works to their delayed_works list instead of * pool->worklist. * * CONTEXT: -- cgit v1.2.2 From 611c92a0203091bb022edec7e2d8b765fe148622 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 16:51:36 -0700 Subject: workqueue: rename @id to @pi in for_each_each_pool() Rename @id argument of for_each_pool() to @pi so that it doesn't get reused accidentally when for_each_pool() is used in combination with other iterators. This patch is purely cosmetic. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 248a1e95b577..147fc5a784f0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -283,7 +283,7 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); /** * for_each_pool - iterate through all worker_pools in the system * @pool: iteration cursor - * @id: integer used for iteration + * @pi: integer used for iteration * * This must be called either with workqueue_lock held or sched RCU read * locked. If the pool needs to be used beyond the locking in effect, the @@ -292,8 +292,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); * The if/else clause exists only for the lockdep assertion and can be * ignored. */ -#define for_each_pool(pool, id) \ - idr_for_each_entry(&worker_pool_idr, pool, id) \ +#define for_each_pool(pool, pi) \ + idr_for_each_entry(&worker_pool_idr, pool, pi) \ if (({ assert_rcu_or_wq_lock(); false; })) { } \ else @@ -4354,7 +4354,7 @@ void freeze_workqueues_begin(void) struct worker_pool *pool; struct workqueue_struct *wq; struct pool_workqueue *pwq; - int id; + int pi; spin_lock_irq(&workqueue_lock); @@ -4362,7 +4362,7 @@ void freeze_workqueues_begin(void) workqueue_freezing = true; /* set FREEZING */ - for_each_pool(pool, id) { + for_each_pool(pool, pi) { spin_lock(&pool->lock); WARN_ON_ONCE(pool->flags & POOL_FREEZING); pool->flags |= POOL_FREEZING; @@ -4435,7 +4435,7 @@ void thaw_workqueues(void) struct workqueue_struct *wq; struct pool_workqueue *pwq; struct worker_pool *pool; - int id; + int pi; spin_lock_irq(&workqueue_lock); @@ -4443,7 +4443,7 @@ void thaw_workqueues(void) goto out_unlock; /* clear FREEZING */ - for_each_pool(pool, id) { + for_each_pool(pool, pi) { spin_lock(&pool->lock); WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); pool->flags &= ~POOL_FREEZING; @@ -4457,7 +4457,7 @@ void thaw_workqueues(void) } /* kick workers */ - for_each_pool(pool, id) { + for_each_pool(pool, pi) { spin_lock(&pool->lock); wake_up_worker(pool); spin_unlock(&pool->lock); -- cgit v1.2.2 From 8425e3d5bdbe8e741d2c73cf3189ed59b4038b84 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 16:51:36 -0700 Subject: workqueue: inline trivial wrappers There's no reason to make these trivial wrappers full (exported) functions. Inline the followings. 
queue_work() queue_delayed_work() mod_delayed_work() schedule_work_on() schedule_work() schedule_delayed_work_on() schedule_delayed_work() keventd_up() Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 123 +++++++++++++++++++++++++++++++++++++++++----- kernel/workqueue.c | 111 ----------------------------------------- 2 files changed, 111 insertions(+), 123 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index df30763c8682..835d12b76960 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -417,28 +417,16 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, extern bool queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work); -extern bool queue_work(struct workqueue_struct *wq, struct work_struct *work); extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); -extern bool queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *work, unsigned long delay); extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); -extern bool mod_delayed_work(struct workqueue_struct *wq, - struct delayed_work *dwork, unsigned long delay); extern void flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern bool schedule_work_on(int cpu, struct work_struct *work); -extern bool schedule_work(struct work_struct *work); -extern bool schedule_delayed_work_on(int cpu, struct delayed_work *work, - unsigned long delay); -extern bool schedule_delayed_work(struct delayed_work *work, - unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); -extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); @@ -455,6 +443,117 @@ extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); extern unsigned int work_busy(struct work_struct *work); +/** + * queue_work - queue work on a workqueue + * @wq: workqueue to use + * @work: work to queue + * + * Returns %false if @work was already on a queue, %true otherwise. + * + * We queue the work to the CPU on which it was submitted, but if the CPU dies + * it can be processed by another CPU. + */ +static inline bool queue_work(struct workqueue_struct *wq, + struct work_struct *work) +{ + return queue_work_on(WORK_CPU_UNBOUND, wq, work); +} + +/** + * queue_delayed_work - queue work on a workqueue after delay + * @wq: workqueue to use + * @dwork: delayable work to queue + * @delay: number of jiffies to wait before queueing + * + * Equivalent to queue_delayed_work_on() but tries to use the local CPU. + */ +static inline bool queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); +} + +/** + * mod_delayed_work - modify delay of or queue a delayed work + * @wq: workqueue to use + * @dwork: work to queue + * @delay: number of jiffies to wait before queueing + * + * mod_delayed_work_on() on local CPU. 
+ */ +static inline bool mod_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, + unsigned long delay) +{ + return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); +} + +/** + * schedule_work_on - put work task on a specific cpu + * @cpu: cpu to put the work task on + * @work: job to be done + * + * This puts a job on a specific cpu + */ +static inline bool schedule_work_on(int cpu, struct work_struct *work) +{ + return queue_work_on(cpu, system_wq, work); +} + +/** + * schedule_work - put work task in global workqueue + * @work: job to be done + * + * Returns %false if @work was already on the kernel-global workqueue and + * %true otherwise. + * + * This puts a job in the kernel-global workqueue if it was not already + * queued and leaves it in the same position on the kernel-global + * workqueue otherwise. + */ +static inline bool schedule_work(struct work_struct *work) +{ + return queue_work(system_wq, work); +} + +/** + * schedule_delayed_work_on - queue work in global workqueue on CPU after delay + * @cpu: cpu to use + * @dwork: job to be done + * @delay: number of jiffies to wait + * + * After waiting for a given time this puts a job in the kernel-global + * workqueue on the specified CPU. + */ +static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work_on(cpu, system_wq, dwork, delay); +} + +/** + * schedule_delayed_work - put work task in global workqueue after delay + * @dwork: job to be done + * @delay: number of jiffies to wait or 0 for immediate execution + * + * After waiting for a given time this puts a job in the kernel-global + * workqueue. + */ +static inline bool schedule_delayed_work(struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work(system_wq, dwork, delay); +} + +/** + * keventd_up - is workqueue initialized yet? + */ +static inline bool keventd_up(void) +{ + return system_wq != NULL; +} + /* * Like above, but uses del_timer() instead of del_timer_sync(). This means, * if it returns 0 the timer function may be running and the queueing is in diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 147fc5a784f0..f37421fb4f35 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1340,22 +1340,6 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_work_on); -/** - * queue_work - queue work on a workqueue - * @wq: workqueue to use - * @work: work to queue - * - * Returns %false if @work was already on a queue, %true otherwise. - * - * We queue the work to the CPU on which it was submitted, but if the CPU dies - * it can be processed by another CPU. - */ -bool queue_work(struct workqueue_struct *wq, struct work_struct *work) -{ - return queue_work_on(WORK_CPU_UNBOUND, wq, work); -} -EXPORT_SYMBOL_GPL(queue_work); - void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; @@ -1430,21 +1414,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_delayed_work_on); -/** - * queue_delayed_work - queue work on a workqueue after delay - * @wq: workqueue to use - * @dwork: delayable work to queue - * @delay: number of jiffies to wait before queueing - * - * Equivalent to queue_delayed_work_on() but tries to use the local CPU. 
- */ -bool queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *dwork, unsigned long delay) -{ - return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); -} -EXPORT_SYMBOL_GPL(queue_delayed_work); - /** * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU * @cpu: CPU number to execute work on @@ -1483,21 +1452,6 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(mod_delayed_work_on); -/** - * mod_delayed_work - modify delay of or queue a delayed work - * @wq: workqueue to use - * @dwork: work to queue - * @delay: number of jiffies to wait before queueing - * - * mod_delayed_work_on() on local CPU. - */ -bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork, - unsigned long delay) -{ - return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay); -} -EXPORT_SYMBOL_GPL(mod_delayed_work); - /** * worker_enter_idle - enter idle state * @worker: worker which is entering idle state @@ -3001,66 +2955,6 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork) } EXPORT_SYMBOL(cancel_delayed_work_sync); -/** - * schedule_work_on - put work task on a specific cpu - * @cpu: cpu to put the work task on - * @work: job to be done - * - * This puts a job on a specific cpu - */ -bool schedule_work_on(int cpu, struct work_struct *work) -{ - return queue_work_on(cpu, system_wq, work); -} -EXPORT_SYMBOL(schedule_work_on); - -/** - * schedule_work - put work task in global workqueue - * @work: job to be done - * - * Returns %false if @work was already on the kernel-global workqueue and - * %true otherwise. - * - * This puts a job in the kernel-global workqueue if it was not already - * queued and leaves it in the same position on the kernel-global - * workqueue otherwise. - */ -bool schedule_work(struct work_struct *work) -{ - return queue_work(system_wq, work); -} -EXPORT_SYMBOL(schedule_work); - -/** - * schedule_delayed_work_on - queue work in global workqueue on CPU after delay - * @cpu: cpu to use - * @dwork: job to be done - * @delay: number of jiffies to wait - * - * After waiting for a given time this puts a job in the kernel-global - * workqueue on the specified CPU. - */ -bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, - unsigned long delay) -{ - return queue_delayed_work_on(cpu, system_wq, dwork, delay); -} -EXPORT_SYMBOL(schedule_delayed_work_on); - -/** - * schedule_delayed_work - put work task in global workqueue after delay - * @dwork: job to be done - * @delay: number of jiffies to wait or 0 for immediate execution - * - * After waiting for a given time this puts a job in the kernel-global - * workqueue. 
- */ -bool schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) -{ - return queue_delayed_work(system_wq, dwork, delay); -} -EXPORT_SYMBOL(schedule_delayed_work); - /** * schedule_on_each_cpu - execute a function synchronously on each online CPU * @func: the function to call @@ -3154,11 +3048,6 @@ int execute_in_process_context(work_func_t fn, struct execute_work *ew) } EXPORT_SYMBOL_GPL(execute_in_process_context); -int keventd_up(void) -{ - return system_wq != NULL; -} - #ifdef CONFIG_SYSFS /* * Workqueues with WQ_SYSFS flag set is visible to userland via -- cgit v1.2.2 From bc3a1afc92aea46d6df18d38e5d15867b17c69f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 19:47:39 -0700 Subject: workqueue: rename worker_pool->assoc_mutex to ->manager_mutex Manager operations are currently governed by two mutexes - pool->manager_arb and ->assoc_mutex. The former is used to decide who gets to be the manager and the latter to exclude the actual manager operations including creation and destruction of workers. Anyone who grabs ->manager_arb must perform manager role; otherwise, the pool might stall. Grabbing ->assoc_mutex blocks everyone else from performing manager operations but doesn't require the holder to perform manager duties as it's merely blocking manager operations without becoming the manager. Because the blocking was necessary when [dis]associating per-cpu workqueues during CPU hotplug events, the latter was named assoc_mutex. The mutex is scheduled to be used for other purposes, so this patch gives it a more fitting generic name - manager_mutex - and updates / adds comments to explain synchronization around the manager role and operations. This patch is pure rename / doc update. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 62 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f37421fb4f35..bc25bdfb4b42 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -60,8 +60,8 @@ enum { * %WORKER_UNBOUND set and concurrency management disabled, and may * be executing on any CPU. The pool behaves as an unbound one. * - * Note that DISASSOCIATED can be flipped only while holding - * assoc_mutex to avoid changing binding state while + * Note that DISASSOCIATED should be flipped only while holding + * manager_mutex to avoid changing binding state while * create_worker() is in progress. */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ @@ -149,8 +149,9 @@ struct worker_pool { DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ + /* see manage_workers() for details on the two manager mutexes */ struct mutex manager_arb; /* manager arbitration */ - struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ + struct mutex manager_mutex; /* manager exclusion */ struct ida worker_ida; /* L: for worker IDs */ struct workqueue_attrs *attrs; /* I: worker attributes */ @@ -1635,7 +1636,7 @@ static void rebind_workers(struct worker_pool *pool) struct worker *worker, *n; int i; - lockdep_assert_held(&pool->assoc_mutex); + lockdep_assert_held(&pool->manager_mutex); lockdep_assert_held(&pool->lock); /* dequeue and kick idle ones */ @@ -2022,31 +2023,44 @@ static bool manage_workers(struct worker *worker) struct worker_pool *pool = worker->pool; bool ret = false; + /* + * Managership is governed by two mutexes - manager_arb and + * manager_mutex. manager_arb handles arbitration of manager role. 
+ * Anyone who successfully grabs manager_arb wins the arbitration + * and becomes the manager. mutex_trylock() on pool->manager_arb + * failure while holding pool->lock reliably indicates that someone + * else is managing the pool and the worker which failed trylock + * can proceed to executing work items. This means that anyone + * grabbing manager_arb is responsible for actually performing + * manager duties. If manager_arb is grabbed and released without + * actual management, the pool may stall indefinitely. + * + * manager_mutex is used for exclusion of actual management + * operations. The holder of manager_mutex can be sure that none + * of management operations, including creation and destruction of + * workers, won't take place until the mutex is released. Because + * manager_mutex doesn't interfere with manager role arbitration, + * it is guaranteed that the pool's management, while may be + * delayed, won't be disturbed by someone else grabbing + * manager_mutex. + */ if (!mutex_trylock(&pool->manager_arb)) return ret; /* - * To simplify both worker management and CPU hotplug, hold off - * management while hotplug is in progress. CPU hotplug path can't - * grab @pool->manager_arb to achieve this because that can lead to - * idle worker depletion (all become busy thinking someone else is - * managing) which in turn can result in deadlock under extreme - * circumstances. Use @pool->assoc_mutex to synchronize manager - * against CPU hotplug. - * - * assoc_mutex would always be free unless CPU hotplug is in - * progress. trylock first without dropping @pool->lock. + * With manager arbitration won, manager_mutex would be free in + * most cases. trylock first without dropping @pool->lock. */ - if (unlikely(!mutex_trylock(&pool->assoc_mutex))) { + if (unlikely(!mutex_trylock(&pool->manager_mutex))) { spin_unlock_irq(&pool->lock); - mutex_lock(&pool->assoc_mutex); + mutex_lock(&pool->manager_mutex); /* * CPU hotplug could have happened while we were waiting * for assoc_mutex. Hotplug itself can't handle us * because manager isn't either on idle or busy list, and * @pool's state and ours could have deviated. * - * As hotplug is now excluded via assoc_mutex, we can + * As hotplug is now excluded via manager_mutex, we can * simply try to bind. It will succeed or fail depending * on @pool's current state. Try it and adjust * %WORKER_UNBOUND accordingly. @@ -2068,7 +2082,7 @@ static bool manage_workers(struct worker *worker) ret |= maybe_destroy_workers(pool); ret |= maybe_create_worker(pool); - mutex_unlock(&pool->assoc_mutex); + mutex_unlock(&pool->manager_mutex); mutex_unlock(&pool->manager_arb); return ret; } @@ -3436,7 +3450,7 @@ static int init_worker_pool(struct worker_pool *pool) (unsigned long)pool); mutex_init(&pool->manager_arb); - mutex_init(&pool->assoc_mutex); + mutex_init(&pool->manager_mutex); ida_init(&pool->worker_ida); INIT_HLIST_NODE(&pool->hash_node); @@ -4076,11 +4090,11 @@ static void wq_unbind_fn(struct work_struct *work) for_each_cpu_worker_pool(pool, cpu) { WARN_ON_ONCE(cpu != smp_processor_id()); - mutex_lock(&pool->assoc_mutex); + mutex_lock(&pool->manager_mutex); spin_lock_irq(&pool->lock); /* - * We've claimed all manager positions. Make all workers + * We've blocked all manager operations. Make all workers * unbound and set DISASSOCIATED. Before this, all workers * except for the ones which are still executing works from * before the last CPU down must be on the cpu. 
After @@ -4095,7 +4109,7 @@ static void wq_unbind_fn(struct work_struct *work) pool->flags |= POOL_DISASSOCIATED; spin_unlock_irq(&pool->lock); - mutex_unlock(&pool->assoc_mutex); + mutex_unlock(&pool->manager_mutex); } /* @@ -4152,14 +4166,14 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: for_each_cpu_worker_pool(pool, cpu) { - mutex_lock(&pool->assoc_mutex); + mutex_lock(&pool->manager_mutex); spin_lock_irq(&pool->lock); pool->flags &= ~POOL_DISASSOCIATED; rebind_workers(pool); spin_unlock_irq(&pool->lock); - mutex_unlock(&pool->assoc_mutex); + mutex_unlock(&pool->manager_mutex); } break; } -- cgit v1.2.2 From ebf44d16ec4619c8a8daeacd987dd86d420ea2c3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 19:47:39 -0700 Subject: workqueue: factor out initial worker creation into create_and_start_worker() get_unbound_pool(), workqueue_cpu_up_callback() and init_workqueues() have similar code pieces to create and start the initial worker. Factor those out into create_and_start_worker(). This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bc25bdfb4b42..cac710646cbc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1792,6 +1792,26 @@ static void start_worker(struct worker *worker) wake_up_process(worker->task); } +/** + * create_and_start_worker - create and start a worker for a pool + * @pool: the target pool + * + * Create and start a new worker for @pool. + */ +static int create_and_start_worker(struct worker_pool *pool) +{ + struct worker *worker; + + worker = create_worker(pool); + if (worker) { + spin_lock_irq(&pool->lock); + start_worker(worker); + spin_unlock_irq(&pool->lock); + } + + return worker ?
0 : -ENOMEM; +} + /** * destroy_worker - destroy a workqueue worker * @worker: worker to be destroyed @@ -3542,7 +3562,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) static DEFINE_MUTEX(create_mutex); u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; - struct worker *worker; mutex_lock(&create_mutex); @@ -3568,14 +3587,9 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) goto fail; /* create and start the initial worker */ - worker = create_worker(pool); - if (!worker) + if (create_and_start_worker(pool) < 0) goto fail; - spin_lock_irq(&pool->lock); - start_worker(worker); - spin_unlock_irq(&pool->lock); - /* install */ spin_lock_irq(&workqueue_lock); hash_add(unbound_pool_hash, &pool->hash_node, hash); @@ -4148,18 +4162,10 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: for_each_cpu_worker_pool(pool, cpu) { - struct worker *worker; - if (pool->nr_workers) continue; - - worker = create_worker(pool); - if (!worker) + if (create_and_start_worker(pool) < 0) return NOTIFY_BAD; - - spin_lock_irq(&pool->lock); - start_worker(worker); - spin_unlock_irq(&pool->lock); } break; @@ -4409,15 +4415,8 @@ static int __init init_workqueues(void) struct worker_pool *pool; for_each_cpu_worker_pool(pool, cpu) { - struct worker *worker; - pool->flags &= ~POOL_DISASSOCIATED; - - worker = create_worker(pool); - BUG_ON(!worker); - spin_lock_irq(&pool->lock); - start_worker(worker); - spin_unlock_irq(&pool->lock); + BUG_ON(create_and_start_worker(pool) < 0); } } -- cgit v1.2.2 From cd549687a7ee5e619a26f55af4059c4ae585811c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 19:47:39 -0700 Subject: workqueue: better define locking rules around worker creation / destruction When a manager creates or destroys workers, the operations are always done with the manager_mutex held; however, initial worker creation and worker destruction during pool release do not grab the mutex. They are still correct, as initial worker creation doesn't require synchronization and grabbing manager_arb provides enough exclusion for the pool release path. Still, let's make everyone follow the same rules for consistency and so that lockdep annotations can be added. Update create_and_start_worker() and put_unbound_pool() to grab manager_mutex around thread creation and destruction respectively and add lockdep assertions to create_worker() and destroy_worker(). This patch doesn't introduce any visible behavior changes. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cac710646cbc..ce1ab069c5fe 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1715,6 +1715,8 @@ static struct worker *create_worker(struct worker_pool *pool) struct worker *worker = NULL; int id = -1; + lockdep_assert_held(&pool->manager_mutex); + spin_lock_irq(&pool->lock); while (ida_get_new(&pool->worker_ida, &id)) { spin_unlock_irq(&pool->lock); @@ -1796,12 +1798,14 @@ static void start_worker(struct worker *worker) * create_and_start_worker - create and start a worker for a pool * @pool: the target pool * - * Create and start a new worker for @pool. + * Grab the managership of @pool and create and start a new worker for it.
*/ static int create_and_start_worker(struct worker_pool *pool) { struct worker *worker; + mutex_lock(&pool->manager_mutex); + worker = create_worker(pool); if (worker) { spin_lock_irq(&pool->lock); @@ -1809,6 +1813,8 @@ static int create_and_start_worker(struct worker_pool *pool) spin_unlock_irq(&pool->lock); } + mutex_unlock(&pool->manager_mutex); + return worker ? 0 : -ENOMEM; } @@ -1826,6 +1832,9 @@ static void destroy_worker(struct worker *worker) struct worker_pool *pool = worker->pool; int id = worker->id; + lockdep_assert_held(&pool->manager_mutex); + lockdep_assert_held(&pool->lock); + /* sanity check frenzy */ if (WARN_ON(worker->current_work) || WARN_ON(!list_empty(&worker->scheduled))) @@ -3531,6 +3540,7 @@ static void put_unbound_pool(struct worker_pool *pool) * manager_mutex. */ mutex_lock(&pool->manager_arb); + mutex_lock(&pool->manager_mutex); spin_lock_irq(&pool->lock); while ((worker = first_worker(pool))) @@ -3538,6 +3548,7 @@ static void put_unbound_pool(struct worker_pool *pool) WARN_ON(pool->nr_workers || pool->nr_idle); spin_unlock_irq(&pool->lock); + mutex_unlock(&pool->manager_mutex); mutex_unlock(&pool->manager_arb); /* shut down the timers */ -- cgit v1.2.2 From 7d19c5ce6682fd0390049b5340d4b6bb6065d677 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 19:47:40 -0700 Subject: workqueue: relocate global variable defs and function decls in workqueue.c They're split across debugobj code for some reason. Collect them. This patch is pure relocation. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ce1ab069c5fe..9a0cbb2fdd64 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -248,6 +248,21 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; +/* Serializes the accesses to the list of workqueues. */ +static DEFINE_SPINLOCK(workqueue_lock); +static LIST_HEAD(workqueues); +static bool workqueue_freezing; /* W: have wqs started freezing? */ + +/* the per-cpu worker pools */ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], + cpu_worker_pools); + +/* + * R: idr of all pools. Modifications are protected by workqueue_lock. + * Read accesses are protected by sched-RCU protected. + */ +static DEFINE_IDR(worker_pool_idr); + /* W: hash of all unbound pools keyed by pool->attrs */ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); @@ -265,6 +280,10 @@ EXPORT_SYMBOL_GPL(system_unbound_wq); struct workqueue_struct *system_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_wq); +static int worker_thread(void *__worker); +static void copy_workqueue_attrs(struct workqueue_attrs *to, + const struct workqueue_attrs *from); + #define CREATE_TRACE_POINTS #include @@ -431,25 +450,6 @@ static inline void debug_work_activate(struct work_struct *work) { } static inline void debug_work_deactivate(struct work_struct *work) { } #endif -/* Serializes the accesses to the list of workqueues. */ -static DEFINE_SPINLOCK(workqueue_lock); -static LIST_HEAD(workqueues); -static bool workqueue_freezing; /* W: have wqs started freezing? */ - -/* the per-cpu worker pools */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], - cpu_worker_pools); - -/* - * R: idr of all pools. Modifications are protected by workqueue_lock. - * Read accesses are protected by sched-RCU protected. 
- */ -static DEFINE_IDR(worker_pool_idr); - -static int worker_thread(void *__worker); -static void copy_workqueue_attrs(struct workqueue_attrs *to, - const struct workqueue_attrs *from); - /* allocate ID and assign it to @pool */ static int worker_pool_assign_id(struct worker_pool *pool) { -- cgit v1.2.2 From 5bcab3355a555a9c1bd4becb136cbd3651c8eafa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 19:47:40 -0700 Subject: workqueue: separate out pool and workqueue locking into wq_mutex Currently, workqueue_lock protects most shared workqueue resources - the pools, workqueues, pool_workqueues, draining, ID assignments, mayday handling and so on. The coverage has grown organically and there is no identified bottleneck coming from workqueue_lock, but it has grown a bit too much and scheduled rebinding changes need the pools and workqueues to be protected by a mutex instead of a spinlock. This patch breaks out pool and workqueue synchronization from workqueue_lock into a new mutex - wq_mutex. The following are protected by wq_mutex. * worker_pool_idr and unbound_pool_hash * pool->refcnt * workqueues list * workqueue->flags, ->nr_drainers Most of the changes are straightforward. workqueue_lock is replaced with wq_mutex where applicable, and workqueue_lock lock/unlocks are added where the wq_mutex conversion would otherwise leave data structures without locking. irq / preemption enable/disable points were added where the conversion affects them. Things worth noting are: * New WQ and WR locking labels are added along with assert_rcu_or_wq_mutex(). * worker_pool_assign_id() now expects to be called under wq_mutex. * create_mutex is removed from get_unbound_pool(). It now just holds wq_mutex. This patch shouldn't introduce any visible behavior changes. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 146 ++++++++++++++++++++++++++++------------------------- 1 file changed, 77 insertions(+), 69 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9a0cbb2fdd64..c3b59ff22007 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -119,9 +119,11 @@ enum { * * F: wq->flush_mutex protected. * - * W: workqueue_lock protected. + * WQ: wq_mutex protected. + * + * WR: wq_mutex protected for writes. Sched-RCU protected for reads. + * - * R: workqueue_lock protected for writes. Sched-RCU protected for reads. + * W: workqueue_lock protected. * * FR: wq->flush_mutex and workqueue_lock protected for writes. Sched-RCU * protected for reads. */ @@ -155,8 +157,8 @@ struct worker_pool { struct ida worker_ida; /* L: for worker IDs */ struct workqueue_attrs *attrs; /* I: worker attributes */ - struct hlist_node hash_node; /* W: unbound_pool_hash node */ - int refcnt; /* W: refcnt for unbound pools */ + struct hlist_node hash_node; /* WQ: unbound_pool_hash node */ + int refcnt; /* WQ: refcnt for unbound pools */ /* * The current concurrency level. As it's likely to be accessed @@ -218,10 +220,10 @@ struct wq_device; * the appropriate worker_pool through its pool_workqueues.
*/ struct workqueue_struct { - unsigned int flags; /* W: WQ_* flags */ + unsigned int flags; /* WQ: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */ struct list_head pwqs; /* FR: all pwqs of this wq */ - struct list_head list; /* W: list of all workqueues */ + struct list_head list; /* WQ: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ int work_color; /* F: current work color */ @@ -234,7 +236,7 @@ struct workqueue_struct { struct list_head maydays; /* W: pwqs requesting rescue */ struct worker *rescuer; /* I: rescue worker */ - int nr_drainers; /* W: drain in progress */ + int nr_drainers; /* WQ: drain in progress */ int saved_max_active; /* W: saved pwq max_active */ #ifdef CONFIG_SYSFS @@ -248,22 +250,19 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; -/* Serializes the accesses to the list of workqueues. */ +static DEFINE_MUTEX(wq_mutex); /* protects workqueues and pools */ static DEFINE_SPINLOCK(workqueue_lock); -static LIST_HEAD(workqueues); -static bool workqueue_freezing; /* W: have wqs started freezing? */ + +static LIST_HEAD(workqueues); /* WQ: list of all workqueues */ +static bool workqueue_freezing; /* WQ: have wqs started freezing? */ /* the per-cpu worker pools */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools); -/* - * R: idr of all pools. Modifications are protected by workqueue_lock. - * Read accesses are protected by sched-RCU protected. - */ -static DEFINE_IDR(worker_pool_idr); +static DEFINE_IDR(worker_pool_idr); /* WR: idr of all pools */ -/* W: hash of all unbound pools keyed by pool->attrs */ +/* WQ: hash of all unbound pools keyed by pool->attrs */ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); /* I: attributes used when instantiating standard unbound pools on demand */ @@ -287,6 +286,11 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, #define CREATE_TRACE_POINTS #include +#define assert_rcu_or_wq_mutex() \ + rcu_lockdep_assert(rcu_read_lock_sched_held() || \ + lockdep_is_held(&wq_mutex), \ + "sched RCU or wq_mutex should be held") + #define assert_rcu_or_wq_lock() \ rcu_lockdep_assert(rcu_read_lock_sched_held() || \ lockdep_is_held(&workqueue_lock), \ @@ -305,16 +309,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, * @pool: iteration cursor * @pi: integer used for iteration * - * This must be called either with workqueue_lock held or sched RCU read - * locked. If the pool needs to be used beyond the locking in effect, the - * caller is responsible for guaranteeing that the pool stays online. + * This must be called either with wq_mutex held or sched RCU read locked. + * If the pool needs to be used beyond the locking in effect, the caller is + * responsible for guaranteeing that the pool stays online. * * The if/else clause exists only for the lockdep assertion and can be * ignored. 
*/ #define for_each_pool(pool, pi) \ idr_for_each_entry(&worker_pool_idr, pool, pi) \ - if (({ assert_rcu_or_wq_lock(); false; })) { } \ + if (({ assert_rcu_or_wq_mutex(); false; })) { } \ else /** @@ -455,13 +459,12 @@ static int worker_pool_assign_id(struct worker_pool *pool) { int ret; + lockdep_assert_held(&wq_mutex); + do { if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL)) return -ENOMEM; - - spin_lock_irq(&workqueue_lock); ret = idr_get_new(&worker_pool_idr, pool, &pool->id); - spin_unlock_irq(&workqueue_lock); } while (ret == -EAGAIN); return ret; @@ -574,9 +577,9 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) * * Return the worker_pool @work was last associated with. %NULL if none. * - * Pools are created and destroyed under workqueue_lock, and allows read - * access under sched-RCU read lock. As such, this function should be - * called under workqueue_lock or with preemption disabled. + * Pools are created and destroyed under wq_mutex, and allows read access + * under sched-RCU read lock. As such, this function should be called + * under wq_mutex or with preemption disabled. * * All fields of the returned pool are accessible as long as the above * mentioned locking is in effect. If the returned pool needs to be used @@ -588,7 +591,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) unsigned long data = atomic_long_read(&work->data); int pool_id; - assert_rcu_or_wq_lock(); + assert_rcu_or_wq_mutex(); if (data & WORK_STRUCT_PWQ) return ((struct pool_workqueue *) @@ -2768,10 +2771,10 @@ void drain_workqueue(struct workqueue_struct *wq) * hotter than drain_workqueue() and already looks at @wq->flags. * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers. */ - spin_lock_irq(&workqueue_lock); + mutex_lock(&wq_mutex); if (!wq->nr_drainers++) wq->flags |= __WQ_DRAINING; - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); reflush: flush_workqueue(wq); @@ -2796,12 +2799,12 @@ reflush: goto reflush; } - spin_lock(&workqueue_lock); + local_irq_enable(); + + mutex_lock(&wq_mutex); if (!--wq->nr_drainers) wq->flags &= ~__WQ_DRAINING; - spin_unlock(&workqueue_lock); - - local_irq_enable(); + mutex_unlock(&wq_mutex); } EXPORT_SYMBOL_GPL(drain_workqueue); @@ -3514,16 +3517,16 @@ static void put_unbound_pool(struct worker_pool *pool) { struct worker *worker; - spin_lock_irq(&workqueue_lock); + mutex_lock(&wq_mutex); if (--pool->refcnt) { - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); return; } /* sanity checks */ if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) || WARN_ON(!list_empty(&pool->worklist))) { - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); return; } @@ -3532,7 +3535,7 @@ static void put_unbound_pool(struct worker_pool *pool) idr_remove(&worker_pool_idr, pool->id); hash_del(&pool->hash_node); - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); /* * Become the manager and destroy all workers. Grabbing @@ -3570,21 +3573,18 @@ static void put_unbound_pool(struct worker_pool *pool) */ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) { - static DEFINE_MUTEX(create_mutex); u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; - mutex_lock(&create_mutex); + mutex_lock(&wq_mutex); /* do we already have a matching pool? 
*/ - spin_lock_irq(&workqueue_lock); hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { if (wqattrs_equal(pool->attrs, attrs)) { pool->refcnt++; goto out_unlock; } } - spin_unlock_irq(&workqueue_lock); /* nope, create a new one */ pool = kzalloc(sizeof(*pool), GFP_KERNEL); @@ -3602,14 +3602,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) goto fail; /* install */ - spin_lock_irq(&workqueue_lock); hash_add(unbound_pool_hash, &pool->hash_node, hash); out_unlock: - spin_unlock_irq(&workqueue_lock); - mutex_unlock(&create_mutex); + mutex_unlock(&wq_mutex); return pool; fail: - mutex_unlock(&create_mutex); + mutex_unlock(&wq_mutex); if (pool) put_unbound_pool(pool); return NULL; @@ -3883,18 +3881,19 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, goto err_destroy; /* - * workqueue_lock protects global freeze state and workqueues list. - * Grab it, adjust max_active and add the new workqueue to - * workqueues list. + * wq_mutex protects global freeze state and workqueues list. Grab + * it, adjust max_active and add the new @wq to workqueues list. */ - spin_lock_irq(&workqueue_lock); + mutex_lock(&wq_mutex); + spin_lock_irq(&workqueue_lock); for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); + spin_unlock_irq(&workqueue_lock); list_add(&wq->list, &workqueues); - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); return wq; @@ -3920,9 +3919,8 @@ void destroy_workqueue(struct workqueue_struct *wq) /* drain it before proceeding with destruction */ drain_workqueue(wq); - spin_lock_irq(&workqueue_lock); - /* sanity checks */ + spin_lock_irq(&workqueue_lock); for_each_pwq(pwq, wq) { int i; @@ -3940,14 +3938,15 @@ void destroy_workqueue(struct workqueue_struct *wq) return; } } + spin_unlock_irq(&workqueue_lock); /* * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. */ + mutex_lock(&wq_mutex); list_del_init(&wq->list); - - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); workqueue_sysfs_unregister(wq); @@ -4267,7 +4266,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu); * pool->worklist. * * CONTEXT: - * Grabs and releases workqueue_lock and pool->lock's. + * Grabs and releases wq_mutex, workqueue_lock and pool->lock's. */ void freeze_workqueues_begin(void) { @@ -4276,26 +4275,28 @@ void freeze_workqueues_begin(void) struct pool_workqueue *pwq; int pi; - spin_lock_irq(&workqueue_lock); + mutex_lock(&wq_mutex); WARN_ON_ONCE(workqueue_freezing); workqueue_freezing = true; /* set FREEZING */ for_each_pool(pool, pi) { - spin_lock(&pool->lock); + spin_lock_irq(&pool->lock); WARN_ON_ONCE(pool->flags & POOL_FREEZING); pool->flags |= POOL_FREEZING; - spin_unlock(&pool->lock); + spin_unlock_irq(&pool->lock); } /* suppress further executions by setting max_active to zero */ + spin_lock_irq(&workqueue_lock); list_for_each_entry(wq, &workqueues, list) { for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); } - spin_unlock_irq(&workqueue_lock); + + mutex_unlock(&wq_mutex); } /** @@ -4305,7 +4306,7 @@ void freeze_workqueues_begin(void) * between freeze_workqueues_begin() and thaw_workqueues(). * * CONTEXT: - * Grabs and releases workqueue_lock. + * Grabs and releases wq_mutex. * * RETURNS: * %true if some freezable workqueues are still busy. 
%false if freezing @@ -4317,7 +4318,7 @@ bool freeze_workqueues_busy(void) struct workqueue_struct *wq; struct pool_workqueue *pwq; - spin_lock_irq(&workqueue_lock); + mutex_lock(&wq_mutex); WARN_ON_ONCE(!workqueue_freezing); @@ -4328,16 +4329,19 @@ bool freeze_workqueues_busy(void) * nr_active is monotonically decreasing. It's safe * to peek without lock. */ + preempt_disable(); for_each_pwq(pwq, wq) { WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; + preempt_enable(); goto out_unlock; } } + preempt_enable(); } out_unlock: - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); return busy; } @@ -4348,7 +4352,7 @@ out_unlock: * frozen works are transferred to their respective pool worklists. * * CONTEXT: - * Grabs and releases workqueue_lock and pool->lock's. + * Grabs and releases wq_mutex, workqueue_lock and pool->lock's. */ void thaw_workqueues(void) { @@ -4357,35 +4361,37 @@ void thaw_workqueues(void) struct worker_pool *pool; int pi; - spin_lock_irq(&workqueue_lock); + mutex_lock(&wq_mutex); if (!workqueue_freezing) goto out_unlock; /* clear FREEZING */ for_each_pool(pool, pi) { - spin_lock(&pool->lock); + spin_lock_irq(&pool->lock); WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); pool->flags &= ~POOL_FREEZING; - spin_unlock(&pool->lock); + spin_unlock_irq(&pool->lock); } /* restore max_active and repopulate worklist */ + spin_lock_irq(&workqueue_lock); list_for_each_entry(wq, &workqueues, list) { for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); } + spin_unlock_irq(&workqueue_lock); /* kick workers */ for_each_pool(pool, pi) { - spin_lock(&pool->lock); + spin_lock_irq(&pool->lock); wake_up_worker(pool); - spin_unlock(&pool->lock); + spin_unlock_irq(&pool->lock); } workqueue_freezing = false; out_unlock: - spin_unlock_irq(&workqueue_lock); + mutex_unlock(&wq_mutex); } #endif /* CONFIG_FREEZER */ @@ -4417,7 +4423,9 @@ static int __init init_workqueues(void) pool->attrs->nice = std_nice[i++]; /* alloc pool ID */ + mutex_lock(&wq_mutex); BUG_ON(worker_pool_assign_id(pool)); + mutex_unlock(&wq_mutex); } } -- cgit v1.2.2 From 794b18bc8a3f80445e1f85c9c87c74de9575c93a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 19:47:40 -0700 Subject: workqueue: separate out pool_workqueue locking into pwq_lock This patch continues the locking cleanup from the previous patch. It breaks out pool_workqueue synchronization from workqueue_lock into a new spinlock - pwq_lock. The following are protected by pwq_lock. * workqueue->pwqs * workqueue->saved_max_active The conversion is straightforward. workqueue_lock usages which cover the above two are converted to pwq_lock. A new locking label, PW, is added for things protected by pwq_lock, and FR is updated to mean flush_mutex + pwq_lock + sched-RCU. This patch shouldn't introduce any visible behavior changes. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 69 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c3b59ff22007..63856dfbd082 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -123,9 +123,11 @@ enum { * * WR: wq_mutex protected for writes. Sched-RCU protected for reads. * + * PW: pwq_lock protected. + * * W: workqueue_lock protected. * - * FR: wq->flush_mutex and workqueue_lock protected for writes. Sched-RCU + * FR: wq->flush_mutex and pwq_lock protected for writes. Sched-RCU * protected for reads. */ @@ -198,7 +200,7 @@ struct pool_workqueue { * Release of unbound pwq is punted to system_wq. See put_pwq()
See put_pwq() * and pwq_unbound_release_workfn() for details. pool_workqueue * itself is also sched-RCU protected so that the first pwq can be - * determined without grabbing workqueue_lock. + * determined without grabbing pwq_lock. */ struct work_struct unbound_release_work; struct rcu_head rcu; @@ -237,7 +239,7 @@ struct workqueue_struct { struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* WQ: drain in progress */ - int saved_max_active; /* W: saved pwq max_active */ + int saved_max_active; /* PW: saved pwq max_active */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ @@ -251,6 +253,7 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; static DEFINE_MUTEX(wq_mutex); /* protects workqueues and pools */ +static DEFINE_SPINLOCK(pwq_lock); /* protects pool_workqueues */ static DEFINE_SPINLOCK(workqueue_lock); static LIST_HEAD(workqueues); /* WQ: list of all workqueues */ @@ -291,10 +294,10 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, lockdep_is_held(&wq_mutex), \ "sched RCU or wq_mutex should be held") -#define assert_rcu_or_wq_lock() \ +#define assert_rcu_or_pwq_lock() \ rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&workqueue_lock), \ - "sched RCU or workqueue lock should be held") + lockdep_is_held(&pwq_lock), \ + "sched RCU or pwq_lock should be held") #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ @@ -326,16 +329,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, * @pwq: iteration cursor * @wq: the target workqueue * - * This must be called either with workqueue_lock held or sched RCU read - * locked. If the pwq needs to be used beyond the locking in effect, the - * caller is responsible for guaranteeing that the pwq stays online. + * This must be called either with pwq_lock held or sched RCU read locked. + * If the pwq needs to be used beyond the locking in effect, the caller is + * responsible for guaranteeing that the pwq stays online. * * The if/else clause exists only for the lockdep assertion and can be * ignored. */ #define for_each_pwq(pwq, wq) \ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \ - if (({ assert_rcu_or_wq_lock(); false; })) { } \ + if (({ assert_rcu_or_pwq_lock(); false; })) { } \ else #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -474,13 +477,13 @@ static int worker_pool_assign_id(struct worker_pool *pool) * first_pwq - return the first pool_workqueue of the specified workqueue * @wq: the target workqueue * - * This must be called either with workqueue_lock held or sched RCU read - * locked. If the pwq needs to be used beyond the locking in effect, the - * caller is responsible for guaranteeing that the pwq stays online. + * This must be called either with pwq_lock held or sched RCU read locked. + * If the pwq needs to be used beyond the locking in effect, the caller is + * responsible for guaranteeing that the pwq stays online. */ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) { - assert_rcu_or_wq_lock(); + assert_rcu_or_pwq_lock(); return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue, pwqs_node); } @@ -3639,9 +3642,9 @@ static void pwq_unbound_release_workfn(struct work_struct *work) * and consistent with the linking path. 
*/ mutex_lock(&wq->flush_mutex); - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&pwq_lock); list_del_rcu(&pwq->pwqs_node); - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); mutex_unlock(&wq->flush_mutex); put_unbound_pool(pool); @@ -3669,7 +3672,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) bool freezable = wq->flags & WQ_FREEZABLE; /* for @wq->saved_max_active */ - lockdep_assert_held(&workqueue_lock); + lockdep_assert_held(&pwq_lock); /* fast exit for non-freezable wqs */ if (!freezable && pwq->max_active == wq->saved_max_active) @@ -3706,7 +3709,7 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); mutex_lock(&wq->flush_mutex); - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&pwq_lock); /* * Set the matching work_color. This is synchronized with @@ -3722,7 +3725,7 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, /* link in @pwq */ list_add_rcu(&pwq->pwqs_node, &wq->pwqs); - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); mutex_unlock(&wq->flush_mutex); } @@ -3886,10 +3889,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, */ mutex_lock(&wq_mutex); - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&pwq_lock); for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); list_add(&wq->list, &workqueues); @@ -3920,13 +3923,13 @@ void destroy_workqueue(struct workqueue_struct *wq) drain_workqueue(wq); /* sanity checks */ - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&pwq_lock); for_each_pwq(pwq, wq) { int i; for (i = 0; i < WORK_NR_COLORS; i++) { if (WARN_ON(pwq->nr_in_flight[i])) { - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); return; } } @@ -3934,11 +3937,11 @@ void destroy_workqueue(struct workqueue_struct *wq) if (WARN_ON(pwq->refcnt > 1) || WARN_ON(pwq->nr_active) || WARN_ON(!list_empty(&pwq->delayed_works))) { - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); return; } } - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); /* * wq list is used to freeze wq, remove from list after @@ -4000,14 +4003,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&pwq_lock); wq->saved_max_active = max_active; for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); } EXPORT_SYMBOL_GPL(workqueue_set_max_active); @@ -4266,7 +4269,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu); * pool->worklist. * * CONTEXT: - * Grabs and releases wq_mutex, workqueue_lock and pool->lock's. + * Grabs and releases wq_mutex, pwq_lock and pool->lock's. */ void freeze_workqueues_begin(void) { @@ -4289,12 +4292,12 @@ void freeze_workqueues_begin(void) } /* suppress further executions by setting max_active to zero */ - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&pwq_lock); list_for_each_entry(wq, &workqueues, list) { for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); } - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); mutex_unlock(&wq_mutex); } @@ -4352,7 +4355,7 @@ out_unlock: * frozen works are transferred to their respective pool worklists. * * CONTEXT: - * Grabs and releases wq_mutex, workqueue_lock and pool->lock's. + * Grabs and releases wq_mutex, pwq_lock and pool->lock's. 
*/ void thaw_workqueues(void) { @@ -4375,12 +4378,12 @@ void thaw_workqueues(void) } /* restore max_active and repopulate worklist */ - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&pwq_lock); list_for_each_entry(wq, &workqueues, list) { for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); } - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&pwq_lock); /* kick workers */ for_each_pool(pool, pi) { -- cgit v1.2.2 From 2e109a2855bf6cf675a8b74dbd89b6492e8def42 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Mar 2013 19:47:40 -0700 Subject: workqueue: rename workqueue_lock to wq_mayday_lock With the recent locking updates, the only thing protected by workqueue_lock is workqueue->maydays list. Rename workqueue_lock to wq_mayday_lock. This patch is pure rename. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 63856dfbd082..969be0b72071 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -125,10 +125,10 @@ enum { * * PW: pwq_lock protected. * - * W: workqueue_lock protected. - * * FR: wq->flush_mutex and pwq_lock protected for writes. Sched-RCU * protected for reads. + * + * MD: wq_mayday_lock protected. */ /* struct worker is defined in workqueue_internal.h */ @@ -194,7 +194,7 @@ struct pool_workqueue { int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ struct list_head pwqs_node; /* FR: node on wq->pwqs */ - struct list_head mayday_node; /* W: node on wq->maydays */ + struct list_head mayday_node; /* MD: node on wq->maydays */ /* * Release of unbound pwq is punted to system_wq. See put_pwq() @@ -235,7 +235,7 @@ struct workqueue_struct { struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ - struct list_head maydays; /* W: pwqs requesting rescue */ + struct list_head maydays; /* MD: pwqs requesting rescue */ struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* WQ: drain in progress */ @@ -254,7 +254,7 @@ static struct kmem_cache *pwq_cache; static DEFINE_MUTEX(wq_mutex); /* protects workqueues and pools */ static DEFINE_SPINLOCK(pwq_lock); /* protects pool_workqueues */ -static DEFINE_SPINLOCK(workqueue_lock); +static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ static LIST_HEAD(workqueues); /* WQ: list of all workqueues */ static bool workqueue_freezing; /* WQ: have wqs started freezing? 
*/ @@ -1894,7 +1894,7 @@ static void send_mayday(struct work_struct *work) struct pool_workqueue *pwq = get_work_pwq(work); struct workqueue_struct *wq = pwq->wq; - lockdep_assert_held(&workqueue_lock); + lockdep_assert_held(&wq_mayday_lock); if (!wq->rescuer) return; @@ -1911,7 +1911,7 @@ static void pool_mayday_timeout(unsigned long __pool) struct worker_pool *pool = (void *)__pool; struct work_struct *work; - spin_lock_irq(&workqueue_lock); /* for wq->maydays */ + spin_lock_irq(&wq_mayday_lock); /* for wq->maydays */ spin_lock(&pool->lock); if (need_to_create_worker(pool)) { @@ -1926,7 +1926,7 @@ static void pool_mayday_timeout(unsigned long __pool) } spin_unlock(&pool->lock); - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&wq_mayday_lock); mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); } @@ -2404,7 +2404,7 @@ repeat: } /* see whether any pwq is asking for help */ - spin_lock_irq(&workqueue_lock); + spin_lock_irq(&wq_mayday_lock); while (!list_empty(&wq->maydays)) { struct pool_workqueue *pwq = list_first_entry(&wq->maydays, @@ -2415,7 +2415,7 @@ repeat: __set_current_state(TASK_RUNNING); list_del_init(&pwq->mayday_node); - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&wq_mayday_lock); /* migrate to the target cpu if possible */ worker_maybe_bind_and_lock(pool); @@ -2442,10 +2442,10 @@ repeat: rescuer->pool = NULL; spin_unlock(&pool->lock); - spin_lock(&workqueue_lock); + spin_lock(&wq_mayday_lock); } - spin_unlock_irq(&workqueue_lock); + spin_unlock_irq(&wq_mayday_lock); /* rescuers should never participate in concurrency management */ WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); -- cgit v1.2.2 From 647f8d94a4e69d39e88a617846755655853c20f5 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 8 Mar 2013 16:18:21 +0100 Subject: ARM: at91: add gpio suspend/resume support when using pinctrl GPIO suspend/resume and wakeup sources were not managed when using pinctrl, so it was impossible to wake up the system with a gpio.
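For context, here is a hedged sketch of how a driver would arm such a gpio wakeup from its suspend path. The foo_* names are hypothetical; device_may_wakeup(), gpio_to_irq() and enable_irq_wake() are the standard kernel helpers involved:

static int foo_suspend(struct device *dev)
{
	struct foo_priv *priv = dev_get_drvdata(dev);

	/*
	 * Routes through the gpio irq_chip's .irq_set_wake(), i.e.
	 * gpio_irq_set_wake() in the diff below, which records the
	 * pin in wakeups[bank].
	 */
	if (device_may_wakeup(dev))
		enable_irq_wake(gpio_to_irq(priv->wake_gpio));

	return 0;
}

On suspend, at91_pinctrl_gpio_suspend() then masks every interrupt on the bank except the recorded wakeup pins, and gates the bank clock only when no wakeup is armed.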
Signed-off-by: Ludovic Desroches Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Linus Walleij Signed-off-by: Nicolas Ferre --- arch/arm/mach-at91/include/mach/gpio.h | 8 +++++ arch/arm/mach-at91/pm.c | 10 ++++-- drivers/pinctrl/pinctrl-at91.c | 61 +++++++++++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-at91/include/mach/gpio.h b/arch/arm/mach-at91/include/mach/gpio.h index eed465ab0dd7..5fc23771c154 100644 --- a/arch/arm/mach-at91/include/mach/gpio.h +++ b/arch/arm/mach-at91/include/mach/gpio.h @@ -209,6 +209,14 @@ extern int at91_get_gpio_value(unsigned pin); extern void at91_gpio_suspend(void); extern void at91_gpio_resume(void); +#ifdef CONFIG_PINCTRL_AT91 +extern void at91_pinctrl_gpio_suspend(void); +extern void at91_pinctrl_gpio_resume(void); +#else +static inline void at91_pinctrl_gpio_suspend(void) {} +static inline void at91_pinctrl_gpio_resume(void) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index adb6db888a1f..73f1f250403a 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -201,7 +201,10 @@ extern u32 at91_slow_clock_sz; static int at91_pm_enter(suspend_state_t state) { - at91_gpio_suspend(); + if (of_have_populated_dt()) + at91_pinctrl_gpio_suspend(); + else + at91_gpio_suspend(); at91_irq_suspend(); pr_debug("AT91: PM - wake mask %08x, pm state %d\n", @@ -286,7 +289,10 @@ static int at91_pm_enter(suspend_state_t state) error: target_state = PM_SUSPEND_ON; at91_irq_resume(); - at91_gpio_resume(); + if (of_have_populated_dt()) + at91_pinctrl_gpio_resume(); + else + at91_gpio_resume(); return 0; } diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 75933a6aa828..efb7f10e902a 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1277,21 +1277,80 @@ static int alt_gpio_irq_type(struct irq_data *d, unsigned type) } #ifdef CONFIG_PM + +static u32 wakeups[MAX_GPIO_BANKS]; +static u32 backups[MAX_GPIO_BANKS]; + static int gpio_irq_set_wake(struct irq_data *d, unsigned state) { struct at91_gpio_chip *at91_gpio = irq_data_get_irq_chip_data(d); unsigned bank = at91_gpio->pioc_idx; + unsigned mask = 1 << d->hwirq; if (unlikely(bank >= MAX_GPIO_BANKS)) return -EINVAL; + if (state) + wakeups[bank] |= mask; + else + wakeups[bank] &= ~mask; + irq_set_irq_wake(at91_gpio->pioc_virq, state); return 0; } + +void at91_pinctrl_gpio_suspend(void) +{ + int i; + + for (i = 0; i < gpio_banks; i++) { + void __iomem *pio; + + if (!gpio_chips[i]) + continue; + + pio = gpio_chips[i]->regbase; + + backups[i] = __raw_readl(pio + PIO_IMR); + __raw_writel(backups[i], pio + PIO_IDR); + __raw_writel(wakeups[i], pio + PIO_IER); + + if (!wakeups[i]) { + clk_unprepare(gpio_chips[i]->clock); + clk_disable(gpio_chips[i]->clock); + } else { + printk(KERN_DEBUG "GPIO-%c may wake for %08x\n", + 'A'+i, wakeups[i]); + } + } +} + +void at91_pinctrl_gpio_resume(void) +{ + int i; + + for (i = 0; i < gpio_banks; i++) { + void __iomem *pio; + + if (!gpio_chips[i]) + continue; + + pio = gpio_chips[i]->regbase; + + if (!wakeups[i]) { + if (clk_prepare(gpio_chips[i]->clock) == 0) + clk_enable(gpio_chips[i]->clock); + } + + __raw_writel(wakeups[i], pio + PIO_IDR); + __raw_writel(backups[i], pio + PIO_IER); + } +} + #else #define gpio_irq_set_wake NULL -#endif +#endif /* CONFIG_PM */ static struct irq_chip gpio_irqchip = { .name = "GPIO", -- cgit v1.2.2 From 0ed66befaae893e82c9f016238282d73ef9fd6c7 Mon Sep 17 00:00:00 2001 
From: Ludovic Desroches Date: Fri, 8 Mar 2013 16:13:57 +0100 Subject: ARM: at91: fix infinite loop in at91_irq_suspend/resume Fix an infinite loop when suspending or resuming a device with AIC5. Signed-off-by: Ludovic Desroches Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Nicolas Ferre --- arch/arm/mach-at91/irq.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/arm/mach-at91/irq.c b/arch/arm/mach-at91/irq.c index 8e210262aeee..e0ca59171022 100644 --- a/arch/arm/mach-at91/irq.c +++ b/arch/arm/mach-at91/irq.c @@ -92,23 +92,21 @@ static int at91_aic_set_wake(struct irq_data *d, unsigned value) void at91_irq_suspend(void) { - int i = 0, bit; + int bit = -1; if (has_aic5()) { /* disable enabled irqs */ - while ((bit = find_next_bit(backups, n_irqs, i)) < n_irqs) { + while ((bit = find_next_bit(backups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IDCR, 1); - i = bit; } /* enable wakeup irqs */ - i = 0; - while ((bit = find_next_bit(wakeups, n_irqs, i)) < n_irqs) { + bit = -1; + while ((bit = find_next_bit(wakeups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IECR, 1); - i = bit; } } else { at91_aic_write(AT91_AIC_IDCR, *backups); @@ -118,23 +116,21 @@ void at91_irq_suspend(void) void at91_irq_resume(void) { - int i = 0, bit; + int bit = -1; if (has_aic5()) { /* disable wakeup irqs */ - while ((bit = find_next_bit(wakeups, n_irqs, i)) < n_irqs) { + while ((bit = find_next_bit(wakeups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IDCR, 1); - i = bit; } /* enable irqs disabled for suspend */ - i = 0; - while ((bit = find_next_bit(backups, n_irqs, i)) < n_irqs) { + bit = -1; + while ((bit = find_next_bit(backups, n_irqs, bit + 1)) < n_irqs) { at91_aic_write(AT91_AIC5_SSR, bit & AT91_AIC5_INTSEL_MSK); at91_aic_write(AT91_AIC5_IECR, 1); - i = bit; } } else { at91_aic_write(AT91_AIC_IDCR, *wakeups); -- cgit v1.2.2 From db9e51617faad3a54d10b7cb340a82688ec0232d Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Thu, 14 Mar 2013 10:18:29 +0100 Subject: usb: musb: da8xx: Fix build breakage due to typo Commit 032ec49f5351e9cb242b1a1c367d14415043ab95 (usb: musb: drop useless board_mode usage) introduced a typo that breaks the build. 
Signed-off-by: Mikhail Kshevetskiy [ Fixed commit message ] Cc: Mikhail Kshevetskiy Cc: Sergei Shtylyov Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Signed-off-by: Michael Riesch Signed-off-by: Felipe Balbi --- drivers/usb/musb/da8xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 7c71769d71ff..41613a2b35e8 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -327,7 +327,7 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) u8 devctl = musb_readb(mregs, MUSB_DEVCTL); int err; - err = musb->int_usb & USB_INTR_VBUSERROR; + err = musb->int_usb & MUSB_INTR_VBUSERROR; if (err) { /* * The Mentor core doesn't debounce VBUS as needed -- cgit v1.2.2 From 273daf2f2ab9f42d82f017b20fcf902ec8d7cffa Mon Sep 17 00:00:00 2001 From: Bo Shen Date: Wed, 13 Mar 2013 16:54:07 +0800 Subject: usb: gadget: u_serial: fix typo which cause build warning fix typo error introduced by commit ea0e6276 (usb: gadget: add multiple definition guards) which causes the following build warning: warning: "pr_vdebug" redefined Signed-off-by: Bo Shen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/u_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c index c5034d9c946b..b369292d4b90 100644 --- a/drivers/usb/gadget/u_serial.c +++ b/drivers/usb/gadget/u_serial.c @@ -136,7 +136,7 @@ static struct portmaster { pr_debug(fmt, ##arg) #endif /* pr_vdebug */ #else -#ifndef pr_vdebig +#ifndef pr_vdebug #define pr_vdebug(fmt, arg...) \ ({ if (0) pr_debug(fmt, ##arg); }) #endif /* pr_vdebug */ -- cgit v1.2.2 From d1398ccfec56e54010476efd6a316427d29045a6 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 13 Mar 2013 15:34:24 -0700 Subject: perf tools: Fix LIBNUMA build with glibc 2.12 and older. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tokens MADV_HUGEPAGE and MADV_NOHUGEPAGE are not available with glibc 2.12 and older. Define these tokens if they are not already defined. This patch fixes these build errors with older versions of glibc. CC bench/numa.o bench/numa.c: In function ‘alloc_data’: bench/numa.c:334: error: ‘MADV_HUGEPAGE’ undeclared (first use in this function) bench/numa.c:334: error: (Each undeclared identifier is reported only once bench/numa.c:334: error: for each function it appears in.) bench/numa.c:341: error: ‘MADV_NOHUGEPAGE’ undeclared (first use in this function) make: *** [bench/numa.o] Error 1 Signed-off-by: Vinson Lee Acked-by: Ingo Molnar Cc: Ingo Molnar Cc: Irina Tirdea Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1363214064-4671-2-git-send-email-vlee@twitter.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/bench.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a5223e6a7b43..0fdc85269c4d 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -1,6 +1,30 @@ #ifndef BENCH_H #define BENCH_H +/* + * The madvise transparent hugepage constants were added in glibc + * 2.13. For compatibility with older versions of glibc, define these + * tokens if they are not already defined. + * + * PA-RISC uses different madvise values from other architectures and + * needs to be special-cased. 
+ */ +#ifdef __hppa__ +# ifndef MADV_HUGEPAGE +# define MADV_HUGEPAGE 67 +# endif +# ifndef MADV_NOHUGEPAGE +# define MADV_NOHUGEPAGE 68 +# endif +#else +# ifndef MADV_HUGEPAGE +# define MADV_HUGEPAGE 14 +# endif +# ifndef MADV_NOHUGEPAGE +# define MADV_NOHUGEPAGE 15 +# endif +#endif + extern int bench_numa(int argc, const char **argv, const char *prefix); extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); -- cgit v1.2.2 From 42b84328428bfe305fcb60eb382fba60cee9071f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 14 Mar 2013 12:52:05 +0100 Subject: ARM: i.MX25: Fix DT compilation The i.MX25 DT machine descriptor calls a non-existent imx25_timer_init() function. This patch adds it to fix compilation. Signed-off-by: Sascha Hauer --- arch/arm/mach-imx/imx25-dt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/mach-imx/imx25-dt.c b/arch/arm/mach-imx/imx25-dt.c index 03b65e5ea541..82348391582a 100644 --- a/arch/arm/mach-imx/imx25-dt.c +++ b/arch/arm/mach-imx/imx25-dt.c @@ -27,6 +27,11 @@ static const char * const imx25_dt_board_compat[] __initconst = { NULL }; +static void __init imx25_timer_init(void) +{ + mx25_clocks_init_dt(); +} + DT_MACHINE_START(IMX25_DT, "Freescale i.MX25 (Device Tree Support)") .map_io = mx25_map_io, .init_early = imx25_init_early, -- cgit v1.2.2 From 5bc7c33ca93a285dcfe7b7fd64970f6314440ad1 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 13 Mar 2013 09:51:31 -0700 Subject: mtd: nand: reintroduce NAND_NO_READRDY as NAND_NEED_READRDY This partially reverts commit 1696e6bc2ae83734e64e206ac99766ea19e9a14e ("mtd: nand: kill NAND_NO_READRDY"). In that patch I overlooked a few things. The original documentation for NAND_NO_READRDY included "True for all large page devices, as they do not support autoincrement." I was conflating "not support autoincrement" with the NAND_NO_AUTOINCR option, which was in fact doing nothing. So, when I dropped NAND_NO_AUTOINCR, I concluded that I then could harmlessly drop NAND_NO_READRDY. But of course the fact that NAND_NO_AUTOINCR was doing nothing didn't mean NAND_NO_READRDY was doing nothing... So, NAND_NO_READRDY is re-introduced as NAND_NEED_READRDY and applied only to those few remaining small-page NAND devices which needed it in the first place.
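In effect, each raw read path on these parts regains a ready/busy handshake. A commented sketch of the check the nand_base.c hunks below add (a paraphrase, not a verbatim quote of the diff):

	if (chip->options & NAND_NEED_READRDY) {
		/*
		 * No dev_ready() callback wired up: wait out the
		 * chip's worst-case access time instead.
		 */
		if (!chip->dev_ready)
			udelay(chip->chip_delay);
		else
			nand_wait_ready(mtd);	/* poll the R/B# line */
	}

The small-page entries in nand_ids.c then opt in through the new SP_OPTIONS / SP_OPTIONS16 defines rather than carrying the flag on each table line.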
Cc: stable@kernel.org [3.5+] Reported-by: Alexander Shiyan Tested-by: Alexander Shiyan Signed-off-by: Brian Norris Signed-off-by: David Woodhouse --- drivers/mtd/nand/nand_base.c | 16 +++++++++ drivers/mtd/nand/nand_ids.c | 80 +++++++++++++++++++++++--------------------- include/linux/mtd/nand.h | 7 ++++ 3 files changed, 64 insertions(+), 39 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 43214151b882..42c63927609d 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1523,6 +1523,14 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, oobreadlen -= toread; } } + + if (chip->options & NAND_NEED_READRDY) { + /* Apply delay or wait for ready/busy pin */ + if (!chip->dev_ready) + udelay(chip->chip_delay); + else + nand_wait_ready(mtd); + } } else { memcpy(buf, chip->buffers->databuf + col, bytes); buf += bytes; @@ -1787,6 +1795,14 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, len = min(len, readlen); buf = nand_transfer_oob(chip, buf, ops, len); + if (chip->options & NAND_NEED_READRDY) { + /* Apply delay or wait for ready/busy pin */ + if (!chip->dev_ready) + udelay(chip->chip_delay); + else + nand_wait_ready(mtd); + } + readlen -= len; if (!readlen) break; diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index e3aa2748a6e7..9c612388e5de 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -22,49 +22,51 @@ * 512 512 Byte page size */ struct nand_flash_dev nand_flash_ids[] = { +#define SP_OPTIONS NAND_NEED_READRDY +#define SP_OPTIONS16 (SP_OPTIONS | NAND_BUSWIDTH_16) #ifdef CONFIG_MTD_NAND_MUSEUM_IDS - {"NAND 1MiB 5V 8-bit", 0x6e, 256, 1, 0x1000, 0}, - {"NAND 2MiB 5V 8-bit", 0x64, 256, 2, 0x1000, 0}, - {"NAND 4MiB 5V 8-bit", 0x6b, 512, 4, 0x2000, 0}, - {"NAND 1MiB 3,3V 8-bit", 0xe8, 256, 1, 0x1000, 0}, - {"NAND 1MiB 3,3V 8-bit", 0xec, 256, 1, 0x1000, 0}, - {"NAND 2MiB 3,3V 8-bit", 0xea, 256, 2, 0x1000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xd5, 512, 4, 0x2000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xe3, 512, 4, 0x2000, 0}, - {"NAND 4MiB 3,3V 8-bit", 0xe5, 512, 4, 0x2000, 0}, - {"NAND 8MiB 3,3V 8-bit", 0xd6, 512, 8, 0x2000, 0}, - - {"NAND 8MiB 1,8V 8-bit", 0x39, 512, 8, 0x2000, 0}, - {"NAND 8MiB 3,3V 8-bit", 0xe6, 512, 8, 0x2000, 0}, - {"NAND 8MiB 1,8V 16-bit", 0x49, 512, 8, 0x2000, NAND_BUSWIDTH_16}, - {"NAND 8MiB 3,3V 16-bit", 0x59, 512, 8, 0x2000, NAND_BUSWIDTH_16}, + {"NAND 1MiB 5V 8-bit", 0x6e, 256, 1, 0x1000, SP_OPTIONS}, + {"NAND 2MiB 5V 8-bit", 0x64, 256, 2, 0x1000, SP_OPTIONS}, + {"NAND 4MiB 5V 8-bit", 0x6b, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 1MiB 3,3V 8-bit", 0xe8, 256, 1, 0x1000, SP_OPTIONS}, + {"NAND 1MiB 3,3V 8-bit", 0xec, 256, 1, 0x1000, SP_OPTIONS}, + {"NAND 2MiB 3,3V 8-bit", 0xea, 256, 2, 0x1000, SP_OPTIONS}, + {"NAND 4MiB 3,3V 8-bit", 0xd5, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 4MiB 3,3V 8-bit", 0xe3, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 4MiB 3,3V 8-bit", 0xe5, 512, 4, 0x2000, SP_OPTIONS}, + {"NAND 8MiB 3,3V 8-bit", 0xd6, 512, 8, 0x2000, SP_OPTIONS}, + + {"NAND 8MiB 1,8V 8-bit", 0x39, 512, 8, 0x2000, SP_OPTIONS}, + {"NAND 8MiB 3,3V 8-bit", 0xe6, 512, 8, 0x2000, SP_OPTIONS}, + {"NAND 8MiB 1,8V 16-bit", 0x49, 512, 8, 0x2000, SP_OPTIONS16}, + {"NAND 8MiB 3,3V 16-bit", 0x59, 512, 8, 0x2000, SP_OPTIONS16}, #endif - {"NAND 16MiB 1,8V 8-bit", 0x33, 512, 16, 0x4000, 0}, - {"NAND 16MiB 3,3V 8-bit", 0x73, 512, 16, 0x4000, 0}, - {"NAND 16MiB 1,8V 16-bit", 0x43, 512, 16, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 16MiB 3,3V 16-bit", 0x53, 512, 16, 0x4000, 
NAND_BUSWIDTH_16}, - - {"NAND 32MiB 1,8V 8-bit", 0x35, 512, 32, 0x4000, 0}, - {"NAND 32MiB 3,3V 8-bit", 0x75, 512, 32, 0x4000, 0}, - {"NAND 32MiB 1,8V 16-bit", 0x45, 512, 32, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 32MiB 3,3V 16-bit", 0x55, 512, 32, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 64MiB 1,8V 8-bit", 0x36, 512, 64, 0x4000, 0}, - {"NAND 64MiB 3,3V 8-bit", 0x76, 512, 64, 0x4000, 0}, - {"NAND 64MiB 1,8V 16-bit", 0x46, 512, 64, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 64MiB 3,3V 16-bit", 0x56, 512, 64, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 128MiB 1,8V 8-bit", 0x78, 512, 128, 0x4000, 0}, - {"NAND 128MiB 1,8V 8-bit", 0x39, 512, 128, 0x4000, 0}, - {"NAND 128MiB 3,3V 8-bit", 0x79, 512, 128, 0x4000, 0}, - {"NAND 128MiB 1,8V 16-bit", 0x72, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 1,8V 16-bit", 0x49, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 3,3V 16-bit", 0x74, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - {"NAND 128MiB 3,3V 16-bit", 0x59, 512, 128, 0x4000, NAND_BUSWIDTH_16}, - - {"NAND 256MiB 3,3V 8-bit", 0x71, 512, 256, 0x4000, 0}, + {"NAND 16MiB 1,8V 8-bit", 0x33, 512, 16, 0x4000, SP_OPTIONS}, + {"NAND 16MiB 3,3V 8-bit", 0x73, 512, 16, 0x4000, SP_OPTIONS}, + {"NAND 16MiB 1,8V 16-bit", 0x43, 512, 16, 0x4000, SP_OPTIONS16}, + {"NAND 16MiB 3,3V 16-bit", 0x53, 512, 16, 0x4000, SP_OPTIONS16}, + + {"NAND 32MiB 1,8V 8-bit", 0x35, 512, 32, 0x4000, SP_OPTIONS}, + {"NAND 32MiB 3,3V 8-bit", 0x75, 512, 32, 0x4000, SP_OPTIONS}, + {"NAND 32MiB 1,8V 16-bit", 0x45, 512, 32, 0x4000, SP_OPTIONS16}, + {"NAND 32MiB 3,3V 16-bit", 0x55, 512, 32, 0x4000, SP_OPTIONS16}, + + {"NAND 64MiB 1,8V 8-bit", 0x36, 512, 64, 0x4000, SP_OPTIONS}, + {"NAND 64MiB 3,3V 8-bit", 0x76, 512, 64, 0x4000, SP_OPTIONS}, + {"NAND 64MiB 1,8V 16-bit", 0x46, 512, 64, 0x4000, SP_OPTIONS16}, + {"NAND 64MiB 3,3V 16-bit", 0x56, 512, 64, 0x4000, SP_OPTIONS16}, + + {"NAND 128MiB 1,8V 8-bit", 0x78, 512, 128, 0x4000, SP_OPTIONS}, + {"NAND 128MiB 1,8V 8-bit", 0x39, 512, 128, 0x4000, SP_OPTIONS}, + {"NAND 128MiB 3,3V 8-bit", 0x79, 512, 128, 0x4000, SP_OPTIONS}, + {"NAND 128MiB 1,8V 16-bit", 0x72, 512, 128, 0x4000, SP_OPTIONS16}, + {"NAND 128MiB 1,8V 16-bit", 0x49, 512, 128, 0x4000, SP_OPTIONS16}, + {"NAND 128MiB 3,3V 16-bit", 0x74, 512, 128, 0x4000, SP_OPTIONS16}, + {"NAND 128MiB 3,3V 16-bit", 0x59, 512, 128, 0x4000, SP_OPTIONS16}, + + {"NAND 256MiB 3,3V 8-bit", 0x71, 512, 256, 0x4000, SP_OPTIONS}, /* * These are the new chips with large page size. The pagesize and the diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7ccb3c59ed60..ef52d9c91459 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -187,6 +187,13 @@ typedef enum { * This happens with the Renesas AG-AND chips, possibly others. */ #define BBT_AUTO_REFRESH 0x00000080 +/* + * Chip requires ready check on read (for auto-incremented sequential read). + * True only for small page devices; large page devices do not support + * autoincrement. + */ +#define NAND_NEED_READRDY 0x00000100 + /* Chip does not allow subpage writes */ #define NAND_NO_SUBPAGE_WRITE 0x00000200 -- cgit v1.2.2 From 16fad69cfe4adbbfa813de516757b87bcae36d93 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2013 05:40:32 +0000 Subject: tcp: fix skb_availroom() Chrome OS team reported a crash on a Pixel ChromeBook in TCP stack : https://code.google.com/p/chromium/issues/detail?id=182056 commit a21d45726acac (tcp: avoid order-1 allocations on wifi and tx path) did a poor choice adding an 'avail_size' field to skb, while what we really needed was a 'reserved_tailroom' one. 
It would have avoided commit 22b4a4f22da (tcp: fix retransmit of partially acked frames) and this commit. The crash occurs because skb_split() is not aware of the 'avail_size' management (and should not be aware). Signed-off-by: Eric Dumazet Reported-by: Mukesh Agrawal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++-- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 821c7f45d2a7..6f2bb860e051 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -500,7 +500,7 @@ struct sk_buff { union { __u32 mark; __u32 dropcount; - __u32 avail_size; + __u32 reserved_tailroom; }; sk_buff_data_t inner_transport_header; @@ -1447,7 +1447,10 @@ static inline int skb_tailroom(const struct sk_buff *skb) */ static inline int skb_availroom(const struct sk_buff *skb) { - return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len; + if (skb_is_nonlinear(skb)) + return 0; + + return skb->end - skb->tail - skb->reserved_tailroom; } /** diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 47e854fcae24..e22020790709 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -775,7 +775,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) * Make sure that we have exactly size bytes * available to the caller, no more, no less. */ - skb->avail_size = size; + skb->reserved_tailroom = skb->end - skb->tail - size; return skb; } __kfree_skb(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e2b4461074da..817fbb396bc8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1298,7 +1298,6 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = min_t(int, len, skb_headlen(skb)); if (eat) { __skb_pull(skb, eat); - skb->avail_size -= eat; len -= eat; if (!len) return; -- cgit v1.2.2 From cca7af3889bfa343d33d5e657a38d876abd10e58 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 14 Mar 2013 03:29:40 +0000 Subject: skb: Propagate pfmemalloc on skb from head page only Hi. I'm trying to send big chunks of memory from application address space via TCP socket using vmsplice + splice like this mem = mmap(128Mb); vmsplice(pipe[1], mem); /* splice memory into pipe */ splice(pipe[0], tcp_socket); /* send it into network */ When I'm lucky and a huge page splices into the pipe and then into the socket _and_ client and server ends of the TCP connection are on the same host, communicating via lo, the whole connection gets stuck! The sending queue becomes full and the app stops writing/splicing more into it, but the receiving queue remains empty, and here's why. The __skb_fill_page_desc observes a tail page of a huge page and erroneously propagates its page->pfmemalloc value onto the socket (the pfmemalloc on tail pages contains garbage). Then this skb->pfmemalloc leaks through lo and due to the tcp_v4_rcv sk_filter if (skb->pfmemalloc && !sock_flag(sk, SOCK_MEMALLOC)) /* true */ return -ENOMEM goto release_and_discard; no packets reach the socket. Even TCP re-transmits are dropped by this, as skb cloning clones the pfmemalloc flag as well. That said, here's the proper page->pfmemalloc propagation onto the socket: we must check the huge page's head page only; other pages' pfmemalloc and mapping values do not contain what is expected in this place. However, I'm not sure whether this fix is _complete_, since pfmemalloc propagation via lo also doesn't look great.
Both, bit propagation from page to skb and this check in sk_filter, were introduced by c48a11c7 (netvm: propagate page->pfmemalloc to skb), in v3.5 so Mel and stable@ are in Cc. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Acked-by: Mel Gorman Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f2bb860e051..441f5bfdab8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1288,11 +1288,13 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, * do not lose pfmemalloc information as the pages would not be * allocated using __GFP_MEMALLOC. */ - if (page->pfmemalloc && !page->mapping) - skb->pfmemalloc = true; frag->page.p = page; frag->page_offset = off; skb_frag_size_set(frag, size); + + page = compound_head(page); + if (page->pfmemalloc && !page->mapping) + skb->pfmemalloc = true; } /** -- cgit v1.2.2 From eb20ff9c91ddcb2d55c1849a87d3db85af5e88a9 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 13 Mar 2013 19:46:20 -0300 Subject: Bluetooth: Fix not closing SCO sockets in the BT_CONNECT2 state With deferred setup for SCO, it is possible that userspace closes the socket when it is in the BT_CONNECT2 state, after the Connect Request is received but before the Accept Synchonous Connection is sent. If this happens the following crash was observed, when the connection is terminated: [ +0.000003] hci_sync_conn_complete_evt: hci0 status 0x10 [ +0.000005] sco_connect_cfm: hcon ffff88003d1bd800 bdaddr 40:98:4e:32:d7:39 status 16 [ +0.000003] sco_conn_del: hcon ffff88003d1bd800 conn ffff88003cc8e300, err 110 [ +0.000015] BUG: unable to handle kernel NULL pointer dereference at 0000000000000199 [ +0.000906] IP: [] __lock_acquire+0xed/0xe82 [ +0.000000] PGD 3d21f067 PUD 3d291067 PMD 0 [ +0.000000] Oops: 0002 [#1] SMP [ +0.000000] Modules linked in: rfcomm bnep btusb bluetooth [ +0.000000] CPU 0 [ +0.000000] Pid: 1481, comm: kworker/u:2H Not tainted 3.9.0-rc1-25019-gad82cdd #1 Bochs Bochs [ +0.000000] RIP: 0010:[] [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP: 0018:ffff88003c3c19d8 EFLAGS: 00010002 [ +0.000000] RAX: 0000000000000001 RBX: 0000000000000246 RCX: 0000000000000000 [ +0.000000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003d1be868 [ +0.000000] RBP: ffff88003c3c1a98 R08: 0000000000000002 R09: 0000000000000000 [ +0.000000] R10: ffff88003d1be868 R11: ffff88003e20b000 R12: 0000000000000002 [ +0.000000] R13: ffff88003aaa8000 R14: 000000000000006e R15: ffff88003d1be850 [ +0.000000] FS: 0000000000000000(0000) GS:ffff88003e200000(0000) knlGS:0000000000000000 [ +0.000000] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ +0.000000] CR2: 0000000000000199 CR3: 000000003c1cb000 CR4: 00000000000006b0 [ +0.000000] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.000000] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ +0.000000] Process kworker/u:2H (pid: 1481, threadinfo ffff88003c3c0000, task ffff88003aaa8000) [ +0.000000] Stack: [ +0.000000] ffffffff81b16342 0000000000000000 0000000000000000 ffff88003d1be868 [ +0.000000] ffffffff00000000 00018c0c7863e367 000000003c3c1a28 ffffffff8101efbd [ +0.000000] 0000000000000000 ffff88003e3d2400 ffff88003c3c1a38 ffffffff81007c7a [ +0.000000] Call Trace: [ +0.000000] [] ? kvm_clock_read+0x34/0x3b [ +0.000000] [] ? paravirt_sched_clock+0x9/0xd [ +0.000000] [] ? sched_clock+0x9/0xb [ +0.000000] [] ? 
sched_clock_local+0x12/0x75 [ +0.000000] [] lock_acquire+0x93/0xb1 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? lock_release_holdtime.part.22+0x4e/0x55 [ +0.000000] [] _raw_spin_lock+0x40/0x74 [ +0.000000] [] ? spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] ? _raw_spin_unlock+0x23/0x36 [ +0.000000] [] spin_lock+0x9/0xb [bluetooth] [ +0.000000] [] sco_conn_del+0x76/0xbb [bluetooth] [ +0.000000] [] sco_connect_cfm+0x2da/0x2e9 [bluetooth] [ +0.000000] [] hci_proto_connect_cfm+0x38/0x65 [bluetooth] [ +0.000000] [] hci_sync_conn_complete_evt.isra.79+0x11a/0x13e [bluetooth] [ +0.000000] [] hci_event_packet+0x153b/0x239d [bluetooth] [ +0.000000] [] ? _raw_spin_unlock_irqrestore+0x48/0x5c [ +0.000000] [] hci_rx_work+0xf3/0x2e3 [bluetooth] [ +0.000000] [] process_one_work+0x1dc/0x30b [ +0.000000] [] ? process_one_work+0x172/0x30b [ +0.000000] [] ? spin_lock_irq+0x9/0xb [ +0.000000] [] worker_thread+0x123/0x1d2 [ +0.000000] [] ? manage_workers+0x240/0x240 [ +0.000000] [] kthread+0x9d/0xa5 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] [] ret_from_fork+0x7c/0xb0 [ +0.000000] [] ? __kthread_parkme+0x60/0x60 [ +0.000000] Code: d7 44 89 8d 50 ff ff ff 4c 89 95 58 ff ff ff e8 44 fc ff ff 44 8b 8d 50 ff ff ff 48 85 c0 4c 8b 95 58 ff ff ff 0f 84 7a 04 00 00 ff 80 98 01 00 00 83 3d 25 41 a7 00 00 45 8b b5 e8 05 00 00 [ +0.000000] RIP [] __lock_acquire+0xed/0xe82 [ +0.000000] RSP [ +0.000000] CR2: 0000000000000199 [ +0.000000] ---[ end trace e73cd3b52352dd34 ]--- Cc: stable@vger.kernel.org [3.8] Signed-off-by: Vinicius Costa Gomes Tested-by: Frederic Dalleau Signed-off-by: Gustavo Padovan --- net/bluetooth/sco.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57f250c20e39..aaf1957bc4fe 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -361,6 +361,7 @@ static void __sco_sock_close(struct sock *sk) sco_chan_del(sk, ECONNRESET); break; + case BT_CONNECT2: case BT_CONNECT: case BT_DISCONN: sco_chan_del(sk, ECONNRESET); -- cgit v1.2.2 From 69d34da2984c95b33ea21518227e1f9470f11d95 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 14 Mar 2013 13:50:56 -0400 Subject: tracing: Protect tracer flags with trace_types_lock Seems that the tracer flags have never been protected from synchronous writes. Luckily, admins don't usually modify the tracing flags via two different tasks. But if scripts were to be used to modify them, then they could get corrupted. Move the trace_types_lock that protects against tracers changing to also protect the flags being set. 
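To see the race being closed here: an unlocked read-modify-write on a shared flags word lets two writers read the same old value and clobber each other's bits. A minimal sketch of the locked pattern the patch adopts (all names below are hypothetical, not the actual trace.c symbols):

static DEFINE_MUTEX(example_flags_lock);
static unsigned int example_flags;

static void example_set_flag(unsigned int mask, int enabled)
{
	/* Serialize the read-modify-write so two concurrent
	 * writers cannot lose each other's updates. */
	mutex_lock(&example_flags_lock);
	if (enabled)
		example_flags |= mask;
	else
		example_flags &= ~mask;
	mutex_unlock(&example_flags_lock);
}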
Cc: stable@vger.kernel.org Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 53df2839bb93..00daf5f8c50b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2916,6 +2916,8 @@ static int trace_set_options(char *option) cmp += 2; } + mutex_lock(&trace_types_lock); + for (i = 0; trace_options[i]; i++) { if (strcmp(cmp, trace_options[i]) == 0) { set_tracer_flags(1 << i, !neg); @@ -2924,11 +2926,10 @@ static int trace_set_options(char *option) } /* If no option could be set, test the specific tracer options */ - if (!trace_options[i]) { - mutex_lock(&trace_types_lock); + if (!trace_options[i]) ret = set_tracer_option(current_trace, cmp, neg); - mutex_unlock(&trace_types_lock); - } + + mutex_unlock(&trace_types_lock); return ret; } @@ -4781,7 +4782,10 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, if (val != 0 && val != 1) return -EINVAL; + + mutex_lock(&trace_types_lock); set_tracer_flags(1 << index, val); + mutex_unlock(&trace_types_lock); *ppos += cnt; -- cgit v1.2.2 From d6d1053a8bbf75e5eb6ef29ddcf87e66421763c4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 1 Mar 2013 14:12:01 +0100 Subject: clk: vt8500: Fix "fix device clock divisor calculations" Patch 72480014b8 "Fix device clock divisor calculations" was apparently rebased incorrectly before it went upstream, causing a build error. Replacing the "prate" pointer with the local parent_rate is most likely the correct solution. Signed-off-by: Arnd Bergmann Cc: Tony Prisk Cc: Mike Turquette --- drivers/clk/clk-vt8500.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-vt8500.c b/drivers/clk/clk-vt8500.c index b5538bba7a10..09c63315e579 100644 --- a/drivers/clk/clk-vt8500.c +++ b/drivers/clk/clk-vt8500.c @@ -157,7 +157,7 @@ static int vt8500_dclk_set_rate(struct clk_hw *hw, unsigned long rate, divisor = parent_rate / rate; /* If prate / rate would be decimal, incr the divisor */ - if (rate * divisor < *prate) + if (rate * divisor < parent_rate) divisor++; if (divisor == cdev->div_mask + 1) -- cgit v1.2.2 From 01ffe957e2f0340a13fd40a6577d029f252ad7c7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Feb 2013 23:11:33 +0100 Subject: [media] s5p-fimc: fix s5pv210 build 56bc911 "[media] s5p-fimc: Redefine platform data structure for fimc-is" changed the bus_type member of struct fimc_source_info treewide, but got one instance wrong in mach-s5pv210, which was evidently not even build tested. This adds the missing change to get s5pv210_defconfig to build again. Applies on Mauro's media tree.
Signed-off-by: Arnd Bergmann Cc: Sylwester Nawrocki Cc: Kyungmin Park Cc: Kukjin Kim Cc: Mauro Carvalho Chehab --- arch/arm/mach-s5pv210/mach-goni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c index 3a38f7b34b94..e373de44a8b6 100644 --- a/arch/arm/mach-s5pv210/mach-goni.c +++ b/arch/arm/mach-s5pv210/mach-goni.c @@ -845,7 +845,7 @@ static struct fimc_source_info goni_camera_sensors[] = { .mux_id = 0, .flags = V4L2_MBUS_PCLK_SAMPLE_FALLING | V4L2_MBUS_VSYNC_ACTIVE_LOW, - .bus_type = FIMC_BUS_TYPE_ITU_601, + .fimc_bus_type = FIMC_BUS_TYPE_ITU_601, .board_info = &noon010pc30_board_info, .i2c_bus_num = 0, .clk_frequency = 16000000UL, -- cgit v1.2.2 From 6fdd496e07f511a94ba27e4a1433038b32d6af05 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Feb 2013 17:11:09 +0100 Subject: input/joystick: use get_cycles on ARM ARM normally has an accurate clock source, so we can theoretically use analog joysticks more accurately and at the same time avoid the build warning #warning Precise timer not defined for this architecture. from the joystick driver. Now, why anybody would use that driver on ARM I have no idea, but Ben Dooks enabled it in the s3c2410_defconfig along with a bunch of other drivers, even though that platform has neither ISA nor PCI support. It still seems to be the right thing to fix this quirk. Signed-off-by: Arnd Bergmann Cc: Dmitry Torokhov Cc: Vojtech Pavlik Cc: Ben Dooks --- drivers/input/joystick/analog.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 7cd74e29cbc8..9135606c8649 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -158,14 +158,10 @@ static unsigned int get_time_pit(void) #define GET_TIME(x) rdtscl(x) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" -#elif defined(__alpha__) +#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_TILE) #define GET_TIME(x) do { x = get_cycles(); } while (0) #define DELTA(x,y) ((y)-(x)) -#define TIME_NAME "PCC" -#elif defined(CONFIG_MN10300) || defined(CONFIG_TILE) -#define GET_TIME(x) do { x = get_cycles(); } while (0) -#define DELTA(x, y) ((x) - (y)) -#define TIME_NAME "TSC" +#define TIME_NAME "get_cycles" #else #define FAKE_TIME static unsigned long analog_faketime = 0; -- cgit v1.2.2 From 80902822658aab18330569587cdb69ac1dfdcea8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 14 Mar 2013 14:20:54 -0400 Subject: tracing: Keep overwrite in sync between regular and snapshot buffers Changing the overwrite mode for the ring buffer via the trace option only sets the normal buffer. But the snapshot buffer could swap with it, and then the snapshot would be in non-overwrite mode and the normal buffer would be in overwrite mode, even though the option flag states otherwise. Keep the two buffers' overwrite modes in sync.
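The hazard is easiest to see with the swap spelled out: if only the live buffer's mode is updated, a later swap exchanges the buffers but not their modes. A rough sketch of the invariant, using hypothetical types rather than the real ring-buffer API:

struct example_buf {
	bool overwrite;
};

static struct example_buf live_buf, snap_buf;

static void example_set_overwrite(bool on)
{
	/* Update both buffers so that swapping live_buf and snap_buf
	 * can never leave the active buffer disagreeing with the
	 * option flag. */
	live_buf.overwrite = on;
	snap_buf.overwrite = on;
}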
Cc: stable@vger.kernel.org Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 00daf5f8c50b..02debabe9ed4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2895,8 +2895,12 @@ static void set_tracer_flags(unsigned int mask, int enabled) if (mask == TRACE_ITER_RECORD_CMD) trace_event_enable_cmd_record(enabled); - if (mask == TRACE_ITER_OVERWRITE) + if (mask == TRACE_ITER_OVERWRITE) { ring_buffer_change_overwrite(global_trace.buffer, enabled); +#ifdef CONFIG_TRACER_MAX_TRACE + ring_buffer_change_overwrite(max_tr.buffer, enabled); +#endif + } if (mask == TRACE_ITER_PRINTK) trace_printk_start_stop_comm(enabled); -- cgit v1.2.2 From 613f04a0f51e6e68ac6fe571ab79da3c0a5eb4da Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 14 Mar 2013 15:03:53 -0400 Subject: tracing: Prevent buffer overwrite disabled for latency tracers The latency tracers require the buffers to be in overwrite mode, otherwise they get screwed up. Force the buffers to stay in overwrite mode when latency tracers are enabled. Added a flag_changed() method to the tracer structure to allow the tracers to see what flags are being changed, and also be able to prevent the change from happing. Cc: stable@vger.kernel.org Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 38 ++++++++++++++++++++++++++++++++------ kernel/trace/trace.h | 6 ++++++ kernel/trace/trace_irqsoff.c | 19 ++++++++++++++----- kernel/trace/trace_sched_wakeup.c | 18 +++++++++++++----- 4 files changed, 65 insertions(+), 16 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 02debabe9ed4..4f1dade56981 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2881,11 +2881,25 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg) return -EINVAL; } -static void set_tracer_flags(unsigned int mask, int enabled) +/* Some tracers require overwrite to stay enabled */ +int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) +{ + if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set) + return -1; + + return 0; +} + +int set_tracer_flag(unsigned int mask, int enabled) { /* do nothing if flag is already set */ if (!!(trace_flags & mask) == !!enabled) - return; + return 0; + + /* Give the tracer a chance to approve the change */ + if (current_trace->flag_changed) + if (current_trace->flag_changed(current_trace, mask, !!enabled)) + return -EINVAL; if (enabled) trace_flags |= mask; @@ -2904,13 +2918,15 @@ static void set_tracer_flags(unsigned int mask, int enabled) if (mask == TRACE_ITER_PRINTK) trace_printk_start_stop_comm(enabled); + + return 0; } static int trace_set_options(char *option) { char *cmp; int neg = 0; - int ret = 0; + int ret = -ENODEV; int i; cmp = strstrip(option); @@ -2924,7 +2940,7 @@ static int trace_set_options(char *option) for (i = 0; trace_options[i]; i++) { if (strcmp(cmp, trace_options[i]) == 0) { - set_tracer_flags(1 << i, !neg); + ret = set_tracer_flag(1 << i, !neg); break; } } @@ -2943,6 +2959,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { char buf[64]; + int ret; if (cnt >= sizeof(buf)) return -EINVAL; @@ -2952,7 +2969,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; - trace_set_options(buf); + ret = trace_set_options(buf); + if (ret < 0) + return ret; *ppos += cnt; @@ -3256,6 +3275,9 @@ static int tracing_set_tracer(const char 
*buf) goto out; trace_branch_disable(); + + current_trace->enabled = false; + if (current_trace->reset) current_trace->reset(tr); @@ -3300,6 +3322,7 @@ static int tracing_set_tracer(const char *buf) } current_trace = t; + current_trace->enabled = true; trace_branch_enable(tr); out: mutex_unlock(&trace_types_lock); @@ -4788,9 +4811,12 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, return -EINVAL; mutex_lock(&trace_types_lock); - set_tracer_flags(1 << index, val); + ret = set_tracer_flag(1 << index, val); mutex_unlock(&trace_types_lock); + if (ret < 0) + return ret; + *ppos += cnt; return cnt; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 57d7e5397d56..2081971367ea 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -283,11 +283,15 @@ struct tracer { enum print_line_t (*print_line)(struct trace_iterator *iter); /* If you handled the flag setting, return 0 */ int (*set_flag)(u32 old_flags, u32 bit, int set); + /* Return 0 if OK with change, else return non-zero */ + int (*flag_changed)(struct tracer *tracer, + u32 mask, int set); struct tracer *next; struct tracer_flags *flags; bool print_max; bool use_max_tr; bool allocated_snapshot; + bool enabled; }; @@ -943,6 +947,8 @@ extern const char *__stop___trace_bprintk_fmt[]; void trace_printk_init_buffers(void); void trace_printk_start_comm(void); +int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); +int set_tracer_flag(unsigned int mask, int enabled); #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 713a2cac4881..443b25b43b4f 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -32,7 +32,7 @@ enum { static int trace_type __read_mostly; -static int save_lat_flag; +static int save_flags; static void stop_irqsoff_tracer(struct trace_array *tr, int graph); static int start_irqsoff_tracer(struct trace_array *tr, int graph); @@ -558,8 +558,11 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph) static void __irqsoff_tracer_init(struct trace_array *tr) { - save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; - trace_flags |= TRACE_ITER_LATENCY_FMT; + save_flags = trace_flags; + + /* non overwrite screws up the latency tracers */ + set_tracer_flag(TRACE_ITER_OVERWRITE, 1); + set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); tracing_max_latency = 0; irqsoff_trace = tr; @@ -573,10 +576,13 @@ static void __irqsoff_tracer_init(struct trace_array *tr) static void irqsoff_tracer_reset(struct trace_array *tr) { + int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; + int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; + stop_irqsoff_tracer(tr, is_graph()); - if (!save_lat_flag) - trace_flags &= ~TRACE_ITER_LATENCY_FMT; + set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); + set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); } static void irqsoff_tracer_start(struct trace_array *tr) @@ -609,6 +615,7 @@ static struct tracer irqsoff_tracer __read_mostly = .print_line = irqsoff_print_line, .flags = &tracer_flags, .set_flag = irqsoff_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_irqsoff, #endif @@ -642,6 +649,7 @@ static struct tracer preemptoff_tracer __read_mostly = .print_line = irqsoff_print_line, .flags = &tracer_flags, .set_flag = irqsoff_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = 
trace_selftest_startup_preemptoff, #endif @@ -677,6 +685,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly = .print_line = irqsoff_print_line, .flags = &tracer_flags, .set_flag = irqsoff_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_preemptirqsoff, #endif diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 75aa97fbe1a1..fde652c9a511 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -36,7 +36,7 @@ static void __wakeup_reset(struct trace_array *tr); static int wakeup_graph_entry(struct ftrace_graph_ent *trace); static void wakeup_graph_return(struct ftrace_graph_ret *trace); -static int save_lat_flag; +static int save_flags; #define TRACE_DISPLAY_GRAPH 1 @@ -540,8 +540,11 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int __wakeup_tracer_init(struct trace_array *tr) { - save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; - trace_flags |= TRACE_ITER_LATENCY_FMT; + save_flags = trace_flags; + + /* non overwrite screws up the latency tracers */ + set_tracer_flag(TRACE_ITER_OVERWRITE, 1); + set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1); tracing_max_latency = 0; wakeup_trace = tr; @@ -563,12 +566,15 @@ static int wakeup_rt_tracer_init(struct trace_array *tr) static void wakeup_tracer_reset(struct trace_array *tr) { + int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; + int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; + stop_wakeup_tracer(tr); /* make sure we put back any tasks we are tracing */ wakeup_reset(tr); - if (!save_lat_flag) - trace_flags &= ~TRACE_ITER_LATENCY_FMT; + set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); + set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag); } static void wakeup_tracer_start(struct trace_array *tr) @@ -594,6 +600,7 @@ static struct tracer wakeup_tracer __read_mostly = .print_line = wakeup_print_line, .flags = &tracer_flags, .set_flag = wakeup_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif @@ -615,6 +622,7 @@ static struct tracer wakeup_rt_tracer __read_mostly = .print_line = wakeup_print_line, .flags = &tracer_flags, .set_flag = wakeup_set_flag, + .flag_changed = trace_keep_overwrite, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_wakeup, #endif -- cgit v1.2.2 From e71dc5f787f3908b8b8158103fc7d74a78a22cd1 Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Fri, 15 Mar 2013 16:27:53 +0800 Subject: ARM: mmp: add platform_device head file in gplugd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/arm/mach-mmp/gplugd.c: In function ‘gplugd_init’: arch/arm/mach-mmp/gplugd.c:188:2: error: implicit declaration of function ‘platform_device_register’ [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors make[1]: *** [arch/arm/mach-mmp/gplugd.o] Error 1 make: *** [arch/arm/mach-mmp] Error 2 So append platform_device.h to resolve build issue. 
Signed-off-by: Haojian Zhuang --- arch/arm/mach-mmp/gplugd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-mmp/gplugd.c b/arch/arm/mach-mmp/gplugd.c index d1e2d595e79c..f62b68d926f4 100644 --- a/arch/arm/mach-mmp/gplugd.c +++ b/arch/arm/mach-mmp/gplugd.c @@ -9,6 +9,7 @@ */ #include +#include #include #include -- cgit v1.2.2 From 8dda05ccd8a227e2b56ce6b26d52b1af88437f9e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 4 Mar 2013 15:19:19 -0800 Subject: ARM: Scorpion is a v7 architecture, not v6 Scorpion processors have always been v7 CPUs. Fix the Kconfig text to reflect this. Reported-by: Stepan Moskovchenko Signed-off-by: Stephen Boyd Signed-off-by: Arnd Bergmann --- arch/arm/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9c87fd8ed9eb..ca1b6fd94a3f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1005,12 +1005,12 @@ config ARCH_MULTI_V4_V5 bool config ARCH_MULTI_V6 - bool "ARMv6 based platforms (ARM11, Scorpion, ...)" + bool "ARMv6 based platforms (ARM11)" select ARCH_MULTI_V6_V7 select CPU_V6 config ARCH_MULTI_V7 - bool "ARMv7 based platforms (Cortex-A, PJ4, Krait)" + bool "ARMv7 based platforms (Cortex-A, PJ4, Scorpion, Krait)" default y select ARCH_MULTI_V6_V7 select ARCH_VEXPRESS -- cgit v1.2.2 From 0d98da5d845e0d0293055913ce65c9904b3b902a Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 7 Mar 2013 17:20:46 +0000 Subject: netfilter: nf_conntrack: register pernet subsystem before register L4 proto In (c296bb4 netfilter: nf_conntrack: refactor l4proto support for netns) the l4proto gre/dccp/udplite/sctp registration happened before the pernet subsystem, which is wrong. Register the pernet subsystem before registering the L4 proto, since after the L4 proto is registered, init_conntrack may try to access the resources that are allocated in register_pernet_subsys.
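The resulting shape is the usual init-ordering idiom: allocate backing state first, publish the handler last, and unwind in reverse on failure. A minimal sketch for a hypothetical protocol (the two ops structures are placeholders, not real ones):

static int __init example_proto_init(void)
{
	int ret;

	/* Per-net resources must exist before the handler can fire. */
	ret = register_pernet_subsys(&example_net_ops);
	if (ret < 0)
		return ret;

	/* Only now is it safe to let conntrack call into us. */
	ret = nf_ct_l4proto_register(&example_l4proto);
	if (ret < 0)
		goto out_pernet;

	return 0;

out_pernet:
	unregister_pernet_subsys(&example_net_ops);
	return ret;
}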
Reported-by: Alexey Dobriyan Cc: Alexey Dobriyan Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_gre.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_sctp.c | 12 ++++++------ net/netfilter/nf_conntrack_proto_udplite.c | 12 ++++++------ 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 432f95780003..ba65b2041eb4 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -969,6 +969,10 @@ static int __init nf_conntrack_proto_dccp_init(void) { int ret; + ret = register_pernet_subsys(&dccp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&dccp_proto4); if (ret < 0) goto out_dccp4; @@ -977,16 +981,12 @@ static int __init nf_conntrack_proto_dccp_init(void) if (ret < 0) goto out_dccp6; - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&dccp_proto6); out_dccp6: nf_ct_l4proto_unregister(&dccp_proto4); out_dccp4: + unregister_pernet_subsys(&dccp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index bd7d01d9c7e7..155ce9f8a0db 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -420,18 +420,18 @@ static int __init nf_ct_proto_gre_init(void) { int ret; - ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - ret = register_pernet_subsys(&proto_gre_net_ops); if (ret < 0) goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4); + if (ret < 0) + goto out_gre4; + return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4); out_gre4: + unregister_pernet_subsys(&proto_gre_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 480f616d5936..ec83536def9a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -888,6 +888,10 @@ static int __init nf_conntrack_proto_sctp_init(void) { int ret; + ret = register_pernet_subsys(&sctp_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4); if (ret < 0) goto out_sctp4; @@ -896,16 +900,12 @@ static int __init nf_conntrack_proto_sctp_init(void) if (ret < 0) goto out_sctp6; - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6); out_sctp6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4); out_sctp4: + unregister_pernet_subsys(&sctp_net_ops); +out_pernet: return ret; } diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 157489581c31..ca969f6273f7 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -371,6 +371,10 @@ static int __init nf_conntrack_proto_udplite_init(void) { int ret; + ret = register_pernet_subsys(&udplite_net_ops); + if (ret < 0) + goto out_pernet; + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4); if (ret < 0) goto out_udplite4; @@ -379,16 +383,12 @@ static int __init nf_conntrack_proto_udplite_init(void) if (ret < 0) goto out_udplite6; - ret = register_pernet_subsys(&udplite_net_ops); - if (ret 
< 0) - goto out_pernet; - return 0; -out_pernet: - nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6); out_udplite6: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4); out_udplite4: + unregister_pernet_subsys(&udplite_net_ops); +out_pernet: return ret; } -- cgit v1.2.2 From bae99f7a1d372374aaf9ed8910f3b825da995b36 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2013 06:03:18 +0000 Subject: netfilter: nfnetlink_queue: fix incorrect initialization of copy range field 2^16 = 0xffff, not 0xfffff (note the extra 'f'). Not dangerous, since you adjust it to min_t(data_len, skb->len) just afterwards. Reported-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 858fd52c1040..1cb48540f86a 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -112,7 +112,7 @@ instance_create(u_int16_t queue_num, int portid) inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; + inst->copy_range = 0xffff; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); -- cgit v1.2.2 From a82783c91d5dce680dbd290ebf301a520b0e72a5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Mar 2013 20:11:01 +0000 Subject: netfilter: ip6t_NPT: restrict to mangle table As the translation is stateless, using it in the nat table doesn't work (only the initial packet is translated). filter table OUTPUT works but won't re-route the packet after translation. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_NPT.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 83acc1405a18..33608c610276 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -114,6 +114,7 @@ ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) static struct xt_target ip6t_npt_target_reg[] __read_mostly = { { .name = "SNPT", + .table = "mangle", .target = ip6t_snpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, @@ -124,6 +125,7 @@ static struct xt_target ip6t_npt_target_reg[] __read_mostly = { }, { .name = "DNPT", + .table = "mangle", .target = ip6t_dnpt_tg, .targetsize = sizeof(struct ip6t_npt_tginfo), .checkentry = ip6t_npt_checkentry, -- cgit v1.2.2 From d97e74976982a35168c7f131cce0d93537337a26 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 14 Mar 2013 05:12:01 +0000 Subject: net: fec: restart the FEC when PHY speed changes Previously we would only restart the FEC when the PHY link or duplex state changed. The PHY does not always bring down the link for speed changes, in which case we would not detect any change and keep the FEC running. Switching link speed without restarting the FEC results in the FEC being stuck in an indefinite state, generating error conditions for every packet. Signed-off-by: Lucas Stach Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fec.c | 26 +++++++++++++++----------- drivers/net/ethernet/freescale/fec.h | 1 + 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index 069a155d16ed..61d2e6202bf3 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -934,24 +934,28 @@ static void fec_enet_adjust_link(struct net_device *ndev) goto spin_unlock; } - /* Duplex link change */ if (phy_dev->link) { - if (fep->full_duplex != phy_dev->duplex) { - fec_restart(ndev, phy_dev->duplex); - /* prevent unnecessary second fec_restart() below */ + if (!fep->link) { fep->link = phy_dev->link; status_change = 1; } - } - /* Link on or off change */ - if (phy_dev->link != fep->link) { - fep->link = phy_dev->link; - if (phy_dev->link) + if (fep->full_duplex != phy_dev->duplex) + status_change = 1; + + if (phy_dev->speed != fep->speed) { + fep->speed = phy_dev->speed; + status_change = 1; + } + + /* if any of the above changed restart the FEC */ + if (status_change) fec_restart(ndev, phy_dev->duplex); - else + } else { + if (fep->link) { fec_stop(ndev); - status_change = 1; + status_change = 1; + } } spin_unlock: diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index f5390071efd0..eb4372962839 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -240,6 +240,7 @@ struct fec_enet_private { phy_interface_t phy_interface; int link; int full_duplex; + int speed; struct completion mdio_done; int irq[FEC_IRQ_NUM]; int bufdesc_ex; -- cgit v1.2.2 From 3f104c38259dcb3e5443c246f0805bc04d887cc3 Mon Sep 17 00:00:00 2001 From: Georg Hofmann Date: Thu, 14 Mar 2013 06:54:09 +0000 Subject: net: fec: fix missing napi_disable call Commit dc975382d2ef36be7e78fac3717927de1a5abcd8 introduces napi support but never calls napi_disable. This will generate a kernel oops (kernel BUG at include/linux/netdevice.h:473!) every time ndo_stop is called followed by ndo_start. Add the missing napi_disable call. Signed-off-by: Georg Hofmann Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index 61d2e6202bf3..e3f39372ce25 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -1441,6 +1441,7 @@ fec_enet_close(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); /* Don't know what to do yet. */ + napi_disable(&fep->napi); fep->opened = 0; netif_stop_queue(ndev); fec_stop(ndev); -- cgit v1.2.2 From 1e731cb986d564c4938bcba89ff5f4aea1d8e2fb Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Thu, 14 Mar 2013 09:29:06 +0000 Subject: smsc75xx: configuration help incorrectly mentions smsc95xx The Kconfig file help information incorrectly mentions that the SMSC LAN75xx config option is for SMSC LAN95xx devices. Signed-off-by: Robert de Vries Signed-off-by: David S.
Miller --- drivers/net/usb/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 3b6e9b83342d..7c769d8e25ad 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -268,7 +268,7 @@ config USB_NET_SMSC75XX select CRC16 select CRC32 help - This option adds support for SMSC LAN95XX based USB 2.0 + This option adds support for SMSC LAN75XX based USB 2.0 Gigabit Ethernet adapters. config USB_NET_SMSC95XX -- cgit v1.2.2 From aaa0c23cb90141309f5076ba5e3bfbd39544b985 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Thu, 14 Mar 2013 17:21:50 +0000 Subject: Fix dst_neigh_lookup/dst_neigh_lookup_skb return value handling bug When the neighbour table is full, dst_neigh_lookup/dst_neigh_lookup_skb will return -ENOBUFS as an error pointer, which is decidedly non-NULL, while all the code in the kernel that uses these functions assumes failure only on a NULL (zero) return; this will cause a panic. (for example: https://bugzilla.kernel.org/show_bug.cgi?id=54731). This patch corrects the above error with the smallest changes to the kernel source code, and also corrects two missing return-value checks in drivers/infiniband/hw/cxgb4/cm.c Tested on my x86_64 SMP machine Reported-by: Zhouyi Zhou Tested-by: Zhouyi Zhou Signed-off-by: Zhouyi Zhou Signed-off-by: David S. Miller --- drivers/infiniband/hw/cxgb4/cm.c | 12 ++++++++++++ include/net/dst.h | 6 ++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 565bfb161c1a..a3fde52840ca 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1575,6 +1575,12 @@ static int c4iw_reconnect(struct c4iw_ep *ep) neigh = dst_neigh_lookup(ep->dst, &ep->com.cm_id->remote_addr.sin_addr.s_addr); + if (!neigh) { + pr_err("%s - cannot alloc neigh.\n", __func__); + err = -ENOMEM; + goto fail4; + } + /* get a l2t entry */ if (neigh->dev->flags & IFF_LOOPBACK) { PDBG("%s LOOPBACK\n", __func__); @@ -3053,6 +3059,12 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) dst = &rt->dst; neigh = dst_neigh_lookup_skb(dst, skb); + if (!neigh) { + pr_err("%s - failed to allocate neigh!\n", + __func__); + goto free_dst; + } + if (neigh->dev->flags & IFF_LOOPBACK) { pdev = ip_dev_find(&init_net, iph->daddr); e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, diff --git a/include/net/dst.h b/include/net/dst.h index 853cda11e518..1f8fd109e225 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -413,13 +413,15 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->ops->neigh_lookup(dst, NULL, daddr); + struct neighbour *n = dst->ops->neigh_lookup(dst, NULL, daddr); + return IS_ERR(n) ? NULL : n; } static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - return dst->ops->neigh_lookup(dst, skb, NULL); + struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL); + return IS_ERR(n) ?
NULL : n; } static inline void dst_link_failure(struct sk_buff *skb) -- cgit v1.2.2 From 5dc2eb7da1e387e31ce54f54af580c6a6f512ca6 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 26 Feb 2013 10:55:18 +0100 Subject: ARM: i.MX35: enable MAX clock The i.MX35 has two bits per clock gate which are decoded as follows: 0b00 -> clock off 0b01 -> clock is on in run mode, off in wait/doze 0b10 -> clock is on in run/wait mode, off in doze 0b11 -> clock is always on The reset value for the MAX clock is 0b10. The MAX clock is needed by the SoC, yet unused in the Kernel, so the common clock framework will disable it during late init time. It will only disable clocks though which it detects as being turned on. This detection is made depending on the lower bit of the gate. If the reset value has been altered by the bootloader to 0b11 the clock framework will detect the clock as turned on, yet unused, hence it will turn it off and the system locks up. This patch turns the MAX clock on unconditionally making the Kernel independent of the bootloader. Signed-off-by: Sascha Hauer --- arch/arm/mach-imx/clk-imx35.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-imx/clk-imx35.c b/arch/arm/mach-imx/clk-imx35.c index 74e3a34d78b8..e13a8fa5e62c 100644 --- a/arch/arm/mach-imx/clk-imx35.c +++ b/arch/arm/mach-imx/clk-imx35.c @@ -264,6 +264,7 @@ int __init mx35_clocks_init(void) clk_prepare_enable(clk[gpio3_gate]); clk_prepare_enable(clk[iim_gate]); clk_prepare_enable(clk[emi_gate]); + clk_prepare_enable(clk[max_gate]); /* * SCC is needed to boot via mmc after a watchdog reset. The clock code -- cgit v1.2.2 From d66629c1325399cf080ba8b2fb086c10e5439cdd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Mar 2013 11:00:39 +0800 Subject: Bluetooth: Add support for Dell[QCA 0cf3:0036] Add support for the AR9462 chip T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=0036 Rev= 0.02 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=01 I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) 
MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Cc: Cc: Gustavo Padovan Signed-off-by: Ming Lei Signed-off-by: Gustavo Padovan --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 3095d2e74f24..0a6ef6b694d4 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -73,6 +73,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x03F0, 0x311D) }, /* Atheros AR3012 with sflash firmware*/ + { USB_DEVICE(0x0CF3, 0x0036) }, { USB_DEVICE(0x0CF3, 0x3004) }, { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, @@ -107,6 +108,7 @@ MODULE_DEVICE_TABLE(usb, ath3k_table); static struct usb_device_id ath3k_blist_tbl[] = { /* Atheros AR3012 with sflash firmware*/ + { USB_DEVICE(0x0CF3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index e547851870e7..11ac3036bb8a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -131,6 +131,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE }, /* Atheros 3012 with sflash firmware */ + { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, -- cgit v1.2.2 From f7db706b132f11c79ae1d74b2382e0926cf31644 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Thu, 14 Mar 2013 10:03:03 +0100 Subject: ARM: 7674/1: smp: Avoid dummy clockevent being preferred over real hardware clock-event With recent arm broadcast time clean-up from Mark Rutland, the dummy broadcast device is always registered with timer subsystem. And since the rating of the dummy clock event is very high, it may be preferred over a real clock event. This is a change in behavior from past and not an intended one. So reduce the rating of the dummy clock-event so that real clock-event device is selected when available. 
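For context, the clockevents core selects the highest-rated device available on a CPU, so a placeholder must rate below any plausible hardware timer. An illustrative sketch (names and rating values invented for the example):

/* The core prefers whichever device has the higher rating. */
static struct clock_event_device example_hw_evt = {
	.name	= "hw-timer",
	.rating	= 300,	/* real hardware is typically rated 300-400 */
};

static struct clock_event_device example_dummy_evt = {
	.name	= "dummy",
	.rating	= 100,	/* low enough to always lose to real hardware */
};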
Acked-by: Thomas Gleixner Acked-by: Mark Rutland Signed-off-by: Santosh Shilimkar Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 31644f1978d5..79078edbb9bc 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -480,7 +480,7 @@ static void __cpuinit broadcast_timer_setup(struct clock_event_device *evt) evt->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_DUMMY; - evt->rating = 400; + evt->rating = 100; evt->mult = 1; evt->set_mode = broadcast_timer_set_mode; -- cgit v1.2.2 From 2c36af0e559c0a0674ad846527116df41aa5f612 Mon Sep 17 00:00:00 2001 From: Hiroshi Doyu Date: Fri, 15 Mar 2013 07:54:11 +0100 Subject: ARM: 7675/1: amba: tegra-ahb: Fix build error w/ PM_SLEEP w/o PM_RUNTIME Make this depend on CONFIG_PM. This protection is necessary to avoid build errors with any combination of PM features, especially when supporting a new SoC where each PM feature is enabled one-by-one during its development. Signed-off-by: Hiroshi Doyu Signed-off-by: Russell King --- drivers/amba/tegra-ahb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 093c43554963..1f44e56cc65d 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -158,7 +158,7 @@ int tegra_ahb_enable_smmu(struct device_node *dn) EXPORT_SYMBOL(tegra_ahb_enable_smmu); #endif -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM static int tegra_ahb_suspend(struct device *dev) { int i; -- cgit v1.2.2 From 7cb035d9e619a8d20f5d3b9791f8cb5160d19e70 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 10 Mar 2013 13:56:08 +0200 Subject: mei: add mei_stop function to stop mei device mei_stop calls mei_reset with interrupts disabled. It has the same effect as the open-coded sequence it replaces in mei_remove. The reset sequence on remove is required for the Lynx Point LP devices to clean the reset state.
mei_stop is called from mei_pci_suspend and mei_remove functions Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/init.c | 18 ++++++++++++++++ drivers/misc/mei/mei_dev.h | 1 + drivers/misc/mei/pci-me.c | 52 +++++++--------------------------------------- 3 files changed, 26 insertions(+), 45 deletions(-) diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 6ec530168afb..356179991a2e 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -183,6 +183,24 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled) mei_cl_all_write_clear(dev); } +void mei_stop(struct mei_device *dev) +{ + dev_dbg(&dev->pdev->dev, "stopping the device.\n"); + + mutex_lock(&dev->device_lock); + + cancel_delayed_work(&dev->timer_work); + + mei_wd_stop(dev); + + dev->dev_state = MEI_DEV_POWER_DOWN; + mei_reset(dev, 0); + + mutex_unlock(&dev->device_lock); + + flush_scheduled_work(); +} + diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index cb80166161f0..97873812e33b 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -381,6 +381,7 @@ static inline unsigned long mei_secs_to_jiffies(unsigned long sec) void mei_device_init(struct mei_device *dev); void mei_reset(struct mei_device *dev, int interrupts); int mei_hw_init(struct mei_device *dev); +void mei_stop(struct mei_device *dev); /* * MEI interrupt functions prototype diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index b40ec0601ab0..b8b5c9c3ad03 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -247,44 +247,14 @@ static void mei_remove(struct pci_dev *pdev) hw = to_me_hw(dev); - mutex_lock(&dev->device_lock); - - cancel_delayed_work(&dev->timer_work); - mei_wd_stop(dev); + dev_err(&pdev->dev, "stop\n"); + mei_stop(dev); mei_pdev = NULL; - if (dev->iamthif_cl.state == MEI_FILE_CONNECTED) { - dev->iamthif_cl.state = MEI_FILE_DISCONNECTING; - mei_cl_disconnect(&dev->iamthif_cl); - } - if (dev->wd_cl.state == MEI_FILE_CONNECTED) { - dev->wd_cl.state = MEI_FILE_DISCONNECTING; - mei_cl_disconnect(&dev->wd_cl); - } - - /* Unregistering watchdog device */ mei_watchdog_unregister(dev); - /* remove entry if already in list */ - dev_dbg(&pdev->dev, "list del iamthif and wd file list.\n"); - - if (dev->open_handle_count > 0) - dev->open_handle_count--; - mei_cl_unlink(&dev->wd_cl); - - if (dev->open_handle_count > 0) - dev->open_handle_count--; - mei_cl_unlink(&dev->iamthif_cl); - - dev->iamthif_current_cb = NULL; - dev->me_clients_num = 0; - - mutex_unlock(&dev->device_lock); - - flush_scheduled_work(); - /* disable interrupts */ mei_disable_interrupts(dev); @@ -308,28 +278,20 @@ static int mei_pci_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct mei_device *dev = pci_get_drvdata(pdev); - int err; if (!dev) return -ENODEV; - mutex_lock(&dev->device_lock); - cancel_delayed_work(&dev->timer_work); + dev_err(&pdev->dev, "suspend\n"); - /* Stop watchdog if exists */ - err = mei_wd_stop(dev); - /* Set new mei state */ - if (dev->dev_state == MEI_DEV_ENABLED || - dev->dev_state == MEI_DEV_RECOVERING_FROM_RESET) { - dev->dev_state = MEI_DEV_POWER_DOWN; - mei_reset(dev, 0); - } - mutex_unlock(&dev->device_lock); + mei_stop(dev); + + mei_disable_interrupts(dev); free_irq(pdev->irq, dev); pci_disable_msi(pdev); - return err; + return 0; } static int mei_pci_resume(struct device *device) -- cgit v1.2.2 From 68f8ea184bf7a552b59a38c4b0c7dc243822d2d5 Mon Sep 17 00:00:00 2001 From: Tomas Winkler 
Date: Sun, 10 Mar 2013 13:56:07 +0200 Subject: mei: ME hardware reset needs to be synchronized This fixes a failure during initialization on Lynx Point LP devices. The ME driver needs to release the device from reset only after the FW has completed its flow and has indicated this by delivering an interrupt to the host. This is the correct behavior for all the ME devices, yet the previous versions are less susceptible to an implementation that ignored the FW reset completion indication. We add a mei_me_hw_reset_release function, which is called after reset from the interrupt thread or directly from mei_reset during power down. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 45ea7185c003..642c6223fa6c 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -151,6 +151,20 @@ static void mei_me_intr_disable(struct mei_device *dev) mei_hcsr_set(hw, hcsr); } +/** + * mei_me_hw_reset_release - release device from the reset + * + * @dev: the device structure + */ +static void mei_me_hw_reset_release(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 hcsr = mei_hcsr_read(hw); + + hcsr |= H_IG; + hcsr &= ~H_RST; + mei_hcsr_set(hw, hcsr); +} /** * mei_me_hw_reset - resets fw via mei csr register. * @@ -169,18 +183,14 @@ static void mei_me_hw_reset(struct mei_device *dev, bool intr_enable) if (intr_enable) hcsr |= H_IE; else - hcsr &= ~H_IE; - - mei_hcsr_set(hw, hcsr); - - hcsr = mei_hcsr_read(hw) | H_IG; - hcsr &= ~H_RST; + hcsr |= ~H_IE; mei_hcsr_set(hw, hcsr); - hcsr = mei_hcsr_read(hw); + if (dev->dev_state == MEI_DEV_POWER_DOWN) + mei_me_hw_reset_release(dev); - dev_dbg(&dev->pdev->dev, "current HCSR = 0x%08x.\n", hcsr); + dev_dbg(&dev->pdev->dev, "current HCSR = 0x%08x.\n", mei_hcsr_read(hw)); } /** @@ -466,7 +476,8 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) mutex_unlock(&dev->device_lock); return IRQ_HANDLED; } else { - dev_dbg(&dev->pdev->dev, "FW not ready.\n"); + dev_dbg(&dev->pdev->dev, "Reset Completed.\n"); + mei_me_hw_reset_release(dev); mutex_unlock(&dev->device_lock); return IRQ_HANDLED; } -- cgit v1.2.2 From 25e9789ddd9d14a8971f4a421d04f282719ab733 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Mar 2013 12:58:20 -0600 Subject: vfio: include <linux/slab.h> for kmalloc The vfio drivers call kmalloc or kzalloc, but do not include <linux/slab.h>, which causes build errors on ARM.
Signed-off-by: Arnd Bergmann Signed-off-by: Alex Williamson Cc: kvm@vger.kernel.org --- drivers/vfio/pci/vfio_pci_config.c | 1 + drivers/vfio/pci/vfio_pci_intrs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 964ff22bf281..aeb00fc2d3be 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -27,6 +27,7 @@ #include #include #include +#include <linux/slab.h> #include "vfio_pci_private.h" diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3639371fa697..a96509187deb 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -22,6 +22,7 @@ #include #include #include +#include <linux/slab.h> #include "vfio_pci_private.h" -- cgit v1.2.2 From 00eed9c814cb8f281be6f0f5d8f45025dc0a97eb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Mar 2013 17:14:43 +0100 Subject: USB: xhci: correctly enable interrupts xhci has its own interrupt enabling routine, which will try to use MSI-X/MSI if present. So the usb core shouldn't try to enable legacy interrupts; on some machines the xhci legacy IRQ setting is invalid. v3: Be careful to not break XHCI_BROKEN_MSI workaround (by trenn) Cc: Bjorn Helgaas Cc: Oliver Neukum Cc: Thomas Renninger Cc: Yinghai Lu Cc: Frederik Himpe Cc: David Haerdeman Cc: Alan Stern Acked-by: Sarah Sharp Reviewed-by: Thomas Renninger Signed-off-by: Hannes Reinecke Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd-pci.c | 23 ++++++++++++++--------- drivers/usb/host/xhci.c | 3 ++- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 622b4a48e732..2b487d4797bd 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -173,6 +173,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) struct hc_driver *driver; struct usb_hcd *hcd; int retval; + int hcd_irq = 0; if (usb_disabled()) return -ENODEV; @@ -187,15 +188,19 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) return -ENODEV; dev->current_state = PCI_D0; - /* The xHCI driver supports MSI and MSI-X, - * so don't fail if the BIOS doesn't provide a legacy IRQ. + /* + * The xHCI driver has its own irq management + * make sure irq setup is not touched for xhci in generic hcd code */ - if (!dev->irq && (driver->flags & HCD_MASK) != HCD_USB3) { - dev_err(&dev->dev, - "Found HC with no IRQ. Check BIOS/PCI %s setup!\n", - pci_name(dev)); - retval = -ENODEV; - goto disable_pci; + if ((driver->flags & HCD_MASK) != HCD_USB3) { + if (!dev->irq) { + dev_err(&dev->dev, + "Found HC with no IRQ. Check BIOS/PCI %s setup!\n", + pci_name(dev)); + retval = -ENODEV; + goto disable_pci; + } + hcd_irq = dev->irq; } hcd = usb_create_hcd(driver, &dev->dev, pci_name(dev)); @@ -245,7 +250,7 @@ int usb_hcd_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) pci_set_master(dev); - retval = usb_add_hcd(hcd, dev->irq, IRQF_SHARED); + retval = usb_add_hcd(hcd, hcd_irq, IRQF_SHARED); if (retval != 0) goto unmap_registers; set_hs_companion(dev, hcd); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f1f01a834ba7..849470b18831 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -350,7 +350,7 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) * generate interrupts. Don't even try to enable MSI.
*/ if (xhci->quirks & XHCI_BROKEN_MSI) - return 0; + goto legacy_irq; /* unregister the legacy interrupt */ if (hcd->irq) @@ -371,6 +371,7 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) return -EINVAL; } + legacy_irq: /* fall back to legacy interrupt*/ ret = request_irq(pdev->irq, &usb_hcd_irq, IRQF_SHARED, hcd->irq_descr, hcd); -- cgit v1.2.2 From 29f86e66428ee083aec106cca1748dc63d98ce23 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Sat, 9 Mar 2013 20:30:58 +0400 Subject: usb-storage: add unusual_devs entry for Samsung YP-Z3 mp3 player The device gets stuck on filesystem writes unless the following quirk is passed: echo 04e8:5136:m > /sys/module/usb_storage/parameters/quirks Add a corresponding entry to unusual_devs.h. Signed-off-by: Dmitry Artamonow Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index da04a074e790..1799335288bd 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -496,6 +496,13 @@ UNUSUAL_DEV( 0x04e8, 0x5122, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_MAX_SECTORS_64 | US_FL_BULK_IGNORE_TAG), +/* Added by Dmitry Artamonow */ +UNUSUAL_DEV( 0x04e8, 0x5136, 0x0000, 0x9999, + "Samsung", + "YP-Z3", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* Entry and supporting patch by Theodore Kilgore . * Device uses standards-violating 32-byte Bulk Command Block Wrappers and * reports itself as "Proprietary SCSI Bulk." Cf. device entry 0x084d:0x0011. -- cgit v1.2.2 From 2a40f324541ee61c22146214349c2ce9f5c30bcf Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Mar 2013 14:40:26 -0400 Subject: USB: EHCI: fix regression during bus resume This patch (as1663) fixes a regression caused by commit 6e0c3339a6f19d748f16091d0a05adeb1e1f822b (USB: EHCI: unlink one async QH at a time). In order to avoid keeping multiple QHs in an unusable intermediate state, that commit changed unlink_empty_async() so that it unlinks only one empty QH at a time. However, when the EHCI root hub is suspended, _all_ async QHs need to be unlinked. ehci_bus_suspend() used to do this by calling unlink_empty_async(), but now this only unlinks one of the QHs, not all of them. The symptom is that when the root hub is resumed, USB communications don't work for some period of time. This is because ehci-hcd doesn't realize it needs to restart the async schedule; it assumes that because some QHs are already on the schedule, the schedule must be running. The easiest way to fix the problem is to add a new function that unlinks all the async QHs when the root hub is suspended. This patch should be applied to all kernels that have the 6e0c3339a6f1 commit.
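The shape of the new function is a drain loop: keep unlinking the list head until the schedule is empty, instead of unlinking a single element per pass. A hedged standalone C sketch of that shape (struct qh, single_unlink() and the printf are illustrative stand-ins, not the ehci-hcd internals):

#include <stdio.h>
#include <stdlib.h>

struct qh {
	struct qh *next;
	int id;
};

/* analogue of single_unlink_async(): detach one QH from the head */
static struct qh *single_unlink(struct qh **head)
{
	struct qh *qh = *head;

	*head = qh->next;
	return qh;
}

static void unlink_all(struct qh **head)
{
	/* loop until the list is empty, so a later resume can never find
	 * stale entries on a schedule it believes is still running */
	while (*head) {
		struct qh *qh = single_unlink(head);

		printf("unlinked qh %d\n", qh->id);
		free(qh);
	}
}

int main(void)
{
	struct qh *head = NULL;

	for (int i = 0; i < 3; i++) {
		struct qh *qh = malloc(sizeof(*qh));

		qh->id = i;
		qh->next = head;
		head = qh;
	}
	unlink_all(&head);
	return 0;
}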
Signed-off-by: Alan Stern Reported-and-tested-by: Adrian Bassett Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 1 + drivers/usb/host/ehci-hub.c | 2 +- drivers/usb/host/ehci-q.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 5726cb144abf..416a6dce5e11 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -302,6 +302,7 @@ static void ehci_quiesce (struct ehci_hcd *ehci) static void end_unlink_async(struct ehci_hcd *ehci); static void unlink_empty_async(struct ehci_hcd *ehci); +static void unlink_empty_async_suspended(struct ehci_hcd *ehci); static void ehci_work(struct ehci_hcd *ehci); static void start_unlink_intr(struct ehci_hcd *ehci, struct ehci_qh *qh); static void end_unlink_intr(struct ehci_hcd *ehci, struct ehci_qh *qh); diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 4d3b294f203e..7d06e77f6c4f 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -328,7 +328,7 @@ static int ehci_bus_suspend (struct usb_hcd *hcd) ehci->rh_state = EHCI_RH_SUSPENDED; end_unlink_async(ehci); - unlink_empty_async(ehci); + unlink_empty_async_suspended(ehci); ehci_handle_intr_unlinks(ehci); end_free_itds(ehci); diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 5464665f0b6a..23d136904285 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -1316,6 +1316,19 @@ static void unlink_empty_async(struct ehci_hcd *ehci) } } +/* The root hub is suspended; unlink all the async QHs */ +static void unlink_empty_async_suspended(struct ehci_hcd *ehci) +{ + struct ehci_qh *qh; + + while (ehci->async->qh_next.qh) { + qh = ehci->async->qh_next.qh; + WARN_ON(!list_empty(&qh->qtd_list)); + single_unlink_async(ehci, qh); + } + start_iaa_cycle(ehci, false); +} + /* makes sure the async qh will become idle */ /* caller must own ehci->lock */ -- cgit v1.2.2 From 347e0899b1c75d907f01ac883ca38d37fe9bfa42 Mon Sep 17 00:00:00 2001 From: Andy King Date: Thu, 7 Mar 2013 07:29:08 -0800 Subject: VMCI: Fix process-to-process datagrams. When sending between processes, we always schedule a work item. Our work info struct has the message embedded in the middle, which means that we end up overwriting subsequent fields when we copy the (variable-length) message into it. Move it to the end of the struct. Acked-by: Dmitry Torokhov Signed-off-by: Andy King Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_datagram.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index ed5c433cd493..f3cdd904fe4d 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -42,9 +42,11 @@ struct datagram_entry { struct delayed_datagram_info { struct datagram_entry *entry; - struct vmci_datagram msg; struct work_struct work; bool in_dg_host_queue; + /* msg and msg_payload must be together. */ + struct vmci_datagram msg; + u8 msg_payload[]; }; /* Number of in-flight host->host datagrams */ -- cgit v1.2.2 From 503bded92da283b2f31d87e054c4c6d30c3c2340 Mon Sep 17 00:00:00 2001 From: Pawel Wieczorkiewicz Date: Wed, 20 Feb 2013 17:26:20 +0100 Subject: tty: atmel_serial_probe(): index of atmel_ports[] fix Index of atmel_ports[ATMEL_MAX_UART] should be smaller than ATMEL_MAX_UART.
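The corrected allocation bound is easy to see in a standalone sketch (MAX_PORTS and alloc_port() are hypothetical stand-ins for ATMEL_MAX_UART and the probe logic; the kernel uses a real bitmap with find_first_zero_bit() and test_and_set_bit()):

#include <stdbool.h>
#include <stdio.h>

#define MAX_PORTS 4	/* stand-in for ATMEL_MAX_UART */

static bool port_in_use[MAX_PORTS];

static int alloc_port(void)
{
	int i;

	for (i = 0; i < MAX_PORTS; i++) {
		if (!port_in_use[i]) {
			port_in_use[i] = true;
			return i;
		}
	}
	/* i == MAX_PORTS here; returning it would index one past the
	 * array, which is exactly the bug the '>=' check closes */
	return -1;
}

int main(void)
{
	for (int n = 0; n < MAX_PORTS + 1; n++)
		printf("alloc_port() = %d\n", alloc_port());
	return 0;
}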
Signed-off-by: Pawel Wieczorkiewicz Acked-by: Nicolas Ferre Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index d4a7c241b751..3467462869ce 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -158,7 +158,7 @@ struct atmel_uart_port { }; static struct atmel_uart_port atmel_ports[ATMEL_MAX_UART]; -static unsigned long atmel_ports_in_use; +static DECLARE_BITMAP(atmel_ports_in_use, ATMEL_MAX_UART); #ifdef SUPPORT_SYSRQ static struct console atmel_console; @@ -1769,15 +1769,14 @@ static int atmel_serial_probe(struct platform_device *pdev) if (ret < 0) /* port id not found in platform data nor device-tree aliases: * auto-enumerate it */ - ret = find_first_zero_bit(&atmel_ports_in_use, - sizeof(atmel_ports_in_use)); + ret = find_first_zero_bit(atmel_ports_in_use, ATMEL_MAX_UART); - if (ret > ATMEL_MAX_UART) { + if (ret >= ATMEL_MAX_UART) { ret = -ENODEV; goto err; } - if (test_and_set_bit(ret, &atmel_ports_in_use)) { + if (test_and_set_bit(ret, atmel_ports_in_use)) { /* port already in use */ ret = -EBUSY; goto err; @@ -1857,7 +1856,7 @@ static int atmel_serial_remove(struct platform_device *pdev) /* "port" is allocated statically, so we shouldn't free it */ - clear_bit(port->line, &atmel_ports_in_use); + clear_bit(port->line, atmel_ports_in_use); clk_put(atmel_port->clk); -- cgit v1.2.2 From 8b5c913f7ee6464849570bacb6bcd9ef0eaf7dce Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Tue, 5 Mar 2013 23:16:48 +0800 Subject: serial: 8250_pci: Add WCH CH352 quirk to avoid Xscale detection The code in 8250.c for detecting ARM/XScale UARTs says: * Try writing and reading the UART_IER_UUE bit (b6). * If it works, this is probably one of the Xscale platform's * internal UARTs. If the above passes, it then goes on to: * It's an Xscale. * We'll leave the UART_IER_UUE bit set to 1 (enabled). However, the CH352 uses the UART_IER_UUE as the LOWPOWER function, so it is readable and writable. According to the datasheet: "LOWPOWER:When the bit is 1, close the internal benchmark clock of serial port to set into low-power status. So it essentially gets mis-detected as Xscale, and gets powered down in the process. The device in question where this was seen is listed by lspci as: Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550]) Re-using the 353 quirk which just sets flags to fixed and type to 16550 is suitable for fixing the 352 as well. 
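A minimal sketch of the quirk-table pattern the patch extends, in plain userspace C (the struct, setup body, and lookup are simplified stand-ins for the 8250_pci machinery; the vendor/device IDs are the ones from the patch):

#include <stddef.h>
#include <stdio.h>

struct pci_quirk {
	unsigned short vendor;
	unsigned short device;
	void (*setup)(void);
};

static void wch_ch353_setup(void)
{
	puts("flags fixed, type forced to 16550");
}

/* the CH352 entry simply reuses the CH353 setup hook */
static const struct pci_quirk quirks[] = {
	{ 0x4348, 0x3453, wch_ch353_setup },	/* CH353 4S */
	{ 0x4348, 0x3253, wch_ch353_setup },	/* CH352 2S */
};

static const struct pci_quirk *find_quirk(unsigned short vendor,
					  unsigned short device)
{
	size_t i;

	for (i = 0; i < sizeof(quirks) / sizeof(quirks[0]); i++)
		if (quirks[i].vendor == vendor && quirks[i].device == device)
			return &quirks[i];
	return NULL;
}

int main(void)
{
	const struct pci_quirk *q = find_quirk(0x4348, 0x3253);

	if (q)
		q->setup();	/* the card never reaches Xscale detection */
	return 0;
}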
Signed-off-by: Wang YanQing Signed-off-by: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index aa76825229dc..26e3a97ab157 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1554,6 +1554,7 @@ pci_wch_ch353_setup(struct serial_private *priv, #define PCI_DEVICE_ID_PLX_CRONYX_OMEGA 0xc001 #define PCI_DEVICE_ID_INTEL_PATSBURG_KT 0x1d3d #define PCI_VENDOR_ID_WCH 0x4348 +#define PCI_DEVICE_ID_WCH_CH352_2S 0x3253 #define PCI_DEVICE_ID_WCH_CH353_4S 0x3453 #define PCI_DEVICE_ID_WCH_CH353_2S1PF 0x5046 #define PCI_DEVICE_ID_WCH_CH353_2S1P 0x7053 @@ -2172,6 +2173,14 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_wch_ch353_setup, }, + /* WCH CH352 2S card (16550 clone) */ + { + .vendor = PCI_VENDOR_ID_WCH, + .device = PCI_DEVICE_ID_WCH_CH352_2S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_wch_ch353_setup, + }, /* * ASIX devices with FIFO bug */ @@ -4870,6 +4879,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_2_115200 }, + { PCI_VENDOR_ID_WCH, PCI_DEVICE_ID_WCH_CH352_2S, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b0_bt_2_115200 }, + /* * Commtech, Inc. Fastcom adapters */ -- cgit v1.2.2 From fa3daf9aa74a3ac1c87d8188a43d283d06720032 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Mar 2013 15:32:26 -0400 Subject: drm/radeon: fix S/R on VM systems (cayman/TN/SI) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We weren't properly tearing down the VM sub-alloctor on suspend leading to bogus VM PTs on resume. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=60439 Reviewed-by: Christian König Tested-by: Dmitry Cherkasov Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ni.c | 1 + drivers/gpu/drm/radeon/si.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index d4c633e12863..e77c9273bc9c 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1771,6 +1771,7 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); + radeon_vm_manager_fini(rdev); cayman_cp_enable(rdev, false); cayman_dma_stop(rdev); evergreen_irq_suspend(rdev); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 9128120da044..bafbe3216952 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4469,6 +4469,7 @@ int si_resume(struct radeon_device *rdev) int si_suspend(struct radeon_device *rdev) { + radeon_vm_manager_fini(rdev); si_cp_enable(rdev, false); cayman_dma_stop(rdev); si_irq_suspend(rdev); -- cgit v1.2.2 From 8f612b23a17dce86fef75407e698de6243cc99a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Mar 2013 19:28:39 -0400 Subject: drm/radeon: fix backend map setup on 1 RB trinity boards Need to adjust the backend map depending on which RB is enabled. 
This is the trinity equivalent of: f7eb97300832f4fe5fe916c5d84cd2e25169330e May fix: https://bugs.freedesktop.org/show_bug.cgi?id=57919 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ni.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index e77c9273bc9c..a7d3de73be04 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -616,11 +616,22 @@ static void cayman_gpu_init(struct radeon_device *rdev) WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config); WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config); - tmp = gb_addr_config & NUM_PIPES_MASK; - tmp = r6xx_remap_render_backend(rdev, tmp, - rdev->config.cayman.max_backends_per_se * - rdev->config.cayman.max_shader_engines, - CAYMAN_MAX_BACKENDS, disabled_rb_mask); + if ((rdev->config.cayman.max_backends_per_se == 1) && + (rdev->flags & RADEON_IS_IGP)) { + if ((disabled_rb_mask & 3) == 1) { + /* RB0 disabled, RB1 enabled */ + tmp = 0x11111111; + } else { + /* RB1 disabled, RB0 enabled */ + tmp = 0x00000000; + } + } else { + tmp = gb_addr_config & NUM_PIPES_MASK; + tmp = r6xx_remap_render_backend(rdev, tmp, + rdev->config.cayman.max_backends_per_se * + rdev->config.cayman.max_shader_engines, + CAYMAN_MAX_BACKENDS, disabled_rb_mask); + } WREG32(GB_BACKEND_MAP, tmp); cgts_tcc_disable = 0xffff0000; -- cgit v1.2.2 From fa8d387dc3f62062a6b4afbbb2a3438094fd8584 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 Mar 2013 12:53:13 -0400 Subject: drm/radeon/benchmark: make sure bo blit copy exists before using it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a segfault on asics without a blit callback. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=62239 Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_benchmark.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index bedda9caadd9..a2f0c243deb2 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -135,13 +135,15 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, sdomain, ddomain, "dma"); } - time = radeon_benchmark_do_move(rdev, size, saddr, daddr, - RADEON_BENCHMARK_COPY_BLIT, n); - if (time < 0) - goto out_cleanup; - if (time > 0) - radeon_benchmark_log_results(n, size, time, - sdomain, ddomain, "blit"); + if (rdev->asic->copy.blit) { + time = radeon_benchmark_do_move(rdev, size, saddr, daddr, + RADEON_BENCHMARK_COPY_BLIT, n); + if (time < 0) + goto out_cleanup; + if (time > 0) + radeon_benchmark_log_results(n, size, time, + sdomain, ddomain, "blit"); + } out_cleanup: if (sobj) { -- cgit v1.2.2 From 271e53dcffa527c853b4f1b0cdedd10bef406a22 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 12 Mar 2013 12:55:56 -0400 Subject: drm/radeon/benchmark: allow same domains for dma copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove old comment and allow benchmarking moves within the same memory domain for both dma and blit methods. 
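Both benchmark fixes come down to guarding optional function pointers in an ops table before calling through them. A small standalone sketch of that pattern (copy_ops and dummy_dma are invented names; the kernel equivalent is the rdev->asic->copy table):

#include <stdio.h>

struct copy_ops {
	long (*dma)(long size);
	long (*blit)(long size);	/* NULL on ASICs without a blit engine */
};

static long dummy_dma(long size)
{
	return size;
}

static void run_benchmark(const struct copy_ops *ops, long size)
{
	if (ops->dma)
		printf("dma moved %ld bytes\n", ops->dma(size));
	/* the guard prevents the segfault: never call a NULL hook */
	if (ops->blit)
		printf("blit moved %ld bytes\n", ops->blit(size));
}

int main(void)
{
	struct copy_ops ops = { .dma = dummy_dma, .blit = NULL };

	run_benchmark(&ops, 1024);
	return 0;
}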
Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_benchmark.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index a2f0c243deb2..6e05a2e75a46 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -122,10 +122,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, goto out_cleanup; } - /* r100 doesn't have dma engine so skip the test */ - /* also, VRAM-to-VRAM test doesn't make much sense for DMA */ - /* skip it as well if domains are the same */ - if ((rdev->asic->copy.dma) && (sdomain != ddomain)) { + if (rdev->asic->copy.dma) { time = radeon_benchmark_do_move(rdev, size, saddr, daddr, RADEON_BENCHMARK_COPY_DMA, n); if (time < 0) -- cgit v1.2.2 From e4d170633fde379f39a90f8a5e7eb619b5d1144d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 8 Mar 2013 13:44:15 -0500 Subject: drm/radeon: add support for Richland APUs Richland APUs are a new version of the Trinity APUs with performance and power management improvements. Reviewed-by: Jerome Glisse Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ni.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index a7d3de73be04..27769e724b6d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -468,13 +468,19 @@ static void cayman_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x9907) || (rdev->pdev->device == 0x9908) || (rdev->pdev->device == 0x9909) || + (rdev->pdev->device == 0x990B) || + (rdev->pdev->device == 0x990C) || + (rdev->pdev->device == 0x990F) || (rdev->pdev->device == 0x9910) || - (rdev->pdev->device == 0x9917)) { + (rdev->pdev->device == 0x9917) || + (rdev->pdev->device == 0x9999)) { rdev->config.cayman.max_simds_per_se = 6; rdev->config.cayman.max_backends_per_se = 2; } else if ((rdev->pdev->device == 0x9903) || (rdev->pdev->device == 0x9904) || (rdev->pdev->device == 0x990A) || + (rdev->pdev->device == 0x990D) || + (rdev->pdev->device == 0x990E) || (rdev->pdev->device == 0x9913) || (rdev->pdev->device == 0x9918)) { rdev->config.cayman.max_simds_per_se = 4; @@ -483,6 +489,9 @@ static void cayman_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x9990) || (rdev->pdev->device == 0x9991) || (rdev->pdev->device == 0x9994) || + (rdev->pdev->device == 0x9995) || + (rdev->pdev->device == 0x9996) || + (rdev->pdev->device == 0x999A) || (rdev->pdev->device == 0x99A0)) { rdev->config.cayman.max_simds_per_se = 3; rdev->config.cayman.max_backends_per_se = 1; -- cgit v1.2.2 From b75bbaa038ffc426e88ea3df6c4ae11834fc3e4f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 8 Mar 2013 13:36:54 -0500 Subject: drm/radeon: add Richland pci ids Reviewed-by: Jerome Glisse Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- include/drm/drm_pciids.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index a386b0b654cc..918e8fe2f5e9 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -581,7 +581,11 @@ {0x1002, 0x9908, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x990A, PCI_ANY_ID, 
PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ - {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9910, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9913, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9917, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ @@ -592,6 +596,13 @@ {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9996, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9998, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9999, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x999A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x999B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x99A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x99A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x99A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ -- cgit v1.2.2 From 1eef1282549d7accdd33ee36d409b039b1f911fb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 11 Mar 2013 09:07:46 -0300 Subject: amd64_edac: Correct DIMM sizes We were filling the csrow size with a wrong value. 16a528ee3975 ("EDAC: Fix csrow size reported in sysfs") tried to address the issue. It fixed the report with the old API but not with the new one. Correct it for the new API too. 
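Reduced to a standalone sketch, the corrected accounting always derives a row's size by summing its per-channel DIMMs instead of keeping a duplicate per-csrow counter (struct names here are simplified stand-ins for the EDAC ones):

#include <stdio.h>

#define NR_CHANNELS 2

struct dimm_info {
	unsigned int nr_pages;
};

struct csrow_info {
	struct dimm_info channels[NR_CHANNELS];
};

/* the row size is derived from its channels' DIMMs, so csrow-based and
 * dimm-based controllers report the same number through both APIs */
static unsigned int csrow_nr_pages(const struct csrow_info *csrow)
{
	unsigned int nr_pages = 0;
	int i;

	for (i = 0; i < NR_CHANNELS; i++)
		nr_pages += csrow->channels[i].nr_pages;
	return nr_pages;
}

int main(void)
{
	struct csrow_info csrow = {
		.channels = { { .nr_pages = 512 }, { .nr_pages = 512 } },
	};

	printf("csrow size: %u pages\n", csrow_nr_pages(&csrow));
	return 0;
}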
Signed-off-by: Mauro Carvalho Chehab [ make it a per-csrow accounting regardless of ->channel_count ] Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 14 +++++++++----- drivers/edac/edac_mc_sysfs.c | 13 +++---------- include/linux/edac.h | 1 - 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 910b0116c128..532de775a184 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2048,12 +2048,18 @@ static int init_csrows(struct mem_ctl_info *mci) edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, i); - if (row_dct0) + if (row_dct0) { nr_pages = amd64_csrow_nr_pages(pvt, 0, i); + csrow->channels[0]->dimm->nr_pages = nr_pages; + } /* K8 has only one DCT */ - if (boot_cpu_data.x86 != 0xf && row_dct1) - nr_pages += amd64_csrow_nr_pages(pvt, 1, i); + if (boot_cpu_data.x86 != 0xf && row_dct1) { + int row_dct1_pages = amd64_csrow_nr_pages(pvt, 1, i); + + csrow->channels[1]->dimm->nr_pages = row_dct1_pages; + nr_pages += row_dct1_pages; + } mtype = amd64_determine_memory_type(pvt, i); @@ -2072,9 +2078,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = mtype; dimm->edac_mode = edac_mode; - dimm->nr_pages = nr_pages; } - csrow->nr_pages = nr_pages; } return empty; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0cbf670efa23..4c50a4760db7 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -180,9 +180,6 @@ static ssize_t csrow_size_show(struct device *dev, int i; u32 nr_pages = 0; - if (csrow->mci->csbased) - return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages)); - for (i = 0; i < csrow->nr_channels; i++) nr_pages += csrow->channels[i]->dimm->nr_pages; return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages)); @@ -778,14 +775,10 @@ static ssize_t mci_size_mb_show(struct device *dev, for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) { struct csrow_info *csrow = mci->csrows[csrow_idx]; - if (csrow->mci->csbased) { - total_pages += csrow->nr_pages; - } else { - for (j = 0; j < csrow->nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + for (j = 0; j < csrow->nr_channels; j++) { + struct dimm_info *dimm = csrow->channels[j]->dimm; - total_pages += dimm->nr_pages; - } + total_pages += dimm->nr_pages; } } diff --git a/include/linux/edac.h b/include/linux/edac.h index 4fd4999ccb5b..ab1ea98e767c 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -561,7 +561,6 @@ struct csrow_info { u32 ue_count; /* Uncorrectable Errors for this csrow */ u32 ce_count; /* Correctable Errors for this csrow */ - u32 nr_pages; /* combined pages count of all channels */ struct mem_ctl_info *mci; /* the parent */ -- cgit v1.2.2 From 9713faecff3d071de1208b081d4943b002e9cb1c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 11 Mar 2013 09:28:48 -0300 Subject: EDAC: Merge mci.mem_is_per_rank with mci.csbased Both mci.mem_is_per_rank and mci.csbased denote the same thing: the memory controller is csrows based. Merge both fields into one. 
There's no need for the driver to actually fill it, as the core detects it by checking if one of the layers has the csrows type as part of the memory hierarchy: if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT) per_rank = true; Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 1 - drivers/edac/edac_mc.c | 6 +++--- drivers/edac/edac_mc_sysfs.c | 2 +- include/linux/edac.h | 6 ++---- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 532de775a184..e1d13c463c90 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2423,7 +2423,6 @@ static int amd64_init_one_instance(struct pci_dev *F2) mci->pvt_info = pvt; mci->pdev = &pvt->F2->dev; - mci->csbased = 1; setup_mci_misc_attrs(mci, fam_type); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index cdb81aa73ab7..27e86d938262 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -86,7 +86,7 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm, int number) edac_dimm_info_location(dimm, location, sizeof(location)); edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n", - dimm->mci->mem_is_per_rank ? "rank" : "dimm", + dimm->mci->csbased ? "rank" : "dimm", number, location, dimm->csrow, dimm->cschannel); edac_dbg(4, " dimm = %p\n", dimm); edac_dbg(4, " dimm->label = '%s'\n", dimm->label); @@ -341,7 +341,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, memcpy(mci->layers, layers, sizeof(*layer) * n_layers); mci->nr_csrows = tot_csrows; mci->num_cschannel = tot_channels; - mci->mem_is_per_rank = per_rank; + mci->csbased = per_rank; /* * Alocate and fill the csrow/channels structs @@ -1235,7 +1235,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * incrementing the compat API counters */ edac_dbg(4, "%s csrows map: (%d,%d)\n", - mci->mem_is_per_rank ? "rank" : "dimm", + mci->csbased ? "rank" : "dimm", dimm->csrow, dimm->cschannel); if (row == -1) row = dimm->csrow; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 4c50a4760db7..5899a76eec3b 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -609,7 +609,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; - if (mci->mem_is_per_rank) + if (mci->csbased) dev_set_name(&dimm->dev, "rank%d", index); else dev_set_name(&dimm->dev, "dimm%d", index); diff --git a/include/linux/edac.h b/include/linux/edac.h index ab1ea98e767c..0b763276f619 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -675,11 +675,11 @@ struct mem_ctl_info { * sees memory sticks ("dimms"), and the ones that sees memory ranks. * All old memory controllers enumerate memories per rank, but most * of the recent drivers enumerate memories per DIMM, instead. - * When the memory controller is per rank, mem_is_per_rank is true. + * When the memory controller is per rank, csbased is true. */ unsigned n_layers; struct edac_mc_layer *layers; - bool mem_is_per_rank; + bool csbased; /* * DIMM info. 
Will eventually remove the entire csrows_info some day @@ -740,8 +740,6 @@ struct mem_ctl_info { u32 fake_inject_ue; u16 fake_inject_count; #endif - __u8 csbased : 1, /* csrow-based memory controller */ - __resv : 7; }; #endif -- cgit v1.2.2 From c95246c3a2ac796cfa43e76200ede59cb4a1644f Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Sat, 16 Mar 2013 08:22:25 +0100 Subject: Adding in EEH support to the IBM FlashSystem 70/80 device driver Changes in v2 include: o Fixed spelling of guarantee. o Fixed potential memory leak if slot reset fails out. o Changed list_for_each_entry_safe with list_for_each_entry. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 203 +++++++++++++++++++++++++++++++++++++- drivers/block/rsxx/cregs.c | 59 +++++++++-- drivers/block/rsxx/dma.c | 216 ++++++++++++++++++++++++++++++----------- drivers/block/rsxx/rsxx_priv.h | 25 ++++- 4 files changed, 436 insertions(+), 67 deletions(-) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index cbbdff113f46..93f28191a0ff 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,13 @@ static DEFINE_IDA(rsxx_disk_ida); static DEFINE_SPINLOCK(rsxx_ida_lock); /*----------------- Interrupt Control & Handling -------------------*/ + +static void rsxx_mask_interrupts(struct rsxx_cardinfo *card) +{ + card->isr_mask = 0; + card->ier_mask = 0; +} + static void __enable_intr(unsigned int *mask, unsigned int intr) { *mask |= intr; @@ -71,7 +79,8 @@ static void __disable_intr(unsigned int *mask, unsigned int intr) */ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) { - if (unlikely(card->halt)) + if (unlikely(card->halt) || + unlikely(card->eeh_state)) return; __enable_intr(&card->ier_mask, intr); @@ -80,6 +89,9 @@ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) { + if (unlikely(card->eeh_state)) + return; + __disable_intr(&card->ier_mask, intr); iowrite32(card->ier_mask, card->regmap + IER); } @@ -87,7 +99,8 @@ void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, unsigned int intr) { - if (unlikely(card->halt)) + if (unlikely(card->halt) || + unlikely(card->eeh_state)) return; __enable_intr(&card->isr_mask, intr); @@ -97,6 +110,9 @@ void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, unsigned int intr) { + if (unlikely(card->eeh_state)) + return; + __disable_intr(&card->isr_mask, intr); __disable_intr(&card->ier_mask, intr); iowrite32(card->ier_mask, card->regmap + IER); @@ -115,6 +131,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata) do { reread_isr = 0; + if (unlikely(card->eeh_state)) + break; + isr = ioread32(card->regmap + ISR); if (isr == 0xffffffff) { /* @@ -304,6 +323,179 @@ static int card_shutdown(struct rsxx_cardinfo *card) return 0; } +static void rsxx_eeh_frozen(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + int i; + + dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); + + card->eeh_state = 1; + rsxx_mask_interrupts(card); + + /* + * We need to guarantee that the write for eeh_state and masking + * interrupts does not become reordered. This will prevent a possible + * race condition with the EEH code. 
+ */ + wmb(); + + pci_disable_device(dev); + + rsxx_eeh_save_issued_dmas(card); + + rsxx_eeh_save_issued_creg(card); + + for (i = 0; i < card->n_targets; i++) { + if (card->ctrl[i].status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); + if (card->ctrl[i].cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); + } +} + +static void rsxx_eeh_failure(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + int i; + + dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n"); + + card->eeh_state = 1; + + for (i = 0; i < card->n_targets; i++) + del_timer_sync(&card->ctrl[i].activity_timer); + + rsxx_eeh_cancel_dmas(card); +} + +static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) +{ + unsigned int status; + int iter = 0; + + /* We need to wait for the hardware to reset */ + while (iter++ < 10) { + status = ioread32(card->regmap + PCI_RECONFIG); + + if (status & RSXX_FLUSH_BUSY) { + ssleep(1); + continue; + } + + if (status & RSXX_FLUSH_TIMEOUT) + dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n"); + return 0; + } + + /* Hardware failed resetting itself. */ + return -1; +} + +static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, + enum pci_channel_state error) +{ + if (dev->revision < RSXX_EEH_SUPPORT) + return PCI_ERS_RESULT_NONE; + + if (error == pci_channel_io_perm_failure) { + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + } + + rsxx_eeh_frozen(dev); + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + int st; + + dev_warn(&dev->dev, + "IBM FlashSystem PCI: recovering from slot reset.\n"); + + st = pci_enable_device(dev); + if (st) + goto failed_hw_setup; + + pci_set_master(dev); + + st = rsxx_eeh_fifo_flush_poll(card); + if (st) + goto failed_hw_setup; + + rsxx_dma_queue_reset(card); + + for (i = 0; i < card->n_targets; i++) { + st = rsxx_hw_buffers_init(dev, &card->ctrl[i]); + if (st) + goto failed_hw_buffers_init; + } + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Clears the ISR register from spurious interrupts */ + st = ioread32(card->regmap + ISR); + + card->eeh_state = 0; + + st = rsxx_eeh_remap_dmas(card); + if (st) + goto failed_remap_dmas; + + spin_lock_irqsave(&card->irq_lock, flags); + if (card->n_targets & RSXX_MAX_TARGETS) + rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G); + else + rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C); + spin_unlock_irqrestore(&card->irq_lock, flags); + + rsxx_kick_creg_queue(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + if (list_empty(&card->ctrl[i].queue)) { + spin_unlock(&card->ctrl[i].queue_lock); + continue; + } + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + + dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n"); + + return PCI_ERS_RESULT_RECOVERED; + +failed_hw_buffers_init: +failed_remap_dmas: + for (i = 0; i < card->n_targets; i++) { + if (card->ctrl[i].status.buf) + pci_free_consistent(card->dev, + STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); + if (card->ctrl[i].cmd.buf) + pci_free_consistent(card->dev, + COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); + } +failed_hw_setup: + 
rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + +} + /*----------------- Driver Initialization & Setup -------------------*/ /* Returns: 0 if the driver is compatible with the device -1 if the driver is NOT compatible with the device */ @@ -383,6 +575,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, spin_lock_init(&card->irq_lock); card->halt = 0; + card->eeh_state = 0; spin_lock_irq(&card->irq_lock); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); @@ -593,6 +786,11 @@ static void rsxx_pci_shutdown(struct pci_dev *dev) card_shutdown(card); } +static const struct pci_error_handlers rsxx_err_handler = { + .error_detected = rsxx_error_detected, + .slot_reset = rsxx_slot_reset, +}; + static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)}, {PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)}, @@ -608,6 +806,7 @@ static struct pci_driver rsxx_pci_driver = { .remove = rsxx_pci_remove, .suspend = rsxx_pci_suspend, .shutdown = rsxx_pci_shutdown, + .err_handler = &rsxx_err_handler, }; static int __init rsxx_core_init(void) diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c index 0539a25877eb..4b5c020a0a65 100644 --- a/drivers/block/rsxx/cregs.c +++ b/drivers/block/rsxx/cregs.c @@ -58,7 +58,7 @@ static struct kmem_cache *creg_cmd_pool; #error Unknown endianess!!! Aborting... #endif -static void copy_to_creg_data(struct rsxx_cardinfo *card, +static int copy_to_creg_data(struct rsxx_cardinfo *card, int cnt8, void *buf, unsigned int stream) @@ -66,6 +66,9 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card, int i = 0; u32 *data = buf; + if (unlikely(card->eeh_state)) + return -EIO; + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { /* * Firmware implementation makes it necessary to byte swap on @@ -76,10 +79,12 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card, else iowrite32(data[i], card->regmap + CREG_DATA(i)); } + + return 0; } -static void copy_from_creg_data(struct rsxx_cardinfo *card, +static int copy_from_creg_data(struct rsxx_cardinfo *card, int cnt8, void *buf, unsigned int stream) @@ -87,6 +92,9 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, int i = 0; u32 *data = buf; + if (unlikely(card->eeh_state)) + return -EIO; + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { /* * Firmware implementation makes it necessary to byte swap on @@ -97,19 +105,32 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card, else data[i] = ioread32(card->regmap + CREG_DATA(i)); } + + return 0; } static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) { + int st; + + if (unlikely(card->eeh_state)) + return; + iowrite32(cmd->addr, card->regmap + CREG_ADD); iowrite32(cmd->cnt8, card->regmap + CREG_CNT); if (cmd->op == CREG_OP_WRITE) { - if (cmd->buf) - copy_to_creg_data(card, cmd->cnt8, - cmd->buf, cmd->stream); + if (cmd->buf) { + st = copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + if (st) + return; + } } + if (unlikely(card->eeh_state)) + return; + /* Setting the valid bit will kick off the command. 
*/ iowrite32(cmd->op, card->regmap + CREG_CMD); } @@ -272,7 +293,7 @@ static void creg_cmd_done(struct work_struct *work) goto creg_done; } - copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); } creg_done: @@ -675,6 +696,32 @@ int rsxx_reg_access(struct rsxx_cardinfo *card, return 0; } +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + + if (cmd) { + del_timer_sync(&card->creg_ctrl.cmd_timer); + + spin_lock_bh(&card->creg_ctrl.lock); + list_add(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + card->creg_ctrl.active = 0; + spin_unlock_bh(&card->creg_ctrl.lock); + } +} + +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card) +{ + spin_lock_bh(&card->creg_ctrl.lock); + if (!list_empty(&card->creg_ctrl.queue)) + creg_kick_queue(card); + spin_unlock_bh(&card->creg_ctrl.lock); +} + /*------------ Initialization & Setup --------------*/ int rsxx_creg_setup(struct rsxx_cardinfo *card) { diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index efd75b55a670..60d344d002ec 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -81,9 +81,6 @@ enum rsxx_hw_status { HW_STATUS_FAULT = 0x08, }; -#define STATUS_BUFFER_SIZE8 4096 -#define COMMAND_BUFFER_SIZE8 4096 - static struct kmem_cache *rsxx_dma_pool; struct dma_tracker { @@ -122,7 +119,7 @@ static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) return tgt; } -static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) { /* Reset all DMA Command/Status Queues */ iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); @@ -210,7 +207,8 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) u32 q_depth = 0; u32 intr_coal; - if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE || + unlikely(card->eeh_state)) return; for (i = 0; i < card->n_targets; i++) @@ -223,31 +221,26 @@ static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) } /*----------------- RSXX DMA Handling -------------------*/ -static void rsxx_complete_dma(struct rsxx_cardinfo *card, +static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl, struct rsxx_dma *dma, unsigned int status) { if (status & DMA_SW_ERR) - printk_ratelimited(KERN_ERR - "SW Error in DMA(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_sw_err++; if (status & DMA_HW_FAULT) - printk_ratelimited(KERN_ERR - "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_hw_fault++; if (status & DMA_CANCELLED) - printk_ratelimited(KERN_ERR - "DMA Cancelled(cmd x%02x, laddr x%08x)\n", - dma->cmd, dma->laddr); + ctrl->stats.dma_cancelled++; if (dma->dma_addr) - pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + pci_unmap_page(ctrl->card->dev, dma->dma_addr, + get_dma_size(dma), dma->cmd == HW_CMD_BLK_WRITE ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (dma->cb) - dma->cb(card, dma->cb_data, status ? 1 : 0); + dma->cb(ctrl->card, dma->cb_data, status ? 
1 : 0); kmem_cache_free(rsxx_dma_pool, dma); } @@ -330,14 +323,15 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, if (requeue_cmd) rsxx_requeue_dma(ctrl, dma); else - rsxx_complete_dma(ctrl->card, dma, status); + rsxx_complete_dma(ctrl, dma, status); } static void dma_engine_stalled(unsigned long data) { struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; - if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + if (atomic_read(&ctrl->stats.hw_q_depth) == 0 || + unlikely(ctrl->card->eeh_state)) return; if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { @@ -369,7 +363,8 @@ static void rsxx_issue_dmas(struct work_struct *work) ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); hw_cmd_buf = ctrl->cmd.buf; - if (unlikely(ctrl->card->halt)) + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->eeh_state)) return; while (1) { @@ -397,7 +392,7 @@ static void rsxx_issue_dmas(struct work_struct *work) */ if (unlikely(ctrl->card->dma_fault)) { push_tracker(ctrl->trackers, tag); - rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + rsxx_complete_dma(ctrl, dma, DMA_CANCELLED); continue; } @@ -435,6 +430,12 @@ static void rsxx_issue_dmas(struct work_struct *work) atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); mod_timer(&ctrl->activity_timer, jiffies + DMA_ACTIVITY_TIMEOUT); + + if (unlikely(ctrl->card->eeh_state)) { + del_timer_sync(&ctrl->activity_timer); + return; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); } } @@ -453,7 +454,8 @@ static void rsxx_dma_done(struct work_struct *work) hw_st_buf = ctrl->status.buf; if (unlikely(ctrl->card->halt) || - unlikely(ctrl->card->dma_fault)) + unlikely(ctrl->card->dma_fault) || + unlikely(ctrl->card->eeh_state)) return; count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); @@ -498,7 +500,7 @@ static void rsxx_dma_done(struct work_struct *work) if (status) rsxx_handle_dma_error(ctrl, dma, status); else - rsxx_complete_dma(ctrl->card, dma, 0); + rsxx_complete_dma(ctrl, dma, 0); push_tracker(ctrl->trackers, tag); @@ -717,20 +719,54 @@ bvec_err: /*----------------- DMA Engine Initialization & Setup -------------------*/ +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) +{ + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + 
SW_CMD_IDX); + + return 0; +} + static int rsxx_dma_ctrl_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl) { int i; + int st; memset(&ctrl->stats, 0, sizeof(ctrl->stats)); - ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, - &ctrl->status.dma_addr); - ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, - &ctrl->cmd.dma_addr); - if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) - return -ENOMEM; - ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); if (!ctrl->trackers) return -ENOMEM; @@ -760,33 +796,9 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev, INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); - memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_LO); - iowrite32(upper_32_bits(ctrl->status.dma_addr), - ctrl->regmap + SB_ADD_HI); - - memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); - iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); - iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); - - ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); - if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading status cnt x%x\n", - ctrl->status.idx); - return -EINVAL; - } - iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); - iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); - - ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); - if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { - dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", - ctrl->status.idx); - return -EINVAL; - } - iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); - iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + st = rsxx_hw_buffers_init(dev, ctrl); + if (st) + return st; return 0; } @@ -822,7 +834,7 @@ static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, return 0; } -static int rsxx_dma_configure(struct rsxx_cardinfo *card) +int rsxx_dma_configure(struct rsxx_cardinfo *card) { u32 intr_coal; @@ -968,6 +980,94 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) } } +void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) +{ + int i; + int j; + int cnt; + struct rsxx_dma *dma; + struct list_head issued_dmas[card->n_targets]; + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&issued_dmas[i]); + cnt = 0; + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = get_tracker_dma(card->ctrl[i].trackers, j); + if (dma == NULL) + continue; + + if (dma->cmd == HW_CMD_BLK_WRITE) + card->ctrl[i].stats.writes_issued--; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + card->ctrl[i].stats.discards_issued--; + else + card->ctrl[i].stats.reads_issued--; + + list_add_tail(&dma->list, &issued_dmas[i]); + push_tracker(card->ctrl[i].trackers, j); + cnt++; + } + + spin_lock(&card->ctrl[i].queue_lock); + list_splice(&issued_dmas[i], &card->ctrl[i].queue); + + atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth); + card->ctrl[i].stats.sw_q_depth += cnt; + card->ctrl[i].e_cnt = 0; + + list_for_each_entry(dma, &card->ctrl[i].queue, list) { + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? 
+ PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + } + spin_unlock(&card->ctrl[i].queue_lock); + } +} + +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int i; + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) { + list_del(&dma->list); + + rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED); + } + spin_unlock(&card->ctrl[i].queue_lock); + } +} + +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int i; + + for (i = 0; i < card->n_targets; i++) { + spin_lock(&card->ctrl[i].queue_lock); + list_for_each_entry(dma, &card->ctrl[i].queue, list) { + dma->dma_addr = pci_map_page(card->dev, dma->page, + dma->pg_off, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + } + spin_unlock(&card->ctrl[i].queue_lock); + } + + return 0; +} int rsxx_dma_init(void) { diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index f5a95f75bd57..8a7ac87f1dc5 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -64,6 +64,9 @@ struct proc_cmd; #define RSXX_MAX_OUTSTANDING_CMDS 255 #define RSXX_CS_IDX_MASK 0xff +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + #define RSXX_MAX_TARGETS 8 struct dma_tracker_list; @@ -88,6 +91,9 @@ struct rsxx_dma_stats { u32 discards_failed; u32 done_rescheduled; u32 issue_rescheduled; + u32 dma_sw_err; + u32 dma_hw_fault; + u32 dma_cancelled; u32 sw_q_depth; /* Number of DMAs on the SW queue. */ atomic_t hw_q_depth; /* Number of DMAs queued to HW. 
*/ }; @@ -113,6 +119,7 @@ struct rsxx_dma_ctrl { struct rsxx_cardinfo { struct pci_dev *dev; unsigned int halt; + unsigned int eeh_state; void __iomem *regmap; spinlock_t irq_lock; @@ -221,6 +228,7 @@ enum rsxx_pci_regmap { PERF_RD512_HI = 0xac, PERF_WR512_LO = 0xb0, PERF_WR512_HI = 0xb4, + PCI_RECONFIG = 0xb8, }; enum rsxx_intr { @@ -234,6 +242,8 @@ enum rsxx_intr { CR_INTR_DMA5 = 0x00000080, CR_INTR_DMA6 = 0x00000100, CR_INTR_DMA7 = 0x00000200, + CR_INTR_ALL_C = 0x0000003f, + CR_INTR_ALL_G = 0x000003ff, CR_INTR_DMA_ALL = 0x000003f5, CR_INTR_ALL = 0xffffffff, }; @@ -250,8 +260,14 @@ enum rsxx_pci_reset { DMA_QUEUE_RESET = 0x00000001, }; +enum rsxx_hw_fifo_flush { + RSXX_FLUSH_BUSY = 0x00000002, + RSXX_FLUSH_TIMEOUT = 0x00000004, +}; + enum rsxx_pci_revision { RSXX_DISCARD_SUPPORT = 2, + RSXX_EEH_SUPPORT = 3, }; enum rsxx_creg_cmd { @@ -357,11 +373,17 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card); void rsxx_dma_destroy(struct rsxx_cardinfo *card); int rsxx_dma_init(void); void rsxx_dma_cleanup(void); +void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); +int rsxx_dma_configure(struct rsxx_cardinfo *card); int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, struct bio *bio, atomic_t *n_dmas, rsxx_dma_cb cb, void *cb_data); +int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); +void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); +void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); +int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); /***** cregs.c *****/ int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, @@ -386,10 +408,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card); void rsxx_creg_destroy(struct rsxx_cardinfo *card); int rsxx_creg_init(void); void rsxx_creg_cleanup(void); - int rsxx_reg_access(struct rsxx_cardinfo *card, struct rsxx_reg_access __user *ucmd, int read); +void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card); +void rsxx_kick_creg_queue(struct rsxx_cardinfo *card); -- cgit v1.2.2 From 351a2c6e7d265f97799ec7f6b1dde7fc7cb4b92d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Mar 2013 10:10:48 +0100 Subject: rsxx: fix missing unlock on error return in rsxx_eeh_remap_dmas() Spotted by Fengguang Wu's build robot. Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 60d344d002ec..d523e9c56578 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -1059,6 +1059,7 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (!dma->dma_addr) { + spin_unlock(&card->ctrl[i].queue_lock); kmem_cache_free(rsxx_dma_pool, dma); return -ENOMEM; } -- cgit v1.2.2 From 2bb78efab42cf4ee7a7184f7d2da1b7cb331b479 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 11 Mar 2013 16:42:49 +0000 Subject: powerpc/ptrace: Fix brk.len used uninitialised With some configs it's possible that in ppc_set_hwdebug, brk.len is uninitialised before being used. It has been reported that GCC 4.2 will produce the following warning in this case: arch/powerpc/kernel/ptrace.c:1479: warning: 'brk.len' is used uninitialized in this function arch/powerpc/kernel/ptrace.c:1381: note: 'brk.len' was declared here This patch corrects this.
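The general rule behind the one-line fix, as a standalone C sketch (struct hw_brk here is a simplified stand-in for the powerpc structure, and the field values are invented): initialize every field of an aggregate before code paths that only conditionally write to it.

#include <stdio.h>

struct hw_brk {
	unsigned long address;
	unsigned int type;
	unsigned int len;
};

int main(void)
{
	/* designated initializers zero every unnamed field, so len and
	 * type have defined values whatever the branches below do */
	struct hw_brk brk = { .len = 8 };
	int want_read = 0;

	if (want_read)
		brk.type |= 1;	/* only conditionally touched */

	printf("len=%u type=%u\n", brk.len, brk.type);
	return 0;
}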
Signed-off-by: Michael Neuling Reported-by: Philippe De Muyter Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 245c1b6a0858..f9b30c68ba47 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1428,6 +1428,7 @@ static long ppc_set_hwdebug(struct task_struct *child, brk.address = bp_info->addr & ~7UL; brk.type = HW_BRK_TYPE_TRANSLATE; + brk.len = 8; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) brk.type |= HW_BRK_TYPE_READ; if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) -- cgit v1.2.2 From d812c0e1f90b7b86ff8e06b500cd8b8a03f3dbe6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 6 Mar 2013 18:11:51 +0000 Subject: powerpc: Make sure that we always include CONFIG_BINFMT_ELF Our kernel is not much good without BINFMT_ELF and this fixes a build warning on 64 bit allnoconfig builds: warning: (COMPAT) selects COMPAT_BINFMT_ELF which has unmet direct dependencies (COMPAT && BINFMT_ELF) Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b89d7eb730a2..a091c01762ed 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -90,6 +90,7 @@ config GENERIC_GPIO config PPC bool default y + select BINFMT_ELF select OF select OF_EARLY_FLATTREE select HAVE_FTRACE_MCOUNT_RECORD -- cgit v1.2.2 From e39d1a471484662620651cd9520250d33843f235 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Mar 2013 03:34:53 +0000 Subject: powerpc: Make VSID_BITS* dependency explicit VSID_BITS and VSID_BITS_1T depend on the context bits and user esid bits. Make the dependency explicit. Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt CC: [v3.8] --- arch/powerpc/include/asm/mmu-hash64.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 2fdb47a19efd..5f8c2bd58818 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -381,21 +381,22 @@ extern void slb_set_size(u16 size); * hash collisions. */ +#define CONTEXT_BITS 19 +#define USER_ESID_BITS 18 +#define USER_ESID_BITS_1T 6 + /* * This should be computed such that protovosid * vsid_mulitplier * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M 38 +#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS + 1) #define VSID_MODULUS_256M ((1UL< Date: Wed, 13 Mar 2013 03:34:54 +0000 Subject: powerpc: Update kernel VSID range This patch changes the kernel VSID range so that we limit VSID_BITS to 37. This enables us to support 64TB with 65 bit VA (37+28). Without this patch we have boot hangs on platforms that only support 65 bit VA. With this patch we now have proto vsid generated as below: We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated from mmu context id and effective segment id of the address.
For user processes max context id is limited to ((1ul << 19) - 5) for kernel space, we use the top 4 context ids to map address as below 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Tested-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt CC: [v3.8] --- arch/powerpc/include/asm/mmu-hash64.h | 115 +++++++++++++++++----------------- arch/powerpc/kernel/exceptions-64s.S | 34 +++++++--- arch/powerpc/mm/hash_utils_64.c | 20 ++++-- arch/powerpc/mm/mmu_context_hash64.c | 11 +--- arch/powerpc/mm/slb_low.S | 50 +++++++-------- arch/powerpc/mm/tlb_hash64.c | 2 +- 6 files changed, 126 insertions(+), 106 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 5f8c2bd58818..a32461f9d825 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -343,17 +343,16 @@ extern void slb_set_size(u16 size); /* * VSID allocation (256MB segment) * - * We first generate a 38-bit "proto-VSID". For kernel addresses this - * is equal to the ESID | 1 << 37, for user addresses it is: - * (context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1) + * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated + * from mmu context id and effective segment id of the address. * - * This splits the proto-VSID into the below range - * 0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range - * 2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range - * - * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1 - * That is, we assign half of the space to user processes and half - * to the kernel. + * For user processes max context id is limited to ((1ul << 19) - 5) + * for kernel space, we use the top 4 context ids to map address as below + * NOTE: each context only support 64TB now. + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] * * The proto-VSIDs are then scrambled into real VSIDs with the * multiplicative hash: @@ -363,38 +362,45 @@ extern void slb_set_size(u16 size); * VSID_MULTIPLIER is prime, so in particular it is * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. * Because the modulus is 2^n-1 we can compute it efficiently without - * a divide or extra multiply (see below). - * - * This scheme has several advantages over older methods: + * a divide or extra multiply (see below). The scramble function gives + * robust scattering in the hash table (at least based on some initial + * results). * - * - We have VSIDs allocated for every kernel address - * (i.e. everything above 0xC000000000000000), except the very top - * segment, which simplifies several things. + * We also consider VSID 0 special. We use VSID 0 for slb entries mapping + * bad address. This enables us to consolidate bad address handling in + * hash_page. * - * - We allow for USER_ESID_BITS significant bits of ESID and - * CONTEXT_BITS bits of context for user addresses. - * i.e. 64T (46 bits) of address space for up to half a million contexts. - * - * - The scramble function gives robust scattering in the hash - * table (at least based on some initial results). 
The previous - * method was more susceptible to pathological cases giving excessive - * hash collisions. + * We also need to avoid the last segment of the last context, because that + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 + * because of the modulo operation in vsid scramble. But the vmemmap + * (which is what uses region 0xf) will never be close to 64TB in size + * (it's 56 bytes per page of system memory). */ #define CONTEXT_BITS 19 #define USER_ESID_BITS 18 #define USER_ESID_BITS_1T 6 +/* + * 256MB segment + * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments + * available for user + kernel mapping. The top 4 contexts are used for + * kernel mapping. Each segment contains 2^28 bytes. Each + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts + * (19 == 37 + 28 - 46). + */ +#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) + /* * This should be computed such that protovosid * vsid_mulitplier * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS + 1) +#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS) #define VSID_MODULUS_256M ((1UL<= \ * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ * the bit clear, r3 already has the answer we want, if it \ @@ -514,34 +521,6 @@ typedef struct { }) #endif /* 1 */ -/* - * This is only valid for addresses >= PAGE_OFFSET - * The proto-VSID space is divided into two class - * User: 0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1 - * kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1 - * - * With KERNEL_START at 0xc000000000000000, the proto vsid for - * the kernel ends up with 0xc00000000 (36 bits). With 64TB - * support we need to have kernel proto-VSID in the - * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS. - */ -static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) -{ - unsigned long proto_vsid; - /* - * We need to make sure proto_vsid for the kernel is - * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T]) - */ - if (ssize == MMU_SEGSIZE_256M) { - proto_vsid = ea >> SID_SHIFT; - proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS)); - return vsid_scramble(proto_vsid, 256M); - } - proto_vsid = ea >> SID_SHIFT_1T; - proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T)); - return vsid_scramble(proto_vsid, 1T); -} - /* Returns the segment size indicator for a user address */ static inline int user_segment_size(unsigned long addr) { @@ -551,10 +530,15 @@ static inline int user_segment_size(unsigned long addr) return MMU_SEGSIZE_256M; } -/* This is only valid for user addresses (which are below 2^44) */ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, int ssize) { + /* + * Bad address. 
We return VSID 0 for that + */ + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + return 0; + if (ssize == MMU_SEGSIZE_256M) return vsid_scramble((context << USER_ESID_BITS) | (ea >> SID_SHIFT), 256M); @@ -562,6 +546,25 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, | (ea >> SID_SHIFT_1T), 1T); } +/* + * This is only valid for addresses >= PAGE_OFFSET + * + * For kernel space, we use the top 4 context ids to map address as below + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] + */ +static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) +{ + unsigned long context; + + /* + * kernel take the top 4 context from the available range + */ + context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; + return get_vsid(context, ea, ssize); +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_HASH64_H_ */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 87ef8f5ee5bc..b112359ea7a8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1452,20 +1452,36 @@ do_ste_alloc: _GLOBAL(do_stab_bolted) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + mfspr r11,SPRN_DAR /* ea */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + rldicr. r9,r11,4,(63 - 46 - 4) + li r9,0 /* VSID = 0 for bad address */ + bne- 0f + + /* + * Calculate VSID: + * This is the kernel vsid, we take the top for context from + * the range. context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * Here we know that (ea >> 60) == 0xc + */ + lis r9,(MAX_USER_CONTEXT + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT + 1)@l + + srdi r10,r11,SID_SHIFT + rldimi r10,r9,USER_ESID_BITS,0 /* proto vsid */ + ASM_VSID_SCRAMBLE(r10, r9, 256M) + rldic r9,r10,12,16 /* r9 = vsid << 12 */ + +0: /* Hash to the primary group */ ld r10,PACASTABVIRT(r13) - mfspr r11,SPRN_DAR - srdi r11,r11,28 + srdi r11,r11,SID_SHIFT rldimi r10,r11,7,52 /* r10 = first ste of the group */ - /* Calculate VSID */ - /* This is a kernel address, so protovsid = ESID | 1 << 37 */ - li r9,0x1 - rldimi r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0 - ASM_VSID_SCRAMBLE(r11, r9, 256M) - rldic r9,r11,12,16 /* r9 = vsid << 12 */ - /* Search the primary group for a free entry */ 1: ld r11,0(r10) /* Test valid bit of the current ste */ andi. r11,r11,0x80 diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6ec6c1997b3a..f410c3e12c1e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -195,6 +195,11 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); unsigned long tprot = prot; + /* + * If we hit a bad address return error. 
+ */ + if (!vsid) + return -1; /* Make kernel text executable */ if (overlaps_kernel_text(vaddr, vaddr + step)) tprot &= ~HPTE_R_N; @@ -924,11 +929,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) { - DBG_LOW(" out of pgtable range !\n"); - return 1; - } - /* Get region & vsid */ switch (REGION_ID(ea)) { case USER_REGION_ID: @@ -959,6 +959,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); + /* Bad address. */ + if (!vsid) { + DBG_LOW("Bad address!\n"); + return 1; + } /* Get pgdir */ pgdir = mm->pgd; if (pgdir == NULL) @@ -1128,6 +1133,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Get VSID */ ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + if (!vsid) + return; /* Hash doesn't like irqs */ local_irq_save(flags); @@ -1235,6 +1242,9 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + /* Don't create HPTE entries for bad address */ + if (!vsid) + return; ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr), mode, HPTE_V_BOLTED, mmu_linear_psize, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 40bc5b0ace54..d1d1b92c5b99 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -29,15 +29,6 @@ static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); -/* - * 256MB segment - * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments - * available for user mappings. Each segment contains 2^28 bytes. Each - * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts - * (19 == 37 + 28 - 46). - */ -#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1) - int __init_new_context(void) { int index; @@ -56,7 +47,7 @@ again: else if (err) return err; - if (index > MAX_CONTEXT) { + if (index > MAX_USER_CONTEXT) { spin_lock(&mmu_context_lock); ida_remove(&mmu_context_ida, index); spin_unlock(&mmu_context_lock); diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 1a16ca227757..77aafaa1ab09 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -31,10 +31,15 @@ * No other registers are examined or changed. */ _GLOBAL(slb_allocate_realmode) - /* r3 = faulting address */ + /* + * check for bad kernel/user address + * (ea & ~REGION_MASK) >= PGTABLE_RANGE + */ + rldicr. r9,r3,4,(63 - 46 - 4) + bne- 8f srdi r9,r3,60 /* get region */ - srdi r10,r3,28 /* get esid */ + srdi r10,r3,SID_SHIFT /* get esid */ cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ @@ -56,12 +61,14 @@ _GLOBAL(slb_allocate_realmode) */ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 - li r9,0x1 /* - * for 1T we shift 12 bits more. slb_finish_load_1T will do - * the necessary adjustment + * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * r9 = region id. 
*/ - rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 + addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l + + BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) @@ -91,24 +98,19 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) _GLOBAL(slb_miss_kernel_load_io) li r11,0 6: - li r9,0x1 /* - * for 1T we shift 12 bits more. slb_finish_load_1T will do - * the necessary adjustment + * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * r9 = region id. */ - rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 + addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha + addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l + BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T -0: /* user address: proto-VSID = context << 15 | ESID. First check - * if the address is within the boundaries of the user region - */ - srdi. r9,r10,USER_ESID_BITS - bne- 8f /* invalid ea bits set */ - - +0: /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs * array. @@ -164,15 +166,13 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) ld r9,PACACONTEXTID(r13) BEGIN_FTR_SECTION cmpldi r10,0x1000 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - rldimi r10,r9,USER_ESID_BITS,0 -BEGIN_FTR_SECTION bge slb_finish_load_1T END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) b slb_finish_load 8: /* invalid EA */ li r10,0 /* BAD_VSID */ + li r9,0 /* BAD_VSID */ li r11,SLB_VSID_USER /* flags don't much matter */ b slb_finish_load @@ -221,8 +221,6 @@ _GLOBAL(slb_allocate_user) /* get context to calculate proto-VSID */ ld r9,PACACONTEXTID(r13) - rldimi r10,r9,USER_ESID_BITS,0 - /* fall through slb_finish_load */ #endif /* __DISABLED__ */ @@ -231,9 +229,10 @@ _GLOBAL(slb_allocate_user) /* * Finish loading of an SLB entry and return * - * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET + * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET */ slb_finish_load: + rldimi r10,r9,USER_ESID_BITS,0 ASM_VSID_SCRAMBLE(r10,r9,256M) /* * bits above VSID_BITS_256M need to be ignored from r10 @@ -298,10 +297,11 @@ _GLOBAL(slb_compare_rr_to_size) /* * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. * - * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9 + * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 */ slb_finish_load_1T: - srdi r10,r10,40-28 /* get 1T ESID */ + srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ + rldimi r10,r9,USER_ESID_BITS_1T,0 ASM_VSID_SCRAMBLE(r10,r9,1T) /* * bits above VSID_BITS_1T need to be ignored from r10 diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 0d82ef50dc3f..023ec8a13f38 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -82,11 +82,11 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, if (!is_kernel_addr(addr)) { ssize = user_segment_size(addr); vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); } else { vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } + WARN_ON(vsid == 0); vpn = hpt_vpn(addr, vsid, ssize); rpte = __real_pte(__pte(pte), ptep); -- cgit v1.2.2 From af81d7878c641629f2693ae3fdaf74b4af14dfca Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Mar 2013 03:34:55 +0000 Subject: powerpc: Rename USER_ESID_BITS* to ESID_BITS* Now we use ESID_BITS of kernel address to build proto vsid. 
So rename USER_ESID_BITS to ESID_BITS. Acked-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt CC: [v3.8] --- arch/powerpc/include/asm/mmu-hash64.h | 16 ++++++++-------- arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/mm/slb_low.S | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index a32461f9d825..b59e06f507ea 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -378,12 +378,12 @@ extern void slb_set_size(u16 size); */ #define CONTEXT_BITS 19 -#define USER_ESID_BITS 18 -#define USER_ESID_BITS_1T 6 +#define ESID_BITS 18 +#define ESID_BITS_1T 6 /* * 256MB segment - * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments + * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments * available for user + kernel mapping. The top 4 contexts are used for * kernel mapping. Each segment contains 2^28 bytes. Each * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts @@ -396,15 +396,15 @@ extern void slb_set_size(u16 size); * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus */ #define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_256M (CONTEXT_BITS + USER_ESID_BITS) +#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS) #define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1) #define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_1T (CONTEXT_BITS + USER_ESID_BITS_1T) +#define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T) #define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1) - return vsid_scramble((context << USER_ESID_BITS) + return vsid_scramble((context << ESID_BITS) | (ea >> SID_SHIFT), 256M); - return vsid_scramble((context << USER_ESID_BITS_1T) | (ea >> SID_SHIFT_1T), 1T); + return vsid_scramble((context << ESID_BITS_1T) | (ea >> SID_SHIFT_1T), 1T); } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b112359ea7a8..200afa5bcfb7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1472,7 +1472,7 @@ _GLOBAL(do_stab_bolted) addi r9,r9,(MAX_USER_CONTEXT + 1)@l srdi r10,r11,SID_SHIFT - rldimi r10,r9,USER_ESID_BITS,0 /* proto vsid */ + rldimi r10,r9,ESID_BITS,0 /* proto vsid */ ASM_VSID_SCRAMBLE(r10, r9, 256M) rldic r9,r10,12,16 /* r9 = vsid << 12 */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index ead58e317294..5d7d29a313eb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -326,8 +326,8 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) vcpu3s->context_id[0] = err; vcpu3s->proto_vsid_max = ((vcpu3s->context_id[0] + 1) - << USER_ESID_BITS) - 1; - vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; + << ESID_BITS) - 1; + vcpu3s->proto_vsid_first = vcpu3s->context_id[0] << ESID_BITS; vcpu3s->proto_vsid_next = vcpu3s->proto_vsid_first; kvmppc_mmu_hpte_init(vcpu); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e212a271c7a4..654258f165ae 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -61,7 +61,7 @@ #endif #ifdef CONFIG_PPC_STD_MMU_64 -#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) #error TASK_SIZE_USER64 exceeds user VSID range #endif #endif diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 77aafaa1ab09..17aa6dfceb34 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -232,7 +232,7 @@ _GLOBAL(slb_allocate_user) * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET */ slb_finish_load: - rldimi
r10,r9,USER_ESID_BITS,0 + rldimi r10,r9,ESID_BITS,0 ASM_VSID_SCRAMBLE(r10,r9,256M) /* * bits above VSID_BITS_256M need to be ignored from r10 @@ -301,7 +301,7 @@ _GLOBAL(slb_compare_rr_to_size) */ slb_finish_load_1T: srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ - rldimi r10,r9,USER_ESID_BITS_1T,0 + rldimi r10,r9,ESID_BITS_1T,0 ASM_VSID_SCRAMBLE(r10,r9,1T) /* * bits above VSID_BITS_1T need to be ignored from r10 -- cgit v1.2.2 From 8c6216d7f118a128678270824b6a1286a63863ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Wed, 13 Mar 2013 02:37:49 +0000 Subject: Revert "ip_gre: make ipgre_tunnel_xmit() not parse network header as IP unconditionally" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 412ed94744d16806fbec3bd250fd94e71cde5a1f. The commit is wrong as tiph points to the outer IPv4 header which is installed at ipgre_header() and not the inner one which is protocol dependent. This commit broke opennhrp, which uses a PF_PACKET socket with the ETH_P_NHRP protocol. Additionally, sll_addr is set to the link-layer IPv4 address. This address is written by ipgre_header() to the skb earlier, and this is the IPv4 header tiph should point to - regardless of the inner protocol payload. Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0ef0e674ec5..91d66dbde9c0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -798,10 +798,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; - if (skb->protocol == htons(ETH_P_IP)) - tiph = (const struct iphdr *)skb->data; - else - tiph = &tunnel->parms.iph; + tiph = (const struct iphdr *)skb->data; } else { gre_hlen = tunnel->hlen; tiph = &tunnel->parms.iph; -- cgit v1.2.2 From 9ad477a1453be32da4a6f068cc08f9353e224be2 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 17 Mar 2013 02:57:28 +0900 Subject: ALSA: documentation: Fix typo in Documentation/sound Correct spelling typos in Documentation/sound/alsa Signed-off-by: Masanari Iida Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/ALSA-Configuration.txt | 2 +- Documentation/sound/alsa/seq_oss.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index ce6581c8ca26..4499bd948860 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -912,7 +912,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. models depending on the codec chip. The list of available models is found in HD-Audio-Models.txt - The model name "genric" is treated as a special case. When this + The model name "generic" is treated as a special case. When this model is given, the driver uses the generic codec parser without "codec-patch". It's sometimes good for testing and debugging. diff --git a/Documentation/sound/alsa/seq_oss.html b/Documentation/sound/alsa/seq_oss.html index d9776cf60c07..9663b45f6fde 100644 --- a/Documentation/sound/alsa/seq_oss.html +++ b/Documentation/sound/alsa/seq_oss.html @@ -285,7 +285,7 @@ sample data.

7.2.4 Close Callback

The close callback is called when this device is closed by the -applicaion. If any private data was allocated in open callback, it must +application. If any private data was allocated in open callback, it must be released in the close callback. The deletion of ALSA port should be done here, too. This callback must not be NULL.
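As a concrete illustration of the contract described above, a close callback typically mirrors its open callback: it frees the private data and tears down the sequencer port created there. A minimal sketch (struct snd_seq_oss_arg is the callback argument type this document describes; my_dev, my_delete_port and the voice_buf member are invented for the example):

	static int my_close(struct snd_seq_oss_arg *arg)
	{
		struct my_dev *dev = arg->private_data;	/* allocated in the open callback */

		if (!dev)
			return 0;

		my_delete_port(dev);		/* delete the ALSA port created in open */
		kfree(dev->voice_buf);		/* release private data allocated in open */
		kfree(dev);
		arg->private_data = NULL;
		return 0;
	}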

-- cgit v1.2.2 From a5b8db91442fce9c9713fcd656c3698f1adde1d6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 13 Mar 2013 04:18:58 +0000 Subject: rtnetlink: Mask the rta_type when range checking Range/validity checks on rta_type in rtnetlink_rcv_msg() do not account for flags that may be set. This causes the function to return -EINVAL when flags are set on the type (for example NLA_F_NESTED). Signed-off-by: Vlad Yasevich Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a585d45cc9d9..5fb8d7e47294 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2621,7 +2621,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type; + unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; if (flavor) { if (flavor > rta_max[sz_idx]) return -EINVAL; -- cgit v1.2.2 From 1e8bbe6cd02fc300c88bd48244ce61ad9c7d1776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 14 Mar 2013 01:05:13 +0000 Subject: net: cdc_ncm, cdc_mbim: allow user to prefer NCM for backwards compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bd329e1 ("net: cdc_ncm: do not bind to NCM compatible MBIM devices") introduced a new policy, preferring MBIM for dual NCM/MBIM functions if the cdc_mbim driver was enabled. This caused a regression for users wanting to use NCM. Devices implementing NCM backwards compatibility according to section 3.2 of the MBIM v1.0 specification allow either NCM or MBIM on a single USB function, using different altsettings. The cdc_ncm and cdc_mbim drivers will both probe such functions, and must agree on a common policy for selecting either MBIM or NCM. Until now, this policy has been set at build time based on CONFIG_USB_NET_CDC_MBIM. Use a module parameter to set the system policy at runtime, allowing the user to prefer NCM on systems with the cdc_mbim driver. Cc: Greg Suarez Cc: Alexey Orishko Reported-by: Geir Haatveit Reported-by: Tommi Kyntola Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=54791 Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/cdc_mbim.c | 11 +--------- drivers/net/usb/cdc_ncm.c | 49 +++++++++++++++++++++++++++++---------------- include/linux/usb/cdc_ncm.h | 1 + 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 248d2dc765a5..16c842997291 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -68,18 +68,9 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) struct cdc_ncm_ctx *ctx; struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; - u8 data_altsetting = CDC_NCM_DATA_ALTSETTING_NCM; + u8 data_altsetting = cdc_ncm_select_altsetting(dev, intf); struct cdc_mbim_state *info = (void *)&dev->data; - /* see if interface supports MBIM alternate setting */ - if (intf->num_altsetting == 2) { - if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) - usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_MBIM); - data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM; - } - /* Probably NCM, defer for cdc_ncm_bind */ if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 61b74a2b89ac..4709fa3497cf 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -55,6 +55,14 @@ #define DRIVER_VERSION "14-Mar-2012" +#if IS_ENABLED(CONFIG_USB_NET_CDC_MBIM) +static bool prefer_mbim = true; +#else +static bool prefer_mbim; +#endif +module_param(prefer_mbim, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(prefer_mbim, "Prefer MBIM setting on dual NCM/MBIM functions"); + static void cdc_ncm_txpath_bh(unsigned long param); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); @@ -550,9 +558,12 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) } EXPORT_SYMBOL_GPL(cdc_ncm_unbind); -static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) +/* Select the MBIM altsetting iff it is preferred and available, + * returning the number of the corresponding data interface altsetting + */ +u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf) { - int ret; + struct usb_host_interface *alt; /* The MBIM spec defines a NCM compatible default altsetting, * which we may have matched: @@ -568,23 +579,27 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) * endpoint descriptors, shall be constructed according to * the rules given in section 6 (USB Device Model) of this * specification." 
- * - * Do not bind to such interfaces, allowing cdc_mbim to handle - * them */ -#if IS_ENABLED(CONFIG_USB_NET_CDC_MBIM) - if ((intf->num_altsetting == 2) && - !usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_MBIM)) { - if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) - return -ENODEV; - else - usb_set_interface(dev->udev, - intf->cur_altsetting->desc.bInterfaceNumber, - CDC_NCM_COMM_ALTSETTING_NCM); + if (prefer_mbim && intf->num_altsetting == 2) { + alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM); + if (alt && cdc_ncm_comm_intf_is_mbim(alt) && + !usb_set_interface(dev->udev, + intf->cur_altsetting->desc.bInterfaceNumber, + CDC_NCM_COMM_ALTSETTING_MBIM)) + return CDC_NCM_DATA_ALTSETTING_MBIM; } -#endif + return CDC_NCM_DATA_ALTSETTING_NCM; +} +EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting); + +static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) +{ + int ret; + + /* MBIM backwards compatible function? */ + cdc_ncm_select_altsetting(dev, intf); + if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) + return -ENODEV; /* NCM data altsetting is always 1 */ ret = cdc_ncm_bind_common(dev, intf, 1); diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 3b8f9d4fc3fe..cc25b70af33c 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -127,6 +127,7 @@ struct cdc_ncm_ctx { u16 connected; }; +extern u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf); extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign); -- cgit v1.2.2 From 8008f6e173c239ea9b2423a937aaf85c3157a306 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2013 11:56:41 +0000 Subject: isdn: hisax: netjet requires VIRT_TO_BUS Disabling CONFIG_VIRT_TO_BUS on ARM showed that the hisax netjet driver depends on this deprecated functionality but is not marked so in Kconfig. Rather than adding ARM to the already long list of architectures that this driver is broken on, this patch adds 'depends on VIRT_TO_BUS' and removes the dependency on !SPARC, which is also implied by that. Signed-off-by: Arnd Bergmann Cc: Karsten Keil Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/isdn/hisax/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig index 5313c9ea44dc..d9edcc94c2a8 100644 --- a/drivers/isdn/hisax/Kconfig +++ b/drivers/isdn/hisax/Kconfig @@ -237,7 +237,8 @@ config HISAX_MIC config HISAX_NETJET bool "NETjet card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on VIRT_TO_BUS help This enables HiSax support for the NetJet from Traverse Technologies. 
@@ -248,7 +249,8 @@ config HISAX_NETJET config HISAX_NETJET_U bool "NETspider U card" - depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on PCI && (BROKEN || !(PPC || PARISC || M68K || (MIPS && !CPU_LITTLE_ENDIAN) || FRV || (XTENSA && !CPU_LITTLE_ENDIAN))) + depends on VIRT_TO_BUS help This enables HiSax support for the Netspider U interface ISDN card from Traverse Technologies. -- cgit v1.2.2 From db0b82760ef84562b5c0fa880c22b4f352545554 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2013 11:56:42 +0000 Subject: ethernet/tulip: DE4x5 needs VIRT_TO_BUS The automated ARM build tests have shown that the tulip de4x5 driver uses the old-style virt_to_bus() interface on some architectures. Alpha, Sparc and PowerPC did not hit this problem, because they use a different code path, and most other architectures actually do provide VIRT_TO_BUS. Signed-off-by: Arnd Bergmann Cc: Grant Grundler Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/dec/tulip/Kconfig b/drivers/net/ethernet/dec/tulip/Kconfig index 0c37fb2cc867..1df33c799c00 100644 --- a/drivers/net/ethernet/dec/tulip/Kconfig +++ b/drivers/net/ethernet/dec/tulip/Kconfig @@ -108,6 +108,7 @@ config TULIP_DM910X config DE4X5 tristate "Generic DECchip & DIGITAL EtherWORKS PCI/EISA" depends on (PCI || EISA) + depends on VIRT_TO_BUS || ALPHA || PPC || SPARC select CRC32 ---help--- This is support for the DIGITAL series of PCI/EISA Ethernet cards. -- cgit v1.2.2 From 75b9b61bb8a18e75afe7b10dd55681e748fa27df Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Fri, 15 Mar 2013 04:10:16 +0000 Subject: drivers: net: ethernet: ti: davinci_emac: fix usage of cpdma_check_free_tx_desc() Fix which was done in the following commit in cpsw driver has to be taken forward to davinci emac driver as well. commit d35162f89b8f00537d7b240b76d2d0e8b8d29aa0 Author: Daniel Mack Date: Tue Mar 12 06:31:19 2013 +0000 net: ethernet: cpsw: fix usage of cpdma_check_free_tx_desc() Commit fae50823d0 ("net: ethernet: davinci_cpdma: Add boundary for rx and tx descriptors") introduced a function to check the current allocation state of tx packets. The return value is taken into account to stop the netqork queue on the adapter in case there are no free slots. However, cpdma_check_free_tx_desc() returns 'true' if there is room in the bitmap, not 'false', so the usage of the function is wrong. Reported-by: Prabhakar Lad Tested-by: Prabhakar Lad Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 52c05366599a..ae1b77aa199f 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1102,7 +1102,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev) /* If there is no more tx desc left free then we need to * tell the kernel to stop sending us tx frames. 
*/ - if (unlikely(cpdma_check_free_tx_desc(priv->txchan))) + if (unlikely(!cpdma_check_free_tx_desc(priv->txchan))) netif_stop_queue(ndev); return NETDEV_TX_OK; -- cgit v1.2.2 From 722c6f585088a2c392b4c5d01b87a584bb8fb73f Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 15 Mar 2013 05:27:54 +0000 Subject: bnx2x: add missing napi deletion in error path If the hardware initialization fails in bnx2x_nic_load() after adding napi objects, they would not be deleted. A subsequent attempt to unload the bnx2x module detects a corruption in the napi list. Add the missing napi deletion to the error path. Signed-off-by: Michal Schmidt Acked-by: Dmitry Kravkov Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index a923bc4d5a1f..4046f97378c2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2760,6 +2760,7 @@ load_error2: bp->port.pmf = 0; load_error1: bnx2x_napi_disable(bp); + bnx2x_del_all_napi(bp); /* clear pf_load status, as it was already set */ if (IS_PF(bp)) -- cgit v1.2.2 From 3d84fa98aca7f05f7010022bc45acb1b50326332 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 15 Mar 2013 06:39:12 +0000 Subject: bridge: Add support for setting BR_ROOT_BLOCK flag. Most of the support was already there. The only thing that was missing was the call to set the flag. Add this call. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index db12a0fcfe50..299fc5f40a26 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -330,6 +330,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); -- cgit v1.2.2 From 46aa92d1ba162b4b3d6b7102440e459d4e4ee255 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 17 Mar 2013 02:46:09 +0000 Subject: vhost/net: fix heads usage of ubuf_info The ubuf info allocator uses a guest-controlled head as an index, so a malicious guest could put the same head entry in the ring twice, and we will get two callbacks on the same value. To fix this, use upend_idx, which is guaranteed to be unique. Reported-by: Rusty Russell Signed-off-by: Michael S. Tsirkin Cc: stable@kernel.org Signed-off-by: David S.
Miller --- drivers/vhost/net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 959b1cd89e6a..ec6fb3fa59bb 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -339,7 +339,8 @@ static void handle_tx(struct vhost_net *net) msg.msg_controllen = 0; ubufs = NULL; } else { - struct ubuf_info *ubuf = &vq->ubuf_info[head]; + struct ubuf_info *ubuf; + ubuf = vq->ubuf_info + vq->upend_idx; vq->heads[vq->upend_idx].len = VHOST_DMA_IN_PROGRESS; -- cgit v1.2.2 From 3b4f819d5eac94ba8fe5e8c061f6dabfe8d7b22c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 11 Mar 2013 18:40:16 +0100 Subject: Revert "drm/i915: try to train DP even harder" This reverts commit 0d71068835e2610576d369d6d4cbf90e0f802a71. Not only does the commit introduce a bogus check (voltage_tries == 5 will never be met on the inserted code path), it also brings the i915 driver into an endless dp-train loop on the HP Z1 desktop machine with IVY+eDP. At least reverting this commit recovers the framebuffer (but X is still broken for other reasons...) Cc: Signed-off-by: Takashi Iwai Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6f728e5ee793..d46dde5a51e3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1930,7 +1930,7 @@ intel_dp_start_link_train(struct intel_dp *intel_dp) for (i = 0; i < intel_dp->lane_count; i++) if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0) break; - if (i == intel_dp->lane_count && voltage_tries == 5) { + if (i == intel_dp->lane_count) { ++loop_tries; if (loop_tries == 5) { DRM_DEBUG_KMS("too many full retries, give up\n"); -- cgit v1.2.2 From 92f28d973cce45ef5823209aab3138eb45d8b349 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Mar 2013 01:03:33 -0700 Subject: scm: Require CAP_SYS_ADMIN over the current pidns to spoof pids. Don't allow spoofing pids over unix domain sockets in the corner cases where a user has created a user namespace but has not yet created a pid namespace. Cc: stable@vger.kernel.org Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- net/core/scm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/scm.c b/net/core/scm.c index 905dcc6ad1e3..2dc6cdaaae8a 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -24,6 +24,7 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/security.h> +#include <linux/pid_namespace.h> #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> @@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || + ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || -- cgit v1.2.2 From 7ae9712c60698ae2870fd115cb3ef4449a615509 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Tue, 5 Mar 2013 10:26:30 +0100 Subject: drm/nv40/therm: improve selection between the old and the new style The condition to select between the old and new style was a thinko as rnndb orders chipsets based on their release date (or general chronology hw-wise) and not based on their chipset number.
As the nv40 family is a mess when it comes to numbers, this patch introduces a switch-based selection between the old and new style. Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c | 50 ++++++++++++++++++------ 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c index 0f5363edb964..d8f43252c048 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c @@ -29,42 +29,68 @@ struct nv40_therm_priv { struct nouveau_therm_priv base; }; +enum nv40_sensor_style { INVALID_STYLE = -1, OLD_STYLE = 0, NEW_STYLE = 1 }; + +static enum nv40_sensor_style +nv40_sensor_style(struct nouveau_therm *therm) +{ + struct nouveau_device *device = nv_device(therm); + + switch (device->chipset) { + case 0x43: + case 0x44: + case 0x4a: + case 0x47: + return OLD_STYLE; + + case 0x46: + case 0x49: + case 0x4b: + case 0x4e: + case 0x4c: + case 0x67: + case 0x68: + case 0x63: + return NEW_STYLE; + default: + return INVALID_STYLE; + } +} + static int nv40_sensor_setup(struct nouveau_therm *therm) { - struct nouveau_device *device = nv_device(therm); + enum nv40_sensor_style style = nv40_sensor_style(therm); /* enable ADC readout and disable the ALARM threshold */ - if (device->chipset >= 0x46) { + if (style == NEW_STYLE) { nv_mask(therm, 0x15b8, 0x80000000, 0); nv_wr32(therm, 0x15b0, 0x80003fff); mdelay(10); /* wait for the temperature to stabilize */ return nv_rd32(therm, 0x15b4) & 0x3fff; - } else { + } else if (style == OLD_STYLE) { nv_wr32(therm, 0x15b0, 0xff); return nv_rd32(therm, 0x15b4) & 0xff; - } + } else + return -ENODEV; } static int nv40_temp_get(struct nouveau_therm *therm) { struct nouveau_therm_priv *priv = (void *)therm; - struct nouveau_device *device = nv_device(therm); struct nvbios_therm_sensor *sensor = &priv->bios_sensor; + enum nv40_sensor_style style = nv40_sensor_style(therm); int core_temp; - if (device->chipset >= 0x46) { + if (style == NEW_STYLE) { nv_wr32(therm, 0x15b0, 0x80003fff); core_temp = nv_rd32(therm, 0x15b4) & 0x3fff; - } else { + } else if (style == OLD_STYLE) { nv_wr32(therm, 0x15b0, 0xff); core_temp = nv_rd32(therm, 0x15b4) & 0xff; - } - - /* Setup the sensor if the temperature is 0 */ - if (core_temp == 0) - core_temp = nv40_sensor_setup(therm); + } else + return -ENODEV; if (sensor->slope_div == 0) sensor->slope_div = 1; -- cgit v1.2.2 From eea4eb14a0f74f806e7a458f174f880744a68bdd Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Tue, 5 Mar 2013 10:35:20 +0100 Subject: drm/nv40/therm: increase the sensor's settling delay to 20ms Based on my experience, 10ms wasn't always enough. Let's bump that to a little more. If this turns out to be insufficient again, then an approach based on letting the sensor settle for several seconds before starting polling on the temperature would be better suited. This way, boot time wouldn't be impacted by those waits too much.
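Sketched out, the settle-then-poll alternative mentioned above could record a deadline at setup time instead of sleeping, and have the read path report "not ready" until that deadline has passed (the sensor_valid_after field and the wrapper below are invented for the sketch; this is not what the patch implements):

	/* at sensor setup: start a clock instead of busy-waiting */
	priv->sensor_valid_after = jiffies + msecs_to_jiffies(5000);

	/* in the read path: defer until the sensor has settled */
	static int nv40_temp_get_settled(struct nouveau_therm *therm)
	{
		struct nv40_therm_priv *priv = (void *)therm;

		if (time_before(jiffies, priv->sensor_valid_after))
			return -EAGAIN;	/* let the next poll retry */

		return nv40_temp_get(therm);
	}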
Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c index d8f43252c048..0575af5328ec 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c @@ -66,10 +66,11 @@ nv40_sensor_setup(struct nouveau_therm *therm) if (style == NEW_STYLE) { nv_mask(therm, 0x15b8, 0x80000000, 0); nv_wr32(therm, 0x15b0, 0x80003fff); - mdelay(10); /* wait for the temperature to stabilize */ + mdelay(20); /* wait for the temperature to stabilize */ return nv_rd32(therm, 0x15b4) & 0x3fff; } else if (style == OLD_STYLE) { nv_wr32(therm, 0x15b0, 0xff); + mdelay(20); /* wait for the temperature to stabilize */ return nv_rd32(therm, 0x15b4) & 0xff; } else return -ENODEV; -- cgit v1.2.2 From 7591782b9f30a5a8bcbba5744c85050ff6743d69 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Tue, 5 Mar 2013 10:44:12 +0100 Subject: drm/nouveau/therm: do not make assumptions on temperature In nouveau_therm_sensor_event, temperature is stored as a uint8_t even though the original interface returns an int. This change should make it more obvious when the sensor is very ill-calibrated or when we selected the wrong sensor style on the nv40 family. Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/temp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index b37624af8297..0a17b9588e09 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -106,16 +106,16 @@ void nouveau_therm_sensor_event(struct nouveau_therm *therm, const char *thresolds[] = { "fanboost", "downclock", "critical", "shutdown" }; - uint8_t temperature = therm->temp_get(therm); + int temperature = therm->temp_get(therm); if (thrs < 0 || thrs > 3) return; if (dir == NOUVEAU_THERM_THRS_FALLING) - nv_info(therm, "temperature (%u C) went below the '%s' threshold\n", + nv_info(therm, "temperature (%i C) went below the '%s' threshold\n", temperature, thresolds[thrs]); else - nv_info(therm, "temperature (%u C) hit the '%s' threshold\n", + nv_info(therm, "temperature (%i C) hit the '%s' threshold\n", temperature, thresolds[thrs]); active = (dir == NOUVEAU_THERM_THRS_RISING); -- cgit v1.2.2 From c4ce9246ca4708482a9a03e76f4177e9f46a13ef Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Tue, 5 Mar 2013 10:58:59 +0100 Subject: drm/nouveau/therm: remove some confusion introduced by therm_mode The kernel message "[ PTHERM][0000:01:00.0] Thermal management: disabled" is misleading as it actually means "fan management: disabled". This patch fixes both the source and the message to improve readability.
Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/include/subdev/therm.h | 2 +- drivers/gpu/drm/nouveau/core/subdev/therm/base.c | 10 +++++----- drivers/gpu/drm/nouveau/core/subdev/therm/priv.h | 2 +- drivers/gpu/drm/nouveau/core/subdev/therm/temp.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/therm.h b/drivers/gpu/drm/nouveau/core/include/subdev/therm.h index 6b17b614629f..0b20fc0d19c1 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/therm.h @@ -4,7 +4,7 @@ #include #include -enum nouveau_therm_mode { +enum nouveau_therm_fan_mode { NOUVEAU_THERM_CTRL_NONE = 0, NOUVEAU_THERM_CTRL_MANUAL = 1, NOUVEAU_THERM_CTRL_AUTO = 2, diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c index f794dc89a3b2..321a55bc25a7 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c @@ -134,7 +134,7 @@ nouveau_therm_alarm(struct nouveau_alarm *alarm) } int -nouveau_therm_mode(struct nouveau_therm *therm, int mode) +nouveau_therm_fan_mode(struct nouveau_therm *therm, int mode) { struct nouveau_therm_priv *priv = (void *)therm; struct nouveau_device *device = nv_device(therm); @@ -152,7 +152,7 @@ nouveau_therm_mode(struct nouveau_therm *therm, int mode) if (priv->mode == mode) return 0; - nv_info(therm, "Thermal management: %s\n", name[mode]); + nv_info(therm, "fan management: %s\n", name[mode]); nouveau_therm_update(therm, mode); return 0; } @@ -213,7 +213,7 @@ nouveau_therm_attr_set(struct nouveau_therm *therm, priv->fan->bios.max_duty = value; return 0; case NOUVEAU_THERM_ATTR_FAN_MODE: - return nouveau_therm_mode(therm, value); + return nouveau_therm_fan_mode(therm, value); case NOUVEAU_THERM_ATTR_THRS_FAN_BOOST: priv->bios_sensor.thrs_fan_boost.temp = value; priv->sensor.program_alarms(therm); @@ -263,7 +263,7 @@ _nouveau_therm_init(struct nouveau_object *object) return ret; if (priv->suspend >= 0) - nouveau_therm_mode(therm, priv->mode); + nouveau_therm_fan_mode(therm, priv->mode); priv->sensor.program_alarms(therm); return 0; } @@ -317,7 +317,7 @@ nouveau_therm_preinit(struct nouveau_therm *therm) nouveau_therm_sensor_ctor(therm); nouveau_therm_fan_ctor(therm); - nouveau_therm_mode(therm, NOUVEAU_THERM_CTRL_NONE); + nouveau_therm_fan_mode(therm, NOUVEAU_THERM_CTRL_NONE); return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h index 06b98706b3fc..d8483dd5e0f9 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h @@ -102,7 +102,7 @@ struct nouveau_therm_priv { struct i2c_client *ic; }; -int nouveau_therm_mode(struct nouveau_therm *therm, int mode); +int nouveau_therm_fan_mode(struct nouveau_therm *therm, int mode); int nouveau_therm_attr_get(struct nouveau_therm *therm, enum nouveau_therm_attr_type type); int nouveau_therm_attr_set(struct nouveau_therm *therm, diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index 0a17b9588e09..441f60bba226 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -123,7 +123,7 @@ void nouveau_therm_sensor_event(struct nouveau_therm *therm, case NOUVEAU_THERM_THRS_FANBOOST: if (active) { nouveau_therm_fan_set(therm, true, 100); - 
nouveau_therm_mode(therm, NOUVEAU_THERM_CTRL_AUTO); + nouveau_therm_fan_mode(therm, NOUVEAU_THERM_CTRL_AUTO); } break; case NOUVEAU_THERM_THRS_DOWNCLOCK: -- cgit v1.2.2 From 13506e2ab40ebec3be3e2fda708d40d3ba972e3e Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Tue, 5 Mar 2013 11:24:04 +0100 Subject: drm/nouveau/therm-ic: the temperature is off by sensor_constant, warn the user Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/base.c | 2 +- drivers/gpu/drm/nouveau/core/subdev/therm/ic.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c index 321a55bc25a7..3f8083f41be8 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c @@ -313,8 +313,8 @@ nouveau_therm_create_(struct nouveau_object *parent, int nouveau_therm_preinit(struct nouveau_therm *therm) { - nouveau_therm_ic_ctor(therm); nouveau_therm_sensor_ctor(therm); + nouveau_therm_ic_ctor(therm); nouveau_therm_fan_ctor(therm); nouveau_therm_fan_mode(therm, NOUVEAU_THERM_CTRL_NONE); diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c b/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c index e24090bac195..8b3adec5fbb1 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/ic.c @@ -32,6 +32,7 @@ probe_monitoring_device(struct nouveau_i2c_port *i2c, struct i2c_board_info *info) { struct nouveau_therm_priv *priv = (void *)nouveau_therm(i2c); + struct nvbios_therm_sensor *sensor = &priv->bios_sensor; struct i2c_client *client; request_module("%s%s", I2C_MODULE_PREFIX, info->type); @@ -46,8 +47,9 @@ probe_monitoring_device(struct nouveau_i2c_port *i2c, } nv_info(priv, - "Found an %s at address 0x%x (controlled by lm_sensors)\n", - info->type, info->addr); + "Found an %s at address 0x%x (controlled by lm_sensors, " + "temp offset %+i C)\n", + info->type, info->addr, sensor->offset_constant); priv->ic = client; return true; -- cgit v1.2.2 From ad40d73ef533ab0ad16b4a1ab2f7870c1f8ab954 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Thu, 14 Mar 2013 23:51:16 +0100 Subject: drm/nv40/therm: disable temperature reading if the bios misses some parameters Reported-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c | 10 ++++------ drivers/gpu/drm/nouveau/core/subdev/therm/temp.c | 9 --------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c index 0575af5328ec..c526d536409f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c @@ -93,12 +93,10 @@ nv40_temp_get(struct nouveau_therm *therm) } else return -ENODEV; - if (sensor->slope_div == 0) - sensor->slope_div = 1; - if (sensor->offset_den == 0) - sensor->offset_den = 1; - if (sensor->slope_mult < 1) - sensor->slope_mult = 1; + /* if the slope or the offset is unset, do no use the sensor */ + if (!sensor->slope_div || !sensor->slope_mult || + !sensor->offset_num || !sensor->offset_den) + return -ENODEV; core_temp = core_temp * sensor->slope_mult / sensor->slope_div; core_temp = core_temp + sensor->offset_num / sensor->offset_den; diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c 
b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index 441f60bba226..0d94d1a19eb7 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -34,10 +34,6 @@ nouveau_therm_temp_set_defaults(struct nouveau_therm *therm) { struct nouveau_therm_priv *priv = (void *)therm; - priv->bios_sensor.slope_mult = 1; - priv->bios_sensor.slope_div = 1; - priv->bios_sensor.offset_num = 0; - priv->bios_sensor.offset_den = 1; priv->bios_sensor.offset_constant = 0; priv->bios_sensor.thrs_fan_boost.temp = 90; @@ -60,11 +56,6 @@ nouveau_therm_temp_safety_checks(struct nouveau_therm *therm) struct nouveau_therm_priv *priv = (void *)therm; struct nvbios_therm_sensor *s = &priv->bios_sensor; - if (!priv->bios_sensor.slope_div) - priv->bios_sensor.slope_div = 1; - if (!priv->bios_sensor.offset_den) - priv->bios_sensor.offset_den = 1; - /* enforce a minimum hysteresis on thresholds */ s->thrs_fan_boost.hysteresis = max_t(u8, s->thrs_fan_boost.hysteresis, 2); s->thrs_down_clock.hysteresis = max_t(u8, s->thrs_down_clock.hysteresis, 2); -- cgit v1.2.2 From 76c0295c389ad9ba19b668b5974cdd90eb95788e Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Fri, 15 Mar 2013 02:09:20 +0100 Subject: drm/nv40/therm: reserve negative temperatures for errors Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c index c526d536409f..a70d1b7e397b 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nv40.c @@ -102,6 +102,10 @@ nv40_temp_get(struct nouveau_therm *therm) core_temp = core_temp + sensor->offset_num / sensor->offset_den; core_temp = core_temp + sensor->offset_constant - 8; + /* reserve negative temperatures for errors */ + if (core_temp < 0) + core_temp = 0; + return core_temp; } -- cgit v1.2.2 From 98ee7c7c63f16e443f51abf08e5412f8eb44ad1e Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Fri, 15 Mar 2013 00:21:07 +0100 Subject: drm/nouveau/therm: disable auto fan management if temperature is not available Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/base.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c index 3f8083f41be8..d6a05589c941 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c @@ -149,6 +149,11 @@ nouveau_therm_fan_mode(struct nouveau_therm *therm, int mode) (mode != NOUVEAU_THERM_CTRL_NONE && device->card_type >= NV_C0)) return -EINVAL; + /* do not allow automatic fan management if the thermal sensor is + * not available */ + if (priv->mode == 2 && therm->temp_get(therm) < 0) + return -EINVAL; + if (priv->mode == mode) return 0; -- cgit v1.2.2 From bf55eb843d266ad31696f17cf1f5c237409485cf Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Fri, 15 Mar 2013 00:42:38 +0100 Subject: drm/nouveau/therm: disable temperature management if the sensor isn't readable Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/temp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index 0d94d1a19eb7..2a02c9f1d7ff 100644 --- 
a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -193,7 +193,7 @@ alarm_timer_callback(struct nouveau_alarm *alarm) NOUVEAU_THERM_THRS_SHUTDOWN); /* schedule the next poll in one second */ - if (list_empty(&alarm->head)) + if (therm->temp_get(therm) >= 0 && list_empty(&alarm->head)) ptimer->alarm(ptimer, 1000 * 1000 * 1000, alarm); spin_unlock_irqrestore(&priv->sensor.alarm_program_lock, flags); -- cgit v1.2.2 From 0b3ee3772e11da2f36c91e542545780d3ed28415 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Fri, 15 Mar 2013 01:47:16 +0100 Subject: drm/nouveau/therm: display the availability of the internal sensor Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/subdev/therm/base.c | 1 + drivers/gpu/drm/nouveau/core/subdev/therm/priv.h | 1 + drivers/gpu/drm/nouveau/core/subdev/therm/temp.c | 11 +++++++++++ 3 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c index d6a05589c941..a00a5a76e2d6 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/base.c @@ -323,6 +323,7 @@ nouveau_therm_preinit(struct nouveau_therm *therm) nouveau_therm_fan_ctor(therm); nouveau_therm_fan_mode(therm, NOUVEAU_THERM_CTRL_NONE); + nouveau_therm_sensor_preinit(therm); return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h index d8483dd5e0f9..438d9824b774 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h @@ -122,6 +122,7 @@ int nouveau_therm_fan_sense(struct nouveau_therm *therm); int nouveau_therm_preinit(struct nouveau_therm *); +void nouveau_therm_sensor_preinit(struct nouveau_therm *); void nouveau_therm_sensor_set_threshold_state(struct nouveau_therm *therm, enum nouveau_therm_thrs thrs, enum nouveau_therm_thrs_state st); diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index 2a02c9f1d7ff..470f6a47b656 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -216,6 +216,17 @@ nouveau_therm_program_alarms_polling(struct nouveau_therm *therm) alarm_timer_callback(&priv->sensor.therm_poll_alarm); } +void +nouveau_therm_sensor_preinit(struct nouveau_therm *therm) +{ + const char *sensor_avail = "yes"; + + if (therm->temp_get(therm) < 0) + sensor_avail = "no"; + + nv_info(therm, "internal sensor: %s\n", sensor_avail); +} + int nouveau_therm_sensor_ctor(struct nouveau_therm *therm) { -- cgit v1.2.2 From 804ca90f3fe35dd7c12889eaa74a44abbc4b91fd Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Fri, 15 Mar 2013 00:59:55 +0100 Subject: drm/nouveau/hwmon: do not expose a buggy temperature if it is unavailable Signed-off-by: Martin Peres Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_pm.c | 44 +++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c index bb54098c6d97..936b442a6ab7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_pm.c +++ b/drivers/gpu/drm/nouveau/nouveau_pm.c @@ -402,8 +402,12 @@ nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf) struct drm_device *dev = dev_get_drvdata(d); struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_therm *therm = 
nouveau_therm(drm->device); + int temp = therm->temp_get(therm); - return snprintf(buf, PAGE_SIZE, "%d\n", therm->temp_get(therm) * 1000); + if (temp < 0) + return temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp * 1000); } static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, nouveau_hwmon_show_temp, NULL, 0); @@ -871,7 +875,12 @@ static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO | S_IWUSR, nouveau_hwmon_get_pwm1_max, nouveau_hwmon_set_pwm1_max, 0); -static struct attribute *hwmon_attributes[] = { +static struct attribute *hwmon_default_attributes[] = { + &sensor_dev_attr_name.dev_attr.attr, + &sensor_dev_attr_update_rate.dev_attr.attr, + NULL +}; +static struct attribute *hwmon_temp_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_auto_point1_pwm.dev_attr.attr, &sensor_dev_attr_temp1_auto_point1_temp.dev_attr.attr, @@ -882,8 +891,6 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp1_emergency.dev_attr.attr, &sensor_dev_attr_temp1_emergency_hyst.dev_attr.attr, - &sensor_dev_attr_name.dev_attr.attr, - &sensor_dev_attr_update_rate.dev_attr.attr, NULL }; static struct attribute *hwmon_fan_rpm_attributes[] = { @@ -898,8 +905,11 @@ static struct attribute *hwmon_pwm_fan_attributes[] = { NULL }; -static const struct attribute_group hwmon_attrgroup = { - .attrs = hwmon_attributes, +static const struct attribute_group hwmon_default_attrgroup = { + .attrs = hwmon_default_attributes, +}; +static const struct attribute_group hwmon_temp_attrgroup = { + .attrs = hwmon_temp_attributes, }; static const struct attribute_group hwmon_fan_rpm_attrgroup = { .attrs = hwmon_fan_rpm_attributes, @@ -931,13 +941,22 @@ nouveau_hwmon_init(struct drm_device *dev) } dev_set_drvdata(hwmon_dev, dev); - /* default sysfs entries */ - ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_attrgroup); + /* set the default attributes */ + ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_default_attrgroup); if (ret) { if (ret) goto error; } + /* if the card has a working thermal sensor */ + if (therm->temp_get(therm) >= 0) { + ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_temp_attrgroup); + if (ret) { + if (ret) + goto error; + } + } + /* if the card has a pwm fan */ /*XXX: incorrect, need better detection for this, some boards have * the gpio entries for pwm fan control even when there's no @@ -979,11 +998,10 @@ nouveau_hwmon_fini(struct drm_device *dev) struct nouveau_pm *pm = nouveau_pm(dev); if (pm->hwmon) { - sysfs_remove_group(&pm->hwmon->kobj, &hwmon_attrgroup); - sysfs_remove_group(&pm->hwmon->kobj, - &hwmon_pwm_fan_attrgroup); - sysfs_remove_group(&pm->hwmon->kobj, - &hwmon_fan_rpm_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_default_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_temp_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_pwm_fan_attrgroup); + sysfs_remove_group(&pm->hwmon->kobj, &hwmon_fan_rpm_attrgroup); hwmon_device_unregister(pm->hwmon); } -- cgit v1.2.2 From 778141e3cf0bf29f91cd3cb5c314ea477b9402a7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Mar 2013 11:41:46 +0900 Subject: perf: Reset hwc->last_period on sw clock events When cpu/task clock events are initialized, their sampling frequencies are converted to have a fixed value. However it missed to update the hwc->last_period which was set to 1 for initial sampling frequency calibration. 
Because this hwc->last_period value is used as the period in perf_swevent_hrtimer(), every recorded sample will have an incorrect period of 1. $ perf record -e task-clock noploop 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.158 MB perf.data (~6919 samples) ] $ perf report -n --show-total-period --stdio # Samples: 4K of event 'task-clock' # Event count (approx.): 4000 # # Overhead Samples Period Command Shared Object Symbol # ........ ............ ............ ....... ............. .................. # 99.95% 3998 3998 noploop noploop [.] main 0.03% 1 1 noploop libc-2.15.so [.] init_cacheinfo 0.03% 1 1 noploop ld-2.15.so [.] open_verify Note that this doesn't affect non-sampling events, so perf stat still reports correct values with or without this patch. $ perf stat -e task-clock noploop 1 Performance counter stats for 'noploop 1': 1000.272525 task-clock # 1.000 CPUs utilized 1.000560605 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1363574507-18808-1-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index b0cd86501c30..fa79c377d65d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5647,6 +5647,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) event->attr.sample_period = NSEC_PER_SEC / freq; hwc->sample_period = event->attr.sample_period; local64_set(&hwc->period_left, hwc->sample_period); + hwc->last_period = hwc->sample_period; event->attr.freq = 0; } } -- cgit v1.2.2 From 06d9db7273c7bd5b07624b313faeea57a4b31056 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 15 Mar 2013 18:58:50 +0530 Subject: usb: musb: gadget: do *unmap_dma_buffer* only for valid DMA addr musb does not use a DMA buffer for ep0, but it uses the same giveback function *musb_g_giveback* for all endpoints (*musb_g_ep0_giveback* calls *musb_g_giveback*). So in the ep0 case request.dma will be '0', which results in a kernel OOPS if *unmap_dma_buffer* is attempted for ep0 requests. Fix it by doing *unmap_dma_buffer* only for a valid DMA addr and by checking that musb_ep->dma is valid when unmapping.
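For illustration, a minimal standalone C sketch of the guard (stand-in types and values, not the actual musb code): the giveback path must skip the unmap whenever the endpoint has no DMA channel or the request was never mapped, as is the case for ep0.

	#include <stdio.h>

	#define DMA_ADDR_INVALID (~0u)

	struct fake_ep  { void *dma; };     /* stand-in for musb_ep->dma (NULL on ep0) */
	struct fake_req { unsigned dma; };  /* stand-in for request->request.dma */

	static void unmap_dma_buffer(struct fake_ep *ep, struct fake_req *req)
	{
		/* modeled on the fix: bail out unless the endpoint really has a
		 * DMA channel and the request carries a mapped address */
		if (!ep->dma || req->dma == 0 || req->dma == DMA_ADDR_INVALID) {
			printf("skip unmap (ep0 or unmapped request)\n");
			return;
		}
		printf("unmap 0x%x\n", req->dma);
	}

	int main(void)
	{
		struct fake_ep ep0  = { .dma = NULL };  /* ep0 never uses DMA */
		struct fake_req req = { .dma = 0 };     /* request.dma == 0 for ep0 */

		unmap_dma_buffer(&ep0, &req);           /* must not touch address 0 */
		return 0;
	}
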
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index be18537c5f14..83eddedcd9be 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -141,7 +141,9 @@ static inline void map_dma_buffer(struct musb_request *request, static inline void unmap_dma_buffer(struct musb_request *request, struct musb *musb) { - if (!is_buffer_mapped(request)) + struct musb_ep *musb_ep = request->ep; + + if (!is_buffer_mapped(request) || !musb_ep->dma) return; if (request->request.dma == DMA_ADDR_INVALID) { @@ -195,7 +197,10 @@ __acquires(ep->musb->lock) ep->busy = 1; spin_unlock(&musb->lock); - unmap_dma_buffer(req, musb); + + if (!dma_mapping_error(&musb->g.dev, request->dma)) + unmap_dma_buffer(req, musb); + if (request->status == 0) dev_dbg(musb->controller, "%s done request %p, %d/%d\n", ep->end_point.name, request, -- cgit v1.2.2 From d610d98b5de6860feb21539726e9af7c9094151c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Mar 2013 16:27:13 +0900 Subject: perf: Generate EXIT event only once per task context perf_event_task_event() iterates pmu list and generate events for each eligible pmu context. But if task_event has task_ctx like in EXIT it'll generate events even though the pmu doesn't have an eligible one. Fix it by moving the code to proper places. Before this patch: $ perf record -n true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.006 MB perf.data (~248 samples) ] $ perf report -D | tail Aggregated stats: TOTAL events: 73 MMAP events: 67 COMM events: 2 EXIT events: 4 cycles stats: TOTAL events: 73 MMAP events: 67 COMM events: 2 EXIT events: 4 After this patch: $ perf report -D | tail Aggregated stats: TOTAL events: 70 MMAP events: 67 COMM events: 2 EXIT events: 1 cycles stats: TOTAL events: 70 MMAP events: 67 COMM events: 2 EXIT events: 1 Signed-off-by: Namhyung Kim Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1363332433-7637-1-git-send-email-namhyung@kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index fa79c377d65d..59412d037eed 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4434,12 +4434,15 @@ static void perf_event_task_event(struct perf_task_event *task_event) if (ctxn < 0) goto next; ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) + perf_event_task_ctx(ctx, task_event); } - if (ctx) - perf_event_task_ctx(ctx, task_event); next: put_cpu_ptr(pmu->pmu_cpu_context); } + if (task_event->task_ctx) + perf_event_task_ctx(task_event->task_ctx, task_event); + rcu_read_unlock(); } -- cgit v1.2.2 From 31b6945a899a30f9dffa9cba8ed2e494784810a9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 17 Mar 2013 10:23:40 +0100 Subject: ALSA: hda - Fix missing beep detach in patch_conexant.c This leaks the beep input device after module unload, which leads to Oops. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=55321 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 941bf6c766ec..1051a88f5304 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3191,11 +3191,17 @@ static int cx_auto_build_controls(struct hda_codec *codec) return 0; } +static void cx_auto_free(struct hda_codec *codec) +{ + snd_hda_detach_beep_device(codec); + snd_hda_gen_free(codec); +} + static const struct hda_codec_ops cx_auto_patch_ops = { .build_controls = cx_auto_build_controls, .build_pcms = snd_hda_gen_build_pcms, .init = snd_hda_gen_init, - .free = snd_hda_gen_free, + .free = cx_auto_free, .unsol_event = snd_hda_jack_unsol_event, #ifdef CONFIG_PM .check_power_status = snd_hda_gen_check_power_status, -- cgit v1.2.2 From a37b2dc52b88ccd926099d852eae1bb324bc92eb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 2 Mar 2013 12:31:39 +0530 Subject: ARC: Remove SET_PERSONALITY (tracks cross-arch change) Tracks commit e72837e3e7b "default SET_PERSONALITY() in linux/elf.h" Signed-off-by: Vineet Gupta --- arch/arc/include/asm/elf.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index f4c8d36ebecb..a26282857683 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -72,7 +72,4 @@ extern int elf_check_arch(const struct elf32_hdr *); */ #define ELF_PLATFORM (NULL) -#define SET_PERSONALITY(ex) \ - set_personality(PER_LINUX | (current->personality & (~PER_MASK))) - #endif -- cgit v1.2.2 From 65c10553552b487a71bf5e4676743435046fae6f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 14 Mar 2013 20:52:30 +0900 Subject: kprobes: Make hash_64() as always inlined Because hash_64() is called from the get_kprobe() inside int3 handler, kernel causes int3 recursion and crashes if kprobes user puts a probe on it. Usually hash_64() is inlined into caller function, but in some cases, it has instances by gcc's interprocedural constant propagation. This patch uses __always_inline instead of inline to prevent gcc from doing such things. Reported-by: Timo Juhani Lindfors Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Pavel Emelyanov Cc: Jiri Kosina Cc: Nadia Yvette Chambers Cc: yrl.pp-manager.tt@hitachi.com Cc: David S. Miller Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20130314115230.19690.39387.stgit@mhiramat-M0-7522 Signed-off-by: Ingo Molnar --- include/linux/hash.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/hash.h b/include/linux/hash.h index 61c97ae22e01..f09a0ae4d858 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -15,6 +15,7 @@ */ #include +#include /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ #define GOLDEN_RATIO_PRIME_32 0x9e370001UL @@ -31,7 +32,7 @@ #error Wordsize not 32 or 64 #endif -static inline u64 hash_64(u64 val, unsigned int bits) +static __always_inline u64 hash_64(u64 val, unsigned int bits) { u64 hash = val; -- cgit v1.2.2 From 9a556ab998071457e79b319f2527642dd6e50617 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 14 Mar 2013 20:52:43 +0900 Subject: kprobes/x86: Check Interrupt Flag modifier when registering probe Currently kprobes check whether the copied instruction modifies IF (interrupt flag) on each probe hit. 
This results not only in introducing overhead but also involving inat_get_opcode_attribute into the kprobes hot path, and it can cause an infinite recursive call (and kernel panic in the end). Actually, since the copied instruction itself can never be modified on the buffer, it is needless to analyze the instruction on every probe hit. To fix this issue, we check it only once when registering probe and store the result on ainsn->if_modifier. Reported-by: Timo Juhani Lindfors Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: yrl.pp-manager.tt@hitachi.com Cc: Steven Rostedt Cc: David S. Miller Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20130314115242.19690.33573.stgit@mhiramat-M0-7522 Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kprobes.h | 1 + arch/x86/kernel/kprobes/core.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index d3ddd17405d0..5a6d2873f80e 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -77,6 +77,7 @@ struct arch_specific_insn { * a post_handler or break_handler). */ int boostable; + bool if_modifier; }; struct arch_optimized_insn { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3f06e6149981..7bfe318d3d8a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -375,6 +375,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) else p->ainsn.boostable = -1; + /* Check whether the instruction modifies Interrupt Flag or not */ + p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); + /* Also, displacement change doesn't affect the first byte */ p->opcode = p->ainsn.insn[0]; } @@ -434,7 +437,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - if (is_IF_modifier(p->ainsn.insn)) + if (p->ainsn.if_modifier) kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; } -- cgit v1.2.2 From fd4a5aef002bb57e8a35ed34d8a878034b9bde94 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sun, 17 Mar 2013 14:49:57 +0100 Subject: perf/x86: Add SNB/SNB-EP scheduling constraints for cycle_activity event Add scheduling constraints for SNB/SNB-EP CYCLE_ACTIVITY event as defined by SDM Jan 2013 edition. The STALLS umasks are combinations with the NO_DISPATCH umask. 
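As a quick cross-check of that statement, a small standalone sketch (assuming the usual Intel encoding: low byte = event select 0xa3, second byte = umask) decodes the four codes added here and shows that the STALLS umasks are just NO_DISPATCH (0x04) OR'ed with the corresponding PENDING bit:

	#include <stdio.h>

	int main(void)
	{
		static const struct { unsigned code; const char *name; } ev[] = {
			{ 0x04a3, "CYCLES_NO_DISPATCH" },
			{ 0x05a3, "STALLS_L2_PENDING" },
			{ 0x02a3, "CYCLES_L1D_PENDING" },
			{ 0x06a3, "STALLS_L1D_PENDING" },
		};

		for (unsigned i = 0; i < sizeof(ev) / sizeof(ev[0]); i++) {
			unsigned umask = (ev[i].code >> 8) & 0xff;

			/* a STALLS umask carries NO_DISPATCH plus a PENDING bit */
			printf("%-20s event=0x%02x umask=0x%02x%s\n", ev[i].name,
			       ev[i].code & 0xff, umask,
			       (umask & 0x04) && (umask & ~0x04u)
					? " = 0x04 | PENDING" : "");
		}
		return 0;
	}
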
Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/20130317134957.GA8550@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 529c8931fc02..dab7580c47ae 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -101,6 +101,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ + INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ -- cgit v1.2.2 From a86b1a2cd2f81f74e815e07f756edd7bc5b6f034 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Mar 2013 11:00:44 +0100 Subject: ALSA: hda/cirrus - Fix the digital beep registration The argument passed to snd_hda_attach_beep_device() is a widget NID while spec->beep_amp holds the composed value for amp controls. Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 1051a88f5304..2a89d1eefeb6 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1142,7 +1142,7 @@ static int patch_cxt5045(struct hda_codec *codec) } if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); return 0; } @@ -1921,7 +1921,7 @@ static int patch_cxt5051(struct hda_codec *codec) } if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); return 0; } @@ -3099,7 +3099,7 @@ static int patch_cxt5066(struct hda_codec *codec) } if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); return 0; } @@ -3397,7 +3397,7 @@ static int patch_conexant_auto(struct hda_codec *codec) codec->patch_ops = cx_auto_patch_ops; if (spec->beep_amp) - snd_hda_attach_beep_device(codec, spec->beep_amp); + snd_hda_attach_beep_device(codec, get_amp_nid_(spec->beep_amp)); /* Some laptops with Conexant chips show stalls in S3 resume, * which falls into the single-cmd mode. -- cgit v1.2.2 From 0d96724e298c08ba24589b4802b0a26b6a237721 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 18 Mar 2013 10:12:56 +0000 Subject: arm64: Removed unused variable in compat_setup_rt_frame() Recent clean-up of the compat signal code left an unused 'stack' variable. 
Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal32.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 7f4f3673f2bc..e393174fe859 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -549,7 +549,6 @@ int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct compat_rt_sigframe __user *frame; - compat_stack_t stack; int err = 0; frame = compat_get_sigframe(ka, regs, sizeof(*frame)); -- cgit v1.2.2 From 9d1a455b0ca1c2c956b4d9ab212864a8695270f1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Mar 2013 11:25:36 +0100 Subject: drm/i915: Use the fixed pixel clock for eDP in intel_dp_set_m_n() The eDP output on HP Z1 is still broken when X is started, even after fixing the infinite link-train loop. The regression was introduced in the 3.6 kernel by the clean-up of the mode clock handling code in intel_dp.c, commit [71244653: drm/i915: adjusted_mode->clock in the dp mode_fix]. In the past, the clock of the reference mode was modified in intel_dp_mode_fixup() in the case of eDP fixed clock, and this clock was used for the calculation in intel_dp_set_m_n(). This override was removed, thus the wrong mode clock is used for the calculation, resulting in a psychedelic smoking output in the end. This patch corrects the clock used in that place. v1->v2: Use intel_edp_target_clock() for checking eDP fixed clock instead of open code as in ironlake_set_m_n(). Cc: Signed-off-by: Takashi Iwai Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index d46dde5a51e3..d7d4afe01341 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -820,6 +820,7 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, struct intel_link_m_n m_n; int pipe = intel_crtc->pipe; enum transcoder cpu_transcoder = intel_crtc->cpu_transcoder; + int target_clock; /* * Find the lane count in the intel_encoder private @@ -835,13 +836,22 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode, } } + target_clock = mode->clock; + for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + if (intel_encoder->type == INTEL_OUTPUT_EDP) { + target_clock = intel_edp_target_clock(intel_encoder, + mode); + break; + } + } + /* * Compute the GMCH and Link ratios. The '3' here is * the number of bytes_per_pixel post-LUT, which we always * set up for 8-bits of R/G/B, or 3 bytes total. */ intel_link_compute_m_n(intel_crtc->bpp, lane_count, - mode->clock, adjusted_mode->clock, &m_n); + target_clock, adjusted_mode->clock, &m_n); if (IS_HASWELL(dev)) { I915_WRITE(PIPE_DATA_M1(cpu_transcoder), -- cgit v1.2.2 From 362132d228ef37c1e2d31ad5d649a7ed65efe539 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 13 Mar 2013 22:28:46 +0100 Subject: MAINTAINERS: intel-gfx is no longer subscribers-only It is still filtered for non-subscribers, though, but without pissing off people with moderation queue spam. So drop the subscribers-only tag to make getmaintainers.pl dtrt (do the right thing).
Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 95616582c728..16439ee11150 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2629,7 +2629,7 @@ F: include/uapi/drm/ INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) M: Daniel Vetter -L: intel-gfx@lists.freedesktop.org (subscribers-only) +L: intel-gfx@lists.freedesktop.org L: dri-devel@lists.freedesktop.org T: git git://people.freedesktop.org/~danvet/drm-intel S: Supported -- cgit v1.2.2 From a2c91547b5b1b9ae515851a85b5574f205b8e1c4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 18 Feb 2013 18:22:14 +0000 Subject: arm64: Fix build error with !SMP The __atomic_hash is only defined when SMP is enabled but the arm64ksyms.c exports it even for the UP case. Signed-off-by: Catalin Marinas --- arch/arm64/kernel/arm64ksyms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index cef3925eaf60..aa3e948f7885 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -40,7 +40,9 @@ EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); /* bitops */ +#ifdef CONFIG_SMP EXPORT_SYMBOL(__atomic_hash); +#endif /* physical memory */ EXPORT_SYMBOL(memstart_addr); -- cgit v1.2.2 From 18931c892724cb9408811c793fe2656d11573b3a Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 26 Feb 2013 16:55:54 +0000 Subject: arm64: fix padding computation in struct ucontext The expression to compute the padding needed to fill the uc_sigmask field to 1024 bits actually computes the padding needed for 1080 bits. Fortunately, due to the 16-byte alignment of the following field (uc_mcontext) the definition in glibc contains enough bytes of padding after uc_sigmask so that the overall offsets and size match in both definitions. 
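The arithmetic, assuming sizeof(sigset_t) == 8 on arm64 (a 64-bit mask): the old expression (1024 - sizeof(sigset_t)) / 8 mixes units, subtracting bytes from bits before dividing, giving (1024 - 8) / 8 = 127 pad bytes, so uc_sigmask plus padding spans 8 + 127 = 135 bytes = 1080 bits; the fixed expression 1024 / 8 - sizeof(sigset_t) = 128 - 8 = 120 pad bytes yields exactly 128 bytes = 1024 bits. A small sketch verifying this:

	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned long ss = 8;	/* sizeof(sigset_t) on arm64: 64-bit mask */

		unsigned long old_pad = (1024 - ss) / 8; /* 127: bits mixed with bytes */
		unsigned long new_pad = 1024 / 8 - ss;   /* 120 bytes */

		printf("old: %lu bits\n", (ss + old_pad) * 8); /* 1080 */
		printf("new: %lu bits\n", (ss + new_pad) * 8); /* 1024 */

		assert((ss + new_pad) * 8 == 1024);
		return 0;
	}
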
Signed-off-by: Andreas Schwab Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ucontext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/ucontext.h b/arch/arm64/include/asm/ucontext.h index bde960720892..42e04c877428 100644 --- a/arch/arm64/include/asm/ucontext.h +++ b/arch/arm64/include/asm/ucontext.h @@ -22,7 +22,7 @@ struct ucontext { stack_t uc_stack; sigset_t uc_sigmask; /* glibc uses a 1024-bit sigset_t */ - __u8 __unused[(1024 - sizeof(sigset_t)) / 8]; + __u8 __unused[1024 / 8 - sizeof(sigset_t)]; /* last for future expansion */ struct sigcontext uc_mcontext; }; -- cgit v1.2.2 From 4502403dcf8f5c76abd4dbab8726c8e4ecb5cd34 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 16 Mar 2013 12:48:11 +0300 Subject: selinux: use GFP_ATOMIC under spin_lock The call tree here is: sk_clone_lock() <- takes bh_lock_sock(newsk); xfrm_sk_clone_policy() __xfrm_sk_clone_policy() clone_policy() <- uses GFP_ATOMIC for allocations security_xfrm_policy_clone() security_ops->xfrm_policy_clone_security() selinux_xfrm_policy_clone() Signed-off-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: James Morris --- security/selinux/xfrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 48665ecd1197..8ab295154517 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -310,7 +310,7 @@ int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, if (old_ctx) { new_ctx = kmalloc(sizeof(*old_ctx) + old_ctx->ctx_len, - GFP_KERNEL); + GFP_ATOMIC); if (!new_ctx) return -ENOMEM; -- cgit v1.2.2 From b4811bacbc68f6e17d442df88f98afaa9394d4f5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2013 17:36:37 +0100 Subject: ARM: fix CONFIG_VIRT_TO_BUS handling 887cbce0 "arch Kconfig: centralise CONFIG_ARCH_NO_VIRT_TO_BUS" and 4febd95a8 "Select VIRT_TO_BUS directly where needed" from Stephen Rothwell changed globally how CONFIG_VIRT_TO_BUS is selected, while my own a5d533ee0 "ARM: disable virt_to_bus/ virt_to_bus almost everywhere" was merged at the same time and changed which platforms select it on ARM. The result of this conflict was that we again see CONFIG_VIRT_TO_BUS on all ARM systems. This patch fixes up the problem and removes CONFIG_ARCH_NO_VIRT_TO_BUS again on ARM. Signed-off-by: Arnd Bergmann Cc: Russell King Cc: Stephen Rothwell --- arch/arm/Kconfig | 7 ++----- arch/arm/mach-footbridge/Kconfig | 1 + 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ca1b6fd94a3f..6d6e77c89691 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -49,7 +49,6 @@ config ARM select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 - select HAVE_VIRT_TO_BUS select KTIME_SCALAR select PERF_USE_VMALLOC select RTC_LIB @@ -743,6 +742,7 @@ config ARCH_RPC select NEED_MACH_IO_H select NEED_MACH_MEMORY_H select NO_IOPORT + select VIRT_TO_BUS help On the Acorn Risc-PC, Linux can support the internal IDE disk and CD-ROM interface, serial and parallel port, and the floppy drive. 
@@ -878,6 +878,7 @@ config ARCH_SHARK select ISA_DMA select NEED_MACH_MEMORY_H select PCI + select VIRT_TO_BUS select ZONE_DMA help Support for the StrongARM based Digital DNARD machine, also known @@ -1461,10 +1462,6 @@ config ISA_DMA bool select ISA_DMA_API -config ARCH_NO_VIRT_TO_BUS - def_bool y - depends on !ARCH_RPC && !ARCH_NETWINDER && !ARCH_SHARK - # Select ISA DMA interface config ISA_DMA_API bool diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index abda5a18a664..0f2111a11315 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -67,6 +67,7 @@ config ARCH_NETWINDER select ISA select ISA_DMA select PCI + select VIRT_TO_BUS help Say Y here if you intend to run this kernel on the Rebel.COM NetWinder. Information about this machine can be found at: -- cgit v1.2.2 From 3d464d9b71ef2f2b40a4bc9dcf06794fd1be9d12 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Mon, 18 Mar 2013 09:45:42 -0400 Subject: HID: usbhid: quirk for Realtek Multi-card reader This device needs to be added to the quirks list with HID_QUIRK_NO_INIT_REPORTS, otherwise it causes 10 seconds timeout during report initialization. This fixes Red Hat bugzilla https://bugzilla.redhat.com/show_bug.cgi?id=806587 Cc: stable@vger.kernel.org Signed-off-by: Josh Boyer Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 6e5c2ffa8d96..3fc5c33561d5 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -681,6 +681,9 @@ #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001 0x3001 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008 +#define USB_VENDOR_ID_REALTEK 0x0bda +#define USB_DEVICE_ID_REALTEK_READER 0x0152 + #define USB_VENDOR_ID_ROCCAT 0x1e7d #define USB_DEVICE_ID_ROCCAT_ARVO 0x30d4 #define USB_DEVICE_ID_ROCCAT_ISKU 0x319c diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index e0e6abf1cd3b..e991d81602b6 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -80,6 +80,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_PRODIGE, USB_DEVICE_ID_PRODIGE_CORDLESS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001, HID_QUIRK_NOGET }, { USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_REALTEK, USB_DEVICE_ID_REALTEK_READER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SIGMATEL, USB_DEVICE_ID_SIGMATEL_STMP3780, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, -- cgit v1.2.2 From 620ae90ed8ca8b6e40cb9e10279b4f5ef9f0ab81 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Mon, 18 Mar 2013 09:47:02 -0400 Subject: HID: usbhid: quirk for MSI GX680R led panel This keyboard backlight device causes a 10 second delay to boot. Add it to the quirk list with HID_QUIRK_NO_INIT_REPORTS. 
This fixes Red Hat bugzilla https://bugzilla.redhat.com/show_bug.cgi?id=907221 Cc: stable@vger.kernel.org Signed-off-by: Josh Boyer Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3fc5c33561d5..49b88a0608fb 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -587,6 +587,9 @@ #define USB_VENDOR_ID_MONTEREY 0x0566 #define USB_DEVICE_ID_GENIUS_KB29E 0x3004 +#define USB_VENDOR_ID_MSI 0x1770 +#define USB_DEVICE_ID_MSI_GX680R_LED_PANEL 0xff00 + #define USB_VENDOR_ID_NATIONAL_SEMICONDUCTOR 0x0400 #define USB_DEVICE_ID_N_S_HARMONY 0xc359 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index e991d81602b6..476c984dbdf3 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -73,6 +73,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, + { USB_VENDIR_ID_MSI, USB_DEVICE_ID_MSI_GX680R_LED_PANEL, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_MOUSE, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS }, -- cgit v1.2.2 From 570637dc8eeb2faba06228d497ff40bb019bcc93 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 18 Mar 2013 15:50:10 +0100 Subject: HID: usbhid: fix build problem Fix build problem caused by typo introduced by 620ae90ed8 ("HID: usbhid: quirk for MSI GX680R led panel"). Reported-by: fengguang.wu@intel.com Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 476c984dbdf3..19b8360f2330 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -73,7 +73,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, - { USB_VENDIR_ID_MSI, USB_DEVICE_ID_MSI_GX680R_LED_PANEL, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GX680R_LED_PANEL, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_MOUSE, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS }, -- cgit v1.2.2 From f8264340e694604863255cc0276491d17c402390 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 25 Feb 2013 10:56:01 -0800 Subject: USB: xhci - fix bit definitions for IMAN register According to XHCI specification (5.5.2.1) the IP is bit 0 and IE is bit 1 of IMAN register. Previously their definitions were reversed. Even though there are no ill effects being observed from the swapped definitions (because IMAN_IP is RW1C and in legacy PCI case we come in with it already set to 1 so it was clearing itself even though we were setting IMAN_IE instead of IMAN_IP), we should still correct the values. 
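A standalone sketch of why the swap was benign in the legacy-PCI case described above, modeling only the RW1C (write-1-to-clear) semantics of bit 0 with made-up register values, not the xHCI driver code:

	#include <stdio.h>

	#define IP (1 << 0)	/* interrupt pending, RW1C */
	#define IE (1 << 1)	/* interrupt enable, plain RW */

	/* apply a write to the modeled IMAN register: IE is stored as written,
	 * while writing 1 to IP clears it and writing 0 leaves it alone */
	static unsigned iman_write(unsigned reg, unsigned val)
	{
		unsigned ip = (reg & IP) & ~val;
		return (val & IE) | ip;
	}

	int main(void)
	{
		unsigned iman = IP;	/* legacy PCI: IP already pending */

		/* the old code read IMAN back and OR'ed in the swapped "IP"
		 * define (really bit 1); bit 0 of the written value was still 1
		 * from the read-back, so the pending IP cleared anyway */
		iman = iman_write(iman, iman | (1 << 1));
		printf("IP=%u IE=%u\n", !!(iman & IP), !!(iman & IE)); /* IP=0 IE=1 */
		return 0;
	}
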
This patch should be backported to kernels as old as 2.6.36, that contain the commit 4e833c0b87a30798e67f06120cecebef6ee9644c "xhci: don't re-enable IE constantly". Signed-off-by: Dmitry Torokhov Signed-off-by: Sarah Sharp Cc: stable@vger.kernel.org --- drivers/usb/host/xhci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index f791bd0aee6c..2c510e4a7d4c 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -206,8 +206,8 @@ struct xhci_op_regs { /* bits 12:31 are reserved (and should be preserved on writes). */ /* IMAN - Interrupt Management Register */ -#define IMAN_IP (1 << 1) -#define IMAN_IE (1 << 0) +#define IMAN_IE (1 << 1) +#define IMAN_IP (1 << 0) /* USBSTS - USB status - status bitmasks */ /* HC not running - set to 1 when run/stop bit is cleared. */ -- cgit v1.2.2 From 0e401101db49959f5783f6ee9e676124b5a183ac Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 18 Mar 2013 11:40:19 -0400 Subject: ext4: fix memory leakage in mext_check_coverage Regression was introduced by following commit 8c854473 TESTCASE (git://oss.sgi.com/xfs/cmds/xfstests.git): #while true;do ./check 301 || break ;done Also fix potential memory leakage in get_ext_path() once ext4_ext_find_extent() have failed. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c1f15b203e98..bbae4ed15c3d 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -32,16 +32,18 @@ */ static inline int get_ext_path(struct inode *inode, ext4_lblk_t lblock, - struct ext4_ext_path **path) + struct ext4_ext_path **orig_path) { int ret = 0; + struct ext4_ext_path *path; - *path = ext4_ext_find_extent(inode, lblock, *path); - if (IS_ERR(*path)) { - ret = PTR_ERR(*path); - *path = NULL; - } else if ((*path)[ext_depth(inode)].p_ext == NULL) + path = ext4_ext_find_extent(inode, lblock, *orig_path); + if (IS_ERR(path)) + ret = PTR_ERR(path); + else if (path[ext_depth(inode)].p_ext == NULL) ret = -ENODATA; + else + *orig_path = path; return ret; } @@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, { struct ext4_ext_path *path = NULL; struct ext4_extent *ext; + int ret = 0; ext4_lblk_t last = from + count; while (from < last) { *err = get_ext_path(inode, from, &path); if (*err) - return 0; + goto out; ext = path[ext_depth(inode)].p_ext; - if (!ext) { - ext4_ext_drop_refs(path); - return 0; - } - if (uninit != ext4_ext_is_uninitialized(ext)) { - ext4_ext_drop_refs(path); - return 0; - } + if (uninit != ext4_ext_is_uninitialized(ext)) + goto out; from += ext4_ext_get_actual_len(ext); ext4_ext_drop_refs(path); } - return 1; + ret = 1; +out: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + return ret; } /** -- cgit v1.2.2 From 039eb75350acd1131a18a9bd12a0d4e1fb17892e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Mar 2013 16:55:49 +0100 Subject: ALSA: hda - Fix yet missing GPIO/EAPD setup in cirrus driver I forgot to update spec->gpio_data in the automute hook, so it will be overridden at the init sequence, thus the machine is still silent when no headphone jack is plugged at boot time. 
Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cirrus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 60d08f669f0c..0d9c58f13560 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -168,10 +168,10 @@ static void cs_automute(struct hda_codec *codec) snd_hda_gen_update_outputs(codec); if (spec->gpio_eapd_hp) { - unsigned int gpio = spec->gen.hp_jack_present ? + spec->gpio_data = spec->gen.hp_jack_present ? spec->gpio_eapd_hp : spec->gpio_eapd_speaker; snd_hda_codec_write(codec, 0x01, 0, - AC_VERB_SET_GPIO_DATA, gpio); + AC_VERB_SET_GPIO_DATA, spec->gpio_data); } } -- cgit v1.2.2 From b009aac12cd0fe34293c68af8ac48b85be3bd858 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Fri, 15 Mar 2013 11:56:17 +0000 Subject: bnx2x: fix occasional statistics off-by-4GB error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UPDATE_QSTAT function introduced on February 15, 2012 in commit 1355b704b9ba "bnx2x: consistent statistics after internal driver reload" incorrectly fails to handle overflow during addition of the lower 32-bit field of a stat. This bug is present since 3.4-rc1 and should thus be considered a candidate for stable 3.4+ releases. Google-Bug-Id: 8374428 Signed-off-by: Maciej Å»enczykowski Cc: Mintz Yuval Acked-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h index 364e37ecbc5c..198f6f1c9ad5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h @@ -459,8 +459,9 @@ struct bnx2x_fw_port_stats_old { #define UPDATE_QSTAT(s, t) \ do { \ - qstats->t##_hi = qstats_old->t##_hi + le32_to_cpu(s.hi); \ qstats->t##_lo = qstats_old->t##_lo + le32_to_cpu(s.lo); \ + qstats->t##_hi = qstats_old->t##_hi + le32_to_cpu(s.hi) \ + + ((qstats->t##_lo < qstats_old->t##_lo) ? 1 : 0); \ } while (0) #define UPDATE_QSTAT_OLD(f) \ -- cgit v1.2.2 From ebaf5795ef57a70a042ea259448a465024e2821d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Mar 2013 23:45:11 +0800 Subject: Bluetooth: Add support for Dell[QCA 0cf3:817a] Add support for the AR9462 chip T: Bus=03 Lev=01 Prnt=01 Port=08 Cnt=01 Dev#= 5 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=817a Rev= 0.02 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) 
MxPS= 16 Ivl=1ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms Cc: Cc: Gustavo Padovan Signed-off-by: Ming Lei Signed-off-by: Gustavo Padovan --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 0a6ef6b694d4..8af01c177ce5 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -77,6 +77,7 @@ static struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0x3004) }, { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, + { USB_DEVICE(0x0CF3, 0x817a) }, { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x04CA, 0x3004) }, { USB_DEVICE(0x04CA, 0x3005) }, @@ -112,6 +113,7 @@ static struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 11ac3036bb8a..2cc5f774a29c 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -135,6 +135,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, -- cgit v1.2.2 From 0d4f0608619de59fd8169dd8e72aadc28d80e715 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Mar 2013 07:01:28 +0000 Subject: tcp: dont handle MTU reduction on LISTEN socket When an ICMP ICMP_FRAG_NEEDED (or ICMPV6_PKT_TOOBIG) message finds a LISTEN socket, and this socket is currently owned by the user, we set TCP_MTU_REDUCED_DEFERRED flag in listener tsq_flags. This is bad because if we clone the parent before it had a chance to clear the flag, the child inherits the tsq_flags value, and next tcp_release_cb() on the child will decrement sk_refcnt. Result is that we might free a live TCP socket, as reported by Dormando. 
IPv4: Attempt to release TCP socket in state 1 Fix this issue by testing sk_state against TCP_LISTEN early, so that we set TCP_MTU_REDUCED_DEFERRED on appropriate sockets (not a LISTEN one) This bug was introduced in commit 563d34d05786 (tcp: dont drop MTU reduction indications) Reported-by: dormando Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 14 +++++++------- net/ipv6/tcp_ipv6.c | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a8ec457310f..d09203c63264 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -274,13 +274,6 @@ static void tcp_v4_mtu_reduced(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 mtu = tcp_sk(sk)->mtu_info; - /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs - * send out by Linux are always <576bytes so they should go through - * unfragmented). - */ - if (sk->sk_state == TCP_LISTEN) - return; - dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; @@ -408,6 +401,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = info; if (!sock_owned_by_user(sk)) { tcp_v4_mtu_reduced(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9b6460055df5..f6d629fd6aee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -389,6 +389,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (type == ICMPV6_PKT_TOOBIG) { + /* We are not interested in TCP_LISTEN and open_requests + * (SYN-ACKs send out by Linux are always <576bytes so + * they should go through unfragmented). + */ + if (sk->sk_state == TCP_LISTEN) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); -- cgit v1.2.2 From 83cdadd8b0559c93728d065d23ca3485fa567e54 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 22 Feb 2013 13:32:56 -0500 Subject: xfs: fix potential infinite loop in xfs_iomap_prealloc_size() If freesp == 0, we could end up in an infinite loop while squashing the preallocation. Break the loop when we've killed the prealloc entirely. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Ben Myers (cherry picked from commit e78c420bfc2608bb5f9a0b9165b1071c1e31166a) --- fs/xfs/xfs_iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 912d83d8860a..b0b0f448e843 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -413,7 +413,7 @@ xfs_iomap_prealloc_size( * have a large file on a small filesystem and the above * lowspace thresholds are smaller than MAXEXTLEN. */ - while (alloc_blocks >= freesp) + while (alloc_blocks && alloc_blocks >= freesp) alloc_blocks >>= 4; } -- cgit v1.2.2 From 66db3feb486c01349f767b98ebb10b0c3d2d021b Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Mon, 18 Mar 2013 11:02:21 -0400 Subject: x86-64: Fix the failure case in copy_user_handle_tail() The increment of "to" in copy_user_handle_tail() will have incremented before a failure has been noted. This causes us to skip a byte in the failure case. Only do the increment when assured there is no failure. 
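A standalone simulation of that off-by-one (fault injection instead of real page faults; not the kernel routine itself): when the put fails, the buggy loop has already advanced 'to' past the faulting byte, so the tail zeroing that follows in the real function would start one byte too far and skip it.

	#include <stdio.h>

	static char *base;
	static int fail_at;

	/* simulated __put_user: "faults" at one chosen destination offset */
	static int put_byte(char *dst, char c)
	{
		if (dst - base == fail_at)
			return -1;
		*dst = c;
		return 0;
	}

	static char *copy_tail(char *to, const char *from, unsigned len, int fixed)
	{
		if (fixed) {
			for (; len; --len, to++)	/* advance only on success */
				if (put_byte(to, *from++))
					break;
		} else {
			for (; len; --len)
				if (put_byte(to++, *from++)) /* moves even on fault */
					break;
		}
		return to;	/* where the caller's tail zeroing would start */
	}

	int main(void)
	{
		char dst[8];
		const char src[9] = "ABCDEFGH";

		base = dst;
		fail_at = 3;
		printf("buggy: resumes at offset %td (byte 3 skipped)\n",
		       copy_tail(dst, src, 8, 0) - dst);	/* 4 */
		printf("fixed: resumes at offset %td\n",
		       copy_tail(dst, src, 8, 1) - dst);	/* 3 */
		return 0;
	}
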
Signed-off-by: CQ Tang Link: http://lkml.kernel.org/r/20130318150221.8439.993.stgit@phlsvslse11.ph.intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: H. Peter Anvin Cc: --- arch/x86/lib/usercopy_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 05928aae911e..906fea315791 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -74,10 +74,10 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) char c; unsigned zero_len; - for (; len; --len) { + for (; len; --len, to++) { if (__get_user_nocheck(c, from++, sizeof(char))) break; - if (__put_user_nocheck(c, to++, sizeof(char))) + if (__put_user_nocheck(c, to, sizeof(char))) break; } -- cgit v1.2.2 From 3325beed46d8d14d873e94d89ea57ee900dec942 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Sun, 24 Feb 2013 13:04:37 -0600 Subject: xfs: fix xfs_iomap_eof_prealloc_initial_size type Fix the return type of xfs_iomap_eof_prealloc_initial_size() to xfs_fsblock_t to reflect the fact that the return value may be an unsigned 64 bits if XFS_BIG_BLKNOS is defined. Signed-off-by: Mark Tinguely Reviewed-by: Dave Chinner Signed-off-by: Ben Myers (cherry picked from commit e8108cedb1c5d1dc359690d18ca997e97a0061d2) --- fs/xfs/xfs_iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index b0b0f448e843..5a30dd899d2b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -325,7 +325,7 @@ xfs_iomap_eof_want_preallocate( * rather than falling short due to things like stripe unit/width alignment of * real extents. */ -STATIC int +STATIC xfs_fsblock_t xfs_iomap_eof_prealloc_initial_size( struct xfs_mount *mp, struct xfs_inode *ip, -- cgit v1.2.2 From e001873853d87674dd5b3cfa2851885023616695 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Mar 2013 23:30:34 +1100 Subject: xfs: ensure we capture IO errors correctly Failed buffer readahead can leave the buffer in the cache marked with an error. Most callers that then issue a subsequent read on the buffer do not zero the b_error field out, and so we may incorrectly detect an error during IO completion due to the stale error value left on the buffer. Avoid this problem by zeroing the error before IO submission. This ensures that the only IO errors that are detected are those captured from bio submission or completion. Signed-off-by: Dave Chinner Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers (cherry picked from commit c163f9a1760229a95d04e37b332de7d5c1c225cd) --- fs/xfs/xfs_buf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4e8f0df82d02..8459b5d8cb71 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1334,6 +1334,12 @@ _xfs_buf_ioapply( int size; int i; + /* + * Make sure we capture only current IO errors rather than stale errors + * left over from previous use of the buffer (e.g. failed readahead). + */ + bp->b_error = 0; + if (bp->b_flags & XBF_WRITE) { if (bp->b_flags & XBF_SYNCIO) rw = WRITE_SYNC; -- cgit v1.2.2 From a517b608fa3d9b65930ef53ffe4a2f9800e10f7d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 18 Mar 2013 10:49:07 -0400 Subject: nfsd: only unhash DRC entries that are in the hashtable It's not safe to call hlist_del() on a newly initialized hlist_node. That leads to a NULL pointer dereference. Only do that if the entry is hashed. Signed-off-by: Jeff Layton Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfscache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 62c1ee128aeb..18509bd6f587 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -102,7 +102,8 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { if (rp->c_type == RC_REPLBUFF) kfree(rp->c_replvec.iov_base); - hlist_del(&rp->c_hash); + if (!hlist_unhashed(&rp->c_hash)) + hlist_del(&rp->c_hash); list_del(&rp->c_lru); --num_drc_entries; kmem_cache_free(drc_slab, rp); -- cgit v1.2.2 From 7f42ace3118afedbd1848a349d01a11d9ca13d41 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 14 Mar 2013 12:48:40 +0100 Subject: iwl3945: fix length of dma buffers commit bdb084b22d8aee66c87af5e9c36bd6cf7f3bccfd Author: Stanislaw Gruszka Date: Wed Feb 13 15:49:08 2013 +0100 iwlegacy: more checks for dma mapping errors broke il3945_tx_skb() dma buffer length settings, what results on firmware errors like showed below and make 3945 device non usable. iwl3945 0000:02:00.0: Microcode SW error detected. Restarting 0x82000008. iwl3945 0000:02:00.0: Loaded firmware version: 15.32.2.9 iwl3945 0000:02:00.0: Start IWL Error Log Dump: iwl3945 0000:02:00.0: Status: 0x000202E4, count: 1 iwl3945 0000:02:00.0: Desc Time asrtPC blink2 ilink1 nmiPC Line iwl3945 0000:02:00.0: SYSASSERT (0x5) 0000208934 0x008B6 0x0035E 0x00320 0x00000 267 iwl3945 0000:02:00.0: Error Reply type 0x00000001 cmd Reported-by: Zdenek Kabelac Reported-by: Krzysztof Kolasa Reported-by: Pedro Francisco Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/iwlegacy/3945-mac.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlegacy/3945-mac.c b/drivers/net/wireless/iwlegacy/3945-mac.c index 3630a41df50d..c353b5f19c8c 100644 --- a/drivers/net/wireless/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/iwlegacy/3945-mac.c @@ -475,6 +475,7 @@ il3945_tx_skb(struct il_priv *il, dma_addr_t txcmd_phys; int txq_id = skb_get_queue_mapping(skb); u16 len, idx, hdr_len; + u16 firstlen, secondlen; u8 id; u8 unicast; u8 sta_id; @@ -589,21 +590,22 @@ il3945_tx_skb(struct il_priv *il, len = sizeof(struct il3945_tx_cmd) + sizeof(struct il_cmd_header) + hdr_len; - len = (len + 3) & ~3; + firstlen = (len + 3) & ~3; /* Physical address of this Tx command's header (not MAC header!), * within command buffer array. */ txcmd_phys = - pci_map_single(il->pci_dev, &out_cmd->hdr, len, PCI_DMA_TODEVICE); + pci_map_single(il->pci_dev, &out_cmd->hdr, firstlen, + PCI_DMA_TODEVICE); if (unlikely(pci_dma_mapping_error(il->pci_dev, txcmd_phys))) goto drop_unlock; /* Set up TFD's 2nd entry to point directly to remainder of skb, * if any (802.11 null frames have no payload). */ - len = skb->len - hdr_len; - if (len) { + secondlen = skb->len - hdr_len; + if (secondlen > 0) { phys_addr = - pci_map_single(il->pci_dev, skb->data + hdr_len, len, + pci_map_single(il->pci_dev, skb->data + hdr_len, secondlen, PCI_DMA_TODEVICE); if (unlikely(pci_dma_mapping_error(il->pci_dev, phys_addr))) goto drop_unlock; @@ -611,12 +613,12 @@ il3945_tx_skb(struct il_priv *il, /* Add buffer containing Tx command and MAC(!) 
header to TFD's * first entry */ - il->ops->txq_attach_buf_to_tfd(il, txq, txcmd_phys, len, 1, 0); + il->ops->txq_attach_buf_to_tfd(il, txq, txcmd_phys, firstlen, 1, 0); dma_unmap_addr_set(out_meta, mapping, txcmd_phys); - dma_unmap_len_set(out_meta, len, len); - if (len) - il->ops->txq_attach_buf_to_tfd(il, txq, phys_addr, len, 0, - U32_PAD(len)); + dma_unmap_len_set(out_meta, len, firstlen); + if (secondlen > 0) + il->ops->txq_attach_buf_to_tfd(il, txq, phys_addr, secondlen, 0, + U32_PAD(secondlen)); if (!ieee80211_has_morefrags(hdr->frame_control)) { txq->need_update = 1; -- cgit v1.2.2 From 74632d11a133b5baf6b9d622dd19d2f944d93d94 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Mar 2013 14:53:31 +0100 Subject: ath9k_hw: revert chainmask to user configuration after calibration The commit 'ath9k_hw: fix calibration issues on chainmask that don't include chain 0' changed the hardware chainmask to the chip chainmask for the duration of the calibration, but the revert to user configuration in the reset path runs too early. That causes some issues with limiting the number of antennas (including spurious failure in hardware-generated packets). Fix this by reverting the chainmask after the essential parts of the calibration that need the workaround, and before NF calibration is run. Signed-off-by: Felix Fietkau Reported-by: Wojciech Dubowik Tested-by: Wojciech Dubowik Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_calib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_calib.c b/drivers/net/wireless/ath/ath9k/ar9003_calib.c index 4cc13940c895..f76c3ca07a45 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_calib.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_calib.c @@ -1023,6 +1023,7 @@ static bool ar9003_hw_init_cal(struct ath_hw *ah, AR_PHY_AGC_CONTROL_FLTR_CAL | AR_PHY_AGC_CONTROL_PKDET_CAL; + /* Use chip chainmask only for calibration */ ar9003_hw_set_chain_masks(ah, ah->caps.rx_chainmask, ah->caps.tx_chainmask); if (rtt) { @@ -1150,6 +1151,9 @@ skip_tx_iqcal: ar9003_hw_rtt_disable(ah); } + /* Revert chainmask to runtime parameters */ + ar9003_hw_set_chain_masks(ah, ah->rxchainmask, ah->txchainmask); + /* Initialize list pointers */ ah->cal_list = ah->cal_list_last = ah->cal_list_curr = NULL; -- cgit v1.2.2 From 01d4ab96d2e7fceaad204e5a8710ce34e229b8c5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Mar 2013 16:18:44 +0100 Subject: ath9k: limit tx path hang check to normal data queues The beacon and multicast-buffer queues are managed by the beacon tasklet, and the generic tx path hang check does not help in any way here. Running it on those queues anyway can introduce some race conditions leading to unnecessary chip resets. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath/ath9k/link.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index ade3afb21f91..39c84ecf6a42 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -28,21 +28,21 @@ void ath_tx_complete_poll_work(struct work_struct *work) int i; bool needreset = false; - for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) - if (ATH_TXQ_SETUP(sc, i)) { - txq = &sc->tx.txq[i]; - ath_txq_lock(sc, txq); - if (txq->axq_depth) { - if (txq->axq_tx_inprogress) { - needreset = true; - ath_txq_unlock(sc, txq); - break; - } else { - txq->axq_tx_inprogress = true; - } + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + txq = sc->tx.txq_map[i]; + + ath_txq_lock(sc, txq); + if (txq->axq_depth) { + if (txq->axq_tx_inprogress) { + needreset = true; + ath_txq_unlock(sc, txq); + break; + } else { + txq->axq_tx_inprogress = true; } - ath_txq_unlock_complete(sc, txq); } + ath_txq_unlock_complete(sc, txq); + } if (needreset) { ath_dbg(ath9k_hw_common(sc->sc_ah), RESET, -- cgit v1.2.2 From 00d7ea11ff0783e24fe70778f3141270b561aaa1 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 15 Mar 2013 18:47:05 -0700 Subject: mwifiex: fix race when queuing commands Running the following script repeatedly on XO-4 with SD8787 produces command timeout and system lockup. insmod mwifiex_sdio.ko sleep 1 ifconfig eth0 up iwlist eth0 scan & sleep 0.5 rmmod mwifiex_sdio mwifiex_send_cmd_async() is called for sync as well as async commands. (mwifiex_send_cmd_sync() internally calls it for sync command.) "adapter->cmd_queued" gets filled inside mwifiex_send_cmd_async() routine for both types of commands. But it is used only for sync commands in mwifiex_wait_queue_complete(). This could lead to a race when two threads try to queue a sync command with another sync/async command simultaneously. Get rid of global variable and pass command node as a parameter to mwifiex_wait_queue_complete() to fix the problem. Cc: # 3.8 Reported-by: Daniel Drake Tested-by: Daniel Drake Tested-by: Marco Cesarano Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. 
Linville --- drivers/net/wireless/mwifiex/cmdevt.c | 5 ++--- drivers/net/wireless/mwifiex/main.h | 4 ++-- drivers/net/wireless/mwifiex/scan.c | 8 ++++---- drivers/net/wireless/mwifiex/sta_ioctl.c | 10 ++-------- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/mwifiex/cmdevt.c b/drivers/net/wireless/mwifiex/cmdevt.c index 20a6c5555873..2ffabddbcfca 100644 --- a/drivers/net/wireless/mwifiex/cmdevt.c +++ b/drivers/net/wireless/mwifiex/cmdevt.c @@ -484,8 +484,6 @@ int mwifiex_send_cmd_sync(struct mwifiex_private *priv, uint16_t cmd_no, ret = mwifiex_send_cmd_async(priv, cmd_no, cmd_action, cmd_oid, data_buf); - if (!ret) - ret = mwifiex_wait_queue_complete(adapter); return ret; } @@ -588,9 +586,10 @@ int mwifiex_send_cmd_async(struct mwifiex_private *priv, uint16_t cmd_no, if (cmd_no == HostCmd_CMD_802_11_SCAN) { mwifiex_queue_scan_cmd(priv, cmd_node); } else { - adapter->cmd_queued = cmd_node; mwifiex_insert_cmd_to_pending_q(adapter, cmd_node, true); queue_work(adapter->workqueue, &adapter->main_work); + if (cmd_node->wait_q_enabled) + ret = mwifiex_wait_queue_complete(adapter, cmd_node); } return ret; diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h index 553adfb0aa81..7035ade9af74 100644 --- a/drivers/net/wireless/mwifiex/main.h +++ b/drivers/net/wireless/mwifiex/main.h @@ -723,7 +723,6 @@ struct mwifiex_adapter { u16 cmd_wait_q_required; struct mwifiex_wait_queue cmd_wait_q; u8 scan_wait_q_woken; - struct cmd_ctrl_node *cmd_queued; spinlock_t queue_lock; /* lock for tx queues */ struct completion fw_load; u8 country_code[IEEE80211_COUNTRY_STRING_LEN]; @@ -1018,7 +1017,8 @@ int mwifiex_request_set_multicast_list(struct mwifiex_private *priv, struct mwifiex_multicast_list *mcast_list); int mwifiex_copy_mcast_addr(struct mwifiex_multicast_list *mlist, struct net_device *dev); -int mwifiex_wait_queue_complete(struct mwifiex_adapter *adapter); +int mwifiex_wait_queue_complete(struct mwifiex_adapter *adapter, + struct cmd_ctrl_node *cmd_queued); int mwifiex_bss_start(struct mwifiex_private *priv, struct cfg80211_bss *bss, struct cfg80211_ssid *req_ssid); int mwifiex_cancel_hs(struct mwifiex_private *priv, int cmd_type); diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c index bb60c2754a97..d215b4d3c51b 100644 --- a/drivers/net/wireless/mwifiex/scan.c +++ b/drivers/net/wireless/mwifiex/scan.c @@ -1388,10 +1388,13 @@ int mwifiex_scan_networks(struct mwifiex_private *priv, list_del(&cmd_node->list); spin_unlock_irqrestore(&adapter->scan_pending_q_lock, flags); - adapter->cmd_queued = cmd_node; mwifiex_insert_cmd_to_pending_q(adapter, cmd_node, true); queue_work(adapter->workqueue, &adapter->main_work); + + /* Perform internal scan synchronously */ + if (!priv->scan_request) + mwifiex_wait_queue_complete(adapter, cmd_node); } else { spin_unlock_irqrestore(&adapter->scan_pending_q_lock, flags); @@ -1946,9 +1949,6 @@ int mwifiex_request_scan(struct mwifiex_private *priv, /* Normal scan */ ret = mwifiex_scan_networks(priv, NULL); - if (!ret) - ret = mwifiex_wait_queue_complete(priv->adapter); - up(&priv->async_sem); return ret; diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c index 9f33c92c90f5..13100f8de3db 100644 --- a/drivers/net/wireless/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/mwifiex/sta_ioctl.c @@ -54,16 +54,10 @@ int mwifiex_copy_mcast_addr(struct mwifiex_multicast_list *mlist, * This function waits on a cmd wait queue. 
It also cancels the pending * request after waking up, in case of errors. */ -int mwifiex_wait_queue_complete(struct mwifiex_adapter *adapter) +int mwifiex_wait_queue_complete(struct mwifiex_adapter *adapter, + struct cmd_ctrl_node *cmd_queued) { int status; - struct cmd_ctrl_node *cmd_queued; - - if (!adapter->cmd_queued) - return 0; - - cmd_queued = adapter->cmd_queued; - adapter->cmd_queued = NULL; dev_dbg(adapter->dev, "cmd pending\n"); atomic_inc(&adapter->cmd_pending); -- cgit v1.2.2 From a3e240cacc93a06bff3313e28938e980d01a2160 Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Fri, 15 Mar 2013 18:47:06 -0700 Subject: mwifiex: skip pending commands after function shutdown During rmmod mwifiex_sdio processing, a FUNC_SHUTDOWN command is sent to the firmware. Firmware expects only FUNC_INIT once the WLAN function is shut down. Any command pending in the command queue should be ignored and freed. Cc: # 3.8 Tested-by: Daniel Drake Tested-by: Marco Cesarano Signed-off-by: Bing Zhao Signed-off-by: Amitkumar Karwar Signed-off-by: John W. Linville --- drivers/net/wireless/mwifiex/cmdevt.c | 17 ++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mwifiex/cmdevt.c b/drivers/net/wireless/mwifiex/cmdevt.c index 2ffabddbcfca..b5c8b962ce12 100644 --- a/drivers/net/wireless/mwifiex/cmdevt.c +++ b/drivers/net/wireless/mwifiex/cmdevt.c @@ -157,6 +157,20 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv, return -1; } + cmd_code = le16_to_cpu(host_cmd->command); + cmd_size = le16_to_cpu(host_cmd->size); + + if (adapter->hw_status == MWIFIEX_HW_STATUS_RESET && + cmd_code != HostCmd_CMD_FUNC_SHUTDOWN && + cmd_code != HostCmd_CMD_FUNC_INIT) { + dev_err(adapter->dev, + "DNLD_CMD: FW in reset state, ignore cmd %#x\n", + cmd_code); + mwifiex_complete_cmd(adapter, cmd_node); + mwifiex_insert_cmd_to_free_q(adapter, cmd_node); + return -1; + } + /* Set command sequence number */ adapter->seq_num++; host_cmd->seq_num = cpu_to_le16(HostCmd_SET_SEQ_NO_BSS_INFO @@ -168,9 +182,6 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv, adapter->curr_cmd = cmd_node; spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, flags); - cmd_code = le16_to_cpu(host_cmd->command); - cmd_size = le16_to_cpu(host_cmd->size); - /* Adjust skb length */ if (cmd_node->cmd_skb->len > cmd_size) /* -- cgit v1.2.2 From 084c7189acb3f969c855536166042e27f5dd703f Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Fri, 15 Mar 2013 18:47:07 -0700 Subject: mwifiex: cancel cmd timer and free curr_cmd in shutdown process curr_cmd points to the command that is being processed or waiting for its command response from firmware. If the function shutdown happens to occur at this time we should cancel the cmd timer and put the command back to the free queue. Cc: # 3.8 Tested-by: Marco Cesarano Signed-off-by: Bing Zhao Signed-off-by: John W.
Linville --- drivers/net/wireless/mwifiex/init.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/mwifiex/init.c b/drivers/net/wireless/mwifiex/init.c index e38aa9b3663d..0ff4c37ab42a 100644 --- a/drivers/net/wireless/mwifiex/init.c +++ b/drivers/net/wireless/mwifiex/init.c @@ -709,6 +709,14 @@ mwifiex_shutdown_drv(struct mwifiex_adapter *adapter) return ret; } + /* cancel current command */ + if (adapter->curr_cmd) { + dev_warn(adapter->dev, "curr_cmd is still in processing\n"); + del_timer(&adapter->cmd_timer); + mwifiex_insert_cmd_to_free_q(adapter, adapter->curr_cmd); + adapter->curr_cmd = NULL; + } + /* shut down mwifiex */ dev_dbg(adapter->dev, "info: shutdown mwifiex...\n"); -- cgit v1.2.2 From 36ef0b473fbf43d5db23eea4616cc1d18cec245f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 17 Mar 2013 11:54:04 +0200 Subject: rtlwifi: usb: add missing freeing of skbuff Signed-off-by: Jussi Kivilinna Acked-by: Larry Finger Cc: stable@vger.kernel.org Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c index 156b52732f3d..5847d6d0881e 100644 --- a/drivers/net/wireless/rtlwifi/usb.c +++ b/drivers/net/wireless/rtlwifi/usb.c @@ -851,6 +851,7 @@ static void _rtl_usb_transmit(struct ieee80211_hw *hw, struct sk_buff *skb, if (unlikely(!_urb)) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Can't allocate urb. Drop skb!\n"); + kfree_skb(skb); return; } _rtl_submit_tx_urb(hw, _urb); -- cgit v1.2.2 From 882e3f8e6966109ad837cfe79e97cf3deb3ae19b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 4 Mar 2013 14:08:06 +0100 Subject: target_core_sbc: use noop for SYNCHRONIZE_CACHE Windows does not expect SYNCHRONIZE_CACHE to be not supported, and will generate a BSOD upon shutdown when using rd_mcp backend. So better use a noop here. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 290230de2c53..60d4b5185f32 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -464,8 +464,11 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) break; case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: - if (!ops->execute_sync_cache) - return TCM_UNSUPPORTED_SCSI_OPCODE; + if (!ops->execute_sync_cache) { + size = 0; + cmd->execute_cmd = sbc_emulate_noop; + break; + } /* * Extract LBA and range to be flushed for emulated SYNCHRONIZE_CACHE -- cgit v1.2.2 From 7ac9ad11b2a5cf77a92b58ee6b672ad2fa155eb1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 4 Mar 2013 13:52:09 -0800 Subject: target/iscsi: Fix mutual CHAP auth on big-endian arches See https://bugzilla.redhat.com/show_bug.cgi?id=916290 Used a temp var since we take its address in sg_init_one. 
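As an illustration of the endianness trap being fixed (a minimal user-space sketch, not part of the patch): simple_strtoul() parses the CHAP identifier into an unsigned long, but sg_init_one(&sg, &id, 1) feeds only the first byte at that address into the MD5 hash. That byte is the low-order byte on little-endian machines and the high-order byte on big-endian ones, so the value must be narrowed to an unsigned char first:

#include <stdio.h>

int main(void)
{
    unsigned long id = 0x2a;        /* identifier parsed from the CHAP_I= key */
    unsigned char first_byte = *(unsigned char *)&id;
    unsigned char id_as_uchar = id; /* the fix: narrow explicitly */

    /* little-endian prints 0x2a twice; big-endian prints 0, then 0x2a */
    printf("first byte in memory: %#x, narrowed value: %#x\n",
           first_byte, id_as_uchar);
    return 0;
}

Hashing the narrowed id_as_uchar yields the same MD5 input on both byte orders.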
Signed-off-by: Andy Grover Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_auth.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index db0cf7c8adde..a0fc7b9eea65 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -166,6 +166,7 @@ static int chap_server_compute_md5( { char *endptr; unsigned long id; + unsigned char id_as_uchar; unsigned char digest[MD5_SIGNATURE_SIZE]; unsigned char type, response[MD5_SIGNATURE_SIZE * 2 + 2]; unsigned char identifier[10], *challenge = NULL; @@ -355,7 +356,9 @@ static int chap_server_compute_md5( goto out; } - sg_init_one(&sg, &id, 1); + /* To handle both endiannesses */ + id_as_uchar = id; + sg_init_one(&sg, &id_as_uchar, 1); ret = crypto_hash_update(&desc, &sg, 1); if (ret < 0) { pr_err("crypto_hash_update() failed for id\n"); -- cgit v1.2.2 From 0fb889b83186e54c0cfa79516599f2267fb553fb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 15 Mar 2013 17:19:26 +0800 Subject: target: fix possible memory leak in core_tpg_register() 'se_tpg->tpg_lun_list' is malloced in core_tpg_register() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. 'se_tpg' is malloced out of this function, and will be freed if we return error, so remove free for 'se_tpg'. Signed-off-by: Wei Yongjun Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tpg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 9169d6a5d7e4..aac9d2727e3c 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -711,7 +711,8 @@ int core_tpg_register( if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) { if (core_tpg_setup_virtual_lun0(se_tpg) < 0) { - kfree(se_tpg); + array_free(se_tpg->tpg_lun_list, + TRANSPORT_MAX_LUNS_PER_TPG); return -ENOMEM; } } -- cgit v1.2.2 From 94877548ec95269db60b61ee56ccea4fb27bbf7c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 18 Mar 2013 21:19:49 +0100 Subject: MAINTAINERS: Remove Mark M. Hoffman Mark M. Hoffman stopped working on the Linux kernel several years ago, so he should no longer be listed as a driver maintainer. I'm not even sure if his e-mail address still works. I can take over 3 drivers he was responsible for, the 4th one will fall down to the subsystem maintainer. Also give Mark credit for all the good work he did. Signed-off-by: Jean Delvare Cc: "Mark M. Hoffman" Acked-by: Guenter Roeck Cc: Wolfram Sang --- CREDITS | 8 ++++++++ MAINTAINERS | 17 ++--------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/CREDITS b/CREDITS index 78163cb3eb6a..afaa7cec6ea5 100644 --- a/CREDITS +++ b/CREDITS @@ -1510,6 +1510,14 @@ D: Natsemi ethernet D: Cobalt Networks (x86) support D: This-and-That +N: Mark M. Hoffman +E: mhoffman@lightlink.com +D: asb100, lm93 and smsc47b397 hardware monitoring drivers +D: hwmon subsystem core +D: hwmon subsystem maintainer +D: i2c-sis96x and i2c-stub SMBus drivers +S: USA + N: Dirk Hohndel E: hohndel@suse.de D: The XFree86[tm] Project diff --git a/MAINTAINERS b/MAINTAINERS index 50b4d735f961..fb89be1281c6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1338,12 +1338,6 @@ S: Maintained F: drivers/platform/x86/asus*.c F: drivers/platform/x86/eeepc*.c -ASUS ASB100 HARDWARE MONITOR DRIVER -M: "Mark M. 
Hoffman" -L: lm-sensors@lm-sensors.org -S: Maintained -F: drivers/hwmon/asb100.c - ASYNCHRONOUS TRANSFERS/TRANSFORMS (IOAT) API M: Dan Williams W: http://sourceforge.net/projects/xscaleiop @@ -3851,7 +3845,7 @@ F: drivers/i2c/busses/i2c-ismt.c F: Documentation/i2c/busses/i2c-ismt I2C/SMBUS STUB DRIVER -M: "Mark M. Hoffman" +M: Jean Delvare L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/i2c-stub.c @@ -7198,13 +7192,6 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/sis/sis900.* -SIS 96X I2C/SMBUS DRIVER -M: "Mark M. Hoffman" -L: linux-i2c@vger.kernel.org -S: Maintained -F: Documentation/i2c/busses/i2c-sis96x -F: drivers/i2c/busses/i2c-sis96x.c - SIS FRAMEBUFFER DRIVER M: Thomas Winischhofer W: http://www.winischhofer.net/linuxsisvga.shtml @@ -7282,7 +7269,7 @@ F: Documentation/hwmon/sch5627 F: drivers/hwmon/sch5627.c SMSC47B397 HARDWARE MONITOR DRIVER -M: "Mark M. Hoffman" +M: Jean Delvare L: lm-sensors@lm-sensors.org S: Maintained F: Documentation/hwmon/smsc47b397 -- cgit v1.2.2 From 5a4c060114822255302d4763ad6712f9cde2372b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 18 Mar 2013 21:19:49 +0100 Subject: hwmon: (lm75.h) Update header inclusion File lm75.h used to include for SENSORS_LIMIT() but this function is gone by now. Instead we call clamp_val() so we should include , where this function is declared. Signed-off-by: Jean Delvare Reviewed-by: Guenter Roeck --- drivers/hwmon/lm75.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm75.h b/drivers/hwmon/lm75.h index 668ff4721323..5cde94e56f17 100644 --- a/drivers/hwmon/lm75.h +++ b/drivers/hwmon/lm75.h @@ -25,7 +25,7 @@ which contains this code, we don't worry about the wasted space. */ -#include +#include /* straight from the datasheet */ #define LM75_TEMP_MIN (-55000) -- cgit v1.2.2 From 25eba81b7fbbb14dde63fc85231c699fe77afc58 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 18 Mar 2013 21:19:49 +0100 Subject: hwmon: (lm75) Fix tcn75 prefix The TCN75 has its own prefix for a long time now. Signed-off-by: Jean Delvare Reviewed-by: Guenter Roeck --- Documentation/hwmon/lm75 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/lm75 b/Documentation/hwmon/lm75 index c91a1d15fa28..69af1c7db6b7 100644 --- a/Documentation/hwmon/lm75 +++ b/Documentation/hwmon/lm75 @@ -23,7 +23,7 @@ Supported chips: Datasheet: Publicly available at the Maxim website http://www.maxim-ic.com/ * Microchip (TelCom) TCN75 - Prefix: 'lm75' + Prefix: 'tcn75' Addresses scanned: none Datasheet: Publicly available at the Microchip website http://www.microchip.com/ -- cgit v1.2.2 From 0e5e098ac22dae38f957e951b70d3cf73beff0f7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 11 Mar 2013 09:39:55 +0000 Subject: xen-blkback: fix dispatch_rw_block_io() error path Commit 7708992 ("xen/blkback: Seperate the bio allocation and the bio submission") consolidated the pendcnt updates to just a single write, neglecting the fact that the error path relied on it getting set to 1 up front (such that the decrement in __end_block_io_op() would actually drop the count to zero, triggering the necessary cleanup actions). Also remove a misleading and a stale (after said commit) comment. 
CC: stable@vger.kernel.org Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index eaccc222a1dc..477a17c20820 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1001,13 +1001,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, bio->bi_end_io = end_block_io_op; } - /* - * We set it one so that the last submit_bio does not have to call - * atomic_inc. - */ atomic_set(&pending_req->pendcnt, nbio); - - /* Get a reference count for the disk queue and start sending I/O */ blk_start_plug(&plug); for (i = 0; i < nbio; i++) @@ -1035,6 +1029,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, fail_put_bio: for (i = 0; i < nbio; i++) bio_put(biolist[i]); + atomic_set(&pending_req->pendcnt, 1); __end_block_io_op(pending_req, -EINVAL); msleep(1); /* back off a bit */ return -EIO; -- cgit v1.2.2 From 29d0b218c87ace1078e08bb32c2e72fc96fa3db3 Mon Sep 17 00:00:00 2001 From: Mihnea Dobrescu-Balaur Date: Mon, 11 Mar 2013 13:23:36 +0200 Subject: xen-blkfront: replace kmalloc and then memcpy with kmemdup The benefits are: * code is cleaner * kmemdup adds additional debugging info useful for tracking the real place where memory was allocated (CONFIG_DEBUG_SLAB). Signed-off-by: Mihnea Dobrescu-Balaur Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c3dae2e0f290..962064487ef7 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1203,11 +1203,10 @@ static int blkif_recover(struct blkfront_info *info) int j; /* Stage 1: Make a safe copy of the shadow state. */ - copy = kmalloc(sizeof(info->shadow), + copy = kmemdup(info->shadow, sizeof(info->shadow), GFP_NOIO | __GFP_REPEAT | __GFP_HIGH); if (!copy) return -ENOMEM; - memcpy(copy, info->shadow, sizeof(info->shadow)); /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); -- cgit v1.2.2 From f445f11eb2cc265dd47da5b2e864df46cd6e5a82 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 14 Mar 2013 17:13:46 -0700 Subject: KVM: allow host header to be included even for !CONFIG_KVM The new context tracking subsystem unconditionally includes kvm_host.h headers for the guest enter/exit macros. This causes a compile failure when KVM is not enabled. Fix by adding an IS_ENABLED(CONFIG_KVM) check to kvm_host so it can be included/compiled even when KVM is not enabled. Cc: Frederic Weisbecker Signed-off-by: Kevin Hilman Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cad77fe09d77..a9428635c9fd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1,6 +1,8 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H +#if IS_ENABLED(CONFIG_KVM) + /* * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. 
@@ -1055,5 +1057,8 @@ static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ +#else +static inline void __guest_enter(void) { return; } +static inline void __guest_exit(void) { return; } +#endif /* IS_ENABLED(CONFIG_KVM) */ #endif - -- cgit v1.2.2 From c09664bb44184b3846e8c5254db4eae4b932682a Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 18 Mar 2013 13:54:32 -0300 Subject: KVM: x86: fix deadlock in clock-in-progress request handling There is a deadlock in pvclock handling: cpu0: cpu1: kvm_gen_update_masterclock() kvm_guest_time_update() spin_lock(pvclock_gtod_sync_lock) local_irq_save(flags) spin_lock(pvclock_gtod_sync_lock) kvm_make_mclock_inprogress_request(kvm) make_all_cpus_request() smp_call_function_many() Now if smp_call_function_many() called by cpu0 tries to call function on cpu1 there will be a deadlock. Fix by moving pvclock_gtod_sync_lock protected section outside irq disabled section. Analyzed by Gleb Natapov Acked-by: Gleb Natapov Reported-and-Tested-by: Yongjie Ren Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f71500af1f81..f7c850b36910 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1416,15 +1416,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = 0; host_tsc = 0; - /* Keep irq disabled to prevent changes to the clock */ - local_irq_save(flags); - this_tsc_khz = __get_cpu_var(cpu_tsc_khz); - if (unlikely(this_tsc_khz == 0)) { - local_irq_restore(flags); - kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); - return 1; - } - /* * If the host uses TSC clock, then passthrough TSC as stable * to the guest. @@ -1436,6 +1427,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = ka->master_kernel_ns; } spin_unlock(&ka->pvclock_gtod_sync_lock); + + /* Keep irq disabled to prevent changes to the clock */ + local_irq_save(flags); + this_tsc_khz = __get_cpu_var(cpu_tsc_khz); + if (unlikely(this_tsc_khz == 0)) { + local_irq_restore(flags); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); + return 1; + } if (!use_master_clock) { host_tsc = native_read_tsc(); kernel_ns = get_kernel_ns(); -- cgit v1.2.2 From ac534ff2d5508bdff1358a55d88053da729ff46b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 15 Mar 2013 09:16:29 -0400 Subject: nfsd: fix startup order in nfsd_reply_cache_init If we end up doing "goto out_nomem" in this function, we'll call nfsd_reply_cache_shutdown. That will attempt to walk the LRU list and free entries, but that list may not be initialized yet if the server is starting up for the first time. It's also possible for the shrinker to kick in before we've initialized the LRU list. Rearrange the initialization so that the LRU list_head and cache size are initialized before doing any of the allocations that might fail. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfscache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 18509bd6f587..ca05f6dc3544 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -119,6 +119,10 @@ nfsd_reply_cache_free(struct svc_cacherep *rp) int nfsd_reply_cache_init(void) { + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + register_shrinker(&nfsd_reply_cache_shrinker); drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); @@ -129,10 +133,6 @@ int nfsd_reply_cache_init(void) if (!cache_hash) goto out_nomem; - INIT_LIST_HEAD(&lru_head); - max_drc_entries = nfsd_cache_size_limit(); - num_drc_entries = 0; - return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); -- cgit v1.2.2 From 038e0af4a4fc4db9631aef39ab53e5d991b972ef Mon Sep 17 00:00:00 2001 From: Asias He Date: Fri, 15 Mar 2013 09:14:05 +0800 Subject: tcm_vhost: Add missed lock in vhost_scsi_clear_endpoint() tv_tpg->tv_tpg_vhost_count should be protected by tv_tpg->tv_tpg_mutex. Signed-off-by: Asias He Reviewed-by: Stefan Hajnoczi Reviewed-by: Paolo Bonzini Acked-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 9951297b2427..79d3aeaff8fa 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -850,7 +850,7 @@ static int vhost_scsi_clear_endpoint( for (index = 0; index < vs->dev.nvqs; ++index) { if (!vhost_vq_access_ok(&vs->vqs[index])) { ret = -EFAULT; - goto err; + goto err_dev; } } for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { @@ -860,10 +860,11 @@ static int vhost_scsi_clear_endpoint( if (!tv_tpg) continue; + mutex_lock(&tv_tpg->tv_tpg_mutex); tv_tport = tv_tpg->tport; if (!tv_tport) { ret = -ENODEV; - goto err; + goto err_tpg; } if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) { @@ -872,16 +873,19 @@ static int vhost_scsi_clear_endpoint( tv_tport->tport_name, tv_tpg->tport_tpgt, t->vhost_wwpn, t->vhost_tpgt); ret = -EINVAL; - goto err; + goto err_tpg; } tv_tpg->tv_tpg_vhost_count--; vs->vs_tpg[target] = NULL; vs->vs_endpoint = false; + mutex_unlock(&tv_tpg->tv_tpg_mutex); } mutex_unlock(&vs->dev.mutex); return 0; -err: +err_tpg: + mutex_unlock(&tv_tpg->tv_tpg_mutex); +err_dev: mutex_unlock(&vs->dev.mutex); return ret; } -- cgit v1.2.2 From 72c77539fd56f5ba8dd538df9f3ce0e331fe601c Mon Sep 17 00:00:00 2001 From: Asias He Date: Fri, 15 Mar 2013 09:14:06 +0800 Subject: tcm_vhost: Flush vhost_work in vhost_scsi_flush() We also need to flush the vhost_works. It is the completion vhost_work currently. Signed-off-by: Asias He Reviewed-by: Stefan Hajnoczi Reviewed-by: Paolo Bonzini Acked-by: Michael S. 
Tsirkin Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 79d3aeaff8fa..43fb11ee2e8d 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -941,6 +941,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs) for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) vhost_scsi_flush_vq(vs, i); + vhost_work_flush(&vs->dev, &vs->vs_completion_work); } static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) -- cgit v1.2.2 From 27ca0391fb717eecb9b9ca29ad4b9e8d7a84aba5 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sat, 9 Mar 2013 22:19:35 +0100 Subject: staging: zcache: fix typo "64_BIT" Signed-off-by: Paul Bolle Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zcache/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig index 73582705e8c5..5c3714530961 100644 --- a/drivers/staging/zcache/Kconfig +++ b/drivers/staging/zcache/Kconfig @@ -15,7 +15,7 @@ config RAMSTER depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE=y depends on NET # must ensure struct page is 8-byte aligned - select HAVE_ALIGNED_STRUCT_PAGE if !64_BIT + select HAVE_ALIGNED_STRUCT_PAGE if !64BIT default n help RAMster allows RAM on other machines in a cluster to be utilized -- cgit v1.2.2 From e71918ea66121985f287c0c3d0efa9c4c35da7fa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Mar 2013 17:56:44 -0300 Subject: [media] ir: IR_RX51 only works on OMAP2 This driver can be enabled on OMAP1 at the moment, which breaks allyesconfig for that platform. Let's mark it OMAP2PLUS-only in Kconfig, since that is the only thing it builds on. Signed-off-by: Arnd Bergmann Acked-by: Timo Kokkonen Acked-by: Tony Lindgren Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig index 19f3563c61da..5a79c333d45e 100644 --- a/drivers/media/rc/Kconfig +++ b/drivers/media/rc/Kconfig @@ -291,7 +291,7 @@ config IR_TTUSBIR config IR_RX51 tristate "Nokia N900 IR transmitter diode" - depends on OMAP_DM_TIMER && LIRC && !ARCH_MULTIPLATFORM + depends on OMAP_DM_TIMER && ARCH_OMAP2PLUS && LIRC && !ARCH_MULTIPLATFORM ---help--- Say Y or M here if you want to enable support for the IR transmitter diode built in the Nokia N900 (RX51) device. -- cgit v1.2.2 From cf2e39429c245245db889fffdfbdf3f889a6cb22 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:06 +0200 Subject: ipvs: fix sctp chunk length order Fix wrong but non-fatal access to chunk length. sch->length should be in network order, next chunk should be aligned to 4 bytes. Problem noticed in sparse output. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index ae8ec6f27688..cd1d7298f7ba 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -906,7 +906,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; - int ihl; + int ihl, cofs; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? 
ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -914,8 +914,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, ihl = ip_hdrlen(skb); #endif - sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), - sizeof(_sctpch), &_sctpch); + cofs = ihl + sizeof(sctp_sctphdr_t); + sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); if (sch == NULL) return; @@ -933,10 +933,12 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, */ if ((sch->type == SCTP_CID_COOKIE_ECHO) || (sch->type == SCTP_CID_COOKIE_ACK)) { - sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + - sch->length), sizeof(_sctpch), &_sctpch); - if (sch) { - if (sch->type == SCTP_CID_ABORT) + int clen = ntohs(sch->length); + + if (clen >= sizeof(sctp_chunkhdr_t)) { + sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), + sizeof(_sctpch), &_sctpch); + if (sch && sch->type == SCTP_CID_ABORT) chunk_type = sch->type; } } -- cgit v1.2.2 From 9997d08806062cb7ba471ab12fa2742cfec2f413 Mon Sep 17 00:00:00 2001 From: Ben Collins Date: Mon, 18 Mar 2013 19:19:07 -0400 Subject: sgy-cts1000: Remove __dev* attributes Somehow the driver snuck in with these still in it. Signed-off-by: Ben Collins Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/85xx/sgy_cts1000.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 611e92f291c4..7179726ba5c5 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -69,7 +69,7 @@ static irqreturn_t gpio_halt_irq(int irq, void *__data) return IRQ_HANDLED; }; -static int __devinit gpio_halt_probe(struct platform_device *pdev) +static int gpio_halt_probe(struct platform_device *pdev) { enum of_gpio_flags flags; struct device_node *node = pdev->dev.of_node; @@ -128,7 +128,7 @@ static int __devinit gpio_halt_probe(struct platform_device *pdev) return 0; } -static int __devexit gpio_halt_remove(struct platform_device *pdev) +static int gpio_halt_remove(struct platform_device *pdev) { if (halt_node) { int gpio = of_get_gpio(halt_node, 0); @@ -165,7 +165,7 @@ static struct platform_driver gpio_halt_driver = { .of_match_table = gpio_halt_match, }, .probe = gpio_halt_probe, - .remove = __devexit_p(gpio_halt_remove), + .remove = gpio_halt_remove, }; module_platform_driver(gpio_halt_driver); -- cgit v1.2.2 From 6a15075eced2d780fa6c5d83682410f47f2e800b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 18 Mar 2013 19:24:02 +0100 Subject: ARM: video: mxs: Fix mxsfb misconfiguring VDCTRL0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The issue fixed by this patch manifests only when using X11 with the mxsfb driver. X11 will display either a shifted or an otherwise distorted image on the LCD. The problem is that X11 tries to reconfigure the framebuffer and along the way calls fb_ops.fb_set_par() with X11's desired configuration values. The field of particular interest is fb_info->var.sync which contains non-standard values if configured by the kernel. These are either FB_SYNC_DATA_ENABLE_HIGH_ACT, FB_SYNC_DOTCLK_FAILING_ACT or both, depending on the platform configuration. Both of these values are defined in the include/linux/mxsfb.h file. The driver interprets these values and configures the LCD controller accordingly. Yet X11 only has access to the standard values for this field defined in include/uapi/linux/fb.h and thus, unlike the kernel, omits these special values.
This results in distorted image on the LCD. This patch moves these non-standard values into new field of the mxsfb_platform_data structure so the driver can in turn check this field instead of the video mode field for these specific portions. Moreover, this patch prefixes these values with MXSFB_SYNC_ prefix instead of FB_SYNC_ prefix to prevent confusion of subsequent users. Signed-off-by: Marek Vasut Cc: Fabio Estevam Cc: Linux ARM Cc: Linux FBDEV Cc: Lothar Waßmann Cc: Sascha Hauer Tested-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-mxs/mach-mxs.c | 24 ++++++++++++------------ drivers/video/mxsfb.c | 7 +++++-- include/linux/mxsfb.h | 7 +++++-- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 3218f1f2c0e0..e7b781d3788f 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -41,8 +41,6 @@ static struct fb_videomode mx23evk_video_modes[] = { .lower_margin = 4, .hsync_len = 1, .vsync_len = 1, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, }, }; @@ -59,8 +57,6 @@ static struct fb_videomode mx28evk_video_modes[] = { .lower_margin = 10, .hsync_len = 10, .vsync_len = 10, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, }, }; @@ -77,7 +73,6 @@ static struct fb_videomode m28evk_video_modes[] = { .lower_margin = 45, .hsync_len = 1, .vsync_len = 1, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT, }, }; @@ -94,9 +89,7 @@ static struct fb_videomode apx4devkit_video_modes[] = { .lower_margin = 13, .hsync_len = 48, .vsync_len = 3, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT | - FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, }, }; @@ -113,9 +106,7 @@ static struct fb_videomode apf28dev_video_modes[] = { .lower_margin = 0x15, .hsync_len = 64, .vsync_len = 4, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT | - FB_SYNC_DATA_ENABLE_HIGH_ACT | - FB_SYNC_DOTCLK_FAILING_ACT, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, }, }; @@ -132,7 +123,6 @@ static struct fb_videomode cfa10049_video_modes[] = { .lower_margin = 2, .hsync_len = 15, .vsync_len = 15, - .sync = FB_SYNC_DATA_ENABLE_HIGH_ACT }, }; @@ -259,6 +249,8 @@ static void __init imx23_evk_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(mx23evk_video_modes); mxsfb_pdata.default_bpp = 32; mxsfb_pdata.ld_intf_width = STMLCDIF_24BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; } static inline void enable_clk_enet_out(void) @@ -278,6 +270,8 @@ static void __init imx28_evk_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(mx28evk_video_modes); mxsfb_pdata.default_bpp = 32; mxsfb_pdata.ld_intf_width = STMLCDIF_24BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; mxs_saif_clkmux_select(MXS_DIGCTL_SAIF_CLKMUX_EXTMSTR0); } @@ -297,6 +291,7 @@ static void __init m28evk_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(m28evk_video_modes); mxsfb_pdata.default_bpp = 16; mxsfb_pdata.ld_intf_width = STMLCDIF_18BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT; } static void __init sc_sps1_init(void) @@ -322,6 +317,8 @@ static void __init apx4devkit_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(apx4devkit_video_modes); mxsfb_pdata.default_bpp = 32; mxsfb_pdata.ld_intf_width = STMLCDIF_24BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; } #define ENET0_MDC__GPIO_4_0 MXS_GPIO_NR(4, 0) @@ 
-407,6 +404,7 @@ static void __init cfa10049_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(cfa10049_video_modes); mxsfb_pdata.default_bpp = 32; mxsfb_pdata.ld_intf_width = STMLCDIF_18BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT; } static void __init cfa10037_init(void) @@ -423,6 +421,8 @@ static void __init apf28_init(void) mxsfb_pdata.mode_count = ARRAY_SIZE(apf28dev_video_modes); mxsfb_pdata.default_bpp = 16; mxsfb_pdata.ld_intf_width = STMLCDIF_16BIT; + mxsfb_pdata.sync = MXSFB_SYNC_DATA_ENABLE_HIGH_ACT | + MXSFB_SYNC_DOTCLK_FAILING_ACT; } static void __init mxs_machine_init(void) diff --git a/drivers/video/mxsfb.c b/drivers/video/mxsfb.c index 755556ca5b2d..45169cbaba6e 100644 --- a/drivers/video/mxsfb.c +++ b/drivers/video/mxsfb.c @@ -169,6 +169,7 @@ struct mxsfb_info { unsigned dotclk_delay; const struct mxsfb_devdata *devdata; int mapped; + u32 sync; }; #define mxsfb_is_v3(host) (host->devdata->ipversion == 3) @@ -456,9 +457,9 @@ static int mxsfb_set_par(struct fb_info *fb_info) vdctrl0 |= VDCTRL0_HSYNC_ACT_HIGH; if (fb_info->var.sync & FB_SYNC_VERT_HIGH_ACT) vdctrl0 |= VDCTRL0_VSYNC_ACT_HIGH; - if (fb_info->var.sync & FB_SYNC_DATA_ENABLE_HIGH_ACT) + if (host->sync & MXSFB_SYNC_DATA_ENABLE_HIGH_ACT) vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH; - if (fb_info->var.sync & FB_SYNC_DOTCLK_FAILING_ACT) + if (host->sync & MXSFB_SYNC_DOTCLK_FAILING_ACT) vdctrl0 |= VDCTRL0_DOTCLK_ACT_FAILING; writel(vdctrl0, host->base + LCDC_VDCTRL0); @@ -861,6 +862,8 @@ static int mxsfb_probe(struct platform_device *pdev) INIT_LIST_HEAD(&fb_info->modelist); + host->sync = pdata->sync; + ret = mxsfb_init_fbinfo(host); if (ret != 0) goto error_init_fb; diff --git a/include/linux/mxsfb.h b/include/linux/mxsfb.h index f14943d55315..f80af8674342 100644 --- a/include/linux/mxsfb.h +++ b/include/linux/mxsfb.h @@ -24,8 +24,8 @@ #define STMLCDIF_18BIT 2 /** pixel data bus to the display is of 18 bit width */ #define STMLCDIF_24BIT 3 /** pixel data bus to the display is of 24 bit width */ -#define FB_SYNC_DATA_ENABLE_HIGH_ACT (1 << 6) -#define FB_SYNC_DOTCLK_FAILING_ACT (1 << 7) /* failing/negtive edge sampling */ +#define MXSFB_SYNC_DATA_ENABLE_HIGH_ACT (1 << 6) +#define MXSFB_SYNC_DOTCLK_FAILING_ACT (1 << 7) /* failing/negtive edge sampling */ struct mxsfb_platform_data { struct fb_videomode *mode_list; @@ -44,6 +44,9 @@ struct mxsfb_platform_data { * allocated. If specified,fb_size must also be specified. * fb_phys must be unused by Linux. 
*/ + u32 sync; /* sync mask, contains MXSFB specifics not + * carried in fb_info->var.sync + */ }; #endif /* __LINUX_MXSFB_H */ -- cgit v1.2.2 From 4fa133954e91b83cfa22947579154c6f16e1b2b4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 19 Mar 2013 09:57:57 +1000 Subject: drm/nouveau/core: fix return value of nouveau_object_del() Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/core/core/object.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/core/object.c b/drivers/gpu/drm/nouveau/core/core/object.c index 0daab62ea14c..3b2e7b6304d3 100644 --- a/drivers/gpu/drm/nouveau/core/core/object.c +++ b/drivers/gpu/drm/nouveau/core/core/object.c @@ -278,7 +278,6 @@ nouveau_object_del(struct nouveau_object *client, u32 _parent, u32 _handle) struct nouveau_object *parent = NULL; struct nouveau_object *namedb = NULL; struct nouveau_handle *handle = NULL; - int ret = -EINVAL; parent = nouveau_handle_ref(client, _parent); if (!parent) @@ -295,7 +294,7 @@ nouveau_object_del(struct nouveau_object *client, u32 _parent, u32 _handle) } nouveau_object_ref(NULL, &parent); - return ret; + return handle ? 0 : -EINVAL; } int -- cgit v1.2.2 From f60b6e7a6078ceae438a95b808be04cd98f9909a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 19 Mar 2013 15:20:00 +1000 Subject: drm/nv50/kms: prevent lockdep false-positive in page flipping path Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 2db57990f65c..7f0e6c3f37d1 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -524,6 +524,8 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, swap_interval <<= 4; if (swap_interval == 0) swap_interval |= 0x100; + if (chan == NULL) + evo_sync(crtc->dev); push = evo_wait(sync, 128); if (unlikely(push == NULL)) @@ -586,8 +588,6 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb, sync->addr ^= 0x10; sync->data++; FIRE_RING (chan); - } else { - evo_sync(crtc->dev); } /* queue the flip */ -- cgit v1.2.2 From 287939a3690c8da6fd3310d7593ff0448cb9447c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 13 Mar 2013 10:52:49 +0800 Subject: ARM: imx: add dependency check for DEBUG_IMX_UART_PORT While adding i.MX DEBUG_LL selection, commit f8c95fe (ARM: imx: support DEBUG_LL uart port selection for all i.MX SoCs) leaves Kconfig symbol DEBUG_IMX_UART_PORT there without any dependency check. This results in everyone getting the symbol in their config, which is something undesirable. Add "depends on ARCH_MXC" for the symbol to prevent that. Reported-by: Karl Beldan Signed-off-by: Shawn Guo --- arch/arm/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index ecfcdba2d17c..9b31f4311ea2 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -495,6 +495,7 @@ config DEBUG_IMX_UART_PORT DEBUG_IMX53_UART || \ DEBUG_IMX6Q_UART default 1 + depends on ARCH_MXC help Choose UART port on which kernel low-level debug messages should be output.
-- cgit v1.2.2 From 2105fd550ca7dbdd490934f487852c2a399b20cf Mon Sep 17 00:00:00 2001 From: Pierrick Hascoet Date: Mon, 18 Mar 2013 17:04:45 +0100 Subject: arc: fix dma_address assignment during dma_map_sg() Signed-off-by: Pierrick Hascoet Signed-off-by: Vineet Gupta --- arch/arc/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h index 31f77aec0823..45b8e0cea176 100644 --- a/arch/arc/include/asm/dma-mapping.h +++ b/arch/arc/include/asm/dma-mapping.h @@ -126,7 +126,7 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - sg->dma_address = dma_map_page(dev, sg_page(s), s->offset, + s->dma_address = dma_map_page(dev, sg_page(s), s->offset, s->length, dir); return nents; -- cgit v1.2.2 From 0c12582fbcdea0cbb0dfd224e1c5f9a8428ffa18 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:04 +0200 Subject: ipvs: add backup_only flag to avoid loops Dmitry Akindinov is reporting a problem where SYNs are looping between the master and backup server when the backup server is used as a real server in DR mode and has IPVS rules to function as director. Even when the backup function is enabled we continue to forward traffic and schedule new connections when the current master is using the backup server as a real server. While this is not a problem for NAT, for the DR and TUN methods the backup server cannot determine if a request comes from a client or from the director. To avoid such loops, add a new sysctl flag, backup_only. It can be needed for DR/TUN setups that do not need backup and director function at the same time. When the backup function is enabled we stop any forwarding and pass the traffic to the local stack (real server mode). The flag disables the director function when the backup function is enabled. For setups that enable backup function for some virtual services and director function for other virtual services there should be another more complex solution to support DR/TUN mode, maybe assigning a per-virtual-service syncid value, so that we can differentiate the requests. Reported-by: Dmitry Akindinov Tested-by: German Myzovsky Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- Documentation/networking/ipvs-sysctl.txt | 7 +++++++ include/net/ip_vs.h | 12 ++++++++++++ net/netfilter/ipvs/ip_vs_core.c | 12 ++++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 7 +++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index f2a2488f1bf3..9573d0c48c6e 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -15,6 +15,13 @@ amemthresh - INTEGER enabled and the variable is automatically set to 2, otherwise the strategy is disabled and the variable is set to 1. +backup_only - BOOLEAN + 0 - disabled (default) + not 0 - enabled + + If set, disable the director function while the server is + in backup mode to avoid packet loops for DR/TUN methods.
+ conntrack - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 68c69d54d392..fce8e6b66d55 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -976,6 +976,7 @@ struct netns_ipvs { int sysctl_sync_retries; int sysctl_nat_icmp_send; int sysctl_pmtu_disc; + int sysctl_backup_only; /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -1067,6 +1068,12 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return ipvs->sysctl_pmtu_disc; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return ipvs->sync_state & IP_VS_STATE_BACKUP && + ipvs->sysctl_backup_only; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1114,6 +1121,11 @@ static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs) return 1; } +static inline int sysctl_backup_only(struct netns_ipvs *ipvs) +{ + return 0; +} + #endif /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 47edf5a40a59..18b4bc55fa3d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1577,7 +1577,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; ip_vs_fill_iph_skb(af, skb, &iph); @@ -1654,7 +1655,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1815,13 +1815,15 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp(skb, &r, hooknum); @@ -1835,6 +1837,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct netns_ipvs *ipvs; struct ip_vs_iphdr iphdr; ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); @@ -1843,7 +1846,8 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, /* ipvs enabled in this netns ? 
*/ net = skb_net(skb); - if (!net_ipvs(net)->enable) + ipvs = net_ipvs(net); + if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c68198bf9128..9e2d1cccd1eb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1808,6 +1808,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "backup_only", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -3741,6 +3747,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; ipvs->sysctl_pmtu_disc = 1; tbl[idx++].data = &ipvs->sysctl_pmtu_disc; + tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); -- cgit v1.2.2 From bf93ad72cd8cfabe66a7b3d66236a1266d357189 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 9 Mar 2013 23:25:05 +0200 Subject: ipvs: remove extra rcu lock In 3.7 we added code that uses ipv4_update_pmtu but after commit c5ae7d4192 (ipv4: must use rcu protection while calling fib_lookup) the RCU lock is not needed. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 18b4bc55fa3d..61f49d241712 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1394,10 +1394,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - rcu_read_lock(); ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); - rcu_read_unlock(); /* Client uses PMTUD? */ if (!(cih->frag_off & htons(IP_DF))) goto ignore_ipip; -- cgit v1.2.2 From 217fd5e709f029c125a9d39de5f13387407f131a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:33 +0100 Subject: xen-blkback: fix foreach_grant_safe to handle empty lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We may use foreach_grant_safe in the future with empty lists, so make sure we can handle them. Signed-off-by: Roger Pau Monné Cc: xen-devel@lists.xen.org Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 477a17c20820..2cf8381a1c6e 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -164,7 +164,7 @@ static void make_response(struct xen_blkif *blkif, u64 id, #define foreach_grant_safe(pos, n, rbtree, node) \ for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \ - (n) = rb_next(&(pos)->node); \ + (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL; \ &(pos)->node != NULL; \ (pos) = container_of(n, typeof(*(pos)), node), \ (n) = (&(pos)->node != NULL) ? 
rb_next(&(pos)->node) : NULL) -- cgit v1.2.2 From 82f77cf9704cd06c452019421e5aada3a0648c76 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 18 Mar 2013 20:04:42 +0000 Subject: qeth: delay feature trace Delay tracing of the card features until the optional commands have been enabled. Signed-off-by: Stefan Raspl Signed-off-by: Frank Blaschka Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 091ca0efa1c5..4eb7ea3fc1c3 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3348,7 +3348,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) rc = -ENODEV; goto out_remove; } - qeth_trace_features(card); if (!card->dev && qeth_l3_setup_netdev(card)) { rc = -ENODEV; @@ -3425,6 +3424,7 @@ contin: qeth_l3_set_multicast_list(card->dev); rtnl_unlock(); } + qeth_trace_features(card); /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); -- cgit v1.2.2 From 82e2e782a3e486e3bfcc6130f0ebc28453af9955 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Mon, 18 Mar 2013 20:04:43 +0000 Subject: qeth: Fix invalid router settings handling Give a bad return code when specifying a router setting that is either invalid or not supported on the respective device type. In addition, fall back to the previous setting instead of silently switching back to 'no routing'. Signed-off-by: Stefan Raspl Signed-off-by: Frank Blaschka Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 17 +++++++++++------ drivers/s390/net/qeth_l3_sys.c | 2 ++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4eb7ea3fc1c3..b6da6cec9c3e 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -623,7 +623,7 @@ static int qeth_l3_send_setrouting(struct qeth_card *card, return rc; } -static void qeth_l3_correct_routing_type(struct qeth_card *card, +static int qeth_l3_correct_routing_type(struct qeth_card *card, enum qeth_routing_types *type, enum qeth_prot_versions prot) { if (card->info.type == QETH_CARD_TYPE_IQD) { @@ -632,7 +632,7 @@ static void qeth_l3_correct_routing_type(struct qeth_card *card, case PRIMARY_CONNECTOR: case SECONDARY_CONNECTOR: case MULTICAST_ROUTER: - return; + return 0; default: goto out_inval; } @@ -641,17 +641,18 @@ static void qeth_l3_correct_routing_type(struct qeth_card *card, case NO_ROUTER: case PRIMARY_ROUTER: case SECONDARY_ROUTER: - return; + return 0; case MULTICAST_ROUTER: if (qeth_is_ipafunc_supported(card, prot, IPA_OSA_MC_ROUTER)) - return; + return 0; default: goto out_inval; } } out_inval: *type = NO_ROUTER; + return -EINVAL; } int qeth_l3_setrouting_v4(struct qeth_card *card) @@ -660,8 +661,10 @@ int qeth_l3_setrouting_v4(struct qeth_card *card) QETH_CARD_TEXT(card, 3, "setrtg4"); - qeth_l3_correct_routing_type(card, &card->options.route4.type, + rc = qeth_l3_correct_routing_type(card, &card->options.route4.type, QETH_PROT_IPV4); + if (rc) + return rc; rc = qeth_l3_send_setrouting(card, card->options.route4.type, QETH_PROT_IPV4); @@ -683,8 +686,10 @@ int qeth_l3_setrouting_v6(struct qeth_card *card) if (!qeth_is_supported(card, IPA_IPV6)) return 0; - qeth_l3_correct_routing_type(card, &card->options.route6.type, + rc = qeth_l3_correct_routing_type(card, &card->options.route6.type,
QETH_PROT_IPV6); + if (rc) + return rc; rc = qeth_l3_send_setrouting(card, card->options.route6.type, QETH_PROT_IPV6); diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index ebc379486267..e70af2406ff9 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -87,6 +87,8 @@ static ssize_t qeth_l3_dev_route_store(struct qeth_card *card, rc = qeth_l3_setrouting_v6(card); } out: + if (rc) + route->type = old_route_type; mutex_unlock(&card->conf_mutex); return rc ? rc : count; } -- cgit v1.2.2 From 271648b4c610eed540daaf9ff366209825757565 Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Mon, 18 Mar 2013 20:04:44 +0000 Subject: qeth: Fix scatter-gather regression This patch fixes a scatter-gather regression introduced with commit 5640f768 net: use a per task frag allocator Now the qeth driver can cope with bigger fragments and split a fragment into sub-fragments if required. Signed-off-by: Frank Blaschka Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 1 + drivers/s390/net/qeth_core_main.c | 45 +++++++++++++++++++++++++++++++++------ drivers/s390/net/qeth_l3_main.c | 4 +++- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index d87961d4c0de..8c0622399fcd 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -916,6 +916,7 @@ int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *, void *reply_param); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); int qeth_get_elements_no(struct qeth_card *, void *, struct sk_buff *, int); +int qeth_get_elements_for_frags(struct sk_buff *); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, int, int); int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 0d8cdff81813..0d73a999983d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3679,6 +3679,25 @@ int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(qeth_get_priority_queue); +int qeth_get_elements_for_frags(struct sk_buff *skb) +{ + int cnt, length, e, elements = 0; + struct skb_frag_struct *frag; + char *data; + + for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) { + frag = &skb_shinfo(skb)->frags[cnt]; + data = (char *)page_to_phys(skb_frag_page(frag)) + + frag->page_offset; + length = frag->size; + e = PFN_UP((unsigned long)data + length - 1) - + PFN_DOWN((unsigned long)data); + elements += e; + } + return elements; +} +EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); + int qeth_get_elements_no(struct qeth_card *card, void *hdr, struct sk_buff *skb, int elems) { @@ -3686,7 +3705,8 @@ int qeth_get_elements_no(struct qeth_card *card, void *hdr, int elements_needed = PFN_UP((unsigned long)skb->data + dlen - 1) - PFN_DOWN((unsigned long)skb->data); - elements_needed += skb_shinfo(skb)->nr_frags; + elements_needed += qeth_get_elements_for_frags(skb); + if ((elements_needed + elems) > QETH_MAX_BUFFER_ELEMENTS(card)) { QETH_DBF_MESSAGE(2, "Invalid size of IP packet " "(Number=%d / Length=%d).
Discarded.\n", @@ -3771,12 +3791,23 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb, for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) { frag = &skb_shinfo(skb)->frags[cnt]; - buffer->element[element].addr = (char *) - page_to_phys(skb_frag_page(frag)) - + frag->page_offset; - buffer->element[element].length = frag->size; - buffer->element[element].eflags = SBAL_EFLAGS_MIDDLE_FRAG; - element++; + data = (char *)page_to_phys(skb_frag_page(frag)) + + frag->page_offset; + length = frag->size; + while (length > 0) { + length_here = PAGE_SIZE - + ((unsigned long) data % PAGE_SIZE); + if (length < length_here) + length_here = length; + + buffer->element[element].addr = data; + buffer->element[element].length = length_here; + buffer->element[element].eflags = + SBAL_EFLAGS_MIDDLE_FRAG; + length -= length_here; + data += length_here; + element++; + } } if (buffer->element[element - 1].eflags) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b6da6cec9c3e..8710337dab3e 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2903,7 +2903,9 @@ static inline int qeth_l3_tso_elements(struct sk_buff *skb) tcp_hdr(skb)->doff * 4; int tcpd_len = skb->len - (tcpd - (unsigned long)skb->data); int elements = PFN_UP(tcpd + tcpd_len - 1) - PFN_DOWN(tcpd); - elements += skb_shinfo(skb)->nr_frags; + + elements += qeth_get_elements_for_frags(skb); + return elements; } -- cgit v1.2.2 From 155b7edb51430a280f86c1e21b7be308b0d219d4 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:34 +0100 Subject: xen-blkfront: switch from llist to list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The git commit f84adf4921ae3115502f44ff467b04bf2f88cf04 ("xen-blkfront: drop the use of llist_for_each_entry_safe") was a stop-gap to fix a GCC 4.1 bug. The appropriate way is to actually use a list instead of an llist. As such this patch replaces the usage of llist with a list. Since we always manipulate the list while holding the io_lock, there's no need for additional locking (the llist used previously is safe to use concurrently without additional locking). 
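For readers following the conversion, a minimal sketch of the lock-protected list pattern the driver switches to (hypothetical helper name, not taken from the patch itself):

/* Sketch: pop the first free grant off a plain list. With every
 * manipulation done under io_lock, struct list_head gives blkfront
 * everything llist did, plus list_for_each_entry_safe() for teardown. */
static struct grant *grant_pop(struct blkfront_info *info)
{
	struct grant *gnt_list_entry;

	if (list_empty(&info->persistent_gnts))
		return NULL;
	gnt_list_entry = list_first_entry(&info->persistent_gnts,
					  struct grant, node);
	list_del(&gnt_list_entry->node);
	info->persistent_gnts_c--;
	return gnt_list_entry;
}

A caller would invoke this only while holding info->io_lock, which is exactly the invariant the commit message relies on.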
Signed-off-by: Roger Pau Monné CC: stable@vger.kernel.org [v1: Redid the git commit description] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 962064487ef7..97324cd18f4b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include @@ -68,7 +68,7 @@ enum blkif_state { struct grant { grant_ref_t gref; unsigned long pfn; - struct llist_node node; + struct list_head node; }; struct blk_shadow { @@ -105,7 +105,7 @@ struct blkfront_info struct work_struct work; struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; - struct llist_head persistent_gnts; + struct list_head persistent_gnts; unsigned int persistent_gnts_c; unsigned long shadow_free; unsigned int feature_flush; @@ -371,10 +371,11 @@ static int blkif_queue_request(struct request *req) lsect = fsect + (sg->length >> 9) - 1; if (info->persistent_gnts_c) { - BUG_ON(llist_empty(&info->persistent_gnts)); - gnt_list_entry = llist_entry( - llist_del_first(&info->persistent_gnts), - struct grant, node); + BUG_ON(list_empty(&info->persistent_gnts)); + gnt_list_entry = list_first_entry( + &info->persistent_gnts, + struct grant, node); + list_del(&gnt_list_entry->node); ref = gnt_list_entry->gref; buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); @@ -790,9 +791,8 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { - struct llist_node *all_gnts; - struct grant *persistent_gnt, *tmp; - struct llist_node *n; + struct grant *persistent_gnt; + struct grant *n; /* Prevent new requests being issued until we fix things up. */ spin_lock_irq(&info->io_lock); @@ -804,20 +804,15 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { - all_gnts = llist_del_all(&info->persistent_gnts); - persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); - while (persistent_gnt) { + list_for_each_entry_safe(persistent_gnt, n, + &info->persistent_gnts, node) { + list_del(&persistent_gnt->node); gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - tmp = persistent_gnt; - n = persistent_gnt->node.next; - if (n) - persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); - else - persistent_gnt = NULL; - kfree(tmp); + kfree(persistent_gnt); + info->persistent_gnts_c--; } - info->persistent_gnts_c = 0; + BUG_ON(info->persistent_gnts_c != 0); } /* No more gnttab callback work. 
*/ @@ -875,7 +870,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < s->req.u.rw.nr_segments; i++) { - llist_add(&s->grants_used[i]->node, &info->persistent_gnts); + list_add(&s->grants_used[i]->node, &info->persistent_gnts); info->persistent_gnts_c++; } } @@ -1171,7 +1166,7 @@ static int blkfront_probe(struct xenbus_device *dev, spin_lock_init(&info->io_lock); info->xbdev = dev; info->vdevice = vdevice; - init_llist_head(&info->persistent_gnts); + INIT_LIST_HEAD(&info->persistent_gnts); info->persistent_gnts_c = 0; info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); -- cgit v1.2.2 From 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 15 Mar 2013 11:32:30 +0000 Subject: inet: limit length of fragment queue hash table bucket lists This patch introduces a constant limit on the fragment queue hash table bucket list lengths. Currently the limit of 128 is chosen somewhat arbitrarily and just ensures that we can fill up the fragment cache with empty packets up to the default ip_frag_high_thresh limits. It should just protect from list iteration eating considerable amounts of cpu. If we reach the maximum length in one hash bucket a warning is printed. This is implemented on the caller side of inet_frag_find to distinguish between the different users of inet_fragment.c. I dropped the out of memory warning in the ipv4 fragment lookup path, because we already get a warning by the slab allocator. Cc: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_frag.h | 9 +++++++++ net/ipv4/inet_fragment.c | 20 +++++++++++++++++++- net/ipv4/ip_fragment.c | 11 ++++------- net/ipv6/netfilter/nf_conntrack_reasm.c | 12 ++++++------ net/ipv6/reassembly.c | 8 ++++++-- 5 files changed, 44 insertions(+), 16 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 76c3fe5ecc2e..0a1dcc2fa2f5 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -43,6 +43,13 @@ struct inet_frag_queue { #define INETFRAGS_HASHSZ 64 +/* averaged: + * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ / + * rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or + * struct frag_queue)) + */ +#define INETFRAGS_MAXDEPTH 128 + struct inet_frags { struct hlist_head hash[INETFRAGS_HASHSZ]; /* This rwlock is a global lock (seperate per IPv4, IPv6 and @@ -76,6 +83,8 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock); +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix); static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 245ae078a07f..f4fd23de9b13 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -21,6 +21,7 @@ #include #include +#include #include static void inet_frag_secret_rebuild(unsigned long dummy) @@ -277,6 +278,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, __releases(&f->lock) { struct inet_frag_queue *q; + int depth = 0; hlist_for_each_entry(q, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { @@ -284,9 +286,25 @@ struct inet_frag_queue 
*inet_frag_find(struct netns_frags *nf, read_unlock(&f->lock); return q; } + depth++; } read_unlock(&f->lock); - return inet_frag_create(nf, f, key); + if (depth <= INETFRAGS_MAXDEPTH) + return inet_frag_create(nf, f, key); + else + return ERR_PTR(-ENOBUFS); } EXPORT_SYMBOL(inet_frag_find); + +void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q, + const char *prefix) +{ + static const char msg[] = "inet_frag_find: Fragment hash bucket" + " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH) + ". Dropping fragment.\n"; + + if (PTR_ERR(q) == -ENOBUFS) + LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg); +} +EXPORT_SYMBOL(inet_frag_maybe_warn_overflow); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b6d30acb600c..a6445b843ef4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -292,14 +292,11 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); - if (q == NULL) - goto out_nomem; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct ipq, q); - -out_nomem: - LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); - return NULL; } /* Is the fragment too far ahead to be part of ipq? */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54087e96d7b8..6700069949dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -14,6 +14,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "IPv6-nf: " fmt + #include #include #include @@ -180,13 +182,11 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); - if (q == NULL) - goto oom; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct frag_queue, q); - -oom: - return NULL; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3c6a77290c6e..196ab9347ad1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include #include #include @@ -185,9 +188,10 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (q == NULL) + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); return NULL; - + } return container_of(q, struct frag_queue, q); } -- cgit v1.2.2 From 63b7743fdd4dab8a534f366479c2bf0caa0991f7 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 5 Mar 2013 20:43:42 +0000 Subject: arm64: Do not select GENERIC_HARDIRQS_NO_DEPRECATED Config option GENERIC_HARDIRQS_NO_DEPRECATED was removed in commit 78c89825649a9a5ed526c507603196f467d781a5 ("genirq: Remove the now obsolete config options and select statements"), but the select was accidentally reintroduced in commit 8c2c3df31e3b87cb5348e48776c366ebd1dc5a7a ("arm64: Build infrastructure"). 
Signed-off-by: Paul Bolle Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fd70a68387eb..9b6d19f74078 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -9,7 +9,6 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS - select GENERIC_HARDIRQS_NO_DEPRECATED select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW -- cgit v1.2.2 From 792072066d30372772137be9ee2f4d72d77329f9 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 19 Mar 2013 15:41:37 +0000 Subject: arm64: Kconfig.debug: Remove unused CONFIG_DEBUG_ERRORS The Kconfig entry for DEBUG_ERRORS is a verbatim copy of the former arm entry for that symbol. It got removed in v2.6.39 because it wasn't actually used anywhere. There are still no users of DEBUG_ERRORS so remove this entry too. Signed-off-by: Paul Bolle [catalin.marinas@arm.com: removed option from defconfig] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.debug | 11 ----------- arch/arm64/configs/defconfig | 1 - 2 files changed, 12 deletions(-) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 51493430f142..1a6bfe954d49 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,17 +6,6 @@ config FRAME_POINTER bool default y -config DEBUG_ERRORS - bool "Verbose kernel error messages" - depends on DEBUG_KERNEL - help - This option controls verbose debugging information which can be - printed when the kernel detects an internal error. This debugging - information is useful to kernel hackers when tracking down problems, - but mostly meaningless to other people. It's safe to say Y unless - you are concerned with the code size or don't want to see these - messages. - config DEBUG_STACK_USAGE bool "Enable stack utilization instrumentation" depends on DEBUG_KERNEL diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 9212c7880da7..09bef29f3a09 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -82,4 +82,3 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y # CONFIG_FTRACE is not set CONFIG_ATOMIC64_SELFTEST=y -CONFIG_DEBUG_ERRORS=y -- cgit v1.2.2 From ffb1dabd1eb10c76a1e7af62f75a1aaa8d590b5a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:32 +0100 Subject: xen-blkback: don't store dev_bus_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev_bus_addr returned in the grant ref map operation is the mfn of the passed page, so there's no need to store it in the persistent grant entry, since we can always get it provided that we have the page. This reduces the memory overhead of persistent grants in blkback. While at it, rename 'seg[i].buf' to 'seg[i].offset' as that makes much more sense: we use that value in bio_add_page, whose fourth argument expects the offset. We hadn't used the physical address as part of this at all. 
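A short sketch of the end result, condensed from the hunks below: the segment now carries only the in-page offset, which is all bio_add_page() needs.

/* The offset within the page is derived from the request's first
 * sector; no bus address is needed. */
seg[i].offset = req->u.rw.seg[i].first_sect << 9;

/* bio_add_page(bio, page, len, offset) takes the offset as its
 * fourth argument, so the page plus seg[i].offset is sufficient. */
bio_add_page(bio, pages[i], seg[i].nsec << 9, seg[i].offset);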
Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org [v1: s/buf/offset/] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 21 ++++++--------------- drivers/block/xen-blkback/common.h | 1 - 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 2cf8381a1c6e..dd5b2fed97e9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg) } struct seg_buf { - unsigned long buf; + unsigned int offset; unsigned int nsec; }; /* @@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req, * If this is a new persistent grant * save the handler */ - persistent_gnts[i]->handle = map[j].handle; - persistent_gnts[i]->dev_bus_addr = - map[j++].dev_bus_addr; + persistent_gnts[i]->handle = map[j++].handle; } pending_handle(pending_req, i) = persistent_gnts[i]->handle; if (ret) continue; - - seg[i].buf = persistent_gnts[i]->dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } else { - pending_handle(pending_req, i) = map[j].handle; + pending_handle(pending_req, i) = map[j++].handle; bitmap_set(pending_req->unmap_seg, i, 1); - if (ret) { - j++; + if (ret) continue; - } - - seg[i].buf = map[j++].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); } + seg[i].offset = (req->u.rw.seg[i].first_sect << 9); } return ret; } @@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, (bio_add_page(bio, pages[i], seg[i].nsec << 9, - seg[i].buf & ~PAGE_MASK) == 0)) { + seg[i].offset) == 0)) { bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index da78346487ae..60103e2517ba 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -187,7 +187,6 @@ struct persistent_gnt { struct page *page; grant_ref_t gnt; grant_handle_t handle; - uint64_t dev_bus_addr; struct rb_node node; }; -- cgit v1.2.2 From 9c1e050caeb4d1250f8ceef1180a8b3d0db6c624 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:35 +0100 Subject: xen-blkfront: pre-allocate pages for requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents us from having to call alloc_page while we are preparing the request. Since blkfront was calling alloc_page with a spinlock held we used GFP_ATOMIC, which can fail if we are requesting a lot of pages since it is using the emergency memory pools. Allocating all the pages at init prevents us from having to call alloc_page, thus preventing possible failures. 
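The pattern, reduced to a sketch (hypothetical helper, simplified from the fill_grant_buffer() added below): do the sleeping GFP_NOIO allocations once at setup time and push everything onto a free list, so the request path never allocates.

struct grant {
	grant_ref_t gref;
	unsigned long pfn;
	struct list_head node;
};

/* Pre-fill a free list of granted pages at init time. GFP_NOIO may
 * sleep here, which is fine outside the request path. */
static int prefill_grants(struct list_head *pool, int num)
{
	while (num--) {
		struct grant *gnt = kzalloc(sizeof(*gnt), GFP_NOIO);
		struct page *page;

		if (!gnt)
			return -ENOMEM;	/* caller drains the partial pool */
		page = alloc_page(GFP_NOIO);
		if (!page) {
			kfree(gnt);
			return -ENOMEM;
		}
		gnt->pfn = page_to_pfn(page);
		list_add(&gnt->node, pool);
	}
	return 0;
}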
Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 120 ++++++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 41 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 97324cd18f4b..c64043323399 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -165,6 +165,69 @@ static int add_id_to_freelist(struct blkfront_info *info, return 0; } +static int fill_grant_buffer(struct blkfront_info *info, int num) +{ + struct page *granted_page; + struct grant *gnt_list_entry, *n; + int i = 0; + + while(i < num) { + gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO); + if (!gnt_list_entry) + goto out_of_memory; + + granted_page = alloc_page(GFP_NOIO); + if (!granted_page) { + kfree(gnt_list_entry); + goto out_of_memory; + } + + gnt_list_entry->pfn = page_to_pfn(granted_page); + gnt_list_entry->gref = GRANT_INVALID_REF; + list_add(&gnt_list_entry->node, &info->persistent_gnts); + i++; + } + + return 0; + +out_of_memory: + list_for_each_entry_safe(gnt_list_entry, n, + &info->persistent_gnts, node) { + list_del(&gnt_list_entry->node); + __free_page(pfn_to_page(gnt_list_entry->pfn)); + kfree(gnt_list_entry); + i--; + } + BUG_ON(i != 0); + return -ENOMEM; +} + +static struct grant *get_grant(grant_ref_t *gref_head, + struct blkfront_info *info) +{ + struct grant *gnt_list_entry; + unsigned long buffer_mfn; + + BUG_ON(list_empty(&info->persistent_gnts)); + gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant, + node); + list_del(&gnt_list_entry->node); + + if (gnt_list_entry->gref != GRANT_INVALID_REF) { + info->persistent_gnts_c--; + return gnt_list_entry; + } + + /* Assign a gref to this page */ + gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); + BUG_ON(gnt_list_entry->gref == -ENOSPC); + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); + gnttab_grant_foreign_access_ref(gnt_list_entry->gref, + info->xbdev->otherend_id, + buffer_mfn, 0); + return gnt_list_entry; +} + static const char *op_name(int op) { static const char *const names[] = { @@ -306,7 +369,6 @@ static int blkif_queue_request(struct request *req) */ bool new_persistent_gnts; grant_ref_t gref_head; - struct page *granted_page; struct grant *gnt_list_entry = NULL; struct scatterlist *sg; @@ -370,42 +432,9 @@ static int blkif_queue_request(struct request *req) fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; - if (info->persistent_gnts_c) { - BUG_ON(list_empty(&info->persistent_gnts)); - gnt_list_entry = list_first_entry( - &info->persistent_gnts, - struct grant, node); - list_del(&gnt_list_entry->node); - - ref = gnt_list_entry->gref; - buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); - info->persistent_gnts_c--; - } else { - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); - - gnt_list_entry = - kmalloc(sizeof(struct grant), - GFP_ATOMIC); - if (!gnt_list_entry) - return -ENOMEM; - - granted_page = alloc_page(GFP_ATOMIC); - if (!granted_page) { - kfree(gnt_list_entry); - return -ENOMEM; - } - - gnt_list_entry->pfn = - page_to_pfn(granted_page); - gnt_list_entry->gref = ref; - - buffer_mfn = pfn_to_mfn(page_to_pfn( - granted_page)); - gnttab_grant_foreign_access_ref(ref, - info->xbdev->otherend_id, - buffer_mfn, 0); - } + gnt_list_entry = get_grant(&gref_head, info); + ref = gnt_list_entry->gref; + buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); info->shadow[id].grants_used[i] = 
gnt_list_entry; @@ -803,17 +832,20 @@ static void blkif_free(struct blkfront_info *info, int suspend) blk_stop_queue(info->rq); /* Remove all persistent grants */ - if (info->persistent_gnts_c) { + if (!list_empty(&info->persistent_gnts)) { list_for_each_entry_safe(persistent_gnt, n, &info->persistent_gnts, node) { list_del(&persistent_gnt->node); - gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + if (persistent_gnt->gref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(persistent_gnt->gref, + 0, 0UL); + info->persistent_gnts_c--; + } __free_page(pfn_to_page(persistent_gnt->pfn)); kfree(persistent_gnt); - info->persistent_gnts_c--; } - BUG_ON(info->persistent_gnts_c != 0); } + BUG_ON(info->persistent_gnts_c != 0); /* No more gnttab callback work. */ gnttab_cancel_free_callback(&info->callback); @@ -1008,6 +1040,12 @@ static int setup_blkring(struct xenbus_device *dev, sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); + /* Allocate memory for grants */ + err = fill_grant_buffer(info, BLK_RING_SIZE * + BLKIF_MAX_SEGMENTS_PER_REQUEST); + if (err) + goto fail; + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { free_page((unsigned long)sring); -- cgit v1.2.2 From b1173e316bf2ff3c11f46247417f0f5789a4ea0c Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Mon, 18 Mar 2013 17:49:36 +0100 Subject: xen-blkfront: remove frame list from blk_shadow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have the frame (pfn of the grant page) stored inside struct grant, so there's no need to keep an aditional list of mapped frames for a specific request. This reduces memory usage in blkfront. Signed-off-by: Roger Pau Monné Cc: Konrad Rzeszutek Wilk Cc: xen-devel@lists.xen.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index c64043323399..a894f88762d8 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -74,7 +74,6 @@ struct grant { struct blk_shadow { struct blkif_request req; struct request *request; - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; @@ -356,7 +355,6 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, static int blkif_queue_request(struct request *req) { struct blkfront_info *info = req->rq_disk->private_data; - unsigned long buffer_mfn; struct blkif_request *ring_req; unsigned long id; unsigned int fsect, lsect; @@ -434,7 +432,6 @@ static int blkif_queue_request(struct request *req) gnt_list_entry = get_grant(&gref_head, info); ref = gnt_list_entry->gref; - buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn); info->shadow[id].grants_used[i] = gnt_list_entry; @@ -465,7 +462,6 @@ static int blkif_queue_request(struct request *req) kunmap_atomic(shared_data); } - info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn); ring_req->u.rw.seg[i] = (struct blkif_request_segment) { .gref = ref, @@ -1268,7 +1264,7 @@ static int blkif_recover(struct blkfront_info *info) gnttab_grant_foreign_access_ref( req->u.rw.seg[j].gref, info->xbdev->otherend_id, - pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]), + pfn_to_mfn(copy[i].grants_used[j]->pfn), 0); } info->shadow[req->u.rw.id].req = *req; -- cgit v1.2.2 From c300aa64ddf57d9c5d9c898a64b36877345dd4a9 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Mon, 11 Mar 2013 09:34:52 -0700 Subject: KVM: x86: fix for 
buffer overflow in handling of MSR_KVM_SYSTEM_TIME (CVE-2013-1796) If the guest sets the GPA of the time_page so that the request to update the time straddles a page, then KVM will write onto an incorrect page. The write is done by using kmap_atomic to get a pointer to the page for the time structure and then performing a memcpy to that page starting at an offset that the guest controls. Well-behaved guests always provide a 32-byte aligned address, however a malicious guest could use this to corrupt host kernel memory. Tested: Tested against kvmclock unit test. Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f7c850b36910..2ade60c25402 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1959,6 +1959,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* ...but clean it before doing the actual write */ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); + /* Check that the address is 32-byte aligned. */ + if (vcpu->arch.time_offset & + (sizeof(struct pvclock_vcpu_time_info) - 1)) + break; + vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); -- cgit v1.2.2 From 0b79459b482e85cb7426aa7da683a9f2c97aeae1 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Wed, 20 Feb 2013 14:48:10 -0800 Subject: KVM: x86: Convert MSR_KVM_SYSTEM_TIME to use gfn_to_hva_cache functions (CVE-2013-1797) There is a potential use-after-free issue with the handling of MSR_KVM_SYSTEM_TIME. If the guest specifies a GPA in a movable or removable memory such as frame buffers then KVM might continue to write to that address even after it's removed via KVM_SET_USER_MEMORY_REGION. KVM pins the page in memory so it's unlikely to cause an issue, but if the user space component re-purposes the memory previously used for the guest, then the guest will be able to corrupt that memory. 
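The conversion that follows replaces the pinned-page mapping with the gfn_to_hva_cache API; in sketch form (mirroring the calls visible in the diff, with error handling trimmed and generic names):

/* MSR write: (re)initialize the cache for the guest-supplied GPA. */
if (kvm_gfn_to_hva_cache_init(kvm, &arch->pv_time, data & ~1ULL))
	arch->pv_time_enabled = false;
else
	arch->pv_time_enabled = true;

/* Clock update: write through the cache. The cache is revalidated
 * against the current memslots, so a region removed via
 * KVM_SET_USER_MEMORY_REGION is no longer written to. */
if (arch->pv_time_enabled)
	kvm_write_guest_cached(kvm, &arch->pv_time, &arch->hv_clock,
			       sizeof(arch->hv_clock));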
Tested: Tested against kvmclock unit test Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/x86.c | 47 ++++++++++++++++++----------------------- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 635a74d22409..4979778cc7fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -414,8 +414,8 @@ struct kvm_vcpu_arch { gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; - unsigned int time_offset; - struct page *time_page; + struct gfn_to_hva_cache pv_time; + bool pv_time_enabled; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2ade60c25402..f19ac0aca60d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1406,10 +1406,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long flags, this_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; - void *shared_kaddr; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp, host_tsc; - struct pvclock_vcpu_time_info *guest_hv_clock; + struct pvclock_vcpu_time_info guest_hv_clock; u8 pvclock_flags; bool use_master_clock; @@ -1463,7 +1462,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_restore(flags); - if (!vcpu->time_page) + if (!vcpu->pv_time_enabled) return 0; /* @@ -1525,12 +1524,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) */ vcpu->hv_clock.version += 2; - shared_kaddr = kmap_atomic(vcpu->time_page); - - guest_hv_clock = shared_kaddr + vcpu->time_offset; + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return 0; /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ - pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); + pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); if (vcpu->pvclock_set_guest_stopped_request) { pvclock_flags |= PVCLOCK_GUEST_STOPPED; @@ -1543,12 +1542,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; - memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); - - kunmap_atomic(shared_kaddr); - - mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); return 0; } @@ -1837,10 +1833,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { - if (vcpu->arch.time_page) { - kvm_release_page_dirty(vcpu->arch.time_page); - vcpu->arch.time_page = NULL; - } + vcpu->arch.pv_time_enabled = false; } static void accumulate_steal_time(struct kvm_vcpu *vcpu) @@ -1947,6 +1940,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_KVM_SYSTEM_TIME_NEW: case MSR_KVM_SYSTEM_TIME: { + u64 gpa_offset; kvmclock_reset(vcpu); vcpu->arch.time = data; @@ -1956,19 +1950,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & 1)) break; - /* ...but clean it before doing the actual write */ - vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); + gpa_offset = data & ~(PAGE_MASK | 1); /* Check that the address is 32-byte aligned. 
*/ - if (vcpu->arch.time_offset & - (sizeof(struct pvclock_vcpu_time_info) - 1)) + if (gpa_offset & (sizeof(struct pvclock_vcpu_time_info) - 1)) break; - vcpu->arch.time_page = - gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); - - if (is_error_page(vcpu->arch.time_page)) - vcpu->arch.time_page = NULL; + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.pv_time, data & ~1ULL)) + vcpu->arch.pv_time_enabled = false; + else + vcpu->arch.pv_time_enabled = true; break; } @@ -2972,7 +2964,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, */ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.time_page) + if (!vcpu->arch.pv_time_enabled) return -EINVAL; vcpu->arch.pvclock_set_guest_stopped_request = true; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -6723,6 +6715,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_wbinvd_dirty_mask; vcpu->arch.ia32_tsc_adjust_msr = 0x0; + vcpu->arch.pv_time_enabled = false; kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); -- cgit v1.2.2 From a2c118bfab8bc6b8bb213abfc35201e441693d55 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Wed, 20 Feb 2013 14:49:16 -0800 Subject: KVM: Fix bounds checking in ioapic indirect register reads (CVE-2013-1798) If the guest specifies a IOAPIC_REG_SELECT with an invalid value and follows that with a read of the IOAPIC_REG_WINDOW KVM does not properly validate that request. ioapic_read_indirect contains an ASSERT(redir_index < IOAPIC_NUM_PINS), but the ASSERT has no effect in non-debug builds. In recent kernels this allows a guest to cause a kernel oops by reading invalid memory. In older kernels (pre-3.3) this allows a guest to read from large ranges of host memory. Tested: tested against apic unit tests. Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- virt/kvm/ioapic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index ce82b9401958..5ba005c00e2f 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -74,9 +74,12 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; u64 redir_content; - ASSERT(redir_index < IOAPIC_NUM_PINS); + if (redir_index < IOAPIC_NUM_PINS) + redir_content = + ioapic->redirtbl[redir_index].bits; + else + redir_content = ~0ULL; - redir_content = ioapic->redirtbl[redir_index].bits; result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : redir_content & 0xffffffff; -- cgit v1.2.2 From e0b2029614fe7e3b09fab253630c5b70eea58f53 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 12 Mar 2013 21:35:19 +0100 Subject: sparc: delete "if !ULTRA_HAS_POPULATION_COUNT" Commit 2d78d4beb64eb07d50665432867971c481192ebf ("[PATCH] bitops: sparc64: use generic bitops") made the default of GENERIC_HWEIGHT depend on !ULTRA_HAS_POPULATION_COUNT. But since there's no Kconfig symbol with that name, this always evaluates to true. Delete this dependency. Signed-off-by: Paul Bolle Acked-by: Sam Ravnborg Signed-off-by: David S. 
Miller --- arch/sparc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 289127d5241c..7fcd4b4ebcfc 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -197,7 +197,7 @@ config RWSEM_XCHGADD_ALGORITHM config GENERIC_HWEIGHT bool - default y if !ULTRA_HAS_POPULATION_COUNT + default y config GENERIC_CALIBRATE_DELAY bool -- cgit v1.2.2 From f58b20bd6bad48d6fc5633f003c3651115273fb2 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 19 Mar 2013 05:58:47 +0000 Subject: sparc: remove unused "config BITS" sparc's asm/module.h got removed in commit 786d35d45cc40b2a51a18f73e14e135d47fdced7 ("Make most arch asm/module.h files use asm-generic/module.h"). That removed the only two uses of this Kconfig symbol. So we can remove its entry too. > From arch/sparc/Makefile: > ifeq ($(CONFIG_SPARC32),y) > [...] > > [...] > export BITS := 32 > [...] > > else > [...] > > [...] > export BITS := 64 > [...] > > So $(BITS) is set depending on whether CONFIG_SPARC32 is set or not. > Using $(BITS) in sparc's Makefiles is not using CONFIG_BITS. That > doesn't count as usage of "config BITS". Signed-off-by: Paul Bolle Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 7fcd4b4ebcfc..3d361f236308 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -84,12 +84,6 @@ config ARCH_DEFCONFIG default "arch/sparc/configs/sparc32_defconfig" if SPARC32 default "arch/sparc/configs/sparc64_defconfig" if SPARC64 -# CONFIG_BITS can be used at source level to get 32/64 bits -config BITS - int - default 32 if SPARC32 - default 64 if SPARC64 - config IOMMU_HELPER bool default y if SPARC64 -- cgit v1.2.2 From 14a40ffccd6163bbcd1d6f32b28a88ffe6149fc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Mar 2013 13:45:20 -0700 Subject: sched: replace PF_THREAD_BOUND with PF_NO_SETAFFINITY PF_THREAD_BOUND was originally used to mark kernel threads which were bound to a specific CPU using kthread_bind() and a task with the flag set allows cpus_allowed modifications only to itself. Workqueue is currently abusing it to prevent userland from meddling with cpus_allowed of workqueue workers. What we need is a flag to prevent userland from messing with cpus_allowed of certain kernel tasks. In kernel, anyone can (incorrectly) squash the flag, and, for worker-type usages, restricting cpus_allowed modification to the task itself doesn't provide meaningful extra protection as other tasks can inject work items to the task anyway. This patch replaces PF_THREAD_BOUND with PF_NO_SETAFFINITY. sched_setaffinity() checks the flag and returns -EINVAL if set. set_cpus_allowed_ptr() is no longer affected by the flag. This will allow simplifying workqueue worker CPU affinity management.
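In code, the new policy is a single check at the userland entry point, sketched here to match the hunk added to sched_setaffinity() in the diff below:

/* sched_setaffinity(): userland may not touch the cpumask of a
 * PF_NO_SETAFFINITY task. */
if (p->flags & PF_NO_SETAFFINITY) {
	retval = -EINVAL;
	goto out_put_task;
}

/* The in-kernel set_cpus_allowed_ptr() no longer checks the flag at
 * all, so CPU hotplug code can adjust worker affinity directly. */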
Signed-off-by: Tejun Heo Acked-by: Ingo Molnar Reviewed-by: Lai Jiangshan Cc: Peter Zijlstra Cc: Thomas Gleixner --- include/linux/sched.h | 2 +- kernel/cgroup.c | 4 ++-- kernel/cpuset.c | 16 ++++++++-------- kernel/kthread.c | 2 +- kernel/sched/core.c | 9 ++++----- kernel/workqueue.c | 10 +++------- 6 files changed, 19 insertions(+), 24 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d35d2b6ddbfb..e5c64f7b8c1d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1793,7 +1793,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ -#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a32f9432666c..3852d926322c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2224,11 +2224,11 @@ retry_find_task: tsk = tsk->group_leader; /* - * Workqueue threads may acquire PF_THREAD_BOUND and become + * Workqueue threads may acquire PF_NO_SETAFFINITY and become * trapped in a cpuset, or RT worker may be born in a cgroup * with no rt_runtime allocated. Just say no. */ - if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) { + if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; rcu_read_unlock(); goto out_unlock_cgroup; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4f9dfe43ecbd..f22e94792707 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1388,16 +1388,16 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) cgroup_taskset_for_each(task, cgrp, tset) { /* - * Kthreads bound to specific cpus cannot be moved to a new - * cpuset; we cannot change their cpu affinity and - * isolating such threads by their set of allowed nodes is - * unnecessary. Thus, cpusets are not applicable for such - * threads. This prevents checking for success of - * set_cpus_allowed_ptr() on all attached tasks before - * cpus_allowed may be changed. + * Kthreads which disallow setaffinity shouldn't be moved + * to a new cpuset; we don't want to change their cpu + * affinity and isolating such threads by their set of + * allowed nodes is unnecessary. Thus, cpusets are not + * applicable for such threads. This prevents checking for + * success of set_cpus_allowed_ptr() on all attached tasks + * before cpus_allowed may be changed. */ ret = -EINVAL; - if (task->flags & PF_THREAD_BOUND) + if (task->flags & PF_NO_SETAFFINITY) goto out_unlock; ret = security_task_setscheduler(task); if (ret) diff --git a/kernel/kthread.c b/kernel/kthread.c index 691dc2ef9baf..a2fbbb782bad 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -260,7 +260,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu) { /* It's safe because the task is inactive. 
*/ do_set_cpus_allowed(p, cpumask_of(cpu)); - p->flags |= PF_THREAD_BOUND; + p->flags |= PF_NO_SETAFFINITY; } /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f12624a393c..23606ee961b5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4126,6 +4126,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) get_task_struct(p); rcu_read_unlock(); + if (p->flags & PF_NO_SETAFFINITY) { + retval = -EINVAL; + goto out_put_task; + } if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { retval = -ENOMEM; goto out_put_task; @@ -4773,11 +4777,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; } - if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { - ret = -EINVAL; - goto out; - } - do_set_cpus_allowed(p, new_mask); /* Can the task run on the task's current CPU? If so, we're done */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 969be0b72071..39a591f65b08 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1757,12 +1757,8 @@ static struct worker *create_worker(struct worker_pool *pool) set_user_nice(worker->task, pool->attrs->nice); set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); - /* - * %PF_THREAD_BOUND is used to prevent userland from meddling with - * cpumask of workqueue workers. This is an abuse. We need - * %PF_NO_SETAFFINITY. - */ - worker->task->flags |= PF_THREAD_BOUND; + /* prevent userland from meddling with cpumask of workqueue workers */ + worker->task->flags |= PF_NO_SETAFFINITY; /* * The caller is responsible for ensuring %POOL_DISASSOCIATED @@ -3876,7 +3872,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, } wq->rescuer = rescuer; - rescuer->task->flags |= PF_THREAD_BOUND; + rescuer->task->flags |= PF_NO_SETAFFINITY; wake_up_process(rescuer->task); } -- cgit v1.2.2 From 822d8405d13931062d653e0c2cc0199ed801b072 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Mar 2013 13:45:21 -0700 Subject: workqueue: convert worker_pool->worker_ida to idr and implement for_each_pool_worker() Make worker_ida an idr - worker_idr and use it to implement for_each_pool_worker() which will be used to simplify worker rebinding on CPU_ONLINE. pool->worker_idr is protected by both pool->manager_mutex and pool->lock so that it can be iterated while holding either lock. * create_worker() allocates ID without installing worker pointer and installs the pointer later using idr_replace(). This is because worker ID is needed when creating the actual task to name it and the new worker shouldn't be visible to iterations before fully initialized. * In destroy_worker(), ID removal is moved before kthread_stop(). This is again to guarantee that only fully working workers are visible to for_each_pool_worker(). Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 63 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 39a591f65b08..384ff34c9aff 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -119,6 +119,9 @@ enum { * * F: wq->flush_mutex protected. * + * MG: pool->manager_mutex and pool->lock protected. Writes require both + * locks. Reads can happen under either lock. + * * WQ: wq_mutex protected. * * WR: wq_mutex protected for writes. Sched-RCU protected for reads. 
@@ -156,7 +159,7 @@ struct worker_pool { /* see manage_workers() for details on the two manager mutexes */ struct mutex manager_arb; /* manager arbitration */ struct mutex manager_mutex; /* manager exclusion */ - struct ida worker_ida; /* L: for worker IDs */ + struct idr worker_idr; /* MG: worker IDs and iteration */ struct workqueue_attrs *attrs; /* I: worker attributes */ struct hlist_node hash_node; /* WQ: unbound_pool_hash node */ @@ -299,6 +302,15 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, lockdep_is_held(&pwq_lock), \ "sched RCU or pwq_lock should be held") +#ifdef CONFIG_LOCKDEP +#define assert_manager_or_pool_lock(pool) \ + WARN_ONCE(!lockdep_is_held(&(pool)->manager_mutex) && \ + !lockdep_is_held(&(pool)->lock), \ + "pool->manager_mutex or ->lock should be held") +#else +#define assert_manager_or_pool_lock(pool) do { } while (0) +#endif + #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ @@ -324,6 +336,22 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, if (({ assert_rcu_or_wq_mutex(); false; })) { } \ else +/** + * for_each_pool_worker - iterate through all workers of a worker_pool + * @worker: iteration cursor + * @wi: integer used for iteration + * @pool: worker_pool to iterate workers of + * + * This must be called with either @pool->manager_mutex or ->lock held. + * + * The if/else clause exists only for the lockdep assertion and can be + * ignored. + */ +#define for_each_pool_worker(worker, wi, pool) \ + idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \ + if (({ assert_manager_or_pool_lock((pool)); false; })) { } \ + else + /** * for_each_pwq - iterate through all pool_workqueues of the specified workqueue * @pwq: iteration cursor @@ -1723,14 +1751,19 @@ static struct worker *create_worker(struct worker_pool *pool) lockdep_assert_held(&pool->manager_mutex); + /* + * ID is needed to determine kthread name. Allocate ID first + * without installing the pointer. 
+ */ + idr_preload(GFP_KERNEL); spin_lock_irq(&pool->lock); - while (ida_get_new(&pool->worker_ida, &id)) { - spin_unlock_irq(&pool->lock); - if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL)) - goto fail; - spin_lock_irq(&pool->lock); - } + + id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT); + spin_unlock_irq(&pool->lock); + idr_preload_end(); + if (id < 0) + goto fail; worker = alloc_worker(); if (!worker) @@ -1768,11 +1801,17 @@ static struct worker *create_worker(struct worker_pool *pool) if (pool->flags & POOL_DISASSOCIATED) worker->flags |= WORKER_UNBOUND; + /* successful, commit the pointer to idr */ + spin_lock_irq(&pool->lock); + idr_replace(&pool->worker_idr, worker, worker->id); + spin_unlock_irq(&pool->lock); + return worker; + fail: if (id >= 0) { spin_lock_irq(&pool->lock); - ida_remove(&pool->worker_ida, id); + idr_remove(&pool->worker_idr, id); spin_unlock_irq(&pool->lock); } kfree(worker); @@ -1832,7 +1871,6 @@ static int create_and_start_worker(struct worker_pool *pool) static void destroy_worker(struct worker *worker) { struct worker_pool *pool = worker->pool; - int id = worker->id; lockdep_assert_held(&pool->manager_mutex); lockdep_assert_held(&pool->lock); @@ -1850,13 +1888,14 @@ static void destroy_worker(struct worker *worker) list_del_init(&worker->entry); worker->flags |= WORKER_DIE; + idr_remove(&pool->worker_idr, worker->id); + spin_unlock_irq(&pool->lock); kthread_stop(worker->task); kfree(worker); spin_lock_irq(&pool->lock); - ida_remove(&pool->worker_ida, id); } static void idle_worker_timeout(unsigned long __pool) @@ -3482,7 +3521,7 @@ static int init_worker_pool(struct worker_pool *pool) mutex_init(&pool->manager_arb); mutex_init(&pool->manager_mutex); - ida_init(&pool->worker_ida); + idr_init(&pool->worker_idr); INIT_HLIST_NODE(&pool->hash_node); pool->refcnt = 1; @@ -3498,7 +3537,7 @@ static void rcu_free_pool(struct rcu_head *rcu) { struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); - ida_destroy(&pool->worker_ida); + idr_destroy(&pool->worker_idr); free_workqueue_attrs(pool->attrs); kfree(pool); } -- cgit v1.2.2 From bd7c089eb25b26d2e03fd34f97e5517a4463f871 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Mar 2013 13:45:21 -0700 Subject: workqueue: relocate rebind_workers() rebind_workers() will be reimplemented in a way which makes it mostly decoupled from the rest of worker management. Move rebind_workers() so that it's located with other CPU hotplug related functions. This patch is pure function relocation. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 142 ++++++++++++++++++++++++++--------------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 384ff34c9aff..3e297c574be8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1643,77 +1643,6 @@ static void busy_worker_rebind_fn(struct work_struct *work) spin_unlock_irq(&worker->pool->lock); } -/** - * rebind_workers - rebind all workers of a pool to the associated CPU - * @pool: pool of interest - * - * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding - * is different for idle and busy ones. - * - * Idle ones will be removed from the idle_list and woken up. They will - * add themselves back after completing rebind. This ensures that the - * idle_list doesn't contain any unbound workers when re-bound busy workers - * try to perform local wake-ups for concurrency management. 
- * - * Busy workers can rebind after they finish their current work items. - * Queueing the rebind work item at the head of the scheduled list is - * enough. Note that nr_running will be properly bumped as busy workers - * rebind. - * - * On return, all non-manager workers are scheduled for rebind - see - * manage_workers() for the manager special case. Any idle worker - * including the manager will not appear on @idle_list until rebind is - * complete, making local wake-ups safe. - */ -static void rebind_workers(struct worker_pool *pool) -{ - struct worker *worker, *n; - int i; - - lockdep_assert_held(&pool->manager_mutex); - lockdep_assert_held(&pool->lock); - - /* dequeue and kick idle ones */ - list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { - /* - * idle workers should be off @pool->idle_list until rebind - * is complete to avoid receiving premature local wake-ups. - */ - list_del_init(&worker->entry); - - /* - * worker_thread() will see the above dequeuing and call - * idle_worker_rebind(). - */ - wake_up_process(worker->task); - } - - /* rebind busy workers */ - for_each_busy_worker(worker, i, pool) { - struct work_struct *rebind_work = &worker->rebind_work; - struct workqueue_struct *wq; - - if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, - work_data_bits(rebind_work))) - continue; - - debug_work_activate(rebind_work); - - /* - * wq doesn't really matter but let's keep @worker->pool - * and @pwq->pool consistent for sanity. - */ - if (worker->pool->attrs->nice < 0) - wq = system_highpri_wq; - else - wq = system_wq; - - insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work, - worker->scheduled.next, - work_color_to_flags(WORK_NO_COLOR)); - } -} - static struct worker *alloc_worker(void) { struct worker *worker; @@ -4196,6 +4125,77 @@ static void wq_unbind_fn(struct work_struct *work) atomic_set(&pool->nr_running, 0); } +/** + * rebind_workers - rebind all workers of a pool to the associated CPU + * @pool: pool of interest + * + * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding + * is different for idle and busy ones. + * + * Idle ones will be removed from the idle_list and woken up. They will + * add themselves back after completing rebind. This ensures that the + * idle_list doesn't contain any unbound workers when re-bound busy workers + * try to perform local wake-ups for concurrency management. + * + * Busy workers can rebind after they finish their current work items. + * Queueing the rebind work item at the head of the scheduled list is + * enough. Note that nr_running will be properly bumped as busy workers + * rebind. + * + * On return, all non-manager workers are scheduled for rebind - see + * manage_workers() for the manager special case. Any idle worker + * including the manager will not appear on @idle_list until rebind is + * complete, making local wake-ups safe. + */ +static void rebind_workers(struct worker_pool *pool) +{ + struct worker *worker, *n; + int i; + + lockdep_assert_held(&pool->manager_mutex); + lockdep_assert_held(&pool->lock); + + /* dequeue and kick idle ones */ + list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { + /* + * idle workers should be off @pool->idle_list until rebind + * is complete to avoid receiving premature local wake-ups. + */ + list_del_init(&worker->entry); + + /* + * worker_thread() will see the above dequeuing and call + * idle_worker_rebind(). 
*/ + wake_up_process(worker->task); + } + + /* rebind busy workers */ + for_each_busy_worker(worker, i, pool) { + struct work_struct *rebind_work = &worker->rebind_work; + struct workqueue_struct *wq; + + if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, + work_data_bits(rebind_work))) + continue; + + debug_work_activate(rebind_work); + + /* + * wq doesn't really matter but let's keep @worker->pool + * and @pwq->pool consistent for sanity. + */ + if (worker->pool->attrs->nice < 0) + wq = system_highpri_wq; + else + wq = system_wq; + + insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work, + worker->scheduled.next, + work_color_to_flags(WORK_NO_COLOR)); + } +} + /* * Workqueues should be brought up before normal priority CPU notifiers. * This will be registered high priority CPU notifier. -- cgit v1.2.2 From a9ab775bcadf122d91e1a201eb66ae2eec90365a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Mar 2013 13:45:21 -0700 Subject: workqueue: directly restore CPU affinity of workers from CPU_ONLINE Rebinding workers of a per-cpu pool after a CPU comes online involves a lot of back-and-forth mostly because only the task itself could adjust CPU affinity if PF_THREAD_BOUND was set. As CPU_ONLINE itself couldn't adjust affinity, it had to somehow coerce the workers themselves to perform set_cpus_allowed_ptr(). Due to the various states a worker can be in, this led to three different paths by which a worker may be rebound. worker->rebind_work is queued to busy workers. Idle ones are signaled by unlinking worker->entry and then call idle_worker_rebind(). The manager isn't covered by either and implements its own mechanism. PF_THREAD_BOUND has been replaced with PF_NO_SETAFFINITY and CPU_ONLINE itself can now manipulate CPU affinity of workers. This patch replaces the existing rebind mechanism with a direct one where CPU_ONLINE iterates over all workers using for_each_pool_worker(), restores CPU affinity, and clears WORKER_UNBOUND. There are a couple of subtleties. All bound idle workers should have their runqueues set to that of the bound CPU; however, if the target task isn't running, set_cpus_allowed_ptr() just updates the cpus_allowed mask, deferring the actual migration to when the task wakes up. This is worked around by waking up idle workers after restoring CPU affinity before any workers can become bound. Another subtlety stems from matching @pool->nr_running with the number of running unbound workers. While DISASSOCIATED, all workers are unbound and nr_running is zero. As workers become bound again, nr_running needs to be adjusted accordingly; however, there is no good way to tell whether a given worker is running without poking into scheduler internals. Instead of clearing UNBOUND directly, rebind_workers() replaces UNBOUND with another new NOT_RUNNING flag - REBOUND, which will later be cleared by the workers themselves while preparing for the next round of work item execution. The only change needed for the workers is clearing REBOUND along with PREP. * This patch leaves for_each_busy_worker() without any user. Removed. * idle_worker_rebind(), busy_worker_rebind_fn(), worker->rebind_work and rebind logic in manage_workers() removed. * worker_thread() now looks at WORKER_DIE instead of testing whether @worker->entry is empty to determine whether it needs to do something special, as dying is the only special thing now. 
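The UNBOUND-to-REBOUND exchange described above comes down to a few lines per worker; a sketch consistent with the hunk below (the document's diff is truncated just before the store, so the final line is a reconstruction, not a quote):

unsigned int worker_flags = worker->flags;

/* Trade UNBOUND for REBOUND in one published store. Both are
 * NOT_RUNNING flags, so nr_running stays consistent, and a reader
 * like wq_worker_waking_up() never sees a half-updated mask. */
worker_flags |= WORKER_REBOUND;
worker_flags &= ~WORKER_UNBOUND;
ACCESS_ONCE(worker->flags) = worker_flags;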
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 192 +++++++++++++++----------------------------- kernel/workqueue_internal.h | 3 - 2 files changed, 64 insertions(+), 131 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3e297c574be8..9508b5ed7336 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -75,9 +75,10 @@ enum { WORKER_PREP = 1 << 3, /* preparing to run works */ WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ WORKER_UNBOUND = 1 << 7, /* worker is unbound */ + WORKER_REBOUND = 1 << 8, /* worker was rebound */ - WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | - WORKER_CPU_INTENSIVE, + WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE | + WORKER_UNBOUND | WORKER_REBOUND, NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */ @@ -316,9 +317,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ (pool)++) -#define for_each_busy_worker(worker, i, pool) \ - hash_for_each(pool->busy_hash, i, worker, hentry) - /** * for_each_pool - iterate through all worker_pools in the system * @pool: iteration cursor @@ -1612,37 +1610,6 @@ __acquires(&pool->lock) } } -/* - * Rebind an idle @worker to its CPU. worker_thread() will test - * list_empty(@worker->entry) before leaving idle and call this function. - */ -static void idle_worker_rebind(struct worker *worker) -{ - /* CPU may go down again inbetween, clear UNBOUND only on success */ - if (worker_maybe_bind_and_lock(worker->pool)) - worker_clr_flags(worker, WORKER_UNBOUND); - - /* rebind complete, become available again */ - list_add(&worker->entry, &worker->pool->idle_list); - spin_unlock_irq(&worker->pool->lock); -} - -/* - * Function for @worker->rebind.work used to rebind unbound busy workers to - * the associated cpu which is coming back online. This is scheduled by - * cpu up but can race with other cpu hotplug operations and may be - * executed twice without intervening cpu down. - */ -static void busy_worker_rebind_fn(struct work_struct *work) -{ - struct worker *worker = container_of(work, struct worker, rebind_work); - - if (worker_maybe_bind_and_lock(worker->pool)) - worker_clr_flags(worker, WORKER_UNBOUND); - - spin_unlock_irq(&worker->pool->lock); -} - static struct worker *alloc_worker(void) { struct worker *worker; @@ -1651,7 +1618,6 @@ static struct worker *alloc_worker(void) if (worker) { INIT_LIST_HEAD(&worker->entry); INIT_LIST_HEAD(&worker->scheduled); - INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn); /* on creation a worker is in !idle && prep state */ worker->flags = WORKER_PREP; } @@ -2053,22 +2019,6 @@ static bool manage_workers(struct worker *worker) if (unlikely(!mutex_trylock(&pool->manager_mutex))) { spin_unlock_irq(&pool->lock); mutex_lock(&pool->manager_mutex); - /* - * CPU hotplug could have happened while we were waiting - * for assoc_mutex. Hotplug itself can't handle us - * because manager isn't either on idle or busy list, and - * @pool's state and ours could have deviated. - * - * As hotplug is now excluded via manager_mutex, we can - * simply try to bind. It will succeed or fail depending - * on @pool's current state. Try it and adjust - * %WORKER_UNBOUND accordingly. 
- */ - if (worker_maybe_bind_and_lock(pool)) - worker->flags &= ~WORKER_UNBOUND; - else - worker->flags |= WORKER_UNBOUND; - ret = true; } @@ -2252,19 +2202,12 @@ static int worker_thread(void *__worker) woke_up: spin_lock_irq(&pool->lock); - /* we are off idle list if destruction or rebind is requested */ - if (unlikely(list_empty(&worker->entry))) { + /* am I supposed to die? */ + if (unlikely(worker->flags & WORKER_DIE)) { spin_unlock_irq(&pool->lock); - - /* if DIE is set, destruction is requested */ - if (worker->flags & WORKER_DIE) { - worker->task->flags &= ~PF_WQ_WORKER; - return 0; - } - - /* otherwise, rebind */ - idle_worker_rebind(worker); - goto woke_up; + WARN_ON_ONCE(!list_empty(&worker->entry)); + worker->task->flags &= ~PF_WQ_WORKER; + return 0; } worker_leave_idle(worker); @@ -2285,11 +2228,13 @@ recheck: WARN_ON_ONCE(!list_empty(&worker->scheduled)); /* - * When control reaches this point, we're guaranteed to have - * at least one idle worker or that someone else has already - * assumed the manager role. + * Finish PREP stage. We're guaranteed to have at least one idle + * worker or that someone else has already assumed the manager + * role. This is where @worker starts participating in concurrency + * management if applicable and concurrency management is restored + * after being rebound. See rebind_workers() for details. */ - worker_clr_flags(worker, WORKER_PREP); + worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND); do { struct work_struct *work = @@ -4076,7 +4021,7 @@ static void wq_unbind_fn(struct work_struct *work) int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; - int i; + int wi; for_each_cpu_worker_pool(pool, cpu) { WARN_ON_ONCE(cpu != smp_processor_id()); @@ -4091,10 +4036,7 @@ static void wq_unbind_fn(struct work_struct *work) * before the last CPU down must be on the cpu. After * this, they may become diasporas. */ - list_for_each_entry(worker, &pool->idle_list, entry) - worker->flags |= WORKER_UNBOUND; - - for_each_busy_worker(worker, i, pool) + for_each_pool_worker(worker, wi, pool) worker->flags |= WORKER_UNBOUND; pool->flags |= POOL_DISASSOCIATED; @@ -4129,71 +4071,64 @@ static void wq_unbind_fn(struct work_struct *work) * rebind_workers - rebind all workers of a pool to the associated CPU * @pool: pool of interest * - * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding - * is different for idle and busy ones. - * - * Idle ones will be removed from the idle_list and woken up. They will - * add themselves back after completing rebind. This ensures that the - * idle_list doesn't contain any unbound workers when re-bound busy workers - * try to perform local wake-ups for concurrency management. - * - * Busy workers can rebind after they finish their current work items. - * Queueing the rebind work item at the head of the scheduled list is - * enough. Note that nr_running will be properly bumped as busy workers - * rebind. - * - * On return, all non-manager workers are scheduled for rebind - see - * manage_workers() for the manager special case. Any idle worker - * including the manager will not appear on @idle_list until rebind is - * complete, making local wake-ups safe. + * @pool->cpu is coming online. Rebind all workers to the CPU. 
*/ static void rebind_workers(struct worker_pool *pool) { - struct worker *worker, *n; - int i; + struct worker *worker; + int wi; lockdep_assert_held(&pool->manager_mutex); - lockdep_assert_held(&pool->lock); - - /* dequeue and kick idle ones */ - list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { - /* - * idle workers should be off @pool->idle_list until rebind - * is complete to avoid receiving premature local wake-ups. - */ - list_del_init(&worker->entry); - /* - * worker_thread() will see the above dequeuing and call - * idle_worker_rebind(). - */ - wake_up_process(worker->task); - } - - /* rebind busy workers */ - for_each_busy_worker(worker, i, pool) { - struct work_struct *rebind_work = &worker->rebind_work; - struct workqueue_struct *wq; + /* + * Restore CPU affinity of all workers. As all idle workers should + * be on the run-queue of the associated CPU before any local + * wake-ups for concurrency management happen, restore CPU affinity + * of all workers first and then clear UNBOUND. As we're called + * from CPU_ONLINE, the following shouldn't fail. + */ + for_each_pool_worker(worker, wi, pool) + WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, + pool->attrs->cpumask) < 0); - if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, - work_data_bits(rebind_work))) - continue; + spin_lock_irq(&pool->lock); - debug_work_activate(rebind_work); + for_each_pool_worker(worker, wi, pool) { + unsigned int worker_flags = worker->flags; /* - * wq doesn't really matter but let's keep @worker->pool - * and @pwq->pool consistent for sanity. + * A bound idle worker should actually be on the runqueue + * of the associated CPU for local wake-ups targeting it to + * work. Kick all idle workers so that they migrate to the + * associated CPU. Doing this in the same loop as + * replacing UNBOUND with REBOUND is safe as no worker will + * be bound before @pool->lock is released. */ - if (worker->pool->attrs->nice < 0) - wq = system_highpri_wq; - else - wq = system_wq; + if (worker_flags & WORKER_IDLE) + wake_up_process(worker->task); - insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work, - worker->scheduled.next, - work_color_to_flags(WORK_NO_COLOR)); + /* + * We want to clear UNBOUND but can't directly call + * worker_clr_flags() or adjust nr_running. Atomically + * replace UNBOUND with another NOT_RUNNING flag REBOUND. + * @worker will clear REBOUND using worker_clr_flags() when + * it initiates the next execution cycle thus restoring + * concurrency management. Note that when or whether + * @worker clears REBOUND doesn't affect correctness. + * + * ACCESS_ONCE() is necessary because @worker->flags may be + * tested without holding any lock in + * wq_worker_waking_up(). Without it, NOT_RUNNING test may + * fail incorrectly leading to premature concurrency + * management operations.
+ */ + WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND)); + worker_flags |= WORKER_REBOUND; + worker_flags &= ~WORKER_UNBOUND; + ACCESS_ONCE(worker->flags) = worker_flags; } + + spin_unlock_irq(&pool->lock); } /* @@ -4221,12 +4156,13 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_ONLINE: for_each_cpu_worker_pool(pool, cpu) { mutex_lock(&pool->manager_mutex); - spin_lock_irq(&pool->lock); + spin_lock_irq(&pool->lock); pool->flags &= ~POOL_DISASSOCIATED; + spin_unlock_irq(&pool->lock); + rebind_workers(pool); - spin_unlock_irq(&pool->lock); mutex_unlock(&pool->manager_mutex); } break; diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index f116f071d919..84ab6e1dc6fb 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -38,9 +38,6 @@ struct worker { unsigned int flags; /* X: flags */ int id; /* I: worker id */ - /* for rebinding worker to CPU */ - struct work_struct rebind_work; /* L: for busy worker */ - /* used only by rescuers to point to the target workqueue */ struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ }; -- cgit v1.2.2 From 7dbc725e4749d822eb6dc962526049af1586f041 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Mar 2013 13:45:21 -0700 Subject: workqueue: restore CPU affinity of unbound workers on CPU_ONLINE With the recent addition of the custom attributes support, unbound pools may have an allowed cpumask which isn't full. As long as some of the CPUs in the cpumask are online, its workers will maintain cpus_allowed as set on worker creation; however, once no online CPU is left in cpus_allowed, the scheduler will reset cpus_allowed of any workers which get scheduled so that they can execute. To remain compliant with the user-specified configuration, CPU affinity needs to be restored when a CPU becomes online for an unbound pool which didn't have any online CPUs before. This patch implements restore_unbound_workers_cpumask(), which is called from CPU_ONLINE for all unbound pools, checks whether the CPU coming up is the first allowed online one, and, if so, invokes set_cpus_allowed_ptr() with the configured cpumask on all workers. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9508b5ed7336..e38d035bf671 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4131,6 +4131,39 @@ static void rebind_workers(struct worker_pool *pool) spin_unlock_irq(&pool->lock); } +/** + * restore_unbound_workers_cpumask - restore cpumask of unbound workers + * @pool: unbound pool of interest + * @cpu: the CPU which is coming up + * + * An unbound pool may end up with a cpumask which doesn't have any online + * CPUs. When a worker of such pool gets scheduled, the scheduler resets + * its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any + * online CPU before, cpus_allowed of all its workers should be restored. + */ +static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) +{ + static cpumask_t cpumask; + struct worker *worker; + int wi; + + lockdep_assert_held(&pool->manager_mutex); + + /* is @cpu allowed for @pool? */ + if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) + return; + + /* is @cpu the only online CPU? 
*/ + cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask); + if (cpumask_weight(&cpumask) != 1) + return; + + /* as we're called from CPU_ONLINE, the following shouldn't fail */ + for_each_pool_worker(worker, wi, pool) + WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, + pool->attrs->cpumask) < 0); +} + /* * Workqueues should be brought up before normal priority CPU notifiers. * This will be registered high priority CPU notifier. @@ -4141,6 +4174,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, { int cpu = (unsigned long)hcpu; struct worker_pool *pool; + int pi; switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: @@ -4154,17 +4188,25 @@ case CPU_DOWN_FAILED: case CPU_ONLINE: - for_each_cpu_worker_pool(pool, cpu) { + mutex_lock(&wq_mutex); + + for_each_pool(pool, pi) { mutex_lock(&pool->manager_mutex); - spin_lock_irq(&pool->lock); - pool->flags &= ~POOL_DISASSOCIATED; - spin_unlock_irq(&pool->lock); + if (pool->cpu == cpu) { + spin_lock_irq(&pool->lock); + pool->flags &= ~POOL_DISASSOCIATED; + spin_unlock_irq(&pool->lock); - rebind_workers(pool); + rebind_workers(pool); + } else if (pool->cpu < 0) { + restore_unbound_workers_cpumask(pool, cpu); + } mutex_unlock(&pool->manager_mutex); } + + mutex_unlock(&wq_mutex); break; } return NOTIFY_OK; -- cgit v1.2.2 From 547b524636249fbe906ab78a50ab0017c490316c Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 19 Mar 2013 17:26:57 -0400 Subject: PCI: Use ROM images from firmware only if no other ROM source available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mantas Mikulėnas reported that his graphics hardware failed to initialise after commit f9a37be0f02a ("x86: Use PCI setup data"). The aim of this commit was to ensure that ROM images were available on some Apple systems that don't expose the GPU ROM via any other source. In this case, UEFI appears to have provided a broken ROM image that we were using even though there was a perfectly valid ROM available via other sources. The simplest way to handle this seems to be to just re-order pci_map_rom() and leave any firmware-supplied ROM to last.
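The resulting lookup order is worth spelling out on its own. Below is a self-contained, hypothetical C sketch of the policy (illustrative names only; the real logic is in the drivers/pci/rom.c hunks that follow): shadow copy first, then the ROM BAR, and the firmware-supplied image only as a last resort.

#include <stdbool.h>
#include <stdio.h>

struct rom_sources {
	bool shadow;	/* IORESOURCE_ROM_SHADOW-style legacy copy */
	bool bar;	/* usable ROM BAR */
	bool firmware;	/* image handed over by platform firmware */
};

static const char *pick_rom(const struct rom_sources *s)
{
	if (s->shadow)
		return "legacy shadow at 0xC0000";
	if (s->bar)
		return "ROM BAR";
	if (s->firmware)	/* only when nothing else panned out */
		return "firmware-provided image";
	return "no ROM";
}

int main(void)
{
	struct rom_sources dev = { .bar = true, .firmware = true };

	/* a possibly broken firmware copy now loses to a valid BAR ROM */
	printf("using: %s\n", pick_rom(&dev));
	return 0;
}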
Signed-off-by: Matthew Garrett Tested-by: Mantas Mikulėnas Signed-off-by: Linus Torvalds --- drivers/pci/rom.c | 55 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index ab886b7ee327..b41ac7756a4b 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -100,6 +100,27 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size) return min((size_t)(image - rom), size); } +static loff_t pci_find_rom(struct pci_dev *pdev, size_t *size) +{ + struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; + loff_t start; + + /* assign the ROM an address if it doesn't have one */ + if (res->parent == NULL && pci_assign_resource(pdev, PCI_ROM_RESOURCE)) + return 0; + start = pci_resource_start(pdev, PCI_ROM_RESOURCE); + *size = pci_resource_len(pdev, PCI_ROM_RESOURCE); + + if (*size == 0) + return 0; + + /* Enable ROM space decodes */ + if (pci_enable_rom(pdev)) + return 0; + + return start; +} + /** * pci_map_rom - map a PCI ROM to kernel space * @pdev: pointer to pci device struct @@ -114,21 +135,15 @@ size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size) void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) { struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; - loff_t start; + loff_t start = 0; void __iomem *rom; - /* - * Some devices may provide ROMs via a source other than the BAR - */ - if (pdev->rom && pdev->romlen) { - *size = pdev->romlen; - return phys_to_virt(pdev->rom); /* * IORESOURCE_ROM_SHADOW set on x86, x86_64 and IA64 supports legacy * memory map if the VGA enable bit of the Bridge Control register is * set for embedded VGA. */ - } else if (res->flags & IORESOURCE_ROM_SHADOW) { + if (res->flags & IORESOURCE_ROM_SHADOW) { /* primary video rom always starts here */ start = (loff_t)0xC0000; *size = 0x20000; /* cover C000:0 through E000:0 */ @@ -139,21 +154,21 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) return (void __iomem *)(unsigned long) pci_resource_start(pdev, PCI_ROM_RESOURCE); } else { - /* assign the ROM an address if it doesn't have one */ - if (res->parent == NULL && - pci_assign_resource(pdev,PCI_ROM_RESOURCE)) - return NULL; - start = pci_resource_start(pdev, PCI_ROM_RESOURCE); - *size = pci_resource_len(pdev, PCI_ROM_RESOURCE); - if (*size == 0) - return NULL; - - /* Enable ROM space decodes */ - if (pci_enable_rom(pdev)) - return NULL; + start = pci_find_rom(pdev, size); } } + /* + * Some devices may provide ROMs via a source other than the BAR + */ + if (!start && pdev->rom && pdev->romlen) { + *size = pdev->romlen; + return phys_to_virt(pdev->rom); + } + + if (!start) + return NULL; + rom = ioremap(start, *size); if (!rom) { /* restore enable if ioremap fails */ -- cgit v1.2.2 From c12aba5aa0e60b7947bc8b6ea25ef55c4acf81a4 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 19 Mar 2013 09:56:57 +0100 Subject: drm/i915: stop using GMBUS IRQs on Gen4 chips Commit 28c70f162 ("drm/i915: use the gmbus irq for waits") switched to using GMBUS irqs instead of GPIO bit-banging for chipset generations 4 and above. It turns out though that on many systems this leads to spurious interrupts being generated, long after the register write to disable the IRQs has been issued.
Typically this results in the spurious interrupt source getting disabled: [ 9.636345] irq 16: nobody cared (try booting with the "irqpoll" option) [ 9.637915] Pid: 4157, comm: ifup Tainted: GF 3.9.0-rc2-00341-g0863702 #422 [ 9.639484] Call Trace: [ 9.640731] [] __report_bad_irq+0x1d/0xc7 [ 9.640731] [] note_interrupt+0x15b/0x1e8 [ 9.640731] [] handle_irq_event_percpu+0x1bf/0x214 [ 9.640731] [] handle_irq_event+0x3c/0x5c [ 9.640731] [] handle_fasteoi_irq+0x7a/0xb0 [ 9.640731] [] handle_irq+0x1a/0x24 [ 9.640731] [] do_IRQ+0x48/0xaf [ 9.640731] [] common_interrupt+0x6a/0x6a [ 9.640731] [] ? system_call_fastpath+0x16/0x1b [ 9.640731] handlers: [ 9.640731] [] usb_hcd_irq [usbcore] [ 9.640731] [] yenta_interrupt [yenta_socket] [ 9.640731] Disabling IRQ #16 The really curious thing is now that irq 16 is _not_ the interrupt for the i915 driver when using MSI, but it _is_ the interrupt when not using MSI. So by all indications it seems like gmbus is able to generate a legacy (shared) interrupt in MSI mode on some configurations. I've tried to reproduce this and the differentiating thing seems to be that on unaffected systems no other device uses irq 16 (which seems to be the non-MSI intel gfx interrupt on all gm45). I have no idea how that even can happen. To avoid tempting this elephant into a rage, just disable gmbus interrupt support on gen 4. v2: Improve the commit message with exact details of what's going on. Also add a comment in the code to warn against this particular elephant in the room. v3: Move the comment explaing how gen4 blows up next to the definition of HAS_GMBUS_IRQ to keep the code-flow straight. Suggested by Chris Wilson. Signed-off-by: Jiri Kosina (v1) Acked-by: Chris Wilson References: https://lkml.org/lkml/2013/3/8/325 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_i2c.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index acf8aec9ada7..ef4744e1bf0b 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -203,7 +203,13 @@ intel_gpio_setup(struct intel_gmbus *bus, u32 pin) algo->data = bus; } -#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 4) +/* + * gmbus on gen4 seems to be able to generate legacy interrupts even when in MSI + * mode. This results in spurious interrupt warnings if the legacy irq no. is + * shared with another device. The kernel then disables that interrupt source + * and so prevents the other device from working properly. + */ +#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) static int gmbus_wait_hw_status(struct drm_i915_private *dev_priv, u32 gmbus2_status, @@ -214,6 +220,9 @@ gmbus_wait_hw_status(struct drm_i915_private *dev_priv, u32 gmbus2 = 0; DEFINE_WAIT(wait); + if (!HAS_GMBUS_IRQ(dev_priv->dev)) + gmbus4_irq_en = 0; + /* Important: The hw handles only the first bit, so set only one! Since * we also need to check for NAKs besides the hw ready/idle signal, we * need to wake up periodically and check that ourselves. */ -- cgit v1.2.2 From 3dd6664fac7e6041bfc8756ae9e8c78f59108cd9 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 19 Mar 2013 13:09:59 +0000 Subject: netfilter: remove unused "config IP_NF_QUEUE" Kconfig symbol IP_NF_QUEUE is unused since commit d16cf20e2f2f13411eece7f7fb72c17d141c4a84 ("netfilter: remove ip_queue support"). Let's remove it too. 
Signed-off-by: Paul Bolle Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ce2d43e1f09f..0d755c50994b 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -36,19 +36,6 @@ config NF_CONNTRACK_PROC_COMPAT If unsure, say Y. -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" - depends on NETFILTER_ADVANCED - help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" default m if NETFILTER_ADVANCED=n -- cgit v1.2.2 From f002a24388cc460c8a9be7d446a9871f7c9d52b6 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 18 Mar 2013 13:15:57 -0700 Subject: target/file: Bump FD_MAX_SECTORS to 2048 to handle 1M sized I/Os This patch bumps the default FILEIO backend FD_MAX_SECTORS value from 1024 -> 2048 in order to allow block_size=512 to handle 1M sized I/Os. The current default rejects I/Os larger than 512K in sbc_parse_cdb(): [12015.915146] SCSI OP 2ah with too big sectors 1347 exceeds backend hw_max_sectors: 1024 [12015.977744] SCSI OP 2ah with too big sectors 2048 exceeds backend hw_max_sectors: 1024 This issue is present in >= v3.5 based kernels, introduced after the removal of se_task logic. Reported-by: Viljami Ilola Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index bc02b018ae46..37ffc5bd2399 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -7,7 +7,7 @@ #define FD_DEVICE_QUEUE_DEPTH 32 #define FD_MAX_DEVICE_QUEUE_DEPTH 128 #define FD_BLOCKSIZE 512 -#define FD_MAX_SECTORS 1024 +#define FD_MAX_SECTORS 2048 #define RRF_EMULATE_CDB 0x01 #define RRF_GOT_LBA 0x02 -- cgit v1.2.2 From 8f27d487bcc2bd603c2d87e1729abcbc301f15db Mon Sep 17 00:00:00 2001 From: Asias He Date: Tue, 19 Mar 2013 12:55:16 +0800 Subject: target/pscsi: Reject cross page boundary case in pscsi_map_sg We can only have one page of data in each sg element, so we can not cross a page boundary. Fail this case. The 'while (len > 0 && data_len > 0) {}' loop is not necessary. The loop can only be executed once. Signed-off-by: Asias He Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pscsi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 82e78d72fdb6..e992b27aa090 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -883,7 +883,14 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, pr_debug("PSCSI: i: %d page: %p len: %d off: %d\n", i, page, len, off); - while (len > 0 && data_len > 0) { + /* + * We only have one page of data in each sg element, + * we can not cross a page boundary. 
+ */ + if (off + len > PAGE_SIZE) + goto fail; + + if (len > 0 && data_len > 0) { bytes = min_t(unsigned int, len, PAGE_SIZE - off); bytes = min(bytes, data_len); @@ -940,9 +947,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, bio = NULL; } - len -= bytes; data_len -= bytes; - off = 0; } } -- cgit v1.2.2 From ce7d363aaf1e28be8406a2976220944ca487e8ca Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 4 Mar 2013 12:37:14 +1100 Subject: md/raid5: schedule_construction should abort if nothing to do. Since commit 1ed850f356a0a422013846b5291acff08815008b md/raid5: make sure to_read and to_write never go negative. It has been possible for handle_stripe_dirtying to be called when there isn't actually any work to do. It then calls schedule_reconstruction() which will set R5_LOCKED on the parity block(s) even when nothing else is happening. This then causes problems in do_release_stripe(). So add checks to schedule_reconstruction() so that if it doesn't find anything to do, it just aborts. This bug was introduced in v3.7, so the patch is suitable for -stable kernels since then. Cc: stable@vger.kernel.org (v3.7+) Reported-by: majianpeng Signed-off-by: NeilBrown --- drivers/md/raid5.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 35031c8b2d02..5601dda1bc40 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2283,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, int level = conf->level; if (rcw) { - /* if we are not expanding this is a proper write request, and - * there will be bios with new data to be drained into the - * stripe cache - */ - if (!expand) { - sh->reconstruct_state = reconstruct_state_drain_run; - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - } else - sh->reconstruct_state = reconstruct_state_run; - - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -2306,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } + /* if we are not expanding this is a proper write request, and + * there will be bios with new data to be drained into the + * stripe cache + */ + if (!expand) { + if (!s->locked) + /* False alarm, nothing to do */ + return; + sh->reconstruct_state = reconstruct_state_drain_run; + set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); + } else + sh->reconstruct_state = reconstruct_state_run; + + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); + if (s->locked + conf->max_degraded == disks) if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) atomic_inc(&conf->pending_full_writes); @@ -2314,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); - sh->reconstruct_state = reconstruct_state_prexor_drain_run; - set_bit(STRIPE_OP_PREXOR, &s->ops_request); - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); - for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (i == pd_idx) @@ -2333,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } + if (!s->locked) + /* False alarm - nothing to do */ + return; + sh->reconstruct_state = reconstruct_state_prexor_drain_run; + set_bit(STRIPE_OP_PREXOR, &s->ops_request); + set_bit(STRIPE_OP_BIODRAIN, 
&s->ops_request); + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); } /* keep the parity disk(s) locked while asynchronous operations -- cgit v1.2.2 From e3620a3ad52609f64a2402e4b59300afb4b83b77 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 7 Mar 2013 16:22:01 -0600 Subject: MD RAID5: Avoid accessing gendisk or queue structs when not available MD RAID5: Fix kernel oops when RAID4/5/6 is used via device-mapper Commit a9add5d (v3.8-rc1) added blktrace calls to the RAID4/5/6 driver. However, when device-mapper is used to create RAID4/5/6 arrays, the mddev->gendisk and mddev->queue fields are not setup. Therefore, calling things like trace_block_bio_remap will cause a kernel oops. This patch conditionalizes those calls on whether the proper fields exist to make the calls. (Device-mapper will call trace_block_bio_remap on its own.) This patch is suitable for the 3.8.y stable kernel. Cc: stable@vger.kernel.org (v3.8+) Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/raid5.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5601dda1bc40..52ba88a10668 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -674,9 +674,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_next = NULL; if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); - trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), - bi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + + if (conf->mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), + bi, disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); generic_make_request(bi); } if (rrdev) { @@ -704,9 +706,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_size = STRIPE_SIZE; rbi->bi_next = NULL; - trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), - rbi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + if (conf->mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), + rbi, disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); generic_make_request(rbi); } if (!rdev && !rrdev) { @@ -2835,8 +2838,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, set_bit(STRIPE_HANDLE, &sh->state); if (rmw < rcw && rmw > 0) { /* prefer read-modify-write, but need to get some data */ - blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", - (unsigned long long)sh->sector, rmw); + if (conf->mddev->queue) + blk_add_trace_msg(conf->mddev->queue, + "raid5 rmw %llu %d", + (unsigned long long)sh->sector, rmw); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && @@ -2886,7 +2891,7 @@ static void handle_stripe_dirtying(struct r5conf *conf, } } } - if (rcw) + if (rcw && conf->mddev->queue) blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", (unsigned long long)sh->sector, rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); @@ -3993,9 +3998,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) atomic_inc(&conf->active_aligned_reads); spin_unlock_irq(&conf->device_lock); - trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), - align_bi, disk_devt(mddev->gendisk), - raid_bio->bi_sector); + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), + align_bi, disk_devt(mddev->gendisk), + raid_bio->bi_sector); generic_make_request(align_bi); return 1; } else { @@ -4089,7 +4095,8 @@ static void 
raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) } spin_unlock_irq(&conf->device_lock); } - trace_block_unplug(mddev->queue, cnt, !from_schedule); + if (mddev->queue) + trace_block_unplug(mddev->queue, cnt, !from_schedule); kfree(cb); } -- cgit v1.2.2 From 90584fc93d461520a888f691144f0879283b3624 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 7 Mar 2013 16:24:26 -0600 Subject: MD: Prevent sysfs operations on uninitialized kobjects MD: Prevent sysfs operations on uninitialized kobjects Device-mapper does not use sysfs; but when device-mapper is leveraging MD's RAID personalities, MD sometimes attempts to update sysfs. This patch adds checks for 'mddev->kobj.sd' in sysfs_[un]link_rdev to ensure it is about to operate on something valid. This patch also checks for 'mddev->kobj.sd' before calling 'sysfs_notify' in 'remove_and_add_spares'. Although 'sysfs_notify' already makes this check, doing so in 'remove_and_add_spares' prevents an additional mutex operation. Signed-off-by: Jonathan Brassow Signed-off-by: NeilBrown --- drivers/md/md.c | 6 ++---- drivers/md/md.h | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index fcb878f88796..aeceedfc530b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7663,10 +7663,8 @@ static int remove_and_add_spares(struct mddev *mddev) removed++; } } - if (removed) - sysfs_notify(&mddev->kobj, NULL, - "degraded"); - + if (removed && mddev->kobj.sd) + sysfs_notify(&mddev->kobj, NULL, "degraded"); rdev_for_each(rdev, mddev) { if (rdev->raid_disk >= 0 && diff --git a/drivers/md/md.h b/drivers/md/md.h index eca59c3074ef..d90fb1a879e1 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -506,7 +506,7 @@ static inline char * mdname (struct mddev * mddev) static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) { char nm[20]; - if (!test_bit(Replacement, &rdev->flags)) { + if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) { sprintf(nm, "rd%d", rdev->raid_disk); return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); } else @@ -516,7 +516,7 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) { char nm[20]; - if (!test_bit(Replacement, &rdev->flags)) { + if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) { sprintf(nm, "rd%d", rdev->raid_disk); sysfs_remove_link(&mddev->kobj, nm); } -- cgit v1.2.2 From f8dfcffd0472a0f353f34a567ad3f53568914d04 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 12 Mar 2013 12:18:06 +1100 Subject: md/raid5: ensure sync and DISCARD don't happen at the same time. A number of problems can occur due to races between resync/recovery and discard. - if sync_request calls handle_stripe() while a discard is happening on the stripe, it might call handle_stripe_clean_event before all of the individual discard requests have completed (so some devices are still locked, but not all). Since commit ca64cae96037de16e4af92678814f5d4bf0c1c65 ("md/raid5: Make sure we clear R5_Discard when discard is finished") this will cause R5_Discard to be cleared for the parity device, so handle_stripe_clean_event() will not be called when the other devices do become unlocked, so their ->written will not be cleared. This ultimately leads to a WARN_ON in init_stripe and a lock-up.
- If handle_stripe_clean_event() does clear R5_UPTODATE at an awkward time for resync, it can lead to s->uptodate being less than disks in handle_parity_checks5(), which triggers a BUG (because it is one). So: - keep R5_Discard on the parity device until all other devices have completed their discard request - make sure we don't try to have a 'discard' and a 'sync' action at the same time. This involves a new stripe flag so we know when a 'discard' is happening, and the use of R5_Overlap on the parity disk so that when a discard is wanted while a sync is active, we know to wake up the discard at the appropriate time. Discard support for RAID5 was added in 3.7, so this is suitable for any -stable kernel since 3.7. Cc: stable@vger.kernel.org (v3.7+) Reported-by: Jes Sorensen Tested-by: Jes Sorensen Signed-off-by: NeilBrown --- drivers/md/raid5.c | 45 +++++++++++++++++++++++++++++++++++++------ drivers/md/raid5.h | 1 + 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 52ba88a10668..42a899728748 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2576,6 +2576,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, int i; clear_bit(STRIPE_SYNCING, &sh->state); + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) + wake_up(&conf->wait_for_overlap); s->syncing = 0; s->replacing = 0; /* There is nothing more to do for sync/check/repair. @@ -2749,6 +2751,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, { int i; struct r5dev *dev; + int discard_pending = 0; for (i = disks; i--; ) if (sh->dev[i].written) { @@ -2777,9 +2780,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, STRIPE_SECTORS, !test_bit(STRIPE_DEGRADED, &sh->state), 0); - } - } else if (test_bit(R5_Discard, &sh->dev[i].flags)) - clear_bit(R5_Discard, &sh->dev[i].flags); + } else if (test_bit(R5_Discard, &dev->flags)) + discard_pending = 1; + } + if (!discard_pending && + test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { + clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); + clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); + if (sh->qd_idx >= 0) { + clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); + clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); + } + /* now that discard is done we can proceed with any sync */ + clear_bit(STRIPE_DISCARD, &sh->state); + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) + set_bit(STRIPE_HANDLE, &sh->state); + + } if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) if (atomic_dec_and_test(&conf->pending_full_writes)) @@ -3431,9 +3448,15 @@ static void handle_stripe(struct stripe_head *sh) return; } - if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { - set_bit(STRIPE_SYNCING, &sh->state); - clear_bit(STRIPE_INSYNC, &sh->state); + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + spin_lock(&sh->stripe_lock); + /* Cannot process 'sync' concurrently with 'discard' */ + if (!test_bit(STRIPE_DISCARD, &sh->state) && + test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + set_bit(STRIPE_SYNCING, &sh->state); + clear_bit(STRIPE_INSYNC, &sh->state); + } + spin_unlock(&sh->stripe_lock); } clear_bit(STRIPE_DELAYED, &sh->state); @@ -3593,6 +3616,8 @@ static void handle_stripe(struct stripe_head *sh) test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); + if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) + wake_up(&conf->wait_for_overlap); } /* If the failed drives are just a ReadError, 
then we might need @@ -4159,6 +4184,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) sh = get_active_stripe(conf, logical_sector, 0, 0, 0); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); + set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); + if (test_bit(STRIPE_SYNCING, &sh->state)) { + release_stripe(sh); + schedule(); + goto again; + } + clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); spin_lock_irq(&sh->stripe_lock); for (d = 0; d < conf->raid_disks; d++) { if (d == sh->pd_idx || d == sh->qd_idx) @@ -4171,6 +4203,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) goto again; } } + set_bit(STRIPE_DISCARD, &sh->state); finish_wait(&conf->wait_for_overlap, &w); for (d = 0; d < conf->raid_disks; d++) { if (d == sh->pd_idx || d == sh->qd_idx) diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 18b2c4a8a1fd..050a334e89c1 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -323,6 +323,7 @@ enum { STRIPE_COMPUTE_RUN, STRIPE_OPS_REQ_PENDING, STRIPE_ON_UNPLUG_LIST, + STRIPE_DISCARD, }; /* -- cgit v1.2.2 From 238f5908bd48f9e2f4668e0289e88cba969d710c Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Mon, 11 Mar 2013 11:27:44 +0100 Subject: md: remove CONFIG_MULTICORE_RAID456 entirely One instance of this Kconfig macro remained after commit 51acbcec6c42b24482bac18e42befc822524535d ("md: remove CONFIG_MULTICORE_RAID456"). Remove that one too. And, while we're at it, also remove it from the defconfig files that carry it. Signed-off-by: Paul Bolle Signed-off-by: NeilBrown --- arch/tile/configs/tilegx_defconfig | 1 - arch/tile/configs/tilepro_defconfig | 1 - drivers/md/raid5.h | 4 ---- 3 files changed, 6 deletions(-) diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 8c5eff6d6df5..47684815e5c8 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -330,7 +330,6 @@ CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m CONFIG_MD_RAID456=m -CONFIG_MULTICORE_RAID456=y CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m CONFIG_DM_DEBUG=y diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index e7a3dfcbcda7..dd2b8f0c631f 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -324,7 +324,6 @@ CONFIG_MD_RAID0=m CONFIG_MD_RAID1=m CONFIG_MD_RAID10=m CONFIG_MD_RAID456=m -CONFIG_MULTICORE_RAID456=y CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=m CONFIG_DM_DEBUG=y diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 050a334e89c1..b0b663b119a8 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -221,10 +221,6 @@ struct stripe_head { struct stripe_operations { int target, target2; enum sum_check_flags zero_sum_result; - #ifdef CONFIG_MULTICORE_RAID456 - unsigned long request; - wait_queue_head_t wait_for_ops; - #endif } ops; struct r5dev { /* rreq and rvec are used for the replacement device when -- cgit v1.2.2 From c83a9d5e425d4678b05ca058fec6254f18601474 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 19 Mar 2013 08:04:44 -0700 Subject: x86-32, microcode_intel_early: Fix crash with CONFIG_DEBUG_VIRTUAL In 32-bit, __pa_symbol() in CONFIG_DEBUG_VIRTUAL accesses kernel data (e.g. max_low_pfn) that not only hasn't been set up yet in such an early boot phase, but since we are in linear mode, cannot even be detected as uninitialized. Thus, use __pa_nodebug() rather than __pa_symbol() to get a global symbol's physical address.
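The failure mode can be modeled in isolation. Here is a simplified, hypothetical C sketch (made-up constants, not the real arch/x86 implementation) of why the plain arithmetic of __pa_nodebug() survives early boot while a DEBUG_VIRTUAL-style check cannot:

#include <assert.h>
#include <stdio.h>

#define PAGE_OFFSET 0xC0000000UL
#define PAGE_SHIFT  12

static unsigned long max_low_pfn;	/* still zero this early in boot */

/* pure arithmetic, safe at any point */
static unsigned long pa_nodebug(unsigned long vaddr)
{
	return vaddr - PAGE_OFFSET;
}

/* the debug variant also consults state that early boot hasn't set up */
static unsigned long pa_debug(unsigned long vaddr)
{
	unsigned long pa = vaddr - PAGE_OFFSET;

	assert((pa >> PAGE_SHIFT) < max_low_pfn);	/* would fire here */
	return pa;
}

int main(void)
{
	printf("0x%lx\n", pa_nodebug(0xC1000000UL));
	(void)pa_debug;		/* calling it now would trip the assert */
	return 0;
}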
Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1363705484-27645-1-git-send-email-fenghua.yu@intel.com Reported-and-tested-by: Tetsuo Handa Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_intel_early.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index 7890bc838952..5992ee8086b7 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -90,13 +90,13 @@ microcode_phys(struct microcode_intel **mc_saved_tmp, struct microcode_intel ***mc_saved; mc_saved = (struct microcode_intel ***) - __pa_symbol(&mc_saved_data->mc_saved); + __pa_nodebug(&mc_saved_data->mc_saved); for (i = 0; i < mc_saved_data->mc_saved_count; i++) { struct microcode_intel *p; p = *(struct microcode_intel **) - __pa(mc_saved_data->mc_saved + i); - mc_saved_tmp[i] = (struct microcode_intel *)__pa(p); + __pa_nodebug(mc_saved_data->mc_saved + i); + mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); } } #endif @@ -562,7 +562,7 @@ scan_microcode(unsigned long start, unsigned long end, struct cpio_data cd; long offset = 0; #ifdef CONFIG_X86_32 - char *p = (char *)__pa_symbol(ucode_name); + char *p = (char *)__pa_nodebug(ucode_name); #else char *p = ucode_name; #endif @@ -630,8 +630,8 @@ static void __cpuinit print_ucode(struct ucode_cpu_info *uci) if (mc_intel == NULL) return; - delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info); - current_mc_date_p = (int *)__pa_symbol(¤t_mc_date); + delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); + current_mc_date_p = (int *)__pa_nodebug(¤t_mc_date); *delay_ucode_info_p = 1; *current_mc_date_p = mc_intel->hdr.date; @@ -741,15 +741,15 @@ load_ucode_intel_bsp(void) #ifdef CONFIG_X86_32 struct boot_params *boot_params_p; - boot_params_p = (struct boot_params *)__pa_symbol(&boot_params); + boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params); ramdisk_image = boot_params_p->hdr.ramdisk_image; ramdisk_size = boot_params_p->hdr.ramdisk_size; initrd_start_early = ramdisk_image; initrd_end_early = initrd_start_early + ramdisk_size; _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_symbol(&mc_saved_data), - (unsigned long *)__pa_symbol(&mc_saved_in_initrd), + (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), initrd_start_early, initrd_end_early, &uci); #else ramdisk_image = boot_params.hdr.ramdisk_image; @@ -772,10 +772,10 @@ void __cpuinit load_ucode_intel_ap(void) unsigned long *initrd_start_p; mc_saved_in_initrd_p = - (unsigned long *)__pa_symbol(mc_saved_in_initrd); - mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start); - initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p); + (unsigned long *)__pa_nodebug(mc_saved_in_initrd); + mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); + initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); #else mc_saved_data_p = &mc_saved_data; mc_saved_in_initrd_p = mc_saved_in_initrd; -- cgit v1.2.2 From 61ac51301e6c6d4ed977d7674ce2b8e713619a9b Mon Sep 17 00:00:00 2001 From: Torstein Hegge Date: Tue, 19 Mar 2013 17:12:14 +0100 Subject: ALSA: usb: Parse UAC2 extension unit like for UAC1 UAC2_EXTENSION_UNIT_V2 differs from UAC1_EXTENSION_UNIT, but can be handled in the same way when parsing 
the unit. Otherwise parse_audio_unit() fails when it sees an extension unit on a UAC2 device. UAC2_EXTENSION_UNIT_V2 is outside the range allocated by UAC1. Signed-off-by: Torstein Hegge Acked-by: Daniel Mack Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 638e7f738018..8eb84c0f7bf1 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -725,7 +725,8 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_ case UAC1_PROCESSING_UNIT: case UAC1_EXTENSION_UNIT: /* UAC2_PROCESSING_UNIT_V2 */ - /* UAC2_EFFECT_UNIT */ { + /* UAC2_EFFECT_UNIT */ + case UAC2_EXTENSION_UNIT_V2: { struct uac_processing_unit_descriptor *d = p1; if (state->mixer->protocol == UAC_VERSION_2 && @@ -2052,6 +2053,8 @@ static int parse_audio_unit(struct mixer_build *state, int unitid) return parse_audio_extension_unit(state, unitid, p1); else /* UAC_VERSION_2 */ return parse_audio_processing_unit(state, unitid, p1); + case UAC2_EXTENSION_UNIT_V2: + return parse_audio_extension_unit(state, unitid, p1); default: snd_printk(KERN_ERR "usbaudio: unit %u: unexpected type 0x%02x\n", unitid, p1[2]); return -EINVAL; -- cgit v1.2.2 From 4d7b86c98e445b075c2c4c3757eb6d3d6efbe72e Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 19 Mar 2013 21:09:24 +0100 Subject: ALSA: snd-usb: mixer: propagate errors up the call chain In check_input_term() and parse_audio_feature_unit(), propagate the error value that has been returned by a failing function instead of -EINVAL. That helps clean up the error paths in the mixer. Signed-off-by: Daniel Mack Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 8eb84c0f7bf1..45cc0aff9c3e 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -715,8 +715,9 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_ case UAC2_CLOCK_SELECTOR: { struct uac_selector_unit_descriptor *d = p1; /* call recursively to retrieve the channel info */ - if (check_input_term(state, d->baSourceID[0], term) < 0) - return -ENODEV; + err = check_input_term(state, d->baSourceID[0], term); + if (err < 0) + return err; term->type = d->bDescriptorSubtype << 16; /* virtual type */ term->id = id; term->name = uac_selector_unit_iSelector(d); @@ -1357,8 +1358,9 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void return err; /* determine the input source type and name */ - if (check_input_term(state, hdr->bSourceID, &iterm) < 0) - return -EINVAL; + err = check_input_term(state, hdr->bSourceID, &iterm); + if (err < 0) + return err; master_bits = snd_usb_combine_bytes(bmaControls, csize); /* master configuration quirks */ -- cgit v1.2.2 From 83ea5d18d74f032a760fecde78c0210f66f7f70c Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 19 Mar 2013 21:09:25 +0100 Subject: ALSA: snd-usb: mixer: ignore -EINVAL in snd_usb_mixer_controls() Creation of individual mixer controls may fail, but that shouldn't cause the entire mixer creation to fail. Even worse, if the mixer creation fails, that will error out the entire device probing. All the functions called by parse_audio_unit() should return -EINVAL if they find descriptors that are unsupported or believed to be malformed, so we can safely handle this error code as a non-fatal condition in snd_usb_mixer_controls(). 
That fixes a long-standing bug which is commonly worked around by adding quirks which make the driver ignore entire interfaces. Some of them might now be unnecessary. Signed-off-by: Daniel Mack Reported-and-tested-by: Rodolfo Thomazelli Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 45cc0aff9c3e..ca4739c3f650 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2123,7 +2123,7 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = desc->iTerminal; err = parse_audio_unit(&state, desc->bSourceID); - if (err < 0) + if (err < 0 && err != -EINVAL) return err; } else { /* UAC_VERSION_2 */ struct uac2_output_terminal_descriptor *desc = p; @@ -2135,12 +2135,12 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer) state.oterm.type = le16_to_cpu(desc->wTerminalType); state.oterm.name = desc->iTerminal; err = parse_audio_unit(&state, desc->bSourceID); - if (err < 0) + if (err < 0 && err != -EINVAL) return err; /* for UAC2, use the same approach to also add the clock selectors */ err = parse_audio_unit(&state, desc->bCSourceID); - if (err < 0) + if (err < 0 && err != -EINVAL) return err; } } -- cgit v1.2.2 From 5830daf8174d7ea8df2621f8dbede3096bb659b5 Mon Sep 17 00:00:00 2001 From: Vikas Sajjan Date: Wed, 27 Feb 2013 16:02:58 +0530 Subject: drm/exynos: modify the compatible string for exynos fimd Modified the compatible string for exynos4 fimd to "exynos4210-fimd" and for exynos5 fimd to "exynos5250-fimd", to stick to the rule that a compatible value should be named after the first specific SoC model in which this particular IP version was included, as discussed at https://patchwork.kernel.org/patch/2144861/ Signed-off-by: Vikas Sajjan Acked-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 36493ce71f9a..549cb7db9c9f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -109,9 +109,9 @@ struct fimd_context { #ifdef CONFIG_OF static const struct of_device_id fimd_driver_dt_match[] = { - { .compatible = "samsung,exynos4-fimd", + { .compatible = "samsung,exynos4210-fimd", .data = &exynos4_fimd_driver_data }, - { .compatible = "samsung,exynos5-fimd", + { .compatible = "samsung,exynos5250-fimd", .data = &exynos5_fimd_driver_data }, {}, }; -- cgit v1.2.2 From 9800935a215ddf278da4860f59b4d29d2f429152 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Sat, 2 Mar 2013 15:06:24 +0530 Subject: drm/exynos: Make mixer_check_timing static Fixes the following sparse warning: drivers/gpu/drm/exynos/exynos_mixer.c:821:5: warning: symbol 'mixer_check_timing' was not declared. Should it be static? 
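For reference, the warning just flags a symbol with external linkage that no header declares; a tiny hypothetical translation unit shows the shape of the fix:

/* before: int helper(int x) had external linkage and sparse complained */
static int helper(int x)	/* static: internal linkage, warning gone */
{
	return x * 2;
}

int caller(void)
{
	return helper(21);
}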
Signed-off-by: Sachin Kamat Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index e919aba29b3d..2f4f72f07047 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -818,7 +818,7 @@ static void mixer_win_disable(void *ctx, int win) mixer_ctx->win_data[win].enabled = false; } -int mixer_check_timing(void *ctx, struct fb_videomode *timing) +static int mixer_check_timing(void *ctx, struct fb_videomode *timing) { struct mixer_context *mixer_ctx = ctx; u32 w, h; -- cgit v1.2.2 From 0f10cf1463c6fc02a9e85bf098ef3c215d94b1e3 Mon Sep 17 00:00:00 2001 From: Leela Krishna Amudala Date: Thu, 7 Mar 2013 23:28:52 -0500 Subject: drm/exynos: fimd: calculate the correct address offset Calculate the correct address offset values for alpha and color key control registers based on exynos4 and exynos5 user manuals. Also remove VIDOSD_C_SIZE_W0 macro and fix comments about registers for size and alpha. Signed-off-by: Leela Krishna Amudala Acked-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 549cb7db9c9f..98cc14725ba9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -38,11 +38,12 @@ /* position control register for hardware window 0, 2 ~ 4.*/ #define VIDOSD_A(win) (VIDOSD_BASE + 0x00 + (win) * 16) #define VIDOSD_B(win) (VIDOSD_BASE + 0x04 + (win) * 16) -/* size control register for hardware window 0. */ -#define VIDOSD_C_SIZE_W0 (VIDOSD_BASE + 0x08) -/* alpha control register for hardware window 1 ~ 4. */ -#define VIDOSD_C(win) (VIDOSD_BASE + 0x18 + (win) * 16) -/* size control register for hardware window 1 ~ 4. */ +/* + * size control register for hardware windows 0 and alpha control register + * for hardware windows 1 ~ 4 + */ +#define VIDOSD_C(win) (VIDOSD_BASE + 0x08 + (win) * 16) +/* size control register for hardware windows 1 ~ 2. */ #define VIDOSD_D(win) (VIDOSD_BASE + 0x0C + (win) * 16) #define VIDWx_BUF_START(win, buf) (VIDW_BUF_START(buf) + (win) * 8) @@ -50,9 +51,9 @@ #define VIDWx_BUF_SIZE(win, buf) (VIDW_BUF_SIZE(buf) + (win) * 4) /* color key control register for hardware window 1 ~ 4. */ -#define WKEYCON0_BASE(x) ((WKEYCON0 + 0x140) + (x * 8)) +#define WKEYCON0_BASE(x) ((WKEYCON0 + 0x140) + ((x - 1) * 8)) /* color key value register for hardware window 1 ~ 4. */ -#define WKEYCON1_BASE(x) ((WKEYCON1 + 0x140) + (x * 8)) +#define WKEYCON1_BASE(x) ((WKEYCON1 + 0x140) + ((x - 1) * 8)) /* FIMD has totally five hardware windows. */ #define WINDOWS_NR 5 -- cgit v1.2.2 From e2779e1698c7dbf36a02a9922d216b4db0e212b8 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghiu Date: Mon, 11 Mar 2013 21:25:22 +0200 Subject: drm/exynos: Replaced kzalloc & memcpy with kmemdup Replaced calls to kzalloc followed by memcpy with calls to kmemdup. Patch found using coccinelle. 
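A userspace analogue makes the cleanup pattern obvious (hypothetical memdup() helper; the kernel's kmemdup() lives in mm/util.c): allocate-and-copy collapses into one call, leaving a single failure point to check.

#include <stdlib.h>
#include <string.h>

/* stand-in for kmemdup(): duplicate len bytes of src, or return NULL */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

int main(void)
{
	const char raw[] = "EDID";
	char *copy = memdup(raw, sizeof(raw));
	int ok = copy != NULL;

	free(copy);
	return ok ? 0 : 1;
}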
Signed-off-by: Alexandru Gheorghiu Acked-by: Seung-Woo Kim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 13ccbd4bcfaa..9504b0cd825a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -117,13 +117,12 @@ static struct edid *vidi_get_edid(struct device *dev, } edid_len = (1 + ctx->raw_edid->extensions) * EDID_LENGTH; - edid = kzalloc(edid_len, GFP_KERNEL); + edid = kmemdup(ctx->raw_edid, edid_len, GFP_KERNEL); if (!edid) { DRM_DEBUG_KMS("failed to allocate edid\n"); return ERR_PTR(-ENOMEM); } - memcpy(edid, ctx->raw_edid, edid_len); return edid; } @@ -563,12 +562,11 @@ int vidi_connection_ioctl(struct drm_device *drm_dev, void *data, return -EINVAL; } edid_len = (1 + raw_edid->extensions) * EDID_LENGTH; - ctx->raw_edid = kzalloc(edid_len, GFP_KERNEL); + ctx->raw_edid = kmemdup(raw_edid, edid_len, GFP_KERNEL); if (!ctx->raw_edid) { DRM_DEBUG_KMS("failed to allocate raw_edid.\n"); return -ENOMEM; } - memcpy(ctx->raw_edid, raw_edid, edid_len); } else { /* * with connection = 0, free raw_edid -- cgit v1.2.2 From 067ed3311f7961bef67551fa5115dbadf9a035f4 Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Mon, 11 Mar 2013 19:48:05 +0900 Subject: drm/exynos: Fix error routine for getting dma addr. This patch fixes the error routine used when g2d_userptr_get_dma_addr() fails. When sg_alloc_table_from_pages() fails, sg_free_table() is no longer called. Signed-off-by: YoungJun Cho Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 3b0da0378acf..28b71125189b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -450,7 +450,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, DMA_BIDIRECTIONAL); if (ret < 0) { DRM_ERROR("failed to map sgt with dma region.\n"); - goto err_free_sgt; + goto err_sg_free_table; } g2d_userptr->dma_addr = sgt->sgl[0].dma_address; @@ -467,8 +467,10 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, return &g2d_userptr->dma_addr; -err_free_sgt: +err_sg_free_table: sg_free_table(sgt); + +err_free_sgt: kfree(sgt); sgt = NULL; -- cgit v1.2.2 From 5efc1d1b53ba60a89ce8269880ed02eddecd1add Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Mon, 11 Mar 2013 19:56:17 +0900 Subject: drm/exynos: clear node object type at gem unmap This patch clears node object type in G2D unmap cmdlist. The obj_type of cmdlist node has to be cleared in g2d_unmap_cmdlist_gem() so that the node can be reused in g2d_map_cmdlist_gem(). 
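The underlying rule is the usual one for pooled objects: reset per-use state on release so the next user cannot inherit stale tags. A hypothetical standalone C sketch:

/* loosely modeled on g2d_cmdlist_node; names are illustrative */
struct pool_node {
	unsigned long handle;
	unsigned int buf_type;
};

static void node_release(struct pool_node *n)
{
	/* without these resets, a recycled node keeps the old type */
	n->handle = 0;
	n->buf_type = 0;
}

int main(void)
{
	struct pool_node n = { .handle = 42, .buf_type = 1 };

	node_release(&n);	/* back on the free list in a clean state */
	return (n.handle == 0 && n.buf_type == 0) ? 0 : 1;
}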
Signed-off-by: YoungJun Cho Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 28b71125189b..095520fdf5eb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -576,6 +576,7 @@ static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, false); node->handles[i] = 0; + node->obj_type[i] = 0; } node->map_nr = 0; -- cgit v1.2.2 From 7ad018140cc9c0e3388243e524f8410e5f174658 Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Wed, 13 Mar 2013 16:44:37 +0900 Subject: drm/exynos: Fix G2D core malfunctioning issue This patch fixes a G2D core malfunction issue seen once G2D DMA is started. Without the 'DMA_HOLD_CMD_REG' register setting, there is only one interrupt after execution of all command lists has been completed. And that induces a watchdog timeout. So this patch sets the 'LIST_HOLD' command in the register so that a command execution interrupt occurs whenever each command list execution is finished. Changelog v2: - Consider interrupt setup for each command list and for all command lists, and correct a typo. Signed-off-by: YoungJun Cho Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 20 +++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 095520fdf5eb..1ff11443f552 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -82,7 +82,7 @@ #define G2D_DMA_LIST_DONE_COUNT_OFFSET 17 /* G2D_DMA_HOLD_CMD */ -#define G2D_USET_HOLD (1 << 2) +#define G2D_USER_HOLD (1 << 2) #define G2D_LIST_HOLD (1 << 1) #define G2D_BITBLT_HOLD (1 << 0) @@ -592,10 +592,6 @@ static void g2d_dma_start(struct g2d_data *g2d, pm_runtime_get_sync(g2d->dev); clk_enable(g2d->gate_clk); - /* interrupt enable */ - writel_relaxed(G2D_INTEN_ACF | G2D_INTEN_UCF | G2D_INTEN_GCF, - g2d->regs + G2D_INTEN); - writel_relaxed(node->dma_addr, g2d->regs + G2D_DMA_SFR_BASE_ADDR); writel_relaxed(G2D_DMA_START, g2d->regs + G2D_DMA_COMMAND); } @@ -863,9 +859,23 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data, cmdlist->data[cmdlist->last++] = G2D_SRC_BASE_ADDR; cmdlist->data[cmdlist->last++] = 0; + /* + * 'LIST_HOLD' command should be set to the DMA_HOLD_CMD_REG + * and GCF bit should be set to INTEN register if user wants + * G2D interrupt event once current command list execution is + * finished. + * Otherwise only ACF bit should be set to INTEN register so + * that one interrupt occurs after all command lists + * have been completed. + */ if (node->event) { + cmdlist->data[cmdlist->last++] = G2D_INTEN; + cmdlist->data[cmdlist->last++] = G2D_INTEN_ACF | G2D_INTEN_GCF; cmdlist->data[cmdlist->last++] = G2D_DMA_HOLD_CMD; cmdlist->data[cmdlist->last++] = G2D_LIST_HOLD; + } else { + cmdlist->data[cmdlist->last++] = G2D_INTEN; + cmdlist->data[cmdlist->last++] = G2D_INTEN_ACF; } /* Check size of cmdlist: last 2 is about G2D_BITBLT_START */ -- cgit v1.2.2 From f3d2fc4a7315d8dd39e6fb37122a3aa08fea6e62 Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Wed, 13 Mar 2013 16:55:48 +0900 Subject: drm/exynos: Clean up some G2D codes for readability This patch just cleans up G2D code for readability. For this, it renames the g2d_cmdlist_node member obj_type to buf_type. 
Changelog v2: - Revert irrelevant code. Signed-off-by: YoungJun Cho Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 1ff11443f552..7c1aac3871da 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -131,13 +131,12 @@ struct g2d_cmdlist_userptr { bool in_pool; bool out_of_list; }; - struct g2d_cmdlist_node { struct list_head list; struct g2d_cmdlist *cmdlist; unsigned int map_nr; unsigned long handles[MAX_BUF_ADDR_NR]; - unsigned int obj_type[MAX_BUF_ADDR_NR]; + unsigned int buf_type[MAX_BUF_ADDR_NR]; dma_addr_t dma_addr; struct drm_exynos_pending_g2d_event *event; @@ -524,7 +523,7 @@ static int g2d_map_cmdlist_gem(struct g2d_data *g2d, offset = cmdlist->last - (i * 2 + 1); handle = cmdlist->data[offset]; - if (node->obj_type[i] == BUF_TYPE_GEM) { + if (node->buf_type[i] == BUF_TYPE_GEM) { addr = exynos_drm_gem_get_dma_addr(drm_dev, handle, file); if (IS_ERR(addr)) { @@ -568,7 +567,7 @@ static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, for (i = 0; i < node->map_nr; i++) { unsigned long handle = node->handles[i]; - if (node->obj_type[i] == BUF_TYPE_GEM) + if (node->buf_type[i] == BUF_TYPE_GEM) exynos_drm_gem_put_dma_addr(subdrv->drm_dev, handle, filp); else @@ -576,7 +575,7 @@ static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, false); node->handles[i] = 0; - node->buf_type[i] = 0; + node->buf_type[i] = 0; } node->map_nr = 0; @@ -642,7 +641,6 @@ static void g2d_runqueue_worker(struct work_struct *work) struct g2d_data *g2d = container_of(work, struct g2d_data, runqueue_work); - mutex_lock(&g2d->runqueue_mutex); clk_disable(g2d->gate_clk); pm_runtime_put_sync(g2d->dev); @@ -730,7 +728,7 @@ static int g2d_check_reg_offset(struct device *dev, reg_offset = (cmdlist->data[index] & ~0x7fffffff) >> 31; if (reg_offset) { - node->obj_type[i] = BUF_TYPE_USERPTR; + node->buf_type[i] = BUF_TYPE_USERPTR; cmdlist->data[index] &= ~G2D_BUF_USERPTR; } } @@ -752,8 +750,8 @@ static int g2d_check_reg_offset(struct device *dev, if (!for_addr) goto err; - if (node->obj_type[i] != BUF_TYPE_USERPTR) - node->obj_type[i] = BUF_TYPE_GEM; + if (node->buf_type[i] != BUF_TYPE_USERPTR) + node->buf_type[i] = BUF_TYPE_GEM; break; default: if (for_addr) -- cgit v1.2.2 From 9963cb6ef9e6f925617b3c74f0700bf5fbee9a1d Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Wed, 13 Mar 2013 17:10:08 +0900 Subject: drm/exynos: Deal with g2d buffer info more efficiently This patch adds a g2d_buf_info structure and moves the buffer-relevant variables into it so as to manage g2d buffer information more efficiently. Changelog v2: - Fix merge conflict.
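One subtlety in the patch below: reg_types[] is filled in mapping order, while handles[] and types[] are indexed by the register type itself, so the unmap path walks one array to index the others. A small standalone sketch of that two-index scheme, with hypothetical names rather than the driver's:

#include <stdio.h>

enum reg_type { REG_SRC, REG_DST, MAX_REG };

struct buf_info {
        unsigned int map_nr;
        enum reg_type reg_types[MAX_REG]; /* indexed by map order */
        unsigned long handles[MAX_REG];   /* indexed by reg_type  */
};

static void record(struct buf_info *bi, enum reg_type t, unsigned long h)
{
        bi->reg_types[bi->map_nr++] = t; /* remember mapping order */
        bi->handles[t] = h;              /* one slot per register  */
}

int main(void)
{
        struct buf_info bi = { 0 };
        unsigned int i;

        record(&bi, REG_DST, 0x20);
        record(&bi, REG_SRC, 0x10);

        /* unmap walks map order, then indirects into per-type slots */
        for (i = 0; i < bi.map_nr; i++) {
                enum reg_type t = bi.reg_types[i];
                printf("unmap reg_type %d handle 0x%lx\n", t, bi.handles[t]);
        }
        return 0;
}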
Signed-off-by: YoungJun Cho Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 160 ++++++++++++++++++++++++-------- 1 file changed, 123 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 7c1aac3871da..1a022dc4188e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -96,8 +96,6 @@ #define G2D_CMDLIST_POOL_SIZE (G2D_CMDLIST_SIZE * G2D_CMDLIST_NUM) #define G2D_CMDLIST_DATA_NUM (G2D_CMDLIST_SIZE / sizeof(u32) - 2) -#define MAX_BUF_ADDR_NR 6 - /* maximum buffer pool size of userptr is 64MB as default */ #define MAX_POOL (64 * 1024 * 1024) @@ -106,6 +104,17 @@ enum { BUF_TYPE_USERPTR, }; +enum g2d_reg_type { + REG_TYPE_NONE = -1, + REG_TYPE_SRC, + REG_TYPE_SRC_PLANE2, + REG_TYPE_DST, + REG_TYPE_DST_PLANE2, + REG_TYPE_PAT, + REG_TYPE_MSK, + MAX_REG_TYPE_NR +}; + /* cmdlist data structure */ struct g2d_cmdlist { u32 head; @@ -113,6 +122,22 @@ struct g2d_cmdlist { u32 last; /* last data offset */ }; +/* + * A structure of buffer information + * + * @map_nr: manages the number of mapped buffers + * @reg_types: stores regitster type in the order of requested command + * @handles: stores buffer handle in its reg_type position + * @types: stores buffer type in its reg_type position + * + */ +struct g2d_buf_info { + unsigned int map_nr; + enum g2d_reg_type reg_types[MAX_REG_TYPE_NR]; + unsigned long handles[MAX_REG_TYPE_NR]; + unsigned int types[MAX_REG_TYPE_NR]; +}; + struct drm_exynos_pending_g2d_event { struct drm_pending_event base; struct drm_exynos_g2d_event event; @@ -134,10 +159,8 @@ struct g2d_cmdlist_userptr { struct g2d_cmdlist_node { struct list_head list; struct g2d_cmdlist *cmdlist; - unsigned int map_nr; - unsigned long handles[MAX_BUF_ADDR_NR]; - unsigned int buf_type[MAX_BUF_ADDR_NR]; dma_addr_t dma_addr; + struct g2d_buf_info buf_info; struct drm_exynos_pending_g2d_event *event; }; @@ -187,6 +210,7 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) struct exynos_drm_subdrv *subdrv = &g2d->subdrv; int nr; int ret; + struct g2d_buf_info *buf_info; init_dma_attrs(&g2d->cmdlist_dma_attrs); dma_set_attr(DMA_ATTR_WRITE_COMBINE, &g2d->cmdlist_dma_attrs); @@ -208,11 +232,17 @@ static int g2d_init_cmdlist(struct g2d_data *g2d) } for (nr = 0; nr < G2D_CMDLIST_NUM; nr++) { + unsigned int i; + node[nr].cmdlist = g2d->cmdlist_pool_virt + nr * G2D_CMDLIST_SIZE; node[nr].dma_addr = g2d->cmdlist_pool + nr * G2D_CMDLIST_SIZE; + buf_info = &node[nr].buf_info; + for (i = 0; i < MAX_REG_TYPE_NR; i++) + buf_info->reg_types[i] = REG_TYPE_NONE; + list_add_tail(&node[nr].list, &g2d->free_cmdlist); } @@ -507,36 +537,80 @@ static void g2d_userptr_free_all(struct drm_device *drm_dev, g2d->current_pool = 0; } +static enum g2d_reg_type g2d_get_reg_type(int reg_offset) +{ + enum g2d_reg_type reg_type; + + switch (reg_offset) { + case G2D_SRC_BASE_ADDR: + reg_type = REG_TYPE_SRC; + break; + case G2D_SRC_PLANE2_BASE_ADDR: + reg_type = REG_TYPE_SRC_PLANE2; + break; + case G2D_DST_BASE_ADDR: + reg_type = REG_TYPE_DST; + break; + case G2D_DST_PLANE2_BASE_ADDR: + reg_type = REG_TYPE_DST_PLANE2; + break; + case G2D_PAT_BASE_ADDR: + reg_type = REG_TYPE_PAT; + break; + case G2D_MSK_BASE_ADDR: + reg_type = REG_TYPE_MSK; + break; + default: + reg_type = REG_TYPE_NONE; + DRM_ERROR("Unknown register offset![%d]\n", reg_offset); + break; + }; + + return reg_type; +} + static int g2d_map_cmdlist_gem(struct g2d_data *g2d, struct g2d_cmdlist_node *node, 
struct drm_device *drm_dev, struct drm_file *file) { struct g2d_cmdlist *cmdlist = node->cmdlist; + struct g2d_buf_info *buf_info = &node->buf_info; int offset; + int ret; int i; - for (i = 0; i < node->map_nr; i++) { + for (i = 0; i < buf_info->map_nr; i++) { + enum g2d_reg_type reg_type; + int reg_pos; unsigned long handle; dma_addr_t *addr; - offset = cmdlist->last - (i * 2 + 1); - handle = cmdlist->data[offset]; + reg_pos = cmdlist->last - 2 * (i + 1); + + offset = cmdlist->data[reg_pos]; + handle = cmdlist->data[reg_pos + 1]; + + reg_type = g2d_get_reg_type(offset); + if (reg_type == REG_TYPE_NONE) { + ret = -EFAULT; + goto err; + } - if (node->buf_type[i] == BUF_TYPE_GEM) { + if (buf_info->types[reg_type] == BUF_TYPE_GEM) { addr = exynos_drm_gem_get_dma_addr(drm_dev, handle, file); if (IS_ERR(addr)) { - node->map_nr = i; - return -EFAULT; + ret = -EFAULT; + goto err; } } else { struct drm_exynos_g2d_userptr g2d_userptr; if (copy_from_user(&g2d_userptr, (void __user *)handle, sizeof(struct drm_exynos_g2d_userptr))) { - node->map_nr = i; - return -EFAULT; + ret = -EFAULT; + goto err; } addr = g2d_userptr_get_dma_addr(drm_dev, @@ -545,16 +619,21 @@ static int g2d_map_cmdlist_gem(struct g2d_data *g2d, file, &handle); if (IS_ERR(addr)) { - node->map_nr = i; - return -EFAULT; + ret = -EFAULT; + goto err; } } - cmdlist->data[offset] = *addr; - node->handles[i] = handle; + cmdlist->data[reg_pos + 1] = *addr; + buf_info->reg_types[i] = reg_type; + buf_info->handles[reg_type] = handle; } return 0; + +err: + buf_info->map_nr = i; + return ret; } static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, @@ -562,23 +641,30 @@ static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, struct drm_file *filp) { struct exynos_drm_subdrv *subdrv = &g2d->subdrv; + struct g2d_buf_info *buf_info = &node->buf_info; int i; - for (i = 0; i < node->map_nr; i++) { - unsigned long handle = node->handles[i]; + for (i = 0; i < buf_info->map_nr; i++) { + enum g2d_reg_type reg_type; + unsigned long handle; + + reg_type = buf_info->reg_types[i]; - if (node->buf_type[i] == BUF_TYPE_GEM) + handle = buf_info->handles[reg_type]; + + if (buf_info->types[reg_type] == BUF_TYPE_GEM) exynos_drm_gem_put_dma_addr(subdrv->drm_dev, handle, filp); else g2d_userptr_put_dma_addr(subdrv->drm_dev, handle, false); - node->handles[i] = 0; - node->buf_type[i] = 0; + buf_info->reg_types[i] = REG_TYPE_NONE; + buf_info->handles[reg_type] = 0; + buf_info->types[reg_type] = 0; } - node->map_nr = 0; + buf_info->map_nr = 0; } static void g2d_dma_start(struct g2d_data *g2d, @@ -721,20 +807,12 @@ static int g2d_check_reg_offset(struct device *dev, int i; for (i = 0; i < nr; i++) { - index = cmdlist->last - 2 * (i + 1); + struct g2d_buf_info *buf_info = &node->buf_info; + enum g2d_reg_type reg_type; - if (for_addr) { - /* check userptr buffer type. */ - reg_offset = (cmdlist->data[index] & - ~0x7fffffff) >> 31; - if (reg_offset) { - node->buf_type[i] = BUF_TYPE_USERPTR; - cmdlist->data[index] &= ~G2D_BUF_USERPTR; - } - } + index = cmdlist->last - 2 * (i + 1); reg_offset = cmdlist->data[index] & ~0xfffff000; - if (reg_offset < G2D_VALID_START || reg_offset > G2D_VALID_END) goto err; if (reg_offset % 4) @@ -750,8 +828,16 @@ static int g2d_check_reg_offset(struct device *dev, if (!for_addr) goto err; - if (node->buf_type[i] != BUF_TYPE_USERPTR) - node->buf_type[i] = BUF_TYPE_GEM; + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + goto err; + + /* check userptr buffer type. 
*/ + if ((cmdlist->data[index] & ~0x7fffffff) >> 31) { + buf_info->types[reg_type] = BUF_TYPE_USERPTR; + cmdlist->data[index] &= ~G2D_BUF_USERPTR; + } else + buf_info->types[reg_type] = BUF_TYPE_GEM; break; default: if (for_addr) goto err; @@ -898,7 +984,7 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data, if (ret < 0) goto err_free_event; - node->map_nr = req->cmd_buf_nr; + node->buf_info.map_nr = req->cmd_buf_nr; if (req->cmd_buf_nr) { struct drm_exynos_g2d_cmd *cmd_buf; -- cgit v1.2.2 From a4f19aaab3e69f9d15cc995e3378d27c8ef4f780 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Mon, 11 Mar 2013 21:15:59 +0900 Subject: drm/exynos: Add a new function to get gem buffer size This patch adds a new function to get a gem buffer size. This function can be used by the g2d driver, or by others, to get a gem buffer size and check whether the buffer is valid or not. Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 21 +++++++++++++++++++++ drivers/gpu/drm/exynos/exynos_drm_gem.h | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 67e17ce112b6..0e6fe000578c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -164,6 +164,27 @@ out: exynos_gem_obj = NULL; } +unsigned long exynos_drm_gem_get_size(struct drm_device *dev, + unsigned int gem_handle, + struct drm_file *file_priv) +{ + struct exynos_drm_gem_obj *exynos_gem_obj; + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(dev, file_priv, gem_handle); + if (!obj) { + DRM_ERROR("failed to lookup gem object.\n"); + return 0; + } + + exynos_gem_obj = to_exynos_gem_obj(obj); + + drm_gem_object_unreference_unlocked(obj); + + return exynos_gem_obj->buffer->size; +} + + struct exynos_drm_gem_obj *exynos_drm_gem_init(struct drm_device *dev, unsigned long size) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 35ebac47dc2b..468766bee450 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -130,6 +130,11 @@ int exynos_drm_gem_userptr_ioctl(struct drm_device *dev, void *data, int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +/* get buffer size to gem handle. */ +unsigned long exynos_drm_gem_get_size(struct drm_device *dev, + unsigned int gem_handle, + struct drm_file *file_priv); + /* initialize gem object. */ int exynos_drm_gem_init_object(struct drm_gem_object *obj); -- cgit v1.2.2 From 2dec17c70e7567f226331c26d8daa0c16d3e7e6d Mon Sep 17 00:00:00 2001 From: YoungJun Cho Date: Mon, 11 Mar 2013 21:17:52 +0900 Subject: drm/exynos: Check g2d cmd list for g2d restrictions This patch checks the command list from user space against g2d restrictions. Until now, the g2d driver did not properly consider the G2D hardware restrictions. The restrictions of the G2D hardware are listed below, and this patch enforces them (a worked size example follows the changelog). - width or height values in the command list have to be in the valid range (1 to 8000 pixels). - The requested area must not exceed the buffer size. - right has to be bigger than left. - bottom has to be bigger than top. Changelog v2: - Fix merge conflict.
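To make the area restriction concrete: a 4000x4000 XRGB8888 operation needs 4000 * 4000 * 4 = 64,000,000 bytes, so it must be rejected on a 32 MiB GEM buffer but accepted on a 64 MiB one. A minimal sketch of that arithmetic (a hypothetical helper, not the function the patch adds):

#include <stdbool.h>
#include <stdio.h>

#define LEN_MIN 1
#define LEN_MAX 8000

/* true when a w x h rectangle at bpp bytes per pixel fits in size */
static bool rect_fits(unsigned int w, unsigned int h,
                      unsigned long bpp, unsigned long size)
{
        if (w < LEN_MIN || w > LEN_MAX || h < LEN_MIN || h > LEN_MAX)
                return false;
        /* widen before multiplying so the product cannot wrap */
        return (unsigned long)w * h * bpp <= size;
}

int main(void)
{
        printf("%d\n", rect_fits(4000, 4000, 4, 32UL << 20)); /* 0 */
        printf("%d\n", rect_fits(4000, 4000, 4, 64UL << 20)); /* 1 */
        return 0;
}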
Signed-off-by: YoungJun Cho Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 183 ++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 1a022dc4188e..47a493c8a71f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -48,8 +48,14 @@ /* registers for base address */ #define G2D_SRC_BASE_ADDR 0x0304 +#define G2D_SRC_COLOR_MODE 0x030C +#define G2D_SRC_LEFT_TOP 0x0310 +#define G2D_SRC_RIGHT_BOTTOM 0x0314 #define G2D_SRC_PLANE2_BASE_ADDR 0x0318 #define G2D_DST_BASE_ADDR 0x0404 +#define G2D_DST_COLOR_MODE 0x040C +#define G2D_DST_LEFT_TOP 0x0410 +#define G2D_DST_RIGHT_BOTTOM 0x0414 #define G2D_DST_PLANE2_BASE_ADDR 0x0418 #define G2D_PAT_BASE_ADDR 0x0500 #define G2D_MSK_BASE_ADDR 0x0520 @@ -91,6 +97,22 @@ #define G2D_START_NHOLT (1 << 1) #define G2D_START_BITBLT (1 << 0) +/* buffer color format */ +#define G2D_FMT_XRGB8888 0 +#define G2D_FMT_ARGB8888 1 +#define G2D_FMT_RGB565 2 +#define G2D_FMT_XRGB1555 3 +#define G2D_FMT_ARGB1555 4 +#define G2D_FMT_XRGB4444 5 +#define G2D_FMT_ARGB4444 6 +#define G2D_FMT_PACKED_RGB888 7 +#define G2D_FMT_A8 11 +#define G2D_FMT_L8 12 + +/* buffer valid length */ +#define G2D_LEN_MIN 1 +#define G2D_LEN_MAX 8000 + #define G2D_CMDLIST_SIZE (PAGE_SIZE / 4) #define G2D_CMDLIST_NUM 64 #define G2D_CMDLIST_POOL_SIZE (G2D_CMDLIST_SIZE * G2D_CMDLIST_NUM) @@ -122,6 +144,24 @@ struct g2d_cmdlist { u32 last; /* last data offset */ }; +/* + * A structure of buffer description + * + * @format: color format + * @left_x: the x coordinates of left top corner + * @top_y: the y coordinates of left top corner + * @right_x: the x coordinates of right bottom corner + * @bottom_y: the y coordinates of right bottom corner + * + */ +struct g2d_buf_desc { + unsigned int format; + unsigned int left_x; + unsigned int top_y; + unsigned int right_x; + unsigned int bottom_y; +}; + /* * A structure of buffer information * @@ -129,6 +169,7 @@ struct g2d_cmdlist { * @reg_types: stores regitster type in the order of requested command * @handles: stores buffer handle in its reg_type position * @types: stores buffer type in its reg_type position + * @descs: stores buffer description in its reg_type position * */ struct g2d_buf_info { @@ -136,6 +177,7 @@ struct g2d_buf_info { enum g2d_reg_type reg_types[MAX_REG_TYPE_NR]; unsigned long handles[MAX_REG_TYPE_NR]; unsigned int types[MAX_REG_TYPE_NR]; + struct g2d_buf_desc descs[MAX_REG_TYPE_NR]; }; struct drm_exynos_pending_g2d_event { @@ -543,12 +585,18 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset) switch (reg_offset) { case G2D_SRC_BASE_ADDR: + case G2D_SRC_COLOR_MODE: + case G2D_SRC_LEFT_TOP: + case G2D_SRC_RIGHT_BOTTOM: reg_type = REG_TYPE_SRC; break; case G2D_SRC_PLANE2_BASE_ADDR: reg_type = REG_TYPE_SRC_PLANE2; break; case G2D_DST_BASE_ADDR: + case G2D_DST_COLOR_MODE: + case G2D_DST_LEFT_TOP: + case G2D_DST_RIGHT_BOTTOM: reg_type = REG_TYPE_DST; break; case G2D_DST_PLANE2_BASE_ADDR: @@ -569,6 +617,69 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset) return reg_type; } +static unsigned long g2d_get_buf_bpp(unsigned int format) +{ + unsigned long bpp; + + switch (format) { + case G2D_FMT_XRGB8888: + case G2D_FMT_ARGB8888: + bpp = 4; + break; + case G2D_FMT_RGB565: + case G2D_FMT_XRGB1555: + case G2D_FMT_ARGB1555: + case G2D_FMT_XRGB4444: + case G2D_FMT_ARGB4444: + bpp = 2; + break; + case G2D_FMT_PACKED_RGB888: + bpp = 3; 
+ break; + default: + bpp = 1; + break; + } + + return bpp; +} + +static bool g2d_check_buf_desc_is_valid(struct g2d_buf_desc *buf_desc, + enum g2d_reg_type reg_type, + unsigned long size) +{ + unsigned int width, height; + unsigned long area; + + /* + * check source and destination buffers only. + * so the others are always valid. + */ + if (reg_type != REG_TYPE_SRC && reg_type != REG_TYPE_DST) + return true; + + width = buf_desc->right_x - buf_desc->left_x; + if (width < G2D_LEN_MIN || width > G2D_LEN_MAX) { + DRM_ERROR("width[%u] is out of range!\n", width); + return false; + } + + height = buf_desc->bottom_y - buf_desc->top_y; + if (height < G2D_LEN_MIN || height > G2D_LEN_MAX) { + DRM_ERROR("height[%u] is out of range!\n", height); + return false; + } + + area = (unsigned long)width * (unsigned long)height * + g2d_get_buf_bpp(buf_desc->format); + if (area > size) { + DRM_ERROR("area[%lu] is out of range[%lu]!\n", area, size); + return false; + } + + return true; +} + static int g2d_map_cmdlist_gem(struct g2d_data *g2d, struct g2d_cmdlist_node *node, struct drm_device *drm_dev, @@ -581,6 +692,7 @@ static int g2d_map_cmdlist_gem(struct g2d_data *g2d, int i; for (i = 0; i < buf_info->map_nr; i++) { + struct g2d_buf_desc *buf_desc; enum g2d_reg_type reg_type; int reg_pos; unsigned long handle; @@ -597,7 +709,23 @@ static int g2d_map_cmdlist_gem(struct g2d_data *g2d, goto err; } + buf_desc = &buf_info->descs[reg_type]; + if (buf_info->types[reg_type] == BUF_TYPE_GEM) { + unsigned long size; + + size = exynos_drm_gem_get_size(drm_dev, handle, file); + if (!size) { + ret = -EFAULT; + goto err; + } + + if (!g2d_check_buf_desc_is_valid(buf_desc, reg_type, + size)) { + ret = -EFAULT; + goto err; + } + addr = exynos_drm_gem_get_dma_addr(drm_dev, handle, file); if (IS_ERR(addr)) { @@ -613,6 +741,12 @@ static int g2d_map_cmdlist_gem(struct g2d_data *g2d, goto err; } + if (!g2d_check_buf_desc_is_valid(buf_desc, reg_type, + g2d_userptr.size)) { + ret = -EFAULT; + goto err; + } + addr = g2d_userptr_get_dma_addr(drm_dev, g2d_userptr.userptr, g2d_userptr.size, @@ -645,11 +779,13 @@ static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, int i; for (i = 0; i < buf_info->map_nr; i++) { + struct g2d_buf_desc *buf_desc; enum g2d_reg_type reg_type; unsigned long handle; reg_type = buf_info->reg_types[i]; + buf_desc = &buf_info->descs[reg_type]; handle = buf_info->handles[reg_type]; if (buf_info->types[reg_type] == BUF_TYPE_GEM) @@ -662,6 +798,7 @@ static void g2d_unmap_cmdlist_gem(struct g2d_data *g2d, buf_info->reg_types[i] = REG_TYPE_NONE; buf_info->handles[reg_type] = 0; buf_info->types[reg_type] = 0; + memset(buf_desc, 0x00, sizeof(*buf_desc)); } buf_info->map_nr = 0; @@ -808,7 +945,9 @@ static int g2d_check_reg_offset(struct device *dev, for (i = 0; i < nr; i++) { struct g2d_buf_info *buf_info = &node->buf_info; + struct g2d_buf_desc *buf_desc; enum g2d_reg_type reg_type; + unsigned long value; index = cmdlist->last - 2 * (i + 1); @@ -839,6 +978,50 @@ static int g2d_check_reg_offset(struct device *dev, } else buf_info->types[reg_type] = BUF_TYPE_GEM; break; + case G2D_SRC_COLOR_MODE: + case G2D_DST_COLOR_MODE: + if (for_addr) + goto err; + + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + goto err; + + buf_desc = &buf_info->descs[reg_type]; + value = cmdlist->data[index + 1]; + + buf_desc->format = value & 0xf; + break; + case G2D_SRC_LEFT_TOP: + case G2D_DST_LEFT_TOP: + if (for_addr) + goto err; + + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + 
goto err; + + buf_desc = &buf_info->descs[reg_type]; + value = cmdlist->data[index + 1]; + + buf_desc->left_x = value & 0x1fff; + buf_desc->top_y = (value & 0x1fff0000) >> 16; + break; + case G2D_SRC_RIGHT_BOTTOM: + case G2D_DST_RIGHT_BOTTOM: + if (for_addr) + goto err; + + reg_type = g2d_get_reg_type(reg_offset); + if (reg_type == REG_TYPE_NONE) + goto err; + + buf_desc = &buf_info->descs[reg_type]; + value = cmdlist->data[index + 1]; + + buf_desc->right_x = value & 0x1fff; + buf_desc->bottom_y = (value & 0x1fff0000) >> 16; + break; default: if (for_addr) goto err; -- cgit v1.2.2 From 367f3fcd9296977bc4689546f55c5f4a9c680e8d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 20 Mar 2013 16:53:14 +0530 Subject: ARC: Fix the typo in event identifier flags used by ptrace orig_r8_IS_EXCPN and orig_r8_IS_BRKPT were the same value due to a copy/paste error. Although it looks bad and is wrong, it really doesn't affect how gdb works. orig_r8_IS_BRKPT is the one relevant to debugging (breakpoints), since it is used to provide EFA vs. ERET to a ptrace "stop_pc" request. So when gdb has inserted a breakpoint, orig_r8_IS_BRKPT is already set, and anything else (i.e. orig_r8_IS_EXCPN) having the same value really doesn't hurt gdb. The corollary case could be nasty, but nobody uses the ptrace "stop_pc" request in that case. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry.h | 2 +- arch/arc/include/asm/ptrace.h | 2 +- arch/arc/kernel/entry.S | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 23daa326fc9b..eb2ae53187d9 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -415,7 +415,7 @@ *-------------------------------------------------------------*/ .macro SAVE_ALL_EXCEPTION marker - st \marker, [sp, 8] + st \marker, [sp, 8] /* orig_r8 */ st r0, [sp, 4] /* orig_r0, needed only for sys calls */ /* Restore r9 used to code the early prologue */ diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 8ae783d20a81..6179de7e07c2 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -123,7 +123,7 @@ static inline long regs_return_value(struct pt_regs *regs) #define orig_r8_IS_SCALL 0x0001 #define orig_r8_IS_SCALL_RESTARTED 0x0002 #define orig_r8_IS_BRKPT 0x0004 -#define orig_r8_IS_EXCPN 0x0004 +#define orig_r8_IS_EXCPN 0x0008 #define orig_r8_IS_IRQ1 0x0010 #define orig_r8_IS_IRQ2 0x0020 diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index b9d875a441cc..91eeab81f52d 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -452,7 +452,7 @@ tracesys: ; using ERET won't work since next-PC has already committed lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 - st r12, [r11, THREAD_FAULT_ADDR] + st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address ; PRE Sys Call Ptrace hook mov r0, sp ; pt_regs needed -- cgit v1.2.2 From 1ada47d9468fe3907f7f9e00179168f5e2f90803 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 20 Mar 2013 09:39:42 -0400 Subject: ext4: fix ext4_evict_inode() racing against workqueue processing code Commit 84c17543ab56 (ext4: move work from io_end to inode) triggered a regression when running xfstest #270 when the file system is mounted with dioread_nolock.
The problem is that after ext4_evict_inode() calls ext4_ioend_wait(), this guarantees that last io_end structure has been freed, but it does not guarantee that the workqueue structure, which was moved into the inode by commit 84c17543ab56, is actually finished. Once ext4_flush_completed_IO() calls ext4_free_io_end() on CPU #1, this will allow ext4_ioend_wait() to return on CPU #2, at which point the evict_inode() codepath can race against the workqueue code on CPU #1 accessing EXT4_I(inode)->i_unwritten_work to find the next item of work to do. Fix this by calling cancel_work_sync() in ext4_ioend_wait(), which will be renamed ext4_ioend_shutdown(), since it is only used by ext4_evict_inode(). Also, move the call to ext4_ioend_shutdown() until after truncate_inode_pages() and filemap_write_and_wait() are called, to make sure all dirty pages have been written back and flushed from the page cache first. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] cwq_activate_delayed_work+0x3b/0x7e *pdpt = 0000000030bc3001 *pde = 0000000000000000 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: Pid: 6, comm: kworker/u:0 Not tainted 3.8.0-rc3-00013-g84c1754-dirty #91 Bochs Bochs EIP: 0060:[] EFLAGS: 00010046 CPU: 0 EIP is at cwq_activate_delayed_work+0x3b/0x7e EAX: 00000000 EBX: 00000000 ECX: f505fe54 EDX: 00000000 ESI: ed5b697c EDI: 00000006 EBP: f64b7e8c ESP: f64b7e84 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 CR0: 8005003b CR2: 00000000 CR3: 30bc2000 CR4: 000006f0 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 Process kworker/u:0 (pid: 6, ti=f64b6000 task=f64b4160 task.ti=f64b6000) Stack: f505fe00 00000006 f64b7e9c c01de3d7 f6435540 00000003 f64b7efc c01def1d f6435540 00000002 00000000 0000008a c16d0808 c040a10b c16d07d8 c16d08b0 f505fe00 c16d0780 00000000 00000000 ee153df4 c1ce4a30 c17d0e30 00000000 Call Trace: [] cwq_dec_nr_in_flight+0x71/0xfb [] process_one_work+0x5d8/0x637 [] ? ext4_end_bio+0x300/0x300 [] worker_thread+0x249/0x3ef [] kthread+0xd8/0xeb [] ? manage_workers+0x4bb/0x4bb [] ? trace_hardirqs_on+0x27/0x37 [] ret_from_kernel_thread+0x1b/0x28 [] ? 
__init_kthread_worker+0x71/0x71 Code: 01 83 15 ac ff 6c c1 00 31 db 89 c6 8b 00 a8 04 74 12 89 c3 30 db 83 05 b0 ff 6c c1 01 83 15 b4 ff 6c c1 00 89 f0 e8 42 ff ff ff <8b> 13 89 f0 83 05 b8 ff 6c c1 6c c1 00 31 c9 83 EIP: [] cwq_activate_delayed_work+0x3b/0x7e SS:ESP 0068:f64b7e84 CR2: 0000000000000000 ---[ end trace a1923229da53d8a4 ]--- Signed-off-by: "Theodore Ts'o" Cc: Jan Kara --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 4 ++-- fs/ext4/page-io.c | 12 +++++++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 167ff564bbfa..3b83cd604796 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, extern int __init ext4_init_pageio(void); extern void ext4_add_complete_io(ext4_io_end_t *io_end); extern void ext4_exit_pageio(void); -extern void ext4_ioend_wait(struct inode *); +extern void ext4_ioend_shutdown(struct inode *); extern void ext4_free_io_end(ext4_io_end_t *io); extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); extern void ext4_end_io_work(struct work_struct *work); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 65bbc9339aca..ea5f24ffa60c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); - ext4_ioend_wait(inode); - if (inode->i_nlink) { /* * When journalling data dirty buffers are tracked only in the @@ -216,6 +214,7 @@ void ext4_evict_inode(struct inode *inode) filemap_write_and_wait(&inode->i_data); } truncate_inode_pages(&inode->i_data, 0); + ext4_ioend_shutdown(inode); goto no_delete; } @@ -225,6 +224,7 @@ void ext4_evict_inode(struct inode *inode) if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages(&inode->i_data, 0); + ext4_ioend_shutdown(inode); if (is_bad_inode(inode)) goto no_delete; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 809b31003ecc..047a6de04a0a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -50,11 +50,21 @@ void ext4_exit_pageio(void) kmem_cache_destroy(io_page_cachep); } -void ext4_ioend_wait(struct inode *inode) +/* + * This function is called by ext4_evict_inode() to make sure there is + * no more pending I/O completion work left to do. + */ +void ext4_ioend_shutdown(struct inode *inode) { wait_queue_head_t *wq = ext4_ioend_wq(inode); wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); + /* + * We need to make sure the work structure is finished being + * used before we let the inode get destroyed. + */ + if (work_pending(&EXT4_I(inode)->i_unwritten_work)) + cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); } static void put_io_page(struct ext4_io_page *io_page) -- cgit v1.2.2 From 2b405bfa84063bfa35621d2d6879f52693c614b0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 20 Mar 2013 09:42:11 -0400 Subject: ext4: fix data=journal fast mount/umount hang In data=journal mode, if we unmount the file system before a transaction has a chance to complete, when the journal inode is being evicted, we can end up calling into jbd2_log_wait_commit() for the last transaction, after the journalling machinery has been shut down. 
Arguably we should adjust ext4_should_journal_data() to return FALSE for the journal inode, but the only place it matters is ext4_evict_inode(), and so to save a bit of CPU time, and to make the patch much more obviously correct by inspection(tm), we'll fix it by explicitly not trying to waiting for a journal commit when we are evicting the journal inode, since it's guaranteed to never succeed in this case. This can be easily replicated via: mount -t ext4 -o data=journal /dev/vdb /vdb ; umount /vdb ------------[ cut here ]------------ WARNING: at /usr/projects/linux/ext4/fs/jbd2/journal.c:542 __jbd2_log_start_commit+0xba/0xcd() Hardware name: Bochs JBD2: bad log_start_commit: 3005630206 3005630206 0 0 Modules linked in: Pid: 2909, comm: umount Not tainted 3.8.0-rc3 #1020 Call Trace: [] warn_slowpath_common+0x68/0x7d [] ? __jbd2_log_start_commit+0xba/0xcd [] warn_slowpath_fmt+0x2b/0x2f [] __jbd2_log_start_commit+0xba/0xcd [] jbd2_log_start_commit+0x24/0x34 [] ext4_evict_inode+0x71/0x2e3 [] evict+0x94/0x135 [] iput+0x10a/0x110 [] jbd2_journal_destroy+0x190/0x1ce [] ? bit_waitqueue+0x50/0x50 [] ext4_put_super+0x52/0x294 [] generic_shutdown_super+0x48/0xb4 [] kill_block_super+0x22/0x60 [] deactivate_locked_super+0x22/0x49 [] deactivate_super+0x30/0x33 [] mntput_no_expire+0x107/0x10c [] sys_umount+0x2cf/0x2e0 [] sys_oldumount+0x12/0x14 [] syscall_call+0x7/0xb ---[ end trace 6a954cc790501c1f ]--- jbd2_log_wait_commit: error: j_commit_request=-1289337090, tid=0 Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ea5f24ffa60c..85e41a2a39ad 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -205,7 +205,8 @@ void ext4_evict_inode(struct inode *inode) * don't use page cache. */ if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && + inode->i_ino != EXT4_JOURNAL_INO) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; -- cgit v1.2.2 From a686fd141e20244ad75f80ad54706da07d7bb90a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 Mar 2013 15:42:00 +0100 Subject: ALSA: hda - Fix typo in checking IEC958 emphasis bit There is a typo in convert_to_spdif_status() about checking the emphasis IEC958 status bit. It should check the given value instead of the resultant value. Reported-by: Martin Weishart Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a9ebcf9e3710..ecdf30eb5879 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3144,7 +3144,7 @@ static unsigned int convert_to_spdif_status(unsigned short val) if (val & AC_DIG1_PROFESSIONAL) sbits |= IEC958_AES0_PROFESSIONAL; if (sbits & IEC958_AES0_PROFESSIONAL) { - if (sbits & AC_DIG1_EMPHASIS) + if (val & AC_DIG1_EMPHASIS) sbits |= IEC958_AES0_PRO_EMPHASIS_5015; } else { if (val & AC_DIG1_EMPHASIS) -- cgit v1.2.2 From 699412d951e6dd4dec48db88f33dc27b361582f0 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Mon, 18 Mar 2013 10:14:47 +0200 Subject: usb: gadget: net22xx: fix ->disconnect reporting with the latest udc_start/udc_stop conversion, too much code was deleted which ended up creating a regression in net2272 and net2280 drivers. 
To fix the regression we revert one hunk of the original commits. Signed-off-by: Felipe Balbi --- drivers/usb/gadget/net2272.c | 7 +++++++ drivers/usb/gadget/net2280.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c index d226058e3b88..17628337c6b0 100644 --- a/drivers/usb/gadget/net2272.c +++ b/drivers/usb/gadget/net2272.c @@ -1495,6 +1495,13 @@ stop_activity(struct net2272 *dev, struct usb_gadget_driver *driver) for (i = 0; i < 4; ++i) net2272_dequeue_all(&dev->ep[i]); + /* report disconnect; the driver is already quiesced */ + if (driver) { + spin_unlock(&dev->lock); + driver->disconnect(&dev->gadget); + spin_lock(&dev->lock); + } + net2272_usb_reinit(dev); } diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index a1b650e11339..3105a4d601c8 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -1946,6 +1946,13 @@ stop_activity (struct net2280 *dev, struct usb_gadget_driver *driver) for (i = 0; i < 7; i++) nuke (&dev->ep [i]); + /* report disconnect; the driver is already quiesced */ + if (driver) { + spin_unlock(&dev->lock); + driver->disconnect(&dev->gadget); + spin_lock(&dev->lock); + } + usb_reinit (dev); } -- cgit v1.2.2 From 511f3c5326eabe1ece35202a404c24c0aeacc246 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Mar 2013 14:02:14 -0400 Subject: usb: gadget: udc-core: fix a regression during gadget driver unbinding This patch (as1666) fixes a regression in the UDC core. The core takes care of unbinding gadget drivers, and it does the unbinding before telling the UDC driver to turn off the controller hardware. When the call to the udc_stop callback is made, the gadget no longer has a driver. The callback routine should not be invoked with a pointer to the old driver; doing so can cause problems (such as use-after-free accesses in net2280). This patch should be applied, with appropriate context changes, to all the stable kernels going back to 3.1. Signed-off-by: Alan Stern CC: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index 2a9cd369f71c..f8f62c3ed65e 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -216,7 +216,7 @@ static void usb_gadget_remove_driver(struct usb_udc *udc) usb_gadget_disconnect(udc->gadget); udc->driver->disconnect(udc->gadget); udc->driver->unbind(udc->gadget); - usb_gadget_udc_stop(udc->gadget, udc->driver); + usb_gadget_udc_stop(udc->gadget, NULL); udc->driver = NULL; udc->dev.driver = NULL; -- cgit v1.2.2 From 8119b55aed818e590c26cb97706c914e3d660fd8 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 15 Mar 2013 14:03:17 -0400 Subject: USB: gadget: net2280: remove leftover driver->unbind call in error pathway This patch (as1667) removes an incorrect driver->unbind() call from the net2280 driver. If startup fails, the UDC core takes care of unbinding the gadget driver automatically; the controller driver shouldn't do it too. 
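The rule underlying both UDC patches above is that teardown callbacks need exactly one owner. A toy sketch of why a second unbind in the controller's error path is dangerous (illustrative only, not the real gadget API):

#include <assert.h>
#include <stdio.h>

static int bound;

static void unbind(void)
{
        assert(bound); /* a double unbind would trip here */
        bound = 0;
        printf("unbind: released driver resources\n");
}

/* the core owns the error path... */
static int core_start(int start_ok)
{
        bound = 1;
        if (!start_ok) {
                unbind(); /* core unwinds the failed start itself */
                return -1;
        }
        return 0;
}

int main(void)
{
        if (core_start(0) < 0) {
                /* ...so a controller driver must NOT call unbind()
                 * again here; with the leftover call removed, the
                 * assert above can never fire. */
        }
        return 0;
}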
Signed-off-by: Alan Stern Signed-off-by: Felipe Balbi --- drivers/usb/gadget/net2280.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index 3105a4d601c8..3bd0f992fb49 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -1924,7 +1924,6 @@ static int net2280_start(struct usb_gadget *_gadget, err_func: device_remove_file (&dev->pdev->dev, &dev_attr_function); err_unbind: - driver->unbind (&dev->gadget); dev->gadget.dev.driver = NULL; dev->driver = NULL; return retval; -- cgit v1.2.2 From 3416905ba058e43112ad7b1b4859797f027f5a07 Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Mon, 11 Mar 2013 16:32:14 +0100 Subject: usb: gadget: ffs: fix enable multiple instances This patch fixes an "off-by-one" bug found in 581791f (FunctionFS: enable multiple functions). During gfs_bind/gfs_unbind the functionfs_bind/functionfs_unbind should be called for every functionfs instance. With the "i" pre-decremented they were not called for the zeroth instance. Acked-by: Michal Nazarewicz Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Kyungmin Park Cc: [ balbi@ti.com : added offending commit's subject ] Signed-off-by: Felipe Balbi --- drivers/usb/gadget/g_ffs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/g_ffs.c b/drivers/usb/gadget/g_ffs.c index 3953dd4d7186..3b343b23e4b0 100644 --- a/drivers/usb/gadget/g_ffs.c +++ b/drivers/usb/gadget/g_ffs.c @@ -357,7 +357,7 @@ static int gfs_bind(struct usb_composite_dev *cdev) goto error; gfs_dev_desc.iProduct = gfs_strings[USB_GADGET_PRODUCT_IDX].id; - for (i = func_num; --i; ) { + for (i = func_num; i--; ) { ret = functionfs_bind(ffs_tab[i].ffs_data, cdev); if (unlikely(ret < 0)) { while (++i < func_num) @@ -413,7 +413,7 @@ static int gfs_unbind(struct usb_composite_dev *cdev) gether_cleanup(); gfs_ether_setup = false; - for (i = func_num; --i; ) + for (i = func_num; i--; ) if (ffs_tab[i].ffs_data) functionfs_unbind(ffs_tab[i].ffs_data); -- cgit v1.2.2 From 7fa4cd1a78ea5af688ffce45553abbee9d7afd84 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 20 Mar 2013 10:35:44 -0300 Subject: usb: ulpi: Define a *otg_ulpi_create no-op Building a kernel for imx_v4_v5_defconfig with CONFIG_USB_ULPI disabled, results in the following error: arch/arm/mach-imx/built-in.o: In function 'pca100_init': platform-mx2-emma.c:(.init.text+0x6788): undefined reference to 'otg_ulpi_create' platform-mx2-emma.c:(.init.text+0x682c): undefined reference to 'mxc_ulpi_access_ops' Fix this by providing a no-op definition of *otg_ulpi_create for the case when CONFIG_USB_ULPI is not defined. 
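The stub pattern used below is a common kernel idiom: when a feature is compiled out, the header supplies a static inline that fails gracefully, so callers link unconditionally and only need a runtime NULL check. A compact, compilable illustration with a plain #define standing in for the kconfig option (hypothetical names throughout):

#include <stdio.h>

/* set to 1 to emulate the feature being compiled in */
#define CONFIG_DEMO_FEATURE 0

struct phy { const char *name; };

#if CONFIG_DEMO_FEATURE
struct phy *feature_create(void); /* real implementation elsewhere */
#else
static inline struct phy *feature_create(void)
{
        return NULL; /* no-op stub: callers still link */
}
#endif

int main(void)
{
        struct phy *p = feature_create();

        if (!p) { /* caller handles the compiled-out case */
                fprintf(stderr, "feature unavailable, skipping\n");
                return 0;
        }
        printf("using %s\n", p->name);
        return 0;
}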
Acked-by: Igor Grinberg Signed-off-by: Fabio Estevam Signed-off-by: Felipe Balbi --- include/linux/usb/ulpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 6f033a415ecb..5c295c26ad37 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -181,8 +181,16 @@ /*-------------------------------------------------------------------------*/ +#if IS_ENABLED(CONFIG_USB_ULPI) struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, unsigned int flags); +#else +static inline struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, + unsigned int flags) +{ + return NULL; +} +#endif #ifdef CONFIG_USB_ULPI_VIEWPORT /* access ops for controllers with a viewport register */ -- cgit v1.2.2 From 967baed40eaaf6df632b7e929b903140a9744b87 Mon Sep 17 00:00:00 2001 From: Truls Bengtsson Date: Wed, 20 Mar 2013 14:02:25 +0100 Subject: usb: gadget: f_rndis: Avoid to use ERROR macro if cdev can be null The udc_irq service runs the isr_tr_complete_handler which in turn "nukes" the endpoints, including a call to rndis_response_complete, if appropriate. If the rndis_msg_parser fails here, an error will be printed using a dev_err call (through the ERROR() macro). However, if the usb cable was just disconnected the device (cdev) might not be available and will be null. Since the dev_err macro will dereference the cdev pointer we get a null pointer exception. Reviewed-by: Radovan Lekanovic Signed-off-by: Truls Bengtsson Signed-off-by: Oskar Andero Acked-by: Michal Nazarewicz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/f_rndis.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 71beeb833558..cc9c49c57c80 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -447,14 +447,13 @@ static void rndis_response_complete(struct usb_ep *ep, struct usb_request *req) static void rndis_command_complete(struct usb_ep *ep, struct usb_request *req) { struct f_rndis *rndis = req->context; - struct usb_composite_dev *cdev = rndis->port.func.config->cdev; int status; /* received RNDIS command from USB_CDC_SEND_ENCAPSULATED_COMMAND */ // spin_lock(&dev->lock); status = rndis_msg_parser(rndis->config, (u8 *) req->buf); if (status < 0) - ERROR(cdev, "RNDIS command error %d, %d/%d\n", + pr_err("RNDIS command error %d, %d/%d\n", status, req->actual, req->length); // spin_unlock(&dev->lock); } -- cgit v1.2.2 From f1e79e208076ffe7bad97158275f1c572c04f5c7 Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Tue, 19 Mar 2013 01:47:27 +0000 Subject: genetlink: trigger BUG_ON if a group name is too long Trigger BUG_ON if a group name is longer than GENL_NAMSIZ. Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f2aabb6f4105..5a55be3f17a5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -142,6 +142,7 @@ int genl_register_mc_group(struct genl_family *family, int err = 0; BUG_ON(grp->name[0] == '\0'); + BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); genl_lock(); -- cgit v1.2.2 From 44046a593eb770dbecdabf1c82bcd252f2a8337b Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:12 +0000 Subject: udp: add encap_destroy callback Users of udp encapsulation currently have an encap_rcv callback which they can use to hook into the udp receive path. 
In situations where an encapsulation user allocates resources associated with a udp encap socket, it may be convenient to be able to also hook the proto .destroy operation. For example, if an encap user holds a reference to the udp socket, the destroy hook might be used to relinquish this reference. This patch adds a socket destroy hook into udp, which is set and enabled in the same way as the existing encap_rcv hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- include/linux/udp.h | 1 + net/ipv4/udp.c | 7 +++++++ net/ipv6/udp.c | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/include/linux/udp.h b/include/linux/udp.h index 9d81de123c90..42278bbf7a88 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -68,6 +68,7 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + void (*encap_destroy)(struct sock *sk); }; static inline struct udp_sock *udp_sk(const struct sock *sk) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 265c42cf963c..0a073a263720 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1762,9 +1762,16 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); + if (static_key_false(&udp_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 599e1ba6d1ce..d8e5e852fc7a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1285,10 +1285,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.2.2 From 9980d001cec86c3c75f3a6008ddb73c397ea3b3e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:13 +0000 Subject: l2tp: add udp encap socket destroy handler L2TP sessions hold a reference to the tunnel socket to prevent it going away while sessions are still active. However, since tunnel destruction is handled by the sock sk_destruct callback there is a catch-22: a tunnel with sessions cannot be deleted since each session holds a reference to the tunnel socket. If userspace closes a managed tunnel socket, or dies, the tunnel will persist and it will be necessary to individually delete the sessions using netlink commands. This is ugly. To prevent this from occurring, this patch leverages the udp encapsulation socket destroy callback to gain early notification when the tunnel socket is closed. This allows us to safely close the sessions running in the tunnel, dropping the tunnel socket references in the process. The tunnel socket is then destroyed as normal, and the tunnel resources deallocated in sk_destruct. While we're at it, ensure that l2tp_tunnel_closeall correctly drops session references to allow the sessions to be deleted rather than leaking. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S.
Miller --- net/l2tp/l2tp_core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d36875f3427e..ee726a752292 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1282,6 +1282,7 @@ static void l2tp_tunnel_destruct(struct sock *sk) /* No longer an encapsulation socket. See net/ipv4/udp.c */ (udp_sk(sk))->encap_type = 0; (udp_sk(sk))->encap_rcv = NULL; + (udp_sk(sk))->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; @@ -1360,6 +1361,8 @@ again: if (session->deref != NULL) (*session->deref)(session); + l2tp_session_dec_refcount(session); + write_lock_bh(&tunnel->hlist_lock); /* Now restart from the beginning of this hash @@ -1373,6 +1376,16 @@ again: write_unlock_bh(&tunnel->hlist_lock); } +/* Tunnel socket destroy hook for UDP encapsulation */ +static void l2tp_udp_encap_destroy(struct sock *sk) +{ + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } +} + /* Really kill the tunnel. * Come here only when all sessions have been cleared from the tunnel. */ @@ -1668,6 +1681,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP; udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; + udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) udpv6_encap_enable(); -- cgit v1.2.2 From e34f4c7050e5471b6d4fb25380713937fc837514 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:14 +0000 Subject: l2tp: export l2tp_tunnel_closeall l2tp_core internally uses l2tp_tunnel_closeall to close all sessions in a tunnel when a UDP-encapsulation socket is destroyed. We need to do something similar for IP-encapsulation sockets. Export l2tp_tunnel_closeall as a GPL symbol to enable l2tp_ip and l2tp_ip6 to call it from their .destroy handlers. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- net/l2tp/l2tp_core.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ee726a752292..287e327342d1 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -114,7 +114,6 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); static inline struct l2tp_net *l2tp_pernet(struct net *net) { @@ -1312,7 +1311,7 @@ end: /* When the tunnel is closed, all the attached sessions need to go too. 
*/ -static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { int hash; struct hlist_node *walk; @@ -1375,6 +1374,7 @@ again: } write_unlock_bh(&tunnel->hlist_lock); } +EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall); /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 8eb8f1d47f3a..b0861f68a10b 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -240,6 +240,7 @@ extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); +extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); extern int l2tp_session_delete(struct l2tp_session *session); -- cgit v1.2.2 From 936063175afd895913a5e9db77e1a0ef43ea44ea Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:15 +0000 Subject: l2tp: close sessions in ip socket destroy callback l2tp_core hooks UDP's .destroy handler to gain advance warning of a tunnel socket being closed from userspace. We need to do the same thing for IP-encapsulation sockets. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 6 ++++++ net/l2tp/l2tp_ip6.c | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7f41b7051269..571db8dd2292 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -228,10 +228,16 @@ static void l2tp_ip_close(struct sock *sk, long timeout) static void l2tp_ip_destroy_sock(struct sock *sk) { struct sk_buff *skb; + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + sk_refcnt_debug_dec(sk); } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 41f2f8126ebc..c74f5a91ff6a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -241,10 +241,17 @@ static void l2tp_ip6_close(struct sock *sk, long timeout) static void l2tp_ip6_destroy_sock(struct sock *sk) { + struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk); + lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); + if (tunnel) { + l2tp_tunnel_closeall(tunnel); + sock_put(sk); + } + inet6_destroy_sock(sk); } -- cgit v1.2.2 From 2b551c6e7d5bca2c78c216b15ef675653d4f459a Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:16 +0000 Subject: l2tp: close sessions before initiating tunnel delete When a user deletes a tunnel using netlink, all the sessions in the tunnel should also be deleted. Since running sessions will pin the tunnel socket with the references they hold, have the l2tp_tunnel_delete close all sessions in a tunnel before finally closing the tunnel socket. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 287e327342d1..0dd50c079f29 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1737,6 +1737,7 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_closeall(tunnel); return (false == queue_work(l2tp_wq, &tunnel->del_work)); } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); -- cgit v1.2.2 From 8abbbe8ff572fd84d1b98eb9acf30611a97cf72e Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:17 +0000 Subject: l2tp: take a reference for kernel sockets in l2tp_tunnel_sock_lookup When looking up the tunnel socket in struct l2tp_tunnel, hold a reference whether the socket was created by the kernel or by userspace. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 0dd50c079f29..45373fee38c5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -191,6 +191,7 @@ struct sock *l2tp_tunnel_sock_lookup(struct l2tp_tunnel *tunnel) } else { /* Socket is owned by kernelspace */ sk = tunnel->sock; + sock_hold(sk); } out: @@ -209,6 +210,7 @@ void l2tp_tunnel_sock_put(struct sock *sk) } sock_put(sk); } + sock_put(sk); } EXPORT_SYMBOL_GPL(l2tp_tunnel_sock_put); -- cgit v1.2.2 From 02d13ed5f94af38c37d1abd53462fe48d78bcc9d Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:18 +0000 Subject: l2tp: don't BUG_ON sk_socket being NULL It is valid for an existing struct sock object to have a NULL sk_socket pointer, so don't BUG_ON in l2tp_tunnel_del_work if that should occur. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 45373fee38c5..e841ef2a68a5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1412,19 +1412,21 @@ static void l2tp_tunnel_del_work(struct work_struct *work) return; sock = sk->sk_socket; - BUG_ON(!sock); - /* If the tunnel socket was created directly by the kernel, use the - * sk_* API to release the socket now. Otherwise go through the - * inet_* layer to shut the socket down, and let userspace close it. + /* If the tunnel socket was created by userspace, then go through the + * inet layer to shut the socket down, and let userspace close it. + * Otherwise, if we created the socket directly within the kernel, use + * the sk API to release it here. * In either case the tunnel resources are freed in the socket * destructor when the tunnel socket goes away. */ - if (sock->file == NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sk); + if (tunnel->fd >= 0) { + if (sock) + inet_shutdown(sock, 2); } else { - inet_shutdown(sock, 2); + if (sock) + kernel_sock_shutdown(sock, SHUT_RDWR); + sk_release_kernel(sk); } l2tp_tunnel_sock_put(sk); -- cgit v1.2.2 From 48f72f92b31431c40279b0fba6c5588e07e67d95 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:19 +0000 Subject: l2tp: add session reorder queue purge function to core If an l2tp session is deleted, it is necessary to delete skbs in-flight on the session's reorder queue before taking it down. 
Rather than having each pseudowire implementation reaching into the l2tp_session struct to handle this itself, provide a function in l2tp_core to purge the session queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 17 +++++++++++++++++ net/l2tp/l2tp_core.h | 1 + 2 files changed, 18 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e841ef2a68a5..69c316dd02dc 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -829,6 +829,23 @@ discard: } EXPORT_SYMBOL(l2tp_recv_common); +/* Drop skbs from the session's reorder_q + */ +int l2tp_session_queue_purge(struct l2tp_session *session) +{ + struct sk_buff *skb = NULL; + BUG_ON(!session); + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + while ((skb = skb_dequeue(&session->reorder_q))) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + if (session->deref) + (*session->deref)(session); + } + return 0; +} +EXPORT_SYMBOL_GPL(l2tp_session_queue_purge); + /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame * here. The skb is not on a list when we get here. * Returns 0 if the packet was a data packet and was successfully passed on. diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index b0861f68a10b..d40713d105fc 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -246,6 +246,7 @@ extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunne extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); +extern int l2tp_session_queue_purge(struct l2tp_session *session); extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); -- cgit v1.2.2 From 4c6e2fd35460208596fa099ee0750a4b0438aa5c Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:20 +0000 Subject: l2tp: purge session reorder queue on delete Add calls to l2tp_session_queue_purge as a part of l2tp_tunnel_closeall and l2tp_session_delete. Pseudowire implementations which are deleted only via. l2tp_core l2tp_session_delete calls can dispense with their own code for flushing the reorder queue. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 69c316dd02dc..c00f31b8cc04 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1373,6 +1373,8 @@ again: synchronize_rcu(); } + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); @@ -1813,6 +1815,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_free); */ int l2tp_session_delete(struct l2tp_session *session) { + l2tp_session_queue_purge(session); + if (session->session_close != NULL) (*session->session_close)(session); -- cgit v1.2.2 From cf2f5c886a209377daefd5d2ba0bcd49c3887813 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:21 +0000 Subject: l2tp: push all ppp pseudowire shutdown through .release handler If userspace deletes a ppp pseudowire using the netlink API, either by directly deleting the session or by deleting the tunnel that contains the session, we need to tear down the corresponding pppox channel. Rather than trying to manage two pppox unbind codepaths, switch the netlink and l2tp_core session_close handlers to close via the l2tp_ppp socket .release handler. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 53 ++++++++++------------------------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6a53371dba1f..7e3e16aefcb5 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -447,34 +448,16 @@ static void pppol2tp_session_close(struct l2tp_session *session) { struct pppol2tp_session *ps = l2tp_session_priv(session); struct sock *sk = ps->sock; - struct sk_buff *skb; + struct socket *sock = sk->sk_socket; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (session->session_id == 0) - goto out; - - if (sk != NULL) { - lock_sock(sk); - - if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - sk->sk_state = PPPOX_DEAD; - sk->sk_state_change(sk); - } - - /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } - release_sock(sk); + if (sock) { + inet_shutdown(sock, 2); + /* Don't let the session go away before our socket does */ + l2tp_session_inc_refcount(session); } - -out: return; } @@ -525,16 +508,12 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); /* Purge any queued data */ - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); if (session != NULL) { - struct sk_buff *skb; - while ((skb = skb_dequeue(&session->reorder_q))) { - kfree_skb(skb); - sock_put(sk); - } + l2tp_session_queue_purge(session); sock_put(sk); } + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); release_sock(sk); @@ -880,18 +859,6 @@ out: return error; } -/* Called when deleting sessions via the netlink interface. - */ -static int pppol2tp_session_delete(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock == NULL) - l2tp_session_dec_refcount(session); - - return 0; -} - #endif /* CONFIG_L2TP_V3 */ /* getname() support.
@@ -1839,7 +1806,7 @@ static const struct pppox_proto pppol2tp_proto = { static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = { .session_create = pppol2tp_session_create, - .session_delete = pppol2tp_session_delete, + .session_delete = l2tp_session_delete, }; #endif /* CONFIG_L2TP_V3 */ -- cgit v1.2.2 From 7b7c0719cd7afee725b920d75ec6a500b76107e6 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:22 +0000 Subject: l2tp: avoid deadlock in l2tp stats update l2tp's u64_stats writers were incorrectly synchronised, making it possible to deadlock a 64bit machine running a 32bit kernel simply by sending the l2tp code netlink commands while passing data through l2tp sessions. Previous discussion on netdev determined that alternative solutions such as spinlock writer synchronisation or per-cpu data would bring unjustified overhead, given that most users interested in high volume traffic will likely be running 64bit kernels on 64bit hardware. As such, this patch replaces l2tp's use of u64_stats with atomic_long_t, thereby avoiding the deadlock. Ref: http://marc.info/?l=linux-netdev&m=134029167910731&w=2 http://marc.info/?l=linux-netdev&m=134079868111131&w=2 Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 +++++++++++++------------------------------------ net/l2tp/l2tp_core.h | 19 ++++++------- net/l2tp/l2tp_debugfs.c | 28 +++++++++--------- net/l2tp/l2tp_netlink.c | 72 ++++++++++++++++++----------------------------- net/l2tp/l2tp_ppp.c | 46 +++++++++++++++--------------- 5 files changed, 93 insertions(+), 147 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c00f31b8cc04..97d30ac67c88 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -374,10 +374,8 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; - struct l2tp_stats *sstats; spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); @@ -385,9 +383,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n", session->name, ns, L2TP_SKB_CB(skbp)->ns, skb_queue_len(&session->reorder_q)); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_oos_packets++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } @@ -404,23 +400,16 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; - struct l2tp_stats *tstats, *sstats; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). 
*/ skb_orphan(skb); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += length; - sstats->rx_packets++; - sstats->rx_bytes += length; - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&tunnel->stats.rx_packets); + atomic_long_add(length, &tunnel->stats.rx_bytes); + atomic_long_inc(&session->stats.rx_packets); + atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ @@ -451,7 +440,6 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; - struct l2tp_stats *sstats; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other @@ -459,13 +447,10 @@ static void l2tp_recv_dequeue(struct l2tp_session *session) */ start: spin_lock_bh(&session->reorder_q.lock); - sstats = &session->stats; skb_queue_walk_safe(&session->reorder_q, skb, tmp) { if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); + atomic_long_inc(&session->stats.rx_errors); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded (too old), waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -624,7 +609,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; int offset; u32 ns, nr; - struct l2tp_stats *sstats = &session->stats; /* The ref count is increased since we now hold a pointer to * the session. Take care to decrement the refcnt when exiting @@ -641,9 +625,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, "%s: cookie mismatch (%u/%u). Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_cookie_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; @@ -712,9 +694,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -733,9 +713,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, l2tp_warn(session, L2TP_MSG_SEQ, "%s: recv data has no seq numbers when required. 
Discarding.\n", session->name); - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } @@ -789,9 +767,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * packets */ if (L2TP_SKB_CB(skb)->ns != session->nr) { - u64_stats_update_begin(&sstats->syncp); - sstats->rx_seq_discards++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_seq_discards); l2tp_dbg(session, L2TP_MSG_SEQ, "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", session->name, L2TP_SKB_CB(skb)->ns, @@ -817,9 +793,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, return; discard: - u64_stats_update_begin(&sstats->syncp); - sstats->rx_errors++; - u64_stats_update_end(&sstats->syncp); + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); if (session->deref) @@ -861,7 +835,6 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, u32 tunnel_id, session_id; u16 version; int length; - struct l2tp_stats *tstats; if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb)) goto discard_bad_csum; @@ -950,10 +923,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, discard_bad_csum: LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name); UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0); - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - tstats->rx_errors++; - u64_stats_update_end(&tstats->syncp); + atomic_long_inc(&tunnel->stats.rx_errors); kfree_skb(skb); return 0; @@ -1080,7 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, struct l2tp_tunnel *tunnel = session->tunnel; unsigned int len = skb->len; int error; - struct l2tp_stats *tstats, *sstats; /* Debug */ if (session->send_seq) @@ -1109,21 +1078,15 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, error = ip_queue_xmit(skb, fl); /* Update stats */ - tstats = &tunnel->stats; - u64_stats_update_begin(&tstats->syncp); - sstats = &session->stats; - u64_stats_update_begin(&sstats->syncp); if (error >= 0) { - tstats->tx_packets++; - tstats->tx_bytes += len; - sstats->tx_packets++; - sstats->tx_bytes += len; + atomic_long_inc(&tunnel->stats.tx_packets); + atomic_long_add(len, &tunnel->stats.tx_bytes); + atomic_long_inc(&session->stats.tx_packets); + atomic_long_add(len, &session->stats.tx_bytes); } else { - tstats->tx_errors++; - sstats->tx_errors++; + atomic_long_inc(&tunnel->stats.tx_errors); + atomic_long_inc(&session->stats.tx_errors); } - u64_stats_update_end(&tstats->syncp); - u64_stats_update_end(&sstats->syncp); return 0; } diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index d40713d105fc..519b013f8b31 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -36,16 +36,15 @@ enum { struct sk_buff; struct l2tp_stats { - u64 tx_packets; - u64 tx_bytes; - u64 tx_errors; - u64 rx_packets; - u64 rx_bytes; - u64 rx_seq_discards; - u64 rx_oos_packets; - u64 rx_errors; - u64 rx_cookie_discards; - struct u64_stats_sync syncp; + atomic_long_t tx_packets; + atomic_long_t tx_bytes; + atomic_long_t tx_errors; + atomic_long_t rx_packets; + atomic_long_t rx_bytes; + atomic_long_t rx_seq_discards; + atomic_long_t rx_oos_packets; + atomic_long_t rx_errors; + atomic_long_t rx_cookie_discards; }; struct l2tp_tunnel; diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 
c3813bc84552..072d7202e182 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -146,14 +146,14 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v) tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0, atomic_read(&tunnel->ref_count)); - seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); if (tunnel->show != NULL) tunnel->show(m, tunnel); @@ -203,14 +203,14 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v) seq_printf(m, "\n"); } - seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu tx %ld/%ld/%ld rx %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (session->show != NULL) session->show(m, session); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index c1bab22db85e..0825ff26e113 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -246,8 +246,6 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np = NULL; #endif - struct l2tp_stats stats; - unsigned int start; hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); @@ -265,28 +263,22 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&tunnel->stats.syncp); - stats.tx_packets = tunnel->stats.tx_packets; - stats.tx_bytes = tunnel->stats.tx_bytes; - stats.tx_errors = tunnel->stats.tx_errors; - stats.rx_packets = tunnel->stats.rx_packets; - stats.rx_bytes = tunnel->stats.rx_bytes; - stats.rx_errors = tunnel->stats.rx_errors; - stats.rx_seq_discards = tunnel->stats.rx_seq_discards; - stats.rx_oos_packets = tunnel->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&tunnel->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&tunnel->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&tunnel->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + 
atomic_long_read(&tunnel->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&tunnel->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&tunnel->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&tunnel->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&tunnel->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&tunnel->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); @@ -612,8 +604,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl struct nlattr *nest; struct l2tp_tunnel *tunnel = session->tunnel; struct sock *sk = NULL; - struct l2tp_stats stats; - unsigned int start; sk = tunnel->sock; @@ -656,28 +646,22 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if (nest == NULL) goto nla_put_failure; - do { - start = u64_stats_fetch_begin(&session->stats.syncp); - stats.tx_packets = session->stats.tx_packets; - stats.tx_bytes = session->stats.tx_bytes; - stats.tx_errors = session->stats.tx_errors; - stats.rx_packets = session->stats.rx_packets; - stats.rx_bytes = session->stats.rx_bytes; - stats.rx_errors = session->stats.rx_errors; - stats.rx_seq_discards = session->stats.rx_seq_discards; - stats.rx_oos_packets = session->stats.rx_oos_packets; - } while (u64_stats_fetch_retry(&session->stats.syncp, start)); - - if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, stats.tx_packets) || - nla_put_u64(skb, L2TP_ATTR_TX_BYTES, stats.tx_bytes) || - nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, stats.tx_errors) || - nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, stats.rx_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_BYTES, stats.rx_bytes) || + if (nla_put_u64(skb, L2TP_ATTR_TX_PACKETS, + atomic_long_read(&session->stats.tx_packets)) || + nla_put_u64(skb, L2TP_ATTR_TX_BYTES, + atomic_long_read(&session->stats.tx_bytes)) || + nla_put_u64(skb, L2TP_ATTR_TX_ERRORS, + atomic_long_read(&session->stats.tx_errors)) || + nla_put_u64(skb, L2TP_ATTR_RX_PACKETS, + atomic_long_read(&session->stats.rx_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_BYTES, + atomic_long_read(&session->stats.rx_bytes)) || nla_put_u64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, - stats.rx_seq_discards) || + atomic_long_read(&session->stats.rx_seq_discards)) || nla_put_u64(skb, L2TP_ATTR_RX_OOS_PACKETS, - stats.rx_oos_packets) || - nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, stats.rx_errors)) + atomic_long_read(&session->stats.rx_oos_packets)) || + nla_put_u64(skb, L2TP_ATTR_RX_ERRORS, + atomic_long_read(&session->stats.rx_errors))) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7e3e16aefcb5..9d0eb8c13530 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -260,7 +260,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int session->name); /* Not bound. Nothing we can do, so discard. 
*/ - session->stats.rx_errors++; + atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } @@ -992,14 +992,14 @@ end: static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest, struct l2tp_stats *stats) { - dest->tx_packets = stats->tx_packets; - dest->tx_bytes = stats->tx_bytes; - dest->tx_errors = stats->tx_errors; - dest->rx_packets = stats->rx_packets; - dest->rx_bytes = stats->rx_bytes; - dest->rx_seq_discards = stats->rx_seq_discards; - dest->rx_oos_packets = stats->rx_oos_packets; - dest->rx_errors = stats->rx_errors; + dest->tx_packets = atomic_long_read(&stats->tx_packets); + dest->tx_bytes = atomic_long_read(&stats->tx_bytes); + dest->tx_errors = atomic_long_read(&stats->tx_errors); + dest->rx_packets = atomic_long_read(&stats->rx_packets); + dest->rx_bytes = atomic_long_read(&stats->rx_bytes); + dest->rx_seq_discards = atomic_long_read(&stats->rx_seq_discards); + dest->rx_oos_packets = atomic_long_read(&stats->rx_oos_packets); + dest->rx_errors = atomic_long_read(&stats->rx_errors); } /* Session ioctl helper. @@ -1633,14 +1633,14 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v) tunnel->name, (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N', atomic_read(&tunnel->ref_count) - 1); - seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n", tunnel->debug, - (unsigned long long)tunnel->stats.tx_packets, - (unsigned long long)tunnel->stats.tx_bytes, - (unsigned long long)tunnel->stats.tx_errors, - (unsigned long long)tunnel->stats.rx_packets, - (unsigned long long)tunnel->stats.rx_bytes, - (unsigned long long)tunnel->stats.rx_errors); + atomic_long_read(&tunnel->stats.tx_packets), + atomic_long_read(&tunnel->stats.tx_bytes), + atomic_long_read(&tunnel->stats.tx_errors), + atomic_long_read(&tunnel->stats.rx_packets), + atomic_long_read(&tunnel->stats.rx_bytes), + atomic_long_read(&tunnel->stats.rx_errors)); } static void pppol2tp_seq_session_show(struct seq_file *m, void *v) @@ -1675,14 +1675,14 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->lns_mode ? "LNS" : "LAC", session->debug, jiffies_to_msecs(session->reorder_timeout)); - seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n", + seq_printf(m, " %hu/%hu %ld/%ld/%ld %ld/%ld/%ld\n", session->nr, session->ns, - (unsigned long long)session->stats.tx_packets, - (unsigned long long)session->stats.tx_bytes, - (unsigned long long)session->stats.tx_errors, - (unsigned long long)session->stats.rx_packets, - (unsigned long long)session->stats.rx_bytes, - (unsigned long long)session->stats.rx_errors); + atomic_long_read(&session->stats.tx_packets), + atomic_long_read(&session->stats.tx_bytes), + atomic_long_read(&session->stats.tx_errors), + atomic_long_read(&session->stats.rx_packets), + atomic_long_read(&session->stats.rx_bytes), + atomic_long_read(&session->stats.rx_errors)); if (po) seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); -- cgit v1.2.2 From f6e16b299bacaa71c6604a784f2d088a966f8c23 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 19 Mar 2013 06:11:23 +0000 Subject: l2tp: unhash l2tp sessions on delete, not on free If we postpone unhashing of l2tp sessions until the structure is freed, we risk: 1. further packets arriving and getting queued while the pseudowire is being closed down 2. 
the recv path hitting "scheduling while atomic" errors in the case that recv drops the last reference to a session and calls l2tp_session_free while in atomic context. As such, l2tp sessions should be unhashed from l2tp_core data structures early in the teardown process prior to calling pseudowire close. For pseudowires like l2tp_ppp which have multiple shutdown codepaths, provide an unhash hook. Signed-off-by: Tom Parkin Signed-off-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 75 ++++++++++++++++++++++++---------------------------- net/l2tp/l2tp_core.h | 1 + net/l2tp/l2tp_ppp.c | 12 +++------ 3 files changed, 38 insertions(+), 50 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 97d30ac67c88..8aecf5df6656 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1316,26 +1316,12 @@ again: hlist_del_init(&session->hlist); - /* Since we should hold the sock lock while - * doing any unbinding, we need to release the - * lock we're holding before taking that lock. - * Hold a reference to the sock so it doesn't - * disappear as we're jumping between locks. - */ if (session->ref != NULL) (*session->ref)(session); write_unlock_bh(&tunnel->hlist_lock); - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_del_init_rcu(&session->global_hlist); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); - } - + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close != NULL) @@ -1732,64 +1718,71 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); */ void l2tp_session_free(struct l2tp_session *session) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = session->tunnel; BUG_ON(atomic_read(&session->ref_count) != 0); - tunnel = session->tunnel; - if (tunnel != NULL) { + if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); + if (session->session_id != 0) + atomic_dec(&l2tp_session_count); + sock_put(tunnel->sock); + session->tunnel = NULL; + l2tp_tunnel_dec_refcount(tunnel); + } + + kfree(session); + + return; +} +EXPORT_SYMBOL_GPL(l2tp_session_free); + +/* Remove an l2tp session from l2tp_core's hash lists. + * Provides a tidyup interface for pseudowire code which can't just route all + * shutdown via l2tp_session_delete and a pseudowire-specific session_close + * callback. + */ +void __l2tp_session_unhash(struct l2tp_session *session) +{ + struct l2tp_tunnel *tunnel = session->tunnel; - /* Delete the session from the hash */ + /* Remove the session from core hashes */ + if (tunnel) { + /* Remove from the per-tunnel hash */ write_lock_bh(&tunnel->hlist_lock); hlist_del_init(&session->hlist); write_unlock_bh(&tunnel->hlist_lock); - /* Unlink from the global hash if not L2TPv2 */ + /* For L2TPv3 we have a per-net hash: remove from there, too */ if (tunnel->version != L2TP_HDR_VER_2) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_session_hlist_lock); hlist_del_init_rcu(&session->global_hlist); spin_unlock_bh(&pn->l2tp_session_hlist_lock); synchronize_rcu(); } - - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); - - sock_put(tunnel->sock); - - /* This will delete the tunnel context if this - * is the last session on the tunnel.
- */ - session->tunnel = NULL; - l2tp_tunnel_dec_refcount(tunnel); - } - - kfree(session); - - return; -} -EXPORT_SYMBOL_GPL(l2tp_session_free); +EXPORT_SYMBOL_GPL(__l2tp_session_unhash); /* This function is used by the netlink SESSION_DELETE command and by pseudowire modules. */ int l2tp_session_delete(struct l2tp_session *session) { + if (session->ref) + (*session->ref)(session); + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); - if (session->session_close != NULL) (*session->session_close)(session); - + if (session->deref) + (*session->deref)(session); l2tp_session_dec_refcount(session); - return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); - /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 519b013f8b31..485a490fd990 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -242,6 +242,7 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +extern void __l2tp_session_unhash(struct l2tp_session *session); extern int l2tp_session_delete(struct l2tp_session *session); extern void l2tp_session_free(struct l2tp_session *session); extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb)); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 9d0eb8c13530..637a341c1e2d 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -466,19 +466,12 @@ static void pppol2tp_session_close(struct l2tp_session *session) */ static void pppol2tp_session_destruct(struct sock *sk) { - struct l2tp_session *session; - - if (sk->sk_user_data != NULL) { - session = sk->sk_user_data; - if (session == NULL) - goto out; - + struct l2tp_session *session = sk->sk_user_data; + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); l2tp_session_dec_refcount(session); } - -out: return; } @@ -509,6 +502,7 @@ static int pppol2tp_release(struct socket *sock) /* Purge any queued data */ if (session != NULL) { + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); sock_put(sk); } -- cgit v1.2.2 From 8ed781668dd49b608f1e67a22e3b445fd0c2cd6f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Mar 2013 06:39:29 +0000 Subject: flow_keys: include thoff into flow_keys for later usage In skb_flow_dissect(), we perform a dissection of a skbuff. Since we're doing the work here anyway, also store thoff for later use, e.g. in the BPF filter. Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/flow_keys.h | 1 + net/core/flow_dissector.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 80461c1ae9ef..bb8271d487b7 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -9,6 +9,7 @@ struct flow_keys { __be32 ports; __be16 port16[2]; }; + u16 thoff; u8 ip_proto; }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9d4c7201400d..e187bf06d673 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -140,6 +140,8 @@ ipv6: flow->ports = *ports; } + flow->thoff = (u16) nhoff; + return true; } EXPORT_SYMBOL(skb_flow_dissect); -- cgit v1.2.2 From 283951f95b067877ca5ea77afaa212bb1e0507b5 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Tue, 19 Mar 2013 08:19:29 +0000 Subject: ipconfig: Fix newline handling in log message. When using ipconfig the logs currently look like: Single name server: [ 3.467270] IP-Config: Complete: [ 3.470613] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.480670] host=infigo-1, domain=, nis-domain=(none) [ 3.486166] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.492910] nameserver0=172.16.42.1[ 3.496853] ALSA device list: Three name servers: [ 3.496949] IP-Config: Complete: [ 3.500293] device=eth0, hwaddr=ac:de:48:00:00:01, ipaddr=172.16.42.2, mask=255.255.255.0, gw=172.16.42.1 [ 3.510367] host=infigo-1, domain=, nis-domain=(none) [ 3.515864] bootserver=172.16.42.1, rootserver=172.16.42.1, rootpath= [ 3.522635] nameserver0=172.16.42.1, nameserver1=172.16.42.100 [ 3.529149] , nameserver2=172.16.42.200 Fix newline handling for these cases Signed-off-by: Martin Fuzzey Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 98cbc6877019..bf6c5cf31aed 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1522,7 +1522,8 @@ static int __init ip_auto_config(void) } for (i++; i < CONF_NAMESERVERS_MAX; i++) if (ic_nameservers[i] != NONE) - pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]); + pr_cont(", nameserver%u=%pI4", i, &ic_nameservers[i]); + pr_cont("\n"); #endif /* !SILENT */ return 0; -- cgit v1.2.2 From 0582b7d15f8a7ae53dd2128b8eb01567b3fd2277 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 19 Mar 2013 13:40:23 +0000 Subject: sh_eth: fix bitbang memory leak sh_mdio_init() allocates pointer to 'struct bb_info' but only stores it locally, so that sh_mdio_release() can't free it on driver unload. Add the pointer to 'struct bb_info' to 'struct sh_eth_private', so that sh_mdio_init() can save 'bitbang' variable for sh_mdio_release() to be able to free it later... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 5 +++++ drivers/net/ethernet/renesas/sh_eth.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 33e96176e4d8..c87862812ead 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2220,6 +2220,7 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp) /* MDIO bus release function */ static int sh_mdio_release(struct net_device *ndev) { + struct sh_eth_private *mdp = netdev_priv(ndev); struct mii_bus *bus = dev_get_drvdata(&ndev->dev); /* unregister mdio bus */ @@ -2234,6 +2235,9 @@ static int sh_mdio_release(struct net_device *ndev) /* free bitbang info */ free_mdio_bitbang(bus); + /* free bitbang memory */ + kfree(mdp->bitbang); + return 0; } @@ -2262,6 +2266,7 @@ static int sh_mdio_init(struct net_device *ndev, int id, bitbang->ctrl.ops = &bb_ops; /* MII controller setting */ + mdp->bitbang = bitbang; mdp->mii_bus = alloc_mdio_bitbang(&bitbang->ctrl); if (!mdp->mii_bus) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index bae84fd2e73a..e6655678458e 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -705,6 +705,7 @@ struct sh_eth_private { const u16 *reg_offset; void __iomem *addr; void __iomem *tsu_addr; + struct bb_info *bitbang; u32 num_rx_ring; u32 num_tx_ring; dma_addr_t rx_desc_dma; -- cgit v1.2.2 From fc0c0900408e05758a0df17c1924ca837fafca5e Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 19 Mar 2013 13:41:32 +0000 Subject: sh_eth: check TSU registers ioremap() error One must check the result of ioremap() -- in this case it prevents potential kernel oops when initializing TSU registers further on... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index c87862812ead..bf5e3cf97c4d 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2446,6 +2446,11 @@ static int sh_eth_drv_probe(struct platform_device *pdev) } mdp->tsu_addr = ioremap(rtsu->start, resource_size(rtsu)); + if (mdp->tsu_addr == NULL) { + ret = -ENOMEM; + dev_err(&pdev->dev, "TSU ioremap failed.\n"); + goto out_release; + } mdp->port = devno % 2; ndev->features = NETIF_F_HW_VLAN_FILTER; } -- cgit v1.2.2 From fa90b077d72b4ea92706e86fdff7b5dca294caa3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 20 Mar 2013 02:21:48 +0000 Subject: lpc_eth: fix error return code in lpc_eth_drv_probe() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index c4122c86f829..efa29b712d5f 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1472,7 +1472,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, ndev); - if (lpc_mii_init(pldat) != 0) + ret = lpc_mii_init(pldat); + if (ret) goto err_out_unregister_netdev; netdev_info(ndev, "LPC mac at 0x%08x irq %d\n", -- cgit v1.2.2 From 896ee0eee6261e30c3623be931c3f621428947df Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Mar 2013 05:19:24 +0000 Subject: net/irda: add missing error path release_sock call This makes sure that release_sock is called for all error conditions in irda_getsockopt. Signed-off-by: Kees Cook Reported-by: Brad Spengler Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/irda/af_irda.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index d07e3a626446..d28e7f014cc6 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2583,8 +2583,10 @@ bed: NULL, NULL, NULL); /* Check if the we got some results */ - if (!self->cachedaddr) - return -EAGAIN; /* Didn't find any devices */ + if (!self->cachedaddr) { + err = -EAGAIN; /* Didn't find any devices */ + goto out; + } daddr = self->cachedaddr; /* Cleanup */ self->cachedaddr = 0; -- cgit v1.2.2 From da2191e31409d1058dcbed44e8f53e39a40e86b3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 20 Mar 2013 12:31:07 -0300 Subject: net: fec: Define indexes as 'unsigned int' Fix the following warnings that happen when building with W=1 option: drivers/net/ethernet/freescale/fec.c: In function 'fec_enet_free_buffers': drivers/net/ethernet/freescale/fec.c:1337:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] drivers/net/ethernet/freescale/fec.c: In function 'fec_enet_alloc_buffers': drivers/net/ethernet/freescale/fec.c:1361:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] drivers/net/ethernet/freescale/fec.c: In function 'fec_enet_init': drivers/net/ethernet/freescale/fec.c:1631:16: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] Signed-off-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index e3f39372ce25..911d0253dbb2 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -1332,7 +1332,7 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) static void fec_enet_free_buffers(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - int i; + unsigned int i; struct sk_buff *skb; struct bufdesc *bdp; @@ -1356,7 +1356,7 @@ static void fec_enet_free_buffers(struct net_device *ndev) static int fec_enet_alloc_buffers(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - int i; + unsigned int i; struct sk_buff *skb; struct bufdesc *bdp; @@ -1598,7 +1598,7 @@ static int fec_enet_init(struct net_device *ndev) struct fec_enet_private *fep = netdev_priv(ndev); struct bufdesc *cbd_base; struct bufdesc *bdp; - int i; + unsigned int i; /* Allocate memory for buffer descriptors. 
*/ cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma, -- cgit v1.2.2 From 12ee4fc67c00895b3d740297f7ca447239c1983b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 Mar 2013 03:28:01 +0800 Subject: workqueue: add missing POOL_FREEZING get_unbound_pool() forgot to set POOL_FREEZING if workqueue_freezing is set and a new pool could go out of sync with the global freezing state. Fix it by adding POOL_FREEZING if workqueue_freezing. wq_mutex is already held so no further locking is necessary. This also removes the unused static variable warning when !CONFIG_FREEZER. tj: Updated commit message. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e38d035bf671..40f4017285a0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3503,6 +3503,9 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) if (!pool || init_worker_pool(pool) < 0) goto fail; + if (workqueue_freezing) + pool->flags |= POOL_FREEZING; + lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ copy_workqueue_attrs(pool->attrs, attrs); -- cgit v1.2.2 From f046f89a99ccfd9408b94c653374ff3065c7edb3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Mar 2013 17:21:24 +0000 Subject: dm thin: fix discard corruption Fix a bug in dm_btree_remove that could leave leaf values with incorrect reference counts. The effect of this was that removal of a shared block could result in the space maps thinking the block was no longer used. More concretely, if you have a thin device and a snapshot of it, sending a discard to a shared region of the thin could corrupt the snapshot. Thinp uses a 2-level nested btree to store its mappings. The first level is indexed by thin device, and the second level by logical block. Often when we're removing an entry in this mapping tree we need to rebalance nodes, which can involve shadowing them, possibly creating a copy if the block is shared. If we do create a copy then children of that node need to have their reference counts incremented. In this way reference counts percolate down the tree as shared trees diverge. The rebalance functions were incrementing the children at the appropriate time, but they were always assuming the children were internal nodes. This meant the leaf values (in our case packed block/flags entries) were not being incremented.
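To make the internal-versus-leaf distinction concrete, a rough sketch follows (hypothetical helper names, not the actual dm-btree code; the real fix in the diff below threads the value type through init_child() so inc_children() can handle both cases):

	/* Illustrative sketch only: bumping reference counts on the children
	 * of a freshly shadowed (copied) node depends on the node type.
	 */
	static void inc_children_sketch(struct btree_node *n,
					struct dm_btree_value_type *vt)
	{
		unsigned i, nr = nr_entries(n);		/* hypothetical accessor */

		if (is_internal(n)) {			/* hypothetical predicate */
			/* Internal node: children are block pointers, so bump
			 * the block reference counts.
			 */
			for (i = 0; i < nr; i++)
				inc_block(child_block(n, i));	/* hypothetical */
		} else if (vt->inc) {
			/* Leaf node: entries are values (here, thinp's packed
			 * block/flags), so the value type's inc method must
			 * run; this is the step the buggy code skipped.
			 */
			for (i = 0; i < nr; i++)
				vt->inc(vt->context, value_ptr(n, i));
		}
	}
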
Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 4 +-- drivers/md/persistent-data/dm-btree-remove.c | 46 +++++++++++++++------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 009339d62828..ab95e5ff3758 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2544,7 +2544,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 6, 1}, + .version = {1, 7, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2831,7 +2831,7 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 7, 1}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index c4f28133ef82..b88757cd0d1d 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -139,15 +139,8 @@ struct child { struct btree_node *n; }; -static struct dm_btree_value_type le64_type = { - .context = NULL, - .size = sizeof(__le64), - .inc = NULL, - .dec = NULL, - .equal = NULL -}; - -static int init_child(struct dm_btree_info *info, struct btree_node *parent, +static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt, + struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -164,7 +157,7 @@ static int init_child(struct dm_btree_info *info, struct btree_node *parent, result->n = dm_block_data(result->block); if (inc) - inc_children(info->tm, result->n, &le64_type); + inc_children(info->tm, result->n, vt); *((__le64 *) value_ptr(parent, index)) = cpu_to_le64(dm_block_location(result->block)); @@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, } static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) + struct dm_btree_value_type *vt, unsigned left_index) { int r; struct btree_node *parent; @@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, parent = dm_block_data(shadow_current(s)); - r = init_child(info, parent, left_index, &left); + r = init_child(info, vt, parent, left_index, &left); if (r) return r; - r = init_child(info, parent, left_index + 1, &right); + r = init_child(info, vt, parent, left_index + 1, &right); if (r) { exit_child(info, &left); return r; @@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, } static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) + struct dm_btree_value_type *vt, unsigned left_index) { int r; struct btree_node *parent = dm_block_data(shadow_current(s)); @@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, /* * FIXME: fill out an array? 
*/ - r = init_child(info, parent, left_index, &left); + r = init_child(info, vt, parent, left_index, &left); if (r) return r; - r = init_child(info, parent, left_index + 1, ¢er); + r = init_child(info, vt, parent, left_index + 1, ¢er); if (r) { exit_child(info, &left); return r; } - r = init_child(info, parent, left_index + 2, &right); + r = init_child(info, vt, parent, left_index + 2, &right); if (r) { exit_child(info, &left); exit_child(info, ¢er); @@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_transaction_manager *tm, } static int rebalance_children(struct shadow_spine *s, - struct dm_btree_info *info, uint64_t key) + struct dm_btree_info *info, + struct dm_btree_value_type *vt, uint64_t key) { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; @@ -472,13 +466,13 @@ static int rebalance_children(struct shadow_spine *s, has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); if (!has_left_sibling) - r = rebalance2(s, info, i); + r = rebalance2(s, info, vt, i); else if (!has_right_sibling) - r = rebalance2(s, info, i - 1); + r = rebalance2(s, info, vt, i - 1); else - r = rebalance3(s, info, i - 1); + r = rebalance3(s, info, vt, i - 1); return r; } @@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, if (le32_to_cpu(n->header.flags) & LEAF_NODE) return do_leaf(n, key, index); - r = rebalance_children(s, info, key); + r = rebalance_children(s, info, vt, key); if (r) break; @@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, return r; } +static struct dm_btree_value_type le64_type = { + .context = NULL, + .size = sizeof(__le64), + .inc = NULL, + .dec = NULL, + .equal = NULL +}; + int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, dm_block_t *new_root) { -- cgit v1.2.2 From 58051b94e05a59c4d34f9f1a441af40894817c59 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Mar 2013 17:21:25 +0000 Subject: dm thin: fix non power of two discard granularity calc Fix a discard granularity calculation to work for non power of 2 block sizes. In order for thinp to passdown discard bios to the underlying data device, the data device must have a discard granularity that is a factor of the thinp block size. Originally this check was done by using bitops since the block_size was known to be a power of two. Introduced by commit f13945d75730081830b6f3360266950e2b7c9067 ("dm thin: support a non power of 2 discard_granularity"). Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ab95e5ff3758..004ad1652b73 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1577,6 +1577,11 @@ static bool data_dev_supports_discard(struct pool_c *pt) return q && blk_queue_discard(q); } +static bool is_factor(sector_t block_size, uint32_t n) +{ + return !sector_div(block_size, n); +} + /* * If discard_passdown was enabled verify that the data device * supports discards. Disable discard_passdown if not. 
@@ -1602,7 +1607,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt) else if (data_limits->discard_granularity > block_size) reason = "discard granularity larger than a block"; - else if (block_size & (data_limits->discard_granularity - 1)) + else if (!is_factor(block_size, data_limits->discard_granularity)) reason = "discard granularity not a factor of block size"; if (reason) { -- cgit v1.2.2 From 3b6b7813b198b578aa7e04e4047ddb8225c37b7f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 20 Mar 2013 17:21:25 +0000 Subject: dm verity: avoid deadlock A deadlock was found in the prefetch code in the dm verity map function. This patch fixes this by transferring the prefetch to a worker thread and skipping it completely if kmalloc fails. If generic_make_request is called recursively, it queues the I/O request on the current->bio_list without making the I/O request and returns. The routine making the recursive call cannot wait for the I/O to complete. The deadlock occurs when one thread grabs the bufio_client mutex and waits for an I/O to complete but the I/O is queued on another thread's current->bio_list and is waiting to get the mutex held by the first thread. The fix recognises that prefetching is not essential. If memory can be allocated, it queues the prefetch request to the worker thread, but if not, it does nothing. Signed-off-by: Paul Taysom Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon Cc: stable@kernel.org --- drivers/md/dm-bufio.c | 2 ++ drivers/md/dm-verity.c | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 3c955e10a618..c6083132c4b8 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1025,6 +1025,8 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, { struct blk_plug plug; + BUG_ON(dm_bufio_in_request()); + blk_start_plug(&plug); dm_bufio_lock(c); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 6ad538375c3c..a746f1d21c66 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -93,6 +93,13 @@ struct dm_verity_io { */ }; +struct dm_verity_prefetch_work { + struct work_struct work; + struct dm_verity *v; + sector_t block; + unsigned n_blocks; +}; + static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) { return (struct shash_desc *)(io + 1); @@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bio, int error) * The root buffer is not prefetched, it is assumed that it will be cached * all the time. 
*/ -static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io) +static void verity_prefetch_io(struct work_struct *work) { + struct dm_verity_prefetch_work *pw = + container_of(work, struct dm_verity_prefetch_work, work); + struct dm_verity *v = pw->v; int i; for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; sector_t hash_block_end; - verity_hash_at_level(v, io->block, i, &hash_block_start, NULL); - verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL); + verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL); + verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL); if (!i) { unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster); @@ -452,6 +462,25 @@ no_prefetch_cluster: dm_bufio_prefetch(v->bufio, hash_block_start, hash_block_end - hash_block_start + 1); } + + kfree(pw); +} + +static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io) +{ + struct dm_verity_prefetch_work *pw; + + pw = kmalloc(sizeof(struct dm_verity_prefetch_work), + GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!pw) + return; + + INIT_WORK(&pw->work, verity_prefetch_io); + pw->v = v; + pw->block = io->block; + pw->n_blocks = io->n_blocks; + queue_work(v->verify_wq, &pw->work); } /* @@ -498,7 +527,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) memcpy(io->io_vec, bio_iovec(bio), io->io_vec_size * sizeof(struct bio_vec)); - verity_prefetch_io(v, io); + verity_submit_prefetch(v, io); generic_make_request(bio); @@ -858,7 +887,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 1, 1}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, -- cgit v1.2.2 From 414dd67d50a6b9a11af23bbb68e8fae13d726c8b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Mar 2013 17:21:25 +0000 Subject: dm cache: avoid 64 bit division on 32 bit Squash various 32bit link errors. >> on i386: >> drivers/built-in.o: In function `is_discarded_oblock': >> dm-cache-target.c:(.text+0x1ea28e): undefined reference to `__udivdi3' ... Reported-by: Randy Dunlap Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 0f4e84b15c30..5ad227f0cea3 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -158,7 +158,7 @@ struct cache { /* * origin_blocks entries, discarded if set. 
*/ - sector_t discard_block_size; /* a power of 2 times sectors per block */ + uint32_t discard_block_size; /* a power of 2 times sectors per block */ dm_dblock_t discard_nr_blocks; unsigned long *discard_bitset; @@ -412,17 +412,24 @@ static bool block_size_is_power_of_two(struct cache *cache) return cache->sectors_per_block_shift >= 0; } +static dm_block_t block_div(dm_block_t b, uint32_t n) +{ + do_div(b, n); + + return b; +} + static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) { - sector_t discard_blocks = cache->discard_block_size; + uint32_t discard_blocks = cache->discard_block_size; dm_block_t b = from_oblock(oblock); if (!block_size_is_power_of_two(cache)) - (void) sector_div(discard_blocks, cache->sectors_per_block); + discard_blocks = discard_blocks / cache->sectors_per_block; else discard_blocks >>= cache->sectors_per_block_shift; - (void) sector_div(b, discard_blocks); + b = block_div(b, discard_blocks); return to_dblock(b); } @@ -1002,7 +1009,7 @@ static void process_discard_bio(struct cache *cache, struct bio *bio) dm_block_t end_block = bio->bi_sector + bio_sectors(bio); dm_block_t b; - (void) sector_div(end_block, cache->discard_block_size); + end_block = block_div(end_block, cache->discard_block_size); for (b = start_block; b < end_block; b++) set_discard(cache, to_dblock(b)); @@ -1835,7 +1842,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) /* FIXME: factor out this whole section */ origin_blocks = cache->origin_sectors = ca->origin_sectors; - (void) sector_div(origin_blocks, ca->block_size); + origin_blocks = block_div(origin_blocks, ca->block_size); cache->origin_blocks = to_oblock(origin_blocks); cache->sectors_per_block = ca->block_size; @@ -1848,7 +1855,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) dm_block_t cache_size = ca->cache_sectors; cache->sectors_per_block_shift = -1; - (void) sector_div(cache_size, ca->block_size); + cache_size = block_div(cache_size, ca->block_size); cache->cache_size = to_cblock(cache_size); } else { cache->sectors_per_block_shift = __ffs(ca->block_size); -- cgit v1.2.2 From 617a0b89da4898d4cc990c9eb4bc9c0591c538a5 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 20 Mar 2013 17:21:26 +0000 Subject: dm cache: detect cache_create failure Return error if cache_create() fails. A missing return check made cache_ctr continue even after an error in cache_create() resulting in the cache object being destroyed. So a simple failure like an odd number of cache policy config value arguments would result in an oops. 
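Schematically, the failure mode is the classic unchecked-constructor-step pattern (generic sketch with hypothetical names; the actual one-line fix is in the diff below):

	/* Generic target-constructor sketch: each step's result must be
	 * checked before the next step runs, otherwise a failed create is
	 * followed by use (and eventually destruction) of an invalid object.
	 */
	static int my_ctr(struct dm_target *ti, unsigned argc, char **argv)
	{
		struct my_cache *cache;			/* hypothetical type */
		int r;

		r = my_cache_create(&cache);		/* hypothetical step */
		if (r)
			return r;	/* the check the patch adds */

		r = my_copy_args(cache, argc, argv);	/* hypothetical step */
		if (r) {
			my_cache_destroy(cache);
			return r;
		}

		ti->private = cache;
		return 0;
	}
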
Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 5ad227f0cea3..76cc910557f0 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2009,6 +2009,8 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out; r = cache_create(ca, &cache); + if (r) + goto out; r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); if (r) { -- cgit v1.2.2 From b978440b8db901aba0c4cd38c7c841c9b5cd9a7e Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 20 Mar 2013 17:21:26 +0000 Subject: dm cache: avoid calling policy destructor twice on error If the cache policy's config values are not able to be set we must set the policy to NULL after destroying it in create_cache_policy() so we don't attempt to destroy it a second time later. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 76cc910557f0..79ac8603644d 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1763,8 +1763,11 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, } r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); - if (r) + if (r) { + *error = "Error setting cache policy's config values"; dm_cache_policy_destroy(cache->policy); + cache->policy = NULL; + } return r; } -- cgit v1.2.2 From 79ed9caffc9fff67aa64fd683e791aa70f1bcb51 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Mar 2013 17:21:27 +0000 Subject: dm cache: metadata clear dirty bits on clean shutdown When writing the dirty bitset to the metadata device on a clean shutdown, clear the dirty bits. Previously they were left indicating the cache was dirty. This led to confusion about whether there really was dirty data in the cache or not. (This was a harmless bug.) Reported-by: Darrick J. Wong Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-metadata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index fbd3625f2748..1bb91802b22a 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -979,7 +979,7 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty /* nothing to be done */ return 0; - value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0)); + value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0)); __dm_bless_for_disk(&value); r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), -- cgit v1.2.2 From e2e74d617eadc15f601983270c4f4a6935c5a943 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Mar 2013 17:21:27 +0000 Subject: dm cache: fix race in writethrough implementation We have found a race in the optimisation used in the dm cache writethrough implementation. Currently, dm core sends the cache target two bios, one for the origin device and one for the cache device and these are processed in parallel. This patch avoids the race by changing the code back to a simpler (slower) implementation which processes the two writes in series, one after the other, until we can develop a complete fix for the problem. 
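At the core of the serialized scheme is an end_io hook: the bio completes on the origin device in interrupt context, so the follow-up write to the cache device must be queued for a worker thread rather than issued directly. A generic sketch of that pattern (hypothetical context type and helpers; the real code is writethrough_endio() and defer_writethrough_bio() in the diff below):

	/* Generic defer-from-interrupt sketch (my_ctx and its helpers are
	 * hypothetical).
	 */
	static void my_endio(struct bio *bio, int err)
	{
		struct my_ctx *ctx = my_bio_to_ctx(bio);	/* hypothetical */
		unsigned long flags;

		if (err) {
			bio_endio(bio, err);	/* propagate origin failure */
			return;
		}

		/* Interrupt context: queue the follow-up write instead of
		 * issuing it here.
		 */
		spin_lock_irqsave(&ctx->lock, flags);
		bio_list_add(&ctx->deferred, bio);
		spin_unlock_irqrestore(&ctx->lock, flags);

		my_wake_worker(ctx);	/* hypothetical: kicks the worker */
	}
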
When the cache is in writethrough mode it needs to send WRITE bios to both the origin and cache devices. Previously we've been implementing this by having dm core query the cache target on every write to find out how many copies of the bio it wants. The cache will ask for two bios if the block is in the cache, and one otherwise. The main problem with this is that it's racy. At the time this check is made the bio hasn't yet been submitted and so isn't being taken into account when quiescing a block for migration (promotion or demotion). This means a single bio may be submitted when two were needed because the block has since been promoted to the cache (catastrophic), or two bios when only one is needed (harmless). I really don't want to start entering bios into the quiescing system (deferred_set) in the get_num_write_bios callback. Instead this patch simplifies things; only one bio is submitted by the core, and it is first written to the origin and then to the cache device, in series. Obviously this will have a latency impact. deferred_writethrough_bios is introduced to record bios that must later be issued to the cache device from the worker thread. This deferred submission, after the origin bio completes, is required given that we're in interrupt context (writethrough_endio). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-target.c | 138 +++++++++++++++++++++++++++---------------- 1 file changed, 88 insertions(+), 50 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 79ac8603644d..ff267db60025 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -142,6 +142,7 @@ struct cache { spinlock_t lock; struct bio_list deferred_bios; struct bio_list deferred_flush_bios; + struct bio_list deferred_writethrough_bios; struct list_head quiesced_migrations; struct list_head completed_migrations; struct list_head need_commit_migrations; @@ -199,6 +200,11 @@ struct per_bio_data { bool tick:1; unsigned req_nr:2; struct dm_deferred_entry *all_io_entry; + + /* writethrough fields */ + struct cache *cache; + dm_cblock_t cblock; + bio_end_io_t *saved_bi_end_io; }; struct dm_cache_migration { @@ -616,6 +622,56 @@ static void issue(struct cache *cache, struct bio *bio) spin_unlock_irqrestore(&cache->lock, flags); } +static void defer_writethrough_bio(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + bio_list_add(&cache->deferred_writethrough_bios, bio); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void writethrough_endio(struct bio *bio, int err) +{ + struct per_bio_data *pb = get_per_bio_data(bio); + bio->bi_end_io = pb->saved_bi_end_io; + + if (err) { + bio_endio(bio, err); + return; + } + + remap_to_cache(pb->cache, bio, pb->cblock); + + /* + * We can't issue this bio directly, since we're in interrupt + * context. So it get's put on a bio list for processing by the + * worker thread. + */ + defer_writethrough_bio(pb->cache, bio); +} + +/* + * When running in writethrough mode we need to send writes to clean blocks + * to both the cache and origin devices. In future we'd like to clone the + * bio and send them in parallel, but for now we're doing them in + * series as this is easier. 
+ */ +static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, dm_cblock_t cblock) +{ + struct per_bio_data *pb = get_per_bio_data(bio); + + pb->cache = cache; + pb->cblock = cblock; + pb->saved_bi_end_io = bio->bi_end_io; + bio->bi_end_io = writethrough_endio; + + remap_to_origin_clear_discard(pb->cache, bio, oblock); +} + /*---------------------------------------------------------------- * Migration processing * @@ -1077,14 +1133,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs, inc_hit_counter(cache, bio); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - if (is_writethrough_io(cache, bio, lookup_result.cblock)) { - /* - * No need to mark anything dirty in write through mode. - */ - pb->req_nr == 0 ? - remap_to_cache(cache, bio, lookup_result.cblock) : - remap_to_origin_clear_discard(cache, bio, block); - } else + if (is_writethrough_io(cache, bio, lookup_result.cblock)) + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); + else remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); issue(cache, bio); @@ -1093,17 +1144,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, case POLICY_MISS: inc_miss_counter(cache, bio); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - - if (pb->req_nr != 0) { - /* - * This is a duplicate writethrough io that is no - * longer needed because the block has been demoted. - */ - bio_endio(bio, 0); - } else { - remap_to_origin_clear_discard(cache, bio, block); - issue(cache, bio); - } + remap_to_origin_clear_discard(cache, bio, block); + issue(cache, bio); break; case POLICY_NEW: @@ -1224,6 +1266,23 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) submit_bios ? 
generic_make_request(bio) : bio_io_error(bio); } +static void process_deferred_writethrough_bios(struct cache *cache) +{ + unsigned long flags; + struct bio_list bios; + struct bio *bio; + + bio_list_init(&bios); + + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&bios, &cache->deferred_writethrough_bios); + bio_list_init(&cache->deferred_writethrough_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + while ((bio = bio_list_pop(&bios))) + generic_make_request(bio); +} + static void writeback_some_dirty_blocks(struct cache *cache) { int r = 0; @@ -1320,6 +1379,7 @@ static int more_work(struct cache *cache) else return !bio_list_empty(&cache->deferred_bios) || !bio_list_empty(&cache->deferred_flush_bios) || + !bio_list_empty(&cache->deferred_writethrough_bios) || !list_empty(&cache->quiesced_migrations) || !list_empty(&cache->completed_migrations) || !list_empty(&cache->need_commit_migrations); @@ -1338,6 +1398,8 @@ static void do_worker(struct work_struct *ws) writeback_some_dirty_blocks(cache); + process_deferred_writethrough_bios(cache); + if (commit_if_needed(cache)) { process_deferred_flush_bios(cache, false); @@ -1803,8 +1865,6 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size, #define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) -static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio); - static int cache_create(struct cache_args *ca, struct cache **result) { int r = 0; @@ -1831,9 +1891,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) memcpy(&cache->features, &ca->features, sizeof(cache->features)); - if (cache->features.write_through) - ti->num_write_bios = cache_num_write_bios; - cache->callbacks.congested_fn = cache_is_congested; dm_table_add_target_callbacks(ti->table, &cache->callbacks); @@ -1883,6 +1940,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) spin_lock_init(&cache->lock); bio_list_init(&cache->deferred_bios); bio_list_init(&cache->deferred_flush_bios); + bio_list_init(&cache->deferred_writethrough_bios); INIT_LIST_HEAD(&cache->quiesced_migrations); INIT_LIST_HEAD(&cache->completed_migrations); INIT_LIST_HEAD(&cache->need_commit_migrations); @@ -2028,20 +2086,6 @@ out: return r; } -static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio) -{ - int r; - struct cache *cache = ti->private; - dm_oblock_t block = get_bio_block(cache, bio); - dm_cblock_t cblock; - - r = policy_lookup(cache->policy, block, &cblock); - if (r < 0) - return 2; /* assume the worst */ - - return (!r && !is_dirty(cache, cblock)) ? 2 : 1; -} - static int cache_map(struct dm_target *ti, struct bio *bio) { struct cache *cache = ti->private; @@ -2109,18 +2153,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio) inc_hit_counter(cache, bio); pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - if (is_writethrough_io(cache, bio, lookup_result.cblock)) { - /* - * No need to mark anything dirty in write through mode. - */ - pb->req_nr == 0 ? 
- remap_to_cache(cache, bio, lookup_result.cblock) : - remap_to_origin_clear_discard(cache, bio, block); - cell_defer(cache, cell, false); - } else { + if (is_writethrough_io(cache, bio, lookup_result.cblock)) + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); + else remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); - cell_defer(cache, cell, false); - } + + cell_defer(cache, cell, false); break; case POLICY_MISS: @@ -2547,7 +2585,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, -- cgit v1.2.2 From 4e7f506f6429636115e2f58f9f97089acc62524a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 20 Mar 2013 17:21:27 +0000 Subject: dm cache: policy change version from string to integer set Separate dm cache policy version string into 3 unsigned numbers corresponding to major, minor and patchlevel and store them at the end of the on-disk metadata so we know which version of the policy generated the hints in case a future version wants to use them differently. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-metadata.c | 15 +++++++++++++-- drivers/md/dm-cache-policy-cleaner.c | 7 +++++-- drivers/md/dm-cache-policy-internal.h | 2 ++ drivers/md/dm-cache-policy-mq.c | 8 ++++++-- drivers/md/dm-cache-policy.c | 8 ++++++++ drivers/md/dm-cache-policy.h | 2 ++ 6 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 1bb91802b22a..74213d1f1db5 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -83,6 +83,8 @@ struct cache_disk_superblock { __le32 read_misses; __le32 write_hits; __le32 write_misses; + + __le32 policy_version[CACHE_POLICY_VERSION_SIZE]; } __packed; struct dm_cache_metadata { @@ -109,6 +111,7 @@ struct dm_cache_metadata { bool clean_when_opened:1; char policy_name[CACHE_POLICY_NAME_SIZE]; + unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; size_t policy_hint_size; struct dm_cache_statistics stats; }; @@ -268,7 +271,8 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); disk_super->version = cpu_to_le32(CACHE_VERSION); - memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE); + memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); + memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); disk_super->policy_hint_size = 0; r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, @@ -284,7 +288,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); disk_super->cache_blocks = cpu_to_le32(0); - memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); disk_super->read_hits = cpu_to_le32(0); disk_super->read_misses = cpu_to_le32(0); @@ -478,6 +481,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); + cmd->policy_version[0] = 
le32_to_cpu(disk_super->policy_version[0]); + cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]); + cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]); cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size); cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits); @@ -572,6 +578,9 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); + disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); + disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); + disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); @@ -1070,6 +1079,7 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po __le32 value; size_t hint_size; const char *policy_name = dm_cache_policy_get_name(policy); + const unsigned *policy_version = dm_cache_policy_get_version(policy); if (!policy_name[0] || (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) @@ -1077,6 +1087,7 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po if (strcmp(cmd->policy_name, policy_name)) { strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); + memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); hint_size = dm_cache_policy_get_hint_size(policy); if (!hint_size) diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c index cc05d70b3cb8..b04d1f904d07 100644 --- a/drivers/md/dm-cache-policy-cleaner.c +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -17,7 +17,6 @@ /*----------------------------------------------------------------*/ #define DM_MSG_PREFIX "cache cleaner" -#define CLEANER_VERSION "1.0.0" /* Cache entry struct. 
*/ struct wb_cache_entry { @@ -434,6 +433,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, static struct dm_cache_policy_type wb_policy_type = { .name = "cleaner", + .version = {1, 0, 0}, .hint_size = 0, .owner = THIS_MODULE, .create = wb_create @@ -446,7 +446,10 @@ static int __init wb_init(void) if (r < 0) DMERR("register failed %d", r); else - DMINFO("version " CLEANER_VERSION " loaded"); + DMINFO("version %u.%u.%u loaded", + wb_policy_type.version[0], + wb_policy_type.version[1], + wb_policy_type.version[2]); return r; } diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h index 52a75beeced5..0928abdc49f0 100644 --- a/drivers/md/dm-cache-policy-internal.h +++ b/drivers/md/dm-cache-policy-internal.h @@ -117,6 +117,8 @@ void dm_cache_policy_destroy(struct dm_cache_policy *p); */ const char *dm_cache_policy_get_name(struct dm_cache_policy *p); +const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p); + size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p); /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 964153255076..dc112a7137fe 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -14,7 +14,6 @@ #include #define DM_MSG_PREFIX "cache-policy-mq" -#define MQ_VERSION "1.0.0" static struct kmem_cache *mq_entry_cache; @@ -1133,6 +1132,7 @@ bad_cache_alloc: static struct dm_cache_policy_type mq_policy_type = { .name = "mq", + .version = {1, 0, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create @@ -1140,6 +1140,7 @@ static struct dm_cache_policy_type mq_policy_type = { static struct dm_cache_policy_type default_policy_type = { .name = "default", + .version = {1, 0, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create @@ -1164,7 +1165,10 @@ static int __init mq_init(void) r = dm_cache_policy_register(&default_policy_type); if (!r) { - DMINFO("version " MQ_VERSION " loaded"); + DMINFO("version %u.%u.%u loaded", + mq_policy_type.version[0], + mq_policy_type.version[1], + mq_policy_type.version[2]); return 0; } diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c index 2cbf5fdaac52..21c03c570c06 100644 --- a/drivers/md/dm-cache-policy.c +++ b/drivers/md/dm-cache-policy.c @@ -150,6 +150,14 @@ const char *dm_cache_policy_get_name(struct dm_cache_policy *p) } EXPORT_SYMBOL_GPL(dm_cache_policy_get_name); +const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + return t->version; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_get_version); + size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p) { struct dm_cache_policy_type *t = p->private; diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h index f0f51b260544..558bdfdabf5f 100644 --- a/drivers/md/dm-cache-policy.h +++ b/drivers/md/dm-cache-policy.h @@ -196,6 +196,7 @@ struct dm_cache_policy { * We maintain a little register of the different policy types. */ #define CACHE_POLICY_NAME_SIZE 16 +#define CACHE_POLICY_VERSION_SIZE 3 struct dm_cache_policy_type { /* For use by the register code only. */ @@ -206,6 +207,7 @@ struct dm_cache_policy_type { * what gets passed on the target line to select your policy. */ char name[CACHE_POLICY_NAME_SIZE]; + unsigned version[CACHE_POLICY_VERSION_SIZE]; /* * Policies may store a hint for each each cache block. 
-- cgit v1.2.2 From ea2dd8c1ed0becee9812cf0840a9cd553ed398fe Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 20 Mar 2013 17:21:28 +0000 Subject: dm cache: policy ignore hints if generated by different version When reading the dm cache metadata from disk, ignore the policy hints unless they were generated by the same major version number of the same policy module. The hints are considered to be private data belonging to the specific module that generated them and there is no requirement for them to make sense to different versions of the policy that generated them. Policy modules are all required to work fine if no previous hints are supplied (or if existing hints are lost). Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-cache-metadata.c | 47 +++++++++++++++++++++++++++++++++--------- drivers/md/dm-cache-metadata.h | 2 +- drivers/md/dm-cache-target.c | 3 +-- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 74213d1f1db5..83e995fece88 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -863,18 +863,43 @@ struct thunk { bool hints_valid; }; +static bool policy_unchanged(struct dm_cache_metadata *cmd, + struct dm_cache_policy *policy) +{ + const char *policy_name = dm_cache_policy_get_name(policy); + const unsigned *policy_version = dm_cache_policy_get_version(policy); + size_t policy_hint_size = dm_cache_policy_get_hint_size(policy); + + /* + * Ensure policy names match. + */ + if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name))) + return false; + + /* + * Ensure policy major versions match. + */ + if (cmd->policy_version[0] != policy_version[0]) + return false; + + /* + * Ensure policy hint sizes match. 
+ */ + if (cmd->policy_hint_size != policy_hint_size) + return false; + + return true; +} + static bool hints_array_initialized(struct dm_cache_metadata *cmd) { return cmd->hint_root && cmd->policy_hint_size; } static bool hints_array_available(struct dm_cache_metadata *cmd, - const char *policy_name) + struct dm_cache_policy *policy) { - bool policy_names_match = !strncmp(cmd->policy_name, policy_name, - sizeof(cmd->policy_name)); - - return cmd->clean_when_opened && policy_names_match && + return cmd->clean_when_opened && policy_unchanged(cmd, policy) && hints_array_initialized(cmd); } @@ -908,7 +933,8 @@ static int __load_mapping(void *context, uint64_t cblock, void *leaf) return r; } -static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, +static int __load_mappings(struct dm_cache_metadata *cmd, + struct dm_cache_policy *policy, load_mapping_fn fn, void *context) { struct thunk thunk; @@ -918,18 +944,19 @@ static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_nam thunk.cmd = cmd; thunk.respect_dirty_flags = cmd->clean_when_opened; - thunk.hints_valid = hints_array_available(cmd, policy_name); + thunk.hints_valid = hints_array_available(cmd, policy); return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); } -int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, +int dm_cache_load_mappings(struct dm_cache_metadata *cmd, + struct dm_cache_policy *policy, load_mapping_fn fn, void *context) { int r; down_read(&cmd->root_lock); - r = __load_mappings(cmd, policy_name, fn, context); + r = __load_mappings(cmd, policy, fn, context); up_read(&cmd->root_lock); return r; @@ -1085,7 +1112,7 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) return -EINVAL; - if (strcmp(cmd->policy_name, policy_name)) { + if (!policy_unchanged(cmd, policy)) { strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index 135864ea0eee..f45cef21f3d0 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -89,7 +89,7 @@ typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock, dm_cblock_t cblock, bool dirty, uint32_t hint, bool hint_valid); int dm_cache_load_mappings(struct dm_cache_metadata *cmd, - const char *policy_name, + struct dm_cache_policy *policy, load_mapping_fn fn, void *context); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index ff267db60025..66120bd46d15 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2369,8 +2369,7 @@ static int cache_preresume(struct dm_target *ti) } if (!cache->loaded_mappings) { - r = dm_cache_load_mappings(cache->cmd, - dm_cache_policy_get_name(cache->policy), + r = dm_cache_load_mappings(cache->cmd, cache->policy, load_mapping, cache); if (r) { DMERR("could not load cache mappings"); -- cgit v1.2.2 From eb49faa6a4703698fa5d8b304b01e7f59e7d1f11 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Mar 2013 09:19:11 +0100 Subject: ALSA: hda - Fix abuse of snd_hda_lock_devices() for DSP loader The current DSP loader code abuses snd_hda_lock_devices() to ensure that the DSP loader does not conflict with other normal operations. 
But this trick obviously doesn't work for the PM resume, since the streams are kept open there and snd_hda_lock_devices() returns -EBUSY. That means we need another locking mechanism instead of this abuse. This patch provides the new lock state to azx_dev. Theoretically it's possible that the DSP loader conflicts with a stream that has already been assigned to another PCM. If that stream is running, the DSP loader should simply fail. If not (this is the case for PM resume), we should assign the stream temporarily to the DSP loader, and give it back to the PCM after the DSP loading finishes. If the PCM is operated during the DSP loading, it should get an error, too. Reported-and-tested-by: Dylan Reid Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 132 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 109 insertions(+), 23 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4cea6bb6fade..418bfc0eb0a3 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -415,6 +415,8 @@ struct azx_dev { unsigned int opened :1; unsigned int running :1; unsigned int irq_pending :1; + unsigned int prepared:1; + unsigned int locked:1; /* * For VIA: * A flag to ensure DMA position is 0 @@ -426,8 +428,25 @@ struct azx_dev { struct timecounter azx_tc; struct cyclecounter azx_cc; + +#ifdef CONFIG_SND_HDA_DSP_LOADER + struct mutex dsp_mutex; +#endif }; /* DSP lock helpers */ #ifdef CONFIG_SND_HDA_DSP_LOADER #define dsp_lock_init(dev) mutex_init(&(dev)->dsp_mutex) #define dsp_lock(dev) mutex_lock(&(dev)->dsp_mutex) #define dsp_unlock(dev) mutex_unlock(&(dev)->dsp_mutex) #define dsp_is_locked(dev) ((dev)->locked) #else #define dsp_lock_init(dev) do {} while (0) #define dsp_lock(dev) do {} while (0) #define dsp_unlock(dev) do {} while (0) #define dsp_is_locked(dev) 0 #endif + /* CORB/RIRB */ struct azx_rb { u32 *buf; /* CORB/RIRB buffer @@ -527,6 +546,10 @@ struct azx { /* card list (for power_save trigger) */ struct list_head list; + +#ifdef CONFIG_SND_HDA_DSP_LOADER + struct azx_dev saved_azx_dev; +#endif }; #define CREATE_TRACE_POINTS @@ -1793,15 +1816,25 @@ azx_assign_device(struct azx *chip, struct snd_pcm_substream *substream) dev = chip->capture_index_offset; nums = chip->capture_streams; } - for (i = 0; i < nums; i++, dev++) - if (!chip->azx_dev[dev].opened) { - res = &chip->azx_dev[dev]; - if (res->assigned_key == key) - break; + for (i = 0; i < nums; i++, dev++) { + struct azx_dev *azx_dev = &chip->azx_dev[dev]; + dsp_lock(azx_dev); + if (!azx_dev->opened && !dsp_is_locked(azx_dev)) { + res = azx_dev; + if (res->assigned_key == key) { + res->opened = 1; + res->assigned_key = key; + dsp_unlock(azx_dev); + return azx_dev; + } } + dsp_unlock(azx_dev); + } if (res) { + dsp_lock(res); res->opened = 1; res->assigned_key = key; + dsp_unlock(res); } return res; } @@ -2009,6 +2042,12 @@ static int azx_pcm_hw_params(struct snd_pcm_substream *substream, struct azx_dev *azx_dev = get_azx_dev(substream); int ret; + dsp_lock(azx_dev); + if (dsp_is_locked(azx_dev)) { + ret = -EBUSY; + goto unlock; + } + mark_runtime_wc(chip, azx_dev, substream, false); azx_dev->bufsize = 0; azx_dev->period_bytes = 0; @@ -2016,8 +2055,10 @@ static int azx_pcm_hw_params(struct snd_pcm_substream *substream, ret = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params)); if (ret < 0) - return ret; + goto unlock; mark_runtime_wc(chip, azx_dev, substream, true); + unlock: + dsp_unlock(azx_dev); return ret; } @@ -2029,16 +2070,21 @@ static int 
azx_pcm_hw_free(struct snd_pcm_substream *substream) struct hda_pcm_stream *hinfo = apcm->hinfo[substream->stream]; /* reset BDL address */ - azx_sd_writel(azx_dev, SD_BDLPL, 0); - azx_sd_writel(azx_dev, SD_BDLPU, 0); - azx_sd_writel(azx_dev, SD_CTL, 0); - azx_dev->bufsize = 0; - azx_dev->period_bytes = 0; - azx_dev->format_val = 0; + dsp_lock(azx_dev); + if (!dsp_is_locked(azx_dev)) { + azx_sd_writel(azx_dev, SD_BDLPL, 0); + azx_sd_writel(azx_dev, SD_BDLPU, 0); + azx_sd_writel(azx_dev, SD_CTL, 0); + azx_dev->bufsize = 0; + azx_dev->period_bytes = 0; + azx_dev->format_val = 0; + } snd_hda_codec_cleanup(apcm->codec, hinfo, substream); mark_runtime_wc(chip, azx_dev, substream, false); + azx_dev->prepared = 0; + dsp_unlock(azx_dev); return snd_pcm_lib_free_pages(substream); } @@ -2055,6 +2101,12 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) snd_hda_spdif_out_of_nid(apcm->codec, hinfo->nid); unsigned short ctls = spdif ? spdif->ctls : 0; + dsp_lock(azx_dev); + if (dsp_is_locked(azx_dev)) { + err = -EBUSY; + goto unlock; + } + azx_stream_reset(chip, azx_dev); format_val = snd_hda_calc_stream_format(runtime->rate, runtime->channels, @@ -2065,7 +2117,8 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) snd_printk(KERN_ERR SFX "%s: invalid format_val, rate=%d, ch=%d, format=%d\n", pci_name(chip->pci), runtime->rate, runtime->channels, runtime->format); - return -EINVAL; + err = -EINVAL; + goto unlock; } bufsize = snd_pcm_lib_buffer_bytes(substream); @@ -2084,7 +2137,7 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) azx_dev->no_period_wakeup = runtime->no_period_wakeup; err = azx_setup_periods(chip, substream, azx_dev); if (err < 0) - return err; + goto unlock; } /* wallclk has 24Mhz clock source */ @@ -2101,8 +2154,14 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) if ((chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND) && stream_tag > chip->capture_streams) stream_tag -= chip->capture_streams; - return snd_hda_codec_prepare(apcm->codec, hinfo, stream_tag, + err = snd_hda_codec_prepare(apcm->codec, hinfo, stream_tag, azx_dev->format_val, substream); + + unlock: + if (!err) + azx_dev->prepared = 1; + dsp_unlock(azx_dev); + return err; } static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) @@ -2117,6 +2176,9 @@ static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) azx_dev = get_azx_dev(substream); trace_azx_pcm_trigger(chip, azx_dev, cmd); + if (dsp_is_locked(azx_dev) || !azx_dev->prepared) + return -EPIPE; + switch (cmd) { case SNDRV_PCM_TRIGGER_START: rstart = 1; @@ -2621,17 +2683,27 @@ static int azx_load_dsp_prepare(struct hda_bus *bus, unsigned int format, struct azx_dev *azx_dev; int err; - if (snd_hda_lock_devices(bus)) - return -EBUSY; + azx_dev = azx_get_dsp_loader_dev(chip); + + dsp_lock(azx_dev); + spin_lock_irq(&chip->reg_lock); + if (azx_dev->running || azx_dev->locked) { + spin_unlock_irq(&chip->reg_lock); + err = -EBUSY; + goto unlock; + } + azx_dev->prepared = 0; + chip->saved_azx_dev = *azx_dev; + azx_dev->locked = 1; + spin_unlock_irq(&chip->reg_lock); err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, snd_dma_pci_data(chip->pci), byte_size, bufp); if (err < 0) - goto unlock; + goto err_alloc; mark_pages_wc(chip, bufp, true); - azx_dev = azx_get_dsp_loader_dev(chip); azx_dev->bufsize = byte_size; azx_dev->period_bytes = byte_size; azx_dev->format_val = format; @@ -2649,13 +2721,20 @@ static int azx_load_dsp_prepare(struct hda_bus *bus, unsigned int format, goto error; 
azx_setup_controller(chip, azx_dev); + dsp_unlock(azx_dev); return azx_dev->stream_tag; error: mark_pages_wc(chip, bufp, false); snd_dma_free_pages(bufp); -unlock: - snd_hda_unlock_devices(bus); + err_alloc: + spin_lock_irq(&chip->reg_lock); + if (azx_dev->opened) + *azx_dev = chip->saved_azx_dev; + azx_dev->locked = 0; + spin_unlock_irq(&chip->reg_lock); + unlock: + dsp_unlock(azx_dev); return err; } @@ -2677,9 +2756,10 @@ static void azx_load_dsp_cleanup(struct hda_bus *bus, struct azx *chip = bus->private_data; struct azx_dev *azx_dev = azx_get_dsp_loader_dev(chip); - if (!dmab->area) + if (!dmab->area || !azx_dev->locked) return; + dsp_lock(azx_dev); /* reset BDL address */ azx_sd_writel(azx_dev, SD_BDLPL, 0); azx_sd_writel(azx_dev, SD_BDLPU, 0); @@ -2692,7 +2772,12 @@ static void azx_load_dsp_cleanup(struct hda_bus *bus, snd_dma_free_pages(dmab); dmab->area = NULL; - snd_hda_unlock_devices(bus); + spin_lock_irq(&chip->reg_lock); + if (azx_dev->opened) + *azx_dev = chip->saved_azx_dev; + azx_dev->locked = 0; + spin_unlock_irq(&chip->reg_lock); + dsp_unlock(azx_dev); } #endif /* CONFIG_SND_HDA_DSP_LOADER */ @@ -3481,6 +3566,7 @@ static int azx_first_init(struct azx *chip) } for (i = 0; i < chip->num_streams; i++) { + dsp_lock_init(&chip->azx_dev[i]); /* allocate memory for the BDL for each stream */ err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci), -- cgit v1.2.2 From 6a092dfd51e5af9b321d683d4b4eddc79e2606ed Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 Mar 2013 03:28:03 +0800 Subject: workqueue: simplify current_is_workqueue_rescuer() We can test worker->rescue_wq instead of reaching into current_pwq->wq->rescuer and then comparing it to self. tj: Commit message. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 40f4017285a0..d2ac6cbfe8ab 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3936,7 +3936,7 @@ bool current_is_workqueue_rescuer(void) { struct worker *worker = current_wq_worker(); - return worker && worker == worker->current_pwq->wq->rescuer; + return worker && worker->rescue_wq; } /** -- cgit v1.2.2 From 951a078a5285ad31bc22e190616ad54b78fac992 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 Mar 2013 10:52:30 -0700 Subject: workqueue: kick a worker in pwq_adjust_max_active() If pwq_adjust_max_active() changes max_active from 0 to saved_max_active, it needs to wake up a worker. This is already done by thaw_workqueues(). If pwq_adjust_max_active() increases max_active for an unbound wq, while not strictly necessary for correctness, it's still desirable to wake up a worker so that the requested concurrency level is reached sooner. Move the wake_up_worker() call from thaw_workqueues() to pwq_adjust_max_active() so that it can handle both of the above two cases. This also makes thaw_workqueues() simpler. tj: Updated comments and description. 
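The resulting logic is roughly the following (a simplified sketch of pwq_adjust_max_active(), omitting the locking and freezing checks):

	pwq->max_active = wq->saved_max_active;

	while (!list_empty(&pwq->delayed_works) &&
	       pwq->nr_active < pwq->max_active)
		pwq_activate_first_delayed(pwq);

	/* kick a worker so newly activated work is noticed promptly;
	 * covers both the thaw case and the unbound max_active bump */
	wake_up_worker(pwq->pool);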
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d2ac6cbfe8ab..79d1d347e690 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3598,6 +3598,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) while (!list_empty(&pwq->delayed_works) && pwq->nr_active < pwq->max_active) pwq_activate_first_delayed(pwq); + + /* + * Need to kick a worker after thawed or an unbound wq's + * max_active is bumped. It's a slow path. Do it always. + */ + wake_up_worker(pwq->pool); } else { pwq->max_active = 0; } @@ -4401,13 +4407,6 @@ void thaw_workqueues(void) } spin_unlock_irq(&pwq_lock); - /* kick workers */ - for_each_pool(pool, pi) { - spin_lock_irq(&pool->lock); - wake_up_worker(pool); - spin_unlock_irq(&pool->lock); - } - workqueue_freezing = false; out_unlock: mutex_unlock(&wq_mutex); -- cgit v1.2.2 From 881094532e2a27406a5f06f839087bd152a8a494 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 Mar 2013 03:28:10 +0800 Subject: workqueue: use rcu_read_lock_sched() instead for accessing pwq in RCU rcu_read_lock_sched() is better than preempt_disable() if the code is protected by RCU_SCHED. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 79d1d347e690..b6c5a524d7c4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3962,7 +3962,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) struct pool_workqueue *pwq; bool ret; - preempt_disable(); + rcu_read_lock_sched(); if (!(wq->flags & WQ_UNBOUND)) pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); @@ -3970,7 +3970,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) pwq = first_pwq(wq); ret = !list_empty(&pwq->delayed_works); - preempt_enable(); + rcu_read_unlock_sched(); return ret; } @@ -4354,16 +4354,16 @@ bool freeze_workqueues_busy(void) * nr_active is monotonically decreasing. It's safe * to peek without lock. */ - preempt_disable(); + rcu_read_lock_sched(); for_each_pwq(pwq, wq) { WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; - preempt_enable(); + rcu_read_unlock_sched(); goto out_unlock; } } - preempt_enable(); + rcu_read_unlock_sched(); } out_unlock: mutex_unlock(&wq_mutex); -- cgit v1.2.2 From 519e3c1163ce2b2d510b76b0f5b374198f9378f3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 20 Mar 2013 03:28:21 +0800 Subject: workqueue: avoid false negative in assert_manager_or_pool_lock() If lockdep complains about something in another subsystem, lockdep_is_held() can return a false negative, so we also need to test debug_locks before triggering the WARN. 
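The underlying reason is that lockdep disables itself (clearing debug_locks) after it reports its first problem, so lockdep_is_held() cannot be trusted afterwards; the resulting assert pattern is roughly:

	WARN_ONCE(debug_locks &&
		  !lockdep_is_held(&pool->manager_mutex) &&
		  !lockdep_is_held(&pool->lock),
		  "pool->manager_mutex or ->lock should be held");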
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b6c5a524d7c4..47f258799bf2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -305,7 +305,8 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, #ifdef CONFIG_LOCKDEP #define assert_manager_or_pool_lock(pool) \ - WARN_ONCE(!lockdep_is_held(&(pool)->manager_mutex) && \ + WARN_ONCE(debug_locks && \ + !lockdep_is_held(&(pool)->manager_mutex) && \ !lockdep_is_held(&(pool)->lock), \ "pool->manager_mutex or ->lock should be held") #else -- cgit v1.2.2 From 9d73adf431e093b23fb4990f1ade11283cb67a98 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 20 Mar 2013 08:19:32 +0000 Subject: fec: Fix the build as module Since commit ff43da86c69 (NET: FEC: dynamtic check DMA desc buff type) the following build error happens when CONFIG_FEC=m ERROR: "fec_ptp_init" [drivers/net/ethernet/freescale/fec.ko] undefined! ERROR: "fec_ptp_ioctl" [drivers/net/ethernet/freescale/fec.ko] undefined! ERROR: "fec_ptp_start_cyclecounter" [drivers/net/ethernet/freescale/fec.ko] undefined! Fix it by exporting the required fec_ptp symbols. Reported-by: Uwe Kleine-Koenig Signed-off-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 1f17ca0f2201..0d8df400a479 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -128,6 +128,7 @@ void fec_ptp_start_cyclecounter(struct net_device *ndev) spin_unlock_irqrestore(&fep->tmreg_lock, flags); } +EXPORT_SYMBOL(fec_ptp_start_cyclecounter); /** * fec_ptp_adjfreq - adjust ptp cycle frequency @@ -318,6 +319,7 @@ int fec_ptp_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; } +EXPORT_SYMBOL(fec_ptp_ioctl); /** * fec_time_keep - call timecounter_read every second to avoid timer overrun @@ -383,3 +385,4 @@ void fec_ptp_init(struct net_device *ndev, struct platform_device *pdev) pr_info("registered PHC device on %s\n", ndev->name); } } +EXPORT_SYMBOL(fec_ptp_init); -- cgit v1.2.2 From cf4ab538f1516606d3ae730dce15d6f33d96b7e1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Mar 2013 12:56:37 -0500 Subject: NFSv4: Fix the string length returned by the idmapper Functions like nfs_map_uid_to_name() and nfs_map_gid_to_group() are expected to return a string without any terminating NUL character. Regression introduced by commit 57e62324e469e092ecc6c94a7a86fe4bd6ac5172 (NFS: Store the legacy idmapper result in the keyring). 
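As a worked example of the two length rules (illustrative only; the values "1000" and "bob" are made up), the name-to-id direction deliberately stores the NUL terminator while the id-to-name direction must not:

	len = sprintf(id_str, "%d", im->im_id) + 1;	/* "1000" is stored as 5 bytes, NUL included */
	len = strlen(im->im_name);			/* "bob" is stored as 3 bytes, no NUL */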
Reported-by: Dave Chiluk Signed-off-by: Trond Myklebust Cc: Bryan Schumaker Cc: stable@vger.kernel.org [>=3.4] --- fs/nfs/idmap.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index dc0f98dfa717..c516da5873fd 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -726,9 +726,9 @@ out1: return ret; } -static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) +static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen) { - return key_instantiate_and_link(key, data, strlen(data) + 1, + return key_instantiate_and_link(key, data, datalen, id_resolver_cache->thread_keyring, authkey); } @@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, struct key *key, struct key *authkey) { char id_str[NFS_UINT_MAXLEN]; + size_t len; int ret = -ENOKEY; /* ret = -ENOKEY */ @@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, case IDMAP_CONV_NAMETOID: if (strcmp(upcall->im_name, im->im_name) != 0) break; - sprintf(id_str, "%d", im->im_id); - ret = nfs_idmap_instantiate(key, authkey, id_str); + /* Note: here we store the NUL terminator too */ + len = sprintf(id_str, "%d", im->im_id) + 1; + ret = nfs_idmap_instantiate(key, authkey, id_str, len); break; case IDMAP_CONV_IDTONAME: if (upcall->im_id != im->im_id) break; - ret = nfs_idmap_instantiate(key, authkey, im->im_name); + len = strlen(im->im_name); + ret = nfs_idmap_instantiate(key, authkey, im->im_name, len); break; default: ret = -EINVAL; -- cgit v1.2.2 From 73214f5d9f33b79918b1f7babddd5c8af28dd23d Mon Sep 17 00:00:00 2001 From: Masatake YAMATO Date: Tue, 19 Mar 2013 01:47:28 +0000 Subject: thermal: shorten too long mcast group name The original name is too long: generic netlink multicast group names are limited to GENL_NAMSIZ (16) bytes including the terminating NUL, and "thermal_mc_group" is 16 characters, so it does not fit. Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f0bd7f90a90d..e3c0ae9bb1fa 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -44,7 +44,7 @@ /* Adding event notification support elements */ #define THERMAL_GENL_FAMILY_NAME "thermal_event" #define THERMAL_GENL_VERSION 0x01 -#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_group" +#define THERMAL_GENL_MCAST_GROUP_NAME "thermal_mc_grp" /* Default Thermal Governor */ #if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) -- cgit v1.2.2 From d714aaf649460cbfd5e82e75520baa856b4fa0a0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 20 Mar 2013 15:07:26 -0400 Subject: USB: EHCI: fix regression in QH unlinking This patch (as1670) fixes a regression caused by commit 6402c796d3b4205d3d7296157956c5100a05d7d6 (USB: EHCI: work around silicon bug in Intel's EHCI controllers). The workaround goes through two IAA cycles for each QH being unlinked. During the first cycle, the QH is not added to the async_iaa list (because it isn't fully gone from the hardware yet), which means that list will be empty. Unfortunately, I forgot to update the IAA watchdog timer routine. It thinks that an empty async_iaa list means the timer expiration was an error, which isn't true any more. This problem didn't show up during initial testing because the controllers being tested all had working IAA interrupts. But not all controllers do, and when the watchdog timer expires, the empty-list check prevents the second IAA cycle from starting. As a result, URB unlinks never complete. The check needs to be removed. 
Among the symptoms of the regression are processes stuck in D wait states and hangs during system shutdown. Signed-off-by: Alan Stern Reported-and-tested-by: Stephen Warren Reported-and-tested-by: Sven Joachim Reported-by: Andreas Bombe Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c index 20dbdcbe9b0f..c3fa1305f830 100644 --- a/drivers/usb/host/ehci-timer.c +++ b/drivers/usb/host/ehci-timer.c @@ -304,7 +304,7 @@ static void ehci_iaa_watchdog(struct ehci_hcd *ehci) * (a) SMP races against real IAA firing and retriggering, and * (b) clean HC shutdown, when IAA watchdog was pending. */ - if (ehci->async_iaa) { + if (1) { u32 cmd, status; /* If we get here, IAA is *REALLY* late. It's barely -- cgit v1.2.2 From 991f76f837bf22c5bb07261cfd86525a0a96650c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 20 Mar 2013 23:25:24 +0800 Subject: sysfs: fix race between readdir and lseek While readdir() is running, lseek() may set filp->f_pos to zero and may leave filp->private_data pointing to one sysfs_dirent object without holding its reference counter, so the sysfs_dirent object may be used after free in the next readdir(). This patch holds inode->i_mutex to avoid the problem since the lock is always held in the readdir path. Reported-by: Dave Jones Tested-by: Sasha Levin Cc: Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 2fbdff6be25c..c9e16608f486 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -1058,10 +1058,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) return 0; } +static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file_inode(file); + loff_t ret; + + mutex_lock(&inode->i_mutex); + ret = generic_file_llseek(file, offset, whence); + mutex_unlock(&inode->i_mutex); + + return ret; +} const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, .release = sysfs_dir_release, - .llseek = generic_file_llseek, + .llseek = sysfs_dir_llseek, }; -- cgit v1.2.2 From e5110f411d2ee35bf8d202ccca2e89c633060dca Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 20 Mar 2013 23:25:25 +0800 Subject: sysfs: handle failure path correctly for readdir() In case of 'if (filp->f_pos == 0 or 1)' of sysfs_readdir(), the failure from filldir() isn't handled, and the reference counter of the sysfs_dirent object pointed to by filp->private_data will be released without clearing filp->private_data, so a use-after-free bug will be triggered later. This patch returns immediately in that situation to fix the bug; it is reasonable to return from readdir() when filldir() fails. 
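For reference, a filldir_t callback returns 0 when it has consumed the entry and non-zero when it cannot accept more entries (e.g. the user buffer is full), so the corrected pattern looks like this (sketch):

	if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
		filp->f_pos++;	/* entry emitted, advance the position */
	else
		return 0;	/* buffer full: stop and resume here on the next call */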
Reported-by: Dave Jones Tested-by: Sasha Levin Cc: Signed-off-by: Ming Lei Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index c9e16608f486..e14512678c9b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -1020,6 +1020,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } if (filp->f_pos == 1) { if (parent_sd->s_parent) @@ -1028,6 +1030,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } mutex_lock(&sysfs_mutex); for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); -- cgit v1.2.2 From 260b3f1291a75a580d22ce8bfb1499c617272716 Mon Sep 17 00:00:00 2001 From: Julia Lemire Date: Mon, 18 Mar 2013 10:17:47 -0400 Subject: drm/mgag200: Bug fix: Modified pll algorithm for EH project While testing the mgag200 kms driver on the HP ProLiant Gen8, a bug was seen. Once the bootloader would load the selected kernel, the screen would go black. At first it was assumed that the mgag200 kms driver was hanging. But after setting up the grub serial output, it was seen that the driver was being loaded properly. After trying several monitors, one finally displayed the message "Frequency Out of Range". By comparing the kms pll algorithm with the previous mgag200 xorg driver pll algorithm, discrepancies were found. Once the kms pll algorithm was modified, the expected pll values were produced. This fix was tested on several monitors of varying native resolutions. Signed-off-by: Julia Lemire Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/mgag200/mgag200_mode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index a274b9906ef8..fe22bb780e1d 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -382,19 +382,19 @@ static int mga_g200eh_set_plls(struct mga_device *mdev, long clock) m = n = p = 0; vcomax = 800000; vcomin = 400000; - pllreffreq = 3333; + pllreffreq = 33333; delta = 0xffffffff; permitteddelta = clock * 5 / 1000; - for (testp = 16; testp > 0; testp--) { + for (testp = 16; testp > 0; testp >>= 1) { if (clock * testp > vcomax) continue; if (clock * testp < vcomin) continue; for (testm = 1; testm < 33; testm++) { - for (testn = 1; testn < 257; testn++) { + for (testn = 17; testn < 257; testn++) { computed = (pllreffreq * testn) / (testm * testp); if (computed > clock) @@ -404,11 +404,11 @@ static int mga_g200eh_set_plls(struct mga_device *mdev, long clock) if (tmpdelta < delta) { delta = tmpdelta; n = testn - 1; - m = (testm - 1) | ((n >> 1) & 0x80); + m = (testm - 1); p = testp - 1; } if ((clock * testp) >= 600000) - p |= 80; + p |= 0x80; } } } -- cgit v1.2.2 From 991155bacb91c988c45586525771758ddadd44ce Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Wed, 20 Mar 2013 16:31:38 +0200 Subject: Revert "crypto: talitos - add IPsec ESN support" This reverts commit e763eb699be723fb41af818118068c6b3afdaf8d. Current IPsec ESN implementation for authencesn(cbc(aes), hmac(sha)) (separate encryption and integrity algorithms) does not conform to RFC4303. 
ICV is generated by hashing the sequence SPI, SeqNum-High, SeqNum-Low, IV, Payload instead of SPI, SeqNum-Low, IV, Payload, SeqNum-High. Cc: # 3.8, 3.7 Reported-by: Chaoxing Lin Signed-off-by: Horia Geanta Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 09b184adf31b..5b2b5e61e4f9 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -1974,11 +1973,7 @@ struct talitos_alg_template { }; static struct talitos_alg_template driver_algs[] = { - /* - * AEAD algorithms. These use a single-pass ipsec_esp descriptor. - * authencesn(*,*) is also registered, although not present - * explicitly here. - */ + /* AEAD algorithms. These use a single-pass ipsec_esp descriptor */ { .type = CRYPTO_ALG_TYPE_AEAD, .alg.crypto = { .cra_name = "authenc(hmac(sha1),cbc(aes))", @@ -2820,9 +2815,7 @@ static int talitos_probe(struct platform_device *ofdev) if (hw_supports(dev, driver_algs[i].desc_hdr_template)) { struct talitos_crypto_alg *t_alg; char *name = NULL; - bool authenc = false; -authencesn: t_alg = talitos_alg_alloc(dev, &driver_algs[i]); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); @@ -2837,8 +2830,6 @@ authencesn: err = crypto_register_alg( &t_alg->algt.alg.crypto); name = t_alg->algt.alg.crypto.cra_driver_name; - authenc = authenc ? !authenc : - !(bool)memcmp(name, "authenc", 7); break; case CRYPTO_ALG_TYPE_AHASH: err = crypto_register_ahash( @@ -2851,25 +2842,8 @@ authencesn: dev_err(dev, "%s alg registration failed\n", name); kfree(t_alg); - } else { + } else list_add_tail(&t_alg->entry, &priv->alg_list); - if (authenc) { - struct crypto_alg *alg = - &driver_algs[i].alg.crypto; - - name = alg->cra_name; - memmove(name + 10, name + 7, - strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - name = alg->cra_driver_name; - memmove(name + 10, name + 7, - strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - goto authencesn; - } - } } } if (!list_empty(&priv->alg_list)) -- cgit v1.2.2 From 246bbedb9aaf27e2207501d93a869023a439fce5 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Wed, 20 Mar 2013 16:31:58 +0200 Subject: Revert "crypto: caam - add IPsec ESN support" This reverts commit 891104ed008e8646c7860fe5bc70b0aac55dcc6c. Current IPsec ESN implementation for authencesn(cbc(aes), hmac(sha)) (separate encryption and integrity algorithms) does not conform to RFC4303. ICV is generated by hashing the sequence SPI, SeqNum-High, SeqNum-Low, IV, Payload instead of SPI, SeqNum-Low, IV, Payload, SeqNum-High. Cc: # 3.8, 3.7 Reported-by: Chaoxing Lin Signed-off-by: Horia Geanta Reviewed-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 27 ++------------------------- drivers/crypto/caam/compat.h | 1 - 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index b2a0a0726a54..cf268b14ae9a 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1650,11 +1650,7 @@ struct caam_alg_template { }; static struct caam_alg_template driver_algs[] = { - /* - * single-pass ipsec_esp descriptor - * authencesn(*,*) is also registered, although not present - * explicitly here. 
- */ + /* single-pass ipsec_esp descriptor */ { .name = "authenc(hmac(md5),cbc(aes))", .driver_name = "authenc-hmac-md5-cbc-aes-caam", @@ -2217,9 +2213,7 @@ static int __init caam_algapi_init(void) for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { /* TODO: check if h/w supports alg */ struct caam_crypto_alg *t_alg; - bool done = false; -authencesn: t_alg = caam_alg_alloc(ctrldev, &driver_algs[i]); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); @@ -2233,25 +2227,8 @@ authencesn: dev_warn(ctrldev, "%s alg registration failed\n", t_alg->crypto_alg.cra_driver_name); kfree(t_alg); - } else { + } else list_add_tail(&t_alg->entry, &priv->alg_list); - if (driver_algs[i].type == CRYPTO_ALG_TYPE_AEAD && - !memcmp(driver_algs[i].name, "authenc", 7) && - !done) { - char *name; - - name = driver_algs[i].name; - memmove(name + 10, name + 7, strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - name = driver_algs[i].driver_name; - memmove(name + 10, name + 7, strlen(name) - 7); - memcpy(name + 7, "esn", 3); - - done = true; - goto authencesn; - } - } } if (!list_empty(&priv->alg_list)) dev_info(ctrldev, "%s algorithms registered in /proc/crypto\n", diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index cf15e7813801..762aeff626ac 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include -- cgit v1.2.2 From eda81bea894e5cd945e30f85b00546caf80fbecc Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 20 Mar 2013 09:44:17 +0100 Subject: usb: gadget: net2272: finally convert "CONFIG_USB_GADGET_NET2272_DMA" The Kconfig symbol USB_GADGET_NET2272_DMA was renamed to USB_NET2272_DMA in commit 193ab2a6070039e7ee2b9b9bebea754a7c52fd1b ("usb: gadget: allow multiple gadgets to be built"). That commit did not convert the only occurrence of the corresponding Kconfig macro. Convert that macro now. Signed-off-by: Paul Bolle Signed-off-by: Felipe Balbi --- drivers/usb/gadget/net2272.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c index 17628337c6b0..32524b631959 100644 --- a/drivers/usb/gadget/net2272.c +++ b/drivers/usb/gadget/net2272.c @@ -59,7 +59,7 @@ static const char * const ep_name[] = { }; #define DMA_ADDR_INVALID (~(dma_addr_t)0) -#ifdef CONFIG_USB_GADGET_NET2272_DMA +#ifdef CONFIG_USB_NET2272_DMA /* * use_dma: the NET2272 can use an external DMA controller. * Note that since there is no generic DMA api, some functions, -- cgit v1.2.2 From ed9dc8ce7a1c8115dba9483a9b51df8b63a2e0ef Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Thu, 7 Mar 2013 11:40:17 -0600 Subject: efivars: Allow disabling use as a pstore backend Add a new option, CONFIG_EFI_VARS_PSTORE, which can be set to N to avoid using efivars as a backend to pstore, as some users may want to compile out the code completely. Set the default to Y to maintain backwards compatibility, since this feature has always been enabled until now. 
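For example, a user who wants the code compiled out entirely would build with the option disabled in .config:

	# CONFIG_EFI_VARS_PSTORE is not set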
Signed-off-by: Seth Forshee Cc: Josh Boyer Cc: Matthew Garrett Cc: Seiji Aguchi Cc: Tony Luck Cc: Signed-off-by: Matt Fleming --- drivers/firmware/Kconfig | 9 +++++++ drivers/firmware/efivars.c | 64 +++++++++++++++------------------------------- 2 files changed, 29 insertions(+), 44 deletions(-) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 9b00072a020f..898023d8e486 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -53,6 +53,15 @@ config EFI_VARS Subsequent efibootmgr releases may be found at: +config EFI_VARS_PSTORE + bool "Register efivars backend for pstore" + depends on EFI_VARS && PSTORE + default y + help + Say Y here to enable use efivars as a backend to pstore. This + will allow writing console messages, crash dumps, or anything + else supported by pstore to EFI variables. + config EFI_PCDP bool "Console device selection via EFI PCDP or HCDP table" depends on ACPI && EFI && IA64 diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index fe62aa392239..37b6f247399e 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -1309,9 +1309,7 @@ static const struct inode_operations efivarfs_dir_inode_operations = { .create = efivarfs_create, }; -static struct pstore_info efi_pstore_info; - -#ifdef CONFIG_PSTORE +#ifdef CONFIG_EFI_VARS_PSTORE static int efi_pstore_open(struct pstore_info *psi) { @@ -1514,38 +1512,6 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, return 0; } -#else -static int efi_pstore_open(struct pstore_info *psi) -{ - return 0; -} - -static int efi_pstore_close(struct pstore_info *psi) -{ - return 0; -} - -static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, int *count, - struct timespec *timespec, - char **buf, struct pstore_info *psi) -{ - return -1; -} - -static int efi_pstore_write(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, int count, size_t size, - struct pstore_info *psi) -{ - return 0; -} - -static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count, - struct timespec time, struct pstore_info *psi) -{ - return 0; -} -#endif static struct pstore_info efi_pstore_info = { .owner = THIS_MODULE, @@ -1557,6 +1523,24 @@ static struct pstore_info efi_pstore_info = { .erase = efi_pstore_erase, }; +static void efivar_pstore_register(struct efivars *efivars) +{ + efivars->efi_pstore_info = efi_pstore_info; + efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL); + if (efivars->efi_pstore_info.buf) { + efivars->efi_pstore_info.bufsize = 1024; + efivars->efi_pstore_info.data = efivars; + spin_lock_init(&efivars->efi_pstore_info.buf_lock); + pstore_register(&efivars->efi_pstore_info); + } +} +#else +static void efivar_pstore_register(struct efivars *efivars) +{ + return; +} +#endif + static ssize_t efivar_create(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) @@ -2025,15 +2009,7 @@ int register_efivars(struct efivars *efivars, if (error) unregister_efivars(efivars); - efivars->efi_pstore_info = efi_pstore_info; - - efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL); - if (efivars->efi_pstore_info.buf) { - efivars->efi_pstore_info.bufsize = 1024; - efivars->efi_pstore_info.data = efivars; - spin_lock_init(&efivars->efi_pstore_info.buf_lock); - pstore_register(&efivars->efi_pstore_info); - } + efivar_pstore_register(efivars); register_filesystem(&efivarfs_type); -- cgit v1.2.2 From ec0971ba5372a4dfa753f232449d23a8fd98490e Mon Sep 
17 00:00:00 2001 From: Seth Forshee Date: Mon, 11 Mar 2013 16:17:50 -0500 Subject: efivars: Add module parameter to disable use as a pstore backend We know that with some firmware implementations writing too much data to UEFI variables can lead to bricking machines. Recent changes attempt to address this issue, but for some it may still be prudent to avoid writing large amounts of data until the solution has been proven on a wide variety of hardware. Crash dumps or other data from pstore can potentially be a large data source. Add a pstore_disable module parameter to efivars to allow disabling its use as a backend for pstore. Also add a config option, CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE, to allow setting the default value of this parameter to true (i.e. disabled by default). Signed-off-by: Seth Forshee Cc: Josh Boyer Cc: Matthew Garrett Cc: Seiji Aguchi Cc: Tony Luck Cc: Signed-off-by: Matt Fleming --- drivers/firmware/Kconfig | 9 +++++++++ drivers/firmware/efivars.c | 8 +++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 898023d8e486..42c759a4d047 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -62,6 +62,15 @@ config EFI_VARS_PSTORE will allow writing console messages, crash dumps, or anything else supported by pstore to EFI variables. +config EFI_VARS_PSTORE_DEFAULT_DISABLE + bool "Disable using efivars as a pstore backend by default" + depends on EFI_VARS_PSTORE + default n + help + Saying Y here will disable the use of efivars as a storage + backend for pstore by default. This setting can be overridden + using the efivars module's pstore_disable parameter. + config EFI_PCDP bool "Console device selection via EFI PCDP or HCDP table" depends on ACPI && EFI && IA64 diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 37b6f247399e..6607daf5a08d 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -103,6 +103,11 @@ MODULE_VERSION(EFIVARS_VERSION); */ #define GUID_LEN 36 +static bool efivars_pstore_disable = + IS_ENABLED(EFI_VARS_PSTORE_DEFAULT_DISABLE); + +module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644); + /* * The maximum size of VariableName + Data = 1024 * Therefore, it's reasonable to save that much @@ -2009,7 +2014,8 @@ int register_efivars(struct efivars *efivars, if (error) unregister_efivars(efivars); - efivar_pstore_register(efivars); + if (!efivars_pstore_disable) + efivar_pstore_register(efivars); register_filesystem(&efivarfs_type); -- cgit v1.2.2 From ec50bd32f1672d38ddce10fb1841cbfda89cfe9a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 1 Mar 2013 14:49:12 +0000 Subject: efivars: explicitly calculate length of VariableName It's not wise to assume VariableNameSize represents the length of VariableName, as not all firmware updates VariableNameSize in the same way (some don't update it at all if EFI_SUCCESS is returned).
There are even implementations out there that update VariableNameSize with values that are both larger than the string returned in VariableName and smaller than the buffer passed to GetNextVariableName(), which resulted in the following bug report from Michael Schroeder, > On HP z220 system (firmware version 1.54), some EFI variables are > incorrectly named : > > ls -d /sys/firmware/efi/vars/*8be4d* | grep -v -- -8be returns > /sys/firmware/efi/vars/dbxDefault-pport8be4df61-93ca-11d2-aa0d-00e098032b8c > /sys/firmware/efi/vars/KEKDefault-pport8be4df61-93ca-11d2-aa0d-00e098032b8c > /sys/firmware/efi/vars/SecureBoot-pport8be4df61-93ca-11d2-aa0d-00e098032b8c > /sys/firmware/efi/vars/SetupMode-Information8be4df61-93ca-11d2-aa0d-00e098032b8c The issue here is that because we blindly use VariableNameSize without verifying its value, we can potentially read garbage values from the buffer containing VariableName if VariableNameSize is larger than the length of VariableName. Since VariableName is a string, we can calculate its size by searching for the terminating NULL character. Reported-by: Frederic Crozat Cc: Matthew Garrett Cc: Josh Boyer Cc: Michael Schroeder Cc: Lee, Chun-Yi Cc: Lingzhu Xiang Cc: Seiji Aguchi Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 6607daf5a08d..1e9d9b9d7a12 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -1705,6 +1705,31 @@ static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor) return found; } +/* + * Returns the size of variable_name, in bytes, including the + * terminating NULL character, or variable_name_size if no NULL + * character is found among the first variable_name_size bytes. + */ +static unsigned long var_name_strnsize(efi_char16_t *variable_name, + unsigned long variable_name_size) +{ + unsigned long len; + efi_char16_t c; + + /* + * The variable name is, by definition, a NULL-terminated + * string, so make absolutely sure that variable_name_size is + * the value we expect it to be. If not, return the real size. + */ + for (len = 2; len <= variable_name_size; len += sizeof(c)) { + c = variable_name[(len / sizeof(c)) - 1]; + if (!c) + break; + } + + return min(len, variable_name_size); +} + static void efivar_update_sysfs_entries(struct work_struct *work) { struct efivars *efivars = &__efivars; @@ -1745,10 +1770,13 @@ static void efivar_update_sysfs_entries(struct work_struct *work) if (!found) { kfree(variable_name); break; - } else + } else { + variable_name_size = var_name_strnsize(variable_name, + variable_name_size); efivar_create_sysfs_entry(efivars, variable_name_size, variable_name, &vendor); + } } } @@ -1995,6 +2023,8 @@ int register_efivars(struct efivars *efivars, &vendor_guid); switch (status) { case EFI_SUCCESS: + variable_name_size = var_name_strnsize(variable_name, + variable_name_size); efivar_create_sysfs_entry(efivars, variable_name_size, variable_name, -- cgit v1.2.2 From e971318bbed610e28bb3fde9d548e6aaf0a6b02e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 7 Mar 2013 11:59:14 +0000 Subject: efivars: Handle duplicate names from get_next_variable() Some firmware exhibits a bug where the same VariableName and VendorGuid values are returned on multiple invocations of GetNextVariableName(). 
See, https://bugzilla.kernel.org/show_bug.cgi?id=47631 As a consequence of such a bug, Andre reports hitting the following WARN_ON() in the sysfs code after updating the BIOS on his, "Gigabyte Technology Co., Ltd. To be filled by O.E.M./Z77X-UD3H, BIOS F19e 11/21/2012)" machine, [ 0.581554] EFI Variables Facility v0.08 2004-May-17 [ 0.584914] ------------[ cut here ]------------ [ 0.585639] WARNING: at /home/andre/linux/fs/sysfs/dir.c:536 sysfs_add_one+0xd4/0x100() [ 0.586381] Hardware name: To be filled by O.E.M. [ 0.587123] sysfs: cannot create duplicate filename '/firmware/efi/vars/SbAslBufferPtrVar-01f33c25-764d-43ea-aeea-6b5a41f3f3e8' [ 0.588694] Modules linked in: [ 0.589484] Pid: 1, comm: swapper/0 Not tainted 3.8.0+ #7 [ 0.590280] Call Trace: [ 0.591066] [] ? sysfs_add_one+0xd4/0x100 [ 0.591861] [] warn_slowpath_common+0x7f/0xc0 [ 0.592650] [] warn_slowpath_fmt+0x4c/0x50 [ 0.593429] [] ? strlcat+0x65/0x80 [ 0.594203] [] sysfs_add_one+0xd4/0x100 [ 0.594979] [] create_dir+0x78/0xd0 [ 0.595753] [] sysfs_create_dir+0x86/0xe0 [ 0.596532] [] kobject_add_internal+0x9c/0x220 [ 0.597310] [] kobject_init_and_add+0x67/0x90 [ 0.598083] [] ? efivar_create_sysfs_entry+0x61/0x1c0 [ 0.598859] [] efivar_create_sysfs_entry+0x11b/0x1c0 [ 0.599631] [] register_efivars+0xde/0x420 [ 0.600395] [] ? edd_init+0x2f5/0x2f5 [ 0.601150] [] efivars_init+0xb8/0x104 [ 0.601903] [] do_one_initcall+0x12a/0x180 [ 0.602659] [] kernel_init_freeable+0x13e/0x1c6 [ 0.603418] [] ? loglevel+0x31/0x31 [ 0.604183] [] ? rest_init+0x80/0x80 [ 0.604936] [] kernel_init+0xe/0xf0 [ 0.605681] [] ret_from_fork+0x7c/0xb0 [ 0.606414] [] ? rest_init+0x80/0x80 [ 0.607143] ---[ end trace 1609741ab737eb29 ]--- There's not much we can do to work around and keep traversing the variable list once we hit this firmware bug. Our only solution is to terminate the loop because, as Lingzhu reports, some machines get stuck when they encounter duplicate names, > I had an IBM System x3100 M4 and x3850 X5 on which kernel would > get stuck in infinite loop creating duplicate sysfs files because, > for some reason, there are several duplicate boot entries in nvram > getting GetNextVariableName into a circle of iteration (with > period > 2). Also disable the workqueue, as efivar_update_sysfs_entries() uses GetNextVariableName() to figure out which variables have been created since the last iteration. That algorithm isn't going to work if GetNextVariableName() returns duplicates. Note that we don't disable EFI variable creation completely on the affected machines, it's just that any pstore dump-* files won't appear in sysfs until the next boot. 
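For illustration only, here is a minimal user-space model of the enumeration cut-off described above (every name in it is invented; only the control flow mirrors the patch):

#include <stdio.h>
#include <string.h>

/* Stand-in for GetNextVariableName() on buggy firmware: after the
 * second call it keeps returning "Boot0001" forever. */
static const char *fake_get_next_variable(int call)
{
	static const char *vars[] = { "Boot0000", "Boot0001" };
	return vars[call < 2 ? call : 1];
}

int main(void)
{
	char seen[16][32];
	int nseen = 0, call;

	for (call = 0; call < 16; call++) {
		const char *name = fake_get_next_variable(call);
		int i, dup = 0;

		for (i = 0; i < nseen; i++)
			if (!strcmp(seen[i], name))
				dup = 1;
		if (dup) {
			/* A repeated name means there is no guarantee we
			 * will ever see a new one: stop enumerating. */
			fprintf(stderr, "duplicate variable: %s\n", name);
			break;
		}
		strcpy(seen[nseen++], name);
	}
	return 0;
}

The kernel change below does the equivalent with variable_is_present() and terminates the loop by forcing status to EFI_NOT_FOUND.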
Reported-by: Andre Heider Reported-by: Lingzhu Xiang Tested-by: Lingzhu Xiang Cc: Seiji Aguchi Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 48 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 1e9d9b9d7a12..d64661fda4fd 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -170,6 +170,7 @@ efivar_create_sysfs_entry(struct efivars *efivars, static void efivar_update_sysfs_entries(struct work_struct *); static DECLARE_WORK(efivar_work, efivar_update_sysfs_entries); +static bool efivar_wq_enabled = true; /* Return the number of unicode characters in data */ static unsigned long @@ -1444,7 +1445,7 @@ static int efi_pstore_write(enum pstore_type_id type, spin_unlock_irqrestore(&efivars->lock, flags); - if (reason == KMSG_DUMP_OOPS) + if (reason == KMSG_DUMP_OOPS && efivar_wq_enabled) schedule_work(&efivar_work); *id = part; @@ -1975,6 +1976,35 @@ void unregister_efivars(struct efivars *efivars) } EXPORT_SYMBOL_GPL(unregister_efivars); +/* + * Print a warning when duplicate EFI variables are encountered and + * disable the sysfs workqueue since the firmware is buggy. + */ +static void dup_variable_bug(efi_char16_t *s16, efi_guid_t *vendor_guid, + unsigned long len16) +{ + size_t i, len8 = len16 / sizeof(efi_char16_t); + char *s8; + + /* + * Disable the workqueue since the algorithm it uses for + * detecting new variables won't work with this buggy + * implementation of GetNextVariableName(). + */ + efivar_wq_enabled = false; + + s8 = kzalloc(len8, GFP_KERNEL); + if (!s8) + return; + + for (i = 0; i < len8; i++) + s8[i] = s16[i]; + + printk(KERN_WARNING "efivars: duplicate variable: %s-%pUl\n", + s8, vendor_guid); + kfree(s8); +} + int register_efivars(struct efivars *efivars, const struct efivar_operations *ops, struct kobject *parent_kobj) @@ -2025,6 +2055,22 @@ int register_efivars(struct efivars *efivars, case EFI_SUCCESS: variable_name_size = var_name_strnsize(variable_name, variable_name_size); + + /* + * Some firmware implementations return the + * same variable name on multiple calls to + * get_next_variable(). Terminate the loop + * immediately as there is no guarantee that + * we'll ever see a different variable name, + * and may end up looping here forever. + */ + if (variable_is_present(variable_name, &vendor_guid)) { + dup_variable_bug(variable_name, &vendor_guid, + variable_name_size); + status = EFI_NOT_FOUND; + break; + } + efivar_create_sysfs_entry(efivars, variable_name_size, variable_name, -- cgit v1.2.2 From 4376c94618c26225e69e17b7c91169c45a90b292 Mon Sep 17 00:00:00 2001 From: fanchaoting Date: Thu, 21 Mar 2013 09:15:30 +0800 Subject: pnfs-block: removing DM device maybe cause oops when call dev_remove When pNFS block is using device mapper, unmounting later may cause an oops. We allocate "1 + sizeof(bl_umount_request)" bytes of memory for msg->data, which may overflow when we do "memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request))", because the size of bl_msg is more than 1 byte.
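The arithmetic is easy to reproduce in isolation. A standalone sketch with stand-in types (struct hdr and struct payload are made up; only the allocation pattern matches the bug):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct hdr { int type; int totallen; };	/* plays the role of bl_msg */
struct payload { char dev[8]; };	/* plays the role of bl_umount_request */

int main(void)
{
	struct hdr h = { 0, sizeof(struct payload) };
	struct payload p = { "sda" };
	size_t bad = 1 + sizeof(p);		/* what the old code allocated */
	size_t good = sizeof(h) + h.totallen;	/* what actually gets written */
	char *data = malloc(good);

	printf("old allocation: %zu bytes, bytes written: %zu\n", bad, good);
	memcpy(data, &h, sizeof(h));
	memcpy(data + sizeof(h), &p, sizeof(p)); /* overflows a 'bad'-sized buffer */
	free(data);
	return 0;
}

The fix below computes msg->len first and allocates exactly that much.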
Signed-off-by: fanchaoting Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayoutdm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 737d839bc17b..6fc7b5cae92b 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev) bl_pipe_msg.bl_wq = &nn->bl_wq; memset(msg, 0, sizeof(*msg)); - msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + msg->len = sizeof(bl_msg) + bl_msg.totallen; + msg->data = kzalloc(msg->len, GFP_NOFS); if (!msg->data) goto out; @@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev) memcpy(msg->data, &bl_msg, sizeof(bl_msg)); dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg->len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&nn->bl_wq, &wq); if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { -- cgit v1.2.2 From a073dbff359f4741013ae4b8395f5364c5e00b48 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Mar 2013 12:34:32 -0400 Subject: NFSv4.1: Fix a race in pNFS layoutcommit We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations where the two are out of sync. The first half of the problem is to ensure that pnfs_layoutcommit_inode clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg. We still need to keep the reference to those segments until the RPC call is finished, so in order to make it clear _where_ those references come from, we add a helper pnfs_list_write_lseg_done() that cleans up after pnfs_list_write_lseg. 
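The core of the race fix is that testing and clearing the NFS_LSEG_LAYOUTCOMMIT bit must be one atomic step, so each segment is claimed by exactly one path. A minimal C11 sketch of that idiom (struct seg and the names are hypothetical, not the kernel code):

#include <stdatomic.h>
#include <stdio.h>

struct seg { atomic_int lc_flag; };	/* models the LAYOUTCOMMIT bit */

/* Mirrors test_and_clear_bit(): only one caller can observe 1. */
static int claim_for_commit(struct seg *s)
{
	return atomic_exchange(&s->lc_flag, 0) == 1;
}

int main(void)
{
	struct seg s = { 1 };
	printf("first claim: %d\n", claim_for_commit(&s));	/* 1 */
	printf("second claim: %d\n", claim_for_commit(&s));	/* 0 */
	return 0;
}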
Signed-off-by: Trond Myklebust Acked-by: Benny Halevy Cc: stable@vger.kernel.org --- fs/nfs/nfs4proc.c | 14 -------------- fs/nfs/pnfs.c | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2671cb0f901..6ccdd4fd9b59 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) static void nfs4_layoutcommit_release(void *calldata) { struct nfs4_layoutcommit_data *data = calldata; - struct pnfs_layout_segment *lseg, *tmp; - unsigned long *bitlock = &NFS_I(data->args.inode)->flags; pnfs_cleanup_layoutcommit(data); - /* Matched by references in pnfs_set_layoutcommit */ - list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { - list_del_init(&lseg->pls_lc_list); - if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, - &lseg->pls_flags)) - pnfs_put_lseg(lseg); - } - - clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); - smp_mb__after_clear_bit(); - wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); - put_rpccred(data->cred); kfree(data); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 48ac5aad6258..3d900916fd41 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1746,11 +1746,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { if (lseg->pls_range.iomode == IOMODE_RW && - test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) list_add(&lseg->pls_lc_list, listp); } } +static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) +{ + struct pnfs_layout_segment *lseg, *tmp; + unsigned long *bitlock = &NFS_I(inode)->flags; + + /* Matched by references in pnfs_set_layoutcommit */ + list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { + list_del_init(&lseg->pls_lc_list); + pnfs_put_lseg(lseg); + } + + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); +} + void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); @@ -1795,6 +1811,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) if (nfss->pnfs_curr_ld->cleanup_layoutcommit) nfss->pnfs_curr_ld->cleanup_layoutcommit(data); + pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); } /* -- cgit v1.2.2 From 24956804349ca0eadcdde032d65e8c00b4214096 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Mar 2013 13:03:00 -0400 Subject: NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Note that clearing NFS_INO_LAYOUTCOMMIT is tricky, since it requires you to also clear the NFS_LSEG_LAYOUTCOMMIT bits from the layout segments. The only two sites that need to do this are the ones that call pnfs_return_layout() without first doing a layout commit. 
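The pnfs_lseg_dec_and_remove_zero() helper factored out below follows the usual "drop a reference, tear down on zero" idiom. A self-contained C11 approximation (struct obj is invented; the kernel uses atomic_dec_and_test()):

#include <stdatomic.h>
#include <stdio.h>

struct obj { atomic_int refcount; };

/* Returns 1 when the caller dropped the last reference and therefore
 * owns the teardown (unlink from lists, free, ...). */
static int put_obj(struct obj *o)
{
	return atomic_fetch_sub(&o->refcount, 1) == 1;
}

int main(void)
{
	struct obj o = { 2 };
	int first = put_obj(&o);	/* 0: another reference remains */
	int last = put_obj(&o);		/* 1: this caller tears down */
	printf("%d %d\n", first, last);
	return 0;
}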
Signed-off-by: Trond Myklebust Acked-by: Benny Halevy Cc: stable@vger.kernel.org --- fs/nfs/nfs4filelayout.c | 1 - fs/nfs/pnfs.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 49eeb044c109..4fb234d3aefb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) { if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return; - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); pnfs_return_layout(inode); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3d900916fd41..5044142c1216 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range, lo_seg_intersecting(lseg_range, recall_range); } +static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) +{ + if (!atomic_dec_and_test(&lseg->pls_refcount)) + return false; + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); + list_add(&lseg->pls_list, tmp_list); + return true; +} + /* Returns 1 if lseg is removed from list, 0 otherwise */ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) @@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); - if (atomic_dec_and_test(&lseg->pls_refcount)) { - pnfs_layout_remove_lseg(lseg->pls_layout, lseg); - list_add(&lseg->pls_list, tmp_list); + if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) rv = 1; - } } return rv; } @@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, return lseg; } +static void pnfs_clear_layoutcommit(struct inode *inode, + struct list_head *head) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct pnfs_layout_segment *lseg, *tmp; + + if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) + return; + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { + if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + continue; + pnfs_lseg_dec_and_remove_zero(lseg, head); + } +} + /* * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr * when the layout segment list is empty. 
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino) /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); + pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { @@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; @@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -1613,7 +1633,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) -- cgit v1.2.2 From 240286725d854331422cb15957f8d9bf2741d4e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 20 Mar 2013 13:23:33 -0400 Subject: NFSv4.1: Add a helper pnfs_commit_and_return_layout In order to be able to safely return the layout in nfs4_proc_setattr, we need to block new uses of the layout, wait for all outstanding users of the layout to complete, commit the layout and then return it. This patch adds a helper in order to do all this safely. 
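The ordering inside the new helper is the whole point: block new users first, then wait, commit, and return, and only then unblock. A toy model of just that sequencing (a pthread mutex standing in for the inode lock; the real steps reduced to prints):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;
static int block_lgets;		/* models lo->plh_block_lgets */

static int commit_and_return(void)
{
	pthread_mutex_lock(&ilock);
	block_lgets++;		/* 1. block new layoutgets and DS I/O */
	pthread_mutex_unlock(&ilock);

	printf("2. wait for dirty data (filemap_fdatawait)\n");
	printf("3. layoutcommit\n");
	printf("4. layoutreturn\n");

	pthread_mutex_lock(&ilock);
	block_lgets--;		/* 5. let layoutgets through again */
	pthread_mutex_unlock(&ilock);
	return 0;
}

int main(void)
{
	return commit_and_return();
}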
Signed-off-by: Trond Myklebust Cc: Boaz Harrosh --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/pnfs.c | 27 +++++++++++++++++++++++++++ fs/nfs/pnfs.h | 6 ++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6ccdd4fd9b59..26431cf62ddb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, int status; if (pnfs_ld_layoutret_on_setattr(inode)) - pnfs_return_layout(inode); + pnfs_commit_and_return_layout(inode); nfs_fattr_init(fattr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5044142c1216..4bdffe0ba025 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -866,6 +866,33 @@ out: } EXPORT_SYMBOL_GPL(_pnfs_return_layout); +int +pnfs_commit_and_return_layout(struct inode *inode) +{ + struct pnfs_layout_hdr *lo; + int ret; + + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo == NULL) { + spin_unlock(&inode->i_lock); + return 0; + } + pnfs_get_layout_hdr(lo); + /* Block new layoutgets and read/write to ds */ + lo->plh_block_lgets++; + spin_unlock(&inode->i_lock); + filemap_fdatawait(inode->i_mapping); + ret = pnfs_layoutcommit_inode(inode, true); + if (ret == 0) + ret = _pnfs_return_layout(inode); + spin_lock(&inode->i_lock); + lo->plh_block_lgets--; + spin_unlock(&inode->i_lock); + pnfs_put_layout_hdr(lo); + return ret; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94ba80417748..f5f8a470a647 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); +int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_write_data *); void pnfs_ld_read_done(struct nfs_read_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, @@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +static inline int pnfs_commit_and_return_layout(struct inode *inode) +{ + return 0; +} + static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) { -- cgit v1.2.2 From cb0e51d80694fc9964436be1a1a15275e991cb1e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 20 Mar 2013 21:31:42 +0000 Subject: lantiq_etop: use free_netdev(netdev) instead of kfree() Freeing a netdev without free_netdev() leads to netdev and tx queue leaks, and it may lead to dereferencing a freed pointer. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_etop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 6a2127489af7..bfdb06860397 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -769,7 +769,7 @@ ltq_etop_probe(struct platform_device *pdev) return 0; err_free: - kfree(dev); + free_netdev(dev); err_out: return err; } -- cgit v1.2.2 From ce16294fda230c787ce5c35f61b2f80d14d70a72 Mon Sep 17 00:00:00 2001 From: Lothar Waßmann Date: Thu, 21 Mar 2013 02:20:11 +0000 Subject: net: ethernet: cpsw: fix erroneous condition in error check The error check in cpsw_probe_dt() has an '&&' where an '||' is meant to be.
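The effect of the operator mix-up is easy to demonstrate; with '&&' the NULL case slips through whenever the stale length happens to look valid:

#include <stdio.h>

int main(void)
{
	const void *parp = NULL;		/* missing phy_id property */
	int lenp = sizeof(void *) * 2;		/* length still looks valid */

	if ((parp == NULL) && (lenp != (int)(sizeof(void *) * 2)))
		printf("buggy check: caught\n");
	else
		printf("buggy check: missed the NULL pointer\n");

	if ((parp == NULL) || (lenp != (int)(sizeof(void *) * 2)))
		printf("fixed check: caught\n");
	return 0;
}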
This causes a NULL pointer dereference when incomplete DT data is passed to the driver (the 'phy_id' property for cpsw_emac1 missing). Signed-off-by: Lothar Waßmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 75c48558e6fd..df32a090d08e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1364,7 +1364,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, struct platform_device *mdio; parp = of_get_property(slave_node, "phy_id", &lenp); - if ((parp == NULL) && (lenp != (sizeof(void *) * 2))) { + if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) { pr_err("Missing slave[%d] phy_id property\n", i); ret = -EINVAL; goto error_ret; -- cgit v1.2.2 From c101c81b5293cdcb616ed4948d0c4a4cfd1f481a Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Thu, 21 Mar 2013 05:55:51 +0000 Subject: net/mlx4_core: Fix wrong mask applied on EQ numbers in the wrapper Currently the mask is wrongly set in the MAP_EQ wrapper, fix that. Without the fix any EQ number above 511 is mapped to one below 511. Signed-off-by: Moshe Lazer Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 251ae2f93116..8e3123a1df88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -771,7 +771,7 @@ int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_slave_event_eq_info *event_eq = priv->mfunc.master.slave_state[slave].event_eq; u32 in_modifier = vhcr->in_modifier; - u32 eqn = in_modifier & 0x1FF; + u32 eqn = in_modifier & 0x3FF; u64 in_param = vhcr->in_param; int err = 0; int i; -- cgit v1.2.2 From 80cb0021163cb55b14c7c054073f89d63a2e1e40 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 21 Mar 2013 05:55:52 +0000 Subject: net/mlx4_core: Fix wrong order of flow steering resources removal On the resource tracker cleanup flow, the DMFS rules must be deleted before we destroy the QPs, else the HW may attempt to do packet steering to non-existent QPs. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 2995687f1aee..0d1d9679179c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3806,6 +3806,7 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) mutex_lock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); /*VLAN*/ rem_slave_macs(dev, slave); + rem_slave_fs_rule(dev, slave); rem_slave_qps(dev, slave); rem_slave_srqs(dev, slave); rem_slave_cqs(dev, slave); @@ -3814,6 +3815,5 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) rem_slave_mtts(dev, slave); rem_slave_counters(dev, slave); rem_slave_xrcdns(dev, slave); - rem_slave_fs_rule(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } -- cgit v1.2.2 From 6efb5fac4d6b617972ab5a10bf67e0eba2c2d212 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 21 Mar 2013 05:55:53 +0000 Subject: net/mlx4_en: Remove ethtool flow steering rules before releasing QPs Fix the ethtool flow steering rules cleanup to be carried out before releasing the RX QPs. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 995d4b6d5c1e..f278b10ef714 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1637,6 +1637,17 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); + /* Remove flow steering rules for the port*/ + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ASSERT_RTNL(); + list_for_each_entry_safe(flow, tmp_flow, + &priv->ethtool_list, list) { + mlx4_flow_detach(mdev->dev, flow->id); + list_del(&flow->list); + } + } + mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ @@ -1657,17 +1668,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN)) mdev->mac_removed[priv->port] = 1; - /* Remove flow steering rules for the port*/ - if (mdev->dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) { - ASSERT_RTNL(); - list_for_each_entry_safe(flow, tmp_flow, - &priv->ethtool_list, list) { - mlx4_flow_detach(mdev->dev, flow->id); - list_del(&flow->list); - } - } - /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); -- cgit v1.2.2 From 1e3f7b324e46b772dec1bb6dd96ae745fc085401 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 21 Mar 2013 05:55:54 +0000 Subject: net/mlx4_core: Always use 64 bit resource ID when doing lookup One of the resource tracker code paths was wrongly using int and not u64 for resource tracking IDs, fix it. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 0d1d9679179c..b0ccdb55ca46 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -355,7 +355,7 @@ static int mpt_mask(struct mlx4_dev *dev) return dev->caps.num_mpts - 1; } -static void *find_res(struct mlx4_dev *dev, int res_id, +static void *find_res(struct mlx4_dev *dev, u64 res_id, enum mlx4_resource type) { struct mlx4_priv *priv = mlx4_priv(dev); -- cgit v1.2.2 From 2c473ae7e5826c108e52f4a9d75425fd4c6f9ed1 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 21 Mar 2013 05:55:55 +0000 Subject: net/mlx4_core: Disallow releasing VF QPs which have steering rules VF QPs must not be released when they have steering rules attached to them. For that end, introduce a reference count field to the QP object in the SRIOV resource tracker which is incremented/decremented when steering rules are attached/detached to it. QPs can be released by VF only when their ref count is zero. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 41 +++++++++++++++++----- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index b0ccdb55ca46..1391b52f443a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -99,6 +99,7 @@ struct res_qp { struct list_head mcg_list; spinlock_t mcg_spl; int local_qpn; + atomic_t ref_count; }; enum res_mtt_states { @@ -197,6 +198,7 @@ enum res_fs_rule_states { struct res_fs_rule { struct res_common com; + int qpn; }; static void *res_tracker_lookup(struct rb_root *root, u64 res_id) @@ -447,6 +449,7 @@ static struct res_common *alloc_qp_tr(int id) ret->local_qpn = id; INIT_LIST_HEAD(&ret->mcg_list); spin_lock_init(&ret->mcg_spl); + atomic_set(&ret->ref_count, 0); return &ret->com; } @@ -554,7 +557,7 @@ static struct res_common *alloc_xrcdn_tr(int id) return &ret->com; } -static struct res_common *alloc_fs_rule_tr(u64 id) +static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) { struct res_fs_rule *ret; @@ -564,7 +567,7 @@ static struct res_common *alloc_fs_rule_tr(u64 id) ret->com.res_id = id; ret->com.state = RES_FS_RULE_ALLOCATED; - + ret->qpn = qpn; return &ret->com; } @@ -602,7 +605,7 @@ static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave, ret = alloc_xrcdn_tr(id); break; case RES_FS_RULE: - ret = alloc_fs_rule_tr(id); + ret = alloc_fs_rule_tr(id, extra); break; default: return NULL; @@ -671,10 +674,14 @@ undo: static int remove_qp_ok(struct res_qp *res) { - if (res->com.state == RES_QP_BUSY) + if (res->com.state == RES_QP_BUSY || atomic_read(&res->ref_count) || + !list_empty(&res->mcg_list)) { + pr_err("resource tracker: fail to remove qp, state %d, ref_count %d\n", + res->com.state, atomic_read(&res->ref_count)); return -EBUSY; - else if (res->com.state != RES_QP_RESERVED) + } else if (res->com.state != RES_QP_RESERVED) { return -EPERM; + } return 0; } @@ -3124,6 +3131,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC]; int err; int qpn; + struct 
res_qp *rqp; struct mlx4_net_trans_rule_hw_ctrl *ctrl; struct _rule_hw *rule_header; int header_id; @@ -3134,7 +3142,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf; qpn = be32_to_cpu(ctrl->qpn) & 0xffffff; - err = get_res(dev, slave, qpn, RES_QP, NULL); + err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { pr_err("Steering rule with qpn 0x%x rejected.\n", qpn); return err; @@ -3175,14 +3183,16 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, if (err) goto err_put; - err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0); + err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, qpn); if (err) { mlx4_err(dev, "Fail to add flow steering resources.\n "); /* detach rule*/ mlx4_cmd(dev, vhcr->out_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + goto err_put; } + atomic_inc(&rqp->ref_count); err_put: put_res(dev, slave, qpn, RES_QP); return err; @@ -3195,20 +3205,35 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd) { int err; + struct res_qp *rqp; + struct res_fs_rule *rrule; if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) return -EOPNOTSUPP; + err = get_res(dev, slave, vhcr->in_param, RES_FS_RULE, &rrule); + if (err) + return err; + /* Release the rule form busy state before removal */ + put_res(dev, slave, vhcr->in_param, RES_FS_RULE); + err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp); + if (err) + return err; + err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0); if (err) { mlx4_err(dev, "Fail to remove flow steering resources.\n "); - return err; + goto out; } err = mlx4_cmd(dev, vhcr->in_param, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (!err) + atomic_dec(&rqp->ref_count); +out: + put_res(dev, slave, rrule->qpn, RES_QP); return err; } -- cgit v1.2.2 From 55a63d4da3b8850480a1c5b222f77c739e30e346 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Mar 2013 17:20:12 +0100 Subject: ALSA: hda - Fix DAC assignment for independent HP The generic parser should evaluate the availability of the independent HP when specified. Otherwise a DAC without the direct connection to the corresponding pin may be assigned for the HP, but the driver doesn't check it at all. The problem was actually seen on some machines with VT1708s or equivalent codec, where DAC0 is assigned to HP although it can be connected only via aamix. This patch adds the badness evaluation for the independent HP to make it working properly. 
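For context, the parser scores candidate configurations rather than rejecting them outright; an impossible independent-HP setup now just adds a penalty, and the cheapest configuration wins. A toy model of that selection (values and names invented; only BAD_NO_INDEP_HP comes from the patch):

#include <stdio.h>

#define BAD_NO_INDEP_HP 0x40

struct cfg { const char *name; int badness; int indep_hp_ok; };

int main(void)
{
	struct cfg cfgs[] = {
		{ "dac-via-aamix", 0x10, 0 },
		{ "direct-dac",    0x20, 1 },
	};
	int i, best = -1, best_score = 0;

	for (i = 0; i < 2; i++) {
		int score = cfgs[i].badness;
		if (!cfgs[i].indep_hp_ok)
			score += BAD_NO_INDEP_HP;	/* penalize, don't reject */
		if (best < 0 || score < best_score) {
			best = i;
			best_score = score;
		}
	}
	printf("chosen: %s (badness 0x%x)\n", cfgs[best].name, best_score);
	return 0;
}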
Reported-by: Lydia Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 78897d05d80f..43c2ea539561 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -995,6 +995,8 @@ enum { BAD_NO_EXTRA_SURR_DAC = 0x101, /* Primary DAC shared with main surrounds */ BAD_SHARED_SURROUND = 0x100, + /* No independent HP possible */ + BAD_NO_INDEP_HP = 0x40, /* Primary DAC shared with main CLFE */ BAD_SHARED_CLFE = 0x10, /* Primary DAC shared with extra surrounds */ @@ -1392,6 +1394,43 @@ static int check_aamix_out_path(struct hda_codec *codec, int path_idx) return snd_hda_get_path_idx(codec, path); } +/* check whether the independent HP is available with the current config */ +static bool indep_hp_possible(struct hda_codec *codec) +{ + struct hda_gen_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + struct nid_path *path; + int i, idx; + + if (cfg->line_out_type == AUTO_PIN_HP_OUT) + idx = spec->out_paths[0]; + else + idx = spec->hp_paths[0]; + path = snd_hda_get_path_from_idx(codec, idx); + if (!path) + return false; + + /* assume no path conflicts unless aamix is involved */ + if (!spec->mixer_nid || !is_nid_contained(path, spec->mixer_nid)) + return true; + + /* check whether output paths contain aamix */ + for (i = 0; i < cfg->line_outs; i++) { + if (spec->out_paths[i] == idx) + break; + path = snd_hda_get_path_from_idx(codec, spec->out_paths[i]); + if (path && is_nid_contained(path, spec->mixer_nid)) + return false; + } + for (i = 0; i < cfg->speaker_outs; i++) { + path = snd_hda_get_path_from_idx(codec, spec->speaker_paths[i]); + if (path && is_nid_contained(path, spec->mixer_nid)) + return false; + } + + return true; +} + /* fill the empty entries in the dac array for speaker/hp with the * shared dac pointed by the paths */ @@ -1545,6 +1584,9 @@ static int fill_and_eval_dacs(struct hda_codec *codec, badness += BAD_MULTI_IO; } + if (spec->indep_hp && !indep_hp_possible(codec)) + badness += BAD_NO_INDEP_HP; + /* re-fill the shared DAC for speaker / headphone */ if (cfg->line_out_type != AUTO_PIN_HP_OUT) refill_shared_dacs(codec, cfg->hp_outs, @@ -1758,6 +1800,10 @@ static int parse_output_paths(struct hda_codec *codec) cfg->speaker_pins, val); } + /* clear indep_hp flag if not available */ + if (spec->indep_hp && !indep_hp_possible(codec)) + spec->indep_hp = 0; + kfree(best_cfg); return 0; } -- cgit v1.2.2 From ae5fc98728c8bbbd6d7cab0b9781671fc4419c1b Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 21 Mar 2013 20:33:46 +0400 Subject: net: fix *_DIAG_MAX constants Follow the common pattern and define *_DIAG_MAX like: [...] __XXX_DIAG_MAX, }; Because everyone is used to do: struct nlattr *attrs[XXX_DIAG_MAX+1]; nla_parse([...], XXX_DIAG_MAX, [...] Reported-by: Thomas Graf Cc: "David S. Miller" Cc: Pavel Emelyanov Cc: Eric Dumazet Cc: "Paul E. McKenney" Cc: David Howells Signed-off-by: Andrey Vagin Signed-off-by: David S. 
Miller --- include/uapi/linux/packet_diag.h | 4 +++- include/uapi/linux/unix_diag.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index 93f5fa94a431..afafd703ad92 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -33,9 +33,11 @@ enum { PACKET_DIAG_TX_RING, PACKET_DIAG_FANOUT, - PACKET_DIAG_MAX, + __PACKET_DIAG_MAX, }; +#define PACKET_DIAG_MAX (__PACKET_DIAG_MAX - 1) + struct packet_diag_info { __u32 pdi_index; __u32 pdi_version; diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h index b8a24941db21..b9e2a6a7446f 100644 --- a/include/uapi/linux/unix_diag.h +++ b/include/uapi/linux/unix_diag.h @@ -39,9 +39,11 @@ enum { UNIX_DIAG_MEMINFO, UNIX_DIAG_SHUTDOWN, - UNIX_DIAG_MAX, + __UNIX_DIAG_MAX, }; +#define UNIX_DIAG_MAX (__UNIX_DIAG_MAX - 1) + struct unix_diag_vfs { __u32 udiag_vfs_ino; __u32 udiag_vfs_dev; -- cgit v1.2.2 From 06ae43f34bcc07a0b6be8bf78a1c895bcd12c839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Mar 2013 13:19:30 -0400 Subject: Don't bother with redoing rw_verify_area() from default_file_splice_from() default_file_splice_from() ends up calling vfs_write() (via a very convoluted callchain). It's overkill, since we have already done rw_verify_area() in the caller; by the time we call vfs_write() we are under set_fs(KERNEL_DS), so access_ok() is also pointless. Add a new helper (__kernel_write()), use it instead of kernel_write() in there. Signed-off-by: Al Viro --- fs/internal.h | 5 +++++ fs/read_write.c | 25 +++++++++++++++++++++++++ fs/splice.c | 4 +++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/internal.h b/fs/internal.h index 507141fceb99..4be78237d896 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool); * dcache.c */ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); + +/* + * read_write.c + */ +extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); diff --git a/fs/read_write.c b/fs/read_write.c index a698eff457fb..f7b5a23b804b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -17,6 +17,7 @@ #include #include #include "read_write.h" +#include "internal.h" #include #include @@ -417,6 +418,30 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof EXPORT_SYMBOL(do_sync_write); +ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) +{ + mm_segment_t old_fs; + const char __user *p; + ssize_t ret; + + old_fs = get_fs(); + set_fs(get_ds()); + p = (__force const char __user *)buf; + if (count > MAX_RW_COUNT) + count = MAX_RW_COUNT; + if (file->f_op->write) + ret = file->f_op->write(file, p, count, pos); + else + ret = do_sync_write(file, p, count, pos); + set_fs(old_fs); + if (ret > 0) { + fsnotify_modify(file); + add_wchar(current, ret); + } + inc_syscw(current); + return ret; +} + ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { ssize_t ret; diff --git a/fs/splice.c b/fs/splice.c index 718bd0056384..29e394e49ddd 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -31,6 +31,7 @@ #include #include #include +#include "internal.h" /* * Attempt to steal a page from a pipe buffer.
This should perhaps go into @@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, { int ret; void *data; + loff_t tmp = sd->pos; data = buf->ops->map(pipe, buf, 0); - ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); + ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); buf->ops->unmap(pipe, buf, data); return ret; -- cgit v1.2.2 From f853c616883a8de966873a1dab283f1369e275a1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Mar 2013 09:52:19 -0400 Subject: cifs: ignore everything in SPNEGO blob after mechTypes We've had several reports of people attempting to mount Windows 8 shares and getting failures with a return code of -EINVAL. The default sec= mode changed recently to sec=ntlmssp. With that, we expect and parse a SPNEGO blob from the server in the NEGOTIATE reply. The current decode_negTokenInit function first parses all of the mechTypes and then tries to parse the rest of the negTokenInit reply. The parser however currently expects a mechListMIC or nothing to follow the mechTypes, but Windows 8 puts a mechToken field there instead to carry some info for the new NegoEx stuff. In practice, we don't do anything with the fields after the mechTypes anyway so I don't see any real benefit in continuing to parse them. This patch just has the kernel ignore the fields after the mechTypes. We'll probably need to reinstate some of this if we ever want to support NegoEx. Reported-by: Jason Burgess Reported-by: Yan Li Signed-off-by: Jeff Layton Cc: Signed-off-by: Steve French --- fs/cifs/asn1.c | 53 +++++------------------------------------------------ 1 file changed, 5 insertions(+), 48 deletions(-) diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce34e0bc..1d36db114772 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } - /* mechlistMIC */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - /* Check if we have reached the end of the blob, but with - no mechListMic (e.g. NTLMSSP instead of KRB5) */ - if (ctx.error == ASN1_ERR_DEC_EMPTY) - goto decode_negtoken_exit; - cFYI(1, "Error decoding last part negTokenInit exit3"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - /* tag = 3 indicating mechListMIC */ - cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* sequence */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit5"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - } - - /* sequence of */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit 7"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* general string */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit9"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) - || (tag != ASN1_GENSTR)) { - cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - cFYI(1, "Need to call asn1_octets_decode() function for %s", - ctx.pointer); /* is this UTF-8 or ASCII? 
*/ -decode_negtoken_exit: + /* + * We currently ignore anything at the end of the SPNEGO blob after + * the mechTypes have been parsed, since none of that info is + * used at the moment. + */ return 1; } -- cgit v1.2.2 From 48a23fac5eb0030a2d578ee2bde21b6e035b3d57 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 19 Mar 2013 20:07:42 +0100 Subject: pinctrl: mvebu: fix checking for SoC specific controls This patch fixes a minor bug (probably due to a typo) while checking the SoC specific controls in mvebu_pinctrl_probe(). Acked-by: Sebastian Hesselbarth Signed-off-by: Simon Guinot Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-mvebu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c index c689c04a4f52..2d2f0a43d36b 100644 --- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c +++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c @@ -620,7 +620,7 @@ int mvebu_pinctrl_probe(struct platform_device *pdev) /* special soc specific control */ if (ctrl->mpp_get || ctrl->mpp_set) { - if (!ctrl->name || !ctrl->mpp_set || !ctrl->mpp_set) { + if (!ctrl->name || !ctrl->mpp_get || !ctrl->mpp_set) { dev_err(&pdev->dev, "wrong soc control info\n"); return -EINVAL; } -- cgit v1.2.2 From 740924a267e85de09707ea158bbf594b4d8bae01 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Thu, 21 Mar 2013 12:21:47 +0100 Subject: pinmux: forbid mux_usecount to be set at UINT_MAX If pin_free is called on a pin already freed, mux_usecount is set to UINT_MAX which is really a bad idea. This will issue a warning, so that we can correct the code responsible for the double free. Signed-off-by: Richard Genoud Reviewed-by: Stephen Warren Signed-off-by: Linus Walleij --- drivers/pinctrl/pinmux.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 1a00658b3ea0..bd83c8b01cd1 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -194,6 +194,11 @@ static const char *pin_free(struct pinctrl_dev *pctldev, int pin, } if (!gpio_range) { + /* + * A pin should not be freed more times than allocated. + */ + if (WARN_ON(!desc->mux_usecount)) + return NULL; desc->mux_usecount--; if (desc->mux_usecount) return NULL; -- cgit v1.2.2 From 4cec1893d8fe01ef6adc89a135a804acd2989a48 Mon Sep 17 00:00:00 2001 From: Shaik Ameer Basha Date: Thu, 21 Feb 2013 07:54:18 -0300 Subject: [media] fimc-lite: Initialize 'step' field in fimc_lite_ctrl structure v4l2_ctrl_new() uses check_range() for control range checking. This function expects 'step' value for V4L2_CTRL_TYPE_BOOLEAN type control. If 'step' value doesn't match to '1', it returns -ERANGE error. This patch adds the default .step value to 1. 
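A simplified model of the check_range() rule the message refers to, restricted to boolean controls (the real function in the V4L2 core handles every control type; this sketch is not its actual code):

#include <stdio.h>

enum ctrl_type { CTRL_BOOLEAN, CTRL_INTEGER };
struct ctrl { enum ctrl_type type; long min, max; unsigned long step; };

static int check_range(const struct ctrl *c)
{
	if (c->type == CTRL_BOOLEAN && c->step != 1)
		return -34;	/* -ERANGE */
	return 0;
}

int main(void)
{
	struct ctrl missing_step = { CTRL_BOOLEAN, 0, 1, 0 };	/* .step left at 0 */
	struct ctrl with_step = { CTRL_BOOLEAN, 0, 1, 1 };

	printf("step=0: %d, step=1: %d\n",
	       check_range(&missing_step), check_range(&with_step));
	return 0;
}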
Signed-off-by: Shaik Ameer Basha Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-fimc/fimc-lite.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/s5p-fimc/fimc-lite.c b/drivers/media/platform/s5p-fimc/fimc-lite.c index bfc4206935c8..bbc35de7db27 100644 --- a/drivers/media/platform/s5p-fimc/fimc-lite.c +++ b/drivers/media/platform/s5p-fimc/fimc-lite.c @@ -1408,6 +1408,7 @@ static const struct v4l2_ctrl_config fimc_lite_ctrl = { .id = V4L2_CTRL_CLASS_USER | 0x1001, .type = V4L2_CTRL_TYPE_BOOLEAN, .name = "Test Pattern 640x480", + .step = 1, }; static int fimc_lite_create_capture_subdev(struct fimc_lite *fimc) -- cgit v1.2.2 From 6a5360966ac44905ecb840dd6c9d736072145df2 Mon Sep 17 00:00:00 2001 From: Shaik Ameer Basha Date: Thu, 21 Feb 2013 07:54:17 -0300 Subject: [media] fimc-lite: Fix the variable type to avoid possible crash Changing the variable type to 'int' from 'unsigned int'. Driver logic expects the variable type to be 'int'. Signed-off-by: Shaik Ameer Basha Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-fimc/fimc-lite-reg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/platform/s5p-fimc/fimc-lite-reg.c b/drivers/media/platform/s5p-fimc/fimc-lite-reg.c index f0af0754a7b4..ac9663ce2a49 100644 --- a/drivers/media/platform/s5p-fimc/fimc-lite-reg.c +++ b/drivers/media/platform/s5p-fimc/fimc-lite-reg.c @@ -128,10 +128,10 @@ static const u32 src_pixfmt_map[8][3] = { void flite_hw_set_source_format(struct fimc_lite *dev, struct flite_frame *f) { enum v4l2_mbus_pixelcode pixelcode = dev->fmt->mbus_code; - unsigned int i = ARRAY_SIZE(src_pixfmt_map); + int i = ARRAY_SIZE(src_pixfmt_map); u32 cfg; - while (i-- >= 0) { + while (--i >= 0) { if (src_pixfmt_map[i][0] == pixelcode) break; } @@ -224,9 +224,9 @@ static void flite_hw_set_out_order(struct fimc_lite *dev, struct flite_frame *f) { V4L2_MBUS_FMT_VYUY8_2X8, FLITE_REG_CIODMAFMT_CRYCBY }, }; u32 cfg = readl(dev->regs + FLITE_REG_CIODMAFMT); - unsigned int i = ARRAY_SIZE(pixcode); + int i = ARRAY_SIZE(pixcode); - while (i-- >= 0) + while (--i >= 0) if (pixcode[i][0] == dev->fmt->mbus_code) break; cfg &= ~FLITE_REG_CIODMAFMT_YCBCR_ORDER_MASK; -- cgit v1.2.2 From 5d83790be7c88a5ea99ab32ca196d99cf4d177a4 Mon Sep 17 00:00:00 2001 From: Shaik Ameer Basha Date: Wed, 6 Feb 2013 01:46:18 -0300 Subject: [media] exynos-gsc: send valid m2m ctx to gsc_m2m_job_finish gsc_m2m_job_finish() has to be called with the m2m context for the necessary cleanup while resume. But currently gsc_m2m_job_finish() always passes m2m context as NULL. This patch preserves the context before making it null, for necessary cleanup. Use gsc_m2m_opened() instead of gsc_m2m_active() in gsc_resume().
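The underlying pattern of this fix and of the s5p-fimc one that follows: take a local copy of a pointer before clearing it under the lock, then use the copy afterwards. A minimal sketch (names invented; a pthread mutex stands in for the spinlock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t slock = PTHREAD_MUTEX_INITIALIZER;
static void *m2m_ctx = (void *)0x1;	/* models gsc->m2m.ctx */

int main(void)
{
	void *ctx;

	pthread_mutex_lock(&slock);
	ctx = m2m_ctx;		/* preserve before clearing */
	m2m_ctx = NULL;		/* clear for full H/W setup after resume */
	pthread_mutex_unlock(&slock);

	printf("job_finish gets %p, not NULL\n", ctx);
	return 0;
}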
Signed-off-by: Shaik Ameer Basha Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index 82d9f6ac12f3..33b5ffc8d66d 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -1054,16 +1054,18 @@ static int gsc_m2m_suspend(struct gsc_dev *gsc) static int gsc_m2m_resume(struct gsc_dev *gsc) { + struct gsc_ctx *ctx; unsigned long flags; spin_lock_irqsave(&gsc->slock, flags); /* Clear for full H/W setup in first run after resume */ + ctx = gsc->m2m.ctx; gsc->m2m.ctx = NULL; spin_unlock_irqrestore(&gsc->slock, flags); if (test_and_clear_bit(ST_M2M_SUSPENDED, &gsc->state)) - gsc_m2m_job_finish(gsc->m2m.ctx, - VB2_BUF_STATE_ERROR); + gsc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); + return 0; } @@ -1204,7 +1206,7 @@ static int gsc_resume(struct device *dev) /* Do not resume if the device was idle before system suspend */ spin_lock_irqsave(&gsc->slock, flags); if (!test_and_clear_bit(ST_SUSPEND, &gsc->state) || - !gsc_m2m_active(gsc)) { + !gsc_m2m_opened(gsc)) { spin_unlock_irqrestore(&gsc->slock, flags); return 0; } -- cgit v1.2.2 From e34a89b39719dd1eb08e0b23c907e98b103078b4 Mon Sep 17 00:00:00 2001 From: Shaik Ameer Basha Date: Wed, 6 Feb 2013 01:47:10 -0300 Subject: [media] s5p-fimc: send valid m2m ctx to fimc_m2m_job_finish fimc_m2m_job_finish() has to be called with the m2m context for the necessary cleanup while resume. But currently fimc_m2m_job_finish() always passes m2m context as NULL. This patch preserves the context before making it null, for necessary cleanup. Signed-off-by: Shaik Ameer Basha Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-fimc/fimc-core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/s5p-fimc/fimc-core.c b/drivers/media/platform/s5p-fimc/fimc-core.c index e3916bde45cf..0f513dd19f86 100644 --- a/drivers/media/platform/s5p-fimc/fimc-core.c +++ b/drivers/media/platform/s5p-fimc/fimc-core.c @@ -850,16 +850,18 @@ static int fimc_m2m_suspend(struct fimc_dev *fimc) static int fimc_m2m_resume(struct fimc_dev *fimc) { + struct fimc_ctx *ctx; unsigned long flags; spin_lock_irqsave(&fimc->slock, flags); /* Clear for full H/W setup in first run after resume */ + ctx = fimc->m2m.ctx; fimc->m2m.ctx = NULL; spin_unlock_irqrestore(&fimc->slock, flags); if (test_and_clear_bit(ST_M2M_SUSPENDED, &fimc->state)) - fimc_m2m_job_finish(fimc->m2m.ctx, - VB2_BUF_STATE_ERROR); + fimc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); + return 0; } -- cgit v1.2.2 From 90c0ae50097bba165022ddf0a592aa4001e23aaa Mon Sep 17 00:00:00 2001 From: Arun Kumar K Date: Tue, 26 Feb 2013 18:02:11 -0300 Subject: [media] s5p-mfc: Fix frame skip bug The issue was seen in VP8 decoding where the last frame was skipped by the driver. This patch gets the correct frame_type value to fix this bug. 
Signed-off-by: Arun Kumar K Signed-off-by: Arun Mankuzhi Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-mfc/s5p_mfc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index e84703c314ce..1cb6d57987c6 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -276,7 +276,7 @@ static void s5p_mfc_handle_frame_new(struct s5p_mfc_ctx *ctx, unsigned int err) unsigned int frame_type; dspl_y_addr = s5p_mfc_hw_call(dev->mfc_ops, get_dspl_y_adr, dev); - frame_type = s5p_mfc_hw_call(dev->mfc_ops, get_dec_frame_type, dev); + frame_type = s5p_mfc_hw_call(dev->mfc_ops, get_disp_frame_type, ctx); /* If frame is same as previous then skip and do not dequeue */ if (frame_type == S5P_FIMV_DECODE_FRAME_SKIPPED) { -- cgit v1.2.2 From 053e09f319c25fb9c698ad56b9b65058939ec6ef Mon Sep 17 00:00:00 2001 From: Arun Kumar K Date: Wed, 6 Mar 2013 09:15:57 -0300 Subject: [media] s5p-mfc: Fix encoder control 15 issue mfc-encoder is not working in the latest kernel, giving the error "Adding control (15) failed". Adding the missing step parameter in this control to fix the issue. Signed-off-by: Arun Kumar K Signed-off-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-mfc/s5p_mfc_enc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c index 2356fd52a169..4f6b553c4b2d 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c @@ -232,6 +232,7 @@ static struct mfc_control controls[] = { .minimum = 0, .maximum = 1, .default_value = 0, + .step = 1, .menu_skip_mask = 0, }, { -- cgit v1.2.2 From 8c6ecdd7ce11e737eeffbd78fd6a9d4c47d0b26d Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 5 Feb 2013 15:43:08 -0300 Subject: [media] s5p-fimc: Do not attempt to disable not enabled media pipeline This fixes the following warnings when all links are being disconnected: [ 20.080000] WARNING: at drivers/media/platform/s5p-fimc/fimc-mdevice.c:1269 __fimc_md_set_camclk+0x208/0x20c() [ 20.090000] Modules linked in: [ 20.095000] [] (unwind_backtrace+0x0/0x13c) from [] (warn_slowpath_common+0x54/0x64) [ 20.105000] [] (warn_slowpath_common+0x54/0x64) from [] (warn_slowpath_null+0x1c/0x24) [ 20.115000] [] (warn_slowpath_null+0x1c/0x24) from [] (__fimc_md_set_camclk+0x208/0x20c) [ 20.125000] [] (__fimc_md_set_camclk+0x208/0x20c) from [] (__fimc_pipeline_close+0x38/0x48) [ 20.135000] [] (__fimc_pipeline_close+0x38/0x48) from [] (fimc_md_link_notify+0x10c/0x198) [ 20.145000] [] (fimc_md_link_notify+0x10c/0x198) from [] (__media_entity_setup_link+0x1c0/0x1e8) [ 20.155000] [] (__media_entity_setup_link+0x1c0/0x1e8) from [] (media_device_ioctl+0x2c0/0x41c) [ 20.165000] [] (media_device_ioctl+0x2c0/0x41c) from [] (media_ioctl+0x30/0x34) [ 20.175000] [] (media_ioctl+0x30/0x34) from [] (do_vfs_ioctl+0x84/0x5e8) [ 20.185000] [] (do_vfs_ioctl+0x84/0x5e8) from [] (sys_ioctl+0x3c/0x60) [ 20.190000] [] (sys_ioctl+0x3c/0x60) from [] (ret_fast_syscall+0x0/0x30) Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-fimc/fimc-mdevice.c | 39 +++++++++++++------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/media/platform/s5p-fimc/fimc-mdevice.c
b/drivers/media/platform/s5p-fimc/fimc-mdevice.c index a17fcb2d5d41..cd38d708ab58 100644 --- a/drivers/media/platform/s5p-fimc/fimc-mdevice.c +++ b/drivers/media/platform/s5p-fimc/fimc-mdevice.c @@ -827,7 +827,7 @@ static int fimc_md_link_notify(struct media_pad *source, struct fimc_pipeline *pipeline; struct v4l2_subdev *sd; struct mutex *lock; - int ret = 0; + int i, ret = 0; int ref_count; if (media_entity_type(sink->entity) != MEDIA_ENT_T_V4L2_SUBDEV) @@ -854,29 +854,28 @@ static int fimc_md_link_notify(struct media_pad *source, return 0; } + mutex_lock(lock); + ref_count = fimc ? fimc->vid_cap.refcnt : fimc_lite->ref_count; + if (!(flags & MEDIA_LNK_FL_ENABLED)) { - int i; - mutex_lock(lock); - ret = __fimc_pipeline_close(pipeline); + if (ref_count > 0) { + ret = __fimc_pipeline_close(pipeline); + if (!ret && fimc) + fimc_ctrls_delete(fimc->vid_cap.ctx); + } for (i = 0; i < IDX_MAX; i++) pipeline->subdevs[i] = NULL; - if (fimc) - fimc_ctrls_delete(fimc->vid_cap.ctx); - mutex_unlock(lock); - return ret; + } else if (ref_count > 0) { + /* + * Link activation. Enable power of pipeline elements only if + * the pipeline is already in use, i.e. its video node is open. + * Recreate the controls destroyed during the link deactivation. + */ + ret = __fimc_pipeline_open(pipeline, + source->entity, true); + if (!ret && fimc) + ret = fimc_capture_ctrls_create(fimc); } - /* - * Link activation. Enable power of pipeline elements only if the - * pipeline is already in use, i.e. its video node is opened. - * Recreate the controls destroyed during the link deactivation. - */ - mutex_lock(lock); - - ref_count = fimc ? fimc->vid_cap.refcnt : fimc_lite->ref_count; - if (ref_count > 0) - ret = __fimc_pipeline_open(pipeline, source->entity, true); - if (!ret && fimc) - ret = fimc_capture_ctrls_create(fimc); mutex_unlock(lock); return ret ? -EPIPE : ret; -- cgit v1.2.2 From 6ba4d05e84eeb450011f7f3514ec8556d3b46743 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 27 Feb 2013 08:21:07 -0300 Subject: [media] m5mols: Fix bug in stream on handler Due to an improper condition check, streaming start for some pixel formats was prevented and s_stream just returned -EINVAL. This fixes a regression introduced in commit 5565a2ad47cdd8e697 ("[media] m5mols: Protect driver data with a mutex"). Signed-off-by: Andrzej Hajda Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/m5mols/m5mols_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c index d4e7567b367c..0b899cb6cda1 100644 --- a/drivers/media/i2c/m5mols/m5mols_core.c +++ b/drivers/media/i2c/m5mols/m5mols_core.c @@ -724,7 +724,7 @@ static int m5mols_s_stream(struct v4l2_subdev *sd, int enable) if (enable) { if (is_code(code, M5MOLS_RESTYPE_MONITOR)) ret = m5mols_start_monitor(info); - if (is_code(code, M5MOLS_RESTYPE_CAPTURE)) + else if (is_code(code, M5MOLS_RESTYPE_CAPTURE)) ret = m5mols_start_capture(info); else ret = -EINVAL; -- cgit v1.2.2 From c93d81955005c2ac0ea072f88d376026208410e1 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 16 Mar 2013 01:30:32 +0400 Subject: usb: cdc-acm: fix error handling in acm_probe() acm_probe() ignores errors in tty_port_register_device() and leaves intfdata pointing to freed memory on the alloc_fail7 error path. The patch fixes both issues. Found by Linux Driver Verification project (linuxtesting.org).
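The heart of the fix is to stop discarding the return value of tty_port_register_device() and to unwind on failure; condensed from the diff below (alloc_fail8 is the new error label):

	tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor,
			&control_interface->dev);
	if (IS_ERR(tty_dev)) {
		rv = PTR_ERR(tty_dev);	/* propagate the real error, not -ENOMEM */
		goto alloc_fail8;	/* remove sysfs files, clear intfdata */
	}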
Signed-off-by: Alexey Khoroshilov Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 8ac25adf31b4..c125b61c2499 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -977,6 +977,8 @@ static int acm_probe(struct usb_interface *intf, int num_rx_buf; int i; int combined_interfaces = 0; + struct device *tty_dev; + int rv = -ENOMEM; /* normal quirks */ quirks = (unsigned long)id->driver_info; @@ -1339,11 +1341,24 @@ skip_countries: usb_set_intfdata(data_interface, acm); usb_get_intf(control_interface); - tty_port_register_device(&acm->port, acm_tty_driver, minor, + tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor, &control_interface->dev); + if (IS_ERR(tty_dev)) { + rv = PTR_ERR(tty_dev); + goto alloc_fail8; + } return 0; +alloc_fail8: + if (acm->country_codes) { + device_remove_file(&acm->control->dev, + &dev_attr_wCountryCodes); + device_remove_file(&acm->control->dev, + &dev_attr_iCountryCodeRelDate); + } + device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); alloc_fail7: + usb_set_intfdata(intf, NULL); for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); alloc_fail6: @@ -1359,7 +1374,7 @@ alloc_fail2: acm_release_minor(acm); kfree(acm); alloc_fail: - return -ENOMEM; + return rv; } static void stop_data_traffic(struct acm *acm) -- cgit v1.2.2 From cb25505fc604292c70fc02143fc102f54c8595f0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:06 +0100 Subject: USB: cdc-acm: fix device unregistration Unregister the tty device in disconnect, as required by the USB stack. By deferring unregistration to when the last tty reference is dropped, the parent interface device can get unregistered before the child, resulting in broken hotplug events being generated when the tty is finally closed: KERNEL[2290.798128] remove /devices/pci0000:00/0000:00:1d.7/usb2/2-1/2-1:3.1 (usb) KERNEL[2290.804589] remove /devices/pci0000:00/0000:00:1d.7/usb2/2-1 (usb) KERNEL[2294.554799] remove /2-1:3.1/tty/ttyACM0 (tty) The driver must deal with tty callbacks after disconnect by checking the disconnected flag. Specifically, further opens must be prevented and this is already implemented.
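The resulting teardown order in acm_disconnect(), condensed from the diff below, unregisters the child tty device while the parent interface device is still registered:

	stop_data_traffic(acm);
	tty_unregister_device(acm_tty_driver, acm->minor);	/* moved here from acm_port_destruct() */
	usb_free_urb(acm->ctrlurb);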
Cc: stable Cc: Oliver Neukum Acked-by: Oliver Neukum Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index c125b61c2499..387dc6c8ad25 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -593,7 +593,6 @@ static void acm_port_destruct(struct tty_port *port) dev_dbg(&acm->control->dev, "%s\n", __func__); - tty_unregister_device(acm_tty_driver, acm->minor); acm_release_minor(acm); usb_put_intf(acm->control); kfree(acm->country_codes); @@ -1426,6 +1425,8 @@ static void acm_disconnect(struct usb_interface *intf) stop_data_traffic(acm); + tty_unregister_device(acm_tty_driver, acm->minor); + usb_free_urb(acm->ctrlurb); for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); -- cgit v1.2.2 From 618aa1068df29c37a58045fe940f9106664153fd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:07 +0100 Subject: USB: garmin_gps: fix memory leak on disconnect Remove bogus disconnect test introduced by 95bef012e ("USB: more serial drivers writing after disconnect") which prevented queued data from being freed on disconnect. The possible IO it was supposed to prevent is long gone. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/garmin_gps.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 1a07b12ef341..81caf5623ee2 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -956,10 +956,7 @@ static void garmin_close(struct usb_serial_port *port) if (!serial) return; - mutex_lock(&port->serial->disc_mutex); - - if (!port->serial->disconnected) - garmin_clear(garmin_data_p); + garmin_clear(garmin_data_p); /* shutdown our urbs */ usb_kill_urb(port->read_urb); @@ -968,8 +965,6 @@ static void garmin_close(struct usb_serial_port *port) /* keep reset state so we know that we must start a new session */ if (garmin_data_p->state != STATE_RESET) garmin_data_p->state = STATE_DISCONNECTED; - - mutex_unlock(&port->serial->disc_mutex); } -- cgit v1.2.2 From 5492bf3d5655b4954164f69c02955a7fca267611 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:08 +0100 Subject: USB: io_ti: fix get_icount for two port adapters Add missing get_icount field to two-port driver. The two-port driver was not updated when switching to the new icount interface in commit 0bca1b913aff ("tty: Convert the USB drivers to the new icount interface"). Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index c23776679f70..d7d3c0e7cd27 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2649,6 +2649,7 @@ static struct usb_serial_driver edgeport_2port_device = { .set_termios = edge_set_termios, .tiocmget = edge_tiocmget, .tiocmset = edge_tiocmset, + .get_icount = edge_get_icount, .write = edge_write, .write_room = edge_write_room, .chars_in_buffer = edge_chars_in_buffer, -- cgit v1.2.2 From d7971051e4df825e0bc11b995e87bfe86355b8e5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:09 +0100 Subject: USB: serial: fix interface refcounting Make sure the interface is not released before our serial device. 
Note that drivers are still not allowed to access the interface in any way that may interfere with another driver that may have gotten bound to the same interface after disconnect returns. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a19ed74d770d..2e70efa08b77 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -151,6 +151,7 @@ static void destroy_serial(struct kref *kref) } } + usb_put_intf(serial->interface); usb_put_dev(serial->dev); kfree(serial); } @@ -620,7 +621,7 @@ static struct usb_serial *create_serial(struct usb_device *dev, } serial->dev = usb_get_dev(dev); serial->type = driver; - serial->interface = interface; + serial->interface = usb_get_intf(interface); kref_init(&serial->kref); mutex_init(&serial->disc_mutex); serial->minor = SERIAL_TTY_NO_MINOR; -- cgit v1.2.2 From e5b33dc9d16053c2ae4c2c669cf008829530364b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:10 +0100 Subject: USB: serial: add modem-status-change wait queue Add a modem-status-change wait queue to struct usb_serial_port that subdrivers can use to implement TIOCMIWAIT. Currently subdrivers use a private wait queue which may have been released when waking up after the device has been disconnected. Note that we're adding a new wait queue rather than reusing the tty-port one as we do not want to get woken up at hangup (yet). Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/serial.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index ef9be7e1e190..1819b59aab2a 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -66,6 +66,7 @@ * port. * @flags: usb serial port flags * @write_wait: a wait_queue_head_t used by the port. + * @delta_msr_wait: modem-status-change wait queue * @work: work queue entry for the line discipline waking up. * @throttled: nonzero if the read urb is inactive to throttle the device * @throttle_req: nonzero if the tty wants to throttle us @@ -112,6 +113,7 @@ struct usb_serial_port { unsigned long flags; wait_queue_head_t write_wait; + wait_queue_head_t delta_msr_wait; struct work_struct work; char throttled; char throttle_req; -- cgit v1.2.2 From 5018860321dc7a9e50a75d5f319bc981298fb5b7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:11 +0100 Subject: USB: ark3116: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
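The shape of the fix is the same in each of the TIOCMIWAIT patches that follow; roughly (a condensed sketch of the patched wait loop, with priv standing for the driver-private state):

	for (;;) {
		struct async_icount prev = priv->icount;

		interruptible_sleep_on(&port->delta_msr_wait);
		if (signal_pending(current))
			return -ERESTARTSYS;
		/* The wait queue now lives in struct usb_serial_port and is
		 * still valid here; check for disconnect before touching the
		 * driver-private data, which may already be gone. */
		if (port->serial->disconnected)
			return -EIO;
		if (priv->icount.rng != prev.rng /* || ... other deltas */)
			return 0;
	}

while on the producer side the interrupt handler wakes the shared queue with wake_up_interruptible(&port->delta_msr_wait).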
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ark3116.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c index cbd904b8fba5..4775f8209e55 100644 --- a/drivers/usb/serial/ark3116.c +++ b/drivers/usb/serial/ark3116.c @@ -62,7 +62,6 @@ static int is_irda(struct usb_serial *serial) } struct ark3116_private { - wait_queue_head_t delta_msr_wait; struct async_icount icount; int irda; /* 1 for irda device */ @@ -146,7 +145,6 @@ static int ark3116_port_probe(struct usb_serial_port *port) if (!priv) return -ENOMEM; - init_waitqueue_head(&priv->delta_msr_wait); mutex_init(&priv->hw_lock); spin_lock_init(&priv->status_lock); @@ -456,10 +454,14 @@ static int ark3116_ioctl(struct tty_struct *tty, case TIOCMIWAIT: for (;;) { struct async_icount prev = priv->icount; - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + if ((prev.rng == priv->icount.rng) && (prev.dsr == priv->icount.dsr) && (prev.dcd == priv->icount.dcd) && @@ -580,7 +582,7 @@ static void ark3116_update_msr(struct usb_serial_port *port, __u8 msr) priv->icount.dcd++; if (msr & UART_MSR_TERI) priv->icount.rng++; - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } } -- cgit v1.2.2 From fa1e11d5231c001c80a479160b5832933c5d35fb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:12 +0100 Subject: USB: ch341: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index d255f66e708e..07d4650a32ab 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -80,7 +80,6 @@ MODULE_DEVICE_TABLE(usb, id_table); struct ch341_private { spinlock_t lock; /* access lock */ - wait_queue_head_t delta_msr_wait; /* wait queue for modem status */ unsigned baud_rate; /* set baud rate */ u8 line_control; /* set line control value RTS/DTR */ u8 line_status; /* active status of modem control inputs */ @@ -252,7 +251,6 @@ static int ch341_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); priv->baud_rate = DEFAULT_BAUD_RATE; priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; @@ -298,7 +296,7 @@ static void ch341_dtr_rts(struct usb_serial_port *port, int on) priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR); spin_unlock_irqrestore(&priv->lock, flags); ch341_set_handshake(port->serial->dev, priv->line_control); - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } static void ch341_close(struct usb_serial_port *port) @@ -491,7 +489,7 @@ static void ch341_read_int_callback(struct urb *urb) tty_kref_put(tty); } - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } exit: @@ -517,11 +515,14 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (!multi_change) { - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; multi_change = priv->multi_status_change; -- cgit v1.2.2 From 356050d8b1e526db093e9d2c78daf49d6bf418e3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:13 +0100 Subject: USB: cypress_m8: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect. Also remove the bogus test for the private data pointer being NULL as it is never assigned in the loop.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cypress_m8.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 8efa19d0e9fb..ba7352e4187e 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -111,7 +111,6 @@ struct cypress_private { int baud_rate; /* stores current baud rate in integer form */ int isthrottled; /* if throttled, discard reads */ - wait_queue_head_t delta_msr_wait; /* used for TIOCMIWAIT */ char prev_status, diff_status; /* used for TIOCMIWAIT */ /* we pass a pointer to this as the argument sent to cypress_set_termios old_termios */ @@ -449,7 +448,6 @@ static int cypress_generic_port_probe(struct usb_serial_port *port) kfree(priv); return -ENOMEM; } - init_waitqueue_head(&priv->delta_msr_wait); usb_reset_configuration(serial->dev); @@ -868,12 +866,16 @@ static int cypress_ioctl(struct tty_struct *tty, switch (cmd) { /* This code comes from drivers/char/serial.c and ftdi_sio.c */ case TIOCMIWAIT: - while (priv != NULL) { - interruptible_sleep_on(&priv->delta_msr_wait); + for (;;) { + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; - else { + + if (port->serial->disconnected) + return -EIO; + + { char diff = priv->diff_status; if (diff == 0) return -EIO; /* no change => error */ @@ -1187,7 +1189,7 @@ static void cypress_read_int_callback(struct urb *urb) if (priv->current_status != priv->prev_status) { priv->diff_status |= priv->current_status ^ priv->prev_status; - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); priv->prev_status = priv->current_status; } spin_unlock_irqrestore(&priv->lock, flags); -- cgit v1.2.2 From 508f940f1407656076a2e7d8f7fa059b567ecac2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:14 +0100 Subject: USB: f81232: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/f81232.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c index b1b2dc64b50b..a172ad5c5ce8 100644 --- a/drivers/usb/serial/f81232.c +++ b/drivers/usb/serial/f81232.c @@ -47,7 +47,6 @@ MODULE_DEVICE_TABLE(usb, id_table); struct f81232_private { spinlock_t lock; - wait_queue_head_t delta_msr_wait; u8 line_control; u8 line_status; }; @@ -111,7 +110,7 @@ static void f81232_process_read_urb(struct urb *urb) line_status = priv->line_status; priv->line_status &= ~UART_STATE_TRANSIENT_MASK; spin_unlock_irqrestore(&priv->lock, flags); - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); if (!urb->actual_length) return; @@ -256,11 +255,14 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; spin_unlock_irqrestore(&priv->lock, flags); @@ -322,7 +324,6 @@ static int f81232_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); usb_set_serial_port_data(port, priv); -- cgit v1.2.2 From 71ccb9b01981fabae27d3c98260ea4613207618e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:15 +0100 Subject: USB: ftdi_sio: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect. When switching to tty ports, some lifetime assumptions were changed. Specifically, close can now be called before the final tty reference is dropped as part of hangup at device disconnect. Even with the ftdi private-data refcounting this means that the port private data can be freed while a process is sleeping on modem-status changes and thus cannot be relied on to detect disconnects when woken up.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index edd162df49ca..d4809d551473 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -69,9 +69,7 @@ struct ftdi_private { int flags; /* some ASYNC_xxxx flags are supported */ unsigned long last_dtr_rts; /* saved modem control outputs */ struct async_icount icount; - wait_queue_head_t delta_msr_wait; /* Used for TIOCMIWAIT */ char prev_status; /* Used for TIOCMIWAIT */ - bool dev_gone; /* Used to abort TIOCMIWAIT */ char transmit_empty; /* If transmitter is empty or not */ __u16 interface; /* FT2232C, FT2232H or FT4232H port interface (0 for FT232/245) */ @@ -1691,10 +1689,8 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port) kref_init(&priv->kref); mutex_init(&priv->cfg_lock); - init_waitqueue_head(&priv->delta_msr_wait); priv->flags = ASYNC_LOW_LATENCY; - priv->dev_gone = false; if (quirk && quirk->port_probe) quirk->port_probe(priv); @@ -1840,8 +1836,7 @@ static int ftdi_sio_port_remove(struct usb_serial_port *port) { struct ftdi_private *priv = usb_get_serial_port_data(port); - priv->dev_gone = true; - wake_up_interruptible_all(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); remove_sysfs_attrs(port); @@ -1989,7 +1984,7 @@ static int ftdi_process_packet(struct usb_serial_port *port, if (diff_status & FTDI_RS0_RLSD) priv->icount.dcd++; - wake_up_interruptible_all(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); priv->prev_status = status; } @@ -2440,11 +2435,15 @@ static int ftdi_ioctl(struct tty_struct *tty, */ case TIOCMIWAIT: cprev = priv->icount; - while (!priv->dev_gone) { - interruptible_sleep_on(&priv->delta_msr_wait); + for (;;) { + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = priv->icount; if (((arg & TIOCM_RNG) && (cnow.rng != cprev.rng)) || ((arg & TIOCM_DSR) && (cnow.dsr != cprev.dsr)) || @@ -2454,8 +2453,6 @@ static int ftdi_ioctl(struct tty_struct *tty, } cprev = cnow; } - return -EIO; - break; case TIOCSERGETLSR: return get_lsr_info(port, (struct serial_struct __user *)arg); break; -- cgit v1.2.2 From 333576255d4cfc53efd056aad438568184b36af6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:16 +0100 Subject: USB: io_edgeport: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
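Drivers that open-code the wait follow the same pattern with prepare_to_wait()/finish_wait() on the shared port queue; condensed from the io_edgeport diff below:

	prepare_to_wait(&port->delta_msr_wait, &wait, TASK_INTERRUPTIBLE);
	schedule();
	finish_wait(&port->delta_msr_wait, &wait);
	if (signal_pending(current))
		return -ERESTARTSYS;
	if (port->serial->disconnected)
		return -EIO;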
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_edgeport.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index b00e5cbf741f..efd8b978128c 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -110,7 +110,6 @@ struct edgeport_port { wait_queue_head_t wait_chase; /* for handling sleeping while waiting for chase to finish */ wait_queue_head_t wait_open; /* for handling sleeping while waiting for open to finish */ wait_queue_head_t wait_command; /* for handling sleeping while waiting for command to finish */ - wait_queue_head_t delta_msr_wait; /* for handling sleeping while waiting for msr change to happen */ struct async_icount icount; struct usb_serial_port *port; /* loop back to the owner of this object */ @@ -884,7 +883,6 @@ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port) /* initialize our wait queues */ init_waitqueue_head(&edge_port->wait_open); init_waitqueue_head(&edge_port->wait_chase); - init_waitqueue_head(&edge_port->delta_msr_wait); init_waitqueue_head(&edge_port->wait_command); /* initialize our icount structure */ @@ -1669,13 +1667,17 @@ static int edge_ioctl(struct tty_struct *tty, dev_dbg(&port->dev, "%s (%d) TIOCMIWAIT\n", __func__, port->number); cprev = edge_port->icount; while (1) { - prepare_to_wait(&edge_port->delta_msr_wait, + prepare_to_wait(&port->delta_msr_wait, &wait, TASK_INTERRUPTIBLE); schedule(); - finish_wait(&edge_port->delta_msr_wait, &wait); + finish_wait(&port->delta_msr_wait, &wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = edge_port->icount; if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) @@ -2051,7 +2053,7 @@ static void handle_new_msr(struct edgeport_port *edge_port, __u8 newMsr) icount->dcd++; if (newMsr & EDGEPORT_MSR_DELTA_RI) icount->rng++; - wake_up_interruptible(&edge_port->delta_msr_wait); + wake_up_interruptible(&edge_port->port->delta_msr_wait); } /* Save the new modem status */ -- cgit v1.2.2 From 7b2459690584f239650a365f3411ba2ec1c6d1e0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:17 +0100 Subject: USB: io_ti: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/io_ti.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index d7d3c0e7cd27..7777172206de 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -87,9 +87,6 @@ struct edgeport_port { int close_pending; int lsr_event; struct async_icount icount; - wait_queue_head_t delta_msr_wait; /* for handling sleeping while waiting for msr change to happen */ struct edgeport_serial *edge_serial; struct usb_serial_port *port; __u8 bUartMode; /* Port type, 0: RS232, etc.
*/ @@ -1459,7 +1456,7 @@ static void handle_new_msr(struct edgeport_port *edge_port, __u8 msr) icount->dcd++; if (msr & EDGEPORT_MSR_DELTA_RI) icount->rng++; - wake_up_interruptible(&edge_port->delta_msr_wait); + wake_up_interruptible(&edge_port->port->delta_msr_wait); } /* Save the new modem status */ @@ -1754,7 +1751,6 @@ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port) dev = port->serial->dev; memset(&(edge_port->icount), 0x00, sizeof(edge_port->icount)); - init_waitqueue_head(&edge_port->delta_msr_wait); /* turn off loopback */ status = ti_do_config(edge_port, UMPC_SET_CLR_LOOPBACK, 0); @@ -2434,10 +2430,14 @@ static int edge_ioctl(struct tty_struct *tty, dev_dbg(&port->dev, "%s - TIOCMIWAIT\n", __func__); cprev = edge_port->icount; while (1) { - interruptible_sleep_on(&edge_port->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = edge_port->icount; if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) -- cgit v1.2.2 From cf1d24443677a0758cfa88ca40f24858b89261c0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:18 +0100 Subject: USB: mct_u232: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mct_u232.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index a64d420f687b..06d5a60be2c4 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -114,8 +114,6 @@ struct mct_u232_private { unsigned char last_msr; /* Modem Status Register */ unsigned int rx_flags; /* Throttling flags */ struct async_icount icount; - wait_queue_head_t msr_wait; /* for handling sleeping while waiting for msr change to happen */ }; #define THROTTLED 0x01 @@ -409,7 +407,6 @@ static int mct_u232_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->msr_wait); usb_set_serial_port_data(port, priv); @@ -601,7 +598,7 @@ static void mct_u232_read_int_callback(struct urb *urb) tty_kref_put(tty); } #endif - wake_up_interruptible(&priv->msr_wait); + wake_up_interruptible(&port->delta_msr_wait); spin_unlock_irqrestore(&priv->lock, flags); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); @@ -810,13 +807,17 @@ static int mct_u232_ioctl(struct tty_struct *tty, cprev = mct_u232_port->icount; spin_unlock_irqrestore(&mct_u232_port->lock, flags); for ( ; ; ) { - prepare_to_wait(&mct_u232_port->msr_wait, + prepare_to_wait(&port->delta_msr_wait, &wait, TASK_INTERRUPTIBLE); schedule(); - finish_wait(&mct_u232_port->msr_wait, &wait); + finish_wait(&port->delta_msr_wait, &wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&mct_u232_port->lock, flags); cnow = mct_u232_port->icount; spin_unlock_irqrestore(&mct_u232_port->lock, flags); -- cgit v1.2.2 From e670c6af12517d08a403487b1122eecf506021cf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:19 +0100 Subject:
USB: mos7840: fix broken TIOCMIWAIT Make sure waiting processes are woken on modem-status changes. Currently processes are only woken on termios changes regardless of whether the modem status has changed. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 809fb329eca5..1b83b01dfb77 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -423,6 +423,9 @@ static void mos7840_handle_new_msr(struct moschip_port *port, __u8 new_msr) icount->rng++; smp_wmb(); } + + mos7840_port->delta_msr_cond = 1; + wake_up_interruptible(&mos7840_port->delta_msr_wait); } } @@ -2017,8 +2020,6 @@ static void mos7840_change_port_settings(struct tty_struct *tty, mos7840_port->read_urb_busy = false; } } - wake_up(&mos7840_port->delta_msr_wait); - mos7840_port->delta_msr_cond = 1; dev_dbg(&port->dev, "%s - mos7840_port->shadowLCR is End %x\n", __func__, mos7840_port->shadowLCR); } -- cgit v1.2.2 From a14430db686b8e459e1cf070a6ecf391515c9ab9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:20 +0100 Subject: USB: mos7840: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/mos7840.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 1b83b01dfb77..b8051fa61911 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -219,7 +219,6 @@ struct moschip_port { char open; char open_ports; wait_queue_head_t wait_chase; /* for handling sleeping while waiting for chase to finish */ - wait_queue_head_t delta_msr_wait; /* for handling sleeping while waiting for msr change to happen */ int delta_msr_cond; struct async_icount icount; struct usb_serial_port *port; /* loop back to the owner of this object */ @@ -425,7 +424,7 @@ static void mos7840_handle_new_msr(struct moschip_port *port, __u8 new_msr) } mos7840_port->delta_msr_cond = 1; - wake_up_interruptible(&mos7840_port->delta_msr_wait); + wake_up_interruptible(&port->port->delta_msr_wait); } } @@ -1130,7 +1129,6 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port) /* initialize our wait queues */ init_waitqueue_head(&mos7840_port->wait_chase); - init_waitqueue_head(&mos7840_port->delta_msr_wait); /* initialize our icount structure */ memset(&(mos7840_port->icount), 0x00, sizeof(mos7840_port->icount)); @@ -2220,13 +2218,18 @@ static int mos7840_ioctl(struct tty_struct *tty, while (1) { /* interruptible_sleep_on(&mos7840_port->delta_msr_wait); */ mos7840_port->delta_msr_cond = 0; - wait_event_interruptible(mos7840_port->delta_msr_wait, - (mos7840_port-> + wait_event_interruptible(port->delta_msr_wait, + (port->serial->disconnected || + mos7840_port-> delta_msr_cond == 1)); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = mos7840_port->icount; smp_rmb(); if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && -- cgit v1.2.2 From 8edfdab37157d2683e51b8be5d3d5697f66a9f7b Mon Sep 17 00:00:00 2001
From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:21 +0100 Subject: USB: oti6858: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect. Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/oti6858.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index a958fd41b5b3..87c71ccfee87 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -188,7 +188,6 @@ struct oti6858_private { u8 setup_done; struct delayed_work delayed_setup_work; - wait_queue_head_t intr_wait; struct usb_serial_port *port; /* USB port with which associated */ }; @@ -339,7 +338,6 @@ static int oti6858_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->intr_wait); priv->port = port; INIT_DELAYED_WORK(&priv->delayed_setup_work, setup_line); INIT_DELAYED_WORK(&priv->delayed_write_work, send_data); @@ -664,11 +662,15 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - wait_event_interruptible(priv->intr_wait, + wait_event_interruptible(port->delta_msr_wait, + port->serial->disconnected || priv->status.pin_state != prev); if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->status.pin_state & PIN_MASK; spin_unlock_irqrestore(&priv->lock, flags); @@ -763,7 +765,7 @@ static void oti6858_read_int_callback(struct urb *urb) if (!priv->transient) { if (xs->pin_state != priv->status.pin_state) - wake_up_interruptible(&priv->intr_wait); + wake_up_interruptible(&port->delta_msr_wait); memcpy(&priv->status, xs, OTI6858_CTRL_PKT_SIZE); } -- cgit v1.2.2 From 40509ca982c00c4b70fc00be887509feca0bff15 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:22 +0100 Subject: USB: pl2303: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 54adc9125e5c..3b10018d89a3 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -139,7 +139,6 @@ struct pl2303_serial_private { struct pl2303_private { spinlock_t lock; - wait_queue_head_t delta_msr_wait; u8 line_control; u8 line_status; }; @@ -233,7 +232,6 @@ static int pl2303_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); usb_set_serial_port_data(port, priv); @@ -607,11 +605,14 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; spin_unlock_irqrestore(&priv->lock, flags); @@ -719,7 +720,7 @@ static void pl2303_update_line_status(struct usb_serial_port *port, spin_unlock_irqrestore(&priv->lock, flags); if (priv->line_status & UART_BREAK_ERROR) usb_serial_handle_break(port); - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); tty = tty_port_tty_get(&port->port); if (!tty) @@ -783,7 +784,7 @@ static void pl2303_process_read_urb(struct urb *urb) line_status = priv->line_status; priv->line_status &= ~UART_STATE_TRANSIENT_MASK; spin_unlock_irqrestore(&priv->lock, flags); - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); if (!urb->actual_length) return; -- cgit v1.2.2 From 69f87f40d2b98e8b4ab82a121fd2bd584690b887 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:23 +0100 Subject: USB: quatech2: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
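Drivers built on wait_event_interruptible() additionally fold the disconnect test into the wake-up condition, so a wake-up caused by disconnect is never missed; condensed from the quatech2 diff below:

	wait_event_interruptible(port->delta_msr_wait,
			(port->serial->disconnected ||
			 (priv->icount.rng != prev.rng) ||
			 (priv->icount.dsr != prev.dsr) ||
			 (priv->icount.dcd != prev.dcd) ||
			 (priv->icount.cts != prev.cts)));
	if (signal_pending(current))
		return -ERESTARTSYS;
	if (port->serial->disconnected)
		return -EIO;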
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/quatech2.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index d643a4d4d770..75f125ddb0c9 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -128,7 +128,6 @@ struct qt2_port_private { u8 shadowLSR; u8 shadowMSR; - wait_queue_head_t delta_msr_wait; /* Used for TIOCMIWAIT */ struct async_icount icount; struct usb_serial_port *port; @@ -506,8 +505,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->lock, flags); while (1) { - wait_event_interruptible(priv->delta_msr_wait, - ((priv->icount.rng != prev.rng) || + wait_event_interruptible(port->delta_msr_wait, + (port->serial->disconnected || + (priv->icount.rng != prev.rng) || (priv->icount.dsr != prev.dsr) || (priv->icount.dcd != prev.dcd) || (priv->icount.cts != prev.cts))); @@ -515,6 +515,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); cur = priv->icount; spin_unlock_irqrestore(&priv->lock, flags); @@ -827,7 +830,6 @@ static int qt2_port_probe(struct usb_serial_port *port) spin_lock_init(&port_priv->lock); spin_lock_init(&port_priv->urb_lock); - init_waitqueue_head(&port_priv->delta_msr_wait); port_priv->port = port; port_priv->write_urb = usb_alloc_urb(0, GFP_KERNEL); @@ -970,7 +972,7 @@ static void qt2_update_msr(struct usb_serial_port *port, unsigned char *ch) if (newMSR & UART_MSR_TERI) port_priv->icount.rng++; - wake_up_interruptible(&port_priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } } -- cgit v1.2.2 From dbcea7615d8d7d58f6ff49d2c5568113f70effe9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:24 +0100 Subject: USB: spcp8x5: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/spcp8x5.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 91ff8e3bddbd..549ef68ff5fa 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -149,7 +149,6 @@ enum spcp8x5_type { struct spcp8x5_private { spinlock_t lock; enum spcp8x5_type type; - wait_queue_head_t delta_msr_wait; u8 line_control; u8 line_status; }; @@ -179,7 +178,6 @@ static int spcp8x5_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->lock); - init_waitqueue_head(&priv->delta_msr_wait); priv->type = type; usb_set_serial_port_data(port , priv); @@ -475,7 +473,7 @@ static void spcp8x5_process_read_urb(struct urb *urb) priv->line_status &= ~UART_STATE_TRANSIENT_MASK; spin_unlock_irqrestore(&priv->lock, flags); /* wake up the wait for termios */ - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); if (!urb->actual_length) return; @@ -526,12 +524,15 @@ static int spcp8x5_wait_modem_info(struct usb_serial_port *port, while (1) { /* wake up in bulk read */ - interruptible_sleep_on(&priv->delta_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); /* see if a signal did it */ if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->lock, flags); status = priv->line_status; spin_unlock_irqrestore(&priv->lock, flags); -- cgit v1.2.2 From 43a66b4c417ad15f6d2f632ce67ad195bdf999e8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:25 +0100 Subject: USB: ssu100: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ssu100.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index b57cf841c5b6..4b2a19757b4d 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -61,7 +61,6 @@ struct ssu100_port_private { spinlock_t status_lock; u8 shadowLSR; u8 shadowMSR; - wait_queue_head_t delta_msr_wait; /* Used for TIOCMIWAIT */ struct async_icount icount; }; @@ -355,8 +354,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) spin_unlock_irqrestore(&priv->status_lock, flags); while (1) { - wait_event_interruptible(priv->delta_msr_wait, - ((priv->icount.rng != prev.rng) || + wait_event_interruptible(port->delta_msr_wait, + (port->serial->disconnected || + (priv->icount.rng != prev.rng) || (priv->icount.dsr != prev.dsr) || (priv->icount.dcd != prev.dcd) || (priv->icount.cts != prev.cts))); @@ -364,6 +364,9 @@ static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) if (signal_pending(current)) return -ERESTARTSYS; + if (port->serial->disconnected) + return -EIO; + spin_lock_irqsave(&priv->status_lock, flags); cur = priv->icount; spin_unlock_irqrestore(&priv->status_lock, flags); @@ -445,7 +448,6 @@ static int ssu100_port_probe(struct usb_serial_port *port) return -ENOMEM; spin_lock_init(&priv->status_lock); - init_waitqueue_head(&priv->delta_msr_wait); usb_set_serial_port_data(port, priv); @@ -537,7 +539,7 @@ static void ssu100_update_msr(struct usb_serial_port *port, u8 msr) priv->icount.dcd++; if (msr & UART_MSR_TERI) priv->icount.rng++; - wake_up_interruptible(&priv->delta_msr_wait); + wake_up_interruptible(&port->delta_msr_wait); } } -- cgit v1.2.2 From fc98ab873aa3dbe783ce56a2ffdbbe7c7609521a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 19 Mar 2013 09:21:26 +0100 Subject: USB: ti_usb_3410_5052: fix use-after-free in TIOCMIWAIT Use the port wait queue and make sure to check the serial disconnected flag before accessing private port data after waking up. This is needed as the private port data (including the wait queue itself) can be gone when waking up after a disconnect.
Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 39cb9b807c3c..73deb029fc05 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -74,7 +74,6 @@ struct ti_port { int tp_flags; int tp_closing_wait;/* in .01 secs */ struct async_icount tp_icount; - wait_queue_head_t tp_msr_wait; /* wait for msr change */ wait_queue_head_t tp_write_wait; struct ti_device *tp_tdev; struct usb_serial_port *tp_port; @@ -432,7 +431,6 @@ static int ti_port_probe(struct usb_serial_port *port) else tport->tp_uart_base_addr = TI_UART2_BASE_ADDR; tport->tp_closing_wait = closing_wait; - init_waitqueue_head(&tport->tp_msr_wait); init_waitqueue_head(&tport->tp_write_wait); if (kfifo_alloc(&tport->write_fifo, TI_WRITE_BUF_SIZE, GFP_KERNEL)) { kfree(tport); @@ -784,9 +782,13 @@ static int ti_ioctl(struct tty_struct *tty, dev_dbg(&port->dev, "%s - TIOCMIWAIT\n", __func__); cprev = tport->tp_icount; while (1) { - interruptible_sleep_on(&tport->tp_msr_wait); + interruptible_sleep_on(&port->delta_msr_wait); if (signal_pending(current)) return -ERESTARTSYS; + + if (port->serial->disconnected) + return -EIO; + cnow = tport->tp_icount; if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) @@ -1392,7 +1394,7 @@ static void ti_handle_new_msr(struct ti_port *tport, __u8 msr) icount->dcd++; if (msr & TI_MSR_DELTA_RI) icount->rng++; - wake_up_interruptible(&tport->tp_msr_wait); + wake_up_interruptible(&tport->tp_port->delta_msr_wait); spin_unlock_irqrestore(&tport->tp_lock, flags); } -- cgit v1.2.2 From d763448286377b8a0e3f179372e9e292bef3c337 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 11 Mar 2013 09:20:00 +0000 Subject: Btrfs: update to use fs_state bit Now that we use bit operations to check fs_state, update btrfs_free_fs_root()'s checker, otherwise we get back to the memory leak case. Signed-off-by: Liu Bo Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7d84651e850b..127b23e8323b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3253,7 +3253,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); - if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log(NULL, root); btrfs_free_log_root_tree(NULL, fs_info); } -- cgit v1.2.2 From 835d974fabfa9bff4d173ad03c054ac2f673263f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 19 Mar 2013 12:13:25 -0400 Subject: Btrfs: handle a bogus chunk tree nicely If you restore a btrfs-image file system and try to mount that file system, we'll panic. That's because btrfs-image restores and just makes one big chunk to envelop the whole disk, since they are really only meant to be messed with by our btrfs-progs. So fix up btrfs_rmap_block and the callers of it for mount so that we no longer panic but instead just return an error and fail to mount.
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 35 ++++++++++++++++++++++++++++++----- fs/btrfs/volumes.c | 13 ++++++++++++- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 350b9b18140c..a8ff25aedca1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root, cache->bytes_super += stripe_len; ret = add_excluded_extent(root, cache->key.objectid, stripe_len); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + return ret; } for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { @@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root, ret = btrfs_rmap_block(&root->fs_info->mapping_tree, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + return ret; while (nr--) { cache->bytes_super += stripe_len; ret = add_excluded_extent(root, logical[nr], stripe_len); - BUG_ON(ret); /* -ENOMEM */ + if (ret) { + kfree(logical); + return ret; + } } kfree(logical); @@ -7964,7 +7969,17 @@ int btrfs_read_block_groups(struct btrfs_root *root) * info has super bytes accounted for, otherwise we'll think * we have more space than we actually do. */ - exclude_super_stripes(root, cache); + ret = exclude_super_stripes(root, cache); + if (ret) { + /* + * We may have excluded something, so call this just in + * case. + */ + free_excluded_extents(root, cache); + kfree(cache->free_space_ctl); + kfree(cache); + goto error; + } /* * check for two cases, either we are full, and therefore @@ -8106,7 +8121,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; - exclude_super_stripes(root, cache); + ret = exclude_super_stripes(root, cache); + if (ret) { + /* + * We may have excluded something, so call this just in + * case. + */ + free_excluded_extents(root, cache); + kfree(cache->free_space_ctl); + kfree(cache); + return ret; + } add_new_free_space(cache, root->fs_info, chunk_offset, chunk_offset + size); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5989a92236f7..2854c824ab64 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4935,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, em = lookup_extent_mapping(em_tree, chunk_start, 1); read_unlock(&em_tree->lock); - BUG_ON(!em || em->start != chunk_start); + if (!em) { + printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", + chunk_start); + return -EIO; + } + + if (em->start != chunk_start) { + printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", + em->start, chunk_start); + free_extent_map(em); + return -EIO; + } map = (struct map_lookup *)em->bdev; length = em->len; -- cgit v1.2.2 From 6113077cd319e747875ec71227d2b5cb54e08c76 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Tue, 19 Mar 2013 10:57:14 +0000 Subject: Btrfs: fix missing qgroup reservation before fallocating Steps to reproduce:
	mkfs.btrfs
	mount
	btrfs quota enable
	btrfs sub create /subv
	btrfs qgroup limit 10M /subv
	fallocate --length 20M /subv/data
For the above example, fallocating will return successfully, which is not expected; we fix it by doing the qgroup reservation before fallocating.
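The reservation has to bracket the whole allocation; condensed from the diff below, it is taken right after the data-space check and released again on completion or failure:

	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
		if (ret)
			goto out_reserve_fail;
	}
	...
	out:
		mutex_unlock(&inode->i_mutex);
		if (root->fs_info->quota_enabled)
			btrfs_qgroup_free(root, alloc_end - alloc_start);
	out_reserve_fail:
		btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);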
Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7bdb47faa12e..1be25b92d63c 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2142,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode, { struct inode *inode = file->f_path.dentry->d_inode; struct extent_state *cached_state = NULL; + struct btrfs_root *root = BTRFS_I(inode)->root; u64 cur_offset; u64 last_byte; u64 alloc_start; @@ -2169,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode, ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); if (ret) return ret; + if (root->fs_info->quota_enabled) { + ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start); + if (ret) + goto out_reserve_fail; + } /* * wait for ordered IO before we have any locks. We'll loop again @@ -2272,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode, &cached_state, GFP_NOFS); out: mutex_unlock(&inode->i_mutex); + if (root->fs_info->quota_enabled) + btrfs_qgroup_free(root, alloc_end - alloc_start); +out_reserve_fail: /* Let go of our reservation. */ btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); return ret; -- cgit v1.2.2 From d9abbf1c3131b679379762700201ae69367f3f62 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 20 Mar 2013 13:49:48 +0000 Subject: Btrfs: fix locking on ROOT_REPLACE operations in tree mod log To resolve backrefs, ROOT_REPLACE operations in the tree mod log are required to be tied to at least one KEY_REMOVE_WHILE_FREEING operation. Therefore, those operations must be enclosed by tree_mod_log_write_lock() and tree_mod_log_write_unlock() calls. Those calls are private to the tree_mod_log_* functions, which means that removal of the elements of an old root node must be logged from tree_mod_log_insert_root. This partly reverts and corrects commit ba1bfbd5 (Btrfs: fix a tree mod logging issue for root replacement operations). This fixes the brand-new version of xfstest 276 as of commit cfe73f71. 
Cc: stable@vger.kernel.org Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ecd25a1b4e51..ca9d8f1a3bb6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (tree_mod_dont_log(fs_info, NULL)) return 0; + __tree_mod_log_free_eb(fs_info, old_root); + ret = tree_mod_alloc(fs_info, flags, &tm); if (ret < 0) goto out; @@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) static noinline void tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, - unsigned long src_offset, int nr_items) + unsigned long src_offset, int nr_items, int log_removal) { int ret; int i; @@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, } for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, src, - i + src_offset, - MOD_LOG_KEY_REMOVE); - BUG_ON(ret < 0); + if (log_removal) { + ret = tree_mod_log_insert_key_locked(fs_info, src, + i + src_offset, + MOD_LOG_KEY_REMOVE); + BUG_ON(ret < 0); + } ret = tree_mod_log_insert_key_locked(fs_info, dst, i + dst_offset, MOD_LOG_KEY_ADD); @@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); /* -ENOMEM */ } - tree_mod_log_free_eb(root->fs_info, buf); clean_tree_block(trans, root, buf); *last_ref = 1; } @@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); + tree_mod_log_free_eb(root->fs_info, buf); btrfs_free_tree_block(trans, root, buf, parent_start, last_ref); } @@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } - tree_mod_log_free_eb(root->fs_info, root->node); tree_mod_log_set_root_pointer(root, child); rcu_assign_pointer(root->node, child); @@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, push_items = min(src_nritems - 8, push_items); tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, - push_items); + push_items, 1); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), @@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, sizeof(struct btrfs_key_ptr)); tree_mod_log_eb_copy(root->fs_info, dst, src, 0, - src_nritems - push_items, push_items); + src_nritems - push_items, push_items, 1); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), @@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans, int mid; int ret; u32 c_nritems; + int tree_mod_log_removal = 1; c = path->nodes[level]; WARN_ON(btrfs_header_generation(c) != trans->transid); if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); + /* + * removal of root nodes has been logged by + * tree_mod_log_set_root_pointer due to locking + */ + tree_mod_log_removal = 0; if (ret) return ret; } else { @@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, (unsigned 
long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); - tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); + tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid, + tree_mod_log_removal); copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(mid), -- cgit v1.2.2 From 1dd05682b3ef6e70409e130bfd83e91770801589 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 21 Mar 2013 04:32:32 +0000 Subject: Btrfs: fix memory leak in btrfs_create_tree() We should free leaf and root before returning from the error handling code. Signed-off-by: Tsutomu Itoh Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 127b23e8323b..6d19a0a554aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); + leaf = NULL; goto fail; } @@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_tree_unlock(leaf); + return root; + fail: - if (ret) - return ERR_PTR(ret); + if (leaf) { + btrfs_tree_unlock(leaf); + free_extent_buffer(leaf); + } + kfree(root); - return root; + return ERR_PTR(ret); } static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, -- cgit v1.2.2 From f564c24103f87dc740c1c293c975565ac46b12ef Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Mar 2013 17:32:36 -0700 Subject: x86, microcode_intel_early: Mark apply_microcode_early() as cpuinit Add missing __cpuinit annotation to apply_microcode_early(). Reported-by: Shaun Ruffell Cc: Fenghua Yu Link: http://lkml.kernel.org/r/20130320170310.GA23362@digium.com Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/microcode_intel_early.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index 5992ee8086b7..d893e8ed8ac9 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -659,8 +659,8 @@ static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci) } #endif -static int apply_microcode_early(struct mc_saved_data *mc_saved_data, - struct ucode_cpu_info *uci) +static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data, + struct ucode_cpu_info *uci) { struct microcode_intel *mc_intel; unsigned int val[2]; -- cgit v1.2.2 From d31c3a81893e3416ea519d1b1383f319c046641f Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 4 Mar 2013 23:03:19 +0200 Subject: omapfb: fix broken build on OMAP1 Fix the following build regression in 3.9-rc1 by including : drivers/video/omap/omapfb_main.c: In function 'set_fb_var': drivers/video/omap/omapfb_main.c:505:3: error: implicit declaration of function 'cpu_is_omap15xx' [-Werror=implicit-function-declaration] Signed-off-by: Aaro Koskinen Signed-off-by: Tomi Valkeinen --- drivers/video/omap/omapfb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c index e31f5b33b501..d40612c31a98 100644 --- a/drivers/video/omap/omapfb_main.c +++ b/drivers/video/omap/omapfb_main.c @@ -32,6 +32,8 @@ #include +#include + #include "omapfb.h" #include "lcdc.h" -- cgit v1.2.2 From a2f9b2a5607e494e6b98b0101aaa731b42454ad0 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 17 Feb 2013 02:43:12 +0200 Subject: OMAPDSS: tpo-td043 panel: fix data passing between SPI/DSS parts This driver uses the omap_dss_device that it gets from a board file through the SPI platform_data pointer to pass data from the SPI to the DSS portion of the driver by using dev_set_drvdata(). However, this trick no longer works, as the DSS core no longer uses the omap_dss_device from a board file to create the real device, so use a global pointer to accomplish this instead, like other SPI panel drivers do.
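In sketch form, the handoff works like this (hedged: only g_tpo_td043, the -EBUSY guard and the dev_set_drvdata() call come from the patch below; the sketch_* helpers are invented for illustration):

	/* SPI side: publish the instance through a file-scope pointer. */
	static struct tpo_td043_device *g_tpo_td043;

	static int sketch_spi_probe(struct spi_device *spi)
	{
		if (g_tpo_td043 != NULL)
			return -EBUSY;	/* a single panel instance is assumed */
		g_tpo_td043 = sketch_alloc_device(spi);
		return g_tpo_td043 ? 0 : -ENOMEM;
	}

	/* DSS side: consume the pointer instead of board-file drvdata. */
	static int sketch_dss_probe(struct omap_dss_device *dssdev)
	{
		struct tpo_td043_device *tpo_td043 = g_tpo_td043;

		if (!tpo_td043)
			return -ENODEV;
		dev_set_drvdata(&dssdev->dev, tpo_td043);
		return 0;
	}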
Signed-off-by: Grazvydas Ignotas Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/displays/panel-tpo-td043mtea1.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c index 6b6643911d29..048c98381ef6 100644 --- a/drivers/video/omap2/displays/panel-tpo-td043mtea1.c +++ b/drivers/video/omap2/displays/panel-tpo-td043mtea1.c @@ -63,6 +63,9 @@ struct tpo_td043_device { u32 power_on_resume:1; }; +/* used to pass spi_device from SPI to DSS portion of the driver */ +static struct tpo_td043_device *g_tpo_td043; + static int tpo_td043_write(struct spi_device *spi, u8 addr, u8 data) { struct spi_message m; @@ -403,7 +406,7 @@ static void tpo_td043_disable(struct omap_dss_device *dssdev) static int tpo_td043_probe(struct omap_dss_device *dssdev) { - struct tpo_td043_device *tpo_td043 = dev_get_drvdata(&dssdev->dev); + struct tpo_td043_device *tpo_td043 = g_tpo_td043; int nreset_gpio = dssdev->reset_gpio; int ret = 0; @@ -440,6 +443,8 @@ static int tpo_td043_probe(struct omap_dss_device *dssdev) if (ret) dev_warn(&dssdev->dev, "failed to create sysfs files\n"); + dev_set_drvdata(&dssdev->dev, tpo_td043); + return 0; fail_gpio_req: @@ -505,6 +510,9 @@ static int tpo_td043_spi_probe(struct spi_device *spi) return -ENODEV; } + if (g_tpo_td043 != NULL) + return -EBUSY; + spi->bits_per_word = 16; spi->mode = SPI_MODE_0; @@ -521,7 +529,7 @@ static int tpo_td043_spi_probe(struct spi_device *spi) tpo_td043->spi = spi; tpo_td043->nreset_gpio = dssdev->reset_gpio; dev_set_drvdata(&spi->dev, tpo_td043); - dev_set_drvdata(&dssdev->dev, tpo_td043); + g_tpo_td043 = tpo_td043; omap_dss_register_driver(&tpo_td043_driver); @@ -534,6 +542,7 @@ static int tpo_td043_spi_remove(struct spi_device *spi) omap_dss_unregister_driver(&tpo_td043_driver); kfree(tpo_td043); + g_tpo_td043 = NULL; return 0; } -- cgit v1.2.2 From ff588d83bf12fe05521a64e85dd4e2b848c0b20d Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Tue, 5 Mar 2013 19:47:50 +0530 Subject: omapdss: features: fix supported outputs for OMAP4 The supported outputs struct for overlay managers is incorrect for OMAP4. Make these changes: - DPI isn't supported via the LCD1 overlay manager, remove DPI as a supported output. - the TV manager can support DPI, but the omapdss driver doesn't support that yet: we require some muxing at the DSS level, and we also need to configure the hdmi pll in the DPI driver so that the TV manager has a pixel clock. We don't support that yet.
Signed-off-by: Archit Taneja Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dss_features.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c index d7d66ef5cb58..7f791aeda4d2 100644 --- a/drivers/video/omap2/dss/dss_features.c +++ b/drivers/video/omap2/dss/dss_features.c @@ -202,12 +202,10 @@ static const enum omap_dss_output_id omap3630_dss_supported_outputs[] = { static const enum omap_dss_output_id omap4_dss_supported_outputs[] = { /* OMAP_DSS_CHANNEL_LCD */ - OMAP_DSS_OUTPUT_DPI | OMAP_DSS_OUTPUT_DBI | - OMAP_DSS_OUTPUT_DSI1, + OMAP_DSS_OUTPUT_DBI | OMAP_DSS_OUTPUT_DSI1, /* OMAP_DSS_CHANNEL_DIGIT */ - OMAP_DSS_OUTPUT_VENC | OMAP_DSS_OUTPUT_HDMI | - OMAP_DSS_OUTPUT_DPI, + OMAP_DSS_OUTPUT_VENC | OMAP_DSS_OUTPUT_HDMI, /* OMAP_DSS_CHANNEL_LCD2 */ OMAP_DSS_OUTPUT_DPI | OMAP_DSS_OUTPUT_DBI | -- cgit v1.2.2 From 132c803f7b70b17322579f6f4f3f65cf68e55135 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Fri, 15 Mar 2013 05:34:08 +0000 Subject: i2c: tegra: check the clk_prepare_enable() return value NVIDIA's Tegra SoC allows reads and writes of the controller registers only while the controller clock is enabled; the system hangs if a register is accessed without the clock enabled. clk_prepare_enable() can fail for unknown reasons, so check its return value: access the registers only if it succeeds, and otherwise return the error to the caller. Signed-off-by: Laxman Dewangan Reviewed-by: Stephen Warren Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-tegra.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 36704e3ab3fa..b714776b6ddd 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -411,7 +411,11 @@ static int tegra_i2c_init(struct tegra_i2c_dev *i2c_dev) int clk_multiplier = I2C_CLK_MULTIPLIER_STD_FAST_MODE; u32 clk_divisor; - tegra_i2c_clock_enable(i2c_dev); + err = tegra_i2c_clock_enable(i2c_dev); + if (err < 0) { + dev_err(i2c_dev->dev, "Clock enable failed %d\n", err); + return err; + } tegra_periph_reset_assert(i2c_dev->div_clk); udelay(2); @@ -628,7 +632,12 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], if (i2c_dev->is_suspended) return -EBUSY; - tegra_i2c_clock_enable(i2c_dev); + ret = tegra_i2c_clock_enable(i2c_dev); + if (ret < 0) { + dev_err(i2c_dev->dev, "Clock enable failed %d\n", ret); + return ret; + } + for (i = 0; i < num; i++) { enum msg_end_type end_type = MSG_END_STOP; if (i < (num - 1)) { -- cgit v1.2.2 From 488b926923f6da5b90555cddb624ad783f4952b0 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Thu, 21 Feb 2013 12:30:43 +0000 Subject: i2c: iSMT: add Intel Avoton DeviceIDs This patch adds the iSMT SMBus Controller DeviceIDs for the Intel Avoton SOC.
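Enablement patches like this one follow a two-line recipe, sketched here with the IDs from the diff below (the table is abbreviated; the terminating { 0, } sentinel must stay last so the PCI core knows where the table ends):

	#define PCI_DEVICE_ID_INTEL_AVOTON_SMT	0x1f15

	static const struct pci_device_id sketch_ismt_ids[] = {
		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AVOTON_SMT) },
		{ 0, }	/* sentinel */
	};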
Signed-off-by: Seth Heasley Acked-by: Neil Horman Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index e9205ee8cf94..130f02cc9d94 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -80,6 +80,7 @@ /* PCI DIDs for the Intel SMBus Message Transport (SMT) Devices */ #define PCI_DEVICE_ID_INTEL_S1200_SMT0 0x0c59 #define PCI_DEVICE_ID_INTEL_S1200_SMT1 0x0c5a +#define PCI_DEVICE_ID_INTEL_AVOTON_SMT 0x1f15 #define ISMT_DESC_ENTRIES 32 /* number of descriptor entries */ #define ISMT_MAX_RETRIES 3 /* number of SMBus retries to attempt */ @@ -185,6 +186,7 @@ struct ismt_priv { static const DEFINE_PCI_DEVICE_TABLE(ismt_ids) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT0) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_S1200_SMT1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AVOTON_SMT) }, { 0, } }; -- cgit v1.2.2 From b104153e366396e6a631ee3c9d95c26ece36523b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 26 Feb 2013 06:03:52 +0000 Subject: i2c: Fix my e-mail address in drivers and documentation My old e-mail address is no longer working. Signed-off-by: Guenter Roeck Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-diolan-u2c | 2 +- drivers/i2c/muxes/i2c-mux-pca9541.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/busses/i2c-diolan-u2c b/Documentation/i2c/busses/i2c-diolan-u2c index 30fe4bb9a069..0d6018c316c7 100644 --- a/Documentation/i2c/busses/i2c-diolan-u2c +++ b/Documentation/i2c/busses/i2c-diolan-u2c @@ -5,7 +5,7 @@ Supported adapters: Documentation: http://www.diolan.com/i2c/u2c12.html -Author: Guenter Roeck +Author: Guenter Roeck Description ----------- diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c index f3b8f9a6a89b..966a18a5d12d 100644 --- a/drivers/i2c/muxes/i2c-mux-pca9541.c +++ b/drivers/i2c/muxes/i2c-mux-pca9541.c @@ -3,7 +3,7 @@ * * Copyright (c) 2010 Ericsson AB. * - * Author: Guenter Roeck + * Author: Guenter Roeck * * Derived from: * pca954x.c -- cgit v1.2.2 From 888f2804e46377b54f283707f87c09922ef87eb2 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 15 Mar 2013 05:32:57 +0000 Subject: MAINTAINERS: add maintainer entry for atmel i2c driver Create an entry for atmel i2c driver: i2c-at91.c Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Wolfram Sang --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 50b4d735f961..23fa78bb6253 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1467,6 +1467,12 @@ F: drivers/dma/at_hdmac.c F: drivers/dma/at_hdmac_regs.h F: include/linux/platform_data/dma-atmel.h +ATMEL I2C DRIVER +M: Ludovic Desroches +L: linux-i2c@vger.kernel.org +S: Supported +F: drivers/i2c/busses/i2c-at91.c + ATMEL ISI DRIVER M: Josh Wu L: linux-media@vger.kernel.org -- cgit v1.2.2 From 09a6e1f4ad32243989b30485f78985c0923284cd Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 22 Mar 2013 08:08:06 -0300 Subject: Revert "KVM: allow host header to be included even for !CONFIG_KVM" This reverts commit f445f11eb2cc265dd47da5b2e864df46cd6e5a82 as it breaks PPC with CONFIG_KVM=n. 
Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a9428635c9fd..cad77fe09d77 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1,8 +1,6 @@ #ifndef __KVM_HOST_H #define __KVM_HOST_H -#if IS_ENABLED(CONFIG_KVM) - /* * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -1057,8 +1055,5 @@ static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ -#else -static inline void __guest_enter(void) { return; } -static inline void __guest_exit(void) { return; } -#endif /* IS_ENABLED(CONFIG_KVM) */ #endif + -- cgit v1.2.2 From 949dd8c14fb2b20b4b815817e66120b22cf531d4 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 19 Mar 2013 14:35:30 -0400 Subject: xen/acpi-processor: Don't dereference struct acpi_processor on all CPUs. With git commit c705c78c0d0835a4aa5d0d9a3422e3218462030c "acpi: Export the acpi_processor_get_performance_info" we are now using a different mechanism to access the P-states. The acpi_processor per-cpu structure is set and filtered by the core ACPI code which shrinks the per_cpu contents to only online CPUs. In the past we would call acpi_processor_register_performance() which would have not tried to dereference offline cpus. With the new patch and the fact that the loop we take is for for_all_possible_cpus we end up crashing on some machines. We could modify the loop to be for online_cpus - but all the other loops in the code use possible_cpus (for a good reason) - so lets leave it as so and just check if per_cpu(processor) is NULL. With this patch we will bypass the !online but possible CPUs. This fixes: IP: [] xen_acpi_processor_init+0x1b6/0xe01 [xen_acpi_processor] PGD 4126e6067 PUD 4126e3067 PMD 0 Oops: 0002 [#1] SMP Pid: 432, comm: modprobe Not tainted 3.9.0-rc3+ #28 To be filled by O.E.M. To be filled by O.E.M./M5A97 RIP: e030:[] [] xen_acpi_processor_init+0x1b6/0xe01 [xen_acpi_processor] RSP: e02b:ffff88040c8a3ce8 EFLAGS: 00010282 .. snip.. Call Trace: [] ? read_acpi_id+0x12b/0x12b [xen_acpi_processor] [] do_one_initcall+0x12a/0x180 [] load_module+0x1cd3/0x2870 [] ? ddebug_proc_open+0xc0/0xc0 [] sys_init_module+0xd7/0x120 [] system_call_fastpath+0x16/0x1b on some machines. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-acpi-processor.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index f3278a6603ca..90e34ac7e522 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -505,6 +505,9 @@ static int __init xen_acpi_processor_init(void) pr = per_cpu(processors, i); perf = per_cpu_ptr(acpi_perf_data, i); + if (!pr) + continue; + pr->performance = perf; rc = acpi_processor_get_performance_info(pr); if (rc) -- cgit v1.2.2 From 909b3fdb0dd4f3db07b2d75425a00a2adb551383 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 12 Mar 2013 15:06:23 +0000 Subject: xen-pciback: notify hypervisor about devices intended to be assigned to guests For MSI-X capable devices the hypervisor wants to write protect the MSI-X table and PBA, yet it can't assume that resources have been assigned to their final values at device enumeration time. Thus have pciback do that notification, as having the device controlled by it is a prerequisite to assigning the device to guests anyway. 
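A hedged sketch of the pairing this introduces (struct and hypercall names as they appear in the diff below; error handling trimmed, and the wrapper itself is invented for illustration):

	/* Notify the hypervisor for MSI-X capable devices only. */
	static int sketch_msix_notify(struct pci_dev *dev, int op /* PHYSDEVOP_* */)
	{
		struct physdev_pci_device ppdev = {
			.seg   = pci_domain_nr(dev->bus),
			.bus   = dev->bus->number,
			.devfn = dev->devfn
		};

		if (!pci_find_capability(dev, PCI_CAP_ID_MSIX))
			return 0;
		return HYPERVISOR_physdev_op(op, &ppdev);
	}

	/* when pciback seizes the device:  sketch_msix_notify(dev, PHYSDEVOP_prepare_msix); */
	/* when pciback releases it:       sketch_msix_notify(dev, PHYSDEVOP_release_msix); */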
This is the kernel part of hypervisor side commit 4245d33 ("x86/MSI: add mechanism to fully protect MSI-X table from PV guest accesses") on the master branch of git://xenbits.xen.org/xen.git. CC: stable@vger.kernel.org Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/hypercall.h | 4 +-- drivers/xen/fallback.c | 3 +- drivers/xen/xen-pciback/pci_stub.c | 59 +++++++++++++++++++++++++++--------- include/xen/interface/physdev.h | 6 ++++ 4 files changed, 54 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index c20d1ce62dc6..e709884d0ef9 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -382,14 +382,14 @@ HYPERVISOR_console_io(int cmd, int count, char *str) return _hypercall3(int, console_io, cmd, count, str); } -extern int __must_check HYPERVISOR_physdev_op_compat(int, void *); +extern int __must_check xen_physdev_op_compat(int, void *); static inline int HYPERVISOR_physdev_op(int cmd, void *arg) { int rc = _hypercall2(int, physdev_op, cmd, arg); if (unlikely(rc == -ENOSYS)) - rc = HYPERVISOR_physdev_op_compat(cmd, arg); + rc = xen_physdev_op_compat(cmd, arg); return rc; } diff --git a/drivers/xen/fallback.c b/drivers/xen/fallback.c index 0ef7c4d40f86..b04fb64c5a91 100644 --- a/drivers/xen/fallback.c +++ b/drivers/xen/fallback.c @@ -44,7 +44,7 @@ int xen_event_channel_op_compat(int cmd, void *arg) } EXPORT_SYMBOL_GPL(xen_event_channel_op_compat); -int HYPERVISOR_physdev_op_compat(int cmd, void *arg) +int xen_physdev_op_compat(int cmd, void *arg) { struct physdev_op op; int rc; @@ -78,3 +78,4 @@ int HYPERVISOR_physdev_op_compat(int cmd, void *arg) return rc; } +EXPORT_SYMBOL_GPL(xen_physdev_op_compat); diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 9204126f1560..a2278ba7fb27 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "pciback.h" #include "conf_space.h" #include "conf_space_quirks.h" @@ -85,37 +86,52 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) static void pcistub_device_release(struct kref *kref) { struct pcistub_device *psdev; + struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data; psdev = container_of(kref, struct pcistub_device, kref); - dev_data = pci_get_drvdata(psdev->dev); + dev = psdev->dev; + dev_data = pci_get_drvdata(dev); - dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); + dev_dbg(&dev->dev, "pcistub_device_release\n"); - xen_unregister_device_domain_owner(psdev->dev); + xen_unregister_device_domain_owner(dev); /* Call the reset function which does not take lock as this * is called from "unbind" which takes a device_lock mutex. 
*/ - __pci_reset_function_locked(psdev->dev); - if (pci_load_and_free_saved_state(psdev->dev, - &dev_data->pci_saved_state)) { - dev_dbg(&psdev->dev->dev, "Could not reload PCI state\n"); - } else - pci_restore_state(psdev->dev); + __pci_reset_function_locked(dev); + if (pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) + dev_dbg(&dev->dev, "Could not reload PCI state\n"); + else + pci_restore_state(dev); + + if (pci_find_capability(dev, PCI_CAP_ID_MSIX)) { + struct physdev_pci_device ppdev = { + .seg = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn + }; + int err = HYPERVISOR_physdev_op(PHYSDEVOP_release_msix, + &ppdev); + + if (err) + dev_warn(&dev->dev, "MSI-X release failed (%d)\n", + err); + } /* Disable the device */ - xen_pcibk_reset_device(psdev->dev); + xen_pcibk_reset_device(dev); kfree(dev_data); - pci_set_drvdata(psdev->dev, NULL); + pci_set_drvdata(dev, NULL); /* Clean-up the device */ - xen_pcibk_config_free_dyn_fields(psdev->dev); - xen_pcibk_config_free_dev(psdev->dev); + xen_pcibk_config_free_dyn_fields(dev); + xen_pcibk_config_free_dev(dev); - psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; - pci_dev_put(psdev->dev); + dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + pci_dev_put(dev); kfree(psdev); } @@ -355,6 +371,19 @@ static int pcistub_init_device(struct pci_dev *dev) if (err) goto config_release; + if (pci_find_capability(dev, PCI_CAP_ID_MSIX)) { + struct physdev_pci_device ppdev = { + .seg = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn + }; + + err = HYPERVISOR_physdev_op(PHYSDEVOP_prepare_msix, &ppdev); + if (err) + dev_err(&dev->dev, "MSI-X preparation failed (%d)\n", + err); + } + /* We need the device active to save the state. */ dev_dbg(&dev->dev, "save state of device\n"); pci_save_state(dev); diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 1844d31f4552..7000bb1f6e96 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -251,6 +251,12 @@ struct physdev_pci_device_add { #define PHYSDEVOP_pci_device_remove 26 #define PHYSDEVOP_restore_msi_ext 27 +/* + * Dom0 should use these two to announce MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. + */ +#define PHYSDEVOP_prepare_msix 30 +#define PHYSDEVOP_release_msix 31 struct physdev_pci_device { /* IN */ uint16_t seg; -- cgit v1.2.2 From f4541d60a449afd40448b06496dcd510f505928e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Mar 2013 17:36:09 +0000 Subject: tcp: preserve ACK clocking in TSO A long standing problem with TSO is the fact that tcp_tso_should_defer() rearms the deferred timer, while it should not. Current code leads to following bad bursty behavior : 20:11:24.484333 IP A > B: . 297161:316921(19760) ack 1 win 119 20:11:24.484337 IP B > A: . ack 263721 win 1117 20:11:24.485086 IP B > A: . ack 265241 win 1117 20:11:24.485925 IP B > A: . ack 266761 win 1117 20:11:24.486759 IP B > A: . ack 268281 win 1117 20:11:24.487594 IP B > A: . ack 269801 win 1117 20:11:24.488430 IP B > A: . ack 271321 win 1117 20:11:24.489267 IP B > A: . ack 272841 win 1117 20:11:24.490104 IP B > A: . ack 274361 win 1117 20:11:24.490939 IP B > A: . ack 275881 win 1117 20:11:24.491775 IP B > A: . ack 277401 win 1117 20:11:24.491784 IP A > B: . 316921:332881(15960) ack 1 win 119 20:11:24.492620 IP B > A: . ack 278921 win 1117 20:11:24.493448 IP B > A: . ack 280441 win 1117 20:11:24.494286 IP B > A: . ack 281961 win 1117 20:11:24.495122 IP B > A: . 
ack 283481 win 1117 20:11:24.495958 IP B > A: . ack 285001 win 1117 20:11:24.496791 IP B > A: . ack 286521 win 1117 20:11:24.497628 IP B > A: . ack 288041 win 1117 20:11:24.498459 IP B > A: . ack 289561 win 1117 20:11:24.499296 IP B > A: . ack 291081 win 1117 20:11:24.500133 IP B > A: . ack 292601 win 1117 20:11:24.500970 IP B > A: . ack 294121 win 1117 20:11:24.501388 IP B > A: . ack 295641 win 1117 20:11:24.501398 IP A > B: . 332881:351881(19000) ack 1 win 119 While the expected behavior is more like : 20:19:49.259620 IP A > B: . 197601:202161(4560) ack 1 win 119 20:19:49.260446 IP B > A: . ack 154281 win 1212 20:19:49.261282 IP B > A: . ack 155801 win 1212 20:19:49.262125 IP B > A: . ack 157321 win 1212 20:19:49.262136 IP A > B: . 202161:206721(4560) ack 1 win 119 20:19:49.262958 IP B > A: . ack 158841 win 1212 20:19:49.263795 IP B > A: . ack 160361 win 1212 20:19:49.264628 IP B > A: . ack 161881 win 1212 20:19:49.264637 IP A > B: . 206721:211281(4560) ack 1 win 119 20:19:49.265465 IP B > A: . ack 163401 win 1212 20:19:49.265886 IP B > A: . ack 164921 win 1212 20:19:49.266722 IP B > A: . ack 166441 win 1212 20:19:49.266732 IP A > B: . 211281:215841(4560) ack 1 win 119 20:19:49.267559 IP B > A: . ack 167961 win 1212 20:19:49.268394 IP B > A: . ack 169481 win 1212 20:19:49.269232 IP B > A: . ack 171001 win 1212 20:19:49.269241 IP A > B: . 215841:221161(5320) ack 1 win 119 Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Van Jacobson Cc: Neal Cardwell Cc: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 817fbb396bc8..5d0b4387cba6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1809,8 +1809,11 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) goto send_now; } - /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies << 1); + /* Ok, it looks like it is advisable to defer. + * Do not rearm the timer if already set to not break TCP ACK clocking. + */ + if (!tp->tso_deferred) + tp->tso_deferred = 1 | (jiffies << 1); return true; -- cgit v1.2.2 From d137c8306c748d89260400176613b5a85574b255 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 08:58:23 -0600 Subject: mtip32xx: fix error return code in mtip_pci_probe() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. 
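This class of fix reduces to one rule, illustrated by the compilable, driver-independent sketch below (nothing here is mtip32xx code): every failure branch must store a negative errno in the return variable before jumping to the shared cleanup label, otherwise the function falls out with a stale 0.

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	static int sketch_probe(void)
	{
		int rv = 0;
		void *workq = NULL, *queue = NULL;

		workq = malloc(32);	/* stands in for create_workqueue() */
		if (!workq) {
			rv = -ENOMEM;	/* the assignment such patches add */
			goto cleanup;
		}
		queue = malloc(32);	/* stands in for blk_alloc_queue() */
		if (!queue) {
			rv = -ENOMEM;
			goto cleanup;
		}
		free(queue);
		free(workq);
		return 0;
	cleanup:
		free(queue);
		free(workq);
		return rv;		/* negative errno, never a stale 0 */
	}

	int main(void)
	{
		printf("%d\n", sketch_probe());
		return 0;
	}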
Signed-off-by: Wei Yongjun Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 11cc9522cdd4..92250af84e7d 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4224,6 +4224,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, dd->isr_workq = create_workqueue(dd->workq_name); if (!dd->isr_workq) { dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + rv = -ENOMEM; goto block_initialize_err; } @@ -4282,7 +4283,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); pci_set_master(pdev); - if (pci_enable_msi(pdev)) { + rv = pci_enable_msi(pdev); + if (rv) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; -- cgit v1.2.2 From 183cfb5720dfc393641b87710ce78561af3db6cd Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 08:59:19 -0600 Subject: loop: fix error return code in loop_add() Fix to return a negative error code from the error handling case, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Jens Axboe --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 747bb2af69dc..ee13a82f3f5e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1623,6 +1623,7 @@ static int loop_add(struct loop_device **l, int i) goto out_free_dev; i = err; + err = -ENOMEM; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) goto out_free_dev; -- cgit v1.2.2 From 8761a3dc1f07b163414e2215a2cadbb4cfe2a107 Mon Sep 17 00:00:00 2001 From: Phillip Susi Date: Fri, 22 Mar 2013 12:21:53 -0600 Subject: loop: cleanup partitions when detaching loop device Any partitions added by user space to the loop device were being left in place after detaching the loop device. This was because the detach path issued a BLKRRPART to clean up partitions if LO_FLAGS_PARTSCAN was set, meaning that the partitions were auto scanned on attach. Replace this BLKRRPART with code that unconditionally cleans up partitions on detach instead. Signed-off-by: Phillip Susi Modified by Jens to export delete_partition(). Signed-off-by: Jens Axboe --- block/partition-generic.c | 1 + drivers/block/loop.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/block/partition-generic.c b/block/partition-generic.c index 789cdea05893..ae95ee6a58aa 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -257,6 +257,7 @@ void delete_partition(struct gendisk *disk, int partno) hd_struct_put(part); } +EXPORT_SYMBOL(delete_partition); static ssize_t whole_disk_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ee13a82f3f5e..fe5f6403417f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1044,12 +1044,29 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. 
*/ module_put(THIS_MODULE); - if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) - ioctl_by_bdev(bdev, BLKRRPART, 0); lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; mutex_unlock(&lo->lo_ctl_mutex); + + /* + * Remove all partitions, since BLKRRPART won't remove user + * added partitions when max_part=0 + */ + if (bdev) { + struct disk_part_iter piter; + struct hd_struct *part; + + mutex_lock_nested(&bdev->bd_mutex, 1); + invalidate_partition(bdev->bd_disk, 0); + disk_part_iter_init(&piter, bdev->bd_disk, + DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + delete_partition(bdev->bd_disk, part->partno); + disk_part_iter_exit(&piter); + mutex_unlock(&bdev->bd_mutex); + } + /* * Need not hold lo_ctl_mutex to fput backing file. * Calling fput holding lo_ctl_mutex triggers a circular -- cgit v1.2.2 From d2b805d89510737ea80c1469f854a16480d19778 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 22 Mar 2013 09:11:00 -0600 Subject: cciss: fix invalid use of sizeof in cciss_find_cfgtables() sizeof() when applied to a pointer-typed expression gives the size of the pointer, not that of the pointed-to data. Signed-off-by: Wei Yongjun Acked-by: scameron@beardog.cce.hp.com Signed-off-by: Jens Axboe --- drivers/block/cciss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index ade58bc8f3c4..1c1b8e544aa2 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -4206,7 +4206,7 @@ static int cciss_find_cfgtables(ctlr_info_t *h) if (rc) return rc; h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev, - cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable)); + cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable)); if (!h->cfgtable) return -ENOMEM; rc = write_driver_ver_to_cfgtable(h->cfgtable); -- cgit v1.2.2 From f2fc7d0eddf86b0233faa34aa5af6780ea48bc08 Mon Sep 17 00:00:00 2001 From: Alice Ferrazzi Date: Fri, 22 Mar 2013 11:11:04 -0600 Subject: Block: blk-flush: Fixed indent code style Fix code indentation to use tabs where possible. Signed-off-by: Alice Ferrazzi Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index db8f1b507857..cc2b827a853c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -444,7 +444,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, * copied from blk_rq_pos(rq). */ if (error_sector) - *error_sector = bio->bi_sector; + *error_sector = bio->bi_sector; if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; -- cgit v1.2.2 From 51f0885e5415b4cc6535e9cdcc5145bfbc134353 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 22 Mar 2013 11:44:04 -0700 Subject: vfs,proc: guarantee unique inodes in /proc Dave Jones found another /proc issue with his Trinity tool: thanks to the namespace model, we can have multiple /proc dentries that point to the same inode, aliasing directories in /proc//net/ for example. This ends up being a total disaster, because it acts like hardlinked directories, and causes locking problems. We rely on the topological sort of the inodes pointed to by dentries, and if we have aliased directories, that ordering becomes unreliable. In short: don't do this. Multiple dentries with the same (directory) inode is just a bad idea, and the namespace code should never have exposed things this way. But we're kind of stuck with it.
This solves things by just always allocating a new inode during /proc dentry lookup, instead of using "iget_locked()" to look up existing inodes by superblock and number. That actually simplifies the code a bit, at the cost of potentially doing more inode [de]allocations. That said, the inode lookup wasn't free either (and did a lot of locking of inodes), so it is probably not that noticeable. We could easily keep the old lookup model for non-directory entries, but rather than try to be excessively clever this just implements the minimal and simplest workaround for the problem. Reported-and-tested-by: Dave Jones Analyzed-by: Al Viro Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a86aebc9ba7c..869116c2afbe 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -446,9 +446,10 @@ static const struct file_operations proc_reg_file_ops_no_compat = { struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { - struct inode *inode = iget_locked(sb, de->low_ino); + struct inode *inode = new_inode_pseudo(sb); - if (inode && (inode->i_state & I_NEW)) { + if (inode) { + inode->i_ino = de->low_ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; @@ -476,7 +477,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_fop = de->proc_fops; } } - unlock_new_inode(inode); } else pde_put(de); return inode; -- cgit v1.2.2 From 122090366d1d5c6ec1bfb6dfdb3a6d121ff074aa Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 31 Jan 2013 14:40:38 -0700 Subject: NVMe: Add namespaces with no LBA range feature The LBA Range Type feature is optional in the NVMe specification, so we should continue with adding namespaces for controllers that do not implement this feature. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- drivers/block/nvme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c index 993c014d195a..e209ec5930cc 100644 --- a/drivers/block/nvme.c +++ b/drivers/block/nvme.c @@ -1540,7 +1540,7 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev) res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i, dma_addr + 4096, NULL); if (res) - continue; + memset(mem + 4096, 0, 4096); ns = nvme_alloc_ns(dev, i, mem, mem + 4096); if (ns) -- cgit v1.2.2 From ca0ba26fbbd2d81c43085df49ce0abfe34535a90 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 22 Mar 2013 19:56:51 +0000 Subject: efivars: Fix check for CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE The 'CONFIG_' prefix is not implicit in IS_ENABLED().
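The pitfall is easy to reproduce in userspace. The sketch below re-implements the kernel's IS_ENABLED() machinery in simplified form (=y handling only, not the real header): without the CONFIG_ prefix the macro quietly tests an undefined name and yields 0.

	#include <stdio.h>

	#define __ARG_PLACEHOLDER_1 0,
	#define __take_second_arg(__ignored, val, ...) val
	#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
	#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
	#define __is_defined(x) ___is_defined(x)
	#define IS_ENABLED(option) __is_defined(option)

	#define CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE 1

	int main(void)
	{
		/* prints 0: the unprefixed name is simply undefined */
		printf("wrong:   %d\n", IS_ENABLED(EFI_VARS_PSTORE_DEFAULT_DISABLE));
		/* prints 1: the full CONFIG_ name is what is defined */
		printf("correct: %d\n", IS_ENABLED(CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE));
		return 0;
	}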
Signed-off-by: Ben Hutchings Cc: Seth Forshee Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index d64661fda4fd..7acafb80fd4c 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -104,7 +104,7 @@ MODULE_VERSION(EFIVARS_VERSION); #define GUID_LEN 36 static bool efivars_pstore_disable = - IS_ENABLED(EFI_VARS_PSTORE_DEFAULT_DISABLE); + IS_ENABLED(CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE); module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644); -- cgit v1.2.2 From 57471c8d3c22873f70813820e6b4d2d1fea9629d Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Fri, 22 Mar 2013 12:35:06 -0600 Subject: ARM: tegra: fix register address of slink controller Fix typo on register address of slink3 controller where register address is wrongly set as 0x7000d480 but it is 0x7000d800. Signed-off-by: Laxman Dewangan Cc: Signed-off-by: Stephen Warren Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/tegra20.dtsi | 2 +- arch/arm/boot/dts/tegra30.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 48d00a099ce3..3d3f64d2111a 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -385,7 +385,7 @@ spi@7000d800 { compatible = "nvidia,tegra20-slink"; - reg = <0x7000d480 0x200>; + reg = <0x7000d800 0x200>; interrupts = <0 83 0x04>; nvidia,dma-request-selector = <&apbdma 17>; #address-cells = <1>; diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index 9d87a3ffe998..dbf46c272562 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -372,7 +372,7 @@ spi@7000d800 { compatible = "nvidia,tegra30-slink", "nvidia,tegra20-slink"; - reg = <0x7000d480 0x200>; + reg = <0x7000d800 0x200>; interrupts = <0 83 0x04>; nvidia,dma-request-selector = <&apbdma 17>; #address-cells = <1>; -- cgit v1.2.2 From e49dbbf3e770aa590a8a464ac4978a09027060b9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 22 Mar 2013 11:18:24 -0700 Subject: nfsd: fix bad offset use vfs_writev() updates the offset argument - but the code then passes the offset to vfs_fsync_range(). Since offset now points to the offset after what was just written, this is probably not what was intended Introduced by face15025ffdf664de95e86ae831544154d26c9c "nfsd: use vfs_fsync_range(), not O_SYNC, for stable writes". Signed-off-by: Kent Overstreet Cc: Al Viro Cc: "Eric W. Biederman" Cc: stable@vger.kernel.org Reviewed-by: Zach Brown Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2a7eb536de0b..2b2e2396a869 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1013,6 +1013,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int host_err; int stable = *stablep; int use_wgather; + loff_t pos = offset; dentry = file->f_path.dentry; inode = dentry->d_inode; @@ -1025,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. 
*/ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); set_fs(oldfs); if (host_err < 0) goto out_nfserr; -- cgit v1.2.2 From c69d72aec52eb5234f433259ac5dcc3b68f1480d Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 28 Nov 2012 15:46:45 -0800 Subject: MAINTAINERS: update email address for Kevin Hilman Signed-off-by: Kevin Hilman Signed-off-by: Arnd Bergmann --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 50b4d735f961..89573ca56a1d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5675,7 +5675,7 @@ S: Maintained F: arch/arm/*omap*/*clock* OMAP POWER MANAGEMENT SUPPORT -M: Kevin Hilman +M: Kevin Hilman L: linux-omap@vger.kernel.org S: Maintained F: arch/arm/*omap*/*pm* @@ -5769,7 +5769,7 @@ F: arch/arm/*omap*/usb* OMAP GPIO DRIVER M: Santosh Shilimkar -M: Kevin Hilman +M: Kevin Hilman L: linux-omap@vger.kernel.org S: Maintained F: drivers/gpio/gpio-omap.c @@ -7165,7 +7165,7 @@ F: arch/arm/mach-s3c2410/bast-irq.c TI DAVINCI MACHINE SUPPORT M: Sekhar Nori -M: Kevin Hilman +M: Kevin Hilman L: davinci-linux-open-source@linux.davincidsp.com (moderated for non-subscribers) T: git git://gitorious.org/linux-davinci/linux-davinci.git Q: http://patchwork.kernel.org/project/linux-davinci/list/ -- cgit v1.2.2 From 18e4321276fcf083b85b788fee7cf15be29ed72a Mon Sep 17 00:00:00 2001 From: Takahisa Tanaka Date: Sun, 3 Mar 2013 14:52:07 +0900 Subject: watchdog: sp5100_tco: Remove code that may cause a boot failure A problem was found on PCs with the SB700 chipset: the PC fails to load the BIOS after running the 3.8.x kernel until the power is completely cut off. It occurs in all 3.8.x versions and the mainline version as of 2/4. The issue does not occur with the 3.7.x builds. There are two methods for accessing the watchdog registers: 1. Re-programming a resource address obtained from allocate_resource() into the chipset. 2. Using direct memory-mapped IO access. Method 1 can be used on all the chipsets (SP5100, SB7x0, SB8x0 or later). However, experience shows that only PCs with the SB8x0 (or later) chipsets can use method 2. This patch removes method 1 because of the critical problem found with it; method 1 is the reason the watchdog timer was able to be used on SP5100 and SB7x0 chipsets until now.
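The two methods, sketched (register names from the driver below; both functions are illustrative stand-ins, not the removed code):

	/* Method 1: re-program the watchdog MMIO base into the chipset one
	 * byte at a time through the PM index/data register pair. */
	static void sketch_set_wdt_base(u8 base_addr, u32 val)
	{
		int i;

		for (i = 0; i < 4; i++) {
			outb(base_addr + i, SB800_IO_PM_INDEX_REG);
			outb((val >> (i * 8)) & 0xff, SB800_IO_PM_DATA_REG);
		}
	}

	/* Method 2: direct memory-mapped access, once the base is ioremap()ed. */
	static u32 sketch_wdt_read(void __iomem *tcobase, unsigned int offset)
	{
		return readl(tcobase + offset);
	}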
Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1116835 Link: https://lkml.org/lkml/2013/2/14/271 Signed-off-by: Takahisa Tanaka Signed-off-by: Wim Van Sebroeck Cc: stable --- drivers/watchdog/sp5100_tco.c | 126 ++---------------------------------------- 1 file changed, 6 insertions(+), 120 deletions(-) diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index e3b8f757d2d3..0e9d8c479c35 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -40,13 +40,12 @@ #include "sp5100_tco.h" /* Module and version information */ -#define TCO_VERSION "0.03" +#define TCO_VERSION "0.05" #define TCO_MODULE_NAME "SP5100 TCO timer" #define TCO_DRIVER_NAME TCO_MODULE_NAME ", v" TCO_VERSION /* internal variables */ static u32 tcobase_phys; -static u32 resbase_phys; static u32 tco_wdt_fired; static void __iomem *tcobase; static unsigned int pm_iobase; @@ -54,10 +53,6 @@ static DEFINE_SPINLOCK(tco_lock); /* Guards the hardware */ static unsigned long timer_alive; static char tco_expect_close; static struct pci_dev *sp5100_tco_pci; -static struct resource wdt_res = { - .name = "Watchdog Timer", - .flags = IORESOURCE_MEM, -}; /* the watchdog platform device */ static struct platform_device *sp5100_tco_platform_device; @@ -75,12 +70,6 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started." " (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -static unsigned int force_addr; -module_param(force_addr, uint, 0); -MODULE_PARM_DESC(force_addr, "Force the use of specified MMIO address." - " ONLY USE THIS PARAMETER IF YOU REALLY KNOW" - " WHAT YOU ARE DOING (default=none)"); - /* * Some TCO specific functions */ @@ -176,39 +165,6 @@ static void tco_timer_enable(void) } } -static void tco_timer_disable(void) -{ - int val; - - if (sp5100_tco_pci->revision >= 0x40) { - /* For SB800 or later */ - /* Enable watchdog decode bit and Disable watchdog timer */ - outb(SB800_PM_WATCHDOG_CONTROL, SB800_IO_PM_INDEX_REG); - val = inb(SB800_IO_PM_DATA_REG); - val |= SB800_PCI_WATCHDOG_DECODE_EN; - val |= SB800_PM_WATCHDOG_DISABLE; - outb(val, SB800_IO_PM_DATA_REG); - } else { - /* For SP5100 or SB7x0 */ - /* Enable watchdog decode bit */ - pci_read_config_dword(sp5100_tco_pci, - SP5100_PCI_WATCHDOG_MISC_REG, - &val); - - val |= SP5100_PCI_WATCHDOG_DECODE_EN; - - pci_write_config_dword(sp5100_tco_pci, - SP5100_PCI_WATCHDOG_MISC_REG, - val); - - /* Disable Watchdog timer */ - outb(SP5100_PM_WATCHDOG_CONTROL, SP5100_IO_PM_INDEX_REG); - val = inb(SP5100_IO_PM_DATA_REG); - val |= SP5100_PM_WATCHDOG_DISABLE; - outb(val, SP5100_IO_PM_DATA_REG); - } -} - /* * /dev/watchdog handling */ @@ -361,7 +317,7 @@ static unsigned char sp5100_tco_setupdevice(void) { struct pci_dev *dev = NULL; const char *dev_name = NULL; - u32 val, tmp_val; + u32 val; u32 index_reg, data_reg, base_addr; /* Match the PCI device */ @@ -459,63 +415,8 @@ static unsigned char sp5100_tco_setupdevice(void) } else pr_debug("SBResource_MMIO is disabled(0x%04x)\n", val); - /* - * Lastly re-programming the watchdog timer MMIO address, - * This method is a last resort... - * - * Before re-programming, to ensure that the watchdog timer - * is disabled, disable the watchdog timer. - */ - tco_timer_disable(); - - if (force_addr) { - /* - * Force the use of watchdog timer MMIO address, and aligned to - * 8byte boundary. 
- */ - force_addr &= ~0x7; - val = force_addr; - - pr_info("Force the use of 0x%04x as MMIO address\n", val); - } else { - /* - * Get empty slot into the resource tree for watchdog timer. - */ - if (allocate_resource(&iomem_resource, - &wdt_res, - SP5100_WDT_MEM_MAP_SIZE, - 0xf0000000, - 0xfffffff8, - 0x8, - NULL, - NULL)) { - pr_err("MMIO allocation failed\n"); - goto unreg_region; - } - - val = resbase_phys = wdt_res.start; - pr_debug("Got 0x%04x from resource tree\n", val); - } - - /* Restore to the low three bits */ - outb(base_addr+0, index_reg); - tmp_val = val | (inb(data_reg) & 0x7); - - /* Re-programming the watchdog timer base address */ - outb(base_addr+0, index_reg); - outb((tmp_val >> 0) & 0xff, data_reg); - outb(base_addr+1, index_reg); - outb((tmp_val >> 8) & 0xff, data_reg); - outb(base_addr+2, index_reg); - outb((tmp_val >> 16) & 0xff, data_reg); - outb(base_addr+3, index_reg); - outb((tmp_val >> 24) & 0xff, data_reg); - - if (!request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE, - dev_name)) { - pr_err("MMIO address 0x%04x already in use\n", val); - goto unreg_resource; - } + pr_notice("failed to find MMIO address, giving up.\n"); + goto unreg_region; setup_wdt: tcobase_phys = val; @@ -555,9 +456,6 @@ setup_wdt: unreg_mem_region: release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); -unreg_resource: - if (resbase_phys) - release_resource(&wdt_res); unreg_region: release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); exit: @@ -567,7 +465,6 @@ exit: static int sp5100_tco_init(struct platform_device *dev) { int ret; - char addr_str[16]; /* * Check whether or not the hardware watchdog is there. If found, then @@ -599,23 +496,14 @@ static int sp5100_tco_init(struct platform_device *dev) clear_bit(0, &timer_alive); /* Show module parameters */ - if (force_addr == tcobase_phys) - /* The force_addr is vaild */ - sprintf(addr_str, "0x%04x", force_addr); - else - strcpy(addr_str, "none"); - - pr_info("initialized (0x%p). heartbeat=%d sec (nowayout=%d, " - "force_addr=%s)\n", - tcobase, heartbeat, nowayout, addr_str); + pr_info("initialized (0x%p). heartbeat=%d sec (nowayout=%d)\n", + tcobase, heartbeat, nowayout); return 0; exit: iounmap(tcobase); release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); - if (resbase_phys) - release_resource(&wdt_res); release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); return ret; } @@ -630,8 +518,6 @@ static void sp5100_tco_cleanup(void) misc_deregister(&sp5100_tco_miscdev); iounmap(tcobase); release_mem_region(tcobase_phys, SP5100_WDT_MEM_MAP_SIZE); - if (resbase_phys) - release_resource(&wdt_res); release_region(pm_iobase, SP5100_PM_IOPORTS_SIZE); } -- cgit v1.2.2 From 81fc933f176cd95f757bfc8a98109ef422598b79 Mon Sep 17 00:00:00 2001 From: Takahisa Tanaka Date: Sun, 3 Mar 2013 14:48:00 +0900 Subject: watchdog: sp5100_tco: Set the AcpiMmioSel bitmask value to 1 instead of 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AcpiMmioSel bit is bit 1 in the AcpiMmioEn register, but the current sp5100_tco driver is using bit 2. See 2.3.3 Power Management (PM) Registers page 150 of the AMD SB800-Series Southbridges Register Reference Guide [1]. AcpiMmioEn - RW – 8/16/32 bits - [PM_Reg: 24h] Field Name Bits Default Description AcpiMMioDecodeEn 0 0b Set to 1 to enable AcpiMMio space. AcpiMMIoSel 1 0b Set AcpiMMio registers to be memory-mapped or IO-mapped space. 0: Memory-mapped space 1: I/O-mapped space The sp5100_tco driver expects zero as a value of AcpiMmioSel (bit 1). 
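The quoted register layout, turned into a compilable check (the SB800_* values match the header diff that follows; the assertions themselves are illustrative):

	#include <assert.h>
	#include <stdint.h>

	#define SB800_ACPI_MMIO_DECODE_EN	(1 << 0)
	#define SB800_ACPI_MMIO_SEL_OLD		(1 << 2)	/* wrong: undocumented bit */
	#define SB800_ACPI_MMIO_SEL		(1 << 1)	/* correct per the register guide */

	int main(void)
	{
		/* AcpiMmioEn: decode enabled, AcpiMMIoSel clear => memory-mapped space */
		uint8_t acpi_mmio_en = SB800_ACPI_MMIO_DECODE_EN;

		assert(!(acpi_mmio_en & SB800_ACPI_MMIO_SEL));
		/* Testing the wrong bit also passes, but only because the
		 * undocumented bit 2 happens to default to zero. */
		assert(!(acpi_mmio_en & SB800_ACPI_MMIO_SEL_OLD));
		return 0;
	}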
Fortunately, no problems were caused by this typo, because the default value of the undocumented misused bit 2 seems to be zero. However, the sp5100_tco driver should use the correct bitmask value. [1] http://support.amd.com/us/Embedded_TechDocs/45482.pdf Signed-off-by: Takahisa Tanaka Signed-off-by: Paul Menzel Signed-off-by: Wim Van Sebroeck Cc: stable --- drivers/watchdog/sp5100_tco.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/sp5100_tco.h b/drivers/watchdog/sp5100_tco.h index 71594a0c14b7..2b28c00da0df 100644 --- a/drivers/watchdog/sp5100_tco.h +++ b/drivers/watchdog/sp5100_tco.h @@ -57,7 +57,7 @@ #define SB800_PM_WATCHDOG_DISABLE (1 << 2) #define SB800_PM_WATCHDOG_SECOND_RES (3 << 0) #define SB800_ACPI_MMIO_DECODE_EN (1 << 0) -#define SB800_ACPI_MMIO_SEL (1 << 2) +#define SB800_ACPI_MMIO_SEL (1 << 1) #define SB800_PM_WDT_MMIO_OFFSET 0xB00 -- cgit v1.2.2 From fe8d52614bd419cedef85ef55850fd090373f481 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 22 Mar 2013 15:04:37 -0700 Subject: irq_work.h: fix warning when CONFIG_IRQ_WORK=n A randconfig caught repeated compiler warnings when CONFIG_IRQ_WORK=n due to the definition of a non-inline static function in : include/linux/irq_work.h +40 : warning: 'irq_work_needs_cpu' defined but not used Make it inline to suppress the warning. This is caused by commit 00b42959106a ("irq_work: Don't stop the tick with pending works") merged in v3.9-rc1. Signed-off-by: James Hogan Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irq_work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index f5dbce50466e..66017028dcb3 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -37,7 +37,7 @@ void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK bool irq_work_needs_cpu(void); #else -static bool irq_work_needs_cpu(void) { return false; } +static inline bool irq_work_needs_cpu(void) { return false; } #endif #endif /* _LINUX_IRQ_WORK_H */ -- cgit v1.2.2 From dc72c32e1fd872a9a4fdfe645283c9dcd68e556d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 22 Mar 2013 15:04:39 -0700 Subject: printk: Provide a wake_up_klogd() off-case wake_up_klogd() is useless when CONFIG_PRINTK=n because neither printk() nor printk_sched() are in use and there are actually no waiters on the log_wait waitqueue. It should be a stub in this case for users like bust_spinlocks(). Otherwise this results in this warning when CONFIG_PRINTK=n and CONFIG_IRQ_WORK=n: kernel/built-in.o In function `wake_up_klogd': (.text.wake_up_klogd+0xb4): undefined reference to `irq_work_queue' To fix this, provide an off-case for wake_up_klogd() when CONFIG_PRINTK=n. There is much more from console_unlock() and other console-related code in printk.c that should be moved under CONFIG_PRINTK. But for now, focus on a minimal fix as we passed the merge window already.
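Both the irq_work_needs_cpu() fix above and this one are instances of the same header idiom, sketched compilably below (simplified: the config option is modeled as a plain 0/1 macro, unlike real Kconfig): the off-case supplies an empty static inline so call sites need no #ifdef, the compiler emits no 'defined but not used' warning, and the linker sees no undefined reference.

	#include <stdio.h>

	#define CONFIG_PRINTK 1		/* flip to 0 to exercise the stub path */

	#if CONFIG_PRINTK
	void wake_up_klogd(void)
	{
		printf("waking klogd\n");	/* stands in for the real wakeup */
	}
	#else
	static inline void wake_up_klogd(void) { }	/* off-case stub */
	#endif

	int main(void)
	{
		wake_up_klogd();	/* e.g. from bust_spinlocks() */
		return 0;
	}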
[akpm@linux-foundation.org: include printk.h in bust_spinlocks.c] Signed-off-by: Frederic Weisbecker Reported-by: James Hogan Cc: James Hogan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - include/linux/printk.h | 6 ++++ kernel/printk.c | 80 ++++++++++++++++++++++++-------------------------- lib/bust_spinlocks.c | 3 +- 4 files changed, 46 insertions(+), 44 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 80d36874689b..79fdd80a42d4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -390,7 +390,6 @@ extern struct pid *session_of_pgrp(struct pid *pgrp); unsigned long int_sqrt(unsigned long); extern void bust_spinlocks(int yes); -extern void wake_up_klogd(void); extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ extern int panic_timeout; extern int panic_on_oops; diff --git a/include/linux/printk.h b/include/linux/printk.h index 1249a54d17e0..822171fcb1c8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -134,6 +134,8 @@ extern int printk_delay_msec; extern int dmesg_restrict; extern int kptr_restrict; +extern void wake_up_klogd(void); + void log_buf_kexec_setup(void); void __init setup_log_buf(int early); #else @@ -162,6 +164,10 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, return false; } +static inline void wake_up_klogd(void) +{ +} + static inline void log_buf_kexec_setup(void) { } diff --git a/kernel/printk.c b/kernel/printk.c index 0b31715f335a..abbdd9e2ac82 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -63,8 +63,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ -DECLARE_WAIT_QUEUE_HEAD(log_wait); - int console_printk[4] = { DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ @@ -224,6 +222,7 @@ struct log { static DEFINE_RAW_SPINLOCK(logbuf_lock); #ifdef CONFIG_PRINTK +DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ static u64 syslog_seq; static u32 syslog_idx; @@ -1957,45 +1956,6 @@ int is_console_locked(void) return console_locked; } -/* - * Delayed printk version, for scheduler-internal messages: - */ -#define PRINTK_BUF_SIZE 512 - -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_SCHED 0x02 - -static DEFINE_PER_CPU(int, printk_pending); -static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); - -static void wake_up_klogd_work_func(struct irq_work *irq_work) -{ - int pending = __this_cpu_xchg(printk_pending, 0); - - if (pending & PRINTK_PENDING_SCHED) { - char *buf = __get_cpu_var(printk_sched_buf); - printk(KERN_WARNING "[sched_delayed] %s", buf); - } - - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); -} - -static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { - .func = wake_up_klogd_work_func, - .flags = IRQ_WORK_LAZY, -}; - -void wake_up_klogd(void) -{ - preempt_disable(); - if (waitqueue_active(&log_wait)) { - this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); - irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); - } - preempt_enable(); -} - static void console_cont_flush(char *text, size_t size) { unsigned long flags; @@ -2458,6 +2418,44 @@ static int __init printk_late_init(void) 
late_initcall(printk_late_init); #if defined CONFIG_PRINTK +/* + * Delayed printk version, for scheduler-internal messages: + */ +#define PRINTK_BUF_SIZE 512 + +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_SCHED 0x02 + +static DEFINE_PER_CPU(int, printk_pending); +static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); + +static void wake_up_klogd_work_func(struct irq_work *irq_work) +{ + int pending = __this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_SCHED) { + char *buf = __get_cpu_var(printk_sched_buf); + printk(KERN_WARNING "[sched_delayed] %s", buf); + } + + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); +} + +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { + .func = wake_up_klogd_work_func, + .flags = IRQ_WORK_LAZY, +}; + +void wake_up_klogd(void) +{ + preempt_disable(); + if (waitqueue_active(&log_wait)) { + this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + } + preempt_enable(); +} int printk_sched(const char *fmt, ...) { diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c index 9681d54b95d1..f8e0e5367398 100644 --- a/lib/bust_spinlocks.c +++ b/lib/bust_spinlocks.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -28,5 +29,3 @@ void __attribute__((weak)) bust_spinlocks(int yes) wake_up_klogd(); } } - - -- cgit v1.2.2 From d00285884c0892bb1310df96bce6056e9ce9b9d9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 22 Mar 2013 15:04:40 -0700 Subject: mm/hugetlb: fix total hugetlbfs pages count when using memory overcommit accounting hugetlb_total_pages is used for overcommit calculations but the current implementation considers only the default hugetlb page size (which is either the first defined hugepage size or the one specified by default_hugepagesz kernel boot parameter). If the system is configured for more than one hugepage size, which is possible since commit a137e1cc6d6e ("hugetlbfs: per mount huge page sizes") then the overcommit estimation done by __vm_enough_memory() (resp. shown by meminfo_proc_show) is not precise - there is an impression of more available/allowed memory. This can lead to an unexpected ENOMEM/EFAULT resp. SIGSEGV when memory is accounted. Testcase: boot: hugepagesz=1G hugepages=1 the default overcommit ratio is 50 before patch: egrep 'CommitLimit' /proc/meminfo CommitLimit: 55434168 kB after patch: egrep 'CommitLimit' /proc/meminfo CommitLimit: 54909880 kB [akpm@linux-foundation.org: coding-style tweak] Signed-off-by: Wanpeng Li Acked-by: Michal Hocko Cc: "Aneesh Kumar K.V" Cc: Hillf Danton Cc: KAMEZAWA Hiroyuki Cc: [3.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0a0be33bb199..ca9a7c6d7e97 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2124,8 +2124,12 @@ int hugetlb_report_node_meminfo(int nid, char *buf) /* Return the number pages of memory we physically have, in PAGE_SIZE units.
*/ unsigned long hugetlb_total_pages(void) { - struct hstate *h = &default_hstate; - return h->nr_huge_pages * pages_per_huge_page(h); + struct hstate *h; + unsigned long nr_total_pages = 0; + + for_each_hstate(h) + nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h); + return nr_total_pages; } static int hugetlb_acct_memory(struct hstate *h, long delta) -- cgit v1.2.2 From 2ca067efd82939dfd87827d29d36a265823a4c2f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 22 Mar 2013 15:04:41 -0700 Subject: poweroff: change orderly_poweroff() to use schedule_work() David said: Commit 6c0c0d4d1080 ("poweroff: fix bug in orderly_poweroff()") apparently fixes one bug in orderly_poweroff(), but introduces another. The comments on orderly_poweroff() claim it can be called from any context - and indeed we call it from interrupt context in arch/powerpc/platforms/pseries/ras.c for example. But since that commit this is no longer safe, since call_usermodehelper_fns() is not safe in interrupt context without the UMH_NO_WAIT option. orderly_poweroff() can be used from any context but UMH_WAIT_EXEC is sleepable. Move the "force" logic into __orderly_poweroff() and change orderly_poweroff() to use the global poweroff_work which simply calls __orderly_poweroff(). While at it, remove the unneeded "int argc" and change argv_split() to use GFP_KERNEL. We use the global "bool poweroff_force" to pass the argument; this can obviously affect the previous request if it is pending/running. So we only allow the "false => true" transition, assuming that the pending "true" should succeed anyway. If schedule_work() fails after that, we know that work->func() was not called yet, so it must see the new value. This means that orderly_poweroff() becomes async even if we do not run the command, and it always succeeds; schedule_work() can only fail if the work is already pending. We can export __orderly_poweroff() and change the non-atomic callers which want the old semantics. Signed-off-by: Oleg Nesterov Reported-by: Benjamin Herrenschmidt Reported-by: David Gibson Cc: Lucas De Marchi Cc: Feng Hong Cc: Kees Cook Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 57 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 81f56445fba9..39c9c4a2949f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2185,9 +2185,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; -static int __orderly_poweroff(void) +static int __orderly_poweroff(bool force) { - int argc; char **argv; static char *envp[] = { "HOME=/", @@ -2196,20 +2195,40 @@ static int __orderly_poweroff(void) }; int ret; - argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); - if (argv == NULL) { + argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); + if (argv) { + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); + argv_free(argv); + } else { printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", - __func__, poweroff_cmd); - return -ENOMEM; + __func__, poweroff_cmd); + ret = -ENOMEM; } - ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, - NULL, NULL, NULL); - argv_free(argv); + if (ret && force) { + printk(KERN_WARNING "Failed to start orderly shutdown: " + "forcing the issue\n"); + /* + * I guess this should try to kick off some daemon to sync and + * poweroff asap.
Or not even bother syncing if we're doing an + * emergency shutdown? + */ + emergency_sync(); + kernel_power_off(); + } return ret; } +static bool poweroff_force; + +static void poweroff_work_func(struct work_struct *work) +{ + __orderly_poweroff(poweroff_force); +} + +static DECLARE_WORK(poweroff_work, poweroff_work_func); + /** * orderly_poweroff - Trigger an orderly system poweroff * @force: force poweroff if command execution fails @@ -2219,21 +2238,9 @@ static int __orderly_poweroff(void) */ int orderly_poweroff(bool force) { - int ret = __orderly_poweroff(); - - if (ret && force) { - printk(KERN_WARNING "Failed to start orderly shutdown: " - "forcing the issue\n"); - - /* - * I guess this should try to kick off some daemon to sync and - * poweroff asap. Or not even bother syncing if we're doing an - * emergency shutdown? - */ - emergency_sync(); - kernel_power_off(); - } - - return ret; + if (force) /* do not override the pending "true" */ + poweroff_force = true; + schedule_work(&poweroff_work); + return 0; } EXPORT_SYMBOL_GPL(orderly_poweroff); -- cgit v1.2.2 From f9228b204f789493117e458d2fefae937edb7272 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 22 Mar 2013 15:04:43 -0700 Subject: mm: zone_end_pfn is too small Booting with 32 TBytes memory hits BUG at mm/page_alloc.c:552! (output below). The key hint is "page 4294967296 outside zone". 4294967296 = 0x100000000 (bit 32 is set). The problem is in include/linux/mmzone.h: 530 static inline unsigned zone_end_pfn(const struct zone *zone) 531 { 532 return zone->zone_start_pfn + zone->spanned_pages; 533 } zone_end_pfn is "unsigned" (32 bits). Changing it to "unsigned long" (64 bits) fixes the problem. zone_end_pfn() was added recently in commit 108bcc96ef70 ("mm: add & use zone_end_pfn() and zone_spans_pfn()") Output from the failure. No AGP bridge found page 4294967296 outside zone [ 4294967296 - 4327469056 ] ------------[ cut here ]------------ kernel BUG at mm/page_alloc.c:552! 
invalid opcode: 0000 [#1] SMP Modules linked in: CPU 0 Pid: 0, comm: swapper Not tainted 3.9.0-rc2.dtp+ #10 RIP: free_one_page+0x382/0x430 Process swapper (pid: 0, threadinfo ffffffff81942000, task ffffffff81955420) Call Trace: __free_pages_ok+0x96/0xb0 __free_pages+0x25/0x50 __free_pages_bootmem+0x8a/0x8c __free_memory_core+0xea/0x131 free_low_memory_core_early+0x4a/0x98 free_all_bootmem+0x45/0x47 mem_init+0x7b/0x14c start_kernel+0x216/0x433 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x144/0x153 Code: 89 f1 ba 01 00 00 00 31 f6 d3 e2 4c 89 ef e8 66 a4 01 00 e9 2c fe ff ff 0f 0b eb fe 0f 0b 66 66 2e 0f 1f 84 00 00 00 00 00 eb f3 <0f> 0b eb fe 0f 0b 0f 1f 84 00 00 00 00 00 eb f6 0f 0b eb fe 49 Signed-off-by: Russ Anderson Reported-by: George Beshers Acked-by: Hedi Berriche Cc: Cody P Schafer Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ede274957e05..c74092eebf5c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -527,7 +527,7 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } -static inline unsigned zone_end_pfn(const struct zone *zone) +static inline unsigned long zone_end_pfn(const struct zone *zone) { return zone->zone_start_pfn + zone->spanned_pages; } -- cgit v1.2.2 From 925e8ea6bca2c9a590565634b27768d7042e089f Mon Sep 17 00:00:00 2001 From: Ashish Jangam Date: Fri, 22 Mar 2013 15:04:44 -0700 Subject: drivers/rtc/rtc-da9052.c: fix for rtc device registration Add support for the virtual irq, since the MFD now only handles virtual irqs. Without this patch the rtc device will fail to register. (akpm: Ashish has a different version which will be needed for 3.8.x and earlier kernels) Signed-off-by: Ashish Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-da9052.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c index 0dde688ca09b..969abbad7fe3 100644 --- a/drivers/rtc/rtc-da9052.c +++ b/drivers/rtc/rtc-da9052.c @@ -239,11 +239,9 @@ static int da9052_rtc_probe(struct platform_device *pdev) rtc->da9052 = dev_get_drvdata(pdev->dev.parent); platform_set_drvdata(pdev, rtc); - rtc->irq = platform_get_irq_byname(pdev, "ALM"); - ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, - da9052_rtc_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - "ALM", rtc); + rtc->irq = DA9052_IRQ_ALARM; + ret = da9052_request_irq(rtc->da9052, rtc->irq, "ALM", + da9052_rtc_irq, rtc); if (ret != 0) { rtc_err(rtc->da9052, "irq registration failed: %d\n", ret); return ret; -- cgit v1.2.2 From e66b05873a7a76afc569da6382509471cba8d5ff Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 22 Mar 2013 15:04:45 -0700 Subject: drivers/video/ep93xx-fb.c: include <linux/io.h> for devm_ioremap() Commit be8678149701 ("drivers/video/ep93xx-fb.c: use devm_ functions") introduced a build error: drivers/video/ep93xx-fb.c: In function 'ep93xxfb_probe': drivers/video/ep93xx-fb.c:532: error: implicit declaration of function 'devm_ioremap' drivers/video/ep93xx-fb.c:533: warning: assignment makes pointer from integer without a cast Include <linux/io.h> to pick up the declaration of 'devm_ioremap'.
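To illustrate (a minimal sketch, not code from this patch; the probe function and device below are hypothetical): devm_ioremap() is declared in <linux/io.h>, so any file that calls it needs that header directly rather than inheriting it through another include.

#include <linux/io.h>              /* declares devm_ioremap() */
#include <linux/platform_device.h>

static int example_fb_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	/* Without <linux/io.h> this call is implicitly declared, and the
	 * assignment then warns about an int-to-pointer conversion. */
	base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
	if (!base)
		return -ENOMEM;

	return 0;
}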
Signed-off-by: H Hartley Sweeten Cc: Florian Tobias Schandinat Acked-by: Ryan Mallon Cc: Damien Cassou Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/ep93xx-fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/ep93xx-fb.c b/drivers/video/ep93xx-fb.c index 3f2519d30715..e06cd5d90c97 100644 --- a/drivers/video/ep93xx-fb.c +++ b/drivers/video/ep93xx-fb.c @@ -23,6 +23,7 @@ #include #include #include +#include #include -- cgit v1.2.2 From 0ef1594c017521ea89278e80fe3f80dafb17abde Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 22 Mar 2013 15:04:47 -0700 Subject: drivers/rtc/rtc-at91rm9200.c: use a variable for storing IMR On some revisions of AT91 SoCs, the RTC IMR register is not working. Instead of elaborating a workaround for that specific SoC or IP version, we simply use a software variable to store the Interrupt Mask Register and modify it for each enabling/disabling of an interrupt. The overhead of this is negligible anyway. The interrupt mask register (IMR) for the RTC is broken on the AT91SAM9x5 sub-family of SoCs (good overview of the members here: http://www.eewiki.net/display/linuxonarm/AT91SAM9x5). The "user visible effect" is the RTC doesn't work. That sub-family is less than two years old, only has devicetree (DT) support, and came online circa lk 3.7. The dust is yet to settle on the DT stuff, at least for AT91 SoCs (translation: lots of stuff is still broken, so much that it is hard to know where to start). The fix in the patch is pretty simple: just shadow the silicon IMR register with a variable in the driver. Some older SoCs (pre-DT) use the rtc-at91rm9200 driver (e.g. obviously the AT91RM9200) and they should not be impacted by the change. There shouldn't be a large volume of interrupts associated with an RTC. Signed-off-by: Nicolas Ferre Reported-by: Douglas Gilbert Cc: Jean-Christophe PLAGNIOL-VILLARD Cc: Ludovic Desroches Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-at91rm9200.c | 50 +++++++++++++++++++++++++++----------------- drivers/rtc/rtc-at91rm9200.h | 1 - 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 434ebc3a99dc..0a9f27e094ea 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -44,6 +44,7 @@ static DECLARE_COMPLETION(at91_rtc_updated); static unsigned int at91_alarm_year = AT91_RTC_EPOCH; static void __iomem *at91_rtc_regs; static int irq; +static u32 at91_rtc_imr; /* * Decode time/date into rtc_time structure @@ -108,9 +109,11 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm) cr = at91_rtc_read(AT91_RTC_CR); at91_rtc_write(AT91_RTC_CR, cr | AT91_RTC_UPDCAL | AT91_RTC_UPDTIM); + at91_rtc_imr |= AT91_RTC_ACKUPD; at91_rtc_write(AT91_RTC_IER, AT91_RTC_ACKUPD); wait_for_completion(&at91_rtc_updated); /* wait for ACKUPD interrupt */ at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD); + at91_rtc_imr &= ~AT91_RTC_ACKUPD; at91_rtc_write(AT91_RTC_TIMR, bin2bcd(tm->tm_sec) << 0 @@ -142,7 +145,7 @@ static int at91_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm) tm->tm_yday = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year); tm->tm_year = at91_alarm_year - 1900; - alrm->enabled = (at91_rtc_read(AT91_RTC_IMR) & AT91_RTC_ALARM) + alrm->enabled = (at91_rtc_imr & AT91_RTC_ALARM) ?
1 : 0; dev_dbg(dev, "%s(): %4d-%02d-%02d %02d:%02d:%02d\n", __func__, @@ -168,6 +171,7 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) tm.tm_sec = alrm->time.tm_sec; at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ALARM); + at91_rtc_imr &= ~AT91_RTC_ALARM; at91_rtc_write(AT91_RTC_TIMALR, bin2bcd(tm.tm_sec) << 0 | bin2bcd(tm.tm_min) << 8 @@ -180,6 +184,7 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) if (alrm->enabled) { at91_rtc_write(AT91_RTC_SCCR, AT91_RTC_ALARM); + at91_rtc_imr |= AT91_RTC_ALARM; at91_rtc_write(AT91_RTC_IER, AT91_RTC_ALARM); } @@ -196,9 +201,12 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) if (enabled) { at91_rtc_write(AT91_RTC_SCCR, AT91_RTC_ALARM); + at91_rtc_imr |= AT91_RTC_ALARM; at91_rtc_write(AT91_RTC_IER, AT91_RTC_ALARM); - } else + } else { at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ALARM); + at91_rtc_imr &= ~AT91_RTC_ALARM; + } return 0; } @@ -207,12 +215,10 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) */ static int at91_rtc_proc(struct device *dev, struct seq_file *seq) { - unsigned long imr = at91_rtc_read(AT91_RTC_IMR); - seq_printf(seq, "update_IRQ\t: %s\n", - (imr & AT91_RTC_ACKUPD) ? "yes" : "no"); + (at91_rtc_imr & AT91_RTC_ACKUPD) ? "yes" : "no"); seq_printf(seq, "periodic_IRQ\t: %s\n", - (imr & AT91_RTC_SECEV) ? "yes" : "no"); + (at91_rtc_imr & AT91_RTC_SECEV) ? "yes" : "no"); return 0; } @@ -227,7 +233,7 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id) unsigned int rtsr; unsigned long events = 0; - rtsr = at91_rtc_read(AT91_RTC_SR) & at91_rtc_read(AT91_RTC_IMR); + rtsr = at91_rtc_read(AT91_RTC_SR) & at91_rtc_imr; if (rtsr) { /* this interrupt is shared! Is it ours? */ if (rtsr & AT91_RTC_ALARM) events |= (RTC_AF | RTC_IRQF); @@ -291,6 +297,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev) at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | AT91_RTC_SECEV | AT91_RTC_TIMEV | AT91_RTC_CALEV); + at91_rtc_imr = 0; ret = request_irq(irq, at91_rtc_interrupt, IRQF_SHARED, @@ -329,6 +336,7 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | AT91_RTC_SECEV | AT91_RTC_TIMEV | AT91_RTC_CALEV); + at91_rtc_imr = 0; free_irq(irq, pdev); rtc_device_unregister(rtc); @@ -341,31 +349,35 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) /* AT91RM9200 RTC Power management control */ -static u32 at91_rtc_imr; +static u32 at91_rtc_bkpimr; + static int at91_rtc_suspend(struct device *dev) { /* this IRQ is shared with DBGU and other hardware which isn't * necessarily doing PM like we are... 
*/ - at91_rtc_imr = at91_rtc_read(AT91_RTC_IMR) - & (AT91_RTC_ALARM|AT91_RTC_SECEV); - if (at91_rtc_imr) { - if (device_may_wakeup(dev)) + at91_rtc_bkpimr = at91_rtc_imr & (AT91_RTC_ALARM|AT91_RTC_SECEV); + if (at91_rtc_bkpimr) { + if (device_may_wakeup(dev)) { enable_irq_wake(irq); - else - at91_rtc_write(AT91_RTC_IDR, at91_rtc_imr); - } + } else { + at91_rtc_write(AT91_RTC_IDR, at91_rtc_bkpimr); + at91_rtc_imr &= ~at91_rtc_bkpimr; + } +} return 0; } static int at91_rtc_resume(struct device *dev) { - if (at91_rtc_imr) { - if (device_may_wakeup(dev)) + if (at91_rtc_bkpimr) { + if (device_may_wakeup(dev)) { disable_irq_wake(irq); - else - at91_rtc_write(AT91_RTC_IER, at91_rtc_imr); + } else { + at91_rtc_imr |= at91_rtc_bkpimr; + at91_rtc_write(AT91_RTC_IER, at91_rtc_bkpimr); + } } return 0; } diff --git a/drivers/rtc/rtc-at91rm9200.h b/drivers/rtc/rtc-at91rm9200.h index da1945e5f714..5f940b6844cb 100644 --- a/drivers/rtc/rtc-at91rm9200.h +++ b/drivers/rtc/rtc-at91rm9200.h @@ -64,7 +64,6 @@ #define AT91_RTC_SCCR 0x1c /* Status Clear Command Register */ #define AT91_RTC_IER 0x20 /* Interrupt Enable Register */ #define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */ -#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */ #define AT91_RTC_VER 0x2c /* Valid Entry Register */ #define AT91_RTC_NVTIM (1 << 0) /* Non valid Time */ -- cgit v1.2.2 From 8d640a51ec9e9cdefa680b67ad55f933eefc5923 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 22 Mar 2013 15:04:48 -0700 Subject: dma-debug: fix locking bug in check_unmap() In check_unmap() it is possible to get into a dead-locked state if dma_mapping_error is called. The problem is that the bucket is locked in check_unmap, and locked again by debug_dma_mapping_error which is called by dma_mapping_error. To resolve that we must release the lock on the bucket before making the call to dma_mapping_error. 
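Reduced to a runnable userspace sketch (pthread mutexes standing in for the hash-bucket spinlock; all names here are illustrative, not the kernel's), the bug is the classic self-deadlock of re-acquiring a non-recursive lock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for debug_dma_mapping_error(): it takes the bucket lock itself. */
static int mapping_error_check(void)
{
	pthread_mutex_lock(&bucket_lock);
	pthread_mutex_unlock(&bucket_lock);
	return 0;
}

static void check_unmap_buggy(void)
{
	pthread_mutex_lock(&bucket_lock);
	/* Relocking a default (non-recursive) mutex here never returns:
	 * this mirrors check_unmap() calling dma_mapping_error() while
	 * still holding the bucket lock. */
	mapping_error_check();
	pthread_mutex_unlock(&bucket_lock);
}

static void check_unmap_fixed(void)
{
	pthread_mutex_lock(&bucket_lock);
	/* ... lookup misses ... */
	pthread_mutex_unlock(&bucket_lock);	/* drop the lock first */
	mapping_error_check();			/* now safe to re-enter */
}

int main(void)
{
	(void)check_unmap_buggy;	/* calling it would hang forever */
	check_unmap_fixed();
	puts("fixed path completes");
	return 0;
}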
[akpm@linux-foundation.org: restore 80-col trickery to be consistent with the rest of the file] Signed-off-by: Alexander Duyck Cc: Joerg Roedel Reviewed-by: Shuah Khan Tested-by: Shuah Khan Cc: Jakub Kicinski Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 5e396accd3d0..d3e06a5e981e 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -862,17 +862,21 @@ static void check_unmap(struct dma_debug_entry *ref) entry = bucket_find_exact(bucket, ref); if (!entry) { + /* must drop lock before calling dma_mapping_error */ + put_hash_bucket(bucket, &flags); + if (dma_mapping_error(ref->dev, ref->dev_addr)) { err_printk(ref->dev, NULL, - "DMA-API: device driver tries " - "to free an invalid DMA memory address\n"); - return; + "DMA-API: device driver tries to free an " + "invalid DMA memory address\n"); + } else { + err_printk(ref->dev, NULL, + "DMA-API: device driver tries to free DMA " + "memory it has not allocated [device " + "address=0x%016llx] [size=%llu bytes]\n", + ref->dev_addr, ref->size); } - err_printk(ref->dev, NULL, "DMA-API: device driver tries " - "to free DMA memory it has not allocated " - "[device address=0x%016llx] [size=%llu bytes]\n", - ref->dev_addr, ref->size); - goto out; + return; } if (ref->size != entry->size) { @@ -936,7 +940,6 @@ static void check_unmap(struct dma_debug_entry *ref) hash_bucket_del(entry); dma_entry_free(entry); -out: put_hash_bucket(bucket, &flags); } -- cgit v1.2.2 From 96e7d7a1e0fc7780b4c1981c787e42473aa91a95 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 22 Mar 2013 15:04:49 -0700 Subject: dma-debug: update DMA debug API to better handle multiple mappings of a buffer There were reports of the igb driver unmapping buffers without calling dma_mapping_error. On closer inspection issues were found in the DMA debug API and how it handled multiple mappings of the same buffer. The issue I found is the fact that the debug_dma_mapping_error would only set the map_err_type to MAP_ERR_CHECKED in the case that there was only one match for device and device address. However in the case of non-IOMMU, multiple addresses existed and as a result it was not setting this field once a second mapping was instantiated. I have resolved this by changing the search so that it instead will now set MAP_ERR_CHECKED on the first buffer that matches the device and DMA address that is currently in the state MAP_ERR_NOT_CHECKED. A secondary side effect of this patch is that in the case of multiple buffers using the same address only the last mapping will have a valid map_err_type. The previous mappings will all end up with map_err_type set to MAP_ERR_CHECKED because of the dma_mapping_error call in debug_dma_map_page. However this behavior may be preferable as it means you will likely only see one real error per multi-mapped buffer, versus the current behavior of multiple false errors per multi-mapped buffer.
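The new search is easy to model outside the kernel. This sketch is a simplified stand-in for the bucket walk in debug_dma_mapping_error(), using a plain singly linked list instead of the dma-debug hash; each call marks the first matching entry still in MAP_ERR_NOT_CHECKED, so N mappings of one address need N dma_mapping_error() calls before all entries count as checked:

#include <stdio.h>

enum map_err_types { MAP_ERR_CHECK_NOT_APPLICABLE, MAP_ERR_NOT_CHECKED, MAP_ERR_CHECKED };

struct entry {
	unsigned long long dev_addr;
	enum map_err_types map_err_type;
	struct entry *next;
};

/* Mark the first still-unchecked entry matching dev_addr; each call to
 * dma_mapping_error() "checks" one more of the duplicate mappings. */
static void mark_checked(struct entry *head, unsigned long long dev_addr)
{
	struct entry *e;

	for (e = head; e; e = e->next) {
		if (e->dev_addr != dev_addr)
			continue;
		if (e->map_err_type == MAP_ERR_NOT_CHECKED) {
			e->map_err_type = MAP_ERR_CHECKED;
			break;	/* stop at the first unchecked match */
		}
	}
}

int main(void)
{
	/* Two mappings of the same bus address, as happens without an IOMMU. */
	struct entry second = { 0x1000, MAP_ERR_NOT_CHECKED, NULL };
	struct entry first  = { 0x1000, MAP_ERR_NOT_CHECKED, &second };

	mark_checked(&first, 0x1000);	/* first dma_mapping_error() call */
	mark_checked(&first, 0x1000);	/* second call reaches the duplicate */
	printf("%d %d\n", first.map_err_type, second.map_err_type);	/* 2 2 */
	return 0;
}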
Signed-off-by: Alexander Duyck Cc: Joerg Roedel Reviewed-by: Shuah Khan Tested-by: Shuah Khan Cc: Jakub Kicinski Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dma-debug.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d3e06a5e981e..d87a17a819d0 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1085,13 +1085,27 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) ref.dev = dev; ref.dev_addr = dma_addr; bucket = get_hash_bucket(&ref, &flags); - entry = bucket_find_exact(bucket, &ref); - if (!entry) - goto out; + list_for_each_entry(entry, &bucket->list, list) { + if (!exact_match(&ref, entry)) + continue; + + /* + * The same physical address can be mapped multiple + * times. Without a hardware IOMMU this results in the + * same device addresses being put into the dma-debug + * hash multiple times too. This can result in false + * positives being reported. Therefore we implement a + * best-fit algorithm here which updates the first entry + * from the hash which fits the reference value and is + * not currently listed as being checked. + */ + if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { + entry->map_err_type = MAP_ERR_CHECKED; + break; + } + } - entry->map_err_type = MAP_ERR_CHECKED; -out: put_hash_bucket(bucket, &flags); } EXPORT_SYMBOL(debug_dma_mapping_error); -- cgit v1.2.2 From ca4b3f302c90de5e516296e581c31c80125cd24b Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 22 Mar 2013 15:04:50 -0700 Subject: mm/hotplug: only free wait_table if it's allocated by vmalloc zone->wait_table may be allocated from bootmem; it cannot be freed. Signed-off-by: Jianguo Wu Reviewed-by: Tang Chen Cc: Tang Chen Cc: Jiang Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9597eec8239d..ee3765760818 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1779,7 +1779,11 @@ void try_offline_node(int nid) for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; - if (zone->wait_table) + /* + * wait_table may be allocated from boot memory, + * here only free if it's allocated by vmalloc. + */ + if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } -- cgit v1.2.2 From 38d78e587d4960d0db94add518d27ee74bad2301 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 22 Mar 2013 15:04:51 -0700 Subject: mqueue: sys_mq_open: do not call mnt_drop_write() if read-only mnt_drop_write() must be called only if mnt_want_write() succeeded, otherwise the mnt_writers counter will diverge. mnt_writers counters are used to check if remounting FS as read-only is OK, so after an extra mnt_drop_write() call, it would be impossible to remount mqueue FS as read-only. Besides, on umount a warning would be printed like this one: ===================================== [ BUG: bad unlock balance detected! ] 3.9.0-rc3 #5 Not tainted ------------------------------------- a.out/12486 is trying to release lock (sb_writers) at: mnt_drop_write+0x1f/0x30 but there are no more locks to release! Signed-off-by: Vladimir Davydov Cc: Doug Ledford Cc: KOSAKI Motohiro Cc: "Eric W.
Biederman" Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/mqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e5c4f609f22c..3953fda2e8bd 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -840,7 +840,8 @@ out_putfd: fd = error; } mutex_unlock(&root->d_inode->i_mutex); - mnt_drop_write(mnt); + if (!ro) + mnt_drop_write(mnt); out_putname: putname(name); return fd; -- cgit v1.2.2 From 55e57a780a10c9fd734603ec4b32644791ef5b05 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 15 Mar 2013 09:42:12 +0000 Subject: RDMA/cxgb4: Fix error return code in create_qp() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 17ba4f8bc12d..70b1808a08f4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -186,8 +186,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, &(wq->rq.dma_addr), GFP_KERNEL); - if (!wq->rq.queue) + if (!wq->rq.queue) { + ret = -ENOMEM; goto free_sq; + } PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", __func__, wq->sq.queue, (unsigned long long)virt_to_phys(wq->sq.queue), -- cgit v1.2.2 From 1ee9e2aa7b31427303466776f455d43e5e3c9275 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 26 Feb 2013 15:46:27 +0000 Subject: IPoIB: Fix send lockup due to missed TX completion Commit f0dc117abdfa ("IPoIB: Fix TX queue lockup with mixed UD/CM traffic") attempts to solve an issue where unprocessed UD send completions can deadlock the netdev. The patch doesn't fully resolve the issue because if more than half the tx_outstanding's were UD and all of the destinations are RC reachable, arming the CQ doesn't solve the issue. This patch uses the IB_CQ_REPORT_MISSED_EVENTS on the ib_req_notify_cq(). If the rc is above 0, the UD send cq completion callback is called directly to re-arm the send completion timer. This issue is seen in very large parallel filesystem deployments and the patch has been shown to correct the issue. 
Cc: Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 67b0c1d23678..1ef880de3a41 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -758,9 +758,13 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); - if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) - ipoib_warn(priv, "request notify on send CQ failed\n"); netif_stop_queue(dev); + rc = ib_req_notify_cq(priv->send_cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + if (rc < 0) + ipoib_warn(priv, "request notify on send CQ failed\n"); + else if (rc) + ipoib_send_comp_handler(priv->send_cq, dev); } } } -- cgit v1.2.2 From 3c32869f7afe40ff7372e5bb7cd3d8b4520711bb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 18 Mar 2013 20:25:26 +0000 Subject: IB/ipath: Silence a static checker warning I have a static checker which complains that 0x255 is too high for the "dev->opstats[opcode]" array. It turns out that the hardware has already validated the opcode at this point so it can't actually overflow. However, silencing the warning is good and this matches how the opcode is treated in qib_ib_rcv() as well. Signed-off-by: Dan Carpenter Acked-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 439c35d4a669..ea93870266eb 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -620,7 +620,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, goto bail; } - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; dev->opstats[opcode].n_bytes += tlen; dev->opstats[opcode].n_packets++; -- cgit v1.2.2 From e2eed58b4fbfe7cd59d0c9d7bec48fcfa3b2117a Mon Sep 17 00:00:00 2001 From: Vinit Agnihotri Date: Thu, 14 Mar 2013 18:13:41 +0000 Subject: IB/qib: change QLogic to Intel These changes modify the qib driver as part of acquiring the InfiniBand assets of QLogic. 
Reviewed-by: Mike Marciniszyn Reviewed-by: Dean Luick Signed-off-by: Vinit Agnihotri Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/Kconfig | 6 +- drivers/infiniband/hw/qib/qib_driver.c | 5 +- drivers/infiniband/hw/qib/qib_iba6120.c | 3 +- drivers/infiniband/hw/qib/qib_init.c | 8 +- drivers/infiniband/hw/qib/qib_sd7220.c | 4 +- drivers/infiniband/hw/qib/qib_verbs.c | 4 +- firmware/Makefile | 2 +- firmware/intel/sd7220.fw.ihex | 513 ++++++++++++++++++++++++++++++++ firmware/qlogic/sd7220.fw.ihex | 513 -------------------------------- 9 files changed, 530 insertions(+), 528 deletions(-) create mode 100644 firmware/intel/sd7220.fw.ihex delete mode 100644 firmware/qlogic/sd7220.fw.ihex diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig index 8349f9c5064c..1e603a375069 100644 --- a/drivers/infiniband/hw/qib/Kconfig +++ b/drivers/infiniband/hw/qib/Kconfig @@ -1,7 +1,7 @@ config INFINIBAND_QIB - tristate "QLogic PCIe HCA support" + tristate "Intel PCIe HCA support" depends on 64BIT ---help--- - This is a low-level driver for QLogic PCIe QLE InfiniBand host - channel adapters. This driver does not support the QLogic + This is a low-level driver for Intel PCIe QLE InfiniBand host + channel adapters. This driver does not support the Intel HyperTransport card (model QHT7140). diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 5423edcab51f..216092477dfc 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -63,8 +64,8 @@ MODULE_PARM_DESC(compat_ddr_negotiate, "Attempt pre-IBTA 1.2 DDR speed negotiation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("QLogic "); -MODULE_DESCRIPTION("QLogic IB driver"); +MODULE_AUTHOR("Intel "); +MODULE_DESCRIPTION("Intel IB driver"); MODULE_VERSION(QIB_DRIVER_VERSION); /* diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index a099ac171e22..0232ae56b1fa 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -51,7 +52,7 @@ static u32 qib_6120_iblink_state(u64); /* * This file contains all the chip-specific register information and - * access functions for the QLogic QLogic_IB PCI-Express chip. + * access functions for the Intel Intel_IB PCI-Express chip. * */ diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 50e33aa0b4e3..173f805790da 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 
* @@ -1138,7 +1138,7 @@ void qib_disable_after_error(struct qib_devdata *dd) static void qib_remove_one(struct pci_dev *); static int qib_init_one(struct pci_dev *, const struct pci_device_id *); -#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " +#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { @@ -1355,7 +1355,7 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dd = qib_init_iba6120_funcs(pdev, ent); #else qib_early_err(&pdev->dev, - "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", ent->device); dd = ERR_PTR(-ENODEV); #endif @@ -1371,7 +1371,7 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) default: qib_early_err(&pdev->dev, - "Failing on unknown QLogic deviceid 0x%x\n", + "Failing on unknown Intel deviceid 0x%x\n", ent->device); ret = -ENODEV; } diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index 50a8a0d4fe67..08a6c6d39e56 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -44,7 +44,7 @@ #include "qib.h" #include "qib_7220.h" -#define SD7220_FW_NAME "qlogic/sd7220.fw" +#define SD7220_FW_NAME "intel/sd7220.fw" MODULE_FIRMWARE(SD7220_FW_NAME); /* diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index ba51a4715a1d..7c0ab16a2fe2 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* @@ -2224,7 +2224,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->dma_ops = &qib_dma_mapping_ops; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), - "QLogic Infiniband HCA %s", init_utsname()->nodename); + "Intel Infiniband HCA %s", init_utsname()->nodename); ret = ib_register_device(ibdev, qib_create_port_files); if (ret) diff --git a/firmware/Makefile b/firmware/Makefile index cbb09ce9730a..5d8ee1319b5c 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -82,7 +82,7 @@ fw-shipped-$(CONFIG_SCSI_ADVANSYS) += advansys/mcode.bin advansys/38C1600.bin \ fw-shipped-$(CONFIG_SCSI_QLOGIC_1280) += qlogic/1040.bin qlogic/1280.bin \ qlogic/12160.bin fw-shipped-$(CONFIG_SCSI_QLOGICPTI) += qlogic/isp1000.bin -fw-shipped-$(CONFIG_INFINIBAND_QIB) += qlogic/sd7220.fw +fw-shipped-$(CONFIG_INFINIBAND_QIB) += intel/sd7220.fw fw-shipped-$(CONFIG_SND_KORG1212) += korg/k1212.dsp fw-shipped-$(CONFIG_SND_MAESTRO3) += ess/maestro3_assp_kernel.fw \ ess/maestro3_assp_minisrc.fw diff --git a/firmware/intel/sd7220.fw.ihex b/firmware/intel/sd7220.fw.ihex new file mode 100644 index 000000000000..a33636319112 --- /dev/null +++ b/firmware/intel/sd7220.fw.ihex @@ -0,0 +1,513 @@ +:10000000020A29020A87E5E630E6047F0180027FC2 +:1000100000E5E230E4047E0180027E00EE5F6008CD +:1000200053F9F7E4F5FE80087F0A121731120EA289 +:1000300075FC08E4F5FDE5E720E70343F908220035 +:1000400001201100042000755101E4F552F553F52B +:1000500052F57E7F04020438C2360552E552D3942D +:100060000C4005755201D23690070C7407F0A3744A +:10007000FFF0E4F50CA3F0900714F0A3F0750B204B +:10008000F509E4F508E508D39430400302040412AE +:100090000006150BE50870047F0180027F00E5096A +:1000A00070047E0180027E00EE5F6005121871D23E +:1000B0003553E1F7E5084509FFE50B25E025E02488 +:1000C00083F582E43407F583EFF085E220E552D32F +:1000D0009401400D1219F3E054A064407003020330 +:1000E000FB53F9F8909470E4F0E0F510AF09121E9C +:1000F000B3AF08EF4408F582758380E0F529EF443B +:1001000007121A3CF5225440D39400401EE52954AE +:10011000F070211219F3E04480F0E52254306508B4 +:1001200070091219F3E054BFF080091219F37440FA +:10013000F00203FB121A127583AE74FFF0AF087E53 +:1001400000EF4407F582E0FDE50B25E025E0248182 +:10015000F582E43407F583EDF090070EE004F0EF4C +:100160004407F582758398E0F528121A23400C1293 +:1001700019F3E04401121A320203F6AF087E00744C +:1001800080CDEFCD8D82F583E030E00A1219F3E0E7 +:100190004420F00203FB1219F3E054DFF0EE44AE0A +:1001A000121A4330E4030203FB749E121A0520E086 +:1001B000030203FB8F828E83E020E0030203FB1225 +:1001C00019F3E04410F0E5E320E708E508121A3AD5 +:1001D0004404F0AF087E00EF121A3A20E2341219FC +:1001E000F3E04408F0E5E430E6047D0180027D00A0 +:1001F000E57EC3940450047C0180027C00EC4D60D9 +:1002000005C2350203FBEE44D2121A434440F00209 +:1002100003FB1219F3E054F7F0121A127583D2E0BF +:1002200054BFF0900714E004F0E57E7003757E0182 +:10023000AF087E00121A2340121219F3E044011293 +:1002400019F2E05402121A320203FB1219F3E044CD +:10025000021219F2E054FEF0C235EE448A8F82F5A4 +:1002600083E0F517548F4440F07490FCE508440790 +:10027000FDF5828C83E0543F900702F0E054C08D7E +:10028000828C83F07492121A05900703121A197463 +:1002900082121A05900704121A1974B4121A0590E2 +:1002A0000705121A197494FEE5084406121A0AF595 +:1002B0001030E004D2378002C237E510547F8F82BD +:1002C0008E83F0304430121A035480D394004004DB +:1002D000D2398002C2398F828E83E04480F0121AB4 +:1002E000035440D394004004D23A8002C23A8F8231 +:1002F0008E83E04440F07492FEE5084406121A0A28 +:1003000030E704D2388002C2388F828E83E0547F77 +:10031000F0121E46E4F50A20030280033043031264 +:1003200019952002028003304203120C8F303006F0 +:10033000121995120C8F120D471219F3E054FBF0AD 
+:10034000E50AC39401404643E1081219F3E044046E +:10035000F0E5E420E72A121A127583D2E05408D39C +:10036000940040047F0180027F00E50AC3940140AD +:10037000047E0180027E00EF5E6005121DD78017AB +:10038000121A127583D2E04408F00203FB121A120B +:100390007583D2E054F7F0121E467F0812173174AD +:1003A0008EFE121A128E83E0F51054FEF0E5104412 +:1003B00001FFE508FDED4407F582EFF0E51054FE7E +:1003C000FFED4407F582EF121A11758386E04410A1 +:1003D000121A11E04410F01219F3E054FD4401FF29 +:1003E0001219F3EF121A3230320CE5084408F58284 +:1003F0007583827405F0AF0B1218D774102508F5B9 +:10040000080200850509E509D3940750030200821C +:10041000E57ED3940040047F0180027F00E57EC327 +:1004200094FA50047E0180027E00EE5F6002057E39 +:1004300030350B43E1017F0912173102005853E1B7 +:10044000FE0200588E6A8F6B8C6C8D6D756E017517 +:100450006F01757001E4F573F574F57590072FF071 +:10046000F53CF53EF546F547F53DF53FF56FE56F93 +:10047000700FE56B456A12072A758380743AF08025 +:100480000912072A758380741AF0E4F56EC3743F6D +:10049000956EFF120865758382EFF0121A4D1208EF +:1004A000C6E533F01208FA1208B140E1E56F700BAF +:1004B00012072A7583807436F0800912072A758323 +:1004C000807416F0756E0112072A7583B4E56EF01C +:1004D000121A4D743F256EF582E43400F583E5333E +:1004E000F074BF256EF582E434001208B140D8E400 +:1004F000F570F546F547F56E1208FAF583E0FE1241 +:1005000008C6E07C002400FFEC3EFEAD3BD3EF9D2F +:10051000EE9C50047B0180027B00E57070047A0140 +:1005200080027A00EB5A6006856E46757001D3EF43 +:100530009DEE9C50047F0180027F00E570B40104B1 +:100540007E0180027E00EF5E6003856E47056EE5EA +:100550006E647F70A3E5466005E547B47E0385467B +:1005600047E56F7008854676854777800EC3747FB0 +:100570009546F578C3747F9547F579E56F7037E553 +:10058000466547700C757301757401F53CF53D8047 +:1005900035E4F54EC3E5479546F53CC313F57125A3 +:1005A00046F572C3943F4005E4F53D8040C3743F77 +:1005B0009572F53D8037E5466547700F7573017597 +:1005C0007501F53EF53F754E018022E4F54EC3E519 +:1005D000479546F53EC313F5712546F572D3943F12 +:1005E0005005E4F53F8006E57224C1F53F056FE54F +:1005F0006FC39402500302046EE56D456C70028077 +:1006000004E574457590072FF07F01E53E6004E531 +:100610003C7014E4F53CF53DF53EF53F1208D27010 +:1006200004F00206A4807AE53CC3953E4007E53C11 +:10063000953EFF8006C3E53E953CFFE576D3957970 +:10064000400585767A800385797AE577C395785079 +:100650000585777B800385787BE57BD3957A403071 +:10066000E57B957AF53CF53EC3E57B957A900719D5 +:10067000F0E53CC313F571257AF572C3943F40054C +:10068000E4F53D801FC3743F9572F53DF53F80143E +:10069000E4F53CF53E900719F01208D27003F080A3 +:1006A000037401F01208657583D0E0540FFEAD3C71 +:1006B00070027E07BE0F027E80EEFBEFD39B74803C +:1006C000F898401FE4F53CF53E1208D27003F08024 +:1006D000127401F0E508FBEB4407F5827583D2E064 +:1006E0004410F0E508FBEB4409F58275839EEDF0BC +:1006F000EB4407F5827583CAEDF01208657583CC6B +:10070000EFF022E5084407F5827583BCE054F0F071 +:10071000E5084407F5827583BEE054F0F0E508442F +:1007200007F5827583C0E054F0F0E5084407F582D0 +:1007300022F0900728E0FEA3E0F5828E8322854216 +:100740004285414185404074C02FF58274023EF5D8 +:1007500083E542F074E02FF58274023EF58322E5D2 +:100760004229FDE433FCE53CC39DEC6480F87480D1 +:100770009822F583E0900722541FFDE0FAA3E0F5EC +:10078000828A83EDF022900722E0FCA3E0F5828CC0 +:100790008322900724FFED4407CFF0A3EFF02285DA +:1007A0003838853939853A3A74C02FF58274023E5B +:1007B000F58322900726FFED4407CFF0A3EFF02248 +:1007C000F074A02FF58274023EF5832274C02511C7 +:1007D000F582E43401F5832274002511F582E434B6 +:1007E00002F5832274602511F582E43403F5832237 +:1007F00074802511F582E43403F5832274E0251119 +:10080000F582E43403F5832274402511F582E43443 +:1008100006F5832274802FF58274023EF58322AFA1 +:10082000087E00EF4407F58222F583E5824407F550 
+:1008300082E540F02274402511F582E43402F5830C +:100840002274C02511F582E43403F5832274002557 +:1008500011F582E43406F5832274202511F582E433 +:100860003406F58322E508FDED4407F58222E541D3 +:10087000F0E56564014564227E00FB7A00FD7C00A2 +:100880002274202511F582E434022274A02511F58A +:1008900082E4340322853E42853F418F4022853CDD +:1008A00042853D418F402275453F900720E4F0A3EB +:1008B00022F583E532F0056EE56EC3944022F0E543 +:1008C000084406F582227400256EF582E43400F5B2 +:1008D0008322E56D456C90072F22E4F9E53CD39522 +:1008E0003E2274802EF582E43402F583E02274A067 +:1008F0002EF582E43402F583E0227480256EF582C1 +:10090000E43400222542FDE433FC22854242854145 +:100910004185404022ED4C60030209E5EF4E7037FF +:10092000900726120789E0FD1207CCEDF09007280A +:10093000120789E0FD1207D8EDF0120786E0541F78 +:10094000FD120881F583EDF0900724120789E05429 +:100950001FFD120835EDF0EF64044E703790072646 +:10096000120789E0FD1207E4EDF0900728120789CD +:10097000E0FD1207F0EDF0120786E0541FFD1208AB +:100980008BF583EDF0900724120789E0541FFD12C8 +:100990000841EDF0EF64014E70047D0180027D009E +:1009A000EF64024E70047F0180027F00EF4D60789B +:1009B000900726120735E0FF1207FCEF120731E01F +:1009C000FF120808EFF0900722120735E0541FFFCE +:1009D00012084DEFF0900724120735E0541FFF1264 +:1009E0000859EFF0221207CCE4F01207D8E4F01215 +:1009F0000881F583E4F01208357414F01207E4E47A +:100A0000F01207F0E4F012088BF583E4F0120841CD +:100A10007414F01207FCE4F0120808E4F012084D18 +:100A2000E4F01208597414F02253F9F775FC10E43D +:100A3000F5FD75FE30F5FFE5E720E70343F908E52E +:100A4000E620E70B78FFE4F6D8FD53E6FE80097850 +:100A500008E4F6D8FD53E6FE758180E4F5A8D2A837 +:100A6000C2A9D2AFE5E220E50520E602800343E11A +:100A700002E5E220E00E9000007F007E08E4F0A393 +:100A8000DFFCDEFA020ADB43FA01C0E0C0F0C083FB +:100A9000C082C0D0121CE7D0D0D082D083D0F0D09A +:100AA000E053FAFE32021B55E493A3F8E493A3F655 +:100AB00008DFF98029E493A3F85407240CC8C33352 +:100AC000C4540F4420C8834004F456800146F6DF26 +:100AD000E4800B010204081020408090003FE47E77 +:100AE000019360C1A3FF543F30E509541FFEE49316 +:100AF000A360010ECF54C025E060AD40B880FE8CED +:100B0000648D658A668B67E4F569EF4E7003021D9C +:100B100055E4F568E5674566703212072A758390DB +:100B2000E41207297583C2E41207297583C4E4120D +:100B30000870702912072A758392E41207297583B9 +:100B4000C6E41207297583C8E4F0801190072612C5 +:100B50000735E41208707005120732E4F0121D55D3 +:100B6000121EBFE5674566703312072A758390E54C +:100B7000411207297583C2E5411207297583C41202 +:100B8000086E702912072A758392E54012072975AD +:100B900083C6E5401207297583C8800E9007261288 +:100BA000073512086E7006120732E540F0AF697E15 +:100BB00000AD67AC6612044412072A7583CAE0D3FD +:100BC0009400500C0568E568C394055003020B14AB +:100BD000228C608D611208DA7420400D2FF582742A +:100BE000033EF583E53EF0800B2FF58274033EF55E +:100BF00083E53CF0E53CD3953E403CE561456070C3 +:100C000010E9120904E53E120768403B120895807E +:100C100018E53EC39538401D853E38E53E600585A4 +:100C20003F3980038539398F3A120814E53E12079F +:100C3000C0E53FF0228043E5614560701912075F0F +:100C4000400512089E802712090B120814E5421273 +:100C500007C0E541F022E53CC39538401D853C388E +:100C6000E53C6005853D3980038539398F3A1208A6 +:100C700014E53C1207C0E53DF02285383885393946 +:100C8000853A3A120814E5381207C0E539F0227F98 +:100C900006121731121D23120E04120E33E0440AFD +:100CA000F0748EFE120E04120E0BEFF0E52830E504 +:100CB00003D38001C3400575142080037514081206 +:100CC0000E0475838AE514F0B4FF05751280800662 +:100CD000E514C313F512E4F516F57F121936121355 +:100CE000A3E50AC3940150090516E516C394144000 +:100CF000EAE5E420E728120E047583D2E05408D315 +:100D0000940040047F0180027F00E50AC394014003 +:100D1000047E0180027E00EF5E6003121DD7E57F36 
+:100D2000C394114014120E047583D2E04480F0E5A0 +:100D3000E420E70F121DD7800A120E047583D2E05B +:100D4000547FF0121D2322748A850882F583E517EB +:100D5000F0120E3AE4F0900702E0120E177583903D +:100D6000EFF07492FEE5084407FFF5828E83E054AD +:100D7000C0FD900703E0543F4D8F828E83F09007B3 +:100D800004E0120E17758382EFF0900705E0FFED87 +:100D90004407F5827583B4EF120E03758380E05427 +:100DA000BFF030370A120E91758394E04480F03022 +:100DB000380A120E91758392E04480F0E52830E401 +:100DC0001A20390A120E04758388E0547FF0203A05 +:100DD0000A120E04758388E054BFF0748CFE120E64 +:100DE000048E83E0540F120E03758386E054BFF027 +:100DF000E5084406120DFD75838AE4F022F582753C +:100E00008382E4F0E5084407F582228E83E0F51042 +:100E100054FEF0E5104401FFE508FDED4407F582BE +:100E200022E515C45407FFE508FDED4408F5827579 +:100E3000838222758380E04440F0E5084408F5820F +:100E400075838A22E51625E025E024AFF582E43497 +:100E50001AF583E493F50D2243E11043E18053E159 +:100E6000FD85E11022E51625E025E024B2F582E4B7 +:100E7000341AF583E49322855582855483E515F071 +:100E800022E5E25420D3940022E5E25440D39400BA +:100E900022E5084406F58222FDE508FBEB4407F550 +:100EA000822253F9F775FE3022EF4E70261207CCDE +:100EB000E0FD90072612077B1207D8E0FD90072877 +:100EC00012077B120881120772120835E09007247E +:100ED000120778EF64044E70291207E4E0FD9007D2 +:100EE0002612077B1207F0E0FD90072812077B12FD +:100EF000088B120772120841E0541FFD900724125C +:100F0000077BEF64014E70047D0180027D00EF6479 +:100F1000024E70047F0180027F00EF4D60351207A2 +:100F2000FCE0FF900726120789EFF0120808E0FFA7 +:100F3000900728120789EFF012084DE0541FFF12A6 +:100F40000786EFF0120859E0541FFF90072412079C +:100F500089EFF022E4F553120E8140047F018002F4 +:100F60007F00120E8940047E0180027E00EE4F70E9 +:100F700003020FF685E11043E10253E10F85E11012 +:100F8000E4F551E5E3543FF552120E89401DAD5290 +:100F9000AF51121118EF600885E11043E140800B5A +:100FA00053E1BF120E5812000680FBE5E3543FF5F3 +:100FB00051E5E4543FF552120E81401DAD52AF5140 +:100FC000121118EF600885E11043E120800B53E116 +:100FD000DF120E5812000680FB120E8140047F01C2 +:100FE00080027F00120E8940047E0180027E00EEA6 +:100FF0004F6003120E5B22120E21EFF012109122AD +:1010000002110002104002109000000000000000D9 +:1010100001200120E4F5571216BD121644E4121007 +:10102000561214B7900726120735E4120731E4F080 +:101030001210561214B7900726120735E541120711 +:1010400031E540F0AF577E00AD567C00120444AF4E +:10105000567E000211EEFF900720A3E0FDE4F55656 +:10106000F540FEFCAB56FA1211517F0F7D18E4F5E6 +:1010700056F540FEFCAB56FA121541AF567E0012F3 +:101080001AFFE4FFF5567D1FF540FEFCAB56FA2231 +:1010900022E4F555E508FD74A0F556ED4407F55733 +:1010A000E52830E503D38001C340057F28EF8004A5 +:1010B0007F14EFC313F554E4F9120E1875838EE014 +:1010C000F510CEEFCEEED394004026E51054FE127C +:1010D0000E9875838EEDF0E5104401FDEB4407F5A5 +:1010E00082EDF0855782855683E030E301091E804A +:1010F000D4C234E9C395544002D2342202000622FD +:10110000303011901000E493F510901010E493F536 +:101110001012109012115022E4FCC3ED9FFAEFF56B +:101120008375820079FFE493CC6CCCA3D9F8DAF60E +:10113000E5E230E4028CE5ED24FFFFEF7582FFF578 +:1011400083E4936C70037F01227F00222211000050 +:10115000228E588F598C5A8D5B8A5C8B5D755E012F +:10116000E4F55FF560F56212072A7583D0E0FFC4ED +:10117000540FF561121EA585595ED3E55E955BE5BA +:101180005A12076B504B1207037583BCE0455E1281 +:1011900007297583BEE0455E1207297583C0E045C7 +:1011A0005EF0AF5FE560120878120AFFAF627E0062 +:1011B000AD5DAC5C120444E561AF5E7E00B4030536 +:1011C000121E218007AD5DAC5C121317055E021183 +:1011D0007A1207037583BCE045401207297583BE68 +:1011E000E045401207297583C0E04540F0228E5843 +:1011F0008F59755A017901755B01E4FB12072A7555 +:1012000083AEE0541AFF120865E0C4135407FEEFE2 
+:10121000700CEE6535700790072FE0B4010DAF3507 +:101220007E00120EA9CFEBCF021E60E55964024585 +:101230005870047F0180027F00E559455870047E94 +:101240000180027E00EE4F602385414985404BE5D9 +:10125000594558702CAF5AFECDE9CDFCAB59AA5870 +:10126000120AFFAF5B7E00121E608015AF5B7E002E +:10127000121E60900726120735E549120731E54B2B +:10128000F0E4FDAF35FEFC120915228C648D651269 +:1012900008DA403CE56545647010120904C3E53E78 +:1012A000120769403B1208958018E53EC395384007 +:1012B0001D853E38E53E6005853F39800385393917 +:1012C0008F3A1207A8E53E120753E53FF022803B14 +:1012D000E5654564701112075F400512089E801F86 +:1012E00012073EE541F022E53CC39538401D853CA0 +:1012F00038E53C6005853D3980038539398F3A12E0 +:1013000007A8E53C120753E53DF02212079FE53898 +:10131000120753E539F0228C638D641208DA403CE1 +:10132000E56445637010120904C3E53E1207694085 +:101330003B1208958018E53EC39538401D853E3820 +:10134000E53E6005853F3980038539398F3A1207BC +:10135000A8E53E120753E53FF022803BE564456374 +:10136000701112075F400512089E801F12073EE5AC +:1013700041F022E53CC39538401D853C38E53C6092 +:1013800005853D3980038539398F3A1207A8E53C38 +:10139000120753E53DF02212079FE538120753E587 +:1013A00039F022E50DFEE5088E544405F555751516 +:1013B0000FF582120E7A1217A320310575150380DE +:1013C0000375150BE50AC39401503812142020311F +:1013D0000605150515800415151515E50AC39401B4 +:1013E0005021121420203104051580021515E50A3C +:1013F000C39401500E120E771217A3203105051564 +:10140000120E77E515B408047F0180027F00E51510 +:10141000B407047E0180027E00EE4F6002057F2249 +:10142000855582855483E515F01217A32212072AE9 +:101430007583AE74FF120729E0541AF534E0C41323 +:101440005407F53524FE602424FE603C24047063B8 +:1014500075312DE508FD74B612079274BC90072211 +:1014600012079574901207B37492803C75313AE577 +:1014700008FD74BA12079274C09007221207B6745E +:10148000C41207B374C88020753135E508FD74B8FF +:1014900012079274BEFFED4407900722CFF0A3EF2E +:1014A000F074C21207B374C6FFED4407A3CFF0A3D4 +:1014B000EFF022753401228E588F598C5A8D5B8A39 +:1014C0005C8B5D755E01E4F55F121EA585595ED3E8 +:1014D000E55E955BE55A12076B5057E55D455C701C +:1014E0003012072A758392E55E1207297583C6E5D7 +:1014F0005E1207297583C8E55E120729758390E59A +:101500005E1207297583C2E55E1207297583C480C0 +:1015100003120732E55EF0AF5F7E00AD5DAC5C129A +:101520000444AF5E7E00AD5DAC5C120BD1055E0283 +:1015300014CFAB5DAA5CAD5BAC5AAF59AE58021B81 +:10154000FB8C5C8D5D8A5E8B5F756001E4F561F5F7 +:1015500062F563121EA58F60D3E560955DE55C12B0 +:10156000076B5061E55F455E702712072A7583B6E9 +:10157000E5601207297583B8E5601207297583BAFB +:10158000E560F0AF617E00E56212087A120AFF8022 +:1015900019900724120735E56012072975838EE438 +:1015A0001207297401120729E4F0AF637E00AD5FD2 +:1015B000AC5E120444AF607E00AD5FAC5E12128B75 +:1015C00005600215582290114DE49390072EF012F9 +:1015D000081F7583AEE0541AF5347067EF4407F5C1 +:1015E000827583CEE0FF1313135407F536540FD3DF +:1015F0009400400612142D121BA9E536540F24FE48 +:10160000600C14600C146019240370378010021EE3 +:1016100091121E9112072A7583CEE054EFF0021D3D +:10162000AE121014E4F555121D850555E555C39409 +:101630000540F412072A7583CEE054C7120729E04B +:101640004408F022E4F558F559AF08EF4407F58255 +:101650007583D0E0FDC4540FF55AEF4407F5827549 +:1016600083807401F0120821758382E545F0EF4410 +:1016700007F58275838A74FFF0121A4D12072A75D6 +:1016800083BCE054EF1207297583BEE054EF1207C4 +:10169000297583C0E054EF1207297583BCE044101C +:1016A0001207297583BEE044101207297583C0E034 +:1016B0004410F0AF58E559120878020AFFE4F558D3 +:1016C0007D01F559AF35FEFC12091512072A758305 +:1016D000B674101207297583B87410120729758320 +:1016E000BA74101207297583BC7410120729758308 +:1016F000BE74101207297583C074101207297583F0 
+:1017000090E41207297583C2E41207297583C4E4A3 +:10171000120729758392E41207297583C6E412071C +:10172000297583C8E4F0AF58FEE55912087A020A19 +:10173000FFE5E230E46CE5E754C064407064E5091D +:10174000C45430FEE50825E025E054C04EFEEF54B9 +:101750003F4EFDE52BAE2A7802C333CE33CED8F907 +:10176000F5828E83EDF0E52BAE2A7802C333CE33BB +:10177000CED8F9FFF5828E83A3E5FEF08F828E83AB +:10178000A3A3E5FDF08F828E83A3A3A3E5FCF0C3A2 +:10179000E52B94FAE52A94005008052BE52B7002FE +:1017A000052A22E4FFE4F558F556F5577482FC1239 +:1017B0000E048C83E0F510547FF0E5104480120E87 +:1017C00098EDF07E0A120E047583A0E020E026DE7C +:1017D000F40557E55770020556E5142401FDE4337E +:1017E000FCD3E5579DE5569C40D9E50A942050026C +:1017F000050A43E108C231120E047583A6E05512B2 +:1018000065127003D23122C23122900726E0FAA37A +:10181000E0F5828A83E0F541E539C395414026E54C +:10182000399541C39FEE12076B40047C0180027C16 +:1018300000E541643F60047B0180027B00EC5B605B +:101840002905418028C3E5419539C39FEE12076BF6 +:1018500040047F0180027F00E54160047E01800238 +:101860007E00EF5E600415418003853941853A4072 +:1018700022E5E230E460E5E130E25BE50970047FF7 +:101880000180027F00E50870047E0180027E00EE88 +:101890005F604353F9F8E5E230E43BE5E130E22EE6 +:1018A00043FA0253FAFBE4F510909470E510F0E56A +:1018B000E130E2E7909470E06510600343FA0405BC +:1018C00010909470E510F070E612000680E153FA73 +:1018D000FD53FAFB80C0228F54120006E5E130E090 +:1018E000047F0180027F00E57ED3940540047E01E1 +:1018F00080027E00EE4F603D855411E5E220E1322A +:1019000074CE121A0530E7047D0180027D008F82BB +:101910008E83E030E6047F0180027F00EF5D70156A +:101920001215C674CE121A0530E607E04480F04363 +:10193000F98012187122120E44E51625E025E024E4 +:10194000B0F582E4341AF583E493F50FE51625E04B +:1019500025E024B1F582E4341AF583E493F50E1200 +:101960000E65F510E50F54F0120E1775838CEFF02D +:10197000E50F30E00C120E04758386E04440F080E1 +:101980000A120E04758386E054BFF0120E9175831F +:1019900082E50EF0227F05121731120E04120E336B +:1019A0007402F0748EFE120E04120E0BEFF0751519 +:1019B00070120FF72034057515108003751550123D +:1019C0000FF72034047410800274F02515F51512F9 +:1019D0000E21EFF0121091203417E5156430600CE1 +:1019E00074102515F515B48003E4F515120E21EFDA +:1019F000F022F0E50B25E025E02482F582E43407AF +:101A0000F583227488FEE5084407FFF5828E83E0A3 +:101A100022F0E5084407F58222F0E054C08F828E60 +:101A200083F022EF4407F582758386E05410D39447 +:101A30000022F0900715E004F0224406F582758339 +:101A40009EE022FEEF4407F5828E83E022E49007B9 +:101A50002AF0A3F012072A758382E0547F12072927 +:101A6000E04480F01210FC12081F7583A0E020E013 +:101A70001A90072BE004F0700690072AE004F0901B +:101A8000072AE0B410E1A3E0B400DCEE44A6FCEFCA +:101A90004407F5828C83E0F532EE44A8FEEF44075C +:101AA000F5828E83E0F5332201201100042000909E +:101AB00000200F9200210F9400220F9600230F9810 +:101AC00000240F9A00250F9C00260F9E00270FA0D0 +:101AD000012001A2012101A4012201A6012301A8E4 +:101AE000012401AA012501AC012601AE012701B0A4 +:101AF000012801B400280FB640280FB8612801CB97 +:101B0000EFCBCAEECA7F01E4FDEB4A7024E508F58D +:101B10008274B6120829E508F58274B8120829E51E +:101B200008F58274BA1208297E007C00120AFF8030 +:101B300012900726120735E541F090072412073569 +:101B4000E540F012072A75838EE41207297401120A +:101B50000729E4F022E4F526F52753E1FEF52A757E +:101B60002B01F5087F0112173130301C901AA9E4BF +:101B700093F510901FF9E493F510900041E493F56C +:101B800010901ECAE493F5107F02121731120F5401 +:101B90007F03121731120006E5E230E70912100048 +:101BA00030300312110002004712081F7583D0E085 +:101BB000C4540FFD7543017544FF1208AA7404F064 +:101BC000753B01ED14600C14600B14600F2403705E +:101BD0000B800980001208A704F080061208A77481 +:101BE00004F0EE4482FEEF4407F5828E83E5451251 
+:101BF00008BE758382E531F002114C8E608F611250 +:101C00001EA5E4FFCEEDCEEED39561E56012076B25 +:101C1000403974202EF582E43403F583E07003FF2D +:101C200080261208E2FDC39F401ECFEDCFEB4A7025 +:101C30000B8D421208EEF5418E40800C1208E2F541 +:101C4000381208EEF5398E3A1E80BC22755801E52F +:101C500035700C1207CCE0F54A1207D8E0F54CE5D8 +:101C600035B4040C1207E4E0F54A1207F0E0F54C35 +:101C7000E535B401047F0180027F00E535B402043C +:101C80007E0180027E00EE4F600C1207FCE0F54AF8 +:101C9000120808E0F54C85414985404B22755B01EF +:101CA000900724120735E0541FFFD3940250048F8D +:101CB000588005EF24FEF558EFC394184005755978 +:101CC000188004EF04F55985435AAF587E00AD598A +:101CD0007C00AB5B7A00121541AF5A7E0012180AE5 +:101CE000AF5B7E00021AFFE5E230E70E121003C27E +:101CF000303030031210FF203328E5E730E70512BB +:101D00000EA2800DE5FEC394205006120EA243F9E8 +:101D100008E5F230E70353F97FE5F15470D39400FE +:101D200050D822120E04758380E4F0E508440712AF +:101D30000DFD758384120E02758386120E02758363 +:101D40008CE054F3120E0375838E120E0275839489 +:101D5000E054FBF02212072A75838EE412072974DF +:101D600001120729E41208BE75838CE04420120892 +:101D7000BEE054DFF07484850882F583E0547FF080 +:101D8000E04480F022755601E4FDF557AF35FEFCC6 +:101D9000120915121C9D121E7A121C4CAF577E00A0 +:101DA000AD567C00120444AF567E000211EE75560B +:101DB00001E4FDF557AF35FEFC120915121C9D120A +:101DC0001E7A121C4CAF577E00AD567C00120444A4 +:101DD000AF567E000211EEE4F516120E44FEE50841 +:101DE0004405FF120E658F828E83F00516E516C33B +:101DF000941440E6E508120E2BE4F022E4F558F5C1 +:101E000059F55AFFFEAD58FC1209157F047E00AD4E +:101E1000587C001209157F027E00AD587C00020933 +:101E200015E53C253EFCE5422400FBE433FAECC317 +:101E30009BEA12076B400B8C42E53D253FF5418F35 +:101E4000402212090B227484F5188508198519821D +:101E5000851883E0547FF0E04480F0E04480F02275 +:101E6000EF4E700B12072A7583D2E054DFF0221276 +:101E7000072A7583D2E04420F02275580190072686 +:101E8000120735E0543FF541120732E0543FF54068 +:101E900022755602E4F557121DFCAF577E00AD5671 +:101EA0007C00020444E4F542F541F540F538F5398B +:101EB000F53A22EF5407FFE5F954F84FF5F9227F80 +:101EC00001E4FE0F0EBEFFFB2201200001042000F2 +:101ED0000000000000000000000000000000000002 +:101EE00000000000000000000000000000000000F2 +:101EF00000000000000000000000000000000000E2 +:101F000000000000000000000000000000000000D1 +:101F100000000000000000000000000000000000C1 +:101F200000000000000000000000000000000000B1 +:101F300000000000000000000000000000000000A1 +:101F40000000000000000000000000000000000091 +:101F50000000000000000000000000000000000081 +:101F60000000000000000000000000000000000071 +:101F70000000000000000000000000000000000061 +:101F80000000000000000000000000000000000051 +:101F90000000000000000000000000000000000041 +:101FA0000000000000000000000000000000000031 +:101FB0000000000000000000000000000000000021 +:101FC0000000000000000000000000000000000011 +:101FD0000000000000000000000000000000000001 +:101FE00000000000000000000000000000000000F1 +:101FF000000000000000000001201100042000810A +:00000001FF diff --git a/firmware/qlogic/sd7220.fw.ihex b/firmware/qlogic/sd7220.fw.ihex deleted file mode 100644 index a33636319112..000000000000 --- a/firmware/qlogic/sd7220.fw.ihex +++ /dev/null @@ -1,513 +0,0 @@ -:10000000020A29020A87E5E630E6047F0180027FC2 -:1000100000E5E230E4047E0180027E00EE5F6008CD -:1000200053F9F7E4F5FE80087F0A121731120EA289 -:1000300075FC08E4F5FDE5E720E70343F908220035 -:1000400001201100042000755101E4F552F553F52B -:1000500052F57E7F04020438C2360552E552D3942D -:100060000C4005755201D23690070C7407F0A3744A -:10007000FFF0E4F50CA3F0900714F0A3F0750B204B -:10008000F509E4F508E508D39430400302040412AE 
-:100090000006150BE50870047F0180027F00E5096A -:1000A00070047E0180027E00EE5F6005121871D23E -:1000B0003553E1F7E5084509FFE50B25E025E02488 -:1000C00083F582E43407F583EFF085E220E552D32F -:1000D0009401400D1219F3E054A064407003020330 -:1000E000FB53F9F8909470E4F0E0F510AF09121E9C -:1000F000B3AF08EF4408F582758380E0F529EF443B -:1001000007121A3CF5225440D39400401EE52954AE -:10011000F070211219F3E04480F0E52254306508B4 -:1001200070091219F3E054BFF080091219F37440FA -:10013000F00203FB121A127583AE74FFF0AF087E53 -:1001400000EF4407F582E0FDE50B25E025E0248182 -:10015000F582E43407F583EDF090070EE004F0EF4C -:100160004407F582758398E0F528121A23400C1293 -:1001700019F3E04401121A320203F6AF087E00744C -:1001800080CDEFCD8D82F583E030E00A1219F3E0E7 -:100190004420F00203FB1219F3E054DFF0EE44AE0A -:1001A000121A4330E4030203FB749E121A0520E086 -:1001B000030203FB8F828E83E020E0030203FB1225 -:1001C00019F3E04410F0E5E320E708E508121A3AD5 -:1001D0004404F0AF087E00EF121A3A20E2341219FC -:1001E000F3E04408F0E5E430E6047D0180027D00A0 -:1001F000E57EC3940450047C0180027C00EC4D60D9 -:1002000005C2350203FBEE44D2121A434440F00209 -:1002100003FB1219F3E054F7F0121A127583D2E0BF -:1002200054BFF0900714E004F0E57E7003757E0182 -:10023000AF087E00121A2340121219F3E044011293 -:1002400019F2E05402121A320203FB1219F3E044CD -:10025000021219F2E054FEF0C235EE448A8F82F5A4 -:1002600083E0F517548F4440F07490FCE508440790 -:10027000FDF5828C83E0543F900702F0E054C08D7E -:10028000828C83F07492121A05900703121A197463 -:1002900082121A05900704121A1974B4121A0590E2 -:1002A0000705121A197494FEE5084406121A0AF595 -:1002B0001030E004D2378002C237E510547F8F82BD -:1002C0008E83F0304430121A035480D394004004DB -:1002D000D2398002C2398F828E83E04480F0121AB4 -:1002E000035440D394004004D23A8002C23A8F8231 -:1002F0008E83E04440F07492FEE5084406121A0A28 -:1003000030E704D2388002C2388F828E83E0547F77 -:10031000F0121E46E4F50A20030280033043031264 -:1003200019952002028003304203120C8F303006F0 -:10033000121995120C8F120D471219F3E054FBF0AD -:10034000E50AC39401404643E1081219F3E044046E -:10035000F0E5E420E72A121A127583D2E05408D39C -:10036000940040047F0180027F00E50AC3940140AD -:10037000047E0180027E00EF5E6005121DD78017AB -:10038000121A127583D2E04408F00203FB121A120B -:100390007583D2E054F7F0121E467F0812173174AD -:1003A0008EFE121A128E83E0F51054FEF0E5104412 -:1003B00001FFE508FDED4407F582EFF0E51054FE7E -:1003C000FFED4407F582EF121A11758386E04410A1 -:1003D000121A11E04410F01219F3E054FD4401FF29 -:1003E0001219F3EF121A3230320CE5084408F58284 -:1003F0007583827405F0AF0B1218D774102508F5B9 -:10040000080200850509E509D3940750030200821C -:10041000E57ED3940040047F0180027F00E57EC327 -:1004200094FA50047E0180027E00EE5F6002057E39 -:1004300030350B43E1017F0912173102005853E1B7 -:10044000FE0200588E6A8F6B8C6C8D6D756E017517 -:100450006F01757001E4F573F574F57590072FF071 -:10046000F53CF53EF546F547F53DF53FF56FE56F93 -:10047000700FE56B456A12072A758380743AF08025 -:100480000912072A758380741AF0E4F56EC3743F6D -:10049000956EFF120865758382EFF0121A4D1208EF -:1004A000C6E533F01208FA1208B140E1E56F700BAF -:1004B00012072A7583807436F0800912072A758323 -:1004C000807416F0756E0112072A7583B4E56EF01C -:1004D000121A4D743F256EF582E43400F583E5333E -:1004E000F074BF256EF582E434001208B140D8E400 -:1004F000F570F546F547F56E1208FAF583E0FE1241 -:1005000008C6E07C002400FFEC3EFEAD3BD3EF9D2F -:10051000EE9C50047B0180027B00E57070047A0140 -:1005200080027A00EB5A6006856E46757001D3EF43 -:100530009DEE9C50047F0180027F00E570B40104B1 -:100540007E0180027E00EF5E6003856E47056EE5EA -:100550006E647F70A3E5466005E547B47E0385467B -:1005600047E56F7008854676854777800EC3747FB0 -:100570009546F578C3747F9547F579E56F7037E553 
-:10058000466547700C757301757401F53CF53D8047 -:1005900035E4F54EC3E5479546F53CC313F57125A3 -:1005A00046F572C3943F4005E4F53D8040C3743F77 -:1005B0009572F53D8037E5466547700F7573017597 -:1005C0007501F53EF53F754E018022E4F54EC3E519 -:1005D000479546F53EC313F5712546F572D3943F12 -:1005E0005005E4F53F8006E57224C1F53F056FE54F -:1005F0006FC39402500302046EE56D456C70028077 -:1006000004E574457590072FF07F01E53E6004E531 -:100610003C7014E4F53CF53DF53EF53F1208D27010 -:1006200004F00206A4807AE53CC3953E4007E53C11 -:10063000953EFF8006C3E53E953CFFE576D3957970 -:10064000400585767A800385797AE577C395785079 -:100650000585777B800385787BE57BD3957A403071 -:10066000E57B957AF53CF53EC3E57B957A900719D5 -:10067000F0E53CC313F571257AF572C3943F40054C -:10068000E4F53D801FC3743F9572F53DF53F80143E -:10069000E4F53CF53E900719F01208D27003F080A3 -:1006A000037401F01208657583D0E0540FFEAD3C71 -:1006B00070027E07BE0F027E80EEFBEFD39B74803C -:1006C000F898401FE4F53CF53E1208D27003F08024 -:1006D000127401F0E508FBEB4407F5827583D2E064 -:1006E0004410F0E508FBEB4409F58275839EEDF0BC -:1006F000EB4407F5827583CAEDF01208657583CC6B -:10070000EFF022E5084407F5827583BCE054F0F071 -:10071000E5084407F5827583BEE054F0F0E508442F -:1007200007F5827583C0E054F0F0E5084407F582D0 -:1007300022F0900728E0FEA3E0F5828E8322854216 -:100740004285414185404074C02FF58274023EF5D8 -:1007500083E542F074E02FF58274023EF58322E5D2 -:100760004229FDE433FCE53CC39DEC6480F87480D1 -:100770009822F583E0900722541FFDE0FAA3E0F5EC -:10078000828A83EDF022900722E0FCA3E0F5828CC0 -:100790008322900724FFED4407CFF0A3EFF02285DA -:1007A0003838853939853A3A74C02FF58274023E5B -:1007B000F58322900726FFED4407CFF0A3EFF02248 -:1007C000F074A02FF58274023EF5832274C02511C7 -:1007D000F582E43401F5832274002511F582E434B6 -:1007E00002F5832274602511F582E43403F5832237 -:1007F00074802511F582E43403F5832274E0251119 -:10080000F582E43403F5832274402511F582E43443 -:1008100006F5832274802FF58274023EF58322AFA1 -:10082000087E00EF4407F58222F583E5824407F550 -:1008300082E540F02274402511F582E43402F5830C -:100840002274C02511F582E43403F5832274002557 -:1008500011F582E43406F5832274202511F582E433 -:100860003406F58322E508FDED4407F58222E541D3 -:10087000F0E56564014564227E00FB7A00FD7C00A2 -:100880002274202511F582E434022274A02511F58A -:1008900082E4340322853E42853F418F4022853CDD -:1008A00042853D418F402275453F900720E4F0A3EB -:1008B00022F583E532F0056EE56EC3944022F0E543 -:1008C000084406F582227400256EF582E43400F5B2 -:1008D0008322E56D456C90072F22E4F9E53CD39522 -:1008E0003E2274802EF582E43402F583E02274A067 -:1008F0002EF582E43402F583E0227480256EF582C1 -:10090000E43400222542FDE433FC22854242854145 -:100910004185404022ED4C60030209E5EF4E7037FF -:10092000900726120789E0FD1207CCEDF09007280A -:10093000120789E0FD1207D8EDF0120786E0541F78 -:10094000FD120881F583EDF0900724120789E05429 -:100950001FFD120835EDF0EF64044E703790072646 -:10096000120789E0FD1207E4EDF0900728120789CD -:10097000E0FD1207F0EDF0120786E0541FFD1208AB -:100980008BF583EDF0900724120789E0541FFD12C8 -:100990000841EDF0EF64014E70047D0180027D009E -:1009A000EF64024E70047F0180027F00EF4D60789B -:1009B000900726120735E0FF1207FCEF120731E01F -:1009C000FF120808EFF0900722120735E0541FFFCE -:1009D00012084DEFF0900724120735E0541FFF1264 -:1009E0000859EFF0221207CCE4F01207D8E4F01215 -:1009F0000881F583E4F01208357414F01207E4E47A -:100A0000F01207F0E4F012088BF583E4F0120841CD -:100A10007414F01207FCE4F0120808E4F012084D18 -:100A2000E4F01208597414F02253F9F775FC10E43D -:100A3000F5FD75FE30F5FFE5E720E70343F908E52E -:100A4000E620E70B78FFE4F6D8FD53E6FE80097850 -:100A500008E4F6D8FD53E6FE758180E4F5A8D2A837 -:100A6000C2A9D2AFE5E220E50520E602800343E11A 
-:100A700002E5E220E00E9000007F007E08E4F0A393 -:100A8000DFFCDEFA020ADB43FA01C0E0C0F0C083FB -:100A9000C082C0D0121CE7D0D0D082D083D0F0D09A -:100AA000E053FAFE32021B55E493A3F8E493A3F655 -:100AB00008DFF98029E493A3F85407240CC8C33352 -:100AC000C4540F4420C8834004F456800146F6DF26 -:100AD000E4800B010204081020408090003FE47E77 -:100AE000019360C1A3FF543F30E509541FFEE49316 -:100AF000A360010ECF54C025E060AD40B880FE8CED -:100B0000648D658A668B67E4F569EF4E7003021D9C -:100B100055E4F568E5674566703212072A758390DB -:100B2000E41207297583C2E41207297583C4E4120D -:100B30000870702912072A758392E41207297583B9 -:100B4000C6E41207297583C8E4F0801190072612C5 -:100B50000735E41208707005120732E4F0121D55D3 -:100B6000121EBFE5674566703312072A758390E54C -:100B7000411207297583C2E5411207297583C41202 -:100B8000086E702912072A758392E54012072975AD -:100B900083C6E5401207297583C8800E9007261288 -:100BA000073512086E7006120732E540F0AF697E15 -:100BB00000AD67AC6612044412072A7583CAE0D3FD -:100BC0009400500C0568E568C394055003020B14AB -:100BD000228C608D611208DA7420400D2FF582742A -:100BE000033EF583E53EF0800B2FF58274033EF55E -:100BF00083E53CF0E53CD3953E403CE561456070C3 -:100C000010E9120904E53E120768403B120895807E -:100C100018E53EC39538401D853E38E53E600585A4 -:100C20003F3980038539398F3A120814E53E12079F -:100C3000C0E53FF0228043E5614560701912075F0F -:100C4000400512089E802712090B120814E5421273 -:100C500007C0E541F022E53CC39538401D853C388E -:100C6000E53C6005853D3980038539398F3A1208A6 -:100C700014E53C1207C0E53DF02285383885393946 -:100C8000853A3A120814E5381207C0E539F0227F98 -:100C900006121731121D23120E04120E33E0440AFD -:100CA000F0748EFE120E04120E0BEFF0E52830E504 -:100CB00003D38001C3400575142080037514081206 -:100CC0000E0475838AE514F0B4FF05751280800662 -:100CD000E514C313F512E4F516F57F121936121355 -:100CE000A3E50AC3940150090516E516C394144000 -:100CF000EAE5E420E728120E047583D2E05408D315 -:100D0000940040047F0180027F00E50AC394014003 -:100D1000047E0180027E00EF5E6003121DD7E57F36 -:100D2000C394114014120E047583D2E04480F0E5A0 -:100D3000E420E70F121DD7800A120E047583D2E05B -:100D4000547FF0121D2322748A850882F583E517EB -:100D5000F0120E3AE4F0900702E0120E177583903D -:100D6000EFF07492FEE5084407FFF5828E83E054AD -:100D7000C0FD900703E0543F4D8F828E83F09007B3 -:100D800004E0120E17758382EFF0900705E0FFED87 -:100D90004407F5827583B4EF120E03758380E05427 -:100DA000BFF030370A120E91758394E04480F03022 -:100DB000380A120E91758392E04480F0E52830E401 -:100DC0001A20390A120E04758388E0547FF0203A05 -:100DD0000A120E04758388E054BFF0748CFE120E64 -:100DE000048E83E0540F120E03758386E054BFF027 -:100DF000E5084406120DFD75838AE4F022F582753C -:100E00008382E4F0E5084407F582228E83E0F51042 -:100E100054FEF0E5104401FFE508FDED4407F582BE -:100E200022E515C45407FFE508FDED4408F5827579 -:100E3000838222758380E04440F0E5084408F5820F -:100E400075838A22E51625E025E024AFF582E43497 -:100E50001AF583E493F50D2243E11043E18053E159 -:100E6000FD85E11022E51625E025E024B2F582E4B7 -:100E7000341AF583E49322855582855483E515F071 -:100E800022E5E25420D3940022E5E25440D39400BA -:100E900022E5084406F58222FDE508FBEB4407F550 -:100EA000822253F9F775FE3022EF4E70261207CCDE -:100EB000E0FD90072612077B1207D8E0FD90072877 -:100EC00012077B120881120772120835E09007247E -:100ED000120778EF64044E70291207E4E0FD9007D2 -:100EE0002612077B1207F0E0FD90072812077B12FD -:100EF000088B120772120841E0541FFD900724125C -:100F0000077BEF64014E70047D0180027D00EF6479 -:100F1000024E70047F0180027F00EF4D60351207A2 -:100F2000FCE0FF900726120789EFF0120808E0FFA7 -:100F3000900728120789EFF012084DE0541FFF12A6 -:100F40000786EFF0120859E0541FFF90072412079C -:100F500089EFF022E4F553120E8140047F018002F4 
-:100F60007F00120E8940047E0180027E00EE4F70E9 -:100F700003020FF685E11043E10253E10F85E11012 -:100F8000E4F551E5E3543FF552120E89401DAD5290 -:100F9000AF51121118EF600885E11043E140800B5A -:100FA00053E1BF120E5812000680FBE5E3543FF5F3 -:100FB00051E5E4543FF552120E81401DAD52AF5140 -:100FC000121118EF600885E11043E120800B53E116 -:100FD000DF120E5812000680FB120E8140047F01C2 -:100FE00080027F00120E8940047E0180027E00EEA6 -:100FF0004F6003120E5B22120E21EFF012109122AD -:1010000002110002104002109000000000000000D9 -:1010100001200120E4F5571216BD121644E4121007 -:10102000561214B7900726120735E4120731E4F080 -:101030001210561214B7900726120735E541120711 -:1010400031E540F0AF577E00AD567C00120444AF4E -:10105000567E000211EEFF900720A3E0FDE4F55656 -:10106000F540FEFCAB56FA1211517F0F7D18E4F5E6 -:1010700056F540FEFCAB56FA121541AF567E0012F3 -:101080001AFFE4FFF5567D1FF540FEFCAB56FA2231 -:1010900022E4F555E508FD74A0F556ED4407F55733 -:1010A000E52830E503D38001C340057F28EF8004A5 -:1010B0007F14EFC313F554E4F9120E1875838EE014 -:1010C000F510CEEFCEEED394004026E51054FE127C -:1010D0000E9875838EEDF0E5104401FDEB4407F5A5 -:1010E00082EDF0855782855683E030E301091E804A -:1010F000D4C234E9C395544002D2342202000622FD -:10110000303011901000E493F510901010E493F536 -:101110001012109012115022E4FCC3ED9FFAEFF56B -:101120008375820079FFE493CC6CCCA3D9F8DAF60E -:10113000E5E230E4028CE5ED24FFFFEF7582FFF578 -:1011400083E4936C70037F01227F00222211000050 -:10115000228E588F598C5A8D5B8A5C8B5D755E012F -:10116000E4F55FF560F56212072A7583D0E0FFC4ED -:10117000540FF561121EA585595ED3E55E955BE5BA -:101180005A12076B504B1207037583BCE0455E1281 -:1011900007297583BEE0455E1207297583C0E045C7 -:1011A0005EF0AF5FE560120878120AFFAF627E0062 -:1011B000AD5DAC5C120444E561AF5E7E00B4030536 -:1011C000121E218007AD5DAC5C121317055E021183 -:1011D0007A1207037583BCE045401207297583BE68 -:1011E000E045401207297583C0E04540F0228E5843 -:1011F0008F59755A017901755B01E4FB12072A7555 -:1012000083AEE0541AFF120865E0C4135407FEEFE2 -:10121000700CEE6535700790072FE0B4010DAF3507 -:101220007E00120EA9CFEBCF021E60E55964024585 -:101230005870047F0180027F00E559455870047E94 -:101240000180027E00EE4F602385414985404BE5D9 -:10125000594558702CAF5AFECDE9CDFCAB59AA5870 -:10126000120AFFAF5B7E00121E608015AF5B7E002E -:10127000121E60900726120735E549120731E54B2B -:10128000F0E4FDAF35FEFC120915228C648D651269 -:1012900008DA403CE56545647010120904C3E53E78 -:1012A000120769403B1208958018E53EC395384007 -:1012B0001D853E38E53E6005853F39800385393917 -:1012C0008F3A1207A8E53E120753E53FF022803B14 -:1012D000E5654564701112075F400512089E801F86 -:1012E00012073EE541F022E53CC39538401D853CA0 -:1012F00038E53C6005853D3980038539398F3A12E0 -:1013000007A8E53C120753E53DF02212079FE53898 -:10131000120753E539F0228C638D641208DA403CE1 -:10132000E56445637010120904C3E53E1207694085 -:101330003B1208958018E53EC39538401D853E3820 -:10134000E53E6005853F3980038539398F3A1207BC -:10135000A8E53E120753E53FF022803BE564456374 -:10136000701112075F400512089E801F12073EE5AC -:1013700041F022E53CC39538401D853C38E53C6092 -:1013800005853D3980038539398F3A1207A8E53C38 -:10139000120753E53DF02212079FE538120753E587 -:1013A00039F022E50DFEE5088E544405F555751516 -:1013B0000FF582120E7A1217A320310575150380DE -:1013C0000375150BE50AC39401503812142020311F -:1013D0000605150515800415151515E50AC39401B4 -:1013E0005021121420203104051580021515E50A3C -:1013F000C39401500E120E771217A3203105051564 -:10140000120E77E515B408047F0180027F00E51510 -:10141000B407047E0180027E00EE4F6002057F2249 -:10142000855582855483E515F01217A32212072AE9 -:101430007583AE74FF120729E0541AF534E0C41323 -:101440005407F53524FE602424FE603C24047063B8 
-:1014500075312DE508FD74B612079274BC90072211 -:1014600012079574901207B37492803C75313AE577 -:1014700008FD74BA12079274C09007221207B6745E -:10148000C41207B374C88020753135E508FD74B8FF -:1014900012079274BEFFED4407900722CFF0A3EF2E -:1014A000F074C21207B374C6FFED4407A3CFF0A3D4 -:1014B000EFF022753401228E588F598C5A8D5B8A39 -:1014C0005C8B5D755E01E4F55F121EA585595ED3E8 -:1014D000E55E955BE55A12076B5057E55D455C701C -:1014E0003012072A758392E55E1207297583C6E5D7 -:1014F0005E1207297583C8E55E120729758390E59A -:101500005E1207297583C2E55E1207297583C480C0 -:1015100003120732E55EF0AF5F7E00AD5DAC5C129A -:101520000444AF5E7E00AD5DAC5C120BD1055E0283 -:1015300014CFAB5DAA5CAD5BAC5AAF59AE58021B81 -:10154000FB8C5C8D5D8A5E8B5F756001E4F561F5F7 -:1015500062F563121EA58F60D3E560955DE55C12B0 -:10156000076B5061E55F455E702712072A7583B6E9 -:10157000E5601207297583B8E5601207297583BAFB -:10158000E560F0AF617E00E56212087A120AFF8022 -:1015900019900724120735E56012072975838EE438 -:1015A0001207297401120729E4F0AF637E00AD5FD2 -:1015B000AC5E120444AF607E00AD5FAC5E12128B75 -:1015C00005600215582290114DE49390072EF012F9 -:1015D000081F7583AEE0541AF5347067EF4407F5C1 -:1015E000827583CEE0FF1313135407F536540FD3DF -:1015F0009400400612142D121BA9E536540F24FE48 -:10160000600C14600C146019240370378010021EE3 -:1016100091121E9112072A7583CEE054EFF0021D3D -:10162000AE121014E4F555121D850555E555C39409 -:101630000540F412072A7583CEE054C7120729E04B -:101640004408F022E4F558F559AF08EF4407F58255 -:101650007583D0E0FDC4540FF55AEF4407F5827549 -:1016600083807401F0120821758382E545F0EF4410 -:1016700007F58275838A74FFF0121A4D12072A75D6 -:1016800083BCE054EF1207297583BEE054EF1207C4 -:10169000297583C0E054EF1207297583BCE044101C -:1016A0001207297583BEE044101207297583C0E034 -:1016B0004410F0AF58E559120878020AFFE4F558D3 -:1016C0007D01F559AF35FEFC12091512072A758305 -:1016D000B674101207297583B87410120729758320 -:1016E000BA74101207297583BC7410120729758308 -:1016F000BE74101207297583C074101207297583F0 -:1017000090E41207297583C2E41207297583C4E4A3 -:10171000120729758392E41207297583C6E412071C -:10172000297583C8E4F0AF58FEE55912087A020A19 -:10173000FFE5E230E46CE5E754C064407064E5091D -:10174000C45430FEE50825E025E054C04EFEEF54B9 -:101750003F4EFDE52BAE2A7802C333CE33CED8F907 -:10176000F5828E83EDF0E52BAE2A7802C333CE33BB -:10177000CED8F9FFF5828E83A3E5FEF08F828E83AB -:10178000A3A3E5FDF08F828E83A3A3A3E5FCF0C3A2 -:10179000E52B94FAE52A94005008052BE52B7002FE -:1017A000052A22E4FFE4F558F556F5577482FC1239 -:1017B0000E048C83E0F510547FF0E5104480120E87 -:1017C00098EDF07E0A120E047583A0E020E026DE7C -:1017D000F40557E55770020556E5142401FDE4337E -:1017E000FCD3E5579DE5569C40D9E50A942050026C -:1017F000050A43E108C231120E047583A6E05512B2 -:1018000065127003D23122C23122900726E0FAA37A -:10181000E0F5828A83E0F541E539C395414026E54C -:10182000399541C39FEE12076B40047C0180027C16 -:1018300000E541643F60047B0180027B00EC5B605B -:101840002905418028C3E5419539C39FEE12076BF6 -:1018500040047F0180027F00E54160047E01800238 -:101860007E00EF5E600415418003853941853A4072 -:1018700022E5E230E460E5E130E25BE50970047FF7 -:101880000180027F00E50870047E0180027E00EE88 -:101890005F604353F9F8E5E230E43BE5E130E22EE6 -:1018A00043FA0253FAFBE4F510909470E510F0E56A -:1018B000E130E2E7909470E06510600343FA0405BC -:1018C00010909470E510F070E612000680E153FA73 -:1018D000FD53FAFB80C0228F54120006E5E130E090 -:1018E000047F0180027F00E57ED3940540047E01E1 -:1018F00080027E00EE4F603D855411E5E220E1322A -:1019000074CE121A0530E7047D0180027D008F82BB -:101910008E83E030E6047F0180027F00EF5D70156A -:101920001215C674CE121A0530E607E04480F04363 -:10193000F98012187122120E44E51625E025E024E4 
-:10194000B0F582E4341AF583E493F50FE51625E04B -:1019500025E024B1F582E4341AF583E493F50E1200 -:101960000E65F510E50F54F0120E1775838CEFF02D -:10197000E50F30E00C120E04758386E04440F080E1 -:101980000A120E04758386E054BFF0120E9175831F -:1019900082E50EF0227F05121731120E04120E336B -:1019A0007402F0748EFE120E04120E0BEFF0751519 -:1019B00070120FF72034057515108003751550123D -:1019C0000FF72034047410800274F02515F51512F9 -:1019D0000E21EFF0121091203417E5156430600CE1 -:1019E00074102515F515B48003E4F515120E21EFDA -:1019F000F022F0E50B25E025E02482F582E43407AF -:101A0000F583227488FEE5084407FFF5828E83E0A3 -:101A100022F0E5084407F58222F0E054C08F828E60 -:101A200083F022EF4407F582758386E05410D39447 -:101A30000022F0900715E004F0224406F582758339 -:101A40009EE022FEEF4407F5828E83E022E49007B9 -:101A50002AF0A3F012072A758382E0547F12072927 -:101A6000E04480F01210FC12081F7583A0E020E013 -:101A70001A90072BE004F0700690072AE004F0901B -:101A8000072AE0B410E1A3E0B400DCEE44A6FCEFCA -:101A90004407F5828C83E0F532EE44A8FEEF44075C -:101AA000F5828E83E0F5332201201100042000909E -:101AB00000200F9200210F9400220F9600230F9810 -:101AC00000240F9A00250F9C00260F9E00270FA0D0 -:101AD000012001A2012101A4012201A6012301A8E4 -:101AE000012401AA012501AC012601AE012701B0A4 -:101AF000012801B400280FB640280FB8612801CB97 -:101B0000EFCBCAEECA7F01E4FDEB4A7024E508F58D -:101B10008274B6120829E508F58274B8120829E51E -:101B200008F58274BA1208297E007C00120AFF8030 -:101B300012900726120735E541F090072412073569 -:101B4000E540F012072A75838EE41207297401120A -:101B50000729E4F022E4F526F52753E1FEF52A757E -:101B60002B01F5087F0112173130301C901AA9E4BF -:101B700093F510901FF9E493F510900041E493F56C -:101B800010901ECAE493F5107F02121731120F5401 -:101B90007F03121731120006E5E230E70912100048 -:101BA00030300312110002004712081F7583D0E085 -:101BB000C4540FFD7543017544FF1208AA7404F064 -:101BC000753B01ED14600C14600B14600F2403705E -:101BD0000B800980001208A704F080061208A77481 -:101BE00004F0EE4482FEEF4407F5828E83E5451251 -:101BF00008BE758382E531F002114C8E608F611250 -:101C00001EA5E4FFCEEDCEEED39561E56012076B25 -:101C1000403974202EF582E43403F583E07003FF2D -:101C200080261208E2FDC39F401ECFEDCFEB4A7025 -:101C30000B8D421208EEF5418E40800C1208E2F541 -:101C4000381208EEF5398E3A1E80BC22755801E52F -:101C500035700C1207CCE0F54A1207D8E0F54CE5D8 -:101C600035B4040C1207E4E0F54A1207F0E0F54C35 -:101C7000E535B401047F0180027F00E535B402043C -:101C80007E0180027E00EE4F600C1207FCE0F54AF8 -:101C9000120808E0F54C85414985404B22755B01EF -:101CA000900724120735E0541FFFD3940250048F8D -:101CB000588005EF24FEF558EFC394184005755978 -:101CC000188004EF04F55985435AAF587E00AD598A -:101CD0007C00AB5B7A00121541AF5A7E0012180AE5 -:101CE000AF5B7E00021AFFE5E230E70E121003C27E -:101CF000303030031210FF203328E5E730E70512BB -:101D00000EA2800DE5FEC394205006120EA243F9E8 -:101D100008E5F230E70353F97FE5F15470D39400FE -:101D200050D822120E04758380E4F0E508440712AF -:101D30000DFD758384120E02758386120E02758363 -:101D40008CE054F3120E0375838E120E0275839489 -:101D5000E054FBF02212072A75838EE412072974DF -:101D600001120729E41208BE75838CE04420120892 -:101D7000BEE054DFF07484850882F583E0547FF080 -:101D8000E04480F022755601E4FDF557AF35FEFCC6 -:101D9000120915121C9D121E7A121C4CAF577E00A0 -:101DA000AD567C00120444AF567E000211EE75560B -:101DB00001E4FDF557AF35FEFC120915121C9D120A -:101DC0001E7A121C4CAF577E00AD567C00120444A4 -:101DD000AF567E000211EEE4F516120E44FEE50841 -:101DE0004405FF120E658F828E83F00516E516C33B -:101DF000941440E6E508120E2BE4F022E4F558F5C1 -:101E000059F55AFFFEAD58FC1209157F047E00AD4E -:101E1000587C001209157F027E00AD587C00020933 -:101E200015E53C253EFCE5422400FBE433FAECC317 
-:101E30009BEA12076B400B8C42E53D253FF5418F35 -:101E4000402212090B227484F5188508198519821D -:101E5000851883E0547FF0E04480F0E04480F02275 -:101E6000EF4E700B12072A7583D2E054DFF0221276 -:101E7000072A7583D2E04420F02275580190072686 -:101E8000120735E0543FF541120732E0543FF54068 -:101E900022755602E4F557121DFCAF577E00AD5671 -:101EA0007C00020444E4F542F541F540F538F5398B -:101EB000F53A22EF5407FFE5F954F84FF5F9227F80 -:101EC00001E4FE0F0EBEFFFB2201200001042000F2 -:101ED0000000000000000000000000000000000002 -:101EE00000000000000000000000000000000000F2 -:101EF00000000000000000000000000000000000E2 -:101F000000000000000000000000000000000000D1 -:101F100000000000000000000000000000000000C1 -:101F200000000000000000000000000000000000B1 -:101F300000000000000000000000000000000000A1 -:101F40000000000000000000000000000000000091 -:101F50000000000000000000000000000000000081 -:101F60000000000000000000000000000000000071 -:101F70000000000000000000000000000000000061 -:101F80000000000000000000000000000000000051 -:101F90000000000000000000000000000000000041 -:101FA0000000000000000000000000000000000031 -:101FB0000000000000000000000000000000000021 -:101FC0000000000000000000000000000000000011 -:101FD0000000000000000000000000000000000001 -:101FE00000000000000000000000000000000000F1 -:101FF000000000000000000001201100042000810A -:00000001FF -- cgit v1.2.2 From 16dad1d743d31a104a849c8944e6b9eb479f6cd7 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Sat, 23 Mar 2013 15:38:22 +0100 Subject: KMS: fix EDID detailed timing vsync parsing EDID spreads some values across multiple bytes; bit-fiddling is needed to retrieve these. The current code to parse "detailed timings" has a cut&paste error that results in a vsync offset of at most 15 lines instead of 63. See http://en.wikipedia.org/wiki/EDID and in the "EDID Detailed Timing Descriptor" see bytes 10+11 show why that needs to be a left shift. Cc: stable@vger.kernel.org Signed-off-by: Torsten Duwe Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_edid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c194f4e680ad..0dcbb637f61a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1634,7 +1634,7 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, unsigned vblank = (pt->vactive_vblank_hi & 0xf) << 8 | pt->vblank_lo; unsigned hsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc0) << 2 | pt->hsync_offset_lo; unsigned hsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0x30) << 4 | pt->hsync_pulse_width_lo; - unsigned vsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc) >> 2 | pt->vsync_offset_pulse_width_lo >> 4; + unsigned vsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc) << 2 | pt->vsync_offset_pulse_width_lo >> 4; unsigned vsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0x3) << 4 | (pt->vsync_offset_pulse_width_lo & 0xf); /* ignore tiny modes */ -- cgit v1.2.2 From c19b3b0f6eed552952845e4ad908dba2113d67b4 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Sat, 23 Mar 2013 15:39:34 +0100 Subject: KMS: fix EDID detailed timing frame rate When KMS has parsed an EDID "detailed timing", it leaves the frame rate zeroed. Consecutive (debug-) output of that mode thus yields 0 for vsync. This simple fix also speeds up future invocations of drm_mode_vrefresh(). 
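For context, drm_mode_vrefresh() derives the refresh rate from the pixel clock and the total raster size. A rough sketch, ignoring the interlace/doublescan/vscan adjustments the real helper applies (a simplification, not the verbatim DRM code):

    /* mode->clock is in kHz; scale to Hz, then divide by pixels per frame. */
    static int vrefresh_sketch(const struct drm_display_mode *mode)
    {
            if (!mode->htotal || !mode->vtotal)
                    return 0;       /* avoid divide-by-zero on bogus modes */
            return mode->clock * 1000 / (mode->htotal * mode->vtotal);
    }

For a common 1920x1080 mode (clock 148500 kHz, htotal 2200, vtotal 1125) this yields 148500000 / 2475000 = 60 Hz, which is the value now cached in mode->vrefresh.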
While it is debatable whether this qualifies as a -stable fix, I'd apply it for consistency's sake; drm_helper_probe_single_connector_modes() does the same thing already for all probed modes. Cc: stable@vger.kernel.org Signed-off-by: Torsten Duwe Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_edid.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 0dcbb637f61a..e2acfdbf7d3c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1715,6 +1715,7 @@ set_size: } mode->type = DRM_MODE_TYPE_DRIVER; + mode->vrefresh = drm_mode_vrefresh(mode); drm_mode_set_name(mode); return mode; -- cgit v1.2.2 From 8bb9660418e05bb1845ac1a2428444d78e322cc7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 23 Mar 2013 16:52:44 -0700 Subject: Linux 3.9-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 22113a77f8ed..54d2b2a0fef0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Unicycling Gorilla # *DOCUMENTATION* -- cgit v1.2.2 From bba2181c49f1dddf8b592804a1b53cc1a3cf408a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 22 Mar 2013 10:53:40 +0100 Subject: Revert "drm/i915: set TRANSCODER_EDP even earlier" This reverts commit cc464b2a17c59adedbdc02cc54341d630354edc3. The reason is that Takashi Iwai reported a regression bisected to this commit: http://www.mail-archive.com/intel-gfx@lists.freedesktop.org/msg18788.html His machine has eDP on port D (usual desktop all-in-one setup), which intel_dp.c identifies as an eDP panel but which the hsw ddi code mishandles. Closer inspection of the code reveals that haswell_crtc_mode_set also checks intel_encoder_is_pch_edp when setting is_cpu_edp. On haswell that doesn't make much sense (since there's no edp on the pch), but what this function _really_ checks is whether that edp connector is on port A or port D. It's just that on ilk-ivb port D was on the pch ... So that explains why this seemingly innocent change killed eDP on port D. Furthermore it looks like everything else accidentally works, since we've never enabled eDP on port D support for hsw intentionally (e.g. we still register the HDMI output for port D in that case). But in retrospect I also don't like that this leaks highly platform specific details into common code, and the reason is that the drm vblank layer sucks. So instead I think we should: - move the cpu_transcoder into the dynamic pipe_config tracking (once that's merged). - fix up the drm vblank layer to finally deal with kms crtc objects instead of int pipes. v2: Pimp the commit message with the better diagnosis as discussed with Paulo on irc. Cc: Paulo Zanoni Cc: Takashi Iwai Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 287b42c9d1a8..b20d50192fcc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5771,6 +5771,11 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, num_connectors++; } + if (is_cpu_edp) + intel_crtc->cpu_transcoder = TRANSCODER_EDP; + else + intel_crtc->cpu_transcoder = pipe; + /* We are not sure yet this won't happen.
*/ WARN(!HAS_PCH_LPT(dev), "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev)); @@ -5837,11 +5842,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int pipe = intel_crtc->pipe; int ret; - if (IS_HASWELL(dev) && intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) - intel_crtc->cpu_transcoder = TRANSCODER_EDP; - else - intel_crtc->cpu_transcoder = pipe; - drm_vblank_pre_modeset(dev, pipe); ret = dev_priv->display.crtc_mode_set(crtc, mode, adjusted_mode, -- cgit v1.2.2 From 2124b72e6283c4e84a55e71077fee91793f4c801 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 22 Mar 2013 14:07:23 -0300 Subject: drm/i915: don't disable the power well yet We're still not 100% ready to disable the power well, so don't disable it for now. When we disable it we break the audio driver (because some of the audio registers are on the power well) and machines with eDP on port D (because it doesn't use TRANSCODER_EDP). Also, instead of just reverting the code, add a Kernel option to let us disable it if we want. This will allow us to keep developing and testing the feature while it's not enabled. This fixes problems caused by the following commit: commit d6dd9eb1d96d2b7345fe4664066c2b7ed86da898 Author: Daniel Vetter Date: Tue Jan 29 16:35:20 2013 -0200 drm/i915: dynamic Haswell display power well support References: http://www.mail-archive.com/intel-gfx@lists.freedesktop.org/msg18788.html Cc: Takashi Iwai Cc: Mengdong Lin Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 5 +++++ drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0a8eceb75902..e9b57893db2b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -125,6 +125,11 @@ MODULE_PARM_DESC(preliminary_hw_support, "Enable Haswell and ValleyView Support. 
" "(default: false)"); +int i915_disable_power_well __read_mostly = 0; +module_param_named(disable_power_well, i915_disable_power_well, int, 0600); +MODULE_PARM_DESC(disable_power_well, + "Disable the power well when possible (default: false)"); + static struct drm_driver driver; extern int intel_agp_enabled; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e95337c97459..01769e2a9953 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1398,6 +1398,7 @@ extern int i915_enable_fbc __read_mostly; extern bool i915_enable_hangcheck __read_mostly; extern int i915_enable_ppgtt __read_mostly; extern unsigned int i915_preliminary_hw_support __read_mostly; +extern int i915_disable_power_well __read_mostly; extern int i915_suspend(struct drm_device *dev, pm_message_t state); extern int i915_resume(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a1794c6df1bf..adca00783e61 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4079,6 +4079,9 @@ void intel_set_power_well(struct drm_device *dev, bool enable) if (!IS_HASWELL(dev)) return; + if (!i915_disable_power_well && !enable) + return; + tmp = I915_READ(HSW_PWR_WELL_DRIVER); is_enabled = tmp & HSW_PWR_WELL_STATE; enable_requested = tmp & HSW_PWR_WELL_ENABLE; -- cgit v1.2.2 From b1289371fcd580b4c412e6d05c4cb8ac8d277239 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 22 Mar 2013 15:44:46 +0100 Subject: Revert "drm/i915: write backlight harder" This reverts commit cf0a6584aa6d382f802f2c3cacac23ccbccde0cd. Turns out that cargo-culting breaks systems. Note that we can't revert further, since commit 770c12312ad617172b1a65b911d3e6564fc5aca8 Author: Takashi Iwai Date: Sat Aug 11 08:56:42 2012 +0200 drm/i915: Fix blank panel at reopening lid fixed a regression in 3.6-rc kernels for which we've never figured out the exact root cause. But some further inspection of the backlight code reveals that it's seriously lacking locking. And especially the asle backlight update is know to get fired (through some smm magic) when writing specific backlight control registers. So the possibility of suffering from races is rather real. Until those races are fixed I don't think it makes sense to try further hacks. Which sucks a bit, but sometimes that's how it is :( References: http://www.mail-archive.com/intel-gfx@lists.freedesktop.org/msg18788.html Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=47941 Tested-by: Takashi Iwai Cc: Jani Nikula Cc: Takashi Iwai Cc: stable@vger.kernel.org (the reverted commit was cc: stable, too) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_panel.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index a3730e0289e5..bee8cb6108a7 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -321,9 +321,6 @@ void intel_panel_enable_backlight(struct drm_device *dev, if (dev_priv->backlight_level == 0) dev_priv->backlight_level = intel_panel_get_max_backlight(dev); - dev_priv->backlight_enabled = true; - intel_panel_actually_set_backlight(dev, dev_priv->backlight_level); - if (INTEL_INFO(dev)->gen >= 4) { uint32_t reg, tmp; @@ -359,12 +356,12 @@ void intel_panel_enable_backlight(struct drm_device *dev, } set_level: - /* Check the current backlight level and try to set again if it's zero. 
- * On some machines, BLC_PWM_CPU_CTL is cleared to zero automatically - * when BLC_PWM_CPU_CTL2 and BLC_PWM_PCH_CTL1 are written. + /* Call below after setting BLC_PWM_CPU_CTL2 and BLC_PWM_PCH_CTL1. + * BLC_PWM_CPU_CTL may be cleared to zero automatically when these + * registers are set. */ - if (!intel_panel_get_backlight(dev)) - intel_panel_actually_set_backlight(dev, dev_priv->backlight_level); + dev_priv->backlight_enabled = true; + intel_panel_actually_set_backlight(dev, dev_priv->backlight_level); } static void intel_panel_init_backlight(struct drm_device *dev) -- cgit v1.2.2 From 9979a55a833883242e3a29f3596676edd7199c46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2013 14:38:28 +0000 Subject: net: remove a WARN_ON() in net_enable_timestamp() The WARN_ON(in_interrupt()) in net_enable_timestamp() can trigger a false positive in the socket clone path, when run from softirq context: [ 3641.624425] WARNING: at net/core/dev.c:1532 net_enable_timestamp+0x7b/0x80() [ 3641.668811] Call Trace: [ 3641.671254] [] warn_slowpath_common+0x87/0xc0 [ 3641.677871] [] warn_slowpath_null+0x1a/0x20 [ 3641.683683] [] net_enable_timestamp+0x7b/0x80 [ 3641.689668] [] sk_clone_lock+0x425/0x450 [ 3641.695222] [] inet_csk_clone_lock+0x16/0x170 [ 3641.701213] [] tcp_create_openreq_child+0x29/0x820 [ 3641.707663] [] ? ipt_do_table+0x222/0x670 [ 3641.713354] [] tcp_v4_syn_recv_sock+0xab/0x3d0 [ 3641.719425] [] tcp_check_req+0x3da/0x530 [ 3641.724979] [] ? inet_hashinfo_init+0x60/0x80 [ 3641.730964] [] ? tcp_v4_rcv+0x79f/0xbe0 [ 3641.736430] [] tcp_v4_do_rcv+0x38d/0x4f0 [ 3641.741985] [] tcp_v4_rcv+0xa7a/0xbe0 It's safe at this point because the parent socket owns a reference on netstamp_needed, so we can't have a 0 -> 1 transition, which would require locking a mutex. Instead of refining the check, let's remove it, as all known callers are safe. If it ever changes in the future, static_key_slow_inc() will complain anyway. Reported-by: Laurent Chavey Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index d540ced1f6c6..b13e5c766c11 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1545,7 +1545,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); -- cgit v1.2.2 From 4a7df340ed1bac190c124c1601bfc10cde9fb4fb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Mar 2013 19:14:07 +0000 Subject: 8021q: fix a potential use-after-free vlan_vid_del() could possibly free ->vlan_info after an RCU grace period; however, we may still refer to the freed memory area via the 'grp' pointer. Found by code inspection. This patch moves vlan_vid_del() as late as possible. Cc: Patrick McHardy Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a18714469bf7..85addcd9372b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -86,13 +86,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). 
- */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) @@ -114,6 +107,13 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). + */ + if (vlan_id) + vlan_vid_del(real_dev, vlan_id); + /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -- cgit v1.2.2 From 9b46922e15f4d9d2aedcd320c3b7f7f54d956da7 Mon Sep 17 00:00:00 2001 From: Hong zhi guo Date: Sat, 23 Mar 2013 02:27:50 +0000 Subject: bridge: fix crash when set mac address of br interface When I tried to set the mac address of a bridge interface to a mac address which was already learned on this bridge, I got a system hang. The cause is straightforward: function br_fdb_change_mac_address calls fdb_insert with a NULL source nbp. Then an fdb lookup is performed. If an fdb entry is found and it's local, it's OK. But if it's not local, source is dereferenced for the printk without a NULL check. Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b0812c91c0f0..bab338e6270d 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -423,7 +423,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } -- cgit v1.2.2 From 8fe7f99a9e11a43183bc27420309ae105e1fec1a Mon Sep 17 00:00:00 2001 From: Kumar Amit Mehta Date: Sat, 23 Mar 2013 20:10:25 +0000 Subject: bnx2x: fix assignment of signed expression to unsigned variable Fix an incorrect assignment of a signed expression to an unsigned variable. Signed-off-by: Kumar Amit Mehta Acked-by: Dmitry Kravkov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c index 568205436a15..91ecd6a00d05 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c @@ -2139,12 +2139,12 @@ static u8 bnx2x_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) break; default: BNX2X_ERR("Non valid capability ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "DCB disabled\n"); - rval = -EINVAL; + rval = 1; } DP(BNX2X_MSG_DCB, "capid %d:%x\n", capid, *cap); @@ -2170,12 +2170,12 @@ static int bnx2x_dcbnl_get_numtcs(struct net_device *netdev, int tcid, u8 *num) break; default: BNX2X_ERR("Non valid TC-ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "DCB disabled\n"); - rval = -EINVAL; + rval = 1; } return rval; @@ -2188,7 +2188,7 @@ static int bnx2x_dcbnl_set_numtcs(struct net_device *netdev, int tcid, u8 num) return -EINVAL; } -static u8 bnx2x_dcbnl_get_pfc_state(struct net_device *netdev) +static u8 bnx2x_dcbnl_get_pfc_state(struct net_device *netdev) { struct bnx2x *bp = netdev_priv(netdev); DP(BNX2X_MSG_DCB, "state = %d\n", bp->dcbx_local_feat.pfc.enabled); @@ -2390,12 +2390,12 @@ static u8 bnx2x_dcbnl_get_featcfg(struct net_device *netdev, int featid, break; default: BNX2X_ERR("Non valid featrue-ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "DCB disabled\n"); - rval = -EINVAL; + rval = 1; } return rval; @@ -2431,12 +2431,12 @@ static u8 bnx2x_dcbnl_set_featcfg(struct net_device *netdev, int featid, break; default: BNX2X_ERR("Non valid featrue-ID\n"); - rval = -EINVAL; + rval = 1; break; } } else { DP(BNX2X_MSG_DCB, "dcbnl call not valid\n"); - rval = -EINVAL; + rval = 1; } return rval; -- cgit v1.2.2 From 7ebe183c6d444ef5587d803b64a1f4734b18c564 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 24 Mar 2013 10:42:25 +0000 Subject: tcp: undo spurious timeout after SACK reneging On SACK reneging the sender immediately retransmits and forces a timeout but disables Eifel (undo). If the (buggy) receiver does not drop any packet, this can trigger a false slow-start retransmit storm driven by the ACKs of the original packets. This can be detected with undo and TCP timestamps. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99f..3bd55bad230a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2059,11 +2059,8 @@ void tcp_enter_loss(struct sock *sk, int how) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } -- cgit v1.2.2 From 087aa036eb79f24b856893190359ba812b460f45 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 25 Mar 2013 09:31:31 +0800 Subject: powerpc: make additional room in exception vector area The FWNMI region is fixed at 0x7000 and the vectors are now overflowing that with allmodconfig. 
Fix that by moving slb_miss_realmode code out of that region as it doesn't need to be that close to the call sites (it is a _GLOBAL function) Fixes this build error: arch/powerpc/kernel/exceptions-64s.S: Assembler messages: arch/powerpc/kernel/exceptions-64s.S:1304: Error: attempt to move .org backwards Signed-off-by: Chen Gang Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/exceptions-64s.S | 144 +++++++++++++++++------------------ 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 200afa5bcfb7..56bd92362ce1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1066,78 +1066,6 @@ unrecov_user_slb: #endif /* __DISABLED__ */ -/* - * r13 points to the PACA, r9 contains the saved CR, - * r12 contain the saved SRR1, SRR0 is still ready for return - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(slb_miss_realmode) - mflr r10 -#ifdef CONFIG_RELOCATABLE - mtctr r11 -#endif - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate_realmode - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - - mtlr r10 - - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- 2f - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - - RESTORE_PPR_PACA(PACA_EXSLB, r9) - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -2: mfspr r11,SPRN_SRR0 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10,unrecov_slb) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - rfid - b . - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - - -#ifdef CONFIG_PPC_970_NAP -power4_fixup_nap: - andc r9,r9,r10 - std r9,TI_LOCAL_FLAGS(r11) - ld r10,_LINK(r1) /* make idle task do the */ - std r10,_NIP(r1) /* equivalent of a blr */ - blr -#endif - .align 7 .globl alignment_common alignment_common: @@ -1335,6 +1263,78 @@ _GLOBAL(opal_mc_secondary_handler) #endif /* CONFIG_PPC_POWERNV */ +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 +#ifdef CONFIG_RELOCATABLE + mtctr r11 +#endif + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + + mtlr r10 + + andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- 2f + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +2: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecov_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + + +#ifdef CONFIG_PPC_970_NAP +power4_fixup_nap: + andc r9,r9,r10 + std r9,TI_LOCAL_FLAGS(r11) + ld r10,_LINK(r1) /* make idle task do the */ + std r10,_NIP(r1) /* equivalent of a blr */ + blr +#endif + /* * Hash table stuff */ -- cgit v1.2.2 From b563b4e3f2dd601e19b46ada31bd176fc0a16efc Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Fri, 22 Mar 2013 01:29:28 +0100 Subject: cpufreq / intel_pstate: Add function to check that all MSRs are valid Some VMs seem to try to implement some MSRs but not all the registers the driver needs. Check to make sure all the MSR that we need are available. If any of the required MSRs are not available refuse to load. References: https://bugzilla.redhat.com/show_bug.cgi?id=922923 Reported-by: Josh Stone Signed-off-by: Dirk Brandewie Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f6dd1e761129..cd9c5f4f5805 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -752,6 +752,29 @@ static struct cpufreq_driver intel_pstate_driver = { static int __initdata no_load; +static int intel_pstate_msrs_not_valid(void) +{ + /* Check that all the msr's we are using are valid. */ + u64 aperf, mperf, tmp; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + if (!intel_pstate_min_pstate() || + !intel_pstate_max_pstate() || + !intel_pstate_turbo_pstate()) + return -ENODEV; + + rdmsrl(MSR_IA32_APERF, tmp); + if (!(tmp - aperf)) + return -ENODEV; + + rdmsrl(MSR_IA32_MPERF, tmp); + if (!(tmp - mperf)) + return -ENODEV; + + return 0; +} static int __init intel_pstate_init(void) { int cpu, rc = 0; @@ -764,6 +787,9 @@ static int __init intel_pstate_init(void) if (!id) return -ENODEV; + if (intel_pstate_msrs_not_valid()) + return -ENODEV; + pr_info("Intel P-state driver initializing.\n"); all_cpu_data = vmalloc(sizeof(void *) * num_possible_cpus()); -- cgit v1.2.2 From e6f3eb29be471c4b536a91daab76d4aeda72a261 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Sun, 24 Mar 2013 00:54:39 +0100 Subject: cpufreq / intel_pstate: Fix calculation of current frequency Use the correct pstate value to calculate the effective frequency. References: https://bugzilla.redhat.com/show_bug.cgi?id=923942 Reported-by: Satish Balay Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index cd9c5f4f5805..b662529072f7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -454,7 +454,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, sample->idletime_us * 100, sample->duration_us); core_pct = div64_u64(sample->aperf * 100, sample->mperf); - sample->freq = cpu->pstate.turbo_pstate * core_pct * 1000; + sample->freq = cpu->pstate.max_pstate * core_pct * 1000; sample->core_pct_busy = div_s64((sample->pstate_pct_busy * core_pct), 100); -- cgit v1.2.2 From 05e99c8cf9d4e53ef6e016815db40a89a6156529 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Mar 2013 14:21:10 +0000 Subject: intel-pstate: Use #defines instead of hard-coded values. They are defined in coreboot (MSR_PLATFORM) and the other one is already defined in msr-index.h. Let's use those. Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Viresh Kumar Acked-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- arch/x86/include/uapi/asm/msr-index.h | 1 + drivers/cpufreq/intel_pstate.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 892ce40a7470..7a060f4b411f 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -44,6 +44,7 @@ #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define MSR_PLATFORM_INFO 0x000000ce #define MSR_MTRRcap 0x000000fe #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b662529072f7..ad72922919ed 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -358,14 +358,14 @@ static void intel_pstate_sysfs_expose_params(void) static int intel_pstate_min_pstate(void) { u64 value; - rdmsrl(0xCE, value); + rdmsrl(MSR_PLATFORM_INFO, value); return (value >> 40) & 0xFF; } static int intel_pstate_max_pstate(void) { u64 value; - rdmsrl(0xCE, value); + rdmsrl(MSR_PLATFORM_INFO, value); return (value >> 8) & 0xFF; } @@ -373,7 +373,7 @@ static int intel_pstate_turbo_pstate(void) { u64 value; int nont, ret; - rdmsrl(0x1AD, value); + rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); nont = intel_pstate_max_pstate(); ret = ((value) & 255); if (ret <= nont) -- cgit v1.2.2 From 187da1d97f3a949b967274d7ee2f95d3a4f39251 Mon Sep 17 00:00:00 2001 From: viresh kumar Date: Fri, 22 Mar 2013 10:13:52 +0000 Subject: cpufreq: stats: do cpufreq_cpu_put() corresponding to cpufreq_cpu_get() In cpufreq_stats_free_sysfs() we aren't balancing calls to cpufreq_cpu_get() with cpufreq_cpu_put(). This will never let us have ref count to policy->kobj as zero. We will get a hang if somehow cpufreq_driver_unregister() is called. And that can happen when we compile our driver as module and insmod/rmmod it. Signed-off-by: Viresh Kumar Acked-by: Amit Kucheria Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq_stats.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 2fd779eb1ed1..bfd6273fd873 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -180,15 +180,19 @@ static void cpufreq_stats_free_sysfs(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - if (!cpufreq_frequency_get_table(cpu)) + if (!policy) return; - if (policy && !policy_is_shared(policy)) { + if (!cpufreq_frequency_get_table(cpu)) + goto put_ref; + + if (!policy_is_shared(policy)) { pr_debug("%s: Free sysfs stat\n", __func__); sysfs_remove_group(&policy->kobj, &stats_attr_group); } - if (policy) - cpufreq_cpu_put(policy); + +put_ref: + cpufreq_cpu_put(policy); } static int cpufreq_stats_create_table(struct cpufreq_policy *policy, -- cgit v1.2.2 From aa77a52764a92216b61a6c8079b5c01937c046cd Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sun, 24 Mar 2013 15:58:12 +0000 Subject: cpufreq: acpi-cpufreq: Don't set policy->related_cpus from .init() With the addition of following patch: fcf8058 cpufreq: Simplify cpufreq_add_dev() cpufreq driver's .init() routine must initialize policy->cpus with mask of all possible CPUs (Online + Offline) that share the clock. Then the core would copy this mask onto policy->related_cpus and will reset policy->cpus to carry only online cpus. acpi-cpufreq driver wasn't updated with this assumption and so sometimes when we try to hot[un]plug CPUs at run time, sysfs directories get corrupted. This patch fixes acpi-cpufreq driver against this corruption. Reported-and-tested-by: Maciej Rutecki Tested-by: Borislav Petkov Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 937bc286591f..57a8774f0b4e 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -730,7 +730,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { cpumask_copy(policy->cpus, perf->shared_cpu_map); } - cpumask_copy(policy->related_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); @@ -742,7 +741,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); - cpumask_copy(policy->related_cpus, cpu_sibling_mask(cpu)); policy->shared_type = CPUFREQ_SHARED_TYPE_HW; pr_info_once(PFX "overriding BIOS provided _PSD data\n"); } -- cgit v1.2.2 From 1166fde6a923c30f4351515b6a9a1efc513e7d00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Mar 2013 11:23:40 -0400 Subject: SUNRPC: Add barriers to ensure read ordering in rpc_wake_up_task_queue_locked We need to be careful when testing task->tk_waitqueue in rpc_wake_up_task_queue_locked, because it can be changed while we are holding the queue->lock. By adding appropriate memory barriers, we can ensure that it is safe to test task->tk_waitqueue for equality if the RPC_TASK_QUEUED bit is set. 
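Distilled from the hunks below, this is the classic publish/observe pairing; a sketch using the names from this patch, not new code:

    /* Writer (__rpc_add_wait_queue): publish the pointer, then the flag. */
    task->tk_waitqueue = queue;
    smp_wmb();                  /* order the store above ... */
    rpc_set_queued(task);       /* ... before RPC_TASK_QUEUED is visible */

    /* Reader (rpc_wake_up_task_queue_locked): flag first, then pointer. */
    if (RPC_IS_QUEUED(task)) {
            smp_rmb();          /* pairs with the smp_wmb() in the writer */
            if (task->tk_waitqueue == queue)
                    __rpc_do_wake_up_task(queue, task);
    }

The write barrier guarantees that any CPU which observes the QUEUED bit also observes the tk_waitqueue store that preceded it.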
Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/sched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25ddec9..f8529fc8e542 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* -- cgit v1.2.2 From ded34e0fe8fe8c2d595bfa30626654e4b87621e0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Mar 2013 03:18:33 +0000 Subject: unix: fix a race condition in unix_release() As reported by Jan, and others over the past few years, there is a race condition caused by unix_release setting the sock->sk pointer to NULL before properly marking the socket as dead/orphaned. This can cause a problem with the LSM hook security_unix_may_send() if there is another socket attempting to write to this partially released socket in between when sock->sk is set to NULL and it is marked as dead/orphaned. This patch fixes this by only setting sock->sk to NULL after the socket has been marked as dead; I also take the opportunity to make unix_release_sock() a void function as it only ever returned 0/success. Dave, I think this one should go on the -stable pile. Special thanks to Jan for coming up with a reproducer for this problem. Reported-by: Jan Stancek Signed-off-by: Paul Moore Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51be64f163ec..f153a8d6e339 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -699,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) -- cgit v1.2.2 From 09ce0c0c8a99651cace20958278476ee3f31678c Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 20 Mar 2013 09:30:00 +0800 Subject: usb: xhci: fix build warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /home/b29397/work/code/git/linus/linux-2.6/drivers/usb/host/xhci-ring.c: In function ‘handle_port_status’: /home/b29397/work/code/git/linus/linux-2.6/drivers/usb/host/xhci-ring.c:1580: warning: ‘hcd’ may be used uninitialized in this function Signed-off-by: Peter Chen Signed-off-by: Sarah Sharp --- drivers/usb/host/xhci-ring.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 882875465301..ec2681918682 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1599,14 +1599,20 @@ static void handle_port_status(struct xhci_hcd *xhci, max_ports = HCS_MAX_PORTS(xhci->hcs_params1); if ((port_id <= 0) || (port_id > max_ports)) { xhci_warn(xhci, "Invalid port id %d\n", port_id); - bogus_port_status = true; - goto cleanup; + inc_deq(xhci, xhci->event_ring); + return; } /* Figure out which usb_hcd this port is attached to: * is it a USB 3.0 port or a USB 2.0/1.1 port? */ major_revision = xhci->port_array[port_id - 1]; + + /* Find the right roothub. */ + hcd = xhci_to_hcd(xhci); + if ((major_revision == 0x03) != (hcd->speed == HCD_USB3)) + hcd = xhci->shared_hcd; + if (major_revision == 0) { xhci_warn(xhci, "Event for port %u not in " "Extended Capabilities, ignoring.\n", @@ -1629,10 +1635,6 @@ static void handle_port_status(struct xhci_hcd *xhci, * into the index into the ports on the correct split roothub, and the * correct bus_state structure. */ - /* Find the right roothub. */ - hcd = xhci_to_hcd(xhci); - if ((major_revision == 0x03) != (hcd->speed == HCD_USB3)) - hcd = xhci->shared_hcd; bus_state = &xhci->bus_state[hcd_index(hcd)]; if (hcd->speed == HCD_USB3) port_array = xhci->usb3_ports; -- cgit v1.2.2 From 3f5eb14135ba9d97ba4b8514fc7ef5e0dac2abf4 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 19 Mar 2013 16:48:12 +0800 Subject: usb: add find_raw_port_number callback to struct hc_driver() xhci driver divides the root hub into two logical hubs which work respectively for usb 2.0 and usb 3.0 devices. They are independent devices in the usb core. But in the ACPI table, it's one device node and all usb2.0 and usb3.0 ports are under it. Binding usb port with its acpi node needs the raw port number which is reflected in the xhci extended capabilities table. 
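To make "raw port number" concrete: the driver recovers it with plain pointer arithmetic over the per-port register block, as xhci_find_raw_port_number() in the diff below shows. A rough self-contained sketch of that arithmetic, with an invented register array standing in for the real MMIO map:

    #include <stdio.h>

    /* Each port owns NUM_PORT_REGS consecutive 32-bit registers starting
     * at a common base, as in the xHCI operational register layout. The
     * array below is a stand-in, not real hardware access. */
    #define NUM_PORT_REGS 4

    static int raw_port_number(const unsigned int *portsc,
                               const unsigned int *port_status_base)
    {
            /* Offset in registers divided by registers-per-port; raw
             * port numbers count from 1, hence the +1. */
            return (int)((portsc - port_status_base) / NUM_PORT_REGS) + 1;
    }

    int main(void)
    {
            unsigned int op_regs[64] = { 0 };
            const unsigned int *base = &op_regs[0];

            /* The third port's PORTSC sits 2 * NUM_PORT_REGS past the base. */
            printf("%d\n", raw_port_number(base + 2 * NUM_PORT_REGS, base)); /* 3 */
            return 0;
    }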
This patch adds a find_raw_port_number callback to struct hc_driver, fills it with xhci_find_raw_port_number(), which returns the raw port number, and adds a wrapper, usb_hcd_find_raw_port_number(). It also refactors xhci_find_real_port_number() to use xhci_find_raw_port_number() to get the real index in the HW port status registers instead of scanning through the xHCI roothub port array, which also speeds things up. All addresses in the xhci->usb2_ports and xhci->usb3_ports arrays are known good ports and don't include the following bad ports from the extended capabilities table: (1) a root port that doesn't have an entry, (2) a root port with unknown speed, (3) a root port that is listed twice with different speeds. So xhci_find_raw_port_number() will only return the port number of a good port and never touch the bad ports above. Signed-off-by: Lan Tianyu Signed-off-by: Sarah Sharp --- drivers/usb/core/hcd.c | 8 ++++++++ drivers/usb/host/xhci-mem.c | 36 ++++++++---------------------------- drivers/usb/host/xhci-pci.c | 1 + drivers/usb/host/xhci.c | 22 ++++++++++++++++++++++ drivers/usb/host/xhci.h | 1 + include/linux/usb/hcd.h | 2 ++ 6 files changed, 42 insertions(+), 28 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 99b34a30354f..f9ec44cbb82f 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2412,6 +2412,14 @@ int usb_hcd_is_primary_hcd(struct usb_hcd *hcd) } EXPORT_SYMBOL_GPL(usb_hcd_is_primary_hcd); +int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1) +{ + if (!hcd->driver->find_raw_port_number) + return port1; + + return hcd->driver->find_raw_port_number(hcd, port1); +} + static int usb_hcd_request_irqs(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags) { diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 35616ffbe3ae..6dc238c592bc 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1022,44 +1022,24 @@ void xhci_copy_ep0_dequeue_into_input_ctx(struct xhci_hcd *xhci, * is attached to (or the roothub port its ancestor hub is attached to). All we * know is the index of that port under either the USB 2.0 or the USB 3.0 * roothub, but that doesn't give us the real index into the HW port status - * registers. Scan through the xHCI roothub port array, looking for the Nth - * entry of the correct port speed. Return the port number of that entry. + * registers. Call xhci_find_raw_port_number() to get real index. */ static u32 xhci_find_real_port_number(struct xhci_hcd *xhci, struct usb_device *udev) { struct usb_device *top_dev; - unsigned int num_similar_speed_ports; - unsigned int faked_port_num; - int i; + struct usb_hcd *hcd; + + if (udev->speed == USB_SPEED_SUPER) + hcd = xhci->shared_hcd; + else + hcd = xhci->main_hcd; for (top_dev = udev; top_dev->parent && top_dev->parent->parent; top_dev = top_dev->parent) /* Found device below root hub */; - faked_port_num = top_dev->portnum; - for (i = 0, num_similar_speed_ports = 0; - i < HCS_MAX_PORTS(xhci->hcs_params1); i++) { - u8 port_speed = xhci->port_array[i]; - - /* - * Skip ports that don't have known speeds, or have duplicate - * Extended Capabilities port speed entries. - */ - if (port_speed == 0 || port_speed == DUPLICATE_ENTRY) - continue; - /* - * USB 3.0 ports are always under a USB 3.0 hub. USB 2.0 and - * 1.1 ports are under the USB 2.0 hub. If the port speed - * matches the device speed, it's a similar speed port.
- */ - if ((port_speed == 0x03) == (udev->speed == USB_SPEED_SUPER)) - num_similar_speed_ports++; - if (num_similar_speed_ports == faked_port_num) - /* Roothub ports are numbered from 1 to N */ - return i+1; - } - return 0; + return xhci_find_raw_port_number(hcd, top_dev->portnum); } /* Setup an xHCI virtual device for a Set Address command */ diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index af259e0ec172..1a30c380043c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -313,6 +313,7 @@ static const struct hc_driver xhci_pci_hc_driver = { .set_usb2_hw_lpm = xhci_set_usb2_hardware_lpm, .enable_usb3_lpm_timeout = xhci_enable_usb3_lpm_timeout, .disable_usb3_lpm_timeout = xhci_disable_usb3_lpm_timeout, + .find_raw_port_number = xhci_find_raw_port_number, }; /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 849470b18831..53b8f89a0b1c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3779,6 +3779,28 @@ int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev) return 0; } +/* + * Transfer the port index into real index in the HW port status + * registers. Caculate offset between the port's PORTSC register + * and port status base. Divide the number of per port register + * to get the real index. The raw port number bases 1. + */ +int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + __le32 __iomem *base_addr = &xhci->op_regs->port_status_base; + __le32 __iomem *addr; + int raw_port; + + if (hcd->speed != HCD_USB3) + addr = xhci->usb2_ports[port1 - 1]; + else + addr = xhci->usb3_ports[port1 - 1]; + + raw_port = (addr - base_addr)/NUM_PORT_REGS + 1; + return raw_port; +} + #ifdef CONFIG_USB_SUSPEND /* BESL to HIRD Encoding array for USB2 LPM */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 2c510e4a7d4c..d798b6931914 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1829,6 +1829,7 @@ void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array, int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex, char *buf, u16 wLength); int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); +int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); #ifdef CONFIG_PM int xhci_bus_suspend(struct usb_hcd *hcd); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 0a78df5f6cfd..59694b5e5e90 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -357,6 +357,7 @@ struct hc_driver { */ int (*disable_usb3_lpm_timeout)(struct usb_hcd *, struct usb_device *, enum usb3_link_state state); + int (*find_raw_port_number)(struct usb_hcd *, int); }; extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb); @@ -396,6 +397,7 @@ extern int usb_hcd_is_primary_hcd(struct usb_hcd *hcd); extern int usb_add_hcd(struct usb_hcd *hcd, unsigned int irqnum, unsigned long irqflags); extern void usb_remove_hcd(struct usb_hcd *hcd); +extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); -- cgit v1.2.2 From bafcaf6d84b5d1bf92dabd1ffe7753ed36b7552e Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Tue, 19 Mar 2013 16:48:13 +0800 Subject: usb/acpi: binding xhci root hub usb port with ACPI This patch is to bind xhci root hub usb port with its acpi node. 
The port num in the acpi table matches the sequence in the xhci extended capabilities table, so before binding, call usb_hcd_find_raw_port_number() to translate the hub port number into the raw port number that follows that sequence. Signed-off-by: Lan Tianyu Signed-off-by: Sarah Sharp --- drivers/usb/core/usb-acpi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index b6f4bad3f756..255c14464bf2 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "usb.h" @@ -188,8 +189,13 @@ static int usb_acpi_find_device(struct device *dev, acpi_handle *handle) * connected to. */ if (!udev->parent) { - *handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev), + struct usb_hcd *hcd = bus_to_hcd(udev->bus); + int raw_port_num; + + raw_port_num = usb_hcd_find_raw_port_number(hcd, port_num); + *handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev), + raw_port_num); if (!*handle) return -ENODEV; } else { -- cgit v1.2.2 From 1c11a172cb30492f5f6a82c6e118fdcd9946c34f Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 21 Mar 2013 12:06:48 +0530 Subject: usb: xhci: Fix TRB transfer length macro used for Event TRB. Use the proper macro when extracting the TRB transfer length from transfer event TRBs: add a macro EVENT_TRB_LEN (bits 0:23) for this purpose and use it instead of TRB_LEN (bits 0:16) for event TRBs. This patch should be backported to kernels as old as 2.6.31 that contain the commit b10de142119a676552df3f0d2e3a9d647036c26a "USB: xhci: Bulk transfer support". This patch will have issues applying to older kernels. Signed-off-by: Vivek gautam Signed-off-by: Sarah Sharp Cc: stable@vger.kernel.org --- drivers/usb/host/xhci-ring.c | 24 ++++++++++++------------ drivers/usb/host/xhci.h | 4 ++++ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ec2681918682..9652dae95942 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2029,8 +2029,8 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, if (event_trb != ep_ring->dequeue && event_trb != td->last_trb) td->urb->actual_length = - td->urb->transfer_buffer_length - - TRB_LEN(le32_to_cpu(event->transfer_len)); + td->urb->transfer_buffer_length - + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); else td->urb->actual_length = 0; @@ -2062,7 +2062,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, /* Maybe the event was for the data stage?
*/ td->urb->actual_length = td->urb->transfer_buffer_length - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); xhci_dbg(xhci, "Waiting for status " "stage event\n"); return 0; @@ -2098,7 +2098,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: - if (TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) { + if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) { frame->status = 0; break; } @@ -2143,7 +2143,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, len += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])); } len += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); if (trb_comp_code != COMP_STOP_INVAL) { frame->actual_length = len; @@ -2201,7 +2201,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, case COMP_SUCCESS: /* Double check that the HW transferred everything. */ if (event_trb != td->last_trb || - TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { xhci_warn(xhci, "WARN Successful completion " "on short TX\n"); if (td->urb->transfer_flags & URB_SHORT_NOT_OK) @@ -2229,18 +2229,18 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, "%d bytes untransferred\n", td->urb->ep->desc.bEndpointAddress, td->urb->transfer_buffer_length, - TRB_LEN(le32_to_cpu(event->transfer_len))); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len))); /* Fast path - was this the last TRB in the TD for this URB? */ if (event_trb == td->last_trb) { - if (TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { + if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { td->urb->actual_length = td->urb->transfer_buffer_length - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); if (td->urb->transfer_buffer_length < td->urb->actual_length) { xhci_warn(xhci, "HC gave bad length " "of %d bytes left\n", - TRB_LEN(le32_to_cpu(event->transfer_len))); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len))); td->urb->actual_length = 0; if (td->urb->transfer_flags & URB_SHORT_NOT_OK) *status = -EREMOTEIO; @@ -2282,7 +2282,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, if (trb_comp_code != COMP_STOP_INVAL) td->urb->actual_length += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) - - TRB_LEN(le32_to_cpu(event->transfer_len)); + EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); } return finish_td(xhci, td, event_trb, event, ep, status, false); @@ -2370,7 +2370,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, * transfer type */ case COMP_SUCCESS: - if (TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) + if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) break; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) trb_comp_code = COMP_SHORT_TX; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index d798b6931914..63582719e0fb 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -972,6 +972,10 @@ struct xhci_transfer_event { __le32 flags; }; +/* Transfer event TRB length bit mask */ +/* bits 0:23 */ +#define EVENT_TRB_LEN(p) ((p) & 0xffffff) + /** Transfer Event bit fields **/ #define TRB_TO_EP_ID(p) (((p) >> 16) & 0x1f) -- cgit v1.2.2 From a83d6755814e4614ba77e15d82796af0f695c6b8 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Mon, 18 Mar 2013 10:19:51 -0700 
Subject: xhci: Don't warn on empty ring for suspended devices. When a device attached to the roothub is suspended, the endpoint rings are stopped. The host may generate a completion event with the completion code set to 'Stopped' or 'Stopped Invalid' when the ring is halted. The current xHCI code prints a warning in that case, which can be really annoying if the USB device is coming into and out of suspend. Remove the unnecessary warning. Signed-off-by: Sarah Sharp Tested-by: Stephen Hemminger --- drivers/usb/host/xhci-ring.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9652dae95942..1969c001b3f9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2463,14 +2463,21 @@ static int handle_tx_event(struct xhci_hcd *xhci, * TD list. */ if (list_empty(&ep_ring->td_list)) { - xhci_warn(xhci, "WARN Event TRB for slot %d ep %d " - "with no TDs queued?\n", - TRB_TO_SLOT_ID(le32_to_cpu(event->flags)), - ep_index); - xhci_dbg(xhci, "Event TRB with TRB type ID %u\n", - (le32_to_cpu(event->flags) & - TRB_TYPE_BITMASK)>>10); - xhci_print_trb_offsets(xhci, (union xhci_trb *) event); + /* + * A stopped endpoint may generate an extra completion + * event if the device was suspended. Don't print + * warnings. + */ + if (!(trb_comp_code == COMP_STOP || + trb_comp_code == COMP_STOP_INVAL)) { + xhci_warn(xhci, "WARN Event TRB for slot %d ep %d with no TDs queued?\n", + TRB_TO_SLOT_ID(le32_to_cpu(event->flags)), + ep_index); + xhci_dbg(xhci, "Event TRB with TRB type ID %u\n", + (le32_to_cpu(event->flags) & + TRB_TYPE_BITMASK)>>10); + xhci_print_trb_offsets(xhci, (union xhci_trb *) event); + } if (ep->skip) { ep->skip = false; xhci_dbg(xhci, "td_list is empty while skip " -- cgit v1.2.2 From a79ca223e029aa4f09abb337accf1812c900a800 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Tue, 26 Mar 2013 01:52:45 +0800 Subject: ipv6: fix bad free of addrconf_init_net Signed-off-by: Hong Zhiguo Signed-off-by: David S. 
Miller --- net/ipv6/addrconf.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2c7e615f902..26512250e095 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4784,26 +4784,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; -- cgit v1.2.2 From d17cfb34dc5eb527b98448f3999aac52311d438b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 22 Mar 2013 21:47:51 +0000 Subject: ARM64: early_printk: Fix check for CONFIG_ARM64_64K_PAGES The 'CONFIG_' prefix is not implicit in IS_ENABLED(). Signed-off-by: Ben Hutchings Cc: Arnd Bergmann Cc: Paul Bolle Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 224b44ab534e..70b8cd4021c4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -261,7 +261,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt) { unsigned long size, mask; - bool page64k = IS_ENABLED(ARM64_64K_PAGES); + bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES); pgd_t *pgd; pud_t *pud; pmd_t *pmd; -- cgit v1.2.2 From 532ee00c4459086840eb35cc9c198bf580420aeb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 25 Mar 2013 12:24:35 -0300 Subject: [media] fix compilation with both V4L2 and I2C as 'm' When config options are: CONFIG_VIDEO_DEV=y CONFIG_VIDEO_V4L2=m CONFIG_I2C=m Compilation breaks, as reported by: https://bugzilla.kernel.org/show_bug.cgi?id=55681 Before changeset 7b34be71db533f3e0cf93d53cf62d036cdb5418a, no compilation errors occurred. However, the I2C code there at v4l2-device was incorrectly disabled. 
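An aside on the IS_ENABLED() fix a few commits up: the macro only works when handed the full CONFIG_ symbol, because it is built from token pasting against names that kconfig defines to 1, and an unknown name quietly yields 0 rather than an error. A compilable sketch of the trick (simplified from include/linux/kconfig.h; the real macro also checks the _MODULE variant) shows why IS_ENABLED(ARM64_64K_PAGES) was silently false:

    #include <stdio.h>

    #define __ARG_PLACEHOLDER_1 0,
    #define __take_second_arg(__ignored, val, ...) val
    #define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
    #define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
    #define __is_defined(x) ___is_defined(x)
    #define IS_ENABLED(option) __is_defined(option)

    #define CONFIG_ARM64_64K_PAGES 1        /* pretend the option is set */

    int main(void)
    {
            /* Correct usage: the full CONFIG_ symbol evaluates to 1. */
            printf("%d\n", IS_ENABLED(CONFIG_ARM64_64K_PAGES));
            /* The bug: ARM64_64K_PAGES is never #defined, so the paste
             * misses the placeholder and this quietly evaluates to 0. */
            printf("%d\n", IS_ENABLED(ARM64_64K_PAGES));
            return 0;
    }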
Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index a9d355230e8e..768aaf62d5dc 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -10,7 +10,7 @@ ifeq ($(CONFIG_COMPAT),y) videodev-objs += v4l2-compat-ioctl32.o endif -obj-$(CONFIG_VIDEO_DEV) += videodev.o +obj-$(CONFIG_VIDEO_V4L2) += videodev.o obj-$(CONFIG_VIDEO_V4L2_INT_DEVICE) += v4l2-int-device.o obj-$(CONFIG_VIDEO_V4L2) += v4l2-common.o -- cgit v1.2.2 From e4317ce877a31dbb9d96375391c1c4ad2210d637 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 22 Mar 2013 15:16:29 +0000 Subject: staging: comedi: s626: fix continuous acquisition For the s626 driver, there is a bug in the handling of asynchronous commands on the AI subdevice when the stop source is `TRIG_NONE`. The command should run continuously until cancelled, but the interrupt handler stops the command running after the first scan. The command set-up function `s626_ai_cmd()` contains this code: switch (cmd->stop_src) { case TRIG_COUNT: /* data arrives as one packet */ devpriv->ai_sample_count = cmd->stop_arg; devpriv->ai_continous = 0; break; case TRIG_NONE: /* continous acquisition */ devpriv->ai_continous = 1; devpriv->ai_sample_count = 0; break; } The interrupt handler `s626_irq_handler()` contains this code: if (!(devpriv->ai_continous)) devpriv->ai_sample_count--; if (devpriv->ai_sample_count <= 0) { devpriv->ai_cmd_running = 0; /* ... */ } So `devpriv->ai_sample_count` is only decremented for the `TRIG_COUNT` case, but `devpriv->ai_cmd_running` is set to 0 (and the command stopped) regardless. Fix this in `s626_ai_cmd()` by setting `devpriv->ai_sample_count = 1` for the `TRIG_NONE` case. The interrupt handler will not decrement it so it will remain greater than 0 and the check for stopping the acquisition will fail. Cc: stable Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/s626.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/s626.c b/drivers/staging/comedi/drivers/s626.c index 81a1fe661579..71a73ec5af8d 100644 --- a/drivers/staging/comedi/drivers/s626.c +++ b/drivers/staging/comedi/drivers/s626.c @@ -1483,7 +1483,7 @@ static int s626_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s) case TRIG_NONE: /* continous acquisition */ devpriv->ai_continous = 1; - devpriv->ai_sample_count = 0; + devpriv->ai_sample_count = 1; break; } -- cgit v1.2.2 From 85ecd0322b9a1a9f451d9150e9460ab42fd17219 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Fri, 22 Mar 2013 12:16:52 -0400 Subject: USB: EHCI: fix bug in iTD/siTD DMA pool allocation [Description written by Alan Stern] Soeren tracked down a very difficult bug in ehci-hcd's DMA pool management of iTD and siTD structures. Some background: ehci-hcd gives each isochronous endpoint its own set of active and free itd's (or sitd's for full-speed devices). When a new itd is needed, it is taken from the head of the free list, if possible. However, itd's must not be used twice in a single frame because the hardware continues to access the data structure for the entire duration of a frame. Therefore if the itd at the head of the free list has its "frame" member equal to the current value of ehci->now_frame, it cannot be reused and instead a new itd is allocated from the DMA pool. 
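In sketch form, the guard just described looks like the following; the structure, names and list handling are invented for illustration and are not ehci-hcd's actual code:

    #include <stdlib.h>

    #define INVALID_FRAME 9999      /* sentinel: never a legal frame number */

    struct toy_itd {
            unsigned int frame;     /* frame this itd was last active in */
            struct toy_itd *next;   /* free-list link */
    };

    /* Pop a reusable itd; fall back to a fresh allocation when the head
     * may still be in flight during the current frame. */
    static struct toy_itd *toy_itd_get(struct toy_itd **free_list,
                                       unsigned int now_frame)
    {
            struct toy_itd *itd = *free_list;

            if (itd && itd->frame != now_frame) {
                    *free_list = itd->next;
                    return itd;
            }
            return calloc(1, sizeof(*itd));  /* slow path: new from the pool */
    }

    /* Return an itd that was never scheduled: stamp it with the sentinel
     * so its zeroed ->frame can't later collide with now_frame == 0. */
    static void toy_itd_put_unused(struct toy_itd **free_list,
                                   struct toy_itd *itd)
    {
            itd->frame = INVALID_FRAME;
            itd->next = *free_list;
            *free_list = itd;
    }

The patch below does the analogous stamping with 9999 when itd's and sitd's are returned early.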
The entries on the free list are not released back to the pool until the endpoint is no longer in use. The bug arises from the fact that sometimes an itd can be moved back onto the free list before itd->frame has been set properly. In Soeren's case, this happened because ehci-hcd can allocate one more itd than it actually needs for an URB; the extra itd may or may not be required depending on how the transfer aligns with a frame boundary. For example, an URB with 8 isochronous packets will cause two itd's to be allocated. If the URB is scheduled to start in microframe 3 of frame N then it will require both itds: one for microframes 3 - 7 of frame N and one for microframes 0 - 2 of frame N+1. But if the URB had been scheduled to start in microframe 0 then it would require only the first itd, which could cover microframes 0 - 7 of frame N. The second itd would be returned to the end of the free list. The itd allocation routine initializes the entire structure to 0, so the extra itd ends up on the free list with itd->frame set to 0 instead of a meaningful value. After a while the itd reaches the head of the list, and occasionally this happens when ehci->now_frame is equal to 0. Then, even though it would be okay to reuse this itd, the driver thinks it must get another itd from the DMA pool. For as long as the isochronous endpoint remains in use, this flaw in the mechanism causes more and more itd's to be taken slowly from the DMA pool. Since none are released back, the pool eventually becomes exhausted. This results in memory allocation failures, which typically show up during a long-running audio stream. Video might suffer the same effect. The fix is very simple. To prevent allocations from the pool when they aren't needed, make sure that itd's sent back to the free list prematurely have itd->frame set to an invalid value which can never be equal to ehci->now_frame. This should be applied to -stable kernels going back to 3.6. Signed-off-by: Soeren Moch Signed-off-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-sched.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index b476daf49f6f..010f686d8881 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -1214,6 +1214,7 @@ itd_urb_transaction ( memset (itd, 0, sizeof *itd); itd->itd_dma = itd_dma; + itd->frame = 9999; /* an invalid value */ list_add (&itd->itd_list, &sched->td_list); } spin_unlock_irqrestore (&ehci->lock, flags); @@ -1915,6 +1916,7 @@ sitd_urb_transaction ( memset (sitd, 0, sizeof *sitd); sitd->sitd_dma = sitd_dma; + sitd->frame = 9999; /* an invalid value */ list_add (&sitd->sitd_list, &iso_sched->td_list); } -- cgit v1.2.2 From f9294e989fa6f2990da155242db03cea1550cac8 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Fri, 22 Mar 2013 09:12:13 +0000 Subject: powerpc: define the conditions where the ePAPR idle hcall can be supported For 32-bit, CONFIG_EPAPR_PARAVIRT pulls in both epapr_paravirt.c and epapr_hcalls.c, which contains the 32-bit paravirt idle loop. For 64-bit, the paravirt idle loop is in idle_book3e.S and that source file is included only if CONFIG_PPC_BOOK3E_64 is defined. This patch makes that dependency for 64-bit explicit.
Fixes these build errors: arch/powerpc/kernel/built-in.o: In function `restore_pblist_ptr': ftrace.c:(.toc+0xdc0): undefined reference to `epapr_ev_idle_start' ftrace.c:(.toc+0xdd0): undefined reference to `epapr_ev_idle' Signed-off-by: Stuart Yoder Signed-off-by: Stephen Rothwell --- arch/powerpc/kernel/epapr_paravirt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index f3eab8594d9f..d44a571e45a7 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -23,8 +23,10 @@ #include #include +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) extern void epapr_ev_idle(void); extern u32 epapr_ev_idle_start[]; +#endif bool epapr_paravirt_enabled; @@ -47,11 +49,15 @@ static int __init epapr_paravirt_init(void) for (i = 0; i < (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) patch_instruction(epapr_ev_idle_start + i, insts[i]); +#endif } +#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) if (of_get_property(hyper_node, "has-idle", NULL)) ppc_md.power_save = epapr_ev_idle; +#endif epapr_paravirt_enabled = true; -- cgit v1.2.2 From 9196d8acd7f91758872108958dfded7684628444 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 19 Mar 2013 11:34:56 +0100 Subject: TTY: 8250, revert module name change In 3.7 the 8250 module name was changed unintentionally from 8250 to 8250_core by commit 835d844d1a28efba81d5aca7385e24c29d3a6db2 (8250_pnp: do pnp probe before legacy probe). We then had to re-introduce the old module options to ensure the good old 8250.nr_uart & co. still work. This can be done only by a very dirty hack and we did it in f2b8dfd9e480c3db3bad0c25c590a5d11b31f4ef (serial: 8250: Keep 8250. module options functional after driver rename). That is so damn ugly that I decided to revert to the old module name and deprecate the new 8250_core options present in 3.7 and 3.8 only. The deprecation will happen in the following patch. Note that this patch changes the hack above to support "8250_core.*", because we now have "8250.*" natively. Signed-off-by: Jiri Slaby Cc: Josh Boyer Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.c | 3448 ----------------------------------- drivers/tty/serial/8250/8250_core.c | 3448 +++++++++++++++++++++++++++++++++++ drivers/tty/serial/8250/Makefile | 8 +- 3 files changed, 3452 insertions(+), 3452 deletions(-) delete mode 100644 drivers/tty/serial/8250/8250.c create mode 100644 drivers/tty/serial/8250/8250_core.c diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c deleted file mode 100644 index cf6a5383748a..000000000000 --- a/drivers/tty/serial/8250/8250.c +++ /dev/null @@ -1,3448 +0,0 @@ -/* - * Driver for 8250/16550-type serial ports - * - * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. - * - * Copyright (C) 2001 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * A note about mapbase / membase - * - * mapbase is the physical address of the IO port. - * membase is an 'ioremapped' cookie.
- */ - -#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) -#define SUPPORT_SYSRQ -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_SPARC -#include -#endif - -#include -#include - -#include "8250.h" - -/* - * Configuration: - * share_irqs - whether we pass IRQF_SHARED to request_irq(). This option - * is unsafe when used on edge-triggered interrupts. - */ -static unsigned int share_irqs = SERIAL8250_SHARE_IRQS; - -static unsigned int nr_uarts = CONFIG_SERIAL_8250_RUNTIME_UARTS; - -static struct uart_driver serial8250_reg; - -static int serial_index(struct uart_port *port) -{ - return (serial8250_reg.minor - 64) + port->line; -} - -static unsigned int skip_txen_test; /* force skip of txen test at init time */ - -/* - * Debugging. - */ -#if 0 -#define DEBUG_AUTOCONF(fmt...) printk(fmt) -#else -#define DEBUG_AUTOCONF(fmt...) do { } while (0) -#endif - -#if 0 -#define DEBUG_INTR(fmt...) printk(fmt) -#else -#define DEBUG_INTR(fmt...) do { } while (0) -#endif - -#define PASS_LIMIT 512 - -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - - -#ifdef CONFIG_SERIAL_8250_DETECT_IRQ -#define CONFIG_SERIAL_DETECT_IRQ 1 -#endif -#ifdef CONFIG_SERIAL_8250_MANY_PORTS -#define CONFIG_SERIAL_MANY_PORTS 1 -#endif - -/* - * HUB6 is always on. This will be removed once the header - * files have been cleaned. - */ -#define CONFIG_HUB6 1 - -#include -/* - * SERIAL_PORT_DFNS tells us about built-in ports that have no - * standard enumeration mechanism. Platforms that can find all - * serial ports via mechanisms like ACPI or PCI need not supply it. - */ -#ifndef SERIAL_PORT_DFNS -#define SERIAL_PORT_DFNS -#endif - -static const struct old_serial_port old_serial_port[] = { - SERIAL_PORT_DFNS /* defined in asm/serial.h */ -}; - -#define UART_NR CONFIG_SERIAL_8250_NR_UARTS - -#ifdef CONFIG_SERIAL_8250_RSA - -#define PORT_RSA_MAX 4 -static unsigned long probe_rsa[PORT_RSA_MAX]; -static unsigned int probe_rsa_count; -#endif /* CONFIG_SERIAL_8250_RSA */ - -struct irq_info { - struct hlist_node node; - int irq; - spinlock_t lock; /* Protects list not the hash */ - struct list_head *head; -}; - -#define NR_IRQ_HASH 32 /* Can be adjusted later */ -static struct hlist_head irq_lists[NR_IRQ_HASH]; -static DEFINE_MUTEX(hash_mutex); /* Used to walk the hash */ - -/* - * Here we define the default xmit fifo size used for each type of UART. 
- */ -static const struct serial8250_config uart_config[] = { - [PORT_UNKNOWN] = { - .name = "unknown", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_8250] = { - .name = "8250", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16450] = { - .name = "16450", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16550] = { - .name = "16550", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16550A] = { - .name = "16550A", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO, - }, - [PORT_CIRRUS] = { - .name = "Cirrus", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16650] = { - .name = "ST16650", - .fifo_size = 1, - .tx_loadsz = 1, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - [PORT_16650V2] = { - .name = "ST16650V2", - .fifo_size = 32, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | - UART_FCR_T_TRIG_00, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - [PORT_16750] = { - .name = "TI16750", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | - UART_FCR7_64BYTE, - .flags = UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE, - }, - [PORT_STARTECH] = { - .name = "Startech", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16C950] = { - .name = "16C950/954", - .fifo_size = 128, - .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - /* UART_CAP_EFR breaks billionon CF bluetooth card. */ - .flags = UART_CAP_FIFO | UART_CAP_SLEEP, - }, - [PORT_16654] = { - .name = "ST16654", - .fifo_size = 64, - .tx_loadsz = 32, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | - UART_FCR_T_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - [PORT_16850] = { - .name = "XR16850", - .fifo_size = 128, - .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - [PORT_RSA] = { - .name = "RSA", - .fifo_size = 2048, - .tx_loadsz = 2048, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11, - .flags = UART_CAP_FIFO, - }, - [PORT_NS16550A] = { - .name = "NS16550A", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_NATSEMI, - }, - [PORT_XSCALE] = { - .name = "XScale", - .fifo_size = 32, - .tx_loadsz = 32, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE, - }, - [PORT_OCTEON] = { - .name = "OCTEON", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO, - }, - [PORT_AR7] = { - .name = "AR7", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_U6_16550A] = { - .name = "U6_16550A", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_TEGRA] = { - .name = "Tegra", - .fifo_size = 32, - .tx_loadsz = 8, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | - UART_FCR_T_TRIG_01, - .flags = UART_CAP_FIFO | UART_CAP_RTOIE, - }, - [PORT_XR17D15X] = { - .name = "XR17D15X", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP, - }, - [PORT_XR17V35X] = { - .name = "XR17V35X", - .fifo_size = 256, - .tx_loadsz = 256, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 | - UART_FCR_T_TRIG_11, - 
.flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP, - }, - [PORT_LPC3220] = { - .name = "LPC3220", - .fifo_size = 64, - .tx_loadsz = 32, - .fcr = UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO | - UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00, - .flags = UART_CAP_FIFO, - }, - [PORT_BRCM_TRUMANAGE] = { - .name = "TruManage", - .fifo_size = 1, - .tx_loadsz = 1024, - .flags = UART_CAP_HFIFO, - }, - [PORT_8250_CIR] = { - .name = "CIR port" - }, - [PORT_ALTR_16550_F32] = { - .name = "Altera 16550 FIFO32", - .fifo_size = 32, - .tx_loadsz = 32, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_ALTR_16550_F64] = { - .name = "Altera 16550 FIFO64", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_ALTR_16550_F128] = { - .name = "Altera 16550 FIFO128", - .fifo_size = 128, - .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, -}; - -/* Uart divisor latch read */ -static int default_serial_dl_read(struct uart_8250_port *up) -{ - return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8; -} - -/* Uart divisor latch write */ -static void default_serial_dl_write(struct uart_8250_port *up, int value) -{ - serial_out(up, UART_DLL, value & 0xff); - serial_out(up, UART_DLM, value >> 8 & 0xff); -} - -#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X) - -/* Au1x00/RT288x UART hardware has a weird register layout */ -static const u8 au_io_in_map[] = { - [UART_RX] = 0, - [UART_IER] = 2, - [UART_IIR] = 3, - [UART_LCR] = 5, - [UART_MCR] = 6, - [UART_LSR] = 7, - [UART_MSR] = 8, -}; - -static const u8 au_io_out_map[] = { - [UART_TX] = 1, - [UART_IER] = 2, - [UART_FCR] = 4, - [UART_LCR] = 5, - [UART_MCR] = 6, -}; - -static unsigned int au_serial_in(struct uart_port *p, int offset) -{ - offset = au_io_in_map[offset] << p->regshift; - return __raw_readl(p->membase + offset); -} - -static void au_serial_out(struct uart_port *p, int offset, int value) -{ - offset = au_io_out_map[offset] << p->regshift; - __raw_writel(value, p->membase + offset); -} - -/* Au1x00 haven't got a standard divisor latch */ -static int au_serial_dl_read(struct uart_8250_port *up) -{ - return __raw_readl(up->port.membase + 0x28); -} - -static void au_serial_dl_write(struct uart_8250_port *up, int value) -{ - __raw_writel(value, up->port.membase + 0x28); -} - -#endif - -static unsigned int hub6_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - outb(p->hub6 - 1 + offset, p->iobase); - return inb(p->iobase + 1); -} - -static void hub6_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - outb(p->hub6 - 1 + offset, p->iobase); - outb(value, p->iobase + 1); -} - -static unsigned int mem_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - return readb(p->membase + offset); -} - -static void mem_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - writeb(value, p->membase + offset); -} - -static void mem32_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - writel(value, p->membase + offset); -} - -static unsigned int mem32_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - return readl(p->membase + offset); -} - -static unsigned int io_serial_in(struct uart_port *p, int offset) -{ - offset = 
offset << p->regshift; - return inb(p->iobase + offset); -} - -static void io_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - outb(value, p->iobase + offset); -} - -static int serial8250_default_handle_irq(struct uart_port *port); -static int exar_handle_irq(struct uart_port *port); - -static void set_io_from_upio(struct uart_port *p) -{ - struct uart_8250_port *up = - container_of(p, struct uart_8250_port, port); - - up->dl_read = default_serial_dl_read; - up->dl_write = default_serial_dl_write; - - switch (p->iotype) { - case UPIO_HUB6: - p->serial_in = hub6_serial_in; - p->serial_out = hub6_serial_out; - break; - - case UPIO_MEM: - p->serial_in = mem_serial_in; - p->serial_out = mem_serial_out; - break; - - case UPIO_MEM32: - p->serial_in = mem32_serial_in; - p->serial_out = mem32_serial_out; - break; - -#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X) - case UPIO_AU: - p->serial_in = au_serial_in; - p->serial_out = au_serial_out; - up->dl_read = au_serial_dl_read; - up->dl_write = au_serial_dl_write; - break; -#endif - - default: - p->serial_in = io_serial_in; - p->serial_out = io_serial_out; - break; - } - /* Remember loaded iotype */ - up->cur_iotype = p->iotype; - p->handle_irq = serial8250_default_handle_irq; -} - -static void -serial_port_out_sync(struct uart_port *p, int offset, int value) -{ - switch (p->iotype) { - case UPIO_MEM: - case UPIO_MEM32: - case UPIO_AU: - p->serial_out(p, offset, value); - p->serial_in(p, UART_LCR); /* safe, no side-effects */ - break; - default: - p->serial_out(p, offset, value); - } -} - -/* - * For the 16C950 - */ -static void serial_icr_write(struct uart_8250_port *up, int offset, int value) -{ - serial_out(up, UART_SCR, offset); - serial_out(up, UART_ICR, value); -} - -static unsigned int serial_icr_read(struct uart_8250_port *up, int offset) -{ - unsigned int value; - - serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); - serial_out(up, UART_SCR, offset); - value = serial_in(up, UART_ICR); - serial_icr_write(up, UART_ACR, up->acr); - - return value; -} - -/* - * FIFO support. - */ -static void serial8250_clear_fifos(struct uart_8250_port *p) -{ - if (p->capabilities & UART_CAP_FIFO) { - serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_out(p, UART_FCR, 0); - } -} - -void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) -{ - unsigned char fcr; - - serial8250_clear_fifos(p); - fcr = uart_config[p->port.type].fcr; - serial_out(p, UART_FCR, fcr); -} -EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); - -/* - * IER sleep support. UARTs which have EFRs need the "extended - * capability" bit enabled. Note that on XR16C850s, we need to - * reset LCR to write to IER. - */ -static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) -{ - /* - * Exar UARTs have a SLEEP register that enables or disables - * each UART to enter sleep mode separately. On the XR17V35x the - * register is accessible to each UART at the UART_EXAR_SLEEP - * offset but the UART channel may only write to the corresponding - * bit. 
- */ - if ((p->port.type == PORT_XR17V35X) || - (p->port.type == PORT_XR17D15X)) { - serial_out(p, UART_EXAR_SLEEP, 0xff); - return; - } - - if (p->capabilities & UART_CAP_SLEEP) { - if (p->capabilities & UART_CAP_EFR) { - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(p, UART_EFR, UART_EFR_ECB); - serial_out(p, UART_LCR, 0); - } - serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); - if (p->capabilities & UART_CAP_EFR) { - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(p, UART_EFR, 0); - serial_out(p, UART_LCR, 0); - } - } -} - -#ifdef CONFIG_SERIAL_8250_RSA -/* - * Attempts to turn on the RSA FIFO. Returns zero on failure. - * We set the port uart clock rate if we succeed. - */ -static int __enable_rsa(struct uart_8250_port *up) -{ - unsigned char mode; - int result; - - mode = serial_in(up, UART_RSA_MSR); - result = mode & UART_RSA_MSR_FIFO; - - if (!result) { - serial_out(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO); - mode = serial_in(up, UART_RSA_MSR); - result = mode & UART_RSA_MSR_FIFO; - } - - if (result) - up->port.uartclk = SERIAL_RSA_BAUD_BASE * 16; - - return result; -} - -static void enable_rsa(struct uart_8250_port *up) -{ - if (up->port.type == PORT_RSA) { - if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) { - spin_lock_irq(&up->port.lock); - __enable_rsa(up); - spin_unlock_irq(&up->port.lock); - } - if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) - serial_out(up, UART_RSA_FRR, 0); - } -} - -/* - * Attempts to turn off the RSA FIFO. Returns zero on failure. - * It is unknown why interrupts were disabled in here. However, - * the caller is expected to preserve this behaviour by grabbing - * the spinlock before calling this function. - */ -static void disable_rsa(struct uart_8250_port *up) -{ - unsigned char mode; - int result; - - if (up->port.type == PORT_RSA && - up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { - spin_lock_irq(&up->port.lock); - - mode = serial_in(up, UART_RSA_MSR); - result = !(mode & UART_RSA_MSR_FIFO); - - if (!result) { - serial_out(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO); - mode = serial_in(up, UART_RSA_MSR); - result = !(mode & UART_RSA_MSR_FIFO); - } - - if (result) - up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; - spin_unlock_irq(&up->port.lock); - } -} -#endif /* CONFIG_SERIAL_8250_RSA */ - -/* - * This is a quickie test to see how big the FIFO is. - * It doesn't work at all the time, more's the pity. 
- */ -static int size_fifo(struct uart_8250_port *up) -{ - unsigned char old_fcr, old_mcr, old_lcr; - unsigned short old_dl; - int count; - - old_lcr = serial_in(up, UART_LCR); - serial_out(up, UART_LCR, 0); - old_fcr = serial_in(up, UART_FCR); - old_mcr = serial_in(up, UART_MCR); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_out(up, UART_MCR, UART_MCR_LOOP); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - old_dl = serial_dl_read(up); - serial_dl_write(up, 0x0001); - serial_out(up, UART_LCR, 0x03); - for (count = 0; count < 256; count++) - serial_out(up, UART_TX, count); - mdelay(20);/* FIXME - schedule_timeout */ - for (count = 0; (serial_in(up, UART_LSR) & UART_LSR_DR) && - (count < 256); count++) - serial_in(up, UART_RX); - serial_out(up, UART_FCR, old_fcr); - serial_out(up, UART_MCR, old_mcr); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - serial_dl_write(up, old_dl); - serial_out(up, UART_LCR, old_lcr); - - return count; -} - -/* - * Read UART ID using the divisor method - set DLL and DLM to zero - * and the revision will be in DLL and device type in DLM. We - * preserve the device state across this. - */ -static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) -{ - unsigned char old_dll, old_dlm, old_lcr; - unsigned int id; - - old_lcr = serial_in(p, UART_LCR); - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A); - - old_dll = serial_in(p, UART_DLL); - old_dlm = serial_in(p, UART_DLM); - - serial_out(p, UART_DLL, 0); - serial_out(p, UART_DLM, 0); - - id = serial_in(p, UART_DLL) | serial_in(p, UART_DLM) << 8; - - serial_out(p, UART_DLL, old_dll); - serial_out(p, UART_DLM, old_dlm); - serial_out(p, UART_LCR, old_lcr); - - return id; -} - -/* - * This is a helper routine to autodetect StarTech/Exar/Oxsemi UART's. - * When this function is called we know it is at least a StarTech - * 16650 V2, but it might be one of several StarTech UARTs, or one of - * its clones. (We treat the broken original StarTech 16650 V1 as a - * 16550, and why not? Startech doesn't seem to even acknowledge its - * existence.) - * - * What evil have men's minds wrought... - */ -static void autoconfig_has_efr(struct uart_8250_port *up) -{ - unsigned int id1, id2, id3, rev; - - /* - * Everything with an EFR has SLEEP - */ - up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; - - /* - * First we check to see if it's an Oxford Semiconductor UART. - * - * If we have to do this here because some non-National - * Semiconductor clone chips lock up if you try writing to the - * LSR register (which serial_icr_read does) - */ - - /* - * Check for Oxford Semiconductor 16C950. - * - * EFR [4] must be set else this test fails. - * - * This shouldn't be necessary, but Mike Hudson (Exoray@isys.ca) - * claims that it's needed for 952 dual UART's (which are not - * recommended for new designs). - */ - up->acr = 0; - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); - serial_out(up, UART_LCR, 0x00); - id1 = serial_icr_read(up, UART_ID1); - id2 = serial_icr_read(up, UART_ID2); - id3 = serial_icr_read(up, UART_ID3); - rev = serial_icr_read(up, UART_REV); - - DEBUG_AUTOCONF("950id=%02x:%02x:%02x:%02x ", id1, id2, id3, rev); - - if (id1 == 0x16 && id2 == 0xC9 && - (id3 == 0x50 || id3 == 0x52 || id3 == 0x54)) { - up->port.type = PORT_16C950; - - /* - * Enable work around for the Oxford Semiconductor 952 rev B - * chip which causes it to seriously miscalculate baud rates - * when DLL is 0. 
- */ - if (id3 == 0x52 && rev == 0x01) - up->bugs |= UART_BUG_QUOT; - return; - } - - /* - * We check for a XR16C850 by setting DLL and DLM to 0, and then - * reading back DLL and DLM. The chip type depends on the DLM - * value read back: - * 0x10 - XR16C850 and the DLL contains the chip revision. - * 0x12 - XR16C2850. - * 0x14 - XR16C854. - */ - id1 = autoconfig_read_divisor_id(up); - DEBUG_AUTOCONF("850id=%04x ", id1); - - id2 = id1 >> 8; - if (id2 == 0x10 || id2 == 0x12 || id2 == 0x14) { - up->port.type = PORT_16850; - return; - } - - /* - * It wasn't an XR16C850. - * - * We distinguish between the '654 and the '650 by counting - * how many bytes are in the FIFO. I'm using this for now, - * since that's the technique that was sent to me in the - * serial driver update, but I'm not convinced this works. - * I've had problems doing this in the past. -TYT - */ - if (size_fifo(up) == 64) - up->port.type = PORT_16654; - else - up->port.type = PORT_16650V2; -} - -/* - * We detected a chip without a FIFO. Only two fall into - * this category - the original 8250 and the 16450. The - * 16450 has a scratch register (accessible with LCR=0) - */ -static void autoconfig_8250(struct uart_8250_port *up) -{ - unsigned char scratch, status1, status2; - - up->port.type = PORT_8250; - - scratch = serial_in(up, UART_SCR); - serial_out(up, UART_SCR, 0xa5); - status1 = serial_in(up, UART_SCR); - serial_out(up, UART_SCR, 0x5a); - status2 = serial_in(up, UART_SCR); - serial_out(up, UART_SCR, scratch); - - if (status1 == 0xa5 && status2 == 0x5a) - up->port.type = PORT_16450; -} - -static int broken_efr(struct uart_8250_port *up) -{ - /* - * Exar ST16C2550 "A2" devices incorrectly detect as - * having an EFR, and report an ID of 0x0201. See - * http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-11/4812.html - */ - if (autoconfig_read_divisor_id(up) == 0x0201 && size_fifo(up) == 16) - return 1; - - return 0; -} - -static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) -{ - unsigned char status; - - status = serial_in(up, 0x04); /* EXCR2 */ -#define PRESL(x) ((x) & 0x30) - if (PRESL(status) == 0x10) { - /* already in high speed mode */ - return 0; - } else { - status &= ~0xB0; /* Disable LOCK, mask out PRESL[01] */ - status |= 0x10; /* 1.625 divisor for baud_base --> 921600 */ - serial_out(up, 0x04, status); - } - return 1; -} - -/* - * We know that the chip has FIFOs. Does it have an EFR? The - * EFR is located in the same register position as the IIR and - * we know the top two bits of the IIR are currently set. The - * EFR should contain zero. Try to read the EFR. - */ -static void autoconfig_16550a(struct uart_8250_port *up) -{ - unsigned char status1, status2; - unsigned int iersave; - - up->port.type = PORT_16550A; - up->capabilities |= UART_CAP_FIFO; - - /* - * XR17V35x UARTs have an extra divisor register, DLD - * that gets enabled with when DLAB is set which will - * cause the device to incorrectly match and assign - * port type to PORT_16650. The EFR for this UART is - * found at offset 0x09. Instead check the Deice ID (DVID) - * register for a 2, 4 or 8 port UART. - */ - if (up->port.flags & UPF_EXAR_EFR) { - status1 = serial_in(up, UART_EXAR_DVID); - if (status1 == 0x82 || status1 == 0x84 || status1 == 0x88) { - DEBUG_AUTOCONF("Exar XR17V35x "); - up->port.type = PORT_XR17V35X; - up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP; - - return; - } - - } - - /* - * Check for presence of the EFR when DLAB is set. - * Only ST16C650V1 UARTs pass this test. 
- */ - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - if (serial_in(up, UART_EFR) == 0) { - serial_out(up, UART_EFR, 0xA8); - if (serial_in(up, UART_EFR) != 0) { - DEBUG_AUTOCONF("EFRv1 "); - up->port.type = PORT_16650; - up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; - } else { - DEBUG_AUTOCONF("Motorola 8xxx DUART "); - } - serial_out(up, UART_EFR, 0); - return; - } - - /* - * Maybe it requires 0xbf to be written to the LCR. - * (other ST16C650V2 UARTs, TI16C752A, etc) - */ - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) { - DEBUG_AUTOCONF("EFRv2 "); - autoconfig_has_efr(up); - return; - } - - /* - * Check for a National Semiconductor SuperIO chip. - * Attempt to switch to bank 2, read the value of the LOOP bit - * from EXCR1. Switch back to bank 0, change it in MCR. Then - * switch back to bank 2, read it from EXCR1 again and check - * it's changed. If so, set baud_base in EXCR2 to 921600. -- dwmw2 - */ - serial_out(up, UART_LCR, 0); - status1 = serial_in(up, UART_MCR); - serial_out(up, UART_LCR, 0xE0); - status2 = serial_in(up, 0x02); /* EXCR1 */ - - if (!((status2 ^ status1) & UART_MCR_LOOP)) { - serial_out(up, UART_LCR, 0); - serial_out(up, UART_MCR, status1 ^ UART_MCR_LOOP); - serial_out(up, UART_LCR, 0xE0); - status2 = serial_in(up, 0x02); /* EXCR1 */ - serial_out(up, UART_LCR, 0); - serial_out(up, UART_MCR, status1); - - if ((status2 ^ status1) & UART_MCR_LOOP) { - unsigned short quot; - - serial_out(up, UART_LCR, 0xE0); - - quot = serial_dl_read(up); - quot <<= 3; - - if (ns16550a_goto_highspeed(up)) - serial_dl_write(up, quot); - - serial_out(up, UART_LCR, 0); - - up->port.uartclk = 921600*16; - up->port.type = PORT_NS16550A; - up->capabilities |= UART_NATSEMI; - return; - } - } - - /* - * No EFR. Try to detect a TI16750, which only sets bit 5 of - * the IIR when 64 byte FIFO mode is enabled when DLAB is set. - * Try setting it with and without DLAB set. Cheap clones - * set bit 5 without DLAB set. - */ - serial_out(up, UART_LCR, 0); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); - status1 = serial_in(up, UART_IIR) >> 5; - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); - status2 = serial_in(up, UART_IIR) >> 5; - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(up, UART_LCR, 0); - - DEBUG_AUTOCONF("iir1=%d iir2=%d ", status1, status2); - - if (status1 == 6 && status2 == 7) { - up->port.type = PORT_16750; - up->capabilities |= UART_CAP_AFE | UART_CAP_SLEEP; - return; - } - - /* - * Try writing and reading the UART_IER_UUE bit (b6). - * If it works, this is probably one of the Xscale platform's - * internal UARTs. - * We're going to explicitly set the UUE bit to 0 before - * trying to write and read a 1 just to make sure it's not - * already a 1 and maybe locked there before we even start start. - */ - iersave = serial_in(up, UART_IER); - serial_out(up, UART_IER, iersave & ~UART_IER_UUE); - if (!(serial_in(up, UART_IER) & UART_IER_UUE)) { - /* - * OK it's in a known zero state, try writing and reading - * without disturbing the current state of the other bits. - */ - serial_out(up, UART_IER, iersave | UART_IER_UUE); - if (serial_in(up, UART_IER) & UART_IER_UUE) { - /* - * It's an Xscale. - * We'll leave the UART_IER_UUE bit set to 1 (enabled). 
- */ - DEBUG_AUTOCONF("Xscale "); - up->port.type = PORT_XSCALE; - up->capabilities |= UART_CAP_UUE | UART_CAP_RTOIE; - return; - } - } else { - /* - * If we got here we couldn't force the IER_UUE bit to 0. - * Log it and continue. - */ - DEBUG_AUTOCONF("Couldn't force IER_UUE to 0 "); - } - serial_out(up, UART_IER, iersave); - - /* - * Exar uarts have EFR in a weird location - */ - if (up->port.flags & UPF_EXAR_EFR) { - DEBUG_AUTOCONF("Exar XR17D15x "); - up->port.type = PORT_XR17D15X; - up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP; - - return; - } - - /* - * We distinguish between 16550A and U6 16550A by counting - * how many bytes are in the FIFO. - */ - if (up->port.type == PORT_16550A && size_fifo(up) == 64) { - up->port.type = PORT_U6_16550A; - up->capabilities |= UART_CAP_AFE; - } -} - -/* - * This routine is called by rs_init() to initialize a specific serial - * port. It determines what type of UART chip this serial port is - * using: 8250, 16450, 16550, 16550A. The important question is - * whether or not this UART is a 16550A or not, since this will - * determine whether or not we can use its FIFO features or not. - */ -static void autoconfig(struct uart_8250_port *up, unsigned int probeflags) -{ - unsigned char status1, scratch, scratch2, scratch3; - unsigned char save_lcr, save_mcr; - struct uart_port *port = &up->port; - unsigned long flags; - unsigned int old_capabilities; - - if (!port->iobase && !port->mapbase && !port->membase) - return; - - DEBUG_AUTOCONF("ttyS%d: autoconf (0x%04lx, 0x%p): ", - serial_index(port), port->iobase, port->membase); - - /* - * We really do need global IRQs disabled here - we're going to - * be frobbing the chips IRQ enable register to see if it exists. - */ - spin_lock_irqsave(&port->lock, flags); - - up->capabilities = 0; - up->bugs = 0; - - if (!(port->flags & UPF_BUGGY_UART)) { - /* - * Do a simple existence test first; if we fail this, - * there's no point trying anything else. - * - * 0x80 is used as a nonsense port to prevent against - * false positives due to ISA bus float. The - * assumption is that 0x80 is a non-existent port; - * which should be safe since include/asm/io.h also - * makes this assumption. - * - * Note: this is safe as long as MCR bit 4 is clear - * and the device is in "PC" mode. - */ - scratch = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); -#ifdef __i386__ - outb(0xff, 0x080); -#endif - /* - * Mask out IER[7:4] bits for test as some UARTs (e.g. TL - * 16C754B) allow only to modify them if an EFR bit is set. - */ - scratch2 = serial_in(up, UART_IER) & 0x0f; - serial_out(up, UART_IER, 0x0F); -#ifdef __i386__ - outb(0, 0x080); -#endif - scratch3 = serial_in(up, UART_IER) & 0x0f; - serial_out(up, UART_IER, scratch); - if (scratch2 != 0 || scratch3 != 0x0F) { - /* - * We failed; there's nothing here - */ - spin_unlock_irqrestore(&port->lock, flags); - DEBUG_AUTOCONF("IER test failed (%02x, %02x) ", - scratch2, scratch3); - goto out; - } - } - - save_mcr = serial_in(up, UART_MCR); - save_lcr = serial_in(up, UART_LCR); - - /* - * Check to see if a UART is really there. Certain broken - * internal modems based on the Rockwell chipset fail this - * test, because they apparently don't implement the loopback - * test mode. So this test is skipped on the COM 1 through - * COM 4 ports. This *should* be safe, since no board - * manufacturer would be stupid enough to design a board - * that conflicts with COM 1-4 --- we hope! 
- */ - if (!(port->flags & UPF_SKIP_TEST)) { - serial_out(up, UART_MCR, UART_MCR_LOOP | 0x0A); - status1 = serial_in(up, UART_MSR) & 0xF0; - serial_out(up, UART_MCR, save_mcr); - if (status1 != 0x90) { - spin_unlock_irqrestore(&port->lock, flags); - DEBUG_AUTOCONF("LOOP test failed (%02x) ", - status1); - goto out; - } - } - - /* - * We're pretty sure there's a port here. Lets find out what - * type of port it is. The IIR top two bits allows us to find - * out if it's 8250 or 16450, 16550, 16550A or later. This - * determines what we test for next. - * - * We also initialise the EFR (if any) to zero for later. The - * EFR occupies the same register location as the FCR and IIR. - */ - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, 0); - serial_out(up, UART_LCR, 0); - - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - scratch = serial_in(up, UART_IIR) >> 6; - - switch (scratch) { - case 0: - autoconfig_8250(up); - break; - case 1: - port->type = PORT_UNKNOWN; - break; - case 2: - port->type = PORT_16550; - break; - case 3: - autoconfig_16550a(up); - break; - } - -#ifdef CONFIG_SERIAL_8250_RSA - /* - * Only probe for RSA ports if we got the region. - */ - if (port->type == PORT_16550A && probeflags & PROBE_RSA) { - int i; - - for (i = 0 ; i < probe_rsa_count; ++i) { - if (probe_rsa[i] == port->iobase && __enable_rsa(up)) { - port->type = PORT_RSA; - break; - } - } - } -#endif - - serial_out(up, UART_LCR, save_lcr); - - port->fifosize = uart_config[up->port.type].fifo_size; - old_capabilities = up->capabilities; - up->capabilities = uart_config[port->type].flags; - up->tx_loadsz = uart_config[port->type].tx_loadsz; - - if (port->type == PORT_UNKNOWN) - goto out_lock; - - /* - * Reset the UART. - */ -#ifdef CONFIG_SERIAL_8250_RSA - if (port->type == PORT_RSA) - serial_out(up, UART_RSA_FRR, 0); -#endif - serial_out(up, UART_MCR, save_mcr); - serial8250_clear_fifos(up); - serial_in(up, UART_RX); - if (up->capabilities & UART_CAP_UUE) - serial_out(up, UART_IER, UART_IER_UUE); - else - serial_out(up, UART_IER, 0); - -out_lock: - spin_unlock_irqrestore(&port->lock, flags); - if (up->capabilities != old_capabilities) { - printk(KERN_WARNING - "ttyS%d: detected caps %08x should be %08x\n", - serial_index(port), old_capabilities, - up->capabilities); - } -out: - DEBUG_AUTOCONF("iir=%d ", scratch); - DEBUG_AUTOCONF("type=%s\n", uart_config[port->type].name); -} - -static void autoconfig_irq(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - unsigned char save_mcr, save_ier; - unsigned char save_ICP = 0; - unsigned int ICP = 0; - unsigned long irqs; - int irq; - - if (port->flags & UPF_FOURPORT) { - ICP = (port->iobase & 0xfe0) | 0x1f; - save_ICP = inb_p(ICP); - outb_p(0x80, ICP); - inb_p(ICP); - } - - /* forget possible initially masked and pending IRQ */ - probe_irq_off(probe_irq_on()); - save_mcr = serial_in(up, UART_MCR); - save_ier = serial_in(up, UART_IER); - serial_out(up, UART_MCR, UART_MCR_OUT1 | UART_MCR_OUT2); - - irqs = probe_irq_on(); - serial_out(up, UART_MCR, 0); - udelay(10); - if (port->flags & UPF_FOURPORT) { - serial_out(up, UART_MCR, - UART_MCR_DTR | UART_MCR_RTS); - } else { - serial_out(up, UART_MCR, - UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2); - } - serial_out(up, UART_IER, 0x0f); /* enable all intrs */ - serial_in(up, UART_LSR); - serial_in(up, UART_RX); - serial_in(up, UART_IIR); - serial_in(up, UART_MSR); - serial_out(up, UART_TX, 0xFF); - udelay(20); - irq = probe_irq_off(irqs); - - serial_out(up, UART_MCR, save_mcr); - 
serial_out(up, UART_IER, save_ier); - - if (port->flags & UPF_FOURPORT) - outb_p(save_ICP, ICP); - - port->irq = (irq > 0) ? irq : 0; -} - -static inline void __stop_tx(struct uart_8250_port *p) -{ - if (p->ier & UART_IER_THRI) { - p->ier &= ~UART_IER_THRI; - serial_out(p, UART_IER, p->ier); - } -} - -static void serial8250_stop_tx(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - __stop_tx(up); - - /* - * We really want to stop the transmitter from sending. - */ - if (port->type == PORT_16C950) { - up->acr |= UART_ACR_TXDIS; - serial_icr_write(up, UART_ACR, up->acr); - } -} - -static void serial8250_start_tx(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - if (up->dma && !serial8250_tx_dma(up)) { - return; - } else if (!(up->ier & UART_IER_THRI)) { - up->ier |= UART_IER_THRI; - serial_port_out(port, UART_IER, up->ier); - - if (up->bugs & UART_BUG_TXEN) { - unsigned char lsr; - lsr = serial_in(up, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - if (lsr & UART_LSR_TEMT) - serial8250_tx_chars(up); - } - } - - /* - * Re-enable the transmitter if we disabled it. - */ - if (port->type == PORT_16C950 && up->acr & UART_ACR_TXDIS) { - up->acr &= ~UART_ACR_TXDIS; - serial_icr_write(up, UART_ACR, up->acr); - } -} - -static void serial8250_stop_rx(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - up->ier &= ~UART_IER_RLSI; - up->port.read_status_mask &= ~UART_LSR_DR; - serial_port_out(port, UART_IER, up->ier); -} - -static void serial8250_enable_ms(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - /* no MSR capabilities */ - if (up->bugs & UART_BUG_NOMSR) - return; - - up->ier |= UART_IER_MSI; - serial_port_out(port, UART_IER, up->ier); -} - -/* - * serial8250_rx_chars: processes according to the passed in LSR - * value, and returns the remaining LSR bits not handled - * by this Rx routine. - */ -unsigned char -serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr) -{ - struct uart_port *port = &up->port; - unsigned char ch; - int max_count = 256; - char flag; - - do { - if (likely(lsr & UART_LSR_DR)) - ch = serial_in(up, UART_RX); - else - /* - * Intel 82571 has a Serial Over Lan device that will - * set UART_LSR_BI without setting UART_LSR_DR when - * it receives a break. To avoid reading from the - * receive buffer without UART_LSR_DR bit set, we - * just force the read character to be 0 - */ - ch = 0; - - flag = TTY_NORMAL; - port->icount.rx++; - - lsr |= up->lsr_saved_flags; - up->lsr_saved_flags = 0; - - if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { - if (lsr & UART_LSR_BI) { - lsr &= ~(UART_LSR_FE | UART_LSR_PE); - port->icount.brk++; - /* - * We do the SysRQ and SAK checking - * here because otherwise the break - * may get masked by ignore_status_mask - * or read_status_mask. - */ - if (uart_handle_break(port)) - goto ignore_char; - } else if (lsr & UART_LSR_PE) - port->icount.parity++; - else if (lsr & UART_LSR_FE) - port->icount.frame++; - if (lsr & UART_LSR_OE) - port->icount.overrun++; - - /* - * Mask off conditions which should be ignored. 
- */ - lsr &= port->read_status_mask; - - if (lsr & UART_LSR_BI) { - DEBUG_INTR("handling break...."); - flag = TTY_BREAK; - } else if (lsr & UART_LSR_PE) - flag = TTY_PARITY; - else if (lsr & UART_LSR_FE) - flag = TTY_FRAME; - } - if (uart_handle_sysrq_char(port, ch)) - goto ignore_char; - - uart_insert_char(port, lsr, UART_LSR_OE, ch, flag); - -ignore_char: - lsr = serial_in(up, UART_LSR); - } while ((lsr & (UART_LSR_DR | UART_LSR_BI)) && (max_count-- > 0)); - spin_unlock(&port->lock); - tty_flip_buffer_push(&port->state->port); - spin_lock(&port->lock); - return lsr; -} -EXPORT_SYMBOL_GPL(serial8250_rx_chars); - -void serial8250_tx_chars(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - struct circ_buf *xmit = &port->state->xmit; - int count; - - if (port->x_char) { - serial_out(up, UART_TX, port->x_char); - port->icount.tx++; - port->x_char = 0; - return; - } - if (uart_tx_stopped(port)) { - serial8250_stop_tx(port); - return; - } - if (uart_circ_empty(xmit)) { - __stop_tx(up); - return; - } - - count = up->tx_loadsz; - do { - serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if (uart_circ_empty(xmit)) - break; - if (up->capabilities & UART_CAP_HFIFO) { - if ((serial_port_in(port, UART_LSR) & BOTH_EMPTY) != - BOTH_EMPTY) - break; - } - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - DEBUG_INTR("THRE..."); - - if (uart_circ_empty(xmit)) - __stop_tx(up); -} -EXPORT_SYMBOL_GPL(serial8250_tx_chars); - -unsigned int serial8250_modem_status(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - unsigned int status = serial_in(up, UART_MSR); - - status |= up->msr_saved_flags; - up->msr_saved_flags = 0; - if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && - port->state != NULL) { - if (status & UART_MSR_TERI) - port->icount.rng++; - if (status & UART_MSR_DDSR) - port->icount.dsr++; - if (status & UART_MSR_DDCD) - uart_handle_dcd_change(port, status & UART_MSR_DCD); - if (status & UART_MSR_DCTS) - uart_handle_cts_change(port, status & UART_MSR_CTS); - - wake_up_interruptible(&port->state->port.delta_msr_wait); - } - - return status; -} -EXPORT_SYMBOL_GPL(serial8250_modem_status); - -/* - * This handles the interrupt from one port. - */ -int serial8250_handle_irq(struct uart_port *port, unsigned int iir) -{ - unsigned char status; - unsigned long flags; - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - int dma_err = 0; - - if (iir & UART_IIR_NO_INT) - return 0; - - spin_lock_irqsave(&port->lock, flags); - - status = serial_port_in(port, UART_LSR); - - DEBUG_INTR("status = %x...", status); - - if (status & (UART_LSR_DR | UART_LSR_BI)) { - if (up->dma) - dma_err = serial8250_rx_dma(up, iir); - - if (!up->dma || dma_err) - status = serial8250_rx_chars(up, status); - } - serial8250_modem_status(up); - if (status & UART_LSR_THRE) - serial8250_tx_chars(up); - - spin_unlock_irqrestore(&port->lock, flags); - return 1; -} -EXPORT_SYMBOL_GPL(serial8250_handle_irq); - -static int serial8250_default_handle_irq(struct uart_port *port) -{ - unsigned int iir = serial_port_in(port, UART_IIR); - - return serial8250_handle_irq(port, iir); -} - -/* - * These Exar UARTs have an extra interrupt indicator that could - * fire for a few unimplemented interrupts. One of which is a - * wakeup event when coming out of sleep. Put this here just - * to be on the safe side that these interrupts don't go unhandled. 
- */
-static int exar_handle_irq(struct uart_port *port)
-{
-	unsigned char int0, int1, int2, int3;
-	unsigned int iir = serial_port_in(port, UART_IIR);
-	int ret;
-
-	ret = serial8250_handle_irq(port, iir);
-
-	if ((port->type == PORT_XR17V35X) ||
-	    (port->type == PORT_XR17D15X)) {
-		int0 = serial_port_in(port, 0x80);
-		int1 = serial_port_in(port, 0x81);
-		int2 = serial_port_in(port, 0x82);
-		int3 = serial_port_in(port, 0x83);
-	}
-
-	return ret;
-}
-
-/*
- * This is the serial driver's interrupt routine.
- *
- * Arjan thinks the old way was overly complex, so it got simplified.
- * Alan disagrees, saying that we need the complexity to handle the weird
- * nature of ISA shared interrupts. (This is a special exception.)
- *
- * In order to handle ISA shared interrupts properly, we need to check
- * that all ports have been serviced, and therefore the ISA interrupt
- * line has been de-asserted.
- *
- * This means we need to loop through all ports, checking that they
- * don't have an interrupt pending.
- */
-static irqreturn_t serial8250_interrupt(int irq, void *dev_id)
-{
-	struct irq_info *i = dev_id;
-	struct list_head *l, *end = NULL;
-	int pass_counter = 0, handled = 0;
-
-	DEBUG_INTR("serial8250_interrupt(%d)...", irq);
-
-	spin_lock(&i->lock);
-
-	l = i->head;
-	do {
-		struct uart_8250_port *up;
-		struct uart_port *port;
-
-		up = list_entry(l, struct uart_8250_port, list);
-		port = &up->port;
-
-		if (port->handle_irq(port)) {
-			handled = 1;
-			end = NULL;
-		} else if (end == NULL)
-			end = l;
-
-		l = l->next;
-
-		if (l == i->head && pass_counter++ > PASS_LIMIT) {
-			/* If we hit this, we're dead. */
-			printk_ratelimited(KERN_ERR
-				"serial8250: too much work for irq%d\n", irq);
-			break;
-		}
-	} while (l != end);
-
-	spin_unlock(&i->lock);
-
-	DEBUG_INTR("end.\n");
-
-	return IRQ_RETVAL(handled);
-}
-
-/*
- * To support ISA shared interrupts, we need to have one interrupt
- * handler that ensures that the IRQ line has been deasserted
- * before returning. Failing to do this will result in the IRQ
- * line being stuck active, and, since ISA irqs are edge triggered,
- * no more IRQs will be seen.
- */
-static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up)
-{
-	spin_lock_irq(&i->lock);
-
-	if (!list_empty(i->head)) {
-		if (i->head == &up->list)
-			i->head = i->head->next;
-		list_del(&up->list);
-	} else {
-		BUG_ON(i->head != &up->list);
-		i->head = NULL;
-	}
-	spin_unlock_irq(&i->lock);
-	/* List empty so throw away the hash node */
-	if (i->head == NULL) {
-		hlist_del(&i->node);
-		kfree(i);
-	}
-}
-
-static int serial_link_irq_chain(struct uart_8250_port *up)
-{
-	struct hlist_head *h;
-	struct hlist_node *n;
-	struct irq_info *i;
-	int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ?
IRQF_SHARED : 0; - - mutex_lock(&hash_mutex); - - h = &irq_lists[up->port.irq % NR_IRQ_HASH]; - - hlist_for_each(n, h) { - i = hlist_entry(n, struct irq_info, node); - if (i->irq == up->port.irq) - break; - } - - if (n == NULL) { - i = kzalloc(sizeof(struct irq_info), GFP_KERNEL); - if (i == NULL) { - mutex_unlock(&hash_mutex); - return -ENOMEM; - } - spin_lock_init(&i->lock); - i->irq = up->port.irq; - hlist_add_head(&i->node, h); - } - mutex_unlock(&hash_mutex); - - spin_lock_irq(&i->lock); - - if (i->head) { - list_add(&up->list, i->head); - spin_unlock_irq(&i->lock); - - ret = 0; - } else { - INIT_LIST_HEAD(&up->list); - i->head = &up->list; - spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; - ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, "serial", i); - if (ret < 0) - serial_do_unlink(i, up); - } - - return ret; -} - -static void serial_unlink_irq_chain(struct uart_8250_port *up) -{ - struct irq_info *i; - struct hlist_node *n; - struct hlist_head *h; - - mutex_lock(&hash_mutex); - - h = &irq_lists[up->port.irq % NR_IRQ_HASH]; - - hlist_for_each(n, h) { - i = hlist_entry(n, struct irq_info, node); - if (i->irq == up->port.irq) - break; - } - - BUG_ON(n == NULL); - BUG_ON(i->head == NULL); - - if (list_empty(i->head)) - free_irq(up->port.irq, i); - - serial_do_unlink(i, up); - mutex_unlock(&hash_mutex); -} - -/* - * This function is used to handle ports that do not have an - * interrupt. This doesn't work very well for 16450's, but gives - * barely passable results for a 16550A. (Although at the expense - * of much CPU overhead). - */ -static void serial8250_timeout(unsigned long data) -{ - struct uart_8250_port *up = (struct uart_8250_port *)data; - - up->port.handle_irq(&up->port); - mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port)); -} - -static void serial8250_backup_timeout(unsigned long data) -{ - struct uart_8250_port *up = (struct uart_8250_port *)data; - unsigned int iir, ier = 0, lsr; - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); - - /* - * Must disable interrupts or else we risk racing with the interrupt - * based handler. - */ - if (up->port.irq) { - ier = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); - } - - iir = serial_in(up, UART_IIR); - - /* - * This should be a safe test for anyone who doesn't trust the - * IIR bits on their UART, but it's specifically designed for - * the "Diva" UART used on the management processor on many HP - * ia64 and parisc boxes. - */ - lsr = serial_in(up, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && - (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && - (lsr & UART_LSR_THRE)) { - iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); - iir |= UART_IIR_THRI; - } - - if (!(iir & UART_IIR_NO_INT)) - serial8250_tx_chars(up); - - if (up->port.irq) - serial_out(up, UART_IER, ier); - - spin_unlock_irqrestore(&up->port.lock, flags); - - /* Standard timer interval plus 0.2s to keep the port running */ - mod_timer(&up->timer, - jiffies + uart_poll_timeout(&up->port) + HZ / 5); -} - -static unsigned int serial8250_tx_empty(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - unsigned long flags; - unsigned int lsr; - - spin_lock_irqsave(&port->lock, flags); - lsr = serial_port_in(port, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - spin_unlock_irqrestore(&port->lock, flags); - - return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? 
TIOCSER_TEMT : 0; -} - -static unsigned int serial8250_get_mctrl(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - unsigned int status; - unsigned int ret; - - status = serial8250_modem_status(up); - - ret = 0; - if (status & UART_MSR_DCD) - ret |= TIOCM_CAR; - if (status & UART_MSR_RI) - ret |= TIOCM_RNG; - if (status & UART_MSR_DSR) - ret |= TIOCM_DSR; - if (status & UART_MSR_CTS) - ret |= TIOCM_CTS; - return ret; -} - -static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - unsigned char mcr = 0; - - if (mctrl & TIOCM_RTS) - mcr |= UART_MCR_RTS; - if (mctrl & TIOCM_DTR) - mcr |= UART_MCR_DTR; - if (mctrl & TIOCM_OUT1) - mcr |= UART_MCR_OUT1; - if (mctrl & TIOCM_OUT2) - mcr |= UART_MCR_OUT2; - if (mctrl & TIOCM_LOOP) - mcr |= UART_MCR_LOOP; - - mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr; - - serial_port_out(port, UART_MCR, mcr); -} - -static void serial8250_break_ctl(struct uart_port *port, int break_state) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - unsigned long flags; - - spin_lock_irqsave(&port->lock, flags); - if (break_state == -1) - up->lcr |= UART_LCR_SBC; - else - up->lcr &= ~UART_LCR_SBC; - serial_port_out(port, UART_LCR, up->lcr); - spin_unlock_irqrestore(&port->lock, flags); -} - -/* - * Wait for transmitter & holding register to empty - */ -static void wait_for_xmitr(struct uart_8250_port *up, int bits) -{ - unsigned int status, tmout = 10000; - - /* Wait up to 10ms for the character(s) to be sent. */ - for (;;) { - status = serial_in(up, UART_LSR); - - up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; - - if ((status & bits) == bits) - break; - if (--tmout == 0) - break; - udelay(1); - } - - /* Wait up to 1s for flow control if necessary */ - if (up->port.flags & UPF_CONS_FLOW) { - unsigned int tmout; - for (tmout = 1000000; tmout; tmout--) { - unsigned int msr = serial_in(up, UART_MSR); - up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; - if (msr & UART_MSR_CTS) - break; - udelay(1); - touch_nmi_watchdog(); - } - } -} - -#ifdef CONFIG_CONSOLE_POLL -/* - * Console polling routines for writing and reading from the uart while - * in an interrupt or debug context. - */ - -static int serial8250_get_poll_char(struct uart_port *port) -{ - unsigned char lsr = serial_port_in(port, UART_LSR); - - if (!(lsr & UART_LSR_DR)) - return NO_POLL_CHAR; - - return serial_port_in(port, UART_RX); -} - - -static void serial8250_put_poll_char(struct uart_port *port, - unsigned char c) -{ - unsigned int ier; - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - /* - * First save the IER then disable the interrupts - */ - ier = serial_port_in(port, UART_IER); - if (up->capabilities & UART_CAP_UUE) - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); - - wait_for_xmitr(up, BOTH_EMPTY); - /* - * Send the character out. - * If a LF, also do CR... 
- */ - serial_port_out(port, UART_TX, c); - if (c == 10) { - wait_for_xmitr(up, BOTH_EMPTY); - serial_port_out(port, UART_TX, 13); - } - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up, BOTH_EMPTY); - serial_port_out(port, UART_IER, ier); -} - -#endif /* CONFIG_CONSOLE_POLL */ - -static int serial8250_startup(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - unsigned long flags; - unsigned char lsr, iir; - int retval; - - if (port->type == PORT_8250_CIR) - return -ENODEV; - - if (!port->fifosize) - port->fifosize = uart_config[port->type].fifo_size; - if (!up->tx_loadsz) - up->tx_loadsz = uart_config[port->type].tx_loadsz; - if (!up->capabilities) - up->capabilities = uart_config[port->type].flags; - up->mcr = 0; - - if (port->iotype != up->cur_iotype) - set_io_from_upio(port); - - if (port->type == PORT_16C950) { - /* Wake up and initialize UART */ - up->acr = 0; - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); - serial_port_out(port, UART_IER, 0); - serial_port_out(port, UART_LCR, 0); - serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); - serial_port_out(port, UART_LCR, 0); - } - -#ifdef CONFIG_SERIAL_8250_RSA - /* - * If this is an RSA port, see if we can kick it up to the - * higher speed clock. - */ - enable_rsa(up); -#endif - - /* - * Clear the FIFO buffers and disable them. - * (they will be reenabled in set_termios()) - */ - serial8250_clear_fifos(up); - - /* - * Clear the interrupt registers. - */ - serial_port_in(port, UART_LSR); - serial_port_in(port, UART_RX); - serial_port_in(port, UART_IIR); - serial_port_in(port, UART_MSR); - - /* - * At this point, there's no way the LSR could still be 0xff; - * if it is, then bail out, because there's likely no UART - * here. - */ - if (!(port->flags & UPF_BUGGY_UART) && - (serial_port_in(port, UART_LSR) == 0xff)) { - printk_ratelimited(KERN_INFO "ttyS%d: LSR safety check engaged!\n", - serial_index(port)); - return -ENODEV; - } - - /* - * For a XR16C850, we need to set the trigger levels - */ - if (port->type == PORT_16850) { - unsigned char fctr; - - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - - fctr = serial_in(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); - serial_port_out(port, UART_FCTR, - fctr | UART_FCTR_TRGD | UART_FCTR_RX); - serial_port_out(port, UART_TRG, UART_TRG_96); - serial_port_out(port, UART_FCTR, - fctr | UART_FCTR_TRGD | UART_FCTR_TX); - serial_port_out(port, UART_TRG, UART_TRG_96); - - serial_port_out(port, UART_LCR, 0); - } - - if (port->irq) { - unsigned char iir1; - /* - * Test for UARTs that do not reassert THRE when the - * transmitter is idle and the interrupt has already - * been cleared. Real 16550s should always reassert - * this interrupt whenever the transmitter is idle and - * the interrupt is enabled. Delays are necessary to - * allow register changes to become visible. 
- */ - spin_lock_irqsave(&port->lock, flags); - if (up->port.irqflags & IRQF_SHARED) - disable_irq_nosync(port->irq); - - wait_for_xmitr(up, UART_LSR_THRE); - serial_port_out_sync(port, UART_IER, UART_IER_THRI); - udelay(1); /* allow THRE to set */ - iir1 = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - serial_port_out_sync(port, UART_IER, UART_IER_THRI); - udelay(1); /* allow a working UART time to re-assert THRE */ - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - - if (port->irqflags & IRQF_SHARED) - enable_irq(port->irq); - spin_unlock_irqrestore(&port->lock, flags); - - /* - * If the interrupt is not reasserted, or we otherwise - * don't trust the iir, setup a timer to kick the UART - * on a regular basis. - */ - if ((!(iir1 & UART_IIR_NO_INT) && (iir & UART_IIR_NO_INT)) || - up->port.flags & UPF_BUG_THRE) { - up->bugs |= UART_BUG_THRE; - pr_debug("ttyS%d - using backup timer\n", - serial_index(port)); - } - } - - /* - * The above check will only give an accurate result the first time - * the port is opened so this value needs to be preserved. - */ - if (up->bugs & UART_BUG_THRE) { - up->timer.function = serial8250_backup_timeout; - up->timer.data = (unsigned long)up; - mod_timer(&up->timer, jiffies + - uart_poll_timeout(port) + HZ / 5); - } - - /* - * If the "interrupt" for this port doesn't correspond with any - * hardware interrupt, we use a timer-based system. The original - * driver used to do this with IRQ0. - */ - if (!port->irq) { - up->timer.data = (unsigned long)up; - mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); - } else { - retval = serial_link_irq_chain(up); - if (retval) - return retval; - } - - /* - * Now, initialize the UART - */ - serial_port_out(port, UART_LCR, UART_LCR_WLEN8); - - spin_lock_irqsave(&port->lock, flags); - if (up->port.flags & UPF_FOURPORT) { - if (!up->port.irq) - up->port.mctrl |= TIOCM_OUT1; - } else - /* - * Most PC uarts need OUT2 raised to enable interrupts. - */ - if (port->irq) - up->port.mctrl |= TIOCM_OUT2; - - serial8250_set_mctrl(port, port->mctrl); - - /* Serial over Lan (SoL) hack: - Intel 8257x Gigabit ethernet chips have a - 16550 emulation, to be used for Serial Over Lan. - Those chips take a longer time than a normal - serial device to signalize that a transmission - data was queued. Due to that, the above test generally - fails. One solution would be to delay the reading of - iir. However, this is not reliable, since the timeout - is variable. So, let's just don't test if we receive - TX irq. This way, we'll never enable UART_BUG_TXEN. - */ - if (skip_txen_test || up->port.flags & UPF_NO_TXEN_TEST) - goto dont_test_tx_en; - - /* - * Do a quick test to see if we receive an - * interrupt when we enable the TX irq. - */ - serial_port_out(port, UART_IER, UART_IER_THRI); - lsr = serial_port_in(port, UART_LSR); - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - - if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { - if (!(up->bugs & UART_BUG_TXEN)) { - up->bugs |= UART_BUG_TXEN; - pr_debug("ttyS%d - enabling bad tx status workarounds\n", - serial_index(port)); - } - } else { - up->bugs &= ~UART_BUG_TXEN; - } - -dont_test_tx_en: - spin_unlock_irqrestore(&port->lock, flags); - - /* - * Clear the interrupt registers again for luck, and clear the - * saved flags to avoid getting false values from polling - * routines or the previous session. 
- */ - serial_port_in(port, UART_LSR); - serial_port_in(port, UART_RX); - serial_port_in(port, UART_IIR); - serial_port_in(port, UART_MSR); - up->lsr_saved_flags = 0; - up->msr_saved_flags = 0; - - /* - * Request DMA channels for both RX and TX. - */ - if (up->dma) { - retval = serial8250_request_dma(up); - if (retval) { - pr_warn_ratelimited("ttyS%d - failed to request DMA\n", - serial_index(port)); - up->dma = NULL; - } - } - - /* - * Finally, enable interrupts. Note: Modem status interrupts - * are set via set_termios(), which will be occurring imminently - * anyway, so we don't enable them here. - */ - up->ier = UART_IER_RLSI | UART_IER_RDI; - serial_port_out(port, UART_IER, up->ier); - - if (port->flags & UPF_FOURPORT) { - unsigned int icp; - /* - * Enable interrupts on the AST Fourport board - */ - icp = (port->iobase & 0xfe0) | 0x01f; - outb_p(0x80, icp); - inb_p(icp); - } - - return 0; -} - -static void serial8250_shutdown(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - unsigned long flags; - - /* - * Disable interrupts from this port - */ - up->ier = 0; - serial_port_out(port, UART_IER, 0); - - if (up->dma) - serial8250_release_dma(up); - - spin_lock_irqsave(&port->lock, flags); - if (port->flags & UPF_FOURPORT) { - /* reset interrupts on the AST Fourport board */ - inb((port->iobase & 0xfe0) | 0x1f); - port->mctrl |= TIOCM_OUT1; - } else - port->mctrl &= ~TIOCM_OUT2; - - serial8250_set_mctrl(port, port->mctrl); - spin_unlock_irqrestore(&port->lock, flags); - - /* - * Disable break condition and FIFOs - */ - serial_port_out(port, UART_LCR, - serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); - serial8250_clear_fifos(up); - -#ifdef CONFIG_SERIAL_8250_RSA - /* - * Reset the RSA board back to 115kbps compat mode. - */ - disable_rsa(up); -#endif - - /* - * Read data port to reset things, and then unlink from - * the IRQ chain. - */ - serial_port_in(port, UART_RX); - - del_timer_sync(&up->timer); - up->timer.function = serial8250_timeout; - if (port->irq) - serial_unlink_irq_chain(up); -} - -static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud) -{ - unsigned int quot; - - /* - * Handle magic divisors for baud rates above baud_base on - * SMSC SuperIO chips. - */ - if ((port->flags & UPF_MAGIC_MULTIPLIER) && - baud == (port->uartclk/4)) - quot = 0x8001; - else if ((port->flags & UPF_MAGIC_MULTIPLIER) && - baud == (port->uartclk/8)) - quot = 0x8002; - else - quot = uart_get_divisor(port, baud); - - return quot; -} - -void -serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - unsigned char cval, fcr = 0; - unsigned long flags; - unsigned int baud, quot; - int fifo_bug = 0; - - switch (termios->c_cflag & CSIZE) { - case CS5: - cval = UART_LCR_WLEN5; - break; - case CS6: - cval = UART_LCR_WLEN6; - break; - case CS7: - cval = UART_LCR_WLEN7; - break; - default: - case CS8: - cval = UART_LCR_WLEN8; - break; - } - - if (termios->c_cflag & CSTOPB) - cval |= UART_LCR_STOP; - if (termios->c_cflag & PARENB) { - cval |= UART_LCR_PARITY; - if (up->bugs & UART_BUG_PARITY) - fifo_bug = 1; - } - if (!(termios->c_cflag & PARODD)) - cval |= UART_LCR_EPAR; -#ifdef CMSPAR - if (termios->c_cflag & CMSPAR) - cval |= UART_LCR_SPAR; -#endif - - /* - * Ask the core to calculate the divisor for us. 
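- */

The divisor arithmetic that follows is compact, so a worked illustration may help. This is a sketch only; the 1.8432 MHz crystal is a classic value assumed for illustration, not something this patch states:

	/* illustrative only: standard 16x50 divisor selection */
	unsigned int uartclk = 1843200;			/* assumed clock */
	unsigned int baud = 9600;
	unsigned int quot = uartclk / (16 * baud);	/* = 12 */

With that clock, the limits passed to uart_get_baud_rate() below work out to roughly 1..115200 baud, and serial8250_get_divisor() returns this plain quotient unless one of the UPF_MAGIC_MULTIPLIER cases above applies.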
-	baud = uart_get_baud_rate(port, termios, old,
-				  port->uartclk / 16 / 0xffff,
-				  port->uartclk / 16);
-	quot = serial8250_get_divisor(port, baud);
-
-	/*
-	 * Oxford Semi 952 rev B workaround
-	 */
-	if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0)
-		quot++;
-
-	if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) {
-		fcr = uart_config[port->type].fcr;
-		if (baud < 2400 || fifo_bug) {
-			fcr &= ~UART_FCR_TRIGGER_MASK;
-			fcr |= UART_FCR_TRIGGER_1;
-		}
-	}
-
-	/*
-	 * MCR-based auto flow control. When AFE is enabled, RTS will be
-	 * deasserted when the receive FIFO contains more characters than
-	 * the trigger, or the MCR RTS bit is cleared. In the case where
-	 * the remote UART is not using CTS auto flow control, we must
-	 * have sufficient FIFO entries for the latency of the remote
-	 * UART to respond. IOW, at least 32 bytes of FIFO.
-	 */
-	if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) {
-		up->mcr &= ~UART_MCR_AFE;
-		if (termios->c_cflag & CRTSCTS)
-			up->mcr |= UART_MCR_AFE;
-	}
-
-	/*
-	 * Ok, we're now changing the port state. Do it with
-	 * interrupts disabled.
-	 */
-	spin_lock_irqsave(&port->lock, flags);
-
-	/*
-	 * Update the per-port timeout.
-	 */
-	uart_update_timeout(port, termios->c_cflag, baud);
-
-	port->read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR;
-	if (termios->c_iflag & INPCK)
-		port->read_status_mask |= UART_LSR_FE | UART_LSR_PE;
-	if (termios->c_iflag & (BRKINT | PARMRK))
-		port->read_status_mask |= UART_LSR_BI;
-
-	/*
-	 * Characters to ignore
-	 */
-	port->ignore_status_mask = 0;
-	if (termios->c_iflag & IGNPAR)
-		port->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE;
-	if (termios->c_iflag & IGNBRK) {
-		port->ignore_status_mask |= UART_LSR_BI;
-		/*
-		 * If we're ignoring parity and break indicators,
-		 * ignore overruns too (for real raw support).
-		 */
-		if (termios->c_iflag & IGNPAR)
-			port->ignore_status_mask |= UART_LSR_OE;
-	}
-
-	/*
-	 * ignore all characters if CREAD is not set
-	 */
-	if ((termios->c_cflag & CREAD) == 0)
-		port->ignore_status_mask |= UART_LSR_DR;
-
-	/*
-	 * CTS flow control flag and modem status interrupts
-	 */
-	up->ier &= ~UART_IER_MSI;
-	if (!(up->bugs & UART_BUG_NOMSR) &&
-			UART_ENABLE_MS(&up->port, termios->c_cflag))
-		up->ier |= UART_IER_MSI;
-	if (up->capabilities & UART_CAP_UUE)
-		up->ier |= UART_IER_UUE;
-	if (up->capabilities & UART_CAP_RTOIE)
-		up->ier |= UART_IER_RTOIE;
-
-	serial_port_out(port, UART_IER, up->ier);
-
-	if (up->capabilities & UART_CAP_EFR) {
-		unsigned char efr = 0;
-		/*
-		 * TI16C752/Startech hardware flow control. FIXME:
-		 * - TI16C752 requires control thresholds to be set.
-		 * - UART_MCR_RTS is ineffective if auto-RTS mode is enabled.
-		 */
-		if (termios->c_cflag & CRTSCTS)
-			efr |= UART_EFR_CTS;
-
-		serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B);
-		if (port->flags & UPF_EXAR_EFR)
-			serial_port_out(port, UART_XR_EFR, efr);
-		else
-			serial_port_out(port, UART_EFR, efr);
-	}
-
-	/* Workaround to enable 115200 baud on OMAP1510 internal ports */
-	if (is_omap1510_8250(up)) {
-		if (baud == 115200) {
-			quot = 1;
-			serial_port_out(port, UART_OMAP_OSC_12M_SEL, 1);
-		} else
-			serial_port_out(port, UART_OMAP_OSC_12M_SEL, 0);
-	}
-
-	/*
-	 * For NatSemi, switch to bank 2 not bank 1, to avoid resetting EXCR2,
-	 * otherwise just set DLAB
-	 */
-	if (up->capabilities & UART_NATSEMI)
-		serial_port_out(port, UART_LCR, 0xe0);
-	else
-		serial_port_out(port, UART_LCR, cval | UART_LCR_DLAB);
-
-	serial_dl_write(up, quot);
-
-	/*
-	 * LCR DLAB must be set to enable 64-byte FIFO mode.
If the FCR - * is written without DLAB set, this mode will be disabled. - */ - if (port->type == PORT_16750) - serial_port_out(port, UART_FCR, fcr); - - serial_port_out(port, UART_LCR, cval); /* reset DLAB */ - up->lcr = cval; /* Save LCR */ - if (port->type != PORT_16750) { - /* emulated UARTs (Lucent Venus 167x) need two steps */ - if (fcr & UART_FCR_ENABLE_FIFO) - serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_port_out(port, UART_FCR, fcr); /* set fcr */ - } - serial8250_set_mctrl(port, port->mctrl); - spin_unlock_irqrestore(&port->lock, flags); - /* Don't rewrite B0 */ - if (tty_termios_baud_rate(termios)) - tty_termios_encode_baud_rate(termios, baud, baud); -} -EXPORT_SYMBOL(serial8250_do_set_termios); - -static void -serial8250_set_termios(struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - if (port->set_termios) - port->set_termios(port, termios, old); - else - serial8250_do_set_termios(port, termios, old); -} - -static void -serial8250_set_ldisc(struct uart_port *port, int new) -{ - if (new == N_PPS) { - port->flags |= UPF_HARDPPS_CD; - serial8250_enable_ms(port); - } else - port->flags &= ~UPF_HARDPPS_CD; -} - - -void serial8250_do_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ - struct uart_8250_port *p = - container_of(port, struct uart_8250_port, port); - - serial8250_set_sleep(p, state != 0); -} -EXPORT_SYMBOL(serial8250_do_pm); - -static void -serial8250_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ - if (port->pm) - port->pm(port, state, oldstate); - else - serial8250_do_pm(port, state, oldstate); -} - -static unsigned int serial8250_port_size(struct uart_8250_port *pt) -{ - if (pt->port.iotype == UPIO_AU) - return 0x1000; - if (is_omap1_8250(pt)) - return 0x16 << pt->port.regshift; - - return 8 << pt->port.regshift; -} - -/* - * Resource handling. 
- */ -static int serial8250_request_std_resource(struct uart_8250_port *up) -{ - unsigned int size = serial8250_port_size(up); - struct uart_port *port = &up->port; - int ret = 0; - - switch (port->iotype) { - case UPIO_AU: - case UPIO_TSI: - case UPIO_MEM32: - case UPIO_MEM: - if (!port->mapbase) - break; - - if (!request_mem_region(port->mapbase, size, "serial")) { - ret = -EBUSY; - break; - } - - if (port->flags & UPF_IOREMAP) { - port->membase = ioremap_nocache(port->mapbase, size); - if (!port->membase) { - release_mem_region(port->mapbase, size); - ret = -ENOMEM; - } - } - break; - - case UPIO_HUB6: - case UPIO_PORT: - if (!request_region(port->iobase, size, "serial")) - ret = -EBUSY; - break; - } - return ret; -} - -static void serial8250_release_std_resource(struct uart_8250_port *up) -{ - unsigned int size = serial8250_port_size(up); - struct uart_port *port = &up->port; - - switch (port->iotype) { - case UPIO_AU: - case UPIO_TSI: - case UPIO_MEM32: - case UPIO_MEM: - if (!port->mapbase) - break; - - if (port->flags & UPF_IOREMAP) { - iounmap(port->membase); - port->membase = NULL; - } - - release_mem_region(port->mapbase, size); - break; - - case UPIO_HUB6: - case UPIO_PORT: - release_region(port->iobase, size); - break; - } -} - -static int serial8250_request_rsa_resource(struct uart_8250_port *up) -{ - unsigned long start = UART_RSA_BASE << up->port.regshift; - unsigned int size = 8 << up->port.regshift; - struct uart_port *port = &up->port; - int ret = -EINVAL; - - switch (port->iotype) { - case UPIO_HUB6: - case UPIO_PORT: - start += port->iobase; - if (request_region(start, size, "serial-rsa")) - ret = 0; - else - ret = -EBUSY; - break; - } - - return ret; -} - -static void serial8250_release_rsa_resource(struct uart_8250_port *up) -{ - unsigned long offset = UART_RSA_BASE << up->port.regshift; - unsigned int size = 8 << up->port.regshift; - struct uart_port *port = &up->port; - - switch (port->iotype) { - case UPIO_HUB6: - case UPIO_PORT: - release_region(port->iobase + offset, size); - break; - } -} - -static void serial8250_release_port(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - serial8250_release_std_resource(up); - if (port->type == PORT_RSA) - serial8250_release_rsa_resource(up); -} - -static int serial8250_request_port(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - int ret; - - if (port->type == PORT_8250_CIR) - return -ENODEV; - - ret = serial8250_request_std_resource(up); - if (ret == 0 && port->type == PORT_RSA) { - ret = serial8250_request_rsa_resource(up); - if (ret < 0) - serial8250_release_std_resource(up); - } - - return ret; -} - -static void serial8250_config_port(struct uart_port *port, int flags) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - int probeflags = PROBE_ANY; - int ret; - - if (port->type == PORT_8250_CIR) - return; - - /* - * Find the region that we can probe for. This in turn - * tells us whether we can probe for the type of port. 
- */ - ret = serial8250_request_std_resource(up); - if (ret < 0) - return; - - ret = serial8250_request_rsa_resource(up); - if (ret < 0) - probeflags &= ~PROBE_RSA; - - if (port->iotype != up->cur_iotype) - set_io_from_upio(port); - - if (flags & UART_CONFIG_TYPE) - autoconfig(up, probeflags); - - /* if access method is AU, it is a 16550 with a quirk */ - if (port->type == PORT_16550A && port->iotype == UPIO_AU) - up->bugs |= UART_BUG_NOMSR; - - if (port->type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) - autoconfig_irq(up); - - if (port->type != PORT_RSA && probeflags & PROBE_RSA) - serial8250_release_rsa_resource(up); - if (port->type == PORT_UNKNOWN) - serial8250_release_std_resource(up); - - /* Fixme: probably not the best place for this */ - if ((port->type == PORT_XR17V35X) || - (port->type == PORT_XR17D15X)) - port->handle_irq = exar_handle_irq; -} - -static int -serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) -{ - if (ser->irq >= nr_irqs || ser->irq < 0 || - ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || - ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS || - ser->type == PORT_STARTECH) - return -EINVAL; - return 0; -} - -static const char * -serial8250_type(struct uart_port *port) -{ - int type = port->type; - - if (type >= ARRAY_SIZE(uart_config)) - type = 0; - return uart_config[type].name; -} - -static struct uart_ops serial8250_pops = { - .tx_empty = serial8250_tx_empty, - .set_mctrl = serial8250_set_mctrl, - .get_mctrl = serial8250_get_mctrl, - .stop_tx = serial8250_stop_tx, - .start_tx = serial8250_start_tx, - .stop_rx = serial8250_stop_rx, - .enable_ms = serial8250_enable_ms, - .break_ctl = serial8250_break_ctl, - .startup = serial8250_startup, - .shutdown = serial8250_shutdown, - .set_termios = serial8250_set_termios, - .set_ldisc = serial8250_set_ldisc, - .pm = serial8250_pm, - .type = serial8250_type, - .release_port = serial8250_release_port, - .request_port = serial8250_request_port, - .config_port = serial8250_config_port, - .verify_port = serial8250_verify_port, -#ifdef CONFIG_CONSOLE_POLL - .poll_get_char = serial8250_get_poll_char, - .poll_put_char = serial8250_put_poll_char, -#endif -}; - -static struct uart_8250_port serial8250_ports[UART_NR]; - -static void (*serial8250_isa_config)(int port, struct uart_port *up, - unsigned short *capabilities); - -void serial8250_set_isa_configurator( - void (*v)(int port, struct uart_port *up, unsigned short *capabilities)) -{ - serial8250_isa_config = v; -} -EXPORT_SYMBOL(serial8250_set_isa_configurator); - -static void __init serial8250_isa_init_ports(void) -{ - struct uart_8250_port *up; - static int first = 1; - int i, irqflag = 0; - - if (!first) - return; - first = 0; - - if (nr_uarts > UART_NR) - nr_uarts = UART_NR; - - for (i = 0; i < nr_uarts; i++) { - struct uart_8250_port *up = &serial8250_ports[i]; - struct uart_port *port = &up->port; - - port->line = i; - spin_lock_init(&port->lock); - - init_timer(&up->timer); - up->timer.function = serial8250_timeout; - up->cur_iotype = 0xFF; - - /* - * ALPHA_KLUDGE_MCR needs to be killed. 
- */ - up->mcr_mask = ~ALPHA_KLUDGE_MCR; - up->mcr_force = ALPHA_KLUDGE_MCR; - - port->ops = &serial8250_pops; - } - - if (share_irqs) - irqflag = IRQF_SHARED; - - for (i = 0, up = serial8250_ports; - i < ARRAY_SIZE(old_serial_port) && i < nr_uarts; - i++, up++) { - struct uart_port *port = &up->port; - - port->iobase = old_serial_port[i].port; - port->irq = irq_canonicalize(old_serial_port[i].irq); - port->irqflags = old_serial_port[i].irqflags; - port->uartclk = old_serial_port[i].baud_base * 16; - port->flags = old_serial_port[i].flags; - port->hub6 = old_serial_port[i].hub6; - port->membase = old_serial_port[i].iomem_base; - port->iotype = old_serial_port[i].io_type; - port->regshift = old_serial_port[i].iomem_reg_shift; - set_io_from_upio(port); - port->irqflags |= irqflag; - if (serial8250_isa_config != NULL) - serial8250_isa_config(i, &up->port, &up->capabilities); - - } -} - -static void -serial8250_init_fixed_type_port(struct uart_8250_port *up, unsigned int type) -{ - up->port.type = type; - if (!up->port.fifosize) - up->port.fifosize = uart_config[type].fifo_size; - if (!up->tx_loadsz) - up->tx_loadsz = uart_config[type].tx_loadsz; - if (!up->capabilities) - up->capabilities = uart_config[type].flags; -} - -static void __init -serial8250_register_ports(struct uart_driver *drv, struct device *dev) -{ - int i; - - for (i = 0; i < nr_uarts; i++) { - struct uart_8250_port *up = &serial8250_ports[i]; - - if (up->port.dev) - continue; - - up->port.dev = dev; - - if (up->port.flags & UPF_FIXED_TYPE) - serial8250_init_fixed_type_port(up, up->port.type); - - uart_add_one_port(drv, &up->port); - } -} - -#ifdef CONFIG_SERIAL_8250_CONSOLE - -static void serial8250_console_putchar(struct uart_port *port, int ch) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - wait_for_xmitr(up, UART_LSR_THRE); - serial_port_out(port, UART_TX, ch); -} - -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... - * - * The console_lock must be held when we get here. - */ -static void -serial8250_console_write(struct console *co, const char *s, unsigned int count) -{ - struct uart_8250_port *up = &serial8250_ports[co->index]; - struct uart_port *port = &up->port; - unsigned long flags; - unsigned int ier; - int locked = 1; - - touch_nmi_watchdog(); - - local_irq_save(flags); - if (port->sysrq) { - /* serial8250_handle_irq() already took the lock */ - locked = 0; - } else if (oops_in_progress) { - locked = spin_trylock(&port->lock); - } else - spin_lock(&port->lock); - - /* - * First save the IER then disable the interrupts - */ - ier = serial_port_in(port, UART_IER); - - if (up->capabilities & UART_CAP_UUE) - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); - - uart_console_write(port, s, count, serial8250_console_putchar); - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up, BOTH_EMPTY); - serial_port_out(port, UART_IER, ier); - - /* - * The receive handling will happen properly because the - * receive ready bit will still be set; it is not cleared - * on read. However, modem control will not, we must - * call it if we have saved something in the saved flags - * while processing with interrupts off. 
- */ - if (up->msr_saved_flags) - serial8250_modem_status(up); - - if (locked) - spin_unlock(&port->lock); - local_irq_restore(flags); -} - -static int __init serial8250_console_setup(struct console *co, char *options) -{ - struct uart_port *port; - int baud = 9600; - int bits = 8; - int parity = 'n'; - int flow = 'n'; - - /* - * Check whether an invalid uart number has been specified, and - * if so, search for the first available port that does have - * console support. - */ - if (co->index >= nr_uarts) - co->index = 0; - port = &serial8250_ports[co->index].port; - if (!port->iobase && !port->membase) - return -ENODEV; - - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - - return uart_set_options(port, co, baud, parity, bits, flow); -} - -static int serial8250_console_early_setup(void) -{ - return serial8250_find_port_for_earlycon(); -} - -static struct console serial8250_console = { - .name = "ttyS", - .write = serial8250_console_write, - .device = uart_console_device, - .setup = serial8250_console_setup, - .early_setup = serial8250_console_early_setup, - .flags = CON_PRINTBUFFER | CON_ANYTIME, - .index = -1, - .data = &serial8250_reg, -}; - -static int __init serial8250_console_init(void) -{ - serial8250_isa_init_ports(); - register_console(&serial8250_console); - return 0; -} -console_initcall(serial8250_console_init); - -int serial8250_find_port(struct uart_port *p) -{ - int line; - struct uart_port *port; - - for (line = 0; line < nr_uarts; line++) { - port = &serial8250_ports[line].port; - if (uart_match_port(p, port)) - return line; - } - return -ENODEV; -} - -#define SERIAL8250_CONSOLE &serial8250_console -#else -#define SERIAL8250_CONSOLE NULL -#endif - -static struct uart_driver serial8250_reg = { - .owner = THIS_MODULE, - .driver_name = "serial", - .dev_name = "ttyS", - .major = TTY_MAJOR, - .minor = 64, - .cons = SERIAL8250_CONSOLE, -}; - -/* - * early_serial_setup - early registration for 8250 ports - * - * Setup an 8250 port structure prior to console initialisation. Use - * after console initialisation will cause undefined behaviour. - */ -int __init early_serial_setup(struct uart_port *port) -{ - struct uart_port *p; - - if (port->line >= ARRAY_SIZE(serial8250_ports)) - return -ENODEV; - - serial8250_isa_init_ports(); - p = &serial8250_ports[port->line].port; - p->iobase = port->iobase; - p->membase = port->membase; - p->irq = port->irq; - p->irqflags = port->irqflags; - p->uartclk = port->uartclk; - p->fifosize = port->fifosize; - p->regshift = port->regshift; - p->iotype = port->iotype; - p->flags = port->flags; - p->mapbase = port->mapbase; - p->private_data = port->private_data; - p->type = port->type; - p->line = port->line; - - set_io_from_upio(p); - if (port->serial_in) - p->serial_in = port->serial_in; - if (port->serial_out) - p->serial_out = port->serial_out; - if (port->handle_irq) - p->handle_irq = port->handle_irq; - else - p->handle_irq = serial8250_default_handle_irq; - - return 0; -} - -/** - * serial8250_suspend_port - suspend one serial port - * @line: serial line number - * - * Suspend one serial port. - */ -void serial8250_suspend_port(int line) -{ - uart_suspend_port(&serial8250_reg, &serial8250_ports[line].port); -} - -/** - * serial8250_resume_port - resume one serial port - * @line: serial line number - * - * Resume one serial port. 
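- */

A brief usage sketch of this exported suspend/resume pair (the line number and calling context here are hypothetical, not taken from the patch):

	/* illustrative only: quiesce line 0 (ttyS0) across a sleep cycle */
	serial8250_suspend_port(0);
	/* ... system suspends and later resumes here ... */
	serial8250_resume_port(0);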
-void serial8250_resume_port(int line)
-{
-	struct uart_8250_port *up = &serial8250_ports[line];
-	struct uart_port *port = &up->port;
-
-	if (up->capabilities & UART_NATSEMI) {
-		/* Ensure it's still in high speed mode */
-		serial_port_out(port, UART_LCR, 0xE0);
-
-		ns16550a_goto_highspeed(up);
-
-		serial_port_out(port, UART_LCR, 0);
-		port->uartclk = 921600*16;
-	}
-	uart_resume_port(&serial8250_reg, port);
-}
-
-/*
- * Register a set of serial devices attached to a platform device. The
- * list is terminated with a zero flags entry, which means we expect
- * all entries to have at least UPF_BOOT_AUTOCONF set.
- */
-static int serial8250_probe(struct platform_device *dev)
-{
-	struct plat_serial8250_port *p = dev->dev.platform_data;
-	struct uart_8250_port uart;
-	int ret, i, irqflag = 0;
-
-	memset(&uart, 0, sizeof(uart));
-
-	if (share_irqs)
-		irqflag = IRQF_SHARED;
-
-	for (i = 0; p && p->flags != 0; p++, i++) {
-		uart.port.iobase	= p->iobase;
-		uart.port.membase	= p->membase;
-		uart.port.irq		= p->irq;
-		uart.port.irqflags	= p->irqflags;
-		uart.port.uartclk	= p->uartclk;
-		uart.port.regshift	= p->regshift;
-		uart.port.iotype	= p->iotype;
-		uart.port.flags		= p->flags;
-		uart.port.mapbase	= p->mapbase;
-		uart.port.hub6		= p->hub6;
-		uart.port.private_data	= p->private_data;
-		uart.port.type		= p->type;
-		uart.port.serial_in	= p->serial_in;
-		uart.port.serial_out	= p->serial_out;
-		uart.port.handle_irq	= p->handle_irq;
-		uart.port.handle_break	= p->handle_break;
-		uart.port.set_termios	= p->set_termios;
-		uart.port.pm		= p->pm;
-		uart.port.dev		= &dev->dev;
-		uart.port.irqflags	|= irqflag;
-		ret = serial8250_register_8250_port(&uart);
-		if (ret < 0) {
-			dev_err(&dev->dev, "unable to register port at index %d "
-				"(IO%lx MEM%llx IRQ%d): %d\n", i,
-				p->iobase, (unsigned long long)p->mapbase,
-				p->irq, ret);
-		}
-	}
-	return 0;
-}
-
-/*
- * Remove serial ports registered against a platform device.
- */
-static int serial8250_remove(struct platform_device *dev)
-{
-	int i;
-
-	for (i = 0; i < nr_uarts; i++) {
-		struct uart_8250_port *up = &serial8250_ports[i];
-
-		if (up->port.dev == &dev->dev)
-			serial8250_unregister_port(i);
-	}
-	return 0;
-}
-
-static int serial8250_suspend(struct platform_device *dev, pm_message_t state)
-{
-	int i;
-
-	for (i = 0; i < UART_NR; i++) {
-		struct uart_8250_port *up = &serial8250_ports[i];
-
-		if (up->port.type != PORT_UNKNOWN && up->port.dev == &dev->dev)
-			uart_suspend_port(&serial8250_reg, &up->port);
-	}
-
-	return 0;
-}
-
-static int serial8250_resume(struct platform_device *dev)
-{
-	int i;
-
-	for (i = 0; i < UART_NR; i++) {
-		struct uart_8250_port *up = &serial8250_ports[i];
-
-		if (up->port.type != PORT_UNKNOWN && up->port.dev == &dev->dev)
-			serial8250_resume_port(i);
-	}
-
-	return 0;
-}
-
-static struct platform_driver serial8250_isa_driver = {
-	.probe		= serial8250_probe,
-	.remove		= serial8250_remove,
-	.suspend	= serial8250_suspend,
-	.resume		= serial8250_resume,
-	.driver		= {
-		.name	= "serial8250",
-		.owner	= THIS_MODULE,
-	},
-};
-
-/*
- * This "device" covers _all_ ISA 8250-compatible serial devices listed
- * in the table in include/asm/serial.h
- */
-static struct platform_device *serial8250_isa_devs;
-
-/*
- * serial8250_register_8250_port and serial8250_unregister_port allow for
- * 16x50 serial ports to be configured at run-time, to support PCMCIA
- * modems and PCI multiport cards.
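- */

As a usage illustration of this run-time path, here is a minimal sketch; every value is hypothetical, and the declarations are assumed to come from <linux/serial_8250.h>:

	struct uart_8250_port uart;
	int line;

	memset(&uart, 0, sizeof(uart));
	uart.port.iobase  = 0x2f8;		/* hypothetical I/O base */
	uart.port.irq     = 3;			/* hypothetical IRQ */
	uart.port.uartclk = 1843200;		/* must be non-zero, or -EINVAL */
	uart.port.iotype  = UPIO_PORT;
	uart.port.flags   = UPF_SHARE_IRQ;

	line = serial8250_register_8250_port(&uart);
	if (line < 0)
		return line;			/* registration failed */

On success the returned value is the line number, matching the behaviour of serial8250_register_8250_port() shown below.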
-static DEFINE_MUTEX(serial_mutex);
-
-static struct uart_8250_port *serial8250_find_match_or_unused(struct uart_port *port)
-{
-	int i;
-
-	/*
-	 * First, find a port entry which matches.
-	 */
-	for (i = 0; i < nr_uarts; i++)
-		if (uart_match_port(&serial8250_ports[i].port, port))
-			return &serial8250_ports[i];
-
-	/*
-	 * We didn't find a matching entry, so look for the first
-	 * free entry. We look for one which hasn't been previously
-	 * used (indicated by zero iobase).
-	 */
-	for (i = 0; i < nr_uarts; i++)
-		if (serial8250_ports[i].port.type == PORT_UNKNOWN &&
-		    serial8250_ports[i].port.iobase == 0)
-			return &serial8250_ports[i];
-
-	/*
-	 * That also failed. Last resort is to find any entry which
-	 * doesn't have a real port associated with it.
-	 */
-	for (i = 0; i < nr_uarts; i++)
-		if (serial8250_ports[i].port.type == PORT_UNKNOWN)
-			return &serial8250_ports[i];
-
-	return NULL;
-}
-
-/**
- * serial8250_register_8250_port - register a serial port
- * @up: serial port template
- *
- * Configure the serial port specified by the request. If the
- * port exists and is in use, it is hung up and unregistered
- * first.
- *
- * The port is then probed and, if necessary, the IRQ is autodetected.
- * If this fails an error is returned.
- *
- * On success the port is ready to use and the line number is returned.
- */
-int serial8250_register_8250_port(struct uart_8250_port *up)
-{
-	struct uart_8250_port *uart;
-	int ret = -ENOSPC;
-
-	if (up->port.uartclk == 0)
-		return -EINVAL;
-
-	mutex_lock(&serial_mutex);
-
-	uart = serial8250_find_match_or_unused(&up->port);
-	if (uart && uart->port.type != PORT_8250_CIR) {
-		if (uart->port.dev)
-			uart_remove_one_port(&serial8250_reg, &uart->port);
-
-		uart->port.iobase	= up->port.iobase;
-		uart->port.membase	= up->port.membase;
-		uart->port.irq		= up->port.irq;
-		uart->port.irqflags	= up->port.irqflags;
-		uart->port.uartclk	= up->port.uartclk;
-		uart->port.fifosize	= up->port.fifosize;
-		uart->port.regshift	= up->port.regshift;
-		uart->port.iotype	= up->port.iotype;
-		uart->port.flags	= up->port.flags | UPF_BOOT_AUTOCONF;
-		uart->bugs		= up->bugs;
-		uart->port.mapbase	= up->port.mapbase;
-		uart->port.private_data	= up->port.private_data;
-		uart->port.fifosize	= up->port.fifosize;
-		uart->tx_loadsz		= up->tx_loadsz;
-		uart->capabilities	= up->capabilities;
-
-		if (up->port.dev)
-			uart->port.dev = up->port.dev;
-
-		if (up->port.flags & UPF_FIXED_TYPE)
-			serial8250_init_fixed_type_port(uart, up->port.type);
-
-		set_io_from_upio(&uart->port);
-		/* Possibly override default I/O functions. */
*/ - if (up->port.serial_in) - uart->port.serial_in = up->port.serial_in; - if (up->port.serial_out) - uart->port.serial_out = up->port.serial_out; - if (up->port.handle_irq) - uart->port.handle_irq = up->port.handle_irq; - /* Possibly override set_termios call */ - if (up->port.set_termios) - uart->port.set_termios = up->port.set_termios; - if (up->port.pm) - uart->port.pm = up->port.pm; - if (up->port.handle_break) - uart->port.handle_break = up->port.handle_break; - if (up->dl_read) - uart->dl_read = up->dl_read; - if (up->dl_write) - uart->dl_write = up->dl_write; - if (up->dma) - uart->dma = up->dma; - - if (serial8250_isa_config != NULL) - serial8250_isa_config(0, &uart->port, - &uart->capabilities); - - ret = uart_add_one_port(&serial8250_reg, &uart->port); - if (ret == 0) - ret = uart->port.line; - } - mutex_unlock(&serial_mutex); - - return ret; -} -EXPORT_SYMBOL(serial8250_register_8250_port); - -/** - * serial8250_unregister_port - remove a 16x50 serial port at runtime - * @line: serial line number - * - * Remove one serial port. This may not be called from interrupt - * context. We hand the port back to the our control. - */ -void serial8250_unregister_port(int line) -{ - struct uart_8250_port *uart = &serial8250_ports[line]; - - mutex_lock(&serial_mutex); - uart_remove_one_port(&serial8250_reg, &uart->port); - if (serial8250_isa_devs) { - uart->port.flags &= ~UPF_BOOT_AUTOCONF; - uart->port.type = PORT_UNKNOWN; - uart->port.dev = &serial8250_isa_devs->dev; - uart->capabilities = uart_config[uart->port.type].flags; - uart_add_one_port(&serial8250_reg, &uart->port); - } else { - uart->port.dev = NULL; - } - mutex_unlock(&serial_mutex); -} -EXPORT_SYMBOL(serial8250_unregister_port); - -static int __init serial8250_init(void) -{ - int ret; - - serial8250_isa_init_ports(); - - printk(KERN_INFO "Serial: 8250/16550 driver, " - "%d ports, IRQ sharing %sabled\n", nr_uarts, - share_irqs ? "en" : "dis"); - -#ifdef CONFIG_SPARC - ret = sunserial_register_minors(&serial8250_reg, UART_NR); -#else - serial8250_reg.nr = UART_NR; - ret = uart_register_driver(&serial8250_reg); -#endif - if (ret) - goto out; - - ret = serial8250_pnp_init(); - if (ret) - goto unreg_uart_drv; - - serial8250_isa_devs = platform_device_alloc("serial8250", - PLAT8250_DEV_LEGACY); - if (!serial8250_isa_devs) { - ret = -ENOMEM; - goto unreg_pnp; - } - - ret = platform_device_add(serial8250_isa_devs); - if (ret) - goto put_dev; - - serial8250_register_ports(&serial8250_reg, &serial8250_isa_devs->dev); - - ret = platform_driver_register(&serial8250_isa_driver); - if (ret == 0) - goto out; - - platform_device_del(serial8250_isa_devs); -put_dev: - platform_device_put(serial8250_isa_devs); -unreg_pnp: - serial8250_pnp_exit(); -unreg_uart_drv: -#ifdef CONFIG_SPARC - sunserial_unregister_minors(&serial8250_reg, UART_NR); -#else - uart_unregister_driver(&serial8250_reg); -#endif -out: - return ret; -} - -static void __exit serial8250_exit(void) -{ - struct platform_device *isa_dev = serial8250_isa_devs; - - /* - * This tells serial8250_unregister_port() not to re-register - * the ports (thereby making serial8250_isa_driver permanently - * in use.) 
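The serial8250_init() body removed in this hunk layers its setup steps and, on failure, unwinds them through a chain of goto labels in reverse order. A compact model of that pattern follows, with hypothetical stage_ok()/stage_undo() helpers standing in for the real subsystem calls; note how success at the last stage jumps straight to the exit label, skipping the unwind.

    #include <stdio.h>

    /* Hypothetical stand-ins for the setup stages used in serial8250_init(). */
    static int stage_ok(const char *name) { printf("setup %s\n", name); return 0; }
    static void stage_undo(const char *name) { printf("undo  %s\n", name); }

    static int demo_init(void)
    {
        int ret;

        if ((ret = stage_ok("uart driver")) != 0)
            goto out;
        if ((ret = stage_ok("pnp")) != 0)
            goto unreg_uart_drv;
        if ((ret = stage_ok("platform device")) != 0)
            goto unreg_pnp;
        if ((ret = stage_ok("platform driver")) == 0)
            goto out;                   /* success: skip the unwind */

        stage_undo("platform device");  /* fall through the undo chain */
    unreg_pnp:
        stage_undo("pnp");
    unreg_uart_drv:
        stage_undo("uart driver");
    out:
        return ret;
    }

    int main(void) { return demo_init(); }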
- */
-	serial8250_isa_devs = NULL;
-
-	platform_driver_unregister(&serial8250_isa_driver);
-	platform_device_unregister(isa_dev);
-
-	serial8250_pnp_exit();
-
-#ifdef CONFIG_SPARC
-	sunserial_unregister_minors(&serial8250_reg, UART_NR);
-#else
-	uart_unregister_driver(&serial8250_reg);
-#endif
-}
-
-module_init(serial8250_init);
-module_exit(serial8250_exit);
-
-EXPORT_SYMBOL(serial8250_suspend_port);
-EXPORT_SYMBOL(serial8250_resume_port);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Generic 8250/16x50 serial driver");
-
-module_param(share_irqs, uint, 0644);
-MODULE_PARM_DESC(share_irqs, "Share IRQs with other non-8250/16x50 devices"
-	" (unsafe)");
-
-module_param(nr_uarts, uint, 0644);
-MODULE_PARM_DESC(nr_uarts, "Maximum number of UARTs supported. (1-" __MODULE_STRING(CONFIG_SERIAL_8250_NR_UARTS) ")");
-
-module_param(skip_txen_test, uint, 0644);
-MODULE_PARM_DESC(skip_txen_test, "Skip checking for the TXEN bug at init time");
-
-#ifdef CONFIG_SERIAL_8250_RSA
-module_param_array(probe_rsa, ulong, &probe_rsa_count, 0444);
-MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA");
-#endif
-MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR);
-
-#ifndef MODULE
-/* This module was renamed to 8250_core in 3.7. Keep the old "8250" name
- * working as well for the module options so we don't break people. We
- * need to keep the names identical and the convenient macros will happily
- * refuse to let us do that by failing the build with redefinition errors
- * of global variables. So we stick them inside a dummy function to avoid
- * those conflicts. The options still get parsed, and the redefined
- * MODULE_PARAM_PREFIX lets us keep the "8250." syntax alive.
- *
- * This is hacky. I'm sorry.
- */
-static void __used s8250_options(void)
-{
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "8250."
-
-	module_param_cb(share_irqs, &param_ops_uint, &share_irqs, 0644);
-	module_param_cb(nr_uarts, &param_ops_uint, &nr_uarts, 0644);
-	module_param_cb(skip_txen_test, &param_ops_uint, &skip_txen_test, 0644);
-#ifdef CONFIG_SERIAL_8250_RSA
-	__module_param_call(MODULE_PARAM_PREFIX, probe_rsa,
-		&param_array_ops, .arr = &__param_arr_probe_rsa,
-		0444, -1);
-#endif
-}
-#else
-MODULE_ALIAS("8250");
-#endif
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
new file mode 100644
index 000000000000..2d563cb9057e
--- /dev/null
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -0,0 +1,3448 @@
+/*
+ * Driver for 8250/16550-type serial ports
+ *
+ * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o.
+ *
+ * Copyright (C) 2001 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * A note about mapbase / membase
+ *
+ * mapbase is the physical address of the IO port.
+ * membase is an 'ioremapped' cookie.
+ */
+
+#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_SPARC
+#include
+#endif
+
+#include
+#include
+
+#include "8250.h"
+
+/*
+ * Configuration:
+ *   share_irqs - whether we pass IRQF_SHARED to request_irq().  This option
+ *		is unsafe when used on edge-triggered interrupts.
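One structural note on the uart_config table that the new file defines shortly below: it relies on C99 designated array initializers keyed by the PORT_* constants, so every entry lands at the slot its port type names regardless of the order the entries are written in. An illustrative sketch with invented DEMO_* names:

    #include <stdio.h>

    /* Hypothetical port-type constants standing in for the kernel's PORT_* values. */
    enum { DEMO_UNKNOWN, DEMO_8250, DEMO_16550A, DEMO_MAX };

    struct demo_config {
        const char *name;
        unsigned short fifo_size;
        unsigned short tx_loadsz;
    };

    /* Designated initializers: each entry lands at the index named by its key,
     * so the table cannot drift out of step with the enum ordering. */
    static const struct demo_config demo_config[DEMO_MAX] = {
        [DEMO_UNKNOWN] = { .name = "unknown", .fifo_size = 1,  .tx_loadsz = 1 },
        [DEMO_8250]    = { .name = "8250",    .fifo_size = 1,  .tx_loadsz = 1 },
        [DEMO_16550A]  = { .name = "16550A",  .fifo_size = 16, .tx_loadsz = 16 },
    };

    int main(void)
    {
        int type = DEMO_16550A;

        printf("%s: fifo %u, tx load %u\n", demo_config[type].name,
               demo_config[type].fifo_size, demo_config[type].tx_loadsz);
        return 0;
    }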
+ */ +static unsigned int share_irqs = SERIAL8250_SHARE_IRQS; + +static unsigned int nr_uarts = CONFIG_SERIAL_8250_RUNTIME_UARTS; + +static struct uart_driver serial8250_reg; + +static int serial_index(struct uart_port *port) +{ + return (serial8250_reg.minor - 64) + port->line; +} + +static unsigned int skip_txen_test; /* force skip of txen test at init time */ + +/* + * Debugging. + */ +#if 0 +#define DEBUG_AUTOCONF(fmt...) printk(fmt) +#else +#define DEBUG_AUTOCONF(fmt...) do { } while (0) +#endif + +#if 0 +#define DEBUG_INTR(fmt...) printk(fmt) +#else +#define DEBUG_INTR(fmt...) do { } while (0) +#endif + +#define PASS_LIMIT 512 + +#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) + + +#ifdef CONFIG_SERIAL_8250_DETECT_IRQ +#define CONFIG_SERIAL_DETECT_IRQ 1 +#endif +#ifdef CONFIG_SERIAL_8250_MANY_PORTS +#define CONFIG_SERIAL_MANY_PORTS 1 +#endif + +/* + * HUB6 is always on. This will be removed once the header + * files have been cleaned. + */ +#define CONFIG_HUB6 1 + +#include +/* + * SERIAL_PORT_DFNS tells us about built-in ports that have no + * standard enumeration mechanism. Platforms that can find all + * serial ports via mechanisms like ACPI or PCI need not supply it. + */ +#ifndef SERIAL_PORT_DFNS +#define SERIAL_PORT_DFNS +#endif + +static const struct old_serial_port old_serial_port[] = { + SERIAL_PORT_DFNS /* defined in asm/serial.h */ +}; + +#define UART_NR CONFIG_SERIAL_8250_NR_UARTS + +#ifdef CONFIG_SERIAL_8250_RSA + +#define PORT_RSA_MAX 4 +static unsigned long probe_rsa[PORT_RSA_MAX]; +static unsigned int probe_rsa_count; +#endif /* CONFIG_SERIAL_8250_RSA */ + +struct irq_info { + struct hlist_node node; + int irq; + spinlock_t lock; /* Protects list not the hash */ + struct list_head *head; +}; + +#define NR_IRQ_HASH 32 /* Can be adjusted later */ +static struct hlist_head irq_lists[NR_IRQ_HASH]; +static DEFINE_MUTEX(hash_mutex); /* Used to walk the hash */ + +/* + * Here we define the default xmit fifo size used for each type of UART. + */ +static const struct serial8250_config uart_config[] = { + [PORT_UNKNOWN] = { + .name = "unknown", + .fifo_size = 1, + .tx_loadsz = 1, + }, + [PORT_8250] = { + .name = "8250", + .fifo_size = 1, + .tx_loadsz = 1, + }, + [PORT_16450] = { + .name = "16450", + .fifo_size = 1, + .tx_loadsz = 1, + }, + [PORT_16550] = { + .name = "16550", + .fifo_size = 1, + .tx_loadsz = 1, + }, + [PORT_16550A] = { + .name = "16550A", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO, + }, + [PORT_CIRRUS] = { + .name = "Cirrus", + .fifo_size = 1, + .tx_loadsz = 1, + }, + [PORT_16650] = { + .name = "ST16650", + .fifo_size = 1, + .tx_loadsz = 1, + .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, + }, + [PORT_16650V2] = { + .name = "ST16650V2", + .fifo_size = 32, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | + UART_FCR_T_TRIG_00, + .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, + }, + [PORT_16750] = { + .name = "TI16750", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | + UART_FCR7_64BYTE, + .flags = UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE, + }, + [PORT_STARTECH] = { + .name = "Startech", + .fifo_size = 1, + .tx_loadsz = 1, + }, + [PORT_16C950] = { + .name = "16C950/954", + .fifo_size = 128, + .tx_loadsz = 128, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + /* UART_CAP_EFR breaks billionon CF bluetooth card. 
*/ + .flags = UART_CAP_FIFO | UART_CAP_SLEEP, + }, + [PORT_16654] = { + .name = "ST16654", + .fifo_size = 64, + .tx_loadsz = 32, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | + UART_FCR_T_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, + }, + [PORT_16850] = { + .name = "XR16850", + .fifo_size = 128, + .tx_loadsz = 128, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, + }, + [PORT_RSA] = { + .name = "RSA", + .fifo_size = 2048, + .tx_loadsz = 2048, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11, + .flags = UART_CAP_FIFO, + }, + [PORT_NS16550A] = { + .name = "NS16550A", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_NATSEMI, + }, + [PORT_XSCALE] = { + .name = "XScale", + .fifo_size = 32, + .tx_loadsz = 32, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE, + }, + [PORT_OCTEON] = { + .name = "OCTEON", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO, + }, + [PORT_AR7] = { + .name = "AR7", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_U6_16550A] = { + .name = "U6_16550A", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_TEGRA] = { + .name = "Tegra", + .fifo_size = 32, + .tx_loadsz = 8, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | + UART_FCR_T_TRIG_01, + .flags = UART_CAP_FIFO | UART_CAP_RTOIE, + }, + [PORT_XR17D15X] = { + .name = "XR17D15X", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP, + }, + [PORT_XR17V35X] = { + .name = "XR17V35X", + .fifo_size = 256, + .tx_loadsz = 256, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 | + UART_FCR_T_TRIG_11, + .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP, + }, + [PORT_LPC3220] = { + .name = "LPC3220", + .fifo_size = 64, + .tx_loadsz = 32, + .fcr = UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO | + UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00, + .flags = UART_CAP_FIFO, + }, + [PORT_BRCM_TRUMANAGE] = { + .name = "TruManage", + .fifo_size = 1, + .tx_loadsz = 1024, + .flags = UART_CAP_HFIFO, + }, + [PORT_8250_CIR] = { + .name = "CIR port" + }, + [PORT_ALTR_16550_F32] = { + .name = "Altera 16550 FIFO32", + .fifo_size = 32, + .tx_loadsz = 32, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_ALTR_16550_F64] = { + .name = "Altera 16550 FIFO64", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, + [PORT_ALTR_16550_F128] = { + .name = "Altera 16550 FIFO128", + .fifo_size = 128, + .tx_loadsz = 128, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, +}; + +/* Uart divisor latch read */ +static int default_serial_dl_read(struct uart_8250_port *up) +{ + return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8; +} + +/* Uart divisor latch write */ +static void default_serial_dl_write(struct uart_8250_port *up, int value) +{ + serial_out(up, UART_DLL, value & 0xff); + serial_out(up, UART_DLM, value >> 8 & 0xff); +} + +#if defined(CONFIG_MIPS_ALCHEMY) || 
defined(CONFIG_SERIAL_8250_RT288X) + +/* Au1x00/RT288x UART hardware has a weird register layout */ +static const u8 au_io_in_map[] = { + [UART_RX] = 0, + [UART_IER] = 2, + [UART_IIR] = 3, + [UART_LCR] = 5, + [UART_MCR] = 6, + [UART_LSR] = 7, + [UART_MSR] = 8, +}; + +static const u8 au_io_out_map[] = { + [UART_TX] = 1, + [UART_IER] = 2, + [UART_FCR] = 4, + [UART_LCR] = 5, + [UART_MCR] = 6, +}; + +static unsigned int au_serial_in(struct uart_port *p, int offset) +{ + offset = au_io_in_map[offset] << p->regshift; + return __raw_readl(p->membase + offset); +} + +static void au_serial_out(struct uart_port *p, int offset, int value) +{ + offset = au_io_out_map[offset] << p->regshift; + __raw_writel(value, p->membase + offset); +} + +/* Au1x00 haven't got a standard divisor latch */ +static int au_serial_dl_read(struct uart_8250_port *up) +{ + return __raw_readl(up->port.membase + 0x28); +} + +static void au_serial_dl_write(struct uart_8250_port *up, int value) +{ + __raw_writel(value, up->port.membase + 0x28); +} + +#endif + +static unsigned int hub6_serial_in(struct uart_port *p, int offset) +{ + offset = offset << p->regshift; + outb(p->hub6 - 1 + offset, p->iobase); + return inb(p->iobase + 1); +} + +static void hub6_serial_out(struct uart_port *p, int offset, int value) +{ + offset = offset << p->regshift; + outb(p->hub6 - 1 + offset, p->iobase); + outb(value, p->iobase + 1); +} + +static unsigned int mem_serial_in(struct uart_port *p, int offset) +{ + offset = offset << p->regshift; + return readb(p->membase + offset); +} + +static void mem_serial_out(struct uart_port *p, int offset, int value) +{ + offset = offset << p->regshift; + writeb(value, p->membase + offset); +} + +static void mem32_serial_out(struct uart_port *p, int offset, int value) +{ + offset = offset << p->regshift; + writel(value, p->membase + offset); +} + +static unsigned int mem32_serial_in(struct uart_port *p, int offset) +{ + offset = offset << p->regshift; + return readl(p->membase + offset); +} + +static unsigned int io_serial_in(struct uart_port *p, int offset) +{ + offset = offset << p->regshift; + return inb(p->iobase + offset); +} + +static void io_serial_out(struct uart_port *p, int offset, int value) +{ + offset = offset << p->regshift; + outb(value, p->iobase + offset); +} + +static int serial8250_default_handle_irq(struct uart_port *port); +static int exar_handle_irq(struct uart_port *port); + +static void set_io_from_upio(struct uart_port *p) +{ + struct uart_8250_port *up = + container_of(p, struct uart_8250_port, port); + + up->dl_read = default_serial_dl_read; + up->dl_write = default_serial_dl_write; + + switch (p->iotype) { + case UPIO_HUB6: + p->serial_in = hub6_serial_in; + p->serial_out = hub6_serial_out; + break; + + case UPIO_MEM: + p->serial_in = mem_serial_in; + p->serial_out = mem_serial_out; + break; + + case UPIO_MEM32: + p->serial_in = mem32_serial_in; + p->serial_out = mem32_serial_out; + break; + +#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X) + case UPIO_AU: + p->serial_in = au_serial_in; + p->serial_out = au_serial_out; + up->dl_read = au_serial_dl_read; + up->dl_write = au_serial_dl_write; + break; +#endif + + default: + p->serial_in = io_serial_in; + p->serial_out = io_serial_out; + break; + } + /* Remember loaded iotype */ + up->cur_iotype = p->iotype; + p->handle_irq = serial8250_default_handle_irq; +} + +static void +serial_port_out_sync(struct uart_port *p, int offset, int value) +{ + switch (p->iotype) { + case UPIO_MEM: + case UPIO_MEM32: + case UPIO_AU: + 
p->serial_out(p, offset, value); + p->serial_in(p, UART_LCR); /* safe, no side-effects */ + break; + default: + p->serial_out(p, offset, value); + } +} + +/* + * For the 16C950 + */ +static void serial_icr_write(struct uart_8250_port *up, int offset, int value) +{ + serial_out(up, UART_SCR, offset); + serial_out(up, UART_ICR, value); +} + +static unsigned int serial_icr_read(struct uart_8250_port *up, int offset) +{ + unsigned int value; + + serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); + serial_out(up, UART_SCR, offset); + value = serial_in(up, UART_ICR); + serial_icr_write(up, UART_ACR, up->acr); + + return value; +} + +/* + * FIFO support. + */ +static void serial8250_clear_fifos(struct uart_8250_port *p) +{ + if (p->capabilities & UART_CAP_FIFO) { + serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO | + UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); + serial_out(p, UART_FCR, 0); + } +} + +void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) +{ + unsigned char fcr; + + serial8250_clear_fifos(p); + fcr = uart_config[p->port.type].fcr; + serial_out(p, UART_FCR, fcr); +} +EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); + +/* + * IER sleep support. UARTs which have EFRs need the "extended + * capability" bit enabled. Note that on XR16C850s, we need to + * reset LCR to write to IER. + */ +static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) +{ + /* + * Exar UARTs have a SLEEP register that enables or disables + * each UART to enter sleep mode separately. On the XR17V35x the + * register is accessible to each UART at the UART_EXAR_SLEEP + * offset but the UART channel may only write to the corresponding + * bit. + */ + if ((p->port.type == PORT_XR17V35X) || + (p->port.type == PORT_XR17D15X)) { + serial_out(p, UART_EXAR_SLEEP, 0xff); + return; + } + + if (p->capabilities & UART_CAP_SLEEP) { + if (p->capabilities & UART_CAP_EFR) { + serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(p, UART_EFR, UART_EFR_ECB); + serial_out(p, UART_LCR, 0); + } + serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); + if (p->capabilities & UART_CAP_EFR) { + serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(p, UART_EFR, 0); + serial_out(p, UART_LCR, 0); + } + } +} + +#ifdef CONFIG_SERIAL_8250_RSA +/* + * Attempts to turn on the RSA FIFO. Returns zero on failure. + * We set the port uart clock rate if we succeed. + */ +static int __enable_rsa(struct uart_8250_port *up) +{ + unsigned char mode; + int result; + + mode = serial_in(up, UART_RSA_MSR); + result = mode & UART_RSA_MSR_FIFO; + + if (!result) { + serial_out(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO); + mode = serial_in(up, UART_RSA_MSR); + result = mode & UART_RSA_MSR_FIFO; + } + + if (result) + up->port.uartclk = SERIAL_RSA_BAUD_BASE * 16; + + return result; +} + +static void enable_rsa(struct uart_8250_port *up) +{ + if (up->port.type == PORT_RSA) { + if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) { + spin_lock_irq(&up->port.lock); + __enable_rsa(up); + spin_unlock_irq(&up->port.lock); + } + if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) + serial_out(up, UART_RSA_FRR, 0); + } +} + +/* + * Attempts to turn off the RSA FIFO. Returns zero on failure. + * It is unknown why interrupts were disabled in here. However, + * the caller is expected to preserve this behaviour by grabbing + * the spinlock before calling this function. 
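serial_icr_write() and serial_icr_read() above reach the 16C950's indexed control registers by writing the index into SCR and the data into ICR. The following toy model shows the write half of that indexed scheme; demo_out() and the register arrays are fabricated, and the ACR_ICRRD read path is omitted for brevity.

    #include <stdio.h>

    /* Toy model of the 16C950 indexed-register scheme: SCR latches an index,
     * and a subsequent write to ICR stores through that index. */

    #define DEMO_SCR 0
    #define DEMO_ICR 1

    static unsigned char indexed[256];  /* the hidden indexed register bank */
    static unsigned char scr_latch;

    static void demo_out(int port, unsigned char v)
    {
        if (port == DEMO_SCR)
            scr_latch = v;              /* remember the index */
        else
            indexed[scr_latch] = v;     /* data write lands at that index */
    }

    static void demo_icr_write(unsigned char offset, unsigned char value)
    {
        demo_out(DEMO_SCR, offset);     /* same two-step as serial_icr_write() */
        demo_out(DEMO_ICR, value);
    }

    int main(void)
    {
        demo_icr_write(0x08, 0x40);     /* arbitrary index and value */
        printf("indexed[0x08] = 0x%02x\n", indexed[0x08]);
        return 0;
    }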
+ */ +static void disable_rsa(struct uart_8250_port *up) +{ + unsigned char mode; + int result; + + if (up->port.type == PORT_RSA && + up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { + spin_lock_irq(&up->port.lock); + + mode = serial_in(up, UART_RSA_MSR); + result = !(mode & UART_RSA_MSR_FIFO); + + if (!result) { + serial_out(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO); + mode = serial_in(up, UART_RSA_MSR); + result = !(mode & UART_RSA_MSR_FIFO); + } + + if (result) + up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; + spin_unlock_irq(&up->port.lock); + } +} +#endif /* CONFIG_SERIAL_8250_RSA */ + +/* + * This is a quickie test to see how big the FIFO is. + * It doesn't work at all the time, more's the pity. + */ +static int size_fifo(struct uart_8250_port *up) +{ + unsigned char old_fcr, old_mcr, old_lcr; + unsigned short old_dl; + int count; + + old_lcr = serial_in(up, UART_LCR); + serial_out(up, UART_LCR, 0); + old_fcr = serial_in(up, UART_FCR); + old_mcr = serial_in(up, UART_MCR); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | + UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); + serial_out(up, UART_MCR, UART_MCR_LOOP); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); + old_dl = serial_dl_read(up); + serial_dl_write(up, 0x0001); + serial_out(up, UART_LCR, 0x03); + for (count = 0; count < 256; count++) + serial_out(up, UART_TX, count); + mdelay(20);/* FIXME - schedule_timeout */ + for (count = 0; (serial_in(up, UART_LSR) & UART_LSR_DR) && + (count < 256); count++) + serial_in(up, UART_RX); + serial_out(up, UART_FCR, old_fcr); + serial_out(up, UART_MCR, old_mcr); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); + serial_dl_write(up, old_dl); + serial_out(up, UART_LCR, old_lcr); + + return count; +} + +/* + * Read UART ID using the divisor method - set DLL and DLM to zero + * and the revision will be in DLL and device type in DLM. We + * preserve the device state across this. + */ +static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) +{ + unsigned char old_dll, old_dlm, old_lcr; + unsigned int id; + + old_lcr = serial_in(p, UART_LCR); + serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A); + + old_dll = serial_in(p, UART_DLL); + old_dlm = serial_in(p, UART_DLM); + + serial_out(p, UART_DLL, 0); + serial_out(p, UART_DLM, 0); + + id = serial_in(p, UART_DLL) | serial_in(p, UART_DLM) << 8; + + serial_out(p, UART_DLL, old_dll); + serial_out(p, UART_DLM, old_dlm); + serial_out(p, UART_LCR, old_lcr); + + return id; +} + +/* + * This is a helper routine to autodetect StarTech/Exar/Oxsemi UART's. + * When this function is called we know it is at least a StarTech + * 16650 V2, but it might be one of several StarTech UARTs, or one of + * its clones. (We treat the broken original StarTech 16650 V1 as a + * 16550, and why not? Startech doesn't seem to even acknowledge its + * existence.) + * + * What evil have men's minds wrought... + */ +static void autoconfig_has_efr(struct uart_8250_port *up) +{ + unsigned int id1, id2, id3, rev; + + /* + * Everything with an EFR has SLEEP + */ + up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; + + /* + * First we check to see if it's an Oxford Semiconductor UART. + * + * If we have to do this here because some non-National + * Semiconductor clone chips lock up if you try writing to the + * LSR register (which serial_icr_read does) + */ + + /* + * Check for Oxford Semiconductor 16C950. + * + * EFR [4] must be set else this test fails. 
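size_fifo() above estimates the FIFO depth by putting the UART in loopback, writing 256 bytes, and counting how many can be read back. A self-contained model with a fake 16-byte FIFO (all names invented) prints the measured size:

    #include <stdio.h>

    #define FIFO_DEPTH 16

    static unsigned char fifo[FIFO_DEPTH];
    static int head, count;

    static void tx(unsigned char c)
    {
        if (count < FIFO_DEPTH) {          /* bytes beyond the depth are lost */
            fifo[(head + count) % FIFO_DEPTH] = c;
            count++;
        }
    }

    static int rx_ready(void) { return count > 0; }

    static unsigned char rx(void)
    {
        unsigned char c = fifo[head];
        head = (head + 1) % FIFO_DEPTH;
        count--;
        return c;
    }

    int main(void)
    {
        int n;

        for (n = 0; n < 256; n++)          /* write more than any FIFO holds */
            tx((unsigned char)n);

        for (n = 0; rx_ready() && n < 256; n++)
            rx();                          /* drain and count, as size_fifo() does */

        printf("measured FIFO size: %d\n", n);
        return 0;
    }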
+ * + * This shouldn't be necessary, but Mike Hudson (Exoray@isys.ca) + * claims that it's needed for 952 dual UART's (which are not + * recommended for new designs). + */ + up->acr = 0; + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(up, UART_EFR, UART_EFR_ECB); + serial_out(up, UART_LCR, 0x00); + id1 = serial_icr_read(up, UART_ID1); + id2 = serial_icr_read(up, UART_ID2); + id3 = serial_icr_read(up, UART_ID3); + rev = serial_icr_read(up, UART_REV); + + DEBUG_AUTOCONF("950id=%02x:%02x:%02x:%02x ", id1, id2, id3, rev); + + if (id1 == 0x16 && id2 == 0xC9 && + (id3 == 0x50 || id3 == 0x52 || id3 == 0x54)) { + up->port.type = PORT_16C950; + + /* + * Enable work around for the Oxford Semiconductor 952 rev B + * chip which causes it to seriously miscalculate baud rates + * when DLL is 0. + */ + if (id3 == 0x52 && rev == 0x01) + up->bugs |= UART_BUG_QUOT; + return; + } + + /* + * We check for a XR16C850 by setting DLL and DLM to 0, and then + * reading back DLL and DLM. The chip type depends on the DLM + * value read back: + * 0x10 - XR16C850 and the DLL contains the chip revision. + * 0x12 - XR16C2850. + * 0x14 - XR16C854. + */ + id1 = autoconfig_read_divisor_id(up); + DEBUG_AUTOCONF("850id=%04x ", id1); + + id2 = id1 >> 8; + if (id2 == 0x10 || id2 == 0x12 || id2 == 0x14) { + up->port.type = PORT_16850; + return; + } + + /* + * It wasn't an XR16C850. + * + * We distinguish between the '654 and the '650 by counting + * how many bytes are in the FIFO. I'm using this for now, + * since that's the technique that was sent to me in the + * serial driver update, but I'm not convinced this works. + * I've had problems doing this in the past. -TYT + */ + if (size_fifo(up) == 64) + up->port.type = PORT_16654; + else + up->port.type = PORT_16650V2; +} + +/* + * We detected a chip without a FIFO. Only two fall into + * this category - the original 8250 and the 16450. The + * 16450 has a scratch register (accessible with LCR=0) + */ +static void autoconfig_8250(struct uart_8250_port *up) +{ + unsigned char scratch, status1, status2; + + up->port.type = PORT_8250; + + scratch = serial_in(up, UART_SCR); + serial_out(up, UART_SCR, 0xa5); + status1 = serial_in(up, UART_SCR); + serial_out(up, UART_SCR, 0x5a); + status2 = serial_in(up, UART_SCR); + serial_out(up, UART_SCR, scratch); + + if (status1 == 0xa5 && status2 == 0x5a) + up->port.type = PORT_16450; +} + +static int broken_efr(struct uart_8250_port *up) +{ + /* + * Exar ST16C2550 "A2" devices incorrectly detect as + * having an EFR, and report an ID of 0x0201. See + * http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-11/4812.html + */ + if (autoconfig_read_divisor_id(up) == 0x0201 && size_fifo(up) == 16) + return 1; + + return 0; +} + +static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) +{ + unsigned char status; + + status = serial_in(up, 0x04); /* EXCR2 */ +#define PRESL(x) ((x) & 0x30) + if (PRESL(status) == 0x10) { + /* already in high speed mode */ + return 0; + } else { + status &= ~0xB0; /* Disable LOCK, mask out PRESL[01] */ + status |= 0x10; /* 1.625 divisor for baud_base --> 921600 */ + serial_out(up, 0x04, status); + } + return 1; +} + +/* + * We know that the chip has FIFOs. Does it have an EFR? The + * EFR is located in the same register position as the IIR and + * we know the top two bits of the IIR are currently set. The + * EFR should contain zero. Try to read the EFR. 
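autoconfig_8250() further down in this hunk separates an original 8250 from a 16450 by writing 0xa5 and 0x5a to the scratch register and checking whether the values read back; only the 16450 implements the register. A stand-alone model (toggle has_scr to simulate either part):

    #include <stdio.h>

    static int has_scr = 1;                   /* 1: behave like a 16450 */
    static unsigned char scr_store;

    static void scr_write(unsigned char v) { if (has_scr) scr_store = v; }
    static unsigned char scr_read(void)    { return has_scr ? scr_store : 0xff; }

    int main(void)
    {
        unsigned char saved = scr_read(), s1, s2;

        scr_write(0xa5); s1 = scr_read();
        scr_write(0x5a); s2 = scr_read();
        scr_write(saved);                     /* restore, as the driver does */

        printf("detected: %s\n",
               (s1 == 0xa5 && s2 == 0x5a) ? "16450 (scratch ok)" : "8250");
        return 0;
    }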
+ */ +static void autoconfig_16550a(struct uart_8250_port *up) +{ + unsigned char status1, status2; + unsigned int iersave; + + up->port.type = PORT_16550A; + up->capabilities |= UART_CAP_FIFO; + + /* + * XR17V35x UARTs have an extra divisor register, DLD + * that gets enabled with when DLAB is set which will + * cause the device to incorrectly match and assign + * port type to PORT_16650. The EFR for this UART is + * found at offset 0x09. Instead check the Deice ID (DVID) + * register for a 2, 4 or 8 port UART. + */ + if (up->port.flags & UPF_EXAR_EFR) { + status1 = serial_in(up, UART_EXAR_DVID); + if (status1 == 0x82 || status1 == 0x84 || status1 == 0x88) { + DEBUG_AUTOCONF("Exar XR17V35x "); + up->port.type = PORT_XR17V35X; + up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP; + + return; + } + + } + + /* + * Check for presence of the EFR when DLAB is set. + * Only ST16C650V1 UARTs pass this test. + */ + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); + if (serial_in(up, UART_EFR) == 0) { + serial_out(up, UART_EFR, 0xA8); + if (serial_in(up, UART_EFR) != 0) { + DEBUG_AUTOCONF("EFRv1 "); + up->port.type = PORT_16650; + up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; + } else { + DEBUG_AUTOCONF("Motorola 8xxx DUART "); + } + serial_out(up, UART_EFR, 0); + return; + } + + /* + * Maybe it requires 0xbf to be written to the LCR. + * (other ST16C650V2 UARTs, TI16C752A, etc) + */ + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) { + DEBUG_AUTOCONF("EFRv2 "); + autoconfig_has_efr(up); + return; + } + + /* + * Check for a National Semiconductor SuperIO chip. + * Attempt to switch to bank 2, read the value of the LOOP bit + * from EXCR1. Switch back to bank 0, change it in MCR. Then + * switch back to bank 2, read it from EXCR1 again and check + * it's changed. If so, set baud_base in EXCR2 to 921600. -- dwmw2 + */ + serial_out(up, UART_LCR, 0); + status1 = serial_in(up, UART_MCR); + serial_out(up, UART_LCR, 0xE0); + status2 = serial_in(up, 0x02); /* EXCR1 */ + + if (!((status2 ^ status1) & UART_MCR_LOOP)) { + serial_out(up, UART_LCR, 0); + serial_out(up, UART_MCR, status1 ^ UART_MCR_LOOP); + serial_out(up, UART_LCR, 0xE0); + status2 = serial_in(up, 0x02); /* EXCR1 */ + serial_out(up, UART_LCR, 0); + serial_out(up, UART_MCR, status1); + + if ((status2 ^ status1) & UART_MCR_LOOP) { + unsigned short quot; + + serial_out(up, UART_LCR, 0xE0); + + quot = serial_dl_read(up); + quot <<= 3; + + if (ns16550a_goto_highspeed(up)) + serial_dl_write(up, quot); + + serial_out(up, UART_LCR, 0); + + up->port.uartclk = 921600*16; + up->port.type = PORT_NS16550A; + up->capabilities |= UART_NATSEMI; + return; + } + } + + /* + * No EFR. Try to detect a TI16750, which only sets bit 5 of + * the IIR when 64 byte FIFO mode is enabled when DLAB is set. + * Try setting it with and without DLAB set. Cheap clones + * set bit 5 without DLAB set. 
+ */ + serial_out(up, UART_LCR, 0); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); + status1 = serial_in(up, UART_IIR) >> 5; + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); + status2 = serial_in(up, UART_IIR) >> 5; + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_out(up, UART_LCR, 0); + + DEBUG_AUTOCONF("iir1=%d iir2=%d ", status1, status2); + + if (status1 == 6 && status2 == 7) { + up->port.type = PORT_16750; + up->capabilities |= UART_CAP_AFE | UART_CAP_SLEEP; + return; + } + + /* + * Try writing and reading the UART_IER_UUE bit (b6). + * If it works, this is probably one of the Xscale platform's + * internal UARTs. + * We're going to explicitly set the UUE bit to 0 before + * trying to write and read a 1 just to make sure it's not + * already a 1 and maybe locked there before we even start start. + */ + iersave = serial_in(up, UART_IER); + serial_out(up, UART_IER, iersave & ~UART_IER_UUE); + if (!(serial_in(up, UART_IER) & UART_IER_UUE)) { + /* + * OK it's in a known zero state, try writing and reading + * without disturbing the current state of the other bits. + */ + serial_out(up, UART_IER, iersave | UART_IER_UUE); + if (serial_in(up, UART_IER) & UART_IER_UUE) { + /* + * It's an Xscale. + * We'll leave the UART_IER_UUE bit set to 1 (enabled). + */ + DEBUG_AUTOCONF("Xscale "); + up->port.type = PORT_XSCALE; + up->capabilities |= UART_CAP_UUE | UART_CAP_RTOIE; + return; + } + } else { + /* + * If we got here we couldn't force the IER_UUE bit to 0. + * Log it and continue. + */ + DEBUG_AUTOCONF("Couldn't force IER_UUE to 0 "); + } + serial_out(up, UART_IER, iersave); + + /* + * Exar uarts have EFR in a weird location + */ + if (up->port.flags & UPF_EXAR_EFR) { + DEBUG_AUTOCONF("Exar XR17D15x "); + up->port.type = PORT_XR17D15X; + up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP; + + return; + } + + /* + * We distinguish between 16550A and U6 16550A by counting + * how many bytes are in the FIFO. + */ + if (up->port.type == PORT_16550A && size_fifo(up) == 64) { + up->port.type = PORT_U6_16550A; + up->capabilities |= UART_CAP_AFE; + } +} + +/* + * This routine is called by rs_init() to initialize a specific serial + * port. It determines what type of UART chip this serial port is + * using: 8250, 16450, 16550, 16550A. The important question is + * whether or not this UART is a 16550A or not, since this will + * determine whether or not we can use its FIFO features or not. + */ +static void autoconfig(struct uart_8250_port *up, unsigned int probeflags) +{ + unsigned char status1, scratch, scratch2, scratch3; + unsigned char save_lcr, save_mcr; + struct uart_port *port = &up->port; + unsigned long flags; + unsigned int old_capabilities; + + if (!port->iobase && !port->mapbase && !port->membase) + return; + + DEBUG_AUTOCONF("ttyS%d: autoconf (0x%04lx, 0x%p): ", + serial_index(port), port->iobase, port->membase); + + /* + * We really do need global IRQs disabled here - we're going to + * be frobbing the chips IRQ enable register to see if it exists. + */ + spin_lock_irqsave(&port->lock, flags); + + up->capabilities = 0; + up->bugs = 0; + + if (!(port->flags & UPF_BUGGY_UART)) { + /* + * Do a simple existence test first; if we fail this, + * there's no point trying anything else. + * + * 0x80 is used as a nonsense port to prevent against + * false positives due to ISA bus float. 
The + * assumption is that 0x80 is a non-existent port; + * which should be safe since include/asm/io.h also + * makes this assumption. + * + * Note: this is safe as long as MCR bit 4 is clear + * and the device is in "PC" mode. + */ + scratch = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0); +#ifdef __i386__ + outb(0xff, 0x080); +#endif + /* + * Mask out IER[7:4] bits for test as some UARTs (e.g. TL + * 16C754B) allow only to modify them if an EFR bit is set. + */ + scratch2 = serial_in(up, UART_IER) & 0x0f; + serial_out(up, UART_IER, 0x0F); +#ifdef __i386__ + outb(0, 0x080); +#endif + scratch3 = serial_in(up, UART_IER) & 0x0f; + serial_out(up, UART_IER, scratch); + if (scratch2 != 0 || scratch3 != 0x0F) { + /* + * We failed; there's nothing here + */ + spin_unlock_irqrestore(&port->lock, flags); + DEBUG_AUTOCONF("IER test failed (%02x, %02x) ", + scratch2, scratch3); + goto out; + } + } + + save_mcr = serial_in(up, UART_MCR); + save_lcr = serial_in(up, UART_LCR); + + /* + * Check to see if a UART is really there. Certain broken + * internal modems based on the Rockwell chipset fail this + * test, because they apparently don't implement the loopback + * test mode. So this test is skipped on the COM 1 through + * COM 4 ports. This *should* be safe, since no board + * manufacturer would be stupid enough to design a board + * that conflicts with COM 1-4 --- we hope! + */ + if (!(port->flags & UPF_SKIP_TEST)) { + serial_out(up, UART_MCR, UART_MCR_LOOP | 0x0A); + status1 = serial_in(up, UART_MSR) & 0xF0; + serial_out(up, UART_MCR, save_mcr); + if (status1 != 0x90) { + spin_unlock_irqrestore(&port->lock, flags); + DEBUG_AUTOCONF("LOOP test failed (%02x) ", + status1); + goto out; + } + } + + /* + * We're pretty sure there's a port here. Lets find out what + * type of port it is. The IIR top two bits allows us to find + * out if it's 8250 or 16450, 16550, 16550A or later. This + * determines what we test for next. + * + * We also initialise the EFR (if any) to zero for later. The + * EFR occupies the same register location as the FCR and IIR. + */ + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(up, UART_EFR, 0); + serial_out(up, UART_LCR, 0); + + serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = serial_in(up, UART_IIR) >> 6; + + switch (scratch) { + case 0: + autoconfig_8250(up); + break; + case 1: + port->type = PORT_UNKNOWN; + break; + case 2: + port->type = PORT_16550; + break; + case 3: + autoconfig_16550a(up); + break; + } + +#ifdef CONFIG_SERIAL_8250_RSA + /* + * Only probe for RSA ports if we got the region. + */ + if (port->type == PORT_16550A && probeflags & PROBE_RSA) { + int i; + + for (i = 0 ; i < probe_rsa_count; ++i) { + if (probe_rsa[i] == port->iobase && __enable_rsa(up)) { + port->type = PORT_RSA; + break; + } + } + } +#endif + + serial_out(up, UART_LCR, save_lcr); + + port->fifosize = uart_config[up->port.type].fifo_size; + old_capabilities = up->capabilities; + up->capabilities = uart_config[port->type].flags; + up->tx_loadsz = uart_config[port->type].tx_loadsz; + + if (port->type == PORT_UNKNOWN) + goto out_lock; + + /* + * Reset the UART. 
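Once the existence and loopback tests above pass, autoconfig() enables the FIFO, reads IIR, and dispatches on its top two bits. A sketch of that classification; classify() returning strings is an illustrative substitute for the real calls into autoconfig_8250() and autoconfig_16550a():

    #include <stdio.h>

    /* The top two IIR bits, read with the FIFO enabled, classify the part. */
    static const char *classify(unsigned char iir)
    {
        switch (iir >> 6) {
        case 0:  return "8250/16450 (no FIFO)";
        case 1:  return "unknown";
        case 2:  return "16550 (broken FIFO)";
        default: return "16550A or later";
        }
    }

    int main(void)
    {
        unsigned char samples[] = { 0x00, 0x40, 0x80, 0xc0 };
        unsigned i;

        for (i = 0; i < sizeof(samples); i++)
            printf("IIR=0x%02x -> %s\n", samples[i], classify(samples[i]));
        return 0;
    }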
+ */ +#ifdef CONFIG_SERIAL_8250_RSA + if (port->type == PORT_RSA) + serial_out(up, UART_RSA_FRR, 0); +#endif + serial_out(up, UART_MCR, save_mcr); + serial8250_clear_fifos(up); + serial_in(up, UART_RX); + if (up->capabilities & UART_CAP_UUE) + serial_out(up, UART_IER, UART_IER_UUE); + else + serial_out(up, UART_IER, 0); + +out_lock: + spin_unlock_irqrestore(&port->lock, flags); + if (up->capabilities != old_capabilities) { + printk(KERN_WARNING + "ttyS%d: detected caps %08x should be %08x\n", + serial_index(port), old_capabilities, + up->capabilities); + } +out: + DEBUG_AUTOCONF("iir=%d ", scratch); + DEBUG_AUTOCONF("type=%s\n", uart_config[port->type].name); +} + +static void autoconfig_irq(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + unsigned char save_mcr, save_ier; + unsigned char save_ICP = 0; + unsigned int ICP = 0; + unsigned long irqs; + int irq; + + if (port->flags & UPF_FOURPORT) { + ICP = (port->iobase & 0xfe0) | 0x1f; + save_ICP = inb_p(ICP); + outb_p(0x80, ICP); + inb_p(ICP); + } + + /* forget possible initially masked and pending IRQ */ + probe_irq_off(probe_irq_on()); + save_mcr = serial_in(up, UART_MCR); + save_ier = serial_in(up, UART_IER); + serial_out(up, UART_MCR, UART_MCR_OUT1 | UART_MCR_OUT2); + + irqs = probe_irq_on(); + serial_out(up, UART_MCR, 0); + udelay(10); + if (port->flags & UPF_FOURPORT) { + serial_out(up, UART_MCR, + UART_MCR_DTR | UART_MCR_RTS); + } else { + serial_out(up, UART_MCR, + UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2); + } + serial_out(up, UART_IER, 0x0f); /* enable all intrs */ + serial_in(up, UART_LSR); + serial_in(up, UART_RX); + serial_in(up, UART_IIR); + serial_in(up, UART_MSR); + serial_out(up, UART_TX, 0xFF); + udelay(20); + irq = probe_irq_off(irqs); + + serial_out(up, UART_MCR, save_mcr); + serial_out(up, UART_IER, save_ier); + + if (port->flags & UPF_FOURPORT) + outb_p(save_ICP, ICP); + + port->irq = (irq > 0) ? irq : 0; +} + +static inline void __stop_tx(struct uart_8250_port *p) +{ + if (p->ier & UART_IER_THRI) { + p->ier &= ~UART_IER_THRI; + serial_out(p, UART_IER, p->ier); + } +} + +static void serial8250_stop_tx(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + + __stop_tx(up); + + /* + * We really want to stop the transmitter from sending. + */ + if (port->type == PORT_16C950) { + up->acr |= UART_ACR_TXDIS; + serial_icr_write(up, UART_ACR, up->acr); + } +} + +static void serial8250_start_tx(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + + if (up->dma && !serial8250_tx_dma(up)) { + return; + } else if (!(up->ier & UART_IER_THRI)) { + up->ier |= UART_IER_THRI; + serial_port_out(port, UART_IER, up->ier); + + if (up->bugs & UART_BUG_TXEN) { + unsigned char lsr; + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + if (lsr & UART_LSR_TEMT) + serial8250_tx_chars(up); + } + } + + /* + * Re-enable the transmitter if we disabled it. 
+ */ + if (port->type == PORT_16C950 && up->acr & UART_ACR_TXDIS) { + up->acr &= ~UART_ACR_TXDIS; + serial_icr_write(up, UART_ACR, up->acr); + } +} + +static void serial8250_stop_rx(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + + up->ier &= ~UART_IER_RLSI; + up->port.read_status_mask &= ~UART_LSR_DR; + serial_port_out(port, UART_IER, up->ier); +} + +static void serial8250_enable_ms(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + + /* no MSR capabilities */ + if (up->bugs & UART_BUG_NOMSR) + return; + + up->ier |= UART_IER_MSI; + serial_port_out(port, UART_IER, up->ier); +} + +/* + * serial8250_rx_chars: processes according to the passed in LSR + * value, and returns the remaining LSR bits not handled + * by this Rx routine. + */ +unsigned char +serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr) +{ + struct uart_port *port = &up->port; + unsigned char ch; + int max_count = 256; + char flag; + + do { + if (likely(lsr & UART_LSR_DR)) + ch = serial_in(up, UART_RX); + else + /* + * Intel 82571 has a Serial Over Lan device that will + * set UART_LSR_BI without setting UART_LSR_DR when + * it receives a break. To avoid reading from the + * receive buffer without UART_LSR_DR bit set, we + * just force the read character to be 0 + */ + ch = 0; + + flag = TTY_NORMAL; + port->icount.rx++; + + lsr |= up->lsr_saved_flags; + up->lsr_saved_flags = 0; + + if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { + if (lsr & UART_LSR_BI) { + lsr &= ~(UART_LSR_FE | UART_LSR_PE); + port->icount.brk++; + /* + * We do the SysRQ and SAK checking + * here because otherwise the break + * may get masked by ignore_status_mask + * or read_status_mask. + */ + if (uart_handle_break(port)) + goto ignore_char; + } else if (lsr & UART_LSR_PE) + port->icount.parity++; + else if (lsr & UART_LSR_FE) + port->icount.frame++; + if (lsr & UART_LSR_OE) + port->icount.overrun++; + + /* + * Mask off conditions which should be ignored. 
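The receive path at this point counts every break, parity, framing, and overrun condition first, and only then masks the LSR with read_status_mask to pick the flag handed to the tty layer, so the statistics stay accurate even for conditions the user asked to ignore. A stand-alone model of that two-step accounting (the LSR bit values match serial_reg.h; the struct and function names are invented):

    #include <stdio.h>

    #define LSR_OE 0x02
    #define LSR_PE 0x04
    #define LSR_FE 0x08
    #define LSR_BI 0x10

    struct counts { int brk, parity, frame, overrun; };

    static char classify(unsigned char lsr, unsigned char mask, struct counts *c)
    {
        if (lsr & LSR_BI) {
            lsr &= ~(LSR_FE | LSR_PE);  /* a break often sets FE/PE too */
            c->brk++;
        } else if (lsr & LSR_PE)
            c->parity++;
        else if (lsr & LSR_FE)
            c->frame++;
        if (lsr & LSR_OE)
            c->overrun++;

        lsr &= mask;                    /* drop conditions not in the mask */
        if (lsr & LSR_BI) return 'B';
        if (lsr & LSR_PE) return 'P';
        if (lsr & LSR_FE) return 'F';
        return 'N';                     /* TTY_NORMAL */
    }

    int main(void)
    {
        struct counts c = { 0 };

        printf("flag=%c\n", classify(LSR_BI | LSR_FE, LSR_BI, &c));
        printf("flag=%c\n", classify(LSR_PE, 0, &c));   /* counted, yet ignored */
        printf("brk=%d parity=%d\n", c.brk, c.parity);
        return 0;
    }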
+ */ + lsr &= port->read_status_mask; + + if (lsr & UART_LSR_BI) { + DEBUG_INTR("handling break...."); + flag = TTY_BREAK; + } else if (lsr & UART_LSR_PE) + flag = TTY_PARITY; + else if (lsr & UART_LSR_FE) + flag = TTY_FRAME; + } + if (uart_handle_sysrq_char(port, ch)) + goto ignore_char; + + uart_insert_char(port, lsr, UART_LSR_OE, ch, flag); + +ignore_char: + lsr = serial_in(up, UART_LSR); + } while ((lsr & (UART_LSR_DR | UART_LSR_BI)) && (max_count-- > 0)); + spin_unlock(&port->lock); + tty_flip_buffer_push(&port->state->port); + spin_lock(&port->lock); + return lsr; +} +EXPORT_SYMBOL_GPL(serial8250_rx_chars); + +void serial8250_tx_chars(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + struct circ_buf *xmit = &port->state->xmit; + int count; + + if (port->x_char) { + serial_out(up, UART_TX, port->x_char); + port->icount.tx++; + port->x_char = 0; + return; + } + if (uart_tx_stopped(port)) { + serial8250_stop_tx(port); + return; + } + if (uart_circ_empty(xmit)) { + __stop_tx(up); + return; + } + + count = up->tx_loadsz; + do { + serial_out(up, UART_TX, xmit->buf[xmit->tail]); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + if (uart_circ_empty(xmit)) + break; + if (up->capabilities & UART_CAP_HFIFO) { + if ((serial_port_in(port, UART_LSR) & BOTH_EMPTY) != + BOTH_EMPTY) + break; + } + } while (--count > 0); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + DEBUG_INTR("THRE..."); + + if (uart_circ_empty(xmit)) + __stop_tx(up); +} +EXPORT_SYMBOL_GPL(serial8250_tx_chars); + +unsigned int serial8250_modem_status(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + unsigned int status = serial_in(up, UART_MSR); + + status |= up->msr_saved_flags; + up->msr_saved_flags = 0; + if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && + port->state != NULL) { + if (status & UART_MSR_TERI) + port->icount.rng++; + if (status & UART_MSR_DDSR) + port->icount.dsr++; + if (status & UART_MSR_DDCD) + uart_handle_dcd_change(port, status & UART_MSR_DCD); + if (status & UART_MSR_DCTS) + uart_handle_cts_change(port, status & UART_MSR_CTS); + + wake_up_interruptible(&port->state->port.delta_msr_wait); + } + + return status; +} +EXPORT_SYMBOL_GPL(serial8250_modem_status); + +/* + * This handles the interrupt from one port. + */ +int serial8250_handle_irq(struct uart_port *port, unsigned int iir) +{ + unsigned char status; + unsigned long flags; + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + int dma_err = 0; + + if (iir & UART_IIR_NO_INT) + return 0; + + spin_lock_irqsave(&port->lock, flags); + + status = serial_port_in(port, UART_LSR); + + DEBUG_INTR("status = %x...", status); + + if (status & (UART_LSR_DR | UART_LSR_BI)) { + if (up->dma) + dma_err = serial8250_rx_dma(up, iir); + + if (!up->dma || dma_err) + status = serial8250_rx_chars(up, status); + } + serial8250_modem_status(up); + if (status & UART_LSR_THRE) + serial8250_tx_chars(up); + + spin_unlock_irqrestore(&port->lock, flags); + return 1; +} +EXPORT_SYMBOL_GPL(serial8250_handle_irq); + +static int serial8250_default_handle_irq(struct uart_port *port) +{ + unsigned int iir = serial_port_in(port, UART_IIR); + + return serial8250_handle_irq(port, iir); +} + +/* + * These Exar UARTs have an extra interrupt indicator that could + * fire for a few unimplemented interrupts. One of which is a + * wakeup event when coming out of sleep. Put this here just + * to be on the safe side that these interrupts don't go unhandled. 
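serial8250_tx_chars() above drains at most tx_loadsz bytes per interrupt from a power-of-two circular buffer, wrapping the tail with a mask instead of a modulo. A minimal sketch of that loop, assuming the buffer starts non-empty (sizes and names are illustrative):

    #include <stdio.h>

    #define BUF_SIZE  16               /* power of two, like UART_XMIT_SIZE */
    #define TX_LOADSZ 4                /* fake FIFO load size */

    struct circ { unsigned char buf[BUF_SIZE]; int head, tail; };

    static int circ_empty(const struct circ *c) { return c->head == c->tail; }

    int main(void)
    {
        struct circ xmit = { "hello, uart", 11, 0 };   /* 11 bytes queued */
        int count = TX_LOADSZ;

        do {
            putchar(xmit.buf[xmit.tail]);              /* "send" one byte */
            xmit.tail = (xmit.tail + 1) & (BUF_SIZE - 1);  /* cheap wrap */
            if (circ_empty(&xmit))
                break;
        } while (--count > 0);
        putchar('\n');                                 /* prints "hell" */
        return 0;
    }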
+ */ +static int exar_handle_irq(struct uart_port *port) +{ + unsigned char int0, int1, int2, int3; + unsigned int iir = serial_port_in(port, UART_IIR); + int ret; + + ret = serial8250_handle_irq(port, iir); + + if ((port->type == PORT_XR17V35X) || + (port->type == PORT_XR17D15X)) { + int0 = serial_port_in(port, 0x80); + int1 = serial_port_in(port, 0x81); + int2 = serial_port_in(port, 0x82); + int3 = serial_port_in(port, 0x83); + } + + return ret; +} + +/* + * This is the serial driver's interrupt routine. + * + * Arjan thinks the old way was overly complex, so it got simplified. + * Alan disagrees, saying that need the complexity to handle the weird + * nature of ISA shared interrupts. (This is a special exception.) + * + * In order to handle ISA shared interrupts properly, we need to check + * that all ports have been serviced, and therefore the ISA interrupt + * line has been de-asserted. + * + * This means we need to loop through all ports. checking that they + * don't have an interrupt pending. + */ +static irqreturn_t serial8250_interrupt(int irq, void *dev_id) +{ + struct irq_info *i = dev_id; + struct list_head *l, *end = NULL; + int pass_counter = 0, handled = 0; + + DEBUG_INTR("serial8250_interrupt(%d)...", irq); + + spin_lock(&i->lock); + + l = i->head; + do { + struct uart_8250_port *up; + struct uart_port *port; + + up = list_entry(l, struct uart_8250_port, list); + port = &up->port; + + if (port->handle_irq(port)) { + handled = 1; + end = NULL; + } else if (end == NULL) + end = l; + + l = l->next; + + if (l == i->head && pass_counter++ > PASS_LIMIT) { + /* If we hit this, we're dead. */ + printk_ratelimited(KERN_ERR + "serial8250: too much work for irq%d\n", irq); + break; + } + } while (l != end); + + spin_unlock(&i->lock); + + DEBUG_INTR("end.\n"); + + return IRQ_RETVAL(handled); +} + +/* + * To support ISA shared interrupts, we need to have one interrupt + * handler that ensures that the IRQ line has been deasserted + * before returning. Failing to do this will result in the IRQ + * line being stuck active, and, since ISA irqs are edge triggered, + * no more IRQs will be seen. + */ +static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up) +{ + spin_lock_irq(&i->lock); + + if (!list_empty(i->head)) { + if (i->head == &up->list) + i->head = i->head->next; + list_del(&up->list); + } else { + BUG_ON(i->head != &up->list); + i->head = NULL; + } + spin_unlock_irq(&i->lock); + /* List empty so throw away the hash node */ + if (i->head == NULL) { + hlist_del(&i->node); + kfree(i); + } +} + +static int serial_link_irq_chain(struct uart_8250_port *up) +{ + struct hlist_head *h; + struct hlist_node *n; + struct irq_info *i; + int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? 
IRQF_SHARED : 0; + + mutex_lock(&hash_mutex); + + h = &irq_lists[up->port.irq % NR_IRQ_HASH]; + + hlist_for_each(n, h) { + i = hlist_entry(n, struct irq_info, node); + if (i->irq == up->port.irq) + break; + } + + if (n == NULL) { + i = kzalloc(sizeof(struct irq_info), GFP_KERNEL); + if (i == NULL) { + mutex_unlock(&hash_mutex); + return -ENOMEM; + } + spin_lock_init(&i->lock); + i->irq = up->port.irq; + hlist_add_head(&i->node, h); + } + mutex_unlock(&hash_mutex); + + spin_lock_irq(&i->lock); + + if (i->head) { + list_add(&up->list, i->head); + spin_unlock_irq(&i->lock); + + ret = 0; + } else { + INIT_LIST_HEAD(&up->list); + i->head = &up->list; + spin_unlock_irq(&i->lock); + irq_flags |= up->port.irqflags; + ret = request_irq(up->port.irq, serial8250_interrupt, + irq_flags, "serial", i); + if (ret < 0) + serial_do_unlink(i, up); + } + + return ret; +} + +static void serial_unlink_irq_chain(struct uart_8250_port *up) +{ + struct irq_info *i; + struct hlist_node *n; + struct hlist_head *h; + + mutex_lock(&hash_mutex); + + h = &irq_lists[up->port.irq % NR_IRQ_HASH]; + + hlist_for_each(n, h) { + i = hlist_entry(n, struct irq_info, node); + if (i->irq == up->port.irq) + break; + } + + BUG_ON(n == NULL); + BUG_ON(i->head == NULL); + + if (list_empty(i->head)) + free_irq(up->port.irq, i); + + serial_do_unlink(i, up); + mutex_unlock(&hash_mutex); +} + +/* + * This function is used to handle ports that do not have an + * interrupt. This doesn't work very well for 16450's, but gives + * barely passable results for a 16550A. (Although at the expense + * of much CPU overhead). + */ +static void serial8250_timeout(unsigned long data) +{ + struct uart_8250_port *up = (struct uart_8250_port *)data; + + up->port.handle_irq(&up->port); + mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port)); +} + +static void serial8250_backup_timeout(unsigned long data) +{ + struct uart_8250_port *up = (struct uart_8250_port *)data; + unsigned int iir, ier = 0, lsr; + unsigned long flags; + + spin_lock_irqsave(&up->port.lock, flags); + + /* + * Must disable interrupts or else we risk racing with the interrupt + * based handler. + */ + if (up->port.irq) { + ier = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0); + } + + iir = serial_in(up, UART_IIR); + + /* + * This should be a safe test for anyone who doesn't trust the + * IIR bits on their UART, but it's specifically designed for + * the "Diva" UART used on the management processor on many HP + * ia64 and parisc boxes. + */ + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && + (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && + (lsr & UART_LSR_THRE)) { + iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); + iir |= UART_IIR_THRI; + } + + if (!(iir & UART_IIR_NO_INT)) + serial8250_tx_chars(up); + + if (up->port.irq) + serial_out(up, UART_IER, ier); + + spin_unlock_irqrestore(&up->port.lock, flags); + + /* Standard timer interval plus 0.2s to keep the port running */ + mod_timer(&up->timer, + jiffies + uart_poll_timeout(&up->port) + HZ / 5); +} + +static unsigned int serial8250_tx_empty(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + unsigned long flags; + unsigned int lsr; + + spin_lock_irqsave(&port->lock, flags); + lsr = serial_port_in(port, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + spin_unlock_irqrestore(&port->lock, flags); + + return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? 
TIOCSER_TEMT : 0; +} + +static unsigned int serial8250_get_mctrl(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + unsigned int status; + unsigned int ret; + + status = serial8250_modem_status(up); + + ret = 0; + if (status & UART_MSR_DCD) + ret |= TIOCM_CAR; + if (status & UART_MSR_RI) + ret |= TIOCM_RNG; + if (status & UART_MSR_DSR) + ret |= TIOCM_DSR; + if (status & UART_MSR_CTS) + ret |= TIOCM_CTS; + return ret; +} + +static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + unsigned char mcr = 0; + + if (mctrl & TIOCM_RTS) + mcr |= UART_MCR_RTS; + if (mctrl & TIOCM_DTR) + mcr |= UART_MCR_DTR; + if (mctrl & TIOCM_OUT1) + mcr |= UART_MCR_OUT1; + if (mctrl & TIOCM_OUT2) + mcr |= UART_MCR_OUT2; + if (mctrl & TIOCM_LOOP) + mcr |= UART_MCR_LOOP; + + mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr; + + serial_port_out(port, UART_MCR, mcr); +} + +static void serial8250_break_ctl(struct uart_port *port, int break_state) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + if (break_state == -1) + up->lcr |= UART_LCR_SBC; + else + up->lcr &= ~UART_LCR_SBC; + serial_port_out(port, UART_LCR, up->lcr); + spin_unlock_irqrestore(&port->lock, flags); +} + +/* + * Wait for transmitter & holding register to empty + */ +static void wait_for_xmitr(struct uart_8250_port *up, int bits) +{ + unsigned int status, tmout = 10000; + + /* Wait up to 10ms for the character(s) to be sent. */ + for (;;) { + status = serial_in(up, UART_LSR); + + up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; + + if ((status & bits) == bits) + break; + if (--tmout == 0) + break; + udelay(1); + } + + /* Wait up to 1s for flow control if necessary */ + if (up->port.flags & UPF_CONS_FLOW) { + unsigned int tmout; + for (tmout = 1000000; tmout; tmout--) { + unsigned int msr = serial_in(up, UART_MSR); + up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; + if (msr & UART_MSR_CTS) + break; + udelay(1); + touch_nmi_watchdog(); + } + } +} + +#ifdef CONFIG_CONSOLE_POLL +/* + * Console polling routines for writing and reading from the uart while + * in an interrupt or debug context. + */ + +static int serial8250_get_poll_char(struct uart_port *port) +{ + unsigned char lsr = serial_port_in(port, UART_LSR); + + if (!(lsr & UART_LSR_DR)) + return NO_POLL_CHAR; + + return serial_port_in(port, UART_RX); +} + + +static void serial8250_put_poll_char(struct uart_port *port, + unsigned char c) +{ + unsigned int ier; + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + + /* + * First save the IER then disable the interrupts + */ + ier = serial_port_in(port, UART_IER); + if (up->capabilities & UART_CAP_UUE) + serial_port_out(port, UART_IER, UART_IER_UUE); + else + serial_port_out(port, UART_IER, 0); + + wait_for_xmitr(up, BOTH_EMPTY); + /* + * Send the character out. + * If a LF, also do CR... 
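serial8250_get_mctrl() above is a straight translation of MSR status bits into TIOCM flags. A bit-for-bit stand-alone model; the constants are copied from serial_reg.h and the termios headers, while msr_to_mctrl() is an invented name:

    #include <stdio.h>

    #define MSR_CTS  0x10
    #define MSR_DSR  0x20
    #define MSR_RI   0x40
    #define MSR_DCD  0x80

    #define T_CTS 0x020   /* TIOCM_CTS */
    #define T_CAR 0x040   /* TIOCM_CAR */
    #define T_RNG 0x080   /* TIOCM_RNG */
    #define T_DSR 0x100   /* TIOCM_DSR */

    static unsigned int msr_to_mctrl(unsigned char status)
    {
        unsigned int ret = 0;

        if (status & MSR_DCD) ret |= T_CAR;
        if (status & MSR_RI)  ret |= T_RNG;
        if (status & MSR_DSR) ret |= T_DSR;
        if (status & MSR_CTS) ret |= T_CTS;
        return ret;
    }

    int main(void)
    {
        printf("mctrl=0x%03x\n", msr_to_mctrl(MSR_DCD | MSR_CTS));
        return 0;
    }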
+ */ + serial_port_out(port, UART_TX, c); + if (c == 10) { + wait_for_xmitr(up, BOTH_EMPTY); + serial_port_out(port, UART_TX, 13); + } + + /* + * Finally, wait for transmitter to become empty + * and restore the IER + */ + wait_for_xmitr(up, BOTH_EMPTY); + serial_port_out(port, UART_IER, ier); +} + +#endif /* CONFIG_CONSOLE_POLL */ + +static int serial8250_startup(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + unsigned long flags; + unsigned char lsr, iir; + int retval; + + if (port->type == PORT_8250_CIR) + return -ENODEV; + + if (!port->fifosize) + port->fifosize = uart_config[port->type].fifo_size; + if (!up->tx_loadsz) + up->tx_loadsz = uart_config[port->type].tx_loadsz; + if (!up->capabilities) + up->capabilities = uart_config[port->type].flags; + up->mcr = 0; + + if (port->iotype != up->cur_iotype) + set_io_from_upio(port); + + if (port->type == PORT_16C950) { + /* Wake up and initialize UART */ + up->acr = 0; + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); + serial_port_out(port, UART_IER, 0); + serial_port_out(port, UART_LCR, 0); + serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); + serial_port_out(port, UART_LCR, 0); + } + +#ifdef CONFIG_SERIAL_8250_RSA + /* + * If this is an RSA port, see if we can kick it up to the + * higher speed clock. + */ + enable_rsa(up); +#endif + + /* + * Clear the FIFO buffers and disable them. + * (they will be reenabled in set_termios()) + */ + serial8250_clear_fifos(up); + + /* + * Clear the interrupt registers. + */ + serial_port_in(port, UART_LSR); + serial_port_in(port, UART_RX); + serial_port_in(port, UART_IIR); + serial_port_in(port, UART_MSR); + + /* + * At this point, there's no way the LSR could still be 0xff; + * if it is, then bail out, because there's likely no UART + * here. + */ + if (!(port->flags & UPF_BUGGY_UART) && + (serial_port_in(port, UART_LSR) == 0xff)) { + printk_ratelimited(KERN_INFO "ttyS%d: LSR safety check engaged!\n", + serial_index(port)); + return -ENODEV; + } + + /* + * For a XR16C850, we need to set the trigger levels + */ + if (port->type == PORT_16850) { + unsigned char fctr; + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + + fctr = serial_in(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); + serial_port_out(port, UART_FCTR, + fctr | UART_FCTR_TRGD | UART_FCTR_RX); + serial_port_out(port, UART_TRG, UART_TRG_96); + serial_port_out(port, UART_FCTR, + fctr | UART_FCTR_TRGD | UART_FCTR_TX); + serial_port_out(port, UART_TRG, UART_TRG_96); + + serial_port_out(port, UART_LCR, 0); + } + + if (port->irq) { + unsigned char iir1; + /* + * Test for UARTs that do not reassert THRE when the + * transmitter is idle and the interrupt has already + * been cleared. Real 16550s should always reassert + * this interrupt whenever the transmitter is idle and + * the interrupt is enabled. Delays are necessary to + * allow register changes to become visible. 
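wait_for_xmitr(), which both the console-poll path and the startup test here depend on, spins on LSR until THRE and TEMT are both set, bounded by a countdown so a dead transmitter cannot hang the caller. A sketch with a fake LSR that becomes ready after a few polls:

    #include <stdio.h>

    #define LSR_THRE 0x20
    #define LSR_TEMT 0x40
    #define BOTH     (LSR_THRE | LSR_TEMT)

    static int polls;
    static unsigned char fake_lsr(void) { return ++polls > 3 ? BOTH : 0; }

    static int wait_both(void)
    {
        unsigned int tmout = 10000;     /* ~10ms at 1us per spin */

        for (;;) {
            unsigned char status = fake_lsr();

            if ((status & BOTH) == BOTH)
                return 1;
            if (--tmout == 0)
                return 0;               /* timed out */
            /* udelay(1) would sit here in the driver */
        }
    }

    int main(void)
    {
        printf("xmitr empty: %s after %d polls\n",
               wait_both() ? "yes" : "no", polls);
        return 0;
    }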
+ */ + spin_lock_irqsave(&port->lock, flags); + if (up->port.irqflags & IRQF_SHARED) + disable_irq_nosync(port->irq); + + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); + udelay(1); /* allow THRE to set */ + iir1 = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); + udelay(1); /* allow a working UART time to re-assert THRE */ + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + + if (port->irqflags & IRQF_SHARED) + enable_irq(port->irq); + spin_unlock_irqrestore(&port->lock, flags); + + /* + * If the interrupt is not reasserted, or we otherwise + * don't trust the iir, setup a timer to kick the UART + * on a regular basis. + */ + if ((!(iir1 & UART_IIR_NO_INT) && (iir & UART_IIR_NO_INT)) || + up->port.flags & UPF_BUG_THRE) { + up->bugs |= UART_BUG_THRE; + pr_debug("ttyS%d - using backup timer\n", + serial_index(port)); + } + } + + /* + * The above check will only give an accurate result the first time + * the port is opened so this value needs to be preserved. + */ + if (up->bugs & UART_BUG_THRE) { + up->timer.function = serial8250_backup_timeout; + up->timer.data = (unsigned long)up; + mod_timer(&up->timer, jiffies + + uart_poll_timeout(port) + HZ / 5); + } + + /* + * If the "interrupt" for this port doesn't correspond with any + * hardware interrupt, we use a timer-based system. The original + * driver used to do this with IRQ0. + */ + if (!port->irq) { + up->timer.data = (unsigned long)up; + mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); + } else { + retval = serial_link_irq_chain(up); + if (retval) + return retval; + } + + /* + * Now, initialize the UART + */ + serial_port_out(port, UART_LCR, UART_LCR_WLEN8); + + spin_lock_irqsave(&port->lock, flags); + if (up->port.flags & UPF_FOURPORT) { + if (!up->port.irq) + up->port.mctrl |= TIOCM_OUT1; + } else + /* + * Most PC uarts need OUT2 raised to enable interrupts. + */ + if (port->irq) + up->port.mctrl |= TIOCM_OUT2; + + serial8250_set_mctrl(port, port->mctrl); + + /* Serial over Lan (SoL) hack: + Intel 8257x Gigabit ethernet chips have a + 16550 emulation, to be used for Serial Over Lan. + Those chips take a longer time than a normal + serial device to signalize that a transmission + data was queued. Due to that, the above test generally + fails. One solution would be to delay the reading of + iir. However, this is not reliable, since the timeout + is variable. So, let's just don't test if we receive + TX irq. This way, we'll never enable UART_BUG_TXEN. + */ + if (skip_txen_test || up->port.flags & UPF_NO_TXEN_TEST) + goto dont_test_tx_en; + + /* + * Do a quick test to see if we receive an + * interrupt when we enable the TX irq. + */ + serial_port_out(port, UART_IER, UART_IER_THRI); + lsr = serial_port_in(port, UART_LSR); + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + + if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { + if (!(up->bugs & UART_BUG_TXEN)) { + up->bugs |= UART_BUG_TXEN; + pr_debug("ttyS%d - enabling bad tx status workarounds\n", + serial_index(port)); + } + } else { + up->bugs &= ~UART_BUG_TXEN; + } + +dont_test_tx_en: + spin_unlock_irqrestore(&port->lock, flags); + + /* + * Clear the interrupt registers again for luck, and clear the + * saved flags to avoid getting false values from polling + * routines or the previous session. 
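+	 * (reading LSR, RX, IIR and MSR below acknowledges any stale
+	 * line-status, receive, interrupt-ID and modem-status state)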
+ */ + serial_port_in(port, UART_LSR); + serial_port_in(port, UART_RX); + serial_port_in(port, UART_IIR); + serial_port_in(port, UART_MSR); + up->lsr_saved_flags = 0; + up->msr_saved_flags = 0; + + /* + * Request DMA channels for both RX and TX. + */ + if (up->dma) { + retval = serial8250_request_dma(up); + if (retval) { + pr_warn_ratelimited("ttyS%d - failed to request DMA\n", + serial_index(port)); + up->dma = NULL; + } + } + + /* + * Finally, enable interrupts. Note: Modem status interrupts + * are set via set_termios(), which will be occurring imminently + * anyway, so we don't enable them here. + */ + up->ier = UART_IER_RLSI | UART_IER_RDI; + serial_port_out(port, UART_IER, up->ier); + + if (port->flags & UPF_FOURPORT) { + unsigned int icp; + /* + * Enable interrupts on the AST Fourport board + */ + icp = (port->iobase & 0xfe0) | 0x01f; + outb_p(0x80, icp); + inb_p(icp); + } + + return 0; +} + +static void serial8250_shutdown(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + unsigned long flags; + + /* + * Disable interrupts from this port + */ + up->ier = 0; + serial_port_out(port, UART_IER, 0); + + if (up->dma) + serial8250_release_dma(up); + + spin_lock_irqsave(&port->lock, flags); + if (port->flags & UPF_FOURPORT) { + /* reset interrupts on the AST Fourport board */ + inb((port->iobase & 0xfe0) | 0x1f); + port->mctrl |= TIOCM_OUT1; + } else + port->mctrl &= ~TIOCM_OUT2; + + serial8250_set_mctrl(port, port->mctrl); + spin_unlock_irqrestore(&port->lock, flags); + + /* + * Disable break condition and FIFOs + */ + serial_port_out(port, UART_LCR, + serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); + serial8250_clear_fifos(up); + +#ifdef CONFIG_SERIAL_8250_RSA + /* + * Reset the RSA board back to 115kbps compat mode. + */ + disable_rsa(up); +#endif + + /* + * Read data port to reset things, and then unlink from + * the IRQ chain. + */ + serial_port_in(port, UART_RX); + + del_timer_sync(&up->timer); + up->timer.function = serial8250_timeout; + if (port->irq) + serial_unlink_irq_chain(up); +} + +static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud) +{ + unsigned int quot; + + /* + * Handle magic divisors for baud rates above baud_base on + * SMSC SuperIO chips. + */ + if ((port->flags & UPF_MAGIC_MULTIPLIER) && + baud == (port->uartclk/4)) + quot = 0x8001; + else if ((port->flags & UPF_MAGIC_MULTIPLIER) && + baud == (port->uartclk/8)) + quot = 0x8002; + else + quot = uart_get_divisor(port, baud); + + return quot; +} + +void +serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, + struct ktermios *old) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + unsigned char cval, fcr = 0; + unsigned long flags; + unsigned int baud, quot; + int fifo_bug = 0; + + switch (termios->c_cflag & CSIZE) { + case CS5: + cval = UART_LCR_WLEN5; + break; + case CS6: + cval = UART_LCR_WLEN6; + break; + case CS7: + cval = UART_LCR_WLEN7; + break; + default: + case CS8: + cval = UART_LCR_WLEN8; + break; + } + + if (termios->c_cflag & CSTOPB) + cval |= UART_LCR_STOP; + if (termios->c_cflag & PARENB) { + cval |= UART_LCR_PARITY; + if (up->bugs & UART_BUG_PARITY) + fifo_bug = 1; + } + if (!(termios->c_cflag & PARODD)) + cval |= UART_LCR_EPAR; +#ifdef CMSPAR + if (termios->c_cflag & CMSPAR) + cval |= UART_LCR_SPAR; +#endif + + /* + * Ask the core to calculate the divisor for us. 
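+	 * (for a standard part this comes out as uartclk / (16 * baud);
+	 * the SMSC magic divisors are handled in serial8250_get_divisor())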
+ */ + baud = uart_get_baud_rate(port, termios, old, + port->uartclk / 16 / 0xffff, + port->uartclk / 16); + quot = serial8250_get_divisor(port, baud); + + /* + * Oxford Semi 952 rev B workaround + */ + if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0) + quot++; + + if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { + fcr = uart_config[port->type].fcr; + if (baud < 2400 || fifo_bug) { + fcr &= ~UART_FCR_TRIGGER_MASK; + fcr |= UART_FCR_TRIGGER_1; + } + } + + /* + * MCR-based auto flow control. When AFE is enabled, RTS will be + * deasserted when the receive FIFO contains more characters than + * the trigger, or the MCR RTS bit is cleared. In the case where + * the remote UART is not using CTS auto flow control, we must + * have sufficient FIFO entries for the latency of the remote + * UART to respond. IOW, at least 32 bytes of FIFO. + */ + if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) { + up->mcr &= ~UART_MCR_AFE; + if (termios->c_cflag & CRTSCTS) + up->mcr |= UART_MCR_AFE; + } + + /* + * Ok, we're now changing the port state. Do it with + * interrupts disabled. + */ + spin_lock_irqsave(&port->lock, flags); + + /* + * Update the per-port timeout. + */ + uart_update_timeout(port, termios->c_cflag, baud); + + port->read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; + if (termios->c_iflag & INPCK) + port->read_status_mask |= UART_LSR_FE | UART_LSR_PE; + if (termios->c_iflag & (BRKINT | PARMRK)) + port->read_status_mask |= UART_LSR_BI; + + /* + * Characteres to ignore + */ + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; + if (termios->c_iflag & IGNBRK) { + port->ignore_status_mask |= UART_LSR_BI; + /* + * If we're ignoring parity and break indicators, + * ignore overruns too (for real raw support). + */ + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= UART_LSR_OE; + } + + /* + * ignore all characters if CREAD is not set + */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= UART_LSR_DR; + + /* + * CTS flow control flag and modem status interrupts + */ + up->ier &= ~UART_IER_MSI; + if (!(up->bugs & UART_BUG_NOMSR) && + UART_ENABLE_MS(&up->port, termios->c_cflag)) + up->ier |= UART_IER_MSI; + if (up->capabilities & UART_CAP_UUE) + up->ier |= UART_IER_UUE; + if (up->capabilities & UART_CAP_RTOIE) + up->ier |= UART_IER_RTOIE; + + serial_port_out(port, UART_IER, up->ier); + + if (up->capabilities & UART_CAP_EFR) { + unsigned char efr = 0; + /* + * TI16C752/Startech hardware flow control. FIXME: + * - TI16C752 requires control thresholds to be set. + * - UART_MCR_RTS is ineffective if auto-RTS mode is enabled. + */ + if (termios->c_cflag & CRTSCTS) + efr |= UART_EFR_CTS; + + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + if (port->flags & UPF_EXAR_EFR) + serial_port_out(port, UART_XR_EFR, efr); + else + serial_port_out(port, UART_EFR, efr); + } + + /* Workaround to enable 115200 baud on OMAP1510 internal ports */ + if (is_omap1510_8250(up)) { + if (baud == 115200) { + quot = 1; + serial_port_out(port, UART_OMAP_OSC_12M_SEL, 1); + } else + serial_port_out(port, UART_OMAP_OSC_12M_SEL, 0); + } + + /* + * For NatSemi, switch to bank 2 not bank 1, to avoid resetting EXCR2, + * otherwise just set DLAB + */ + if (up->capabilities & UART_NATSEMI) + serial_port_out(port, UART_LCR, 0xe0); + else + serial_port_out(port, UART_LCR, cval | UART_LCR_DLAB); + + serial_dl_write(up, quot); + + /* + * LCR DLAB must be set to enable 64-byte FIFO mode. 
If the FCR + * is written without DLAB set, this mode will be disabled. + */ + if (port->type == PORT_16750) + serial_port_out(port, UART_FCR, fcr); + + serial_port_out(port, UART_LCR, cval); /* reset DLAB */ + up->lcr = cval; /* Save LCR */ + if (port->type != PORT_16750) { + /* emulated UARTs (Lucent Venus 167x) need two steps */ + if (fcr & UART_FCR_ENABLE_FIFO) + serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_port_out(port, UART_FCR, fcr); /* set fcr */ + } + serial8250_set_mctrl(port, port->mctrl); + spin_unlock_irqrestore(&port->lock, flags); + /* Don't rewrite B0 */ + if (tty_termios_baud_rate(termios)) + tty_termios_encode_baud_rate(termios, baud, baud); +} +EXPORT_SYMBOL(serial8250_do_set_termios); + +static void +serial8250_set_termios(struct uart_port *port, struct ktermios *termios, + struct ktermios *old) +{ + if (port->set_termios) + port->set_termios(port, termios, old); + else + serial8250_do_set_termios(port, termios, old); +} + +static void +serial8250_set_ldisc(struct uart_port *port, int new) +{ + if (new == N_PPS) { + port->flags |= UPF_HARDPPS_CD; + serial8250_enable_ms(port); + } else + port->flags &= ~UPF_HARDPPS_CD; +} + + +void serial8250_do_pm(struct uart_port *port, unsigned int state, + unsigned int oldstate) +{ + struct uart_8250_port *p = + container_of(port, struct uart_8250_port, port); + + serial8250_set_sleep(p, state != 0); +} +EXPORT_SYMBOL(serial8250_do_pm); + +static void +serial8250_pm(struct uart_port *port, unsigned int state, + unsigned int oldstate) +{ + if (port->pm) + port->pm(port, state, oldstate); + else + serial8250_do_pm(port, state, oldstate); +} + +static unsigned int serial8250_port_size(struct uart_8250_port *pt) +{ + if (pt->port.iotype == UPIO_AU) + return 0x1000; + if (is_omap1_8250(pt)) + return 0x16 << pt->port.regshift; + + return 8 << pt->port.regshift; +} + +/* + * Resource handling. 
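+ * (the port may sit in I/O space, in memory space, or behind a HUB6
+ * selector; the RSA register window is requested separately)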
+ */ +static int serial8250_request_std_resource(struct uart_8250_port *up) +{ + unsigned int size = serial8250_port_size(up); + struct uart_port *port = &up->port; + int ret = 0; + + switch (port->iotype) { + case UPIO_AU: + case UPIO_TSI: + case UPIO_MEM32: + case UPIO_MEM: + if (!port->mapbase) + break; + + if (!request_mem_region(port->mapbase, size, "serial")) { + ret = -EBUSY; + break; + } + + if (port->flags & UPF_IOREMAP) { + port->membase = ioremap_nocache(port->mapbase, size); + if (!port->membase) { + release_mem_region(port->mapbase, size); + ret = -ENOMEM; + } + } + break; + + case UPIO_HUB6: + case UPIO_PORT: + if (!request_region(port->iobase, size, "serial")) + ret = -EBUSY; + break; + } + return ret; +} + +static void serial8250_release_std_resource(struct uart_8250_port *up) +{ + unsigned int size = serial8250_port_size(up); + struct uart_port *port = &up->port; + + switch (port->iotype) { + case UPIO_AU: + case UPIO_TSI: + case UPIO_MEM32: + case UPIO_MEM: + if (!port->mapbase) + break; + + if (port->flags & UPF_IOREMAP) { + iounmap(port->membase); + port->membase = NULL; + } + + release_mem_region(port->mapbase, size); + break; + + case UPIO_HUB6: + case UPIO_PORT: + release_region(port->iobase, size); + break; + } +} + +static int serial8250_request_rsa_resource(struct uart_8250_port *up) +{ + unsigned long start = UART_RSA_BASE << up->port.regshift; + unsigned int size = 8 << up->port.regshift; + struct uart_port *port = &up->port; + int ret = -EINVAL; + + switch (port->iotype) { + case UPIO_HUB6: + case UPIO_PORT: + start += port->iobase; + if (request_region(start, size, "serial-rsa")) + ret = 0; + else + ret = -EBUSY; + break; + } + + return ret; +} + +static void serial8250_release_rsa_resource(struct uart_8250_port *up) +{ + unsigned long offset = UART_RSA_BASE << up->port.regshift; + unsigned int size = 8 << up->port.regshift; + struct uart_port *port = &up->port; + + switch (port->iotype) { + case UPIO_HUB6: + case UPIO_PORT: + release_region(port->iobase + offset, size); + break; + } +} + +static void serial8250_release_port(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + + serial8250_release_std_resource(up); + if (port->type == PORT_RSA) + serial8250_release_rsa_resource(up); +} + +static int serial8250_request_port(struct uart_port *port) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + int ret; + + if (port->type == PORT_8250_CIR) + return -ENODEV; + + ret = serial8250_request_std_resource(up); + if (ret == 0 && port->type == PORT_RSA) { + ret = serial8250_request_rsa_resource(up); + if (ret < 0) + serial8250_release_std_resource(up); + } + + return ret; +} + +static void serial8250_config_port(struct uart_port *port, int flags) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + int probeflags = PROBE_ANY; + int ret; + + if (port->type == PORT_8250_CIR) + return; + + /* + * Find the region that we can probe for. This in turn + * tells us whether we can probe for the type of port. 
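+	 * (the standard resources must be claimed before autoconfig()
+	 * touches any registers; RSA probing is skipped when its extra
+	 * register window cannot be requested)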
+ */ + ret = serial8250_request_std_resource(up); + if (ret < 0) + return; + + ret = serial8250_request_rsa_resource(up); + if (ret < 0) + probeflags &= ~PROBE_RSA; + + if (port->iotype != up->cur_iotype) + set_io_from_upio(port); + + if (flags & UART_CONFIG_TYPE) + autoconfig(up, probeflags); + + /* if access method is AU, it is a 16550 with a quirk */ + if (port->type == PORT_16550A && port->iotype == UPIO_AU) + up->bugs |= UART_BUG_NOMSR; + + if (port->type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) + autoconfig_irq(up); + + if (port->type != PORT_RSA && probeflags & PROBE_RSA) + serial8250_release_rsa_resource(up); + if (port->type == PORT_UNKNOWN) + serial8250_release_std_resource(up); + + /* Fixme: probably not the best place for this */ + if ((port->type == PORT_XR17V35X) || + (port->type == PORT_XR17D15X)) + port->handle_irq = exar_handle_irq; +} + +static int +serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + if (ser->irq >= nr_irqs || ser->irq < 0 || + ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || + ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS || + ser->type == PORT_STARTECH) + return -EINVAL; + return 0; +} + +static const char * +serial8250_type(struct uart_port *port) +{ + int type = port->type; + + if (type >= ARRAY_SIZE(uart_config)) + type = 0; + return uart_config[type].name; +} + +static struct uart_ops serial8250_pops = { + .tx_empty = serial8250_tx_empty, + .set_mctrl = serial8250_set_mctrl, + .get_mctrl = serial8250_get_mctrl, + .stop_tx = serial8250_stop_tx, + .start_tx = serial8250_start_tx, + .stop_rx = serial8250_stop_rx, + .enable_ms = serial8250_enable_ms, + .break_ctl = serial8250_break_ctl, + .startup = serial8250_startup, + .shutdown = serial8250_shutdown, + .set_termios = serial8250_set_termios, + .set_ldisc = serial8250_set_ldisc, + .pm = serial8250_pm, + .type = serial8250_type, + .release_port = serial8250_release_port, + .request_port = serial8250_request_port, + .config_port = serial8250_config_port, + .verify_port = serial8250_verify_port, +#ifdef CONFIG_CONSOLE_POLL + .poll_get_char = serial8250_get_poll_char, + .poll_put_char = serial8250_put_poll_char, +#endif +}; + +static struct uart_8250_port serial8250_ports[UART_NR]; + +static void (*serial8250_isa_config)(int port, struct uart_port *up, + unsigned short *capabilities); + +void serial8250_set_isa_configurator( + void (*v)(int port, struct uart_port *up, unsigned short *capabilities)) +{ + serial8250_isa_config = v; +} +EXPORT_SYMBOL(serial8250_set_isa_configurator); + +static void __init serial8250_isa_init_ports(void) +{ + struct uart_8250_port *up; + static int first = 1; + int i, irqflag = 0; + + if (!first) + return; + first = 0; + + if (nr_uarts > UART_NR) + nr_uarts = UART_NR; + + for (i = 0; i < nr_uarts; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + struct uart_port *port = &up->port; + + port->line = i; + spin_lock_init(&port->lock); + + init_timer(&up->timer); + up->timer.function = serial8250_timeout; + up->cur_iotype = 0xFF; + + /* + * ALPHA_KLUDGE_MCR needs to be killed. 
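+		 * (mcr_mask strips the kludged bits from every requested
+		 * MCR value and mcr_force unconditionally reasserts them
+		 * in serial8250_set_mctrl())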
+ */ + up->mcr_mask = ~ALPHA_KLUDGE_MCR; + up->mcr_force = ALPHA_KLUDGE_MCR; + + port->ops = &serial8250_pops; + } + + if (share_irqs) + irqflag = IRQF_SHARED; + + for (i = 0, up = serial8250_ports; + i < ARRAY_SIZE(old_serial_port) && i < nr_uarts; + i++, up++) { + struct uart_port *port = &up->port; + + port->iobase = old_serial_port[i].port; + port->irq = irq_canonicalize(old_serial_port[i].irq); + port->irqflags = old_serial_port[i].irqflags; + port->uartclk = old_serial_port[i].baud_base * 16; + port->flags = old_serial_port[i].flags; + port->hub6 = old_serial_port[i].hub6; + port->membase = old_serial_port[i].iomem_base; + port->iotype = old_serial_port[i].io_type; + port->regshift = old_serial_port[i].iomem_reg_shift; + set_io_from_upio(port); + port->irqflags |= irqflag; + if (serial8250_isa_config != NULL) + serial8250_isa_config(i, &up->port, &up->capabilities); + + } +} + +static void +serial8250_init_fixed_type_port(struct uart_8250_port *up, unsigned int type) +{ + up->port.type = type; + if (!up->port.fifosize) + up->port.fifosize = uart_config[type].fifo_size; + if (!up->tx_loadsz) + up->tx_loadsz = uart_config[type].tx_loadsz; + if (!up->capabilities) + up->capabilities = uart_config[type].flags; +} + +static void __init +serial8250_register_ports(struct uart_driver *drv, struct device *dev) +{ + int i; + + for (i = 0; i < nr_uarts; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + + if (up->port.dev) + continue; + + up->port.dev = dev; + + if (up->port.flags & UPF_FIXED_TYPE) + serial8250_init_fixed_type_port(up, up->port.type); + + uart_add_one_port(drv, &up->port); + } +} + +#ifdef CONFIG_SERIAL_8250_CONSOLE + +static void serial8250_console_putchar(struct uart_port *port, int ch) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out(port, UART_TX, ch); +} + +/* + * Print a string to the serial port trying not to disturb + * any possible real use of the port... + * + * The console_lock must be held when we get here. + */ +static void +serial8250_console_write(struct console *co, const char *s, unsigned int count) +{ + struct uart_8250_port *up = &serial8250_ports[co->index]; + struct uart_port *port = &up->port; + unsigned long flags; + unsigned int ier; + int locked = 1; + + touch_nmi_watchdog(); + + local_irq_save(flags); + if (port->sysrq) { + /* serial8250_handle_irq() already took the lock */ + locked = 0; + } else if (oops_in_progress) { + locked = spin_trylock(&port->lock); + } else + spin_lock(&port->lock); + + /* + * First save the IER then disable the interrupts + */ + ier = serial_port_in(port, UART_IER); + + if (up->capabilities & UART_CAP_UUE) + serial_port_out(port, UART_IER, UART_IER_UUE); + else + serial_port_out(port, UART_IER, 0); + + uart_console_write(port, s, count, serial8250_console_putchar); + + /* + * Finally, wait for transmitter to become empty + * and restore the IER + */ + wait_for_xmitr(up, BOTH_EMPTY); + serial_port_out(port, UART_IER, ier); + + /* + * The receive handling will happen properly because the + * receive ready bit will still be set; it is not cleared + * on read. However, modem control will not, we must + * call it if we have saved something in the saved flags + * while processing with interrupts off. 
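+	 * (msr_saved_flags is filled in by wait_for_xmitr(); calling
+	 * serial8250_modem_status() below lets the core see any CTS or
+	 * carrier change that happened while we were printing)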
+ */ + if (up->msr_saved_flags) + serial8250_modem_status(up); + + if (locked) + spin_unlock(&port->lock); + local_irq_restore(flags); +} + +static int __init serial8250_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + /* + * Check whether an invalid uart number has been specified, and + * if so, search for the first available port that does have + * console support. + */ + if (co->index >= nr_uarts) + co->index = 0; + port = &serial8250_ports[co->index].port; + if (!port->iobase && !port->membase) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static int serial8250_console_early_setup(void) +{ + return serial8250_find_port_for_earlycon(); +} + +static struct console serial8250_console = { + .name = "ttyS", + .write = serial8250_console_write, + .device = uart_console_device, + .setup = serial8250_console_setup, + .early_setup = serial8250_console_early_setup, + .flags = CON_PRINTBUFFER | CON_ANYTIME, + .index = -1, + .data = &serial8250_reg, +}; + +static int __init serial8250_console_init(void) +{ + serial8250_isa_init_ports(); + register_console(&serial8250_console); + return 0; +} +console_initcall(serial8250_console_init); + +int serial8250_find_port(struct uart_port *p) +{ + int line; + struct uart_port *port; + + for (line = 0; line < nr_uarts; line++) { + port = &serial8250_ports[line].port; + if (uart_match_port(p, port)) + return line; + } + return -ENODEV; +} + +#define SERIAL8250_CONSOLE &serial8250_console +#else +#define SERIAL8250_CONSOLE NULL +#endif + +static struct uart_driver serial8250_reg = { + .owner = THIS_MODULE, + .driver_name = "serial", + .dev_name = "ttyS", + .major = TTY_MAJOR, + .minor = 64, + .cons = SERIAL8250_CONSOLE, +}; + +/* + * early_serial_setup - early registration for 8250 ports + * + * Setup an 8250 port structure prior to console initialisation. Use + * after console initialisation will cause undefined behaviour. + */ +int __init early_serial_setup(struct uart_port *port) +{ + struct uart_port *p; + + if (port->line >= ARRAY_SIZE(serial8250_ports)) + return -ENODEV; + + serial8250_isa_init_ports(); + p = &serial8250_ports[port->line].port; + p->iobase = port->iobase; + p->membase = port->membase; + p->irq = port->irq; + p->irqflags = port->irqflags; + p->uartclk = port->uartclk; + p->fifosize = port->fifosize; + p->regshift = port->regshift; + p->iotype = port->iotype; + p->flags = port->flags; + p->mapbase = port->mapbase; + p->private_data = port->private_data; + p->type = port->type; + p->line = port->line; + + set_io_from_upio(p); + if (port->serial_in) + p->serial_in = port->serial_in; + if (port->serial_out) + p->serial_out = port->serial_out; + if (port->handle_irq) + p->handle_irq = port->handle_irq; + else + p->handle_irq = serial8250_default_handle_irq; + + return 0; +} + +/** + * serial8250_suspend_port - suspend one serial port + * @line: serial line number + * + * Suspend one serial port. + */ +void serial8250_suspend_port(int line) +{ + uart_suspend_port(&serial8250_reg, &serial8250_ports[line].port); +} + +/** + * serial8250_resume_port - resume one serial port + * @line: serial line number + * + * Resume one serial port. 
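+ * (NatSemi ports are switched back into high-speed mode first so the
+ * restored uartclk of 921600*16 stays accurate, see below)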
+ */ +void serial8250_resume_port(int line) +{ + struct uart_8250_port *up = &serial8250_ports[line]; + struct uart_port *port = &up->port; + + if (up->capabilities & UART_NATSEMI) { + /* Ensure it's still in high speed mode */ + serial_port_out(port, UART_LCR, 0xE0); + + ns16550a_goto_highspeed(up); + + serial_port_out(port, UART_LCR, 0); + port->uartclk = 921600*16; + } + uart_resume_port(&serial8250_reg, port); +} + +/* + * Register a set of serial devices attached to a platform device. The + * list is terminated with a zero flags entry, which means we expect + * all entries to have at least UPF_BOOT_AUTOCONF set. + */ +static int serial8250_probe(struct platform_device *dev) +{ + struct plat_serial8250_port *p = dev->dev.platform_data; + struct uart_8250_port uart; + int ret, i, irqflag = 0; + + memset(&uart, 0, sizeof(uart)); + + if (share_irqs) + irqflag = IRQF_SHARED; + + for (i = 0; p && p->flags != 0; p++, i++) { + uart.port.iobase = p->iobase; + uart.port.membase = p->membase; + uart.port.irq = p->irq; + uart.port.irqflags = p->irqflags; + uart.port.uartclk = p->uartclk; + uart.port.regshift = p->regshift; + uart.port.iotype = p->iotype; + uart.port.flags = p->flags; + uart.port.mapbase = p->mapbase; + uart.port.hub6 = p->hub6; + uart.port.private_data = p->private_data; + uart.port.type = p->type; + uart.port.serial_in = p->serial_in; + uart.port.serial_out = p->serial_out; + uart.port.handle_irq = p->handle_irq; + uart.port.handle_break = p->handle_break; + uart.port.set_termios = p->set_termios; + uart.port.pm = p->pm; + uart.port.dev = &dev->dev; + uart.port.irqflags |= irqflag; + ret = serial8250_register_8250_port(&uart); + if (ret < 0) { + dev_err(&dev->dev, "unable to register port at index %d " + "(IO%lx MEM%llx IRQ%d): %d\n", i, + p->iobase, (unsigned long long)p->mapbase, + p->irq, ret); + } + } + return 0; +} + +/* + * Remove serial ports registered against a platform device. + */ +static int serial8250_remove(struct platform_device *dev) +{ + int i; + + for (i = 0; i < nr_uarts; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + + if (up->port.dev == &dev->dev) + serial8250_unregister_port(i); + } + return 0; +} + +static int serial8250_suspend(struct platform_device *dev, pm_message_t state) +{ + int i; + + for (i = 0; i < UART_NR; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + + if (up->port.type != PORT_UNKNOWN && up->port.dev == &dev->dev) + uart_suspend_port(&serial8250_reg, &up->port); + } + + return 0; +} + +static int serial8250_resume(struct platform_device *dev) +{ + int i; + + for (i = 0; i < UART_NR; i++) { + struct uart_8250_port *up = &serial8250_ports[i]; + + if (up->port.type != PORT_UNKNOWN && up->port.dev == &dev->dev) + serial8250_resume_port(i); + } + + return 0; +} + +static struct platform_driver serial8250_isa_driver = { + .probe = serial8250_probe, + .remove = serial8250_remove, + .suspend = serial8250_suspend, + .resume = serial8250_resume, + .driver = { + .name = "serial8250", + .owner = THIS_MODULE, + }, +}; + +/* + * This "device" covers _all_ ISA 8250-compatible serial devices listed + * in the table in include/asm/serial.h + */ +static struct platform_device *serial8250_isa_devs; + +/* + * serial8250_register_8250_port and serial8250_unregister_port allows for + * 16x50 serial ports to be configured at run-time, to support PCMCIA + * modems and PCI multiport cards. 
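+ * (both entry points below are serialised with serial_mutex)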
+ */ +static DEFINE_MUTEX(serial_mutex); + +static struct uart_8250_port *serial8250_find_match_or_unused(struct uart_port *port) +{ + int i; + + /* + * First, find a port entry which matches. + */ + for (i = 0; i < nr_uarts; i++) + if (uart_match_port(&serial8250_ports[i].port, port)) + return &serial8250_ports[i]; + + /* + * We didn't find a matching entry, so look for the first + * free entry. We look for one which hasn't been previously + * used (indicated by zero iobase). + */ + for (i = 0; i < nr_uarts; i++) + if (serial8250_ports[i].port.type == PORT_UNKNOWN && + serial8250_ports[i].port.iobase == 0) + return &serial8250_ports[i]; + + /* + * That also failed. Last resort is to find any entry which + * doesn't have a real port associated with it. + */ + for (i = 0; i < nr_uarts; i++) + if (serial8250_ports[i].port.type == PORT_UNKNOWN) + return &serial8250_ports[i]; + + return NULL; +} + +/** + * serial8250_register_8250_port - register a serial port + * @up: serial port template + * + * Configure the serial port specified by the request. If the + * port exists and is in use, it is hung up and unregistered + * first. + * + * The port is then probed and if necessary the IRQ is autodetected + * If this fails an error is returned. + * + * On success the port is ready to use and the line number is returned. + */ +int serial8250_register_8250_port(struct uart_8250_port *up) +{ + struct uart_8250_port *uart; + int ret = -ENOSPC; + + if (up->port.uartclk == 0) + return -EINVAL; + + mutex_lock(&serial_mutex); + + uart = serial8250_find_match_or_unused(&up->port); + if (uart && uart->port.type != PORT_8250_CIR) { + if (uart->port.dev) + uart_remove_one_port(&serial8250_reg, &uart->port); + + uart->port.iobase = up->port.iobase; + uart->port.membase = up->port.membase; + uart->port.irq = up->port.irq; + uart->port.irqflags = up->port.irqflags; + uart->port.uartclk = up->port.uartclk; + uart->port.fifosize = up->port.fifosize; + uart->port.regshift = up->port.regshift; + uart->port.iotype = up->port.iotype; + uart->port.flags = up->port.flags | UPF_BOOT_AUTOCONF; + uart->bugs = up->bugs; + uart->port.mapbase = up->port.mapbase; + uart->port.private_data = up->port.private_data; + uart->port.fifosize = up->port.fifosize; + uart->tx_loadsz = up->tx_loadsz; + uart->capabilities = up->capabilities; + + if (up->port.dev) + uart->port.dev = up->port.dev; + + if (up->port.flags & UPF_FIXED_TYPE) + serial8250_init_fixed_type_port(uart, up->port.type); + + set_io_from_upio(&uart->port); + /* Possibly override default I/O functions. 
*/ + if (up->port.serial_in) + uart->port.serial_in = up->port.serial_in; + if (up->port.serial_out) + uart->port.serial_out = up->port.serial_out; + if (up->port.handle_irq) + uart->port.handle_irq = up->port.handle_irq; + /* Possibly override set_termios call */ + if (up->port.set_termios) + uart->port.set_termios = up->port.set_termios; + if (up->port.pm) + uart->port.pm = up->port.pm; + if (up->port.handle_break) + uart->port.handle_break = up->port.handle_break; + if (up->dl_read) + uart->dl_read = up->dl_read; + if (up->dl_write) + uart->dl_write = up->dl_write; + if (up->dma) + uart->dma = up->dma; + + if (serial8250_isa_config != NULL) + serial8250_isa_config(0, &uart->port, + &uart->capabilities); + + ret = uart_add_one_port(&serial8250_reg, &uart->port); + if (ret == 0) + ret = uart->port.line; + } + mutex_unlock(&serial_mutex); + + return ret; +} +EXPORT_SYMBOL(serial8250_register_8250_port); + +/** + * serial8250_unregister_port - remove a 16x50 serial port at runtime + * @line: serial line number + * + * Remove one serial port. This may not be called from interrupt + * context. We hand the port back to the our control. + */ +void serial8250_unregister_port(int line) +{ + struct uart_8250_port *uart = &serial8250_ports[line]; + + mutex_lock(&serial_mutex); + uart_remove_one_port(&serial8250_reg, &uart->port); + if (serial8250_isa_devs) { + uart->port.flags &= ~UPF_BOOT_AUTOCONF; + uart->port.type = PORT_UNKNOWN; + uart->port.dev = &serial8250_isa_devs->dev; + uart->capabilities = uart_config[uart->port.type].flags; + uart_add_one_port(&serial8250_reg, &uart->port); + } else { + uart->port.dev = NULL; + } + mutex_unlock(&serial_mutex); +} +EXPORT_SYMBOL(serial8250_unregister_port); + +static int __init serial8250_init(void) +{ + int ret; + + serial8250_isa_init_ports(); + + printk(KERN_INFO "Serial: 8250/16550 driver, " + "%d ports, IRQ sharing %sabled\n", nr_uarts, + share_irqs ? "en" : "dis"); + +#ifdef CONFIG_SPARC + ret = sunserial_register_minors(&serial8250_reg, UART_NR); +#else + serial8250_reg.nr = UART_NR; + ret = uart_register_driver(&serial8250_reg); +#endif + if (ret) + goto out; + + ret = serial8250_pnp_init(); + if (ret) + goto unreg_uart_drv; + + serial8250_isa_devs = platform_device_alloc("serial8250", + PLAT8250_DEV_LEGACY); + if (!serial8250_isa_devs) { + ret = -ENOMEM; + goto unreg_pnp; + } + + ret = platform_device_add(serial8250_isa_devs); + if (ret) + goto put_dev; + + serial8250_register_ports(&serial8250_reg, &serial8250_isa_devs->dev); + + ret = platform_driver_register(&serial8250_isa_driver); + if (ret == 0) + goto out; + + platform_device_del(serial8250_isa_devs); +put_dev: + platform_device_put(serial8250_isa_devs); +unreg_pnp: + serial8250_pnp_exit(); +unreg_uart_drv: +#ifdef CONFIG_SPARC + sunserial_unregister_minors(&serial8250_reg, UART_NR); +#else + uart_unregister_driver(&serial8250_reg); +#endif +out: + return ret; +} + +static void __exit serial8250_exit(void) +{ + struct platform_device *isa_dev = serial8250_isa_devs; + + /* + * This tells serial8250_unregister_port() not to re-register + * the ports (thereby making serial8250_isa_driver permanently + * in use.) 
+	 */
+	serial8250_isa_devs = NULL;
+
+	platform_driver_unregister(&serial8250_isa_driver);
+	platform_device_unregister(isa_dev);
+
+	serial8250_pnp_exit();
+
+#ifdef CONFIG_SPARC
+	sunserial_unregister_minors(&serial8250_reg, UART_NR);
+#else
+	uart_unregister_driver(&serial8250_reg);
+#endif
+}
+
+module_init(serial8250_init);
+module_exit(serial8250_exit);
+
+EXPORT_SYMBOL(serial8250_suspend_port);
+EXPORT_SYMBOL(serial8250_resume_port);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic 8250/16x50 serial driver");
+
+module_param(share_irqs, uint, 0644);
+MODULE_PARM_DESC(share_irqs, "Share IRQs with other non-8250/16x50 devices"
+	" (unsafe)");
+
+module_param(nr_uarts, uint, 0644);
+MODULE_PARM_DESC(nr_uarts, "Maximum number of UARTs supported. (1-" __MODULE_STRING(CONFIG_SERIAL_8250_NR_UARTS) ")");
+
+module_param(skip_txen_test, uint, 0644);
+MODULE_PARM_DESC(skip_txen_test, "Skip checking for the TXEN bug at init time");
+
+#ifdef CONFIG_SERIAL_8250_RSA
+module_param_array(probe_rsa, ulong, &probe_rsa_count, 0444);
+MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA");
+#endif
+MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR);
+
+#ifndef MODULE
+/* This module was renamed to 8250_core in 3.7. Keep the old "8250" name
+ * working as well for the module options so we don't break people. We
+ * need to keep the names identical and the convenient macros will happily
+ * refuse to let us do that by failing the build with redefinition errors
+ * of global variables. So we stick them inside a dummy function to avoid
+ * those conflicts. The options still get parsed, and the redefined
+ * MODULE_PARAM_PREFIX lets us keep the "8250." syntax alive.
+ *
+ * This is hacky. I'm sorry.
+ */
+static void __used s8250_options(void)
+{
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "8250_core."
+
+	module_param_cb(share_irqs, &param_ops_uint, &share_irqs, 0644);
+	module_param_cb(nr_uarts, &param_ops_uint, &nr_uarts, 0644);
+	module_param_cb(skip_txen_test, &param_ops_uint, &skip_txen_test, 0644);
+#ifdef CONFIG_SERIAL_8250_RSA
+	__module_param_call(MODULE_PARAM_PREFIX, probe_rsa,
+		&param_array_ops, .arr = &__param_arr_probe_rsa,
+		0444, -1);
+#endif
+}
+#else
+MODULE_ALIAS("8250_core");
+#endif
diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile
index a23838a4d535..36d68d054307 100644
--- a/drivers/tty/serial/8250/Makefile
+++ b/drivers/tty/serial/8250/Makefile
@@ -2,10 +2,10 @@
 # Makefile for the 8250 serial device drivers.
 #
-obj-$(CONFIG_SERIAL_8250) += 8250_core.o
-8250_core-y := 8250.o
-8250_core-$(CONFIG_SERIAL_8250_PNP) += 8250_pnp.o
-8250_core-$(CONFIG_SERIAL_8250_DMA) += 8250_dma.o
+obj-$(CONFIG_SERIAL_8250) += 8250.o
+8250-y := 8250_core.o
+8250-$(CONFIG_SERIAL_8250_PNP) += 8250_pnp.o
+8250-$(CONFIG_SERIAL_8250_DMA) += 8250_dma.o
 obj-$(CONFIG_SERIAL_8250_GSC) += 8250_gsc.o
 obj-$(CONFIG_SERIAL_8250_PCI) += 8250_pci.o
 obj-$(CONFIG_SERIAL_8250_HP300) += 8250_hp300.o
-- cgit v1.2.2

From 9326b047e4fd4a8da72e59d913214a1803e9709c Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Tue, 19 Mar 2013 11:34:57 +0100
Subject: TTY: 8250, deprecated 8250_core.* options

They were introduced by mistake in 3.7. Let's deprecate them now. For
the reasons, see the text in Kconfig below.
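To illustrate (the parameter values here are arbitrary): with the
compatibility option enabled, a built-in driver keeps honouring both
spellings on the kernel command line, e.g. 8250.nr_uarts=4 as well as
8250_core.nr_uarts=4, while a modular build gets an 8250_core module
alias instead.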
Signed-off-by: Jiri Slaby
Cc: Josh Boyer
Signed-off-by: Greg Kroah-Hartman
---
 drivers/tty/serial/8250/8250_core.c |  2 ++
 drivers/tty/serial/8250/Kconfig     | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 2d563cb9057e..35f9c96aada9 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -3418,6 +3418,7 @@ MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA");
 #endif
 MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR);
 
+#ifdef CONFIG_SERIAL_8250_DEPRECATED_OPTIONS
 #ifndef MODULE
 /* This module was renamed to 8250_core in 3.7. Keep the old "8250" name
  * working as well for the module options so we don't break people. We
@@ -3446,3 +3447,4 @@ static void __used s8250_options(void)
 #else
 MODULE_ALIAS("8250_core");
 #endif
+#endif
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index 2ef9537bcb2c..80fe91e64a52 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -33,6 +33,23 @@ config SERIAL_8250
 	  Most people will say Y or M here, so that they can use serial mice,
 	  modems and similar devices connecting to the standard serial ports.
 
+config SERIAL_8250_DEPRECATED_OPTIONS
+	bool "Support 8250_core.* kernel options (DEPRECATED)"
+	depends on SERIAL_8250
+	default y
+	---help---
+	  In 3.7 we renamed 8250 to 8250_core by mistake, so now we have to
+	  accept kernel parameters in both forms like 8250_core.nr_uarts=4 and
+	  8250.nr_uarts=4. We now renamed the module back to 8250, but if
+	  anybody noticed in 3.7 and changed their userspace we still have to
+	  keep the 8250_core.* options around until they revert the changes
+	  they already did.
+
+	  If 8250 is built as a module, this adds 8250_core alias instead.
+
+	  If you did not notice yet and/or you have userspace from pre-3.7, it
+	  is safe (and recommended) to say N here.
+
 config SERIAL_8250_PNP
 	bool "8250/16550 PNP device support" if EXPERT
 	depends on SERIAL_8250 && PNP
-- cgit v1.2.2

From 855f6fd941019ecc9525ca038b78f50c6c1e80a8 Mon Sep 17 00:00:00 2001
From: John Linn
Date: Fri, 22 Mar 2013 18:49:27 +0100
Subject: Xilinx: ARM: UART: clear pending irqs before enabling irqs

The Boot ROM can leave interrupts pending, and because the driver never
cleared them it could lock up. Clearing the pending interrupts before
enabling any prevents that issue.

This patch is needed for the current (3.9-rc3) mainline kernel. It
presumably went unnoticed until now because the driver had only been
tested with u-boot, which may already clear these interrupts.
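The fix follows the usual ack-then-unmask pattern; a minimal sketch
(register names as used by this driver, assuming the controller's
write-one-to-clear status semantics; the enable mask shown is
illustrative, the real one is in the diff below):

	/* ack whatever the Boot ROM left latched in the status register */
	xuartps_writel(xuartps_readl(XUARTPS_ISR_OFFSET), XUARTPS_ISR_OFFSET);
	/* only then unmask the interrupts the driver actually wants */
	xuartps_writel(XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_PARITY,
		       XUARTPS_IER_OFFSET);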
[s.trumtrar@pengutronix.de: cherry-picked from linux-xlnx.git] Signed-off-by: Steffen Trumtrar Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index ba451c7209fc..f36bbba1ac8b 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -578,6 +578,8 @@ static int xuartps_startup(struct uart_port *port) /* Receive Timeout register is enabled with value of 10 */ xuartps_writel(10, XUARTPS_RXTOUT_OFFSET); + /* Clear out any pending interrupts before enabling them */ + xuartps_writel(xuartps_readl(XUARTPS_ISR_OFFSET), XUARTPS_ISR_OFFSET); /* Set the Interrupt Registers with desired interrupts */ xuartps_writel(XUARTPS_IXR_TXEMPTY | XUARTPS_IXR_PARITY | -- cgit v1.2.2 From 68e13a67ddfb55af386b903ab9ca56358930f79c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 25 Mar 2013 16:57:17 -0700 Subject: workqueue: rename wq_mutex to wq_pool_mutex wq->flush_mutex will be renamed to wq->mutex and cover all fields specific to each workqueue and eventually replace pwq_lock, which will make locking simpler and easier to understand. Rename wq_mutex to wq_pool_mutex to avoid confusion with wq->mutex. After the scheduled changes, wq_pool_mutex won't be protecting anything specific to each workqueue instance anyway. This patch is pure rename. tj: s/wqs_mutex/wq_pool_mutex/. Rewrote description. Signed-off-by: Tejun Heo Cc: Lai Jiangshan --- kernel/workqueue.c | 109 +++++++++++++++++++++++++++-------------------------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 47f258799bf2..064157eac4c8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -123,9 +123,9 @@ enum { * MG: pool->manager_mutex and pool->lock protected. Writes require both * locks. Reads can happen under either lock. * - * WQ: wq_mutex protected. + * PL: wq_pool_mutex protected. * - * WR: wq_mutex protected for writes. Sched-RCU protected for reads. + * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. * * PW: pwq_lock protected. * @@ -163,8 +163,8 @@ struct worker_pool { struct idr worker_idr; /* MG: worker IDs and iteration */ struct workqueue_attrs *attrs; /* I: worker attributes */ - struct hlist_node hash_node; /* WQ: unbound_pool_hash node */ - int refcnt; /* WQ: refcnt for unbound pools */ + struct hlist_node hash_node; /* PL: unbound_pool_hash node */ + int refcnt; /* PL: refcnt for unbound pools */ /* * The current concurrency level. As it's likely to be accessed @@ -226,10 +226,10 @@ struct wq_device; * the appropriate worker_pool through its pool_workqueues. 
 */
struct workqueue_struct {
-	unsigned int		flags;		/* WQ: WQ_* flags */
+	unsigned int		flags;		/* PL: WQ_* flags */
 	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */
 	struct list_head	pwqs;		/* FR: all pwqs of this wq */
-	struct list_head	list;		/* WQ: list of all workqueues */
+	struct list_head	list;		/* PL: list of all workqueues */
 
 	struct mutex		flush_mutex;	/* protects wq flushing */
 	int			work_color;	/* F: current work color */
@@ -242,7 +242,7 @@ struct workqueue_struct {
 	struct list_head	maydays;	/* MD: pwqs requesting rescue */
 	struct worker		*rescuer;	/* I: rescue worker */
 
-	int			nr_drainers;	/* WQ: drain in progress */
+	int			nr_drainers;	/* PL: drain in progress */
 	int			saved_max_active; /* PW: saved pwq max_active */
 
 #ifdef CONFIG_SYSFS
@@ -256,20 +256,20 @@ struct workqueue_struct {
 
 static struct kmem_cache *pwq_cache;
 
-static DEFINE_MUTEX(wq_mutex);		/* protects workqueues and pools */
+static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_SPINLOCK(pwq_lock);	/* protects pool_workqueues */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 
-static LIST_HEAD(workqueues);		/* WQ: list of all workqueues */
-static bool workqueue_freezing;		/* WQ: have wqs started freezing? */
+static LIST_HEAD(workqueues);		/* PL: list of all workqueues */
+static bool workqueue_freezing;		/* PL: have wqs started freezing? */
 
 /* the per-cpu worker pools */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
 				     cpu_worker_pools);
 
-static DEFINE_IDR(worker_pool_idr);	/* WR: idr of all pools */
+static DEFINE_IDR(worker_pool_idr);	/* PR: idr of all pools */
 
-/* WQ: hash of all unbound pools keyed by pool->attrs */
+/* PL: hash of all unbound pools keyed by pool->attrs */
 static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
 
 /* I: attributes used when instantiating standard unbound pools on demand */
@@ -293,10 +293,10 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
 
-#define assert_rcu_or_wq_mutex()					\
+#define assert_rcu_or_pool_mutex()					\
 	rcu_lockdep_assert(rcu_read_lock_sched_held() ||		\
-			   lockdep_is_held(&wq_mutex),			\
-			   "sched RCU or wq_mutex should be held")
+			   lockdep_is_held(&wq_pool_mutex),		\
+			   "sched RCU or wq_pool_mutex should be held")
 
 #define assert_rcu_or_pwq_lock()					\
 	rcu_lockdep_assert(rcu_read_lock_sched_held() ||		\
@@ -323,16 +323,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
  * @pool: iteration cursor
  * @pi: integer used for iteration
  *
- * This must be called either with wq_mutex held or sched RCU read locked.
- * If the pool needs to be used beyond the locking in effect, the caller is
- * responsible for guaranteeing that the pool stays online.
+ * This must be called either with wq_pool_mutex held or sched RCU read
+ * locked. If the pool needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pool stays online.
  *
  * The if/else clause exists only for the lockdep assertion and can be
  * ignored.
*/ #define for_each_pool(pool, pi) \ idr_for_each_entry(&worker_pool_idr, pool, pi) \ - if (({ assert_rcu_or_wq_mutex(); false; })) { } \ + if (({ assert_rcu_or_pool_mutex(); false; })) { } \ else /** @@ -489,7 +489,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) { int ret; - lockdep_assert_held(&wq_mutex); + lockdep_assert_held(&wq_pool_mutex); do { if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL)) @@ -607,9 +607,9 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) * * Return the worker_pool @work was last associated with. %NULL if none. * - * Pools are created and destroyed under wq_mutex, and allows read access - * under sched-RCU read lock. As such, this function should be called - * under wq_mutex or with preemption disabled. + * Pools are created and destroyed under wq_pool_mutex, and allows read + * access under sched-RCU read lock. As such, this function should be + * called under wq_pool_mutex or with preemption disabled. * * All fields of the returned pool are accessible as long as the above * mentioned locking is in effect. If the returned pool needs to be used @@ -621,7 +621,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) unsigned long data = atomic_long_read(&work->data); int pool_id; - assert_rcu_or_wq_mutex(); + assert_rcu_or_pool_mutex(); if (data & WORK_STRUCT_PWQ) return ((struct pool_workqueue *) @@ -2684,10 +2684,10 @@ void drain_workqueue(struct workqueue_struct *wq) * hotter than drain_workqueue() and already looks at @wq->flags. * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers. */ - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); if (!wq->nr_drainers++) wq->flags |= __WQ_DRAINING; - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); reflush: flush_workqueue(wq); @@ -2714,10 +2714,10 @@ reflush: local_irq_enable(); - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); if (!--wq->nr_drainers) wq->flags &= ~__WQ_DRAINING; - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); } EXPORT_SYMBOL_GPL(drain_workqueue); @@ -3430,16 +3430,16 @@ static void put_unbound_pool(struct worker_pool *pool) { struct worker *worker; - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); if (--pool->refcnt) { - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); return; } /* sanity checks */ if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) || WARN_ON(!list_empty(&pool->worklist))) { - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); return; } @@ -3448,7 +3448,7 @@ static void put_unbound_pool(struct worker_pool *pool) idr_remove(&worker_pool_idr, pool->id); hash_del(&pool->hash_node); - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); /* * Become the manager and destroy all workers. Grabbing @@ -3489,7 +3489,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); /* do we already have a matching pool? 
*/ hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { @@ -3520,10 +3520,10 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) /* install */ hash_add(unbound_pool_hash, &pool->hash_node, hash); out_unlock: - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); return pool; fail: - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); if (pool) put_unbound_pool(pool); return NULL; @@ -3803,10 +3803,11 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, goto err_destroy; /* - * wq_mutex protects global freeze state and workqueues list. Grab - * it, adjust max_active and add the new @wq to workqueues list. + * wq_pool_mutex protects global freeze state and workqueues list. + * Grab it, adjust max_active and add the new @wq to workqueues + * list. */ - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); spin_lock_irq(&pwq_lock); for_each_pwq(pwq, wq) @@ -3815,7 +3816,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, list_add(&wq->list, &workqueues); - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); return wq; @@ -3866,9 +3867,9 @@ void destroy_workqueue(struct workqueue_struct *wq) * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. */ - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); list_del_init(&wq->list); - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); workqueue_sysfs_unregister(wq); @@ -4198,7 +4199,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); for_each_pool(pool, pi) { mutex_lock(&pool->manager_mutex); @@ -4216,7 +4217,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, mutex_unlock(&pool->manager_mutex); } - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); break; } return NOTIFY_OK; @@ -4292,7 +4293,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu); * pool->worklist. * * CONTEXT: - * Grabs and releases wq_mutex, pwq_lock and pool->lock's. + * Grabs and releases wq_pool_mutex, pwq_lock and pool->lock's. */ void freeze_workqueues_begin(void) { @@ -4301,7 +4302,7 @@ void freeze_workqueues_begin(void) struct pool_workqueue *pwq; int pi; - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); WARN_ON_ONCE(workqueue_freezing); workqueue_freezing = true; @@ -4322,7 +4323,7 @@ void freeze_workqueues_begin(void) } spin_unlock_irq(&pwq_lock); - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); } /** @@ -4332,7 +4333,7 @@ void freeze_workqueues_begin(void) * between freeze_workqueues_begin() and thaw_workqueues(). * * CONTEXT: - * Grabs and releases wq_mutex. + * Grabs and releases wq_pool_mutex. * * RETURNS: * %true if some freezable workqueues are still busy. %false if freezing @@ -4344,7 +4345,7 @@ bool freeze_workqueues_busy(void) struct workqueue_struct *wq; struct pool_workqueue *pwq; - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); WARN_ON_ONCE(!workqueue_freezing); @@ -4367,7 +4368,7 @@ bool freeze_workqueues_busy(void) rcu_read_unlock_sched(); } out_unlock: - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); return busy; } @@ -4378,7 +4379,7 @@ out_unlock: * frozen works are transferred to their respective pool worklists. * * CONTEXT: - * Grabs and releases wq_mutex, pwq_lock and pool->lock's. + * Grabs and releases wq_pool_mutex, pwq_lock and pool->lock's. 
*/ void thaw_workqueues(void) { @@ -4387,7 +4388,7 @@ void thaw_workqueues(void) struct worker_pool *pool; int pi; - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); if (!workqueue_freezing) goto out_unlock; @@ -4410,7 +4411,7 @@ void thaw_workqueues(void) workqueue_freezing = false; out_unlock: - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); } #endif /* CONFIG_FREEZER */ @@ -4442,9 +4443,9 @@ static int __init init_workqueues(void) pool->attrs->nice = std_nice[i++]; /* alloc pool ID */ - mutex_lock(&wq_mutex); + mutex_lock(&wq_pool_mutex); BUG_ON(worker_pool_assign_id(pool)); - mutex_unlock(&wq_mutex); + mutex_unlock(&wq_pool_mutex); } } -- cgit v1.2.2 From 3c25a55daadc7e7058926f5728fba7721d824ffb Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 25 Mar 2013 16:57:17 -0700 Subject: workqueue: rename wq->flush_mutex to wq->mutex Currently pwq->flush_mutex protects many fields of a workqueue including, especially, the pwqs list. We're going to expand this mutex to protect most of a workqueue and eventually replace pwq_lock, which will make locking simpler and easier to understand. Drop the "flush_" prefix in preparation. This patch is pure rename. tj: Rebased on top of the current dev branch. Updated description. Use WQ: and WR: instead of Q: and QR: for synchronization labels. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 064157eac4c8..d448edae3513 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -118,8 +118,6 @@ enum { * cpu or grabbing pool->lock is enough for read access. If * POOL_DISASSOCIATED is set, it's identical to L. * - * F: wq->flush_mutex protected. - * * MG: pool->manager_mutex and pool->lock protected. Writes require both * locks. Reads can happen under either lock. * @@ -129,8 +127,10 @@ enum { * * PW: pwq_lock protected. * - * FR: wq->flush_mutex and pwq_lock protected for writes. Sched-RCU - * protected for reads. + * WQ: wq->mutex protected. + * + * WR: wq->mutex and pwq_lock protected for writes. Sched-RCU protected + * for reads. * * MD: wq_mayday_lock protected. */ @@ -197,7 +197,7 @@ struct pool_workqueue { int nr_active; /* L: nr of active works */ int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ - struct list_head pwqs_node; /* FR: node on wq->pwqs */ + struct list_head pwqs_node; /* WR: node on wq->pwqs */ struct list_head mayday_node; /* MD: node on wq->maydays */ /* @@ -214,8 +214,8 @@ struct pool_workqueue { * Structure used to wait for workqueue flush. 
*/ struct wq_flusher { - struct list_head list; /* F: list of flushers */ - int flush_color; /* F: flush color waiting for */ + struct list_head list; /* WQ: list of flushers */ + int flush_color; /* WQ: flush color waiting for */ struct completion done; /* flush completion */ }; @@ -228,16 +228,16 @@ struct wq_device; struct workqueue_struct { unsigned int flags; /* PL: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */ - struct list_head pwqs; /* FR: all pwqs of this wq */ + struct list_head pwqs; /* WR: all pwqs of this wq */ struct list_head list; /* PL: list of all workqueues */ - struct mutex flush_mutex; /* protects wq flushing */ - int work_color; /* F: current work color */ - int flush_color; /* F: current flush color */ + struct mutex mutex; /* protects this wq */ + int work_color; /* WQ: current work color */ + int flush_color; /* WQ: current flush color */ atomic_t nr_pwqs_to_flush; /* flush in progress */ - struct wq_flusher *first_flusher; /* F: first flusher */ - struct list_head flusher_queue; /* F: flush waiters */ - struct list_head flusher_overflow; /* F: flush overflow list */ + struct wq_flusher *first_flusher; /* WQ: first flusher */ + struct list_head flusher_queue; /* WQ: flush waiters */ + struct list_head flusher_overflow; /* WQ: flush overflow list */ struct list_head maydays; /* MD: pwqs requesting rescue */ struct worker *rescuer; /* I: rescue worker */ @@ -2460,7 +2460,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq, * advanced to @work_color. * * CONTEXT: - * mutex_lock(wq->flush_mutex). + * mutex_lock(wq->mutex). * * RETURNS: * %true if @flush_color >= 0 and there's something to flush. %false @@ -2529,7 +2529,7 @@ void flush_workqueue(struct workqueue_struct *wq) lock_map_acquire(&wq->lockdep_map); lock_map_release(&wq->lockdep_map); - mutex_lock(&wq->flush_mutex); + mutex_lock(&wq->mutex); /* * Start-to-wait phase @@ -2574,7 +2574,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_add_tail(&this_flusher.list, &wq->flusher_overflow); } - mutex_unlock(&wq->flush_mutex); + mutex_unlock(&wq->mutex); wait_for_completion(&this_flusher.done); @@ -2587,7 +2587,7 @@ void flush_workqueue(struct workqueue_struct *wq) if (wq->first_flusher != &this_flusher) return; - mutex_lock(&wq->flush_mutex); + mutex_lock(&wq->mutex); /* we might have raced, check again with mutex held */ if (wq->first_flusher != &this_flusher) @@ -2659,7 +2659,7 @@ void flush_workqueue(struct workqueue_struct *wq) } out_unlock: - mutex_unlock(&wq->flush_mutex); + mutex_unlock(&wq->mutex); } EXPORT_SYMBOL_GPL(flush_workqueue); @@ -3550,15 +3550,15 @@ static void pwq_unbound_release_workfn(struct work_struct *work) return; /* - * Unlink @pwq. Synchronization against flush_mutex isn't strictly + * Unlink @pwq. Synchronization against wq->mutex isn't strictly * necessary on release but do it anyway. It's easier to verify * and consistent with the linking path. */ - mutex_lock(&wq->flush_mutex); + mutex_lock(&wq->mutex); spin_lock_irq(&pwq_lock); list_del_rcu(&pwq->pwqs_node); spin_unlock_irq(&pwq_lock); - mutex_unlock(&wq->flush_mutex); + mutex_unlock(&wq->mutex); put_unbound_pool(pool); call_rcu_sched(&pwq->rcu, rcu_free_pwq); @@ -3627,12 +3627,12 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, INIT_LIST_HEAD(&pwq->mayday_node); INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); - mutex_lock(&wq->flush_mutex); + mutex_lock(&wq->mutex); spin_lock_irq(&pwq_lock); /* * Set the matching work_color. 
This is synchronized with - * flush_mutex to avoid confusing flush_workqueue(). + * wq->mutex to avoid confusing flush_workqueue(). */ if (p_last_pwq) *p_last_pwq = first_pwq(wq); @@ -3645,7 +3645,7 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, list_add_rcu(&pwq->pwqs_node, &wq->pwqs); spin_unlock_irq(&pwq_lock); - mutex_unlock(&wq->flush_mutex); + mutex_unlock(&wq->mutex); } /** @@ -3762,7 +3762,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, /* init wq */ wq->flags = flags; wq->saved_max_active = max_active; - mutex_init(&wq->flush_mutex); + mutex_init(&wq->mutex); atomic_set(&wq->nr_pwqs_to_flush, 0); INIT_LIST_HEAD(&wq->pwqs); INIT_LIST_HEAD(&wq->flusher_queue); -- cgit v1.2.2 From 87fc741e94cf64445c698486982b30afa0811eca Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 25 Mar 2013 16:57:18 -0700 Subject: workqueue: protect wq->nr_drainers and ->flags with wq->mutex We're expanding wq->mutex to cover all fields specific to each workqueue with the end goal of replacing pwq_lock, which will make locking simpler and easier to understand. wq->nr_drainers and ->flags are specific to each workqueue. Protect ->nr_drainers and ->flags with wq->mutex instead of wq_pool_mutex. tj: Rebased on top of the current dev branch. Updated description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d448edae3513..3ac2c4d85607 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -226,7 +226,7 @@ struct wq_device; * the appropriate worker_pool through its pool_workqueues. */ struct workqueue_struct { - unsigned int flags; /* PL: WQ_* flags */ + unsigned int flags; /* WQ: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */ struct list_head pwqs; /* WR: all pwqs of this wq */ struct list_head list; /* PL: list of all workqueues */ @@ -242,7 +242,7 @@ struct workqueue_struct { struct list_head maydays; /* MD: pwqs requesting rescue */ struct worker *rescuer; /* I: rescue worker */ - int nr_drainers; /* PL: drain in progress */ + int nr_drainers; /* WQ: drain in progress */ int saved_max_active; /* PW: saved pwq max_active */ #ifdef CONFIG_SYSFS @@ -2684,10 +2684,10 @@ void drain_workqueue(struct workqueue_struct *wq) * hotter than drain_workqueue() and already looks at @wq->flags. * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers. */ - mutex_lock(&wq_pool_mutex); + mutex_lock(&wq->mutex); if (!wq->nr_drainers++) wq->flags |= __WQ_DRAINING; - mutex_unlock(&wq_pool_mutex); + mutex_unlock(&wq->mutex); reflush: flush_workqueue(wq); @@ -2714,10 +2714,10 @@ reflush: local_irq_enable(); - mutex_lock(&wq_pool_mutex); + mutex_lock(&wq->mutex); if (!--wq->nr_drainers) wq->flags &= ~__WQ_DRAINING; - mutex_unlock(&wq_pool_mutex); + mutex_unlock(&wq->mutex); } EXPORT_SYMBOL_GPL(drain_workqueue); -- cgit v1.2.2 From b09f4fd39c0e562aff3682773f4c451d6125048c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 25 Mar 2013 16:57:18 -0700 Subject: workqueue: protect wq->pwqs and iteration with wq->mutex We're expanding wq->mutex to cover all fields specific to each workqueue with the end goal of replacing pwq_lock, which will make locking simpler and easier to understand. init_and_link_pwq() and pwq_unbound_release_workfn() already grab wq->mutex when adding or removing a pwq from the wq->pwqs list.
This patch makes it official that the list is wq->mutex protected for writes and updates readers accordingly. Explicit IRQ toggles for sched-RCU read-locking in flush_workqueue_prep_pwqs() and drain_workqueue() are removed, as the surrounding wq->mutex can provide sufficient synchronization. Also, assert_rcu_or_pwq_lock() is renamed to assert_rcu_or_wq_mutex() and checks for wq->mutex too. pwq_lock locking and assertion are not removed by this patch and a couple of for_each_pwq() iterations are still protected by it. They'll be removed by future patches. tj: Rebased on top of the current dev branch. Updated description. Folded in assert_rcu_or_wq_mutex() renaming from a later patch along with associated comment updates. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3ac2c4d85607..9c32fd171d5c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -204,7 +204,7 @@ struct pool_workqueue { * Release of unbound pwq is punted to system_wq. See put_pwq() * and pwq_unbound_release_workfn() for details. pool_workqueue * itself is also sched-RCU protected so that the first pwq can be - * determined without grabbing pwq_lock. + * determined without grabbing wq->mutex. */ struct work_struct unbound_release_work; struct rcu_head rcu; @@ -298,10 +298,11 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, lockdep_is_held(&wq_pool_mutex), \ "sched RCU or wq_pool_mutex should be held") -#define assert_rcu_or_pwq_lock() \ rcu_lockdep_assert(rcu_read_lock_sched_held() || \ +#define assert_rcu_or_wq_mutex(wq) \ rcu_lockdep_assert(rcu_read_lock_sched_held() || \ + lockdep_is_held(&wq->mutex) || \ lockdep_is_held(&pwq_lock), \ - "sched RCU or pwq_lock should be held") + "sched RCU or wq->mutex should be held") #ifdef CONFIG_LOCKDEP #define assert_manager_or_pool_lock(pool) \ @@ -356,7 +357,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, * @pwq: iteration cursor * @wq: the target workqueue * - * This must be called either with pwq_lock held or sched RCU read locked. + * This must be called either with wq->mutex held or sched RCU read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * @@ -365,7 +366,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, */ #define for_each_pwq(pwq, wq) \ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \ - if (({ assert_rcu_or_pwq_lock(); false; })) { } \ + if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ else #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -504,13 +505,13 @@ static int worker_pool_assign_id(struct worker_pool *pool) * first_pwq - return the first pool_workqueue of the specified workqueue * @wq: the target workqueue * - * This must be called either with pwq_lock held or sched RCU read locked. + * This must be called either with wq->mutex held or sched RCU read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online.
*/ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) { - assert_rcu_or_pwq_lock(); + assert_rcu_or_wq_mutex(wq); return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue, pwqs_node); } @@ -2477,12 +2478,10 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, atomic_set(&wq->nr_pwqs_to_flush, 1); } - local_irq_disable(); - for_each_pwq(pwq, wq) { struct worker_pool *pool = pwq->pool; - spin_lock(&pool->lock); + spin_lock_irq(&pool->lock); if (flush_color >= 0) { WARN_ON_ONCE(pwq->flush_color != -1); @@ -2499,11 +2498,9 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, pwq->work_color = work_color; } - spin_unlock(&pool->lock); + spin_unlock_irq(&pool->lock); } - local_irq_enable(); - if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) complete(&wq->first_flusher->done); @@ -2691,14 +2688,14 @@ void drain_workqueue(struct workqueue_struct *wq) reflush: flush_workqueue(wq); - local_irq_disable(); + mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { bool drained; - spin_lock(&pwq->pool->lock); + spin_lock_irq(&pwq->pool->lock); drained = !pwq->nr_active && list_empty(&pwq->delayed_works); - spin_unlock(&pwq->pool->lock); + spin_unlock_irq(&pwq->pool->lock); if (drained) continue; @@ -2708,13 +2705,10 @@ reflush: pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n", wq->name, flush_cnt); - local_irq_enable(); + mutex_unlock(&wq->mutex); goto reflush; } - local_irq_enable(); - - mutex_lock(&wq->mutex); if (!--wq->nr_drainers) wq->flags &= ~__WQ_DRAINING; mutex_unlock(&wq->mutex); @@ -3843,13 +3837,13 @@ void destroy_workqueue(struct workqueue_struct *wq) drain_workqueue(wq); /* sanity checks */ - spin_lock_irq(&pwq_lock); + mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { int i; for (i = 0; i < WORK_NR_COLORS; i++) { if (WARN_ON(pwq->nr_in_flight[i])) { - spin_unlock_irq(&pwq_lock); + mutex_unlock(&wq->mutex); return; } } @@ -3857,11 +3851,11 @@ if (WARN_ON(pwq->refcnt > 1) || WARN_ON(pwq->nr_active) || WARN_ON(!list_empty(&pwq->delayed_works))) { - spin_unlock_irq(&pwq_lock); + mutex_unlock(&wq->mutex); return; } } - spin_unlock_irq(&pwq_lock); + mutex_unlock(&wq->mutex); /* * wq list is used to freeze wq, remove from list after -- cgit v1.2.2 From a357fc03262988f2aa6c4a668b89be22b11ff1e7 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 25 Mar 2013 16:57:19 -0700 Subject: workqueue: protect wq->saved_max_active with wq->mutex We're expanding wq->mutex to cover all fields specific to each workqueue with the end goal of replacing pwq_lock, which will make locking simpler and easier to understand. This patch makes wq->saved_max_active protected by wq->mutex instead of pwq_lock. As the pwq_lock locking around pwq_adjust_max_active() is no longer necessary, this patch also replaces the pwq_lock locking of the for_each_pwq() iterations around pwq_adjust_max_active() with wq->mutex. tj: Rebased on top of the current dev branch. Updated description.
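As a rough sketch of the locking rule this change establishes (illustrative only, reduced from the patterns in the diffs above; not the literal workqueue.c source):

	/*
	 * Sketch: per-workqueue state, including saved_max_active and the
	 * pwqs list walked by for_each_pwq(), is touched only under
	 * wq->mutex; pwq_adjust_max_active() takes pool->lock internally.
	 */
	static void set_max_active_sketch(struct workqueue_struct *wq, int max_active)
	{
		struct pool_workqueue *pwq;

		mutex_lock(&wq->mutex);
		wq->saved_max_active = max_active;	/* WQ: wq->mutex */
		for_each_pwq(pwq, wq)			/* WR: safe under wq->mutex */
			pwq_adjust_max_active(pwq);
		mutex_unlock(&wq->mutex);
	}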
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9c32fd171d5c..af6087a5a10a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -243,7 +243,7 @@ struct workqueue_struct { struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* WQ: drain in progress */ - int saved_max_active; /* PW: saved pwq max_active */ + int saved_max_active; /* WQ: saved pwq max_active */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ @@ -3579,13 +3579,13 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) bool freezable = wq->flags & WQ_FREEZABLE; /* for @wq->saved_max_active */ - lockdep_assert_held(&pwq_lock); + lockdep_assert_held(&wq->mutex); /* fast exit for non-freezable wqs */ if (!freezable && pwq->max_active == wq->saved_max_active) return; - spin_lock(&pwq->pool->lock); + spin_lock_irq(&pwq->pool->lock); if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) { pwq->max_active = wq->saved_max_active; @@ -3603,7 +3603,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) pwq->max_active = 0; } - spin_unlock(&pwq->pool->lock); + spin_unlock_irq(&pwq->pool->lock); } static void init_and_link_pwq(struct pool_workqueue *pwq, @@ -3622,7 +3622,6 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); mutex_lock(&wq->mutex); - spin_lock_irq(&pwq_lock); /* * Set the matching work_color. This is synchronized with @@ -3636,9 +3635,10 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, pwq_adjust_max_active(pwq); /* link in @pwq */ + spin_lock_irq(&pwq_lock); list_add_rcu(&pwq->pwqs_node, &wq->pwqs); - spin_unlock_irq(&pwq_lock); + mutex_unlock(&wq->mutex); } @@ -3803,10 +3803,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, */ mutex_lock(&wq_pool_mutex); - spin_lock_irq(&pwq_lock); + mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); - spin_unlock_irq(&pwq_lock); + mutex_unlock(&wq->mutex); list_add(&wq->list, &workqueues); @@ -3917,14 +3917,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); - spin_lock_irq(&pwq_lock); + mutex_lock(&wq->mutex); wq->saved_max_active = max_active; for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); - spin_unlock_irq(&pwq_lock); + mutex_unlock(&wq->mutex); } EXPORT_SYMBOL_GPL(workqueue_set_max_active); @@ -4287,7 +4287,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu); * pool->worklist. * * CONTEXT: - * Grabs and releases wq_pool_mutex, pwq_lock and pool->lock's. + * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. */ void freeze_workqueues_begin(void) { @@ -4309,13 +4309,12 @@ void freeze_workqueues_begin(void) spin_unlock_irq(&pool->lock); } - /* suppress further executions by setting max_active to zero */ - spin_lock_irq(&pwq_lock); list_for_each_entry(wq, &workqueues, list) { + mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); + mutex_unlock(&wq->mutex); } - spin_unlock_irq(&pwq_lock); mutex_unlock(&wq_pool_mutex); } @@ -4373,7 +4372,7 @@ out_unlock: * frozen works are transferred to their respective pool worklists. * * CONTEXT: - * Grabs and releases wq_pool_mutex, pwq_lock and pool->lock's. + * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. 
*/ void thaw_workqueues(void) { @@ -4396,12 +4395,12 @@ void thaw_workqueues(void) } /* restore max_active and repopulate worklist */ - spin_lock_irq(&pwq_lock); list_for_each_entry(wq, &workqueues, list) { + mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) pwq_adjust_max_active(pwq); + mutex_unlock(&wq->mutex); } - spin_unlock_irq(&pwq_lock); workqueue_freezing = false; out_unlock: -- cgit v1.2.2 From b5927605478b740d73192f587e458de1632106e8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 25 Mar 2013 16:57:19 -0700 Subject: workqueue: remove pwq_lock which is no longer used To simplify locking, the previous patches expanded wq->mutex to protect all fields of each workqueue instance including the pwqs list, leaving pwq_lock without any users. Remove the unused pwq_lock. tj: Rebased on top of the current dev branch. Updated description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index af6087a5a10a..04a8b98d30ce 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -125,12 +125,9 @@ enum { * * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. * - * PW: pwq_lock protected. * - * WQ: wq->mutex protected. * - * WR: wq->mutex and pwq_lock protected for writes. Sched-RCU protected - * for reads. + * WR: wq->mutex protected for writes. Sched-RCU protected for reads. * * MD: wq_mayday_lock protected. */ @@ -257,7 +254,6 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ -static DEFINE_SPINLOCK(pwq_lock); /* protects pool_workqueues */ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ static LIST_HEAD(workqueues); /* PL: list of all workqueues */ @@ -300,8 +296,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, #define assert_rcu_or_wq_mutex(wq) \ rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&wq->mutex) || \ - lockdep_is_held(&pwq_lock), \ + lockdep_is_held(&wq->mutex), \ "sched RCU or wq->mutex should be held") #ifdef CONFIG_LOCKDEP @@ -3549,9 +3544,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work) * and consistent with the linking path. */ mutex_lock(&wq->mutex); - spin_lock_irq(&pwq_lock); list_del_rcu(&pwq->pwqs_node); - spin_unlock_irq(&pwq_lock); mutex_unlock(&wq->mutex); put_unbound_pool(pool); @@ -3635,9 +3628,7 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, pwq_adjust_max_active(pwq); /* link in @pwq */ - spin_lock_irq(&pwq_lock); list_add_rcu(&pwq->pwqs_node, &wq->pwqs); - spin_unlock_irq(&pwq_lock); mutex_unlock(&wq->mutex); } -- cgit v1.2.2 From d8d595dfce7925627de78b9eecc8598a6ffda610 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Mon, 25 Mar 2013 19:22:31 -0600 Subject: block: removes dynamic allocation on stack This patch removes a dynamically sized (variable-length) array that was being allocated on the kernel stack.
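A minimal sketch of the transformation (simplified, not the driver source verbatim; note that the kzalloc() return value also needs to be checked, which a follow-up patch later in this log adds):

	static int save_issued_dmas_sketch(struct rsxx_cardinfo *card)
	{
		struct list_head *issued_dmas;
		int i;

		/* heap allocation replaces the old variable-length
		 * array on the kernel stack */
		issued_dmas = kcalloc(card->n_targets, sizeof(*issued_dmas),
				      GFP_KERNEL);
		if (!issued_dmas)
			return -ENOMEM;

		for (i = 0; i < card->n_targets; i++)
			INIT_LIST_HEAD(&issued_dmas[i]);

		/* ... walk the per-target DMA queues as the real code does ... */

		kfree(issued_dmas);
		return 0;
	}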
Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index d523e9c56578..95047e111a33 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -986,7 +986,10 @@ void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) int j; int cnt; struct rsxx_dma *dma; - struct list_head issued_dmas[card->n_targets]; + struct list_head *issued_dmas; + + issued_dmas = kzalloc(sizeof(*issued_dmas) * card->n_targets, + GFP_KERNEL); for (i = 0; i < card->n_targets; i++) { INIT_LIST_HEAD(&issued_dmas[i]); @@ -1025,6 +1028,8 @@ void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) } spin_unlock(&card->ctrl[i].queue_lock); } + + kfree(issued_dmas); } void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) -- cgit v1.2.2 From a1f6c6b147cc5e83ec36dab8370bd5ec5fa1def6 Mon Sep 17 00:00:00 2001 From: xunleer Date: Tue, 5 Mar 2013 07:44:20 +0000 Subject: ixgbevf: don't release the soft entries When the ixgbevf driver is opened, the request to allocate MSIX irq vectors may fail. In that case the driver will call ixgbevf_down(), which calls ixgbevf_irq_disable() to clear the HW interrupt registers and then synchronize_irq() using the msix_entries pointer in the adapter structure. However, when the request for the MSIX irq vectors failed, the driver had already freed msix_entries, which causes an oops from using the NULL pointer in synchronize_irq(). The calls to pci_disable_msix() and to free the msix_entries memory should not occur if device open fails. Instead, they should be called during device driver removal to balance the call to pci_enable_msix() and the call to allocate msix_entries memory during the device probe and driver load. Signed-off-by: Li Xun Signed-off-by: Greg Rose Tested-by: Sibai Li Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 24 +++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index c3db6cd69b68..2b6cb5ca48ee 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -944,9 +944,17 @@ free_queue_irqs: free_irq(adapter->msix_entries[vector].vector, adapter->q_vector[vector]); } - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; + /* This failure is non-recoverable - it indicates the system is + * out of MSIX vector resources and the VF driver cannot run + * without them. Set the number of msix vectors to zero + * indicating that not enough can be allocated. The error + * will be returned to the user indicating device open failed. + * Any further attempts to force the driver to open will also + * fail. The only way to recover is to unload the driver and + * reload it again. If the system has recovered some MSIX + * vectors then it may succeed. + */ + adapter->num_msix_vectors = 0; return err; } @@ -2572,6 +2580,15 @@ static int ixgbevf_open(struct net_device *netdev) struct ixgbe_hw *hw = &adapter->hw; int err; + /* A previous failure to open the device because of a lack of + * available MSIX vector resources may have reset the number + * of msix vectors variable to zero. The only way to recover + * is to unload/reload the driver and hope that the system has + * been able to recover some MSIX vector resources.
+ */ + if (!adapter->num_msix_vectors) + return -ENOMEM; + /* disallow open during test */ if (test_bit(__IXGBEVF_TESTING, &adapter->state)) return -EBUSY; @@ -2628,7 +2645,6 @@ static int ixgbevf_open(struct net_device *netdev) err_req_irq: ixgbevf_down(adapter); - ixgbevf_free_irq(adapter); err_setup_rx: ixgbevf_free_all_rx_resources(adapter); err_setup_tx: -- cgit v1.2.2 From 22c12752d183f39aa8e2cc884cfcb23c0cb6d98d Mon Sep 17 00:00:00 2001 From: Lior Levy Date: Tue, 12 Mar 2013 15:49:32 +0000 Subject: igb: fix i350 anti spoofing config Fix a problem in i350 where anti spoofing configuration was written into a wrong register. Signed-off-by: Lior Levy Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_82575.c | 33 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index b64542acfa34..12b1d8480808 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1818,27 +1818,32 @@ out: **/ void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) { - u32 dtxswc; + u32 reg_val, reg_offset; switch (hw->mac.type) { case e1000_82576: + reg_offset = E1000_DTXSWC; + break; case e1000_i350: - dtxswc = rd32(E1000_DTXSWC); - if (enable) { - dtxswc |= (E1000_DTXSWC_MAC_SPOOF_MASK | - E1000_DTXSWC_VLAN_SPOOF_MASK); - /* The PF can spoof - it has to in order to - * support emulation mode NICs */ - dtxswc ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); - } else { - dtxswc &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | - E1000_DTXSWC_VLAN_SPOOF_MASK); - } - wr32(E1000_DTXSWC, dtxswc); + reg_offset = E1000_TXSWC; break; default: - break; + return; + } + + reg_val = rd32(reg_offset); + if (enable) { + reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); + /* The PF can spoof - it has to in order to + * support emulation mode NICs + */ + reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); + } else { + reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); } + wr32(reg_offset, reg_val); } /** -- cgit v1.2.2 From d0f63acc2ff354a525f7bc7ba90e81f49b6c2ef8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 13 Mar 2013 15:50:24 +0000 Subject: igb: Fix null pointer dereference The max_vfs= option has always been self limiting to the number of VFs supported by the device. fa44f2f1 added SR-IOV configuration via sysfs, but in the process broke this self correction factor. The failing path is: igb_probe igb_sw_init if (max_vfs > 7) { adapter->vfs_allocated_count = 7; ... igb_probe_vfs igb_enable_sriov(, max_vfs) if (num_vfs > 7) { err = -EPERM; ... This leaves vfs_allocated_count = 7 and vf_data = NULL, so we bomb out when igb_probe finally calls igb_reset. It seems like a really bad idea, and somewhat pointless, to set vfs_allocated_count separate from vf_data, but limiting max_vfs is enough to avoid the null pointer. 
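The invariant, restated as a sketch (the constant name below is an assumption for illustration, not taken from the driver):

	/*
	 * Sketch: the VF count the PF accounts for must never exceed what
	 * igb_enable_sriov() will accept; otherwise vfs_allocated_count
	 * stays at 7 while vf_data is never allocated.
	 */
	#define IGB_MAX_VFS_PER_PF	7	/* assumed name for the limit */

	static unsigned int igb_clamp_max_vfs_sketch(unsigned int requested)
	{
		return min_t(unsigned int, requested, IGB_MAX_VFS_PER_PF);
	}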
Signed-off-by: Alex Williamson Acked-by: Greg Rose Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 4dbd62968c7a..2ae888678b23 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2652,7 +2652,7 @@ static int igb_sw_init(struct igb_adapter *adapter) if (max_vfs > 7) { dev_warn(&pdev->dev, "Maximum of 7 VFs per PF, using max\n"); - adapter->vfs_allocated_count = 7; + max_vfs = adapter->vfs_allocated_count = 7; } else adapter->vfs_allocated_count = max_vfs; if (adapter->vfs_allocated_count) -- cgit v1.2.2 From d5e51a10d21761faaf069cac6f1c0311cf332820 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 13 Mar 2013 15:50:29 +0000 Subject: igb: SR-IOV init reordering igb is ineffective at setting a lower number of total VFs because: int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) { ... /* Shouldn't change if VFs already enabled */ if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE) return -EBUSY; Swap init ordering. Signed-off-by: Alex Williamson Acked-by: Greg Rose Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 2ae888678b23..8496adfc6a68 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2542,8 +2542,8 @@ static void igb_probe_vfs(struct igb_adapter *adapter) if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) return; - igb_enable_sriov(pdev, max_vfs); pci_sriov_set_totalvfs(pdev, 7); + igb_enable_sriov(pdev, max_vfs); #endif /* CONFIG_PCI_IOV */ } -- cgit v1.2.2 From 05ec29e8fa9b6ec8d4ad5d2f6d5fc5467c7970bc Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 20 Mar 2013 09:06:29 +0000 Subject: igb: make sensor info static Trivial sparse warning. Signed-off-by: Stephen Hemminger Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c index 4623502054d5..0478a1abe541 100644 --- a/drivers/net/ethernet/intel/igb/igb_hwmon.c +++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c @@ -39,7 +39,7 @@ #include #ifdef CONFIG_IGB_HWMON -struct i2c_board_info i350_sensor_info = { +static struct i2c_board_info i350_sensor_info = { I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), }; -- cgit v1.2.2 From 75517d92119a3cd364f618ee962055b3ded8c396 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 20 Mar 2013 09:06:34 +0000 Subject: igb: fix PHC stopping on max freq For the 82576 MAC type, max_adj is reported as 1000000000 ppb. However, if this value is passed to igb_ptp_adjfreq_82576, incvalue overflows out of INCVALUE_82576_MASK, resulting in TIMINCA.incvalue being set to zero, which stops the PHC (instead of running it at twice the nominal speed). Fix the advertised max_adj value to the largest value the hardware can handle. As there is no min_adj value available (-max_adj is used instead), this will also prevent stopping the clock intentionally. It's probably not a big deal; other igb MAC types don't support stopping the clock, either.
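The limit follows from the increment register layout. A sketch of the arithmetic (the nominal increment and field width below are illustrative assumptions, not datasheet values):

	#include <linux/math64.h>

	#define SKETCH_NOMINAL_INC	(16u << 19)	/* assumed nominal increment */
	#define SKETCH_INC_MASK		0x00ffffffu	/* assumed 24-bit field */

	/*
	 * Sketch: the largest adjustment, in parts per billion, that still
	 * fits in the register is the headroom above the nominal increment
	 * scaled by 1e9.
	 */
	static long sketch_max_adj_ppb(void)
	{
		u64 headroom = SKETCH_INC_MASK - SKETCH_NOMINAL_INC;

		return div64_u64(headroom * 1000000000ULL, SKETCH_NOMINAL_INC);
	}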
Signed-off-by: Jiri Benc Acked-by: Matthew Vick Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 0987822359f0..0a237507ee85 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -740,7 +740,7 @@ void igb_ptp_init(struct igb_adapter *adapter) case e1000_82576: snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); adapter->ptp_caps.owner = THIS_MODULE; - adapter->ptp_caps.max_adj = 1000000000; + adapter->ptp_caps.max_adj = 999999881; adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.pps = 0; adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; -- cgit v1.2.2 From 751c644b95bb48aaa8825f0c66abbcc184d92051 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Mar 2013 02:27:11 -0700 Subject: pid: Handle the exit of a multi-threaded init. When a multi-threaded init exits and the initial thread is not the last thread to exit, the initial thread hangs around as a zombie until the last thread exits. In that case zap_pid_ns_processes needs to wait until there are only 2 hashed pids in the pid namespace, not one. v2. Replace thread_pid_vnr(me) == 1 with the test thread_group_leader(me) as suggested by Oleg. Cc: stable@vger.kernel.org Cc: Oleg Nesterov Reported-by: Caj Larsson Signed-off-by: "Eric W. Biederman" --- kernel/pid_namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index c1c3dc1c6023..bea15bdf82b0 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) int nr; int rc; struct task_struct *task, *me = current; + int init_pids = thread_group_leader(me) ? 1 : 2; /* Don't allow any more processes into the pid namespace */ disable_pid_allocation(pid_ns); @@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) */ for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (pid_ns->nr_hashed == 1) + if (pid_ns->nr_hashed == init_pids) break; schedule(); } -- cgit v1.2.2 From 35ccecef6ed48a5602755ddf580c45a026a1dc05 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 25 Mar 2013 14:45:54 -0300 Subject: [media] [REGRESSION] bt8xx: Fix too large height in cropcap Since commit a1fd287780c8e91fed4957b30c757b0c93021162 ("[media] bttv-driver: fix two warnings"), cropcap.defrect.height and cropcap.bounds.height for the PAL entry are 32 resp. 30 pixels too large. If a userspace app (i.e. xawtv) actually tries to use the full advertised height, the resulting image is broken in ways only a screenshot can describe. The cause of this is the fix for this warning: drivers/media/pci/bt8xx/bttv-driver.c:308:3: warning: initialized field overwritten [-Woverride-init] In this chunk of the commit: @@ -301,11 +301,10 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* totalwidth */ 1135, /* sqwidth */ 944, /* vdelay */ 0x20, - /* sheight */ 576, - /* videostart0 */ 23) /* bt878 (and bt848?) can capture another line below active video.
*/ - .cropcap.bounds.height = (576 + 2) + 0x20 - 2, + /* sheight */ (576 + 2) + 0x20 - 2, + /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR, .name = "NTSC", This replaces the overriding of the cropcap.bounds.height initialization outside of the CROPCAP macro (which also initializes it) with passing a different sheight value to the CROPCAP macro. There are two problems with this warning fix: 1) The sheight value is used twice in the CROPCAP macro, and the old code only changed one resulting value. 2) The old code increased the .cropcap.bounds.height value by 2 (and did not touch the .cropcap.defrect.height value at all), whereas the fixed code increases it by 32, as the fixed code passes (576 + 2) + 0x20 - 2 to the CROPCAP macro, but the + 0x20 - 2 is already done by the macro, so it is now done twice for .cropcap.bounds.height, and is also applied to .cropcap.defrect.height, where it should not be applied at all. This patch fixes this by adding an extraheight parameter to the CROPCAP entry and using it for the PAL entry. Cc: stable@kernel.org # For Kernel 3.8 Signed-off-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/bt8xx/bttv-driver.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index ccd18e4ee789..54579e4c740b 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -250,17 +250,19 @@ static u8 SRAM_Table[][60] = vdelay start of active video in 2 * field lines relative to trailing edge of /VRESET pulse (VDELAY register). sheight height of active video in 2 * field lines. + extraheight Added to sheight for cropcap.bounds.height only videostart0 ITU-R frame line number of the line corresponding to vdelay in the first field. */ #define CROPCAP(minhdelayx1, hdelayx1, swidth, totalwidth, sqwidth, \ - vdelay, sheight, videostart0) \ + vdelay, sheight, extraheight, videostart0) \ .cropcap.bounds.left = minhdelayx1, \ /* * 2 because vertically we count field lines times two, */ \ /* e.g. 23 * 2 to 23 * 2 + 576 in PAL-BGHI defrect. */ \ .cropcap.bounds.top = (videostart0) * 2 - (vdelay) + MIN_VDELAY, \ /* 4 is a safety margin at the end of the line. */ \ .cropcap.bounds.width = (totalwidth) - (minhdelayx1) - 4, \ - .cropcap.bounds.height = (sheight) + (vdelay) - MIN_VDELAY, \ + .cropcap.bounds.height = (sheight) + (extraheight) + (vdelay) - \ + MIN_VDELAY, \ .cropcap.defrect.left = hdelayx1, \ .cropcap.defrect.top = (videostart0) * 2, \ .cropcap.defrect.width = swidth, \ @@ -301,9 +303,10 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* totalwidth */ 1135, /* sqwidth */ 944, /* vdelay */ 0x20, - /* bt878 (and bt848?) can capture another line below active video. */ - /* sheight */ (576 + 2) + 0x20 - 2, + /* sheight */ 576, + /* bt878 (and bt848?) can capture another line below active video.
*/ + /* extraheight */ 2, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_NTSC_M | V4L2_STD_NTSC_M_KR, @@ -330,6 +333,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x1a, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_SECAM, @@ -355,6 +359,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 944, /* vdelay */ 0x20, /* sheight */ 576, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_PAL_Nc, @@ -380,6 +385,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x1a, /* sheight */ 576, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_PAL_M, @@ -405,6 +411,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x1a, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_PAL_N, @@ -430,6 +437,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 944, /* vdelay */ 0x20, /* sheight */ 576, + /* extraheight */ 0, /* videostart0 */ 23) },{ .v4l2_id = V4L2_STD_NTSC_M_JP, @@ -455,6 +463,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 780, /* vdelay */ 0x16, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) },{ /* that one hopefully works with the strange timing @@ -484,6 +493,7 @@ const struct bttv_tvnorm bttv_tvnorms[] = { /* sqwidth */ 944, /* vdelay */ 0x1a, /* sheight */ 480, + /* extraheight */ 0, /* videostart0 */ 23) } }; -- cgit v1.2.2 From 4fdc782416b29b77681ceec9ba74cdf5ee5e4051 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 11 Mar 2013 22:21:28 +0800 Subject: x86, io_apic: remove duplicated include from irq_remapping.c Remove duplicated include. Signed-off-by: Wei Yongjun Signed-off-by: Joerg Roedel --- drivers/iommu/irq_remapping.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index d56f8c17c5fe..7c11ff368d07 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.2 From 76a0e68129d7d24eb995a6871ab47081bbfa0acc Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Mon, 25 Mar 2013 22:26:21 +0000 Subject: pch_gbe: fix ip_summed checksum reporting on rx skb->ip_summed should be CHECKSUM_UNNECESSARY when the driver reports that checksums were correct and CHECKSUM_NONE in any other case. They're currently placed vice versa, which breaks the forwarding scenario. Fix it by placing them as described above. Signed-off-by: Veaceslav Falico Signed-off-by: David S. 
Miller --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 39ab4d09faaa..73ce7dd6b954 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1726,9 +1726,9 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, skb->protocol = eth_type_trans(skb, netdev); if (tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK) - skb->ip_summed = CHECKSUM_NONE; - else skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; napi_gro_receive(&adapter->napi, skb); (*work_done)++; -- cgit v1.2.2 From eba0e3c3a0ba7b96f01cbe997680f6a4401a0bfc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 26 Mar 2013 10:49:55 +0800 Subject: USB: serial: fix hang when opening port Johan's 'fix use-after-free in TIOCMIWAIT' patchset[1] introduces a bug which can cause a kernel hang when opening a port. This patch initializes the 'port->delta_msr_wait' waitqueue head to fix the bug, which was introduced in 3.9-rc4. [1], http://marc.info/?l=linux-usb&m=136368139627876&w=2 Cc: stable Signed-off-by: Ming Lei Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/usb-serial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 2e70efa08b77..5d9b178484fd 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -903,6 +903,7 @@ static int usb_serial_probe(struct usb_interface *interface, port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); + init_waitqueue_head(&port->delta_msr_wait); /* Keep this for private driver use for the moment but should probably go away */ INIT_WORK(&port->work, usb_serial_port_work); -- cgit v1.2.2 From 14134f6584212d585b310ce95428014b653dfaf6 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 25 Mar 2013 17:02:04 +0000 Subject: af_unix: don't send SCM_CREDENTIALS when dest socket is NULL SCM_CREDENTIALS should apply to write() syscalls only if either the source or destination socket has asserted SOCK_PASSCRED. The original implementation in maybe_add_creds is wrong, and breaks several LSB testcases (i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom). Originally-authored-by: Karel Srot Signed-off-by: Ding Tianhong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e339..971282b6f6a3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + (other->sk_socket && + test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.2 From 9fe16b78ee17579cb4f333534cf7043e94c67024 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Tue, 26 Mar 2013 17:43:28 +0100 Subject: bonding: remove already created master sysfs link on failure If the slave sysfs symlink fails to be created, we end up without removing the master sysfs symlink. Remove it in case of failure. Signed-off-by: Veaceslav Falico Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_sysfs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 1c9e09fbdff8..db103e03ba05 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -183,6 +183,11 @@ int bond_create_slave_symlinks(struct net_device *master, sprintf(linkname, "slave_%s", slave->name); ret = sysfs_create_link(&(master->dev.kobj), &(slave->dev.kobj), linkname); + + /* free the master link created earlier in case of error */ + if (ret) + sysfs_remove_link(&(slave->dev.kobj), "master"); + return ret; } -- cgit v1.2.2 From 4adaa611020fa6ac65b0ac8db78276af4ec04e63 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Mar 2013 13:07:00 -0400 Subject: Btrfs: fix race between mmap writes and compression Btrfs uses page_mkwrite to ensure stable pages during crc calculations and mmap workloads. We call clear_page_dirty_for_io before we do any crcs, and this forces any application with the file mapped to wait for the crc to finish before it is allowed to change the file. With compression on, the clear_page_dirty_for_io step is happening after we've compressed the pages. This means the applications might be changing the pages while we are compressing them, and some of those modifications might not hit the disk. This commit adds the clear_page_dirty_for_io before compression starts and makes sure to redirty the page if we have to fallback to uncompressed IO as well. Signed-off-by: Chris Mason Reported-by: Alexandre Oliva cc: stable@vger.kernel.org --- fs/btrfs/extent_io.c | 33 +++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 2 ++ fs/btrfs/inode.c | 14 ++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f173c5af6461..cdee391fc7bf 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) GFP_NOFS); } +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + clear_page_dirty_for_io(page); + page_cache_release(page); + index++; + } + return 0; +} + +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + account_page_redirty(page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + return 0; +} + /* * helper function to set both pages and extents in the tree writeback */ diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6068a1985560..258c92156857 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, struct 
extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1f26888825e2..6a6e13c53086 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; int compress_type = root->fs_info->compress_type; + int redirty = 0; /* if this is a small write inside eof, kick off a defrag */ if ((end - start + 1) < 16 * 1024 && @@ -415,6 +416,17 @@ again: if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; + /* + * we need to call clear_page_dirty_for_io on each + * page in the range. Otherwise applications with the file + * mmap'd can wander in and change the page contents while + * we are compressing them. + * + * If the compression fails for any reason, we set the pages + * dirty again later on. + */ + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, total_compressed, pages, @@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } + if (redirty) + extent_range_redirty_for_io(inode, start, end); add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; -- cgit v1.2.2 From 330305cc4a6b0cb75c22fc01b8826f0ad755550f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 24 Mar 2013 17:36:29 +0000 Subject: ipv4: Fix ip-header identification for gso packets. ip-header id needs to be incremented even if IP_DF flag is set. This behaviour was changed in commit 490ab08127cebc25e3a26 (IP_GRE: Fix IP-Identification). Following patch fixes it so that identification is always incremented. Reported-by: Cong Wang Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ipip.h | 16 ++++++---------- net/ipv4/af_inet.c | 3 +-- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/include/net/ipip.h b/include/net/ipip.h index fd19625ff99d..982141c15200 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -77,15 +77,11 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_DF)) - iph->id = 0; - else { - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); - } + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); } #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94f7661..c929d9c1c4b6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1333,8 +1333,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; -- cgit v1.2.2 From eddc0a3abff273842a94784d2d022bbc36dc9015 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Thu, 21 Mar 2013 02:30:41 -0700 Subject: yama: Better permission check for ptraceme Change the permission check for yama_ptrace_ptracee to the standard ptrace permission check, testing if the traceer has CAP_SYS_PTRACE in the tracees user namespace. Reviewed-by: Kees Cook Signed-off-by: "Eric W. Biederman" --- security/yama/yama_lsm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 23414b93771f..13c88fbcf037 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -347,10 +347,8 @@ int yama_ptrace_traceme(struct task_struct *parent) /* Only disallow PTRACE_TRACEME on more aggressive settings. */ switch (ptrace_scope) { case YAMA_SCOPE_CAPABILITY: - rcu_read_lock(); - if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE)) + if (!has_ns_capability(parent, current_user_ns(), CAP_SYS_PTRACE)) rc = -EPERM; - rcu_read_unlock(); break; case YAMA_SCOPE_NO_ATTACH: rc = -EPERM; -- cgit v1.2.2 From 4dcaf47258d59010802bd0eda933f69ee7d98cc7 Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 26 Mar 2013 11:03:07 -0500 Subject: rsxx: enable error return of rsxx_eeh_save_issued_dmas() Commit d8d595df introduced a bug where we did not check for a NULL return from kmalloc(). Make rsxx_eeh_save_issued_dmas() return an error for that case, and make the callers handle that. Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 19 ++++++++++++++++--- drivers/block/rsxx/dma.c | 6 +++++- drivers/block/rsxx/rsxx_priv.h | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 93f28191a0ff..5af21f2db29c 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -323,10 +323,11 @@ static int card_shutdown(struct rsxx_cardinfo *card) return 0; } -static void rsxx_eeh_frozen(struct pci_dev *dev) +static int rsxx_eeh_frozen(struct pci_dev *dev) { struct rsxx_cardinfo *card = pci_get_drvdata(dev); int i; + int st; dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n"); @@ -342,7 +343,9 @@ static void rsxx_eeh_frozen(struct pci_dev *dev) pci_disable_device(dev); - rsxx_eeh_save_issued_dmas(card); + st = rsxx_eeh_save_issued_dmas(card); + if (st) + return st; rsxx_eeh_save_issued_creg(card); @@ -356,6 +359,8 @@ static void rsxx_eeh_frozen(struct pci_dev *dev) card->ctrl[i].cmd.buf, card->ctrl[i].cmd.dma_addr); } + + return 0; } static void rsxx_eeh_failure(struct pci_dev *dev) @@ -399,6 +404,8 @@ static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card) static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, enum pci_channel_state error) { + int st; + if (dev->revision < RSXX_EEH_SUPPORT) return PCI_ERS_RESULT_NONE; @@ -407,7 +414,13 @@ static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev, return PCI_ERS_RESULT_DISCONNECT; } - rsxx_eeh_frozen(dev); + st = rsxx_eeh_frozen(dev); + if (st) { + dev_err(&dev->dev, "Slot reset setup failed\n"); + rsxx_eeh_failure(dev); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; } diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 95047e111a33..7594c6ddc181 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -980,7 +980,7 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card) } } -void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) { int i; int j; @@ -990,6 +990,8 @@ void 
rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) issued_dmas = kzalloc(sizeof(*issued_dmas) * card->n_targets, GFP_KERNEL); + if (!issued_dmas) + return -ENOMEM; for (i = 0; i < card->n_targets; i++) { INIT_LIST_HEAD(&issued_dmas[i]); @@ -1030,6 +1032,8 @@ void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card) } kfree(issued_dmas); + + return 0; } void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 8a7ac87f1dc5..382e8bf5c03b 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -381,7 +381,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, rsxx_dma_cb cb, void *cb_data); int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl); -void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); +int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card); void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card); int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card); -- cgit v1.2.2 From 80b00df291684850b5659ec95fb1fd2acbd2c0ec Mon Sep 17 00:00:00 2001 From: Philip J Kelleher Date: Tue, 26 Mar 2013 11:06:35 -0500 Subject: rsxx: remove unused variable Signed-off-by: Philip J Kelleher Signed-off-by: Jens Axboe --- drivers/block/rsxx/dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c index 7594c6ddc181..0607513cfb41 100644 --- a/drivers/block/rsxx/dma.c +++ b/drivers/block/rsxx/dma.c @@ -1056,7 +1056,6 @@ void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card) int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card) { struct rsxx_dma *dma; - struct rsxx_dma *tmp; int i; for (i = 0; i < card->n_targets; i++) { -- cgit v1.2.2 From 7ea600b5314529f9d1b9d6d3c41cb26fce6a7a4a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2013 18:25:57 -0400 Subject: Nest rename_lock inside vfsmount_lock ... lest we get livelocks between path_is_under() and d_path() and friends. The thing is, wrt fairness lglocks are more similar to rwsems than to rwlocks; it is possible to have thread B spin on attempt to take lock shared while thread A is already holding it shared, if B is on lower-numbered CPU than A and there's a thread C spinning on attempt to take the same lock exclusive. As the result, we need consistent ordering between vfsmount_lock (lglock) and rename_lock (seq_lock), even though everything that takes both is going to take vfsmount_lock only shared. Spotted-by: Brad Spengler Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/dcache.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index fbfae008ba44..e8bc3420d63e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path, bool slash = false; int error = 0; - br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path, if (!error && !slash) error = prepend(buffer, buflen, "/", 1); -out: - br_read_unlock(&vfsmount_lock); return error; global_root: @@ -2590,7 +2587,7 @@ global_root: error = prepend(buffer, buflen, "/", 1); if (!error) error = is_mounted(vfsmnt) ? 
1 : 2; - goto out; + return error; } /** @@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error < 0) return ERR_PTR(error); @@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = prepend_path(path, &root, &res, &buflen); write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error > 1) error = -EINVAL; @@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = path_with_deleted(path, &root, &res, &buflen); + write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error < 0) res = ERR_PTR(error); - write_sequnlock(&rename_lock); path_put(&root); return res; } @@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd(current->fs, &root, &pwd); error = -ENOENT; + br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; @@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); if (error < 0) goto out; @@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { write_sequnlock(&rename_lock); + br_read_unlock(&vfsmount_lock); } out: -- cgit v1.2.2 From 469dd1c4ac0869cf7d1f87eac9b5a93865c10b76 Mon Sep 17 00:00:00 2001 From: Fabio Valentini Date: Mon, 11 Mar 2013 19:16:34 +0000 Subject: ACPI / PM: fix suspend and resume on Sony Vaio VGN-FW21M Add Sony Vaio VGN-FW21M to the device blacklist in drivers/acpi/sleep.c. Fixes suspend/resume on this device (device no longer reboots instead of resuming). References: https://bugzilla.kernel.org/show_bug.cgi?id=55001 Signed-off-by: Fabio Valentini Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 24213033fbae..9c1a435d10e6 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -193,6 +193,14 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { }, { .callback = init_nvs_nosave, + .ident = "Sony Vaio VGN-FW21M", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FW21M"), + }, + }, + { + .callback = init_nvs_nosave, .ident = "Sony Vaio VPCEB17FX", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), -- cgit v1.2.2 From aaf9d93be71c68558c25b4052ac979ee6b7eb809 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Tue, 19 Mar 2013 06:48:07 +0000 Subject: ACPI / APEI: fix error status check condition for CPER In Table 18-289 of the ACPI 5.0 spec, the error data length in a CPER Generic Error Data Entry can be 0, which means this generic error data entry can have only one header. So fix the check condition for it. Signed-off-by: Chen Gong Reviewed-by: Huang Ying Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/apei/cper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c index 1e5d8a40101e..fefc2ca7cc3e 100644 --- a/drivers/acpi/apei/cper.c +++ b/drivers/acpi/apei/cper.c @@ -405,7 +405,7 @@ int apei_estatus_check(const struct acpi_hest_generic_status *estatus) return rc; data_len = estatus->data_length; gdata = (struct acpi_hest_generic_data *)(estatus + 1); - while (data_len > sizeof(*gdata)) { + while (data_len >= sizeof(*gdata)) { gedata_len = gdata->error_data_length; if (gedata_len > data_len - sizeof(*gdata)) return -EINVAL; -- cgit v1.2.2 From b8b6611048b7b57b86fbdc9153649ad4dc52135f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 11 Mar 2013 05:05:16 +0000 Subject: PCI / ACPI: hold acpi_scan_lock during root bus hotplug While merging the PCI tree with the PM/ACPI tree, Linus noticed that we don't use the same locking pattern for the ACPI PCI root as acpiphp does. Apply the same locking pattern here; we need to execute acpi_bus_hot_remove_device() via acpi_os_hotplug_execute(), as it also holds acpi_scan_lock. [rjw: Changelog] Reported-by: Linus Torvalds Signed-off-by: Yinghai Lu No-objection-from: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_root.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 0ac546d5e53f..5ff173066127 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -646,6 +646,7 @@ static void handle_root_bridge_insertion(acpi_handle handle) static void handle_root_bridge_removal(struct acpi_device *device) { + acpi_status status; struct acpi_eject_event *ej_event; ej_event = kmalloc(sizeof(*ej_event), GFP_KERNEL); @@ -661,7 +662,9 @@ static void handle_root_bridge_removal(struct acpi_device *device) ej_event->device = device; ej_event->event = ACPI_NOTIFY_EJECT_REQUEST; - acpi_bus_hot_remove_device(ej_event); + status = acpi_os_hotplug_execute(acpi_bus_hot_remove_device, ej_event); + if (ACPI_FAILURE(status)) + kfree(ej_event); } static void _handle_hotplug_event_root(struct work_struct *work) @@ -676,8 +679,9 @@ static void _handle_hotplug_event_root(struct work_struct *work) handle = hp_work->handle; type = hp_work->type; - root = acpi_pci_find_root(handle); + acpi_scan_lock_acquire(); + root = acpi_pci_find_root(handle); acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); switch (type) { @@ -711,6 +715,7 @@ static void _handle_hotplug_event_root(struct work_struct *work) break; } + acpi_scan_lock_release(); kfree(hp_work); /* allocated in handle_hotplug_event_bridge */ kfree(buffer.pointer); } -- cgit v1.2.2 From e8cd81693bbbb15db57d3c9aa7dd90eda4842874 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2013 20:30:17 -0400 Subject: vt: synchronize_rcu() under spinlock is not nice... vcs_poll_data_free() calls unregister_vt_notifier(), which calls atomic_notifier_chain_unregister(), which calls synchronize_rcu(). Do it *after* we'd dropped ->f_lock.
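The shape of the fix, as a generic sketch (simplified; not the vc_screen.c source verbatim):

	/*
	 * Sketch: decide install-vs-lost-the-race under the spinlock, but
	 * run the potentially sleeping cleanup (the free path ends up in
	 * synchronize_rcu()) only after the lock is dropped.
	 */
	static struct vcs_poll_data *install_poll_sketch(struct file *file,
							 struct vcs_poll_data *fresh)
	{
		struct vcs_poll_data *kill = NULL, *ret = fresh;

		spin_lock(&file->f_lock);
		if (!file->private_data) {
			file->private_data = fresh;	/* we won the race */
		} else {
			kill = fresh;			/* lost; free it later */
			ret = file->private_data;
		}
		spin_unlock(&file->f_lock);

		if (kill)
			vcs_poll_data_free(kill);	/* may sleep; safe here */
		return ret;
	}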
Cc: stable@vger.kernel.org (all kernels since 2.6.37) Signed-off-by: Al Viro --- drivers/tty/vt/vc_screen.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index e4ca345873c3..d7799deacb21 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -93,7 +93,7 @@ vcs_poll_data_free(struct vcs_poll_data *poll) static struct vcs_poll_data * vcs_poll_data_get(struct file *file) { - struct vcs_poll_data *poll = file->private_data; + struct vcs_poll_data *poll = file->private_data, *kill = NULL; if (poll) return poll; @@ -122,10 +122,12 @@ vcs_poll_data_get(struct file *file) file->private_data = poll; } else { /* someone else raced ahead of us */ - vcs_poll_data_free(poll); + kill = poll; poll = file->private_data; } spin_unlock(&file->f_lock); + if (kill) + vcs_poll_data_free(kill); return poll; } -- cgit v1.2.2 From c2a2876e863356b092967ea62bebdb4dd663af80 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 26 Mar 2013 22:48:23 +0100 Subject: iommu/amd: Make sure dma_ops are set for hotplug devices There is a bug introduced with commit 27c2127 that causes devices which are hot unplugged and then hot-replugged to not have per-device dma_ops set. This causes these devices to not function correctly. Fixed with this patch. Cc: stable@vger.kernel.org Reported-by: Andreas Degert Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98f555dafb55..b287ca33833d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2466,18 +2466,16 @@ static int device_change_notifier(struct notifier_block *nb, /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); - if (dma_domain) - goto out; - dma_domain = dma_ops_domain_alloc(); - if (!dma_domain) - goto out; - dma_domain->target_dev = devid; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - list_add_tail(&dma_domain->list, &iommu_pd_list); - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - dev_data = get_dev_data(dev); + if (!dma_domain) { + dma_domain = dma_ops_domain_alloc(); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + } dev->archdata.dma_ops = &amd_iommu_dma_ops; -- cgit v1.2.2 From 4c43755506ececbe903585265aa8408e937620a1 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 22 Mar 2013 18:53:57 +0100 Subject: HID: multitouch: fix touchpad buttons Commit "HID: multitouch: use the callback "report" instead..." breaks the buttons of touchpads following the HID multitouch specification. The buttons were emitted through hid-input, but now that the events are generated only in hid-multitouch, the buttons are not emitted anymore. The input_event() call is far simpler than the hid-input one, as many of the different tests do not apply to multitouch touchpads.
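Condensed, the new default case simply hands any non-multitouch usage, such as a button, straight to the input core (a sketch of the hunk in the patch below):

	default:
		if (usage->type)	/* e.g. EV_KEY for BTN_LEFT */
			input_event(input, usage->type, usage->code, value);
		return;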
Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7a1ebb867cf4..82e9211b3ca9 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -621,6 +621,7 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field, { struct mt_device *td = hid_get_drvdata(hid); __s32 quirks = td->mtclass.quirks; + struct input_dev *input = field->hidinput->input; if (hid->claimed & HID_CLAIMED_INPUT) { switch (usage->hid) { @@ -670,13 +671,16 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field, break; default: + if (usage->type) + input_event(input, usage->type, usage->code, + value); return; } if (usage->usage_index + 1 == field->report_count) { /* we only take into account the last report. */ if (usage->hid == td->last_slot_field) - mt_complete_slot(td, field->hidinput->input); + mt_complete_slot(td, input); if (field->index == td->last_field_index && td->num_received >= td->num_expected) -- cgit v1.2.2 From 3151527ee007b73a0ebd296010f1c0454a919c7d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 15 Mar 2013 01:45:51 -0700 Subject: userns: Don't allow creation if the user is chrooted Guarantee that the policy of which files may be accessed that is established by setting the root directory will not be violated by user namespaces, by verifying that the root directory points to the root of the mount namespace at the time of user namespace creation. Changing the root is a privileged operation, and as a matter of policy it serves to limit unprivileged processes to files below the current root directory. For reasons of simplicity and comprehensibility the privilege to change the root directory is gated solely on the CAP_SYS_CHROOT capability in the user namespace. Therefore when creating a user namespace we must ensure that the policy of which files may be accessed cannot be violated by changing the root directory. Anyone who runs a process in a chroot and would like to use user namespaces can set up the same view of filesystems with a mount namespace instead. As a result, this is not a practical limitation on using user namespaces. Cc: stable@vger.kernel.org Acked-by: Serge Hallyn Reported-by: Andy Lutomirski Signed-off-by: "Eric W.
Biederman" --- fs/namespace.c | 24 ++++++++++++++++++++++++ include/linux/fs_struct.h | 2 ++ kernel/user_namespace.c | 9 +++++++++ 3 files changed, 35 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 50ca17d3cb45..a3035223d421 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2732,6 +2732,30 @@ bool our_mnt(struct vfsmount *mnt) return check_mnt(real_mount(mnt)); } +bool current_chrooted(void) +{ + /* Does the current process have a non-standard root */ + struct path ns_root; + struct path fs_root; + bool chrooted; + + /* Find the namespace root */ + ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt; + ns_root.dentry = ns_root.mnt->mnt_root; + path_get(&ns_root); + while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) + ; + + get_fs_root(current->fs, &fs_root); + + chrooted = !path_equal(&fs_root, &ns_root); + + path_put(&fs_root); + path_put(&ns_root); + + return chrooted; +} + static void *mntns_get(struct task_struct *task) { struct mnt_namespace *ns = NULL; diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 729eded4b24f..2b93a9a5a1e6 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -50,4 +50,6 @@ static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, spin_unlock(&fs->lock); } +extern bool current_chrooted(void); + #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index b14f4d342043..0f1e42884577 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -61,6 +61,15 @@ int create_user_ns(struct cred *new) kgid_t group = new->egid; int ret; + /* + * Verify that we can not violate the policy of which files + * may be accessed that is specified by the root directory, + * by verifing that the root directory is at the root of the + * mount namespace which allows all files to be accessed. + */ + if (current_chrooted()) + return -EPERM; + /* The creator needs a mapping in the parent user namespace * or else we won't be able to reasonably tell userspace who * created a user_namespace. -- cgit v1.2.2 From 90563b198e4c6674c63672fae1923da467215f45 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 22 Mar 2013 03:10:15 -0700 Subject: vfs: Add a mount flag to lock read only bind mounts When a read-only bind mount is copied from mount namespace in a higher privileged user namespace to a mount namespace in a lesser privileged user namespace, it should not be possible to remove the the read-only restriction. Add a MNT_LOCK_READONLY mount flag to indicate that a mount must remain read-only. CC: stable@vger.kernel.org Acked-by: Serge Hallyn Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 3 +++ include/linux/mount.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index a3035223d421..8505b5ece5de 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1713,6 +1713,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; + if (mnt->mnt_flags & MNT_LOCK_READONLY) + return -EPERM; + if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else diff --git a/include/linux/mount.h b/include/linux/mount.h index d7029f4a191a..73005f9957ea 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,6 +47,8 @@ struct mnt_namespace; #define MNT_INTERNAL 0x4000 +#define MNT_LOCK_READONLY 0x400000 + struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ -- cgit v1.2.2 From 132c94e31b8bca8ea921f9f96a57d684fa4ae0a9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 22 Mar 2013 04:08:05 -0700 Subject: vfs: Carefully propogate mounts across user namespaces As a matter of policy MNT_READONLY should not be changable if the original mounter had more privileges than creator of the mount namespace. Add the flag CL_UNPRIVILEGED to note when we are copying a mount from a mount namespace that requires more privileges to a mount namespace that requires fewer privileges. When the CL_UNPRIVILEGED flag is set cause clone_mnt to set MNT_NO_REMOUNT if any of the mnt flags that should never be changed are set. This protects both mount propagation and the initial creation of a less privileged mount namespace. Cc: stable@vger.kernel.org Acked-by: Serge Hallyn Reported-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 6 +++++- fs/pnode.c | 6 ++++++ fs/pnode.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 8505b5ece5de..968d4c5eae03 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + /* Don't allow unprivileged users to change mount flags */ + if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); @@ -2342,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, /* First pass: copy the tree topology */ copy_flags = CL_COPY_ALL | CL_EXPIRE; if (user_ns != mnt_ns->user_ns) - copy_flags |= CL_SHARED_TO_SLAVE; + copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { up_write(&namespace_sem); diff --git a/fs/pnode.c b/fs/pnode.c index 3e000a51ac0d..8b29d2164da6 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "internal.h" #include "pnode.h" @@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest, int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, struct mount *source_mnt, struct list_head *tree_list) { + struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; @@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); + /* Notice when we are 
propagating across user namespaces */ + if (m->mnt_ns->user_ns != user_ns) + type |= CL_UNPRIVILEGED; + child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); diff --git a/fs/pnode.h b/fs/pnode.h index 19b853a3445c..a0493d5ebfbf 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -23,6 +23,7 @@ #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_SHARED_TO_SLAVE 0x20 +#define CL_UNPRIVILEGED 0x40 static inline void set_mnt_shared(struct mount *mnt) { -- cgit v1.2.2 From a636b702ed1805e988ad3d8ff8b52c060f8b341c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 21 Mar 2013 18:13:15 -0700 Subject: ipc: Restrict mounting the mqueue filesystem Only allow mounting the mqueue filesystem if the caller has CAP_SYS_ADMIN rights over the ipc namespace. The principle here is that if you create it, or have capabilities over it, you can mount it; otherwise you get to live with what other people have mounted. This information is not particularly sensitive and mqueue essentially only reports which POSIX message queues exist. Still, when creating a restricted environment for an application to live in, any extra information may be of use to someone with sufficient creativity. The historical, if imperfect, way this information has been restricted has been to disallow mounts; restricting mounting to ipc namespace creators maintains the spirit of that restriction. Cc: stable@vger.kernel.org Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- ipc/mqueue.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e5c4f609f22c..c4ae32ec6c6b 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -330,8 +330,16 @@ static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - if (!(flags & MS_KERNMOUNT)) - data = current->nsproxy->ipc_ns; + if (!(flags & MS_KERNMOUNT)) { + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + /* Don't allow mounting unless the caller has CAP_SYS_ADMIN + * over the ipc namespace. + */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + data = ns; + } return mount_ns(fs_type, flags, data, mqueue_fill_super); } -- cgit v1.2.2 From 87a8ebd637dafc255070f503909a053cf0d98d3f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 24 Mar 2013 14:28:27 -0700 Subject: userns: Restrict when proc and sysfs can be mounted Only allow unprivileged mounts of proc and sysfs if they are already mounted when the user namespace is created. proc and sysfs are interesting because they have content that is per namespace, and so fresh mounts are needed when new namespaces are created, while at the same time proc and sysfs have content that is shared between every instance. Respect the policy of who may see the shared content of proc and sysfs by only allowing new mounts if there was an existing mount at the time the user namespace was created. In practice there are only two interesting cases: either proc and sysfs are mounted at their usual places, or they are not mounted at all (some form of mount namespace jail). Cc: stable@vger.kernel.org Acked-by: Serge Hallyn Signed-off-by: "Eric W.
Biederman" --- fs/namespace.c | 21 +++++++++++++++++++++ fs/proc/root.c | 4 ++++ fs/sysfs/mount.c | 4 ++++ include/linux/user_namespace.h | 4 ++++ kernel/user.c | 2 ++ kernel/user_namespace.c | 2 ++ 6 files changed, 37 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 968d4c5eae03..d581e45c0a9f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2763,6 +2763,27 @@ bool current_chrooted(void) return chrooted; } +void update_mnt_policy(struct user_namespace *userns) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mount *mnt; + + down_read(&namespace_sem); + list_for_each_entry(mnt, &ns->list, mnt_list) { + switch (mnt->mnt.mnt_sb->s_magic) { + case SYSFS_MAGIC: + userns->may_mount_sysfs = true; + break; + case PROC_SUPER_MAGIC: + userns->may_mount_proc = true; + break; + } + if (userns->may_mount_sysfs && userns->may_mount_proc) + break; + } + up_read(&namespace_sem); +} + static void *mntns_get(struct task_struct *task) { struct mnt_namespace *ns = NULL; diff --git a/fs/proc/root.c b/fs/proc/root.c index c6e9fac26bac..9c7fab1d23f0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } else { ns = task_active_pid_ns(current); options = data; + + if (!current_user_ns()->may_mount_proc) + return ERR_PTR(-EPERM); } sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 8d924b5ec733..afd83273e6ce 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "sysfs.h" @@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, struct super_block *sb; int error; + if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) + return ERR_PTR(-EPERM); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4ce009324933..b6b215f13b45 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -26,6 +26,8 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + bool may_mount_sysfs; + bool may_mount_proc; }; extern struct user_namespace init_user_ns; @@ -82,4 +84,6 @@ static inline void put_user_ns(struct user_namespace *ns) #endif +void update_mnt_policy(struct user_namespace *userns); + #endif /* _LINUX_USER_H */ diff --git a/kernel/user.c b/kernel/user.c index e81978e8c03b..8e635a18ab52 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,8 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, + .may_mount_sysfs = true, + .may_mount_proc = true, }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0f1e42884577..a54f26f82eb2 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -96,6 +96,8 @@ int create_user_ns(struct cred *new) set_cred_user_ns(new, ns); + update_mnt_policy(ns); + return 0; } -- cgit v1.2.2 From c8fa48d3722a9be89acf3486444e87583379c97c Mon Sep 17 00:00:00 2001 From: Roland Stigge Date: Tue, 26 Mar 2013 18:36:01 +0100 Subject: usb: Fix compile error by selecting USB_OTG_UTILS The current lpc32xx_defconfig breaks like this, caused by recent phy restructuring: LD init/built-in.o drivers/built-in.o: In function `usb_hcd_nxp_probe': 
drivers/usb/host/ohci-nxp.c:224: undefined reference to `isp1301_get_client' drivers/built-in.o: In function `lpc32xx_udc_probe': drivers/usb/gadget/lpc32xx_udc.c:3104: undefined reference to `isp1301_get_client' distcc[27867] ERROR: compile (null) on localhost failed make: *** [vmlinux] Error 1 Caused by 1c2088812f095df77f4b3224b65db79d7111a300 (usb: Makefile: fix drivers/usb/phy/ Makefile entry) This patch fixes the problem by selecting USB_OTG_UTILS in Kconfig, which causes the phy driver to be built again. Signed-off-by: Roland Stigge Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/Kconfig | 1 + drivers/usb/phy/Kconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 5a0c541daf89..c7525b1cad74 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -145,6 +145,7 @@ config USB_LPC32XX tristate "LPC32XX USB Peripheral Controller" depends on ARCH_LPC32XX select USB_ISP1301 + select USB_OTG_UTILS help This option selects the USB device controller in the LPC32xx SoC. diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 65217a590068..90549382eba5 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -38,6 +38,7 @@ config USB_ISP1301 tristate "NXP ISP1301 USB transceiver support" depends on USB || USB_GADGET depends on I2C + select USB_OTG_UTILS help Say Y here to add support for the NXP ISP1301 USB transceiver driver. This chip is typically used as USB transceiver for USB host, gadget -- cgit v1.2.2 From 76fc253723add627cf28c09c79fb67e71f9e4782 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Mar 2013 10:15:47 -0400 Subject: xen/acpi-stub: Disable it b/c the acpi_processor_add is no longer called. With the Xen ACPI stub code (CONFIG_XEN_STUB=y) enabled, the power C and P states are no longer uploaded to the hypervisor. The reason is that the Xen CPU hotplug code (xen-acpi-cpuhotplug.c) and xen-acpi-stub.c register themselves as the "processor" type object. That means the generic processor driver (processor_driver.c) stops working, and acpi_processor_add() is never called, the function that populates the per_cpu(processors, pr->id) = pr structure. The 'pr' is gathered from the acpi_processor_get_info function, which does the job of finding the C-states and figuring out the PBLK address. The 'processors->pr' is then later used by xen-acpi-processor.c (the one that uploads C and P states to the hypervisor). Since it is NULL, we end up skipping the gathering of _PSD, _PSS, _PCT, etc. and never upload the power management data. The end result is that enabling CONFIG_XEN_STUB in the build means that xen-acpi-processor no longer works. This temporary patch fixes it by marking the XEN_STUB driver as BROKEN until this can be properly fixed.
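Condensed, the dependency that breaks is this (a simplified sketch; the loop variable and surrounding code are assumed, not quoted): xen-acpi-processor.c can only upload data for CPUs whose per_cpu slot was populated by acpi_processor_add():

	struct acpi_processor *pr = per_cpu(processors, i);

	if (!pr)		/* the stub claimed the "processor" objects, */
		continue;	/* so pr stays NULL and nothing is uploaded  */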
CC: jinsong.liu@intel.com Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 5a32232cf7c1..67af155cf602 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -182,7 +182,7 @@ config XEN_PRIVCMD config XEN_STUB bool "Xen stub drivers" - depends on XEN && X86_64 + depends on XEN && X86_64 && BROKEN default n help Allow kernel to install stub drivers, to reserve space for Xen drivers, -- cgit v1.2.2 From d3eb2c89e7ba996e8781b22a6e7d0a895ef55630 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Mar 2013 10:34:28 -0400 Subject: xen/mmu: Move the setting of pvops.write_cr3 to a later phase in bootup. We move the setting of write_cr3 from the early bootup variant (see git commit 0cc9129d75ef8993702d97ab0e49542c15ac6ab9 "x86-64, xen, mmu: Provide an early version of write_cr3.") to a more appropriate location. This new location is where all of the other non-early variants of pvops calls are set - and most importantly is before the alternative_asm mechanism kicks in. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e8e34938c57d..6afbb2ca9a0a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1467,8 +1467,6 @@ static void __init xen_write_cr3_init(unsigned long cr3) __xen_write_cr3(true, cr3); xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ - - pv_mmu_ops.write_cr3 = &xen_write_cr3; } #endif @@ -2122,6 +2120,7 @@ static void __init xen_post_allocator_init(void) #endif #ifdef CONFIG_X86_64 + pv_mmu_ops.write_cr3 = &xen_write_cr3; SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif xen_mark_init_mm_pinned(); -- cgit v1.2.2 From c26377e62f4e6bfb4d99ef88526047209701a83f Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 25 Mar 2013 14:11:19 +0000 Subject: xen/events: avoid race with raising an event in unmask_evtchn() In unmask_evtchn(), when the mask bit is cleared after testing for pending and the event becomes pending between the test and clear, then the upcall will not become pending and the event may be lost or delayed. Avoid this by always clearing the mask bit before checking for pending. If a hypercall is needed, remask the event, as EVTCHNOP_unmask will only retrigger pending events if they were masked. This fixes a regression introduced in 3.7 by b5e579232d635b79a3da052964cb357ccda8d9ea (xen/events: fix unmask_evtchn for PV on HVM guests) which reordered the clear mask and check pending operations. Changes in v2: - set mask before hypercall. Cc: stable@vger.kernel.org Acked-by: Stefano Stabellini Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index d17aa41a9041..aa85881d17b2 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -403,11 +403,23 @@ static void unmask_evtchn(int port) if (unlikely((cpu != cpu_from_evtchn(port)))) do_hypercall = 1; - else + else { + /* + * Need to clear the mask before checking pending to + * avoid a race with an event becoming pending. + * + * EVTCHNOP_unmask will only trigger an upcall if the + * mask bit was set, so if a hypercall is needed + * remask the event.
+ */ + sync_clear_bit(port, BM(&s->evtchn_mask[0])); evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0])); - if (unlikely(evtchn_pending && xen_hvm_domain())) - do_hypercall = 1; + if (unlikely(evtchn_pending && xen_hvm_domain())) { + sync_set_bit(port, BM(&s->evtchn_mask[0])); + do_hypercall = 1; + } + } /* Slow path (hypercall) if this is a non-local port or if this is * an hvm domain and an event is pending (hvm domains don't have @@ -418,8 +430,6 @@ static void unmask_evtchn(int port) } else { struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); - sync_clear_bit(port, BM(&s->evtchn_mask[0])); - /* * The following is basically the equivalent of * 'hw_resend_irq'. Just like a real IO-APIC we 'lose -- cgit v1.2.2 From 3e84f48edfd33b2e209a117c11fb9ce637cc9b67 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Mar 2013 15:20:30 +0000 Subject: vfs/splice: Fix missed checks in new __kernel_write() helper Commit 06ae43f34bcc ("Don't bother with redoing rw_verify_area() from default_file_splice_from()") lost the checks to test existence of the write/aio_write methods. My apologies ;-/ Eventually, we want that in fs/splice.c side of things (no point repeating it for every buffer, after all), but for now this is the obvious minimal fix. Reported-by: Dave Jones Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- fs/read_write.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index f7b5a23b804b..e6ddc8dceb96 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -424,6 +424,9 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t const char __user *p; ssize_t ret; + if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) + return -EINVAL; + old_fs = get_fs(); set_fs(get_ds()); p = (__force const char __user *)buf; -- cgit v1.2.2 From adaa4b8e4d47eeb114513c2f7a172929154b94bd Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Mar 2013 14:30:23 +0000 Subject: Btrfs: fix EIO from btrfs send in is_extent_unchanged for punched holes When you take a snapshot, punch a hole where there has been data, then take another snapshot and try to send an incremental stream, btrfs send would give you EIO. That is because is_extent_unchanged had no support for holes being punched. With this patch, instead of returning EIO we just return 0 (== the extent is not unchanged) and we're good. Signed-off-by: Jan Schmidt Cc: Alexander Block Signed-off-by: Josef Bacik --- fs/btrfs/send.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 68da757615ae..ed897dc11356 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx, found_key.type != key.type) { key.offset += right_len; break; - } else { - if (found_key.offset != key.offset + right_len) { - /* Should really not happen */ - ret = -EIO; - goto out; - } + } + if (found_key.offset != key.offset + right_len) { + ret = 0; + goto out; } key = found_key; } -- cgit v1.2.2 From f4881bc7a83eff263789dd524b7c269d138d4af5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Mar 2013 16:03:35 -0400 Subject: Btrfs: fix space leak when we fail to reserve metadata space Dave reported a warning when running xfstest 275. We have been leaking delalloc metadata space when our reservations fail. This is because we were improperly calculating how much space to free for our checksum reservations. 
The problem is we would sometimes free up space that had already been freed in another thread and we would end up with negative usage for the delalloc space. This patch fixes the problem by calculating how much space the other threads have already freed, then calculating how much space we would need to free had we not made the reservation at all, and then freeing any excess space. This makes xfstests 275 no longer have leaked space. Thanks Cc: stable@vger.kernel.org Reported-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a8ff25aedca1..a22b5cc921ad 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4815,14 +4815,49 @@ out_fail: * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers * so we can just reduce our inodes csum bytes and carry on. - * Otherwise we have to do the normal free thing to account for - * the case that the free side didn't free up its reserve - * because of this outstanding reservation. */ - if (BTRFS_I(inode)->csum_bytes == csum_bytes) + if (BTRFS_I(inode)->csum_bytes == csum_bytes) { calc_csum_metadata_size(inode, num_bytes, 0); - else - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + } else { + u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; + u64 bytes; + + /* + * This is tricky, but first we need to figure out how much we + * free'd from any free-ers that occurred during this + * reservation, so we reset ->csum_bytes to the csum_bytes + * before we dropped our lock, and then call the free for the + * number of bytes that were freed while we were trying our + * reservation. + */ + bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; + BTRFS_I(inode)->csum_bytes = csum_bytes; + to_free = calc_csum_metadata_size(inode, bytes, 0); + + + /* + * Now we need to see how much we would have freed had we not + * been making this reservation and our ->csum_bytes were not + * artificially inflated. + */ + BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; + bytes = csum_bytes - orig_csum_bytes; + bytes = calc_csum_metadata_size(inode, bytes, 0); + + /* + * Now reset ->csum_bytes to what it should be. If bytes is + * more than to_free then we would have free'd more space had we + * not had an artificially high ->csum_bytes, so we need to free + * the remainder. If bytes is the same or less then we don't + * need to do anything, the other free-ers did the correct + * thing. + */ + BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; + if (bytes > to_free) + to_free = bytes - to_free; + else + to_free = 0; + } spin_unlock(&BTRFS_I(inode)->lock); if (dropped) to_free += btrfs_calc_trans_metadata_size(root, dropped); -- cgit v1.2.2 From 6e137ed3f30574f314733d4b7a86ea6523232b14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Mar 2013 15:26:55 -0400 Subject: Btrfs: fix space accounting for unlink and rename We are way over-reserving for unlink and rename. The rename reservation is just some random huge number, and unlink accounts for tree log operations that don't actually happen during unlink; not to mention the tree log doesn't take from the trans block rsv anyway, so those reservations are completely useless.
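The corrected numbers are plain worst-case item counts (a summarizing sketch; the per-item breakdown is in the comments of the patch below):

	trans = btrfs_start_transaction(root, 5);	/* unlink */
	trans = btrfs_start_transaction(root, 11);	/* rename: 5 items in
							   each of 2 dirs + 1
							   for the new link */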
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6a6e13c53086..8cab424c75f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3693,11 +3693,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, * 1 for the dir item * 1 for the dir index * 1 for the inode ref - * 1 for the inode ref in the tree log - * 2 for the dir entries in the log * 1 for the inode */ - trans = btrfs_start_transaction(root, 8); + trans = btrfs_start_transaction(root, 5); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; @@ -8141,7 +8139,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items * should cover the worst case number of items we'll modify. */ - trans = btrfs_start_transaction(root, 20); + trans = btrfs_start_transaction(root, 11); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out_notrans; -- cgit v1.2.2 From db1d607d3ca5cbb283cbb17d648cd7e8dc67cc7b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Mar 2013 15:29:11 -0400 Subject: Btrfs: hold the ordered operations mutex when waiting on ordered extents We need to hold the ordered_operations mutex while waiting on ordered extents since we splice and run the ordered extents list. We need to make sure anybody else who wants to wait on ordered extents does actually wait for them to be completed. This will keep us from bailing out of flushing in case somebody is already waiting on ordered extents to complete. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ordered-data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dc08d77b717e..005c45db699e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&works); + mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->fs_info->ordered_extent_lock); list_splice_init(&root->fs_info->ordered_extents, &splice); while (!list_empty(&splice)) { @@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) cond_resched(); } + mutex_unlock(&root->fs_info->ordered_operations_mutex); } /* -- cgit v1.2.2 From fdf30d1c1b386e1b73116cc7e0fb14e962b763b0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 26 Mar 2013 15:31:45 -0400 Subject: Btrfs: limit the global reserve to 512mb A user reported a problem where he was getting early ENOSPC with hundreds of gigs of free data space and 6 gigs of free metadata space. This is because the global block reserve was taking up the entire free metadata space. This is ridiculous; we have infrastructure in place to throttle if we start using too much of the global reserve, so instead of letting it get this huge, just limit it to 512MB so that users can still get work done. This allowed the user to complete his rsync without issues.
Thanks Cc: stable@vger.kernel.org Reported-and-tested-by: Stefan Priebe Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a22b5cc921ad..0d8478700d78 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4460,7 +4460,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) spin_lock(&sinfo->lock); spin_lock(&block_rsv->lock); - block_rsv->size = num_bytes; + block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved + sinfo->bytes_readonly + -- cgit v1.2.2 From a7975026ff9ddf91ba190ae2b71699dd156395e3 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Mon, 25 Mar 2013 11:08:23 +0000 Subject: Btrfs: fix double free in the btrfs_qgroup_account_ref() The function btrfs_find_all_roots is responsible for allocating memory for 'roots' and freeing it if errors happen, so the caller should not free it again since the work has been done. Besides, 'tmp' is allocated after the function btrfs_find_all_roots, so we can return directly if btrfs_find_all_roots() fails. Signed-off-by: Wang Shilong Reviewed-by: Miao Xie Reviewed-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/qgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 5471e47d6559..b44124dd2370 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, sgn > 0 ? node->seq - 1 : node->seq, &roots); if (ret < 0) - goto out; + return ret; spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; @@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, ret = 0; unlock: spin_unlock(&fs_info->qgroup_lock); -out: ulist_free(roots); ulist_free(tmp); -- cgit v1.2.2 From 39847c4d3d91f487f9ab3d083ee5d0f8419f105c Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 28 Mar 2013 08:08:20 +0000 Subject: Btrfs: fix wrong reservation of csums We reserve space for csums only when we write data into a file; in the other cases, such as tree log and log replay, we don't do any reservation, so we can use the reservation of the transaction handle just for the former. For the latter, we should use the tree's own reservation. But btrfs_csum_file_blocks() didn't differentiate between these two types of cases; fix it.
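Condensed, the split looks like this (a sketch of the patch below): only the data-write path flags the transaction, so tree-log and log-replay callers of btrfs_csum_file_blocks() leave the flag clear and fall back to the csum tree's own reservation:

	/* add_pending_csums(), i.e. ordinary data writes: */
	trans->adding_csums = 1;
	btrfs_csum_file_blocks(trans, csum_root, sum);
	trans->adding_csums = 0;

	/* tree-log / log-replay callers: adding_csums stays 0 */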
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/file-item.c | 2 -- fs/btrfs/inode.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ec160202be3e..b7e529d2860f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -728,7 +728,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, return -ENOMEM; sector_sum = sums->sums; - trans->adding_csums = 1; again: next_offset = (u64)-1; found_next = 0; @@ -899,7 +898,6 @@ next_sector: goto again; } out: - trans->adding_csums = 0; btrfs_free_path(path); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8cab424c75f8..b88381582dab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1757,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sum; list_for_each_entry(sum, list, list) { + trans->adding_csums = 1; btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); + trans->adding_csums = 0; } return 0; } -- cgit v1.2.2 From 82d130ff390be67d980d8b6f39e921c0b1d8d8e0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 28 Mar 2013 08:12:15 +0000 Subject: Btrfs: fix wrong return value of btrfs_lookup_csum() If we don't find the expected csum item, but find a csum item which is adjacent to the specified extent, we should return -EFBIG; otherwise we should return -ENOENT. But btrfs_lookup_csum() returned -EFBIG even when the csum item was not adjacent to the specified extent. Fix it. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/file-item.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b7e529d2860f..c4628a201cb3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); csums_in_item /= csum_size; - if (csum_offset >= csums_in_item) { + if (csum_offset == csums_in_item) { ret = -EFBIG; goto fail; + } else if (csum_offset > csums_in_item) { + goto fail; } } item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); -- cgit v1.2.2 From c613c5f686b5493290aeb6a3c4b3b2371a8582cf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 28 Mar 2013 09:43:43 -0600 Subject: mg_disk: fix error return code in mg_probe() Fix to return a negative error code from the error handling case instead of 0, as returned elsewhere in this function. Signed-off-by: Wei Yongjun Reviewed-by: Jingoo Han Signed-off-by: Jens Axboe --- drivers/block/mg_disk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 1788f491e0fb..076ae7f1b781 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -890,8 +890,10 @@ static int mg_probe(struct platform_device *plat_dev) gpio_direction_output(host->rst, 1); /* reset out pin */ - if (!(prv_data->dev_attr & MG_DEV_MASK)) + if (!(prv_data->dev_attr & MG_DEV_MASK)) { + err = -EINVAL; goto probe_err_3a; + } if (prv_data->dev_attr != MG_BOOT_DEV) { rsc = platform_get_resource_byname(plat_dev, IORESOURCE_IO, -- cgit v1.2.2 From 482b0b5d82bd916cc0c55a2abf65bdc69023b843 Mon Sep 17 00:00:00 2001 From: Konstantin Holoborodko Date: Fri, 29 Mar 2013 00:06:13 +0900 Subject: usb: ftdi_sio: Add support for Mitsubishi FX-USB-AW/-BD It enhances the driver for FTDI-based USB serial adapters to recognize Mitsubishi Electric Corp.
USB/RS422 Converters as FT232BM chips and support them. https://search.meau.com/?q=FX-USB-AW Signed-off-by: Konstantin Holoborodko Tested-by: Konstantin Holoborodko Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d4809d551473..9886180e45f1 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -640,6 +640,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_RM_CANVIEW_PID) }, { USB_DEVICE(ACTON_VID, ACTON_SPECTRAPRO_PID) }, { USB_DEVICE(CONTEC_VID, CONTEC_COM1USBH_PID) }, + { USB_DEVICE(MITSUBISHI_VID, MITSUBISHI_FXUSB_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USOTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USTL4_PID) }, { USB_DEVICE(BANDB_VID, BANDB_USO9ML2_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 9d359e189a64..e79861eeed4c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -583,6 +583,13 @@ #define CONTEC_VID 0x06CE /* Vendor ID */ #define CONTEC_COM1USBH_PID 0x8311 /* COM-1(USB)H */ +/* + * Mitsubishi Electric Corp. (http://www.meau.com) + * Submitted by Konstantin Holoborodko + */ +#define MITSUBISHI_VID 0x06D3 +#define MITSUBISHI_FXUSB_PID 0x0284 /* USB/RS422 converters: FX-USB-AW/-BD */ + /* * Definitions for B&B Electronics products. */ -- cgit v1.2.2 From 09a9f1d27892255cfb9c91203f19476765e2d8d1 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Thu, 28 Mar 2013 16:26:23 -0700 Subject: Revert "mm: introduce VM_POPULATE flag to better deal with racy userspace programs" This reverts commit 186930500985 ("mm: introduce VM_POPULATE flag to better deal with racy userspace programs"). VM_POPULATE only has any effect when userspace plays racy games with vmas by trying to unmap and remap memory regions that mmap or mlock are operating on. Also, the only effect of VM_POPULATE when userspace plays such games is that it avoids populating new memory regions that get remapped into the address range that was being operated on by the original mmap or mlock calls. Let's remove VM_POPULATE as there isn't any strong argument to mandate a new vm_flag. Signed-off-by: Michel Lespinasse Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - include/linux/mman.h | 4 +--- mm/fremap.c | 12 ++---------- mm/mlock.c | 11 +++++------ mm/mmap.c | 4 +++- 5 files changed, 11 insertions(+), 21 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7acc9dc73c9f..e19ff30ad0a2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -87,7 +87,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ -#define VM_POPULATE 0x00001000 #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 /* Memory mapped I/O or similar */ diff --git a/include/linux/mman.h b/include/linux/mman.h index 61c7a87e5d2b..9aa863da287f 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -79,8 +79,6 @@ calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | - ((flags & MAP_LOCKED) ? (VM_LOCKED | VM_POPULATE) : 0) | - (((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE) ? 
- VM_POPULATE : 0); + _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); } #endif /* _LINUX_MMAN_H */ diff --git a/mm/fremap.c b/mm/fremap.c index 4723ac8d2fc2..87da3590c61e 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -204,10 +204,8 @@ get_write_lock: unsigned long addr; struct file *file = get_file(vma->vm_file); - vm_flags = vma->vm_flags; - if (!(flags & MAP_NONBLOCK)) - vm_flags |= VM_POPULATE; - addr = mmap_region(file, start, size, vm_flags, pgoff); + addr = mmap_region(file, start, size, + vma->vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; @@ -226,12 +224,6 @@ get_write_lock: mutex_unlock(&mapping->i_mmap_mutex); } - if (!(flags & MAP_NONBLOCK) && !(vma->vm_flags & VM_POPULATE)) { - if (!has_write_lock) - goto get_write_lock; - vma->vm_flags |= VM_POPULATE; - } - if (vma->vm_flags & VM_LOCKED) { /* * drop PG_Mlocked flag for over-mapped range diff --git a/mm/mlock.c b/mm/mlock.c index 1c5e33fce639..79b7cf7d1bca 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -358,7 +358,7 @@ static int do_mlock(unsigned long start, size_t len, int on) newflags = vma->vm_flags & ~VM_LOCKED; if (on) - newflags |= VM_LOCKED | VM_POPULATE; + newflags |= VM_LOCKED; tmp = vma->vm_end; if (tmp > end) @@ -418,8 +418,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) * range with the first VMA. Also, skip undesirable VMA types. */ nend = min(end, vma->vm_end); - if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_POPULATE)) != - VM_POPULATE) + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) continue; if (nstart < vma->vm_start) nstart = vma->vm_start; @@ -492,9 +491,9 @@ static int do_mlockall(int flags) struct vm_area_struct * vma, * prev = NULL; if (flags & MCL_FUTURE) - current->mm->def_flags |= VM_LOCKED | VM_POPULATE; + current->mm->def_flags |= VM_LOCKED; else - current->mm->def_flags &= ~(VM_LOCKED | VM_POPULATE); + current->mm->def_flags &= ~VM_LOCKED; if (flags == MCL_FUTURE) goto out; @@ -503,7 +502,7 @@ static int do_mlockall(int flags) newflags = vma->vm_flags & ~VM_LOCKED; if (flags & MCL_CURRENT) - newflags |= VM_LOCKED | VM_POPULATE; + newflags |= VM_LOCKED; /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); diff --git a/mm/mmap.c b/mm/mmap.c index 2664a47cec93..6466699b16cb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1306,7 +1306,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } addr = mmap_region(file, addr, len, vm_flags, pgoff); - if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE)) + if (!IS_ERR_VALUE(addr) && + ((vm_flags & VM_LOCKED) || + (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) *populate = len; return addr; } -- cgit v1.2.2 From 5dade71050e799d8679698a6145e2ba46cdeac2a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 27 Mar 2013 17:23:41 -0700 Subject: tcm_vhost: Avoid VIRTIO_RING_F_EVENT_IDX feature bit This patch adds a VHOST_SCSI_FEATURES mask minus VIRTIO_RING_F_EVENT_IDX so that vhost-scsi-pci userspace will strip this feature bit once GET_FEATURES reports it as being unsupported on the host. This is to avoid a bug where ->handle_kick() callbacks are missed when EVENT_IDX is enabled by default in userspace code. (mst: Rename to VHOST_SCSI_FEATURES + add comment) Acked-by: Michael S.
Tsirkin Reviewed-by: Asias He Cc: Paolo Bonzini Signed-off-by: Nicholas Bellinger --- drivers/vhost/tcm_vhost.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 43fb11ee2e8d..2968b4934659 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -60,6 +60,15 @@ enum { VHOST_SCSI_VQ_IO = 2, }; +/* + * VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in + * kernel but disabling it helps. + * TODO: debug and remove the workaround. + */ +enum { + VHOST_SCSI_FEATURES = VHOST_FEATURES & (~VIRTIO_RING_F_EVENT_IDX) }; + #define VHOST_SCSI_MAX_TARGET 256 #define VHOST_SCSI_MAX_VQ 128 @@ -946,7 +955,7 @@ static void vhost_scsi_flush(struct vhost_scsi *vs) static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) { - if (features & ~VHOST_FEATURES) + if (features & ~VHOST_SCSI_FEATURES) return -EOPNOTSUPP; mutex_lock(&vs->dev.mutex); @@ -992,7 +1001,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return 0; case VHOST_GET_FEATURES: - features = VHOST_FEATURES; + features = VHOST_SCSI_FEATURES; if (copy_to_user(featurep, &features, sizeof features)) return -EFAULT; return 0; -- cgit v1.2.2 From f85eda8d75d37a3796cee7f5a906e50e3f13d9e1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 28 Mar 2013 23:06:00 -0700 Subject: target: Fix RESERVATION_CONFLICT status regression for iscsi-target special case This patch fixes a regression introduced in v3.8-rc1 code where a failed target_check_reservation() check in target_setup_cmd_from_cdb() was causing an incorrect SAM_STAT_GOOD status to be returned during a WRITE operation performed by an unregistered / unreserved iscsi initiator port. This regression is only affecting iscsi-target due to a special case check for TCM_RESERVATION_CONFLICT within iscsi_target_erl1.c:iscsit_execute_cmd(), and was still correctly disallowing WRITE commands from backend submission for unregistered / unreserved initiator ports, while returning the incorrect SAM_STAT_GOOD status due to the missing SAM_STAT_RESERVATION_CONFLICT assignment. This regression was first introduced with: commit de103c93aff0bed0ae984274e5dc8b95899badab Author: Christoph Hellwig Date: Tue Nov 6 12:24:09 2012 -0800 target: pass sense_reason as a return value Go ahead and re-add the missing SAM_STAT_RESERVATION_CONFLICT assignment during a target_check_reservation() failure, so that iscsi-target code sends the correct SCSI status. All other fabrics using target_submit_cmd_*() with a RESERVATION_CONFLICT call to transport_generic_request_failure() are not affected by this bug.
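Condensed, the fix is just the status assignment on the failure path (a sketch of the hunk in the patch below): without it, cmd->scsi_status stayed 0 (SAM_STAT_GOOD), which is what the iscsi-target special case then reported to the initiator:

	ret = target_check_reservation(cmd);
	if (ret) {
		cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT;
		return ret;
	}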
Reported-by: Jeff Leung Cc: Christoph Hellwig Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2030b608136d..3243ea790eab 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1139,8 +1139,10 @@ target_setup_cmd_from_cdb(struct se_cmd *cmd, unsigned char *cdb) return ret; ret = target_check_reservation(cmd); - if (ret) + if (ret) { + cmd->scsi_status = SAM_STAT_RESERVATION_CONFLICT; return ret; + } ret = dev->transport->parse_cdb(cmd); if (ret) -- cgit v1.2.2 From d8fe29e9dea8d7d61fd140d8779326856478fc62 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 29 Mar 2013 08:09:34 -0600 Subject: Btrfs: don't drop path when printing out tree errors in scrub A user reported a panic where we were panicking somewhere in tree_backref_for_extent from scrub_print_warning. He only captured the trace, but looking at scrub_print_warning we drop the path right before we mess with the extent buffer to print out a bunch of stuff, which isn't right. So fix this by dropping the path after we use the eb if we need to. Thanks, Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/scrub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 53c3501fa4ca..85e072b956d5 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) eb = path->nodes[0]; ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); item_size = btrfs_item_size_nr(eb, path->slots[0]); - btrfs_release_path(path); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { do { @@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) ret < 0 ? -1 : ref_level, ret < 0 ? -1 : ref_root); } while (ret != 1); + btrfs_release_path(path); } else { + btrfs_release_path(path); swarn.path = path; swarn.dev = dev; iterate_extent_inodes(fs_info, found_key.objectid, -- cgit v1.2.2 From ed176886b68fbc450ddbe808684a142fcad72b56 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 29 Mar 2013 11:02:30 -0700 Subject: ia64 idle: delete stale (*idle)() function pointer Commit 3e7fc708eb41 ("ia64 idle: delete pm_idle") in 3.9-rc1 didn't finish the job, leaving an uninitialized reference to (*idle)().
[ Haven't seen a crash from this - but seems like we are just being lucky that "idle" is zero so it does get initialized before we jump to randomland - Len ] Reported-by: Lars-Peter Clausen Signed-off-by: Len Brown Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- arch/ia64/kernel/process.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index e34f565f595a..6f7dc8b7b35c 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -291,7 +291,6 @@ cpu_idle (void) } if (!need_resched()) { - void (*idle)(void); #ifdef CONFIG_SMP min_xtp(); #endif @@ -299,9 +298,7 @@ cpu_idle (void) if (mark_idle) (*mark_idle)(1); - if (!idle) - idle = default_idle; - (*idle)(); + default_idle(); if (mark_idle) (*mark_idle)(0); #ifdef CONFIG_SMP -- cgit v1.2.2 From 6e2a4505dba0cae8faa701426185dfb7b49f537c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 27 Mar 2013 09:16:30 -0500 Subject: rbd: don't zero-fill non-image object requests A result of ENOENT from a read request for an object that's part of an rbd image indicates that there is a hole in that portion of the image. Similarly, a short read for such an object indicates that the remainder of the read should be interpreted as a full read with zeros filling out the end of the request. This behavior is not correct for objects that are not backing rbd image data. Currently rbd_img_obj_request_callback() assumes it should be done for all objects. Change rbd_img_obj_request_callback() so it only does this zeroing for image objects. Encapsulate that special handling in its own function. Add an assertion that the image object request is a bio request, since we assume that (and we currently don't support any other types). This resolves a problem identified here: http://tracker.ceph.com/issues/4559 The regression was introduced by bf0d5f503dc11d6314c0503591d258d60ee9c944. Reported-by: Dan van der Ster Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- drivers/block/rbd.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c81a4c040b9..f556f8a8b3f9 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1264,6 +1264,32 @@ static bool obj_request_done_test(struct rbd_obj_request *obj_request) { return atomic_read(&obj_request->done) != 0; } +static void +rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p img %p result %d %llu/%llu\n", __func__, + obj_request, obj_request->img_request, obj_request->result, + obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the image. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied.
+ BUG_ON(obj_request->type != OBJ_REQUEST_BIO); + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; + } + obj_request_done_set(obj_request); +} + static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) { dout("%s: obj %p cb %p\n", __func__, obj_request, @@ -1284,23 +1310,10 @@ static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) { dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, obj_request->result, obj_request->xferred, obj_request->length); - /* - * ENOENT means a hole in the object. We zero-fill the - * entire length of the request. A short read also implies - * zero-fill to the end of the request. Either way we - * update the xferred count to indicate the whole request - * was satisfied. - */ - if (obj_request->result == -ENOENT) { - zero_bio_chain(obj_request->bio_list, 0); - obj_request->result = 0; - obj_request->xferred = obj_request->length; - } else if (obj_request->xferred < obj_request->length && - !obj_request->result) { - zero_bio_chain(obj_request->bio_list, obj_request->xferred); - obj_request->xferred = obj_request->length; - } - obj_request_done_set(obj_request); + if (obj_request->img_request) + rbd_img_obj_request_read_callback(obj_request); + else + obj_request_done_set(obj_request); } static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) -- cgit v1.2.2 From 46a1f21a679abaaeae6db9969963dc998c9f1c1c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 29 Mar 2013 22:59:53 +0100 Subject: PNP: List Rafael Wysocki as a maintainer Adam Belay's e-mail address in MAINTAINERS under PNP SUPPORT is no longer valid, and I have in fact been maintaining that code in the meantime, so list myself as a maintainer of it along with Bjorn and remove Adam's entry. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 44135e3d7410..60a45838893f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6209,7 +6209,7 @@ F: include/linux/power_supply.h F: drivers/power/ PNP SUPPORT -M: Adam Belay +M: Rafael J. Wysocki M: Bjorn Helgaas S: Maintained F: drivers/pnp/ -- cgit v1.2.2 From f73bb9b35596e045feacdf4d2fd32cfb087e2411 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Mar 2013 20:51:28 +0000 Subject: dmaengine: dw_dma: fix endianness for DT xlate function As reported by Wu Fengguang's build robot tracking sparse warnings, the dma_spec arguments in dw_dma_xlate() are already byte swapped on little-endian platforms and must not get swapped again. This code is currently not used anywhere, but will be used in Linux 3.10 when the ARM SPEAr platform starts using the generic DMA DT binding.
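A worked example of the double swap (illustrative values, little-endian CPU): the OF core has already converted the DT cells to CPU endianness by the time dw_dma_xlate() sees them, so swapping again corrupts the value:

	/* DT cell contains request line 16 (0x00000010) */
	u32 ok  = dma_spec->args[0];			/* 0x00000010: correct */
	u32 bad = be32_to_cpup(&dma_spec->args[0]);	/* 0x10000000: wrong   */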
Signed-off-by: Arnd Bergmann Reported-by: Fengguang Wu Acked-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index c599558faeda..eb81ec9d5b91 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1276,9 +1276,9 @@ static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec, if (dma_spec->args_count != 3) return NULL; - fargs.req = be32_to_cpup(dma_spec->args+0); - fargs.src = be32_to_cpup(dma_spec->args+1); - fargs.dst = be32_to_cpup(dma_spec->args+2); + fargs.req = dma_spec->args[0]; + fargs.src = dma_spec->args[1]; + fargs.dst = dma_spec->args[2]; if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS || fargs.src >= dw->nr_masters || -- cgit v1.2.2 From bce95c63ef1bcf528ea45c41505eb4c21560d92d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 20 Feb 2013 13:52:17 +0200 Subject: dw_dmac: adjust slave_id according to request line base On some hardware configurations the request line comes with an offset. The patch introduces the convert_slave_id() helper for such cases. The request line base comes from the driver data provided by the platform_device_id table. Signed-off-by: Mika Westerberg Signed-off-by: Andy Shevchenko Cc: Viresh Kumar Acked-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 17 ++++++++++++++++- drivers/dma/dw_dmac_regs.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index eb81ec9d5b91..43a5329d4483 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1001,6 +1001,13 @@ static inline void convert_burst(u32 *maxburst) *maxburst = 0; } +static inline void convert_slave_id(struct dw_dma_chan *dwc) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + dwc->dma_sconfig.slave_id -= dw->request_line_base; +} + static int set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig) { @@ -1015,6 +1022,7 @@ set_runtime_config(struct dma_chan *chan, struct dma_slave_config *sconfig) convert_burst(&dwc->dma_sconfig.src_maxburst); convert_burst(&dwc->dma_sconfig.dst_maxburst); + convert_slave_id(dwc); return 0; } @@ -1628,6 +1636,7 @@ dw_dma_parse_dt(struct platform_device *pdev) static int dw_probe(struct platform_device *pdev) { + const struct platform_device_id *match; struct dw_dma_platform_data *pdata; struct resource *io; struct dw_dma *dw; @@ -1711,6 +1720,11 @@ static int dw_probe(struct platform_device *pdev) memcpy(dw->data_width, pdata->data_width, 4); } + /* Get the base request line if set */ + match = platform_get_device_id(pdev); + if (match) + dw->request_line_base = (unsigned int)match->driver_data; + /* Calculate all channel mask before DMA setup */ dw->all_chan_mask = (1 << nr_channels) - 1; @@ -1906,7 +1920,8 @@ MODULE_DEVICE_TABLE(of, dw_dma_id_table); #endif static const struct platform_device_id dw_dma_ids[] = { - { "INTL9C60", 0 }, + /* Name, Request Line Base */ + { "INTL9C60", (kernel_ulong_t)16 }, { } }; diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index cf0ce5c77d60..4d02c3669b75 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -247,6 +247,7 @@ struct dw_dma { /* hardware configuration */ unsigned char nr_masters; unsigned char data_width[4]; + unsigned int request_line_base; struct dw_dma_chan chan[0]; }; -- cgit v1.2.2 From dbf520a9d7d4d5ba28d2947be11e34099a5e3e20 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sun,
31 Mar 2013 00:04:40 +0000 Subject: Revert "lockdep: check that no locks held at freeze time" This reverts commit 6aa9707099c4b25700940eb3d016f16c4434360d. Commit 6aa9707099c4 ("lockdep: check that no locks held at freeze time") causes problems with NFS root filesystems. The failures were noticed on OMAP2 and 3 boards during kernel init: [ BUG: swapper/0/1 still has locks held! ] 3.9.0-rc3-00344-ga937536 #1 Not tainted ------------------------------------- 1 lock held by swapper/0/1: #0: (&type->s_umount_key#13/1){+.+.+.}, at: [] sget+0x248/0x574 stack backtrace: rpc_wait_bit_killable __wait_on_bit out_of_line_wait_on_bit __rpc_execute rpc_run_task rpc_call_sync nfs_proc_get_root nfs_get_root nfs_fs_mount_common nfs_try_mount nfs_fs_mount mount_fs vfs_kern_mount do_mount sys_mount do_mount_root mount_root prepare_namespace kernel_init_freeable kernel_init Although the rootfs mounts, the system is unstable. Here's a transcript from a PM test: http://www.pwsan.com/omap/testlogs/test_v3.9-rc3/20130317194234/pm/37xxevm/37xxevm_log.txt Here's what the test log should look like: http://www.pwsan.com/omap/testlogs/test_v3.8/20130218214403/pm/37xxevm/37xxevm_log.txt Mailing list discussion is here: http://lkml.org/lkml/2013/3/4/221 Deal with this for v3.9 by reverting the problem commit, until folks can figure out the right long-term course of action. Signed-off-by: Paul Walmsley Cc: Mandeep Singh Baines Cc: Jeff Layton Cc: Shawn Guo Cc: Cc: Fengguang Wu Cc: Trond Myklebust Cc: Ingo Molnar Cc: Ben Chan Cc: Oleg Nesterov Cc: Tejun Heo Cc: Rafael J. Wysocki Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/debug_locks.h | 4 ++-- include/linux/freezer.h | 3 --- kernel/exit.c | 2 +- kernel/lockdep.c | 17 +++++++++-------- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index a975de1ff59f..3bd46f766751 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -51,7 +51,7 @@ struct task_struct; extern void debug_show_all_locks(void); extern void debug_show_held_locks(struct task_struct *task); extern void debug_check_no_locks_freed(const void *from, unsigned long len); -extern void debug_check_no_locks_held(void); +extern void debug_check_no_locks_held(struct task_struct *task); #else static inline void debug_show_all_locks(void) { @@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len) } static inline void -debug_check_no_locks_held(void) +debug_check_no_locks_held(struct task_struct *task) { } #endif diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 043a5cf8b5ba..e70df40d84f6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -3,7 +3,6 @@ #ifndef FREEZER_H_INCLUDED #define FREEZER_H_INCLUDED -#include #include #include #include @@ -49,8 +48,6 @@ extern void thaw_kernel_threads(void); static inline bool try_to_freeze(void) { - if (!(current->flags & PF_NOFREEZE)) - debug_check_no_locks_held(); might_sleep(); if (likely(!freezing(current))) return false; diff --git a/kernel/exit.c b/kernel/exit.c index 51e485ca9935..60bc027c61c3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -835,7 +835,7 @@ void do_exit(long code) /* * Make sure we are holding no locks: */ - debug_check_no_locks_held(); + debug_check_no_locks_held(tsk); /* * We can do this unlocked here. 
The futex code uses this flag * just to verify whether the pi state cleanup has been done diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 259db207b5d9..8a0efac4f99d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4088,7 +4088,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); -static void print_held_locks_bug(void) +static void print_held_locks_bug(struct task_struct *curr) { if (!debug_locks_off()) return; @@ -4097,21 +4097,22 @@ static void print_held_locks_bug(void) printk("\n"); printk("=====================================\n"); - printk("[ BUG: %s/%d still has locks held! ]\n", - current->comm, task_pid_nr(current)); + printk("[ BUG: lock held at task exit time! ]\n"); print_kernel_ident(); printk("-------------------------------------\n"); - lockdep_print_held_locks(current); + printk("%s/%d is exiting with locks still held!\n", + curr->comm, task_pid_nr(curr)); + lockdep_print_held_locks(curr); + printk("\nstack backtrace:\n"); dump_stack(); } -void debug_check_no_locks_held(void) +void debug_check_no_locks_held(struct task_struct *task) { - if (unlikely(current->lockdep_depth > 0)) - print_held_locks_bug(); + if (unlikely(task->lockdep_depth > 0)) + print_held_locks_bug(task); } -EXPORT_SYMBOL_GPL(debug_check_no_locks_held); void debug_show_all_locks(void) { -- cgit v1.2.2 From 07961ac7c0ee8b546658717034fe692fd12eefa9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 Mar 2013 15:12:43 -0700 Subject: Linux 3.9-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 54d2b2a0fef0..58a165b02af1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Unicycling Gorilla # *DOCUMENTATION* -- cgit v1.2.2 From bc0caf099d9df4dd0fad24992b043b40541f4200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:31 -0700 Subject: workqueue: fix race condition in unbound workqueue free path 8864b4e59 ("workqueue: implement get/put_pwq()") implemented pwq (pool_workqueue) refcnting which frees the workqueue when the last pwq goes away. It determined whether it was the last pwq by testing whether wq->pwqs is empty. Unfortunately, the test was done outside wq->mutex, so multiple pwq releases could race and try to free the wq multiple times, leading to an oops. Test wq->pwqs emptiness while holding wq->mutex. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 04a8b98d30ce..4d344326ae97 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3534,6 +3534,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work) unbound_release_work); struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; + bool is_last; if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) return; @@ -3545,6 +3546,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work) */ mutex_lock(&wq->mutex); list_del_rcu(&pwq->pwqs_node); + is_last = list_empty(&wq->pwqs); mutex_unlock(&wq->mutex); put_unbound_pool(pool); @@ -3554,7 +3556,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work) * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Free it.
*/ - if (list_empty(&wq->pwqs)) + if (is_last) kfree(wq); } -- cgit v1.2.2 From 13e2e556013a543eebd238d1c2759195e3c0c9fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:31 -0700 Subject: workqueue: fix unbound workqueue attrs hashing / comparison 29c91e9912b ("workqueue: implement attribute-based unbound worker_pool management") implemented attrs-based worker_pool matching. It tried to avoid false negatives when comparing cpumasks with a custom hash function; unfortunately, the hash and comparison functions fail to ignore CPUs which are not possible. It incorrectly assumed that bitmap_copy() skips leftover bits in the last word of the bitmap and that cpumask_equal() ignores impossible CPUs. This patch updates attrs->cpumask handling such that impossible CPUs are properly ignored. * Hash and copy functions no longer do anything special. They expect their callers to clear impossible CPUs. * alloc_workqueue_attrs() initializes the cpumask to cpu_possible_mask instead of setting all bits, and the explicit cpumask_setall() for unbound_std_wq_attrs[] in init_workqueues() is dropped. * apply_workqueue_attrs() is now responsible for ignoring impossible CPUs. It makes a copy of @attrs and clears impossible CPUs before doing anything else. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 54 ++++++++++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4d344326ae97..abe1f0da4600 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3302,7 +3302,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask) if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask)) goto fail; - cpumask_setall(attrs->cpumask); + cpumask_copy(attrs->cpumask, cpu_possible_mask); return attrs; fail: free_workqueue_attrs(attrs); @@ -3316,33 +3316,14 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, cpumask_copy(to->cpumask, from->cpumask); } -/* - * Hacky implementation of jhash of bitmaps which only considers the - * specified number of bits. We probably want a proper implementation in - * include/linux/jhash.h.
- */ -static u32 jhash_bitmap(const unsigned long *bitmap, int bits, u32 hash) -{ - int nr_longs = bits / BITS_PER_LONG; - int nr_leftover = bits % BITS_PER_LONG; - unsigned long leftover = 0; - - if (nr_longs) - hash = jhash(bitmap, nr_longs * sizeof(long), hash); - if (nr_leftover) { - bitmap_copy(&leftover, bitmap + nr_longs, nr_leftover); - hash = jhash(&leftover, sizeof(long), hash); - } - return hash; -} - /* hash value of the content of @attr */ static u32 wqattrs_hash(const struct workqueue_attrs *attrs) { u32 hash = 0; hash = jhash_1word(attrs->nice, hash); - hash = jhash_bitmap(cpumask_bits(attrs->cpumask), nr_cpu_ids, hash); + hash = jhash(cpumask_bits(attrs->cpumask), + BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); return hash; } @@ -3652,7 +3633,8 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { - struct pool_workqueue *pwq, *last_pwq; + struct workqueue_attrs *new_attrs; + struct pool_workqueue *pwq = NULL, *last_pwq; struct worker_pool *pool; /* only unbound workqueues can change attributes */ @@ -3663,15 +3645,21 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) return -EINVAL; + /* make a copy of @attrs and sanitize it */ + new_attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!new_attrs) + goto enomem; + + copy_workqueue_attrs(new_attrs, attrs); + cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); if (!pwq) - return -ENOMEM; + goto enomem; - pool = get_unbound_pool(attrs); - if (!pool) { - kmem_cache_free(pwq_cache, pwq); - return -ENOMEM; - } + pool = get_unbound_pool(new_attrs); + if (!pool) + goto enomem; init_and_link_pwq(pwq, wq, pool, &last_pwq); if (last_pwq) { @@ -3681,6 +3669,11 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, } return 0; + +enomem: + kmem_cache_free(pwq_cache, pwq); + free_workqueue_attrs(new_attrs); + return -ENOMEM; } static int alloc_and_link_pwqs(struct workqueue_struct *wq) @@ -4450,10 +4443,7 @@ static int __init init_workqueues(void) struct workqueue_attrs *attrs; BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); - attrs->nice = std_nice[i]; - cpumask_setall(attrs->cpumask); - unbound_std_wq_attrs[i] = attrs; } -- cgit v1.2.2 From 4862125b0256a946d2749a1d5003b0604bc3cb4d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:31 -0700 Subject: workqueue: fix memory leak in apply_workqueue_attrs() apply_workqueue_attrs() wasn't freeing temp attrs variable @new_attrs in its success path. Fix it. 
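The shape of the fix is the classic single-exit cleanup idiom: success and failure both fall through one label that releases the temporary, so no path can leak it. A hedged userspace sketch of the pattern follows; the names are modelled on the patch, but none of this is the kernel code itself:

#include <stdlib.h>

struct attrs { int nice; };

/* Placeholder for work that may fail; here it always succeeds. */
static int do_work(struct attrs *a)
{
	return a->nice;
}

static int apply_attrs(void)
{
	struct attrs *new_attrs;
	int ret;

	new_attrs = calloc(1, sizeof(*new_attrs));
	if (!new_attrs)
		return -1;

	if (do_work(new_attrs) < 0) {
		ret = -1;
		goto out_free;		/* error path frees the temporary */
	}

	ret = 0;			/* success falls through to the same free */
out_free:
	free(new_attrs);
	return ret;
}

int main(void)
{
	return apply_attrs();
}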
Signed-off-by: Tejun Heo Reported-by: Lai Jiangshan --- kernel/workqueue.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index abe1f0da4600..89480fc8eaa3 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3636,6 +3636,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, struct workqueue_attrs *new_attrs; struct pool_workqueue *pwq = NULL, *last_pwq; struct worker_pool *pool; + int ret; /* only unbound workqueues can change attributes */ if (WARN_ON(!(wq->flags & WQ_UNBOUND))) @@ -3668,12 +3669,16 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, spin_unlock_irq(&last_pwq->pool->lock); } - return 0; + ret = 0; + /* fall through */ +out_free: + free_workqueue_attrs(new_attrs); + return ret; enomem: kmem_cache_free(pwq_cache, pwq); - free_workqueue_attrs(new_attrs); - return -ENOMEM; + ret = -ENOMEM; + goto out_free; } static int alloc_and_link_pwqs(struct workqueue_struct *wq) -- cgit v1.2.2 From a892cacc7f4960a39c0fad7bbdf04c5cbf7c229e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:32 -0700 Subject: workqueue: move pwq_pool_locking outside of get/put_unbound_pool() The scheduled NUMA affinity support for unbound workqueues would need to walk the workqueues list and perform pool-related operations on each workqueue. Move wq_pool_mutex locking out of get/put_unbound_pool() to their callers so that pool operations can be performed while walking the workqueues list, which is also protected by wq_pool_mutex. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 89480fc8eaa3..2bf3d8c6e128 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3395,31 +3395,28 @@ static void rcu_free_pool(struct rcu_head *rcu) * safe manner. get_unbound_pool() calls this function on its failure path * and this function should be able to release pools which went through, * successfully or not, init_worker_pool(). + * + * Should be called with wq_pool_mutex held. */ static void put_unbound_pool(struct worker_pool *pool) { struct worker *worker; - mutex_lock(&wq_pool_mutex); - if (--pool->refcnt) { - mutex_unlock(&wq_pool_mutex); + lockdep_assert_held(&wq_pool_mutex); + + if (--pool->refcnt) return; - } /* sanity checks */ if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) || - WARN_ON(!list_empty(&pool->worklist))) { - mutex_unlock(&wq_pool_mutex); + WARN_ON(!list_empty(&pool->worklist))) return; - } /* release id and unhash */ if (pool->id >= 0) idr_remove(&worker_pool_idr, pool->id); hash_del(&pool->hash_node); - mutex_unlock(&wq_pool_mutex); - /* * Become the manager and destroy all workers. Grabbing * manager_arb prevents @pool's workers from blocking on @@ -3453,13 +3450,15 @@ static void put_unbound_pool(struct worker_pool *pool) * reference count and return it. If there already is a matching * worker_pool, it will be used; otherwise, this function attempts to * create a new one. On failure, returns NULL. + * + * Should be called with wq_pool_mutex held. */ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) { u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; - mutex_lock(&wq_pool_mutex); + lockdep_assert_held(&wq_pool_mutex); /* do we already have a matching pool?
*/ hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { @@ -3490,10 +3489,8 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) /* install */ hash_add(unbound_pool_hash, &pool->hash_node, hash); out_unlock: - mutex_unlock(&wq_pool_mutex); return pool; fail: - mutex_unlock(&wq_pool_mutex); if (pool) put_unbound_pool(pool); return NULL; @@ -3530,7 +3527,10 @@ static void pwq_unbound_release_workfn(struct work_struct *work) is_last = list_empty(&wq->pwqs); mutex_unlock(&wq->mutex); + mutex_lock(&wq_pool_mutex); put_unbound_pool(pool); + mutex_unlock(&wq_pool_mutex); + call_rcu_sched(&pwq->rcu, rcu_free_pwq); /* @@ -3654,13 +3654,21 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, copy_workqueue_attrs(new_attrs, attrs); cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); + mutex_lock(&wq_pool_mutex); + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); - if (!pwq) + if (!pwq) { + mutex_unlock(&wq_pool_mutex); goto enomem; + } pool = get_unbound_pool(new_attrs); - if (!pool) + if (!pool) { + mutex_unlock(&wq_pool_mutex); goto enomem; + } + + mutex_unlock(&wq_pool_mutex); init_and_link_pwq(pwq, wq, pool, &last_pwq); if (last_pwq) { -- cgit v1.2.2 From bce903809ab3f29eca97e0be5537778c1689c82b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:32 -0700 Subject: workqueue: add wq_numa_tbl_len and wq_numa_possible_cpumask[] Unbound workqueues are going to be NUMA-affine. Add wq_numa_tbl_len and wq_numa_possible_cpumask[] in preparation. The former is the highest NUMA node ID + 1 and the latter is masks of possible CPUs for each NUMA node. This patch only introduces these. Future patches will make use of them. v2: NUMA initialization moved into wq_numa_init(). Also, the possible cpumask array is not created if there aren't multiple nodes on the system. wq_numa_enabled bool added. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2bf3d8c6e128..5ca46a2e2616 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -253,6 +254,12 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; +static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */ +static cpumask_var_t *wq_numa_possible_cpumask; + /* possible CPUs of each node */ + +static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ + static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ @@ -4407,6 +4414,43 @@ out_unlock: } #endif /* CONFIG_FREEZER */ +static void __init wq_numa_init(void) +{ + cpumask_var_t *tbl; + int node, cpu; + + /* determine NUMA pwq table len - highest node id + 1 */ + for_each_node(node) + wq_numa_tbl_len = max(wq_numa_tbl_len, node + 1); + + if (num_possible_nodes() <= 1) + return; + + /* + * We want masks of possible CPUs of each node which isn't readily + * available. Build one from cpu_to_node() which should have been + * fully initialized by now.
+ */ + tbl = kzalloc(wq_numa_tbl_len * sizeof(tbl[0]), GFP_KERNEL); + BUG_ON(!tbl); + + for_each_node(node) + BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, node)); + + for_each_possible_cpu(cpu) { + node = cpu_to_node(cpu); + if (WARN_ON(node == NUMA_NO_NODE)) { + pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu); + /* happens iff arch is bonkers, let's just proceed */ + return; + } + cpumask_set_cpu(cpu, tbl[node]); + } + + wq_numa_possible_cpumask = tbl; + wq_numa_enabled = true; +} + static int __init init_workqueues(void) { int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; @@ -4423,6 +4467,8 @@ static int __init init_workqueues(void) cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); + wq_numa_init(); + /* initialize CPU pools */ for_each_possible_cpu(cpu) { struct worker_pool *pool; -- cgit v1.2.2 From e3c916a4c7f51722785d34d9f9802b70dac3ce93 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:32 -0700 Subject: workqueue: drop 'H' from kworker names of unbound worker pools Currently, all workqueue workers which have a negative nice value have 'H' postfixed to their names. This is necessary for per-cpu workers as they use the CPU number instead of pool->id to identify the pool and the 'H' postfix is the only thing distinguishing normal and highpri workers. As workers for unbound pools use pool->id, the 'H' postfix is purely informational. TASK_COMM_LEN is 16 and after the static part and delimiters, there are only five characters left for the pool and worker IDs. We're expecting to have more unbound pools with the scheduled NUMA awareness support. Let's drop the non-essential 'H' postfix from unbound kworker names. While at it, restructure kthread_create*() invocation to help future NUMA related changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5ca46a2e2616..248d18aa2a5d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1644,9 +1644,10 @@ static struct worker *alloc_worker(void) */ static struct worker *create_worker(struct worker_pool *pool) { - const char *pri = pool->attrs->nice < 0 ? "H" : ""; struct worker *worker = NULL; + int node = pool->cpu >= 0 ? cpu_to_node(pool->cpu) : NUMA_NO_NODE; int id = -1; + char id_buf[16]; lockdep_assert_held(&pool->manager_mutex); @@ -1672,13 +1673,13 @@ static struct worker *create_worker(struct worker_pool *pool) worker->id = id; if (pool->cpu >= 0) - worker->task = kthread_create_on_node(worker_thread, - worker, cpu_to_node(pool->cpu), - "kworker/%d:%d%s", pool->cpu, id, pri); + snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id, + pool->attrs->nice < 0 ? "H" : ""); else - worker->task = kthread_create(worker_thread, worker, - "kworker/u%d:%d%s", - pool->id, id, pri); + snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id); + + worker->task = kthread_create_on_node(worker_thread, worker, node, + "kworker/%s", id_buf); if (IS_ERR(worker->task)) goto fail; -- cgit v1.2.2 From f3f90ad46934202eeefac454fd5d89bf73c6aa34 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:34 -0700 Subject: workqueue: determine NUMA node of workers according to the allowed cpumask When worker tasks are created using kthread_create_on_node(), currently only per-cpu ones have the matching NUMA node specified.
All unbound workers are always created with NUMA_NO_NODE. Now that an unbound worker pool may have an arbitrary cpumask associated with it, this isn't optimal. Add pool->node which is determined by the pool's cpumask. If the pool's cpumask is contained inside a NUMA node proper, the pool is associated with that node, and all workers of the pool are created on that node. This currently only makes a difference for unbound worker pools with a cpumask contained inside a single NUMA node, but it will serve as the foundation for making all unbound pools NUMA-affine. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 248d18aa2a5d..3e18c7b865eb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -138,6 +138,7 @@ enum { struct worker_pool { spinlock_t lock; /* the pool lock */ int cpu; /* I: the associated cpu */ + int node; /* I: the associated node ID */ int id; /* I: pool ID */ unsigned int flags; /* X: flags */ @@ -1645,7 +1646,6 @@ static struct worker *alloc_worker(void) static struct worker *create_worker(struct worker_pool *pool) { struct worker *worker = NULL; - int node = pool->cpu >= 0 ? cpu_to_node(pool->cpu) : NUMA_NO_NODE; int id = -1; char id_buf[16]; @@ -1678,7 +1678,7 @@ static struct worker *create_worker(struct worker_pool *pool) else snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id); - worker->task = kthread_create_on_node(worker_thread, worker, node, + worker->task = kthread_create_on_node(worker_thread, worker, pool->node, "kworker/%s", id_buf); if (IS_ERR(worker->task)) goto fail; @@ -3360,6 +3360,7 @@ static int init_worker_pool(struct worker_pool *pool) spin_lock_init(&pool->lock); pool->id = -1; pool->cpu = -1; + pool->node = NUMA_NO_NODE; pool->flags |= POOL_DISASSOCIATED; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); @@ -3465,6 +3466,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) { u32 hash = wqattrs_hash(attrs); struct worker_pool *pool; + int node; lockdep_assert_held(&wq_pool_mutex); @@ -3487,6 +3489,17 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ copy_workqueue_attrs(pool->attrs, attrs); + /* if cpumask is contained inside a NUMA node, we belong to that node */ + if (wq_numa_enabled) { + for_each_node(node) { + if (cpumask_subset(pool->attrs->cpumask, + wq_numa_possible_cpumask[node])) { + pool->node = node; + break; + } + } + } + if (worker_pool_assign_id(pool) < 0) goto fail; @@ -4480,6 +4493,7 @@ static int __init init_workqueues(void) pool->cpu = cpu; cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); pool->attrs->nice = std_nice[i++]; + pool->node = cpu_to_node(cpu); /* alloc pool ID */ mutex_lock(&wq_pool_mutex); -- cgit v1.2.2 From 6029a91829ad2bd876fed78bc088d3469a9dd777 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:34 -0700 Subject: workqueue: add workqueue->unbound_attrs Currently, when exposing attrs of an unbound workqueue via sysfs, the workqueue_attrs of first_pwq() is used as that should equal the current state of the workqueue. The planned NUMA affinity support will make unbound workqueues make use of multiple pool_workqueues for different NUMA nodes and the above assumption will no longer hold.
Introduce workqueue->unbound_attrs which records the current attrs in effect and use it for sysfs instead of first_pwq()->attrs. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3e18c7b865eb..32b474463053 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -244,6 +244,8 @@ struct workqueue_struct { int nr_drainers; /* WQ: drain in progress */ int saved_max_active; /* WQ: saved pwq max_active */ + struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */ + #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ #endif @@ -3088,10 +3090,9 @@ static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, struct workqueue_struct *wq = dev_to_wq(dev); int written; - rcu_read_lock_sched(); - written = scnprintf(buf, PAGE_SIZE, "%d\n", - first_pwq(wq)->pool->attrs->nice); - rcu_read_unlock_sched(); + mutex_lock(&wq->mutex); + written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); + mutex_unlock(&wq->mutex); return written; } @@ -3105,9 +3106,9 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) if (!attrs) return NULL; - rcu_read_lock_sched(); - copy_workqueue_attrs(attrs, first_pwq(wq)->pool->attrs); - rcu_read_unlock_sched(); + mutex_lock(&wq->mutex); + copy_workqueue_attrs(attrs, wq->unbound_attrs); + mutex_unlock(&wq->mutex); return attrs; } @@ -3138,10 +3139,9 @@ static ssize_t wq_cpumask_show(struct device *dev, struct workqueue_struct *wq = dev_to_wq(dev); int written; - rcu_read_lock_sched(); - written = cpumask_scnprintf(buf, PAGE_SIZE, - first_pwq(wq)->pool->attrs->cpumask); - rcu_read_unlock_sched(); + mutex_lock(&wq->mutex); + written = cpumask_scnprintf(buf, PAGE_SIZE, wq->unbound_attrs->cpumask); + mutex_unlock(&wq->mutex); written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); return written; @@ -3558,8 +3558,10 @@ static void pwq_unbound_release_workfn(struct work_struct *work) * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Free it. */ - if (is_last) + if (is_last) { + free_workqueue_attrs(wq->unbound_attrs); kfree(wq); + } } /** @@ -3634,6 +3636,9 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, /* link in @pwq */ list_add_rcu(&pwq->pwqs_node, &wq->pwqs); + if (wq->flags & WQ_UNBOUND) + copy_workqueue_attrs(wq->unbound_attrs, pool->attrs); + mutex_unlock(&wq->mutex); } @@ -3766,6 +3771,12 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, if (!wq) return NULL; + if (flags & WQ_UNBOUND) { + wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!wq->unbound_attrs) + goto err_free_wq; + } + vsnprintf(wq->name, namelen, fmt, args1); va_end(args); va_end(args1); @@ -3835,6 +3846,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, return wq; err_free_wq: + free_workqueue_attrs(wq->unbound_attrs); kfree(wq); return NULL; err_destroy: -- cgit v1.2.2 From ecf6881ff349ad8670ec53a7586002d20b5f3b2e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:34 -0700 Subject: workqueue: make workqueue->name[] fixed len Currently workqueue->name[] is of flexible length. We want to use the flexible field for something more useful and there isn't much benefit in allowing arbitrary name length anyway. Make it fixed len capping at 24 bytes. 
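The move to a fixed-length buffer leans on vsnprintf() truncating instead of overflowing, so an over-long format costs a shortened name rather than a corrupted allocation. A small userspace sketch of the same formatting pattern, assuming the 24-byte cap from the patch (illustrative only, not the kernel code):

#include <stdarg.h>
#include <stdio.h>

#define WQ_NAME_LEN 24

struct wq { char name[WQ_NAME_LEN]; };

/* Format a name into the fixed buffer; long names truncate, never overrun. */
static void set_wq_name(struct wq *wq, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vsnprintf(wq->name, sizeof(wq->name), fmt, args);
	va_end(args);
}

int main(void)
{
	struct wq wq;

	set_wq_name(&wq, "events_unbound_%d", 123456789);
	printf("%s\n", wq.name);	/* at most 23 characters plus the NUL */
	return 0;
}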
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 32b474463053..c8c5838c52c9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -101,6 +101,8 @@ enum { */ RESCUER_NICE_LEVEL = -20, HIGHPRI_NICE_LEVEL = -20, + + WQ_NAME_LEN = 24, }; /* @@ -252,7 +254,7 @@ struct workqueue_struct { #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif - char name[]; /* I: workqueue name */ + char name[WQ_NAME_LEN]; /* I: workqueue name */ }; static struct kmem_cache *pwq_cache; @@ -3757,17 +3759,12 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, struct lock_class_key *key, const char *lock_name, ...) { - va_list args, args1; + va_list args; struct workqueue_struct *wq; struct pool_workqueue *pwq; - size_t namelen; - - /* determine namelen, allocate wq and format name */ - va_start(args, lock_name); - va_copy(args1, args); - namelen = vsnprintf(NULL, 0, fmt, args) + 1; - wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL); + /* allocate wq and format name */ + wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) return NULL; @@ -3777,9 +3774,9 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, goto err_free_wq; } - vsnprintf(wq->name, namelen, fmt, args1); + va_start(args, lock_name); + vsnprintf(wq->name, sizeof(wq->name), fmt, args); va_end(args); - va_end(args1); max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, flags, wq->name); -- cgit v1.2.2 From 2728fd2f098c3cc5efaf3f0433855e579d5e4f28 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:35 -0700 Subject: workqueue: move hot fields of workqueue_struct to the end Move wq->flags and ->cpu_pwqs to the end of workqueue_struct and align them to the cacheline. These two fields are used in the work item issue path and thus hot. The scheduled NUMA affinity support will add a dispatch table at the end of workqueue_struct and relocating these two fields will allow us to hit only a single cacheline on hot paths. Note that wq->pwqs isn't moved although it currently is being used in the work item issue path for unbound workqueues. The dispatch table mentioned above will replace its use in the issue path, so it will become cold once NUMA support is implemented. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c8c5838c52c9..4c53fa216732 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -227,8 +227,6 @@ struct wq_device; * the appropriate worker_pool through its pool_workqueues.
*/ struct workqueue_struct { - unsigned int flags; /* WQ: WQ_* flags */ - struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */ struct list_head pwqs; /* WR: all pwqs of this wq */ struct list_head list; /* PL: list of all workqueues */ @@ -255,6 +253,10 @@ struct workqueue_struct { struct lockdep_map lockdep_map; #endif char name[WQ_NAME_LEN]; /* I: workqueue name */ + + /* hot fields used during command issue, aligned to cacheline */ + unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ + struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */ }; static struct kmem_cache *pwq_cache; -- cgit v1.2.2 From df2d5ae4995b3fb9392b6089b9623d20b6c3a542 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:35 -0700 Subject: workqueue: map an unbound workqueue to multiple per-node pool_workqueues Currently, an unbound workqueue has only one "current" pool_workqueue associated with it. It may have multiple pool_workqueues but only the first pool_workqueue serves new work items. For NUMA affinity, we want to change this so that there are multiple current pool_workqueues serving different NUMA nodes. Introduce workqueue->numa_pwq_tbl[] which is indexed by NUMA node and points to the pool_workqueue to use for each possible node. This replaces first_pwq() in __queue_work() and workqueue_congested(). numa_pwq_tbl[] is currently initialized to point to the same pool_workqueue as first_pwq() so this patch doesn't make any behavior changes. v2: Use rcu_dereference_raw() in unbound_pwq_by_node() as the function may be called only with wq->mutex held. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 48 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4c53fa216732..170226a24da8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -257,6 +257,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */ + struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */ }; static struct kmem_cache *pwq_cache; @@ -525,6 +526,22 @@ static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) pwqs_node); } +/** + * unbound_pwq_by_node - return the unbound pool_workqueue for the given node + * @wq: the target workqueue + * @node: the node ID + * + * This must be called either with pwq_lock held or sched RCU read locked. + * If the pwq needs to be used beyond the locking in effect, the caller is + * responsible for guaranteeing that the pwq stays online.
+ */ +static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, + int node) +{ + assert_rcu_or_wq_mutex(wq); + return rcu_dereference_raw(wq->numa_pwq_tbl[node]); +} + static unsigned int work_color_to_flags(int color) { return color << WORK_STRUCT_COLOR_SHIFT; @@ -1278,14 +1295,14 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; retry: + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) { - if (cpu == WORK_CPU_UNBOUND) - cpu = raw_smp_processor_id(); + if (!(wq->flags & WQ_UNBOUND)) pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - } else { - pwq = first_pwq(wq); - } + else + pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); /* * If @work was previously on a different pool, it might still be @@ -1315,8 +1332,8 @@ retry: * pwq is determined and locked. For unbound pools, we could have * raced with pwq release and it could already be dead. If its * refcnt is zero, repeat pwq selection. Note that pwqs never die - * without another pwq replacing it as the first pwq or while a - * work item is executing on it, so the retying is guaranteed to + * without another pwq replacing it in the numa_pwq_tbl or while + * work items are executing on it, so the retrying is guaranteed to * make forward-progress. */ if (unlikely(!pwq->refcnt)) { @@ -3614,6 +3631,8 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, struct worker_pool *pool, struct pool_workqueue **p_last_pwq) { + int node; + BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); pwq->pool = pool; @@ -3640,8 +3659,11 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, /* link in @pwq */ list_add_rcu(&pwq->pwqs_node, &wq->pwqs); - if (wq->flags & WQ_UNBOUND) + if (wq->flags & WQ_UNBOUND) { copy_workqueue_attrs(wq->unbound_attrs, pool->attrs); + for_each_node(node) + rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq); + } mutex_unlock(&wq->mutex); } @@ -3761,12 +3783,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, struct lock_class_key *key, const char *lock_name, ...) { + size_t tbl_size = 0; va_list args; struct workqueue_struct *wq; struct pool_workqueue *pwq; /* allocate wq and format name */ - wq = kzalloc(sizeof(*wq), GFP_KERNEL); + if (flags & WQ_UNBOUND) + tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]); + + wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL); if (!wq) return NULL; @@ -3994,7 +4020,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) if (!(wq->flags & WQ_UNBOUND)) pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); else - pwq = first_pwq(wq); + pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); ret = !list_empty(&pwq->delayed_works); rcu_read_unlock_sched(); -- cgit v1.2.2 From f147f29eb7c4959e5f8be604ce2d23979c86378c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:35 -0700 Subject: workqueue: break init_and_link_pwq() into two functions and introduce alloc_unbound_pwq() Break init_and_link_pwq() into init_pwq() and link_pwq() and move unbound-workqueue specific handling into apply_workqueue_attrs(). Also, factor out unbound pool and pool_workqueue allocation into alloc_unbound_pwq(). This reorganization is to prepare for NUMA affinity and doesn't introduce any functional changes. 
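What the refactoring buys is a composable building block: a helper that takes a pool reference, then allocates the pwq, and unwinds the reference if the second step fails. A hedged userspace sketch of that alloc-then-unwind shape (names loosely modelled on the patch, not the kernel code):

#include <stdlib.h>

struct pool { int refcnt; };
struct pwq { struct pool *pool; };

static struct pool shared_pool;

static struct pool *get_pool(void)
{
	shared_pool.refcnt++;		/* take a reference */
	return &shared_pool;
}

static void put_pool(struct pool *p)
{
	p->refcnt--;			/* drop the reference */
}

/* Obtain a pool, then a pwq bound to it; undo the pool ref on failure. */
static struct pwq *alloc_pwq(void)
{
	struct pool *pool = get_pool();
	struct pwq *pwq = malloc(sizeof(*pwq));

	if (!pwq) {
		put_pool(pool);		/* unwind in reverse order */
		return NULL;
	}
	pwq->pool = pool;
	return pwq;
}

int main(void)
{
	struct pwq *pwq = alloc_pwq();

	if (pwq) {
		put_pool(pwq->pool);
		free(pwq);
	}
	return 0;
}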
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 78 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 170226a24da8..c8d047b6c895 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3626,13 +3626,10 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) spin_unlock_irq(&pwq->pool->lock); } -static void init_and_link_pwq(struct pool_workqueue *pwq, - struct workqueue_struct *wq, - struct worker_pool *pool, - struct pool_workqueue **p_last_pwq) +/* initialize newly zalloced @pwq which is associated with @wq and @pool */ +static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, + struct worker_pool *pool) { - int node; - BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); pwq->pool = pool; @@ -3642,8 +3639,15 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, INIT_LIST_HEAD(&pwq->delayed_works); INIT_LIST_HEAD(&pwq->mayday_node); INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); +} - mutex_lock(&wq->mutex); +/* sync @pwq with the current state of its associated wq and link it */ +static void link_pwq(struct pool_workqueue *pwq, + struct pool_workqueue **p_last_pwq) +{ + struct workqueue_struct *wq = pwq->wq; + + lockdep_assert_held(&wq->mutex); /* * Set the matching work_color. This is synchronized with @@ -3658,14 +3662,29 @@ static void init_and_link_pwq(struct pool_workqueue *pwq, /* link in @pwq */ list_add_rcu(&pwq->pwqs_node, &wq->pwqs); +} - if (wq->flags & WQ_UNBOUND) { - copy_workqueue_attrs(wq->unbound_attrs, pool->attrs); - for_each_node(node) - rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq); +/* obtain a pool matching @attr and create a pwq associating the pool and @wq */ +static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct worker_pool *pool; + struct pool_workqueue *pwq; + + lockdep_assert_held(&wq_pool_mutex); + + pool = get_unbound_pool(attrs); + if (!pool) + return NULL; + + pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); + if (!pwq) { + put_unbound_pool(pool); + return NULL; } - mutex_unlock(&wq->mutex); + init_pwq(pwq, wq, pool); + return pwq; } /** @@ -3686,9 +3705,8 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { struct workqueue_attrs *new_attrs; - struct pool_workqueue *pwq = NULL, *last_pwq; - struct worker_pool *pool; - int ret; + struct pool_workqueue *pwq, *last_pwq; + int node, ret; /* only unbound workqueues can change attributes */ if (WARN_ON(!(wq->flags & WQ_UNBOUND))) @@ -3707,22 +3725,21 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); mutex_lock(&wq_pool_mutex); - - pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); - if (!pwq) { - mutex_unlock(&wq_pool_mutex); + pwq = alloc_unbound_pwq(wq, new_attrs); + mutex_unlock(&wq_pool_mutex); + if (!pwq) goto enomem; - } - pool = get_unbound_pool(new_attrs); - if (!pool) { - mutex_unlock(&wq_pool_mutex); - goto enomem; - } + mutex_lock(&wq->mutex); - mutex_unlock(&wq_pool_mutex); + link_pwq(pwq, &last_pwq); + + copy_workqueue_attrs(wq->unbound_attrs, new_attrs); + for_each_node(node) + rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq); + + mutex_unlock(&wq->mutex); - init_and_link_pwq(pwq, wq, pool, &last_pwq); if (last_pwq) { spin_lock_irq(&last_pwq->pool->lock); put_pwq(last_pwq); @@ -3736,7 +3753,6 @@ out_free: 
return ret; enomem: - kmem_cache_free(pwq_cache, pwq); ret = -ENOMEM; goto out_free; } @@ -3757,7 +3773,11 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) struct worker_pool *cpu_pools = per_cpu(cpu_worker_pools, cpu); - init_and_link_pwq(pwq, wq, &cpu_pools[highpri], NULL); + init_pwq(pwq, wq, &cpu_pools[highpri]); + + mutex_lock(&wq->mutex); + link_pwq(pwq, NULL); + mutex_unlock(&wq->mutex); } return 0; } else { -- cgit v1.2.2 From e50aba9aea63b7617887b4d9694184f478731c82 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:35 -0700 Subject: workqueue: use NUMA-aware allocation for pool_workqueues Use kmem_cache_alloc_node() with @pool->node instead of kmem_cache_zalloc() when allocating a pool_workqueue so that it's allocated on the same node as the associated worker_pool. As there's no kmem_cache_zalloc_node(), move zeroing to init_pwq(). This was suggested by Lai Jiangshan. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c8d047b6c895..07ec57459457 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3626,12 +3626,14 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) spin_unlock_irq(&pwq->pool->lock); } -/* initialize newly zalloced @pwq which is associated with @wq and @pool */ +/* initialize newly alloced @pwq which is associated with @wq and @pool */ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, struct worker_pool *pool) { BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); + memset(pwq, 0, sizeof(*pwq)); + pwq->pool = pool; pwq->wq = wq; pwq->flush_color = -1; @@ -3677,7 +3679,7 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, if (!pool) return NULL; - pwq = kmem_cache_zalloc(pwq_cache, GFP_KERNEL); + pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); if (!pwq) { put_unbound_pool(pool); return NULL; } -- cgit v1.2.2 From 1befcf3073fa083e7dc48c384ce06f3bd900f514 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:35 -0700 Subject: workqueue: introduce numa_pwq_tbl_install() Factor out pool_workqueue linking and installation into numa_pwq_tbl[] from apply_workqueue_attrs() into numa_pwq_tbl_install(). link_pwq() is made safe to call multiple times. numa_pwq_tbl_install() links the pwq, installs it into numa_pwq_tbl[] at the specified node and returns the old entry. @last_pwq is removed from link_pwq() as the return value of the new function can be used instead. This is to prepare for NUMA affinity support for unbound workqueues.
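The helper's contract, publish the new entry and hand the displaced one back for the caller to release, is what lets the attrs-application path collect old pwqs while walking the table. A sketch with the RCU machinery stripped out; plain assignment stands in for rcu_assign_pointer(), and the names and eight-node table size are assumptions:

#include <stddef.h>

#define MAX_NODES 8

struct pwq;				/* opaque here */

struct wq {
	struct pwq *numa_pwq_tbl[MAX_NODES];
};

/* Install @pwq for @node and return the displaced entry (may be NULL). */
static struct pwq *tbl_install(struct wq *wq, int node, struct pwq *pwq)
{
	struct pwq *old = wq->numa_pwq_tbl[node];

	wq->numa_pwq_tbl[node] = pwq;	/* the kernel uses rcu_assign_pointer() */
	return old;			/* caller drops the old entry's reference */
}

int main(void)
{
	struct wq wq = { { NULL } };

	tbl_install(&wq, 0, NULL);	/* no previous entry: returns NULL */
	return 0;
}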
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 07ec57459457..3825c14304e1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3639,24 +3639,26 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, pwq->flush_color = -1; pwq->refcnt = 1; INIT_LIST_HEAD(&pwq->delayed_works); + INIT_LIST_HEAD(&pwq->pwqs_node); INIT_LIST_HEAD(&pwq->mayday_node); INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); } /* sync @pwq with the current state of its associated wq and link it */ -static void link_pwq(struct pool_workqueue *pwq, - struct pool_workqueue **p_last_pwq) +static void link_pwq(struct pool_workqueue *pwq) { struct workqueue_struct *wq = pwq->wq; lockdep_assert_held(&wq->mutex); + /* may be called multiple times, ignore if already linked */ + if (!list_empty(&pwq->pwqs_node)) + return; + /* * Set the matching work_color. This is synchronized with * wq->mutex to avoid confusing flush_workqueue(). */ - if (p_last_pwq) - *p_last_pwq = first_pwq(wq); pwq->work_color = wq->work_color; /* sync max_active to the current setting */ @@ -3689,6 +3691,23 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, return pwq; } +/* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */ +static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, + int node, + struct pool_workqueue *pwq) +{ + struct pool_workqueue *old_pwq; + + lockdep_assert_held(&wq->mutex); + + /* link_pwq() can handle duplicate calls */ + link_pwq(pwq); + + old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]); + rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq); + return old_pwq; +} + /** * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue * @wq: the target workqueue @@ -3707,7 +3726,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { struct workqueue_attrs *new_attrs; - struct pool_workqueue *pwq, *last_pwq; + struct pool_workqueue *pwq, *last_pwq = NULL; int node, ret; /* only unbound workqueues can change attributes */ @@ -3734,11 +3753,9 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, mutex_lock(&wq->mutex); - link_pwq(pwq, &last_pwq); - copy_workqueue_attrs(wq->unbound_attrs, new_attrs); for_each_node(node) - rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq); + last_pwq = numa_pwq_tbl_install(wq, node, pwq); mutex_unlock(&wq->mutex); @@ -3778,7 +3795,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) init_pwq(pwq, wq, &cpu_pools[highpri]); mutex_lock(&wq->mutex); - link_pwq(pwq, NULL); + link_pwq(pwq); mutex_unlock(&wq->mutex); } return 0; -- cgit v1.2.2 From dce90d47c4288c7d3c1988bebb059ea7451d5fd5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:35 -0700 Subject: workqueue: introduce put_pwq_unlocked() Factor out lock pool, put_pwq(), unlock sequence into put_pwq_unlocked(). The two existing places are converted and there will be more with NUMA affinity support. This is to prepare for NUMA affinity support for unbound workqueues and doesn't introduce any functional difference. 
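Wrapping the lock/put/unlock dance in one helper that also tolerates NULL lets later callers put "maybe" pointers without boilerplate. A hedged pthread-based sketch of the same shape; the kernel version takes the pool's spinlock under sched-RCU, which a plain mutex only approximates:

#include <pthread.h>
#include <stddef.h>

struct pool {
	pthread_mutex_t lock;
};

struct pwq {
	struct pool *pool;
	int refcnt;
};

/* Caller must hold pwq->pool->lock. */
static void put_pwq(struct pwq *pwq)
{
	pwq->refcnt--;
}

/* put_pwq() with the pool lock taken and released here; allows NULL @pwq. */
static void put_pwq_unlocked(struct pwq *pwq)
{
	if (pwq) {
		pthread_mutex_lock(&pwq->pool->lock);
		put_pwq(pwq);
		pthread_mutex_unlock(&pwq->pool->lock);
	}
}

int main(void)
{
	struct pool pool = { PTHREAD_MUTEX_INITIALIZER };
	struct pwq pwq = { &pool, 1 };

	put_pwq_unlocked(&pwq);
	put_pwq_unlocked(NULL);		/* NULL is a no-op by design */
	return 0;
}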
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3825c14304e1..d9a4aeb844d5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1057,6 +1057,25 @@ static void put_pwq(struct pool_workqueue *pwq) schedule_work(&pwq->unbound_release_work); } +/** + * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock + * @pwq: pool_workqueue to put (can be %NULL) + * + * put_pwq() with locking. This function also allows %NULL @pwq. + */ +static void put_pwq_unlocked(struct pool_workqueue *pwq) +{ + if (pwq) { + /* + * As both pwqs and pools are sched-RCU protected, the + * following lock operations are safe. + */ + spin_lock_irq(&pwq->pool->lock); + put_pwq(pwq); + spin_unlock_irq(&pwq->pool->lock); + } +} + static void pwq_activate_delayed_work(struct work_struct *work) { struct pool_workqueue *pwq = get_work_pwq(work); @@ -3759,12 +3778,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, mutex_unlock(&wq->mutex); - if (last_pwq) { - spin_lock_irq(&last_pwq->pool->lock); - put_pwq(last_pwq); - spin_unlock_irq(&last_pwq->pool->lock); - } - + put_pwq_unlocked(last_pwq); ret = 0; /* fall through */ out_free: @@ -3979,16 +3993,12 @@ void destroy_workqueue(struct workqueue_struct *wq) } else { /* * We're the sole accessor of @wq at this point. Directly - * access the first pwq and put the base ref. As both pwqs - * and pools are sched-RCU protected, the lock operations - * are safe. @wq will be freed when the last pwq is - * released. + * access the first pwq and put the base ref. @wq will be + * freed when the last pwq is released. */ pwq = list_first_entry(&wq->pwqs, struct pool_workqueue, pwqs_node); - spin_lock_irq(&pwq->pool->lock); - put_pwq(pwq); - spin_unlock_irq(&pwq->pool->lock); + put_pwq_unlocked(pwq); } } EXPORT_SYMBOL_GPL(destroy_workqueue); -- cgit v1.2.2 From 4c16bd327c74d6678858706211a0c6e4e53eb3e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:36 -0700 Subject: workqueue: implement NUMA affinity for unbound workqueues Currently, an unbound workqueue has a single current, or first, pwq (pool_workqueue) to which all new work items are queued. This often isn't optimal on NUMA machines as workers may jump around across node boundaries and work items get assigned to workers without any regard to NUMA affinity. This patch implements NUMA affinity for unbound workqueues. Instead of mapping all entries of numa_pwq_tbl[] to the same pwq, apply_workqueue_attrs() now creates a separate pwq covering the intersecting CPUs for each NUMA node which has online CPUs in @attrs->cpumask. Nodes which don't have intersecting possible CPUs are mapped to pwqs covering whole @attrs->cpumask. As CPUs come up and go down, the pool association is changed accordingly. Changing pool association may involve allocating new pools which may fail. To avoid failing CPU_DOWN, each workqueue always keeps a default pwq which covers the whole attrs->cpumask and which is used as a fallback if pool creation fails during a CPU hotplug operation. This ensures that all work items issued on a NUMA node are executed on the same node as long as the workqueue allows execution on the CPUs of the node. As this maps a workqueue to multiple pwqs and max_active is per-pwq, this changes the behavior of max_active. The limit is now per NUMA node instead of global.
While this is an actual change, max_active is already per-cpu for per-cpu workqueues and primarily used as a safety mechanism rather than for active concurrency control. Concurrency is usually limited by workqueue users through the number of concurrently active work items, so this change shouldn't matter much. v2: Fixed pwq freeing in apply_workqueue_attrs() error path. Spotted by Lai. v3: The previous version incorrectly made a workqueue spanning multiple nodes spread work items over all online CPUs when some of its nodes don't have any desired cpus. Reimplemented so that NUMA affinity is properly updated as CPUs go up and down. This problem was spotted by Lai Jiangshan. v4: destroy_workqueue() was putting wq->dfl_pwq and then clearing it; however, wq may be freed at any time after dfl_pwq is put, making the clearing a use-after-free. Clear wq->dfl_pwq before putting it. v5: apply_workqueue_attrs() was leaking @tmp_attrs, @new_attrs and @pwq_tbl after success. Fixed. Retry loop in wq_update_unbound_numa_attrs() isn't necessary as application of new attrs is excluded via CPU hotplug. Removed. Documentation on CPU affinity guarantee on CPU_DOWN added. All changes were suggested by Lai Jiangshan. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 278 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 259 insertions(+), 19 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d9a4aeb844d5..57cd77de4a4f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -245,6 +246,7 @@ struct workqueue_struct { int saved_max_active; /* WQ: saved pwq max_active */ struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */ + struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ @@ -268,6 +270,9 @@ static cpumask_var_t *wq_numa_possible_cpumask; static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ +/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ +static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf; + static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ @@ -3710,6 +3715,61 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, return pwq; } +/* undo alloc_unbound_pwq(), used only in the error path */ +static void free_unbound_pwq(struct pool_workqueue *pwq) +{ + lockdep_assert_held(&wq_pool_mutex); + + if (pwq) { + put_unbound_pool(pwq->pool); + kfree(pwq); + } +} + +/** + * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node + * @attrs: the wq_attrs of interest + * @node: the target NUMA node + * @cpu_going_down: if >= 0, the CPU to consider as offline + * @cpumask: outarg, the resulting cpumask + * + * Calculate the cpumask a workqueue with @attrs should use on @node. If + * @cpu_going_down is >= 0, that cpu is considered offline during + * calculation. The result is stored in @cpumask. This function returns + * %true if the resulting @cpumask is different from @attrs->cpumask, + * %false if equal. + * + * If NUMA affinity is not enabled, @attrs->cpumask is always used. If + * enabled and @node has online CPUs requested by @attrs, the returned + * cpumask is the intersection of the possible CPUs of @node and + * @attrs->cpumask.
+ * + * The caller is responsible for ensuring that the cpumask of @node stays + * stable. + */ +static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node, + int cpu_going_down, cpumask_t *cpumask) +{ + if (!wq_numa_enabled) + goto use_dfl; + + /* does @node have any online CPUs @attrs wants? */ + cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask); + if (cpu_going_down >= 0) + cpumask_clear_cpu(cpu_going_down, cpumask); + + if (cpumask_empty(cpumask)) + goto use_dfl; + + /* yeap, return possible CPUs in @node that @attrs wants */ + cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]); + return !cpumask_equal(cpumask, attrs->cpumask); + +use_dfl: + cpumask_copy(cpumask, attrs->cpumask); + return false; +} + /* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, int node, @@ -3732,11 +3792,12 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, * @wq: the target workqueue * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() * - * Apply @attrs to an unbound workqueue @wq. If @attrs doesn't match the - * current attributes, a new pwq is created and made the first pwq which - * will serve all new work items. Older pwqs are released as in-flight - * work items finish. Note that a work item which repeatedly requeues - * itself back-to-back will stay on its current pwq. + * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA + * machines, this function maps a separate pwq to each NUMA node with + * possibles CPUs in @attrs->cpumask so that work items are affine to the + * NUMA node it was issued on. Older pwqs are released as in-flight work + * items finish. Note that a work item which repeatedly requeues itself + * back-to-back will stay on its current pwq. * * Performs GFP_KERNEL allocations. Returns 0 on success and -errno on * failure. @@ -3744,8 +3805,8 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { - struct workqueue_attrs *new_attrs; - struct pool_workqueue *pwq, *last_pwq = NULL; + struct workqueue_attrs *new_attrs, *tmp_attrs; + struct pool_workqueue **pwq_tbl, *dfl_pwq; int node, ret; /* only unbound workqueues can change attributes */ @@ -3756,40 +3817,191 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) return -EINVAL; - /* make a copy of @attrs and sanitize it */ + pwq_tbl = kzalloc(wq_numa_tbl_len * sizeof(pwq_tbl[0]), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(GFP_KERNEL); - if (!new_attrs) + tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!pwq_tbl || !new_attrs || !tmp_attrs) goto enomem; + /* make a copy of @attrs and sanitize it */ copy_workqueue_attrs(new_attrs, attrs); cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); + /* + * We may create multiple pwqs with differing cpumasks. Make a + * copy of @new_attrs which will be modified and used to obtain + * pools. + */ + copy_workqueue_attrs(tmp_attrs, new_attrs); + + /* + * CPUs should stay stable across pwq creations and installations. + * Pin CPUs, determine the target cpumask for each node and create + * pwqs accordingly. 
+ */ + get_online_cpus(); + mutex_lock(&wq_pool_mutex); - pwq = alloc_unbound_pwq(wq, new_attrs); + + /* + * If something goes wrong during CPU up/down, we'll fall back to + * the default pwq covering whole @attrs->cpumask. Always create + * it even if we don't use it immediately. + */ + dfl_pwq = alloc_unbound_pwq(wq, new_attrs); + if (!dfl_pwq) + goto enomem_pwq; + + for_each_node(node) { + if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) { + pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); + if (!pwq_tbl[node]) + goto enomem_pwq; + } else { + dfl_pwq->refcnt++; + pwq_tbl[node] = dfl_pwq; + } + } + mutex_unlock(&wq_pool_mutex); - if (!pwq) - goto enomem; + /* all pwqs have been created successfully, let's install'em */ mutex_lock(&wq->mutex); copy_workqueue_attrs(wq->unbound_attrs, new_attrs); + + /* save the previous pwq and install the new one */ for_each_node(node) - last_pwq = numa_pwq_tbl_install(wq, node, pwq); + pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]); + + /* @dfl_pwq might not have been used, ensure it's linked */ + link_pwq(dfl_pwq); + swap(wq->dfl_pwq, dfl_pwq); mutex_unlock(&wq->mutex); - put_pwq_unlocked(last_pwq); + /* put the old pwqs */ + for_each_node(node) + put_pwq_unlocked(pwq_tbl[node]); + put_pwq_unlocked(dfl_pwq); + + put_online_cpus(); ret = 0; /* fall through */ out_free: + free_workqueue_attrs(tmp_attrs); free_workqueue_attrs(new_attrs); + kfree(pwq_tbl); return ret; +enomem_pwq: + free_unbound_pwq(dfl_pwq); + for_each_node(node) + if (pwq_tbl && pwq_tbl[node] != dfl_pwq) + free_unbound_pwq(pwq_tbl[node]); + mutex_unlock(&wq_pool_mutex); + put_online_cpus(); enomem: ret = -ENOMEM; goto out_free; } +/** + * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug + * @wq: the target workqueue + * @cpu: the CPU coming up or going down + * @online: whether @cpu is coming up or going down + * + * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and + * %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update NUMA affinity of + * @wq accordingly. + * + * If NUMA affinity can't be adjusted due to memory allocation failure, it + * falls back to @wq->dfl_pwq which may not be optimal but is always + * correct. + * + * Note that when the last allowed CPU of a NUMA node goes offline for a + * workqueue with a cpumask spanning multiple nodes, the workers which were + * already executing the work items for the workqueue will lose their CPU + * affinity and may execute on any CPU. This is similar to how per-cpu + * workqueues behave on CPU_DOWN. If a workqueue user wants strict + * affinity, it's the user's responsibility to flush the work item from + * CPU_DOWN_PREPARE. + */ +static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, + bool online) +{ + int node = cpu_to_node(cpu); + int cpu_off = online ? -1 : cpu; + struct pool_workqueue *old_pwq = NULL, *pwq; + struct workqueue_attrs *target_attrs; + cpumask_t *cpumask; + + lockdep_assert_held(&wq_pool_mutex); + + if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND)) + return; + + /* + * We don't wanna alloc/free wq_attrs for each wq for each CPU. + * Let's use a preallocated one. The following buf is protected by + * CPU hotplug exclusion. + */ + target_attrs = wq_update_unbound_numa_attrs_buf; + cpumask = target_attrs->cpumask; + + mutex_lock(&wq->mutex); + + copy_workqueue_attrs(target_attrs, wq->unbound_attrs); + pwq = unbound_pwq_by_node(wq, node); + + /* + * Let's determine what needs to be done. 
If the target cpumask is + * different from wq's, we need to compare it to @pwq's and create + * a new one if they don't match. If the target cpumask equals + * wq's, the default pwq should be used. If @pwq is already the + * default one, nothing to do; otherwise, install the default one. + */ + if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { + if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) + goto out_unlock; + } else { + if (pwq == wq->dfl_pwq) + goto out_unlock; + else + goto use_dfl_pwq; + } + + mutex_unlock(&wq->mutex); + + /* create a new pwq */ + pwq = alloc_unbound_pwq(wq, target_attrs); + if (!pwq) { + pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", + wq->name); + goto out_unlock; + } + + /* + * Install the new pwq. As this function is called only from CPU + * hotplug callbacks and applying a new attrs is wrapped with + * get/put_online_cpus(), @wq->unbound_attrs couldn't have changed + * inbetween. + */ + mutex_lock(&wq->mutex); + old_pwq = numa_pwq_tbl_install(wq, node, pwq); + goto out_unlock; + +use_dfl_pwq: + spin_lock_irq(&wq->dfl_pwq->pool->lock); + get_pwq(wq->dfl_pwq); + spin_unlock_irq(&wq->dfl_pwq->pool->lock); + old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq); +out_unlock: + mutex_unlock(&wq->mutex); + put_pwq_unlocked(old_pwq); +} + static int alloc_and_link_pwqs(struct workqueue_struct *wq) { bool highpri = wq->flags & WQ_HIGHPRI; @@ -3942,6 +4154,7 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key); void destroy_workqueue(struct workqueue_struct *wq) { struct pool_workqueue *pwq; + int node; /* drain it before proceeding with destruction */ drain_workqueue(wq); @@ -3993,11 +4206,21 @@ void destroy_workqueue(struct workqueue_struct *wq) } else { /* * We're the sole accessor of @wq at this point. Directly - * access the first pwq and put the base ref. @wq will be - * freed when the last pwq is released. + * access numa_pwq_tbl[] and dfl_pwq to put the base refs. + * @wq will be freed when the last pwq is released. */ - pwq = list_first_entry(&wq->pwqs, struct pool_workqueue, - pwqs_node); + for_each_node(node) { + pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]); + RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL); + put_pwq_unlocked(pwq); + } + + /* + * Put dfl_pwq. @wq may be freed any time after dfl_pwq is + * put. Don't access it afterwards. 
+ */ + pwq = wq->dfl_pwq; + wq->dfl_pwq = NULL; put_pwq_unlocked(pwq); } } @@ -4285,6 +4508,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, { int cpu = (unsigned long)hcpu; struct worker_pool *pool; + struct workqueue_struct *wq; int pi; switch (action & ~CPU_TASKS_FROZEN) { @@ -4317,6 +4541,10 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, mutex_unlock(&pool->manager_mutex); } + /* update NUMA affinity of unbound workqueues */ + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, true); + mutex_unlock(&wq_pool_mutex); break; } @@ -4333,12 +4561,21 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, { int cpu = (unsigned long)hcpu; struct work_struct unbind_work; + struct workqueue_struct *wq; switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: - /* unbinding should happen on the local CPU */ + /* unbinding per-cpu workers should happen on the local CPU */ INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); queue_work_on(cpu, system_highpri_wq, &unbind_work); + + /* update NUMA affinity of unbound workqueues */ + mutex_lock(&wq_pool_mutex); + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, false); + mutex_unlock(&wq_pool_mutex); + + /* wait for per-cpu unbinding to finish */ flush_work(&unbind_work); break; } @@ -4526,6 +4763,9 @@ static void __init wq_numa_init(void) if (num_possible_nodes() <= 1) return; + wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL); + BUG_ON(!wq_update_unbound_numa_attrs_buf); + /* * We want masks of possible CPUs of each node which isn't readily * available. Build one from cpu_to_node() which should have been -- cgit v1.2.2 From d55262c4d164759a8debe772da6c9b16059dec47 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 11:23:38 -0700 Subject: workqueue: update sysfs interface to reflect NUMA awareness and a kernel param to disable NUMA affinity Unbound workqueues are now NUMA aware. Let's add some control knobs and update the sysfs interface accordingly. * Add kernel param workqueue.disable_numa which disables NUMA affinity globally. * Replace sysfs file "pool_id" with "pool_ids" which contains node:pool_id pairs. This change is userland-visible but "pool_id" hasn't seen a release yet, so this is okay. * Add a new sysfs file "numa" which can toggle NUMA affinity on individual workqueues. This is implemented as attrs->no_numa which is special in that it isn't part of a pool's attributes. It only affects how apply_workqueue_attrs() picks which pools to use. After the "pool_ids" change, first_pwq() doesn't have any user left. Removed. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- Documentation/kernel-parameters.txt | 9 ++++ include/linux/workqueue.h | 5 +++ kernel/workqueue.c | 82 ++++++++++++++++++++++++++----------- 3 files changed, 73 insertions(+), 23 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..c75ea0b8ec59 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3222,6 +3222,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. or other driver-specific files in the Documentation/watchdog/ directory. + workqueue.disable_numa + By default, all work items queued to unbound + workqueues are affine to the NUMA nodes they're + issued on, which results in better behavior in + general.
If NUMA affinity needs to be disabled for + whatever reason, this option can be used. Note + that this also can be controlled per-workqueue for + workqueues visible under /sys/bus/workqueue/. + x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of default x2apic cluster mode on platforms supporting x2apic. diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 835d12b76960..717975639378 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -119,10 +119,15 @@ struct delayed_work { /* * A struct for workqueue attributes. This can be used to change * attributes of an unbound workqueue. + * + * Unlike other fields, ->no_numa isn't a property of a worker_pool. It + * only modifies how apply_workqueue_attrs() select pools and thus doesn't + * participate in pool hash calculations or equality comparisons. */ struct workqueue_attrs { int nice; /* nice level */ cpumask_var_t cpumask; /* allowed CPUs */ + bool no_numa; /* disable NUMA affinity */ }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 57cd77de4a4f..729ac6a44860 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -268,6 +268,9 @@ static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */ static cpumask_var_t *wq_numa_possible_cpumask; /* possible CPUs of each node */ +static bool wq_disable_numa; +module_param_named(disable_numa, wq_disable_numa, bool, 0444); + static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ @@ -516,21 +519,6 @@ static int worker_pool_assign_id(struct worker_pool *pool) return ret; } -/** - * first_pwq - return the first pool_workqueue of the specified workqueue - * @wq: the target workqueue - * - * This must be called either with wq->mutex held or sched RCU read locked. - * If the pwq needs to be used beyond the locking in effect, the caller is - * responsible for guaranteeing that the pwq stays online. 
- */ -static struct pool_workqueue *first_pwq(struct workqueue_struct *wq) -{ - assert_rcu_or_wq_mutex(wq); - return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue, - pwqs_node); -} - /** * unbound_pwq_by_node - return the unbound pool_workqueue for the given node * @wq: the target workqueue @@ -3114,16 +3102,21 @@ static struct device_attribute wq_sysfs_attrs[] = { __ATTR_NULL, }; -static ssize_t wq_pool_id_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wq_pool_ids_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct workqueue_struct *wq = dev_to_wq(dev); - struct worker_pool *pool; - int written; + const char *delim = ""; + int node, written = 0; rcu_read_lock_sched(); - pool = first_pwq(wq)->pool; - written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id); + for_each_node(node) { + written += scnprintf(buf + written, PAGE_SIZE - written, + "%s%d:%d", delim, node, + unbound_pwq_by_node(wq, node)->pool->id); + delim = " "; + } + written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); rcu_read_unlock_sched(); return written; @@ -3212,10 +3205,46 @@ static ssize_t wq_cpumask_store(struct device *dev, return ret ?: count; } +static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + mutex_lock(&wq->mutex); + written = scnprintf(buf, PAGE_SIZE, "%d\n", + !wq->unbound_attrs->no_numa); + mutex_unlock(&wq->mutex); + + return written; +} + +static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int v, ret; + + attrs = wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + ret = -EINVAL; + if (sscanf(buf, "%d", &v) == 1) { + attrs->no_numa = !v; + ret = apply_workqueue_attrs(wq, attrs); + } + + free_workqueue_attrs(attrs); + return ret ?: count; +} + static struct device_attribute wq_sysfs_unbound_attrs[] = { - __ATTR(pool_id, 0444, wq_pool_id_show, NULL), + __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL), __ATTR(nice, 0644, wq_nice_show, wq_nice_store), __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), + __ATTR(numa, 0644, wq_numa_show, wq_numa_store), __ATTR_NULL, }; @@ -3750,7 +3779,7 @@ static void free_unbound_pwq(struct pool_workqueue *pwq) static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node, int cpu_going_down, cpumask_t *cpumask) { - if (!wq_numa_enabled) + if (!wq_numa_enabled || attrs->no_numa) goto use_dfl; /* does @node have any online CPUs @attrs wants? */ @@ -3951,6 +3980,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, cpumask = target_attrs->cpumask; mutex_lock(&wq->mutex); + if (wq->unbound_attrs->no_numa) + goto out_unlock; copy_workqueue_attrs(target_attrs, wq->unbound_attrs); pwq = unbound_pwq_by_node(wq, node); @@ -4763,6 +4794,11 @@ static void __init wq_numa_init(void) if (num_possible_nodes() <= 1) return; + if (wq_disable_numa) { + pr_info("workqueue: NUMA affinity support disabled\n"); + return; + } + wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL); BUG_ON(!wq_update_unbound_numa_attrs_buf); -- cgit v1.2.2 From 181387da2d64c3129e5b5186c4dd388bc5041d53 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: writeback: remove unused bdi_pending_list There's no user left. Remove it. 
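[Editorial aside on the preceding sysfs patch: a hedged userspace sketch of driving the new per-workqueue "numa" knob. Only the sysfs path comes from the patches; the function and its name are invented for illustration:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* toggle NUMA affinity of one WQ_SYSFS workqueue, e.g. "writeback" */
	static int wq_set_numa(const char *wq_name, int enable)
	{
		char path[96];
		int fd, ret;

		snprintf(path, sizeof(path),
			 "/sys/bus/workqueue/devices/%s/numa", wq_name);
		fd = open(path, O_WRONLY);
		if (fd < 0)
			return -1;
		ret = (write(fd, enable ? "1" : "0", 1) == 1) ? 0 : -1;
		close(fd);
		return ret;
	}
]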
Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Fengguang Wu --- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 350459910fe1..a5ef27f5411a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -130,7 +130,6 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; -extern struct list_head bdi_pending_list; static inline int wb_has_dirty_io(struct bdi_writeback *wb) { diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 41733c5dc820..657569b3fcf6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -31,13 +31,11 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; /* - * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as - * reader side protection for bdi_pending_list. bdi_list has RCU reader side + * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side * locking. */ DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); -LIST_HEAD(bdi_pending_list); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { -- cgit v1.2.2 From 839a8e8660b6777e7fe4e80af1a048aebe2b5977 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: writeback: replace custom worker pool implementation with unbound workqueue Writeback implements its own worker pool - each bdi can be associated with a worker thread which is created and destroyed dynamically. The worker thread for the default bdi is always present and serves as the "forker" thread which forks off worker threads for other bdis. There's no reason for writeback to implement its own worker pool when using an unbound workqueue instead is much simpler and more efficient. This patch replaces the custom worker pool implementation in writeback with an unbound workqueue. The conversion isn't too complicated but the following points are worth mentioning. * bdi_writeback->last_active, task and wakeup_timer are removed. delayed_work ->dwork is added instead. Explicit timer handling is no longer necessary. Everything works by either queueing / modding / flushing / canceling the delayed_work item. * bdi_writeback_thread() becomes bdi_writeback_workfn() which runs off bdi_writeback->dwork. On each execution, it processes bdi->work_list and reschedules itself if there are more things to do. The function also handles the low-mem condition, which used to be handled by the forker thread. If the function is running off a rescuer thread, it only writes out a limited number of pages so that the rescuer can serve other bdis too. This preserves the flusher creation failure behavior of the forker thread. * INIT_LIST_HEAD(&bdi->bdi_list) is used to tell bdi_writeback_workfn() about on-going bdi unregistration so that it always drains work_list even if it's running off the rescuer. Note that the original code was broken in this regard. Under memory pressure, a bdi could finish unregistration with a non-empty work_list. * The default bdi is no longer special. It is now treated the same as any other bdi and bdi_cap_flush_forker() is removed. * BDI_pending is no longer used. Removed. * Some tracepoints become non-applicable. The following TPs are removed - writeback_nothread, writeback_wake_thread, writeback_wake_forker_thread, writeback_thread_start, writeback_thread_stop.
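[Editorial aside: the conversion leans on the self-rescheduling delayed_work pattern. Below is a generic, hedged sketch with invented names (struct flusher, flush_wq, do_flush(), more_work()); bdi_writeback_workfn() in the diff below is the real instance:

	static void flush_workfn(struct work_struct *work)
	{
		struct flusher *f = container_of(to_delayed_work(work),
						 struct flusher, dwork);

		do_flush(f);

		/* re-arm instead of sleeping in a dedicated thread */
		if (more_work(f))
			queue_delayed_work(flush_wq, &f->dwork, HZ);
	}
]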
Everything, including devices coming and going away and rescuer operation under simulated memory pressure, seems to work fine in my test setup. Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Fengguang Wu Cc: Jeff Moyer --- fs/fs-writeback.c | 102 +++++----------- include/linux/backing-dev.h | 15 +-- include/trace/events/writeback.h | 5 - mm/backing-dev.c | 255 +++++---------------------------------- 4 files changed, 65 insertions(+), 312 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 21f46fb3a101..8067d3719e94 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head) #define CREATE_TRACE_POINTS #include -/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ -static void bdi_wakeup_flusher(struct backing_dev_info *bdi) -{ - if (bdi->wb.task) { - wake_up_process(bdi->wb.task); - } else { - /* - * The bdi thread isn't there, wake up the forker thread which - * will create and run it. - */ - wake_up_process(default_backing_dev_info.wb.task); - } -} - static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { @@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, spin_lock_bh(&bdi->wb_lock); list_add_tail(&work->list, &bdi->work_list); - if (!bdi->wb.task) - trace_writeback_nothread(bdi, work); - bdi_wakeup_flusher(bdi); spin_unlock_bh(&bdi->wb_lock); + + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); } static void @@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, */ work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) { - if (bdi->wb.task) { - trace_writeback_nowork(bdi); - wake_up_process(bdi->wb.task); - } + trace_writeback_nowork(bdi); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); return; } @@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) * writeback as soon as there is no other work to do. */ trace_writeback_wake_background(bdi); - spin_lock_bh(&bdi->wb_lock); - bdi_wakeup_flusher(bdi); - spin_unlock_bh(&bdi->wb_lock); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); } /* @@ -1020,66 +1000,48 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) /* * Handle writeback of dirty data for the device backed by this bdi. Also - * wakes up periodically and does kupdated style flushing. + * reschedules periodically and does kupdated style flushing. */ -int bdi_writeback_thread(void *data) +void bdi_writeback_workfn(struct work_struct *work) { - struct bdi_writeback *wb = data; + struct bdi_writeback *wb = container_of(to_delayed_work(work), + struct bdi_writeback, dwork); struct backing_dev_info *bdi = wb->bdi; long pages_written; current->flags |= PF_SWAPWRITE; - set_freezable(); - wb->last_active = jiffies; - - /* - * Our parent may run at a different priority, just set us to normal - */ - set_user_nice(current, 0); - - trace_writeback_thread_start(bdi); - while (!kthread_freezable_should_stop(NULL)) { + if (likely(!current_is_workqueue_rescuer() || + list_empty(&bdi->bdi_list))) { /* - * Remove own delayed wake-up timer, since we are already awake - * and we'll take care of the periodic write-back. + * The normal path. Keep writing back @bdi until its + * work_list is empty. Note that this path is also taken + * if @bdi is shutting down even when we're running off the + * rescuer as work_list needs to be drained. 
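+ *
+ * (Editorial note, not part of the patch: current_is_workqueue_rescuer()
+ * is %true only when this item is being executed by bdi_wq's rescuer
+ * thread, i.e. when worker creation is failing under memory pressure.)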
*/ - del_timer(&wb->wakeup_timer); - - pages_written = wb_do_writeback(wb, 0); - + do { + pages_written = wb_do_writeback(wb, 0); + trace_writeback_pages_written(pages_written); + } while (!list_empty(&bdi->work_list)); + } else { + /* + * bdi_wq can't get enough workers and we're running off + * the emergency worker. Don't hog it. Hopefully, 1024 is + * enough for efficient IO. + */ + pages_written = writeback_inodes_wb(&bdi->wb, 1024, + WB_REASON_FORKER_THREAD); trace_writeback_pages_written(pages_written); - - if (pages_written) - wb->last_active = jiffies; - - set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&bdi->work_list) || kthread_should_stop()) { - __set_current_state(TASK_RUNNING); - continue; - } - - if (wb_has_dirty_io(wb) && dirty_writeback_interval) - schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); - else { - /* - * We have nothing to do, so can go sleep without any - * timeout and save power. When a work is queued or - * something is made dirty - we will be woken up. - */ - schedule(); - } } - /* Flush any work that raced with us exiting */ - if (!list_empty(&bdi->work_list)) - wb_do_writeback(wb, 1); + if (!list_empty(&bdi->work_list) || + (wb_has_dirty_io(wb) && dirty_writeback_interval)) + queue_delayed_work(bdi_wq, &wb->dwork, + msecs_to_jiffies(dirty_writeback_interval * 10)); - trace_writeback_thread_stop(bdi); - return 0; + current->flags &= ~PF_SWAPWRITE; } - /* * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back * the whole world. diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index a5ef27f5411a..c3881553f7d1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -18,6 +18,7 @@ #include #include #include +#include struct page; struct device; @@ -27,7 +28,6 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_pending, /* On its way to being activated */ BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ @@ -53,10 +53,8 @@ struct bdi_writeback { unsigned int nr; unsigned long last_old_flush; /* last old data flush */ - unsigned long last_active; /* last time bdi thread was active */ - struct task_struct *task; /* writeback thread */ - struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */ + struct delayed_work dwork; /* work item used for writeback */ struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ @@ -123,7 +121,7 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, enum wb_reason reason); void bdi_start_background_writeback(struct backing_dev_info *bdi); -int bdi_writeback_thread(void *data); +void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); @@ -131,6 +129,8 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; +extern struct workqueue_struct *bdi_wq; + static inline int wb_has_dirty_io(struct bdi_writeback *wb) { return !list_empty(&wb->b_dirty) || @@ -335,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi) 
return bdi->capabilities & BDI_CAP_SWAP_BACKED; } -static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi) -{ - return bdi == &default_backing_dev_info; -} - static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(mapping->backing_dev_info); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 6a16fd2e70ed..464ea82e10db 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class, DEFINE_EVENT(writeback_work_class, name, \ TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \ TP_ARGS(bdi, work)) -DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread); DEFINE_WRITEBACK_WORK_EVENT(writeback_queue); DEFINE_WRITEBACK_WORK_EVENT(writeback_exec); DEFINE_WRITEBACK_WORK_EVENT(writeback_start); @@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \ DEFINE_WRITEBACK_EVENT(writeback_nowork); DEFINE_WRITEBACK_EVENT(writeback_wake_background); -DEFINE_WRITEBACK_EVENT(writeback_wake_thread); -DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread); DEFINE_WRITEBACK_EVENT(writeback_bdi_register); DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); -DEFINE_WRITEBACK_EVENT(writeback_thread_start); -DEFINE_WRITEBACK_EVENT(writeback_thread_stop); DECLARE_EVENT_CLASS(wbc_class, TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 657569b3fcf6..2857d4f6bca4 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -37,6 +37,9 @@ static struct class *bdi_class; DEFINE_SPINLOCK(bdi_lock); LIST_HEAD(bdi_list); +/* bdi_wq serves all asynchronous writeback tasks */ +struct workqueue_struct *bdi_wq; + void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { @@ -255,6 +258,11 @@ static int __init default_bdi_init(void) { int err; + bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | + WQ_UNBOUND, 0); + if (!bdi_wq) + return -ENOMEM; + err = bdi_init(&default_backing_dev_info); if (!err) bdi_register(&default_backing_dev_info, NULL, "default"); @@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) return wb_has_dirty_io(&bdi->wb); } -static void wakeup_timer_fn(unsigned long data) -{ - struct backing_dev_info *bdi = (struct backing_dev_info *)data; - - spin_lock_bh(&bdi->wb_lock); - if (bdi->wb.task) { - trace_writeback_wake_thread(bdi); - wake_up_process(bdi->wb.task); - } else if (bdi->dev) { - /* - * When bdi tasks are inactive for long time, they are killed. - * In this case we have to wake-up the forker thread which - * should create and run the bdi thread. - */ - trace_writeback_wake_forker_thread(bdi); - wake_up_process(default_backing_dev_info.wb.task); - } - spin_unlock_bh(&bdi->wb_lock); -} - /* * This function is used when the first inode for this bdi is marked dirty. It * wakes-up the corresponding bdi thread which should then take care of the @@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi) unsigned long timeout; timeout = msecs_to_jiffies(dirty_writeback_interval * 10); - mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout); -} - -/* - * Calculate the longest interval (jiffies) bdi threads are allowed to be - * inactive. 
- */ -static unsigned long bdi_longest_inactive(void) -{ - unsigned long interval; - - interval = msecs_to_jiffies(dirty_writeback_interval * 10); - return max(5UL * 60 * HZ, interval); -} - -/* - * Clear pending bit and wakeup anybody waiting for flusher thread creation or - * shutdown - */ -static void bdi_clear_pending(struct backing_dev_info *bdi) -{ - clear_bit(BDI_pending, &bdi->state); - smp_mb__after_clear_bit(); - wake_up_bit(&bdi->state, BDI_pending); -} - -static int bdi_forker_thread(void *ptr) -{ - struct bdi_writeback *me = ptr; - - current->flags |= PF_SWAPWRITE; - set_freezable(); - - /* - * Our parent may run at a different priority, just set us to normal - */ - set_user_nice(current, 0); - - for (;;) { - struct task_struct *task = NULL; - struct backing_dev_info *bdi; - enum { - NO_ACTION, /* Nothing to do */ - FORK_THREAD, /* Fork bdi thread */ - KILL_THREAD, /* Kill inactive bdi thread */ - } action = NO_ACTION; - - /* - * Temporary measure, we want to make sure we don't see - * dirty data on the default backing_dev_info - */ - if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) { - del_timer(&me->wakeup_timer); - wb_do_writeback(me, 0); - } - - spin_lock_bh(&bdi_lock); - /* - * In the following loop we are going to check whether we have - * some work to do without any synchronization with tasks - * waking us up to do work for them. Set the task state here - * so that we don't miss wakeups after verifying conditions. - */ - set_current_state(TASK_INTERRUPTIBLE); - - list_for_each_entry(bdi, &bdi_list, bdi_list) { - bool have_dirty_io; - - if (!bdi_cap_writeback_dirty(bdi) || - bdi_cap_flush_forker(bdi)) - continue; - - WARN(!test_bit(BDI_registered, &bdi->state), - "bdi %p/%s is not registered!\n", bdi, bdi->name); - - have_dirty_io = !list_empty(&bdi->work_list) || - wb_has_dirty_io(&bdi->wb); - - /* - * If the bdi has work to do, but the thread does not - * exist - create it. - */ - if (!bdi->wb.task && have_dirty_io) { - /* - * Set the pending bit - if someone will try to - * unregister this bdi - it'll wait on this bit. - */ - set_bit(BDI_pending, &bdi->state); - action = FORK_THREAD; - break; - } - - spin_lock(&bdi->wb_lock); - - /* - * If there is no work to do and the bdi thread was - * inactive long enough - kill it. The wb_lock is taken - * to make sure no-one adds more work to this bdi and - * wakes the bdi thread up. - */ - if (bdi->wb.task && !have_dirty_io && - time_after(jiffies, bdi->wb.last_active + - bdi_longest_inactive())) { - task = bdi->wb.task; - bdi->wb.task = NULL; - spin_unlock(&bdi->wb_lock); - set_bit(BDI_pending, &bdi->state); - action = KILL_THREAD; - break; - } - spin_unlock(&bdi->wb_lock); - } - spin_unlock_bh(&bdi_lock); - - /* Keep working if default bdi still has things to do */ - if (!list_empty(&me->bdi->work_list)) - __set_current_state(TASK_RUNNING); - - switch (action) { - case FORK_THREAD: - __set_current_state(TASK_RUNNING); - task = kthread_create(bdi_writeback_thread, &bdi->wb, - "flush-%s", dev_name(bdi->dev)); - if (IS_ERR(task)) { - /* - * If thread creation fails, force writeout of - * the bdi from the thread. Hopefully 1024 is - * large enough for efficient IO. - */ - writeback_inodes_wb(&bdi->wb, 1024, - WB_REASON_FORKER_THREAD); - } else { - /* - * The spinlock makes sure we do not lose - * wake-ups when racing with 'bdi_queue_work()'. - * And as soon as the bdi thread is visible, we - * can start it. 
- */ - spin_lock_bh(&bdi->wb_lock); - bdi->wb.task = task; - spin_unlock_bh(&bdi->wb_lock); - wake_up_process(task); - } - bdi_clear_pending(bdi); - break; - - case KILL_THREAD: - __set_current_state(TASK_RUNNING); - kthread_stop(task); - bdi_clear_pending(bdi); - break; - - case NO_ACTION: - if (!wb_has_dirty_io(me) || !dirty_writeback_interval) - /* - * There are no dirty data. The only thing we - * should now care about is checking for - * inactive bdi threads and killing them. Thus, - * let's sleep for longer time, save energy and - * be friendly for battery-driven devices. - */ - schedule_timeout(bdi_longest_inactive()); - else - schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); - try_to_freeze(); - break; - } - } - - return 0; + mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout); } /* @@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) spin_unlock_bh(&bdi_lock); synchronize_rcu_expedited(); + + /* bdi_list is now unused, clear it to mark @bdi dying */ + INIT_LIST_HEAD(&bdi->bdi_list); } int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, bdi->dev = dev; - /* - * Just start the forker thread for our default backing_dev_info, - * and add other bdi's to the list. They will get a thread created - * on-demand when they need it. - */ - if (bdi_cap_flush_forker(bdi)) { - struct bdi_writeback *wb = &bdi->wb; - - wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s", - dev_name(dev)); - if (IS_ERR(wb->task)) - return PTR_ERR(wb->task); - } - bdi_debug_register(bdi, dev_name(dev)); set_bit(BDI_registered, &bdi->state); @@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev); */ static void bdi_wb_shutdown(struct backing_dev_info *bdi) { - struct task_struct *task; - if (!bdi_cap_writeback_dirty(bdi)) return; @@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) bdi_remove_from_list(bdi); /* - * If setup is pending, wait for that to complete first + * Drain work list and shutdown the delayed_work. At this point, + * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi + * is dying and its work_list needs to be drained no matter what. */ - wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait, - TASK_UNINTERRUPTIBLE); + mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); + flush_delayed_work(&bdi->wb.dwork); + WARN_ON(!list_empty(&bdi->work_list)); /* + * This shouldn't be necessary unless @bdi for some reason has + * unflushed dirty IO after work_list is drained. Do it anyway + * just in case.
*/ - spin_lock_bh(&bdi->wb_lock); - task = bdi->wb.task; - bdi->wb.task = NULL; - spin_unlock_bh(&bdi->wb_lock); - - if (task) - kthread_stop(task); + cancel_delayed_work_sync(&bdi->wb.dwork); } /* @@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi) bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); - del_timer_sync(&bdi->wb.wakeup_timer); - if (!bdi_cap_flush_forker(bdi)) - bdi_wb_shutdown(bdi); + bdi_wb_shutdown(bdi); bdi_debug_unregister(bdi); spin_lock_bh(&bdi->wb_lock); @@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) INIT_LIST_HEAD(&wb->b_io); INIT_LIST_HEAD(&wb->b_more_io); spin_lock_init(&wb->list_lock); - setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); + INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); } /* @@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); /* - * If bdi_unregister() had already been called earlier, the - * wakeup_timer could still be armed because bdi_prune_sb() - * can race with the bdi_wakeup_thread_delayed() calls from - * __mark_inode_dirty(). + * If bdi_unregister() had already been called earlier, the dwork + * could still be pending because bdi_prune_sb() can race with the + * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty(). */ - del_timer_sync(&bdi->wb.wakeup_timer); + cancel_delayed_work_sync(&bdi->wb.dwork); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); -- cgit v1.2.2 From b5c872ddb7083c7909fb76a170c3807e04564bb3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Apr 2013 19:08:06 -0700 Subject: writeback: expose the bdi_wq workqueue There are cases where userland wants to tweak the priority and affinity of writeback flushers. Expose bdi_wq to userland by setting WQ_SYSFS. It appears under /sys/bus/workqueue/devices/writeback/ and allows adjusting maximum concurrency level, cpumask and nice level. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Fengguang Wu Cc: Jeff Moyer Cc: Kay Sievers Cc: Greg Kroah-Hartman --- mm/backing-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2857d4f6bca4..502517492258 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -259,7 +259,7 @@ static int __init default_bdi_init(void) int err; bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | - WQ_UNBOUND, 0); + WQ_UNBOUND | WQ_SYSFS, 0); if (!bdi_wq) return -ENOMEM; -- cgit v1.2.2
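[Editorial aside: with WQ_SYSFS set, the knobs mentioned in the last commit message can be driven from userspace. A hedged sketch; the directory path comes from the commit message, while the hex cpumask format and the helper below are assumptions of this note:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	static int sysfs_write(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, val, strlen(val));
		close(fd);
		return n == (ssize_t)strlen(val) ? 0 : -1;
	}

	int main(void)
	{
		/* confine writeback flushers to CPUs 0-3 ("f" as a hex
		 * mask) and lower their priority to nice 5 */
		sysfs_write("/sys/bus/workqueue/devices/writeback/cpumask", "f");
		sysfs_write("/sys/bus/workqueue/devices/writeback/nice", "5");
		return 0;
	}
]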