From 26047e2d6bde5b2e1b791e0ec1c3234894fdf3fa Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 30 Nov 2012 11:28:55 +0100 Subject: ASoC: cs4271: fix sparse warning Make the flag in the pdata of type bool to fix a sparse warning. Signed-off-by: Daniel Mack Reported-by: Fengguang Wu Signed-off-by: Mark Brown --- include/sound/cs4271.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/cs4271.h b/include/sound/cs4271.h index 6d9e15ed1dcf..dd8c48d14ed9 100644 --- a/include/sound/cs4271.h +++ b/include/sound/cs4271.h @@ -19,7 +19,7 @@ struct cs4271_platform_data { int gpio_nreset; /* GPIO driving Reset pin, if any */ - int amutec_eq_bmutec:1; /* flag to enable AMUTEC=BMUTEC */ + bool amutec_eq_bmutec; /* flag to enable AMUTEC=BMUTEC */ }; #endif /* __CS4271_H */ -- cgit v1.2.2 From 252b3e8c1bc0c2b20348ae87d67efcd0a8209f72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 11 Dec 2012 04:07:42 +0000 Subject: netfilter: xt_CT: fix crash while destroy ct templates In (d871bef netfilter: ctnetlink: dump entries from the dying and unconfirmed lists), we assume that all conntrack objects are inserted in any of the existing lists. However, template conntrack objects were not. This results in hitting BUG_ON in the destroy_conntrack path while removing a rule that uses the CT target. This patch fixes the situation by adding the template lists, which is where template conntrack objects reside now. Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index a1d83cc8bf85..923cb20051ed 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -71,6 +71,7 @@ struct netns_ct { struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; -- cgit v1.2.2 From b45305fce5bb1abec263fcff9d81ebecd6306ede Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 17 Dec 2012 16:21:27 +0100 Subject: drm/i915: Implement workaround for broken CS tlb on i830/845 Now that Chris Wilson demonstrated that the key for stability on early gen 2 is to simple _never_ exchange the physical backing storage of batch buffers I've tried a stab at a kernel solution. Doesn't look too nefarious imho, now that I don't try to be too clever for my own good any more. v2: After discussing the various techniques, we've decided to always blit batches on the suspect devices, but allow userspace to opt out of the kernel workaround assume full responsibility for providing coherent batches. The principal reason is that avoiding the blit does improve performance in a few key microbenchmarks and also in cairo-trace replays. Signed-Off-by: Daniel Vetter Signed-off-by: Chris Wilson [danvet: - Drop the hunk which uses HAS_BROKEN_CS_TLB to implement the ring wrap w/a. Suggested by Chris Wilson. - Also add the ACTHD check from Chris Wilson for the error state dumping, so that we still catch batches when userspace opts out of the w/a.] Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b746a3cf5fa9..c4d2e9c74002 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 typedef struct drm_i915_getparam { int param; @@ -677,6 +678,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_SECURE (1<<9) +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. + */ +#define I915_EXEC_IS_PINNED (1<<10) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK -- cgit v1.2.2 From b81034506fc9b879cb726feb01342be0cdbe6e25 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 20:37:06 +0000 Subject: drm: Export routines for inserting preallocated nodes into the mm manager Required by i915 in order to avoid the allocation in the middle of manipulating the drm_mm lists. Use a pair of stubs to preserve the existing EXPORT_SYMBOLs for backporting; to be removed later. Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula [danvet: bikeshedded-away the atomic parameter, it's not yet used anywhere.] Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- include/drm/drm_mm.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 06d7f798a08c..0f4a366f6fa6 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -158,12 +158,29 @@ static inline struct drm_mm_node *drm_mm_get_block_atomic_range( return drm_mm_get_block_range_generic(parent, size, alignment, 0, start, end, 1); } -extern int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment); + +extern int drm_mm_insert_node(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment); extern int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment, - unsigned long start, unsigned long end); + unsigned long size, + unsigned alignment, + unsigned long start, + unsigned long end); +extern int drm_mm_insert_node_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color); +extern int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color, + unsigned long start, + unsigned long end); extern void drm_mm_put_block(struct drm_mm_node *cur); extern void drm_mm_remove_node(struct drm_mm_node *node); extern void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new); -- cgit v1.2.2 From 9bde4f0b1c83d1129a9fc8ec5b2611ba6dab1215 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 19 Dec 2012 16:05:00 +0000 Subject: ASoC: core: Fix SOC_DOUBLE_RANGE() macros Although we've had macros defining double _RANGE controls for a while now they've not actually been backed up properly by the implementation, it's treated everything as mono. Fix that by implementing the handling in the stereo controls, ensuring that the mono controls don't mistakenly get treated as stereo. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- include/sound/soc.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 769e27c774a3..bc56738cb109 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -58,8 +58,9 @@ .info = snd_soc_info_volsw_range, .get = snd_soc_get_volsw_range, \ .put = snd_soc_put_volsw_range, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ - {.reg = xreg, .shift = xshift, .min = xmin,\ - .max = xmax, .platform_max = xmax, .invert = xinvert} } + {.reg = xreg, .rreg = xreg, .shift = xshift, \ + .rshift = xshift, .min = xmin, .max = xmax, \ + .platform_max = xmax, .invert = xinvert} } #define SOC_SINGLE_TLV(xname, reg, shift, max, invert, tlv_array) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ |\ @@ -88,8 +89,9 @@ .info = snd_soc_info_volsw_range, \ .get = snd_soc_get_volsw_range, .put = snd_soc_put_volsw_range, \ .private_value = (unsigned long)&(struct soc_mixer_control) \ - {.reg = xreg, .shift = xshift, .min = xmin,\ - .max = xmax, .platform_max = xmax, .invert = xinvert} } + {.reg = xreg, .rreg = xreg, .shift = xshift, \ + .rshift = xshift, .min = xmin, .max = xmax, \ + .platform_max = xmax, .invert = xinvert} } #define SOC_DOUBLE(xname, reg, shift_left, shift_right, max, invert) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\ .info = snd_soc_info_volsw, .get = snd_soc_get_volsw, \ -- cgit v1.2.2 From 10db9069eb5c60195170a4119bdbcbce69a4945f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Dec 2012 01:54:51 +0000 Subject: netfilter: xt_CT: recover NOTRACK target support Florian Westphal reported that the removal of the NOTRACK target (9655050 netfilter: remove xt_NOTRACK) is breaking some existing setups. That removal was scheduled for removal since long time ago as described in Documentation/feature-removal-schedule.txt What: xt_NOTRACK Files: net/netfilter/xt_NOTRACK.c When: April 2011 Why: Superseded by xt_CT Still, people may have not notice / may have decided to stick to an old iptables version. I agree with him in that some more conservative approach by spotting some printk to warn users for some time is less agressive. Current iptables 1.4.16.3 already contains the aliasing support that makes it point to the CT target, so upgrading would fix it. Still, the policy so far has been to avoid pushing our users to upgrade. As a solution, this patch recovers the NOTRACK target inside the CT target and it now spots a warning. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/x_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 591db7d657a3..c24060ee411e 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -8,6 +8,7 @@ struct ebt_table; struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; + bool notrack_deprecated_warning; #if defined(CONFIG_BRIDGE_NF_EBTABLES) || \ defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE) struct ebt_table *broute_table; -- cgit v1.2.2 From 4520fb3c3690f2643006d85f09ecb74554c10e95 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Dec 2012 13:28:54 -0500 Subject: ext4: split off ext4_journalled_invalidatepage() In data=journal mode we don't need delalloc or DIO handling in invalidatepage and similarly in other modes we don't need the journal handling. So split invalidatepage implementations. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index f6372b011366..7e8c36bc7082 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -451,7 +451,7 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage, TP_ARGS(page) ); -TRACE_EVENT(ext4_invalidatepage, +DECLARE_EVENT_CLASS(ext4_invalidatepage_op, TP_PROTO(struct page *page, unsigned long offset), TP_ARGS(page, offset), @@ -477,6 +477,18 @@ TRACE_EVENT(ext4_invalidatepage, (unsigned long) __entry->index, __entry->offset) ); +DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage, + TP_PROTO(struct page *page, unsigned long offset), + + TP_ARGS(page, offset) +); + +DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage, + TP_PROTO(struct page *page, unsigned long offset), + + TP_ARGS(page, offset) +); + TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v1.2.2 From 53e872681fed6a43047e71bf927f77d06f467988 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Dec 2012 13:29:52 -0500 Subject: ext4: fix deadlock in journal_unmap_buffer() We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and try invalidation again. Since the issue can happen only for page stradding i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such page from ext4_setattr(), check the return value and wait if necessary. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1be23d9fdacb..e30b66346942 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern void jbd2_journal_invalidatepage(journal_t *, +extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned long); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int jbd2_journal_stop(handle_t *); -- cgit v1.2.2 From 08b60f8438879a84246d7debded31c9cb7aea6e4 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 24 Dec 2012 11:14:58 -0700 Subject: namei.h: include errno.h This solves: In file included from fs/ext3/symlink.c:20:0: include/linux/namei.h: In function 'retry_estale': include/linux/namei.h:114:19: error: 'ESTALE' undeclared (first use in this function) Signed-off-by: Stephen Warren Signed-off-by: Al Viro --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index e998c030061d..5a5ff57ceed4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -2,6 +2,7 @@ #define _LINUX_NAMEI_H #include +#include #include #include -- cgit v1.2.2 From 130f1b8f35f14d27c43da755f3c9226318c17f57 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 26 Dec 2012 10:39:23 -0700 Subject: PCI: Add PCIe Link Capability link speed and width names Add standard #defines for the Supported Link Speeds field in the PCIe Link Capabilities register. Note that prior to PCIe spec r3.0, these encodings were defined: 0001b 2.5GT/s Link speed supported 0010b 5.0GT/s and 2.5GT/s Link speed supported Starting with spec r3.0, these encodings refer to bits 0 and 1 in the Supported Link Speeds Vector in the Link Capabilities 2 register, and bits 0 and 1 there mean 2.5 GT/s and 5.0 GT/s, respectively. Therefore, code that followed r2.0 and interpreted 0x1 as 2.5GT/s and 0x2 as 5.0GT/s will continue to work, and we can identify a device using the new encodings because it will have a non-zero Link Capabilities 2 register. Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 6b7b6f1e2fd6..ebfadc56d1b4 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -458,6 +458,8 @@ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_SLS_2_5GB 0x1 /* LNKCAP2 SLS Vector bit 0 (2.5GT/s) */ +#define PCI_EXP_LNKCAP_SLS_5_0GB 0x2 /* LNKCAP2 SLS Vector bit 1 (5.0GT/s) */ #define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ #define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ -- cgit v1.2.2 From 812089e01b9f65f90fc8fc670d8cce72a0e01fbb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 1 Dec 2012 12:37:20 -0800 Subject: PCI: Reduce Ricoh 0xe822 SD card reader base clock frequency to 50MHz Otherwise it fails like this on cards like the Transcend 16GB SDHC card: mmc0: new SDHC card at address b368 mmcblk0: mmc0:b368 SDC 15.0 GiB mmcblk0: error -110 sending status command, retrying mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb0 Tested on my Lenovo x200 laptop. [bhelgaas: changelog] Signed-off-by: Andy Lutomirski Signed-off-by: Bjorn Helgaas Acked-by: Chris Ball CC: Manoj Iyer CC: stable@vger.kernel.org --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f8447376ddb..0eb65796bcb9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1568,6 +1568,7 @@ #define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 #define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 #define PCI_DEVICE_ID_RICOH_R5C822 0x0822 +#define PCI_DEVICE_ID_RICOH_R5CE822 0xe822 #define PCI_DEVICE_ID_RICOH_R5CE823 0xe823 #define PCI_DEVICE_ID_RICOH_R5C832 0x0832 #define PCI_DEVICE_ID_RICOH_R5C843 0x0843 -- cgit v1.2.2 From a7a88b23737095e6c18a20c5d4eef9e25ec5b829 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Jan 2013 02:04:23 -0800 Subject: mempolicy: remove arg from mpol_parse_str, mpol_to_str Remove the unused argument (formerly no_context) from mpol_parse_str() and from mpol_to_str(). Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 9adc270de7ef..92bc9988a180 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, #ifdef CONFIG_TMPFS -extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); +extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif -extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context); +extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) @@ -296,15 +295,13 @@ static inline void check_highest_zone(int k) } #ifdef CONFIG_TMPFS -static inline int mpol_parse_str(char *str, struct mempolicy **mpol, - int no_context) +static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif -static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context) +static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { return 0; } -- cgit v1.2.2 From 42288fe366c4f1ce7522bc9f27d0bc2a81c55264 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 21 Dec 2012 23:10:25 +0000 Subject: mm: mempolicy: Convert shared_policy mutex to spinlock Sasha was fuzzing with trinity and reported the following problem: BUG: sleeping function called from invalid context at kernel/mutex.c:269 in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main 2 locks held by trinity-main/6361: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x1e4/0x4f0 #1: (&(&mm->page_table_lock)->rlock){+.+...}, at: [] handle_pte_fault+0x3f7/0x6a0 Pid: 6361, comm: trinity-main Tainted: G W 3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74 Call Trace: __might_sleep+0x1c3/0x1e0 mutex_lock_nested+0x29/0x50 mpol_shared_policy_lookup+0x2e/0x90 shmem_get_policy+0x2e/0x30 get_vma_policy+0x5a/0xa0 mpol_misplaced+0x41/0x1d0 handle_pte_fault+0x465/0x6a0 This was triggered by a different version of automatic NUMA balancing but in theory the current version is vunerable to the same problem. do_numa_page -> numa_migrate_prep -> mpol_misplaced -> get_vma_policy -> shmem_get_policy It's very unlikely this will happen as shared pages are not marked pte_numa -- see the page_mapcount() check in change_pte_range() -- but it is possible. To address this, this patch restores sp->lock as originally implemented by Kosaki Motohiro. In the path where get_vma_policy() is called, it should not be calling sp_alloc() so it is not necessary to treat the PTL specially. Signed-off-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 92bc9988a180..0d7df39a5885 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -123,7 +123,7 @@ struct sp_node { struct shared_policy { struct rb_root root; - struct mutex mutex; + spinlock_t lock; }; void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -- cgit v1.2.2 From 3d33fcc11bdd11b6949cf5c406726a094395dc4f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Jan 2013 15:12:55 +0000 Subject: UAPI: Remove empty Kbuild files Empty files can get deleted by the patch program, so remove empty Kbuild files and their links from the parent Kbuilds. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/Kbuild | 3 --- include/linux/Kbuild | 5 ----- include/linux/hdlc/Kbuild | 0 include/linux/hsi/Kbuild | 0 include/linux/raid/Kbuild | 0 include/linux/usb/Kbuild | 0 include/rdma/Kbuild | 0 include/sound/Kbuild | 0 8 files changed, 8 deletions(-) delete mode 100644 include/linux/Kbuild delete mode 100644 include/linux/hdlc/Kbuild delete mode 100644 include/linux/hsi/Kbuild delete mode 100644 include/linux/raid/Kbuild delete mode 100644 include/linux/usb/Kbuild delete mode 100644 include/rdma/Kbuild delete mode 100644 include/sound/Kbuild (limited to 'include') diff --git a/include/Kbuild b/include/Kbuild index 83256b64166a..1dfd33e8d43b 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1,8 +1,5 @@ # Top-level Makefile calls into asm-$(ARCH) # List only non-arch directories below -header-y += linux/ -header-y += sound/ -header-y += rdma/ header-y += video/ header-y += scsi/ diff --git a/include/linux/Kbuild b/include/linux/Kbuild deleted file mode 100644 index 7fe2dae251e5..000000000000 --- a/include/linux/Kbuild +++ /dev/null @@ -1,5 +0,0 @@ -header-y += dvb/ -header-y += hdlc/ -header-y += hsi/ -header-y += raid/ -header-y += usb/ diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/sound/Kbuild b/include/sound/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 -- cgit v1.2.2 From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:02:05 -0800 Subject: pstore: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from the pstore filesystem. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Greg Kroah-Hartman --- include/linux/pstore_ram.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 098d2a838296..cb6ab5feab67 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -46,9 +46,8 @@ struct persistent_ram_zone { size_t old_log_size; }; -struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, u32 sig, - int ecc_size); +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, int ecc_size); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.2 From 0f58a01ddd5e8177255705ba15e64c3b74d67993 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:12:59 -0800 Subject: Drivers: bcma: remove __dev* attributes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit, __devexit_p, and __devexit from these drivers. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Cc: "Rafał Miłecki" Signed-off-by: Greg Kroah-Hartman --- include/linux/bcma/bcma_driver_gmac_cmn.h | 2 +- include/linux/bcma/bcma_driver_pci.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h index def894b83b0d..4dd1f33e36a2 100644 --- a/include/linux/bcma/bcma_driver_gmac_cmn.h +++ b/include/linux/bcma/bcma_driver_gmac_cmn.h @@ -92,7 +92,7 @@ struct bcma_drv_gmac_cmn { #define gmac_cmn_write32(gc, offset, val) bcma_write32((gc)->core, offset, val) #ifdef CONFIG_BCMA_DRIVER_GMAC_CMN -extern void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); +extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); #else static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { } #endif diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 41da581e1612..c48d98d27b77 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -214,7 +214,7 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc); +extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); extern void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend); -- cgit v1.2.2 From e389623a68622e3c9be440ab522fac1aa1ca3454 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:15:49 -0800 Subject: include: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from some include files that were previously missed. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/parport.h | 4 ++-- include/linux/ata_platform.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h index 40528cb977e8..2c9f9d4336ca 100644 --- a/include/asm-generic/parport.h +++ b/include/asm-generic/parport.h @@ -10,8 +10,8 @@ * to devices on the PCI bus. */ -static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); -static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) +static int parport_pc_find_isa_ports(int autoirq, int autodma); +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) { #ifdef CONFIG_ISA return parport_pc_find_isa_ports(autoirq, autodma); diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index fe9989636b62..b9fde17f767c 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -15,12 +15,12 @@ struct pata_platform_info { unsigned int irq_flags; }; -extern int __devinit __pata_platform_probe(struct device *dev, - struct resource *io_res, - struct resource *ctl_res, - struct resource *irq_res, - unsigned int ioport_shift, - int __pio_mask); +extern int __pata_platform_probe(struct device *dev, + struct resource *io_res, + struct resource *ctl_res, + struct resource *irq_res, + unsigned int ioport_shift, + int __pio_mask); /* * Marvell SATA private data -- cgit v1.2.2 From d81aecb5e680311e1f3fd71e49e6a4072d2374d2 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 18 Dec 2012 02:30:17 +0900 Subject: drm/exynos: change file license to GPL This patch changes file license to GPL Most of exynos files had been copied from some random file and not updated correctly. So this patch corrects the file license. Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- include/drm/exynos_drm.h | 22 ++++------------------ include/uapi/drm/exynos_drm.h | 22 ++++------------------ 2 files changed, 8 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h index 808dad29607a..d6aeaf3c6d6c 100644 --- a/include/drm/exynos_drm.h +++ b/include/drm/exynos_drm.h @@ -6,24 +6,10 @@ * Joonyoung Shim * Seung-Woo Kim * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. */ #ifndef _EXYNOS_DRM_H_ #define _EXYNOS_DRM_H_ diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index e7f52c334005..81354d0e7fd7 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -6,24 +6,10 @@ * Joonyoung Shim * Seung-Woo Kim * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. */ #ifndef _UAPI_EXYNOS_DRM_H_ -- cgit v1.2.2 From 4f21877cb8b9e9f85a541af43b9ee66d9006bb69 Mon Sep 17 00:00:00 2001 From: Eunchul Kim Date: Sat, 22 Dec 2012 17:49:24 +0900 Subject: drm/exynos: consider both case of vflip and hflip. This patch considers both case of vflip and hflip. If we want that the contents in buffer to be rotated to 180 degree, then we can use h,vflip or 180 degree. Changelog v2: - added EXYNOS_DRM_FLIP_BOTH enum value to avoid build warnning. Signed-off-by: Eunchul Kim Signed-off-by: Inki Dae --- include/uapi/drm/exynos_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index 81354d0e7fd7..d5844122ff32 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -171,6 +171,8 @@ enum drm_exynos_flip { EXYNOS_DRM_FLIP_NONE = (0 << 0), EXYNOS_DRM_FLIP_VERTICAL = (1 << 0), EXYNOS_DRM_FLIP_HORIZONTAL = (1 << 1), + EXYNOS_DRM_FLIP_BOTH = EXYNOS_DRM_FLIP_VERTICAL | + EXYNOS_DRM_FLIP_HORIZONTAL, }; enum drm_exynos_degree { -- cgit v1.2.2 From 03f595668017f1a1fb971c02fc37140bc6e7bb1c Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:50 -0800 Subject: ipc: add sysctl to specify desired next object id Add 3 new variables and sysctls to tune them (by one "next_id" variable for messages, semaphores and shared memory respectively). This variable can be used to set desired id for next allocated IPC object. By default it's equal to -1 and old behaviour is preserved. If this variable is non-negative, then desired idr will be extracted from it and used as a start value to search for free IDR slot. Notes: 1) this patch doesn't guarantee that the new object will have desired id. So it's up to user space how to handle new object with wrong id. 2) After a sucessful id allocation attempt, "next_id" will be set back to -1 (if it was non-negative). [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index fe771978e877..ae221a7b5092 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -24,6 +24,7 @@ struct ipc_ids { unsigned short seq_max; struct rw_semaphore rw_mutex; struct idr ipcs_idr; + int next_id; }; struct ipc_namespace { -- cgit v1.2.2 From f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:52 -0800 Subject: ipc: message queue receive cleanup Move all message related manipulation into one function msg_fill(). Actually, two functions because of the compat one. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/msg.h b/include/linux/msg.h index 7a4b9e97d29a..fc5743a554e6 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -34,7 +34,8 @@ struct msg_queue { /* Helper routines for sys_msgsnd and sys_msgrcv */ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); -extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext, - size_t msgsz, long msgtyp, int msgflg); +extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, + int msgflg, + long (*msg_fill)(void __user *, struct msg_msg *, size_t)); #endif /* _LINUX_MSG_H */ -- cgit v1.2.2 From 4a674f34ba04a002244edaf891b5da7fc1473ae8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:55 -0800 Subject: ipc: introduce message queue copy feature This patch is required for checkpoint/restore in userspace. c/r requires some way to get all pending IPC messages without deleting them from the queue (checkpoint can fail and in this case tasks will be resumed, so queue have to be valid). To achive this, new operation flag MSG_COPY for sys_msgrcv() system call was introduced. If this flag was specified, then mtype is interpreted as number of the message to copy. If MSG_COPY is set, then kernel will allocate dummy message with passed size, and then use new copy_msg() helper function to copy desired message (instead of unlinking it from the queue). Notes: 1) Return -ENOSYS if MSG_COPY is specified, but CONFIG_CHECKPOINT_RESTORE is not set. Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h index 78dbd2f996a3..22d95c6854e0 100644 --- a/include/uapi/linux/msg.h +++ b/include/uapi/linux/msg.h @@ -10,6 +10,7 @@ /* msgrcv options */ #define MSG_NOERROR 010000 /* no error if message is too big */ #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ +#define MSG_COPY 040000 /* copy (not remove) all queue messages */ /* Obsolete, used only for backwards compatibility and libc5 compiles */ struct msqid_ds { -- cgit v1.2.2 From 3a665531a3b7c2ad2c87903b24646be6916340e4 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:56 -0800 Subject: selftests: IPC message queue copy feature test This test can be used to check wheither kernel supports IPC message queue copy and restore features (required by CRIU project). Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/msg.h b/include/linux/msg.h index fc5743a554e6..391af8d11cce 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -36,6 +36,7 @@ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, - long (*msg_fill)(void __user *, struct msg_msg *, size_t)); + long (*msg_fill)(void __user *, struct msg_msg *, + size_t)); #endif /* _LINUX_MSG_H */ -- cgit v1.2.2 From a458431e176ddb27e8ef8b98c2a681b217337393 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 4 Jan 2013 15:35:08 -0800 Subject: mm: fix zone_watermark_ok_safe() accounting of isolated pages Commit 702d1a6e0766 ("memory-hotplug: fix kswapd looping forever problem") added an isolated pageblocks counter (nr_pageblock_isolate in struct zone) and used it to adjust free pages counter in zone_watermark_ok_safe() to prevent kswapd looping forever problem. Then later, commit 2139cbe627b8 ("cma: fix counting of isolated pages") fixed accounting of isolated pages in global free pages counter. It made the previous zone_watermark_ok_safe() fix unnecessary and potentially harmful (cause now isolated pages may be accounted twice making free pages counter incorrect). This patch removes the special isolated pageblocks counter altogether which fixes zone_watermark_ok_safe() free pages check. Reported-by: Tomasz Stanislawski Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Aaditya Kumar Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4bec5be82cab..73b64a38b984 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -503,14 +503,6 @@ struct zone { * rarely used fields: */ const char *name; -#ifdef CONFIG_MEMORY_ISOLATION - /* - * the number of MIGRATE_ISOLATE *pageblock*. - * We need this for free page counting. Look at zone_watermark_ok_safe. - * It's protected by zone->lock - */ - int nr_pageblock_isolate; -#endif } ____cacheline_internodealigned_in_smp; typedef enum { -- cgit v1.2.2 From 53a59fc67f97374758e63a9c785891ec62324c81 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 4 Jan 2013 15:35:12 -0800 Subject: mm: limit mmu_gather batching to fix soft lockups on !CONFIG_PREEMPT Since commit e303297e6c3a ("mm: extended batches for generic mmu_gather") we are batching pages to be freed until either tlb_next_batch cannot allocate a new batch or we are done. This works just fine most of the time but we can get in troubles with non-preemptible kernel (CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY) on large machines where too aggressive batching might lead to soft lockups during process exit path (exit_mmap) because there are no scheduling points down the free_pages_and_swap_cache path and so the freeing can take long enough to trigger the soft lockup. The lockup is harmless except when the system is setup to panic on softlockup which is not that unusual. The simplest way to work around this issue is to limit the maximum number of batches in a single mmu_gather. 10k of collected pages should be safe to prevent from soft lockups (we would have 2ms for one) even if they are all freed without an explicit scheduling point. This patch doesn't add any new explicit scheduling points because it relies on zap_pmd_range during page tables zapping which calls cond_resched per PMD. The following lockup has been reported for 3.0 kernel with a huge process (in order of hundreds gigs but I do know any more details). BUG: soft lockup - CPU#56 stuck for 22s! [kernel:31053] Modules linked in: af_packet nfs lockd fscache auth_rpcgss nfs_acl sunrpc mptctl mptbase autofs4 binfmt_misc dm_round_robin dm_multipath bonding cpufreq_conservative cpufreq_userspace cpufreq_powersave pcc_cpufreq mperf microcode fuse loop osst sg sd_mod crc_t10dif st qla2xxx scsi_transport_fc scsi_tgt netxen_nic i7core_edac iTCO_wdt joydev e1000e serio_raw pcspkr edac_core iTCO_vendor_support acpi_power_meter rtc_cmos hpwdt hpilo button container usbhid hid dm_mirror dm_region_hash dm_log linear uhci_hcd ehci_hcd usbcore usb_common scsi_dh_emc scsi_dh_alua scsi_dh_hp_sw scsi_dh_rdac scsi_dh dm_snapshot pcnet32 mii edd dm_mod raid1 ext3 mbcache jbd fan thermal processor thermal_sys hwmon cciss scsi_mod Supported: Yes CPU 56 Pid: 31053, comm: kernel Not tainted 3.0.31-0.9-default #1 HP ProLiant DL580 G7 RIP: 0010: _raw_spin_unlock_irqrestore+0x8/0x10 RSP: 0018:ffff883ec1037af0 EFLAGS: 00000206 RAX: 0000000000000e00 RBX: ffffea01a0817e28 RCX: ffff88803ffd9e80 RDX: 0000000000000200 RSI: 0000000000000206 RDI: 0000000000000206 RBP: 0000000000000002 R08: 0000000000000001 R09: ffff887ec724a400 R10: 0000000000000000 R11: dead000000200200 R12: ffffffff8144c26e R13: 0000000000000030 R14: 0000000000000297 R15: 000000000000000e FS: 00007ed834282700(0000) GS:ffff88c03f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000068b240 CR3: 0000003ec13c5000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kernel (pid: 31053, threadinfo ffff883ec1036000, task ffff883ebd5d4100) Call Trace: release_pages+0xc5/0x260 free_pages_and_swap_cache+0x9d/0xc0 tlb_flush_mmu+0x5c/0x80 tlb_finish_mmu+0xe/0x50 exit_mmap+0xbd/0x120 mmput+0x49/0x120 exit_mm+0x122/0x160 do_exit+0x17a/0x430 do_group_exit+0x3d/0xb0 get_signal_to_deliver+0x247/0x480 do_signal+0x71/0x1b0 do_notify_resume+0x98/0xb0 int_signal+0x12/0x17 DWARF2 unwinder stuck at int_signal+0x12/0x17 Signed-off-by: Michal Hocko Cc: [3.0+] Cc: Mel Gorman Cc: Rik van Riel Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/tlb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ed6642ad03e0..25f01d0bc149 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -78,6 +78,14 @@ struct mmu_gather_batch { #define MAX_GATHER_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *)) +/* + * Limit the maximum number of mmu_gather batches to reduce a risk of soft + * lockups for non-preemptible kernels on huge machines when a lot of memory + * is zapped during unmapping. + * 10K pages freed at once should be safe even without a preemption point. + */ +#define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) + /* struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ @@ -96,6 +104,7 @@ struct mmu_gather { struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; + unsigned int batch_count; }; #define HAVE_GENERIC_MMU_GATHER -- cgit v1.2.2 From 901593f2bf221659a605bdc1dcb11376ea934163 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 19 Dec 2012 16:51:06 +0000 Subject: drm: Only evict the blocks required to create the requested hole Avoid clobbering adjacent blocks if they happen to expire earlier and amalgamate together to form the requested hole. In passing this fixes a regression from commit ea7b1dd44867e9cd6bac67e7c9fc3f128b5b255c Author: Daniel Vetter Date: Fri Feb 18 17:59:12 2011 +0100 drm: mm: track free areas implicitly which swaps the end address for size (with a potential overflow) and effectively causes the eviction code to clobber almost all earlier buffers above the evictee. v2: Check the original hole not the adjusted as the coloring may confuse us when later searching for the overlapping nodes. Also make sure that we do apply the range restriction and color adjustment in the same order for both scanning, searching and insertion. v3: Send the version that was actually tested. Note that this seems to be ducttape of decent quality ot paper over some of our unbind related gpu hangs reported since 3.7. It is not fully effective though, and certainly doesn't fix the underlying bug. Signed-off-by: Chris Wilson Cc: Daniel Vetter [danvet: Added note plus bugzilla link and tested-by.] Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55984 Tested-by: Norbert Preining Acked-by: Dave Airlie --- include/drm/drm_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 0f4a366f6fa6..3527fb3f75bb 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -70,7 +70,7 @@ struct drm_mm { unsigned long scan_color; unsigned long scan_size; unsigned long scan_hit_start; - unsigned scan_hit_size; + unsigned long scan_hit_end; unsigned scanned_blocks; unsigned long scan_start; unsigned long scan_end; -- cgit v1.2.2 From 08c097fc3bb283299a6915a6a3795edab85979b1 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 9 Jan 2013 14:16:30 +0000 Subject: cred: Remove tgcred pointer from struct cred Commit 3a50597de863 ("KEYS: Make the session and process keyrings per-thread") removed the definition of the thread_group_cred structure, but left a now unused pointer in struct cred. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index abb2cd50f6b2..04421e825365 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -128,7 +128,6 @@ struct cred { struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ - struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ -- cgit v1.2.2 From 54b956b903607f8f8878754dd4352da6a54a1da2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Jan 2013 10:57:01 -0800 Subject: Remove __dev* markings from init.h Now that all in-kernel users of __dev* are gone, let's remove them from init.h to keep them from popping up again and again. Thanks to Bill Pemberton for doing all of the hard work to make removal of this possible. Cc: Bill Pemberton Cc: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- include/linux/init.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index a799273714ac..10ed4f436458 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -93,14 +93,6 @@ #define __exit __section(.exit.text) __exitused __cold notrace -/* Used for HOTPLUG, but that is always enabled now, so just make them noops */ -#define __devinit -#define __devinitdata -#define __devinitconst -#define __devexit -#define __devexitdata -#define __devexitconst - /* Used for HOTPLUG_CPU */ #define __cpuinit __section(.cpuinit.text) __cold notrace #define __cpuinitdata __section(.cpuinit.data) @@ -337,18 +329,6 @@ void __init parse_early_options(char *cmdline); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ -/* Functions marked as __devexit may be discarded at kernel link time, depending - on config options. Newer versions of binutils detect references from - retained sections to discarded sections and flag an error. Pointers to - __devexit functions must use __devexit_p(function_name), the wrapper will - insert either the function_name or NULL, depending on the config options. - */ -#if defined(MODULE) || defined(CONFIG_HOTPLUG) -#define __devexit_p(x) x -#else -#define __devexit_p(x) NULL -#endif - #ifdef MODULE #define __exit_p(x) x #else -- cgit v1.2.2 From 47ecfcb7d01418fcbfbc75183ba5e28e98b667b2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 11 Jan 2013 09:27:01 +0000 Subject: mm: compaction: Partially revert capture of suitable high-order page Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when waiting for POLLIN on a local TCP socket. It was easier to trigger if there was disk IO and dirty pages at the same time and he bisected it to commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). The intention of that patch was to improve high-order allocations under memory pressure after changes made to reclaim in 3.6 drastically hurt THP allocations but the approach was flawed. For Eric, the problem was that page->pfmemalloc was not being cleared for captured pages leading to a poor interaction with swap-over-NFS support causing the packets to be dropped. However, I identified a few more problems with the patch including the fact that it can increase contention on zone->lock in some cases which could result in async direct compaction being aborted early. In retrospect the capture patch took the wrong approach. What it should have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it was allocating for THP and avoided races that way. While the patch was showing to improve allocation success rates at the time, the benefit is marginal given the relative complexity and it should be revisited from scratch in the context of the other reclaim-related changes that have taken place since the patch was first written and tested. This patch partially reverts commit 1fb3f8ca "mm: compaction: capture a suitable high-order page immediately when it is made available". Reported-and-tested-by: Eric Wong Tested-by: Eric Dumazet Cc: stable@vger.kernel.org Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++-- include/linux/mm.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6ecb6dc2f303..cc7bddeaf553 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync, bool *contended, struct page **page); + bool sync, bool *contended); extern int compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 63204078f72b..66e2f7c61e5c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); int split_free_page(struct page *page); -int capture_free_page(struct page *page, int alloc_order, int migratetype); /* * Compound pages have a destructor function. Provide a -- cgit v1.2.2 From 896f97ea95c1d29c0520ee0766b66b7f64cb967c Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Fri, 11 Jan 2013 14:31:36 -0800 Subject: lib: cpu_rmap: avoid flushing all workqueues In some cases, free_irq_cpu_rmap() is called while holding a lock (eg rtnl). This can lead to deadlocks, because it invokes flush_scheduled_work() which ends up waiting for whole system workqueue to flush, but some pending works might try to acquire the lock we are already holding. This commit uses reference-counting to replace irq_run_affinity_notifiers(). It also removes irq_run_affinity_notifiers() altogether. [akpm@linux-foundation.org: eliminate free_cpu_rmap, rename cpu_rmap_reclaim() to cpu_rmap_release(), propagate kref_put() retval from cpu_rmap_put()] Signed-off-by: David Decotigny Reviewed-by: Ben Hutchings Acked-by: Eric Dumazet Reviewed-by: Josh Triplett Cc: "David S. Miller" Cc: Or Gerlitz Acked-by: Amir Vadai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu_rmap.h | 13 ++++--------- include/linux/interrupt.h | 5 ----- 2 files changed, 4 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index ac3bbb5b9502..1739510d8994 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -13,9 +13,11 @@ #include #include #include +#include /** * struct cpu_rmap - CPU affinity reverse-map + * @refcount: kref for object * @size: Number of objects to be reverse-mapped * @used: Number of objects added * @obj: Pointer to array of object pointers @@ -23,6 +25,7 @@ * based on affinity masks */ struct cpu_rmap { + struct kref refcount; u16 size, used; void **obj; struct { @@ -33,15 +36,7 @@ struct cpu_rmap { #define CPU_RMAP_DIST_INF 0xffff extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags); - -/** - * free_cpu_rmap - free CPU affinity reverse-map - * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL - */ -static inline void free_cpu_rmap(struct cpu_rmap *rmap) -{ - kfree(rmap); -} +extern int cpu_rmap_put(struct cpu_rmap *rmap); extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj); extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5e4e6170f43a..5fa5afeeb759 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -268,11 +268,6 @@ struct irq_affinity_notify { extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); -static inline void irq_run_affinity_notifiers(void) -{ - flush_scheduled_work(); -} - #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) -- cgit v1.2.2 From 1b963c81b14509e330e0fe3218b645ece2738dc5 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 11 Jan 2013 14:31:56 -0800 Subject: lockdep, rwsem: provide down_write_nest_lock() down_write_nest_lock() provides a means to annotate locking scenario where an outer lock is guaranteed to serialize the order nested locks are being acquired. This is analogoue to already existing mutex_lock_nest_lock() and spin_lock_nest_lock(). Signed-off-by: Jiri Kosina Cc: Rik van Riel Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Mel Gorman Tested-by: Sedat Dilek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 3 +++ include/linux/rwsem.h | 9 +++++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 00e46376e28f..2bca44b0893c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -524,14 +524,17 @@ static inline void print_irqtrace_events(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCK_ALLOC # ifdef CONFIG_PROVE_LOCKING # define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i) +# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) # define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, NULL, i) # else # define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i) +# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) # define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, NULL, i) # endif # define rwsem_release(l, n, i) lock_release(l, n, i) #else # define rwsem_acquire(l, s, t, i) do { } while (0) +# define rwsem_acquire_nest(l, s, t, n, i) do { } while (0) # define rwsem_acquire_read(l, s, t, i) do { } while (0) # define rwsem_release(l, n, i) do { } while (0) #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 54bd7cd7ecbd..413cc11e414a 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -125,8 +125,17 @@ extern void downgrade_write(struct rw_semaphore *sem); */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); +extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); + +# define down_write_nest_lock(sem, nest_lock) \ +do { \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ + _down_write_nest_lock(sem, &(nest_lock)->dep_map); \ +} while (0); + #else # define down_read_nested(sem, subclass) down_read(sem) +# define down_write_nest_lock(sem, nest_lock) down_read(sem) # define down_write_nested(sem, subclass) down_write(sem) #endif -- cgit v1.2.2 From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Jan 2013 14:32:05 -0800 Subject: audit: create explicit AUDIT_SECCOMP event type The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1 could only kill a process. While we still want to make sure an audit record is forced on a kill, this should use a separate record type since seccomp mode 2 introduces other behaviors. In the case of "handled" behaviors (process wasn't killed), only emit a record if the process is under inspection. This change also fixes userspace examination of seccomp audit events, since it was considered malformed due to missing fields of the AUDIT_ANOM_ABEND event type. Signed-off-by: Kees Cook Cc: Al Viro Cc: Eric Paris Cc: Jeff Layton Cc: "Eric W. Biederman" Cc: Julien Tinnes Acked-by: Will Drewry Acked-by: Steve Grubb Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 3 ++- include/uapi/linux/audit.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index bce729afbcf9..9d5104d7aba9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -157,7 +157,8 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (unlikely(!audit_dummy_context())) + /* Force a record to be reported if a signal was delivered. */ + if (signr || unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 76352ac45f24..09a2d94ab113 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -106,6 +106,7 @@ #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ #define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ +#define AUDIT_SECCOMP 1326 /* Secure Computing event */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ -- cgit v1.2.2 From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 11 Jan 2013 14:32:13 -0800 Subject: linux/audit.h: move ptrace.h include to kernel header While the kernel internals want pt_regs (and so it includes linux/ptrace.h), the user version of audit.h does not need it. So move the include out of the uapi version. This avoids issues where people want the audit defines and userland ptrace api. Including both the kernel ptrace and the userland ptrace headers can easily lead to failure. Signed-off-by: Mike Frysinger Cc: Eric Paris Cc: Al Viro Reviewed-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 1 + include/uapi/linux/audit.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9d5104d7aba9..5a6d718adf34 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -24,6 +24,7 @@ #define _LINUX_AUDIT_H_ #include +#include #include struct audit_sig_info { diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 09a2d94ab113..9f096f1c0907 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -26,7 +26,6 @@ #include #include -#include /* The netlink messages for the audit system is divided into blocks: * 1000 - 1099 are for commanding the audit system -- cgit v1.2.2 From 8fb74b9fb2b182d54beee592350d9ea1f325917a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 11 Jan 2013 14:32:16 -0800 Subject: mm: compaction: partially revert capture of suitable high-order page Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when waiting for POLLIN on a local TCP socket. It was easier to trigger if there was disk IO and dirty pages at the same time and he bisected it to commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). The intention of that patch was to improve high-order allocations under memory pressure after changes made to reclaim in 3.6 drastically hurt THP allocations but the approach was flawed. For Eric, the problem was that page->pfmemalloc was not being cleared for captured pages leading to a poor interaction with swap-over-NFS support causing the packets to be dropped. However, I identified a few more problems with the patch including the fact that it can increase contention on zone->lock in some cases which could result in async direct compaction being aborted early. In retrospect the capture patch took the wrong approach. What it should have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it was allocating for THP and avoided races that way. While the patch was showing to improve allocation success rates at the time, the benefit is marginal given the relative complexity and it should be revisited from scratch in the context of the other reclaim-related changes that have taken place since the patch was first written and tested. This patch partially reverts commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page immediately when it is made available"). Reported-and-tested-by: Eric Wong Tested-by: Eric Dumazet Cc: Signed-off-by: Mel Gorman Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 4 ++-- include/linux/mm.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 6ecb6dc2f303..cc7bddeaf553 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync, bool *contended, struct page **page); + bool sync, bool *contended); extern int compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 63204078f72b..66e2f7c61e5c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); int split_free_page(struct page *page); -int capture_free_page(struct page *page, int alloc_order, int migratetype); /* * Compound pages have a destructor function. Provide a -- cgit v1.2.2 From 3cb7a56344ca45ee56d71c5f8fe9f922306bff1f Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Fri, 11 Jan 2013 14:32:20 -0800 Subject: lib/rbtree.c: avoid the use of non-static __always_inline lib/rbtree.c declared __rb_erase_color() as __always_inline void, and then exported it with EXPORT_SYMBOL. This was because __rb_erase_color() must be exported for augmented rbtree users, but it must also be inlined into rb_erase() so that the dummy callback can get optimized out of that call site. (Actually with a modern compiler, none of the dummy callback functions should even be generated as separate text functions). The above usage is legal C, but it was unusual enough for some compilers to warn about it. This change makes things more explicit, with a static __always_inline ____rb_erase_color function for use in rb_erase(), and a separate non-inline __rb_erase_color function for use in rb_erase_augmented call sites. Signed-off-by: Michel Lespinasse Reported-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 2ac60c9cf644..fea49b5da12a 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -123,9 +123,9 @@ __rb_change_child(struct rb_node *old, struct rb_node *new, extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); -static __always_inline void -rb_erase_augmented(struct rb_node *node, struct rb_root *root, - const struct rb_augment_callbacks *augment) +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right, *tmp = node->rb_left; struct rb_node *parent, *rebalance; @@ -217,6 +217,14 @@ rb_erase_augmented(struct rb_node *node, struct rb_root *root, } augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } -- cgit v1.2.2 From d07d7507bfb4e23735c9b83e397c43e1e8a173e8 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 10 Jan 2013 23:19:10 +0000 Subject: net, wireless: overwrite default_ethtool_ops Since: commit 2c60db037034d27f8c636403355d52872da92f81 Author: Eric Dumazet Date: Sun Sep 16 09:17:26 2012 +0000 net: provide a default dev->ethtool_ops wireless core does not correctly assign ethtool_ops. After alloc_netdev*() call, some cfg80211 drivers provide they own ethtool_ops, but some do not. For them, wireless core provide generic cfg80211_ethtool_ops, which is assigned in NETDEV_REGISTER notify call: if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; But after Eric's commit, dev->ethtool_ops is no longer NULL (on cfg80211 drivers without custom ethtool_ops), but points to &default_ethtool_ops. In order to fix the problem, provide function which will overwrite default_ethtool_ops and use it by wireless core. Signed-off-by: Stanislaw Gruszka Acked-by: Johannes Berg Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c599e4782d45..9ef07d0868b6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,6 +60,9 @@ struct wireless_dev; #define SET_ETHTOOL_OPS(netdev,ops) \ ( (netdev)->ethtool_ops = (ops) ) +extern void netdev_set_default_ethtool_ops(struct net_device *dev, + const struct ethtool_ops *ops); + /* hardware address assignment types */ #define NET_ADDR_PERM 0 /* address is permanent (default) */ #define NET_ADDR_RANDOM 1 /* address is generated randomly */ -- cgit v1.2.2 From 1e47ee8367babe6a5e8adf44a714c7086657b87e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 10 Jan 2013 16:12:01 +0100 Subject: netfilter: nf_conntrack: fix BUG_ON while removing nf_conntrack with netns canqun zhang reported that we're hitting BUG_ON in the nf_conntrack_destroy path when calling kfree_skb while rmmod'ing the nf_conntrack module. Currently, the nf_ct_destroy hook is being set to NULL in the destroy path of conntrack.init_net. However, this is a problem since init_net may be destroyed before any other existing netns (we cannot assume any specific ordering while releasing existing netns according to what I read in recent emails). Thanks to Gao feng for initial patch to address this issue. Reported-by: canqun zhang Acked-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index d8f5b9f52169..e98aeb3da033 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -31,6 +31,8 @@ extern void nf_conntrack_cleanup(struct net *net); extern int nf_conntrack_proto_init(struct net *net); extern void nf_conntrack_proto_fini(struct net *net); +extern void nf_conntrack_cleanup_end(void); + extern bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, -- cgit v1.2.2 From 5dbbaf2de89613d19a9286d4db0a535ca2735d26 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 14 Jan 2013 07:12:19 +0000 Subject: tun: fix LSM/SELinux labeling of tun/tap devices This patch corrects some problems with LSM/SELinux that were introduced with the multiqueue patchset. The problem stems from the fact that the multiqueue work changed the relationship between the tun device and its associated socket; before the socket persisted for the life of the device, however after the multiqueue changes the socket only persisted for the life of the userspace connection (fd open). For non-persistent devices this is not an issue, but for persistent devices this can cause the tun device to lose its SELinux label. We correct this problem by adding an opaque LSM security blob to the tun device struct which allows us to have the LSM security state, e.g. SELinux labeling information, persist for the lifetime of the tun device. In the process we tweak the LSM hooks to work with this new approach to TUN device/socket labeling and introduce a new LSM hook, security_tun_dev_attach_queue(), to approve requests to attach to a TUN queue via TUNSETQUEUE. The SELinux code has been adjusted to match the new LSM hooks, the other LSMs do not make use of the LSM TUN controls. This patch makes use of the recently added "tun_socket:attach_queue" permission to restrict access to the TUNSETQUEUE operation. On older SELinux policies which do not define the "tun_socket:attach_queue" permission the access control decision for TUNSETQUEUE will be handled according to the SELinux policy's unknown permission setting. Signed-off-by: Paul Moore Acked-by: Eric Paris Tested-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/security.h | 59 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 0f6afc657f77..eee7478cda70 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -989,17 +989,29 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * tells the LSM to decrement the number of secmark labeling rules loaded * @req_classify_flow: * Sets the flow's sid to the openreq sid. + * @tun_dev_alloc_security: + * This hook allows a module to allocate a security structure for a TUN + * device. + * @security pointer to a security structure pointer. + * Returns a zero on success, negative values on failure. + * @tun_dev_free_security: + * This hook allows a module to free the security structure for a TUN + * device. + * @security pointer to the TUN device's security structure * @tun_dev_create: * Check permissions prior to creating a new TUN device. - * @tun_dev_post_create: - * This hook allows a module to update or allocate a per-socket security - * structure. - * @sk contains the newly created sock structure. + * @tun_dev_attach_queue: + * Check permissions prior to attaching to a TUN device queue. + * @security pointer to the TUN device's security structure. * @tun_dev_attach: - * Check permissions prior to attaching to a persistent TUN device. This - * hook can also be used by the module to update any security state + * This hook can be used by the module to update any security state * associated with the TUN device's sock structure. * @sk contains the existing sock structure. + * @security pointer to the TUN device's security structure. + * @tun_dev_open: + * This hook can be used by the module to update any security state + * associated with the TUN device's security structure. + * @security pointer to the TUN devices's security structure. * * Security hooks for XFRM operations. * @@ -1620,9 +1632,12 @@ struct security_operations { void (*secmark_refcount_inc) (void); void (*secmark_refcount_dec) (void); void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); - int (*tun_dev_create)(void); - void (*tun_dev_post_create)(struct sock *sk); - int (*tun_dev_attach)(struct sock *sk); + int (*tun_dev_alloc_security) (void **security); + void (*tun_dev_free_security) (void *security); + int (*tun_dev_create) (void); + int (*tun_dev_attach_queue) (void *security); + int (*tun_dev_attach) (struct sock *sk, void *security); + int (*tun_dev_open) (void *security); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2566,9 +2581,12 @@ void security_inet_conn_established(struct sock *sk, int security_secmark_relabel_packet(u32 secid); void security_secmark_refcount_inc(void); void security_secmark_refcount_dec(void); +int security_tun_dev_alloc_security(void **security); +void security_tun_dev_free_security(void *security); int security_tun_dev_create(void); -void security_tun_dev_post_create(struct sock *sk); -int security_tun_dev_attach(struct sock *sk); +int security_tun_dev_attach_queue(void *security); +int security_tun_dev_attach(struct sock *sk, void *security); +int security_tun_dev_open(void *security); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct sock *sock, @@ -2733,16 +2751,31 @@ static inline void security_secmark_refcount_dec(void) { } +static inline int security_tun_dev_alloc_security(void **security) +{ + return 0; +} + +static inline void security_tun_dev_free_security(void *security) +{ +} + static inline int security_tun_dev_create(void) { return 0; } -static inline void security_tun_dev_post_create(struct sock *sk) +static inline int security_tun_dev_attach_queue(void *security) +{ + return 0; +} + +static inline int security_tun_dev_attach(struct sock *sk, void *security) { + return 0; } -static inline int security_tun_dev_attach(struct sock *sk) +static inline int security_tun_dev_open(void *security) { return 0; } -- cgit v1.2.2