From 252b3e8c1bc0c2b20348ae87d67efcd0a8209f72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 11 Dec 2012 04:07:42 +0000 Subject: netfilter: xt_CT: fix crash while destroy ct templates In (d871bef netfilter: ctnetlink: dump entries from the dying and unconfirmed lists), we assume that all conntrack objects are inserted in any of the existing lists. However, template conntrack objects were not. This results in hitting BUG_ON in the destroy_conntrack path while removing a rule that uses the CT target. This patch fixes the situation by adding the template lists, which is where template conntrack objects reside now. Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index a1d83cc8bf85..923cb20051ed 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -71,6 +71,7 @@ struct netns_ct { struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; -- cgit v1.2.2 From b45305fce5bb1abec263fcff9d81ebecd6306ede Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 17 Dec 2012 16:21:27 +0100 Subject: drm/i915: Implement workaround for broken CS tlb on i830/845 Now that Chris Wilson demonstrated that the key for stability on early gen 2 is to simply _never_ exchange the physical backing storage of batch buffers I've tried a stab at a kernel solution. Doesn't look too nefarious imho, now that I don't try to be too clever for my own good any more. v2: After discussing the various techniques, we've decided to always blit batches on the suspect devices, but allow userspace to opt out of the kernel workaround and assume full responsibility for providing coherent batches. 
The principal reason is that avoiding the blit does improve performance in a few key microbenchmarks and also in cairo-trace replays. Signed-Off-by: Daniel Vetter Signed-off-by: Chris Wilson [danvet: - Drop the hunk which uses HAS_BROKEN_CS_TLB to implement the ring wrap w/a. Suggested by Chris Wilson. - Also add the ACTHD check from Chris Wilson for the error state dumping, so that we still catch batches when userspace opts out of the w/a.] Signed-off-by: Daniel Vetter --- include/uapi/drm/i915_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b746a3cf5fa9..c4d2e9c74002 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 typedef struct drm_i915_getparam { int param; @@ -677,6 +678,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_SECURE (1<<9) +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. 
+ */ +#define I915_EXEC_IS_PINNED (1<<10) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK -- cgit v1.2.2 From b81034506fc9b879cb726feb01342be0cdbe6e25 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 20:37:06 +0000 Subject: drm: Export routines for inserting preallocated nodes into the mm manager Required by i915 in order to avoid the allocation in the middle of manipulating the drm_mm lists. Use a pair of stubs to preserve the existing EXPORT_SYMBOLs for backporting; to be removed later. Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula [danvet: bikeshedded-away the atomic parameter, it's not yet used anywhere.] Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- include/drm/drm_mm.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 06d7f798a08c..0f4a366f6fa6 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -158,12 +158,29 @@ static inline struct drm_mm_node *drm_mm_get_block_atomic_range( return drm_mm_get_block_range_generic(parent, size, alignment, 0, start, end, 1); } -extern int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment); + +extern int drm_mm_insert_node(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment); extern int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment, - unsigned long start, unsigned long end); + unsigned long size, + unsigned alignment, + unsigned long start, + unsigned long end); +extern int drm_mm_insert_node_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color); +extern int 
drm_mm_insert_node_in_range_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color, + unsigned long start, + unsigned long end); extern void drm_mm_put_block(struct drm_mm_node *cur); extern void drm_mm_remove_node(struct drm_mm_node *node); extern void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new); -- cgit v1.2.2 From 30e6c9fa93cf3dbc7cc6df1d748ad25e4264545a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Dec 2012 17:25:08 +0000 Subject: net: devnet_rename_seq should be a seqcount Using a seqlock for devnet_rename_seq is not a good idea, as device_rename() can sleep. As we hold RTNL, we dont need a protection for writers, and only need a seqcount so that readers can catch a change done by a writer. Bug added in commit c91f6df2db4972d3 (sockopt: Change getsockopt() of SO_BINDTODEVICE to return an interface name) Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Brian Haley Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02e0f6b156c3..c599e4782d45 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1576,7 +1576,7 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern rwlock_t dev_base_lock; /* Device list lock */ -extern seqlock_t devnet_rename_seq; /* Device rename lock */ +extern seqcount_t devnet_rename_seq; /* Device rename seq */ #define for_each_netdev(net, d) \ -- cgit v1.2.2 From 10db9069eb5c60195170a4119bdbcbce69a4945f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Dec 2012 01:54:51 +0000 Subject: netfilter: xt_CT: recover NOTRACK target support Florian Westphal reported that the removal of the NOTRACK target (9655050 netfilter: remove xt_NOTRACK) is breaking some existing setups. 
That removal had been scheduled a long time ago, as described in Documentation/feature-removal-schedule.txt What: xt_NOTRACK Files: net/netfilter/xt_NOTRACK.c When: April 2011 Why: Superseded by xt_CT Still, people may not have noticed / may have decided to stick to an old iptables version. I agree with him in that some more conservative approach by spotting some printk to warn users for some time is less aggressive. Current iptables 1.4.16.3 already contains the aliasing support that makes it point to the CT target, so upgrading would fix it. Still, the policy so far has been to avoid pushing our users to upgrade. As a solution, this patch recovers the NOTRACK target inside the CT target and it now spots a warning. Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/x_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 591db7d657a3..c24060ee411e 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -8,6 +8,7 @@ struct ebt_table; struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; + bool notrack_deprecated_warning; #if defined(CONFIG_BRIDGE_NF_EBTABLES) || \ defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE) struct ebt_table *broute_table; -- cgit v1.2.2 From 4520fb3c3690f2643006d85f09ecb74554c10e95 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Dec 2012 13:28:54 -0500 Subject: ext4: split off ext4_journalled_invalidatepage() In data=journal mode we don't need delalloc or DIO handling in invalidatepage and similarly in other modes we don't need the journal handling. So split invalidatepage implementations. 
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index f6372b011366..7e8c36bc7082 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -451,7 +451,7 @@ DEFINE_EVENT(ext4__page_op, ext4_releasepage, TP_ARGS(page) ); -TRACE_EVENT(ext4_invalidatepage, +DECLARE_EVENT_CLASS(ext4_invalidatepage_op, TP_PROTO(struct page *page, unsigned long offset), TP_ARGS(page, offset), @@ -477,6 +477,18 @@ TRACE_EVENT(ext4_invalidatepage, (unsigned long) __entry->index, __entry->offset) ); +DEFINE_EVENT(ext4_invalidatepage_op, ext4_invalidatepage, + TP_PROTO(struct page *page, unsigned long offset), + + TP_ARGS(page, offset) +); + +DEFINE_EVENT(ext4_invalidatepage_op, ext4_journalled_invalidatepage, + TP_PROTO(struct page *page, unsigned long offset), + + TP_ARGS(page, offset) +); + TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v1.2.2 From 53e872681fed6a43047e71bf927f77d06f467988 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Dec 2012 13:29:52 -0500 Subject: ext4: fix deadlock in journal_unmap_buffer() We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and trying invalidation again. Since the issue can happen only for a page straddling i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such a page from ext4_setattr(), check the return value and wait if necessary. 
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1be23d9fdacb..e30b66346942 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern void jbd2_journal_invalidatepage(journal_t *, +extern int jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned long); extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); extern int jbd2_journal_stop(handle_t *); -- cgit v1.2.2 From 08b60f8438879a84246d7debded31c9cb7aea6e4 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 24 Dec 2012 11:14:58 -0700 Subject: namei.h: include errno.h This solves: In file included from fs/ext3/symlink.c:20:0: include/linux/namei.h: In function 'retry_estale': include/linux/namei.h:114:19: error: 'ESTALE' undeclared (first use in this function) Signed-off-by: Stephen Warren Signed-off-by: Al Viro --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index e998c030061d..5a5ff57ceed4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -2,6 +2,7 @@ #define _LINUX_NAMEI_H #include +#include #include #include -- cgit v1.2.2 From c876ad7682155958d0c9c27afe9017925c230d64 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 21 Dec 2012 20:27:12 -0800 Subject: pidns: Stop pid allocation when init dies Oleg pointed out that in a pid namespace the sequence. - pid 1 becomes a zombie - setns(thepidns), fork,... - reaping pid 1. - The injected processes exiting. 
Can lead to processes attempting to access their child reaper and instead following a stale pointer. That waitpid for init can return before all of the processes in the pid namespace have exited is also unfortunate. Avoid these problems by disabling the allocation of new pids in a pid namespace when init dies, instead of when the last process in a pid namespace is reaped. Pointed-out-by: Oleg Nesterov Reviewed-by: Oleg Nesterov Signed-off-by: "Eric W. Biederman" --- include/linux/pid.h | 1 + include/linux/pid_namespace.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index b152d44fb181..2381c973d897 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last); extern struct pid *alloc_pid(struct pid_namespace *ns); extern void free_pid(struct pid *pid); +extern void disable_pid_allocation(struct pid_namespace *ns); /* * ns_of_pid() returns the pid namespace in which the specified pid was diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index bf285999273a..215e5e3dda10 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -21,7 +21,7 @@ struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; int last_pid; - int nr_hashed; + unsigned int nr_hashed; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; @@ -42,6 +42,8 @@ struct pid_namespace { extern struct pid_namespace init_pid_ns; +#define PIDNS_HASH_ADDING (1U << 31) + #ifdef CONFIG_PID_NS static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) { -- cgit v1.2.2 From 130f1b8f35f14d27c43da755f3c9226318c17f57 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 26 Dec 2012 10:39:23 -0700 Subject: PCI: Add PCIe Link Capability link speed and width names Add standard #defines for the Supported Link Speeds field in the PCIe Link 
Capabilities register. Note that prior to PCIe spec r3.0, these encodings were defined: 0001b 2.5GT/s Link speed supported 0010b 5.0GT/s and 2.5GT/s Link speed supported Starting with spec r3.0, these encodings refer to bits 0 and 1 in the Supported Link Speeds Vector in the Link Capabilities 2 register, and bits 0 and 1 there mean 2.5 GT/s and 5.0 GT/s, respectively. Therefore, code that followed r2.0 and interpreted 0x1 as 2.5GT/s and 0x2 as 5.0GT/s will continue to work, and we can identify a device using the new encodings because it will have a non-zero Link Capabilities 2 register. Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 6b7b6f1e2fd6..ebfadc56d1b4 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -458,6 +458,8 @@ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_SLS_2_5GB 0x1 /* LNKCAP2 SLS Vector bit 0 (2.5GT/s) */ +#define PCI_EXP_LNKCAP_SLS_5_0GB 0x2 /* LNKCAP2 SLS Vector bit 1 (5.0GT/s) */ #define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ #define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ -- cgit v1.2.2 From 812089e01b9f65f90fc8fc670d8cce72a0e01fbb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 1 Dec 2012 12:37:20 -0800 Subject: PCI: Reduce Ricoh 0xe822 SD card reader base clock frequency to 50MHz Otherwise it fails like this on cards like the Transcend 16GB SDHC card: mmc0: new SDHC card at address b368 mmcblk0: mmc0:b368 SDC 15.0 GiB mmcblk0: error -110 sending status command, retrying mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb0 Tested on my Lenovo x200 laptop. 
[bhelgaas: changelog] Signed-off-by: Andy Lutomirski Signed-off-by: Bjorn Helgaas Acked-by: Chris Ball CC: Manoj Iyer CC: stable@vger.kernel.org --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f8447376ddb..0eb65796bcb9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1568,6 +1568,7 @@ #define PCI_DEVICE_ID_RICOH_RL5C476 0x0476 #define PCI_DEVICE_ID_RICOH_RL5C478 0x0478 #define PCI_DEVICE_ID_RICOH_R5C822 0x0822 +#define PCI_DEVICE_ID_RICOH_R5CE822 0xe822 #define PCI_DEVICE_ID_RICOH_R5CE823 0xe823 #define PCI_DEVICE_ID_RICOH_R5C832 0x0832 #define PCI_DEVICE_ID_RICOH_R5C843 0x0843 -- cgit v1.2.2 From 3d0dcfbd8fa2a1e63fabb5f8edac8b8a27860d98 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 25 Dec 2012 20:48:24 +0000 Subject: netprio_cgroup: define sk_cgrp_prioidx only if NETPRIO_CGROUP is enabled sock->sk_cgrp_prioidx won't be used at all if CONFIG_NETPRIO_CGROUP=n. Signed-off-by: Li Zefan Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 93a6745bfdb2..182ca99405ad 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -367,7 +367,7 @@ struct sock { unsigned short sk_ack_backlog; unsigned short sk_max_ack_backlog; __u32 sk_priority; -#ifdef CONFIG_CGROUPS +#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) __u32 sk_cgrp_prioidx; #endif struct pid *sk_peer_pid; -- cgit v1.2.2 From ad4b3fb7ff9940bcdb1e4cd62bd189d10fa636ba Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 21 Dec 2012 13:03:50 -0500 Subject: mm: Fix PageHead when !CONFIG_PAGEFLAGS_EXTENDED Unfortunately with !CONFIG_PAGEFLAGS_EXTENDED, (!PageHead) is false, and (PageHead) is true, for tail pages. 
If this is indeed the intended behavior, which I doubt because it breaks cache cleaning on some ARM systems, then the nomenclature is highly problematic. This patch makes sure PageHead is only true for head pages and PageTail is only true for tail pages, and neither is true for non-compound pages. [ This buglet seems ancient - seems to have been introduced back in Apr 2008 in commit 6a1e7f777f61: "pageflags: convert to the use of new macros". And the reason nobody noticed is because the PageHead() tests are almost all about just sanity-checking, and only used on pages that are actual page heads. The fact that the old code returned true for tail pages too was thus not really noticeable. - Linus ] Signed-off-by: Christoffer Dall Acked-by: Andrea Arcangeli Cc: Andrew Morton Cc: Will Deacon Cc: Steve Capper Cc: Christoph Lameter Cc: stable@kernel.org # 2.6.26+ Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b5d13841604e..70473da47b3f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -362,7 +362,7 @@ static inline void ClearPageCompound(struct page *page) * pages on the LRU and/or pagecache. 
*/ TESTPAGEFLAG(Compound, compound) -__PAGEFLAG(Head, compound) +__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound) /* * PG_reclaim is used in combination with PG_compound to mark the @@ -374,8 +374,14 @@ __PAGEFLAG(Head, compound) * PG_compound & PG_reclaim => Tail page * PG_compound & ~PG_reclaim => Head page */ +#define PG_head_mask ((1L << PG_compound)) #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) +static inline int PageHead(struct page *page) +{ + return ((page->flags & PG_head_tail_mask) == PG_head_mask); +} + static inline int PageTail(struct page *page) { return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); -- cgit v1.2.2 From a7a88b23737095e6c18a20c5d4eef9e25ec5b829 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Jan 2013 02:04:23 -0800 Subject: mempolicy: remove arg from mpol_parse_str, mpol_to_str Remove the unused argument (formerly no_context) from mpol_parse_str() and from mpol_to_str(). Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 9adc270de7ef..92bc9988a180 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, #ifdef CONFIG_TMPFS -extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context); +extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif -extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context); +extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) @@ -296,15 +295,13 @@ static inline void check_highest_zone(int k) } #ifdef CONFIG_TMPFS -static inline int mpol_parse_str(char *str, struct mempolicy **mpol, 
- int no_context) +static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif -static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, - int no_context) +static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) { return 0; } -- cgit v1.2.2 From 42288fe366c4f1ce7522bc9f27d0bc2a81c55264 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 21 Dec 2012 23:10:25 +0000 Subject: mm: mempolicy: Convert shared_policy mutex to spinlock Sasha was fuzzing with trinity and reported the following problem: BUG: sleeping function called from invalid context at kernel/mutex.c:269 in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main 2 locks held by trinity-main/6361: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x1e4/0x4f0 #1: (&(&mm->page_table_lock)->rlock){+.+...}, at: [] handle_pte_fault+0x3f7/0x6a0 Pid: 6361, comm: trinity-main Tainted: G W 3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74 Call Trace: __might_sleep+0x1c3/0x1e0 mutex_lock_nested+0x29/0x50 mpol_shared_policy_lookup+0x2e/0x90 shmem_get_policy+0x2e/0x30 get_vma_policy+0x5a/0xa0 mpol_misplaced+0x41/0x1d0 handle_pte_fault+0x465/0x6a0 This was triggered by a different version of automatic NUMA balancing but in theory the current version is vulnerable to the same problem. do_numa_page -> numa_migrate_prep -> mpol_misplaced -> get_vma_policy -> shmem_get_policy It's very unlikely this will happen as shared pages are not marked pte_numa -- see the page_mapcount() check in change_pte_range() -- but it is possible. To address this, this patch restores sp->lock as originally implemented by Kosaki Motohiro. In the path where get_vma_policy() is called, it should not be calling sp_alloc() so it is not necessary to treat the PTL specially. 
Signed-off-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 92bc9988a180..0d7df39a5885 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -123,7 +123,7 @@ struct sp_node { struct shared_policy { struct rb_root root; - struct mutex mutex; + spinlock_t lock; }; void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); -- cgit v1.2.2 From 3d33fcc11bdd11b6949cf5c406726a094395dc4f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Jan 2013 15:12:55 +0000 Subject: UAPI: Remove empty Kbuild files Empty files can get deleted by the patch program, so remove empty Kbuild files and their links from the parent Kbuilds. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/Kbuild | 3 --- include/linux/Kbuild | 5 ----- include/linux/hdlc/Kbuild | 0 include/linux/hsi/Kbuild | 0 include/linux/raid/Kbuild | 0 include/linux/usb/Kbuild | 0 include/rdma/Kbuild | 0 include/sound/Kbuild | 0 8 files changed, 8 deletions(-) delete mode 100644 include/linux/Kbuild delete mode 100644 include/linux/hdlc/Kbuild delete mode 100644 include/linux/hsi/Kbuild delete mode 100644 include/linux/raid/Kbuild delete mode 100644 include/linux/usb/Kbuild delete mode 100644 include/rdma/Kbuild delete mode 100644 include/sound/Kbuild (limited to 'include') diff --git a/include/Kbuild b/include/Kbuild index 83256b64166a..1dfd33e8d43b 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -1,8 +1,5 @@ # Top-level Makefile calls into asm-$(ARCH) # List only non-arch directories below -header-y += linux/ -header-y += sound/ -header-y += rdma/ header-y += video/ header-y += scsi/ diff --git a/include/linux/Kbuild b/include/linux/Kbuild deleted file mode 100644 index 7fe2dae251e5..000000000000 --- a/include/linux/Kbuild 
+++ /dev/null @@ -1,5 +0,0 @@ -header-y += dvb/ -header-y += hdlc/ -header-y += hsi/ -header-y += raid/ -header-y += usb/ diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/include/sound/Kbuild b/include/sound/Kbuild deleted file mode 100644 index e69de29bb2d1..000000000000 -- cgit v1.2.2 From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:02:05 -0800 Subject: pstore: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from the pstore filesystem. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. 
Cc: Bill Pemberton Cc: Anton Vorontsov Cc: Colin Cross Cc: Kees Cook Cc: Tony Luck Signed-off-by: Greg Kroah-Hartman --- include/linux/pstore_ram.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 098d2a838296..cb6ab5feab67 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -46,9 +46,8 @@ struct persistent_ram_zone { size_t old_log_size; }; -struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, - size_t size, u32 sig, - int ecc_size); +struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, + u32 sig, int ecc_size); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); -- cgit v1.2.2 From 0f58a01ddd5e8177255705ba15e64c3b74d67993 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:12:59 -0800 Subject: Drivers: bcma: remove __dev* attributes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit, __devexit_p, and __devexit from these drivers. Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. 
Cc: Bill Pemberton Cc: "Rafał Miłecki" Signed-off-by: Greg Kroah-Hartman --- include/linux/bcma/bcma_driver_gmac_cmn.h | 2 +- include/linux/bcma/bcma_driver_pci.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h index def894b83b0d..4dd1f33e36a2 100644 --- a/include/linux/bcma/bcma_driver_gmac_cmn.h +++ b/include/linux/bcma/bcma_driver_gmac_cmn.h @@ -92,7 +92,7 @@ struct bcma_drv_gmac_cmn { #define gmac_cmn_write32(gc, offset, val) bcma_write32((gc)->core, offset, val) #ifdef CONFIG_BCMA_DRIVER_GMAC_CMN -extern void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); +extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc); #else static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { } #endif diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 41da581e1612..c48d98d27b77 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -214,7 +214,7 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) -extern void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc); +extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); extern void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend); -- cgit v1.2.2 From e389623a68622e3c9be440ab522fac1aa1ca3454 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 21 Dec 2012 15:15:49 -0800 Subject: include: remove __dev* attributes. CONFIG_HOTPLUG is going away as an option. As a result, the __dev* markings need to be removed. This change removes the use of __devinit from some include files that were previously missed. 
Based on patches originally written by Bill Pemberton, but redone by me in order to handle some of the coding style issues better, by hand. Cc: Bill Pemberton Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/parport.h | 4 ++-- include/linux/ata_platform.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h index 40528cb977e8..2c9f9d4336ca 100644 --- a/include/asm-generic/parport.h +++ b/include/asm-generic/parport.h @@ -10,8 +10,8 @@ * to devices on the PCI bus. */ -static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); -static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) +static int parport_pc_find_isa_ports(int autoirq, int autodma); +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) { #ifdef CONFIG_ISA return parport_pc_find_isa_ports(autoirq, autodma); diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index fe9989636b62..b9fde17f767c 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -15,12 +15,12 @@ struct pata_platform_info { unsigned int irq_flags; }; -extern int __devinit __pata_platform_probe(struct device *dev, - struct resource *io_res, - struct resource *ctl_res, - struct resource *irq_res, - unsigned int ioport_shift, - int __pio_mask); +extern int __pata_platform_probe(struct device *dev, + struct resource *io_res, + struct resource *ctl_res, + struct resource *irq_res, + unsigned int ioport_shift, + int __pio_mask); /* * Marvell SATA private data -- cgit v1.2.2 From d81aecb5e680311e1f3fd71e49e6a4072d2374d2 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Tue, 18 Dec 2012 02:30:17 +0900 Subject: drm/exynos: change file license to GPL This patch changes file license to GPL Most of exynos files had been copied from some random file and not updated correctly. So this patch corrects the file license. 
Signed-off-by: Inki Dae Signed-off-by: Kyungmin Park --- include/drm/exynos_drm.h | 22 ++++------------------ include/uapi/drm/exynos_drm.h | 22 ++++------------------ 2 files changed, 8 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h index 808dad29607a..d6aeaf3c6d6c 100644 --- a/include/drm/exynos_drm.h +++ b/include/drm/exynos_drm.h @@ -6,24 +6,10 @@ * Joonyoung Shim * Seung-Woo Kim * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
*/ #ifndef _EXYNOS_DRM_H_ #define _EXYNOS_DRM_H_ diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index e7f52c334005..81354d0e7fd7 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -6,24 +6,10 @@ * Joonyoung Shim * Seung-Woo Kim * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. */ #ifndef _UAPI_EXYNOS_DRM_H_ -- cgit v1.2.2 From 4f21877cb8b9e9f85a541af43b9ee66d9006bb69 Mon Sep 17 00:00:00 2001 From: Eunchul Kim Date: Sat, 22 Dec 2012 17:49:24 +0900 Subject: drm/exynos: consider both case of vflip and hflip. This patch considers both case of vflip and hflip. 
If we want the contents of the buffer to be rotated by 180 degrees, then we can use either h,vflip or a 180 degree rotation. Changelog v2: - added EXYNOS_DRM_FLIP_BOTH enum value to avoid build warning. Signed-off-by: Eunchul Kim Signed-off-by: Inki Dae --- include/uapi/drm/exynos_drm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index 81354d0e7fd7..d5844122ff32 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -171,6 +171,8 @@ enum drm_exynos_flip { EXYNOS_DRM_FLIP_NONE = (0 << 0), EXYNOS_DRM_FLIP_VERTICAL = (1 << 0), EXYNOS_DRM_FLIP_HORIZONTAL = (1 << 1), + EXYNOS_DRM_FLIP_BOTH = EXYNOS_DRM_FLIP_VERTICAL | + EXYNOS_DRM_FLIP_HORIZONTAL, }; enum drm_exynos_degree { -- cgit v1.2.2 From 03f595668017f1a1fb971c02fc37140bc6e7bb1c Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:50 -0800 Subject: ipc: add sysctl to specify desired next object id Add 3 new variables and sysctls to tune them (by one "next_id" variable for messages, semaphores and shared memory respectively). This variable can be used to set desired id for next allocated IPC object. By default it's equal to -1 and old behaviour is preserved. If this variable is non-negative, then desired idr will be extracted from it and used as a start value to search for free IDR slot. Notes: 1) this patch doesn't guarantee that the new object will have desired id. So it's up to user space how to handle new object with wrong id. 2) After a successful id allocation attempt, "next_id" will be set back to -1 (if it was non-negative). [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W.
Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index fe771978e877..ae221a7b5092 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -24,6 +24,7 @@ struct ipc_ids { unsigned short seq_max; struct rw_semaphore rw_mutex; struct idr ipcs_idr; + int next_id; }; struct ipc_namespace { -- cgit v1.2.2 From f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:52 -0800 Subject: ipc: message queue receive cleanup Move all message related manipulation into one function msg_fill(). Actually, two functions because of the compat one. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/msg.h b/include/linux/msg.h index 7a4b9e97d29a..fc5743a554e6 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -34,7 +34,8 @@ struct msg_queue { /* Helper routines for sys_msgsnd and sys_msgrcv */ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); -extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext, - size_t msgsz, long msgtyp, int msgflg); +extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, + int msgflg, + long (*msg_fill)(void __user *, struct msg_msg *, size_t)); #endif /* _LINUX_MSG_H */ -- cgit v1.2.2 From 4a674f34ba04a002244edaf891b5da7fc1473ae8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 
15:34:55 -0800 Subject: ipc: introduce message queue copy feature This patch is required for checkpoint/restore in userspace. c/r requires some way to get all pending IPC messages without deleting them from the queue (checkpoint can fail and in this case tasks will be resumed, so the queue has to remain valid). To achieve this, a new operation flag MSG_COPY for the sys_msgrcv() system call was introduced. If this flag is specified, then mtype is interpreted as the number of the message to copy. If MSG_COPY is set, then the kernel will allocate a dummy message with the passed size, and then use the new copy_msg() helper function to copy the desired message (instead of unlinking it from the queue). Notes: 1) Return -ENOSYS if MSG_COPY is specified, but CONFIG_CHECKPOINT_RESTORE is not set. Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h index 78dbd2f996a3..22d95c6854e0 100644 --- a/include/uapi/linux/msg.h +++ b/include/uapi/linux/msg.h @@ -10,6 +10,7 @@ /* msgrcv options */ #define MSG_NOERROR 010000 /* no error if message is too big */ #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ +#define MSG_COPY 040000 /* copy (not remove) all queue messages */ /* Obsolete, used only for backwards compatibility and libc5 compiles */ struct msqid_ds { -- cgit v1.2.2 From 3a665531a3b7c2ad2c87903b24646be6916340e4 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:56 -0800 Subject: selftests: IPC message queue copy feature test This test can be used to check whether the kernel supports IPC message queue copy and restore features (required by CRIU project). Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W.
Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/msg.h b/include/linux/msg.h index fc5743a554e6..391af8d11cce 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -36,6 +36,7 @@ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, size_t msgsz, int msgflg); extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, - long (*msg_fill)(void __user *, struct msg_msg *, size_t)); + long (*msg_fill)(void __user *, struct msg_msg *, + size_t)); #endif /* _LINUX_MSG_H */ -- cgit v1.2.2 From a458431e176ddb27e8ef8b98c2a681b217337393 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 4 Jan 2013 15:35:08 -0800 Subject: mm: fix zone_watermark_ok_safe() accounting of isolated pages Commit 702d1a6e0766 ("memory-hotplug: fix kswapd looping forever problem") added an isolated pageblocks counter (nr_pageblock_isolate in struct zone) and used it to adjust free pages counter in zone_watermark_ok_safe() to prevent kswapd looping forever problem. Then later, commit 2139cbe627b8 ("cma: fix counting of isolated pages") fixed accounting of isolated pages in global free pages counter. It made the previous zone_watermark_ok_safe() fix unnecessary and potentially harmful (cause now isolated pages may be accounted twice making free pages counter incorrect). This patch removes the special isolated pageblocks counter altogether which fixes zone_watermark_ok_safe() free pages check. 
Reported-by: Tomasz Stanislawski Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kyungmin Park Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Aaditya Kumar Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Cc: Michal Hocko Cc: Marek Szyprowski Cc: Michal Nazarewicz Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4bec5be82cab..73b64a38b984 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -503,14 +503,6 @@ struct zone { * rarely used fields: */ const char *name; -#ifdef CONFIG_MEMORY_ISOLATION - /* - * the number of MIGRATE_ISOLATE *pageblock*. - * We need this for free page counting. Look at zone_watermark_ok_safe. - * It's protected by zone->lock - */ - int nr_pageblock_isolate; -#endif } ____cacheline_internodealigned_in_smp; typedef enum { -- cgit v1.2.2 From 53a59fc67f97374758e63a9c785891ec62324c81 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 4 Jan 2013 15:35:12 -0800 Subject: mm: limit mmu_gather batching to fix soft lockups on !CONFIG_PREEMPT Since commit e303297e6c3a ("mm: extended batches for generic mmu_gather") we are batching pages to be freed until either tlb_next_batch cannot allocate a new batch or we are done. This works just fine most of the time but we can get in troubles with non-preemptible kernel (CONFIG_PREEMPT_NONE or CONFIG_PREEMPT_VOLUNTARY) on large machines where too aggressive batching might lead to soft lockups during process exit path (exit_mmap) because there are no scheduling points down the free_pages_and_swap_cache path and so the freeing can take long enough to trigger the soft lockup. The lockup is harmless except when the system is setup to panic on softlockup which is not that unusual. The simplest way to work around this issue is to limit the maximum number of batches in a single mmu_gather. 
10k of collected pages should be safe to prevent soft lockups (we would have 2ms for one) even if they are all freed without an explicit scheduling point. This patch doesn't add any new explicit scheduling points because it relies on zap_pmd_range during page tables zapping which calls cond_resched per PMD. The following lockup has been reported for a 3.0 kernel with a huge process (on the order of hundreds of gigabytes, but I don't know any more details). BUG: soft lockup - CPU#56 stuck for 22s! [kernel:31053] Modules linked in: af_packet nfs lockd fscache auth_rpcgss nfs_acl sunrpc mptctl mptbase autofs4 binfmt_misc dm_round_robin dm_multipath bonding cpufreq_conservative cpufreq_userspace cpufreq_powersave pcc_cpufreq mperf microcode fuse loop osst sg sd_mod crc_t10dif st qla2xxx scsi_transport_fc scsi_tgt netxen_nic i7core_edac iTCO_wdt joydev e1000e serio_raw pcspkr edac_core iTCO_vendor_support acpi_power_meter rtc_cmos hpwdt hpilo button container usbhid hid dm_mirror dm_region_hash dm_log linear uhci_hcd ehci_hcd usbcore usb_common scsi_dh_emc scsi_dh_alua scsi_dh_hp_sw scsi_dh_rdac scsi_dh dm_snapshot pcnet32 mii edd dm_mod raid1 ext3 mbcache jbd fan thermal processor thermal_sys hwmon cciss scsi_mod Supported: Yes CPU 56 Pid: 31053, comm: kernel Not tainted 3.0.31-0.9-default #1 HP ProLiant DL580 G7 RIP: 0010: _raw_spin_unlock_irqrestore+0x8/0x10 RSP: 0018:ffff883ec1037af0 EFLAGS: 00000206 RAX: 0000000000000e00 RBX: ffffea01a0817e28 RCX: ffff88803ffd9e80 RDX: 0000000000000200 RSI: 0000000000000206 RDI: 0000000000000206 RBP: 0000000000000002 R08: 0000000000000001 R09: ffff887ec724a400 R10: 0000000000000000 R11: dead000000200200 R12: ffffffff8144c26e R13: 0000000000000030 R14: 0000000000000297 R15: 000000000000000e FS: 00007ed834282700(0000) GS:ffff88c03f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000068b240 CR3: 0000003ec13c5000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kernel (pid: 31053, threadinfo ffff883ec1036000, task ffff883ebd5d4100) Call Trace: release_pages+0xc5/0x260 free_pages_and_swap_cache+0x9d/0xc0 tlb_flush_mmu+0x5c/0x80 tlb_finish_mmu+0xe/0x50 exit_mmap+0xbd/0x120 mmput+0x49/0x120 exit_mm+0x122/0x160 do_exit+0x17a/0x430 do_group_exit+0x3d/0xb0 get_signal_to_deliver+0x247/0x480 do_signal+0x71/0x1b0 do_notify_resume+0x98/0xb0 int_signal+0x12/0x17 DWARF2 unwinder stuck at int_signal+0x12/0x17 Signed-off-by: Michal Hocko Cc: [3.0+] Cc: Mel Gorman Cc: Rik van Riel Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/tlb.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ed6642ad03e0..25f01d0bc149 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -78,6 +78,14 @@ struct mmu_gather_batch { #define MAX_GATHER_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *)) +/* + * Limit the maximum number of mmu_gather batches to reduce a risk of soft + * lockups for non-preemptible kernels on huge machines when a lot of memory + * is zapped during unmapping. + * 10K pages freed at once should be safe even without a preemption point. + */ +#define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) + /* struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. 
*/ @@ -96,6 +104,7 @@ struct mmu_gather { struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; + unsigned int batch_count; }; #define HAVE_GENERIC_MMU_GATHER -- cgit v1.2.2 From 08c097fc3bb283299a6915a6a3795edab85979b1 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 9 Jan 2013 14:16:30 +0000 Subject: cred: Remove tgcred pointer from struct cred Commit 3a50597de863 ("KEYS: Make the session and process keyrings per-thread") removed the definition of the thread_group_cred structure, but left a now unused pointer in struct cred. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index abb2cd50f6b2..04421e825365 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -128,7 +128,6 @@ struct cred { struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ - struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ -- cgit v1.2.2