From 18cb2aef91b37dbce2bec2f39bb1dddd0e9dd838 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 7 Aug 2010 03:26:23 +0900 Subject: percpu: handle __percpu notations in UP accessors UP accessors didn't take care of __percpu notations, leading to a lot of spurious sparse warnings on UP configurations. Fix it. Signed-off-by: Namhyung Kim Signed-off-by: Tejun Heo --- include/asm-generic/percpu.h | 15 ++++++++++----- include/linux/percpu.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b5043a9890d8..08923b684768 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -70,11 +70,16 @@ extern void setup_per_cpu_areas(void); #else /* ! SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &(var))) -#define __get_cpu_var(var) (var) -#define __raw_get_cpu_var(var) (var) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define VERIFY_PERCPU_PTR(__p) ({ \ + __verify_pcpu_ptr((__p)); \ + (typeof(*(__p)) __kernel __force *)(__p); \ +}) + +#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) +#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) #endif /* SMP */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b8b9084527b1..49466b13c5c6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -149,7 +149,7 @@ extern void __init percpu_init_late(void); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) /* can't distinguish from other static vars, always false */ static inline bool is_kernel_percpu_address(unsigned long addr) -- cgit v1.2.2 From 92298e668372f2f6c8a79fb272f13d65161a4876 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 13 Aug 2010 10:22:17 +1000 Subject: PCI: provide stub pci_domain_nr function for !CONFIG_PCI configs Allows the new PCI domain aware DRM code to compile on m68k. Reported-by: Geert Uytterhoeven Signed-off-by: Dave Airlie Signed-off-by: Jesse Barnes --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index b1d17956a153..c8d95e369ff4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1214,6 +1214,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) { return NULL; } +static inline int pci_domain_nr(struct pci_bus *bus) +{ return 0; } + #define dev_is_pci(d) (false) #define dev_is_pf(d) (false) #define dev_num_vf(d) (0) -- cgit v1.2.2 From 5dd531a03ad721b41911ddb32e6e0481404e7aaf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Aug 2010 13:52:19 +0200 Subject: block: add function call to switch the IO scheduler from a driver Currently drivers must do an elevator_exit() + elevator_init() to switch IO schedulers. There are a few problems with this: - Since commit 1abec4fdbb142e3ccb6ce99832fae42129134a96, elevator_init() requires a zeroed out q->elevator pointer. The two existing in-kernel users don't do that. - It will only work at initialization time, since using the above two-stage construct does not properly quiesce the queue.
So add elevator_change(), which takes care of this, and convert the elv_iosched_store() sysfs interface to use this helper as well. Reported-by: Peter Oberparleiter Reported-by: Kevin Vigor Signed-off-by: Jens Axboe --- include/linux/elevator.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c958f4fce1e..926b50322a46 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -136,6 +136,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); +extern int elevator_change(struct request_queue *, const char *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* -- cgit v1.2.2 From e41e704bc4f49057fc68b643108366e6e6781aa3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Aug 2010 14:22:47 +0200 Subject: workqueue: improve destroy_workqueue() debuggability Now that the worklist is global, having works pending after wq destruction can easily lead to an oops, and destroy_workqueue() has several BUG_ON()s to catch these cases. Unfortunately, BUG_ON() doesn't tell much about how the work became pending after the final flush_workqueue(). This patch adds WQ_DYING which is set before the final flush begins. If a work is requested to be queued on a dying workqueue, WARN_ON_ONCE() is triggered and the request is ignored. This clearly indicates which caller is trying to queue a work on a dying workqueue and keeps the system working in most cases. The locking rule comment is updated such that the 'I' rule includes modifying the field from the destruction path. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f9d277bcd9a..c959666eafca 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -241,6 +241,8 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, -- cgit v1.2.2 From 2b8fd9186d9275b07aef43e5bb4e98cd571f9a7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Aug 2010 23:55:59 +0200 Subject: ACPI/PCI: Do not preserve _OSC control bits returned by a query There is the assumption in acpi_pci_osc_control_set() that it is always sufficient to compare the mask of _OSC control bits to be requested with the result of an _OSC query where all of the known control bits have been checked. However, in general, that need not be the case. For example, if an _OSC feature A depends on an _OSC feature B and control of A, B plus another _OSC feature C is requested simultaneously, the BIOS may return A, B, C, while it would only return C if A and C were requested without B. That may result in passing a wrong mask of _OSC control bits to an _OSC control request, in which case the BIOS may only grant control of a subset of the requested features. Moreover, acpi_pci_run_osc() will return an error code if that happens, and the caller of acpi_pci_osc_control_set() will not know that it's been granted control of some _OSC features. Consequently, the system will generally not work as expected.
Apart from this, acpi_pci_osc_control_set() always uses the mask of _OSC control bits returned by the very first invocation of acpi_pci_query_osc(), but that is done with the second argument equal to OSC_PCI_SEGMENT_GROUPS_SUPPORT, which generally happens to affect the returned _OSC control bits. For these reasons, make acpi_pci_osc_control_set() always check if control of the requested _OSC features will be granted before making the final control request. As a result, the osc_control_qry and osc_queried members of struct acpi_pci_root are not necessary any more, so drop them and remove the remaining code referring to them. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- include/acpi/acpi_bus.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index baacd98e7cc6..4de84ce3a927 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -377,9 +377,6 @@ struct acpi_pci_root { u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ - u32 osc_control_qry; /* the latest _OSC query result */ - - u32 osc_queried:1; /* has _OSC control been queried? */ }; /* helper */ -- cgit v1.2.2 From 75fb60f26befb59dbfa05cb122972642b7bdd219 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 23 Aug 2010 23:53:11 +0200 Subject: ACPI/PCI: Negotiate _OSC control bits before requesting them It is possible that the BIOS will not grant control of all _OSC features requested via acpi_pci_osc_control_set(), so it is recommended to negotiate the final set of _OSC features with the query flag set before calling _OSC to request control of these features. To implement it, rework acpi_pci_osc_control_set() so that the caller can specify the mask of _OSC control bits to negotiate and the mask of _OSC control bits that are absolutely necessary to it. Then, acpi_pci_osc_control_set() will run _OSC queries in a loop until the mask of _OSC control bits returned by the BIOS is equal to the mask passed to it. Also, before running the _OSC request, acpi_pci_osc_control_set() will check if the caller's required control bits are present in the final mask. Using this mechanism we will be able to avoid situations in which the BIOS doesn't grant control of certain _OSC features because they depend on some other _OSC features that have not been requested. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- include/linux/acpi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ccf94dc5acdf..c227757feb06 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -304,8 +304,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); OSC_PCI_EXPRESS_PME_CONTROL | \ OSC_PCI_EXPRESS_AER_CONTROL | \ OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - -extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, + u32 *mask, u32 req); extern void acpi_early_init(void); #else /* !CONFIG_ACPI */ -- cgit v1.2.2 From 8a2e8e5dec7e29c56a46ba176c664ab6a3d04118 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Aug 2010 10:33:56 +0200 Subject: workqueue: fix cwq->nr_active underflow cwq->nr_active is used to keep track of how many work items are active for the cpu workqueue, where 'active' is defined as either pending on the global worklist or executing.
This is used to implement the max_active limit and workqueue freezing. If a work item is queued after nr_active has already reached max_active, the work item doesn't increment nr_active and is put on the delayed queue and gets activated later as previous active work items retire. try_to_grab_pending(), which is used in the cancellation path, unconditionally decremented nr_active whether the work item being cancelled is currently active or delayed, so cancelling a delayed work item makes nr_active underflow. This breaks max_active enforcement and triggers BUG_ON() in destroy_workqueue() later on. This patch fixes this bug by adding a flag WORK_STRUCT_DELAYED, which is set while a work item is on the delayed list, and by making try_to_grab_pending() decrement nr_active iff the work item is currently active. The addition of the flag enlarges cwq alignment to 256 bytes, which is getting a bit too large. It's scheduled to be reduced back to 128 bytes by merging WORK_STRUCT_PENDING and WORK_STRUCT_CWQ in the next devel cycle. Signed-off-by: Tejun Heo Reported-by: Johannes Berg --- include/linux/workqueue.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c959666eafca..f11100f96482 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,18 +25,20 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */ - WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */ + WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ + WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ + WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ #else - WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ #endif WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -59,8 +61,8 @@ enum { /* * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 128 bytes which isn't too - * excessive while allowing 15 workqueue flush colors. + * off. This makes cwqs aligned to 256 bytes and allows 15 + * workqueue flush colors. */ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, -- cgit v1.2.2 From a28dec2f26013aad89446b1f708f948617bc28a2 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sun, 8 Aug 2010 18:03:33 +0400 Subject: powerpc/85xx: Add P1021 PCI IDs and quirks This is needed for proper PCI-E support on P1021 SoCs.
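For illustration, IDs like these are consumed by the Freescale PCIe header quirks in arch/powerpc/sysdev/fsl_pci.c, which sits outside this include-limited view; a sketch of the expected hookup, mirroring the existing P1020 entries (the quirk function name is assumed from that pattern):

	DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_DEVICE_ID_P1021E,
				 quirk_fsl_pcie_header);
	DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_DEVICE_ID_P1021,
				 quirk_fsl_pcie_header);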
Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d36cad..10d33309e9a6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2300,6 +2300,8 @@ #define PCI_DEVICE_ID_P2010 0x0079 #define PCI_DEVICE_ID_P1020E 0x0100 #define PCI_DEVICE_ID_P1020 0x0101 +#define PCI_DEVICE_ID_P1021E 0x0102 +#define PCI_DEVICE_ID_P1021 0x0103 #define PCI_DEVICE_ID_P1011E 0x0108 #define PCI_DEVICE_ID_P1011 0x0109 #define PCI_DEVICE_ID_P1022E 0x0110 -- cgit v1.2.2 From 4e4438b86527e8bf1f49503a30d487e401e64f9c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 1 Sep 2010 08:55:24 -0600 Subject: gpiolib: Add 'struct gpio_chip' forward declaration for !GPIOLIB case With CONFIG_GPIOLIB=n, the 'struct gpio_chip' is not declared, so the following pops up on PowerPC: cc1: warnings being treated as errors In file included from arch/powerpc/platforms/52xx/mpc52xx_common.c:19: include/linux/of_gpio.h:74: warning: 'struct gpio_chip' declared inside parameter list include/linux/of_gpio.h:74: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/of_gpio.h:75: warning: 'struct gpio_chip' declared inside parameter list make[2]: *** [arch/powerpc/platforms/52xx/mpc52xx_common.o] Error 1 This patch fixes the issue by providing the proper forward declaration. Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 03f616b78cfa..e41f7dd1ae67 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -13,6 +13,7 @@ #include <linux/errno.h> struct device; +struct gpio_chip; /* * Some platforms don't support the GPIO programming interface. -- cgit v1.2.2 From ef5dc121d5a0bb1fa477c5395277259f07d318a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 2 Sep 2010 15:48:16 -0700 Subject: mutex: Fix annotations to include it in kernel-locking docbook Fix kernel-doc notation in linux/mutex.h and kernel/mutex.c, then add these 2 files to the kernel-locking docbook as the Mutex API reference chapter. Add one API function to mutex-design.txt and correct a typo in that file. Signed-off-by: Randy Dunlap Cc: Rusty Russell LKML-Reference: <20100902154816.6cc2f9ad.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 878cab4f5fcc..f363bc8fdc74 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -78,6 +78,14 @@ struct mutex_waiter { # include <linux/mutex-debug.h> #else # define __DEBUG_MUTEX_INITIALIZER(lockname) +/** + * mutex_init - initialize the mutex + * @mutex: the mutex to be initialized + * + * Initialize the mutex to unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ # define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ -- cgit v1.2.2 From 3fb5a991916091a908d53608a5899240039fb51e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 2 Sep 2010 15:42:43 +0000 Subject: cls_cgroup: Fix rcu lockdep warning Dave reported an rcu lockdep warning on a 2.6.35.4 kernel. task->cgroups and task->cgroups->subsys[i] are protected by RCU. So we avoid accessing invalid pointers here.
This might happen, for example, when you are deref-ing those pointers while someone moves @task from one cgroup to another. Reported-by: Dave Jones Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- include/net/cls_cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc3536409..ef6c24a529e1 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -27,11 +27,17 @@ struct cgroup_cls_state #ifdef CONFIG_NET_CLS_CGROUP static inline u32 task_cls_classid(struct task_struct *p) { + int classid; + if (in_interrupt()) return 0; - return container_of(task_subsys_state(p, net_cls_subsys_id), - struct cgroup_cls_state, css)->classid; + rcu_read_lock(); + classid = container_of(task_subsys_state(p, net_cls_subsys_id), + struct cgroup_cls_state, css)->classid; + rcu_read_unlock(); + + return classid; } #else extern int net_cls_subsys_id; -- cgit v1.2.2 From 71cad0554956de87c3fc413b1eac9313887eb14f Mon Sep 17 00:00:00 2001 From: Philippe Langlais Date: Tue, 31 Aug 2010 14:19:09 +0200 Subject: serial: fix port type conflict between NS16550A & U6_16550A Bug seen by Dr. David Alan Gilbert with sparse. Signed-off-by: Philippe Langlais Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 3 +-- include/linux/serial_core.h | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/serial.h b/include/linux/serial.h index 1ebc694a6d52..ef914061511e 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -77,8 +77,7 @@ struct serial_struct { #define PORT_16654 11 #define PORT_16850 12 #define PORT_RSA 13 /* RSA-DV II/S card */ -#define PORT_U6_16550A 14 -#define PORT_MAX 14 +#define PORT_MAX 13 #define SERIAL_IO_PORT 0 #define SERIAL_IO_HUB6 1 diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 64458a9a8938..563e23400913 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -44,7 +44,8 @@ #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ #define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ -#define PORT_MAX_8250 18 /* max port ID */ +#define PORT_U6_16550A 19 /* ST-Ericsson U6xxx internal UART */ +#define PORT_MAX_8250 19 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed -- cgit v1.2.2 From f3c65b2870f2481f3646bc410a58a12989ecc704 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 4 Sep 2010 22:56:44 +0200 Subject: io-mapping: Fix the address space annotations Fixes a bunch of sparse warnings in io-mapping.h because of the inconsistent __iomem usage.
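To show what the stricter annotations catch, a minimal hypothetical user now keeps the mapping in an __iomem pointer and goes through the I/O accessors (mapping, offset and val are placeholders; the slot argument matches the signatures in the diff below):

	void __iomem *vaddr;

	vaddr = io_mapping_map_atomic_wc(mapping, offset, KM_USER0);
	iowrite32(val, vaddr);	/* a plain *vaddr store would now warn under sparse */
	io_mapping_unmap_atomic(vaddr, KM_USER0);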
Signed-off-by: Francisco Jerez LKML-Reference: <1283633804-11749-2-git-send-email-currojerez@riseup.net> Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0a6b3d5c490c..7fb592793738 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -79,7 +79,7 @@ io_mapping_free(struct io_mapping *mapping) } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) @@ -94,12 +94,12 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { iounmap_atomic(vaddr, slot); } -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { resource_size_t phys_addr; @@ -111,7 +111,7 @@ io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { iounmap(vaddr); } @@ -125,38 +125,38 @@ struct io_mapping; static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping __force *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); + iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { } /* Non-atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { } -- cgit v1.2.2 From 73457f0f836956747e0394320be2163c050e96ef Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 6 Aug 2010 01:59:14 +0300 Subject: cgroups: fix API thinko The cgroup_attach_task_current_cg API that we have upstream is backwards: we really need an API to attach the current task to the cgroups of another process A. In our case (vhost), a privileged user wants to attach its task to the cgroups from a less privileged one; the API makes us run it in the other task's context, and this fails. So let's make the API generic and just pass in 'from' and 'to' tasks. Add an inline wrapper for cgroup_attach_task_current_cg to avoid breaking bisect. Signed-off-by: Michael S.
Tsirkin Acked-by: Li Zefan Acked-by: Paul Menage --- include/linux/cgroup.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..5a53d8f039a2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,11 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +640,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; -- cgit v1.2.2 From f8f235e5bbf4e61f3e0886a44afb1dc4cfe8f337 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 27 Aug 2010 11:08:57 +0800 Subject: agp/intel: Fix cache control for Sandybridge Sandybridge GTT has new cache control bits in the PTE, which control graphics page caching in LLC or LLC/MLC, so we need to extend the mask function to respect the new bits. And set cache control to always LLC only by default on Gen6. Signed-off-by: Zhenyu Wang Cc: stable@kernel.org Signed-off-by: Chris Wilson --- include/linux/intel-gtt.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/intel-gtt.h (limited to 'include') diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h new file mode 100644 index 000000000000..1d19ab2afa39 --- /dev/null +++ b/include/linux/intel-gtt.h @@ -0,0 +1,20 @@ +/* + * Common Intel AGPGART and GTT definitions. + */ +#ifndef _INTEL_GTT_H +#define _INTEL_GTT_H + +#include <linux/agp_backend.h> + +/* This is for Intel only GTT controls. + * + * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only + */ + +#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2) +#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4) + +/* flag for GFDT type */ +#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3) + +#endif -- cgit v1.2.2 From febc88c5948f81114f64c3412011d695aecae233 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Sep 2010 14:46:37 +0200 Subject: semaphore: Add DEFINE_SEMAPHORE The full cleanup of init_MUTEX[_LOCKED] and DECLARE_MUTEX has not been done. Some of the users are real semaphores and we should name them as such instead of confusing everyone with "MUTEX". Provide the infrastructure to finally get rid of init_MUTEX[_LOCKED] and DECLARE_MUTEX.
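The intended conversion pattern, sketched on a hypothetical driver lock (not part of this patch):

	/* before: a true semaphore confusingly declared as a "MUTEX" */
	static DECLARE_MUTEX(dev_sem);

	/* after: same initial count of 1, honest name */
	static DEFINE_SEMAPHORE(dev_sem);

	if (down_interruptible(&dev_sem))
		return -ERESTARTSYS;
	/* ... critical section ... */
	up(&dev_sem);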
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Christoph Hellwig LKML-Reference: <20100907125054.795929962@linutronix.de> --- include/linux/semaphore.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 7415839ac890..5310d27abd2a 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -26,6 +26,9 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + #define DECLARE_MUTEX(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) -- cgit v1.2.2 From e3e55ff5854655d8723ad8b307f02515aecc3df5 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 7 Sep 2010 15:52:06 +0800 Subject: spi/dw_spi: clean the cs_control code commit 052dc7c45 "spi/dw_spi: conditional transfer mode change" introduced cs_control code, which has a bug: it uses the bit offset for the SPI mode to set the transfer mode in the control register. It also forces devices that don't need cs_control to re-configure the control registers for each SPI transfer. This patch fixes both issues. Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- include/linux/spi/dw_spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index cc813f95a2f2..c91302f3a257 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -14,7 +14,9 @@ #define SPI_MODE_OFFSET 6 #define SPI_SCPH_OFFSET 6 #define SPI_SCOL_OFFSET 7 + #define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_MASK (0x3 << SPI_TMOD_OFFSET) #define SPI_TMOD_TR 0x0 /* xmit & recv */ #define SPI_TMOD_TO 0x1 /* xmit only */ #define SPI_TMOD_RO 0x2 /* recv only */ -- cgit v1.2.2 From 6523ce1525e88c598c75a1a6b8c4edddfa9defe8 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 5 Sep 2010 18:02:29 +0000 Subject: ipvs: fix active FTP - Do not create expectation when forwarding the PORT command to avoid blocking the connection. The problem is that nf_conntrack_ftp.c:help() tries to create the same expectation later in POST_ROUTING and drops the packet with "dropping packet" message after failure in nf_ct_expect_related. - Change ip_vs_update_conntrack to alter the conntrack for related connections from the real server. If we do not alter the reply in this direction, the next packet from the client sent to vport 20 comes as a NEW connection. We alter it, but maybe some collision happens for both conntracks and the second conntrack gets destroyed immediately. The connection gets stuck too. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a4747a0f7303..f976885f686f 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -955,6 +955,9 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, + int outin); + #endif /* __KERNEL__ */ #endif /* _NET_IP_VS_H */ -- cgit v1.2.2 From 719f835853a92f6090258114a72ffe41f09155cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Sep 2010 05:08:44 +0000 Subject: udp: add rehash on connect() commit 30fff923, introduced in linux-2.6.33 (udp: bind() optimisation), added a secondary hash on UDP, hashed on (local addr, local port).
The problem is that the following sequence: fd = socket(...) connect(fd, &remote, ...) not only selects the remote end point (address and port), but also sets the local address, while the UDP stack had stored the socket in the secondary hash table while its local address was INADDR_ANY (or the ipv6 equivalent). The sequence is: - autobind(): choose a random local port, insert the socket in the hash tables [while the local address is INADDR_ANY] - connect(): set the remote address and port, change the local address to the IP given by a route lookup. When an incoming UDP frame comes, if more than 10 sockets are found in the primary hash table, we switch to the secondary table, and fail to find the socket because its local address changed. One solution to this problem is to rehash the datagram socket if needed. We add a new rehash(struct socket *) method in "struct proto", and implement this method for UDP v4 & v6, using a common helper. This rehashing only takes care of the secondary hash table, since the primary hash (based on the local port only) is not changed. Reported-by: Krzysztof Piotr Oledzki Signed-off-by: Eric Dumazet Tested-by: Krzysztof Piotr Oledzki Signed-off-by: David S. Miller --- include/net/sock.h | 1 + include/net/udp.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe16..adab9dc58183 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -752,6 +752,7 @@ struct proto { /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); /* Keeping track of sockets in use */ diff --git a/include/net/udp.h b/include/net/udp.h index 7abdf305da50..a184d3496b13 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct sock *sk) } extern void udp_lib_unhash(struct sock *sk); +extern void udp_lib_rehash(struct sock *sk, u16 new_hash); static inline void udp_lib_close(struct sock *sk, long timeout) { -- cgit v1.2.2 From d530148ae8bffe1b33f50d1776d185a6e85dc774 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 20 Aug 2010 16:49:43 +0800 Subject: dquot: do full inode dirty in allocating space Alex Shi found a regression when doing an ffsb test. The test has several threads, and each thread creates a small file, writes to it and then deletes it. ffsb reports about a 20% regression, and Alex bisected it to 43d2932d88e4. The test will call __mark_inode_dirty 3 times. Without this commit, we only take inode_lock one time, while with it, we take the lock 3 times with flags (I_DIRTY_SYNC, I_DIRTY_PAGES, I_DIRTY). Perf shows the lock contention increased too much. The patch below fixes it. The fs is allocating blocks, which usually means file writes, and the inode will be dirtied soon. We fully dirty the inode to reduce some inode_lock contention in several calls of __mark_inode_dirty. Jan Kara: Added comment.
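The affected pattern, as a hypothetical filesystem write path (err and nr_blocks are illustrative):

	err = dquot_alloc_space(inode, nr_blocks << inode->i_blkbits);
	if (err)
		return err;	/* over quota */
	/*
	 * The inode is now fully dirty, so the later I_DIRTY_SYNC and
	 * I_DIRTY_PAGES passes through __mark_inode_dirty see the bits
	 * already set and return before touching inode_lock.
	 */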
Signed-off-by: Shaohua Li Signed-off-by: Alex Shi Signed-off-by: Jan Kara --- include/linux/quotaops.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d50ba858cfe0..d1a9193960f1 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -274,8 +274,14 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) int ret; ret = dquot_alloc_space_nodirty(inode, nr); - if (!ret) - mark_inode_dirty_sync(inode); + if (!ret) { + /* + * Mark inode fully dirty. Since we are allocating blocks, inode + * would become fully dirty soon anyway and it reportedly + * reduces inode_lock contention. + */ + mark_inode_dirty(inode); + } return ret; } -- cgit v1.2.2 From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 31 Aug 2010 15:52:27 +0200 Subject: mm: Move vma_stack_continue into mm.h So it can be used by all that need to check for that. Signed-off-by: Stefan Bader Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index e6b1210772ce..74949fbef8c6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -864,6 +864,12 @@ int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); -- cgit v1.2.2 From a73f8844e1fc54c3762555c1cf1f71774142ca91 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 8 Sep 2010 16:54:54 -0600 Subject: lglock: make lg_lock_global() actually lock globally lg_lock_global() currently only acquires spinlocks for online CPUs, but it's meant to lock all possible CPUs. Lglock-protected resources may be associated with removed CPUs - and, indeed, that could happen with the per-superblock open files lists. At Nick's suggestion, change for_each_online_cpu() to for_each_possible_cpu() to protect accesses to those resources. 
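For reference, such locks are used through the lg_* wrappers; a minimal sketch with an illustrative lock name:

	DEFINE_LGLOCK(my_lglock);

	/* common case: cheap per-cpu acquisition */
	lg_local_lock(my_lglock);
	/* ... touch this cpu's part of the protected resource ... */
	lg_local_unlock(my_lglock);

	/* rare case: exclude everyone; with this fix, offline cpus too */
	lg_global_lock(my_lglock);
	/* ... walk all per-cpu state ... */
	lg_global_unlock(my_lglock);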
Cc: Al Viro Acked-by: Nick Piggin Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- include/linux/lglock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index b288cb713b90..f549056fb20b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -150,7 +150,7 @@ int i; \ preempt_disable(); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -161,7 +161,7 @@ void name##_global_unlock(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ -- cgit v1.2.2 From f3c65b2870f2481f3646bc410a58a12989ecc704 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 9 Sep 2010 16:37:24 -0700 Subject: mmc: avoid getting CID on SDIO-only cards The introduction of support for SD combo cards breaks the initialization of all CSR SDIO chips. The GO_IDLE (CMD0) in mmc_sd_get_cid() causes CSR chips to be reset (this is non-standard behavior). When initializing an SDIO card, check for a combo card by using the memory present bit in the R4 response to IO_SEND_OP_COND (CMD5). This avoids the call to mmc_sd_get_cid() on an SDIO-only card. Signed-off-by: David Vrabel Acked-by: Michał Mirosław Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmc/sdio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 329a8faa6e37..245cdacee544 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -38,6 +38,8 @@ * [8:0] Byte/block count */ +#define R4_MEMORY_PRESENT (1 << 27) + /* SDIO status in R5 Type -- cgit v1.2.2 From e0bf1024b36be90da241af3c2767311e055b612c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 9 Sep 2010 16:37:26 -0700 Subject: kfifo: add parentheses for macro parameter references Some macro parameter references inside the typeof() operator are not enclosed in parentheses. It should be safer to add them.
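This is defensive macro hygiene rather than a fix for a known in-tree breakage; the classic form of the hazard being closed is:

	#define DOUBLE_BAD(x)	(x * 2)
	#define DOUBLE_OK(x)	((x) * 2)

	DOUBLE_BAD(a + b);	/* expands to (a + b * 2), not what was meant */
	DOUBLE_OK(a + b);	/* expands to ((a + b) * 2) */

Parenthesizing the parameter inside typeof() keeps the expansion well-defined for any expression a caller might pass.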
Signed-off-by: Huang Ying Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 58 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 4aa95f203f3e..62dbee554f60 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -214,7 +214,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.in = __tmp->kfifo.out = 0; \ }) @@ -228,7 +228,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset_out(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.out = __tmp->kfifo.in; \ }) @@ -238,7 +238,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_len(fifo) \ ({ \ - typeof(fifo + 1) __tmpl = (fifo); \ + typeof((fifo) + 1) __tmpl = (fifo); \ __tmpl->kfifo.in - __tmpl->kfifo.out; \ }) @@ -248,7 +248,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_empty(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ __tmpq->kfifo.in == __tmpq->kfifo.out; \ }) @@ -258,7 +258,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_full(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ kfifo_len(__tmpq) > __tmpq->kfifo.mask; \ }) @@ -269,7 +269,7 @@ __kfifo_must_check_helper(unsigned int val) #define kfifo_avail(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ unsigned int __avail = kfifo_size(__tmpq) - kfifo_len(__tmpq); \ (__recsize) ? ((__avail <= __recsize) ? 0 : \ @@ -284,7 +284,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_skip(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__recsize) \ @@ -302,7 +302,7 @@ __kfifo_must_check_helper( \ #define kfifo_peek_len(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (!__recsize) ? kfifo_len(__tmp) * sizeof(*__tmp->type) : \ @@ -325,7 +325,7 @@ __kfifo_must_check_helper( \ #define kfifo_alloc(fifo, size, gfp_mask) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? \ __kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \ @@ -339,7 +339,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_free(fifo) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__is_kfifo_ptr(__tmp)) \ __kfifo_free(__kfifo); \ @@ -358,7 +358,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_init(fifo, buffer, size) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? 
\ __kfifo_init(__kfifo, buffer, size, sizeof(*__tmp->type)) : \ @@ -379,8 +379,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_put(fifo, val) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -421,8 +421,8 @@ __kfifo_must_check_helper( \ #define kfifo_get(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -462,8 +462,8 @@ __kfifo_must_check_helper( \ #define kfifo_peek(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -501,8 +501,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_in(fifo, buf, n) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -554,8 +554,8 @@ __kfifo_must_check_helper( \ #define kfifo_out(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -611,7 +611,7 @@ __kfifo_must_check_helper( \ #define kfifo_from_user(fifo, from, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -639,7 +639,7 @@ __kfifo_must_check_helper( \ #define kfifo_to_user(fifo, to, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -666,7 +666,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -690,7 +690,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -717,7 +717,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -741,7 +741,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + 
typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -766,8 +766,8 @@ __kfifo_must_check_helper( \ #define kfifo_out_peek(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ -- cgit v1.2.2 From 31583bb0cf6cc40f2a468a4d2f3b9cbefd24f891 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Sep 2010 16:37:37 -0700 Subject: cgroups: fix API thinko Add cgroup_attach_task_all() The existing cgroup_attach_task_current_cg() API is called by a thread to attach another thread to all of its cgroups; this is unsuitable for cases where a privileged task wants to attach itself to the cgroups of a less privileged one, since the call must be made from the context of the target task. This patch adds a more generic cgroup_attach_task_all() API that allows both the source task and to-be-moved task to be specified. cgroup_attach_task_current_cg() becomes a specialization of the more generic new function. [menage@google.com: rewrote changelog] [akpm@linux-foundation.org: address reviewer comments] Signed-off-by: Michael S. Tsirkin Tested-by: Alex Williamson Acked-by: Paul Menage Cc: Li Zefan Cc: Ben Blum Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..0c991023ee47 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); + +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; -- cgit v1.2.2 From 4969c1192d15afa3389e7ae3302096ff684ba655 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Sep 2010 16:37:52 -0700 Subject: mm: fix swapin race condition The pte_same check is reliable only if the swap entry remains pinned (by the page lock on swapcache). We also have to ensure the swapcache isn't removed before we take the lock, as try_to_free_swap won't care about the page pin. One of the possible impacts of this patch is that a KSM-shared page can point to the anon_vma of another process, which could exit before the page is freed. This can leave a page with a pointer to a recycled anon_vma object, or worse, a pointer to something that is no longer an anon_vma.
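The matching caller change lives in mm/memory.c, outside this include-limited diff; condensed to a sketch (not the verbatim hunk), the swapin path now reads roughly:

	lock_page(page);
	/* the swap entry is pinned from here on; only now decide about copying */
	if (ksm_might_need_to_copy(page, vma, address)) {
		swapcache = page;
		page = ksm_does_need_to_copy(page, vma, address);
		/* ... on failure, unlock and release the swapcache page ... */
	}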
[riel@redhat.com: changelog help] Signed-off-by: Andrea Arcangeli Acked-by: Hugh Dickins Reviewed-by: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 74d691ee9121..3319a6967626 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,6 +16,9 @@ struct stable_node; struct mem_cgroup; +struct page *ksm_does_need_to_copy(struct page *page, + struct vm_area_struct *vma, unsigned long address); + #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); @@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page, * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? */ -struct page *ksm_does_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { struct anon_vma *anon_vma = page_anon_vma(page); - if (!anon_vma || - (anon_vma->root == vma->anon_vma->root && - page->index == linear_page_index(vma, address))) - return page; - - return ksm_does_need_to_copy(page, vma, address); + return anon_vma && + (anon_vma->root != vma->anon_vma->root || + page->index != linear_page_index(vma, address)); } int page_referenced_ksm(struct page *page, @@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { - return page; + return 0; } static inline int page_referenced_ksm(struct page *page, -- cgit v1.2.2 From 5affb607720d734ca572b8a77c5c7d62d3042b6f Mon Sep 17 00:00:00 2001 From: Gregory Bean Date: Thu, 9 Sep 2010 16:38:02 -0700 Subject: gpio: sx150x: correct and refine reset-on-probe behavior Replace the arbitrary software-reset call from the device-probe method, because: - It is defective. To work correctly, it should be two byte writes, not a single word write. As it stands, it does nothing. - Some devices with sx150x expanders installed have their NRESET pins ganged on the same line, so resetting one causes the others to reset - not a nice thing to do arbitrarily! - The probe, usually taking place at boot, implies a recent hard-reset, so a software reset at this point is just a waste of energy anyway. Therefore, make it optional, defaulting to off, as this will match the common case of probing at powerup and also matches the current broken no-op behavior. Signed-off-by: Gregory Bean Reviewed-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/i2c/sx150x.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h index ee3049cb9ba5..52baa79d69a7 100644 --- a/include/linux/i2c/sx150x.h +++ b/include/linux/i2c/sx150x.h @@ -63,6 +63,9 @@ * IRQ lines will appear. Similarly to gpio_base, the expander * will create a block of irqs beginning at this number. * This value is ignored if irq_summary is < 0. 
+ * @reset_during_probe: If set to true, the driver will trigger a full + * reset of the chip at the beginning of the probe + * in order to place it in a known state. */ struct sx150x_platform_data { unsigned gpio_base; @@ -73,6 +76,7 @@ struct sx150x_platform_data { u16 io_polarity; int irq_summary; unsigned irq_base; + bool reset_during_probe; }; #endif /* __LINUX_I2C_SX150X_H */ -- cgit v1.2.2 From c956126c137d97acb6f4d56fa9572d0bcc84e4ed Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 9 Sep 2010 16:38:03 -0700 Subject: gpio: doc updates There's been some recent confusion about error checking GPIO numbers. Briefly, it should be handled mostly during setup, when gpio_request() is called, and NEVER by expecting gpio_is_valid to report more than never-usable GPIO numbers. [akpm@linux-foundation.org: terminate unterminated comment] Signed-off-by: David Brownell Cc: "Eric Miao" Cc: "Ryan Mallon" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/gpio.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c7376bf80b06..8ca18e26d7e3 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -16,15 +16,27 @@ * While the GPIO programming interface defines valid GPIO numbers * to be in the range 0..MAX_INT, this library restricts them to the * smaller range 0..ARCH_NR_GPIOS-1. + * + * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of + * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is + * actually an estimate of a board-specific value. */ #ifndef ARCH_NR_GPIOS #define ARCH_NR_GPIOS 256 #endif +/* + * "valid" GPIO numbers are nonnegative and may be passed to + * setup routines like gpio_request(). only some valid numbers + * can successfully be requested and used. + * + * Invalid GPIO numbers are useful for indicating no-such-GPIO in + * platform data and other tables. + */ + static inline int gpio_is_valid(int number) { - /* only some non-negative numbers are valid */ return ((unsigned)number) < ARCH_NR_GPIOS; } -- cgit v1.2.2 From 910321ea817a202ff70fac666e37e2c8e2f88823 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:07 -0700 Subject: swap: revert special hibernation allocation Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf "hibernation: freeze swap at hibernation". It complicated matters by adding a second swap allocation path, just for hibernation, without in any way fixing the issue that it was intended to address - page reclaim after fixing the hibernation image might free swap from a page already imaged as swapcache, letting its swap be reallocated to store a different page of the image: resulting in data corruption if the imaged page were freed as clean then swapped back in. Pages freed to si->swap_map were still in danger of being reallocated by the alternative allocation path. I guess it inadvertently fixed slow SSD swap allocation for hibernation, as reported by Nigel Cunningham: by missing out the discards that occur on the usual swap allocation path; but that was unintentional, and needs a separate fix. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: "Rafael J.
Wysocki" Cc: Ondrej Zary Cc: Andrea Gelmini Cc: Balbir Singh Cc: Andrea Arcangeli Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fee51a11b73..bf4eb62506db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -315,6 +315,7 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -331,13 +332,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_HIBERNATION -void hibernation_freeze_swap(void); -void hibernation_thaw_swap(void); -swp_entry_t get_swap_for_hibernation(int type); -void swap_free_for_hibernation(swp_entry_t val); -#endif - /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); -- cgit v1.2.2 From 3399446632739fcd05fd8b272b476a69c6e6d14a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:11 -0700 Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs show a shift since I last tested in December: in part because of firmware updates, in part because of the necessary move from barriers to awaiting completion at the block layer. While discard at swapon still shows as slightly beneficial on both, discarding 1MB swap cluster when allocating is now disadvantageous: adds 25% overhead on Intel, adds 230% on OCZ (YMMV). Surrender: discard as presently implemented is more hindrance than help for swap; but might prove useful on other devices, or with improvements. So continue to do the discard at swapon, but make discard while swapping conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which has been using only the lower 16 bits of int flags). We can add a --discard or -d to swapon(8), and a "discard" to swap in /etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2, nilfs2. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Cc: Nigel Cunningham Cc: Tejun Heo Cc: Jens Axboe Cc: James Bottomley Cc: "Martin K. Petersen" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bf4eb62506db..7cdd63366f88 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -19,6 +19,7 @@ struct bio; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 +#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ static inline int current_is_kswapd(void) { @@ -142,7 +143,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap?
From 3399446632739fcd05fd8b272b476a69c6e6d14a Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Thu, 9 Sep 2010 16:38:11 -0700
Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD

Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs
show a shift since I last tested in December: in part because of
firmware updates, in part because of the necessary move from barriers to
awaiting completion at the block layer.  While discard at swapon still
shows as slightly beneficial on both, discarding a 1MB swap cluster when
allocating is now disadvantageous: it adds 25% overhead on Intel, and
230% on OCZ (YMMV).

Surrender: discard as presently implemented is more hindrance than help
for swap; but it might prove useful on other devices, or with
improvements.  So continue to do the discard at swapon, but make discard
while swapping conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which
has been using only the lower 16 bits of int flags).

We can add a --discard or -d to swapon(8), and a "discard" to swap in
/etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2,
nilfs2.

Signed-off-by: Hugh Dickins
Cc: Christoph Hellwig
Cc: Nigel Cunningham
Cc: Tejun Heo
Cc: Jens Axboe
Cc: James Bottomley
Cc: "Martin K. Petersen"
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/swap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bf4eb62506db..7cdd63366f88 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -19,6 +19,7 @@ struct bio;
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
+#define SWAP_FLAG_DISCARD	0x10000 /* discard swap cluster after use */

 static inline int current_is_kswapd(void)
 {
@@ -142,7 +143,7 @@ struct swap_extent {
 enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap? */
-	SWP_DISCARDABLE = (1 << 2),	/* blkdev supports discard */
+	SWP_DISCARDABLE = (1 << 2),	/* swapon+blkdev support discard */
 	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
 	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
 	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
-- cgit v1.2.2
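
[Editor's note: a userspace sketch of how the new flag bit could be used.
SWAP_FLAG_DISCARD is defined locally because, at the time of this patch,
libc headers would not yet carry it; example_swapon_discard() is a
hypothetical helper, not an existing interface.]

#include <sys/swap.h>

#ifndef SWAP_FLAG_DISCARD
#define SWAP_FLAG_DISCARD	0x10000	/* mirrors the kernel definition */
#endif

/* Enable a swap area with discard-while-swapping requested. */
int example_swapon_discard(const char *path, int prio)
{
	int flags = SWAP_FLAG_DISCARD;

	if (prio >= 0)
		flags |= SWAP_FLAG_PREFER |
			 ((prio << SWAP_FLAG_PRIO_SHIFT) & SWAP_FLAG_PRIO_MASK);
	return swapon(path, flags);
}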
From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 9 Sep 2010 16:38:17 -0700
Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES
 when memory is low and kswapd is awake

Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES, as it
is cheaper than scanning a number of lists.  To avoid synchronization
overhead, counter deltas are maintained on a per-cpu basis and drained
both periodically and when the delta is above a threshold.  On large CPU
systems, the difference between the estimated and real value of
NR_FREE_PAGES can be very high.  If NR_FREE_PAGES is much higher than the
number of free pages really in the buddy allocator, the VM can allocate
pages below the min watermark, at worst reducing the real number of free
pages to zero.  Even if the OOM killer kills some victim to free memory,
it may not free memory if the exit path requires a new page, resulting
in livelock.

This patch introduces a zone_page_state_snapshot() function (courtesy of
Christoph) that takes a slightly more accurate view of an arbitrary
vmstat counter.  It is used to read NR_FREE_PAGES while kswapd is awake
to avoid the watermark being accidentally broken.  The estimate is not
perfect and may result in cache line bounces, but it is expected to be
lighter than the IPI calls necessary to continually drain the per-cpu
counters while kswapd is awake.

Signed-off-by: Christoph Lameter
Signed-off-by: Mel Gorman
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/mmzone.h | 13 +++++++++++++
 include/linux/vmstat.h | 22 ++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6e6e62648a4d..3984c4eb41fd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -283,6 +283,13 @@ struct zone {
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long watermark[NR_WMARK];

+	/*
+	 * When free pages are below this point, additional steps are taken
+	 * when reading the number of free pages to avoid per-cpu counter
+	 * drift allowing watermarks to be breached
+	 */
+	unsigned long percpu_drift_mark;
+
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
@@ -441,6 +448,12 @@ static inline int zone_is_oom_locked(const struct zone *zone)
 	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
 }

+#ifdef CONFIG_SMP
+unsigned long zone_nr_free_pages(struct zone *zone);
+#else
+#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
+#endif /* CONFIG_SMP */
+
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 7f43ccdc1d38..eaaea37b3b75 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone,
 	return x;
 }

+/*
+ * More accurate version that also considers the currently pending
+ * deltas. For that we need to loop over all cpus to find the current
+ * deltas. There is no synchronization so the result cannot be
+ * exactly accurate either.
+ */
+static inline unsigned long zone_page_state_snapshot(struct zone *zone,
+					enum zone_stat_item item)
+{
+	long x = atomic_long_read(&zone->vm_stat[item]);
+
+#ifdef CONFIG_SMP
+	int cpu;
+	for_each_online_cpu(cpu)
+		x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item];
+
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 extern unsigned long global_reclaimable_pages(void);
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
-- cgit v1.2.2
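
[Editor's note: a simplified sketch of how the snapshot is meant to be
used, along the lines of the zone_nr_free_pages() declaration above: take
the cheap reading normally, and fall back to the expensive snapshot only
once the zone is near its drift mark. example_nr_free() is illustrative,
not the kernel's implementation.]

#include <linux/mmzone.h>
#include <linux/vmstat.h>

static unsigned long example_nr_free(struct zone *zone)
{
	unsigned long nr = zone_page_state(zone, NR_FREE_PAGES);

#ifdef CONFIG_SMP
	/* Near the watermarks the per-cpu deltas matter: pay for accuracy. */
	if (nr < zone->percpu_drift_mark)
		nr = zone_page_state_snapshot(zone, NR_FREE_PAGES);
#endif
	return nr;
}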
From e2f3d75fc0e4a0d03c61872bad39ffa2e74a04ff Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 7 Sep 2010 14:05:31 +0200
Subject: libata: skip EH autopsy and recovery during suspend

For some mysterious reason, certain hardware reacts badly to the usual
EH actions while the system is going for suspend.  As the devices won't
be needed until the system is resumed, ask EH to skip the usual autopsy
and recovery and proceed directly to suspend.

Signed-off-by: Tejun Heo
Tested-by: Stephan Diestelhorst
Cc: stable@kernel.org
Signed-off-by: Jeff Garzik
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index f010f18a0f86..7de282d8bedf 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -335,6 +335,7 @@ enum {
 	ATA_EHI_HOTPLUGGED	= (1 << 0),  /* could have been hotplugged */
 	ATA_EHI_NO_AUTOPSY	= (1 << 2),  /* no autopsy */
 	ATA_EHI_QUIET		= (1 << 3),  /* be quiet */
+	ATA_EHI_NO_RECOVERY	= (1 << 4),  /* no recovery */

 	ATA_EHI_DID_SOFTRESET	= (1 << 16), /* already soft-reset this port */
 	ATA_EHI_DID_HARDRESET	= (1 << 17), /* already soft-reset this port */
-- cgit v1.2.2

From ea3c64506ea7965f86f030155e6fdef381de10e2 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou
Date: Tue, 31 Aug 2010 16:20:36 -0700
Subject: libata-sff: Reenable Port Multiplier after libata-sff remodeling.

Keep track of the link on which the current request is in progress; this
allows supporting links behind a port multiplier.

Not all of libata-sff is PMP compliant: the code for native BMDMA
controllers does not take PMP into account.

Tested on Marvell 7042 and Sil7526.

Signed-off-by: Gwendal Grignou
Signed-off-by: Jeff Garzik
---
 include/linux/libata.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7de282d8bedf..45fb2967b66d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -724,6 +724,7 @@ struct ata_port {
 	struct ata_ioports	ioaddr;	/* ATA cmd/ctl/dma register blocks */
 	u8			ctl;	/* cache of ATA control register */
 	u8			last_ctl;	/* Cache last written value */
+	struct ata_link		*sff_pio_task_link; /* link currently used */
 	struct delayed_work	sff_pio_task;
 #ifdef CONFIG_ATA_BMDMA
 	struct ata_bmdma_prd	*bmdma_prd;	/* BMDMA SG list */
@@ -1595,7 +1596,7 @@ extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
 extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
 			    u8 status, int in_wq);
-extern void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay);
+extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay);
 extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc);
 extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc);
 extern unsigned int ata_sff_port_intr(struct ata_port *ap,
-- cgit v1.2.2

From 006abe887c5e637d059c44310de6c92f36aded3b Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Sun, 12 Sep 2010 19:55:25 -0400
Subject: SUNRPC: Fix a race in rpc_info_open

There is a race between rpc_info_open and rpc_release_client() in that
nothing stops a process from opening the file after the clnt->cl_kref
goes to zero.  Fix this by using atomic_inc_not_zero()...

Reported-by: J. Bruce Fields
Signed-off-by: Trond Myklebust
Cc: stable@kernel.org
---
 include/linux/sunrpc/clnt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 569dc722a600..85f38a63f098 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -30,7 +30,7 @@ struct rpc_inode;
  * The high-level client handle
  */
 struct rpc_clnt {
-	struct kref		cl_kref;	/* Number of references */
+	atomic_t		cl_count;	/* Number of references */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
-- cgit v1.2.2
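
[Editor's note: the fix relies on the standard lookup-side refcounting
idiom sketched below; struct example_obj and example_get() are
illustrative stand-ins for rpc_clnt and its open path.]

#include <asm/atomic.h>

struct example_obj {
	atomic_t count;
};

/* Take a reference only if the object is not already being destroyed;
 * a plain atomic_inc() here would resurrect a dying object. */
static struct example_obj *example_get(struct example_obj *obj)
{
	if (obj && atomic_inc_not_zero(&obj->count))
		return obj;
	return NULL;
}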
From c54fce6eff197d9c57c97afbf6c9722ce434fc8f Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 10 Sep 2010 16:51:36 +0200
Subject: workqueue: add documentation

Update the copyright notice and add Documentation/workqueue.txt.

Randy Dunlap, Dave Chinner: misc fixes.

Signed-off-by: Tejun Heo
Reviewed-By: Florian Mickler
Cc: Ingo Molnar
Cc: Christoph Lameter
Cc: Randy Dunlap
Cc: Dave Chinner
---
 include/linux/workqueue.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f11100f96482..25e02c941bac 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -235,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define work_clear_pending(work) \
 	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))

+/*
+ * Workqueue flags and constants.  For details, please refer to
+ * Documentation/workqueue.txt.
+ */
 enum {
 	WQ_NON_REENTRANT	= 1 << 0, /* guarantee non-reentrance */
 	WQ_UNBOUND		= 1 << 1, /* not bound to any cpu */
-- cgit v1.2.2

From 7b334fcb45b757ffb093696ca3de1b0c8b4a33f1 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Thu, 9 Sep 2010 23:51:02 +0100
Subject: drm: Use a nondestructive mode for output detect when polling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Destructive load-detection is very expensive and, due to failings
elsewhere, can trigger system-wide stalls of up to 600ms.  A simple
first step to correcting this is not to invoke such an expensive and
destructive load-detection operation automatically.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29536
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16265
Reported-by: Bruno Prémont
Tested-by: Sitsofe Wheeler
Signed-off-by: Chris Wilson
Cc: stable@kernel.org
Signed-off-by: Dave Airlie
---
 include/drm/drm_crtc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index c9f3cc5949a8..5536223fbac8 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -386,7 +386,8 @@ struct drm_connector_funcs {
 	void (*dpms)(struct drm_connector *connector, int mode);
 	void (*save)(struct drm_connector *connector);
 	void (*restore)(struct drm_connector *connector);
-	enum drm_connector_status (*detect)(struct drm_connector *connector);
+	enum drm_connector_status (*detect)(struct drm_connector *connector,
+					    bool nondestructive);
 	int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
 	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
 			     uint64_t val);
-- cgit v1.2.2

From 930a9e283516a3a3595c0c515113f1b78d07f695 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 14 Sep 2010 11:07:23 +0100
Subject: drm: Use a nondestructive mode for output detect when polling (v2)

v2: Julien Cristau pointed out that @nondestructive results in
double-negatives and confusion when trying to interpret the parameter,
so use @force instead.  Much easier to type as well. ;-)

And fix the miscompilation of vmwgfx reported by Sedat Dilek.

Signed-off-by: Chris Wilson
Cc: stable@kernel.org
Signed-off-by: Dave Airlie
---
 include/drm/drm_crtc.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 5536223fbac8..3e5a51af757c 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -386,8 +386,15 @@ struct drm_connector_funcs {
 	void (*dpms)(struct drm_connector *connector, int mode);
 	void (*save)(struct drm_connector *connector);
 	void (*restore)(struct drm_connector *connector);
+
+	/* Check to see if anything is attached to the connector.
+	 * @force is set to false whilst polling, true when checking the
+	 * connector due to user request. @force can be used by the driver
+	 * to avoid expensive, destructive operations during automated
+	 * probing.
+	 */
 	enum drm_connector_status (*detect)(struct drm_connector *connector,
-					    bool nondestructive);
+					    bool force);
 	int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
 	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
 			     uint64_t val);
-- cgit v1.2.2
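
[Editor's note: a sketch of a driver-side detect() implementation under
the v2 semantics; example_output_sense() and example_load_detect() are
hypothetical driver helpers, not DRM API.]

#include "drm/drm_crtc.h"

static bool example_output_sense(struct drm_connector *connector);
static bool example_load_detect(struct drm_connector *connector); /* expensive */

static enum drm_connector_status
example_detect(struct drm_connector *connector, bool force)
{
	/* Cheap, non-destructive probe first (e.g. an HPD/sense bit). */
	if (example_output_sense(connector))
		return connector_status_connected;

	/* While polling (force == false), never start destructive
	 * load-detection; admit that we don't know. */
	if (!force)
		return connector_status_unknown;

	return example_load_detect(connector) ?
		connector_status_connected : connector_status_disconnected;
}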
Peter Anvin" Date: Tue, 7 Sep 2010 16:16:18 -0700 Subject: compat: Make compat_alloc_user_space() incorporate the access_ok() compat_alloc_user_space() expects the caller to independently call access_ok() to verify the returned area. A missing call could introduce problems on some architectures. This patch incorporates the access_ok() check into compat_alloc_user_space() and also adds a sanity check on the length. The existing compat_alloc_user_space() implementations are renamed arch_compat_alloc_user_space() and are used as part of the implementation of the new global function. This patch assumes NULL will cause __get_user()/__put_user() to either fail or access userspace on all architectures. This should be followed by checking the return value of compat_access_user_space() for NULL in the callers, at which time the access_ok() in the callers can also be removed. Reported-by: Ben Hawkes Signed-off-by: H. Peter Anvin Acked-by: Benjamin Herrenschmidt Acked-by: Chris Metcalf Acked-by: David S. Miller Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: Tony Luck Cc: Andrew Morton Cc: Arnd Bergmann Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Helge Deller Cc: James Bottomley Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: Paul Mackerras Cc: Ralf Baechle Cc: --- include/linux/compat.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 9ddc8780e8db..5778b559d59c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer); + +extern void __user *compat_alloc_user_space(unsigned long len); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ -- cgit v1.2.2 From 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Wed, 15 Sep 2010 10:27:52 -0700 Subject: tcp: Prevent overzealous packetization by SWS logic. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If peer uses tiny MSS (say, 75 bytes) and similarly tiny advertised window, the SWS logic will packetize to half the MSS unnecessarily. This causes problems with some embedded devices. However for large MSS devices we do want to half-MSS packetize otherwise we never get enough packets into the pipe for things like fast retransmit and recovery to work. Be careful also to handle the case where MSS > window, otherwise we'll never send until the probe timer. Reported-by: ツ Leandro Melo de Sales Signed-off-by: David S. Miller --- include/net/tcp.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index eaa9582779d0..3e4b33e36602 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -475,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) { - if (tp->max_window && pktsize > (tp->max_window >> 1)) - return max(tp->max_window >> 1, 68U - tp->tcp_header_len); + int cutoff; + + /* When peer uses tiny windows, there is no use in packetizing + * to sub-MSS pieces for the sake of SWS or making sure there + * are enough packets in the pipe for fast recovery. 
From 01f83d69844d307be2aa6fea88b0e8fe5cbdb2f4 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Wed, 15 Sep 2010 10:27:52 -0700
Subject: tcp: Prevent overzealous packetization by SWS logic.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the peer uses a tiny MSS (say, 75 bytes) and a similarly tiny
advertised window, the SWS logic will packetize to half the MSS
unnecessarily.  This causes problems with some embedded devices.

However, for large-MSS devices we do want to packetize to half the
MSS, as otherwise we never get enough packets into the pipe for things
like fast retransmit and recovery to work.

Be careful also to handle the case where MSS > window, otherwise we'll
never send until the probe timer fires.

Reported-by: ツ Leandro Melo de Sales
Signed-off-by: David S. Miller
---
 include/net/tcp.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index eaa9582779d0..3e4b33e36602 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -475,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk);
 /* Bound MSS / TSO packet size with the half of the window */
 static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 {
-	if (tp->max_window && pktsize > (tp->max_window >> 1))
-		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	int cutoff;
+
+	/* When peer uses tiny windows, there is no use in packetizing
+	 * to sub-MSS pieces for the sake of SWS or making sure there
+	 * are enough packets in the pipe for fast recovery.
+	 *
+	 * On the other hand, for extremely large MSS devices, handling
+	 * smaller than MSS windows in this way does make sense.
+	 */
+	if (tp->max_window >= 512)
+		cutoff = (tp->max_window >> 1);
+	else
+		cutoff = tp->max_window;
+
+	if (cutoff && pktsize > cutoff)
+		return max_t(int, cutoff, 68U - tp->tcp_header_len);
 	else
 		return pktsize;
 }
-- cgit v1.2.2

From f0f9deae9e7c421fa0c1c627beb8e174325e1ba7 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Fri, 17 Sep 2010 16:55:03 -0700
Subject: netpoll: Disable IRQ around RCU dereference in netpoll_rx

We cannot use rcu_dereference_bh safely in netpoll_rx as we may be
called with IRQs disabled.  We could, however, simply disable IRQs, as
that too causes BH to be disabled and is safe in either case.

Thanks to John Linville for discovering this bug and providing a patch.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 include/linux/netpoll.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 791d5109f34c..50d8009be86c 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,20 +63,20 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 	unsigned long flags;
 	bool ret = false;

-	rcu_read_lock_bh();
+	local_irq_save(flags);
 	npinfo = rcu_dereference_bh(skb->dev->npinfo);

 	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
 		goto out;

-	spin_lock_irqsave(&npinfo->rx_lock, flags);
+	spin_lock(&npinfo->rx_lock);
 	/* check rx_flags again with the lock held */
 	if (npinfo->rx_flags && __netpoll_rx(skb))
 		ret = true;
-	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	spin_unlock(&npinfo->rx_lock);

 out:
-	rcu_read_unlock_bh();
+	local_irq_restore(flags);
 	return ret;
 }
-- cgit v1.2.2

From 8b15575cae7a93a784c3005c42b069edd9ba64dd Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Tue, 21 Sep 2010 14:35:37 -0700
Subject: fs: {lock,unlock}_flocks() stubs to prepare for BKL removal

The lock structs are currently protected by the BKL, but are accessed
by code in fs/locks.c and in miscellaneous file system and DLM code.
These stubs will allow all users to switch to the new interface before
the implementation is changed to a spinlock.

Acked-by: Arnd Bergmann
Signed-off-by: Sage Weil
Signed-off-by: Linus Torvalds
---
 include/linux/fs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 76041b614758..63d069bd80b7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1093,6 +1093,10 @@ struct file_lock {

 #include

+/* temporary stubs for BKL removal */
+#define lock_flocks()	lock_kernel()
+#define unlock_flocks()	unlock_kernel()
+
 extern void send_sigio(struct fown_struct *fown, int fd, int band);

 #ifdef CONFIG_FILE_LOCKING
-- cgit v1.2.2
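
[Editor's note: a sketch of the call-site pattern the stubs enable --
users switch to lock_flocks()/unlock_flocks() now, so only the stub
definition changes when the BKL is replaced by a spinlock.
example_count_flocks() is illustrative, not kernel code.]

#include <linux/fs.h>

static int example_count_flocks(struct inode *inode)
{
	struct file_lock *fl;
	int n = 0;

	lock_flocks();			/* today this expands to lock_kernel() */
	for (fl = inode->i_flock; fl; fl = fl->fl_next)
		n++;
	unlock_flocks();		/* today: unlock_kernel() */

	return n;
}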
From 710224fa2750cf449c02dd115548acebfdd2c86a Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Wed, 22 Sep 2010 13:04:55 -0700
Subject: arm: fix "arm: fix pci_set_consistent_dma_mask for dmabounce devices"

This fixes the regression caused by commit 6fee48cd330c68
("dma-mapping: arm: use generic pci_set_dma_mask and
pci_set_consistent_dma_mask").

ARM needs to clip the DMA coherent mask for dmabounce devices.  This
restores the old trick.

Note that, strictly speaking, the DMA API doesn't allow architectures
to do this, but I'm not sure it's worth adding a new API that lets
architectures clip the DMA mask.

Reported-by: Krzysztof Halasa
Signed-off-by: FUJITA Tomonori
Acked-by: Russell King
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/dma-mapping.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index ce29b8151198..ba8319ae5fcc 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -102,6 +102,9 @@ static inline u64 dma_get_mask(struct device *dev)
 	return DMA_BIT_MASK(32);
 }

+#ifdef ARCH_HAS_DMA_SET_COHERENT_MASK
+int dma_set_coherent_mask(struct device *dev, u64 mask);
+#else
 static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
 {
 	if (!dma_supported(dev, mask))
@@ -109,6 +112,7 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask)
 	dev->coherent_dma_mask = mask;
 	return 0;
 }
+#endif

 extern u64 dma_get_required_mask(struct device *dev);
-- cgit v1.2.2
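
[Editor's note: a sketch of the override mechanism this hunk creates;
an arch header defines ARCH_HAS_DMA_SET_COHERENT_MASK and arch code
supplies the out-of-line implementation. example_dmabounce_mask() is
hypothetical, standing in for ARM's dmabounce limit.]

#include <linux/device.h>
#include <linux/dma-mapping.h>

/* In an arch header such as asm/dma-mapping.h (illustrative): */
#define ARCH_HAS_DMA_SET_COHERENT_MASK

/* In arch code, clipping the mask for dmabounce devices: */
static u64 example_dmabounce_mask(struct device *dev);	/* hypothetical */

int dma_set_coherent_mask(struct device *dev, u64 mask)
{
	if (!dma_supported(dev, mask))
		return -EIO;

	mask &= example_dmabounce_mask(dev);	/* the "old trick": clip it */
	dev->coherent_dma_mask = mask;
	return 0;
}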