From bb0822954aab7d23a3f902c2a103ee0242f6046e Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 16 Aug 2011 13:37:14 -0600 Subject: squeeze max-pause area and drop pass-good area Revert the pass-good area introduced in ffd1f609ab10 ("writeback: introduce max-pause and pass-good dirty limits") and make the max-pause area smaller and safe. This fixes ~30% performance regression in the ext3 data=writeback fio_mmap_randwrite_64k/fio_mmap_randrw_64k test cases, where there are 12 JBOD disks, on each disk runs 8 concurrent tasks doing reads+writes. Using deadline scheduler also has a regression, but not that big as CFQ, so this suggests we have some write starvation. The test logs show that - the disks are sometimes under utilized - global dirty pages sometimes rush high to the pass-good area for several hundred seconds, while in the mean time some bdi dirty pages drop to very low value (bdi_dirty << bdi_thresh). Then suddenly the global dirty pages dropped under global dirty threshold and bdi_dirty rush very high (for example, 2 times higher than bdi_thresh). During which time balance_dirty_pages() is not called at all. So the problems are 1) The random writes progress so slow that they break the assumption of the max-pause logic that "8 pages per 200ms is typically more than enough to curb heavy dirtiers". 2) The max-pause logic ignored task_bdi_thresh and thus opens the possibility for some bdi's to over dirty pages, leading to (bdi_dirty >> bdi_thresh) and then (bdi_thresh >> bdi_dirty) for others. 3) The higher max-pause/pass-good thresholds somehow leads to the bad swing of dirty pages. The fix is to allow the task to slightly dirty over task_bdi_thresh, but no way to exceed bdi_dirty and/or global dirty_thresh. Tests show that it fixed the JBOD regression completely (both behavior and performance), while still being able to cut down large pause times in balance_dirty_pages() for single-disk cases. Reported-by: Li Shaohua Tested-by: Li Shaohua Acked-by: Jan Kara Signed-off-by: Wu Fengguang --- include/linux/writeback.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f1bfa12ea246..2b8963ff0f35 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -12,15 +12,6 @@ * * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) * - * The 1/16 region above the global dirty limit will be put to maximum pauses: - * - * (limit, limit + limit/DIRTY_MAXPAUSE_AREA) - * - * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put - * to loops: - * - * (limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA) - * * Further beyond, all dirtier tasks will enter a loop waiting (possibly long * time) for the dirty pages to drop, unless written enough pages. * @@ -31,8 +22,6 @@ */ #define DIRTY_SCOPE 8 #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) -#define DIRTY_MAXPAUSE_AREA 16 -#define DIRTY_PASSGOOD_AREA 8 /* * 4MB minimal write chunk size -- cgit v1.2.2 From 0d7c5f2572ccfa7bf83292b1506926663f2d164a Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:31 -0500 Subject: drivers:misc:ti-st: platform hooks for chip states Certain platform specific or Host-WiLink Interface specific actions would be required to be taken when the chip is being enabled and after the chip is disabled such as configuration of the mux modes for the GPIO of host connected to the nshutdown of the chip or relinquishing UART after the chip is disabled. Similar actions can also be taken when the chip is in deep sleep or when the chip is awake. Performance enhancements such as configuring the host to run faster when chip is awake and slower when chip is asleep can also be made here. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- include/linux/ti_wilink_st.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index b004e557caa9..2ef4385da6bf 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -410,7 +410,28 @@ struct gps_event_hdr { u16 plen; } __attribute__ ((packed)); -/* platform data */ +/** + * struct ti_st_plat_data - platform data shared between ST driver and + * platform specific board file which adds the ST device. + * @nshutdown_gpio: Host's GPIO line to which chip's BT_EN is connected. + * @dev_name: The UART/TTY name to which chip is interfaced. (eg: /dev/ttyS1) + * @flow_cntrl: Should always be 1, since UART's CTS/RTS is used for PM + * purposes. + * @baud_rate: The baud rate supported by the Host UART controller, this will + * be shared across with the chip via a HCI VS command from User-Space Init + * Mgr application. + * @suspend: + * @resume: legacy PM routines hooked to platform specific board file, so as + * to take chip-host interface specific action. + * @chip_enable: + * @chip_disable: Platform/Interface specific mux mode setting, GPIO + * configuring, Host side PM disabling etc.. can be done here. + * @chip_asleep: + * @chip_awake: Chip specific deep sleep states is communicated to Host + * specific board-xx.c to take actions such as cut UART clocks when chip + * asleep or run host faster when chip awake etc.. + * + */ struct ti_st_plat_data { long nshutdown_gpio; unsigned char dev_name[UART_DEV_NAME_LEN]; /* uart name */ @@ -418,6 +439,10 @@ struct ti_st_plat_data { unsigned long baud_rate; int (*suspend)(struct platform_device *, pm_message_t); int (*resume)(struct platform_device *); + int (*chip_enable) (struct kim_data_s *); + int (*chip_disable) (struct kim_data_s *); + int (*chip_asleep) (struct kim_data_s *); + int (*chip_awake) (struct kim_data_s *); }; #endif /* TI_WILINK_ST_H */ -- cgit v1.2.2 From 24d406a6bf736f7aebdc8fa0f0ec86e0890c6d24 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Aug 2011 14:59:28 +0200 Subject: TTY: pty, fix pty counting tty_operations->remove is normally called like: queue_release_one_tty ->tty_shutdown ->tty_driver_remove_tty ->tty_operations->remove However tty_shutdown() is called from queue_release_one_tty() only if tty_operations->shutdown is NULL. But for pty, it is not. pty_unix98_shutdown() is used there as ->shutdown. So tty_operations->remove of pty (i.e. pty_unix98_remove()) is never called. This results in invalid pty_count. I.e. what can be seen in /proc/sys/kernel/pty/nr. I see this was already reported at: https://lkml.org/lkml/2009/11/5/370 But it was not fixed since then. This patch is kind of a hackish way. The problem lies in ->install. We allocate there another tty (so-called tty->link). So ->install is called once, but ->remove twice, for both tty and tty->link. The fix here is to count both tty and tty->link and divide the count by 2 for user. And to have ->remove called, let's make tty_driver_remove_tty() global and call that from pty_unix98_shutdown() (tty_operations->shutdown). While at it, let's document that when ->shutdown is defined, tty_shutdown() is not called. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: "H. Peter Anvin" Cc: stable Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 ++ include/linux/tty_driver.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 44bc0c5617e1..5f2ede82b3d6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,6 +421,8 @@ extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); +extern void tty_driver_remove_tty(struct tty_driver *driver, + struct tty_struct *tty); extern void tty_shutdown(struct tty_struct *tty); extern void tty_free_termios(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 9deeac855240..ecdaeb98b293 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -47,6 +47,9 @@ * * This routine is called synchronously when a particular tty device * is closed for the last time freeing up the resources. + * Note that tty_shutdown() is not called if ops->shutdown is defined. + * This means one is responsible to take care of calling ops->remove (e.g. + * via tty_driver_remove_tty) and releasing tty->termios. * * * void (*cleanup)(struct tty_struct * tty); -- cgit v1.2.2 From be27425dcc516fd08245b047ea57f83b8f6f0903 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Aug 2011 16:15:10 -0700 Subject: Add a personality to report 2.6.x version numbers I ran into a couple of programs which broke with the new Linux 3.0 version. Some of those were binary only. I tried to use LD_PRELOAD to work around it, but it was quite difficult and in one case impossible because of a mix of 32bit and 64bit executables. For example, all kind of management software from HP doesnt work, unless we pretend to run a 2.6 kernel. $ uname -a Linux svivoipvnx001 3.0.0-08107-g97cd98f #1062 SMP Fri Aug 12 18:11:45 CEST 2011 i686 i686 i386 GNU/Linux $ hpacucli ctrl all show Error: No controllers detected. $ rpm -qf /usr/sbin/hpacucli hpacucli-8.75-12.0 Another notable case is that Python now reports "linux3" from sys.platform(); which in turn can break things that were checking sys.platform() == "linux2": https://bugzilla.mozilla.org/show_bug.cgi?id=664564 It seems pretty clear to me though it's a bug in the apps that are using '==' instead of .startswith(), but this allows us to unbreak broken programs. This patch adds a UNAME26 personality that makes the kernel report a 2.6.40+x version number instead. The x is the x in 3.x. I know this is somewhat ugly, but I didn't find a better workaround, and compatibility to existing programs is important. Some programs also read /proc/sys/kernel/osrelease. This can be worked around in user space with mount --bind (and a mount namespace) To use: wget ftp://ftp.kernel.org/pub/linux/kernel/people/ak/uname26/uname26.c gcc -o uname26 uname26.c ./uname26 program Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/personality.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/personality.h b/include/linux/personality.h index eec3bae164d4..8fc7dd1a57ff 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -22,6 +22,7 @@ extern int __set_personality(unsigned int); * These occupy the top three bytes. */ enum { + UNAME26 = 0x0020000, ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization of VA space */ FDPIC_FUNCPTRS = 0x0080000, /* userspace function ptrs point to descriptors * (signal handling) -- cgit v1.2.2 From e096d0c7e2e4e5893792db865dd065ac73cf1f00 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 25 Aug 2011 07:48:12 -0400 Subject: lockdep: Add helper function for dir vs file i_mutex annotation Purely in-memory filesystems do not use the inode hash as the dcache tells us if an entry already exists. As a result, they do not call unlock_new_inode, and thus directory inodes do not get put into a different lockdep class for i_sem. We need the different lockdep classes, because the locking order for i_mutex is different for directory inodes and regular inodes. Directory inodes can do "readdir()", which takes i_mutex *before* possibly taking mm->mmap_sem (due to a page fault while copying the directory entry to user space). In contrast, regular inodes can be mmap'ed, which takes mm->mmap_sem before accessing i_mutex. The two cases can never happen for the same inode, so no real deadlock can occur, but without the different lockdep classes, lockdep cannot understand that. As a result, if CONFIG_DEBUG_LOCK_ALLOC is set, this can lead to false positives from lockdep like below: find/645 is trying to acquire lock: (&mm->mmap_sem){++++++}, at: [] might_fault+0x5c/0xac but task is already holding lock: (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [] vfs_readdir+0x5b/0xb4 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#15){+.+.+.}: [] lock_acquire+0xbf/0x103 [] __mutex_lock_common+0x4c/0x361 [] mutex_lock_nested+0x40/0x45 [] hugetlbfs_file_mmap+0x82/0x110 [] mmap_region+0x258/0x432 [] do_mmap_pgoff+0x2ac/0x306 [] sys_mmap_pgoff+0x118/0x16a [] sys_mmap+0x22/0x24 [] system_call_fastpath+0x16/0x1b -> #0 (&mm->mmap_sem){++++++}: [] __lock_acquire+0xa1a/0xcf7 [] lock_acquire+0xbf/0x103 [] might_fault+0x89/0xac [] filldir+0x6f/0xc7 [] dcache_readdir+0x67/0x205 [] vfs_readdir+0x7b/0xb4 [] sys_getdents+0x7e/0xd1 [] system_call_fastpath+0x16/0x1b This patch moves the directory vs file lockdep annotation into a helper function that can be called by in-memory filesystems and has hugetlbfs call it. Signed-off-by: Josh Boyer Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 178cdb4f1d4a..c2bd68f2277a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2318,6 +2318,11 @@ extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*te extern struct inode * iget_locked(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void lockdep_annotate_inode_mutex_key(struct inode *inode); +#else +static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; +#endif extern void unlock_new_inode(struct inode *); extern unsigned int get_next_ino(void); -- cgit v1.2.2 From a801876638c5ce650223476c4eb8f37cea32dc1c Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Thu, 25 Aug 2011 15:59:06 -0700 Subject: MAINTAINERS: Evgeniy has moved Signed-off-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/connector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index 0c69ad825b39..3c9c54fd5690 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -1,7 +1,7 @@ /* * connector.h * - * 2004-2005 Copyright (c) Evgeniy Polyakov + * 2004-2005 Copyright (c) Evgeniy Polyakov * All rights reserved. * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.2 From 284fb68d00c56e971ed01e0b4bac5ddd4d1b74ab Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 25 Aug 2011 15:59:13 -0700 Subject: rapidio: fix use of non-compatible registers Replace/remove use of RIO v.1.2 registers/bits that are not forward-compatible with newer versions of RapidIO specification. RapidIO specification v.1.3 removed Write Port CSR, Doorbell CSR, Mailbox CSR and Mailbox and Doorbell bits of the PEF CAR. Use of removed (since RIO v.1.3) register bits affects users of currently available 1.3 and 2.x compliant devices who may use not so recent kernel versions. Removing checks for unsupported bits makes corresponding routines compatible with all versions of RapidIO specification. Therefore, backporting makes stable kernel versions compliant with RIO v.1.3 and later as well. Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Cc: Chul Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rio_regs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index 9026b30238f3..218168a2b5e9 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -36,12 +36,12 @@ #define RIO_PEF_PROCESSOR 0x20000000 /* [I] Processor */ #define RIO_PEF_SWITCH 0x10000000 /* [I] Switch */ #define RIO_PEF_MULTIPORT 0x08000000 /* [VI, 2.1] Multiport */ -#define RIO_PEF_INB_MBOX 0x00f00000 /* [II] Mailboxes */ -#define RIO_PEF_INB_MBOX0 0x00800000 /* [II] Mailbox 0 */ -#define RIO_PEF_INB_MBOX1 0x00400000 /* [II] Mailbox 1 */ -#define RIO_PEF_INB_MBOX2 0x00200000 /* [II] Mailbox 2 */ -#define RIO_PEF_INB_MBOX3 0x00100000 /* [II] Mailbox 3 */ -#define RIO_PEF_INB_DOORBELL 0x00080000 /* [II] Doorbells */ +#define RIO_PEF_INB_MBOX 0x00f00000 /* [II, <= 1.2] Mailboxes */ +#define RIO_PEF_INB_MBOX0 0x00800000 /* [II, <= 1.2] Mailbox 0 */ +#define RIO_PEF_INB_MBOX1 0x00400000 /* [II, <= 1.2] Mailbox 1 */ +#define RIO_PEF_INB_MBOX2 0x00200000 /* [II, <= 1.2] Mailbox 2 */ +#define RIO_PEF_INB_MBOX3 0x00100000 /* [II, <= 1.2] Mailbox 3 */ +#define RIO_PEF_INB_DOORBELL 0x00080000 /* [II, <= 1.2] Doorbells */ #define RIO_PEF_EXT_RT 0x00000200 /* [III, 1.3] Extended route table support */ #define RIO_PEF_STD_RT 0x00000100 /* [III, 1.3] Standard route table support */ #define RIO_PEF_CTLS 0x00000010 /* [III] CTLS */ @@ -102,7 +102,7 @@ #define RIO_SWITCH_RT_LIMIT 0x34 /* [III, 1.3] Switch Route Table Destination ID Limit CAR */ #define RIO_RT_MAX_DESTID 0x0000ffff -#define RIO_MBOX_CSR 0x40 /* [II] Mailbox CSR */ +#define RIO_MBOX_CSR 0x40 /* [II, <= 1.2] Mailbox CSR */ #define RIO_MBOX0_AVAIL 0x80000000 /* [II] Mbox 0 avail */ #define RIO_MBOX0_FULL 0x40000000 /* [II] Mbox 0 full */ #define RIO_MBOX0_EMPTY 0x20000000 /* [II] Mbox 0 empty */ @@ -128,8 +128,8 @@ #define RIO_MBOX3_FAIL 0x00000008 /* [II] Mbox 3 fail */ #define RIO_MBOX3_ERROR 0x00000004 /* [II] Mbox 3 error */ -#define RIO_WRITE_PORT_CSR 0x44 /* [I] Write Port CSR */ -#define RIO_DOORBELL_CSR 0x44 /* [II] Doorbell CSR */ +#define RIO_WRITE_PORT_CSR 0x44 /* [I, <= 1.2] Write Port CSR */ +#define RIO_DOORBELL_CSR 0x44 /* [II, <= 1.2] Doorbell CSR */ #define RIO_DOORBELL_AVAIL 0x80000000 /* [II] Doorbell avail */ #define RIO_DOORBELL_FULL 0x40000000 /* [II] Doorbell full */ #define RIO_DOORBELL_EMPTY 0x20000000 /* [II] Doorbell empty */ -- cgit v1.2.2 From cc7993f6439b49909a8792660c4d0741fec9d584 Mon Sep 17 00:00:00 2001 From: Dilan Lee Date: Thu, 25 Aug 2011 15:59:17 -0700 Subject: backlight: add a callback 'notify_after' for backlight control We need a callback to do some things after pwm_enable, pwm_disable and pwm_config. Signed-off-by: Dilan Lee Reviewed-by: Robert Morell Reviewed-by: Arun Murthy Cc: Richard Purdie Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pwm_backlight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index 5e3e25a3c9c3..63d2df43e61a 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -14,6 +14,7 @@ struct platform_pwm_backlight_data { unsigned int pwm_period_ns; int (*init)(struct device *dev); int (*notify)(struct device *dev, int brightness); + void (*notify_after)(struct device *dev, int brightness); void (*exit)(struct device *dev); int (*check_fb)(struct device *dev, struct fb_info *info); }; -- cgit v1.2.2 From f5b940997397229975ea073679b03967932a541b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 26 Aug 2011 18:03:11 -0400 Subject: All Arch: remove linkage for sys_nfsservctl system call The nfsservctl system call is now gone, so we should remove all linkage for it. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- include/linux/compat.h | 1 - include/linux/syscalls.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 8779405e15a8..c6e7523bf765 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -438,7 +438,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, struct compat_timespec __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2); asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize, int flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8c03b98df5f9..1ff0ec2a5e8d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -702,9 +702,6 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args); asmlinkage long sys_sysinfo(struct sysinfo __user *info); asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); -asmlinkage long sys_nfsservctl(int cmd, - struct nfsctl_arg __user *arg, - void __user *res); asmlinkage long sys_syslog(int type, char __user *buf, int len); asmlinkage long sys_uselib(const char __user *library); asmlinkage long sys_ni_syscall(void); -- cgit v1.2.2