-rw-r--r--  .mailmap                                 |  1
-rw-r--r--  MAINTAINERS                              | 12
-rw-r--r--  arch/blackfin/mach-bf533/boards/stamp.c  |  1
-rw-r--r--  fs/fcntl.c                               |  5
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c               |  5
-rw-r--r--  fs/ocfs2/namei.c                         | 43
-rw-r--r--  include/linux/fs.h                       |  2
-rw-r--r--  include/linux/rmap.h                     | 10
-rw-r--r--  include/linux/writeback.h                |  1
-rw-r--r--  include/uapi/asm-generic/fcntl.h         |  2
-rw-r--r--  kernel/exit.c                            | 12
-rw-r--r--  mm/Kconfig.debug                         |  9
-rw-r--r--  mm/memcontrol.c                          | 17
-rw-r--r--  mm/memory.c                              | 27
-rw-r--r--  mm/page-writeback.c                      | 43
-rw-r--r--  mm/rmap.c                                | 42
-rw-r--r--  mm/vmscan.c                              | 24
17 files changed, 155 insertions(+), 101 deletions(-)
diff --git a/.mailmap b/.mailmap
--- a/.mailmap
+++ b/.mailmap
@@ -51,6 +51,7 @@ Greg Kroah-Hartman <gregkh@suse.de>
 Greg Kroah-Hartman <greg@kroah.com>
 Henk Vergonet <Henk.Vergonet@gmail.com>
 Henrik Kretzschmar <henne@nachtwindheim.de>
+Henrik Rydberg <rydberg@bitmath.org>
 Herbert Xu <herbert@gondor.apana.org.au>
 Jacob Shin <Jacob.Shin@amd.com>
 James Bottomley <jejb@mulgrave.(none)>
diff --git a/MAINTAINERS b/MAINTAINERS
index ddb9ac8d32b3..79b2e4ba78ee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -724,15 +724,15 @@ F:	include/uapi/linux/apm_bios.h
 F:	drivers/char/apm-emulation.c
 
 APPLE BCM5974 MULTITOUCH DRIVER
-M:	Henrik Rydberg <rydberg@euromail.se>
+M:	Henrik Rydberg <rydberg@bitmath.org>
 L:	linux-input@vger.kernel.org
-S:	Maintained
+S:	Odd fixes
 F:	drivers/input/mouse/bcm5974.c
 
 APPLE SMC DRIVER
-M:	Henrik Rydberg <rydberg@euromail.se>
+M:	Henrik Rydberg <rydberg@bitmath.org>
 L:	lm-sensors@lm-sensors.org
-S:	Maintained
+S:	Odd fixes
 F:	drivers/hwmon/applesmc.c
 
 APPLETALK NETWORK LAYER
@@ -4940,10 +4940,10 @@ F:	include/uapi/linux/input.h
 F:	include/linux/input/
 
 INPUT MULTITOUCH (MT) PROTOCOL
-M:	Henrik Rydberg <rydberg@euromail.se>
+M:	Henrik Rydberg <rydberg@bitmath.org>
 L:	linux-input@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rydberg/input-mt.git
-S:	Maintained
+S:	Odd fixes
 F:	Documentation/input/multi-touch-protocol.txt
 F:	drivers/input/input-mt.c
 K:	\b(ABS|SYN)_MT_
diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c
index 6f4bac969bf7..23eada79439c 100644
--- a/arch/blackfin/mach-bf533/boards/stamp.c
+++ b/arch/blackfin/mach-bf533/boards/stamp.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/device.h>
+#include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 99d440a4a6ba..ee85cd4e136a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,14 +740,15 @@ static int __init fcntl_init(void)
 	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 	 * is defined as O_NONBLOCK on some platforms and not on others.
 	 */
-	BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
 		O_RDONLY	| O_WRONLY	| O_RDWR	|
 		O_CREAT		| O_EXCL	| O_NOCTTY	|
 		O_TRUNC		| O_APPEND	| /* O_NONBLOCK	| */
 		__O_SYNC	| O_DSYNC	| FASYNC	|
 		O_DIRECT	| O_LARGEFILE	| O_DIRECTORY	|
 		O_NOFOLLOW	| O_NOATIME	| O_CLOEXEC	|
-		__FMODE_EXEC	| O_PATH	| __O_TMPFILE
+		__FMODE_EXEC	| O_PATH	| __O_TMPFILE	|
+		__FMODE_NONOTIFY
 		));
 
 	fasync_cache = kmem_cache_create("fasync_cache",
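
The hunk above works together with the FMODE_NONOTIFY renumbering further down: the BUILD_BUG_ON counts the distinct bits set across all open flags, so bumping 20 to 21 while adding __FMODE_NONOTIFY to the mask turns any future bit collision into a compile-time failure. A minimal userspace sketch of the same popcount trick, using made-up flag values rather than the kernel's O_* definitions:

#include <assert.h>
#include <stdio.h>

/* Hypothetical flag bits standing in for O_*; values are illustrative only. */
#define F_A 0x01
#define F_B 0x02
#define F_C 0x04
#define F_D 0x08

int main(void)
{
	/* 4 distinct flags must occupy 4 distinct bits; if any two shared
	 * a bit, the OR would lose a bit and the popcount would come up
	 * short, which is exactly how the kernel check catches overlap. */
	assert(__builtin_popcount(F_A | F_B | F_C | F_D) == 4);
	printf("no flag bits collide\n");
	return 0;
}
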
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 79b5af5e6a7b..cecd875653e4 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2023,11 +2023,8 @@ leave:
 	dlm_lockres_drop_inflight_ref(dlm, res);
 	spin_unlock(&res->spinlock);
 
-	if (ret < 0) {
+	if (ret < 0)
 		mlog_errno(ret);
-		if (newlock)
-			dlm_lock_put(newlock);
-	}
 
 	return ret;
 }
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b931e04e3388..914c121ec890 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -94,6 +94,14 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 				     struct inode *inode,
 				     const char *symname);
 
+static int ocfs2_double_lock(struct ocfs2_super *osb,
+			     struct buffer_head **bh1,
+			     struct inode *inode1,
+			     struct buffer_head **bh2,
+			     struct inode *inode2,
+			     int rename);
+
+static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
 /* An orphan dir name is an 8 byte value, printed as a hex string */
 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
 
@@ -678,8 +686,10 @@ static int ocfs2_link(struct dentry *old_dentry,
 {
 	handle_t *handle;
 	struct inode *inode = old_dentry->d_inode;
+	struct inode *old_dir = old_dentry->d_parent->d_inode;
 	int err;
 	struct buffer_head *fe_bh = NULL;
+	struct buffer_head *old_dir_bh = NULL;
 	struct buffer_head *parent_fe_bh = NULL;
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
@@ -696,19 +706,33 @@ static int ocfs2_link(struct dentry *old_dentry,
 
 	dquot_initialize(dir);
 
-	err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
+	err = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
+				&parent_fe_bh, dir, 0);
 	if (err < 0) {
 		if (err != -ENOENT)
 			mlog_errno(err);
 		return err;
 	}
 
+	/* make sure both dirs have bhs
+	 * get an extra ref on old_dir_bh if old==new */
+	if (!parent_fe_bh) {
+		if (old_dir_bh) {
+			parent_fe_bh = old_dir_bh;
+			get_bh(parent_fe_bh);
+		} else {
+			mlog(ML_ERROR, "%s: no old_dir_bh!\n", osb->uuid_str);
+			err = -EIO;
+			goto out;
+		}
+	}
+
 	if (!dir->i_nlink) {
 		err = -ENOENT;
 		goto out;
 	}
 
-	err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name,
+	err = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
 			old_dentry->d_name.len, &old_de_ino);
 	if (err) {
 		err = -ENOENT;
@@ -801,10 +825,11 @@ out_unlock_inode:
 	ocfs2_inode_unlock(inode, 1);
 
 out:
-	ocfs2_inode_unlock(dir, 1);
+	ocfs2_double_unlock(old_dir, dir);
 
 	brelse(fe_bh);
 	brelse(parent_fe_bh);
+	brelse(old_dir_bh);
 
 	ocfs2_free_dir_lookup_result(&lookup);
 
@@ -1072,14 +1097,15 @@ static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
 }
 
 /*
- * The only place this should be used is rename!
+ * The only place this should be used is rename and link!
  * if they have the same id, then the 1st one is the only one locked.
  */
 static int ocfs2_double_lock(struct ocfs2_super *osb,
 			     struct buffer_head **bh1,
 			     struct inode *inode1,
 			     struct buffer_head **bh2,
-			     struct inode *inode2)
+			     struct inode *inode2,
+			     int rename)
 {
 	int status;
 	int inode1_is_ancestor, inode2_is_ancestor;
@@ -1127,7 +1153,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 		}
 		/* lock id2 */
 		status = ocfs2_inode_lock_nested(inode2, bh2, 1,
-						 OI_LS_RENAME1);
+				rename == 1 ? OI_LS_RENAME1 : OI_LS_PARENT);
 		if (status < 0) {
 			if (status != -ENOENT)
 				mlog_errno(status);
@@ -1136,7 +1162,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
 	}
 
 	/* lock id1 */
-	status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
+	status = ocfs2_inode_lock_nested(inode1, bh1, 1,
+			rename == 1 ? OI_LS_RENAME2 : OI_LS_PARENT);
 	if (status < 0) {
 		/*
 		 * An error return must mean that no cluster locks
@@ -1252,7 +1279,7 @@ static int ocfs2_rename(struct inode *old_dir,
 
 	/* if old and new are the same, this'll just do one lock. */
 	status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
-				   &new_dir_bh, new_dir);
+				   &new_dir_bh, new_dir, 1);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
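
For context on the hunks above: ocfs2_double_lock takes the two directory cluster locks in a fixed order, and takes only one lock when both inodes are the same, which is what lets ocfs2_link lock old_dir and dir together without risking an ABBA deadlock against a concurrent rename. A rough userspace analogy of that ordering discipline, with pthread mutexes and address order standing in for ocfs2's inode-number ordering and lockdep subclasses (names are hypothetical):

#include <pthread.h>
#include <stdint.h>

/* Hypothetical: lock two resources in a stable (address) order so two
 * threads locking the same pair can never deadlock; take only one lock
 * when both arguments refer to the same resource. */
static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {			/* same resource: one lock suffices */
		pthread_mutex_lock(a);
		return;
	}
	if ((uintptr_t)a > (uintptr_t)b) {	/* normalize the order */
		pthread_mutex_t *t = a;
		a = b;
		b = t;
	}
	pthread_mutex_lock(a);
	pthread_mutex_lock(b);
}

static void double_unlock(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (a != b)
		pthread_mutex_unlock(b);
}

The real helper also handles the case where one directory is an ancestor of the other; this sketch shows only the ordering idea.
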
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f90c0282c114..42efe13077b6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -135,7 +135,7 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_CAN_WRITE		((__force fmode_t)0x40000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
-#define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
+#define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c0c2bce6b0b7..d9d7e7e56352 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,6 +37,16 @@ struct anon_vma {
 	atomic_t refcount;
 
 	/*
+	 * Count of child anon_vmas and VMAs which points to this anon_vma.
+	 *
+	 * This counter is used for making decision about reusing anon_vma
+	 * instead of forking new one. See comments in function anon_vma_clone.
+	 */
+	unsigned degree;
+
+	struct anon_vma *parent;	/* Parent of this anon_vma */
+
+	/*
 	 * NOTE: the LSB of the rb_root.rb_node is set by
 	 * mm_take_all_locks() _after_ taking the above lock. So the
 	 * rb_root must only be read/written after taking the above lock
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a219be961c0a..00048339c23e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -177,7 +177,6 @@ int write_cache_pages(struct address_space *mapping,
 		      struct writeback_control *wbc, writepage_t writepage,
 		      void *data);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
-void set_page_dirty_balance(struct page *page);
 void writeback_set_ratelimit(void);
 void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end);
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 7543b3e51331..e063effe0cc1 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -5,7 +5,7 @@
 
 /*
  * FMODE_EXEC is 0x20
- * FMODE_NONOTIFY is 0x1000000
+ * FMODE_NONOTIFY is 0x4000000
  * These cannot be used by userspace O_* until internal and external open
  * flags are split.
  * -Eric Paris
diff --git a/kernel/exit.c b/kernel/exit.c
index 1ea4369890a3..6806c55475ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1287,9 +1287,15 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 static int wait_consider_task(struct wait_opts *wo, int ptrace,
 				struct task_struct *p)
 {
+	/*
+	 * We can race with wait_task_zombie() from another thread.
+	 * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition
+	 * can't confuse the checks below.
+	 */
+	int exit_state = ACCESS_ONCE(p->exit_state);
 	int ret;
 
-	if (unlikely(p->exit_state == EXIT_DEAD))
+	if (unlikely(exit_state == EXIT_DEAD))
 		return 0;
 
 	ret = eligible_child(wo, p);
@@ -1310,7 +1316,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
 		return 0;
 	}
 
-	if (unlikely(p->exit_state == EXIT_TRACE)) {
+	if (unlikely(exit_state == EXIT_TRACE)) {
 		/*
 		 * ptrace == 0 means we are the natural parent. In this case
 		 * we should clear notask_error, debugger will notify us.
@@ -1337,7 +1343,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
 	}
 
 	/* slay zombie? */
-	if (p->exit_state == EXIT_ZOMBIE) {
+	if (exit_state == EXIT_ZOMBIE) {
 		/* we don't reap group leaders with subthreads */
 		if (!delay_group_leader(p)) {
 			/*
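
The exit.c change reads p->exit_state once into a local so the EXIT_DEAD, EXIT_TRACE, and EXIT_ZOMBIE tests all see the same snapshot; ACCESS_ONCE keeps the compiler from re-loading the field between checks. A userspace sketch of the snapshot-once pattern, with hypothetical state names and a C11 atomic standing in for the kernel macro:

#include <stdatomic.h>

enum state { ZOMBIE, DEAD, TRACE };

/* Hypothetical shared field another thread may update concurrently. */
static _Atomic int exit_state;

static int consider(void)
{
	/* One load; every branch below sees the same value, so a
	 * concurrent ZOMBIE -> DEAD transition cannot make us take
	 * two mutually exclusive branches in one call. */
	int st = atomic_load_explicit(&exit_state, memory_order_relaxed);

	if (st == DEAD)
		return 0;
	if (st == TRACE)
		return 1;
	if (st == ZOMBIE)
		return 2;
	return -1;
}
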
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 56badfc4810a..957d3da53ddd 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -14,7 +14,6 @@ config DEBUG_PAGEALLOC
 	depends on !KMEMCHECK
 	select PAGE_EXTENSION
 	select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
-	select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	---help---
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
@@ -27,13 +26,5 @@ config DEBUG_PAGEALLOC
 	  that would result in incorrect warnings of memory corruption after
 	  a resume because free pages are not saved to the suspend image.
 
-config WANT_PAGE_DEBUG_FLAGS
-	bool
-
 config PAGE_POISONING
 	bool
-	select WANT_PAGE_DEBUG_FLAGS
-
-config PAGE_GUARD
-	bool
-	select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ef91e856c7e4..851924fa5170 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3043,18 +3043,6 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
 	if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
 		mem_cgroup_swap_statistics(from, false);
 		mem_cgroup_swap_statistics(to, true);
-		/*
-		 * This function is only called from task migration context now.
-		 * It postpones page_counter and refcount handling till the end
-		 * of task migration(mem_cgroup_clear_mc()) for performance
-		 * improvement. But we cannot postpone css_get(to) because if
-		 * the process that has been moved to @to does swap-in, the
-		 * refcount of @to might be decreased to 0.
-		 *
-		 * We are in attach() phase, so the cgroup is guaranteed to be
-		 * alive, so we can just call css_get().
-		 */
-		css_get(&to->css);
 		return 0;
 	}
 	return -EINVAL;
@@ -4679,6 +4667,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (parent_css == NULL) {
 		root_mem_cgroup = memcg;
 		page_counter_init(&memcg->memory, NULL);
+		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 	}
@@ -4724,6 +4713,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 
 	if (parent->use_hierarchy) {
 		page_counter_init(&memcg->memory, &parent->memory);
+		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, &parent->memsw);
 		page_counter_init(&memcg->kmem, &parent->kmem);
 
@@ -4733,6 +4723,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	 */
 	} else {
 		page_counter_init(&memcg->memory, NULL);
+		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
 		/*
@@ -4807,7 +4798,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX);
 	mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX);
 	memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX);
-	memcg->soft_limit = 0;
+	memcg->soft_limit = PAGE_COUNTER_MAX;
 }
 
 #ifdef CONFIG_MMU
diff --git a/mm/memory.c b/mm/memory.c
index d7e497e98f46..c6565f00fb38 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2137,17 +2137,24 @@ reuse:
 	if (!dirty_page)
 		return ret;
 
-	/*
-	 * Yes, Virginia, this is actually required to prevent a race
-	 * with clear_page_dirty_for_io() from clearing the page dirty
-	 * bit after it clear all dirty ptes, but before a racing
-	 * do_wp_page installs a dirty pte.
-	 *
-	 * do_shared_fault is protected similarly.
-	 */
 	if (!page_mkwrite) {
-		wait_on_page_locked(dirty_page);
-		set_page_dirty_balance(dirty_page);
+		struct address_space *mapping;
+		int dirtied;
+
+		lock_page(dirty_page);
+		dirtied = set_page_dirty(dirty_page);
+		VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
+		mapping = dirty_page->mapping;
+		unlock_page(dirty_page);
+
+		if (dirtied && mapping) {
+			/*
+			 * Some device drivers do not set page.mapping
+			 * but still dirty their pages
+			 */
+			balance_dirty_pages_ratelimited(mapping);
+		}
+
 		/* file_update_time outside page_lock */
 		if (vma->vm_file)
 			file_update_time(vma->vm_file);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d5d81f5384d1..6f4335238e33 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1541,16 +1541,6 @@ pause:
 		bdi_start_background_writeback(bdi);
 }
 
-void set_page_dirty_balance(struct page *page)
-{
-	if (set_page_dirty(page)) {
-		struct address_space *mapping = page_mapping(page);
-
-		if (mapping)
-			balance_dirty_pages_ratelimited(mapping);
-	}
-}
-
 static DEFINE_PER_CPU(int, bdp_ratelimits);
 
 /*
@@ -2123,32 +2113,25 @@ EXPORT_SYMBOL(account_page_dirtied);
  * page dirty in that case, but not all the buffers. This is a "bottom-up"
  * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
  *
- * Most callers have locked the page, which pins the address_space in memory.
- * But zap_pte_range() does not lock the page, however in that case the
- * mapping is pinned by the vma's ->vm_file reference.
- *
- * We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() inside tree_lock.
+ * The caller must ensure this doesn't race with truncation. Most will simply
+ * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
+ * the pte lock held, which also locks out truncation.
  */
 int __set_page_dirty_nobuffers(struct page *page)
 {
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
-		struct address_space *mapping2;
 		unsigned long flags;
 
 		if (!mapping)
 			return 1;
 
 		spin_lock_irqsave(&mapping->tree_lock, flags);
-		mapping2 = page_mapping(page);
-		if (mapping2) { /* Race with truncate? */
-			BUG_ON(mapping2 != mapping);
-			WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
-			account_page_dirtied(page, mapping);
-			radix_tree_tag_set(&mapping->page_tree,
-				page_index(page), PAGECACHE_TAG_DIRTY);
-		}
+		BUG_ON(page_mapping(page) != mapping);
+		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+		account_page_dirtied(page, mapping);
+		radix_tree_tag_set(&mapping->page_tree, page_index(page),
+				   PAGECACHE_TAG_DIRTY);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		if (mapping->host) {
 			/* !PageAnon && !swapper_space */
@@ -2305,12 +2288,10 @@ int clear_page_dirty_for_io(struct page *page)
 		/*
 		 * We carefully synchronise fault handlers against
 		 * installing a dirty pte and marking the page dirty
 		 * at this point. We do this by having them hold the
-		 * page lock at some point after installing their
-		 * pte, but before marking the page dirty.
-		 * Pages are always locked coming in here, so we get
-		 * the desired exclusion. See mm/memory.c:do_wp_page()
-		 * for more comments.
+		 * page lock while dirtying the page, and pages are
+		 * always locked coming in here, so we get the desired
+		 * exclusion.
 		 */
 		if (TestClearPageDirty(page)) {
 			dec_zone_page_state(page, NR_FILE_DIRTY);
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -72,6 +72,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
 	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
 	if (anon_vma) {
 		atomic_set(&anon_vma->refcount, 1);
+		anon_vma->degree = 1;	/* Reference for first vma */
+		anon_vma->parent = anon_vma;
 		/*
 		 * Initialise the anon_vma root to point to itself. If called
 		 * from fork, the root will be reset to the parents anon_vma.
@@ -188,6 +190,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
 			anon_vma_chain_link(vma, avc, anon_vma);
+			/* vma reference or self-parent link for new root */
+			anon_vma->degree++;
 			allocated = NULL;
 			avc = NULL;
 		}
@@ -236,6 +240,14 @@ static inline void unlock_anon_vma_root(struct anon_vma *root)
 /*
  * Attach the anon_vmas from src to dst.
  * Returns 0 on success, -ENOMEM on failure.
+ *
+ * If dst->anon_vma is NULL this function tries to find and reuse existing
+ * anon_vma which has no vmas and only one child anon_vma. This prevents
+ * degradation of anon_vma hierarchy to endless linear chain in case of
+ * constantly forking task. On the other hand, an anon_vma with more than one
+ * child isn't reused even if there was no alive vma, thus rmap walker has a
+ * good chance of avoiding scanning the whole hierarchy when it searches where
+ * page is mapped.
  */
 int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
@@ -256,7 +268,21 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 		anon_vma = pavc->anon_vma;
 		root = lock_anon_vma_root(root, anon_vma);
 		anon_vma_chain_link(dst, avc, anon_vma);
+
+		/*
+		 * Reuse existing anon_vma if its degree lower than two,
+		 * that means it has no vma and only one anon_vma child.
+		 *
+		 * Do not chose parent anon_vma, otherwise first child
+		 * will always reuse it. Root anon_vma is never reused:
+		 * it has self-parent reference and at least one child.
+		 */
+		if (!dst->anon_vma && anon_vma != src->anon_vma &&
+				anon_vma->degree < 2)
+			dst->anon_vma = anon_vma;
 	}
+	if (dst->anon_vma)
+		dst->anon_vma->degree++;
 	unlock_anon_vma_root(root);
 	return 0;
 
@@ -280,6 +306,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	if (!pvma->anon_vma)
 		return 0;
 
+	/* Drop inherited anon_vma, we'll reuse existing or allocate new. */
+	vma->anon_vma = NULL;
+
 	/*
 	 * First, attach the new VMA to the parent VMA's anon_vmas,
 	 * so rmap can find non-COWed pages in child processes.
@@ -288,6 +317,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	if (error)
 		return error;
 
+	/* An existing anon_vma has been reused, all done then. */
+	if (vma->anon_vma)
+		return 0;
+
 	/* Then add our own anon_vma. */
 	anon_vma = anon_vma_alloc();
 	if (!anon_vma)
@@ -301,6 +334,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	 * lock any of the anon_vmas in this anon_vma tree.
 	 */
 	anon_vma->root = pvma->anon_vma->root;
+	anon_vma->parent = pvma->anon_vma;
 	/*
 	 * With refcounts, an anon_vma can stay around longer than the
 	 * process it belongs to. The root anon_vma needs to be pinned until
@@ -311,6 +345,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	vma->anon_vma = anon_vma;
 	anon_vma_lock_write(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
+	anon_vma->parent->degree++;
 	anon_vma_unlock_write(anon_vma);
 
 	return 0;
@@ -341,12 +376,16 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 		 * Leave empty anon_vmas on the list - we'll need
 		 * to free them outside the lock.
 		 */
-		if (RB_EMPTY_ROOT(&anon_vma->rb_root))
+		if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
+			anon_vma->parent->degree--;
 			continue;
+		}
 
 		list_del(&avc->same_vma);
 		anon_vma_chain_free(avc);
 	}
+	if (vma->anon_vma)
+		vma->anon_vma->degree--;
 	unlock_anon_vma_root(root);
 
 	/*
@@ -357,6 +396,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma = avc->anon_vma;
 
+		BUG_ON(anon_vma->degree);
 		put_anon_vma(anon_vma);
 
 		list_del(&avc->same_vma);
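
The degree/parent bookkeeping above exists so anon_vma_clone can reuse a childless, vma-less anon_vma instead of allocating a fresh one on every fork, which previously let the hierarchy degrade into an ever-growing linear chain. An illustrative userspace workload of the kind the new comment calls a "constantly forking task" (each parent exits and the surviving child forks again; the iteration count is arbitrary):

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/* Each iteration forks; the parent exits and the child carries on, so
 * the anon_vma hierarchy of the surviving process keeps gaining a
 * level. Before the degree-based reuse, that chain grew without bound. */
int main(void)
{
	for (int i = 0; i < 100000; i++) {
		pid_t pid = fork();
		if (pid < 0)
			exit(1);
		if (pid > 0)
			exit(0);	/* parent leaves; child loops again */
	}
	return 0;
}
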
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd9a72bc4a1b..ab2505c3ef54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2921,18 +2921,20 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 		return false;
 
 	/*
-	 * There is a potential race between when kswapd checks its watermarks
-	 * and a process gets throttled. There is also a potential race if
-	 * processes get throttled, kswapd wakes, a large process exits therby
-	 * balancing the zones that causes kswapd to miss a wakeup. If kswapd
-	 * is going to sleep, no process should be sleeping on pfmemalloc_wait
-	 * so wake them now if necessary. If necessary, processes will wake
-	 * kswapd and get throttled again
+	 * The throttled processes are normally woken up in balance_pgdat() as
+	 * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+	 * race between when kswapd checks the watermarks and a process gets
+	 * throttled. There is also a potential race if processes get
+	 * throttled, kswapd wakes, a large process exits thereby balancing the
+	 * zones, which causes kswapd to exit balance_pgdat() before reaching
+	 * the wake up checks. If kswapd is going to sleep, no process should
+	 * be sleeping on pfmemalloc_wait, so wake them now if necessary. If
+	 * the wake up is premature, processes will wake kswapd and get
+	 * throttled again. The difference from wake ups in balance_pgdat() is
+	 * that here we are under prepare_to_wait().
 	 */
-	if (waitqueue_active(&pgdat->pfmemalloc_wait)) {
-		wake_up(&pgdat->pfmemalloc_wait);
-		return false;
-	}
+	if (waitqueue_active(&pgdat->pfmemalloc_wait))
+		wake_up_all(&pgdat->pfmemalloc_wait);
 
 	return pgdat_balanced(pgdat, order, classzone_idx);
 }
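
wake_up() releases a single exclusive waiter, while wake_up_all() empties pfmemalloc_wait entirely, which matters here because kswapd is about to sleep and anyone left throttled would otherwise wait for the next wakeup. A pthread analogy of the one-versus-all distinction, with hypothetical names; kernel waitqueues differ in detail, but the signal/broadcast semantics carry over:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_q = PTHREAD_COND_INITIALIZER;
static bool watermark_ok;

/* Throttled side: sleep until the condition holds. */
static void throttle(void)
{
	pthread_mutex_lock(&lock);
	while (!watermark_ok)
		pthread_cond_wait(&wait_q, &lock);
	pthread_mutex_unlock(&lock);
}

/* Waker: broadcast (like wake_up_all) releases every sleeper; signal
 * (like wake_up) would release only one and leave the rest throttled. */
static void release_all(void)
{
	pthread_mutex_lock(&lock);
	watermark_ok = true;
	pthread_cond_broadcast(&wait_q);
	pthread_mutex_unlock(&lock);
}
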
