author		Linus Torvalds <torvalds@linux-foundation.org>	2016-09-19 19:08:03 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-09-19 19:08:03 -0400
commit		d2ffb0103aaefa9b169da042cf39ce27bfb6cdbb (patch)
tree		967273cfc51bf649cf5f9f4f4ad0cf0be4b633fc
parent		7fadce0d60d09427e0027d3d468781b08ca0b3d1 (diff)
parent		b92ae139c308c5223521ed6ec022148b81312809 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
 "20 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  rapidio/rio_cm: avoid GFP_KERNEL in atomic context
  Revert "ocfs2: bump up o2cb network protocol version"
  ocfs2: fix start offset to ocfs2_zero_range_for_truncate()
  cgroup: duplicate cgroup reference when cloning sockets
  mm: memcontrol: make per-cpu charge cache IRQ-safe for socket accounting
  ocfs2: fix double unlock in case retry after free truncate log
  fanotify: fix list corruption in fanotify_get_response()
  fsnotify: add a way to stop queueing events on group shutdown
  ocfs2: fix trans extend while free cached blocks
  ocfs2: fix trans extend while flush truncate log
  ipc/shm: fix crash if CONFIG_SHMEM is not set
  mm: fix the page_swap_info() BUG_ON check
  autofs: use dentry flags to block walks during expire
  MAINTAINERS: update email for VLYNQ bus entry
  mm: avoid endless recursion in dump_page()
  mm, thp: fix leaking mapped pte in __collapse_huge_page_swapin()
  khugepaged: fix use-after-free in collapse_huge_page()
  MAINTAINERS: Maik has moved
  ocfs2/dlm: fix race between convert and migration
  mem-hotplug: don't clear the only node in new_node_page()
-rw-r--r--	MAINTAINERS				|  4
-rw-r--r--	drivers/rapidio/rio_cm.c		| 19
-rw-r--r--	fs/autofs4/expire.c			| 55
-rw-r--r--	fs/notify/fanotify/fanotify.c		| 13
-rw-r--r--	fs/notify/fanotify/fanotify_user.c	| 36
-rw-r--r--	fs/notify/group.c			| 19
-rw-r--r--	fs/notify/notification.c		| 23
-rw-r--r--	fs/ocfs2/alloc.c			| 56
-rw-r--r--	fs/ocfs2/cluster/tcp_internal.h		|  5
-rw-r--r--	fs/ocfs2/dlm/dlmconvert.c		| 12
-rw-r--r--	fs/ocfs2/file.c				| 34
-rw-r--r--	fs/ocfs2/suballoc.c			| 14
-rw-r--r--	fs/ramfs/file-mmu.c			|  9
-rw-r--r--	include/linux/fsnotify_backend.h	|  6
-rw-r--r--	kernel/cgroup.c				|  6
-rw-r--r--	mm/debug.c				|  6
-rw-r--r--	mm/khugepaged.c				| 25
-rw-r--r--	mm/memcontrol.c				| 31
-rw-r--r--	mm/memory_hotplug.c			|  4
-rw-r--r--	mm/page_io.c				|  3
-rw-r--r--	mm/swapfile.c				|  1
-rw-r--r--	net/core/sock.c				|  5
22 files changed, 240 insertions(+), 146 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 644ff65d336d..a0ce40f4c66c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6103,7 +6103,7 @@ S:	Supported
 F:	drivers/cpufreq/intel_pstate.c
 
 INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
-M:	Maik Broemme <mbroemme@plusserver.de>
+M:	Maik Broemme <mbroemme@libmpq.org>
 L:	linux-fbdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/fb/intelfb.txt
@@ -12569,7 +12569,7 @@ F:	include/linux/if_*vlan.h
 F:	net/8021q/
 
 VLYNQ BUS
-M:	Florian Fainelli <florian@openwrt.org>
+M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	openwrt-devel@lists.openwrt.org (subscribers-only)
 S:	Maintained
 F:	drivers/vlynq/vlynq.c
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index 3fa17ac8df54..cebc296463ad 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2247,17 +2247,30 @@ static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code,
 {
 	struct rio_channel *ch;
 	unsigned int i;
+	LIST_HEAD(list);
 
 	riocm_debug(EXIT, ".");
 
+	/*
+	 * If there are any channels left in connected state send
+	 * close notification to the connection partner.
+	 * First build a list of channels that require a closing
+	 * notification because function riocm_send_close() should
+	 * be called outside of spinlock protected code.
+	 */
 	spin_lock_bh(&idr_lock);
 	idr_for_each_entry(&ch_idr, ch, i) {
-		riocm_debug(EXIT, "close ch %d", ch->id);
-		if (ch->state == RIO_CM_CONNECTED)
-			riocm_send_close(ch);
+		if (ch->state == RIO_CM_CONNECTED) {
+			riocm_debug(EXIT, "close ch %d", ch->id);
+			idr_remove(&ch_idr, ch->id);
+			list_add(&ch->ch_node, &list);
+		}
 	}
 	spin_unlock_bh(&idr_lock);
 
+	list_for_each_entry(ch, &list, ch_node)
+		riocm_send_close(ch);
+
 	return NOTIFY_DONE;
 }
 
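
The rio_cm hunk above is the "avoid GFP_KERNEL in atomic context" fix: riocm_send_close() can sleep, so the patch first collects the connected channels on a private list under idr_lock and only sends the close notifications once the lock is dropped. Below is a minimal, self-contained userspace sketch of that collect-then-act pattern; pthread spinlocks stand in for the kernel's spin_lock_bh(), and every name in it is hypothetical rather than the driver's real API.

/* build with: cc -o rio_shutdown rio_shutdown.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct channel {
	int id;
	int connected;
	struct channel *next;		/* global channel table */
	struct channel *close_next;	/* private deferred-close list */
};

static pthread_spinlock_t table_lock;
static struct channel *table;

/* May block (simulated here), so it must run with no spinlock held. */
static void send_close(struct channel *ch)
{
	usleep(1000);
	printf("close ch %d\n", ch->id);
}

static void shutdown_all(void)
{
	struct channel *ch, *close_list = NULL;

	/* Pass 1: under the lock, only collect; never sleep. */
	pthread_spin_lock(&table_lock);
	for (ch = table; ch; ch = ch->next) {
		if (ch->connected) {
			ch->close_next = close_list;
			close_list = ch;
		}
	}
	pthread_spin_unlock(&table_lock);

	/* Pass 2: the sleeping work, lock already dropped. */
	for (ch = close_list; ch; ch = ch->close_next)
		send_close(ch);
}

int main(void)
{
	struct channel chs[3] = {
		{ .id = 0, .connected = 1 },
		{ .id = 1 },
		{ .id = 2, .connected = 1 },
	};

	pthread_spin_init(&table_lock, PTHREAD_PROCESS_PRIVATE);
	chs[0].next = &chs[1];
	chs[1].next = &chs[2];
	table = &chs[0];
	shutdown_all();
	return 0;
}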
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index b493909e7492..d8e6d421c27f 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry,
 	}
 	return NULL;
 }
+
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
@@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	struct dentry *root = sb->s_root;
 	struct dentry *dentry;
 	struct dentry *expired;
+	struct dentry *found;
 	struct autofs_info *ino;
 
 	if (!root)
@@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
 	dentry = NULL;
 	while ((dentry = get_next_positive_subdir(dentry, root))) {
+		int flags = how;
+
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
-		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
-			expired = NULL;
-		else
-			expired = should_expire(dentry, mnt, timeout, how);
-		if (!expired) {
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
 			spin_unlock(&sbi->fs_lock);
 			continue;
 		}
+		spin_unlock(&sbi->fs_lock);
+
+		expired = should_expire(dentry, mnt, timeout, flags);
+		if (!expired)
+			continue;
+
+		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(expired);
 		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
-		spin_lock(&sbi->fs_lock);
-		if (should_expire(expired, mnt, timeout, how)) {
-			if (expired != dentry)
-				dput(dentry);
-			goto found;
-		}
 
+		/* Make sure a reference is not taken on found if
+		 * things have changed.
+		 */
+		flags &= ~AUTOFS_EXP_LEAVES;
+		found = should_expire(expired, mnt, timeout, how);
+		if (!found || found != expired)
+			/* Something has changed, continue */
+			goto next;
+
+		if (expired != dentry)
+			dput(dentry);
+
+		spin_lock(&sbi->fs_lock);
+		goto found;
+next:
+		spin_lock(&sbi->fs_lock);
 		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+		spin_unlock(&sbi->fs_lock);
 		if (expired != dentry)
 			dput(expired);
-		spin_unlock(&sbi->fs_lock);
 	}
 	return NULL;
 
@@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status;
+	int state;
 
 	/* Block on any pending expire */
 	if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
@@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	if (rcu_walk)
 		return -ECHILD;
 
+retry:
 	spin_lock(&sbi->fs_lock);
-	if (ino->flags & AUTOFS_INF_EXPIRING) {
+	state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+	if (state == AUTOFS_INF_WANT_EXPIRE) {
+		spin_unlock(&sbi->fs_lock);
+		/*
+		 * Possibly being selected for expire, wait until
+		 * it's selected or not.
+		 */
+		schedule_timeout_uninterruptible(HZ/10);
+		goto retry;
+	}
+	if (state & AUTOFS_INF_EXPIRING) {
 		spin_unlock(&sbi->fs_lock);
 
 		pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
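
The autofs4_expire_wait() hunk introduces a poll-and-retry wait: when only the "want expire" bit is set, selection is still undecided, so the waiter backs off briefly and re-reads the state under the lock. A small pthread sketch of that wait-until-decided loop, with hypothetical names and nanosleep standing in for schedule_timeout_uninterruptible(HZ/10):

/* build with: cc -o expire_wait expire_wait.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define WANT_EXPIRE 0x1		/* selection in progress */
#define EXPIRING    0x2		/* selected, expire running */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int flags = WANT_EXPIRE;

static void *selector(void *arg)
{
	(void)arg;
	nanosleep(&(struct timespec){ .tv_nsec = 250 * 1000 * 1000 }, NULL);
	pthread_mutex_lock(&lock);
	flags |= EXPIRING;	/* decision made: it will be expired */
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void expire_wait(void)
{
	int state;

retry:
	pthread_mutex_lock(&lock);
	state = flags & (WANT_EXPIRE | EXPIRING);
	if (state == WANT_EXPIRE) {
		/* Possibly being selected: back off ~100ms, look again. */
		pthread_mutex_unlock(&lock);
		nanosleep(&(struct timespec){ .tv_nsec = 100 * 1000 * 1000 },
			  NULL);
		goto retry;
	}
	pthread_mutex_unlock(&lock);

	if (state & EXPIRING)
		printf("expire in progress, now block on it\n");
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, selector, NULL);
	expire_wait();
	pthread_join(t, NULL);
	return 0;
}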
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ecca6a5..e0e5f7c3c99f 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response ||
-				atomic_read(&group->fanotify_data.bypass_perm));
-
-	if (!event->response) {	/* bypass_perm set */
-		/*
-		 * Event was canceled because group is being destroyed. Remove
-		 * it from group's event list because we are responsible for
-		 * freeing the permission event.
-		 */
-		fsnotify_remove_event(group, &event->fae.fse);
-		return 0;
-	}
+	wait_event(group->fanotify_data.access_waitq, event->response);
 
 	/* userspace responded, convert to something usable */
 	switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bcd1d43..a64313868d3a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	struct fanotify_perm_event_info *event, *next;
+	struct fsnotify_event *fsn_event;
 
 	/*
-	 * There may be still new events arriving in the notification queue
-	 * but since userspace cannot use fanotify fd anymore, no event can
-	 * enter or leave access_list by now.
+	 * Stop new events from arriving in the notification queue. since
+	 * userspace cannot use fanotify fd anymore, no event can enter or
+	 * leave access_list by now either.
 	 */
-	spin_lock(&group->fanotify_data.access_lock);
-
-	atomic_inc(&group->fanotify_data.bypass_perm);
+	fsnotify_group_stop_queueing(group);
 
+	/*
+	 * Process all permission events on access_list and notification queue
+	 * and simulate reply from userspace.
+	 */
+	spin_lock(&group->fanotify_data.access_lock);
 	list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
 				 fae.fse.list) {
 		pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	spin_unlock(&group->fanotify_data.access_lock);
 
 	/*
-	 * Since bypass_perm is set, newly queued events will not wait for
-	 * access response. Wake up the already sleeping ones now.
-	 * synchronize_srcu() in fsnotify_destroy_group() will wait for all
-	 * processes sleeping in fanotify_handle_event() waiting for access
-	 * response and thus also for all permission events to be freed.
+	 * Destroy all non-permission events. For permission events just
+	 * dequeue them and set the response. They will be freed once the
+	 * response is consumed and fanotify_get_response() returns.
 	 */
+	mutex_lock(&group->notification_mutex);
+	while (!fsnotify_notify_queue_is_empty(group)) {
+		fsn_event = fsnotify_remove_first_event(group);
+		if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+			fsnotify_destroy_event(group, fsn_event);
+		else
+			FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+	}
+	mutex_unlock(&group->notification_mutex);
+
+	/* Response for all permission events it set, wakeup waiters */
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
 
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	spin_lock_init(&group->fanotify_data.access_lock);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
-	atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
 	switch (flags & FAN_ALL_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 3e2dd85be5dd..b47f7cfdcaa4 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
 }
 
 /*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+	mutex_lock(&group->notification_mutex);
+	group->shutdown = true;
+	mutex_unlock(&group->notification_mutex);
+}
+
+/*
  * Trying to get rid of a group. Remove all marks, flush all events and release
  * the group reference.
  * Note that another thread calling fsnotify_clear_marks_by_group() may still
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
  */
 void fsnotify_destroy_group(struct fsnotify_group *group)
 {
+	/*
+	 * Stop queueing new events. The code below is careful enough to not
+	 * require this but fanotify needs to stop queuing events even before
+	 * fsnotify_destroy_group() is called and this makes the other callers
+	 * of fsnotify_destroy_group() to see the same behavior.
+	 */
+	fsnotify_group_stop_queueing(group);
+
 	/* clear all inode marks for this group, attach them to destroy_list */
 	fsnotify_detach_group_marks(group);
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e037aeb..e455e83ceeeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
  * Add an event to the group notification queue. The group can later pull this
  * event off the queue to deal with. The function returns 0 if the event was
  * added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
  */
 int fsnotify_add_event(struct fsnotify_group *group,
 		       struct fsnotify_event *event,
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
 
 	mutex_lock(&group->notification_mutex);
 
+	if (group->shutdown) {
+		mutex_unlock(&group->notification_mutex);
+		return 2;
+	}
+
 	if (group->q_len >= group->max_events) {
 		ret = 2;
 		/* Queue overflow event only if it isn't already queued */
@@ -126,21 +132,6 @@ queue:
 }
 
 /*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
-			   struct fsnotify_event *event)
-{
-	mutex_lock(&group->notification_mutex);
-	if (!list_empty(&event->list)) {
-		list_del_init(&event->list);
-		group->q_len--;
-	}
-	mutex_unlock(&group->notification_mutex);
-}
-
-/*
  * Remove and return the first event from the notification list. It is the
  * responsibility of the caller to destroy the obtained event
  */
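
Taken together, the fsnotify hunks replace the fanotify-only bypass_perm counter with a generic shutdown flag that fsnotify_add_event() checks under notification_mutex. A compact userspace sketch of that flag, with a pthread mutex in place of notification_mutex and illustrative names throughout:

/* build with: cc -o shutdownq shutdownq.c -lpthread */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct group {
	pthread_mutex_t lock;	/* plays the role of notification_mutex */
	bool shutdown;
	int q_len, max_events;
};

/* After this returns, add_event() refuses every new event. */
static void group_stop_queueing(struct group *g)
{
	pthread_mutex_lock(&g->lock);
	g->shutdown = true;
	pthread_mutex_unlock(&g->lock);
}

/* Returns 0 if queued, 2 if dropped (overflow or shutting down). */
static int add_event(struct group *g)
{
	int ret = 0;

	pthread_mutex_lock(&g->lock);
	if (g->shutdown || g->q_len >= g->max_events)
		ret = 2;
	else
		g->q_len++;
	pthread_mutex_unlock(&g->lock);
	return ret;
}

int main(void)
{
	struct group g = { PTHREAD_MUTEX_INITIALIZER, false, 0, 4 };

	printf("%d\n", add_event(&g));	/* 0: queued */
	group_stop_queueing(&g);
	printf("%d\n", add_event(&g));	/* 2: rejected after shutdown */
	return 0;
}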
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7dabbc31060e..f165f867f332 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5922,7 +5922,6 @@ bail:
 }
 
 static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
-					 handle_t *handle,
 					 struct inode *data_alloc_inode,
 					 struct buffer_head *data_alloc_bh)
 {
@@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 	struct ocfs2_truncate_log *tl;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct buffer_head *tl_bh = osb->osb_tl_bh;
+	handle_t *handle;
 
 	di = (struct ocfs2_dinode *) tl_bh->b_data;
 	tl = &di->id2.i_dealloc;
 	i = le16_to_cpu(tl->tl_used) - 1;
 	while (i >= 0) {
+		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+		if (IS_ERR(handle)) {
+			status = PTR_ERR(handle);
+			mlog_errno(status);
+			goto bail;
+		}
+
 		/* Caller has given us at least enough credits to
 		 * update the truncate log dinode */
 		status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
@@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 			}
 		}
 
-		status = ocfs2_extend_trans(handle,
-					    OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
+		ocfs2_commit_trans(osb, handle);
 		i--;
 	}
 
@@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 {
 	int status;
 	unsigned int num_to_flush;
-	handle_t *handle;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct inode *data_alloc_inode = NULL;
 	struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
-	if (IS_ERR(handle)) {
-		status = PTR_ERR(handle);
-		mlog_errno(status);
-		goto out_unlock;
-	}
-
-	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+	status = ocfs2_replay_truncate_records(osb, data_alloc_inode,
 					       data_alloc_bh);
 	if (status < 0)
 		mlog_errno(status);
 
-	ocfs2_commit_trans(osb, handle);
-
-out_unlock:
 	brelse(data_alloc_bh);
 	ocfs2_inode_unlock(data_alloc_inode, 1);
 
@@ -6413,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
 		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out_unlock;
-	}
-
 	while (head) {
 		if (head->free_bg)
 			bg_blkno = head->free_bg;
 		else
 			bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
 							      head->free_bit);
+		handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			mlog_errno(ret);
+			goto out_unlock;
+		}
+
 		trace_ocfs2_free_cached_blocks(
 			(unsigned long long)head->free_blk, head->free_bit);
 
 		ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
 					       head->free_bit, bg_blkno, 1);
-		if (ret) {
+		if (ret)
 			mlog_errno(ret);
-			goto out_journal;
-		}
 
-		ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
-		if (ret) {
-			mlog_errno(ret);
-			goto out_journal;
-		}
+		ocfs2_commit_trans(osb, handle);
 
 		tmp = head;
 		head = head->free_next;
 		kfree(tmp);
 	}
 
-out_journal:
-	ocfs2_commit_trans(osb, handle);
-
 out_unlock:
 	ocfs2_inode_unlock(inode, 1);
 	brelse(di_bh);
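
Both ocfs2/alloc.c fixes above ("fix trans extend while flush truncate log" and "fix trans extend while free cached blocks") drop the pattern of repeatedly extending one long-lived journal transaction and instead start and commit a short transaction per record, so each iteration needs only a bounded number of credits. A schematic sketch of that per-iteration begin/commit shape; the transaction calls here are stubs, not the real ocfs2 journal API:

/* build with: cc -o replay replay.c */
#include <stdio.h>

/* Stub transaction API: not ocfs2, just the begin/commit shape. */
struct txn { int dummy; };

static struct txn *txn_begin(void)
{
	static struct txn t;
	printf("begin (fresh, bounded credits)\n");
	return &t;
}

static void txn_commit(struct txn *t)
{
	(void)t;
	printf("commit\n");
}

static int process_record(int i)
{
	printf("  record %d\n", i);
	return 0;
}

static int replay_records(int nr)
{
	int i, ret = 0;

	for (i = nr - 1; i >= 0; i--) {
		/* One short transaction per record, instead of one long
		 * transaction that must be repeatedly extended. */
		struct txn *t = txn_begin();

		ret = process_record(i);
		txn_commit(t);
		if (ret)
			break;
	}
	return ret;
}

int main(void)
{
	return replay_records(3);
}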
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 94b18369b1cc..b95e7df5b76a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,9 +44,6 @@
  * version here in tcp_internal.h should not need to be bumped for
  * filesystem locking changes.
  *
- * New in version 12
- *	- Negotiate hb timeout when storage is down.
- *
  * New in version 11
  *	- Negotiation of filesystem locking in the dlm join.
  *
@@ -78,7 +75,7 @@
  * - full 64 bit i_size in the metadata lock lvbs
  * - introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 12ULL
+#define O2NET_PROTOCOL_VERSION 11ULL
 struct o2net_handshake {
 	__be64 protocol_version;
 	__be64 connector_id;
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index cdeafb4e7ed6..0bb128659d4b 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 			  struct dlm_lock *lock, int flags, int type)
 {
 	enum dlm_status status;
-	u8 old_owner = res->owner;
 
 	mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
 	     lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 
 	spin_lock(&res->spinlock);
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
-	lock->convert_pending = 0;
 	/* if it failed, move it back to granted queue.
 	 * if master returns DLM_NORMAL and then down before sending ast,
 	 * it may have already been moved to granted queue, reset to
@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 		if (status != DLM_NOTQUEUED)
 			dlm_error(status);
 		dlm_revert_pending_convert(res, lock);
-	} else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
-			(old_owner != res->owner)) {
-		mlog(0, "res %.*s is in recovering or has been recovered.\n",
-				res->lockname.len, res->lockname.name);
+	} else if (!lock->convert_pending) {
+		mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+				"to granted list, retry convert.\n",
+				dlm->name, res->lockname.len, res->lockname.name);
 		status = DLM_RECOVERING;
 	}
+
+	lock->convert_pending = 0;
 bail:
 	spin_unlock(&res->spinlock);
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4e7b0dc22450..0b055bfb8e86 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 				       u64 start, u64 len)
 {
 	int ret = 0;
-	u64 tmpend, end = start + len;
+	u64 tmpend = 0;
+	u64 end = start + len;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	unsigned int csize = osb->s_clustersize;
 	handle_t *handle;
@@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 	}
 
 	/*
-	 * We want to get the byte offset of the end of the 1st cluster.
+	 * If start is on a cluster boundary and end is somewhere in another
+	 * cluster, we have not COWed the cluster starting at start, unless
+	 * end is also within the same cluster. So, in this case, we skip this
+	 * first call to ocfs2_zero_range_for_truncate() truncate and move on
+	 * to the next one.
 	 */
-	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
-	if (tmpend > end)
-		tmpend = end;
+	if ((start & (csize - 1)) != 0) {
+		/*
+		 * We want to get the byte offset of the end of the 1st
+		 * cluster.
+		 */
+		tmpend = (u64)osb->s_clustersize +
+			(start & ~(osb->s_clustersize - 1));
+		if (tmpend > end)
+			tmpend = end;
 
-	trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
-						 (unsigned long long)tmpend);
+		trace_ocfs2_zero_partial_clusters_range1(
+			(unsigned long long)start,
+			(unsigned long long)tmpend);
 
-	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
-	if (ret)
-		mlog_errno(ret);
+		ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+						    tmpend);
+		if (ret)
+			mlog_errno(ret);
+	}
 
 	if (tmpend < end) {
 		/*
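
The ocfs2_zero_partial_clusters() fix keys on whether start sits on a cluster boundary, using the usual power-of-two mask tests: start & (csize - 1) is zero exactly when start is aligned, and csize + (start & ~(csize - 1)) is the byte offset of the end of start's cluster. A tiny standalone demonstration of that arithmetic (the sample values are arbitrary):

/* build with: cc -o clusters clusters.c */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t csize = 4096;	/* cluster size, a power of two */
	uint64_t starts[] = { 0, 4096, 5000 };

	for (int i = 0; i < 3; i++) {
		uint64_t start = starts[i];
		/* end of the cluster containing start */
		uint64_t tmpend = csize + (start & ~(csize - 1));

		printf("start=%5llu aligned=%-3s cluster-end=%llu\n",
		       (unsigned long long)start,
		       (start & (csize - 1)) ? "no" : "yes",
		       (unsigned long long)tmpend);
	}
	return 0;
}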
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ea47120a85ff..6ad3533940ba 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1199,14 +1199,24 @@ retry:
 		inode_unlock((*ac)->ac_inode);
 
 		ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
-		if (ret == 1)
+		if (ret == 1) {
+			iput((*ac)->ac_inode);
+			(*ac)->ac_inode = NULL;
 			goto retry;
+		}
 
 		if (ret < 0)
 			mlog_errno(ret);
 
 		inode_lock((*ac)->ac_inode);
-		ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+		ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+		if (ret < 0) {
+			mlog_errno(ret);
+			inode_unlock((*ac)->ac_inode);
+			iput((*ac)->ac_inode);
+			(*ac)->ac_inode = NULL;
+			goto bail;
+		}
 	}
 	if (status < 0) {
 		if (status != -ENOSPC)
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 183a212694bf..12af0490322f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -27,9 +27,17 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/ramfs.h>
+#include <linux/sched.h>
 
 #include "internal.h"
 
+static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
+		unsigned long addr, unsigned long len, unsigned long pgoff,
+		unsigned long flags)
+{
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
 const struct file_operations ramfs_file_operations = {
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
@@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.llseek		= generic_file_llseek,
+	.get_unmapped_area	= ramfs_mmu_get_unmapped_area,
 };
 
 const struct inode_operations ramfs_file_inode_operations = {
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 58205f33af02..7268ed076be8 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,7 @@ struct fsnotify_group {
 #define FS_PRIO_1	1 /* fanotify content based access control */
 #define FS_PRIO_2	2 /* fanotify pre-content access */
 	unsigned int priority;
+	bool shutdown;		/* group is being shut down, don't queue more events */
 
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
 	struct mutex mark_mutex;	/* protect marks_list */
@@ -179,7 +180,6 @@ struct fsnotify_group {
 		spinlock_t access_lock;
 		struct list_head access_list;
 		wait_queue_head_t access_waitq;
-		atomic_t bypass_perm;
 #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
 		int f_flags;
 		unsigned int max_marks;
@@ -292,6 +292,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op
 extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
+/* group destruction begins, stop queuing new events */
+extern void fsnotify_group_stop_queueing(struct fsnotify_group *group);
 /* destroy group */
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* fasync handler function */
@@ -304,8 +306,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group,
 			      struct fsnotify_event *event,
 			      int (*merge)(struct list_head *,
 					   struct fsnotify_event *));
-/* Remove passed event from groups notification queue */
-extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..5e8dab5bf9ad 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 	if (cgroup_sk_alloc_disabled)
 		return;
 
+	/* Socket clone path */
+	if (skcd->val) {
+		cgroup_get(sock_cgroup_ptr(skcd));
+		return;
+	}
+
 	rcu_read_lock();
 
 	while (true) {
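
The cgroup_sk_alloc() change distinguishes the clone path, where the child socket's cgroup pointer was already memcpy'd from the parent, so only an extra reference is needed rather than a fresh lookup. A self-contained sketch of that refcount rule using C11 atomics; cgroup_get() and the surrounding names are simplified stand-ins, not the kernel API:

/* build with: cc -o skref skref.c */
#include <stdatomic.h>
#include <stdio.h>

struct cgroup { atomic_int refcnt; };

static void cgroup_get(struct cgroup *cgrp)
{
	atomic_fetch_add(&cgrp->refcnt, 1);
}

/* Called for both fresh sockets and clones; *skcd is the socket's slot. */
static void sk_cgroup_attach(struct cgroup **skcd, struct cgroup *curr)
{
	if (*skcd) {
		/* Clone path: the pointer was copied from the parent
		 * socket, so it must carry its own reference. */
		cgroup_get(*skcd);
		return;
	}
	cgroup_get(curr);
	*skcd = curr;
}

int main(void)
{
	struct cgroup cg = { 1 };
	struct cgroup *parent = NULL, *child;

	sk_cgroup_attach(&parent, &cg);	/* fresh socket: lookup + ref */
	child = parent;			/* clone copies the pointer */
	sk_cgroup_attach(&child, &cg);	/* clone: extra ref, no lookup */

	printf("refcnt=%d\n", atomic_load(&cg.refcnt));	/* prints 3 */
	return 0;
}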
diff --git a/mm/debug.c b/mm/debug.c
index 8865bfb41b0b..74c7cae4f683 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = {
 
 void __dump_page(struct page *page, const char *reason)
 {
+	int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+
 	pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
-		  page, page_ref_count(page), page_mapcount(page),
-		  page->mapping, page->index);
+		  page, page_ref_count(page), mapcount,
+		  page->mapping, page_to_pgoff(page));
 	if (PageCompound(page))
 		pr_cont(" compound_mapcount: %d", compound_mapcount(page));
 	pr_cont("\n");
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79c52d0061af..728d7790dc2d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
  * value (scan code).
  */
 
-static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
+static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+		struct vm_area_struct **vmap)
 {
 	struct vm_area_struct *vma;
 	unsigned long hstart, hend;
@@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
 	if (unlikely(khugepaged_test_exit(mm)))
 		return SCAN_ANY_PROCESS;
 
-	vma = find_vma(mm, address);
+	*vmap = vma = find_vma(mm, address);
 	if (!vma)
 		return SCAN_VMA_NULL;
 
@@ -881,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		.pmd = pmd,
 	};
 
+	/* we only decide to swapin, if there is enough young ptes */
+	if (referenced < HPAGE_PMD_NR/2) {
+		trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+		return false;
+	}
 	fe.pte = pte_offset_map(pmd, address);
 	for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
 			fe.pte++, fe.address += PAGE_SIZE) {
@@ -888,17 +894,12 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		if (!is_swap_pte(pteval))
 			continue;
 		swapped_in++;
-		/* we only decide to swapin, if there is enough young ptes */
-		if (referenced < HPAGE_PMD_NR/2) {
-			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
-			return false;
-		}
 		ret = do_swap_page(&fe, pteval);
 
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
 		if (ret & VM_FAULT_RETRY) {
 			down_read(&mm->mmap_sem);
-			if (hugepage_vma_revalidate(mm, address)) {
+			if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
 				/* vma is no longer available, don't continue to swapin */
 				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
@@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 static void collapse_huge_page(struct mm_struct *mm,
 				   unsigned long address,
 				   struct page **hpage,
-				   struct vm_area_struct *vma,
 				   int node, int referenced)
 {
 	pmd_t *pmd, _pmd;
@@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated = 0, result = 0;
 	struct mem_cgroup *memcg;
+	struct vm_area_struct *vma;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
 	gfp_t gfp;
@@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	}
 
 	down_read(&mm->mmap_sem);
-	result = hugepage_vma_revalidate(mm, address);
+	result = hugepage_vma_revalidate(mm, address, &vma);
 	if (result) {
 		mem_cgroup_cancel_charge(new_page, memcg, true);
 		up_read(&mm->mmap_sem);
@@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * handled by the anon_vma lock + PG_lock.
 	 */
 	down_write(&mm->mmap_sem);
-	result = hugepage_vma_revalidate(mm, address);
+	result = hugepage_vma_revalidate(mm, address, &vma);
 	if (result)
 		goto out;
 	/* check if the pmd is still valid */
@@ -1202,7 +1203,7 @@ out_unmap:
 	if (ret) {
 		node = khugepaged_find_target_node();
 		/* collapse_huge_page will return with the mmap_sem released */
-		collapse_huge_page(mm, address, hpage, vma, node, referenced);
+		collapse_huge_page(mm, address, hpage, node, referenced);
 	}
 out:
 	trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
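
The khugepaged use-after-free fix makes hugepage_vma_revalidate() hand back the freshly looked-up VMA through an out-parameter, so that after mmap_sem has been dropped and retaken the caller can never keep dereferencing a stale pointer. Stripped to its essence, with all names hypothetical:

/* build with: cc -o reval reval.c */
#include <stdio.h>

struct vma { int start, end; };

static struct vma table_entry = { 0, 4096 };

/*
 * Fresh lookup that also publishes the result through *vmap, so the
 * caller's old pointer is always replaced before it can be used again.
 */
static int revalidate(int addr, struct vma **vmap)
{
	*vmap = &table_entry;
	if (addr < table_entry.start || addr >= table_entry.end)
		return -1;	/* range no longer suitable */
	return 0;
}

int main(void)
{
	struct vma *vma = NULL;	/* any earlier value may be stale */

	/* ...lock dropped and retaken here; old vma must not be trusted... */
	if (revalidate(128, &vma))
		return 1;
	printf("vma [%d,%d)\n", vma->start, vma->end);
	return 0;
}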
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a6a51a7c416..4be518d4e68a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
 static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct memcg_stock_pcp *stock;
+	unsigned long flags;
 	bool ret = false;
 
 	if (nr_pages > CHARGE_BATCH)
 		return ret;
 
-	stock = &get_cpu_var(memcg_stock);
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
 		stock->nr_pages -= nr_pages;
 		ret = true;
 	}
-	put_cpu_var(memcg_stock);
+
+	local_irq_restore(flags);
+
 	return ret;
 }
 
@@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
 	stock->cached = NULL;
 }
 
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
 static void drain_local_stock(struct work_struct *dummy)
 {
-	struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+	struct memcg_stock_pcp *stock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+	local_irq_restore(flags);
 }
 
 /*
@@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy)
  */
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+	struct memcg_stock_pcp *stock;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
+	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached != memcg) { /* reset if necessary */
 		drain_stock(stock);
 		stock->cached = memcg;
 	}
 	stock->nr_pages += nr_pages;
-	put_cpu_var(memcg_stock);
+
+	local_irq_restore(flags);
 }
 
 /*
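
The memcontrol hunks swap get_cpu_var()/put_cpu_var(), which only disable preemption, for local_irq_save()/restore(), because the per-cpu charge cache is now also touched from IRQ context by socket-memory accounting. A rough userspace analog, assuming the reader accepts a signal handler as the stand-in for an interrupt: blocking the signal around the read-modify-write plays the role of masking IRQs, and all names here are illustrative.

/* build with: cc -o stock stock.c */
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t stock_pages = 8;

/* Stand-in for an interrupt handler that also touches the cache. */
static void irq_handler(int sig)
{
	(void)sig;
	stock_pages += 4;
}

static int consume_stock(int nr)
{
	sigset_t block, old;
	int ok = 0;

	sigemptyset(&block);
	sigaddset(&block, SIGALRM);
	sigprocmask(SIG_BLOCK, &block, &old);	/* ~ local_irq_save(flags) */

	if (stock_pages >= nr) {		/* RMW now can't be interrupted */
		stock_pages -= nr;
		ok = 1;
	}

	sigprocmask(SIG_SETMASK, &old, NULL);	/* ~ local_irq_restore(flags) */
	return ok;
}

int main(void)
{
	int ok;

	signal(SIGALRM, irq_handler);
	ok = consume_stock(4);
	printf("consumed=%d left=%d\n", ok, (int)stock_pages);
	return 0;
}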
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 41266dc29f33..b58906b6215c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1567,7 +1567,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
 		return alloc_huge_page_node(page_hstate(compound_head(page)),
 					next_node_in(nid, nmask));
 
-	node_clear(nid, nmask);
+	if (nid != next_node_in(nid, nmask))
+		node_clear(nid, nmask);
+
 	if (PageHighMem(page)
 	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
 		gfp_mask |= __GFP_HIGHMEM;
diff --git a/mm/page_io.c b/mm/page_io.c
index 16bd82fad38c..eafe5ddc2b54 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	int ret;
 	struct swap_info_struct *sis = page_swap_info(page);
 
+	BUG_ON(!PageSwapCache(page));
 	if (sis->flags & SWP_FILE) {
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
@@ -337,6 +338,7 @@ int swap_readpage(struct page *page)
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
 
+	BUG_ON(!PageSwapCache(page));
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageUptodate(page), page);
 	if (frontswap_load(page) == 0) {
@@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page)
 
 	if (sis->flags & SWP_FILE) {
 		struct address_space *mapping = sis->swap_file->f_mapping;
+		BUG_ON(!PageSwapCache(page));
 		return mapping->a_ops->set_page_dirty(page);
 	} else {
 		return __set_page_dirty_no_writeback(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 78cfa292a29a..2657accc6e2b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry)
 struct swap_info_struct *page_swap_info(struct page *page)
 {
 	swp_entry_t swap = { .val = page_private(page) };
-	BUG_ON(!PageSwapCache(page));
 	return swap_info[swp_type(swap)];
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 25dab8b60223..fd7b41edf1ce 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1362,7 +1362,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		if (!try_module_get(prot->owner))
 			goto out_free_sec;
 		sk_tx_queue_clear(sk);
-		cgroup_sk_alloc(&sk->sk_cgrp_data);
 	}
 
 	return sk;
@@ -1422,6 +1421,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_net_set(sk, net);
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
+		cgroup_sk_alloc(&sk->sk_cgrp_data);
 		sock_update_classid(&sk->sk_cgrp_data);
 		sock_update_netprioidx(&sk->sk_cgrp_data);
 	}
@@ -1566,6 +1566,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 	newsk->sk_priority = 0;
 	newsk->sk_incoming_cpu = raw_smp_processor_id();
 	atomic64_set(&newsk->sk_cookie, 0);
+
+	cgroup_sk_alloc(&newsk->sk_cgrp_data);
+
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)