author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-09-19 19:08:03 -0400
---|---|---
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-09-19 19:08:03 -0400
commit | d2ffb0103aaefa9b169da042cf39ce27bfb6cdbb (patch) |
tree | 967273cfc51bf649cf5f9f4f4ad0cf0be4b633fc |
parent | 7fadce0d60d09427e0027d3d468781b08ca0b3d1 (diff) |
parent | b92ae139c308c5223521ed6ec022148b81312809 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton:
"20 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
rapidio/rio_cm: avoid GFP_KERNEL in atomic context
Revert "ocfs2: bump up o2cb network protocol version"
ocfs2: fix start offset to ocfs2_zero_range_for_truncate()
cgroup: duplicate cgroup reference when cloning sockets
mm: memcontrol: make per-cpu charge cache IRQ-safe for socket accounting
ocfs2: fix double unlock in case retry after free truncate log
fanotify: fix list corruption in fanotify_get_response()
fsnotify: add a way to stop queueing events on group shutdown
ocfs2: fix trans extend while free cached blocks
ocfs2: fix trans extend while flush truncate log
ipc/shm: fix crash if CONFIG_SHMEM is not set
mm: fix the page_swap_info() BUG_ON check
autofs: use dentry flags to block walks during expire
MAINTAINERS: update email for VLYNQ bus entry
mm: avoid endless recursion in dump_page()
mm, thp: fix leaking mapped pte in __collapse_huge_page_swapin()
khugepaged: fix use-after-free in collapse_huge_page()
MAINTAINERS: Maik has moved
ocfs2/dlm: fix race between convert and migration
mem-hotplug: don't clear the only node in new_node_page()
-rw-r--r-- | MAINTAINERS | 4
-rw-r--r-- | drivers/rapidio/rio_cm.c | 19
-rw-r--r-- | fs/autofs4/expire.c | 55
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 13
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 36
-rw-r--r-- | fs/notify/group.c | 19
-rw-r--r-- | fs/notify/notification.c | 23
-rw-r--r-- | fs/ocfs2/alloc.c | 56
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 5
-rw-r--r-- | fs/ocfs2/dlm/dlmconvert.c | 12
-rw-r--r-- | fs/ocfs2/file.c | 34
-rw-r--r-- | fs/ocfs2/suballoc.c | 14
-rw-r--r-- | fs/ramfs/file-mmu.c | 9
-rw-r--r-- | include/linux/fsnotify_backend.h | 6
-rw-r--r-- | kernel/cgroup.c | 6
-rw-r--r-- | mm/debug.c | 6
-rw-r--r-- | mm/khugepaged.c | 25
-rw-r--r-- | mm/memcontrol.c | 31
-rw-r--r-- | mm/memory_hotplug.c | 4
-rw-r--r-- | mm/page_io.c | 3
-rw-r--r-- | mm/swapfile.c | 1
-rw-r--r-- | net/core/sock.c | 5
22 files changed, 240 insertions, 146 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 644ff65d336d..a0ce40f4c66c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6103,7 +6103,7 @@ S: Supported
6103 | F: drivers/cpufreq/intel_pstate.c | 6103 | F: drivers/cpufreq/intel_pstate.c |
6104 | 6104 | ||
6105 | INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) | 6105 | INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) |
6106 | M: Maik Broemme <mbroemme@plusserver.de> | 6106 | M: Maik Broemme <mbroemme@libmpq.org> |
6107 | L: linux-fbdev@vger.kernel.org | 6107 | L: linux-fbdev@vger.kernel.org |
6108 | S: Maintained | 6108 | S: Maintained |
6109 | F: Documentation/fb/intelfb.txt | 6109 | F: Documentation/fb/intelfb.txt |
@@ -12569,7 +12569,7 @@ F: include/linux/if_*vlan.h
12569 | F: net/8021q/ | 12569 | F: net/8021q/ |
12570 | 12570 | ||
12571 | VLYNQ BUS | 12571 | VLYNQ BUS |
12572 | M: Florian Fainelli <florian@openwrt.org> | 12572 | M: Florian Fainelli <f.fainelli@gmail.com> |
12573 | L: openwrt-devel@lists.openwrt.org (subscribers-only) | 12573 | L: openwrt-devel@lists.openwrt.org (subscribers-only) |
12574 | S: Maintained | 12574 | S: Maintained |
12575 | F: drivers/vlynq/vlynq.c | 12575 | F: drivers/vlynq/vlynq.c |
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index 3fa17ac8df54..cebc296463ad 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2247,17 +2247,30 @@ static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code,
2247 | { | 2247 | { |
2248 | struct rio_channel *ch; | 2248 | struct rio_channel *ch; |
2249 | unsigned int i; | 2249 | unsigned int i; |
2250 | LIST_HEAD(list); | ||
2250 | 2251 | ||
2251 | riocm_debug(EXIT, "."); | 2252 | riocm_debug(EXIT, "."); |
2252 | 2253 | ||
2254 | /* | ||
2255 | * If there are any channels left in connected state send | ||
2256 | * close notification to the connection partner. | ||
2257 | * First build a list of channels that require a closing | ||
2258 | * notification because function riocm_send_close() should | ||
2259 | * be called outside of spinlock protected code. | ||
2260 | */ | ||
2253 | spin_lock_bh(&idr_lock); | 2261 | spin_lock_bh(&idr_lock); |
2254 | idr_for_each_entry(&ch_idr, ch, i) { | 2262 | idr_for_each_entry(&ch_idr, ch, i) { |
2255 | riocm_debug(EXIT, "close ch %d", ch->id); | 2263 | if (ch->state == RIO_CM_CONNECTED) { |
2256 | if (ch->state == RIO_CM_CONNECTED) | 2264 | riocm_debug(EXIT, "close ch %d", ch->id); |
2257 | riocm_send_close(ch); | 2265 | idr_remove(&ch_idr, ch->id); |
2266 | list_add(&ch->ch_node, &list); | ||
2267 | } | ||
2258 | } | 2268 | } |
2259 | spin_unlock_bh(&idr_lock); | 2269 | spin_unlock_bh(&idr_lock); |
2260 | 2270 | ||
2271 | list_for_each_entry(ch, &list, ch_node) | ||
2272 | riocm_send_close(ch); | ||
2273 | |||
2261 | return NOTIFY_DONE; | 2274 | return NOTIFY_DONE; |
2262 | } | 2275 | } |
2263 | 2276 | ||
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index b493909e7492..d8e6d421c27f 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry,
417 | } | 417 | } |
418 | return NULL; | 418 | return NULL; |
419 | } | 419 | } |
420 | |||
420 | /* | 421 | /* |
421 | * Find an eligible tree to time-out | 422 | * Find an eligible tree to time-out |
422 | * A tree is eligible if :- | 423 | * A tree is eligible if :- |
@@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
432 | struct dentry *root = sb->s_root; | 433 | struct dentry *root = sb->s_root; |
433 | struct dentry *dentry; | 434 | struct dentry *dentry; |
434 | struct dentry *expired; | 435 | struct dentry *expired; |
436 | struct dentry *found; | ||
435 | struct autofs_info *ino; | 437 | struct autofs_info *ino; |
436 | 438 | ||
437 | if (!root) | 439 | if (!root) |
@@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
442 | 444 | ||
443 | dentry = NULL; | 445 | dentry = NULL; |
444 | while ((dentry = get_next_positive_subdir(dentry, root))) { | 446 | while ((dentry = get_next_positive_subdir(dentry, root))) { |
447 | int flags = how; | ||
448 | |||
445 | spin_lock(&sbi->fs_lock); | 449 | spin_lock(&sbi->fs_lock); |
446 | ino = autofs4_dentry_ino(dentry); | 450 | ino = autofs4_dentry_ino(dentry); |
447 | if (ino->flags & AUTOFS_INF_WANT_EXPIRE) | 451 | if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { |
448 | expired = NULL; | ||
449 | else | ||
450 | expired = should_expire(dentry, mnt, timeout, how); | ||
451 | if (!expired) { | ||
452 | spin_unlock(&sbi->fs_lock); | 452 | spin_unlock(&sbi->fs_lock); |
453 | continue; | 453 | continue; |
454 | } | 454 | } |
455 | spin_unlock(&sbi->fs_lock); | ||
456 | |||
457 | expired = should_expire(dentry, mnt, timeout, flags); | ||
458 | if (!expired) | ||
459 | continue; | ||
460 | |||
461 | spin_lock(&sbi->fs_lock); | ||
455 | ino = autofs4_dentry_ino(expired); | 462 | ino = autofs4_dentry_ino(expired); |
456 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; | 463 | ino->flags |= AUTOFS_INF_WANT_EXPIRE; |
457 | spin_unlock(&sbi->fs_lock); | 464 | spin_unlock(&sbi->fs_lock); |
458 | synchronize_rcu(); | 465 | synchronize_rcu(); |
459 | spin_lock(&sbi->fs_lock); | ||
460 | if (should_expire(expired, mnt, timeout, how)) { | ||
461 | if (expired != dentry) | ||
462 | dput(dentry); | ||
463 | goto found; | ||
464 | } | ||
465 | 466 | ||
467 | /* Make sure a reference is not taken on found if | ||
468 | * things have changed. | ||
469 | */ | ||
470 | flags &= ~AUTOFS_EXP_LEAVES; | ||
471 | found = should_expire(expired, mnt, timeout, how); | ||
472 | if (!found || found != expired) | ||
473 | /* Something has changed, continue */ | ||
474 | goto next; | ||
475 | |||
476 | if (expired != dentry) | ||
477 | dput(dentry); | ||
478 | |||
479 | spin_lock(&sbi->fs_lock); | ||
480 | goto found; | ||
481 | next: | ||
482 | spin_lock(&sbi->fs_lock); | ||
466 | ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; | 483 | ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; |
484 | spin_unlock(&sbi->fs_lock); | ||
467 | if (expired != dentry) | 485 | if (expired != dentry) |
468 | dput(expired); | 486 | dput(expired); |
469 | spin_unlock(&sbi->fs_lock); | ||
470 | } | 487 | } |
471 | return NULL; | 488 | return NULL; |
472 | 489 | ||
@@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
483 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); | 500 | struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); |
484 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | 501 | struct autofs_info *ino = autofs4_dentry_ino(dentry); |
485 | int status; | 502 | int status; |
503 | int state; | ||
486 | 504 | ||
487 | /* Block on any pending expire */ | 505 | /* Block on any pending expire */ |
488 | if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) | 506 | if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) |
@@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
490 | if (rcu_walk) | 508 | if (rcu_walk) |
491 | return -ECHILD; | 509 | return -ECHILD; |
492 | 510 | ||
511 | retry: | ||
493 | spin_lock(&sbi->fs_lock); | 512 | spin_lock(&sbi->fs_lock); |
494 | if (ino->flags & AUTOFS_INF_EXPIRING) { | 513 | state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING); |
514 | if (state == AUTOFS_INF_WANT_EXPIRE) { | ||
515 | spin_unlock(&sbi->fs_lock); | ||
516 | /* | ||
517 | * Possibly being selected for expire, wait until | ||
518 | * it's selected or not. | ||
519 | */ | ||
520 | schedule_timeout_uninterruptible(HZ/10); | ||
521 | goto retry; | ||
522 | } | ||
523 | if (state & AUTOFS_INF_EXPIRING) { | ||
495 | spin_unlock(&sbi->fs_lock); | 524 | spin_unlock(&sbi->fs_lock); |
496 | 525 | ||
497 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); | 526 | pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ecca6a5..e0e5f7c3c99f 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
67 | 67 | ||
68 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 68 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
69 | 69 | ||
70 | wait_event(group->fanotify_data.access_waitq, event->response || | 70 | wait_event(group->fanotify_data.access_waitq, event->response); |
71 | atomic_read(&group->fanotify_data.bypass_perm)); | ||
72 | |||
73 | if (!event->response) { /* bypass_perm set */ | ||
74 | /* | ||
75 | * Event was canceled because group is being destroyed. Remove | ||
76 | * it from group's event list because we are responsible for | ||
77 | * freeing the permission event. | ||
78 | */ | ||
79 | fsnotify_remove_event(group, &event->fae.fse); | ||
80 | return 0; | ||
81 | } | ||
82 | 71 | ||
83 | /* userspace responded, convert to something usable */ | 72 | /* userspace responded, convert to something usable */ |
84 | switch (event->response) { | 73 | switch (event->response) { |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bcd1d43..a64313868d3a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
358 | 358 | ||
359 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 359 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
360 | struct fanotify_perm_event_info *event, *next; | 360 | struct fanotify_perm_event_info *event, *next; |
361 | struct fsnotify_event *fsn_event; | ||
361 | 362 | ||
362 | /* | 363 | /* |
363 | * There may be still new events arriving in the notification queue | 364 | * Stop new events from arriving in the notification queue. since |
364 | * but since userspace cannot use fanotify fd anymore, no event can | 365 | * userspace cannot use fanotify fd anymore, no event can enter or |
365 | * enter or leave access_list by now. | 366 | * leave access_list by now either. |
366 | */ | 367 | */ |
367 | spin_lock(&group->fanotify_data.access_lock); | 368 | fsnotify_group_stop_queueing(group); |
368 | |||
369 | atomic_inc(&group->fanotify_data.bypass_perm); | ||
370 | 369 | ||
370 | /* | ||
371 | * Process all permission events on access_list and notification queue | ||
372 | * and simulate reply from userspace. | ||
373 | */ | ||
374 | spin_lock(&group->fanotify_data.access_lock); | ||
371 | list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, | 375 | list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, |
372 | fae.fse.list) { | 376 | fae.fse.list) { |
373 | pr_debug("%s: found group=%p event=%p\n", __func__, group, | 377 | pr_debug("%s: found group=%p event=%p\n", __func__, group, |
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
379 | spin_unlock(&group->fanotify_data.access_lock); | 383 | spin_unlock(&group->fanotify_data.access_lock); |
380 | 384 | ||
381 | /* | 385 | /* |
382 | * Since bypass_perm is set, newly queued events will not wait for | 386 | * Destroy all non-permission events. For permission events just |
383 | * access response. Wake up the already sleeping ones now. | 387 | * dequeue them and set the response. They will be freed once the |
384 | * synchronize_srcu() in fsnotify_destroy_group() will wait for all | 388 | * response is consumed and fanotify_get_response() returns. |
385 | * processes sleeping in fanotify_handle_event() waiting for access | ||
386 | * response and thus also for all permission events to be freed. | ||
387 | */ | 389 | */ |
390 | mutex_lock(&group->notification_mutex); | ||
391 | while (!fsnotify_notify_queue_is_empty(group)) { | ||
392 | fsn_event = fsnotify_remove_first_event(group); | ||
393 | if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) | ||
394 | fsnotify_destroy_event(group, fsn_event); | ||
395 | else | ||
396 | FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; | ||
397 | } | ||
398 | mutex_unlock(&group->notification_mutex); | ||
399 | |||
400 | /* Response for all permission events it set, wakeup waiters */ | ||
388 | wake_up(&group->fanotify_data.access_waitq); | 401 | wake_up(&group->fanotify_data.access_waitq); |
389 | #endif | 402 | #endif |
390 | 403 | ||
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
755 | spin_lock_init(&group->fanotify_data.access_lock); | 768 | spin_lock_init(&group->fanotify_data.access_lock); |
756 | init_waitqueue_head(&group->fanotify_data.access_waitq); | 769 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
757 | INIT_LIST_HEAD(&group->fanotify_data.access_list); | 770 | INIT_LIST_HEAD(&group->fanotify_data.access_list); |
758 | atomic_set(&group->fanotify_data.bypass_perm, 0); | ||
759 | #endif | 771 | #endif |
760 | switch (flags & FAN_ALL_CLASS_BITS) { | 772 | switch (flags & FAN_ALL_CLASS_BITS) { |
761 | case FAN_CLASS_NOTIF: | 773 | case FAN_CLASS_NOTIF: |
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 3e2dd85be5dd..b47f7cfdcaa4 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
40 | } | 40 | } |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Stop queueing new events for this group. Once this function returns | ||
44 | * fsnotify_add_event() will not add any new events to the group's queue. | ||
45 | */ | ||
46 | void fsnotify_group_stop_queueing(struct fsnotify_group *group) | ||
47 | { | ||
48 | mutex_lock(&group->notification_mutex); | ||
49 | group->shutdown = true; | ||
50 | mutex_unlock(&group->notification_mutex); | ||
51 | } | ||
52 | |||
53 | /* | ||
43 | * Trying to get rid of a group. Remove all marks, flush all events and release | 54 | * Trying to get rid of a group. Remove all marks, flush all events and release |
44 | * the group reference. | 55 | * the group reference. |
45 | * Note that another thread calling fsnotify_clear_marks_by_group() may still | 56 | * Note that another thread calling fsnotify_clear_marks_by_group() may still |
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
47 | */ | 58 | */ |
48 | void fsnotify_destroy_group(struct fsnotify_group *group) | 59 | void fsnotify_destroy_group(struct fsnotify_group *group) |
49 | { | 60 | { |
61 | /* | ||
62 | * Stop queueing new events. The code below is careful enough to not | ||
63 | * require this but fanotify needs to stop queuing events even before | ||
64 | * fsnotify_destroy_group() is called and this makes the other callers | ||
65 | * of fsnotify_destroy_group() to see the same behavior. | ||
66 | */ | ||
67 | fsnotify_group_stop_queueing(group); | ||
68 | |||
50 | /* clear all inode marks for this group, attach them to destroy_list */ | 69 | /* clear all inode marks for this group, attach them to destroy_list */ |
51 | fsnotify_detach_group_marks(group); | 70 | fsnotify_detach_group_marks(group); |
52 | 71 | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e037aeb..e455e83ceeeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
82 | * Add an event to the group notification queue. The group can later pull this | 82 | * Add an event to the group notification queue. The group can later pull this |
83 | * event off the queue to deal with. The function returns 0 if the event was | 83 | * event off the queue to deal with. The function returns 0 if the event was |
84 | * added to the queue, 1 if the event was merged with some other queued event, | 84 | * added to the queue, 1 if the event was merged with some other queued event, |
85 | * 2 if the queue of events has overflown. | 85 | * 2 if the event was not queued - either the queue of events has overflown |
86 | * or the group is shutting down. | ||
86 | */ | 87 | */ |
87 | int fsnotify_add_event(struct fsnotify_group *group, | 88 | int fsnotify_add_event(struct fsnotify_group *group, |
88 | struct fsnotify_event *event, | 89 | struct fsnotify_event *event, |
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
96 | 97 | ||
97 | mutex_lock(&group->notification_mutex); | 98 | mutex_lock(&group->notification_mutex); |
98 | 99 | ||
100 | if (group->shutdown) { | ||
101 | mutex_unlock(&group->notification_mutex); | ||
102 | return 2; | ||
103 | } | ||
104 | |||
99 | if (group->q_len >= group->max_events) { | 105 | if (group->q_len >= group->max_events) { |
100 | ret = 2; | 106 | ret = 2; |
101 | /* Queue overflow event only if it isn't already queued */ | 107 | /* Queue overflow event only if it isn't already queued */ |
@@ -126,21 +132,6 @@ queue: | |||
126 | } | 132 | } |
127 | 133 | ||
128 | /* | 134 | /* |
129 | * Remove @event from group's notification queue. It is the responsibility of | ||
130 | * the caller to destroy the event. | ||
131 | */ | ||
132 | void fsnotify_remove_event(struct fsnotify_group *group, | ||
133 | struct fsnotify_event *event) | ||
134 | { | ||
135 | mutex_lock(&group->notification_mutex); | ||
136 | if (!list_empty(&event->list)) { | ||
137 | list_del_init(&event->list); | ||
138 | group->q_len--; | ||
139 | } | ||
140 | mutex_unlock(&group->notification_mutex); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Remove and return the first event from the notification list. It is the | 135 | * Remove and return the first event from the notification list. It is the |
145 | * responsibility of the caller to destroy the obtained event | 136 | * responsibility of the caller to destroy the obtained event |
146 | */ | 137 | */ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7dabbc31060e..f165f867f332 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5922,7 +5922,6 @@ bail:
5922 | } | 5922 | } |
5923 | 5923 | ||
5924 | static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, | 5924 | static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, |
5925 | handle_t *handle, | ||
5926 | struct inode *data_alloc_inode, | 5925 | struct inode *data_alloc_inode, |
5927 | struct buffer_head *data_alloc_bh) | 5926 | struct buffer_head *data_alloc_bh) |
5928 | { | 5927 | { |
@@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5935 | struct ocfs2_truncate_log *tl; | 5934 | struct ocfs2_truncate_log *tl; |
5936 | struct inode *tl_inode = osb->osb_tl_inode; | 5935 | struct inode *tl_inode = osb->osb_tl_inode; |
5937 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 5936 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
5937 | handle_t *handle; | ||
5938 | 5938 | ||
5939 | di = (struct ocfs2_dinode *) tl_bh->b_data; | 5939 | di = (struct ocfs2_dinode *) tl_bh->b_data; |
5940 | tl = &di->id2.i_dealloc; | 5940 | tl = &di->id2.i_dealloc; |
5941 | i = le16_to_cpu(tl->tl_used) - 1; | 5941 | i = le16_to_cpu(tl->tl_used) - 1; |
5942 | while (i >= 0) { | 5942 | while (i >= 0) { |
5943 | handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); | ||
5944 | if (IS_ERR(handle)) { | ||
5945 | status = PTR_ERR(handle); | ||
5946 | mlog_errno(status); | ||
5947 | goto bail; | ||
5948 | } | ||
5949 | |||
5943 | /* Caller has given us at least enough credits to | 5950 | /* Caller has given us at least enough credits to |
5944 | * update the truncate log dinode */ | 5951 | * update the truncate log dinode */ |
5945 | status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, | 5952 | status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, |
@@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5974 | } | 5981 | } |
5975 | } | 5982 | } |
5976 | 5983 | ||
5977 | status = ocfs2_extend_trans(handle, | 5984 | ocfs2_commit_trans(osb, handle); |
5978 | OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); | ||
5979 | if (status < 0) { | ||
5980 | mlog_errno(status); | ||
5981 | goto bail; | ||
5982 | } | ||
5983 | i--; | 5985 | i--; |
5984 | } | 5986 | } |
5985 | 5987 | ||
@@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
5994 | { | 5996 | { |
5995 | int status; | 5997 | int status; |
5996 | unsigned int num_to_flush; | 5998 | unsigned int num_to_flush; |
5997 | handle_t *handle; | ||
5998 | struct inode *tl_inode = osb->osb_tl_inode; | 5999 | struct inode *tl_inode = osb->osb_tl_inode; |
5999 | struct inode *data_alloc_inode = NULL; | 6000 | struct inode *data_alloc_inode = NULL; |
6000 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 6001 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
@@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
6038 | goto out_mutex; | 6039 | goto out_mutex; |
6039 | } | 6040 | } |
6040 | 6041 | ||
6041 | handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); | 6042 | status = ocfs2_replay_truncate_records(osb, data_alloc_inode, |
6042 | if (IS_ERR(handle)) { | ||
6043 | status = PTR_ERR(handle); | ||
6044 | mlog_errno(status); | ||
6045 | goto out_unlock; | ||
6046 | } | ||
6047 | |||
6048 | status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, | ||
6049 | data_alloc_bh); | 6043 | data_alloc_bh); |
6050 | if (status < 0) | 6044 | if (status < 0) |
6051 | mlog_errno(status); | 6045 | mlog_errno(status); |
6052 | 6046 | ||
6053 | ocfs2_commit_trans(osb, handle); | ||
6054 | |||
6055 | out_unlock: | ||
6056 | brelse(data_alloc_bh); | 6047 | brelse(data_alloc_bh); |
6057 | ocfs2_inode_unlock(data_alloc_inode, 1); | 6048 | ocfs2_inode_unlock(data_alloc_inode, 1); |
6058 | 6049 | ||
@@ -6413,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
6413 | goto out_mutex; | 6404 | goto out_mutex; |
6414 | } | 6405 | } |
6415 | 6406 | ||
6416 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
6417 | if (IS_ERR(handle)) { | ||
6418 | ret = PTR_ERR(handle); | ||
6419 | mlog_errno(ret); | ||
6420 | goto out_unlock; | ||
6421 | } | ||
6422 | |||
6423 | while (head) { | 6407 | while (head) { |
6424 | if (head->free_bg) | 6408 | if (head->free_bg) |
6425 | bg_blkno = head->free_bg; | 6409 | bg_blkno = head->free_bg; |
6426 | else | 6410 | else |
6427 | bg_blkno = ocfs2_which_suballoc_group(head->free_blk, | 6411 | bg_blkno = ocfs2_which_suballoc_group(head->free_blk, |
6428 | head->free_bit); | 6412 | head->free_bit); |
6413 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
6414 | if (IS_ERR(handle)) { | ||
6415 | ret = PTR_ERR(handle); | ||
6416 | mlog_errno(ret); | ||
6417 | goto out_unlock; | ||
6418 | } | ||
6419 | |||
6429 | trace_ocfs2_free_cached_blocks( | 6420 | trace_ocfs2_free_cached_blocks( |
6430 | (unsigned long long)head->free_blk, head->free_bit); | 6421 | (unsigned long long)head->free_blk, head->free_bit); |
6431 | 6422 | ||
6432 | ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, | 6423 | ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, |
6433 | head->free_bit, bg_blkno, 1); | 6424 | head->free_bit, bg_blkno, 1); |
6434 | if (ret) { | 6425 | if (ret) |
6435 | mlog_errno(ret); | 6426 | mlog_errno(ret); |
6436 | goto out_journal; | ||
6437 | } | ||
6438 | 6427 | ||
6439 | ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); | 6428 | ocfs2_commit_trans(osb, handle); |
6440 | if (ret) { | ||
6441 | mlog_errno(ret); | ||
6442 | goto out_journal; | ||
6443 | } | ||
6444 | 6429 | ||
6445 | tmp = head; | 6430 | tmp = head; |
6446 | head = head->free_next; | 6431 | head = head->free_next; |
6447 | kfree(tmp); | 6432 | kfree(tmp); |
6448 | } | 6433 | } |
6449 | 6434 | ||
6450 | out_journal: | ||
6451 | ocfs2_commit_trans(osb, handle); | ||
6452 | |||
6453 | out_unlock: | 6435 | out_unlock: |
6454 | ocfs2_inode_unlock(inode, 1); | 6436 | ocfs2_inode_unlock(inode, 1); |
6455 | brelse(di_bh); | 6437 | brelse(di_bh); |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 94b18369b1cc..b95e7df5b76a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,9 +44,6 @@
44 | * version here in tcp_internal.h should not need to be bumped for | 44 | * version here in tcp_internal.h should not need to be bumped for |
45 | * filesystem locking changes. | 45 | * filesystem locking changes. |
46 | * | 46 | * |
47 | * New in version 12 | ||
48 | * - Negotiate hb timeout when storage is down. | ||
49 | * | ||
50 | * New in version 11 | 47 | * New in version 11 |
51 | * - Negotiation of filesystem locking in the dlm join. | 48 | * - Negotiation of filesystem locking in the dlm join. |
52 | * | 49 | * |
@@ -78,7 +75,7 @@
78 | * - full 64 bit i_size in the metadata lock lvbs | 75 | * - full 64 bit i_size in the metadata lock lvbs |
79 | * - introduction of "rw" lock and pushing meta/data locking down | 76 | * - introduction of "rw" lock and pushing meta/data locking down |
80 | */ | 77 | */ |
81 | #define O2NET_PROTOCOL_VERSION 12ULL | 78 | #define O2NET_PROTOCOL_VERSION 11ULL |
82 | struct o2net_handshake { | 79 | struct o2net_handshake { |
83 | __be64 protocol_version; | 80 | __be64 protocol_version; |
84 | __be64 connector_id; | 81 | __be64 connector_id; |
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index cdeafb4e7ed6..0bb128659d4b 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
268 | struct dlm_lock *lock, int flags, int type) | 268 | struct dlm_lock *lock, int flags, int type) |
269 | { | 269 | { |
270 | enum dlm_status status; | 270 | enum dlm_status status; |
271 | u8 old_owner = res->owner; | ||
272 | 271 | ||
273 | mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, | 272 | mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, |
274 | lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); | 273 | lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); |
@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
335 | 334 | ||
336 | spin_lock(&res->spinlock); | 335 | spin_lock(&res->spinlock); |
337 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 336 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
338 | lock->convert_pending = 0; | ||
339 | /* if it failed, move it back to granted queue. | 337 | /* if it failed, move it back to granted queue. |
340 | * if master returns DLM_NORMAL and then down before sending ast, | 338 | * if master returns DLM_NORMAL and then down before sending ast, |
341 | * it may have already been moved to granted queue, reset to | 339 | * it may have already been moved to granted queue, reset to |
@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
344 | if (status != DLM_NOTQUEUED) | 342 | if (status != DLM_NOTQUEUED) |
345 | dlm_error(status); | 343 | dlm_error(status); |
346 | dlm_revert_pending_convert(res, lock); | 344 | dlm_revert_pending_convert(res, lock); |
347 | } else if ((res->state & DLM_LOCK_RES_RECOVERING) || | 345 | } else if (!lock->convert_pending) { |
348 | (old_owner != res->owner)) { | 346 | mlog(0, "%s: res %.*s, owner died and lock has been moved back " |
349 | mlog(0, "res %.*s is in recovering or has been recovered.\n", | 347 | "to granted list, retry convert.\n", |
350 | res->lockname.len, res->lockname.name); | 348 | dlm->name, res->lockname.len, res->lockname.name); |
351 | status = DLM_RECOVERING; | 349 | status = DLM_RECOVERING; |
352 | } | 350 | } |
351 | |||
352 | lock->convert_pending = 0; | ||
353 | bail: | 353 | bail: |
354 | spin_unlock(&res->spinlock); | 354 | spin_unlock(&res->spinlock); |
355 | 355 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4e7b0dc22450..0b055bfb8e86 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1506 | u64 start, u64 len) | 1506 | u64 start, u64 len) |
1507 | { | 1507 | { |
1508 | int ret = 0; | 1508 | int ret = 0; |
1509 | u64 tmpend, end = start + len; | 1509 | u64 tmpend = 0; |
1510 | u64 end = start + len; | ||
1510 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1511 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1511 | unsigned int csize = osb->s_clustersize; | 1512 | unsigned int csize = osb->s_clustersize; |
1512 | handle_t *handle; | 1513 | handle_t *handle; |
@@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1538 | } | 1539 | } |
1539 | 1540 | ||
1540 | /* | 1541 | /* |
1541 | * We want to get the byte offset of the end of the 1st cluster. | 1542 | * If start is on a cluster boundary and end is somewhere in another |
1543 | * cluster, we have not COWed the cluster starting at start, unless | ||
1544 | * end is also within the same cluster. So, in this case, we skip this | ||
1545 | * first call to ocfs2_zero_range_for_truncate() truncate and move on | ||
1546 | * to the next one. | ||
1542 | */ | 1547 | */ |
1543 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | 1548 | if ((start & (csize - 1)) != 0) { |
1544 | if (tmpend > end) | 1549 | /* |
1545 | tmpend = end; | 1550 | * We want to get the byte offset of the end of the 1st |
1551 | * cluster. | ||
1552 | */ | ||
1553 | tmpend = (u64)osb->s_clustersize + | ||
1554 | (start & ~(osb->s_clustersize - 1)); | ||
1555 | if (tmpend > end) | ||
1556 | tmpend = end; | ||
1546 | 1557 | ||
1547 | trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, | 1558 | trace_ocfs2_zero_partial_clusters_range1( |
1548 | (unsigned long long)tmpend); | 1559 | (unsigned long long)start, |
1560 | (unsigned long long)tmpend); | ||
1549 | 1561 | ||
1550 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | 1562 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, |
1551 | if (ret) | 1563 | tmpend); |
1552 | mlog_errno(ret); | 1564 | if (ret) |
1565 | mlog_errno(ret); | ||
1566 | } | ||
1553 | 1567 | ||
1554 | if (tmpend < end) { | 1568 | if (tmpend < end) { |
1555 | /* | 1569 | /* |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ea47120a85ff..6ad3533940ba 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1199,14 +1199,24 @@ retry:
1199 | inode_unlock((*ac)->ac_inode); | 1199 | inode_unlock((*ac)->ac_inode); |
1200 | 1200 | ||
1201 | ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted); | 1201 | ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted); |
1202 | if (ret == 1) | 1202 | if (ret == 1) { |
1203 | iput((*ac)->ac_inode); | ||
1204 | (*ac)->ac_inode = NULL; | ||
1203 | goto retry; | 1205 | goto retry; |
1206 | } | ||
1204 | 1207 | ||
1205 | if (ret < 0) | 1208 | if (ret < 0) |
1206 | mlog_errno(ret); | 1209 | mlog_errno(ret); |
1207 | 1210 | ||
1208 | inode_lock((*ac)->ac_inode); | 1211 | inode_lock((*ac)->ac_inode); |
1209 | ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); | 1212 | ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); |
1213 | if (ret < 0) { | ||
1214 | mlog_errno(ret); | ||
1215 | inode_unlock((*ac)->ac_inode); | ||
1216 | iput((*ac)->ac_inode); | ||
1217 | (*ac)->ac_inode = NULL; | ||
1218 | goto bail; | ||
1219 | } | ||
1210 | } | 1220 | } |
1211 | if (status < 0) { | 1221 | if (status < 0) { |
1212 | if (status != -ENOSPC) | 1222 | if (status != -ENOSPC) |
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 183a212694bf..12af0490322f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -27,9 +27,17 @@
27 | #include <linux/fs.h> | 27 | #include <linux/fs.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/ramfs.h> | 29 | #include <linux/ramfs.h> |
30 | #include <linux/sched.h> | ||
30 | 31 | ||
31 | #include "internal.h" | 32 | #include "internal.h" |
32 | 33 | ||
34 | static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, | ||
35 | unsigned long addr, unsigned long len, unsigned long pgoff, | ||
36 | unsigned long flags) | ||
37 | { | ||
38 | return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); | ||
39 | } | ||
40 | |||
33 | const struct file_operations ramfs_file_operations = { | 41 | const struct file_operations ramfs_file_operations = { |
34 | .read_iter = generic_file_read_iter, | 42 | .read_iter = generic_file_read_iter, |
35 | .write_iter = generic_file_write_iter, | 43 | .write_iter = generic_file_write_iter, |
@@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = {
38 | .splice_read = generic_file_splice_read, | 46 | .splice_read = generic_file_splice_read, |
39 | .splice_write = iter_file_splice_write, | 47 | .splice_write = iter_file_splice_write, |
40 | .llseek = generic_file_llseek, | 48 | .llseek = generic_file_llseek, |
49 | .get_unmapped_area = ramfs_mmu_get_unmapped_area, | ||
41 | }; | 50 | }; |
42 | 51 | ||
43 | const struct inode_operations ramfs_file_inode_operations = { | 52 | const struct inode_operations ramfs_file_inode_operations = { |
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 58205f33af02..7268ed076be8 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,7 @@ struct fsnotify_group {
148 | #define FS_PRIO_1 1 /* fanotify content based access control */ | 148 | #define FS_PRIO_1 1 /* fanotify content based access control */ |
149 | #define FS_PRIO_2 2 /* fanotify pre-content access */ | 149 | #define FS_PRIO_2 2 /* fanotify pre-content access */ |
150 | unsigned int priority; | 150 | unsigned int priority; |
151 | bool shutdown; /* group is being shut down, don't queue more events */ | ||
151 | 152 | ||
152 | /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ | 153 | /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ |
153 | struct mutex mark_mutex; /* protect marks_list */ | 154 | struct mutex mark_mutex; /* protect marks_list */ |
@@ -179,7 +180,6 @@ struct fsnotify_group {
179 | spinlock_t access_lock; | 180 | spinlock_t access_lock; |
180 | struct list_head access_list; | 181 | struct list_head access_list; |
181 | wait_queue_head_t access_waitq; | 182 | wait_queue_head_t access_waitq; |
182 | atomic_t bypass_perm; | ||
183 | #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ | 183 | #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ |
184 | int f_flags; | 184 | int f_flags; |
185 | unsigned int max_marks; | 185 | unsigned int max_marks; |
@@ -292,6 +292,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op | |||
292 | extern void fsnotify_get_group(struct fsnotify_group *group); | 292 | extern void fsnotify_get_group(struct fsnotify_group *group); |
293 | /* drop reference on a group from fsnotify_alloc_group */ | 293 | /* drop reference on a group from fsnotify_alloc_group */ |
294 | extern void fsnotify_put_group(struct fsnotify_group *group); | 294 | extern void fsnotify_put_group(struct fsnotify_group *group); |
295 | /* group destruction begins, stop queuing new events */ | ||
296 | extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); | ||
295 | /* destroy group */ | 297 | /* destroy group */ |
296 | extern void fsnotify_destroy_group(struct fsnotify_group *group); | 298 | extern void fsnotify_destroy_group(struct fsnotify_group *group); |
297 | /* fasync handler function */ | 299 | /* fasync handler function */ |
@@ -304,8 +306,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group,
304 | struct fsnotify_event *event, | 306 | struct fsnotify_event *event, |
305 | int (*merge)(struct list_head *, | 307 | int (*merge)(struct list_head *, |
306 | struct fsnotify_event *)); | 308 | struct fsnotify_event *)); |
307 | /* Remove passed event from groups notification queue */ | ||
308 | extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); | ||
309 | /* true if the group notification queue is empty */ | 309 | /* true if the group notification queue is empty */ |
310 | extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); | 310 | extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); |
311 | /* return, but do not dequeue the first event on the notification queue */ | 311 | /* return, but do not dequeue the first event on the notification queue */ |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..5e8dab5bf9ad 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6270 | if (cgroup_sk_alloc_disabled) | 6270 | if (cgroup_sk_alloc_disabled) |
6271 | return; | 6271 | return; |
6272 | 6272 | ||
6273 | /* Socket clone path */ | ||
6274 | if (skcd->val) { | ||
6275 | cgroup_get(sock_cgroup_ptr(skcd)); | ||
6276 | return; | ||
6277 | } | ||
6278 | |||
6273 | rcu_read_lock(); | 6279 | rcu_read_lock(); |
6274 | 6280 | ||
6275 | while (true) { | 6281 | while (true) { |
diff --git a/mm/debug.c b/mm/debug.c
index 8865bfb41b0b..74c7cae4f683 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = {
42 | 42 | ||
43 | void __dump_page(struct page *page, const char *reason) | 43 | void __dump_page(struct page *page, const char *reason) |
44 | { | 44 | { |
45 | int mapcount = PageSlab(page) ? 0 : page_mapcount(page); | ||
46 | |||
45 | pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", | 47 | pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", |
46 | page, page_ref_count(page), page_mapcount(page), | 48 | page, page_ref_count(page), mapcount, |
47 | page->mapping, page->index); | 49 | page->mapping, page_to_pgoff(page)); |
48 | if (PageCompound(page)) | 50 | if (PageCompound(page)) |
49 | pr_cont(" compound_mapcount: %d", compound_mapcount(page)); | 51 | pr_cont(" compound_mapcount: %d", compound_mapcount(page)); |
50 | pr_cont("\n"); | 52 | pr_cont("\n"); |
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79c52d0061af..728d7790dc2d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
838 | * value (scan code). | 838 | * value (scan code). |
839 | */ | 839 | */ |
840 | 840 | ||
841 | static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) | 841 | static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, |
842 | struct vm_area_struct **vmap) | ||
842 | { | 843 | { |
843 | struct vm_area_struct *vma; | 844 | struct vm_area_struct *vma; |
844 | unsigned long hstart, hend; | 845 | unsigned long hstart, hend; |
@@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
846 | if (unlikely(khugepaged_test_exit(mm))) | 847 | if (unlikely(khugepaged_test_exit(mm))) |
847 | return SCAN_ANY_PROCESS; | 848 | return SCAN_ANY_PROCESS; |
848 | 849 | ||
849 | vma = find_vma(mm, address); | 850 | *vmap = vma = find_vma(mm, address); |
850 | if (!vma) | 851 | if (!vma) |
851 | return SCAN_VMA_NULL; | 852 | return SCAN_VMA_NULL; |
852 | 853 | ||
@@ -881,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
881 | .pmd = pmd, | 882 | .pmd = pmd, |
882 | }; | 883 | }; |
883 | 884 | ||
885 | /* we only decide to swapin, if there is enough young ptes */ | ||
886 | if (referenced < HPAGE_PMD_NR/2) { | ||
887 | trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); | ||
888 | return false; | ||
889 | } | ||
884 | fe.pte = pte_offset_map(pmd, address); | 890 | fe.pte = pte_offset_map(pmd, address); |
885 | for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE; | 891 | for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE; |
886 | fe.pte++, fe.address += PAGE_SIZE) { | 892 | fe.pte++, fe.address += PAGE_SIZE) { |
@@ -888,17 +894,12 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
888 | if (!is_swap_pte(pteval)) | 894 | if (!is_swap_pte(pteval)) |
889 | continue; | 895 | continue; |
890 | swapped_in++; | 896 | swapped_in++; |
891 | /* we only decide to swapin, if there is enough young ptes */ | ||
892 | if (referenced < HPAGE_PMD_NR/2) { | ||
893 | trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); | ||
894 | return false; | ||
895 | } | ||
896 | ret = do_swap_page(&fe, pteval); | 897 | ret = do_swap_page(&fe, pteval); |
897 | 898 | ||
898 | /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ | 899 | /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ |
899 | if (ret & VM_FAULT_RETRY) { | 900 | if (ret & VM_FAULT_RETRY) { |
900 | down_read(&mm->mmap_sem); | 901 | down_read(&mm->mmap_sem); |
901 | if (hugepage_vma_revalidate(mm, address)) { | 902 | if (hugepage_vma_revalidate(mm, address, &fe.vma)) { |
902 | /* vma is no longer available, don't continue to swapin */ | 903 | /* vma is no longer available, don't continue to swapin */ |
903 | trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); | 904 | trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); |
904 | return false; | 905 | return false; |
@@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
923 | static void collapse_huge_page(struct mm_struct *mm, | 924 | static void collapse_huge_page(struct mm_struct *mm, |
924 | unsigned long address, | 925 | unsigned long address, |
925 | struct page **hpage, | 926 | struct page **hpage, |
926 | struct vm_area_struct *vma, | ||
927 | int node, int referenced) | 927 | int node, int referenced) |
928 | { | 928 | { |
929 | pmd_t *pmd, _pmd; | 929 | pmd_t *pmd, _pmd; |
@@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm,
933 | spinlock_t *pmd_ptl, *pte_ptl; | 933 | spinlock_t *pmd_ptl, *pte_ptl; |
934 | int isolated = 0, result = 0; | 934 | int isolated = 0, result = 0; |
935 | struct mem_cgroup *memcg; | 935 | struct mem_cgroup *memcg; |
936 | struct vm_area_struct *vma; | ||
936 | unsigned long mmun_start; /* For mmu_notifiers */ | 937 | unsigned long mmun_start; /* For mmu_notifiers */ |
937 | unsigned long mmun_end; /* For mmu_notifiers */ | 938 | unsigned long mmun_end; /* For mmu_notifiers */ |
938 | gfp_t gfp; | 939 | gfp_t gfp; |
@@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm,
961 | } | 962 | } |
962 | 963 | ||
963 | down_read(&mm->mmap_sem); | 964 | down_read(&mm->mmap_sem); |
964 | result = hugepage_vma_revalidate(mm, address); | 965 | result = hugepage_vma_revalidate(mm, address, &vma); |
965 | if (result) { | 966 | if (result) { |
966 | mem_cgroup_cancel_charge(new_page, memcg, true); | 967 | mem_cgroup_cancel_charge(new_page, memcg, true); |
967 | up_read(&mm->mmap_sem); | 968 | up_read(&mm->mmap_sem); |
@@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm,
994 | * handled by the anon_vma lock + PG_lock. | 995 | * handled by the anon_vma lock + PG_lock. |
995 | */ | 996 | */ |
996 | down_write(&mm->mmap_sem); | 997 | down_write(&mm->mmap_sem); |
997 | result = hugepage_vma_revalidate(mm, address); | 998 | result = hugepage_vma_revalidate(mm, address, &vma); |
998 | if (result) | 999 | if (result) |
999 | goto out; | 1000 | goto out; |
1000 | /* check if the pmd is still valid */ | 1001 | /* check if the pmd is still valid */ |
@@ -1202,7 +1203,7 @@ out_unmap:
1202 | if (ret) { | 1203 | if (ret) { |
1203 | node = khugepaged_find_target_node(); | 1204 | node = khugepaged_find_target_node(); |
1204 | /* collapse_huge_page will return with the mmap_sem released */ | 1205 | /* collapse_huge_page will return with the mmap_sem released */ |
1205 | collapse_huge_page(mm, address, hpage, vma, node, referenced); | 1206 | collapse_huge_page(mm, address, hpage, node, referenced); |
1206 | } | 1207 | } |
1207 | out: | 1208 | out: |
1208 | trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, | 1209 | trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a6a51a7c416..4be518d4e68a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
1740 | static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | 1740 | static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1741 | { | 1741 | { |
1742 | struct memcg_stock_pcp *stock; | 1742 | struct memcg_stock_pcp *stock; |
1743 | unsigned long flags; | ||
1743 | bool ret = false; | 1744 | bool ret = false; |
1744 | 1745 | ||
1745 | if (nr_pages > CHARGE_BATCH) | 1746 | if (nr_pages > CHARGE_BATCH) |
1746 | return ret; | 1747 | return ret; |
1747 | 1748 | ||
1748 | stock = &get_cpu_var(memcg_stock); | 1749 | local_irq_save(flags); |
1750 | |||
1751 | stock = this_cpu_ptr(&memcg_stock); | ||
1749 | if (memcg == stock->cached && stock->nr_pages >= nr_pages) { | 1752 | if (memcg == stock->cached && stock->nr_pages >= nr_pages) { |
1750 | stock->nr_pages -= nr_pages; | 1753 | stock->nr_pages -= nr_pages; |
1751 | ret = true; | 1754 | ret = true; |
1752 | } | 1755 | } |
1753 | put_cpu_var(memcg_stock); | 1756 | |
1757 | local_irq_restore(flags); | ||
1758 | |||
1754 | return ret; | 1759 | return ret; |
1755 | } | 1760 | } |
1756 | 1761 | ||
@@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
1771 | stock->cached = NULL; | 1776 | stock->cached = NULL; |
1772 | } | 1777 | } |
1773 | 1778 | ||
1774 | /* | ||
1775 | * This must be called under preempt disabled or must be called by | ||
1776 | * a thread which is pinned to local cpu. | ||
1777 | */ | ||
1778 | static void drain_local_stock(struct work_struct *dummy) | 1779 | static void drain_local_stock(struct work_struct *dummy) |
1779 | { | 1780 | { |
1780 | struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock); | 1781 | struct memcg_stock_pcp *stock; |
1782 | unsigned long flags; | ||
1783 | |||
1784 | local_irq_save(flags); | ||
1785 | |||
1786 | stock = this_cpu_ptr(&memcg_stock); | ||
1781 | drain_stock(stock); | 1787 | drain_stock(stock); |
1782 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); | 1788 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); |
1789 | |||
1790 | local_irq_restore(flags); | ||
1783 | } | 1791 | } |
1784 | 1792 | ||
1785 | /* | 1793 | /* |
@@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy)
1788 | */ | 1796 | */ |
1789 | static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | 1797 | static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1790 | { | 1798 | { |
1791 | struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); | 1799 | struct memcg_stock_pcp *stock; |
1800 | unsigned long flags; | ||
1801 | |||
1802 | local_irq_save(flags); | ||
1792 | 1803 | ||
1804 | stock = this_cpu_ptr(&memcg_stock); | ||
1793 | if (stock->cached != memcg) { /* reset if necessary */ | 1805 | if (stock->cached != memcg) { /* reset if necessary */ |
1794 | drain_stock(stock); | 1806 | drain_stock(stock); |
1795 | stock->cached = memcg; | 1807 | stock->cached = memcg; |
1796 | } | 1808 | } |
1797 | stock->nr_pages += nr_pages; | 1809 | stock->nr_pages += nr_pages; |
1798 | put_cpu_var(memcg_stock); | 1810 | |
1811 | local_irq_restore(flags); | ||
1799 | } | 1812 | } |
1800 | 1813 | ||
1801 | /* | 1814 | /* |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 41266dc29f33..b58906b6215c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1567,7 +1567,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
1567 | return alloc_huge_page_node(page_hstate(compound_head(page)), | 1567 | return alloc_huge_page_node(page_hstate(compound_head(page)), |
1568 | next_node_in(nid, nmask)); | 1568 | next_node_in(nid, nmask)); |
1569 | 1569 | ||
1570 | node_clear(nid, nmask); | 1570 | if (nid != next_node_in(nid, nmask)) |
1571 | node_clear(nid, nmask); | ||
1572 | |||
1571 | if (PageHighMem(page) | 1573 | if (PageHighMem(page) |
1572 | || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) | 1574 | || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) |
1573 | gfp_mask |= __GFP_HIGHMEM; | 1575 | gfp_mask |= __GFP_HIGHMEM; |
diff --git a/mm/page_io.c b/mm/page_io.c
index 16bd82fad38c..eafe5ddc2b54 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
264 | int ret; | 264 | int ret; |
265 | struct swap_info_struct *sis = page_swap_info(page); | 265 | struct swap_info_struct *sis = page_swap_info(page); |
266 | 266 | ||
267 | BUG_ON(!PageSwapCache(page)); | ||
267 | if (sis->flags & SWP_FILE) { | 268 | if (sis->flags & SWP_FILE) { |
268 | struct kiocb kiocb; | 269 | struct kiocb kiocb; |
269 | struct file *swap_file = sis->swap_file; | 270 | struct file *swap_file = sis->swap_file; |
@@ -337,6 +338,7 @@ int swap_readpage(struct page *page)
337 | int ret = 0; | 338 | int ret = 0; |
338 | struct swap_info_struct *sis = page_swap_info(page); | 339 | struct swap_info_struct *sis = page_swap_info(page); |
339 | 340 | ||
341 | BUG_ON(!PageSwapCache(page)); | ||
340 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 342 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
341 | VM_BUG_ON_PAGE(PageUptodate(page), page); | 343 | VM_BUG_ON_PAGE(PageUptodate(page), page); |
342 | if (frontswap_load(page) == 0) { | 344 | if (frontswap_load(page) == 0) { |
@@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page)
386 | 388 | ||
387 | if (sis->flags & SWP_FILE) { | 389 | if (sis->flags & SWP_FILE) { |
388 | struct address_space *mapping = sis->swap_file->f_mapping; | 390 | struct address_space *mapping = sis->swap_file->f_mapping; |
391 | BUG_ON(!PageSwapCache(page)); | ||
389 | return mapping->a_ops->set_page_dirty(page); | 392 | return mapping->a_ops->set_page_dirty(page); |
390 | } else { | 393 | } else { |
391 | return __set_page_dirty_no_writeback(page); | 394 | return __set_page_dirty_no_writeback(page); |
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 78cfa292a29a..2657accc6e2b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry)
2724 | struct swap_info_struct *page_swap_info(struct page *page) | 2724 | struct swap_info_struct *page_swap_info(struct page *page) |
2725 | { | 2725 | { |
2726 | swp_entry_t swap = { .val = page_private(page) }; | 2726 | swp_entry_t swap = { .val = page_private(page) }; |
2727 | BUG_ON(!PageSwapCache(page)); | ||
2728 | return swap_info[swp_type(swap)]; | 2727 | return swap_info[swp_type(swap)]; |
2729 | } | 2728 | } |
2730 | 2729 | ||
diff --git a/net/core/sock.c b/net/core/sock.c
index 25dab8b60223..fd7b41edf1ce 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1362,7 +1362,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1362 | if (!try_module_get(prot->owner)) | 1362 | if (!try_module_get(prot->owner)) |
1363 | goto out_free_sec; | 1363 | goto out_free_sec; |
1364 | sk_tx_queue_clear(sk); | 1364 | sk_tx_queue_clear(sk); |
1365 | cgroup_sk_alloc(&sk->sk_cgrp_data); | ||
1366 | } | 1365 | } |
1367 | 1366 | ||
1368 | return sk; | 1367 | return sk; |
@@ -1422,6 +1421,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1422 | sock_net_set(sk, net); | 1421 | sock_net_set(sk, net); |
1423 | atomic_set(&sk->sk_wmem_alloc, 1); | 1422 | atomic_set(&sk->sk_wmem_alloc, 1); |
1424 | 1423 | ||
1424 | cgroup_sk_alloc(&sk->sk_cgrp_data); | ||
1425 | sock_update_classid(&sk->sk_cgrp_data); | 1425 | sock_update_classid(&sk->sk_cgrp_data); |
1426 | sock_update_netprioidx(&sk->sk_cgrp_data); | 1426 | sock_update_netprioidx(&sk->sk_cgrp_data); |
1427 | } | 1427 | } |
@@ -1566,6 +1566,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1566 | newsk->sk_priority = 0; | 1566 | newsk->sk_priority = 0; |
1567 | newsk->sk_incoming_cpu = raw_smp_processor_id(); | 1567 | newsk->sk_incoming_cpu = raw_smp_processor_id(); |
1568 | atomic64_set(&newsk->sk_cookie, 0); | 1568 | atomic64_set(&newsk->sk_cookie, 0); |
1569 | |||
1570 | cgroup_sk_alloc(&newsk->sk_cgrp_data); | ||
1571 | |||
1569 | /* | 1572 | /* |
1570 | * Before updating sk_refcnt, we must commit prior changes to memory | 1573 | * Before updating sk_refcnt, we must commit prior changes to memory |
1571 | * (Documentation/RCU/rculist_nulls.txt for details) | 1574 | * (Documentation/RCU/rculist_nulls.txt for details) |