19 files changed, 240 insertions, 76 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index ea4d0058fd1b..60b1163dba28 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4716,6 +4716,7 @@ L:	linux-iio@vger.kernel.org
 S:      Maintained
 F:      drivers/iio/
 F:      drivers/staging/iio/
+F:      include/linux/iio/
 IKANOS/ADI EAGLE ADSL USB DRIVER
 M:      Matthieu Castet <castet.matthieu@free.fr>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 2ad0b5bce44b..3920ee45aa59 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -560,7 +560,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
        }
        if (page_zero_filled(uncmem)) {
-                kunmap_atomic(user_mem);
+                if (user_mem)
+                        kunmap_atomic(user_mem);
                /* Free memory associated with this sector now. */
                bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
                zram_free_page(zram, index);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9d3e9c50066a..89326acd4561 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                                              &fsnotify_mark_srcu);
        }
+        /*
+         * We need to merge inode & vfsmount mark lists so that inode mark
+         * ignore masks are properly reflected for mount mark notifications.
+         * That's why this traversal is so complicated...
+         */
        while (inode_node || vfsmount_node) {
-                inode_group = vfsmount_group = NULL;
+                inode_group = NULL;
+                inode_mark = NULL;
+                vfsmount_group = NULL;
+                vfsmount_mark = NULL;
                if (inode_node) {
                        inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                        vfsmount_group = vfsmount_mark->group;
                }
-                if (inode_group > vfsmount_group) {
+                if (inode_group && vfsmount_group) {
-                        /* handle inode */
+                        int cmp = fsnotify_compare_groups(inode_group,
-                        ret = send_to_group(to_tell, inode_mark, NULL, mask,
+                                                          vfsmount_group);
-                                            data, data_is, cookie, file_name);
+                        if (cmp > 0) {
-                        /* we didn't use the vfsmount_mark */
+                                inode_group = NULL;
-                        vfsmount_group = NULL;
+                                inode_mark = NULL;
-                } else if (vfsmount_group > inode_group) {
+                        } else if (cmp < 0) {
-                        ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
+                                vfsmount_group = NULL;
-                                            data, data_is, cookie, file_name);
+                                vfsmount_mark = NULL;
-                        inode_group = NULL;
+                        }
-                } else {
-                        ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
-                                            mask, data, data_is, cookie,
-                                            file_name);
                }
+                ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
+                                    data, data_is, cookie, file_name);
                if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
                        goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 9c0898c4cfe1..3b68b0ae0a97 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
 /* protects reads of inode and vfsmount marks list */
 extern struct srcu_struct fsnotify_mark_srcu;
+/* compare two groups for sorting of marks lists */
+extern int fsnotify_compare_groups(struct fsnotify_group *a,
+                                   struct fsnotify_group *b);
 extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
                                                __u32 mask);
 /* add a mark to an inode */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index e8497144b323..dfbf5447eea4 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 {
        struct fsnotify_mark *lmark, *last = NULL;
        int ret = 0;
+        int cmp;
        mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
                        goto out;
                }
-                if (mark->group->priority < lmark->group->priority)
+                cmp = fsnotify_compare_groups(lmark->group, mark->group);
-                        continue;
+                if (cmp < 0)
-                if ((mark->group->priority == lmark->group->priority) &&
-                    (mark->group < lmark->group))
                        continue;
                hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d90deaa08e78..34c38fabf514 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
 }
 /*
+ * Sorting function for lists of fsnotify marks.
+ *
+ * Fanotify supports different notification classes (reflected as priority of
+ * notification group). Events shall be passed to notification groups in
+ * decreasing priority order. To achieve this, marks in notification lists for
+ * inodes and vfsmounts are sorted so that priorities of corresponding groups
+ * are descending.
+ *
+ * Furthermore correct handling of the ignore mask requires processing inode
+ * and vfsmount marks of each group together. Using the group address as
+ * further sort criterion provides a unique sorting order and thus we can
+ * merge inode and vfsmount lists of marks in linear time and find groups
+ * present in both lists.
+ *
+ * A return value of 1 signifies that b has priority over a.
+ * A return value of 0 signifies that the two marks have to be handled together.
+ * A return value of -1 signifies that a has priority over b.
+ */
+int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
+{
+        if (a == b)
+                return 0;
+        if (!a)
+                return 1;
+        if (!b)
+                return -1;
+        if (a->priority < b->priority)
+                return 1;
+        if (a->priority > b->priority)
+                return -1;
+        if (a < b)
+                return 1;
+        return -1;
+}
+/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index ac851e8376b1..faefa72a11eb 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
        struct mount *m = real_mount(mnt);
        struct fsnotify_mark *lmark, *last = NULL;
        int ret = 0;
+        int cmp;
        mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
                        goto out;
                }
-                if (mark->group->priority < lmark->group->priority)
+                cmp = fsnotify_compare_groups(lmark->group, mark->group);
-                        continue;
+                if (cmp < 0)
-                if ((mark->group->priority == lmark->group->priority) &&
-                    (mark->group < lmark->group))
                        continue;
                hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 4e2bd4c95b66..0995c2de8162 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
 extern unsigned long free_all_bootmem(void);
+extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
 extern void free_bootmem_node(pg_data_t *pgdat,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 48bf12ef6620..ffe66e381c04 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -431,6 +431,15 @@ struct zone {
         */
        int                     nr_migrate_reserve_block;
+#ifdef CONFIG_MEMORY_ISOLATION
+        /*
+         * Number of isolated pageblocks. It is used to solve the incorrect
+         * freepage counting problem caused by racy retrieval of a
+         * pageblock's migratetype. Protected by zone->lock.
+         */
+        unsigned long           nr_isolate_pageblock;
+#endif
 #ifdef CONFIG_MEMORY_HOTPLUG
        /* see spanned/present_pages for more description */
        seqlock_t               span_seqlock;
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 3fff8e774067..2dc1e1697b45 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,6 +2,10 @@
 #define __LINUX_PAGEISOLATION_H
 #ifdef CONFIG_MEMORY_ISOLATION
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+        return zone->nr_isolate_pageblock;
+}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
        return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
@@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype)
        return migratetype == MIGRATE_ISOLATE;
 }
 #else
+static inline bool has_isolate_pageblock(struct zone *zone)
+{
+        return false;
+}
 static inline bool is_migrate_isolate_page(struct page *page)
 {
        return false;
diff --git a/kernel/panic.c b/kernel/panic.c
index d09dc5c32c67..cf80672b7924 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
 *  'I' - Working around severe firmware bug.
 *  'O' - Out-of-tree module has been loaded.
 *  'E' - Unsigned module has been loaded.
+ *  'L' - A soft lockup has previously occurred.
 *
 *      The string is overwritten by the next call to print_tainted().
 */
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8a000cebb0d7..477be696511d 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 static int reset_managed_pages_done __initdata;
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
 {
        struct zone *z;
-        if (reset_managed_pages_done)
-                return;
        for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
                z->managed_pages = 0;
 }
@@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void)
 {
        struct pglist_data *pgdat;
+        if (reset_managed_pages_done)
+                return;
        for_each_online_pgdat(pgdat)
                reset_node_managed_pages(pgdat);
        reset_managed_pages_done = 1;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index ec74cf0123ef..f9792ba3537c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -479,6 +479,16 @@ isolate_freepages_range(struct compact_control *cc,
                block_end_pfn = min(block_end_pfn, end_pfn);
+                /*
+                 * pfn could pass block_end_pfn if the isolated freepage
+                 * is larger than pageblock order. In this case, we adjust
+                 * the scanning range to the pageblock that contains pfn.
+                 */
+                if (pfn >= block_end_pfn) {
+                        block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
+                        block_end_pfn = min(block_end_pfn, end_pfn);
+                }
                if (!pageblock_pfn_to_page(pfn, block_end_pfn, cc->zone))
                        break;
@@ -1029,8 +1039,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
        }
        acct_isolated(zone, cc);
-        /* Record where migration scanner will be restarted */
+        /*
-        cc->migrate_pfn = low_pfn;
+         * Record where migration scanner will be restarted. If we end up in
+         * the same pageblock as the free scanner, make the scanners fully
+         * meet so that compact_finished() terminates compaction.
+         */
+        cc->migrate_pfn = (end_pfn <= cc->free_pfn) ? low_pfn : cc->free_pfn;
        return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
 }
diff --git a/mm/internal.h b/mm/internal.h
index 829304090b90..a4f90ba7068e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -108,6 +108,31 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
 /*
 * in mm/page_alloc.c
 */
+/*
+ * Locate the struct page for both the matching buddy in our
+ * pair (buddy1) and the combined O(n+1) page they form (page).
+ *
+ * 1) Any buddy B1 will have an order O twin B2 which satisfies
+ * the following equation:
+ *     B2 = B1 ^ (1 << O)
+ * For example, if the starting buddy (buddy2) is #8 its order
+ * 1 buddy is #10:
+ *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
+ *
+ * 2) Any buddy B will have an order O+1 parent P which
+ * satisfies the following equation:
+ *     P = B & ~(1 << O)
+ *
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
+ */
+static inline unsigned long
+__find_buddy_index(unsigned long page_idx, unsigned int order)
+{
+        return page_idx ^ (1 << order);
+}
+extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 252e1dbbed86..1bf4807cb21e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -31,6 +31,7 @@
 #include <linux/stop_machine.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
+#include <linux/bootmem.h>
 #include <asm/tlbflush.h>
@@ -1066,6 +1067,16 @@ out:
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+static void reset_node_present_pages(pg_data_t *pgdat)
+{
+        struct zone *z;
+        for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
+                z->present_pages = 0;
+        pgdat->node_present_pages = 0;
+}
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 {
@@ -1096,6 +1107,21 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
        build_all_zonelists(pgdat, NULL);
        mutex_unlock(&zonelists_mutex);
+        /*
+         * zone->managed_pages is set to an approximate value in
+         * free_area_init_core(), which will cause
+         * /sys/devices/system/node/nodeX/meminfo to have wrong data.
+         * So reset it to 0 before any memory is onlined.
+         */
+        reset_node_managed_pages(pgdat);
+        /*
+         * When memory is hot-added, all the memory is in offline state. So
+         * clear all zones' present_pages because they will be updated in
+         * online_pages() and offline_pages().
+         */
+        reset_node_present_pages(pgdat);
        return pgdat;
 }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 7c7ab32ee503..90b50468333e 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void)
 static int reset_managed_pages_done __initdata;
-static inline void __init reset_node_managed_pages(pg_data_t *pgdat)
+void reset_node_managed_pages(pg_data_t *pgdat)
 {
        struct zone *z;
-        if (reset_managed_pages_done)
-                return;
        for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
                z->managed_pages = 0;
 }
@@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void)
 {
        struct pglist_data *pgdat;
+        if (reset_managed_pages_done)
+                return;
        for_each_online_pgdat(pgdat)
                reset_node_managed_pages(pgdat);
        reset_managed_pages_done = 1;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9cd36b822444..616a2c956b4b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -467,29 +467,6 @@ static inline void rmv_page_order(struct page *page)
 }
 /*
- * Locate the struct page for both the matching buddy in our
- * pair (buddy1) and the combined O(n+1) page they form (page).
- *
- * 1) Any buddy B1 will have an order O twin B2 which satisfies
- * the following equation:
- *     B2 = B1 ^ (1 << O)
- * For example, if the starting buddy (buddy2) is #8 its order
- * 1 buddy is #10:
- *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
- *
- * 2) Any buddy B will have an order O+1 parent P which
- * satisfies the following equation:
- *     P = B & ~(1 << O)
- *
- * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
- */
-static inline unsigned long
-__find_buddy_index(unsigned long page_idx, unsigned int order)
-{
-        return page_idx ^ (1 << order);
-}
-/*
 * This function checks whether a page is free && is the buddy
 * we can do coalesce a page and its buddy if
 * (a) the buddy is not in a hole &&
@@ -569,6 +546,7 @@ static inline void __free_one_page(struct page *page,
        unsigned long combined_idx;
        unsigned long uninitialized_var(buddy_idx);
        struct page *buddy;
+        int max_order = MAX_ORDER;
        VM_BUG_ON(!zone_is_initialized(zone));
@@ -577,13 +555,24 @@ static inline void __free_one_page(struct page *page,
                        return;
        VM_BUG_ON(migratetype == -1);
+        if (is_migrate_isolate(migratetype)) {
+                /*
+                 * Restrict the maximum merge order so that freepages on
+                 * an isolated pageblock never merge with freepages on a
+                 * normal pageblock. Without this, pageblock isolation
+                 * could cause incorrect freepage accounting.
+                 */
+                max_order = min(MAX_ORDER, pageblock_order + 1);
+        } else {
+                __mod_zone_freepage_state(zone, 1 << order, migratetype);
+        }
-        page_idx = pfn & ((1 << MAX_ORDER) - 1);
+        page_idx = pfn & ((1 << max_order) - 1);
        VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
        VM_BUG_ON_PAGE(bad_range(zone, page), page);
-        while (order < MAX_ORDER-1) {
+        while (order < max_order - 1) {
                buddy_idx = __find_buddy_index(page_idx, order);
                buddy = page + (buddy_idx - page_idx);
                if (!page_is_buddy(page, buddy, order))
@@ -594,9 +583,11 @@ static inline void __free_one_page(struct page *page,
                 */
                if (page_is_guard(buddy)) {
                        clear_page_guard_flag(buddy);
-                        set_page_private(page, 0);
+                        set_page_private(buddy, 0);
-                        __mod_zone_freepage_state(zone, 1 << order,
+                        if (!is_migrate_isolate(migratetype)) {
-                                                  migratetype);
+                                __mod_zone_freepage_state(zone, 1 << order,
+                                                          migratetype);
+                        }
                } else {
                        list_del(&buddy->lru);
                        zone->free_area[order].nr_free--;
@@ -715,14 +706,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                        /* must delete as __free_one_page list manipulates */
                        list_del(&page->lru);
                        mt = get_freepage_migratetype(page);
+                        if (unlikely(has_isolate_pageblock(zone)))
+                                mt = get_pageblock_migratetype(page);
                        /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
                        __free_one_page(page, page_to_pfn(page), zone, 0, mt);
                        trace_mm_page_pcpu_drain(page, 0, mt);
-                        if (likely(!is_migrate_isolate_page(page))) {
-                                __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
-                                if (is_migrate_cma(mt))
-                                        __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
-                        }
                } while (--to_free && --batch_free && !list_empty(list));
        }
        spin_unlock(&zone->lock);
@@ -739,9 +728,11 @@ static void free_one_page(struct zone *zone,
        if (nr_scanned)
                __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
+        if (unlikely(has_isolate_pageblock(zone) ||
+                is_migrate_isolate(migratetype))) {
+                migratetype = get_pfnblock_migratetype(page, pfn);
+        }
        __free_one_page(page, pfn, zone, order, migratetype);
-        if (unlikely(!is_migrate_isolate(migratetype)))
-                __mod_zone_freepage_state(zone, 1 << order, migratetype);
        spin_unlock(&zone->lock);
 }
@@ -1484,7 +1475,7 @@ void split_page(struct page *page, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(split_page);
-static int __isolate_free_page(struct page *page, unsigned int order)
+int __isolate_free_page(struct page *page, unsigned int order)
 {
        unsigned long watermark;
        struct zone *zone;
@@ -6408,13 +6399,12 @@ int alloc_contig_range(unsigned long start, unsigned long end,
        /* Make sure the range is really isolated. */
        if (test_pages_isolated(outer_start, end, false)) {
-                pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
+                pr_info("%s: [%lx, %lx) PFNs busy\n",
-                       outer_start, end);
+                        __func__, outer_start, end);
                ret = -EBUSY;
                goto done;
        }
        /* Grab isolated pages from freelists. */
        outer_end = isolate_freepages_range(&cc, outer_start, end);
        if (!outer_end) {
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index d1473b2e9481..c8778f7e208e 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -60,6 +60,7 @@ out:
                int migratetype = get_pageblock_migratetype(page);
                set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+                zone->nr_isolate_pageblock++;
                nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
                __mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -75,16 +76,54 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 {
        struct zone *zone;
        unsigned long flags, nr_pages;
+        struct page *isolated_page = NULL;
+        unsigned int order;
+        unsigned long page_idx, buddy_idx;
+        struct page *buddy;
        zone = page_zone(page);
        spin_lock_irqsave(&zone->lock, flags);
        if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
                goto out;
-        nr_pages = move_freepages_block(zone, page, migratetype);
-        __mod_zone_freepage_state(zone, nr_pages, migratetype);
+        /*
+         * Because merging freepages beyond pageblock_order is restricted
+         * on an isolated pageblock (to avoid the freepage counting
+         * problem), it is possible that an unmerged free buddy page exists.
+         * move_freepages_block() does not merge pages, so we need another
+         * approach to merge them: isolating the page and then freeing it
+         * causes these pages to be merged.
+         */
+        if (PageBuddy(page)) {
+                order = page_order(page);
+                if (order >= pageblock_order) {
+                        page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+                        buddy_idx = __find_buddy_index(page_idx, order);
+                        buddy = page + (buddy_idx - page_idx);
+                        if (!is_migrate_isolate_page(buddy)) {
+                                __isolate_free_page(page, order);
+                                set_page_refcounted(page);
+                                isolated_page = page;
+                        }
+                }
+        }
+        /*
+         * If we isolated a freepage of at least pageblock_order, there
+         * should be no other freepage in the range, so we can skip the
+         * costly pageblock scan for moving freepages.
+         */
+        if (!isolated_page) {
+                nr_pages = move_freepages_block(zone, page, migratetype);
+                __mod_zone_freepage_state(zone, nr_pages, migratetype);
+        }
        set_pageblock_migratetype(page, migratetype);
+        zone->nr_isolate_pageblock--;
 out:
        spin_unlock_irqrestore(&zone->lock, flags);
+        if (isolated_page)
+                __free_pages(isolated_page, order);
 }
 static inline struct page *
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 406944207b61..dcdab81bd240 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -259,6 +259,10 @@ struct kmem_cache *find_mergeable(size_t size, size_t align,
                if (s->size - size >= sizeof(void *))
                        continue;
+                if (IS_ENABLED(CONFIG_SLAB) && align &&
+                        (align > s->align || s->align % align))
+                        continue;
                return s;
        }
        return NULL;