Merge branch 'akpm' (incoming from Andrew)

Merge second patch-bomb from Andrew Morton:
 - various misc bits
 - the rest of MM
 - add generic fixmap.h, use it
 - backlight updates
 - dynamic_debug updates
 - printk() updates
 - checkpatch updates
 - binfmt_elf
 - ramfs
 - init/
 - autofs4
 - drivers/rtc
 - nilfs
 - hfsplus
 - Documentation/
 - coredump
 - procfs
 - fork
 - exec
 - kexec
 - kdump
 - partitions
 - rapidio
 - rbtree
 - userns
 - memstick
 - w1
 - decompressors

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (197 commits)
  lib/decompress_unlz4.c: always set an error return code on failures
  romfs: fix returm err while getting inode in fill_super
  drivers/w1/masters/w1-gpio.c: add strong pullup emulation
  drivers/memstick/host/rtsx_pci_ms.c: fix ms card data transfer bug
  userns: relax the posix_acl_valid() checks
  arch/sh/kernel/dwarf.c: use rbtree postorder iteration helper instead of solution using repeated rb_erase()
  fs-ext3-use-rbtree-postorder-iteration-helper-instead-of-opencoding-fix
  fs/ext3: use rbtree postorder iteration helper instead of opencoding
  fs/jffs2: use rbtree postorder iteration helper instead of opencoding
  fs/ext4: use rbtree postorder iteration helper instead of opencoding
  fs/ubifs: use rbtree postorder iteration helper instead of opencoding
  net/netfilter/ipset/ip_set_hash_netiface.c: use rbtree postorder iteration instead of opencoding
  rbtree/test: test rbtree_postorder_for_each_entry_safe()
  rbtree/test: move rb_node to the middle of the test struct
  rapidio: add modular rapidio core build into powerpc and mips branches
  partitions/efi: complete documentation of gpt kernel param purpose
  kdump: add /sys/kernel/vmcoreinfo ABI documentation
  kdump: fix exported size of vmcoreinfo note
  kexec: add sysctl to disable kexec_load
  fs/exec.c: call arch_pick_mmap_layout() only once
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-01-23 22:11:50 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-01-23 22:11:50 -0500
commit: 3aacd625f20129f5a41ea3ff3b5353b0e4dabd01 (patch)
tree: 7cf4ea65397f80098b30494df31cfc8f5fa26d63 /mm/mlock.c
parent: 7e21774db5cc9cf8fe93a64a2f0c6cf47db8ab24 (diff)
parent: 2a1d689c9ba42a6066540fb221b6ecbd6298b728 (diff)
1 files changed, 62 insertions, 46 deletions
diff --git a/mm/mlock.c b/mm/mlock.c
index 10819ed4df3e..4e1a68162285 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -91,6 +91,26 @@ void mlock_vma_page(struct page *page)
 }
 /*
+ * Isolate a page from LRU with optional get_page() pin.
+ * Assumes lru_lock already held and page already pinned.
+ */
+static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
+{
+        if (PageLRU(page)) { /* only pages currently flagged as on an LRU list are isolated */
+                struct lruvec *lruvec;
+                lruvec = mem_cgroup_page_lruvec(page, page_zone(page));
+                if (getpage)
+                        get_page(page); /* optional extra pin, taken only when the caller asks for it */
+                ClearPageLRU(page);
+                del_page_from_lru_list(page, lruvec, page_lru(page));
+                return true; /* PageLRU was set: flag cleared and page removed from its lruvec list */
+        }
+        return false; /* PageLRU was not set: page left untouched, no reference taken */
+}
+/*
 * Finish munlock after successful page isolation
 *
 * Page must be locked. This is a wrapper for try_to_munlock()
@@ -126,9 +146,9 @@ static void __munlock_isolated_page(struct page *page)
 static void __munlock_isolation_failed(struct page *page)
 {
        if (PageUnevictable(page))
-                count_vm_event(UNEVICTABLE_PGSTRANDED);
+                __count_vm_event(UNEVICTABLE_PGSTRANDED);
        else
-                count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+                __count_vm_event(UNEVICTABLE_PGMUNLOCKED);
 }
 /**
@@ -152,28 +172,34 @@ static void __munlock_isolation_failed(struct page *page)
 unsigned int munlock_vma_page(struct page *page)
 {
        unsigned int nr_pages;
+        struct zone *zone = page_zone(page);
        BUG_ON(!PageLocked(page));
-        if (TestClearPageMlocked(page)) {
-                nr_pages = hpage_nr_pages(page);
-                mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
-                if (!isolate_lru_page(page))
-                        __munlock_isolated_page(page);
-                else
-                        __munlock_isolation_failed(page);
-        } else {
-                nr_pages = hpage_nr_pages(page);
-        }
        /*
-         * Regardless of the original PageMlocked flag, we determine nr_pages
+         * Serialize with any parallel __split_huge_page_refcount() which
-         * after touching the flag. This leaves a possible race with a THP page
+         * might otherwise copy PageMlocked to part of the tail pages before
-         * split, such that a whole THP page was munlocked, but nr_pages == 1.
+         * we clear it in the head page. It also stabilizes hpage_nr_pages().
-         * Returning a smaller mask due to that is OK, the worst that can
-         * happen is subsequent useless scanning of the former tail pages.
-         * The NR_MLOCK accounting can however become broken.
         */
+        spin_lock_irq(&zone->lru_lock);
+        nr_pages = hpage_nr_pages(page);
+        if (!TestClearPageMlocked(page))
+                goto unlock_out;
+        __mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+        if (__munlock_isolate_lru_page(page, true)) {
+                spin_unlock_irq(&zone->lru_lock);
+                __munlock_isolated_page(page);
+                goto out;
+        }
+        __munlock_isolation_failed(page);
+unlock_out:
+        spin_unlock_irq(&zone->lru_lock);
+out:
        return nr_pages - 1;
 }
@@ -253,8 +279,8 @@ static int __mlock_posix_error_return(long retval)
 static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
                int *pgrescued)
 {
-        VM_BUG_ON(PageLRU(page));
+        VM_BUG_ON_PAGE(PageLRU(page), page);
-        VM_BUG_ON(!PageLocked(page));
+        VM_BUG_ON_PAGE(!PageLocked(page), page);
        if (page_mapcount(page) <= 1 && page_evictable(page)) {
                pagevec_add(pvec, page);
@@ -310,34 +336,24 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
                struct page *page = pvec->pages[i];
                if (TestClearPageMlocked(page)) {
-                        struct lruvec *lruvec;
-                        int lru;
-                        if (PageLRU(page)) {
-                                lruvec = mem_cgroup_page_lruvec(page, zone);
-                                lru = page_lru(page);
-                                /*
-                                 * We already have pin from follow_page_mask()
-                                 * so we can spare the get_page() here.
-                                 */
-                                ClearPageLRU(page);
-                                del_page_from_lru_list(page, lruvec, lru);
-                        } else {
-                                __munlock_isolation_failed(page);
-                                goto skip_munlock;
-                        }
-                } else {
-skip_munlock:
                        /*
-                         * We won't be munlocking this page in the next phase
+                         * We already have pin from follow_page_mask()
-                         * but we still need to release the follow_page_mask()
+                         * so we can spare the get_page() here.
-                         * pin. We cannot do it under lru_lock however. If it's
-                         * the last pin, __page_cache_release would deadlock.
                         */
-                        pagevec_add(&pvec_putback, pvec->pages[i]);
+                        if (__munlock_isolate_lru_page(page, false))
-                        pvec->pages[i] = NULL;
+                                continue;
+                        else
+                                __munlock_isolation_failed(page);
                }
+                /*
+                 * We won't be munlocking this page in the next phase
+                 * but we still need to release the follow_page_mask()
+                 * pin. We cannot do it under lru_lock however. If it's
+                 * the last pin, __page_cache_release() would deadlock.
+                 */
+                pagevec_add(&pvec_putback, pvec->pages[i]);
+                pvec->pages[i] = NULL;
        }
        delta_munlocked = -nr + pagevec_count(&pvec_putback);
        __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 22:11:50 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-23 22:11:50 -0500
commit	3aacd625f20129f5a41ea3ff3b5353b0e4dabd01 (patch)
tree	7cf4ea65397f80098b30494df31cfc8f5fa26d63 /mm/mlock.c
parent	7e21774db5cc9cf8fe93a64a2f0c6cf47db8ab24 (diff)
parent	2a1d689c9ba42a6066540fb221b6ecbd6298b728 (diff)