From 1735527cd6d215863dae7d4df8c7267d4337bb5d Mon Sep 17 00:00:00 2001
From: Namhoon Kim
Date: Fri, 30 Sep 2016 02:06:05 -0400
Subject: Unmap only the caller's PTEs when replicating shared library pages

Replace the try_to_unmap() call in __unmap_and_copy() with an
rmap_walk() over the new try_to_unmap_one_only(), which establishes a
migration entry only for the mm of the calling task. Skip the
radix-tree slot replacement in replicate_page_move_mapping() so the
master page stays in the file's page cache and restore its reference
count on the way out, record the replica page only if the copy
succeeded, and put isolated pages back on the LRU once
unmap_and_copy() is done.
---
 include/linux/rmap.h           |   3 +-
 include/litmus/replicate_lib.h |   4 +-
 litmus/litmus.c                |  24 +++---
 mm/migrate.c                   |  55 +++++++++-----
 mm/page_alloc.c                |   2 +-
 mm/rmap.c                      | 166 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 218 insertions(+), 36 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c89c53a113a8..7c90e029c7c6 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -188,7 +188,8 @@ int page_referenced(struct page *, int is_locked,
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
-
+int try_to_unmap_one_only(struct page *page, struct vm_area_struct *vma,
+			unsigned long address, void *arg);
 /*
  * Used by uprobes to replace a userspace page safely
  */
diff --git a/include/litmus/replicate_lib.h b/include/litmus/replicate_lib.h
index 98bfb9707144..16db7d81b66b 100644
--- a/include/litmus/replicate_lib.h
+++ b/include/litmus/replicate_lib.h
@@ -8,8 +8,8 @@
 struct shared_lib_page {
 	struct page *master_page;
 	struct page *r_page;
-	unsigned long master_pfn;
-	unsigned long r_pfn;
+	unsigned long int master_pfn;
+	unsigned long int r_pfn;
 	struct list_head list;
 };
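
The sys_set_page_color() and unmap_and_copy() changes below search this
list linearly by master_pfn. A minimal sketch of that lookup, assuming
the shared_lib_pages list head declared alongside this struct; the
helper name psl_find() is illustrative, the patch open-codes the loop:

	/* Sketch only: psl_find() is a hypothetical helper, not in the patch. */
	static struct shared_lib_page *psl_find(unsigned long master_pfn)
	{
		struct shared_lib_page *lib_page;

		list_for_each_entry(lib_page, &shared_lib_pages, list) {
			if (lib_page->master_pfn == master_pfn)
				return lib_page;
		}
		return NULL;
	}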
diff --git a/litmus/litmus.c b/litmus/litmus.c
index d31138c9b9a6..6088de312bb5 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -407,10 +407,12 @@ asmlinkage long sys_set_page_color(int cpu)
 			continue;
 		}
 
-		TRACE_TASK(current, "addr: %08x, pfn: %ld, _mapcount: %d, _count: %d flags: %s%s%s\n", vma_itr->vm_start + PAGE_SIZE*i, page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page), vma_itr->vm_flags&VM_READ?"r":"-", vma_itr->vm_flags&VM_WRITE?"w":"-", vma_itr->vm_flags&VM_EXEC?"x":"-");
+		TRACE_TASK(current, "addr: %08lx, pfn: %05lx, _mapcount: %d, _count: %d flags: %s%s%s\n", vma_itr->vm_start + PAGE_SIZE*i, page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page), vma_itr->vm_flags&VM_READ?"r":"-", vma_itr->vm_flags&VM_WRITE?"w":"-", vma_itr->vm_flags&VM_EXEC?"x":"-");
 		pages_in_vma++;
-
+
+		// for simple debugging
 		if (page_count(old_page) > 2 && vma_itr->vm_file != NULL && !(vma_itr->vm_flags&VM_WRITE)) {
+		//if (page_count(old_page) < 10 && page_count(old_page) > 3 && vma_itr->vm_file != NULL && !(vma_itr->vm_flags&VM_WRITE)) {
 			struct shared_lib_page *lib_page;
 			int is_exist = 0;
 
@@ -433,10 +435,10 @@ asmlinkage long sys_set_page_color(int cpu)
 				lib_page->master_pfn = page_to_pfn(old_page);
 				lib_page->r_pfn = INVALID_PFN;
 				list_add_tail(&lib_page->list, &shared_lib_pages);
-				TRACE_TASK(current, "NEW PAGE %ld ADDED.\n", lib_page->master_pfn);
+				TRACE_TASK(current, "NEW PAGE %05lx ADDED.\n", lib_page->master_pfn);
 			}
 			else {
-				TRACE_TASK(current, "FOUND PAGE %ld in the list.\n", lib_page->master_pfn);
+				TRACE_TASK(current, "FOUND PAGE %05lx in the list.\n", lib_page->master_pfn);
 			}
 
 			/* add to task_shared_pagelist */
@@ -445,7 +447,7 @@ asmlinkage long sys_set_page_color(int cpu)
 				list_add_tail(&old_page->lru, &task_shared_pagelist);
 				inc_zone_page_state(old_page, NR_ISOLATED_ANON + !PageSwapBacked(old_page));
 				nr_shared_pages++;
-				TRACE_TASK(current, "SHARED isolate_lur_page success\n");
+				TRACE_TASK(current, "SHARED isolate_lru_page success\n");
 			} else {
 				TRACE_TASK(current, "SHARED isolate_lru_page failed\n");
 			}
@@ -459,7 +461,6 @@ asmlinkage long sys_set_page_color(int cpu)
 				nr_pages++;
 			} else {
 				TRACE_TASK(current, "isolate_lru_page failed\n");
-				TRACE_TASK(current, "page_lru = %d PageLRU = %d\n", page_lru(old_page), PageLRU(old_page));
 				nr_failed++;
 			}
 			//printk(KERN_INFO "PRIVATE _mapcount = %d, _count = %d\n", page_mapcount(old_page), page_count(old_page));
@@ -546,7 +547,7 @@ asmlinkage long sys_set_page_color(int cpu)
 	rcu_read_lock();
 	list_for_each_entry(lpage, &shared_lib_pages, list)
 	{
-		TRACE_TASK(current, "master_PFN = %ld r_PFN = %ld PageSwapCache=%d\n", lpage->master_pfn, lpage->r_pfn, PageSwapCache(lpage->master_page));
+		TRACE_TASK(current, "master_PFN = %05lx r_PFN = %05lx PageSwapCache=%d\n", lpage->master_pfn, lpage->r_pfn, PageSwapCache(lpage->master_page));
 	}
 	rcu_read_unlock();
 }
@@ -577,7 +578,7 @@ asmlinkage long sys_set_page_color(int cpu)
 			continue;
 		}
 
-		TRACE_TASK(current, "addr: %08x, pfn: %ld, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page));
+		TRACE_TASK(current, "addr: %08lx, pfn: %05lx, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page));
 		put_page(old_page);
 	}
 
@@ -593,7 +594,6 @@ asmlinkage long sys_set_page_color(int cpu)
 asmlinkage long sys_test_call(unsigned int param)
 {
 	long ret = 0;
-	unsigned long flags;
 	struct vm_area_struct *vma_itr = NULL;
 
 	TRACE_CUR("test_call param = %d\n", param);
@@ -604,7 +604,7 @@ asmlinkage long sys_test_call(unsigned int param)
 	while (vma_itr != NULL) {
 		int i, num_pages;
 		struct page* old_page;
-		TRACE_TASK(current, "--------------------------------------------\n");
+		TRACE_TASK(current, "------------------------------------------------------\n");
 		TRACE_TASK(current, "vm_start : %lx\n", vma_itr->vm_start);
 		TRACE_TASK(current, "vm_end : %lx\n", vma_itr->vm_end);
 		TRACE_TASK(current, "vm_flags : %lx\n", vma_itr->vm_flags);
@@ -635,12 +635,12 @@ asmlinkage long sys_test_call(unsigned int param)
 				continue;
 			}
 
-			TRACE_TASK(current, "addr: %08x, pfn: %ld, _mapcount: %d, _count: %d flags: %s%s%s\n", vma_itr->vm_start + PAGE_SIZE*i, page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page), vma_itr->vm_flags&VM_READ?"r":"-", vma_itr->vm_flags&VM_WRITE?"w":"-", vma_itr->vm_flags&VM_EXEC?"x":"-");
+			TRACE_TASK(current, "addr: %08lx, pfn: %05lx, _mapcount: %d, _count: %d flags: %s%s%s\n", vma_itr->vm_start + PAGE_SIZE*i, page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page), vma_itr->vm_flags&VM_READ?"r":"-", vma_itr->vm_flags&VM_WRITE?"w":"-", vma_itr->vm_flags&VM_EXEC?"x":"-");
 			put_page(old_page);
 		}
 		vma_itr = vma_itr->vm_next;
 	}
-	printk(KERN_INFO "--------------------------------------------\n");
+	TRACE_TASK(current, "------------------------------------------------------\n");
 	up_read(&current->mm->mmap_sem);
 }
 else if (param == 1) {
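
In replicate_page_move_mapping() below, the usual freeze/replace
sequence is bypassed: the master page is meant to stay in the file's
radix tree while a replica is created, so the reference count is simply
restored to the value sampled on entry. For reference, a sketch of what
the skipped page_freeze_refs() step does, assuming its 4.1-era
definition; can_freeze_refs() is an illustrative name:

	/* Sketch: equivalent to page_freeze_refs(page, expected) on v4.1. */
	static inline int can_freeze_refs(struct page *page, int expected)
	{
		/* Drop _count to zero only if nobody else holds a reference. */
		return atomic_cmpxchg(&page->_count, expected, 0) == expected;
	}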
%d\n", page_count(page), expected_count, page_has_private(page)); - +/* if (page_count(page) != expected_count || radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) { spin_unlock_irq(&mapping->tree_lock); @@ -425,12 +426,12 @@ int replicate_page_move_mapping(struct address_space *mapping, return -EAGAIN; } - if (!page_freeze_refs(page, expected_count)) { + if (!page_freeze_refs(page, expected_count)) { // if page_count(page) == expected_count, then set page_count = 0 spin_unlock_irq(&mapping->tree_lock); TRACE_TASK(current, "2\n"); return -EAGAIN; } - +*/ /* * In the async migration case of moving a page with buffers, lock the * buffers using trylock before the mapping is moved. If the mapping @@ -455,7 +456,7 @@ int replicate_page_move_mapping(struct address_space *mapping, set_page_private(newpage, page_private(page)); } - radix_tree_replace_slot(pslot, newpage); + //radix_tree_replace_slot(pslot, newpage); //radix_tree_replace_slot(pslot, page); /* @@ -463,7 +464,8 @@ int replicate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. */ - page_unfreeze_refs(page, expected_count - 1); + //page_unfreeze_refs(page, expected_count - 1); + page_unfreeze_refs(page, prev_count); /* * If moved to a different zone then also account @@ -738,7 +740,7 @@ int replicate_page(struct address_space *mapping, int rc, extra_count = 0; BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - //extra_count = page_count(page) - 2; + rc = replicate_page_move_mapping(mapping, newpage, page, NULL, mode, extra_count); TRACE_TASK(current, "replicate_page_move_mapping returned %d\n", rc); if (rc != MIGRATEPAGE_SUCCESS) @@ -1147,6 +1149,7 @@ static int __unmap_and_copy(struct page *page, struct page *newpage, int force, enum migrate_mode mode, int has_replica) { int rc = -EAGAIN; + int ttu_ret = SWAP_AGAIN; int page_was_mapped = 0; struct anon_vma *anon_vma = NULL; @@ -1270,17 +1273,27 @@ static int __unmap_and_copy(struct page *page, struct page *newpage, /* Establish migration ptes or remove ptes */ if (page_mapped(page)) { - try_to_unmap(page, - TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); + // ttu_ret = try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); + struct rmap_walk_control rwc = { + .rmap_one = try_to_unmap_one_only, + .arg = (void *)(TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS), + }; + + ttu_ret = rmap_walk(page, &rwc); + page_was_mapped = 1; TRACE_TASK(current, "Page %d unmapped from all PTEs\n", page_to_pfn(page)); } skip_unmap: - if (!page_mapped(page)) { + //if (!page_mapped(page)) { + if (ttu_ret == SWAP_SUCCESS) { TRACE_TASK(current, "Call copy_to_new_page\n"); rc = copy_to_new_page(newpage, page, page_was_mapped, mode, has_replica); - } + } else if (ttu_ret == SWAP_AGAIN) + printk(KERN_ERR "rmap_walk returned SWAP_AGAIN\n"); + else + printk(KERN_ERR "rmap_walk failed\n"); if (rc && page_was_mapped) remove_migration_ptes(page, page); @@ -1399,20 +1412,22 @@ static ICE_noinline int unmap_and_copy(new_page_t get_new_page, rcu_read_unlock(); if (is_exist_in_psl) - TRACE_TASK(current, "Page %ld exists in PSL list\n", lib_page->master_pfn); + TRACE_TASK(current, "Page %x exists in PSL list\n", lib_page->master_pfn); if (lib_page->r_page == NULL) { newpage = get_new_page(page, private, &result); if (!newpage) return -ENOMEM; + printk(KERN_ERR "Page %lx allocated\n", page_to_pfn(newpage)); } else { newpage = lib_page->r_page; has_replica = 1; + printk(KERN_ERR "Page %lx found\n", 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 950c002bbb45..3ffde2a09765 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -662,7 +662,7 @@ static inline int free_pages_check(struct page *page)
 	if (unlikely(page_mapcount(page)))
 		bad_reason = "nonzero mapcount";
 	if (unlikely(page->mapping != NULL))
-		bad_reason = "non-NULL mapping";
+		bad_reason = "non-NULL mapping free_check";
 	if (unlikely(atomic_read(&page->_count) != 0))
 		bad_reason = "nonzero _count";
 	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
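
The rmap.c additions below clone try_to_unmap_one() and gate it on the
caller's mm: only the current task's PTE is turned into a migration
entry, while every other task keeps mapping the master copy. The
filtering idea in isolation, with the hypothetical helper name
mm_belongs_to_current() (the patch folds this check into
try_to_unmap_one_only()):

	/* Sketch only: mm_belongs_to_current() is not part of the patch. */
	static bool mm_belongs_to_current(struct vm_area_struct *vma)
	{
		struct mm_struct *mm = get_task_mm(current);
		bool match;

		if (!mm)
			return false;
		match = (vma->vm_mm == mm);
		mmput(mm); /* get_task_mm() took a reference */
		return match;
	}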
diff --git a/mm/rmap.c b/mm/rmap.c
index 24dd3f9fee27..86678671506b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1317,6 +1317,172 @@ out_mlock:
 	return ret;
 }
 
+/*
+ * @arg: enum ttu_flags will be passed to this argument
+ */
+static int try_to_unmap_one_entry(struct page *page, struct vm_area_struct *vma,
+		     unsigned long address, void *arg)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t *pte;
+	pte_t pteval;
+	spinlock_t *ptl;
+	int ret = SWAP_AGAIN;
+	enum ttu_flags flags = (enum ttu_flags)arg;
+
+	pte = page_check_address(page, mm, address, &ptl, 0);
+	if (!pte)
+		goto out;
+
+	/*
+	 * If the page is mlock()d, we cannot swap it out.
+	 * If it's recently referenced (perhaps page_referenced
+	 * skipped over this mm) then we should reactivate it.
+	 */
+	if (!(flags & TTU_IGNORE_MLOCK)) {
+		if (vma->vm_flags & VM_LOCKED)
+			goto out_mlock;
+
+		if (flags & TTU_MUNLOCK)
+			goto out_unmap;
+	}
+	if (!(flags & TTU_IGNORE_ACCESS)) {
+		if (ptep_clear_flush_young_notify(vma, address, pte)) {
+			ret = SWAP_FAIL;
+			goto out_unmap;
+		}
+	}
+
+	/* Nuke the page table entry. */
+	flush_cache_page(vma, address, page_to_pfn(page));
+	pteval = ptep_clear_flush(vma, address, pte);
+
+	/* Move the dirty bit to the physical page now the pte is gone. */
+	if (pte_dirty(pteval))
+		set_page_dirty(page);
+
+	/* Update high watermark before we lower rss */
+	update_hiwater_rss(mm);
+
+	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+		if (!PageHuge(page)) {
+			if (PageAnon(page))
+				dec_mm_counter(mm, MM_ANONPAGES);
+			else
+				dec_mm_counter(mm, MM_FILEPAGES);
+		}
+		set_pte_at(mm, address, pte,
+			   swp_entry_to_pte(make_hwpoison_entry(page)));
+	} else if (pte_unused(pteval)) {
+		/*
+		 * The guest indicated that the page content is of no
+		 * interest anymore. Simply discard the pte, vmscan
+		 * will take care of the rest.
+		 */
+		if (PageAnon(page))
+			dec_mm_counter(mm, MM_ANONPAGES);
+		else
+			dec_mm_counter(mm, MM_FILEPAGES);
+	} else if (PageAnon(page)) {
+		swp_entry_t entry = { .val = page_private(page) };
+		pte_t swp_pte;
+
+		if (PageSwapCache(page)) {
+			/*
+			 * Store the swap location in the pte.
+			 * See handle_pte_fault() ...
+			 */
+			if (swap_duplicate(entry) < 0) {
+				set_pte_at(mm, address, pte, pteval);
+				ret = SWAP_FAIL;
+				goto out_unmap;
+			}
+			if (list_empty(&mm->mmlist)) {
+				spin_lock(&mmlist_lock);
+				if (list_empty(&mm->mmlist))
+					list_add(&mm->mmlist, &init_mm.mmlist);
+				spin_unlock(&mmlist_lock);
+			}
+			dec_mm_counter(mm, MM_ANONPAGES);
+			inc_mm_counter(mm, MM_SWAPENTS);
+		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
+			/*
+			 * Store the pfn of the page in a special migration
+			 * pte. do_swap_page() will wait until the migration
+			 * pte is removed and then restart fault handling.
+			 */
+			BUG_ON(!(flags & TTU_MIGRATION));
+			entry = make_migration_entry(page, pte_write(pteval));
+		}
+		swp_pte = swp_entry_to_pte(entry);
+		if (pte_soft_dirty(pteval))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		set_pte_at(mm, address, pte, swp_pte);
+	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
+		   (flags & TTU_MIGRATION)) {
+		/* Establish migration entry for a file page */
+		swp_entry_t entry;
+		entry = make_migration_entry(page, pte_write(pteval));
+		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
+		printk(KERN_ERR "established migration entry for page %05lx PTE_WRITE = %d\n", page_to_pfn(page), pte_write(pteval));
+	} else
+		dec_mm_counter(mm, MM_FILEPAGES);
+
+	page_remove_rmap(page);
+	page_cache_release(page);
+
+out_unmap:
+	pte_unmap_unlock(pte, ptl);
+	if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK)) {
+		mmu_notifier_invalidate_page(mm, address);
+		ret = SWAP_SUCCESS;
+	}
+out:
+	return ret;
+
+out_mlock:
+	pte_unmap_unlock(pte, ptl);
+
+
+	/*
+	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
+	 * unstable result and race. Plus, We can't wait here because
+	 * we now hold anon_vma->rwsem or mapping->i_mmap_rwsem.
+	 * if trylock failed, the page remain in evictable lru and later
+	 * vmscan could retry to move the page to unevictable lru if the
+	 * page is actually mlocked.
+	 */
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		if (vma->vm_flags & VM_LOCKED) {
+			mlock_vma_page(page);
+			ret = SWAP_MLOCK;
+		}
+		up_read(&vma->vm_mm->mmap_sem);
+	}
+	return ret;
+}
+
+int try_to_unmap_one_only(struct page *page, struct vm_area_struct *vma,
+		     unsigned long address, void *arg)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct mm_struct *current_mm;
+	int ret = SWAP_AGAIN;
+
+	rcu_read_lock();
+	get_task_struct(current);
+	rcu_read_unlock();
+	current_mm = get_task_mm(current);
+	put_task_struct(current);
+	if (!current_mm)
+		BUG();
+
+	if (mm == current_mm)
+		ret = try_to_unmap_one_entry(page, vma, address, arg);
+	mmput(current_mm); /* drop the reference taken by get_task_mm() */
+	return ret;
+}
+
 bool is_vma_temporary_stack(struct vm_area_struct *vma)
 {
 	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
-- 
cgit v1.2.2