From 0e6a44a09800be09924707025646b3f3e3700306 Mon Sep 17 00:00:00 2001 From: Namhoon Kim Date: Thu, 29 Sep 2016 15:05:48 -0400 Subject: 9/29/2016 find_get_page_readonly checks SCHED_LITMUS --- include/linux/mm.h | 2 + include/linux/mmzone.h | 1 + include/linux/page-flags.h | 6 +++ include/linux/vm_event_item.h | 1 + init/main.c | 1 + litmus/litmus.c | 6 +++ mm/Makefile | 2 +- mm/debug.c | 1 + mm/filemap.c | 119 +++++++++++++++++++++++++++++++++++++----- mm/internal.h | 10 ++++ mm/memory.c | 19 +++++-- mm/page-writeback.c | 3 +- mm/vmscan.c | 7 ++- mm/vmstat.c | 4 ++ 14 files changed, 160 insertions(+), 22 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0755b9fd03a7..55df1f8bf4cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2200,5 +2200,7 @@ void __init setup_nr_node_ids(void); static inline void setup_nr_node_ids(void) {} #endif +extern void replication_init(void); + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 54d74f6eb233..abc63c255d44 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -126,6 +126,7 @@ enum zone_stat_item { NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, + NR_REPL_PAGES, NR_FILE_DIRTY, NR_WRITEBACK, NR_SLAB_RECLAIMABLE, diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f34e040b34e9..8b0d7723f3c9 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -97,6 +97,7 @@ enum pageflags { PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ + PG_replicated, /* Page is replicated pagecache */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif @@ -289,6 +290,11 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif +#define PageReplicated(page) test_bit(PG_replicated, &(page)->flags) +#define __SetPageReplicated(page) do { BUG_ON(PageDirty(page) || PageWriteback(page)); __set_bit(PG_replicated, &(page)->flags); } while (0) +#define SetPageReplicated(page) do { BUG_ON(PageDirty(page) || PageWriteback(page)); set_bit(PG_replicated, &(page)->flags); } while (0) +#define ClearPageReplicated(page) clear_bit(PG_replicated, &(page)->flags) + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9246d32dc973..62820318d8ad 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -25,6 +25,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), PGFREE, PGACTIVATE, PGDEACTIVATE, PGFAULT, PGMAJFAULT, + PGREPLICATED, PGREPLICAZAP, FOR_ALL_ZONES(PGREFILL), FOR_ALL_ZONES(PGSTEAL_KSWAPD), FOR_ALL_ZONES(PGSTEAL_DIRECT), diff --git a/init/main.c b/init/main.c index 2a89545e0a5d..88917d93fbe4 100644 --- a/init/main.c +++ b/init/main.c @@ -628,6 +628,7 @@ asmlinkage __visible void __init start_kernel(void) kmemleak_init(); setup_per_cpu_pageset(); numa_policy_init(); + replication_init(); if (late_time_init) late_time_init(); sched_clock_init(); diff --git a/litmus/litmus.c b/litmus/litmus.c index f88cd16ab86d..d31138c9b9a6 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -571,6 +571,12 @@ asmlinkage long sys_set_page_color(int cpu) put_page(old_page); continue; } + + if (page_count(old_page) - page_mapcount(old_page) == 1) { + put_page(old_page); + continue; + } + TRACE_TASK(current, "addr: %08x, pfn: %ld, _mapcount: %d, _count: %d\n", vma_itr->vm_start + PAGE_SIZE*i, __page_to_pfn(old_page), page_mapcount(old_page), page_count(old_page)); put_page(old_page); } diff --git a/mm/Makefile b/mm/Makefile index 98c4eaeabdcb..98d28edd36a5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -56,7 +56,7 @@ obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_MEMTEST) += memtest.o -obj-$(CONFIG_MIGRATION) += migrate.o +obj-$(CONFIG_MIGRATION) += migrate.o replication.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o obj-$(CONFIG_PAGE_COUNTER) += page_counter.o diff --git a/mm/debug.c b/mm/debug.c index 3eb3ac2fcee7..dbc3ea81dde7 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -36,6 +36,7 @@ static const struct trace_print_flags pageflag_names[] = { {1UL << PG_reclaim, "reclaim" }, {1UL << PG_swapbacked, "swapbacked" }, {1UL << PG_unevictable, "unevictable" }, + {1UL << PG_replicated, "replicated" }, #ifdef CONFIG_MMU {1UL << PG_mlocked, "mlocked" }, #endif diff --git a/mm/filemap.c b/mm/filemap.c index 8ea609718839..93853e337f07 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -973,13 +973,21 @@ repeat: page = NULL; pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); if (pagep) { - void *pdesc; - pdesc = radix_tree_deref_slot(pagep); - if (pdesc) - page = (struct page*)pdesc; - //page = radix_tree_deref_slot(pagep); + page = radix_tree_deref_slot(pagep); if (unlikely(!page)) goto out; + if (is_pcache_desc(page)) { + struct pcache_desc *pcd; +printk(KERN_INFO "PCACHE_DESC\n"); + pcd = ptr_to_pcache_desc(page); + page = pcd->master; + page_cache_get_speculative(page); + + unreplicate_pcache(mapping, page->index, 0); + + goto out; + } + if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) goto repeat; @@ -1178,6 +1186,20 @@ repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) continue; + + if (is_pcache_desc(page)) { + struct pcache_desc *pcd; + printk(KERN_INFO "PCACHE_DESC\n"); + + pcd = ptr_to_pcache_desc(page); + page = pcd->master; + page_cache_get_speculative(page); + + unreplicate_pcache(mapping, page->index, 0); + + goto export; + } + if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) goto restart; @@ -1241,6 +1263,20 @@ repeat: if (unlikely(!page)) continue; + if (is_pcache_desc(page)) { + struct pcache_desc *pcd; + + printk(KERN_INFO "PCACHE_DESC\n"); + + pcd = ptr_to_pcache_desc(page); + page = pcd->master; + page_cache_get_speculative(page); + + unreplicate_pcache(mapping, page->index, 0); + + goto export; + } + if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { /* @@ -1268,6 +1304,7 @@ repeat: goto repeat; } +export: pages[ret] = page; if (++ret == nr_pages) break; @@ -1309,6 +1346,20 @@ repeat: if (unlikely(!page)) break; + if (is_pcache_desc(page)) { + struct pcache_desc *pcd; + + printk(KERN_INFO "PCACHE_DESC\n"); + + pcd = ptr_to_pcache_desc(page); + page = pcd->master; + page_cache_get_speculative(page); + + unreplicate_pcache(mapping, page->index, 0); + + goto export; + } + if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { /* @@ -1334,7 +1385,7 @@ repeat: page_cache_release(page); goto repeat; } - +export: /* * must check mapping and index after taking the ref. * otherwise we can get both false positives and false @@ -1385,6 +1436,20 @@ repeat: if (unlikely(!page)) continue; + if (is_pcache_desc(page)) { + struct pcache_desc *pcd; + + printk(KERN_INFO "PCACHE_DESC BUG!!!!!!!!!!\n"); + + pcd = ptr_to_pcache_desc(page); + page = pcd->master; + page_cache_get_speculative(page); + + unreplicate_pcache(mapping, page->index, 0); + + goto export; + } + if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { /* @@ -1416,7 +1481,7 @@ repeat: page_cache_release(page); goto repeat; } - +export: pages[ret] = page; if (++ret == nr_pages) break; @@ -1492,7 +1557,11 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, cond_resched(); find_page: - page = find_get_page(mapping, index); + if (is_realtime(current)) + page = find_get_page_readonly(mapping, index); + else + page = find_get_page(mapping, index); + if (!page) { page_cache_sync_readahead(mapping, ra, filp, @@ -1644,7 +1713,8 @@ readpage: unlock_page(page); } - goto page_ok; + page_cache_release(page); + goto find_page; readpage_error: /* UHHUH! A synchronous read error occurred. Report it */ @@ -1888,9 +1958,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* * Do we have something in the page cache already? */ -if (is_realtime(current)) - printk("FILEMAP_FAULT %ld\n", vma->vm_start); - page = find_get_page(mapping, offset); + if ((vmf->flags & FAULT_FLAG_WRITE) || !is_realtime(current)) + page = find_get_page(mapping, offset); + else + page = find_get_page_readonly(mapping, offset); + if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { /* * We found the page, so try async readahead before @@ -1904,7 +1976,10 @@ if (is_realtime(current)) mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; retry_find: - page = find_get_page(mapping, offset); + if ((vmf->flags & FAULT_FLAG_WRITE) || !is_realtime(current)) + page = find_get_page(mapping, offset); + else + page = find_get_page_readonly(mapping, offset); if (!page) goto no_cached_page; } @@ -2012,6 +2087,22 @@ repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) goto next; + + if (is_pcache_desc(page)) { + struct pcache_desc *pcd; + +printk(KERN_INFO "PCACHE_DESC FILE_MAP_PAGES\n"); + + pcd = ptr_to_pcache_desc(page); + page = pcd->master; + if (!page_cache_get_speculative(page)) + goto repeat; + + //unreplicate_pcache(mapping, page->index, 0); + + goto export; + } + if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) break; @@ -2027,7 +2118,7 @@ repeat: page_cache_release(page); goto repeat; } - +export: if (!PageUptodate(page) || PageReadahead(page) || PageHWPoison(page)) diff --git a/mm/internal.h b/mm/internal.h index a25e359a4039..ccc349b59d00 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -433,4 +433,14 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ #define ALLOC_FAIR 0x100 /* fair zone allocation */ +extern int reclaim_replicated_page(struct address_space *mapping, + struct page *page); +extern struct page *find_get_page_readonly(struct address_space *mapping, + unsigned long offset); +extern int is_pcache_desc(void *ptr); +extern struct pcache_desc *ptr_to_pcache_desc(void *ptr); +extern void *pcache_desc_to_ptr(struct pcache_desc *pcd); +extern void unreplicate_pcache(struct address_space *mapping, unsigned long offset, int locked); +int page_write_fault_retry(struct page *page); + #endif /* __MM_INTERNAL_H */ diff --git a/mm/memory.c b/mm/memory.c index 22e037e3364e..1fc358bec6d5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2231,15 +2231,24 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma, * read-only shared pages can get COWed by * get_user_pages(.write=1, .force=1). */ - if (vma->vm_ops && vma->vm_ops->page_mkwrite) { +// if (vma->vm_ops && vma->vm_ops->page_mkwrite) { + { int tmp; pte_unmap_unlock(page_table, ptl); - tmp = do_page_mkwrite(vma, old_page, address); - if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { + + if (page_write_fault_retry(old_page)) { page_cache_release(old_page); - return tmp; + return 0; + } + + if (vma->vm_ops && vma->vm_ops->page_mkwrite) { + tmp = do_page_mkwrite(vma, old_page, address); + if (unlikely(!tmp || (tmp & + (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) { + page_cache_release(old_page); + return tmp; + } } /* * Since we dropped the lock we need to revalidate diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7e39ffceb566..161af608b7e2 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2287,7 +2287,8 @@ int clear_page_dirty_for_io(struct page *page) BUG_ON(!PageLocked(page)); - if (mapping && mapping_cap_account_dirty(mapping)) { + //if (mapping && mapping_cap_account_dirty(mapping)) { + if (mapping) { /* * Yes, Virginia, this is indeed insane. * diff --git a/mm/vmscan.c b/mm/vmscan.c index 5e8eadd71bac..b9b6bef90169 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -582,6 +582,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); +again: spin_lock_irq(&mapping->tree_lock); /* * The non racy check for a busy page. @@ -640,7 +641,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (reclaimed && page_is_file_cache(page) && !mapping_exiting(mapping)) shadow = workingset_eviction(mapping, page); - __delete_from_page_cache(page, shadow); + if (PageReplicated(page)) { + if (reclaim_replicated_page(mapping, page)) + goto again; + } else + __delete_from_page_cache(page, shadow); spin_unlock_irq(&mapping->tree_lock); if (freepage != NULL) diff --git a/mm/vmstat.c b/mm/vmstat.c index 4f5cd974e11a..6af8ea00cbef 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -708,6 +708,7 @@ const char * const vmstat_text[] = { "nr_anon_pages", "nr_mapped", "nr_file_pages", + "nr_repl_pages", "nr_dirty", "nr_writeback", "nr_slab_reclaimable", @@ -760,6 +761,9 @@ const char * const vmstat_text[] = { "pgfault", "pgmajfault", + "pgreplicated", + "pgreplicazap", + TEXTS_FOR_ZONES("pgrefill") TEXTS_FOR_ZONES("pgsteal_kswapd") TEXTS_FOR_ZONES("pgsteal_direct") -- cgit v1.2.2