author	Namhoon Kim <namhoonk@cs.unc.edu>	2016-09-23 10:07:04 -0400
committer	Namhoon Kim <namhoonk@cs.unc.edu>	2016-09-23 10:07:04 -0400
commit	1b2c1185069cf723ac4122e7cad99d538f36d973 (patch)
tree	b888bd909775aabe98d8805e4c7391916e67cab9
parent	d7352bf3c9392104c34b56e2c0756a14db81b68a (diff)
9/23/2016 test
-rw-r--r--	mm/replication.c	575
1 file changed, 575 insertions(+), 0 deletions(-)
diff --git a/mm/replication.c b/mm/replication.c
new file mode 100644
index 000000000000..aab1553078c8
--- /dev/null
+++ b/mm/replication.c
@@ -0,0 +1,575 @@
/*
 * linux/mm/replication.c
 * pagecache replication
 */
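
/*
 * Overview of the scheme implemented below: a pagecache page may be
 * replicated with one read-only copy per CPU.  The slot in the mapping's
 * radix tree then holds a tagged pointer to a struct pcache_desc, which
 * tracks the master page and a per-CPU radix tree of replica pages,
 * instead of the plain struct page.  Faults and reclaim collapse the
 * replication back to the single master copy (see the
 * get_unreplicated_page*() and reclaim_replicated_page() paths below).
 */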
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

#include <litmus/litmus.h>

#include "internal.h"

#define MAX_NUMCPUS 4

static struct kmem_cache *pcache_desc_cachep;

void __init replication_init(void)
{
        pcache_desc_cachep = kmem_cache_create("pcache_desc",
                        sizeof(struct pcache_desc), 0, SLAB_PANIC, NULL);
        printk(KERN_INFO "Page replication initialized.\n");
}

static struct pcache_desc *alloc_pcache_desc(void)
{
        struct pcache_desc *ret;

        /*
         * GFP_ATOMIC because find_get_page_readonly() may be called in the
         * IO path and under rcu_read_lock, where we must not sleep.
         */
        ret = kmem_cache_alloc(pcache_desc_cachep, GFP_ATOMIC);
        if (ret) {
                memset(ret, 0, sizeof(struct pcache_desc));
                /* XXX: should use non-atomic preloads */
                INIT_RADIX_TREE(&ret->page_tree, GFP_ATOMIC);
        }
        return ret;
}

static void free_pcache_desc(struct pcache_desc *pcd)
{
        kmem_cache_free(pcache_desc_cachep, pcd);
}

/*
 * Free the struct pcache_desc and all slaves.  The pagecache refcount is
 * retained for the master (because presumably we're collapsing the
 * replication).
 *
 * Returns 1 if any of the slaves had a non-zero mapcount (in which case,
 * we'll have to unmap them), otherwise returns 0.
 */
static int release_pcache_desc(struct pcache_desc *pcd)
{
        int ret = 0;
        int i;

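        /*
         * Grab an extra reference on the master up front: the loop below
         * drops one reference for each slot it removes from the per-CPU
         * tree, and the pagecache's own reference to the master (which the
         * caller is putting back into the mapping) must survive that.
         */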
        page_cache_get(pcd->master);
        for_each_cpu(i, &pcd->cpus_present) {
                struct page *page;

                page = radix_tree_delete(&pcd->page_tree, i);
                BUG_ON(!page);
                if (page != pcd->master) {
                        BUG_ON(PageDirty(page));
                        BUG_ON(!PageUptodate(page));
                        dec_zone_page_state(page, NR_REPL_PAGES);
                        page->mapping = NULL;
                        if (page_mapped(page))
                                ret = 1; /* tell caller to unmap the ptes */
                }
                page_cache_release(page);
        }

        free_pcache_desc(pcd);

        return ret;
}

#define PCACHE_DESC_BIT	4	/* bits 0-1 are used internally by the radix tree */

static inline int __is_pcache_desc(void *ptr)
{
        if ((unsigned long)ptr & PCACHE_DESC_BIT)
                return 1;
        return 0;
}

int is_pcache_desc(void *ptr)
{
        return __is_pcache_desc(ptr);
}

struct pcache_desc *ptr_to_pcache_desc(void *ptr)
{
        BUG_ON(!__is_pcache_desc(ptr));
        return (struct pcache_desc *)((unsigned long)ptr & ~PCACHE_DESC_BIT);
}

void *pcache_desc_to_ptr(struct pcache_desc *pcd)
{
        BUG_ON(__is_pcache_desc(pcd));
        return (void *)((unsigned long)pcd | PCACHE_DESC_BIT);
}
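
/*
 * Note: assuming at least 8-byte object alignment (true for mem_map pages
 * and kmem_cache allocations on 64-bit), bit 2 of either pointer is
 * normally clear and is free to carry the tag; the BUG_ONs above check
 * this invariant.
 */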

/*
 * Must be called with the page locked and tree_lock held to give a non-racy
 * answer.
 */
static int should_replicate_pcache(struct page *page, struct address_space *mapping,
                unsigned long offset)
{
        umode_t mode;

        if (unlikely(PageSwapCache(page)))
                return 0;

        printk(KERN_INFO "[Pg %lu] _count = %d, _mapcount = %d\n",
                page_to_pfn(page), page_count(page), page_mapcount(page));
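        /*
         * Only replicate quiescent pages: the pagecache holds one reference,
         * our caller holds another, and each pte mapping accounts for one
         * more.  Any additional reference means someone else might write to
         * the page under us.
         */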
        if (page_count(page) != 2 + page_mapcount(page))
                return 0;
        smp_rmb();
        if (!PageUptodate(page) || PageDirty(page) || PageWriteback(page))
                return 0;

        if (!PagePrivate(page))
                return 1;

        mode = mapping->host->i_mode;
        if (S_ISREG(mode) || S_ISBLK(mode))
                return 1;

        return 0;
}

/*
 * Try to convert the pagecache page residing at (mapping, offset) into a
 * replicated pagecache entry.
 *
 * Returns 1 if we leave with a successfully converted pagecache entry,
 * otherwise 0.  (Note that the return value is racy, so it is only a hint.)
 */
static int try_to_replicate_pcache(struct page *page, struct address_space *mapping,
                unsigned long offset)
{
        void **pslot;
        struct pcache_desc *pcd;
        int ret = 0;

        if (!trylock_page(page)) {
                printk(KERN_INFO "TRYLOCK_PAGE failed\n");
                return ret;
        }

        if (unlikely(!page->mapping))
                goto out;

        pcd = alloc_pcache_desc();
        if (!pcd)
                goto out;

        if (!tsk_rt(current)) {
                BUG();
                goto out_pcd;
        }

        pcd->master = page;

        spin_lock_irq(&mapping->tree_lock);

        /* The non-racy check */
        if (unlikely(!should_replicate_pcache(page, mapping, offset)))
                goto out_lock;

        pslot = radix_tree_lookup_slot(&mapping->page_tree, offset);

        /* Already been replicated? Return yes! */
        if (unlikely(is_pcache_desc(radix_tree_deref_slot(pslot)))) {
                free_pcache_desc(pcd);
                ret = 1;
                goto out_lock;
        }

        /*
         * The page is held in the pagecache and kept unreplicated because
         * it is locked; the following checks verify that.
         */
        BUG_ON(!pslot);
        BUG_ON(page != radix_tree_deref_slot(pslot));
        BUG_ON(is_pcache_desc(radix_tree_deref_slot(pslot)));

        radix_tree_replace_slot(pslot, pcache_desc_to_ptr(pcd));
        radix_tree_tag_set(&mapping->page_tree, offset, PAGECACHE_TAG_REPLICATED);
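        /*
         * From this point on, lookups of (mapping, offset) see the tagged
         * descriptor instead of the raw page; per-CPU replicas are attached
         * lazily by find_get_page_readonly().
         */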
        ret = 1;

out_lock:
        spin_unlock_irq(&mapping->tree_lock);
out_pcd:
        if (ret == 0)
                free_pcache_desc(pcd);
out:
        unlock_page(page);
        return ret;
}

/*
 * Called with tree_lock held for write, and (mapping, offset) guaranteed to
 * be replicated.  Drops tree_lock.
 */
static void __unreplicate_pcache(struct address_space *mapping,
                unsigned long offset)
{
        void **pslot;
        struct pcache_desc *pcd;
        struct page *page;

        pslot = radix_tree_lookup_slot(&mapping->page_tree, offset);

        /* Gone? Success */
        if (unlikely(!pslot)) {
                spin_unlock_irq(&mapping->tree_lock);
                return;
        }

        /* Already been un-replicated? Success */
        if (unlikely(!is_pcache_desc(radix_tree_deref_slot(pslot)))) {
                spin_unlock_irq(&mapping->tree_lock);
                return;
        }

        pcd = ptr_to_pcache_desc(radix_tree_deref_slot(pslot));

        page = pcd->master;
        BUG_ON(PageDirty(page));
        BUG_ON(!PageUptodate(page));

        radix_tree_replace_slot(pslot, page);
        radix_tree_tag_clear(&mapping->page_tree, offset, PAGECACHE_TAG_REPLICATED);

        spin_unlock_irq(&mapping->tree_lock);

        /*
         * XXX: this actually changes all the find_get_pages APIs, so
         * we might want to just coax unmap_mapping_range into not
         * sleeping instead.
         */
        //might_sleep();

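        /*
         * If any replica is still mapped into user page tables, those ptes
         * point at replica pages that are no longer reachable through the
         * pagecache; tear them down so the next fault finds the master.
         */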
        if (release_pcache_desc(pcd)) {
                /* release_pcache_desc saw some mapped slaves */
                unmap_mapping_range(mapping, (loff_t)offset << PAGE_CACHE_SHIFT,
                                        PAGE_CACHE_SIZE, 0);
        }
}

/*
 * Collapse pagecache coordinate (mapping, offset) into a non-replicated
 * state.  Must not fail.
 */
void unreplicate_pcache(struct address_space *mapping, unsigned long offset)
{
        spin_lock_irq(&mapping->tree_lock);
        __unreplicate_pcache(mapping, offset);
}

/*
 * Insert a newly replicated page into (mapping, offset) for the given CPU.
 * Called without tree_lock.  May not be successful.
 *
 * Returns 1 on success, otherwise 0.
 */
static int insert_replicated_page(struct page *page, struct address_space *mapping,
                unsigned long offset, int cpu)
{
        void **pslot;
        struct pcache_desc *pcd;

        BUG_ON(!PageUptodate(page));

        spin_lock_irq(&mapping->tree_lock);
        pslot = radix_tree_lookup_slot(&mapping->page_tree, offset);

        /* Truncated? */
        if (unlikely(!pslot))
                goto failed;

        /* Not replicated? */
        if (unlikely(!is_pcache_desc(radix_tree_deref_slot(pslot))))
                goto failed;

        pcd = ptr_to_pcache_desc(radix_tree_deref_slot(pslot));

        if (unlikely(cpumask_test_cpu(cpu, &pcd->cpus_present)))
                goto failed;

        if (radix_tree_insert(&pcd->page_tree, cpu, page))
                goto failed;

        page_cache_get(page);
        cpumask_set_cpu(cpu, &pcd->cpus_present);
        __inc_zone_page_state(page, NR_REPL_PAGES);
        spin_unlock_irq(&mapping->tree_lock);

        page->mapping = mapping;
        page->index = offset;

        lru_cache_add(page);

        return 1;

failed:
        spin_unlock_irq(&mapping->tree_lock);
        return 0;
}
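
/*
 * Note that the replica inserted above goes onto the LRU like any other
 * pagecache page, so memory pressure can presumably find it later;
 * reclaim_replicated_page() below handles both masters and replicas.
 */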

/*
 * Remove a replicated (non-master) page.  Called with tree_lock held for
 * write.
 */
static void __remove_replicated_page(struct pcache_desc *pcd, struct page *page,
                struct address_space *mapping, unsigned long offset)
{
        int cpu;

        BUG_ON(page == pcd->master);

        for_each_cpu(cpu, &pcd->cpus_present) {
                if (radix_tree_lookup(&pcd->page_tree, cpu) != page)
                        continue;
                BUG_ON(radix_tree_delete(&pcd->page_tree, cpu) != page);
                cpumask_clear_cpu(cpu, &pcd->cpus_present);
                page->mapping = NULL;
                __dec_zone_page_state(page, NR_REPL_PAGES);
                return;
        }
        BUG();
}

/*
 * Reclaim a replicated page.  Called with tree_lock held for write and the
 * page locked.
 * If the page is the master, the whole replication is collapsed: tree_lock
 * is dropped, 1 is returned, and the caller should retry.  Otherwise the
 * replica is removed, tree_lock is retained, and 0 is returned.
 */
int reclaim_replicated_page(struct address_space *mapping, struct page *page)
{
        struct pcache_desc *pcd;

        pcd = ptr_to_pcache_desc(radix_tree_lookup(&mapping->page_tree,
                                                        page->index));
        if (page == pcd->master) {
                __unreplicate_pcache(mapping, page->index);
                return 1;
        } else {
                __remove_replicated_page(pcd, page, mapping, page->index);
                return 0;
        }
}

/*
 * Try to create a replica of the page for the given CPU.
 * Called without any locks held; the master page has its refcount elevated.
 * Returns the newly replicated page with an elevated refcount on success,
 * or the master page (refcount still elevated) on failure.
 */
static struct page *try_to_create_replica(struct address_space *mapping,
                unsigned long offset, struct page *page, int cpu)
{
        struct page *repl_page;

        /*
         * GFP_ATOMIC because we may be in an atomic context; a NUMA-aware
         * allocation (e.g. alloc_pages_node()) could be used here instead.
         */
        repl_page = alloc_pages(GFP_ATOMIC, 0);
        if (!repl_page)
                return page; /* failed alloc, just return the master */

        copy_highpage(repl_page, page);
        flush_dcache_page(repl_page);
        SetPageUptodate(repl_page); /* XXX: can use nonatomic */

        /* insert_replicated_page() sets repl_page->mapping and ->index */
        if (!insert_replicated_page(repl_page, mapping, offset, cpu)) {
                /* Raced with truncate or unreplication; keep the master. */
                page_cache_release(repl_page);
                return page;
        }

        printk(KERN_INFO "[Pg %lu] P%d copied to %lu\n",
                page_to_pfn(page), cpu, page_to_pfn(repl_page));
        page_cache_release(page);
        return repl_page;
}

/**
 * find_get_page_readonly - find and get a page reference, preferring a
 *			    per-CPU replica
 * @mapping: the address_space to search
 * @offset: the page index
 *
 * Like find_get_page(), but when the page is replicated, the current CPU's
 * replica is returned (and created on demand); when it is not, we try to
 * set the replication up.  The returned page must not be written to.
 */
struct page *find_get_page_readonly(struct address_space *mapping,
                unsigned long offset)
{
        int cpu;
        struct page *page = NULL;

        rcu_read_lock();
retry:
        if (!tsk_rt(current))
                goto unlock;

        cpu = tsk_rt(current)->task_params.cpu;
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (!page)
                goto unlock;

        if (is_pcache_desc(page)) {
                struct pcache_desc *pcd;

                pcd = ptr_to_pcache_desc(page);
                if (!cpumask_test_cpu(cpu, &pcd->cpus_present)) {
                        page = pcd->master;
                        page_cache_get(page);

                        page = try_to_create_replica(mapping, offset, page, cpu);
                        printk(KERN_INFO "[Pg %lu] P%d SECOND TRY: page replicated\n",
                                page_to_pfn(page), cpu);
                } else {
                        page = radix_tree_lookup(&pcd->page_tree, cpu);
                        page_cache_get(page);
                        printk(KERN_INFO "[Pg %lu] P%d replicated page found\n",
                                page_to_pfn(page), cpu);
                }
                BUG_ON(!page);
                goto unlock;
        }

        page_cache_get(page);

        if (should_replicate_pcache(page, mapping, offset)) {
                if (try_to_replicate_pcache(page, mapping, offset)) {
                        page_cache_release(page);
                        printk(KERN_INFO "[Pg %lu] P%d FIRST TRY: replace page with pcd\n",
                                page_to_pfn(page), cpu);
                        goto retry;
                }
        }
unlock:
        rcu_read_unlock();
        return page;
}
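
/*
 * Example usage (hypothetical caller), reading file data on the current
 * CPU without ever writing to the page:
 *
 *	struct page *page;
 *
 *	page = find_get_page_readonly(mapping, index);
 *	if (page) {
 *		void *kaddr = kmap_atomic(page);
 *		... read from kaddr ...
 *		kunmap_atomic(kaddr);
 *		page_cache_release(page);
 *	}
 */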
/*
 * Takes whatever was found in the mapping's radix tree at (mapping, offset)
 * (either a page or a tagged pcache_desc pointer) and returns an
 * unreplicated page with an elevated refcount.
 *
 * Called with rcu_read_lock held for read.
 */
struct page *get_unreplicated_page(struct address_space *mapping,
                unsigned long offset, struct page *page)
{
        if (page) {
                if (is_pcache_desc(page)) {
                        struct pcache_desc *pcd;

                        pcd = ptr_to_pcache_desc(page);
                        page = pcd->master;
                        page_cache_get(page);

                        unreplicate_pcache(mapping, page->index);

                        return page;
                }

                page_cache_get(page);
        }
        return page;
}

/*
 * Collapse a possible page replication.  The page is held unreplicated by
 * the elevated refcount on the passed-in page.
 */
struct page *get_unreplicated_page_fault(struct page *page)
{
        struct address_space *mapping;
        struct page *master;
        pgoff_t offset;

        /* XXX: could race with truncate, but truncate will at least remove the pte */
        offset = page->index;
        mapping = page->mapping;
        if (!mapping)
                return page;

        /*
         * Take the page lock in order to ensure that we're synchronised
         * against another task doing clear_page_dirty_for_io()
         */
        master = find_lock_entry(mapping, offset);
        if (master) {
                /*
                 * Dirty the page to prevent the replication from being
                 * set up again.
                 */
                set_page_dirty(master);
                unlock_page(master);
        }

        return master;
}