aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Borntraeger <borntraeger@de.ibm.com>2008-03-25 13:47:12 -0400
committerAvi Kivity <avi@qumranet.com>2008-04-27 05:00:40 -0400
commit5b7baf05783b1ac97a510243d7e82293416a7cf6 (patch)
tree15b753206d9a3fd5edd0029405f8acfc69237861
parent402b08622d9ac6e32e25289573272e0f21bb58a7 (diff)
s390: KVM preparation: host memory management changes for s390 kvm
This patch changes the s390 memory management definitions to use the pgste field for dirty and reference bit tracking of host and guest code. Usually on s390, dirty and referenced are tracked in storage keys, which belong to the physical page. This changes with virtualization: The guest and host dirty/reference bits are defined to be the logical OR of the values for the mapping and the physical page. This patch implements the necessary changes in pgtable.h for s390. There is a common code change in mm/rmap.c, the call to page_test_and_clear_young must be moved. This is a no-op for all architectures but s390. page_referenced checks the referenced bits for the physical page and for all mappings: o The physical page is checked with page_test_and_clear_young. o The mappings are checked with ptep_test_and_clear_young and friends. Without pgstes (the current implementation on Linux s390) the physical page check is implemented but the mapping callbacks are no-ops because dirty and referenced are not tracked in the s390 page tables. The pgstes introduce guest and host dirty and reference bits for s390 in the host mapping. These mappings must be checked before page_test_and_clear_young resets the reference bit. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Acked-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r--include/asm-s390/pgtable.h92
-rw-r--r--mm/rmap.c7
2 files changed, 93 insertions, 6 deletions
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 8e9a629dc199..7fe5c4b6d82d 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -30,6 +30,7 @@
30 */ 30 */
31#ifndef __ASSEMBLY__ 31#ifndef __ASSEMBLY__
32#include <linux/mm_types.h> 32#include <linux/mm_types.h>
33#include <asm/bitops.h>
33#include <asm/bug.h> 34#include <asm/bug.h>
34#include <asm/processor.h> 35#include <asm/processor.h>
35 36
@@ -258,6 +259,13 @@ extern char empty_zero_page[PAGE_SIZE];
258 * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. 259 * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
259 */ 260 */
260 261
262/* Page status table bits for virtualization */
263#define RCP_PCL_BIT 55
264#define RCP_HR_BIT 54
265#define RCP_HC_BIT 53
266#define RCP_GR_BIT 50
267#define RCP_GC_BIT 49
268
261#ifndef __s390x__ 269#ifndef __s390x__
262 270
263/* Bits in the segment table address-space-control-element */ 271/* Bits in the segment table address-space-control-element */
@@ -513,6 +521,48 @@ static inline int pte_file(pte_t pte)
513#define __HAVE_ARCH_PTE_SAME 521#define __HAVE_ARCH_PTE_SAME
514#define pte_same(a,b) (pte_val(a) == pte_val(b)) 522#define pte_same(a,b) (pte_val(a) == pte_val(b))
515 523
524static inline void rcp_lock(pte_t *ptep)
525{
526#ifdef CONFIG_PGSTE
527 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
528 preempt_disable();
529 while (test_and_set_bit(RCP_PCL_BIT, pgste))
530 ;
531#endif
532}
533
534static inline void rcp_unlock(pte_t *ptep)
535{
536#ifdef CONFIG_PGSTE
537 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
538 clear_bit(RCP_PCL_BIT, pgste);
539 preempt_enable();
540#endif
541}
542
543/* forward declaration for SetPageUptodate in page-flags.h*/
544static inline void page_clear_dirty(struct page *page);
545#include <linux/page-flags.h>
546
547static inline void ptep_rcp_copy(pte_t *ptep)
548{
549#ifdef CONFIG_PGSTE
550 struct page *page = virt_to_page(pte_val(*ptep));
551 unsigned int skey;
552 unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
553
554 skey = page_get_storage_key(page_to_phys(page));
555 if (skey & _PAGE_CHANGED)
556 set_bit(RCP_GC_BIT, pgste);
557 if (skey & _PAGE_REFERENCED)
558 set_bit(RCP_GR_BIT, pgste);
559 if (test_and_clear_bit(RCP_HC_BIT, pgste))
560 SetPageDirty(page);
561 if (test_and_clear_bit(RCP_HR_BIT, pgste))
562 SetPageReferenced(page);
563#endif
564}
565
516/* 566/*
517 * query functions pte_write/pte_dirty/pte_young only work if 567 * query functions pte_write/pte_dirty/pte_young only work if
518 * pte_present() is true. Undefined behaviour if not.. 568 * pte_present() is true. Undefined behaviour if not..
@@ -599,6 +649,8 @@ static inline void pmd_clear(pmd_t *pmd)
599 649
600static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 650static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
601{ 651{
652 if (mm->context.pgstes)
653 ptep_rcp_copy(ptep);
602 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 654 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
603 if (mm->context.noexec) 655 if (mm->context.noexec)
604 pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; 656 pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
@@ -667,6 +719,24 @@ static inline pte_t pte_mkyoung(pte_t pte)
667static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 719static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
668 unsigned long addr, pte_t *ptep) 720 unsigned long addr, pte_t *ptep)
669{ 721{
722#ifdef CONFIG_PGSTE
723 unsigned long physpage;
724 int young;
725 unsigned long *pgste;
726
727 if (!vma->vm_mm->context.pgstes)
728 return 0;
729 physpage = pte_val(*ptep) & PAGE_MASK;
730 pgste = (unsigned long *) (ptep + PTRS_PER_PTE);
731
732 young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0);
733 rcp_lock(ptep);
734 if (young)
735 set_bit(RCP_GR_BIT, pgste);
736 young |= test_and_clear_bit(RCP_HR_BIT, pgste);
737 rcp_unlock(ptep);
738 return young;
739#endif
670 return 0; 740 return 0;
671} 741}
672 742
@@ -674,7 +744,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
674static inline int ptep_clear_flush_young(struct vm_area_struct *vma, 744static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
675 unsigned long address, pte_t *ptep) 745 unsigned long address, pte_t *ptep)
676{ 746{
677 /* No need to flush TLB; bits are in storage key */ 747 /* No need to flush TLB
748 * On s390 reference bits are in storage key and never in TLB
749 * With virtualization we handle the reference bit, without we
750 * can simply return */
751#ifdef CONFIG_PGSTE
752 return ptep_test_and_clear_young(vma, address, ptep);
753#endif
678 return 0; 754 return 0;
679} 755}
680 756
@@ -693,15 +769,25 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
693 : "=m" (*ptep) : "m" (*ptep), 769 : "=m" (*ptep) : "m" (*ptep),
694 "a" (pto), "a" (address)); 770 "a" (pto), "a" (address));
695 } 771 }
696 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
697} 772}
698 773
699static inline void ptep_invalidate(struct mm_struct *mm, 774static inline void ptep_invalidate(struct mm_struct *mm,
700 unsigned long address, pte_t *ptep) 775 unsigned long address, pte_t *ptep)
701{ 776{
777 if (mm->context.pgstes) {
778 rcp_lock(ptep);
779 __ptep_ipte(address, ptep);
780 ptep_rcp_copy(ptep);
781 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
782 rcp_unlock(ptep);
783 return;
784 }
702 __ptep_ipte(address, ptep); 785 __ptep_ipte(address, ptep);
703 if (mm->context.noexec) 786 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
787 if (mm->context.noexec) {
704 __ptep_ipte(address, ptep + PTRS_PER_PTE); 788 __ptep_ipte(address, ptep + PTRS_PER_PTE);
789 pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
790 }
705} 791}
706 792
707/* 793/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 997f06907b6d..e9bb6b1093f6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -413,9 +413,6 @@ int page_referenced(struct page *page, int is_locked,
413{ 413{
414 int referenced = 0; 414 int referenced = 0;
415 415
416 if (page_test_and_clear_young(page))
417 referenced++;
418
419 if (TestClearPageReferenced(page)) 416 if (TestClearPageReferenced(page))
420 referenced++; 417 referenced++;
421 418
@@ -433,6 +430,10 @@ int page_referenced(struct page *page, int is_locked,
433 unlock_page(page); 430 unlock_page(page);
434 } 431 }
435 } 432 }
433
434 if (page_test_and_clear_young(page))
435 referenced++;
436
436 return referenced; 437 return referenced;
437} 438}
438 439