diff options
Diffstat (limited to 'arch/s390/include/asm/pgtable.h')
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 607 |
1 files changed, 312 insertions, 295 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 02ace3491c51..c4773a2ef3d3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -31,9 +31,8 @@ | |||
31 | #ifndef __ASSEMBLY__ | 31 | #ifndef __ASSEMBLY__ |
32 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
33 | #include <linux/mm_types.h> | 33 | #include <linux/mm_types.h> |
34 | #include <asm/bitops.h> | ||
35 | #include <asm/bug.h> | 34 | #include <asm/bug.h> |
36 | #include <asm/processor.h> | 35 | #include <asm/page.h> |
37 | 36 | ||
38 | extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); | 37 | extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); |
39 | extern void paging_init(void); | 38 | extern void paging_init(void); |
@@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START; | |||
243 | /* Software bits in the page table entry */ | 242 | /* Software bits in the page table entry */ |
244 | #define _PAGE_SWT 0x001 /* SW pte type bit t */ | 243 | #define _PAGE_SWT 0x001 /* SW pte type bit t */ |
245 | #define _PAGE_SWX 0x002 /* SW pte type bit x */ | 244 | #define _PAGE_SWX 0x002 /* SW pte type bit x */ |
246 | #define _PAGE_SPECIAL 0x004 /* SW associated with special page */ | 245 | #define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */ |
246 | #define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */ | ||
247 | #define _PAGE_SPECIAL 0x010 /* SW associated with special page */ | ||
247 | #define __HAVE_ARCH_PTE_SPECIAL | 248 | #define __HAVE_ARCH_PTE_SPECIAL |
248 | 249 | ||
249 | /* Set of bits not changed in pte_modify */ | 250 | /* Set of bits not changed in pte_modify */ |
250 | #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL) | 251 | #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR) |
251 | 252 | ||
252 | /* Six different types of pages. */ | 253 | /* Six different types of pages. */ |
253 | #define _PAGE_TYPE_EMPTY 0x400 | 254 | #define _PAGE_TYPE_EMPTY 0x400 |
@@ -256,8 +257,6 @@ extern unsigned long VMALLOC_START; | |||
256 | #define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ | 257 | #define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ |
257 | #define _PAGE_TYPE_RO 0x200 | 258 | #define _PAGE_TYPE_RO 0x200 |
258 | #define _PAGE_TYPE_RW 0x000 | 259 | #define _PAGE_TYPE_RW 0x000 |
259 | #define _PAGE_TYPE_EX_RO 0x202 | ||
260 | #define _PAGE_TYPE_EX_RW 0x002 | ||
261 | 260 | ||
262 | /* | 261 | /* |
263 | * Only four types for huge pages, using the invalid bit and protection bit | 262 | * Only four types for huge pages, using the invalid bit and protection bit |
@@ -287,8 +286,6 @@ extern unsigned long VMALLOC_START; | |||
287 | * _PAGE_TYPE_FILE 11?1 -> 11?1 | 286 | * _PAGE_TYPE_FILE 11?1 -> 11?1 |
288 | * _PAGE_TYPE_RO 0100 -> 1100 | 287 | * _PAGE_TYPE_RO 0100 -> 1100 |
289 | * _PAGE_TYPE_RW 0000 -> 1000 | 288 | * _PAGE_TYPE_RW 0000 -> 1000 |
290 | * _PAGE_TYPE_EX_RO 0110 -> 1110 | ||
291 | * _PAGE_TYPE_EX_RW 0010 -> 1010 | ||
292 | * | 289 | * |
293 | * pte_none is true for bits combinations 1000, 1010, 1100, 1110 | 290 | * pte_none is true for bits combinations 1000, 1010, 1100, 1110 |
294 | * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 | 291 | * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 |
@@ -297,14 +294,17 @@ extern unsigned long VMALLOC_START; | |||
297 | */ | 294 | */ |
298 | 295 | ||
299 | /* Page status table bits for virtualization */ | 296 | /* Page status table bits for virtualization */ |
300 | #define RCP_PCL_BIT 55 | 297 | #define RCP_ACC_BITS 0xf000000000000000UL |
301 | #define RCP_HR_BIT 54 | 298 | #define RCP_FP_BIT 0x0800000000000000UL |
302 | #define RCP_HC_BIT 53 | 299 | #define RCP_PCL_BIT 0x0080000000000000UL |
303 | #define RCP_GR_BIT 50 | 300 | #define RCP_HR_BIT 0x0040000000000000UL |
304 | #define RCP_GC_BIT 49 | 301 | #define RCP_HC_BIT 0x0020000000000000UL |
305 | 302 | #define RCP_GR_BIT 0x0004000000000000UL | |
306 | /* User dirty bit for KVM's migration feature */ | 303 | #define RCP_GC_BIT 0x0002000000000000UL |
307 | #define KVM_UD_BIT 47 | 304 | |
305 | /* User dirty / referenced bit for KVM's migration feature */ | ||
306 | #define KVM_UR_BIT 0x0000800000000000UL | ||
307 | #define KVM_UC_BIT 0x0000400000000000UL | ||
308 | 308 | ||
309 | #ifndef __s390x__ | 309 | #ifndef __s390x__ |
310 | 310 | ||
@@ -377,85 +377,54 @@ extern unsigned long VMALLOC_START; | |||
377 | #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ | 377 | #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ |
378 | _ASCE_ALT_EVENT) | 378 | _ASCE_ALT_EVENT) |
379 | 379 | ||
380 | /* Bits in the storage key */ | ||
381 | #define _PAGE_CHANGED 0x02 /* HW changed bit */ | ||
382 | #define _PAGE_REFERENCED 0x04 /* HW referenced bit */ | ||
383 | |||
384 | /* | 380 | /* |
385 | * Page protection definitions. | 381 | * Page protection definitions. |
386 | */ | 382 | */ |
387 | #define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) | 383 | #define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) |
388 | #define PAGE_RO __pgprot(_PAGE_TYPE_RO) | 384 | #define PAGE_RO __pgprot(_PAGE_TYPE_RO) |
389 | #define PAGE_RW __pgprot(_PAGE_TYPE_RW) | 385 | #define PAGE_RW __pgprot(_PAGE_TYPE_RW) |
390 | #define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO) | ||
391 | #define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW) | ||
392 | 386 | ||
393 | #define PAGE_KERNEL PAGE_RW | 387 | #define PAGE_KERNEL PAGE_RW |
394 | #define PAGE_COPY PAGE_RO | 388 | #define PAGE_COPY PAGE_RO |
395 | 389 | ||
396 | /* | 390 | /* |
397 | * Dependent on the EXEC_PROTECT option s390 can do execute protection. | 391 | * On s390 the page table entry has an invalid bit and a read-only bit. |
398 | * Write permission always implies read permission. In theory with a | 392 | * Read permission implies execute permission and write permission |
399 | * primary/secondary page table execute only can be implemented but | 393 | * implies read permission. |
400 | * it would cost an additional bit in the pte to distinguish all the | ||
401 | * different pte types. To avoid that execute permission currently | ||
402 | * implies read permission as well. | ||
403 | */ | 394 | */ |
404 | /*xwr*/ | 395 | /*xwr*/ |
405 | #define __P000 PAGE_NONE | 396 | #define __P000 PAGE_NONE |
406 | #define __P001 PAGE_RO | 397 | #define __P001 PAGE_RO |
407 | #define __P010 PAGE_RO | 398 | #define __P010 PAGE_RO |
408 | #define __P011 PAGE_RO | 399 | #define __P011 PAGE_RO |
409 | #define __P100 PAGE_EX_RO | 400 | #define __P100 PAGE_RO |
410 | #define __P101 PAGE_EX_RO | 401 | #define __P101 PAGE_RO |
411 | #define __P110 PAGE_EX_RO | 402 | #define __P110 PAGE_RO |
412 | #define __P111 PAGE_EX_RO | 403 | #define __P111 PAGE_RO |
413 | 404 | ||
414 | #define __S000 PAGE_NONE | 405 | #define __S000 PAGE_NONE |
415 | #define __S001 PAGE_RO | 406 | #define __S001 PAGE_RO |
416 | #define __S010 PAGE_RW | 407 | #define __S010 PAGE_RW |
417 | #define __S011 PAGE_RW | 408 | #define __S011 PAGE_RW |
418 | #define __S100 PAGE_EX_RO | 409 | #define __S100 PAGE_RO |
419 | #define __S101 PAGE_EX_RO | 410 | #define __S101 PAGE_RO |
420 | #define __S110 PAGE_EX_RW | 411 | #define __S110 PAGE_RW |
421 | #define __S111 PAGE_EX_RW | 412 | #define __S111 PAGE_RW |
422 | |||
423 | #ifndef __s390x__ | ||
424 | # define PxD_SHADOW_SHIFT 1 | ||
425 | #else /* __s390x__ */ | ||
426 | # define PxD_SHADOW_SHIFT 2 | ||
427 | #endif /* __s390x__ */ | ||
428 | 413 | ||
429 | static inline void *get_shadow_table(void *table) | 414 | static inline int mm_exclusive(struct mm_struct *mm) |
430 | { | 415 | { |
431 | unsigned long addr, offset; | 416 | return likely(mm == current->active_mm && |
432 | struct page *page; | 417 | atomic_read(&mm->context.attach_count) <= 1); |
433 | |||
434 | addr = (unsigned long) table; | ||
435 | offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1); | ||
436 | page = virt_to_page((void *)(addr ^ offset)); | ||
437 | return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL); | ||
438 | } | 418 | } |
439 | 419 | ||
440 | /* | 420 | static inline int mm_has_pgste(struct mm_struct *mm) |
441 | * Certain architectures need to do special things when PTEs | ||
442 | * within a page table are directly modified. Thus, the following | ||
443 | * hook is made available. | ||
444 | */ | ||
445 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
446 | pte_t *ptep, pte_t entry) | ||
447 | { | 421 | { |
448 | *ptep = entry; | 422 | #ifdef CONFIG_PGSTE |
449 | if (mm->context.noexec) { | 423 | if (unlikely(mm->context.has_pgste)) |
450 | if (!(pte_val(entry) & _PAGE_INVALID) && | 424 | return 1; |
451 | (pte_val(entry) & _PAGE_SWX)) | 425 | #endif |
452 | pte_val(entry) |= _PAGE_RO; | 426 | return 0; |
453 | else | ||
454 | pte_val(entry) = _PAGE_TYPE_EMPTY; | ||
455 | ptep[PTRS_PER_PTE] = entry; | ||
456 | } | ||
457 | } | 427 | } |
458 | |||
459 | /* | 428 | /* |
460 | * pgd/pmd/pte query functions | 429 | * pgd/pmd/pte query functions |
461 | */ | 430 | */ |
@@ -568,52 +537,127 @@ static inline int pte_special(pte_t pte) | |||
568 | } | 537 | } |
569 | 538 | ||
570 | #define __HAVE_ARCH_PTE_SAME | 539 | #define __HAVE_ARCH_PTE_SAME |
571 | #define pte_same(a,b) (pte_val(a) == pte_val(b)) | 540 | static inline int pte_same(pte_t a, pte_t b) |
541 | { | ||
542 | return pte_val(a) == pte_val(b); | ||
543 | } | ||
572 | 544 | ||
573 | static inline void rcp_lock(pte_t *ptep) | 545 | static inline pgste_t pgste_get_lock(pte_t *ptep) |
574 | { | 546 | { |
547 | unsigned long new = 0; | ||
575 | #ifdef CONFIG_PGSTE | 548 | #ifdef CONFIG_PGSTE |
576 | unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | 549 | unsigned long old; |
550 | |||
577 | preempt_disable(); | 551 | preempt_disable(); |
578 | while (test_and_set_bit(RCP_PCL_BIT, pgste)) | 552 | asm( |
579 | ; | 553 | " lg %0,%2\n" |
554 | "0: lgr %1,%0\n" | ||
555 | " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ | ||
556 | " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ | ||
557 | " csg %0,%1,%2\n" | ||
558 | " jl 0b\n" | ||
559 | : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) | ||
560 | : "Q" (ptep[PTRS_PER_PTE]) : "cc"); | ||
580 | #endif | 561 | #endif |
562 | return __pgste(new); | ||
581 | } | 563 | } |
582 | 564 | ||
583 | static inline void rcp_unlock(pte_t *ptep) | 565 | static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) |
584 | { | 566 | { |
585 | #ifdef CONFIG_PGSTE | 567 | #ifdef CONFIG_PGSTE |
586 | unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | 568 | asm( |
587 | clear_bit(RCP_PCL_BIT, pgste); | 569 | " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ |
570 | " stg %1,%0\n" | ||
571 | : "=Q" (ptep[PTRS_PER_PTE]) | ||
572 | : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc"); | ||
588 | preempt_enable(); | 573 | preempt_enable(); |
589 | #endif | 574 | #endif |
590 | } | 575 | } |
591 | 576 | ||
592 | /* forward declaration for SetPageUptodate in page-flags.h*/ | 577 | static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) |
593 | static inline void page_clear_dirty(struct page *page, int mapped); | ||
594 | #include <linux/page-flags.h> | ||
595 | |||
596 | static inline void ptep_rcp_copy(pte_t *ptep) | ||
597 | { | 578 | { |
598 | #ifdef CONFIG_PGSTE | 579 | #ifdef CONFIG_PGSTE |
599 | struct page *page = virt_to_page(pte_val(*ptep)); | 580 | unsigned long pfn, bits; |
600 | unsigned int skey; | 581 | unsigned char skey; |
601 | unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | 582 | |
602 | 583 | pfn = pte_val(*ptep) >> PAGE_SHIFT; | |
603 | skey = page_get_storage_key(page_to_phys(page)); | 584 | skey = page_get_storage_key(pfn); |
604 | if (skey & _PAGE_CHANGED) { | 585 | bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); |
605 | set_bit_simple(RCP_GC_BIT, pgste); | 586 | /* Clear page changed & referenced bit in the storage key */ |
606 | set_bit_simple(KVM_UD_BIT, pgste); | 587 | if (bits) { |
588 | skey ^= bits; | ||
589 | page_set_storage_key(pfn, skey, 1); | ||
607 | } | 590 | } |
608 | if (skey & _PAGE_REFERENCED) | 591 | /* Transfer page changed & referenced bit to guest bits in pgste */ |
609 | set_bit_simple(RCP_GR_BIT, pgste); | 592 | pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ |
610 | if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { | 593 | /* Get host changed & referenced bits from pgste */ |
611 | SetPageDirty(page); | 594 | bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; |
612 | set_bit_simple(KVM_UD_BIT, pgste); | 595 | /* Clear host bits in pgste. */ |
613 | } | 596 | pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); |
614 | if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) | 597 | pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); |
615 | SetPageReferenced(page); | 598 | /* Copy page access key and fetch protection bit to pgste */ |
599 | pgste_val(pgste) |= | ||
600 | (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; | ||
601 | /* Transfer changed and referenced to kvm user bits */ | ||
602 | pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ | ||
603 | /* Transfer changed & referenced to pte software bits */ | ||
604 | pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ | ||
616 | #endif | 605 | #endif |
606 | return pgste; | ||
607 | |||
608 | } | ||
609 | |||
610 | static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) | ||
611 | { | ||
612 | #ifdef CONFIG_PGSTE | ||
613 | int young; | ||
614 | |||
615 | young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); | ||
616 | /* Transfer page referenced bit to pte software bit (host view) */ | ||
617 | if (young || (pgste_val(pgste) & RCP_HR_BIT)) | ||
618 | pte_val(*ptep) |= _PAGE_SWR; | ||
619 | /* Clear host referenced bit in pgste. */ | ||
620 | pgste_val(pgste) &= ~RCP_HR_BIT; | ||
621 | /* Transfer page referenced bit to guest bit in pgste */ | ||
622 | pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ | ||
623 | #endif | ||
624 | return pgste; | ||
625 | |||
626 | } | ||
627 | |||
628 | static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) | ||
629 | { | ||
630 | #ifdef CONFIG_PGSTE | ||
631 | unsigned long pfn; | ||
632 | unsigned long okey, nkey; | ||
633 | |||
634 | pfn = pte_val(*ptep) >> PAGE_SHIFT; | ||
635 | okey = nkey = page_get_storage_key(pfn); | ||
636 | nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); | ||
637 | /* Set page access key and fetch protection bit from pgste */ | ||
638 | nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; | ||
639 | if (okey != nkey) | ||
640 | page_set_storage_key(pfn, nkey, 1); | ||
641 | #endif | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * Certain architectures need to do special things when PTEs | ||
646 | * within a page table are directly modified. Thus, the following | ||
647 | * hook is made available. | ||
648 | */ | ||
649 | static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
650 | pte_t *ptep, pte_t entry) | ||
651 | { | ||
652 | pgste_t pgste; | ||
653 | |||
654 | if (mm_has_pgste(mm)) { | ||
655 | pgste = pgste_get_lock(ptep); | ||
656 | pgste_set_pte(ptep, pgste); | ||
657 | *ptep = entry; | ||
658 | pgste_set_unlock(ptep, pgste); | ||
659 | } else | ||
660 | *ptep = entry; | ||
617 | } | 661 | } |
618 | 662 | ||
619 | /* | 663 | /* |
@@ -627,19 +671,19 @@ static inline int pte_write(pte_t pte) | |||
627 | 671 | ||
628 | static inline int pte_dirty(pte_t pte) | 672 | static inline int pte_dirty(pte_t pte) |
629 | { | 673 | { |
630 | /* A pte is neither clean nor dirty on s/390. The dirty bit | 674 | #ifdef CONFIG_PGSTE |
631 | * is in the storage key. See page_test_and_clear_dirty for | 675 | if (pte_val(pte) & _PAGE_SWC) |
632 | * details. | 676 | return 1; |
633 | */ | 677 | #endif |
634 | return 0; | 678 | return 0; |
635 | } | 679 | } |
636 | 680 | ||
637 | static inline int pte_young(pte_t pte) | 681 | static inline int pte_young(pte_t pte) |
638 | { | 682 | { |
639 | /* A pte is neither young nor old on s/390. The young bit | 683 | #ifdef CONFIG_PGSTE |
640 | * is in the storage key. See page_test_and_clear_young for | 684 | if (pte_val(pte) & _PAGE_SWR) |
641 | * details. | 685 | return 1; |
642 | */ | 686 | #endif |
643 | return 0; | 687 | return 0; |
644 | } | 688 | } |
645 | 689 | ||
@@ -647,64 +691,30 @@ static inline int pte_young(pte_t pte) | |||
647 | * pgd/pmd/pte modification functions | 691 | * pgd/pmd/pte modification functions |
648 | */ | 692 | */ |
649 | 693 | ||
650 | #ifndef __s390x__ | 694 | static inline void pgd_clear(pgd_t *pgd) |
651 | |||
652 | #define pgd_clear(pgd) do { } while (0) | ||
653 | #define pud_clear(pud) do { } while (0) | ||
654 | |||
655 | #else /* __s390x__ */ | ||
656 | |||
657 | static inline void pgd_clear_kernel(pgd_t * pgd) | ||
658 | { | 695 | { |
696 | #ifdef __s390x__ | ||
659 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) | 697 | if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) |
660 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; | 698 | pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; |
699 | #endif | ||
661 | } | 700 | } |
662 | 701 | ||
663 | static inline void pgd_clear(pgd_t * pgd) | 702 | static inline void pud_clear(pud_t *pud) |
664 | { | ||
665 | pgd_t *shadow = get_shadow_table(pgd); | ||
666 | |||
667 | pgd_clear_kernel(pgd); | ||
668 | if (shadow) | ||
669 | pgd_clear_kernel(shadow); | ||
670 | } | ||
671 | |||
672 | static inline void pud_clear_kernel(pud_t *pud) | ||
673 | { | 703 | { |
704 | #ifdef __s390x__ | ||
674 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) | 705 | if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) |
675 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; | 706 | pud_val(*pud) = _REGION3_ENTRY_EMPTY; |
707 | #endif | ||
676 | } | 708 | } |
677 | 709 | ||
678 | static inline void pud_clear(pud_t *pud) | 710 | static inline void pmd_clear(pmd_t *pmdp) |
679 | { | ||
680 | pud_t *shadow = get_shadow_table(pud); | ||
681 | |||
682 | pud_clear_kernel(pud); | ||
683 | if (shadow) | ||
684 | pud_clear_kernel(shadow); | ||
685 | } | ||
686 | |||
687 | #endif /* __s390x__ */ | ||
688 | |||
689 | static inline void pmd_clear_kernel(pmd_t * pmdp) | ||
690 | { | 711 | { |
691 | pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; | 712 | pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; |
692 | } | 713 | } |
693 | 714 | ||
694 | static inline void pmd_clear(pmd_t *pmd) | ||
695 | { | ||
696 | pmd_t *shadow = get_shadow_table(pmd); | ||
697 | |||
698 | pmd_clear_kernel(pmd); | ||
699 | if (shadow) | ||
700 | pmd_clear_kernel(shadow); | ||
701 | } | ||
702 | |||
703 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 715 | static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
704 | { | 716 | { |
705 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | 717 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; |
706 | if (mm->context.noexec) | ||
707 | pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; | ||
708 | } | 718 | } |
709 | 719 | ||
710 | /* | 720 | /* |
@@ -734,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte) | |||
734 | 744 | ||
735 | static inline pte_t pte_mkclean(pte_t pte) | 745 | static inline pte_t pte_mkclean(pte_t pte) |
736 | { | 746 | { |
737 | /* The only user of pte_mkclean is the fork() code. | 747 | #ifdef CONFIG_PGSTE |
738 | We must *not* clear the *physical* page dirty bit | 748 | pte_val(pte) &= ~_PAGE_SWC; |
739 | just because fork() wants to clear the dirty bit in | 749 | #endif |
740 | *one* of the page's mappings. So we just do nothing. */ | ||
741 | return pte; | 750 | return pte; |
742 | } | 751 | } |
743 | 752 | ||
744 | static inline pte_t pte_mkdirty(pte_t pte) | 753 | static inline pte_t pte_mkdirty(pte_t pte) |
745 | { | 754 | { |
746 | /* We do not explicitly set the dirty bit because the | ||
747 | * sske instruction is slow. It is faster to let the | ||
748 | * next instruction set the dirty bit. | ||
749 | */ | ||
750 | return pte; | 755 | return pte; |
751 | } | 756 | } |
752 | 757 | ||
753 | static inline pte_t pte_mkold(pte_t pte) | 758 | static inline pte_t pte_mkold(pte_t pte) |
754 | { | 759 | { |
755 | /* S/390 doesn't keep its dirty/referenced bit in the pte. | 760 | #ifdef CONFIG_PGSTE |
756 | * There is no point in clearing the real referenced bit. | 761 | pte_val(pte) &= ~_PAGE_SWR; |
757 | */ | 762 | #endif |
758 | return pte; | 763 | return pte; |
759 | } | 764 | } |
760 | 765 | ||
761 | static inline pte_t pte_mkyoung(pte_t pte) | 766 | static inline pte_t pte_mkyoung(pte_t pte) |
762 | { | 767 | { |
763 | /* S/390 doesn't keep its dirty/referenced bit in the pte. | ||
764 | * There is no point in setting the real referenced bit. | ||
765 | */ | ||
766 | return pte; | 768 | return pte; |
767 | } | 769 | } |
768 | 770 | ||
@@ -800,62 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte) | |||
800 | } | 802 | } |
801 | #endif | 803 | #endif |
802 | 804 | ||
803 | #ifdef CONFIG_PGSTE | ||
804 | /* | 805 | /* |
805 | * Get (and clear) the user dirty bit for a PTE. | 806 | * Get (and clear) the user dirty bit for a pte. |
806 | */ | 807 | */ |
807 | static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, | 808 | static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, |
808 | pte_t *ptep) | 809 | pte_t *ptep) |
809 | { | 810 | { |
810 | int dirty; | 811 | pgste_t pgste; |
811 | unsigned long *pgste; | 812 | int dirty = 0; |
812 | struct page *page; | 813 | |
813 | unsigned int skey; | 814 | if (mm_has_pgste(mm)) { |
814 | 815 | pgste = pgste_get_lock(ptep); | |
815 | if (!mm->context.has_pgste) | 816 | pgste = pgste_update_all(ptep, pgste); |
816 | return -EINVAL; | 817 | dirty = !!(pgste_val(pgste) & KVM_UC_BIT); |
817 | rcp_lock(ptep); | 818 | pgste_val(pgste) &= ~KVM_UC_BIT; |
818 | pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | 819 | pgste_set_unlock(ptep, pgste); |
819 | page = virt_to_page(pte_val(*ptep)); | 820 | return dirty; |
820 | skey = page_get_storage_key(page_to_phys(page)); | ||
821 | if (skey & _PAGE_CHANGED) { | ||
822 | set_bit_simple(RCP_GC_BIT, pgste); | ||
823 | set_bit_simple(KVM_UD_BIT, pgste); | ||
824 | } | 821 | } |
825 | if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { | ||
826 | SetPageDirty(page); | ||
827 | set_bit_simple(KVM_UD_BIT, pgste); | ||
828 | } | ||
829 | dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); | ||
830 | if (skey & _PAGE_CHANGED) | ||
831 | page_clear_dirty(page, 1); | ||
832 | rcp_unlock(ptep); | ||
833 | return dirty; | 822 | return dirty; |
834 | } | 823 | } |
835 | #endif | 824 | |
825 | /* | ||
826 | * Get (and clear) the user referenced bit for a pte. | ||
827 | */ | ||
828 | static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, | ||
829 | pte_t *ptep) | ||
830 | { | ||
831 | pgste_t pgste; | ||
832 | int young = 0; | ||
833 | |||
834 | if (mm_has_pgste(mm)) { | ||
835 | pgste = pgste_get_lock(ptep); | ||
836 | pgste = pgste_update_young(ptep, pgste); | ||
837 | young = !!(pgste_val(pgste) & KVM_UR_BIT); | ||
838 | pgste_val(pgste) &= ~KVM_UR_BIT; | ||
839 | pgste_set_unlock(ptep, pgste); | ||
840 | } | ||
841 | return young; | ||
842 | } | ||
836 | 843 | ||
837 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG | 844 | #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG |
838 | static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, | 845 | static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, |
839 | unsigned long addr, pte_t *ptep) | 846 | unsigned long addr, pte_t *ptep) |
840 | { | 847 | { |
841 | #ifdef CONFIG_PGSTE | 848 | pgste_t pgste; |
842 | unsigned long physpage; | 849 | pte_t pte; |
843 | int young; | ||
844 | unsigned long *pgste; | ||
845 | 850 | ||
846 | if (!vma->vm_mm->context.has_pgste) | 851 | if (mm_has_pgste(vma->vm_mm)) { |
847 | return 0; | 852 | pgste = pgste_get_lock(ptep); |
848 | physpage = pte_val(*ptep) & PAGE_MASK; | 853 | pgste = pgste_update_young(ptep, pgste); |
849 | pgste = (unsigned long *) (ptep + PTRS_PER_PTE); | 854 | pte = *ptep; |
850 | 855 | *ptep = pte_mkold(pte); | |
851 | young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); | 856 | pgste_set_unlock(ptep, pgste); |
852 | rcp_lock(ptep); | 857 | return pte_young(pte); |
853 | if (young) | 858 | } |
854 | set_bit_simple(RCP_GR_BIT, pgste); | ||
855 | young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); | ||
856 | rcp_unlock(ptep); | ||
857 | return young; | ||
858 | #endif | ||
859 | return 0; | 859 | return 0; |
860 | } | 860 | } |
861 | 861 | ||
@@ -867,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, | |||
867 | * On s390 reference bits are in storage key and never in TLB | 867 | * On s390 reference bits are in storage key and never in TLB |
868 | * With virtualization we handle the reference bit, without we | 868 | * With virtualization we handle the reference bit, without we |
869 | * can simply return */ | 869 | * can simply return */ |
870 | #ifdef CONFIG_PGSTE | ||
871 | return ptep_test_and_clear_young(vma, address, ptep); | 870 | return ptep_test_and_clear_young(vma, address, ptep); |
872 | #endif | ||
873 | return 0; | ||
874 | } | 871 | } |
875 | 872 | ||
876 | static inline void __ptep_ipte(unsigned long address, pte_t *ptep) | 873 | static inline void __ptep_ipte(unsigned long address, pte_t *ptep) |
@@ -890,25 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) | |||
890 | } | 887 | } |
891 | } | 888 | } |
892 | 889 | ||
893 | static inline void ptep_invalidate(struct mm_struct *mm, | ||
894 | unsigned long address, pte_t *ptep) | ||
895 | { | ||
896 | if (mm->context.has_pgste) { | ||
897 | rcp_lock(ptep); | ||
898 | __ptep_ipte(address, ptep); | ||
899 | ptep_rcp_copy(ptep); | ||
900 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
901 | rcp_unlock(ptep); | ||
902 | return; | ||
903 | } | ||
904 | __ptep_ipte(address, ptep); | ||
905 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
906 | if (mm->context.noexec) { | ||
907 | __ptep_ipte(address, ptep + PTRS_PER_PTE); | ||
908 | pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; | ||
909 | } | ||
910 | } | ||
911 | |||
912 | /* | 890 | /* |
913 | * This is hard to understand. ptep_get_and_clear and ptep_clear_flush | 891 | * This is hard to understand. ptep_get_and_clear and ptep_clear_flush |
914 | * both clear the TLB for the unmapped pte. The reason is that | 892 | * both clear the TLB for the unmapped pte. The reason is that |
@@ -923,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm, | |||
923 | * is a nop. | 901 | * is a nop. |
924 | */ | 902 | */ |
925 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR | 903 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
926 | #define ptep_get_and_clear(__mm, __address, __ptep) \ | 904 | static inline pte_t ptep_get_and_clear(struct mm_struct *mm, |
927 | ({ \ | 905 | unsigned long address, pte_t *ptep) |
928 | pte_t __pte = *(__ptep); \ | 906 | { |
929 | (__mm)->context.flush_mm = 1; \ | 907 | pgste_t pgste; |
930 | if (atomic_read(&(__mm)->context.attach_count) > 1 || \ | 908 | pte_t pte; |
931 | (__mm) != current->active_mm) \ | 909 | |
932 | ptep_invalidate(__mm, __address, __ptep); \ | 910 | mm->context.flush_mm = 1; |
933 | else \ | 911 | if (mm_has_pgste(mm)) |
934 | pte_clear((__mm), (__address), (__ptep)); \ | 912 | pgste = pgste_get_lock(ptep); |
935 | __pte; \ | 913 | |
936 | }) | 914 | pte = *ptep; |
915 | if (!mm_exclusive(mm)) | ||
916 | __ptep_ipte(address, ptep); | ||
917 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
918 | |||
919 | if (mm_has_pgste(mm)) { | ||
920 | pgste = pgste_update_all(&pte, pgste); | ||
921 | pgste_set_unlock(ptep, pgste); | ||
922 | } | ||
923 | return pte; | ||
924 | } | ||
925 | |||
926 | #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION | ||
927 | static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, | ||
928 | unsigned long address, | ||
929 | pte_t *ptep) | ||
930 | { | ||
931 | pte_t pte; | ||
932 | |||
933 | mm->context.flush_mm = 1; | ||
934 | if (mm_has_pgste(mm)) | ||
935 | pgste_get_lock(ptep); | ||
936 | |||
937 | pte = *ptep; | ||
938 | if (!mm_exclusive(mm)) | ||
939 | __ptep_ipte(address, ptep); | ||
940 | return pte; | ||
941 | } | ||
942 | |||
943 | static inline void ptep_modify_prot_commit(struct mm_struct *mm, | ||
944 | unsigned long address, | ||
945 | pte_t *ptep, pte_t pte) | ||
946 | { | ||
947 | *ptep = pte; | ||
948 | if (mm_has_pgste(mm)) | ||
949 | pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); | ||
950 | } | ||
937 | 951 | ||
938 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | 952 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
939 | static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, | 953 | static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, |
940 | unsigned long address, pte_t *ptep) | 954 | unsigned long address, pte_t *ptep) |
941 | { | 955 | { |
942 | pte_t pte = *ptep; | 956 | pgste_t pgste; |
943 | ptep_invalidate(vma->vm_mm, address, ptep); | 957 | pte_t pte; |
958 | |||
959 | if (mm_has_pgste(vma->vm_mm)) | ||
960 | pgste = pgste_get_lock(ptep); | ||
961 | |||
962 | pte = *ptep; | ||
963 | __ptep_ipte(address, ptep); | ||
964 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
965 | |||
966 | if (mm_has_pgste(vma->vm_mm)) { | ||
967 | pgste = pgste_update_all(&pte, pgste); | ||
968 | pgste_set_unlock(ptep, pgste); | ||
969 | } | ||
944 | return pte; | 970 | return pte; |
945 | } | 971 | } |
946 | 972 | ||
@@ -953,76 +979,67 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, | |||
953 | */ | 979 | */ |
954 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL | 980 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
955 | static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, | 981 | static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, |
956 | unsigned long addr, | 982 | unsigned long address, |
957 | pte_t *ptep, int full) | 983 | pte_t *ptep, int full) |
958 | { | 984 | { |
959 | pte_t pte = *ptep; | 985 | pgste_t pgste; |
986 | pte_t pte; | ||
987 | |||
988 | if (mm_has_pgste(mm)) | ||
989 | pgste = pgste_get_lock(ptep); | ||
990 | |||
991 | pte = *ptep; | ||
992 | if (!full) | ||
993 | __ptep_ipte(address, ptep); | ||
994 | pte_val(*ptep) = _PAGE_TYPE_EMPTY; | ||
960 | 995 | ||
961 | if (full) | 996 | if (mm_has_pgste(mm)) { |
962 | pte_clear(mm, addr, ptep); | 997 | pgste = pgste_update_all(&pte, pgste); |
963 | else | 998 | pgste_set_unlock(ptep, pgste); |
964 | ptep_invalidate(mm, addr, ptep); | 999 | } |
965 | return pte; | 1000 | return pte; |
966 | } | 1001 | } |
967 | 1002 | ||
968 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT | 1003 | #define __HAVE_ARCH_PTEP_SET_WRPROTECT |
969 | #define ptep_set_wrprotect(__mm, __addr, __ptep) \ | 1004 | static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, |
970 | ({ \ | 1005 | unsigned long address, pte_t *ptep) |
971 | pte_t __pte = *(__ptep); \ | 1006 | { |
972 | if (pte_write(__pte)) { \ | 1007 | pgste_t pgste; |
973 | (__mm)->context.flush_mm = 1; \ | 1008 | pte_t pte = *ptep; |
974 | if (atomic_read(&(__mm)->context.attach_count) > 1 || \ | ||
975 | (__mm) != current->active_mm) \ | ||
976 | ptep_invalidate(__mm, __addr, __ptep); \ | ||
977 | set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ | ||
978 | } \ | ||
979 | }) | ||
980 | 1009 | ||
981 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS | 1010 | if (pte_write(pte)) { |
982 | #define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ | 1011 | mm->context.flush_mm = 1; |
983 | ({ \ | 1012 | if (mm_has_pgste(mm)) |
984 | int __changed = !pte_same(*(__ptep), __entry); \ | 1013 | pgste = pgste_get_lock(ptep); |
985 | if (__changed) { \ | ||
986 | ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ | ||
987 | set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ | ||
988 | } \ | ||
989 | __changed; \ | ||
990 | }) | ||
991 | 1014 | ||
992 | /* | 1015 | if (!mm_exclusive(mm)) |
993 | * Test and clear dirty bit in storage key. | 1016 | __ptep_ipte(address, ptep); |
994 | * We can't clear the changed bit atomically. This is a potential | 1017 | *ptep = pte_wrprotect(pte); |
995 | * race against modification of the referenced bit. This function | ||
996 | * should therefore only be called if it is not mapped in any | ||
997 | * address space. | ||
998 | */ | ||
999 | #define __HAVE_ARCH_PAGE_TEST_DIRTY | ||
1000 | static inline int page_test_dirty(struct page *page) | ||
1001 | { | ||
1002 | return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; | ||
1003 | } | ||
1004 | 1018 | ||
1005 | #define __HAVE_ARCH_PAGE_CLEAR_DIRTY | 1019 | if (mm_has_pgste(mm)) |
1006 | static inline void page_clear_dirty(struct page *page, int mapped) | 1020 | pgste_set_unlock(ptep, pgste); |
1007 | { | 1021 | } |
1008 | page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); | 1022 | return pte; |
1009 | } | 1023 | } |
1010 | 1024 | ||
1011 | /* | 1025 | #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS |
1012 | * Test and clear referenced bit in storage key. | 1026 | static inline int ptep_set_access_flags(struct vm_area_struct *vma, |
1013 | */ | 1027 | unsigned long address, pte_t *ptep, |
1014 | #define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG | 1028 | pte_t entry, int dirty) |
1015 | static inline int page_test_and_clear_young(struct page *page) | ||
1016 | { | 1029 | { |
1017 | unsigned long physpage = page_to_phys(page); | 1030 | pgste_t pgste; |
1018 | int ccode; | 1031 | |
1019 | 1032 | if (pte_same(*ptep, entry)) | |
1020 | asm volatile( | 1033 | return 0; |
1021 | " rrbe 0,%1\n" | 1034 | if (mm_has_pgste(vma->vm_mm)) |
1022 | " ipm %0\n" | 1035 | pgste = pgste_get_lock(ptep); |
1023 | " srl %0,28\n" | 1036 | |
1024 | : "=d" (ccode) : "a" (physpage) : "cc" ); | 1037 | __ptep_ipte(address, ptep); |
1025 | return ccode & 2; | 1038 | *ptep = entry; |
1039 | |||
1040 | if (mm_has_pgste(vma->vm_mm)) | ||
1041 | pgste_set_unlock(ptep, pgste); | ||
1042 | return 1; | ||
1026 | } | 1043 | } |
1027 | 1044 | ||
1028 | /* | 1045 | /* |