diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2014-04-03 07:55:01 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2014-04-03 08:31:00 -0400 |
commit | 1b948d6caec4f28e3524244ca0f77c6ae8ddceef (patch) | |
tree | bc7e1d5800f10c39979d3f47872ba7047568f8a4 /arch/s390 | |
parent | 02a8f3abb708919149cb657a5202f4603f0c38e2 (diff) |
s390/mm,tlb: optimize TLB flushing for zEC12
The zEC12 machines introduced the local-clearing control for the IDTE
and IPTE instruction. If the control is set only the TLB of the local
CPU is cleared of entries, either all entries of a single address space
in the case of IDTE, or the entry for a single page-table entry in the
case of IPTE. Without the local-clearing control the TLB flush is
broadcast to all CPUs in the configuration, which is expensive.
The reset of the bit mask of the CPUs that need flushing after a
non-local IDTE is tricky. As TLB entries for an address space remain
in the TLB even if the address space is detached a new bit field is
required to keep track of attached CPUs vs. CPUs in the need of a
flush. After a non-local flush with IDTE the bit-field of attached CPUs
is copied to the bit-field of CPUs in need of a flush. The ordering
of operations on cpu_attach_mask, attach_count and mm_cpumask(mm) is
such that an underindication in mm_cpumask(mm) is prevented but an
overindication in mm_cpumask(mm) is possible.
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/include/asm/mmu.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/mmu_context.h | 5 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 128 | ||||
-rw-r--r-- | arch/s390/include/asm/setup.h | 3 | ||||
-rw-r--r-- | arch/s390/include/asm/tlbflush.h | 115 | ||||
-rw-r--r-- | arch/s390/kernel/early.c | 2 | ||||
-rw-r--r-- | arch/s390/kernel/smp.c | 6 | ||||
-rw-r--r-- | arch/s390/mm/hugetlbpage.c | 5 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 7 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 4 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 2 |
11 files changed, 213 insertions, 66 deletions
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index ff132ac64ddd..f77695a82f64 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h | |||
@@ -1,9 +1,11 @@ | |||
1 | #ifndef __MMU_H | 1 | #ifndef __MMU_H |
2 | #define __MMU_H | 2 | #define __MMU_H |
3 | 3 | ||
4 | #include <linux/cpumask.h> | ||
4 | #include <linux/errno.h> | 5 | #include <linux/errno.h> |
5 | 6 | ||
6 | typedef struct { | 7 | typedef struct { |
8 | cpumask_t cpu_attach_mask; | ||
7 | atomic_t attach_count; | 9 | atomic_t attach_count; |
8 | unsigned int flush_mm; | 10 | unsigned int flush_mm; |
9 | spinlock_t list_lock; | 11 | spinlock_t list_lock; |
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 7abf318b1522..71a258839039 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h | |||
@@ -15,6 +15,7 @@ | |||
15 | static inline int init_new_context(struct task_struct *tsk, | 15 | static inline int init_new_context(struct task_struct *tsk, |
16 | struct mm_struct *mm) | 16 | struct mm_struct *mm) |
17 | { | 17 | { |
18 | cpumask_clear(&mm->context.cpu_attach_mask); | ||
18 | atomic_set(&mm->context.attach_count, 0); | 19 | atomic_set(&mm->context.attach_count, 0); |
19 | mm->context.flush_mm = 0; | 20 | mm->context.flush_mm = 0; |
20 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; | 21 | mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; |
@@ -59,6 +60,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
59 | 60 | ||
60 | if (prev == next) | 61 | if (prev == next) |
61 | return; | 62 | return; |
63 | if (MACHINE_HAS_TLB_LC) | ||
64 | cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); | ||
62 | if (atomic_inc_return(&next->context.attach_count) >> 16) { | 65 | if (atomic_inc_return(&next->context.attach_count) >> 16) { |
63 | /* Delay update_user_asce until all TLB flushes are done. */ | 66 | /* Delay update_user_asce until all TLB flushes are done. */ |
64 | set_tsk_thread_flag(tsk, TIF_TLB_WAIT); | 67 | set_tsk_thread_flag(tsk, TIF_TLB_WAIT); |
@@ -73,6 +76,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
73 | } | 76 | } |
74 | atomic_dec(&prev->context.attach_count); | 77 | atomic_dec(&prev->context.attach_count); |
75 | WARN_ON(atomic_read(&prev->context.attach_count) < 0); | 78 | WARN_ON(atomic_read(&prev->context.attach_count) < 0); |
79 | if (MACHINE_HAS_TLB_LC) | ||
80 | cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); | ||
76 | } | 81 | } |
77 | 82 | ||
78 | #define finish_arch_post_lock_switch finish_arch_post_lock_switch | 83 | #define finish_arch_post_lock_switch finish_arch_post_lock_switch |
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1ab75eaacbd4..66d51834f2cb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h | |||
@@ -1068,12 +1068,35 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) | |||
1068 | : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); | 1068 | : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); |
1069 | } | 1069 | } |
1070 | 1070 | ||
1071 | static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) | ||
1072 | { | ||
1073 | unsigned long pto = (unsigned long) ptep; | ||
1074 | |||
1075 | #ifndef CONFIG_64BIT | ||
1076 | /* pto in ESA mode must point to the start of the segment table */ | ||
1077 | pto &= 0x7ffffc00; | ||
1078 | #endif | ||
1079 | /* Invalidation + local TLB flush for the pte */ | ||
1080 | asm volatile( | ||
1081 | " .insn rrf,0xb2210000,%2,%3,0,1" | ||
1082 | : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); | ||
1083 | } | ||
1084 | |||
1071 | static inline void ptep_flush_direct(struct mm_struct *mm, | 1085 | static inline void ptep_flush_direct(struct mm_struct *mm, |
1072 | unsigned long address, pte_t *ptep) | 1086 | unsigned long address, pte_t *ptep) |
1073 | { | 1087 | { |
1088 | int active, count; | ||
1089 | |||
1074 | if (pte_val(*ptep) & _PAGE_INVALID) | 1090 | if (pte_val(*ptep) & _PAGE_INVALID) |
1075 | return; | 1091 | return; |
1076 | __ptep_ipte(address, ptep); | 1092 | active = (mm == current->active_mm) ? 1 : 0; |
1093 | count = atomic_add_return(0x10000, &mm->context.attach_count); | ||
1094 | if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && | ||
1095 | cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) | ||
1096 | __ptep_ipte_local(address, ptep); | ||
1097 | else | ||
1098 | __ptep_ipte(address, ptep); | ||
1099 | atomic_sub(0x10000, &mm->context.attach_count); | ||
1077 | } | 1100 | } |
1078 | 1101 | ||
1079 | static inline void ptep_flush_lazy(struct mm_struct *mm, | 1102 | static inline void ptep_flush_lazy(struct mm_struct *mm, |
@@ -1382,35 +1405,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) | |||
1382 | #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) | 1405 | #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) |
1383 | #define pte_unmap(pte) do { } while (0) | 1406 | #define pte_unmap(pte) do { } while (0) |
1384 | 1407 | ||
1385 | static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) | ||
1386 | { | ||
1387 | unsigned long sto = (unsigned long) pmdp - | ||
1388 | pmd_index(address) * sizeof(pmd_t); | ||
1389 | |||
1390 | if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) { | ||
1391 | asm volatile( | ||
1392 | " .insn rrf,0xb98e0000,%2,%3,0,0" | ||
1393 | : "=m" (*pmdp) | ||
1394 | : "m" (*pmdp), "a" (sto), | ||
1395 | "a" ((address & HPAGE_MASK)) | ||
1396 | : "cc" | ||
1397 | ); | ||
1398 | } | ||
1399 | } | ||
1400 | |||
1401 | static inline void __pmd_csp(pmd_t *pmdp) | ||
1402 | { | ||
1403 | register unsigned long reg2 asm("2") = pmd_val(*pmdp); | ||
1404 | register unsigned long reg3 asm("3") = pmd_val(*pmdp) | | ||
1405 | _SEGMENT_ENTRY_INVALID; | ||
1406 | register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; | ||
1407 | |||
1408 | asm volatile( | ||
1409 | " csp %1,%3" | ||
1410 | : "=m" (*pmdp) | ||
1411 | : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); | ||
1412 | } | ||
1413 | |||
1414 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) | 1408 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) |
1415 | static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) | 1409 | static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) |
1416 | { | 1410 | { |
@@ -1479,18 +1473,80 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) | |||
1479 | } | 1473 | } |
1480 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ | 1474 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ |
1481 | 1475 | ||
1476 | static inline void __pmdp_csp(pmd_t *pmdp) | ||
1477 | { | ||
1478 | register unsigned long reg2 asm("2") = pmd_val(*pmdp); | ||
1479 | register unsigned long reg3 asm("3") = pmd_val(*pmdp) | | ||
1480 | _SEGMENT_ENTRY_INVALID; | ||
1481 | register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; | ||
1482 | |||
1483 | asm volatile( | ||
1484 | " csp %1,%3" | ||
1485 | : "=m" (*pmdp) | ||
1486 | : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); | ||
1487 | } | ||
1488 | |||
1489 | static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp) | ||
1490 | { | ||
1491 | unsigned long sto; | ||
1492 | |||
1493 | sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); | ||
1494 | asm volatile( | ||
1495 | " .insn rrf,0xb98e0000,%2,%3,0,0" | ||
1496 | : "=m" (*pmdp) | ||
1497 | : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) | ||
1498 | : "cc" ); | ||
1499 | } | ||
1500 | |||
1501 | static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) | ||
1502 | { | ||
1503 | unsigned long sto; | ||
1504 | |||
1505 | sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); | ||
1506 | asm volatile( | ||
1507 | " .insn rrf,0xb98e0000,%2,%3,0,1" | ||
1508 | : "=m" (*pmdp) | ||
1509 | : "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK)) | ||
1510 | : "cc" ); | ||
1511 | } | ||
1512 | |||
1513 | static inline void pmdp_flush_direct(struct mm_struct *mm, | ||
1514 | unsigned long address, pmd_t *pmdp) | ||
1515 | { | ||
1516 | int active, count; | ||
1517 | |||
1518 | if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) | ||
1519 | return; | ||
1520 | if (!MACHINE_HAS_IDTE) { | ||
1521 | __pmdp_csp(pmdp); | ||
1522 | return; | ||
1523 | } | ||
1524 | active = (mm == current->active_mm) ? 1 : 0; | ||
1525 | count = atomic_add_return(0x10000, &mm->context.attach_count); | ||
1526 | if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && | ||
1527 | cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) | ||
1528 | __pmdp_idte_local(address, pmdp); | ||
1529 | else | ||
1530 | __pmdp_idte(address, pmdp); | ||
1531 | atomic_sub(0x10000, &mm->context.attach_count); | ||
1532 | } | ||
1533 | |||
1482 | static inline void pmdp_flush_lazy(struct mm_struct *mm, | 1534 | static inline void pmdp_flush_lazy(struct mm_struct *mm, |
1483 | unsigned long address, pmd_t *pmdp) | 1535 | unsigned long address, pmd_t *pmdp) |
1484 | { | 1536 | { |
1485 | int active, count; | 1537 | int active, count; |
1486 | 1538 | ||
1539 | if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) | ||
1540 | return; | ||
1487 | active = (mm == current->active_mm) ? 1 : 0; | 1541 | active = (mm == current->active_mm) ? 1 : 0; |
1488 | count = atomic_add_return(0x10000, &mm->context.attach_count); | 1542 | count = atomic_add_return(0x10000, &mm->context.attach_count); |
1489 | if ((count & 0xffff) <= active) { | 1543 | if ((count & 0xffff) <= active) { |
1490 | pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; | 1544 | pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; |
1491 | mm->context.flush_mm = 1; | 1545 | mm->context.flush_mm = 1; |
1492 | } else | 1546 | } else if (MACHINE_HAS_IDTE) |
1493 | __pmd_idte(address, pmdp); | 1547 | __pmdp_idte(address, pmdp); |
1548 | else | ||
1549 | __pmdp_csp(pmdp); | ||
1494 | atomic_sub(0x10000, &mm->context.attach_count); | 1550 | atomic_sub(0x10000, &mm->context.attach_count); |
1495 | } | 1551 | } |
1496 | 1552 | ||
@@ -1543,7 +1599,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, | |||
1543 | pmd_t pmd; | 1599 | pmd_t pmd; |
1544 | 1600 | ||
1545 | pmd = *pmdp; | 1601 | pmd = *pmdp; |
1546 | __pmd_idte(address, pmdp); | 1602 | pmdp_flush_direct(vma->vm_mm, address, pmdp); |
1547 | *pmdp = pmd_mkold(pmd); | 1603 | *pmdp = pmd_mkold(pmd); |
1548 | return pmd_young(pmd); | 1604 | return pmd_young(pmd); |
1549 | } | 1605 | } |
@@ -1554,7 +1610,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, | |||
1554 | { | 1610 | { |
1555 | pmd_t pmd = *pmdp; | 1611 | pmd_t pmd = *pmdp; |
1556 | 1612 | ||
1557 | __pmd_idte(address, pmdp); | 1613 | pmdp_flush_direct(mm, address, pmdp); |
1558 | pmd_clear(pmdp); | 1614 | pmd_clear(pmdp); |
1559 | return pmd; | 1615 | return pmd; |
1560 | } | 1616 | } |
@@ -1570,7 +1626,7 @@ static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, | |||
1570 | static inline void pmdp_invalidate(struct vm_area_struct *vma, | 1626 | static inline void pmdp_invalidate(struct vm_area_struct *vma, |
1571 | unsigned long address, pmd_t *pmdp) | 1627 | unsigned long address, pmd_t *pmdp) |
1572 | { | 1628 | { |
1573 | __pmd_idte(address, pmdp); | 1629 | pmdp_flush_direct(vma->vm_mm, address, pmdp); |
1574 | } | 1630 | } |
1575 | 1631 | ||
1576 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT | 1632 | #define __HAVE_ARCH_PMDP_SET_WRPROTECT |
@@ -1580,7 +1636,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, | |||
1580 | pmd_t pmd = *pmdp; | 1636 | pmd_t pmd = *pmdp; |
1581 | 1637 | ||
1582 | if (pmd_write(pmd)) { | 1638 | if (pmd_write(pmd)) { |
1583 | __pmd_idte(address, pmdp); | 1639 | pmdp_flush_direct(mm, address, pmdp); |
1584 | set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); | 1640 | set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); |
1585 | } | 1641 | } |
1586 | } | 1642 | } |
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 406f3a1e63ef..b31b22dba948 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h | |||
@@ -68,6 +68,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, | |||
68 | #define MACHINE_FLAG_TOPOLOGY (1UL << 14) | 68 | #define MACHINE_FLAG_TOPOLOGY (1UL << 14) |
69 | #define MACHINE_FLAG_TE (1UL << 15) | 69 | #define MACHINE_FLAG_TE (1UL << 15) |
70 | #define MACHINE_FLAG_RRBM (1UL << 16) | 70 | #define MACHINE_FLAG_RRBM (1UL << 16) |
71 | #define MACHINE_FLAG_TLB_LC (1UL << 17) | ||
71 | 72 | ||
72 | #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) | 73 | #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) |
73 | #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) | 74 | #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) |
@@ -90,6 +91,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, | |||
90 | #define MACHINE_HAS_TOPOLOGY (0) | 91 | #define MACHINE_HAS_TOPOLOGY (0) |
91 | #define MACHINE_HAS_TE (0) | 92 | #define MACHINE_HAS_TE (0) |
92 | #define MACHINE_HAS_RRBM (0) | 93 | #define MACHINE_HAS_RRBM (0) |
94 | #define MACHINE_HAS_TLB_LC (0) | ||
93 | #else /* CONFIG_64BIT */ | 95 | #else /* CONFIG_64BIT */ |
94 | #define MACHINE_HAS_IEEE (1) | 96 | #define MACHINE_HAS_IEEE (1) |
95 | #define MACHINE_HAS_CSP (1) | 97 | #define MACHINE_HAS_CSP (1) |
@@ -102,6 +104,7 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, | |||
102 | #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) | 104 | #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) |
103 | #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) | 105 | #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) |
104 | #define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) | 106 | #define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) |
107 | #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) | ||
105 | #endif /* CONFIG_64BIT */ | 108 | #endif /* CONFIG_64BIT */ |
106 | 109 | ||
107 | /* | 110 | /* |
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index f9fef0425fee..16c9c88658c8 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h | |||
@@ -7,19 +7,41 @@ | |||
7 | #include <asm/pgalloc.h> | 7 | #include <asm/pgalloc.h> |
8 | 8 | ||
9 | /* | 9 | /* |
10 | * Flush all tlb entries on the local cpu. | 10 | * Flush all TLB entries on the local CPU. |
11 | */ | 11 | */ |
12 | static inline void __tlb_flush_local(void) | 12 | static inline void __tlb_flush_local(void) |
13 | { | 13 | { |
14 | asm volatile("ptlb" : : : "memory"); | 14 | asm volatile("ptlb" : : : "memory"); |
15 | } | 15 | } |
16 | 16 | ||
17 | #ifdef CONFIG_SMP | ||
18 | /* | 17 | /* |
19 | * Flush all tlb entries on all cpus. | 18 | * Flush TLB entries for a specific ASCE on all CPUs |
20 | */ | 19 | */ |
20 | static inline void __tlb_flush_idte(unsigned long asce) | ||
21 | { | ||
22 | /* Global TLB flush for the mm */ | ||
23 | asm volatile( | ||
24 | " .insn rrf,0xb98e0000,0,%0,%1,0" | ||
25 | : : "a" (2048), "a" (asce) : "cc"); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Flush TLB entries for a specific ASCE on the local CPU | ||
30 | */ | ||
31 | static inline void __tlb_flush_idte_local(unsigned long asce) | ||
32 | { | ||
33 | /* Local TLB flush for the mm */ | ||
34 | asm volatile( | ||
35 | " .insn rrf,0xb98e0000,0,%0,%1,1" | ||
36 | : : "a" (2048), "a" (asce) : "cc"); | ||
37 | } | ||
38 | |||
39 | #ifdef CONFIG_SMP | ||
21 | void smp_ptlb_all(void); | 40 | void smp_ptlb_all(void); |
22 | 41 | ||
42 | /* | ||
43 | * Flush all TLB entries on all CPUs. | ||
44 | */ | ||
23 | static inline void __tlb_flush_global(void) | 45 | static inline void __tlb_flush_global(void) |
24 | { | 46 | { |
25 | register unsigned long reg2 asm("2"); | 47 | register unsigned long reg2 asm("2"); |
@@ -42,36 +64,89 @@ static inline void __tlb_flush_global(void) | |||
42 | : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); | 64 | : : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" ); |
43 | } | 65 | } |
44 | 66 | ||
67 | /* | ||
68 | * Flush TLB entries for a specific mm on all CPUs (in case gmap is used | ||
69 | * this implicates multiple ASCEs!). | ||
70 | */ | ||
45 | static inline void __tlb_flush_full(struct mm_struct *mm) | 71 | static inline void __tlb_flush_full(struct mm_struct *mm) |
46 | { | 72 | { |
47 | cpumask_t local_cpumask; | ||
48 | |||
49 | preempt_disable(); | 73 | preempt_disable(); |
50 | /* | 74 | atomic_add(0x10000, &mm->context.attach_count); |
51 | * If the process only ran on the local cpu, do a local flush. | 75 | if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { |
52 | */ | 76 | /* Local TLB flush */ |
53 | cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id())); | ||
54 | if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) | ||
55 | __tlb_flush_local(); | 77 | __tlb_flush_local(); |
56 | else | 78 | } else { |
79 | /* Global TLB flush */ | ||
57 | __tlb_flush_global(); | 80 | __tlb_flush_global(); |
81 | /* Reset TLB flush mask */ | ||
82 | if (MACHINE_HAS_TLB_LC) | ||
83 | cpumask_copy(mm_cpumask(mm), | ||
84 | &mm->context.cpu_attach_mask); | ||
85 | } | ||
86 | atomic_sub(0x10000, &mm->context.attach_count); | ||
58 | preempt_enable(); | 87 | preempt_enable(); |
59 | } | 88 | } |
89 | |||
90 | /* | ||
91 | * Flush TLB entries for a specific ASCE on all CPUs. | ||
92 | */ | ||
93 | static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) | ||
94 | { | ||
95 | int active, count; | ||
96 | |||
97 | preempt_disable(); | ||
98 | active = (mm == current->active_mm) ? 1 : 0; | ||
99 | count = atomic_add_return(0x10000, &mm->context.attach_count); | ||
100 | if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && | ||
101 | cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { | ||
102 | __tlb_flush_idte_local(asce); | ||
103 | } else { | ||
104 | if (MACHINE_HAS_IDTE) | ||
105 | __tlb_flush_idte(asce); | ||
106 | else | ||
107 | __tlb_flush_global(); | ||
108 | /* Reset TLB flush mask */ | ||
109 | if (MACHINE_HAS_TLB_LC) | ||
110 | cpumask_copy(mm_cpumask(mm), | ||
111 | &mm->context.cpu_attach_mask); | ||
112 | } | ||
113 | atomic_sub(0x10000, &mm->context.attach_count); | ||
114 | preempt_enable(); | ||
115 | } | ||
116 | |||
117 | static inline void __tlb_flush_kernel(void) | ||
118 | { | ||
119 | if (MACHINE_HAS_IDTE) | ||
120 | __tlb_flush_idte((unsigned long) init_mm.pgd | | ||
121 | init_mm.context.asce_bits); | ||
122 | else | ||
123 | __tlb_flush_global(); | ||
124 | } | ||
60 | #else | 125 | #else |
61 | #define __tlb_flush_full(mm) __tlb_flush_local() | ||
62 | #define __tlb_flush_global() __tlb_flush_local() | 126 | #define __tlb_flush_global() __tlb_flush_local() |
63 | #endif | 127 | #define __tlb_flush_full(mm) __tlb_flush_local() |
64 | 128 | ||
65 | /* | 129 | /* |
66 | * Flush all tlb entries of a page table on all cpus. | 130 | * Flush TLB entries for a specific ASCE on all CPUs. |
67 | */ | 131 | */ |
68 | static inline void __tlb_flush_idte(unsigned long asce) | 132 | static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) |
69 | { | 133 | { |
70 | asm volatile( | 134 | if (MACHINE_HAS_TLB_LC) |
71 | " .insn rrf,0xb98e0000,0,%0,%1,0" | 135 | __tlb_flush_idte_local(asce); |
72 | : : "a" (2048), "a" (asce) : "cc" ); | 136 | else |
137 | __tlb_flush_local(); | ||
73 | } | 138 | } |
74 | 139 | ||
140 | static inline void __tlb_flush_kernel(void) | ||
141 | { | ||
142 | if (MACHINE_HAS_TLB_LC) | ||
143 | __tlb_flush_idte_local((unsigned long) init_mm.pgd | | ||
144 | init_mm.context.asce_bits); | ||
145 | else | ||
146 | __tlb_flush_local(); | ||
147 | } | ||
148 | #endif | ||
149 | |||
75 | static inline void __tlb_flush_mm(struct mm_struct * mm) | 150 | static inline void __tlb_flush_mm(struct mm_struct * mm) |
76 | { | 151 | { |
77 | /* | 152 | /* |
@@ -80,7 +155,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) | |||
80 | * only ran on the local cpu. | 155 | * only ran on the local cpu. |
81 | */ | 156 | */ |
82 | if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) | 157 | if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) |
83 | __tlb_flush_idte((unsigned long) mm->pgd | | 158 | __tlb_flush_asce(mm, (unsigned long) mm->pgd | |
84 | mm->context.asce_bits); | 159 | mm->context.asce_bits); |
85 | else | 160 | else |
86 | __tlb_flush_full(mm); | 161 | __tlb_flush_full(mm); |
@@ -130,7 +205,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, | |||
130 | static inline void flush_tlb_kernel_range(unsigned long start, | 205 | static inline void flush_tlb_kernel_range(unsigned long start, |
131 | unsigned long end) | 206 | unsigned long end) |
132 | { | 207 | { |
133 | __tlb_flush_mm(&init_mm); | 208 | __tlb_flush_kernel(); |
134 | } | 209 | } |
135 | 210 | ||
136 | #endif /* _S390_TLBFLUSH_H */ | 211 | #endif /* _S390_TLBFLUSH_H */ |
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 6b594439cca5..a734f3585ceb 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c | |||
@@ -386,6 +386,8 @@ static __init void detect_machine_facilities(void) | |||
386 | S390_lowcore.machine_flags |= MACHINE_FLAG_TE; | 386 | S390_lowcore.machine_flags |= MACHINE_FLAG_TE; |
387 | if (test_facility(66)) | 387 | if (test_facility(66)) |
388 | S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; | 388 | S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; |
389 | if (test_facility(51)) | ||
390 | S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; | ||
389 | #endif | 391 | #endif |
390 | } | 392 | } |
391 | 393 | ||
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 366d14460c2b..42a501d13a3b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c | |||
@@ -236,6 +236,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) | |||
236 | { | 236 | { |
237 | struct _lowcore *lc = pcpu->lowcore; | 237 | struct _lowcore *lc = pcpu->lowcore; |
238 | 238 | ||
239 | if (MACHINE_HAS_TLB_LC) | ||
240 | cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); | ||
241 | cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); | ||
239 | atomic_inc(&init_mm.context.attach_count); | 242 | atomic_inc(&init_mm.context.attach_count); |
240 | lc->cpu_nr = cpu; | 243 | lc->cpu_nr = cpu; |
241 | lc->percpu_offset = __per_cpu_offset[cpu]; | 244 | lc->percpu_offset = __per_cpu_offset[cpu]; |
@@ -760,6 +763,9 @@ void __cpu_die(unsigned int cpu) | |||
760 | cpu_relax(); | 763 | cpu_relax(); |
761 | pcpu_free_lowcore(pcpu); | 764 | pcpu_free_lowcore(pcpu); |
762 | atomic_dec(&init_mm.context.attach_count); | 765 | atomic_dec(&init_mm.context.attach_count); |
766 | cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); | ||
767 | if (MACHINE_HAS_TLB_LC) | ||
768 | cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); | ||
763 | } | 769 | } |
764 | 770 | ||
765 | void __noreturn cpu_die(void) | 771 | void __noreturn cpu_die(void) |
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d261c62e40a6..0727a55d87d9 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c | |||
@@ -123,10 +123,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, | |||
123 | pmd_t *pmdp = (pmd_t *) ptep; | 123 | pmd_t *pmdp = (pmd_t *) ptep; |
124 | pte_t pte = huge_ptep_get(ptep); | 124 | pte_t pte = huge_ptep_get(ptep); |
125 | 125 | ||
126 | if (MACHINE_HAS_IDTE) | 126 | pmdp_flush_direct(mm, addr, pmdp); |
127 | __pmd_idte(addr, pmdp); | ||
128 | else | ||
129 | __pmd_csp(pmdp); | ||
130 | pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; | 127 | pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; |
131 | return pte; | 128 | return pte; |
132 | } | 129 | } |
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ad446b0c55b6..0c1073ed1e84 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c | |||
@@ -124,8 +124,6 @@ void __init paging_init(void) | |||
124 | __ctl_load(S390_lowcore.kernel_asce, 13, 13); | 124 | __ctl_load(S390_lowcore.kernel_asce, 13, 13); |
125 | arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); | 125 | arch_local_irq_restore(4UL << (BITS_PER_LONG - 8)); |
126 | 126 | ||
127 | atomic_set(&init_mm.context.attach_count, 1); | ||
128 | |||
129 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | 127 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
130 | sparse_init(); | 128 | sparse_init(); |
131 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | 129 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); |
@@ -136,6 +134,11 @@ void __init paging_init(void) | |||
136 | 134 | ||
137 | void __init mem_init(void) | 135 | void __init mem_init(void) |
138 | { | 136 | { |
137 | if (MACHINE_HAS_TLB_LC) | ||
138 | cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); | ||
139 | cpumask_set_cpu(0, mm_cpumask(&init_mm)); | ||
140 | atomic_set(&init_mm.context.attach_count, 1); | ||
141 | |||
139 | max_mapnr = max_low_pfn; | 142 | max_mapnr = max_low_pfn; |
140 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | 143 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); |
141 | 144 | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 24c62900b532..c57c63380184 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
@@ -200,7 +200,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) | |||
200 | static void gmap_flush_tlb(struct gmap *gmap) | 200 | static void gmap_flush_tlb(struct gmap *gmap) |
201 | { | 201 | { |
202 | if (MACHINE_HAS_IDTE) | 202 | if (MACHINE_HAS_IDTE) |
203 | __tlb_flush_idte((unsigned long) gmap->table | | 203 | __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | |
204 | _ASCE_TYPE_REGION1); | 204 | _ASCE_TYPE_REGION1); |
205 | else | 205 | else |
206 | __tlb_flush_global(); | 206 | __tlb_flush_global(); |
@@ -219,7 +219,7 @@ void gmap_free(struct gmap *gmap) | |||
219 | 219 | ||
220 | /* Flush tlb. */ | 220 | /* Flush tlb. */ |
221 | if (MACHINE_HAS_IDTE) | 221 | if (MACHINE_HAS_IDTE) |
222 | __tlb_flush_idte((unsigned long) gmap->table | | 222 | __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | |
223 | _ASCE_TYPE_REGION1); | 223 | _ASCE_TYPE_REGION1); |
224 | else | 224 | else |
225 | __tlb_flush_global(); | 225 | __tlb_flush_global(); |
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index bcfb70b60be6..72b04de18283 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c | |||
@@ -138,7 +138,6 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) | |||
138 | } | 138 | } |
139 | ret = 0; | 139 | ret = 0; |
140 | out: | 140 | out: |
141 | flush_tlb_kernel_range(start, end); | ||
142 | return ret; | 141 | return ret; |
143 | } | 142 | } |
144 | 143 | ||
@@ -265,7 +264,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) | |||
265 | memset((void *)start, 0, end - start); | 264 | memset((void *)start, 0, end - start); |
266 | ret = 0; | 265 | ret = 0; |
267 | out: | 266 | out: |
268 | flush_tlb_kernel_range(start, end); | ||
269 | return ret; | 267 | return ret; |
270 | } | 268 | } |
271 | 269 | ||