author    Palmer Dabbelt <palmer@dabbelt.com>  2017-07-10 21:06:09 -0400
committer Palmer Dabbelt <palmer@dabbelt.com>  2017-09-26 18:26:47 -0400
commit    07037db5d479f90377c998259a4f9a469c404edf
tree      31f69eeb15052afe454c49673b6b398692f451d4
parent    6d60b6ee0c9777b92c47f6dc8aad1dd90612e4fa
RISC-V: Paging and MMU
This patch contains code to manage the RISC-V MMU, including definitions
of the page tables and the page walking code.
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
 arch/riscv/include/asm/mmu_context.h  |  69
 arch/riscv/include/asm/page.h         | 130
 arch/riscv/include/asm/pgalloc.h      | 124
 arch/riscv/include/asm/pgtable-32.h   |  25
 arch/riscv/include/asm/pgtable-64.h   |  84
 arch/riscv/include/asm/pgtable-bits.h |  48
 arch/riscv/include/asm/pgtable.h      | 430
 arch/riscv/mm/fault.c                 | 282
 8 files changed, 1192 insertions(+), 0 deletions(-)
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
new file mode 100644
index 000000000000..de1fc1631fc4
--- /dev/null
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -0,0 +1,69 @@
/*
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _ASM_RISCV_MMU_CONTEXT_H
#define _ASM_RISCV_MMU_CONTEXT_H

#include <asm-generic/mm_hooks.h>

#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>

static inline void enter_lazy_tlb(struct mm_struct *mm,
	struct task_struct *task)
{
}

/* Initialize context-related info for a new mm_struct */
static inline int init_new_context(struct task_struct *task,
	struct mm_struct *mm)
{
	return 0;
}

static inline void destroy_context(struct mm_struct *mm)
{
}

static inline pgd_t *current_pgdir(void)
{
	return pfn_to_virt(csr_read(sptbr) & SPTBR_PPN);
}

static inline void set_pgdir(pgd_t *pgd)
{
	csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
}

static inline void switch_mm(struct mm_struct *prev,
	struct mm_struct *next, struct task_struct *task)
{
	if (likely(prev != next)) {
		set_pgdir(next->pgd);
		local_flush_tlb_all();
	}
}

static inline void activate_mm(struct mm_struct *prev,
	struct mm_struct *next)
{
	switch_mm(prev, next, NULL);
}

static inline void deactivate_mm(struct task_struct *task,
	struct mm_struct *mm)
{
}

#endif /* _ASM_RISCV_MMU_CONTEXT_H */
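set_pgdir() packs the root page table's page frame number and a mode field into the sptbr CSR, and current_pgdir() reverses that. A stand-alone sketch of the same arithmetic, with made-up values standing in for SPTBR_PPN, SPTBR_MODE, and the root-table address (the real definitions live in the CSR header and depend on the paging mode):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define SPTBR_PPN	0x00000fffffffffffULL	/* assumed PPN mask, illustrative */
#define SPTBR_MODE	0x8000000000000000ULL	/* assumed mode field, illustrative */

int main(void)
{
	uint64_t pgd_phys = 0x80210000ULL;	/* hypothetical root table */

	/* set_pgdir(): store the root table's PFN plus the mode bits */
	uint64_t sptbr = (pgd_phys >> PAGE_SHIFT) | SPTBR_MODE;

	/* current_pgdir(): mask off the mode, recover the address */
	uint64_t back = (sptbr & SPTBR_PPN) << PAGE_SHIFT;

	printf("sptbr=%#llx pgd=%#llx\n",
	       (unsigned long long)sptbr, (unsigned long long)back);
	return back == pgd_phys ? 0 : 1;
}

Note that switch_mm() only rewrites sptbr and flushes the local TLB; this first version does no ASID management.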
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
new file mode 100644
index 000000000000..06cfbb3aacbb
--- /dev/null
+++ b/arch/riscv/include/asm/page.h
@@ -0,0 +1,130 @@
/*
 * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 * Copyright (C) 2017 SiFive
 * Copyright (C) 2017 XiaojingZhu <zhuxiaoj@ict.ac.cn>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _ASM_RISCV_PAGE_H
#define _ASM_RISCV_PAGE_H

#include <linux/pfn.h>
#include <linux/const.h>

#define PAGE_SHIFT	(12)
#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

/*
 * PAGE_OFFSET -- the first address of the first page of memory.
 * When not using MMU this corresponds to the first free page in
 * physical memory (aligned on a page boundary).
 */
#define PAGE_OFFSET	_AC(CONFIG_PAGE_OFFSET, UL)

#define KERN_VIRT_SIZE	(-PAGE_OFFSET)

#ifndef __ASSEMBLY__

#define PAGE_UP(addr)	(((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
#define PAGE_DOWN(addr)	((addr)&(~((PAGE_SIZE)-1)))

/* align addr on a size boundary - adjust address up/down if needed */
#define _ALIGN_UP(addr, size)	(((addr)+((size)-1))&(~((size)-1)))
#define _ALIGN_DOWN(addr, size)	((addr)&(~((size)-1)))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)	_ALIGN_UP(addr, size)

#define clear_page(pgaddr)	memset((pgaddr), 0, PAGE_SIZE)
#define copy_page(to, from)	memcpy((to), (from), PAGE_SIZE)

#define clear_user_page(pgaddr, vaddr, page)	memset((pgaddr), 0, PAGE_SIZE)
#define copy_user_page(vto, vfrom, vaddr, topg) \
	memcpy((vto), (vfrom), PAGE_SIZE)

/*
 * Use struct definitions to apply C type checking
 */

/* Page Global Directory entry */
typedef struct {
	unsigned long pgd;
} pgd_t;

/* Page Table entry */
typedef struct {
	unsigned long pte;
} pte_t;

typedef struct {
	unsigned long pgprot;
} pgprot_t;

typedef struct page *pgtable_t;

#define pte_val(x)	((x).pte)
#define pgd_val(x)	((x).pgd)
#define pgprot_val(x)	((x).pgprot)

#define __pte(x)	((pte_t) { (x) })
#define __pgd(x)	((pgd_t) { (x) })
#define __pgprot(x)	((pgprot_t) { (x) })

#ifdef CONFIG_64BIT
#define PTE_FMT "%016lx"
#else
#define PTE_FMT "%08lx"
#endif

extern unsigned long va_pa_offset;
extern unsigned long pfn_base;

extern unsigned long max_low_pfn;
extern unsigned long min_low_pfn;

#define __pa(x)		((unsigned long)(x) - va_pa_offset)
#define __va(x)		((void *)((unsigned long) (x) + va_pa_offset))

#define phys_to_pfn(phys)	(PFN_DOWN(phys))
#define pfn_to_phys(pfn)	(PFN_PHYS(pfn))

#define virt_to_pfn(vaddr)	(phys_to_pfn(__pa(vaddr)))
#define pfn_to_virt(pfn)	(__va(pfn_to_phys(pfn)))

#define virt_to_page(vaddr)	(pfn_to_page(virt_to_pfn(vaddr)))
#define page_to_virt(page)	(pfn_to_virt(page_to_pfn(page)))

#define page_to_phys(page)	(pfn_to_phys(page_to_pfn(page)))
#define page_to_bus(page)	(page_to_phys(page))
#define phys_to_page(paddr)	(pfn_to_page(phys_to_pfn(paddr)))

#define pfn_valid(pfn) \
	(((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))

#define ARCH_PFN_OFFSET		(pfn_base)

#endif /* __ASSEMBLY__ */

#define virt_addr_valid(vaddr)	(pfn_valid(virt_to_pfn(vaddr)))

#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)

#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>

/* vDSO support */
/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
#define __HAVE_ARCH_GATE_AREA

#endif /* _ASM_RISCV_PAGE_H */
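Because the kernel's linear map sits at a constant offset from physical memory, __pa()/__va() are a single subtraction or addition, and the pfn helpers add a shift by PAGE_SHIFT on top. A self-contained round-trip with an invented va_pa_offset (the real one is established at boot):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
static const uint64_t va_pa_offset = 0xffffffd800000000ULL;	/* hypothetical */

static uint64_t pa(uint64_t va)  { return va - va_pa_offset; }	/* __pa()  */
static uint64_t va(uint64_t pa_) { return pa_ + va_pa_offset; }	/* __va()  */

int main(void)
{
	uint64_t vaddr = va(0x80200000ULL);	/* linear-map alias of some RAM */
	uint64_t pfn = pa(vaddr) >> PAGE_SHIFT;	/* virt_to_pfn() */

	printf("va=%#llx pa=%#llx pfn=%#llx\n",
	       (unsigned long long)vaddr,
	       (unsigned long long)pa(vaddr),
	       (unsigned long long)pfn);
	/* pfn_to_virt() must invert virt_to_pfn() */
	return va(pfn << PAGE_SHIFT) == vaddr ? 0 : 1;
}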
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
new file mode 100644
index 000000000000..a79ed5faff3a
--- /dev/null
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -0,0 +1,124 @@
/*
 * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _ASM_RISCV_PGALLOC_H
#define _ASM_RISCV_PGALLOC_H

#include <linux/mm.h>
#include <asm/tlb.h>

static inline void pmd_populate_kernel(struct mm_struct *mm,
	pmd_t *pmd, pte_t *pte)
{
	unsigned long pfn = virt_to_pfn(pte);

	set_pmd(pmd, __pmd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}

static inline void pmd_populate(struct mm_struct *mm,
	pmd_t *pmd, pgtable_t pte)
{
	unsigned long pfn = virt_to_pfn(page_address(pte));

	set_pmd(pmd, __pmd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}

#ifndef __PAGETABLE_PMD_FOLDED
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
	unsigned long pfn = virt_to_pfn(pmd);

	set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
#endif /* __PAGETABLE_PMD_FOLDED */

#define pmd_pgtable(pmd)	pmd_page(pmd)

static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd;

	pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
	if (likely(pgd != NULL)) {
		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
		/* Copy kernel mappings */
		memcpy(pgd + USER_PTRS_PER_PGD,
			init_mm.pgd + USER_PTRS_PER_PGD,
			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
	}
	return pgd;
}

static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	free_page((unsigned long)pgd);
}

#ifndef __PAGETABLE_PMD_FOLDED

static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
	return (pmd_t *)__get_free_page(
		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
}

static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
	free_page((unsigned long)pmd);
}

#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)

#endif /* __PAGETABLE_PMD_FOLDED */

static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
	unsigned long address)
{
	return (pte_t *)__get_free_page(
		GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
}

static inline struct page *pte_alloc_one(struct mm_struct *mm,
	unsigned long address)
{
	struct page *pte;

	pte = alloc_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
	if (likely(pte != NULL))
		pgtable_page_ctor(pte);
	return pte;
}

static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
	free_page((unsigned long)pte);
}

static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
	pgtable_page_dtor(pte);
	__free_page(pte);
}

#define __pte_free_tlb(tlb, pte, buf)   \
do {                                    \
	pgtable_page_dtor(pte);         \
	tlb_remove_page((tlb), pte);    \
} while (0)

static inline void check_pgt_cache(void)
{
}

#endif /* _ASM_RISCV_PGALLOC_H */
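All three populate helpers above build the same kind of non-leaf entry: the child table's PFN shifted up by _PAGE_PFN_SHIFT, OR'd with _PAGE_TABLE, which is just the valid bit; leaving R/W/X clear is what marks the entry as a pointer to the next level rather than a leaf mapping. A sketch of that composition with an invented child-table address:

#include <stdio.h>
#include <stdint.h>

/* Bit values copied from pgtable-bits.h in this patch */
#define PAGE_SHIFT	12
#define _PAGE_PRESENT	(1 << 0)
#define _PAGE_TABLE	_PAGE_PRESENT
#define _PAGE_PFN_SHIFT	10

int main(void)
{
	uint64_t pte_table_phys = 0x80211000ULL;	/* hypothetical child table */
	uint64_t pfn = pte_table_phys >> PAGE_SHIFT;
	uint64_t pmd_entry = (pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE;

	/* V=1 with R/W/X all zero: "walk to the next level" */
	printf("pmd entry = %#llx\n", (unsigned long long)pmd_entry);
	return 0;
}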
diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h
new file mode 100644
index 000000000000..d61974b74182
--- /dev/null
+++ b/arch/riscv/include/asm/pgtable-32.h
@@ -0,0 +1,25 @@
/*
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _ASM_RISCV_PGTABLE_32_H
#define _ASM_RISCV_PGTABLE_32_H

#include <asm-generic/pgtable-nopmd.h>
#include <linux/const.h>

/* Size of region mapped by a page global directory */
#define PGDIR_SHIFT     22
#define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK      (~(PGDIR_SIZE - 1))

#endif /* _ASM_RISCV_PGTABLE_32_H */
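With the PMD folded away by pgtable-nopmd, Sv32 is a two-level scheme: each four-byte PGD entry points directly at a PTE page and, with PGDIR_SHIFT of 22, spans 4 MiB, so 1024 entries cover the whole 32-bit space. A sketch of that geometry, with uint32_t standing in for the RV32 pgd_t:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096UL
#define PGDIR_SHIFT	22
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)

typedef uint32_t pgd_entry;	/* stand-in for the 32-bit pgd_t */

int main(void)
{
	unsigned long ptrs_per_pgd = PAGE_SIZE / sizeof(pgd_entry);

	/* 1024 entries x 4 MiB each = the full 4 GiB Sv32 address space */
	printf("%lu entries, %lu MiB per entry\n",
	       ptrs_per_pgd, PGDIR_SIZE >> 20);
	return 0;
}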
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
new file mode 100644
index 000000000000..7aa0ea9bd8bb
--- /dev/null
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -0,0 +1,84 @@
/*
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _ASM_RISCV_PGTABLE_64_H
#define _ASM_RISCV_PGTABLE_64_H

#include <linux/const.h>

#define PGDIR_SHIFT     30
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK      (~(PGDIR_SIZE - 1))

#define PMD_SHIFT       21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK        (~(PMD_SIZE - 1))

/* Page Middle Directory entry */
typedef struct {
	unsigned long pmd;
} pmd_t;

#define pmd_val(x)      ((x).pmd)
#define __pmd(x)        ((pmd_t) { (x) })

#define PTRS_PER_PMD    (PAGE_SIZE / sizeof(pmd_t))

static inline int pud_present(pud_t pud)
{
	return (pud_val(pud) & _PAGE_PRESENT);
}

static inline int pud_none(pud_t pud)
{
	return (pud_val(pud) == 0);
}

static inline int pud_bad(pud_t pud)
{
	return !pud_present(pud);
}

static inline void set_pud(pud_t *pudp, pud_t pud)
{
	*pudp = pud;
}

static inline void pud_clear(pud_t *pudp)
{
	set_pud(pudp, __pud(0));
}

static inline unsigned long pud_page_vaddr(pud_t pud)
{
	return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
}

#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))

static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
{
	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
}

static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
	return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}

#define pmd_ERROR(e) \
	pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))

#endif /* _ASM_RISCV_PGTABLE_64_H */
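These constants give the usual Sv39 split: bits 38..30 index the PGD (1 GiB per entry), bits 29..21 the PMD (2 MiB), and bits 20..12 the PTE page. A sketch of how pgd_index()/pmd_index()/pte_index() slice an arbitrary sample address, with PTRS_PER_* hardcoded to their 4 KiB-page values:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PGDIR_SHIFT	30
#define PTRS_PER_PGD	512	/* PAGE_SIZE / sizeof(pgd_t), 8-byte entries */
#define PTRS_PER_PMD	512
#define PTRS_PER_PTE	512

int main(void)
{
	uint64_t addr = 0x12345678ULL;	/* arbitrary sample address */

	printf("pgd_index=%llu pmd_index=%llu pte_index=%llu\n",
	       (unsigned long long)((addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)),
	       (unsigned long long)((addr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)),
	       (unsigned long long)((addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)));
	return 0;
}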
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
new file mode 100644
index 000000000000..997ddbb1d370
--- /dev/null
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -0,0 +1,48 @@
/*
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _ASM_RISCV_PGTABLE_BITS_H
#define _ASM_RISCV_PGTABLE_BITS_H

/*
 * PTE format:
 * | XLEN-1  10 | 9             8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
 *       PFN      reserved for SW   D   A   G   U   X   W   R   V
 */

#define _PAGE_ACCESSED_OFFSET	6

#define _PAGE_PRESENT	(1 << 0)
#define _PAGE_READ	(1 << 1)	/* Readable */
#define _PAGE_WRITE	(1 << 2)	/* Writable */
#define _PAGE_EXEC	(1 << 3)	/* Executable */
#define _PAGE_USER	(1 << 4)	/* User */
#define _PAGE_GLOBAL	(1 << 5)	/* Global */
#define _PAGE_ACCESSED	(1 << 6)	/* Set by hardware on any access */
#define _PAGE_DIRTY	(1 << 7)	/* Set by hardware on any write */
#define _PAGE_SOFT	(1 << 8)	/* Reserved for software */

#define _PAGE_SPECIAL	_PAGE_SOFT
#define _PAGE_TABLE	_PAGE_PRESENT

#define _PAGE_PFN_SHIFT	10

/* Set of bits to preserve across pte_modify() */
#define _PAGE_CHG_MASK	(~(unsigned long)(_PAGE_PRESENT | _PAGE_READ |	\
					  _PAGE_WRITE | _PAGE_EXEC |	\
					  _PAGE_USER | _PAGE_GLOBAL))

/* Advertise support for _PAGE_SPECIAL */
#define __HAVE_ARCH_PTE_SPECIAL

#endif /* _ASM_RISCV_PGTABLE_BITS_H */
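A leaf mapping under this format is just the PFN shifted up by ten bits plus the permission flags; decoding shifts back down. A round-trip with invented values:

#include <stdio.h>
#include <stdint.h>

#define _PAGE_PRESENT	(1 << 0)
#define _PAGE_READ	(1 << 1)
#define _PAGE_WRITE	(1 << 2)
#define _PAGE_USER	(1 << 4)
#define _PAGE_PFN_SHIFT	10

int main(void)
{
	uint64_t pfn = 0x80321ULL;	/* hypothetical frame number */
	uint64_t pte = (pfn << _PAGE_PFN_SHIFT)
		     | _PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_USER;

	printf("pte=%#llx pfn=%#llx writable=%d\n",
	       (unsigned long long)pte,
	       (unsigned long long)(pte >> _PAGE_PFN_SHIFT),	/* pte_pfn() */
	       !!(pte & _PAGE_WRITE));				/* pte_write() */
	return 0;
}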
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
new file mode 100644
index 000000000000..3399257780b2
--- /dev/null
+++ b/arch/riscv/include/asm/pgtable.h
@@ -0,0 +1,430 @@
/*
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#ifndef _ASM_RISCV_PGTABLE_H
#define _ASM_RISCV_PGTABLE_H

#include <linux/mmzone.h>

#include <asm/pgtable-bits.h>

#ifndef __ASSEMBLY__

#ifdef CONFIG_MMU

/* Page Upper Directory not used in RISC-V */
#include <asm-generic/pgtable-nopud.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>

#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */

/* Number of entries in the page global directory */
#define PTRS_PER_PGD	(PAGE_SIZE / sizeof(pgd_t))
/* Number of entries in the page table */
#define PTRS_PER_PTE	(PAGE_SIZE / sizeof(pte_t))

/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
#define FIRST_USER_ADDRESS	0

/* Page protection bits */
#define _PAGE_BASE	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)

#define PAGE_NONE	__pgprot(0)
#define PAGE_READ	__pgprot(_PAGE_BASE | _PAGE_READ)
#define PAGE_WRITE	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
#define PAGE_EXEC	__pgprot(_PAGE_BASE | _PAGE_EXEC)
#define PAGE_READ_EXEC	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
#define PAGE_WRITE_EXEC	__pgprot(_PAGE_BASE | _PAGE_READ |	\
				 _PAGE_EXEC | _PAGE_WRITE)

#define PAGE_COPY		PAGE_READ
#define PAGE_COPY_EXEC		PAGE_EXEC
#define PAGE_COPY_READ_EXEC	PAGE_READ_EXEC
#define PAGE_SHARED		PAGE_WRITE
#define PAGE_SHARED_EXEC	PAGE_WRITE_EXEC

#define _PAGE_KERNEL		(_PAGE_READ \
				| _PAGE_WRITE \
				| _PAGE_PRESENT \
				| _PAGE_ACCESSED \
				| _PAGE_DIRTY)

#define PAGE_KERNEL		__pgprot(_PAGE_KERNEL)
#define PAGE_KERNEL_EXEC	__pgprot(_PAGE_KERNEL | _PAGE_EXEC)

extern pgd_t swapper_pg_dir[];

/* MAP_PRIVATE permissions: xwr (copy-on-write) */
#define __P000	PAGE_NONE
#define __P001	PAGE_READ
#define __P010	PAGE_COPY
#define __P011	PAGE_COPY
#define __P100	PAGE_EXEC
#define __P101	PAGE_READ_EXEC
#define __P110	PAGE_COPY_EXEC
#define __P111	PAGE_COPY_READ_EXEC

/* MAP_SHARED permissions: xwr */
#define __S000	PAGE_NONE
#define __S001	PAGE_READ
#define __S010	PAGE_SHARED
#define __S011	PAGE_SHARED
#define __S100	PAGE_EXEC
#define __S101	PAGE_READ_EXEC
#define __S110	PAGE_SHARED_EXEC
#define __S111	PAGE_SHARED_EXEC

/*
 * ZERO_PAGE is a global shared page that is always zero,
 * used for zero-mapped memory areas, etc.
 */
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))

static inline int pmd_present(pmd_t pmd)
{
	return (pmd_val(pmd) & _PAGE_PRESENT);
}

static inline int pmd_none(pmd_t pmd)
{
	return (pmd_val(pmd) == 0);
}

static inline int pmd_bad(pmd_t pmd)
{
	return !pmd_present(pmd);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	*pmdp = pmd;
}

static inline void pmd_clear(pmd_t *pmdp)
{
	set_pmd(pmdp, __pmd(0));
}


static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
{
	return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}

#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))

/* Locate an entry in the page global directory */
static inline pgd_t *pgd_offset(const struct mm_struct *mm, unsigned long addr)
{
	return mm->pgd + pgd_index(addr);
}
/* Locate an entry in the kernel page global directory */
#define pgd_offset_k(addr)	pgd_offset(&init_mm, (addr))

static inline struct page *pmd_page(pmd_t pmd)
{
	return pfn_to_page(pmd_val(pmd) >> _PAGE_PFN_SHIFT);
}

static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
	return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PAGE_PFN_SHIFT);
}

/* Yields the page frame number (PFN) of a page table entry */
static inline unsigned long pte_pfn(pte_t pte)
{
	return (pte_val(pte) >> _PAGE_PFN_SHIFT);
}

#define pte_page(x)	pfn_to_page(pte_pfn(x))

/* Constructs a page table entry */
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
	return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}

static inline pte_t mk_pte(struct page *page, pgprot_t prot)
{
	return pfn_pte(page_to_pfn(page), prot);
}

#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
{
	return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(addr);
}

#define pte_offset_map(dir, addr)	pte_offset_kernel((dir), (addr))
#define pte_unmap(pte)			((void)(pte))

/*
 * Certain architectures need to do special things when PTEs within
 * a page table are directly modified.  Thus, the following hook is
 * made available.
 */
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

static inline void set_pte_at(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep, pte_t pteval)
{
	set_pte(ptep, pteval);
}

static inline void pte_clear(struct mm_struct *mm,
	unsigned long addr, pte_t *ptep)
{
	set_pte_at(mm, addr, ptep, __pte(0));
}

static inline int pte_present(pte_t pte)
{
	return (pte_val(pte) & _PAGE_PRESENT);
}

static inline int pte_none(pte_t pte)
{
	return (pte_val(pte) == 0);
}

/* static inline int pte_read(pte_t pte) */

static inline int pte_write(pte_t pte)
{
	return pte_val(pte) & _PAGE_WRITE;
}

static inline int pte_huge(pte_t pte)
{
	return pte_present(pte)
		&& (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
}

/* static inline int pte_exec(pte_t pte) */

static inline int pte_dirty(pte_t pte)
{
	return pte_val(pte) & _PAGE_DIRTY;
}

static inline int pte_young(pte_t pte)
{
	return pte_val(pte) & _PAGE_ACCESSED;
}

static inline int pte_special(pte_t pte)
{
	return pte_val(pte) & _PAGE_SPECIAL;
}

/* static inline pte_t pte_rdprotect(pte_t pte) */

static inline pte_t pte_wrprotect(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_WRITE));
}

/* static inline pte_t pte_mkread(pte_t pte) */

static inline pte_t pte_mkwrite(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_WRITE);
}

/* static inline pte_t pte_mkexec(pte_t pte) */

static inline pte_t pte_mkdirty(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_DIRTY);
}

static inline pte_t pte_mkclean(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_DIRTY));
}

static inline pte_t pte_mkyoung(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_ACCESSED);
}

static inline pte_t pte_mkold(pte_t pte)
{
	return __pte(pte_val(pte) & ~(_PAGE_ACCESSED));
}

static inline pte_t pte_mkspecial(pte_t pte)
{
	return __pte(pte_val(pte) | _PAGE_SPECIAL);
}

/* Modify page protection bits */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}

#define pgd_ERROR(e) \
	pr_err("%s:%d: bad pgd " PTE_FMT ".\n", __FILE__, __LINE__, pgd_val(e))


/* Commit new configuration to MMU hardware */
static inline void update_mmu_cache(struct vm_area_struct *vma,
	unsigned long address, pte_t *ptep)
{
	/*
	 * The kernel assumes that TLBs don't cache invalid entries, but
	 * in RISC-V, SFENCE.VMA specifies an ordering constraint, not a
	 * cache flush; it is necessary even after writing invalid entries.
	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
	 */
	local_flush_tlb_page(address);
}

#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
	return pte_val(pte_a) == pte_val(pte_b);
}

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long address, pte_t *ptep,
					pte_t entry, int dirty)
{
	if (!pte_same(*ptep, entry))
		set_pte_at(vma->vm_mm, address, ptep, entry);
	/*
	 * update_mmu_cache will unconditionally execute, handling both
	 * the case that the PTE changed and the spurious fault case.
	 */
	return true;
}

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
	unsigned long address, pte_t *ptep)
{
	return __pte(atomic_long_xchg((atomic_long_t *)ptep, 0));
}

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
					    unsigned long address,
					    pte_t *ptep)
{
	if (!pte_young(*ptep))
		return 0;
	return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
	unsigned long address, pte_t *ptep)
{
	atomic_long_and(~(unsigned long)_PAGE_WRITE, (atomic_long_t *)ptep);
}

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
					 unsigned long address, pte_t *ptep)
{
	/*
	 * This comment is borrowed from x86, but applies equally to RISC-V:
	 *
	 * Clearing the accessed bit without a TLB flush
	 * doesn't cause data corruption. [ It could cause incorrect
	 * page aging and the (mistaken) reclaim of hot pages, but the
	 * chance of that should be relatively low. ]
	 *
	 * So as a performance optimization don't flush the TLB when
	 * clearing the accessed bit, it will eventually be flushed by
	 * a context switch or a VM operation anyway. [ In the rare
	 * event of it not getting flushed for a long time the delay
	 * shouldn't really matter because there's no real memory
	 * pressure for swapout to react to. ]
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}

/*
 * Encode and decode a swap entry
 *
 * Format of swap PTE:
 *	bit            0:	_PAGE_PRESENT (zero)
 *	bit            1:	reserved for future use (zero)
 *	bits      2 to 6:	swap type
 *	bits 7 to XLEN-1:	swap offset
 */
#define __SWP_TYPE_SHIFT	2
#define __SWP_TYPE_BITS		5
#define __SWP_TYPE_MASK		((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)

#define MAX_SWAPFILES_CHECK()	\
	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

#define __swp_type(x)	(((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK)
#define __swp_offset(x)	((x).val >> __SWP_OFFSET_SHIFT)
#define __swp_entry(type, offset) ((swp_entry_t) \
	{ ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) })

#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x)	((pte_t) { (x).val })

#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr)	(1) /* FIXME */
#endif

extern void paging_init(void);

static inline void pgtable_cache_init(void)
{
	/* No page table caches to initialize */
}

#endif /* CONFIG_MMU */

#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
#define VMALLOC_END	(PAGE_OFFSET - 1)
#define VMALLOC_START	(PAGE_OFFSET - VMALLOC_SIZE)

/*
 * Task size is 0x4000000000 for RV64 or 0xb800000 for RV32.
 * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
 */
#ifdef CONFIG_64BIT
#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
#else
#define TASK_SIZE VMALLOC_START
#endif

#include <asm-generic/pgtable.h>

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_RISCV_PGTABLE_H */
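The swap encoding above keeps bit 0 (_PAGE_PRESENT) clear so the hardware never walks a swapped-out entry, and packs the type and offset into the remaining bits. A round-trip sketch with arbitrary sample values:

#include <stdio.h>

#define __SWP_TYPE_SHIFT	2
#define __SWP_TYPE_BITS		5
#define __SWP_TYPE_MASK		((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)

int main(void)
{
	unsigned long type = 3, offset = 0x1234;	/* arbitrary sample */

	/* __swp_entry(): pack type and offset */
	unsigned long val = (type << __SWP_TYPE_SHIFT)
			  | (offset << __SWP_OFFSET_SHIFT);

	/* __swp_type() / __swp_offset(): unpack them again */
	printf("swp=%#lx type=%lu offset=%#lx present=%lu\n",
	       val,
	       (val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK,
	       val >> __SWP_OFFSET_SHIFT,
	       val & 1UL);	/* _PAGE_PRESENT stays zero */
	return 0;
}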
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
new file mode 100644
index 000000000000..df2ca3c65048
--- /dev/null
+++ b/arch/riscv/mm/fault.c
@@ -0,0 +1,282 @@
/*
 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
 *  Lennox Wu <lennox.wu@sunplusct.com>
 *  Chen Liqin <liqin.chen@sunplusct.com>
 * Copyright (C) 2012 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 */


#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>

#include <asm/pgalloc.h>
#include <asm/ptrace.h>
#include <asm/uaccess.h>

/*
 * This routine handles page faults.  It determines the address and the
 * problem, and then passes it off to one of the appropriate routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	unsigned long addr, cause;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int fault, code = SEGV_MAPERR;

	cause = regs->scause;
	addr = regs->sbadaddr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * Fault-in kernel-space virtual memory on-demand.
	 * The 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END)))
		goto vmalloc_fault;

	/* Enable interrupts if they were enabled in the parent context. */
	if (likely(regs->sstatus & SR_PIE))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context, or are running
	 * in an atomic region, then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (unlikely(!vma))
		goto bad_area;
	if (likely(vma->vm_start <= addr))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;
	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	code = SEGV_ACCERR;

	switch (cause) {
	case EXC_INST_PAGE_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_LOAD_PAGE_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_STORE_PAGE_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
		break;
	default:
		panic("%s: unhandled cause %lu", __func__, cause);
	}

	/*
	 * If for any reason at all we could not handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY);
			flags |= FAULT_FLAG_TRIED;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		do_trap(regs, SIGSEGV, code, addr, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
		"paging request", addr);
	die(regs, "Oops");
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, call the OOM killer, and return the userspace
	 * (which will retry the fault, or kill us if we got oom-killed).
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk);
	return;

vmalloc_fault:
	{
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		p4d_t *p4d, *p4d_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;
		int index;

		if (user_mode(regs))
			goto bad_area;

		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk->active_mm->pgd" here.
		 * We might be inside an interrupt in the middle
		 * of a task switch.
		 */
		index = pgd_index(addr);
		pgd = (pgd_t *)pfn_to_virt(csr_read(sptbr)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		/*
		 * Since the vmalloc area is global, it is unnecessary
		 * to copy individual PTEs
		 */
		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		/*
		 * Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses.  If we don't do this, this will just
		 * silently loop forever.
		 */
		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}
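For a feel of when this handler runs, here is a minimal user-space program (illustrative only) that exercises two of the cause values above: the first read of a fresh anonymous mapping takes a load page fault satisfied from the shared zero page, and the following write takes a store page fault that goes down the copy-on-write path:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* PAGE_SIZE is 4096 on RISC-V, per page.h above */
	volatile char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	char c = p[0];	/* EXC_LOAD_PAGE_FAULT: maps the zero page */
	p[0] = c + 1;	/* EXC_STORE_PAGE_FAULT: copy-on-write */

	munmap((void *)p, 4096);
	puts("both faults serviced");
	return 0;
}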