Diffstat (limited to 'include')

 -rw-r--r--  include/linux/mm_types.h     |   4
 -rw-r--r--  include/linux/mmu_notifier.h | 279

 2 files changed, 283 insertions, 0 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 746f975b58ef..386edbe2cb4e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,6 +10,7 @@
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
+#include <linux/cpumask.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
@@ -253,6 +254,9 @@ struct mm_struct {
 	struct file *exe_file;
 	unsigned long num_exe_file_vmas;
 #endif
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier_mm *mmu_notifier_mm;
+#endif
 };
 
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
new file mode 100644
index 000000000000..b77486d152cd
--- /dev/null
+++ b/include/linux/mmu_notifier.h
@@ -0,0 +1,279 @@
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
 * The mmu_notifier_mm structure is allocated and installed in
 * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
 * critical section and is released only when mm_count reaches zero
 * in mmdrop().
 */
struct mmu_notifier_mm {
	/* all mmu notifiers registered in this mm are queued in this list */
	struct hlist_head list;
	/* to serialize the list modifications and hlist_unhashed */
	spinlock_t lock;
};

struct mmu_notifier_ops {
	/*
	 * Called either by mmu_notifier_unregister or when the mm is
	 * being destroyed by exit_mmap, always before all pages are
	 * freed. This can run concurrently with other mmu notifier
	 * methods (the ones invoked outside the mm context) and it
	 * should tear down all secondary mmu mappings and freeze the
	 * secondary mmu. If this method isn't implemented you have to
	 * be sure that nothing could possibly write to the pages
	 * through the secondary mmu by the time the last thread with
	 * tsk->mm == mm exits.
	 *
	 * As a side note: the pages freed after ->release returns could
	 * be immediately reallocated by the gart at an alias physical
	 * address with a different cache model, so if ->release isn't
	 * implemented because all _software_ driven memory accesses
	 * through the secondary mmu are terminated by the time the
	 * last thread of this mm quits, you also have to be sure that
	 * speculative _hardware_ operations can't allocate dirty
	 * cachelines in the cpu that could not be snooped and made
	 * coherent with the other read and write operations happening
	 * through the gart alias address, thus leading to memory
	 * corruption.
	 */
	void (*release)(struct mmu_notifier *mn,
			struct mm_struct *mm);

	/*
	 * clear_flush_young is called after the VM test-and-clears
	 * the young/accessed bitflag in the pte. This way the VM will
	 * provide proper aging to the accesses to the page through the
	 * secondary MMUs and not only to the ones through the Linux pte.
	 */
	int (*clear_flush_young)(struct mmu_notifier *mn,
				 struct mm_struct *mm,
				 unsigned long address);

	/*
	 * Before this is invoked any secondary MMU is still ok to
	 * read/write to the page previously pointed to by the Linux
	 * pte because the page hasn't been freed yet and it won't be
	 * freed until this returns. If required, set_page_dirty has to
	 * be called internally by this method.
	 */
	void (*invalidate_page)(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address);

	/*
	 * invalidate_range_start() and invalidate_range_end() must be
	 * paired and are called only when the mmap_sem and/or the
	 * locks protecting the reverse maps are held. The subsystem
	 * must guarantee that no additional references are taken to
	 * the pages in the range established between the call to
	 * invalidate_range_start() and the matching call to
	 * invalidate_range_end().
	 *
	 * Invalidation of multiple concurrent ranges may be
	 * optionally permitted by the driver. Either way the
	 * establishment of sptes is forbidden in the range passed to
	 * invalidate_range_start/end for the whole duration of the
	 * invalidate_range_start/end critical section.
	 *
	 * invalidate_range_start() is called when all pages in the
	 * range are still mapped and have at least a refcount of one.
	 *
	 * invalidate_range_end() is called when all pages in the
	 * range have been unmapped and the pages have been freed by
	 * the VM.
	 *
	 * The VM will remove the page table entries and potentially
	 * the page between invalidate_range_start() and
	 * invalidate_range_end(). If the page must not be freed
	 * because of pending I/O or other circumstances then the
	 * invalidate_range_start() callback (or the initial mapping
	 * by the driver) must make sure that the refcount is kept
	 * elevated.
	 *
	 * If the driver increases the refcount when the pages are
	 * initially mapped into an address space then either
	 * invalidate_range_start() or invalidate_range_end() may
	 * decrease the refcount. If the refcount is decreased on
	 * invalidate_range_start() then the VM can free pages as page
	 * table entries are removed. If the refcount is only
	 * dropped on invalidate_range_end() then the driver itself
	 * will drop the last refcount, but it must take care to flush
	 * any secondary tlb before doing the final free on the
	 * page. Pages will no longer be referenced by the Linux
	 * address space but may still be referenced by sptes until
	 * the last refcount is dropped.
	 */
	void (*invalidate_range_start)(struct mmu_notifier *mn,
				       struct mm_struct *mm,
				       unsigned long start, unsigned long end);
	void (*invalidate_range_end)(struct mmu_notifier *mn,
				     struct mm_struct *mm,
				     unsigned long start, unsigned long end);
};
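
[Editor's note: to make the callback contract above concrete, here is a minimal driver-side sketch of how such an ops table might be filled in. The my_drv_* names, the struct my_drv_ctx container and its flush helpers are assumptions used only for illustration; they are not part of this patch.]

	/* Hypothetical secondary-MMU driver sketch (illustrative only). */
	struct my_drv_ctx {
		struct mmu_notifier notifier;	/* embedded in driver state */
		/* ... secondary mmu page tables, tlb handle, ... */
	};

	static void my_drv_release(struct mmu_notifier *mn, struct mm_struct *mm)
	{
		struct my_drv_ctx *ctx = container_of(mn, struct my_drv_ctx, notifier);
		/* tear down all secondary mmu mappings and freeze the secondary mmu */
		my_drv_drop_all_mappings(ctx);	/* assumed driver helper */
	}

	static void my_drv_invalidate_range_start(struct mmu_notifier *mn,
						  struct mm_struct *mm,
						  unsigned long start,
						  unsigned long end)
	{
		struct my_drv_ctx *ctx = container_of(mn, struct my_drv_ctx, notifier);
		/* stop establishing sptes in [start, end) and flush the
		 * secondary tlb before the VM unmaps and frees the pages */
		my_drv_flush_range(ctx, start, end);	/* assumed driver helper */
	}

	static const struct mmu_notifier_ops my_drv_ops = {
		.release		= my_drv_release,
		.invalidate_range_start	= my_drv_invalidate_range_start,
		/* .clear_flush_young, .invalidate_page and
		 * .invalidate_range_end would follow the same pattern */
	};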

/*
 * The notifier chains are protected by mmap_sem and/or the reverse map
 * semaphores. Notifier chains are only changed when all reverse maps and
 * the mmap_sem locks are taken.
 *
 * Therefore notifier chains can only be traversed when either
 *
 * 1. mmap_sem is held.
 * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
 * 3. No other concurrent thread can access the list (release).
 */
struct mmu_notifier {
	struct hlist_node hlist;
	const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
	return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
				 struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
				   struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
				    struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
					    unsigned long address);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
					   unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
						  unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						unsigned long start, unsigned long end);

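[Editor's note: building on the my_drv_ctx sketch above, a plausible registration sequence for the API just declared could look like the following. The attach/detach functions are assumptions for illustration; the double-underscore __mmu_notifier_register() appears to follow the usual kernel convention of expecting the caller to already hold mmap_sem for write, while the plain variant takes the lock itself.]

	/* Hypothetical attach/detach paths (illustrative only). */
	static int my_drv_attach(struct my_drv_ctx *ctx, struct mm_struct *mm)
	{
		ctx->notifier.ops = &my_drv_ops;
		return mmu_notifier_register(&ctx->notifier, mm);
	}

	static void my_drv_detach(struct my_drv_ctx *ctx, struct mm_struct *mm)
	{
		/* drop the notifier once the driver is done with this mm */
		mmu_notifier_unregister(&ctx->notifier, mm);
	}
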
static inline void mmu_notifier_release(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long address)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_flush_young(mm, address);
	return 0;
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
							unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						      unsigned long start, unsigned long end)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_invalidate_range_end(mm, start, end);
}
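
[Editor's note: these inline wrappers are what core VM paths call; schematically, the bracketing described in the invalidate_range_start/end comment above looks like this. The unmap step is pseudocode, not a function from this patch.]

	mmu_notifier_invalidate_range_start(mm, start, end);
	/* ... clear page table entries and free pages in [start, end) ... */
	mmu_notifier_invalidate_range_end(mm, start, end);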

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
	mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	if (mm_has_notifiers(mm))
		__mmu_notifier_mm_destroy(mm);
}

/*
 * These two macros will eventually replace ptep_clear_flush.
 * ptep_clear_flush is itself implemented as a macro, so these are also
 * implemented as macros until ptep_clear_flush is converted to an
 * inline function, to diminish the risk of compilation failure. The
 * invalidate_page method can over time be moved outside the PT lock
 * and these two macros can then be removed.
 */
#define ptep_clear_flush_notify(__vma, __address, __ptep)		\
({									\
	pte_t __pte;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__pte = ptep_clear_flush(___vma, ___address, __ptep);		\
	mmu_notifier_invalidate_page(___vma->vm_mm, ___address);	\
	__pte;								\
})

#define ptep_clear_flush_young_notify(__vma, __address, __ptep)	\
({									\
	int __young;							\
	struct vm_area_struct *___vma = __vma;				\
	unsigned long ___address = __address;				\
	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
						  ___address);		\
	__young;							\
})
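
[Editor's note: call sites use the notify variants exactly like the primitives they wrap; a sketch of what a converted rmap-style call might look like, with vma, address, ptep and the result variables assumed from the surrounding code rather than defined by this patch:]

	/* before: pte = ptep_clear_flush(vma, address, ptep); */
	pte = ptep_clear_flush_notify(vma, address, ptep);

	/* before: young = ptep_clear_flush_young(vma, address, ptep); */
	young = ptep_clear_flush_young_notify(vma, address, ptep);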

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
						 unsigned long address)
{
	return 0;
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
						unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
							unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
						      unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define ptep_clear_flush_notify ptep_clear_flush

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */