aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2008-06-16 07:30:00 -0400
committerIngo Molnar <mingo@elte.hu>2008-06-25 09:15:53 -0400
commit1ea0704e0da65b2b46f9142ff1391163aac24060 (patch)
tree420215f0876bc9cf3da5a21e5c5904611f821faa /include
parentd02859ecb321c8c0f74cb9bbe3f51a59e58822b0 (diff)
mm: add a ptep_modify_prot transaction abstraction
This patch adds an API for doing read-modify-write updates to a pte's protection bits which may race against hardware updates to the pte. After reading the pte, the hardware may asynchonously set the accessed or dirty bits on a pte, which would be lost when writing back the modified pte value. The existing technique to handle this race is to use ptep_get_and_clear() atomically fetch the old pte value and clear it in memory. This has the effect of marking the pte as non-present, which will prevent the hardware from updating its state. When the new value is written back, the pte will be present again, and the hardware can resume updating the access/dirty flags. When running in a virtualized environment, pagetable updates are relatively expensive, since they generally involve some trap into the hypervisor. To mitigate the cost of these updates, we tend to batch them. However, because of the atomic nature of ptep_get_and_clear(), it is inherently non-batchable. This new interface allows batching by giving the underlying implementation enough information to open a transaction between the read and write phases: ptep_modify_prot_start() returns the current pte value, and puts the pte entry into a state where either the hardware will not update the pte, or if it does, the updates will be preserved on commit. ptep_modify_prot_commit() writes back the updated pte, makes sure that any hardware updates made since ptep_modify_prot_start() are preserved. ptep_modify_prot_start() and _commit() must be exactly paired, and used while holding the appropriate pte lock. They do not protect against other software updates of the pte in any way. The current implementations of ptep_modify_prot_start and _commit are functionally unchanged from before: _start() uses ptep_get_and_clear() fetch the pte and zero the entry, preventing any hardware updates. _commit() simply writes the new pte value back knowing that the hardware has not updated the pte in the meantime. The only current user of this interface is mprotect Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgtable.h57
1 files changed, 57 insertions, 0 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 44ef329531c3..4fce3db2cecc 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -197,6 +197,63 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd)
197} 197}
198#endif /* CONFIG_MMU */ 198#endif /* CONFIG_MMU */
199 199
200static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
201 unsigned long addr,
202 pte_t *ptep)
203{
204 /*
205 * Get the current pte state, but zero it out to make it
206 * non-present, preventing the hardware from asynchronously
207 * updating it.
208 */
209 return ptep_get_and_clear(mm, addr, ptep);
210}
211
212static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
213 unsigned long addr,
214 pte_t *ptep, pte_t pte)
215{
216 /*
217 * The pte is non-present, so there's no hardware state to
218 * preserve.
219 */
220 set_pte_at(mm, addr, ptep, pte);
221}
222
223#ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
224/*
225 * Start a pte protection read-modify-write transaction, which
226 * protects against asynchronous hardware modifications to the pte.
227 * The intention is not to prevent the hardware from making pte
228 * updates, but to prevent any updates it may make from being lost.
229 *
230 * This does not protect against other software modifications of the
231 * pte; the appropriate pte lock must be held over the transation.
232 *
233 * Note that this interface is intended to be batchable, meaning that
234 * ptep_modify_prot_commit may not actually update the pte, but merely
235 * queue the update to be done at some later time. The update must be
236 * actually committed before the pte lock is released, however.
237 */
238static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
239 unsigned long addr,
240 pte_t *ptep)
241{
242 return __ptep_modify_prot_start(mm, addr, ptep);
243}
244
245/*
246 * Commit an update to a pte, leaving any hardware-controlled bits in
247 * the PTE unmodified.
248 */
249static inline void ptep_modify_prot_commit(struct mm_struct *mm,
250 unsigned long addr,
251 pte_t *ptep, pte_t pte)
252{
253 __ptep_modify_prot_commit(mm, addr, ptep, pte);
254}
255#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
256
200/* 257/*
201 * A facility to provide lazy MMU batching. This allows PTE updates and 258 * A facility to provide lazy MMU batching. This allows PTE updates and
202 * page invalidations to be delayed until a call to leave lazy MMU mode 259 * page invalidations to be delayed until a call to leave lazy MMU mode