diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-03 17:32:37 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-03-03 17:32:37 -0500 |
commit | f2a4165526a9746afc4ba5413b1756614a49d450 (patch) | |
tree | d5cdf4ee27f77bfbfcb133666a538670accc9a57 | |
parent | b24746c7be75384d182845375c96433d713981bb (diff) | |
parent | 340430c572f7b2b275d39965e88bafa71693cb23 (diff) |
Merge branch 'tracing/mmiotrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'tracing/mmiotrace' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86 mmiotrace: fix race with release_kmmio_fault_page()
  x86 mmiotrace: improve handling of secondary faults
  x86 mmiotrace: split set_page_presence()
  x86 mmiotrace: fix save/restore page table state
  x86 mmiotrace: WARN_ONCE if dis/arming a page fails
  x86: add far read test to testmmiotrace
  x86: count errors in testmmiotrace.ko
-rw-r--r-- | arch/x86/mm/kmmio.c | 149 | ||||
-rw-r--r-- | arch/x86/mm/testmmiotrace.c | 70 |
2 files changed, 153 insertions, 66 deletions
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93d82038af4b..9f205030d9aa 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -32,11 +32,14 @@ struct kmmio_fault_page { | |||
32 | struct list_head list; | 32 | struct list_head list; |
33 | struct kmmio_fault_page *release_next; | 33 | struct kmmio_fault_page *release_next; |
34 | unsigned long page; /* location of the fault page */ | 34 | unsigned long page; /* location of the fault page */ |
35 | bool old_presence; /* page presence prior to arming */ | ||
36 | bool armed; | ||
35 | 37 | ||
36 | /* | 38 | /* |
37 | * Number of times this page has been registered as a part | 39 | * Number of times this page has been registered as a part |
38 | * of a probe. If zero, page is disarmed and this may be freed. | 40 | * of a probe. If zero, page is disarmed and this may be freed. |
39 | * Used only by writers (RCU). | 41 | * Used only by writers (RCU) and post_kmmio_handler(). |
42 | * Protected by kmmio_lock, when linked into kmmio_page_table. | ||
40 | */ | 43 | */ |
41 | int count; | 44 | int count; |
42 | }; | 45 | }; |
@@ -105,57 +108,85 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) | |||
105 | return NULL; | 108 | return NULL; |
106 | } | 109 | } |
107 | 110 | ||
108 | static void set_page_present(unsigned long addr, bool present, | 111 | static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) |
109 | unsigned int *pglevel) | 112 | { |
113 | pmdval_t v = pmd_val(*pmd); | ||
114 | *old = !!(v & _PAGE_PRESENT); | ||
115 | v &= ~_PAGE_PRESENT; | ||
116 | if (present) | ||
117 | v |= _PAGE_PRESENT; | ||
118 | set_pmd(pmd, __pmd(v)); | ||
119 | } | ||
120 | |||
121 | static void set_pte_presence(pte_t *pte, bool present, bool *old) | ||
122 | { | ||
123 | pteval_t v = pte_val(*pte); | ||
124 | *old = !!(v & _PAGE_PRESENT); | ||
125 | v &= ~_PAGE_PRESENT; | ||
126 | if (present) | ||
127 | v |= _PAGE_PRESENT; | ||
128 | set_pte_atomic(pte, __pte(v)); | ||
129 | } | ||
130 | |||
131 | static int set_page_presence(unsigned long addr, bool present, bool *old) | ||
110 | { | 132 | { |
111 | pteval_t pteval; | ||
112 | pmdval_t pmdval; | ||
113 | unsigned int level; | 133 | unsigned int level; |
114 | pmd_t *pmd; | ||
115 | pte_t *pte = lookup_address(addr, &level); | 134 | pte_t *pte = lookup_address(addr, &level); |
116 | 135 | ||
117 | if (!pte) { | 136 | if (!pte) { |
118 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); | 137 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); |
119 | return; | 138 | return -1; |
120 | } | 139 | } |
121 | 140 | ||
122 | if (pglevel) | ||
123 | *pglevel = level; | ||
124 | |||
125 | switch (level) { | 141 | switch (level) { |
126 | case PG_LEVEL_2M: | 142 | case PG_LEVEL_2M: |
127 | pmd = (pmd_t *)pte; | 143 | set_pmd_presence((pmd_t *)pte, present, old); |
128 | pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; | ||
129 | if (present) | ||
130 | pmdval |= _PAGE_PRESENT; | ||
131 | set_pmd(pmd, __pmd(pmdval)); | ||
132 | break; | 144 | break; |
133 | |||
134 | case PG_LEVEL_4K: | 145 | case PG_LEVEL_4K: |
135 | pteval = pte_val(*pte) & ~_PAGE_PRESENT; | 146 | set_pte_presence(pte, present, old); |
136 | if (present) | ||
137 | pteval |= _PAGE_PRESENT; | ||
138 | set_pte_atomic(pte, __pte(pteval)); | ||
139 | break; | 147 | break; |
140 | |||
141 | default: | 148 | default: |
142 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | 149 | pr_err("kmmio: unexpected page level 0x%x.\n", level); |
143 | return; | 150 | return -1; |
144 | } | 151 | } |
145 | 152 | ||
146 | __flush_tlb_one(addr); | 153 | __flush_tlb_one(addr); |
154 | return 0; | ||
147 | } | 155 | } |
148 | 156 | ||
149 | /** Mark the given page as not present. Access to it will trigger a fault. */ | 157 | /* |
150 | static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | 158 | * Mark the given page as not present. Access to it will trigger a fault. |
159 | * | ||
160 | * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the | ||
161 | * protection is ignored here. RCU read lock is assumed held, so the struct | ||
162 | * will not disappear unexpectedly. Furthermore, the caller must guarantee, | ||
163 | * that double arming the same virtual address (page) cannot occur. | ||
164 | * | ||
165 | * Double disarming on the other hand is allowed, and may occur when a fault | ||
166 | * and mmiotrace shutdown happen simultaneously. | ||
167 | */ | ||
168 | static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | ||
151 | { | 169 | { |
152 | set_page_present(page & PAGE_MASK, false, pglevel); | 170 | int ret; |
171 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); | ||
172 | if (f->armed) { | ||
173 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", | ||
174 | f->page, f->count, f->old_presence); | ||
175 | } | ||
176 | ret = set_page_presence(f->page, false, &f->old_presence); | ||
177 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); | ||
178 | f->armed = true; | ||
179 | return ret; | ||
153 | } | 180 | } |
154 | 181 | ||
155 | /** Mark the given page as present. */ | 182 | /** Restore the given page to saved presence state. */ |
156 | static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | 183 | static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) |
157 | { | 184 | { |
158 | set_page_present(page & PAGE_MASK, true, pglevel); | 185 | bool tmp; |
186 | int ret = set_page_presence(f->page, f->old_presence, &tmp); | ||
187 | WARN_ONCE(ret < 0, | ||
188 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); | ||
189 | f->armed = false; | ||
159 | } | 190 | } |
160 | 191 | ||
161 | /* | 192 | /* |
@@ -202,28 +233,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
202 | 233 | ||
203 | ctx = &get_cpu_var(kmmio_ctx); | 234 | ctx = &get_cpu_var(kmmio_ctx); |
204 | if (ctx->active) { | 235 | if (ctx->active) { |
205 | disarm_kmmio_fault_page(faultpage->page, NULL); | ||
206 | if (addr == ctx->addr) { | 236 | if (addr == ctx->addr) { |
207 | /* | 237 | /* |
208 | * On SMP we sometimes get recursive probe hits on the | 238 | * A second fault on the same page means some other |
209 | * same address. Context is already saved, fall out. | 239 | * condition needs handling by do_page_fault(), the |
240 | * page really not being present is the most common. | ||
210 | */ | 241 | */ |
211 | pr_debug("kmmio: duplicate probe hit on CPU %d, for " | 242 | pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", |
212 | "address 0x%08lx.\n", | 243 | addr, smp_processor_id()); |
213 | smp_processor_id(), addr); | 244 | |
214 | ret = 1; | 245 | if (!faultpage->old_presence) |
215 | goto no_kmmio_ctx; | 246 | pr_info("kmmio: unexpected secondary hit for " |
216 | } | 247 | "address 0x%08lx on CPU %d.\n", addr, |
217 | /* | 248 | smp_processor_id()); |
218 | * Prevent overwriting already in-flight context. | 249 | } else { |
219 | * This should not happen, let's hope disarming at least | 250 | /* |
220 | * prevents a panic. | 251 | * Prevent overwriting already in-flight context. |
221 | */ | 252 | * This should not happen, let's hope disarming at |
222 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | 253 | * least prevents a panic. |
254 | */ | ||
255 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | ||
223 | "for address 0x%08lx. Ignoring.\n", | 256 | "for address 0x%08lx. Ignoring.\n", |
224 | smp_processor_id(), addr); | 257 | smp_processor_id(), addr); |
225 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", | 258 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", |
226 | ctx->addr); | 259 | ctx->addr); |
260 | disarm_kmmio_fault_page(faultpage); | ||
261 | } | ||
227 | goto no_kmmio_ctx; | 262 | goto no_kmmio_ctx; |
228 | } | 263 | } |
229 | ctx->active++; | 264 | ctx->active++; |
@@ -244,7 +279,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) | |||
244 | regs->flags &= ~X86_EFLAGS_IF; | 279 | regs->flags &= ~X86_EFLAGS_IF; |
245 | 280 | ||
246 | /* Now we set present bit in PTE and single step. */ | 281 | /* Now we set present bit in PTE and single step. */ |
247 | disarm_kmmio_fault_page(ctx->fpage->page, NULL); | 282 | disarm_kmmio_fault_page(ctx->fpage); |
248 | 283 | ||
249 | /* | 284 | /* |
250 | * If another cpu accesses the same page while we are stepping, | 285 | * If another cpu accesses the same page while we are stepping, |
@@ -275,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
275 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); | 310 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); |
276 | 311 | ||
277 | if (!ctx->active) { | 312 | if (!ctx->active) { |
278 | pr_debug("kmmio: spurious debug trap on CPU %d.\n", | 313 | pr_warning("kmmio: spurious debug trap on CPU %d.\n", |
279 | smp_processor_id()); | 314 | smp_processor_id()); |
280 | goto out; | 315 | goto out; |
281 | } | 316 | } |
@@ -283,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
283 | if (ctx->probe && ctx->probe->post_handler) | 318 | if (ctx->probe && ctx->probe->post_handler) |
284 | ctx->probe->post_handler(ctx->probe, condition, regs); | 319 | ctx->probe->post_handler(ctx->probe, condition, regs); |
285 | 320 | ||
286 | arm_kmmio_fault_page(ctx->fpage->page, NULL); | 321 | /* Prevent racing against release_kmmio_fault_page(). */ |
322 | spin_lock(&kmmio_lock); | ||
323 | if (ctx->fpage->count) | ||
324 | arm_kmmio_fault_page(ctx->fpage); | ||
325 | spin_unlock(&kmmio_lock); | ||
287 | 326 | ||
288 | regs->flags &= ~X86_EFLAGS_TF; | 327 | regs->flags &= ~X86_EFLAGS_TF; |
289 | regs->flags |= ctx->saved_flags; | 328 | regs->flags |= ctx->saved_flags; |
@@ -315,20 +354,24 @@ static int add_kmmio_fault_page(unsigned long page) | |||
315 | f = get_kmmio_fault_page(page); | 354 | f = get_kmmio_fault_page(page); |
316 | if (f) { | 355 | if (f) { |
317 | if (!f->count) | 356 | if (!f->count) |
318 | arm_kmmio_fault_page(f->page, NULL); | 357 | arm_kmmio_fault_page(f); |
319 | f->count++; | 358 | f->count++; |
320 | return 0; | 359 | return 0; |
321 | } | 360 | } |
322 | 361 | ||
323 | f = kmalloc(sizeof(*f), GFP_ATOMIC); | 362 | f = kzalloc(sizeof(*f), GFP_ATOMIC); |
324 | if (!f) | 363 | if (!f) |
325 | return -1; | 364 | return -1; |
326 | 365 | ||
327 | f->count = 1; | 366 | f->count = 1; |
328 | f->page = page; | 367 | f->page = page; |
329 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | ||
330 | 368 | ||
331 | arm_kmmio_fault_page(f->page, NULL); | 369 | if (arm_kmmio_fault_page(f)) { |
370 | kfree(f); | ||
371 | return -1; | ||
372 | } | ||
373 | |||
374 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | ||
332 | 375 | ||
333 | return 0; | 376 | return 0; |
334 | } | 377 | } |
@@ -347,7 +390,7 @@ static void release_kmmio_fault_page(unsigned long page, | |||
347 | f->count--; | 390 | f->count--; |
348 | BUG_ON(f->count < 0); | 391 | BUG_ON(f->count < 0); |
349 | if (!f->count) { | 392 | if (!f->count) { |
350 | disarm_kmmio_fault_page(f->page, NULL); | 393 | disarm_kmmio_fault_page(f); |
351 | f->release_next = *release_list; | 394 | f->release_next = *release_list; |
352 | *release_list = f; | 395 | *release_list = f; |
353 | } | 396 | } |
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index ab50a8d7402c..427fd1b56df5 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Written by Pekka Paalanen, 2008 <pq@iki.fi> | 2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> |
3 | */ | 3 | */ |
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/io.h> | 5 | #include <linux/io.h> |
@@ -9,35 +9,74 @@ | |||
9 | 9 | ||
10 | static unsigned long mmio_address; | 10 | static unsigned long mmio_address; |
11 | module_param(mmio_address, ulong, 0); | 11 | module_param(mmio_address, ulong, 0); |
12 | MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); | 12 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " |
13 | "(or 8 MB if read_far is non-zero)."); | ||
14 | |||
15 | static unsigned long read_far = 0x400100; | ||
16 | module_param(read_far, ulong, 0); | ||
17 | MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB " | ||
18 | "(default: 0x400100)."); | ||
19 | |||
20 | static unsigned v16(unsigned i) | ||
21 | { | ||
22 | return i * 12 + 7; | ||
23 | } | ||
24 | |||
25 | static unsigned v32(unsigned i) | ||
26 | { | ||
27 | return i * 212371 + 13; | ||
28 | } | ||
13 | 29 | ||
14 | static void do_write_test(void __iomem *p) | 30 | static void do_write_test(void __iomem *p) |
15 | { | 31 | { |
16 | unsigned int i; | 32 | unsigned int i; |
33 | pr_info(MODULE_NAME ": write test.\n"); | ||
17 | mmiotrace_printk("Write test.\n"); | 34 | mmiotrace_printk("Write test.\n"); |
35 | |||
18 | for (i = 0; i < 256; i++) | 36 | for (i = 0; i < 256; i++) |
19 | iowrite8(i, p + i); | 37 | iowrite8(i, p + i); |
38 | |||
20 | for (i = 1024; i < (5 * 1024); i += 2) | 39 | for (i = 1024; i < (5 * 1024); i += 2) |
21 | iowrite16(i * 12 + 7, p + i); | 40 | iowrite16(v16(i), p + i); |
41 | |||
22 | for (i = (5 * 1024); i < (16 * 1024); i += 4) | 42 | for (i = (5 * 1024); i < (16 * 1024); i += 4) |
23 | iowrite32(i * 212371 + 13, p + i); | 43 | iowrite32(v32(i), p + i); |
24 | } | 44 | } |
25 | 45 | ||
26 | static void do_read_test(void __iomem *p) | 46 | static void do_read_test(void __iomem *p) |
27 | { | 47 | { |
28 | unsigned int i; | 48 | unsigned int i; |
49 | unsigned errs[3] = { 0 }; | ||
50 | pr_info(MODULE_NAME ": read test.\n"); | ||
29 | mmiotrace_printk("Read test.\n"); | 51 | mmiotrace_printk("Read test.\n"); |
52 | |||
30 | for (i = 0; i < 256; i++) | 53 | for (i = 0; i < 256; i++) |
31 | ioread8(p + i); | 54 | if (ioread8(p + i) != i) |
55 | ++errs[0]; | ||
56 | |||
32 | for (i = 1024; i < (5 * 1024); i += 2) | 57 | for (i = 1024; i < (5 * 1024); i += 2) |
33 | ioread16(p + i); | 58 | if (ioread16(p + i) != v16(i)) |
59 | ++errs[1]; | ||
60 | |||
34 | for (i = (5 * 1024); i < (16 * 1024); i += 4) | 61 | for (i = (5 * 1024); i < (16 * 1024); i += 4) |
35 | ioread32(p + i); | 62 | if (ioread32(p + i) != v32(i)) |
63 | ++errs[2]; | ||
64 | |||
65 | mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n", | ||
66 | errs[0], errs[1], errs[2]); | ||
36 | } | 67 | } |
37 | 68 | ||
38 | static void do_test(void) | 69 | static void do_read_far_test(void __iomem *p) |
39 | { | 70 | { |
40 | void __iomem *p = ioremap_nocache(mmio_address, 0x4000); | 71 | pr_info(MODULE_NAME ": read far test.\n"); |
72 | mmiotrace_printk("Read far test.\n"); | ||
73 | |||
74 | ioread32(p + read_far); | ||
75 | } | ||
76 | |||
77 | static void do_test(unsigned long size) | ||
78 | { | ||
79 | void __iomem *p = ioremap_nocache(mmio_address, size); | ||
41 | if (!p) { | 80 | if (!p) { |
42 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); | 81 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); |
43 | return; | 82 | return; |
@@ -45,11 +84,15 @@ static void do_test(void) | |||
45 | mmiotrace_printk("ioremap returned %p.\n", p); | 84 | mmiotrace_printk("ioremap returned %p.\n", p); |
46 | do_write_test(p); | 85 | do_write_test(p); |
47 | do_read_test(p); | 86 | do_read_test(p); |
87 | if (read_far && read_far < size - 4) | ||
88 | do_read_far_test(p); | ||
48 | iounmap(p); | 89 | iounmap(p); |
49 | } | 90 | } |
50 | 91 | ||
51 | static int __init init(void) | 92 | static int __init init(void) |
52 | { | 93 | { |
94 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); | ||
95 | |||
53 | if (mmio_address == 0) { | 96 | if (mmio_address == 0) { |
54 | pr_err(MODULE_NAME ": you have to use the module argument " | 97 | pr_err(MODULE_NAME ": you have to use the module argument " |
55 | "mmio_address.\n"); | 98 | "mmio_address.\n"); |
@@ -58,10 +101,11 @@ static int __init init(void) | |||
58 | return -ENXIO; | 101 | return -ENXIO; |
59 | } | 102 | } |
60 | 103 | ||
61 | pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " | 104 | pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " |
62 | "in PCI address space, and writing " | 105 | "address space, and writing 16 kB of rubbish in there.\n", |
63 | "rubbish in there.\n", mmio_address); | 106 | size >> 10, mmio_address); |
64 | do_test(); | 107 | do_test(size); |
108 | pr_info(MODULE_NAME ": All done.\n"); | ||
65 | return 0; | 109 | return 0; |
66 | } | 110 | } |
67 | 111 | ||