author		Linus Torvalds <torvalds@linux-foundation.org>	2012-01-15 14:26:35 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-15 14:26:35 -0500
commit		83c2f912b43c3a7babbb6cb7ae2a5276c1ed2a3e (patch)
tree		eaa7f50dea154d9f19721db69c7adde64d48848f /arch
parent		f0ed5b9a28536b8be2f578a9450cfa42ab31ccf8 (diff)
parent		172d1b0b73256551f100fc00c69e356d047103f5 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (39 commits)
  perf tools: Fix compile error on x86_64 Ubuntu
  perf report: Fix --stdio output alignment when --showcpuutilization used
  perf annotate: Get rid of field_sep check
  perf annotate: Fix usage string
  perf kmem: Fix a memory leak
  perf kmem: Add missing closedir() calls
  perf top: Add error message for EMFILE
  perf test: Change type of '-v' option to INCR
  perf script: Add missing closedir() calls
  tracing: Fix compile error when static ftrace is enabled
  recordmcount: Fix handling of elf64 big-endian objects.
  perf tools: Add const.h to MANIFEST to make perf-tar-src-pkg work again
  perf tools: Add support for guest/host-only profiling
  perf kvm: Do guest-only counting by default
  perf top: Don't update total_period on process_sample
  perf hists: Stop using 'self' for struct hist_entry
  perf hists: Rename total_session to total_period
  x86: Add counter when debug stack is used with interrupts enabled
  x86: Allow NMIs to hit breakpoints in i386
  x86: Keep current stack in NMI breakpoints
  ...
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/include/asm/debugreg.h	 22
-rw-r--r--	arch/x86/include/asm/desc.h	 12
-rw-r--r--	arch/x86/kernel/cpu/common.c	 24
-rw-r--r--	arch/x86/kernel/entry_64.S	218
-rw-r--r--	arch/x86/kernel/head_64.S	  4
-rw-r--r--	arch/x86/kernel/nmi.c	102
-rw-r--r--	arch/x86/kernel/traps.c	 20
7 files changed, 369 insertions, 33 deletions
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 078ad0caefc..b903d5ea394 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -101,6 +101,28 @@ extern void aout_dump_debugregs(struct user *dump);
 
 extern void hw_breakpoint_restore(void);
 
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(int, debug_stack_usage);
+static inline void debug_stack_usage_inc(void)
+{
+	__get_cpu_var(debug_stack_usage)++;
+}
+static inline void debug_stack_usage_dec(void)
+{
+	__get_cpu_var(debug_stack_usage)--;
+}
+int is_debug_stack(unsigned long addr);
+void debug_stack_set_zero(void);
+void debug_stack_reset(void);
+#else /* !X86_64 */
+static inline int is_debug_stack(unsigned long addr) { return 0; }
+static inline void debug_stack_set_zero(void) { }
+static inline void debug_stack_reset(void) { }
+static inline void debug_stack_usage_inc(void) { }
+static inline void debug_stack_usage_dec(void) { }
+#endif /* X86_64 */
+
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 41935fadfdf..e95822d683f 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -35,6 +35,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
+extern struct desc_ptr nmi_idt_descr;
+extern gate_desc nmi_idt_table[];
 
 struct gdt_page {
 	struct desc_struct gdt[GDT_ENTRIES];
@@ -307,6 +309,16 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 	desc->limit = (limit >> 16) & 0xf;
 }
 
+#ifdef CONFIG_X86_64
+static inline void set_nmi_gate(int gate, void *addr)
+{
+	gate_desc s;
+
+	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
+	write_idt_entry(nmi_idt_table, gate, &s);
+}
+#endif
+
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
 {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 850f2963a42..d43cad74f16 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1021,6 +1021,8 @@ __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
+				    (unsigned long) nmi_idt_table };
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE);
@@ -1085,6 +1087,26 @@ unsigned long kernel_eflags;
  */
 DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
+static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
+DEFINE_PER_CPU(int, debug_stack_usage);
+
+int is_debug_stack(unsigned long addr)
+{
+	return __get_cpu_var(debug_stack_usage) ||
+		(addr <= __get_cpu_var(debug_stack_addr) &&
+		 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
+}
+
+void debug_stack_set_zero(void)
+{
+	load_idt((const struct desc_ptr *)&nmi_idt_descr);
+}
+
+void debug_stack_reset(void)
+{
+	load_idt((const struct desc_ptr *)&idt_descr);
+}
+
 #else /* CONFIG_X86_64 */
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
@@ -1212,6 +1234,8 @@ void __cpuinit cpu_init(void)
 			estacks += exception_stack_sizes[v];
 			oist->ist[v] = t->x86_tss.ist[v] =
 					(unsigned long)estacks;
+			if (v == DEBUG_STACK-1)
+				per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
 		}
 	}
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb9dc8..940ba711fc2 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1480,62 +1480,214 @@ ENTRY(error_exit)
 	CFI_ENDPROC
 END(error_exit)
 
+/*
+ * Test if a given stack is an NMI stack or not.
+ */
+	.macro test_in_nmi reg stack nmi_ret normal_ret
+	cmpq %\reg, \stack
+	ja \normal_ret
+	subq $EXCEPTION_STKSZ, %\reg
+	cmpq %\reg, \stack
+	jb \normal_ret
+	jmp \nmi_ret
+	.endm
 
 	/* runs on exception stack */
 ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1
+	/*
+	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
+	 * the iretq it performs will take us out of NMI context.
+	 * This means that we can have nested NMIs where the next
+	 * NMI is using the top of the stack of the previous NMI. We
+	 * can't let it execute because the nested NMI will corrupt the
+	 * stack of the previous NMI. NMI handlers are not re-entrant
+	 * anyway.
+	 *
+	 * To handle this case we do the following:
+	 *  Check the a special location on the stack that contains
+	 *  a variable that is set when NMIs are executing.
+	 *  The interrupted task's stack is also checked to see if it
+	 *  is an NMI stack.
+	 *  If the variable is not set and the stack is not the NMI
+	 *  stack then:
+	 *    o Set the special variable on the stack
+	 *    o Copy the interrupt frame into a "saved" location on the stack
+	 *    o Copy the interrupt frame into a "copy" location on the stack
+	 *    o Continue processing the NMI
+	 *  If the variable is set or the previous stack is the NMI stack:
+	 *    o Modify the "copy" location to jump to the repeate_nmi
+	 *    o return back to the first NMI
+	 *
+	 * Now on exit of the first NMI, we first clear the stack variable
+	 * The NMI stack will tell any nested NMIs at that point that it is
+	 * nested. Then we pop the stack normally with iret, and if there was
+	 * a nested NMI that updated the copy interrupt stack frame, a
+	 * jump will be made to the repeat_nmi code that will handle the second
+	 * NMI.
+	 */
+
+	/* Use %rdx as out temp variable throughout */
+	pushq_cfi %rdx
+
+	/*
+	 * Check the special variable on the stack to see if NMIs are
+	 * executing.
+	 */
+	cmp $1, -8(%rsp)
+	je nested_nmi
+
+	/*
+	 * Now test if the previous stack was an NMI stack.
+	 * We need the double check. We check the NMI stack to satisfy the
+	 * race when the first NMI clears the variable before returning.
+	 * We check the variable because the first NMI could be in a
+	 * breakpoint routine using a breakpoint stack.
+	 */
+	lea 6*8(%rsp), %rdx
+	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+
+nested_nmi:
+	/*
+	 * Do nothing if we interrupted the fixup in repeat_nmi.
+	 * It's about to repeat the NMI handler, so we are fine
+	 * with ignoring this one.
+	 */
+	movq $repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja 1f
+	movq $end_repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja nested_nmi_out
+
+1:
+	/* Set up the interrupted NMIs stack to jump to repeat_nmi */
+	leaq -6*8(%rsp), %rdx
+	movq %rdx, %rsp
+	CFI_ADJUST_CFA_OFFSET 6*8
+	pushq_cfi $__KERNEL_DS
+	pushq_cfi %rdx
+	pushfq_cfi
+	pushq_cfi $__KERNEL_CS
+	pushq_cfi $repeat_nmi
+
+	/* Put stack back */
+	addq $(11*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -11*8
+
+nested_nmi_out:
+	popq_cfi %rdx
+
+	/* No need to check faults here */
+	INTERRUPT_RETURN
+
+first_nmi:
+	/*
+	 * Because nested NMIs will use the pushed location that we
+	 * stored in rdx, we must keep that space available.
+	 * Here's what our stack frame will look like:
+	 * +-------------------------+
+	 * | original SS             |
+	 * | original Return RSP     |
+	 * | original RFLAGS         |
+	 * | original CS             |
+	 * | original RIP            |
+	 * +-------------------------+
+	 * | temp storage for rdx    |
+	 * +-------------------------+
+	 * | NMI executing variable  |
+	 * +-------------------------+
+	 * | Saved SS                |
+	 * | Saved Return RSP        |
+	 * | Saved RFLAGS            |
+	 * | Saved CS                |
+	 * | Saved RIP               |
+	 * +-------------------------+
+	 * | copied SS               |
+	 * | copied Return RSP       |
+	 * | copied RFLAGS           |
+	 * | copied CS               |
+	 * | copied RIP              |
+	 * +-------------------------+
+	 * | pt_regs                 |
+	 * +-------------------------+
+	 *
+	 * The saved RIP is used to fix up the copied RIP that a nested
+	 * NMI may zero out. The original stack frame and the temp storage
+	 * is also used by nested NMIs and can not be trusted on exit.
+	 */
+	/* Set the NMI executing variable on the stack. */
+	pushq_cfi $1
+
+	/* Copy the stack frame to the Saved frame */
+	.rept 5
+	pushq_cfi 6*8(%rsp)
+	.endr
+
+	/* Make another copy, this one may be modified by nested NMIs */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	/* Do not pop rdx, nested NMIs will corrupt it */
+	movq 11*8(%rsp), %rdx
+
+	/*
+	 * Everything below this point can be preempted by a nested
+	 * NMI if the first NMI took an exception. Repeated NMIs
+	 * caused by an exception and nested NMI will start here, and
+	 * can still be preempted by another NMI.
+	 */
+restart_nmi:
+	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	/*
+	 * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+	 * as we should not be calling schedule in NMI context.
+	 * Even with normal interrupts enabled. An NMI should not be
+	 * setting NEED_RESCHED or anything that normal interrupts and
+	 * exceptions might do.
+	 */
 	call save_paranoid
 	DEFAULT_FRAME 0
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* paranoidexit; without TRACE_IRQS_OFF */
-	/* ebx: no swapgs flag */
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz nmi_restore
-	testl $3,CS(%rsp)
-	jnz nmi_userspace
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_ALL 8
+	/* Clear the NMI executing stack variable */
+	movq $0, 10*8(%rsp)
 	jmp irq_return
-nmi_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz nmi_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz nmi_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi			/* arg2: oldset */
-	movq %rsp,%rdi			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	jmp nmi_userspace
-nmi_schedule:
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	jmp nmi_userspace
-	CFI_ENDPROC
-#else
-	jmp paranoid_exit
 	CFI_ENDPROC
-#endif
 END(nmi)
 
+	/*
+	 * If an NMI hit an iret because of an exception or breakpoint,
+	 * it can lose its NMI context, and a nested NMI may come in.
+	 * In that case, the nested NMI will change the preempted NMI's
+	 * stack to jump to here when it does the final iret.
+	 */
+repeat_nmi:
+	INTR_FRAME
+	/* Update the stack variable to say we are still in NMI */
+	movq $1, 5*8(%rsp)
+
+	/* copy the saved stack back to copy stack */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	jmp restart_nmi
+	CFI_ENDPROC
+end_repeat_nmi:
+
 ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax
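For readers skimming the assembly above, the test_in_nmi macro is just an unsigned range check on the interrupted stack pointer. A rough C equivalent follows; the helper name stack_is_nmi_stack is hypothetical and this sketch is not part of the patch:

	/* Sketch: the interrupted stack pointer lies on the NMI stack iff it
	 * falls within the EXCEPTION_STKSZ-sized window ending at the stack's
	 * top (the address loaded into %rdx by "lea 6*8(%rsp), %rdx"). */
	static inline int stack_is_nmi_stack(unsigned long stack,
					     unsigned long nmi_stack_top)
	{
		return stack <= nmi_stack_top &&
		       stack >= nmi_stack_top - EXCEPTION_STKSZ;
	}

The same window-check pattern appears as is_debug_stack() in the cpu/common.c hunk above, there against the per-CPU debug stack.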
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index e11e39478a4..40f4eb3766d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -417,6 +417,10 @@ ENTRY(phys_base)
 ENTRY(idt_table)
 	.skip IDT_ENTRIES * 16
 
+	.align L1_CACHE_BYTES
+ENTRY(nmi_idt_table)
+	.skip IDT_ENTRIES * 16
+
 	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e88f37b58dd..47acaf31916 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -405,9 +405,108 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		unknown_nmi_error(reason, regs);
 }
 
+/*
+ * NMIs can hit breakpoints which will cause it to lose its
+ * NMI context with the CPU when the breakpoint does an iret.
+ */
+#ifdef CONFIG_X86_32
+/*
+ * For i386, NMIs use the same stack as the kernel, and we can
+ * add a workaround to the iret problem in C. Simply have 3 states
+ * the NMI can be in.
+ *
+ *  1) not running
+ *  2) executing
+ *  3) latched
+ *
+ * When no NMI is in progress, it is in the "not running" state.
+ * When an NMI comes in, it goes into the "executing" state.
+ * Normally, if another NMI is triggered, it does not interrupt
+ * the running NMI and the HW will simply latch it so that when
+ * the first NMI finishes, it will restart the second NMI.
+ * (Note, the latch is binary, thus multiple NMIs triggering,
+ *  when one is running, are ignored. Only one NMI is restarted.)
+ *
+ * If an NMI hits a breakpoint that executes an iret, another
+ * NMI can preempt it. We do not want to allow this new NMI
+ * to run, but we want to execute it when the first one finishes.
+ * We set the state to "latched", and the first NMI will perform
+ * an cmpxchg on the state, and if it doesn't successfully
+ * reset the state to "not running" it will restart the next
+ * NMI.
+ */
+enum nmi_states {
+	NMI_NOT_RUNNING,
+	NMI_EXECUTING,
+	NMI_LATCHED,
+};
+static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+
+#define nmi_nesting_preprocess(regs)					\
+	do {								\
+		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\
+			__get_cpu_var(nmi_state) = NMI_LATCHED;		\
+			return;						\
+		}							\
+	nmi_restart:							\
+		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\
+	} while (0)
+
+#define nmi_nesting_postprocess()					\
+	do {								\
+		if (cmpxchg(&__get_cpu_var(nmi_state),			\
+		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\
+			goto nmi_restart;				\
+	} while (0)
+#else /* x86_64 */
+/*
+ * In x86_64 things are a bit more difficult. This has the same problem
+ * where an NMI hitting a breakpoint that calls iret will remove the
+ * NMI context, allowing a nested NMI to enter. What makes this more
+ * difficult is that both NMIs and breakpoints have their own stack.
+ * When a new NMI or breakpoint is executed, the stack is set to a fixed
+ * point. If an NMI is nested, it will have its stack set at that same
+ * fixed address that the first NMI had, and will start corrupting the
+ * stack. This is handled in entry_64.S, but the same problem exists with
+ * the breakpoint stack.
+ *
+ * If a breakpoint is being processed, and the debug stack is being used,
+ * if an NMI comes in and also hits a breakpoint, the stack pointer
+ * will be set to the same fixed address as the breakpoint that was
+ * interrupted, causing that stack to be corrupted. To handle this case,
+ * check if the stack that was interrupted is the debug stack, and if
+ * so, change the IDT so that new breakpoints will use the current stack
+ * and not switch to the fixed address. On return of the NMI, switch back
+ * to the original IDT.
+ */
+static DEFINE_PER_CPU(int, update_debug_stack);
+
+static inline void nmi_nesting_preprocess(struct pt_regs *regs)
+{
+	/*
+	 * If we interrupted a breakpoint, it is possible that
+	 * the nmi handler will have breakpoints too. We need to
+	 * change the IDT such that breakpoints that happen here
+	 * continue to use the NMI stack.
+	 */
+	if (unlikely(is_debug_stack(regs->sp))) {
+		debug_stack_set_zero();
+		__get_cpu_var(update_debug_stack) = 1;
+	}
+}
+
+static inline void nmi_nesting_postprocess(void)
+{
+	if (unlikely(__get_cpu_var(update_debug_stack)))
+		debug_stack_reset();
+}
+#endif
+
 dotraplinkage notrace __kprobes void
 do_nmi(struct pt_regs *regs, long error_code)
 {
+	nmi_nesting_preprocess(regs);
+
 	nmi_enter();
 
 	inc_irq_stat(__nmi_count);
@@ -416,6 +515,9 @@ do_nmi(struct pt_regs *regs, long error_code)
 	default_do_nmi(regs);
 
 	nmi_exit();
+
+	/* On i386, may loop back to preprocess */
+	nmi_nesting_postprocess();
 }
 
 void stop_nmi(void)
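Because nmi_nesting_preprocess()/nmi_nesting_postprocess() are macros on i386 (so the return and the nmi_restart: label act inside do_nmi() itself), it may help to see roughly what do_nmi() looks like after expansion on that configuration. This is an illustrative expansion only, not code from the patch:

	dotraplinkage notrace __kprobes void
	do_nmi(struct pt_regs *regs, long error_code)
	{
		/* An NMI that preempts a running one (via a breakpoint iret)
		 * only records itself as latched and returns. */
		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {
			__get_cpu_var(nmi_state) = NMI_LATCHED;
			return;
		}
	nmi_restart:
		__get_cpu_var(nmi_state) = NMI_EXECUTING;

		nmi_enter();
		inc_irq_stat(__nmi_count);
		/* ... default_do_nmi(regs) and friends ... */
		nmi_exit();

		/* If a latched NMI slipped in while we ran, rerun the handler. */
		if (cmpxchg(&__get_cpu_var(nmi_state),
			    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)
			goto nmi_restart;
	}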
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fa1191fb679..482ec3af206 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -311,9 +311,15 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 			== NOTIFY_STOP)
 		return;
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
 	preempt_conditional_sti(regs);
 	do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 }
 
 #ifdef CONFIG_X86_64
@@ -406,6 +412,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 							SIGTRAP) == NOTIFY_STOP)
 		return;
 
+	/*
+	 * Let others (NMI) know that the debug stack is in use
+	 * as we may switch to the interrupt stack.
+	 */
+	debug_stack_usage_inc();
+
 	/* It's safe to allow irq's after DR6 has been saved */
 	preempt_conditional_sti(regs);
 
@@ -413,6 +425,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 		handle_vm86_trap((struct kernel_vm86_regs *) regs,
 				error_code, 1);
 		preempt_conditional_cli(regs);
+		debug_stack_usage_dec();
 		return;
 	}
 
@@ -432,6 +445,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
 	preempt_conditional_cli(regs);
+	debug_stack_usage_dec();
 
 	return;
 }
@@ -718,4 +732,10 @@ void __init trap_init(void)
 	cpu_init();
 
 	x86_init.irqs.trap_init();
+
+#ifdef CONFIG_X86_64
+	memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+	set_nmi_gate(1, &debug);
+	set_nmi_gate(3, &int3);
+#endif
 }