diff options
| author | Avi Kivity <avi@redhat.com> | 2011-12-25 08:44:43 -0500 |
|---|---|---|
| committer | Avi Kivity <avi@redhat.com> | 2011-12-27 04:22:24 -0500 |
| commit | 9e31905f293ae84e4f120ed9e414031eaefa0bdf (patch) | |
| tree | 153204ff0dca820e760007bc24075ec7fb46a276 /arch | |
| parent | ff5c2c0316ff0e3e2dba3ca14167d994453df093 (diff) | |
| parent | b3d9468a8bd218a695e3a0ff112cd4efd27b670a (diff) | |
Merge remote-tracking branch 'tip/perf/core' into kvm-updates/3.3
* tip/perf/core: (66 commits)
perf, x86: Expose perf capability to other modules
perf, x86: Implement arch event mask as quirk
x86, perf: Disable non available architectural events
jump_label: Provide jump_label_key initializers
jump_label, x86: Fix section mismatch
perf, core: Rate limit perf_sched_events jump_label patching
perf: Fix enable_on_exec for sibling events
perf: Remove superfluous arguments
perf, x86: Prefer fixed-purpose counters when scheduling
perf, x86: Fix event scheduler for constraints with overlapping counters
perf, x86: Implement event scheduler helper functions
perf: Avoid a useless pmu_disable() in the perf-tick
x86/tools: Add decoded instruction dump mode
x86: Update instruction decoder to support new AVX formats
x86/tools: Fix insn_sanity message outputs
x86/tools: Fix instruction decoder message output
x86: Fix instruction decoder to handle grouped AVX instructions
x86/tools: Fix Makefile to build all test tools
perf test: Soft errors shouldn't stop the "Validate PERF_RECORD_" test
perf test: Validate PERF_RECORD_ events and perf_sample fields
...
Signed-off-by: Avi Kivity <avi@redhat.com>
* commit 'b3d9468a8bd218a695e3a0ff112cd4efd27b670a': (66 commits)
perf, x86: Expose perf capability to other modules
perf, x86: Implement arch event mask as quirk
x86, perf: Disable non available architectural events
jump_label: Provide jump_label_key initializers
jump_label, x86: Fix section mismatch
perf, core: Rate limit perf_sched_events jump_label patching
perf: Fix enable_on_exec for sibling events
perf: Remove superfluous arguments
perf, x86: Prefer fixed-purpose counters when scheduling
perf, x86: Fix event scheduler for constraints with overlapping counters
perf, x86: Implement event scheduler helper functions
perf: Avoid a useless pmu_disable() in the perf-tick
x86/tools: Add decoded instruction dump mode
x86: Update instruction decoder to support new AVX formats
x86/tools: Fix insn_sanity message outputs
x86/tools: Fix instruction decoder message output
x86: Fix instruction decoder to handle grouped AVX instructions
x86/tools: Fix Makefile to build all test tools
perf test: Soft errors shouldn't stop the "Validate PERF_RECORD_" test
perf test: Validate PERF_RECORD_ events and perf_sample fields
...
Diffstat (limited to 'arch')
| -rw-r--r-- | arch/Kconfig | 4 | ||||
| -rw-r--r-- | arch/x86/include/asm/insn.h | 7 | ||||
| -rw-r--r-- | arch/x86/include/asm/perf_event.h | 29 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 254 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 51 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 72 | ||||
| -rw-r--r-- | arch/x86/kernel/jump_label.c | 2 | ||||
| -rw-r--r-- | arch/x86/lib/inat.c | 9 | ||||
| -rw-r--r-- | arch/x86/lib/insn.c | 4 | ||||
| -rw-r--r-- | arch/x86/lib/x86-opcode-map.txt | 606 | ||||
| -rw-r--r-- | arch/x86/oprofile/Makefile | 3 | ||||
| -rw-r--r-- | arch/x86/oprofile/init.c | 30 | ||||
| -rw-r--r-- | arch/x86/oprofile/nmi_int.c | 27 | ||||
| -rw-r--r-- | arch/x86/oprofile/nmi_timer_int.c | 50 | ||||
| -rw-r--r-- | arch/x86/tools/Makefile | 11 | ||||
| -rw-r--r-- | arch/x86/tools/gen-insn-attr-x86.awk | 21 | ||||
| -rw-r--r-- | arch/x86/tools/insn_sanity.c | 275 |
18 files changed, 1013 insertions, 444 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669cbb3b0..2505740b81d2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
| @@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX | |||
| 30 | config HAVE_OPROFILE | 30 | config HAVE_OPROFILE |
| 31 | bool | 31 | bool |
| 32 | 32 | ||
| 33 | config OPROFILE_NMI_TIMER | ||
| 34 | def_bool y | ||
| 35 | depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI | ||
| 36 | |||
| 33 | config KPROBES | 37 | config KPROBES |
| 34 | bool "Kprobes" | 38 | bool "Kprobes" |
| 35 | depends on MODULES | 39 | depends on MODULES |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 88c765e16410..74df3f1eddfd 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
| @@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn) | |||
| 137 | return (insn->vex_prefix.value != 0); | 137 | return (insn->vex_prefix.value != 0); |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | /* Ensure this instruction is decoded completely */ | ||
| 141 | static inline int insn_complete(struct insn *insn) | ||
| 142 | { | ||
| 143 | return insn->opcode.got && insn->modrm.got && insn->sib.got && | ||
| 144 | insn->displacement.got && insn->immediate.got; | ||
| 145 | } | ||
| 146 | |||
| 140 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) | 147 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) |
| 141 | { | 148 | { |
| 142 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ | 149 | if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f61c62f7d5d8..b50e9d15aae0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
| @@ -57,6 +57,7 @@ | |||
| 57 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | 57 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) |
| 58 | 58 | ||
| 59 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 | 59 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 |
| 60 | #define ARCH_PERFMON_EVENTS_COUNT 7 | ||
| 60 | 61 | ||
| 61 | /* | 62 | /* |
| 62 | * Intel "Architectural Performance Monitoring" CPUID | 63 | * Intel "Architectural Performance Monitoring" CPUID |
| @@ -72,6 +73,19 @@ union cpuid10_eax { | |||
| 72 | unsigned int full; | 73 | unsigned int full; |
| 73 | }; | 74 | }; |
| 74 | 75 | ||
| 76 | union cpuid10_ebx { | ||
| 77 | struct { | ||
| 78 | unsigned int no_unhalted_core_cycles:1; | ||
| 79 | unsigned int no_instructions_retired:1; | ||
| 80 | unsigned int no_unhalted_reference_cycles:1; | ||
| 81 | unsigned int no_llc_reference:1; | ||
| 82 | unsigned int no_llc_misses:1; | ||
| 83 | unsigned int no_branch_instruction_retired:1; | ||
| 84 | unsigned int no_branch_misses_retired:1; | ||
| 85 | } split; | ||
| 86 | unsigned int full; | ||
| 87 | }; | ||
| 88 | |||
| 75 | union cpuid10_edx { | 89 | union cpuid10_edx { |
| 76 | struct { | 90 | struct { |
| 77 | unsigned int num_counters_fixed:5; | 91 | unsigned int num_counters_fixed:5; |
| @@ -81,6 +95,15 @@ union cpuid10_edx { | |||
| 81 | unsigned int full; | 95 | unsigned int full; |
| 82 | }; | 96 | }; |
| 83 | 97 | ||
| 98 | struct x86_pmu_capability { | ||
| 99 | int version; | ||
| 100 | int num_counters_gp; | ||
| 101 | int num_counters_fixed; | ||
| 102 | int bit_width_gp; | ||
| 103 | int bit_width_fixed; | ||
| 104 | unsigned int events_mask; | ||
| 105 | int events_mask_len; | ||
| 106 | }; | ||
| 84 | 107 | ||
| 85 | /* | 108 | /* |
| 86 | * Fixed-purpose performance events: | 109 | * Fixed-purpose performance events: |
| @@ -202,6 +225,7 @@ struct perf_guest_switch_msr { | |||
| 202 | }; | 225 | }; |
| 203 | 226 | ||
| 204 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); | 227 | extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); |
| 228 | extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); | ||
| 205 | #else | 229 | #else |
| 206 | static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | 230 | static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
| 207 | { | 231 | { |
| @@ -209,6 +233,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | |||
| 209 | return NULL; | 233 | return NULL; |
| 210 | } | 234 | } |
| 211 | 235 | ||
| 236 | static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | ||
| 237 | { | ||
| 238 | memset(cap, 0, sizeof(*cap)); | ||
| 239 | } | ||
| 240 | |||
| 212 | static inline void perf_events_lapic_init(void) { } | 241 | static inline void perf_events_lapic_init(void) { } |
| 213 | #endif | 242 | #endif |
| 214 | 243 | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bda212a0010..930fe4879542 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event) | |||
| 484 | return event->pmu == &pmu; | 484 | return event->pmu == &pmu; |
| 485 | } | 485 | } |
| 486 | 486 | ||
| 487 | /* | ||
| 488 | * Event scheduler state: | ||
| 489 | * | ||
| 490 | * Assign events iterating over all events and counters, beginning | ||
| 491 | * with events with least weights first. Keep the current iterator | ||
| 492 | * state in struct sched_state. | ||
| 493 | */ | ||
| 494 | struct sched_state { | ||
| 495 | int weight; | ||
| 496 | int event; /* event index */ | ||
| 497 | int counter; /* counter index */ | ||
| 498 | int unassigned; /* number of events to be assigned left */ | ||
| 499 | unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 500 | }; | ||
| 501 | |||
| 502 | /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ | ||
| 503 | #define SCHED_STATES_MAX 2 | ||
| 504 | |||
| 505 | struct perf_sched { | ||
| 506 | int max_weight; | ||
| 507 | int max_events; | ||
| 508 | struct event_constraint **constraints; | ||
| 509 | struct sched_state state; | ||
| 510 | int saved_states; | ||
| 511 | struct sched_state saved[SCHED_STATES_MAX]; | ||
| 512 | }; | ||
| 513 | |||
| 514 | /* | ||
| 515 | * Initialize iterator that runs through all events and counters. | ||
| 516 | */ | ||
| 517 | static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, | ||
| 518 | int num, int wmin, int wmax) | ||
| 519 | { | ||
| 520 | int idx; | ||
| 521 | |||
| 522 | memset(sched, 0, sizeof(*sched)); | ||
| 523 | sched->max_events = num; | ||
| 524 | sched->max_weight = wmax; | ||
| 525 | sched->constraints = c; | ||
| 526 | |||
| 527 | for (idx = 0; idx < num; idx++) { | ||
| 528 | if (c[idx]->weight == wmin) | ||
| 529 | break; | ||
| 530 | } | ||
| 531 | |||
| 532 | sched->state.event = idx; /* start with min weight */ | ||
| 533 | sched->state.weight = wmin; | ||
| 534 | sched->state.unassigned = num; | ||
| 535 | } | ||
| 536 | |||
| 537 | static void perf_sched_save_state(struct perf_sched *sched) | ||
| 538 | { | ||
| 539 | if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX)) | ||
| 540 | return; | ||
| 541 | |||
| 542 | sched->saved[sched->saved_states] = sched->state; | ||
| 543 | sched->saved_states++; | ||
| 544 | } | ||
| 545 | |||
| 546 | static bool perf_sched_restore_state(struct perf_sched *sched) | ||
| 547 | { | ||
| 548 | if (!sched->saved_states) | ||
| 549 | return false; | ||
| 550 | |||
| 551 | sched->saved_states--; | ||
| 552 | sched->state = sched->saved[sched->saved_states]; | ||
| 553 | |||
| 554 | /* continue with next counter: */ | ||
| 555 | clear_bit(sched->state.counter++, sched->state.used); | ||
| 556 | |||
| 557 | return true; | ||
| 558 | } | ||
| 559 | |||
| 560 | /* | ||
| 561 | * Select a counter for the current event to schedule. Return true on | ||
| 562 | * success. | ||
| 563 | */ | ||
| 564 | static bool __perf_sched_find_counter(struct perf_sched *sched) | ||
| 565 | { | ||
| 566 | struct event_constraint *c; | ||
| 567 | int idx; | ||
| 568 | |||
| 569 | if (!sched->state.unassigned) | ||
| 570 | return false; | ||
| 571 | |||
| 572 | if (sched->state.event >= sched->max_events) | ||
| 573 | return false; | ||
| 574 | |||
| 575 | c = sched->constraints[sched->state.event]; | ||
| 576 | |||
| 577 | /* Prefer fixed purpose counters */ | ||
| 578 | if (x86_pmu.num_counters_fixed) { | ||
| 579 | idx = X86_PMC_IDX_FIXED; | ||
| 580 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) { | ||
| 581 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
| 582 | goto done; | ||
| 583 | } | ||
| 584 | } | ||
| 585 | /* Grab the first unused counter starting with idx */ | ||
| 586 | idx = sched->state.counter; | ||
| 587 | for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) { | ||
| 588 | if (!__test_and_set_bit(idx, sched->state.used)) | ||
| 589 | goto done; | ||
| 590 | } | ||
| 591 | |||
| 592 | return false; | ||
| 593 | |||
| 594 | done: | ||
| 595 | sched->state.counter = idx; | ||
| 596 | |||
| 597 | if (c->overlap) | ||
| 598 | perf_sched_save_state(sched); | ||
| 599 | |||
| 600 | return true; | ||
| 601 | } | ||
| 602 | |||
| 603 | static bool perf_sched_find_counter(struct perf_sched *sched) | ||
| 604 | { | ||
| 605 | while (!__perf_sched_find_counter(sched)) { | ||
| 606 | if (!perf_sched_restore_state(sched)) | ||
| 607 | return false; | ||
| 608 | } | ||
| 609 | |||
| 610 | return true; | ||
| 611 | } | ||
| 612 | |||
| 613 | /* | ||
| 614 | * Go through all unassigned events and find the next one to schedule. | ||
| 615 | * Take events with the least weight first. Return true on success. | ||
| 616 | */ | ||
| 617 | static bool perf_sched_next_event(struct perf_sched *sched) | ||
| 618 | { | ||
| 619 | struct event_constraint *c; | ||
| 620 | |||
| 621 | if (!sched->state.unassigned || !--sched->state.unassigned) | ||
| 622 | return false; | ||
| 623 | |||
| 624 | do { | ||
| 625 | /* next event */ | ||
| 626 | sched->state.event++; | ||
| 627 | if (sched->state.event >= sched->max_events) { | ||
| 628 | /* next weight */ | ||
| 629 | sched->state.event = 0; | ||
| 630 | sched->state.weight++; | ||
| 631 | if (sched->state.weight > sched->max_weight) | ||
| 632 | return false; | ||
| 633 | } | ||
| 634 | c = sched->constraints[sched->state.event]; | ||
| 635 | } while (c->weight != sched->state.weight); | ||
| 636 | |||
| 637 | sched->state.counter = 0; /* start with first counter */ | ||
| 638 | |||
| 639 | return true; | ||
| 640 | } | ||
| 641 | |||
| 642 | /* | ||
| 643 | * Assign a counter for each event. | ||
| 644 | */ | ||
| 645 | static int perf_assign_events(struct event_constraint **constraints, int n, | ||
| 646 | int wmin, int wmax, int *assign) | ||
| 647 | { | ||
| 648 | struct perf_sched sched; | ||
| 649 | |||
| 650 | perf_sched_init(&sched, constraints, n, wmin, wmax); | ||
| 651 | |||
| 652 | do { | ||
| 653 | if (!perf_sched_find_counter(&sched)) | ||
| 654 | break; /* failed */ | ||
| 655 | if (assign) | ||
| 656 | assign[sched.state.event] = sched.state.counter; | ||
| 657 | } while (perf_sched_next_event(&sched)); | ||
| 658 | |||
| 659 | return sched.state.unassigned; | ||
| 660 | } | ||
| 661 | |||
| 487 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | 662 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
| 488 | { | 663 | { |
| 489 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; | 664 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
| 490 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 665 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 491 | int i, j, w, wmax, num = 0; | 666 | int i, wmin, wmax, num = 0; |
| 492 | struct hw_perf_event *hwc; | 667 | struct hw_perf_event *hwc; |
| 493 | 668 | ||
| 494 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | 669 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
| 495 | 670 | ||
| 496 | for (i = 0; i < n; i++) { | 671 | for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { |
| 497 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); | 672 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
| 498 | constraints[i] = c; | 673 | constraints[i] = c; |
| 674 | wmin = min(wmin, c->weight); | ||
| 675 | wmax = max(wmax, c->weight); | ||
| 499 | } | 676 | } |
| 500 | 677 | ||
| 501 | /* | 678 | /* |
| @@ -521,59 +698,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
| 521 | if (assign) | 698 | if (assign) |
| 522 | assign[i] = hwc->idx; | 699 | assign[i] = hwc->idx; |
| 523 | } | 700 | } |
| 524 | if (i == n) | ||
| 525 | goto done; | ||
| 526 | |||
| 527 | /* | ||
| 528 | * begin slow path | ||
| 529 | */ | ||
| 530 | |||
| 531 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
| 532 | |||
| 533 | /* | ||
| 534 | * weight = number of possible counters | ||
| 535 | * | ||
| 536 | * 1 = most constrained, only works on one counter | ||
| 537 | * wmax = least constrained, works on any counter | ||
| 538 | * | ||
| 539 | * assign events to counters starting with most | ||
| 540 | * constrained events. | ||
| 541 | */ | ||
| 542 | wmax = x86_pmu.num_counters; | ||
| 543 | |||
| 544 | /* | ||
| 545 | * when fixed event counters are present, | ||
| 546 | * wmax is incremented by 1 to account | ||
| 547 | * for one more choice | ||
| 548 | */ | ||
| 549 | if (x86_pmu.num_counters_fixed) | ||
| 550 | wmax++; | ||
| 551 | |||
| 552 | for (w = 1, num = n; num && w <= wmax; w++) { | ||
| 553 | /* for each event */ | ||
| 554 | for (i = 0; num && i < n; i++) { | ||
| 555 | c = constraints[i]; | ||
| 556 | hwc = &cpuc->event_list[i]->hw; | ||
| 557 | |||
| 558 | if (c->weight != w) | ||
| 559 | continue; | ||
| 560 | |||
| 561 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { | ||
| 562 | if (!test_bit(j, used_mask)) | ||
| 563 | break; | ||
| 564 | } | ||
| 565 | |||
| 566 | if (j == X86_PMC_IDX_MAX) | ||
| 567 | break; | ||
| 568 | 701 | ||
| 569 | __set_bit(j, used_mask); | 702 | /* slow path */ |
| 703 | if (i != n) | ||
| 704 | num = perf_assign_events(constraints, n, wmin, wmax, assign); | ||
| 570 | 705 | ||
| 571 | if (assign) | ||
| 572 | assign[i] = j; | ||
| 573 | num--; | ||
| 574 | } | ||
| 575 | } | ||
| 576 | done: | ||
| 577 | /* | 706 | /* |
| 578 | * scheduling failed or is just a simulation, | 707 | * scheduling failed or is just a simulation, |
| 579 | * free resources if necessary | 708 | * free resources if necessary |
| @@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void) | |||
| 1119 | 1248 | ||
| 1120 | static int __init init_hw_perf_events(void) | 1249 | static int __init init_hw_perf_events(void) |
| 1121 | { | 1250 | { |
| 1251 | struct x86_pmu_quirk *quirk; | ||
| 1122 | struct event_constraint *c; | 1252 | struct event_constraint *c; |
| 1123 | int err; | 1253 | int err; |
| 1124 | 1254 | ||
| @@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void) | |||
| 1147 | 1277 | ||
| 1148 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1278 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
| 1149 | 1279 | ||
| 1150 | if (x86_pmu.quirks) | 1280 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) |
| 1151 | x86_pmu.quirks(); | 1281 | quirk->func(); |
| 1152 | 1282 | ||
| 1153 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 1283 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { |
| 1154 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1284 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
| @@ -1171,7 +1301,7 @@ static int __init init_hw_perf_events(void) | |||
| 1171 | 1301 | ||
| 1172 | unconstrained = (struct event_constraint) | 1302 | unconstrained = (struct event_constraint) |
| 1173 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1303 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
| 1174 | 0, x86_pmu.num_counters); | 1304 | 0, x86_pmu.num_counters, 0); |
| 1175 | 1305 | ||
| 1176 | if (x86_pmu.event_constraints) { | 1306 | if (x86_pmu.event_constraints) { |
| 1177 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1307 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
| @@ -1566,3 +1696,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs) | |||
| 1566 | 1696 | ||
| 1567 | return misc; | 1697 | return misc; |
| 1568 | } | 1698 | } |
| 1699 | |||
| 1700 | void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) | ||
| 1701 | { | ||
| 1702 | cap->version = x86_pmu.version; | ||
| 1703 | cap->num_counters_gp = x86_pmu.num_counters; | ||
| 1704 | cap->num_counters_fixed = x86_pmu.num_counters_fixed; | ||
| 1705 | cap->bit_width_gp = x86_pmu.cntval_bits; | ||
| 1706 | cap->bit_width_fixed = x86_pmu.cntval_bits; | ||
| 1707 | cap->events_mask = (unsigned int)x86_pmu.events_maskl; | ||
| 1708 | cap->events_mask_len = x86_pmu.events_mask_len; | ||
| 1709 | } | ||
| 1710 | EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability); | ||
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index b9698d40ac4b..8944062f46e2 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
| @@ -45,6 +45,7 @@ struct event_constraint { | |||
| 45 | u64 code; | 45 | u64 code; |
| 46 | u64 cmask; | 46 | u64 cmask; |
| 47 | int weight; | 47 | int weight; |
| 48 | int overlap; | ||
| 48 | }; | 49 | }; |
| 49 | 50 | ||
| 50 | struct amd_nb { | 51 | struct amd_nb { |
| @@ -151,15 +152,40 @@ struct cpu_hw_events { | |||
| 151 | void *kfree_on_online; | 152 | void *kfree_on_online; |
| 152 | }; | 153 | }; |
| 153 | 154 | ||
| 154 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | 155 | #define __EVENT_CONSTRAINT(c, n, m, w, o) {\ |
| 155 | { .idxmsk64 = (n) }, \ | 156 | { .idxmsk64 = (n) }, \ |
| 156 | .code = (c), \ | 157 | .code = (c), \ |
| 157 | .cmask = (m), \ | 158 | .cmask = (m), \ |
| 158 | .weight = (w), \ | 159 | .weight = (w), \ |
| 160 | .overlap = (o), \ | ||
| 159 | } | 161 | } |
| 160 | 162 | ||
| 161 | #define EVENT_CONSTRAINT(c, n, m) \ | 163 | #define EVENT_CONSTRAINT(c, n, m) \ |
| 162 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 164 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) |
| 165 | |||
| 166 | /* | ||
| 167 | * The overlap flag marks event constraints with overlapping counter | ||
| 168 | * masks. This is the case if the counter mask of such an event is not | ||
| 169 | * a subset of any other counter mask of a constraint with an equal or | ||
| 170 | * higher weight, e.g.: | ||
| 171 | * | ||
| 172 | * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); | ||
| 173 | * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0); | ||
| 174 | * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0); | ||
| 175 | * | ||
| 176 | * The event scheduler may not select the correct counter in the first | ||
| 177 | * cycle because it needs to know which subsequent events will be | ||
| 178 | * scheduled. It may fail to schedule the events then. So we set the | ||
| 179 | * overlap flag for such constraints to give the scheduler a hint which | ||
| 180 | * events to select for counter rescheduling. | ||
| 181 | * | ||
| 182 | * Care must be taken as the rescheduling algorithm is O(n!) which | ||
| 183 | * will increase scheduling cycles for an over-committed system | ||
| 184 | * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros | ||
| 185 | * and their counter masks must be kept at a minimum. | ||
| 186 | */ | ||
| 187 | #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ | ||
| 188 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) | ||
| 163 | 189 | ||
| 164 | /* | 190 | /* |
| 165 | * Constraint on the Event code. | 191 | * Constraint on the Event code. |
| @@ -235,6 +261,11 @@ union perf_capabilities { | |||
| 235 | u64 capabilities; | 261 | u64 capabilities; |
| 236 | }; | 262 | }; |
| 237 | 263 | ||
| 264 | struct x86_pmu_quirk { | ||
| 265 | struct x86_pmu_quirk *next; | ||
| 266 | void (*func)(void); | ||
| 267 | }; | ||
| 268 | |||
| 238 | /* | 269 | /* |
| 239 | * struct x86_pmu - generic x86 pmu | 270 | * struct x86_pmu - generic x86 pmu |
| 240 | */ | 271 | */ |
| @@ -259,6 +290,11 @@ struct x86_pmu { | |||
| 259 | int num_counters_fixed; | 290 | int num_counters_fixed; |
| 260 | int cntval_bits; | 291 | int cntval_bits; |
| 261 | u64 cntval_mask; | 292 | u64 cntval_mask; |
| 293 | union { | ||
| 294 | unsigned long events_maskl; | ||
| 295 | unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)]; | ||
| 296 | }; | ||
| 297 | int events_mask_len; | ||
| 262 | int apic; | 298 | int apic; |
| 263 | u64 max_period; | 299 | u64 max_period; |
| 264 | struct event_constraint * | 300 | struct event_constraint * |
| @@ -268,7 +304,7 @@ struct x86_pmu { | |||
| 268 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 304 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
| 269 | struct perf_event *event); | 305 | struct perf_event *event); |
| 270 | struct event_constraint *event_constraints; | 306 | struct event_constraint *event_constraints; |
| 271 | void (*quirks)(void); | 307 | struct x86_pmu_quirk *quirks; |
| 272 | int perfctr_second_write; | 308 | int perfctr_second_write; |
| 273 | 309 | ||
| 274 | int (*cpu_prepare)(int cpu); | 310 | int (*cpu_prepare)(int cpu); |
| @@ -309,6 +345,15 @@ struct x86_pmu { | |||
| 309 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); | 345 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); |
| 310 | }; | 346 | }; |
| 311 | 347 | ||
| 348 | #define x86_add_quirk(func_) \ | ||
| 349 | do { \ | ||
| 350 | static struct x86_pmu_quirk __quirk __initdata = { \ | ||
| 351 | .func = func_, \ | ||
| 352 | }; \ | ||
| 353 | __quirk.next = x86_pmu.quirks; \ | ||
| 354 | x86_pmu.quirks = &__quirk; \ | ||
| 355 | } while (0) | ||
| 356 | |||
| 312 | #define ERF_NO_HT_SHARING 1 | 357 | #define ERF_NO_HT_SHARING 1 |
| 313 | #define ERF_HAS_RSP_1 2 | 358 | #define ERF_HAS_RSP_1 2 |
| 314 | 359 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index aeefd45697a2..0397b23be8e9 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
| @@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
| 492 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | 492 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); |
| 493 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); | 493 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); |
| 494 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); | 494 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); |
| 495 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); | 495 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); |
| 496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); | 496 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); |
| 497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | 497 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); |
| 498 | 498 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 8d601b18bf9f..2c3bf53d0302 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -1519,7 +1519,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
| 1519 | .guest_get_msrs = intel_guest_get_msrs, | 1519 | .guest_get_msrs = intel_guest_get_msrs, |
| 1520 | }; | 1520 | }; |
| 1521 | 1521 | ||
| 1522 | static void intel_clovertown_quirks(void) | 1522 | static __init void intel_clovertown_quirk(void) |
| 1523 | { | 1523 | { |
| 1524 | /* | 1524 | /* |
| 1525 | * PEBS is unreliable due to: | 1525 | * PEBS is unreliable due to: |
| @@ -1545,19 +1545,60 @@ static void intel_clovertown_quirks(void) | |||
| 1545 | x86_pmu.pebs_constraints = NULL; | 1545 | x86_pmu.pebs_constraints = NULL; |
| 1546 | } | 1546 | } |
| 1547 | 1547 | ||
| 1548 | static void intel_sandybridge_quirks(void) | 1548 | static __init void intel_sandybridge_quirk(void) |
| 1549 | { | 1549 | { |
| 1550 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | 1550 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); |
| 1551 | x86_pmu.pebs = 0; | 1551 | x86_pmu.pebs = 0; |
| 1552 | x86_pmu.pebs_constraints = NULL; | 1552 | x86_pmu.pebs_constraints = NULL; |
| 1553 | } | 1553 | } |
| 1554 | 1554 | ||
| 1555 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { | ||
| 1556 | { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, | ||
| 1557 | { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, | ||
| 1558 | { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, | ||
| 1559 | { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, | ||
| 1560 | { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, | ||
| 1561 | { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, | ||
| 1562 | { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, | ||
| 1563 | }; | ||
| 1564 | |||
| 1565 | static __init void intel_arch_events_quirk(void) | ||
| 1566 | { | ||
| 1567 | int bit; | ||
| 1568 | |||
| 1569 | /* disable events that are reported as not present by cpuid */ | ||
| 1570 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { | ||
| 1571 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; | ||
| 1572 | printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", | ||
| 1573 | intel_arch_events_map[bit].name); | ||
| 1574 | } | ||
| 1575 | } | ||
| 1576 | |||
| 1577 | static __init void intel_nehalem_quirk(void) | ||
| 1578 | { | ||
| 1579 | union cpuid10_ebx ebx; | ||
| 1580 | |||
| 1581 | ebx.full = x86_pmu.events_maskl; | ||
| 1582 | if (ebx.split.no_branch_misses_retired) { | ||
| 1583 | /* | ||
| 1584 | * Erratum AAJ80 detected, we work it around by using | ||
| 1585 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
| 1586 | * branch-misses, but it's still much better than the | ||
| 1587 | * architectural event which is often completely bogus: | ||
| 1588 | */ | ||
| 1589 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
| 1590 | ebx.split.no_branch_misses_retired = 0; | ||
| 1591 | x86_pmu.events_maskl = ebx.full; | ||
| 1592 | printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); | ||
| 1593 | } | ||
| 1594 | } | ||
| 1595 | |||
| 1555 | __init int intel_pmu_init(void) | 1596 | __init int intel_pmu_init(void) |
| 1556 | { | 1597 | { |
| 1557 | union cpuid10_edx edx; | 1598 | union cpuid10_edx edx; |
| 1558 | union cpuid10_eax eax; | 1599 | union cpuid10_eax eax; |
| 1600 | union cpuid10_ebx ebx; | ||
| 1559 | unsigned int unused; | 1601 | unsigned int unused; |
| 1560 | unsigned int ebx; | ||
| 1561 | int version; | 1602 | int version; |
| 1562 | 1603 | ||
| 1563 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 1604 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
| @@ -1574,8 +1615,8 @@ __init int intel_pmu_init(void) | |||
| 1574 | * Check whether the Architectural PerfMon supports | 1615 | * Check whether the Architectural PerfMon supports |
| 1575 | * Branch Misses Retired hw_event or not. | 1616 | * Branch Misses Retired hw_event or not. |
| 1576 | */ | 1617 | */ |
| 1577 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | 1618 | cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); |
| 1578 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | 1619 | if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) |
| 1579 | return -ENODEV; | 1620 | return -ENODEV; |
| 1580 | 1621 | ||
| 1581 | version = eax.split.version_id; | 1622 | version = eax.split.version_id; |
| @@ -1589,6 +1630,9 @@ __init int intel_pmu_init(void) | |||
| 1589 | x86_pmu.cntval_bits = eax.split.bit_width; | 1630 | x86_pmu.cntval_bits = eax.split.bit_width; |
| 1590 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; | 1631 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
| 1591 | 1632 | ||
| 1633 | x86_pmu.events_maskl = ebx.full; | ||
| 1634 | x86_pmu.events_mask_len = eax.split.mask_length; | ||
| 1635 | |||
| 1592 | /* | 1636 | /* |
| 1593 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1637 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
| 1594 | * assume at least 3 events: | 1638 | * assume at least 3 events: |
| @@ -1608,6 +1652,8 @@ __init int intel_pmu_init(void) | |||
| 1608 | 1652 | ||
| 1609 | intel_ds_init(); | 1653 | intel_ds_init(); |
| 1610 | 1654 | ||
| 1655 | x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ | ||
| 1656 | |||
| 1611 | /* | 1657 | /* |
| 1612 | * Install the hw-cache-events table: | 1658 | * Install the hw-cache-events table: |
| 1613 | */ | 1659 | */ |
| @@ -1617,7 +1663,7 @@ __init int intel_pmu_init(void) | |||
| 1617 | break; | 1663 | break; |
| 1618 | 1664 | ||
| 1619 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 1665 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
| 1620 | x86_pmu.quirks = intel_clovertown_quirks; | 1666 | x86_add_quirk(intel_clovertown_quirk); |
| 1621 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 1667 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
| 1622 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 1668 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
| 1623 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 1669 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
| @@ -1651,17 +1697,8 @@ __init int intel_pmu_init(void) | |||
| 1651 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1697 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
| 1652 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1698 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; |
| 1653 | 1699 | ||
| 1654 | if (ebx & 0x40) { | 1700 | x86_add_quirk(intel_nehalem_quirk); |
| 1655 | /* | ||
| 1656 | * Erratum AAJ80 detected, we work it around by using | ||
| 1657 | * the BR_MISP_EXEC.ANY event. This will over-count | ||
| 1658 | * branch-misses, but it's still much better than the | ||
| 1659 | * architectural event which is often completely bogus: | ||
| 1660 | */ | ||
| 1661 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; | ||
| 1662 | 1701 | ||
| 1663 | pr_cont("erratum AAJ80 worked around, "); | ||
| 1664 | } | ||
| 1665 | pr_cont("Nehalem events, "); | 1702 | pr_cont("Nehalem events, "); |
| 1666 | break; | 1703 | break; |
| 1667 | 1704 | ||
| @@ -1701,7 +1738,7 @@ __init int intel_pmu_init(void) | |||
| 1701 | break; | 1738 | break; |
| 1702 | 1739 | ||
| 1703 | case 42: /* SandyBridge */ | 1740 | case 42: /* SandyBridge */ |
| 1704 | x86_pmu.quirks = intel_sandybridge_quirks; | 1741 | x86_add_quirk(intel_sandybridge_quirk); |
| 1705 | case 45: /* SandyBridge, "Romely-EP" */ | 1742 | case 45: /* SandyBridge, "Romely-EP" */ |
| 1706 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1743 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
| 1707 | sizeof(hw_cache_event_ids)); | 1744 | sizeof(hw_cache_event_ids)); |
| @@ -1738,5 +1775,6 @@ __init int intel_pmu_init(void) | |||
| 1738 | break; | 1775 | break; |
| 1739 | } | 1776 | } |
| 1740 | } | 1777 | } |
| 1778 | |||
| 1741 | return 0; | 1779 | return 0; |
| 1742 | } | 1780 | } |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ea9d5f2f13ef..2889b3d43882 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
| @@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
| 50 | put_online_cpus(); | 50 | put_online_cpus(); |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | void arch_jump_label_transform_static(struct jump_entry *entry, | 53 | __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, |
| 54 | enum jump_label_type type) | 54 | enum jump_label_type type) |
| 55 | { | 55 | { |
| 56 | __jump_label_transform(entry, type, text_poke_early); | 56 | __jump_label_transform(entry, type, text_poke_early); |
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 46fc4ee09fc4..88ad5fbda6e1 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c | |||
| @@ -82,9 +82,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, | |||
| 82 | const insn_attr_t *table; | 82 | const insn_attr_t *table; |
| 83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) | 83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) |
| 84 | return 0; | 84 | return 0; |
| 85 | table = inat_avx_tables[vex_m][vex_p]; | 85 | /* At first, this checks the master table */ |
| 86 | table = inat_avx_tables[vex_m][0]; | ||
| 86 | if (!table) | 87 | if (!table) |
| 87 | return 0; | 88 | return 0; |
| 89 | if (!inat_is_group(table[opcode]) && vex_p) { | ||
| 90 | /* If this is not a group, get attribute directly */ | ||
| 91 | table = inat_avx_tables[vex_m][vex_p]; | ||
| 92 | if (!table) | ||
| 93 | return 0; | ||
| 94 | } | ||
| 88 | return table[opcode]; | 95 | return table[opcode]; |
| 89 | } | 96 | } |
| 90 | 97 | ||
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 374562ed6704..5a1f9f3e3fbb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
| @@ -202,7 +202,7 @@ void insn_get_opcode(struct insn *insn) | |||
| 202 | m = insn_vex_m_bits(insn); | 202 | m = insn_vex_m_bits(insn); |
| 203 | p = insn_vex_p_bits(insn); | 203 | p = insn_vex_p_bits(insn); |
| 204 | insn->attr = inat_get_avx_attribute(op, m, p); | 204 | insn->attr = inat_get_avx_attribute(op, m, p); |
| 205 | if (!inat_accept_vex(insn->attr)) | 205 | if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) |
| 206 | insn->attr = 0; /* This instruction is bad */ | 206 | insn->attr = 0; /* This instruction is bad */ |
| 207 | goto end; /* VEX has only 1 byte for opcode */ | 207 | goto end; /* VEX has only 1 byte for opcode */ |
| 208 | } | 208 | } |
| @@ -249,6 +249,8 @@ void insn_get_modrm(struct insn *insn) | |||
| 249 | pfx = insn_last_prefix(insn); | 249 | pfx = insn_last_prefix(insn); |
| 250 | insn->attr = inat_get_group_attribute(mod, pfx, | 250 | insn->attr = inat_get_group_attribute(mod, pfx, |
| 251 | insn->attr); | 251 | insn->attr); |
| 252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | ||
| 253 | insn->attr = 0; /* This is bad */ | ||
| 252 | } | 254 | } |
| 253 | } | 255 | } |
| 254 | 256 | ||
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index a793da5e560e..5b83c51c12e0 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt | |||
| @@ -1,5 +1,11 @@ | |||
| 1 | # x86 Opcode Maps | 1 | # x86 Opcode Maps |
| 2 | # | 2 | # |
| 3 | # This is (mostly) based on following documentations. | ||
| 4 | # - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2 | ||
| 5 | # (#325383-040US, October 2011) | ||
| 6 | # - Intel(R) Advanced Vector Extensions Programming Reference | ||
| 7 | # (#319433-011,JUNE 2011). | ||
| 8 | # | ||
| 3 | #<Opcode maps> | 9 | #<Opcode maps> |
| 4 | # Table: table-name | 10 | # Table: table-name |
| 5 | # Referrer: escaped-name | 11 | # Referrer: escaped-name |
| @@ -15,10 +21,13 @@ | |||
| 15 | # EndTable | 21 | # EndTable |
| 16 | # | 22 | # |
| 17 | # AVX Superscripts | 23 | # AVX Superscripts |
| 18 | # (VEX): this opcode can accept VEX prefix. | 24 | # (v): this opcode requires VEX prefix. |
| 19 | # (oVEX): this opcode requires VEX prefix. | 25 | # (v1): this opcode only supports 128bit VEX. |
| 20 | # (o128): this opcode only supports 128bit VEX. | 26 | # |
| 21 | # (o256): this opcode only supports 256bit VEX. | 27 | # Last Prefix Superscripts |
| 28 | # - (66): the last prefix is 0x66 | ||
| 29 | # - (F3): the last prefix is 0xF3 | ||
| 30 | # - (F2): the last prefix is 0xF2 | ||
| 22 | # | 31 | # |
| 23 | 32 | ||
| 24 | Table: one byte opcode | 33 | Table: one byte opcode |
| @@ -199,8 +208,8 @@ a0: MOV AL,Ob | |||
| 199 | a1: MOV rAX,Ov | 208 | a1: MOV rAX,Ov |
| 200 | a2: MOV Ob,AL | 209 | a2: MOV Ob,AL |
| 201 | a3: MOV Ov,rAX | 210 | a3: MOV Ov,rAX |
| 202 | a4: MOVS/B Xb,Yb | 211 | a4: MOVS/B Yb,Xb |
| 203 | a5: MOVS/W/D/Q Xv,Yv | 212 | a5: MOVS/W/D/Q Yv,Xv |
| 204 | a6: CMPS/B Xb,Yb | 213 | a6: CMPS/B Xb,Yb |
| 205 | a7: CMPS/W/D Xv,Yv | 214 | a7: CMPS/W/D Xv,Yv |
| 206 | a8: TEST AL,Ib | 215 | a8: TEST AL,Ib |
| @@ -233,8 +242,8 @@ c0: Grp2 Eb,Ib (1A) | |||
| 233 | c1: Grp2 Ev,Ib (1A) | 242 | c1: Grp2 Ev,Ib (1A) |
| 234 | c2: RETN Iw (f64) | 243 | c2: RETN Iw (f64) |
| 235 | c3: RETN | 244 | c3: RETN |
| 236 | c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) | 245 | c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) |
| 237 | c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) | 246 | c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) |
| 238 | c6: Grp11 Eb,Ib (1A) | 247 | c6: Grp11 Eb,Ib (1A) |
| 239 | c7: Grp11 Ev,Iz (1A) | 248 | c7: Grp11 Ev,Iz (1A) |
| 240 | c8: ENTER Iw,Ib | 249 | c8: ENTER Iw,Ib |
| @@ -320,14 +329,19 @@ AVXcode: 1 | |||
| 320 | # 3DNow! uses the last imm byte as opcode extension. | 329 | # 3DNow! uses the last imm byte as opcode extension. |
| 321 | 0f: 3DNow! Pq,Qq,Ib | 330 | 0f: 3DNow! Pq,Qq,Ib |
| 322 | # 0x0f 0x10-0x1f | 331 | # 0x0f 0x10-0x1f |
| 323 | 10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) | 332 | # NOTE: According to Intel SDM opcode map, vmovups and vmovupd has no operands |
| 324 | 11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) | 333 | # but it actually has operands. And also, vmovss and vmovsd only accept 128bit. |
| 325 | 12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) | 334 | # MOVSS/MOVSD has too many forms(3) on SDM. This map just shows a typical form. |
| 326 | 13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) | 335 | # Many AVX instructions lack v1 superscript, according to Intel AVX-Prgramming |
| 327 | 14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) | 336 | # Reference A.1 |
| 328 | 15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) | 337 | 10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) |
| 329 | 16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) | 338 | 11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) |
| 330 | 17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) | 339 | 12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) |
| 340 | 13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) | ||
| 341 | 14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) | ||
| 342 | 15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) | ||
| 343 | 16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) | ||
| 344 | 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) | ||
| 331 | 18: Grp16 (1A) | 345 | 18: Grp16 (1A) |
| 332 | 19: | 346 | 19: |
| 333 | 1a: | 347 | 1a: |
| @@ -345,14 +359,14 @@ AVXcode: 1 | |||
| 345 | 25: | 359 | 25: |
| 346 | 26: | 360 | 26: |
| 347 | 27: | 361 | 27: |
| 348 | 28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) | 362 | 28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) |
| 349 | 29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) | 363 | 29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) |
| 350 | 2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) | 364 | 2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) |
| 351 | 2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) | 365 | 2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) |
| 352 | 2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) | 366 | 2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) |
| 353 | 2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) | 367 | 2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) |
| 354 | 2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) | 368 | 2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) |
| 355 | 2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) | 369 | 2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) |
| 356 | # 0x0f 0x30-0x3f | 370 | # 0x0f 0x30-0x3f |
| 357 | 30: WRMSR | 371 | 30: WRMSR |
| 358 | 31: RDTSC | 372 | 31: RDTSC |
| @@ -388,65 +402,66 @@ AVXcode: 1 | |||
| 388 | 4e: CMOVLE/NG Gv,Ev | 402 | 4e: CMOVLE/NG Gv,Ev |
| 389 | 4f: CMOVNLE/G Gv,Ev | 403 | 4f: CMOVNLE/G Gv,Ev |
| 390 | # 0x0f 0x50-0x5f | 404 | # 0x0f 0x50-0x5f |
| 391 | 50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) | 405 | 50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) |
| 392 | 51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) | 406 | 51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) |
| 393 | 52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) | 407 | 52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) |
| 394 | 53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) | 408 | 53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) |
| 395 | 54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) | 409 | 54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) |
| 396 | 55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) | 410 | 55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) |
| 397 | 56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) | 411 | 56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) |
| 398 | 57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) | 412 | 57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) |
| 399 | 58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) | 413 | 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) |
| 400 | 59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) | 414 | 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) |
| 401 | 5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) | 415 | 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) |
| 402 | 5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) | 416 | 5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) |
| 403 | 5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) | 417 | 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) |
| 404 | 5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) | 418 | 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) |
| 405 | 5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) | 419 | 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) |
| 406 | 5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) | 420 | 5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) |
| 407 | # 0x0f 0x60-0x6f | 421 | # 0x0f 0x60-0x6f |
| 408 | 60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) | 422 | 60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) |
| 409 | 61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) | 423 | 61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) |
| 410 | 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) | 424 | 62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) |
| 411 | 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) | 425 | 63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) |
| 412 | 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) | 426 | 64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) |
| 413 | 65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) | 427 | 65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) |
| 414 | 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) | 428 | 66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) |
| 415 | 67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) | 429 | 67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) |
| 416 | 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) | 430 | 68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) |
| 417 | 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) | 431 | 69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) |
| 418 | 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) | 432 | 6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) |
| 419 | 6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) | 433 | 6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) |
| 420 | 6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) | 434 | 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) |
| 421 | 6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) | 435 | 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) |
| 422 | 6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) | 436 | 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) |
| 423 | 6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) | 437 | 6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) |
| 424 | # 0x0f 0x70-0x7f | 438 | # 0x0f 0x70-0x7f |
| 425 | 70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) | 439 | 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) |
| 426 | 71: Grp12 (1A) | 440 | 71: Grp12 (1A) |
| 427 | 72: Grp13 (1A) | 441 | 72: Grp13 (1A) |
| 428 | 73: Grp14 (1A) | 442 | 73: Grp14 (1A) |
| 429 | 74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) | 443 | 74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) |
| 430 | 75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) | 444 | 75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) |
| 431 | 76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) | 445 | 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) |
| 432 | 77: emms/vzeroupper/vzeroall (VEX) | 446 | # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. |
| 433 | 78: VMREAD Ed/q,Gd/q | 447 | 77: emms | vzeroupper | vzeroall |
| 434 | 79: VMWRITE Gd/q,Ed/q | 448 | 78: VMREAD Ey,Gy |
| 449 | 79: VMWRITE Gy,Ey | ||
| 435 | 7a: | 450 | 7a: |
| 436 | 7b: | 451 | 7b: |
| 437 | 7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) | 452 | 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) |
| 438 | 7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) | 453 | 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) |
| 439 | 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) | 454 | 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) |
| 440 | 7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) | 455 | 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) |
| 441 | # 0x0f 0x80-0x8f | 456 | # 0x0f 0x80-0x8f |
| 442 | 80: JO Jz (f64) | 457 | 80: JO Jz (f64) |
| 443 | 81: JNO Jz (f64) | 458 | 81: JNO Jz (f64) |
| 444 | 82: JB/JNAE/JC Jz (f64) | 459 | 82: JB/JC/JNAE Jz (f64) |
| 445 | 83: JNB/JAE/JNC Jz (f64) | 460 | 83: JAE/JNB/JNC Jz (f64) |
| 446 | 84: JZ/JE Jz (f64) | 461 | 84: JE/JZ Jz (f64) |
| 447 | 85: JNZ/JNE Jz (f64) | 462 | 85: JNE/JNZ Jz (f64) |
| 448 | 86: JBE/JNA Jz (f64) | 463 | 86: JBE/JNA Jz (f64) |
| 449 | 87: JNBE/JA Jz (f64) | 464 | 87: JA/JNBE Jz (f64) |
| 450 | 88: JS Jz (f64) | 465 | 88: JS Jz (f64) |
| 451 | 89: JNS Jz (f64) | 466 | 89: JNS Jz (f64) |
| 452 | 8a: JP/JPE Jz (f64) | 467 | 8a: JP/JPE Jz (f64) |
| @@ -502,18 +517,18 @@ b8: JMPE | POPCNT Gv,Ev (F3) | |||
| 502 | b9: Grp10 (1A) | 517 | b9: Grp10 (1A) |
| 503 | ba: Grp8 Ev,Ib (1A) | 518 | ba: Grp8 Ev,Ib (1A) |
| 504 | bb: BTC Ev,Gv | 519 | bb: BTC Ev,Gv |
| 505 | bc: BSF Gv,Ev | 520 | bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) |
| 506 | bd: BSR Gv,Ev | 521 | bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) |
| 507 | be: MOVSX Gv,Eb | 522 | be: MOVSX Gv,Eb |
| 508 | bf: MOVSX Gv,Ew | 523 | bf: MOVSX Gv,Ew |
| 509 | # 0x0f 0xc0-0xcf | 524 | # 0x0f 0xc0-0xcf |
| 510 | c0: XADD Eb,Gb | 525 | c0: XADD Eb,Gb |
| 511 | c1: XADD Ev,Gv | 526 | c1: XADD Ev,Gv |
| 512 | c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) | 527 | c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) |
| 513 | c3: movnti Md/q,Gd/q | 528 | c3: movnti My,Gy |
| 514 | c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) | 529 | c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) |
| 515 | c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) | 530 | c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) |
| 516 | c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) | 531 | c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) |
| 517 | c7: Grp9 (1A) | 532 | c7: Grp9 (1A) |
| 518 | c8: BSWAP RAX/EAX/R8/R8D | 533 | c8: BSWAP RAX/EAX/R8/R8D |
| 519 | c9: BSWAP RCX/ECX/R9/R9D | 534 | c9: BSWAP RCX/ECX/R9/R9D |
| @@ -524,55 +539,55 @@ cd: BSWAP RBP/EBP/R13/R13D | |||
| 524 | ce: BSWAP RSI/ESI/R14/R14D | 539 | ce: BSWAP RSI/ESI/R14/R14D |
| 525 | cf: BSWAP RDI/EDI/R15/R15D | 540 | cf: BSWAP RDI/EDI/R15/R15D |
| 526 | # 0x0f 0xd0-0xdf | 541 | # 0x0f 0xd0-0xdf |
| 527 | d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) | 542 | d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) |
| 528 | d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) | 543 | d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) |
| 529 | d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) | 544 | d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) |
| 530 | d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) | 545 | d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) |
| 531 | d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) | 546 | d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) |
| 532 | d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) | 547 | d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) |
| 533 | d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) | 548 | d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) |
| 534 | d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) | 549 | d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) |
| 535 | d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) | 550 | d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) |
| 536 | d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) | 551 | d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) |
| 537 | da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) | 552 | da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) |
| 538 | db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) | 553 | db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) |
| 539 | dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) | 554 | dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) |
| 540 | dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) | 555 | dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) |
| 541 | de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) | 556 | de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) |
| 542 | df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) | 557 | df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) |
| 543 | # 0x0f 0xe0-0xef | 558 | # 0x0f 0xe0-0xef |
| 544 | e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) | 559 | e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) |
| 545 | e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) | 560 | e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) |
| 546 | e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) | 561 | e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) |
| 547 | e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) | 562 | e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) |
| 548 | e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) | 563 | e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) |
| 549 | e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) | 564 | e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) |
| 550 | e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) | 565 | e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) |
| 551 | e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) | 566 | e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) |
| 552 | e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) | 567 | e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) |
| 553 | e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) | 568 | e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) |
| 554 | ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) | 569 | ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) |
| 555 | eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) | 570 | eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) |
| 556 | ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) | 571 | ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) |
| 557 | ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) | 572 | ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) |
| 558 | ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) | 573 | ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) |
| 559 | ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) | 574 | ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) |
| 560 | # 0x0f 0xf0-0xff | 575 | # 0x0f 0xf0-0xff |
| 561 | f0: lddqu Vdq,Mdq (F2),(VEX) | 576 | f0: vlddqu Vx,Mx (F2) |
| 562 | f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) | 577 | f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) |
| 563 | f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) | 578 | f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) |
| 564 | f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) | 579 | f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) |
| 565 | f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) | 580 | f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) |
| 566 | f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) | 581 | f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) |
| 567 | f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) | 582 | f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) |
| 568 | f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) | 583 | f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) |
| 569 | f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) | 584 | f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) |
| 570 | f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) | 585 | f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) |
| 571 | fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) | 586 | fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) |
| 572 | fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) | 587 | fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) |
| 573 | fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) | 588 | fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) |
| 574 | fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) | 589 | fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) |
| 575 | fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) | 590 | fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) |
| 576 | ff: | 591 | ff: |
| 577 | EndTable | 592 | EndTable |
| 578 | 593 | ||
| @@ -580,155 +595,193 @@ Table: 3-byte opcode 1 (0x0f 0x38) | |||
| 580 | Referrer: 3-byte escape 1 | 595 | Referrer: 3-byte escape 1 |
| 581 | AVXcode: 2 | 596 | AVXcode: 2 |
| 582 | # 0x0f 0x38 0x00-0x0f | 597 | # 0x0f 0x38 0x00-0x0f |
| 583 | 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) | 598 | 00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) |
| 584 | 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) | 599 | 01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) |
| 585 | 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) | 600 | 02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) |
| 586 | 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) | 601 | 03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) |
| 587 | 04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) | 602 | 04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) |
| 588 | 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) | 603 | 05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) |
| 589 | 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) | 604 | 06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) |
| 590 | 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) | 605 | 07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) |
| 591 | 08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) | 606 | 08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) |
| 592 | 09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) | 607 | 09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) |
| 593 | 0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) | 608 | 0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) |
| 594 | 0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) | 609 | 0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) |
| 595 | 0c: Vpermilps /r (66),(oVEX) | 610 | 0c: vpermilps Vx,Hx,Wx (66),(v) |
| 596 | 0d: Vpermilpd /r (66),(oVEX) | 611 | 0d: vpermilpd Vx,Hx,Wx (66),(v) |
| 597 | 0e: vtestps /r (66),(oVEX) | 612 | 0e: vtestps Vx,Wx (66),(v) |
| 598 | 0f: vtestpd /r (66),(oVEX) | 613 | 0f: vtestpd Vx,Wx (66),(v) |
| 599 | # 0x0f 0x38 0x10-0x1f | 614 | # 0x0f 0x38 0x10-0x1f |
| 600 | 10: pblendvb Vdq,Wdq (66) | 615 | 10: pblendvb Vdq,Wdq (66) |
| 601 | 11: | 616 | 11: |
| 602 | 12: | 617 | 12: |
| 603 | 13: | 618 | 13: vcvtph2ps Vx,Wx,Ib (66),(v) |
| 604 | 14: blendvps Vdq,Wdq (66) | 619 | 14: blendvps Vdq,Wdq (66) |
| 605 | 15: blendvpd Vdq,Wdq (66) | 620 | 15: blendvpd Vdq,Wdq (66) |
| 606 | 16: | 621 | 16: vpermps Vqq,Hqq,Wqq (66),(v) |
| 607 | 17: ptest Vdq,Wdq (66),(VEX) | 622 | 17: vptest Vx,Wx (66) |
| 608 | 18: vbroadcastss /r (66),(oVEX) | 623 | 18: vbroadcastss Vx,Wd (66),(v) |
| 609 | 19: vbroadcastsd /r (66),(oVEX),(o256) | 624 | 19: vbroadcastsd Vqq,Wq (66),(v) |
| 610 | 1a: vbroadcastf128 /r (66),(oVEX),(o256) | 625 | 1a: vbroadcastf128 Vqq,Mdq (66),(v) |
| 611 | 1b: | 626 | 1b: |
| 612 | 1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) | 627 | 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) |
| 613 | 1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) | 628 | 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) |
| 614 | 1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) | 629 | 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) |
| 615 | 1f: | 630 | 1f: |
| 616 | # 0x0f 0x38 0x20-0x2f | 631 | # 0x0f 0x38 0x20-0x2f |
| 617 | 20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) | 632 | 20: vpmovsxbw Vx,Ux/Mq (66),(v1) |
| 618 | 21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) | 633 | 21: vpmovsxbd Vx,Ux/Md (66),(v1) |
| 619 | 22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) | 634 | 22: vpmovsxbq Vx,Ux/Mw (66),(v1) |
| 620 | 23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) | 635 | 23: vpmovsxwd Vx,Ux/Mq (66),(v1) |
| 621 | 24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) | 636 | 24: vpmovsxwq Vx,Ux/Md (66),(v1) |
| 622 | 25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) | 637 | 25: vpmovsxdq Vx,Ux/Mq (66),(v1) |
| 623 | 26: | 638 | 26: |
| 624 | 27: | 639 | 27: |
| 625 | 28: pmuldq Vdq,Wdq (66),(VEX),(o128) | 640 | 28: vpmuldq Vx,Hx,Wx (66),(v1) |
| 626 | 29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) | 641 | 29: vpcmpeqq Vx,Hx,Wx (66),(v1) |
| 627 | 2a: movntdqa Vdq,Mdq (66),(VEX),(o128) | 642 | 2a: vmovntdqa Vx,Mx (66),(v1) |
| 628 | 2b: packusdw Vdq,Wdq (66),(VEX),(o128) | 643 | 2b: vpackusdw Vx,Hx,Wx (66),(v1) |
| 629 | 2c: vmaskmovps(ld) /r (66),(oVEX) | 644 | 2c: vmaskmovps Vx,Hx,Mx (66),(v) |
| 630 | 2d: vmaskmovpd(ld) /r (66),(oVEX) | 645 | 2d: vmaskmovpd Vx,Hx,Mx (66),(v) |
| 631 | 2e: vmaskmovps(st) /r (66),(oVEX) | 646 | 2e: vmaskmovps Mx,Hx,Vx (66),(v) |
| 632 | 2f: vmaskmovpd(st) /r (66),(oVEX) | 647 | 2f: vmaskmovpd Mx,Hx,Vx (66),(v) |
| 633 | # 0x0f 0x38 0x30-0x3f | 648 | # 0x0f 0x38 0x30-0x3f |
| 634 | 30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) | 649 | 30: vpmovzxbw Vx,Ux/Mq (66),(v1) |
| 635 | 31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) | 650 | 31: vpmovzxbd Vx,Ux/Md (66),(v1) |
| 636 | 32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) | 651 | 32: vpmovzxbq Vx,Ux/Mw (66),(v1) |
| 637 | 33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) | 652 | 33: vpmovzxwd Vx,Ux/Mq (66),(v1) |
| 638 | 34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) | 653 | 34: vpmovzxwq Vx,Ux/Md (66),(v1) |
| 639 | 35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) | 654 | 35: vpmovzxdq Vx,Ux/Mq (66),(v1) |
| 640 | 36: | 655 | 36: vpermd Vqq,Hqq,Wqq (66),(v) |
| 641 | 37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) | 656 | 37: vpcmpgtq Vx,Hx,Wx (66),(v1) |
| 642 | 38: pminsb Vdq,Wdq (66),(VEX),(o128) | 657 | 38: vpminsb Vx,Hx,Wx (66),(v1) |
| 643 | 39: pminsd Vdq,Wdq (66),(VEX),(o128) | 658 | 39: vpminsd Vx,Hx,Wx (66),(v1) |
| 644 | 3a: pminuw Vdq,Wdq (66),(VEX),(o128) | 659 | 3a: vpminuw Vx,Hx,Wx (66),(v1) |
| 645 | 3b: pminud Vdq,Wdq (66),(VEX),(o128) | 660 | 3b: vpminud Vx,Hx,Wx (66),(v1) |
| 646 | 3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) | 661 | 3c: vpmaxsb Vx,Hx,Wx (66),(v1) |
| 647 | 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) | 662 | 3d: vpmaxsd Vx,Hx,Wx (66),(v1) |
| 648 | 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) | 663 | 3e: vpmaxuw Vx,Hx,Wx (66),(v1) |
| 649 | 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) | 664 | 3f: vpmaxud Vx,Hx,Wx (66),(v1) |
| 650 | # 0x0f 0x38 0x40-0x8f | 665 | # 0x0f 0x38 0x40-0x8f |
| 651 | 40: pmulld Vdq,Wdq (66),(VEX),(o128) | 666 | 40: vpmulld Vx,Hx,Wx (66),(v1) |
| 652 | 41: phminposuw Vdq,Wdq (66),(VEX),(o128) | 667 | 41: vphminposuw Vdq,Wdq (66),(v1) |
| 653 | 80: INVEPT Gd/q,Mdq (66) | 668 | 42: |
| 654 | 81: INVPID Gd/q,Mdq (66) | 669 | 43: |
| 670 | 44: | ||
| 671 | 45: vpsrlvd/q Vx,Hx,Wx (66),(v) | ||
| 672 | 46: vpsravd Vx,Hx,Wx (66),(v) | ||
| 673 | 47: vpsllvd/q Vx,Hx,Wx (66),(v) | ||
| 674 | # Skip 0x48-0x57 | ||
| 675 | 58: vpbroadcastd Vx,Wx (66),(v) | ||
| 676 | 59: vpbroadcastq Vx,Wx (66),(v) | ||
| 677 | 5a: vbroadcasti128 Vqq,Mdq (66),(v) | ||
| 678 | # Skip 0x5b-0x77 | ||
| 679 | 78: vpbroadcastb Vx,Wx (66),(v) | ||
| 680 | 79: vpbroadcastw Vx,Wx (66),(v) | ||
| 681 | # Skip 0x7a-0x7f | ||
| 682 | 80: INVEPT Gy,Mdq (66) | ||
| 683 | 81: INVPID Gy,Mdq (66) | ||
| 684 | 82: INVPCID Gy,Mdq (66) | ||
| 685 | 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) | ||
| 686 | 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) | ||
| 655 | # 0x0f 0x38 0x90-0xbf (FMA) | 687 | # 0x0f 0x38 0x90-0xbf (FMA) |
| 656 | 96: vfmaddsub132pd/ps /r (66),(VEX) | 688 | 90: vgatherdd/q Vx,Hx,Wx (66),(v) |
| 657 | 97: vfmsubadd132pd/ps /r (66),(VEX) | 689 | 91: vgatherqd/q Vx,Hx,Wx (66),(v) |
| 658 | 98: vfmadd132pd/ps /r (66),(VEX) | 690 | 92: vgatherdps/d Vx,Hx,Wx (66),(v) |
| 659 | 99: vfmadd132sd/ss /r (66),(VEX),(o128) | 691 | 93: vgatherqps/d Vx,Hx,Wx (66),(v) |
| 660 | 9a: vfmsub132pd/ps /r (66),(VEX) | 692 | 94: |
| 661 | 9b: vfmsub132sd/ss /r (66),(VEX),(o128) | 693 | 95: |
| 662 | 9c: vfnmadd132pd/ps /r (66),(VEX) | 694 | 96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) |
| 663 | 9d: vfnmadd132sd/ss /r (66),(VEX),(o128) | 695 | 97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) |
| 664 | 9e: vfnmsub132pd/ps /r (66),(VEX) | 696 | 98: vfmadd132ps/d Vx,Hx,Wx (66),(v) |
| 665 | 9f: vfnmsub132sd/ss /r (66),(VEX),(o128) | 697 | 99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) |
| 666 | a6: vfmaddsub213pd/ps /r (66),(VEX) | 698 | 9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) |
| 667 | a7: vfmsubadd213pd/ps /r (66),(VEX) | 699 | 9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) |
| 668 | a8: vfmadd213pd/ps /r (66),(VEX) | 700 | 9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) |
| 669 | a9: vfmadd213sd/ss /r (66),(VEX),(o128) | 701 | 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) |
| 670 | aa: vfmsub213pd/ps /r (66),(VEX) | 702 | 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) |
| 671 | ab: vfmsub213sd/ss /r (66),(VEX),(o128) | 703 | 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) |
| 672 | ac: vfnmadd213pd/ps /r (66),(VEX) | 704 | a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) |
| 673 | ad: vfnmadd213sd/ss /r (66),(VEX),(o128) | 705 | a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) |
| 674 | ae: vfnmsub213pd/ps /r (66),(VEX) | 706 | a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) |
| 675 | af: vfnmsub213sd/ss /r (66),(VEX),(o128) | 707 | a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) |
| 676 | b6: vfmaddsub231pd/ps /r (66),(VEX) | 708 | aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) |
| 677 | b7: vfmsubadd231pd/ps /r (66),(VEX) | 709 | ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) |
| 678 | b8: vfmadd231pd/ps /r (66),(VEX) | 710 | ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) |
| 679 | b9: vfmadd231sd/ss /r (66),(VEX),(o128) | 711 | ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) |
| 680 | ba: vfmsub231pd/ps /r (66),(VEX) | 712 | ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) |
| 681 | bb: vfmsub231sd/ss /r (66),(VEX),(o128) | 713 | af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) |
| 682 | bc: vfnmadd231pd/ps /r (66),(VEX) | 714 | b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) |
| 683 | bd: vfnmadd231sd/ss /r (66),(VEX),(o128) | 715 | b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) |
| 684 | be: vfnmsub231pd/ps /r (66),(VEX) | 716 | b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) |
| 685 | bf: vfnmsub231sd/ss /r (66),(VEX),(o128) | 717 | b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) |
| 718 | ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) | ||
| 719 | bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
| 720 | bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) | ||
| 721 | bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
| 722 | be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) | ||
| 723 | bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
| 686 | # 0x0f 0x38 0xc0-0xff | 724 | # 0x0f 0x38 0xc0-0xff |
| 687 | db: aesimc Vdq,Wdq (66),(VEX),(o128) | 725 | db: VAESIMC Vdq,Wdq (66),(v1) |
| 688 | dc: aesenc Vdq,Wdq (66),(VEX),(o128) | 726 | dc: VAESENC Vdq,Hdq,Wdq (66),(v1) |
| 689 | dd: aesenclast Vdq,Wdq (66),(VEX),(o128) | 727 | dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) |
| 690 | de: aesdec Vdq,Wdq (66),(VEX),(o128) | 728 | de: VAESDEC Vdq,Hdq,Wdq (66),(v1) |
| 691 | df: aesdeclast Vdq,Wdq (66),(VEX),(o128) | 729 | df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) |
| 692 | f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) | 730 | f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) |
| 693 | f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) | 731 | f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) |
| 732 | f3: ANDN Gy,By,Ey (v) | ||
| 733 | f4: Grp17 (1A) | ||
| 734 | f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | ||
| 735 | f6: MULX By,Gy,rDX,Ey (F2),(v) | ||
| 736 | f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) | ||
| 694 | EndTable | 737 | EndTable |
| 695 | 738 | ||
| 696 | Table: 3-byte opcode 2 (0x0f 0x3a) | 739 | Table: 3-byte opcode 2 (0x0f 0x3a) |
| 697 | Referrer: 3-byte escape 2 | 740 | Referrer: 3-byte escape 2 |
| 698 | AVXcode: 3 | 741 | AVXcode: 3 |
| 699 | # 0x0f 0x3a 0x00-0xff | 742 | # 0x0f 0x3a 0x00-0xff |
| 700 | 04: vpermilps /r,Ib (66),(oVEX) | 743 | 00: vpermq Vqq,Wqq,Ib (66),(v) |
| 701 | 05: vpermilpd /r,Ib (66),(oVEX) | 744 | 01: vpermpd Vqq,Wqq,Ib (66),(v) |
| 702 | 06: vperm2f128 /r,Ib (66),(oVEX),(o256) | 745 | 02: vpblendd Vx,Hx,Wx,Ib (66),(v) |
| 703 | 08: roundps Vdq,Wdq,Ib (66),(VEX) | 746 | 03: |
| 704 | 09: roundpd Vdq,Wdq,Ib (66),(VEX) | 747 | 04: vpermilps Vx,Wx,Ib (66),(v) |
| 705 | 0a: roundss Vss,Wss,Ib (66),(VEX),(o128) | 748 | 05: vpermilpd Vx,Wx,Ib (66),(v) |
| 706 | 0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) | 749 | 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) |
| 707 | 0c: blendps Vdq,Wdq,Ib (66),(VEX) | 750 | 07: |
| 708 | 0d: blendpd Vdq,Wdq,Ib (66),(VEX) | 751 | 08: vroundps Vx,Wx,Ib (66) |
| 709 | 0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) | 752 | 09: vroundpd Vx,Wx,Ib (66) |
| 710 | 0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) | 753 | 0a: vroundss Vss,Wss,Ib (66),(v1) |
| 711 | 14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) | 754 | 0b: vroundsd Vsd,Wsd,Ib (66),(v1) |
| 712 | 15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) | 755 | 0c: vblendps Vx,Hx,Wx,Ib (66) |
| 713 | 16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) | 756 | 0d: vblendpd Vx,Hx,Wx,Ib (66) |
| 714 | 17: extractps Ed,Vdq,Ib (66),(VEX),(o128) | 757 | 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) |
| 715 | 18: vinsertf128 /r,Ib (66),(oVEX),(o256) | 758 | 0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) |
| 716 | 19: vextractf128 /r,Ib (66),(oVEX),(o256) | 759 | 14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) |
| 717 | 20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) | 760 | 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) |
| 718 | 21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) | 761 | 16: vpextrd/q Ey,Vdq,Ib (66),(v1) |
| 719 | 22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) | 762 | 17: vextractps Ed,Vdq,Ib (66),(v1) |
| 720 | 40: dpps Vdq,Wdq,Ib (66),(VEX) | 763 | 18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) |
| 721 | 41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) | 764 | 19: vextractf128 Wdq,Vqq,Ib (66),(v) |
| 722 | 42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) | 765 | 1d: vcvtps2ph Wx,Vx,Ib (66),(v) |
| 723 | 44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) | 766 | 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) |
| 724 | 4a: vblendvps /r,Ib (66),(oVEX) | 767 | 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) |
| 725 | 4b: vblendvpd /r,Ib (66),(oVEX) | 768 | 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) |
| 726 | 4c: vpblendvb /r,Ib (66),(oVEX),(o128) | 769 | 38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) |
| 727 | 60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) | 770 | 39: vextracti128 Wdq,Vqq,Ib (66),(v) |
| 728 | 61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) | 771 | 40: vdpps Vx,Hx,Wx,Ib (66) |
| 729 | 62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) | 772 | 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) |
| 730 | 63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) | 773 | 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) |
| 731 | df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) | 774 | 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) |
| 775 | 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) | ||
| 776 | 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) | ||
| 777 | 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) | ||
| 778 | 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) | ||
| 779 | 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) | ||
| 780 | 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) | ||
| 781 | 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) | ||
| 782 | 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) | ||
| 783 | df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) | ||
| 784 | f0: RORX Gy,Ey,Ib (F2),(v) | ||
| 732 | EndTable | 785 | EndTable |
| 733 | 786 | ||
| 734 | GrpTable: Grp1 | 787 | GrpTable: Grp1 |
| @@ -790,7 +843,7 @@ GrpTable: Grp5 | |||
| 790 | 2: CALLN Ev (f64) | 843 | 2: CALLN Ev (f64) |
| 791 | 3: CALLF Ep | 844 | 3: CALLF Ep |
| 792 | 4: JMPN Ev (f64) | 845 | 4: JMPN Ev (f64) |
| 793 | 5: JMPF Ep | 846 | 5: JMPF Mp |
| 794 | 6: PUSH Ev (d64) | 847 | 6: PUSH Ev (d64) |
| 795 | 7: | 848 | 7: |
| 796 | EndTable | 849 | EndTable |
| @@ -807,7 +860,7 @@ EndTable | |||
| 807 | GrpTable: Grp7 | 860 | GrpTable: Grp7 |
| 808 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | 861 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) |
| 809 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) | 862 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) |
| 810 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | 863 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) |
| 811 | 3: LIDT Ms | 864 | 3: LIDT Ms |
| 812 | 4: SMSW Mw/Rv | 865 | 4: SMSW Mw/Rv |
| 813 | 5: | 866 | 5: |
| @@ -824,44 +877,45 @@ EndTable | |||
| 824 | 877 | ||
| 825 | GrpTable: Grp9 | 878 | GrpTable: Grp9 |
| 826 | 1: CMPXCHG8B/16B Mq/Mdq | 879 | 1: CMPXCHG8B/16B Mq/Mdq |
| 827 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | 880 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) |
| 828 | 7: VMPTRST Mq | 881 | 7: VMPTRST Mq | VMPTRST Mq (F3) |
| 829 | EndTable | 882 | EndTable |
| 830 | 883 | ||
| 831 | GrpTable: Grp10 | 884 | GrpTable: Grp10 |
| 832 | EndTable | 885 | EndTable |
| 833 | 886 | ||
| 834 | GrpTable: Grp11 | 887 | GrpTable: Grp11 |
| 888 | # Note: the operands are given by group opcode | ||
| 835 | 0: MOV | 889 | 0: MOV |
| 836 | EndTable | 890 | EndTable |
| 837 | 891 | ||
| 838 | GrpTable: Grp12 | 892 | GrpTable: Grp12 |
| 839 | 2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) | 893 | 2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) |
| 840 | 4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) | 894 | 4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) |
| 841 | 6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) | 895 | 6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) |
| 842 | EndTable | 896 | EndTable |
| 843 | 897 | ||
| 844 | GrpTable: Grp13 | 898 | GrpTable: Grp13 |
| 845 | 2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) | 899 | 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) |
| 846 | 4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) | 900 | 4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) |
| 847 | 6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) | 901 | 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) |
| 848 | EndTable | 902 | EndTable |
| 849 | 903 | ||
| 850 | GrpTable: Grp14 | 904 | GrpTable: Grp14 |
| 851 | 2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) | 905 | 2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) |
| 852 | 3: psrldq Udq,Ib (66),(11B),(VEX),(o128) | 906 | 3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) |
| 853 | 6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) | 907 | 6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) |
| 854 | 7: pslldq Udq,Ib (66),(11B),(VEX),(o128) | 908 | 7: vpslldq Hx,Ux,Ib (66),(11B),(v1) |
| 855 | EndTable | 909 | EndTable |
| 856 | 910 | ||
| 857 | GrpTable: Grp15 | 911 | GrpTable: Grp15 |
| 858 | 0: fxsave | 912 | 0: fxsave | RDFSBASE Ry (F3),(11B) |
| 859 | 1: fxstor | 913 | 1: fxstor | RDGSBASE Ry (F3),(11B) |
| 860 | 2: ldmxcsr (VEX) | 914 | 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) |
| 861 | 3: stmxcsr (VEX) | 915 | 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) |
| 862 | 4: XSAVE | 916 | 4: XSAVE |
| 863 | 5: XRSTOR | lfence (11B) | 917 | 5: XRSTOR | lfence (11B) |
| 864 | 6: mfence (11B) | 918 | 6: XSAVEOPT | mfence (11B) |
| 865 | 7: clflush | sfence (11B) | 919 | 7: clflush | sfence (11B) |
| 866 | EndTable | 920 | EndTable |
| 867 | 921 | ||
| @@ -872,6 +926,12 @@ GrpTable: Grp16 | |||
| 872 | 3: prefetch T2 | 926 | 3: prefetch T2 |
| 873 | EndTable | 927 | EndTable |
| 874 | 928 | ||
| 929 | GrpTable: Grp17 | ||
| 930 | 1: BLSR By,Ey (v) | ||
| 931 | 2: BLSMSK By,Ey (v) | ||
| 932 | 3: BLSI By,Ey (v) | ||
| 933 | EndTable | ||
| 934 | |||
| 875 | # AMD's Prefetch Group | 935 | # AMD's Prefetch Group |
| 876 | GrpTable: GrpP | 936 | GrpTable: GrpP |
| 877 | 0: PREFETCH | 937 | 0: PREFETCH |
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b2a6b6..1599f568f0e2 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile | |||
| @@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ | |||
| 4 | oprof.o cpu_buffer.o buffer_sync.o \ | 4 | oprof.o cpu_buffer.o buffer_sync.o \ |
| 5 | event_buffer.o oprofile_files.o \ | 5 | event_buffer.o oprofile_files.o \ |
| 6 | oprofilefs.o oprofile_stats.o \ | 6 | oprofilefs.o oprofile_stats.o \ |
| 7 | timer_int.o ) | 7 | timer_int.o nmi_timer_int.o ) |
| 8 | 8 | ||
| 9 | oprofile-y := $(DRIVER_OBJS) init.o backtrace.o | 9 | oprofile-y := $(DRIVER_OBJS) init.o backtrace.o |
| 10 | oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ | 10 | oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ |
| 11 | op_model_ppro.o op_model_p4.o | 11 | op_model_ppro.o op_model_p4.o |
| 12 | oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o | ||
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index f148cf652678..9e138d00ad36 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c | |||
| @@ -16,37 +16,23 @@ | |||
| 16 | * with the NMI mode driver. | 16 | * with the NMI mode driver. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 19 | extern int op_nmi_init(struct oprofile_operations *ops); | 20 | extern int op_nmi_init(struct oprofile_operations *ops); |
| 20 | extern int op_nmi_timer_init(struct oprofile_operations *ops); | ||
| 21 | extern void op_nmi_exit(void); | 21 | extern void op_nmi_exit(void); |
| 22 | extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); | 22 | #else |
| 23 | static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } | ||
| 24 | static void op_nmi_exit(void) { } | ||
| 25 | #endif | ||
| 23 | 26 | ||
| 24 | static int nmi_timer; | 27 | extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); |
| 25 | 28 | ||
| 26 | int __init oprofile_arch_init(struct oprofile_operations *ops) | 29 | int __init oprofile_arch_init(struct oprofile_operations *ops) |
| 27 | { | 30 | { |
| 28 | int ret; | ||
| 29 | |||
| 30 | ret = -ENODEV; | ||
| 31 | |||
| 32 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 33 | ret = op_nmi_init(ops); | ||
| 34 | #endif | ||
| 35 | nmi_timer = (ret != 0); | ||
| 36 | #ifdef CONFIG_X86_IO_APIC | ||
| 37 | if (nmi_timer) | ||
| 38 | ret = op_nmi_timer_init(ops); | ||
| 39 | #endif | ||
| 40 | ops->backtrace = x86_backtrace; | 31 | ops->backtrace = x86_backtrace; |
| 41 | 32 | return op_nmi_init(ops); | |
| 42 | return ret; | ||
| 43 | } | 33 | } |
| 44 | 34 | ||
| 45 | |||
| 46 | void oprofile_arch_exit(void) | 35 | void oprofile_arch_exit(void) |
| 47 | { | 36 | { |
| 48 | #ifdef CONFIG_X86_LOCAL_APIC | 37 | op_nmi_exit(); |
| 49 | if (!nmi_timer) | ||
| 50 | op_nmi_exit(); | ||
| 51 | #endif | ||
| 52 | } | 38 | } |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 75f9528e0372..26b8a8514ee5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
| @@ -595,24 +595,36 @@ static int __init p4_init(char **cpu_type) | |||
| 595 | return 0; | 595 | return 0; |
| 596 | } | 596 | } |
| 597 | 597 | ||
| 598 | static int force_arch_perfmon; | 598 | enum __force_cpu_type { |
| 599 | static int force_cpu_type(const char *str, struct kernel_param *kp) | 599 | reserved = 0, /* do not force */ |
| 600 | timer, | ||
| 601 | arch_perfmon, | ||
| 602 | }; | ||
| 603 | |||
| 604 | static int force_cpu_type; | ||
| 605 | |||
| 606 | static int set_cpu_type(const char *str, struct kernel_param *kp) | ||
| 600 | { | 607 | { |
| 601 | if (!strcmp(str, "arch_perfmon")) { | 608 | if (!strcmp(str, "timer")) { |
| 602 | force_arch_perfmon = 1; | 609 | force_cpu_type = timer; |
| 610 | printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); | ||
| 611 | } else if (!strcmp(str, "arch_perfmon")) { | ||
| 612 | force_cpu_type = arch_perfmon; | ||
| 603 | printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); | 613 | printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); |
| 614 | } else { | ||
| 615 | force_cpu_type = 0; | ||
| 604 | } | 616 | } |
| 605 | 617 | ||
| 606 | return 0; | 618 | return 0; |
| 607 | } | 619 | } |
| 608 | module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); | 620 | module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); |
| 609 | 621 | ||
| 610 | static int __init ppro_init(char **cpu_type) | 622 | static int __init ppro_init(char **cpu_type) |
| 611 | { | 623 | { |
| 612 | __u8 cpu_model = boot_cpu_data.x86_model; | 624 | __u8 cpu_model = boot_cpu_data.x86_model; |
| 613 | struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ | 625 | struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ |
| 614 | 626 | ||
| 615 | if (force_arch_perfmon && cpu_has_arch_perfmon) | 627 | if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) |
| 616 | return 0; | 628 | return 0; |
| 617 | 629 | ||
| 618 | /* | 630 | /* |
| @@ -679,6 +691,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
| 679 | if (!cpu_has_apic) | 691 | if (!cpu_has_apic) |
| 680 | return -ENODEV; | 692 | return -ENODEV; |
| 681 | 693 | ||
| 694 | if (force_cpu_type == timer) | ||
| 695 | return -ENODEV; | ||
| 696 | |||
| 682 | switch (vendor) { | 697 | switch (vendor) { |
| 683 | case X86_VENDOR_AMD: | 698 | case X86_VENDOR_AMD: |
| 684 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ | 699 | /* Needs to be at least an Athlon (or hammer in 32bit mode) */ |
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index 7f8052cd6620..000000000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null | |||
| @@ -1,50 +0,0 @@ | |||
| 1 | /** | ||
| 2 | * @file nmi_timer_int.c | ||
| 3 | * | ||
| 4 | * @remark Copyright 2003 OProfile authors | ||
| 5 | * @remark Read the file COPYING | ||
| 6 | * | ||
| 7 | * @author Zwane Mwaikambo <zwane@linuxpower.ca> | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/init.h> | ||
| 11 | #include <linux/smp.h> | ||
| 12 | #include <linux/errno.h> | ||
| 13 | #include <linux/oprofile.h> | ||
| 14 | #include <linux/rcupdate.h> | ||
| 15 | #include <linux/kdebug.h> | ||
| 16 | |||
| 17 | #include <asm/nmi.h> | ||
| 18 | #include <asm/apic.h> | ||
| 19 | #include <asm/ptrace.h> | ||
| 20 | |||
| 21 | static int profile_timer_exceptions_notify(unsigned int val, struct pt_regs *regs) | ||
| 22 | { | ||
| 23 | oprofile_add_sample(regs, 0); | ||
| 24 | return NMI_HANDLED; | ||
| 25 | } | ||
| 26 | |||
| 27 | static int timer_start(void) | ||
| 28 | { | ||
| 29 | if (register_nmi_handler(NMI_LOCAL, profile_timer_exceptions_notify, | ||
| 30 | 0, "oprofile-timer")) | ||
| 31 | return 1; | ||
| 32 | return 0; | ||
| 33 | } | ||
| 34 | |||
| 35 | |||
| 36 | static void timer_stop(void) | ||
| 37 | { | ||
| 38 | unregister_nmi_handler(NMI_LOCAL, "oprofile-timer"); | ||
| 39 | synchronize_sched(); /* Allow already-started NMIs to complete. */ | ||
| 40 | } | ||
| 41 | |||
| 42 | |||
| 43 | int __init op_nmi_timer_init(struct oprofile_operations *ops) | ||
| 44 | { | ||
| 45 | ops->start = timer_start; | ||
| 46 | ops->stop = timer_stop; | ||
| 47 | ops->cpu_type = "timer"; | ||
| 48 | printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); | ||
| 49 | return 0; | ||
| 50 | } | ||
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index f82082677337..d511aa97533a 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile | |||
| @@ -18,14 +18,21 @@ chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk | |||
| 18 | quiet_cmd_posttest = TEST $@ | 18 | quiet_cmd_posttest = TEST $@ |
| 19 | cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) | 19 | cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) |
| 20 | 20 | ||
| 21 | posttest: $(obj)/test_get_len vmlinux | 21 | quiet_cmd_sanitytest = TEST $@ |
| 22 | cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 | ||
| 23 | |||
| 24 | posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity | ||
| 22 | $(call cmd,posttest) | 25 | $(call cmd,posttest) |
| 26 | $(call cmd,sanitytest) | ||
| 23 | 27 | ||
| 24 | hostprogs-y := test_get_len | 28 | hostprogs-y += test_get_len insn_sanity |
| 25 | 29 | ||
| 26 | # -I needed for generated C source and C source which is in the kernel tree. | 30 | # -I needed for generated C source and C source which is in the kernel tree. |
| 27 | HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ | 31 | HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ |
| 28 | 32 | ||
| 33 | HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ | ||
| 34 | |||
| 29 | # Dependencies are also needed. | 35 | # Dependencies are also needed. |
| 30 | $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c | 36 | $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c |
| 31 | 37 | ||
| 38 | $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c | ||
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index eaf11f52fc0b..5f6a5b6c3a15 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk | |||
| @@ -47,7 +47,7 @@ BEGIN { | |||
| 47 | sep_expr = "^\\|$" | 47 | sep_expr = "^\\|$" |
| 48 | group_expr = "^Grp[0-9A-Za-z]+" | 48 | group_expr = "^Grp[0-9A-Za-z]+" |
| 49 | 49 | ||
| 50 | imm_expr = "^[IJAO][a-z]" | 50 | imm_expr = "^[IJAOL][a-z]" |
| 51 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | 51 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" |
| 52 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | 52 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" |
| 53 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" | 53 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" |
| @@ -59,6 +59,7 @@ BEGIN { | |||
| 59 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" | 59 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" |
| 60 | imm_flag["Ob"] = "INAT_MOFFSET" | 60 | imm_flag["Ob"] = "INAT_MOFFSET" |
| 61 | imm_flag["Ov"] = "INAT_MOFFSET" | 61 | imm_flag["Ov"] = "INAT_MOFFSET" |
| 62 | imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
| 62 | 63 | ||
| 63 | modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" | 64 | modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" |
| 64 | force64_expr = "\\([df]64\\)" | 65 | force64_expr = "\\([df]64\\)" |
| @@ -70,8 +71,12 @@ BEGIN { | |||
| 70 | lprefix3_expr = "\\(F2\\)" | 71 | lprefix3_expr = "\\(F2\\)" |
| 71 | max_lprefix = 4 | 72 | max_lprefix = 4 |
| 72 | 73 | ||
| 73 | vexok_expr = "\\(VEX\\)" | 74 | # All opcodes starting with lower-case 'v' or with (v1) superscript |
| 74 | vexonly_expr = "\\(oVEX\\)" | 75 | # accept the VEX prefix |
| 76 | vexok_opcode_expr = "^v.*" | ||
| 77 | vexok_expr = "\\(v1\\)" | ||
| 78 | # All opcodes with (v) superscript support *only* the VEX prefix | ||
| 79 | vexonly_expr = "\\(v\\)" | ||
| 75 | 80 | ||
| 76 | prefix_expr = "\\(Prefix\\)" | 81 | prefix_expr = "\\(Prefix\\)" |
| 77 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" | 82 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" |
| @@ -85,8 +90,8 @@ BEGIN { | |||
| 85 | prefix_num["SEG=GS"] = "INAT_PFX_GS" | 90 | prefix_num["SEG=GS"] = "INAT_PFX_GS" |
| 86 | prefix_num["SEG=SS"] = "INAT_PFX_SS" | 91 | prefix_num["SEG=SS"] = "INAT_PFX_SS" |
| 87 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" | 92 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" |
| 88 | prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" | 93 | prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" |
| 89 | prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" | 94 | prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" |
| 90 | 95 | ||
| 91 | clear_vars() | 96 | clear_vars() |
| 92 | } | 97 | } |
| @@ -310,12 +315,10 @@ function convert_operands(count,opnd, i,j,imm,mod) | |||
| 310 | if (match(opcode, fpu_expr)) | 315 | if (match(opcode, fpu_expr)) |
| 311 | flags = add_flags(flags, "INAT_MODRM") | 316 | flags = add_flags(flags, "INAT_MODRM") |
| 312 | 317 | ||
| 313 | # check VEX only code | 318 | # check VEX codes |
| 314 | if (match(ext, vexonly_expr)) | 319 | if (match(ext, vexonly_expr)) |
| 315 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") | 320 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") |
| 316 | 321 | else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) | |
| 317 | # check VEX only code | ||
| 318 | if (match(ext, vexok_expr)) | ||
| 319 | flags = add_flags(flags, "INAT_VEXOK") | 322 | flags = add_flags(flags, "INAT_VEXOK") |
| 320 | 323 | ||
| 321 | # check prefixes | 324 | # check prefixes |
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c new file mode 100644 index 000000000000..cc2f8c131286 --- /dev/null +++ b/arch/x86/tools/insn_sanity.c | |||
| @@ -0,0 +1,275 @@ | |||
| 1 | /* | ||
| 2 | * x86 decoder sanity test - based on test_get_insn.c | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, write to the Free Software | ||
| 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
| 17 | * | ||
| 18 | * Copyright (C) IBM Corporation, 2009 | ||
| 19 | * Copyright (C) Hitachi, Ltd., 2011 | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <stdlib.h> | ||
| 23 | #include <stdio.h> | ||
| 24 | #include <string.h> | ||
| 25 | #include <assert.h> | ||
| 26 | #include <unistd.h> | ||
| 27 | #include <sys/types.h> | ||
| 28 | #include <sys/stat.h> | ||
| 29 | #include <fcntl.h> | ||
| 30 | |||
| 31 | #define unlikely(cond) (cond) | ||
| 32 | #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) | ||
| 33 | |||
| 34 | #include <asm/insn.h> | ||
| 35 | #include <inat.c> | ||
| 36 | #include <insn.c> | ||
| 37 | |||
| 38 | /* | ||
| 39 | * Test of instruction analysis against tampering. | ||
| 40 | * Feed random bytes to the instruction decoder and ensure that it | ||
| 41 | * never accesses memory outside the instruction buffer. | ||
| 42 | */ | ||
| 43 | |||
| 44 | #define DEFAULT_MAX_ITER 10000 | ||
| 45 | #define INSN_NOP 0x90 | ||
| 46 | |||
| 47 | static const char *prog; /* Program name */ | ||
| 48 | static int verbose; /* Verbosity */ | ||
| 49 | static int x86_64; /* x86-64 bit mode flag */ | ||
| 50 | static unsigned int seed; /* Random seed */ | ||
| 51 | static unsigned long iter_start; /* Start of iteration number */ | ||
| 52 | static unsigned long iter_end = DEFAULT_MAX_ITER; /* End of iteration number */ | ||
| 53 | static FILE *input_file; /* Input file name */ | ||
| 54 | |||
| 55 | static void usage(const char *err) | ||
| 56 | { | ||
| 57 | if (err) | ||
| 58 | fprintf(stderr, "Error: %s\n\n", err); | ||
| 59 | fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); | ||
| 60 | fprintf(stderr, "\t-y 64bit mode\n"); | ||
| 61 | fprintf(stderr, "\t-n 32bit mode\n"); | ||
| 62 | fprintf(stderr, "\t-v Verbosity(-vv dumps any decoded result)\n"); | ||
| 63 | fprintf(stderr, "\t-s Give a random seed (and iteration number)\n"); | ||
| 64 | fprintf(stderr, "\t-m Give a maximum iteration number\n"); | ||
| 65 | fprintf(stderr, "\t-i Give an input file with decoded binary\n"); | ||
| 66 | exit(1); | ||
| 67 | } | ||
| 68 | |||
| 69 | static void dump_field(FILE *fp, const char *name, const char *indent, | ||
| 70 | struct insn_field *field) | ||
| 71 | { | ||
| 72 | fprintf(fp, "%s.%s = {\n", indent, name); | ||
| 73 | fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", | ||
| 74 | indent, field->value, field->bytes[0], field->bytes[1], | ||
| 75 | field->bytes[2], field->bytes[3]); | ||
| 76 | fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, | ||
| 77 | field->got, field->nbytes); | ||
| 78 | } | ||
| 79 | |||
| 80 | static void dump_insn(FILE *fp, struct insn *insn) | ||
| 81 | { | ||
| 82 | fprintf(fp, "Instruction = {\n"); | ||
| 83 | dump_field(fp, "prefixes", "\t", &insn->prefixes); | ||
| 84 | dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); | ||
| 85 | dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); | ||
| 86 | dump_field(fp, "opcode", "\t", &insn->opcode); | ||
| 87 | dump_field(fp, "modrm", "\t", &insn->modrm); | ||
| 88 | dump_field(fp, "sib", "\t", &insn->sib); | ||
| 89 | dump_field(fp, "displacement", "\t", &insn->displacement); | ||
| 90 | dump_field(fp, "immediate1", "\t", &insn->immediate1); | ||
| 91 | dump_field(fp, "immediate2", "\t", &insn->immediate2); | ||
| 92 | fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", | ||
| 93 | insn->attr, insn->opnd_bytes, insn->addr_bytes); | ||
| 94 | fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", | ||
| 95 | insn->length, insn->x86_64, insn->kaddr); | ||
| 96 | } | ||
| 97 | |||
| 98 | static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, | ||
| 99 | unsigned char *insn_buf, struct insn *insn) | ||
| 100 | { | ||
| 101 | int i; | ||
| 102 | |||
| 103 | fprintf(fp, "%s:\n", msg); | ||
| 104 | |||
| 105 | dump_insn(fp, insn); | ||
| 106 | |||
| 107 | fprintf(fp, "You can reproduce this with below command(s);\n"); | ||
| 108 | |||
| 109 | /* Input a decoded instruction sequence directly */ | ||
| 110 | fprintf(fp, " $ echo "); | ||
| 111 | for (i = 0; i < MAX_INSN_SIZE; i++) | ||
| 112 | fprintf(fp, " %02x", insn_buf[i]); | ||
| 113 | fprintf(fp, " | %s -i -\n", prog); | ||
| 114 | |||
| 115 | if (!input_file) { | ||
| 116 | fprintf(fp, "Or \n"); | ||
| 117 | /* Give a seed and iteration number */ | ||
| 118 | fprintf(fp, " $ %s -s 0x%x,%lu\n", prog, seed, nr_iter); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | static void init_random_seed(void) | ||
| 123 | { | ||
| 124 | int fd; | ||
| 125 | |||
| 126 | fd = open("/dev/urandom", O_RDONLY); | ||
| 127 | if (fd < 0) | ||
| 128 | goto fail; | ||
| 129 | |||
| 130 | if (read(fd, &seed, sizeof(seed)) != sizeof(seed)) | ||
| 131 | goto fail; | ||
| 132 | |||
| 133 | close(fd); | ||
| 134 | return; | ||
| 135 | fail: | ||
| 136 | usage("Failed to open /dev/urandom"); | ||
| 137 | } | ||
| 138 | |||
| 139 | /* Read given instruction sequence from the input file */ | ||
| 140 | static int read_next_insn(unsigned char *insn_buf) | ||
| 141 | { | ||
| 142 | char buf[256] = "", *tmp; | ||
| 143 | int i; | ||
| 144 | |||
| 145 | tmp = fgets(buf, ARRAY_SIZE(buf), input_file); | ||
| 146 | if (tmp == NULL || feof(input_file)) | ||
| 147 | return 0; | ||
| 148 | |||
| 149 | for (i = 0; i < MAX_INSN_SIZE; i++) { | ||
| 150 | insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16); | ||
| 151 | if (*tmp != ' ') | ||
| 152 | break; | ||
| 153 | } | ||
| 154 | |||
| 155 | return i; | ||
| 156 | } | ||
| 157 | |||
| 158 | static int generate_insn(unsigned char *insn_buf) | ||
| 159 | { | ||
| 160 | int i; | ||
| 161 | |||
| 162 | if (input_file) | ||
| 163 | return read_next_insn(insn_buf); | ||
| 164 | |||
| 165 | /* Fills buffer with random binary up to MAX_INSN_SIZE */ | ||
| 166 | for (i = 0; i < MAX_INSN_SIZE - 1; i += 2) | ||
| 167 | *(unsigned short *)(&insn_buf[i]) = random() & 0xffff; | ||
| 168 | |||
| 169 | while (i < MAX_INSN_SIZE) | ||
| 170 | insn_buf[i++] = random() & 0xff; | ||
| 171 | |||
| 172 | return i; | ||
| 173 | } | ||
| 174 | |||
| 175 | static void parse_args(int argc, char **argv) | ||
| 176 | { | ||
| 177 | int c; | ||
| 178 | char *tmp = NULL; | ||
| 179 | int set_seed = 0; | ||
| 180 | |||
| 181 | prog = argv[0]; | ||
| 182 | while ((c = getopt(argc, argv, "ynvs:m:i:")) != -1) { | ||
| 183 | switch (c) { | ||
| 184 | case 'y': | ||
| 185 | x86_64 = 1; | ||
| 186 | break; | ||
| 187 | case 'n': | ||
| 188 | x86_64 = 0; | ||
| 189 | break; | ||
| 190 | case 'v': | ||
| 191 | verbose++; | ||
| 192 | break; | ||
| 193 | case 'i': | ||
| 194 | if (strcmp("-", optarg) == 0) | ||
| 195 | input_file = stdin; | ||
| 196 | else | ||
| 197 | input_file = fopen(optarg, "r"); | ||
| 198 | if (!input_file) | ||
| 199 | usage("Failed to open input file"); | ||
| 200 | break; | ||
| 201 | case 's': | ||
| 202 | seed = (unsigned int)strtoul(optarg, &tmp, 0); | ||
| 203 | if (*tmp == ',') { | ||
| 204 | optarg = tmp + 1; | ||
| 205 | iter_start = strtoul(optarg, &tmp, 0); | ||
| 206 | } | ||
| 207 | if (*tmp != '\0' || tmp == optarg) | ||
| 208 | usage("Failed to parse seed"); | ||
| 209 | set_seed = 1; | ||
| 210 | break; | ||
| 211 | case 'm': | ||
| 212 | iter_end = strtoul(optarg, &tmp, 0); | ||
| 213 | if (*tmp != '\0' || tmp == optarg) | ||
| 214 | usage("Failed to parse max_iter"); | ||
| 215 | break; | ||
| 216 | default: | ||
| 217 | usage(NULL); | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | /* Check errors */ | ||
| 222 | if (iter_end < iter_start) | ||
| 223 | usage("Max iteration number must be bigger than iter-num"); | ||
| 224 | |||
| 225 | if (set_seed && input_file) | ||
| 226 | usage("Don't use input file (-i) with random seed (-s)"); | ||
| 227 | |||
| 228 | /* Initialize random seed */ | ||
| 229 | if (!input_file) { | ||
| 230 | if (!set_seed) /* No seed is given */ | ||
| 231 | init_random_seed(); | ||
| 232 | srand(seed); | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | int main(int argc, char **argv) | ||
| 237 | { | ||
| 238 | struct insn insn; | ||
| 239 | int insns = 0; | ||
| 240 | int errors = 0; | ||
| 241 | unsigned long i; | ||
| 242 | unsigned char insn_buf[MAX_INSN_SIZE * 2]; | ||
| 243 | |||
| 244 | parse_args(argc, argv); | ||
| 245 | |||
| 246 | /* Prepare stop bytes with NOPs */ | ||
| 247 | memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); | ||
| 248 | |||
| 249 | for (i = 0; i < iter_end; i++) { | ||
| 250 | if (generate_insn(insn_buf) <= 0) | ||
| 251 | break; | ||
| 252 | |||
| 253 | if (i < iter_start) /* Skip to given iteration number */ | ||
| 254 | continue; | ||
| 255 | |||
| 256 | /* Decode an instruction */ | ||
| 257 | insn_init(&insn, insn_buf, x86_64); | ||
| 258 | insn_get_length(&insn); | ||
| 259 | |||
| 260 | if (insn.next_byte <= insn.kaddr || | ||
| 261 | insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { | ||
| 262 | /* Access out-of-range memory */ | ||
| 263 | dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn); | ||
| 264 | errors++; | ||
| 265 | } else if (verbose && !insn_complete(&insn)) | ||
| 266 | dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); | ||
| 267 | else if (verbose >= 2) | ||
| 268 | dump_insn(stdout, &insn); | ||
| 269 | insns++; | ||
| 270 | } | ||
| 271 | |||
| 272 | fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); | ||
| 273 | |||
| 274 | return errors ? 1 : 0; | ||
| 275 | } | ||
