diff options
Diffstat (limited to 'arch')
119 files changed, 7700 insertions, 2873 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index bd620a481bee..49778bb43782 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
@@ -647,7 +647,7 @@ config ARCH_S3C2410 | |||
647 | select ARCH_HAS_CPUFREQ | 647 | select ARCH_HAS_CPUFREQ |
648 | select HAVE_CLK | 648 | select HAVE_CLK |
649 | select ARCH_USES_GETTIMEOFFSET | 649 | select ARCH_USES_GETTIMEOFFSET |
650 | select HAVE_S3C2410_I2C | 650 | select HAVE_S3C2410_I2C if I2C |
651 | help | 651 | help |
652 | Samsung S3C2410X CPU based systems, such as the Simtec Electronics | 652 | Samsung S3C2410X CPU based systems, such as the Simtec Electronics |
653 | BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or | 653 | BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or |
@@ -677,8 +677,8 @@ config ARCH_S3C64XX | |||
677 | select S3C_DEV_NAND | 677 | select S3C_DEV_NAND |
678 | select USB_ARCH_HAS_OHCI | 678 | select USB_ARCH_HAS_OHCI |
679 | select SAMSUNG_GPIOLIB_4BIT | 679 | select SAMSUNG_GPIOLIB_4BIT |
680 | select HAVE_S3C2410_I2C | 680 | select HAVE_S3C2410_I2C if I2C |
681 | select HAVE_S3C2410_WATCHDOG | 681 | select HAVE_S3C2410_WATCHDOG if WATCHDOG |
682 | help | 682 | help |
683 | Samsung S3C64XX series based systems | 683 | Samsung S3C64XX series based systems |
684 | 684 | ||
@@ -687,10 +687,10 @@ config ARCH_S5P64X0 | |||
687 | select CPU_V6 | 687 | select CPU_V6 |
688 | select GENERIC_GPIO | 688 | select GENERIC_GPIO |
689 | select HAVE_CLK | 689 | select HAVE_CLK |
690 | select HAVE_S3C2410_WATCHDOG | 690 | select HAVE_S3C2410_WATCHDOG if WATCHDOG |
691 | select ARCH_USES_GETTIMEOFFSET | 691 | select ARCH_USES_GETTIMEOFFSET |
692 | select HAVE_S3C2410_I2C | 692 | select HAVE_S3C2410_I2C if I2C |
693 | select HAVE_S3C_RTC | 693 | select HAVE_S3C_RTC if RTC_CLASS |
694 | help | 694 | help |
695 | Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, | 695 | Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, |
696 | SMDK6450. | 696 | SMDK6450. |
@@ -701,7 +701,7 @@ config ARCH_S5P6442 | |||
701 | select GENERIC_GPIO | 701 | select GENERIC_GPIO |
702 | select HAVE_CLK | 702 | select HAVE_CLK |
703 | select ARCH_USES_GETTIMEOFFSET | 703 | select ARCH_USES_GETTIMEOFFSET |
704 | select HAVE_S3C2410_WATCHDOG | 704 | select HAVE_S3C2410_WATCHDOG if WATCHDOG |
705 | help | 705 | help |
706 | Samsung S5P6442 CPU based systems | 706 | Samsung S5P6442 CPU based systems |
707 | 707 | ||
@@ -712,9 +712,9 @@ config ARCH_S5PC100 | |||
712 | select CPU_V7 | 712 | select CPU_V7 |
713 | select ARM_L1_CACHE_SHIFT_6 | 713 | select ARM_L1_CACHE_SHIFT_6 |
714 | select ARCH_USES_GETTIMEOFFSET | 714 | select ARCH_USES_GETTIMEOFFSET |
715 | select HAVE_S3C2410_I2C | 715 | select HAVE_S3C2410_I2C if I2C |
716 | select HAVE_S3C_RTC | 716 | select HAVE_S3C_RTC if RTC_CLASS |
717 | select HAVE_S3C2410_WATCHDOG | 717 | select HAVE_S3C2410_WATCHDOG if WATCHDOG |
718 | help | 718 | help |
719 | Samsung S5PC100 series based systems | 719 | Samsung S5PC100 series based systems |
720 | 720 | ||
@@ -727,9 +727,9 @@ config ARCH_S5PV210 | |||
727 | select ARM_L1_CACHE_SHIFT_6 | 727 | select ARM_L1_CACHE_SHIFT_6 |
728 | select ARCH_HAS_CPUFREQ | 728 | select ARCH_HAS_CPUFREQ |
729 | select ARCH_USES_GETTIMEOFFSET | 729 | select ARCH_USES_GETTIMEOFFSET |
730 | select HAVE_S3C2410_I2C | 730 | select HAVE_S3C2410_I2C if I2C |
731 | select HAVE_S3C_RTC | 731 | select HAVE_S3C_RTC if RTC_CLASS |
732 | select HAVE_S3C2410_WATCHDOG | 732 | select HAVE_S3C2410_WATCHDOG if WATCHDOG |
733 | help | 733 | help |
734 | Samsung S5PV210/S5PC110 series based systems | 734 | Samsung S5PV210/S5PC110 series based systems |
735 | 735 | ||
@@ -740,9 +740,9 @@ config ARCH_S5PV310 | |||
740 | select GENERIC_GPIO | 740 | select GENERIC_GPIO |
741 | select HAVE_CLK | 741 | select HAVE_CLK |
742 | select GENERIC_CLOCKEVENTS | 742 | select GENERIC_CLOCKEVENTS |
743 | select HAVE_S3C_RTC | 743 | select HAVE_S3C_RTC if RTC_CLASS |
744 | select HAVE_S3C2410_I2C | 744 | select HAVE_S3C2410_I2C if I2C |
745 | select HAVE_S3C2410_WATCHDOG | 745 | select HAVE_S3C2410_WATCHDOG if WATCHDOG |
746 | help | 746 | help |
747 | Samsung S5PV310 series based systems | 747 | Samsung S5PV310 series based systems |
748 | 748 | ||
@@ -1206,10 +1206,11 @@ config SMP | |||
1206 | depends on EXPERIMENTAL | 1206 | depends on EXPERIMENTAL |
1207 | depends on GENERIC_CLOCKEVENTS | 1207 | depends on GENERIC_CLOCKEVENTS |
1208 | depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ | 1208 | depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ |
1209 | MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\ | 1209 | MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \ |
1210 | ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 | 1210 | ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \ |
1211 | ARCH_MSM_SCORPIONMP | ||
1211 | select USE_GENERIC_SMP_HELPERS | 1212 | select USE_GENERIC_SMP_HELPERS |
1212 | select HAVE_ARM_SCU | 1213 | select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP |
1213 | help | 1214 | help |
1214 | This enables support for systems with more than one CPU. If you have | 1215 | This enables support for systems with more than one CPU. If you have |
1215 | a system with only one CPU, like most personal computers, say N. If | 1216 | a system with only one CPU, like most personal computers, say N. If |
@@ -1284,6 +1285,7 @@ config NR_CPUS | |||
1284 | config HOTPLUG_CPU | 1285 | config HOTPLUG_CPU |
1285 | bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" | 1286 | bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" |
1286 | depends on SMP && HOTPLUG && EXPERIMENTAL | 1287 | depends on SMP && HOTPLUG && EXPERIMENTAL |
1288 | depends on !ARCH_MSM | ||
1287 | help | 1289 | help |
1288 | Say Y here to experiment with turning CPUs off and on. CPUs | 1290 | Say Y here to experiment with turning CPUs off and on. CPUs |
1289 | can be controlled through /sys/devices/system/cpu. | 1291 | can be controlled through /sys/devices/system/cpu. |
@@ -1292,7 +1294,7 @@ config LOCAL_TIMERS | |||
1292 | bool "Use local timer interrupts" | 1294 | bool "Use local timer interrupts" |
1293 | depends on SMP | 1295 | depends on SMP |
1294 | default y | 1296 | default y |
1295 | select HAVE_ARM_TWD | 1297 | select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP |
1296 | help | 1298 | help |
1297 | Enable support for local timers on SMP platforms, rather then the | 1299 | Enable support for local timers on SMP platforms, rather then the |
1298 | legacy IPI broadcast method. Local timers allows the system | 1300 | legacy IPI broadcast method. Local timers allows the system |
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 6825c34646d4..9be21ba648cd 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S | |||
@@ -1084,6 +1084,6 @@ memdump: mov r12, r0 | |||
1084 | reloc_end: | 1084 | reloc_end: |
1085 | 1085 | ||
1086 | .align | 1086 | .align |
1087 | .section ".stack", "w" | 1087 | .section ".stack", "aw", %nobits |
1088 | user_stack: .space 4096 | 1088 | user_stack: .space 4096 |
1089 | user_stack_end: | 1089 | user_stack_end: |
diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in index d08168941bd6..366a924019ac 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.in | |||
@@ -57,7 +57,7 @@ SECTIONS | |||
57 | .bss : { *(.bss) } | 57 | .bss : { *(.bss) } |
58 | _end = .; | 58 | _end = .; |
59 | 59 | ||
60 | .stack (NOLOAD) : { *(.stack) } | 60 | .stack : { *(.stack) } |
61 | 61 | ||
62 | .stab 0 : { *(.stab) } | 62 | .stab 0 : { *(.stab) } |
63 | .stabstr 0 : { *(.stabstr) } | 63 | .stabstr 0 : { *(.stabstr) } |
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 062b58c029ab..749bb6622404 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h | |||
@@ -238,7 +238,7 @@ | |||
238 | @ Slightly optimised to avoid incrementing the pointer twice | 238 | @ Slightly optimised to avoid incrementing the pointer twice |
239 | usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort | 239 | usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort |
240 | .if \rept == 2 | 240 | .if \rept == 2 |
241 | usraccoff \instr, \reg, \ptr, \inc, 4, \cond, \abort | 241 | usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort |
242 | .endif | 242 | .endif |
243 | 243 | ||
244 | add\cond \ptr, #\rept * \inc | 244 | add\cond \ptr, #\rept * \inc |
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 68870c776671..b4ffe9d5b526 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h | |||
@@ -13,6 +13,10 @@ typedef struct { | |||
13 | 13 | ||
14 | #ifdef CONFIG_CPU_HAS_ASID | 14 | #ifdef CONFIG_CPU_HAS_ASID |
15 | #define ASID(mm) ((mm)->context.id & 255) | 15 | #define ASID(mm) ((mm)->context.id & 255) |
16 | |||
17 | /* init_mm.context.id_lock should be initialized. */ | ||
18 | #define INIT_MM_CONTEXT(name) \ | ||
19 | .context.id_lock = __SPIN_LOCK_UNLOCKED(name.context.id_lock), | ||
16 | #else | 20 | #else |
17 | #define ASID(mm) (0) | 21 | #define ASID(mm) (0) |
18 | #endif | 22 | #endif |
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index b155414192da..53d1d5deb111 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h | |||
@@ -374,6 +374,9 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) | |||
374 | 374 | ||
375 | #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) | 375 | #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) |
376 | 376 | ||
377 | /* we don't need complex calculations here as the pmd is folded into the pgd */ | ||
378 | #define pmd_addr_end(addr,end) (end) | ||
379 | |||
377 | /* | 380 | /* |
378 | * Conversion functions: convert a page and protection to a page entry, | 381 | * Conversion functions: convert a page and protection to a page entry, |
379 | * and a page entry and page directory to the page they refer to. | 382 | * and a page entry and page directory to the page they refer to. |
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492a..421a4bb88fed 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c | |||
@@ -4,9 +4,7 @@ | |||
4 | * ARM performance counter support. | 4 | * ARM performance counter support. |
5 | * | 5 | * |
6 | * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles | 6 | * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles |
7 | * | 7 | * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> |
8 | * ARMv7 support: Jean Pihet <jpihet@mvista.com> | ||
9 | * 2010 (c) MontaVista Software, LLC. | ||
10 | * | 8 | * |
11 | * This code is based on the sparc64 perf event code, which is in turn based | 9 | * This code is based on the sparc64 perf event code, which is in turn based |
12 | * on the x86 code. Callchain code is based on the ARM OProfile backtrace | 10 | * on the x86 code. Callchain code is based on the ARM OProfile backtrace |
@@ -69,29 +67,23 @@ struct cpu_hw_events { | |||
69 | }; | 67 | }; |
70 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | 68 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); |
71 | 69 | ||
72 | /* PMU names. */ | ||
73 | static const char *arm_pmu_names[] = { | ||
74 | [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", | ||
75 | [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", | ||
76 | [ARM_PERF_PMU_ID_V6] = "v6", | ||
77 | [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", | ||
78 | [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", | ||
79 | [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", | ||
80 | }; | ||
81 | |||
82 | struct arm_pmu { | 70 | struct arm_pmu { |
83 | enum arm_perf_pmu_ids id; | 71 | enum arm_perf_pmu_ids id; |
72 | const char *name; | ||
84 | irqreturn_t (*handle_irq)(int irq_num, void *dev); | 73 | irqreturn_t (*handle_irq)(int irq_num, void *dev); |
85 | void (*enable)(struct hw_perf_event *evt, int idx); | 74 | void (*enable)(struct hw_perf_event *evt, int idx); |
86 | void (*disable)(struct hw_perf_event *evt, int idx); | 75 | void (*disable)(struct hw_perf_event *evt, int idx); |
87 | int (*event_map)(int evt); | ||
88 | u64 (*raw_event)(u64); | ||
89 | int (*get_event_idx)(struct cpu_hw_events *cpuc, | 76 | int (*get_event_idx)(struct cpu_hw_events *cpuc, |
90 | struct hw_perf_event *hwc); | 77 | struct hw_perf_event *hwc); |
91 | u32 (*read_counter)(int idx); | 78 | u32 (*read_counter)(int idx); |
92 | void (*write_counter)(int idx, u32 val); | 79 | void (*write_counter)(int idx, u32 val); |
93 | void (*start)(void); | 80 | void (*start)(void); |
94 | void (*stop)(void); | 81 | void (*stop)(void); |
82 | const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] | ||
83 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
84 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
85 | const unsigned (*event_map)[PERF_COUNT_HW_MAX]; | ||
86 | u32 raw_event_mask; | ||
95 | int num_events; | 87 | int num_events; |
96 | u64 max_period; | 88 | u64 max_period; |
97 | }; | 89 | }; |
@@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); | |||
136 | 128 | ||
137 | #define CACHE_OP_UNSUPPORTED 0xFFFF | 129 | #define CACHE_OP_UNSUPPORTED 0xFFFF |
138 | 130 | ||
139 | static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
140 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
141 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
142 | |||
143 | static int | 131 | static int |
144 | armpmu_map_cache_event(u64 config) | 132 | armpmu_map_cache_event(u64 config) |
145 | { | 133 | { |
@@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config) | |||
157 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | 145 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) |
158 | return -EINVAL; | 146 | return -EINVAL; |
159 | 147 | ||
160 | ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; | 148 | ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; |
161 | 149 | ||
162 | if (ret == CACHE_OP_UNSUPPORTED) | 150 | if (ret == CACHE_OP_UNSUPPORTED) |
163 | return -ENOENT; | 151 | return -ENOENT; |
@@ -166,6 +154,19 @@ armpmu_map_cache_event(u64 config) | |||
166 | } | 154 | } |
167 | 155 | ||
168 | static int | 156 | static int |
157 | armpmu_map_event(u64 config) | ||
158 | { | ||
159 | int mapping = (*armpmu->event_map)[config]; | ||
160 | return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; | ||
161 | } | ||
162 | |||
163 | static int | ||
164 | armpmu_map_raw_event(u64 config) | ||
165 | { | ||
166 | return (int)(config & armpmu->raw_event_mask); | ||
167 | } | ||
168 | |||
169 | static int | ||
169 | armpmu_event_set_period(struct perf_event *event, | 170 | armpmu_event_set_period(struct perf_event *event, |
170 | struct hw_perf_event *hwc, | 171 | struct hw_perf_event *hwc, |
171 | int idx) | 172 | int idx) |
@@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event) | |||
458 | 459 | ||
459 | /* Decode the generic type into an ARM event identifier. */ | 460 | /* Decode the generic type into an ARM event identifier. */ |
460 | if (PERF_TYPE_HARDWARE == event->attr.type) { | 461 | if (PERF_TYPE_HARDWARE == event->attr.type) { |
461 | mapping = armpmu->event_map(event->attr.config); | 462 | mapping = armpmu_map_event(event->attr.config); |
462 | } else if (PERF_TYPE_HW_CACHE == event->attr.type) { | 463 | } else if (PERF_TYPE_HW_CACHE == event->attr.type) { |
463 | mapping = armpmu_map_cache_event(event->attr.config); | 464 | mapping = armpmu_map_cache_event(event->attr.config); |
464 | } else if (PERF_TYPE_RAW == event->attr.type) { | 465 | } else if (PERF_TYPE_RAW == event->attr.type) { |
465 | mapping = armpmu->raw_event(event->attr.config); | 466 | mapping = armpmu_map_raw_event(event->attr.config); |
466 | } else { | 467 | } else { |
467 | pr_debug("event type %x not supported\n", event->attr.type); | 468 | pr_debug("event type %x not supported\n", event->attr.type); |
468 | return -EOPNOTSUPP; | 469 | return -EOPNOTSUPP; |
@@ -603,2366 +604,10 @@ static struct pmu pmu = { | |||
603 | .read = armpmu_read, | 604 | .read = armpmu_read, |
604 | }; | 605 | }; |
605 | 606 | ||
606 | /* | 607 | /* Include the PMU-specific implementations. */ |
607 | * ARMv6 Performance counter handling code. | 608 | #include "perf_event_xscale.c" |
608 | * | 609 | #include "perf_event_v6.c" |
609 | * ARMv6 has 2 configurable performance counters and a single cycle counter. | 610 | #include "perf_event_v7.c" |
610 | * They all share a single reset bit but can be written to zero so we can use | ||
611 | * that for a reset. | ||
612 | * | ||
613 | * The counters can't be individually enabled or disabled so when we remove | ||
614 | * one event and replace it with another we could get spurious counts from the | ||
615 | * wrong event. However, we can take advantage of the fact that the | ||
616 | * performance counters can export events to the event bus, and the event bus | ||
617 | * itself can be monitored. This requires that we *don't* export the events to | ||
618 | * the event bus. The procedure for disabling a configurable counter is: | ||
619 | * - change the counter to count the ETMEXTOUT[0] signal (0x20). This | ||
620 | * effectively stops the counter from counting. | ||
621 | * - disable the counter's interrupt generation (each counter has it's | ||
622 | * own interrupt enable bit). | ||
623 | * Once stopped, the counter value can be written as 0 to reset. | ||
624 | * | ||
625 | * To enable a counter: | ||
626 | * - enable the counter's interrupt generation. | ||
627 | * - set the new event type. | ||
628 | * | ||
629 | * Note: the dedicated cycle counter only counts cycles and can't be | ||
630 | * enabled/disabled independently of the others. When we want to disable the | ||
631 | * cycle counter, we have to just disable the interrupt reporting and start | ||
632 | * ignoring that counter. When re-enabling, we have to reset the value and | ||
633 | * enable the interrupt. | ||
634 | */ | ||
635 | |||
636 | enum armv6_perf_types { | ||
637 | ARMV6_PERFCTR_ICACHE_MISS = 0x0, | ||
638 | ARMV6_PERFCTR_IBUF_STALL = 0x1, | ||
639 | ARMV6_PERFCTR_DDEP_STALL = 0x2, | ||
640 | ARMV6_PERFCTR_ITLB_MISS = 0x3, | ||
641 | ARMV6_PERFCTR_DTLB_MISS = 0x4, | ||
642 | ARMV6_PERFCTR_BR_EXEC = 0x5, | ||
643 | ARMV6_PERFCTR_BR_MISPREDICT = 0x6, | ||
644 | ARMV6_PERFCTR_INSTR_EXEC = 0x7, | ||
645 | ARMV6_PERFCTR_DCACHE_HIT = 0x9, | ||
646 | ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, | ||
647 | ARMV6_PERFCTR_DCACHE_MISS = 0xB, | ||
648 | ARMV6_PERFCTR_DCACHE_WBACK = 0xC, | ||
649 | ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, | ||
650 | ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, | ||
651 | ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, | ||
652 | ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, | ||
653 | ARMV6_PERFCTR_WBUF_DRAINED = 0x12, | ||
654 | ARMV6_PERFCTR_CPU_CYCLES = 0xFF, | ||
655 | ARMV6_PERFCTR_NOP = 0x20, | ||
656 | }; | ||
657 | |||
658 | enum armv6_counters { | ||
659 | ARMV6_CYCLE_COUNTER = 1, | ||
660 | ARMV6_COUNTER0, | ||
661 | ARMV6_COUNTER1, | ||
662 | }; | ||
663 | |||
664 | /* | ||
665 | * The hardware events that we support. We do support cache operations but | ||
666 | * we have harvard caches and no way to combine instruction and data | ||
667 | * accesses/misses in hardware. | ||
668 | */ | ||
669 | static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { | ||
670 | [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, | ||
671 | [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, | ||
672 | [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, | ||
673 | [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, | ||
674 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, | ||
675 | [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, | ||
676 | [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, | ||
677 | }; | ||
678 | |||
679 | static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
680 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
681 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
682 | [C(L1D)] = { | ||
683 | /* | ||
684 | * The performance counters don't differentiate between read | ||
685 | * and write accesses/misses so this isn't strictly correct, | ||
686 | * but it's the best we can do. Writes and reads get | ||
687 | * combined. | ||
688 | */ | ||
689 | [C(OP_READ)] = { | ||
690 | [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, | ||
691 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, | ||
692 | }, | ||
693 | [C(OP_WRITE)] = { | ||
694 | [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, | ||
695 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, | ||
696 | }, | ||
697 | [C(OP_PREFETCH)] = { | ||
698 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
699 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
700 | }, | ||
701 | }, | ||
702 | [C(L1I)] = { | ||
703 | [C(OP_READ)] = { | ||
704 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
705 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, | ||
706 | }, | ||
707 | [C(OP_WRITE)] = { | ||
708 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
709 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, | ||
710 | }, | ||
711 | [C(OP_PREFETCH)] = { | ||
712 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
713 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
714 | }, | ||
715 | }, | ||
716 | [C(LL)] = { | ||
717 | [C(OP_READ)] = { | ||
718 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
719 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
720 | }, | ||
721 | [C(OP_WRITE)] = { | ||
722 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
723 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
724 | }, | ||
725 | [C(OP_PREFETCH)] = { | ||
726 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
727 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
728 | }, | ||
729 | }, | ||
730 | [C(DTLB)] = { | ||
731 | /* | ||
732 | * The ARM performance counters can count micro DTLB misses, | ||
733 | * micro ITLB misses and main TLB misses. There isn't an event | ||
734 | * for TLB misses, so use the micro misses here and if users | ||
735 | * want the main TLB misses they can use a raw counter. | ||
736 | */ | ||
737 | [C(OP_READ)] = { | ||
738 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
739 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, | ||
740 | }, | ||
741 | [C(OP_WRITE)] = { | ||
742 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
743 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, | ||
744 | }, | ||
745 | [C(OP_PREFETCH)] = { | ||
746 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
747 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
748 | }, | ||
749 | }, | ||
750 | [C(ITLB)] = { | ||
751 | [C(OP_READ)] = { | ||
752 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
753 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, | ||
754 | }, | ||
755 | [C(OP_WRITE)] = { | ||
756 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
757 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, | ||
758 | }, | ||
759 | [C(OP_PREFETCH)] = { | ||
760 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
761 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
762 | }, | ||
763 | }, | ||
764 | [C(BPU)] = { | ||
765 | [C(OP_READ)] = { | ||
766 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
767 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
768 | }, | ||
769 | [C(OP_WRITE)] = { | ||
770 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
771 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
772 | }, | ||
773 | [C(OP_PREFETCH)] = { | ||
774 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
775 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
776 | }, | ||
777 | }, | ||
778 | }; | ||
779 | |||
780 | enum armv6mpcore_perf_types { | ||
781 | ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, | ||
782 | ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, | ||
783 | ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, | ||
784 | ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, | ||
785 | ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, | ||
786 | ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, | ||
787 | ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, | ||
788 | ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, | ||
789 | ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, | ||
790 | ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, | ||
791 | ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, | ||
792 | ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, | ||
793 | ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, | ||
794 | ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, | ||
795 | ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, | ||
796 | ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, | ||
797 | ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, | ||
798 | ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, | ||
799 | ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, | ||
800 | ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, | ||
801 | }; | ||
802 | |||
803 | /* | ||
804 | * The hardware events that we support. We do support cache operations but | ||
805 | * we have harvard caches and no way to combine instruction and data | ||
806 | * accesses/misses in hardware. | ||
807 | */ | ||
808 | static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { | ||
809 | [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, | ||
810 | [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, | ||
811 | [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, | ||
812 | [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, | ||
813 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, | ||
814 | [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, | ||
815 | [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, | ||
816 | }; | ||
817 | |||
818 | static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
819 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
820 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
821 | [C(L1D)] = { | ||
822 | [C(OP_READ)] = { | ||
823 | [C(RESULT_ACCESS)] = | ||
824 | ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, | ||
825 | [C(RESULT_MISS)] = | ||
826 | ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, | ||
827 | }, | ||
828 | [C(OP_WRITE)] = { | ||
829 | [C(RESULT_ACCESS)] = | ||
830 | ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, | ||
831 | [C(RESULT_MISS)] = | ||
832 | ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, | ||
833 | }, | ||
834 | [C(OP_PREFETCH)] = { | ||
835 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
836 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
837 | }, | ||
838 | }, | ||
839 | [C(L1I)] = { | ||
840 | [C(OP_READ)] = { | ||
841 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
842 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, | ||
843 | }, | ||
844 | [C(OP_WRITE)] = { | ||
845 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
846 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, | ||
847 | }, | ||
848 | [C(OP_PREFETCH)] = { | ||
849 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
850 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
851 | }, | ||
852 | }, | ||
853 | [C(LL)] = { | ||
854 | [C(OP_READ)] = { | ||
855 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
856 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
857 | }, | ||
858 | [C(OP_WRITE)] = { | ||
859 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
860 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
861 | }, | ||
862 | [C(OP_PREFETCH)] = { | ||
863 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
864 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
865 | }, | ||
866 | }, | ||
867 | [C(DTLB)] = { | ||
868 | /* | ||
869 | * The ARM performance counters can count micro DTLB misses, | ||
870 | * micro ITLB misses and main TLB misses. There isn't an event | ||
871 | * for TLB misses, so use the micro misses here and if users | ||
872 | * want the main TLB misses they can use a raw counter. | ||
873 | */ | ||
874 | [C(OP_READ)] = { | ||
875 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
876 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, | ||
877 | }, | ||
878 | [C(OP_WRITE)] = { | ||
879 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
880 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, | ||
881 | }, | ||
882 | [C(OP_PREFETCH)] = { | ||
883 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
884 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
885 | }, | ||
886 | }, | ||
887 | [C(ITLB)] = { | ||
888 | [C(OP_READ)] = { | ||
889 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
890 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, | ||
891 | }, | ||
892 | [C(OP_WRITE)] = { | ||
893 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
894 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, | ||
895 | }, | ||
896 | [C(OP_PREFETCH)] = { | ||
897 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
898 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
899 | }, | ||
900 | }, | ||
901 | [C(BPU)] = { | ||
902 | [C(OP_READ)] = { | ||
903 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
904 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
905 | }, | ||
906 | [C(OP_WRITE)] = { | ||
907 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
908 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
909 | }, | ||
910 | [C(OP_PREFETCH)] = { | ||
911 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
912 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
913 | }, | ||
914 | }, | ||
915 | }; | ||
916 | |||
917 | static inline unsigned long | ||
918 | armv6_pmcr_read(void) | ||
919 | { | ||
920 | u32 val; | ||
921 | asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); | ||
922 | return val; | ||
923 | } | ||
924 | |||
925 | static inline void | ||
926 | armv6_pmcr_write(unsigned long val) | ||
927 | { | ||
928 | asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); | ||
929 | } | ||
930 | |||
931 | #define ARMV6_PMCR_ENABLE (1 << 0) | ||
932 | #define ARMV6_PMCR_CTR01_RESET (1 << 1) | ||
933 | #define ARMV6_PMCR_CCOUNT_RESET (1 << 2) | ||
934 | #define ARMV6_PMCR_CCOUNT_DIV (1 << 3) | ||
935 | #define ARMV6_PMCR_COUNT0_IEN (1 << 4) | ||
936 | #define ARMV6_PMCR_COUNT1_IEN (1 << 5) | ||
937 | #define ARMV6_PMCR_CCOUNT_IEN (1 << 6) | ||
938 | #define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) | ||
939 | #define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) | ||
940 | #define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) | ||
941 | #define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 | ||
942 | #define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) | ||
943 | #define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 | ||
944 | #define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) | ||
945 | |||
946 | #define ARMV6_PMCR_OVERFLOWED_MASK \ | ||
947 | (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ | ||
948 | ARMV6_PMCR_CCOUNT_OVERFLOW) | ||
949 | |||
950 | static inline int | ||
951 | armv6_pmcr_has_overflowed(unsigned long pmcr) | ||
952 | { | ||
953 | return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); | ||
954 | } | ||
955 | |||
956 | static inline int | ||
957 | armv6_pmcr_counter_has_overflowed(unsigned long pmcr, | ||
958 | enum armv6_counters counter) | ||
959 | { | ||
960 | int ret = 0; | ||
961 | |||
962 | if (ARMV6_CYCLE_COUNTER == counter) | ||
963 | ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; | ||
964 | else if (ARMV6_COUNTER0 == counter) | ||
965 | ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; | ||
966 | else if (ARMV6_COUNTER1 == counter) | ||
967 | ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; | ||
968 | else | ||
969 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
970 | |||
971 | return ret; | ||
972 | } | ||
973 | |||
974 | static inline u32 | ||
975 | armv6pmu_read_counter(int counter) | ||
976 | { | ||
977 | unsigned long value = 0; | ||
978 | |||
979 | if (ARMV6_CYCLE_COUNTER == counter) | ||
980 | asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); | ||
981 | else if (ARMV6_COUNTER0 == counter) | ||
982 | asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); | ||
983 | else if (ARMV6_COUNTER1 == counter) | ||
984 | asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); | ||
985 | else | ||
986 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
987 | |||
988 | return value; | ||
989 | } | ||
990 | |||
991 | static inline void | ||
992 | armv6pmu_write_counter(int counter, | ||
993 | u32 value) | ||
994 | { | ||
995 | if (ARMV6_CYCLE_COUNTER == counter) | ||
996 | asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); | ||
997 | else if (ARMV6_COUNTER0 == counter) | ||
998 | asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); | ||
999 | else if (ARMV6_COUNTER1 == counter) | ||
1000 | asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); | ||
1001 | else | ||
1002 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
1003 | } | ||
1004 | |||
1005 | void | ||
1006 | armv6pmu_enable_event(struct hw_perf_event *hwc, | ||
1007 | int idx) | ||
1008 | { | ||
1009 | unsigned long val, mask, evt, flags; | ||
1010 | |||
1011 | if (ARMV6_CYCLE_COUNTER == idx) { | ||
1012 | mask = 0; | ||
1013 | evt = ARMV6_PMCR_CCOUNT_IEN; | ||
1014 | } else if (ARMV6_COUNTER0 == idx) { | ||
1015 | mask = ARMV6_PMCR_EVT_COUNT0_MASK; | ||
1016 | evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | | ||
1017 | ARMV6_PMCR_COUNT0_IEN; | ||
1018 | } else if (ARMV6_COUNTER1 == idx) { | ||
1019 | mask = ARMV6_PMCR_EVT_COUNT1_MASK; | ||
1020 | evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | | ||
1021 | ARMV6_PMCR_COUNT1_IEN; | ||
1022 | } else { | ||
1023 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
1024 | return; | ||
1025 | } | ||
1026 | |||
1027 | /* | ||
1028 | * Mask out the current event and set the counter to count the event | ||
1029 | * that we're interested in. | ||
1030 | */ | ||
1031 | spin_lock_irqsave(&pmu_lock, flags); | ||
1032 | val = armv6_pmcr_read(); | ||
1033 | val &= ~mask; | ||
1034 | val |= evt; | ||
1035 | armv6_pmcr_write(val); | ||
1036 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
1037 | } | ||
1038 | |||
1039 | static irqreturn_t | ||
1040 | armv6pmu_handle_irq(int irq_num, | ||
1041 | void *dev) | ||
1042 | { | ||
1043 | unsigned long pmcr = armv6_pmcr_read(); | ||
1044 | struct perf_sample_data data; | ||
1045 | struct cpu_hw_events *cpuc; | ||
1046 | struct pt_regs *regs; | ||
1047 | int idx; | ||
1048 | |||
1049 | if (!armv6_pmcr_has_overflowed(pmcr)) | ||
1050 | return IRQ_NONE; | ||
1051 | |||
1052 | regs = get_irq_regs(); | ||
1053 | |||
1054 | /* | ||
1055 | * The interrupts are cleared by writing the overflow flags back to | ||
1056 | * the control register. All of the other bits don't have any effect | ||
1057 | * if they are rewritten, so write the whole value back. | ||
1058 | */ | ||
1059 | armv6_pmcr_write(pmcr); | ||
1060 | |||
1061 | perf_sample_data_init(&data, 0); | ||
1062 | |||
1063 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1064 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
1065 | struct perf_event *event = cpuc->events[idx]; | ||
1066 | struct hw_perf_event *hwc; | ||
1067 | |||
1068 | if (!test_bit(idx, cpuc->active_mask)) | ||
1069 | continue; | ||
1070 | |||
1071 | /* | ||
1072 | * We have a single interrupt for all counters. Check that | ||
1073 | * each counter has overflowed before we process it. | ||
1074 | */ | ||
1075 | if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) | ||
1076 | continue; | ||
1077 | |||
1078 | hwc = &event->hw; | ||
1079 | armpmu_event_update(event, hwc, idx); | ||
1080 | data.period = event->hw.last_period; | ||
1081 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
1082 | continue; | ||
1083 | |||
1084 | if (perf_event_overflow(event, 0, &data, regs)) | ||
1085 | armpmu->disable(hwc, idx); | ||
1086 | } | ||
1087 | |||
1088 | /* | ||
1089 | * Handle the pending perf events. | ||
1090 | * | ||
1091 | * Note: this call *must* be run with interrupts disabled. For | ||
1092 | * platforms that can have the PMU interrupts raised as an NMI, this | ||
1093 | * will not work. | ||
1094 | */ | ||
1095 | irq_work_run(); | ||
1096 | |||
1097 | return IRQ_HANDLED; | ||
1098 | } | ||
1099 | |||
1100 | static void | ||
1101 | armv6pmu_start(void) | ||
1102 | { | ||
1103 | unsigned long flags, val; | ||
1104 | |||
1105 | spin_lock_irqsave(&pmu_lock, flags); | ||
1106 | val = armv6_pmcr_read(); | ||
1107 | val |= ARMV6_PMCR_ENABLE; | ||
1108 | armv6_pmcr_write(val); | ||
1109 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
1110 | } | ||
1111 | |||
1112 | void | ||
1113 | armv6pmu_stop(void) | ||
1114 | { | ||
1115 | unsigned long flags, val; | ||
1116 | |||
1117 | spin_lock_irqsave(&pmu_lock, flags); | ||
1118 | val = armv6_pmcr_read(); | ||
1119 | val &= ~ARMV6_PMCR_ENABLE; | ||
1120 | armv6_pmcr_write(val); | ||
1121 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
1122 | } | ||
1123 | |||
1124 | static inline int | ||
1125 | armv6pmu_event_map(int config) | ||
1126 | { | ||
1127 | int mapping = armv6_perf_map[config]; | ||
1128 | if (HW_OP_UNSUPPORTED == mapping) | ||
1129 | mapping = -EOPNOTSUPP; | ||
1130 | return mapping; | ||
1131 | } | ||
1132 | |||
1133 | static inline int | ||
1134 | armv6mpcore_pmu_event_map(int config) | ||
1135 | { | ||
1136 | int mapping = armv6mpcore_perf_map[config]; | ||
1137 | if (HW_OP_UNSUPPORTED == mapping) | ||
1138 | mapping = -EOPNOTSUPP; | ||
1139 | return mapping; | ||
1140 | } | ||
1141 | |||
1142 | static u64 | ||
1143 | armv6pmu_raw_event(u64 config) | ||
1144 | { | ||
1145 | return config & 0xff; | ||
1146 | } | ||
1147 | |||
1148 | static int | ||
1149 | armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
1150 | struct hw_perf_event *event) | ||
1151 | { | ||
1152 | /* Always place a cycle counter into the cycle counter. */ | ||
1153 | if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { | ||
1154 | if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) | ||
1155 | return -EAGAIN; | ||
1156 | |||
1157 | return ARMV6_CYCLE_COUNTER; | ||
1158 | } else { | ||
1159 | /* | ||
1160 | * For anything other than a cycle counter, try and use | ||
1161 | * counter0 and counter1. | ||
1162 | */ | ||
1163 | if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { | ||
1164 | return ARMV6_COUNTER1; | ||
1165 | } | ||
1166 | |||
1167 | if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { | ||
1168 | return ARMV6_COUNTER0; | ||
1169 | } | ||
1170 | |||
1171 | /* The counters are all in use. */ | ||
1172 | return -EAGAIN; | ||
1173 | } | ||
1174 | } | ||
1175 | |||
1176 | static void | ||
1177 | armv6pmu_disable_event(struct hw_perf_event *hwc, | ||
1178 | int idx) | ||
1179 | { | ||
1180 | unsigned long val, mask, evt, flags; | ||
1181 | |||
1182 | if (ARMV6_CYCLE_COUNTER == idx) { | ||
1183 | mask = ARMV6_PMCR_CCOUNT_IEN; | ||
1184 | evt = 0; | ||
1185 | } else if (ARMV6_COUNTER0 == idx) { | ||
1186 | mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; | ||
1187 | evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; | ||
1188 | } else if (ARMV6_COUNTER1 == idx) { | ||
1189 | mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; | ||
1190 | evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; | ||
1191 | } else { | ||
1192 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
1193 | return; | ||
1194 | } | ||
1195 | |||
1196 | /* | ||
1197 | * Mask out the current event and set the counter to count the number | ||
1198 | * of ETM bus signal assertion cycles. The external reporting should | ||
1199 | * be disabled and so this should never increment. | ||
1200 | */ | ||
1201 | spin_lock_irqsave(&pmu_lock, flags); | ||
1202 | val = armv6_pmcr_read(); | ||
1203 | val &= ~mask; | ||
1204 | val |= evt; | ||
1205 | armv6_pmcr_write(val); | ||
1206 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
1207 | } | ||
1208 | |||
1209 | static void | ||
1210 | armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, | ||
1211 | int idx) | ||
1212 | { | ||
1213 | unsigned long val, mask, flags, evt = 0; | ||
1214 | |||
1215 | if (ARMV6_CYCLE_COUNTER == idx) { | ||
1216 | mask = ARMV6_PMCR_CCOUNT_IEN; | ||
1217 | } else if (ARMV6_COUNTER0 == idx) { | ||
1218 | mask = ARMV6_PMCR_COUNT0_IEN; | ||
1219 | } else if (ARMV6_COUNTER1 == idx) { | ||
1220 | mask = ARMV6_PMCR_COUNT1_IEN; | ||
1221 | } else { | ||
1222 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
1223 | return; | ||
1224 | } | ||
1225 | |||
1226 | /* | ||
1227 | * Unlike UP ARMv6, we don't have a way of stopping the counters. We | ||
1228 | * simply disable the interrupt reporting. | ||
1229 | */ | ||
1230 | spin_lock_irqsave(&pmu_lock, flags); | ||
1231 | val = armv6_pmcr_read(); | ||
1232 | val &= ~mask; | ||
1233 | val |= evt; | ||
1234 | armv6_pmcr_write(val); | ||
1235 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
1236 | } | ||
1237 | |||
1238 | static const struct arm_pmu armv6pmu = { | ||
1239 | .id = ARM_PERF_PMU_ID_V6, | ||
1240 | .handle_irq = armv6pmu_handle_irq, | ||
1241 | .enable = armv6pmu_enable_event, | ||
1242 | .disable = armv6pmu_disable_event, | ||
1243 | .event_map = armv6pmu_event_map, | ||
1244 | .raw_event = armv6pmu_raw_event, | ||
1245 | .read_counter = armv6pmu_read_counter, | ||
1246 | .write_counter = armv6pmu_write_counter, | ||
1247 | .get_event_idx = armv6pmu_get_event_idx, | ||
1248 | .start = armv6pmu_start, | ||
1249 | .stop = armv6pmu_stop, | ||
1250 | .num_events = 3, | ||
1251 | .max_period = (1LLU << 32) - 1, | ||
1252 | }; | ||
1253 | |||
1254 | /* | ||
1255 | * ARMv6mpcore is almost identical to single core ARMv6 with the exception | ||
1256 | * that some of the events have different enumerations and that there is no | ||
1257 | * *hack* to stop the programmable counters. To stop the counters we simply | ||
1258 | * disable the interrupt reporting and update the event. When unthrottling we | ||
1259 | * reset the period and enable the interrupt reporting. | ||
1260 | */ | ||
1261 | static const struct arm_pmu armv6mpcore_pmu = { | ||
1262 | .id = ARM_PERF_PMU_ID_V6MP, | ||
1263 | .handle_irq = armv6pmu_handle_irq, | ||
1264 | .enable = armv6pmu_enable_event, | ||
1265 | .disable = armv6mpcore_pmu_disable_event, | ||
1266 | .event_map = armv6mpcore_pmu_event_map, | ||
1267 | .raw_event = armv6pmu_raw_event, | ||
1268 | .read_counter = armv6pmu_read_counter, | ||
1269 | .write_counter = armv6pmu_write_counter, | ||
1270 | .get_event_idx = armv6pmu_get_event_idx, | ||
1271 | .start = armv6pmu_start, | ||
1272 | .stop = armv6pmu_stop, | ||
1273 | .num_events = 3, | ||
1274 | .max_period = (1LLU << 32) - 1, | ||
1275 | }; | ||
1276 | |||
1277 | /* | ||
1278 | * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. | ||
1279 | * | ||
1280 | * Copied from ARMv6 code, with the low level code inspired | ||
1281 | * by the ARMv7 Oprofile code. | ||
1282 | * | ||
1283 | * Cortex-A8 has up to 4 configurable performance counters and | ||
1284 | * a single cycle counter. | ||
1285 | * Cortex-A9 has up to 31 configurable performance counters and | ||
1286 | * a single cycle counter. | ||
1287 | * | ||
1288 | * All counters can be enabled/disabled and IRQ masked separately. The cycle | ||
1289 | * counter and all 4 performance counters together can be reset separately. | ||
1290 | */ | ||
1291 | |||
/*
 * Common ARMv7 event types
 *
 * Event numbers referenced by both the Cortex-A8 and the Cortex-A9 mapping
 * tables in this file. The numbering has gaps (0x08 and 0x0E are not
 * defined here).
 */
enum armv7_perf_types {
	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
	ARMV7_PERFCTR_DREAD			= 0x06,
	ARMV7_PERFCTR_DWRITE			= 0x07,

	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
	/* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
	 * It counts:
	 *  - all branch instructions,
	 *  - instructions that explicitly write the PC,
	 *  - exception generating instructions.
	 */
	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,

	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,

	/* Used for PERF_COUNT_HW_CPU_CYCLES in the A8/A9 perf maps. */
	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
};
1322 | |||
/*
 * ARMv7 Cortex-A8 specific event types
 *
 * Several of these numbers (0x40-0x42, 0x50, 0x51, 0x70-0x72) are also
 * used, with different meanings, by enum armv7_a9_perf_types, so a value
 * is only meaningful together with the core it was defined for.
 */
enum armv7_a8_perf_types {
	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,

	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,

	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
	ARMV7_PERFCTR_L2_ACCESS			= 0x43,
	ARMV7_PERFCTR_L2_CACH_MISS		= 0x44,
	ARMV7_PERFCTR_AXI_READ_CYCLES		= 0x45,
	ARMV7_PERFCTR_AXI_WRITE_CYCLES		= 0x46,
	ARMV7_PERFCTR_MEMORY_REPLAY		= 0x47,
	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY	= 0x48,
	ARMV7_PERFCTR_L1_DATA_MISS		= 0x49,
	ARMV7_PERFCTR_L1_INST_MISS		= 0x4A,
	ARMV7_PERFCTR_L1_DATA_COLORING		= 0x4B,
	ARMV7_PERFCTR_L1_NEON_DATA		= 0x4C,
	ARMV7_PERFCTR_L1_NEON_CACH_DATA		= 0x4D,
	ARMV7_PERFCTR_L2_NEON			= 0x4E,
	ARMV7_PERFCTR_L2_NEON_HIT		= 0x4F,
	ARMV7_PERFCTR_L1_INST			= 0x50,
	ARMV7_PERFCTR_PC_RETURN_MIS_PRED	= 0x51,
	ARMV7_PERFCTR_PC_BRANCH_FAILED		= 0x52,
	ARMV7_PERFCTR_PC_BRANCH_TAKEN		= 0x53,
	ARMV7_PERFCTR_PC_BRANCH_EXECUTED	= 0x54,
	ARMV7_PERFCTR_OP_EXECUTED		= 0x55,
	ARMV7_PERFCTR_CYCLES_INST_STALL		= 0x56,
	ARMV7_PERFCTR_CYCLES_INST		= 0x57,
	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL	= 0x58,
	ARMV7_PERFCTR_CYCLES_NEON_INST_STALL	= 0x59,
	ARMV7_PERFCTR_NEON_CYCLES		= 0x5A,

	ARMV7_PERFCTR_PMU0_EVENTS		= 0x70,
	ARMV7_PERFCTR_PMU1_EVENTS		= 0x71,
	ARMV7_PERFCTR_PMU_EVENTS		= 0x72,
};
1361 | |||
/*
 * ARMv7 Cortex-A9 specific event types
 *
 * Several of these numbers (0x40-0x42, 0x50, 0x51, 0x70-0x72) are also
 * used, with different meanings, by enum armv7_a8_perf_types, so a value
 * is only meaningful together with the core it was defined for.
 */
enum armv7_a9_perf_types {
	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC	= 0x40,
	ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC	= 0x41,
	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC	= 0x42,

	ARMV7_PERFCTR_COHERENT_LINE_MISS	= 0x50,
	ARMV7_PERFCTR_COHERENT_LINE_HIT		= 0x51,

	ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES	= 0x60,
	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES	= 0x61,
	ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES	= 0x62,
	ARMV7_PERFCTR_STREX_EXECUTED_PASSED	= 0x63,
	ARMV7_PERFCTR_STREX_EXECUTED_FAILED	= 0x64,
	ARMV7_PERFCTR_DATA_EVICTION		= 0x65,
	ARMV7_PERFCTR_ISSUE_STAGE_NO_INST	= 0x66,
	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY		= 0x67,
	ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE	= 0x68,

	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS	= 0x6E,

	ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST	= 0x70,
	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST	= 0x71,
	ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST	= 0x72,
	ARMV7_PERFCTR_FP_EXECUTED_INST		= 0x73,
	ARMV7_PERFCTR_NEON_EXECUTED_INST	= 0x74,

	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES	= 0x80,
	ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES	= 0x81,
	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES	= 0x82,
	ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES	= 0x83,
	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
	ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES	= 0x85,
	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES	= 0x86,

	ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES	= 0x8A,
	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES	= 0x8B,

	ARMV7_PERFCTR_ISB_INST			= 0x90,
	ARMV7_PERFCTR_DSB_INST			= 0x91,
	ARMV7_PERFCTR_DMB_INST			= 0x92,
	ARMV7_PERFCTR_EXT_INTERRUPTS		= 0x93,

	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED	= 0xA0,
	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED	= 0xA1,
	ARMV7_PERFCTR_PLE_FIFO_FLUSH		= 0xA2,
	ARMV7_PERFCTR_PLE_RQST_COMPLETED	= 0xA3,
	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW		= 0xA4,
	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
};
1412 | |||
/*
 * Cortex-A8 HW events mapping
 *
 * The hardware events that we support. We do support cache operations but
 * we have harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 *
 * Indexed by the generic PERF_COUNT_HW_* id. The two generic cache events
 * are therefore marked HW_OP_UNSUPPORTED.
 */
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
};
1429 | |||
/*
 * Cortex-A8 cache event mapping, indexed as [cache][operation][result].
 *
 * Every READ entry is duplicated for WRITE, and all PREFETCH entries are
 * CACHE_OP_UNSUPPORTED.
 */
static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					  [PERF_COUNT_HW_CACHE_OP_MAX]
					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		/*
		 * The performance counters don't differentiate between read
		 * and write accesses/misses so this isn't strictly correct,
		 * but it's the best we can do. Writes and reads get
		 * combined.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* L1 instruction cache: uses the A8-specific L1_INST events. */
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L1_INST,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L1_INST_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* Last-level cache: mapped to the A8 L2 events. */
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_L2_ACCESS,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_L2_CACH_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		/*
		 * Only ITLB misses and DTLB refills are supported.
		 * If users want the DTLB refills misses a raw counter
		 * must be used.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* ITLB: only misses are mapped. */
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* Branch predictor: PC writes as accesses, mispredictions as misses. */
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
			[C(RESULT_MISS)]
					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
			[C(RESULT_MISS)]
					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};
1531 | |||
/*
 * Cortex-A9 HW events mapping
 *
 * Indexed by the generic PERF_COUNT_HW_* id. Here the generic cache
 * reference/miss events are mapped to the A9 coherent line hit/miss
 * events.
 */
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    =
					ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
};
1545 | |||
/*
 * Cortex-A9 cache event mapping, indexed as [cache][operation][result].
 *
 * Every READ entry is duplicated for WRITE, and all PREFETCH entries are
 * CACHE_OP_UNSUPPORTED.
 */
static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					  [PERF_COUNT_HW_CACHE_OP_MAX]
					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		/*
		 * The performance counters don't differentiate between read
		 * and write accesses/misses so this isn't strictly correct,
		 * but it's the best we can do. Writes and reads get
		 * combined.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DCACHE_REFILL,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* L1 instruction cache: only misses are mapped. */
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_IFETCH_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* Last-level cache: nothing is mapped. */
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		/*
		 * Only ITLB misses and DTLB refills are supported.
		 * If users want the DTLB refills misses a raw counter
		 * must be used.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_DTLB_REFILL,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* ITLB: only misses are mapped. */
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= ARMV7_PERFCTR_ITLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	/* Branch predictor: PC writes as accesses, mispredictions as misses. */
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
			[C(RESULT_MISS)]
					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= ARMV7_PERFCTR_PC_WRITE,
			[C(RESULT_MISS)]
					= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};
1647 | |||
/*
 * Perf Events counters
 *
 * Counter indices used by the ARMv7 helpers in this file. Numbering starts
 * at 1; further event counters follow ARMV7_COUNTER0 consecutively (see
 * ARMV7_COUNTER_LAST).
 */
enum armv7_counters {
	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
	ARMV7_COUNTER0			= 2,	/* First event counter */
};
1655 | |||
/*
 * The cycle counter is ARMV7_CYCLE_COUNTER.
 * The first event counter is ARMV7_COUNTER0.
 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
 *
 * ARMV7_COUNTER_LAST dereferences armpmu every time it is expanded, so it
 * is a run-time value, not a compile-time constant.
 */
#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)

/*
 * ARMv7 low level PMNC access
 */

/*
 * Per-CPU PMNC: config reg
 */
#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug */
#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
#define	ARMV7_PMNC_N_MASK	0x1f
#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */
1679 | |||
/*
 * Available counters
 *
 * These are the hardware bit positions / select values, as opposed to the
 * perf counter indices in enum armv7_counters.
 */
#define ARMV7_CNT0		0	/* First event counter */
#define ARMV7_CCNT		31	/* Cycle counter */

/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)

/*
 * All single-bit masks below are built from an unsigned 1: the cycle
 * counter lives in bit 31, and shifting a signed 1 that far is undefined
 * behaviour in C. The idx argument is parenthesized so that compound
 * expressions can be passed safely.
 */

/*
 * CNTENS: counters enable reg
 */
#define ARMV7_CNTENS_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C		(1U << ARMV7_CCNT)

/*
 * CNTENC: counters disable reg
 */
#define ARMV7_CNTENC_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C		(1U << ARMV7_CCNT)

/*
 * INTENS: counters overflow interrupt enable reg
 */
#define ARMV7_INTENS_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C		(1U << ARMV7_CCNT)

/*
 * INTENC: counters overflow interrupt disable reg
 */
#define ARMV7_INTENC_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C		(1U << ARMV7_CCNT)

/*
 * EVTSEL: Event selection reg
 */
#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */

/*
 * SELECT: Counter selection reg
 */
#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */

/*
 * FLAG: counters overflow flag status reg
 */
#define ARMV7_FLAG_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C		(1U << ARMV7_CCNT)
#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
1730 | |||
1731 | static inline unsigned long armv7_pmnc_read(void) | ||
1732 | { | ||
1733 | u32 val; | ||
1734 | asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); | ||
1735 | return val; | ||
1736 | } | ||
1737 | |||
1738 | static inline void armv7_pmnc_write(unsigned long val) | ||
1739 | { | ||
1740 | val &= ARMV7_PMNC_MASK; | ||
1741 | asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); | ||
1742 | } | ||
1743 | |||
1744 | static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) | ||
1745 | { | ||
1746 | return pmnc & ARMV7_OVERFLOWED_MASK; | ||
1747 | } | ||
1748 | |||
1749 | static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, | ||
1750 | enum armv7_counters counter) | ||
1751 | { | ||
1752 | int ret = 0; | ||
1753 | |||
1754 | if (counter == ARMV7_CYCLE_COUNTER) | ||
1755 | ret = pmnc & ARMV7_FLAG_C; | ||
1756 | else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) | ||
1757 | ret = pmnc & ARMV7_FLAG_P(counter); | ||
1758 | else | ||
1759 | pr_err("CPU%u checking wrong counter %d overflow status\n", | ||
1760 | smp_processor_id(), counter); | ||
1761 | |||
1762 | return ret; | ||
1763 | } | ||
1764 | |||
1765 | static inline int armv7_pmnc_select_counter(unsigned int idx) | ||
1766 | { | ||
1767 | u32 val; | ||
1768 | |||
1769 | if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { | ||
1770 | pr_err("CPU%u selecting wrong PMNC counter" | ||
1771 | " %d\n", smp_processor_id(), idx); | ||
1772 | return -1; | ||
1773 | } | ||
1774 | |||
1775 | val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; | ||
1776 | asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); | ||
1777 | |||
1778 | return idx; | ||
1779 | } | ||
1780 | |||
1781 | static inline u32 armv7pmu_read_counter(int idx) | ||
1782 | { | ||
1783 | unsigned long value = 0; | ||
1784 | |||
1785 | if (idx == ARMV7_CYCLE_COUNTER) | ||
1786 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); | ||
1787 | else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { | ||
1788 | if (armv7_pmnc_select_counter(idx) == idx) | ||
1789 | asm volatile("mrc p15, 0, %0, c9, c13, 2" | ||
1790 | : "=r" (value)); | ||
1791 | } else | ||
1792 | pr_err("CPU%u reading wrong counter %d\n", | ||
1793 | smp_processor_id(), idx); | ||
1794 | |||
1795 | return value; | ||
1796 | } | ||
1797 | |||
1798 | static inline void armv7pmu_write_counter(int idx, u32 value) | ||
1799 | { | ||
1800 | if (idx == ARMV7_CYCLE_COUNTER) | ||
1801 | asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); | ||
1802 | else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { | ||
1803 | if (armv7_pmnc_select_counter(idx) == idx) | ||
1804 | asm volatile("mcr p15, 0, %0, c9, c13, 2" | ||
1805 | : : "r" (value)); | ||
1806 | } else | ||
1807 | pr_err("CPU%u writing wrong counter %d\n", | ||
1808 | smp_processor_id(), idx); | ||
1809 | } | ||
1810 | |||
1811 | static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) | ||
1812 | { | ||
1813 | if (armv7_pmnc_select_counter(idx) == idx) { | ||
1814 | val &= ARMV7_EVTSEL_MASK; | ||
1815 | asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); | ||
1816 | } | ||
1817 | } | ||
1818 | |||
1819 | static inline u32 armv7_pmnc_enable_counter(unsigned int idx) | ||
1820 | { | ||
1821 | u32 val; | ||
1822 | |||
1823 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
1824 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
1825 | pr_err("CPU%u enabling wrong PMNC counter" | ||
1826 | " %d\n", smp_processor_id(), idx); | ||
1827 | return -1; | ||
1828 | } | ||
1829 | |||
1830 | if (idx == ARMV7_CYCLE_COUNTER) | ||
1831 | val = ARMV7_CNTENS_C; | ||
1832 | else | ||
1833 | val = ARMV7_CNTENS_P(idx); | ||
1834 | |||
1835 | asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); | ||
1836 | |||
1837 | return idx; | ||
1838 | } | ||
1839 | |||
1840 | static inline u32 armv7_pmnc_disable_counter(unsigned int idx) | ||
1841 | { | ||
1842 | u32 val; | ||
1843 | |||
1844 | |||
1845 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
1846 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
1847 | pr_err("CPU%u disabling wrong PMNC counter" | ||
1848 | " %d\n", smp_processor_id(), idx); | ||
1849 | return -1; | ||
1850 | } | ||
1851 | |||
1852 | if (idx == ARMV7_CYCLE_COUNTER) | ||
1853 | val = ARMV7_CNTENC_C; | ||
1854 | else | ||
1855 | val = ARMV7_CNTENC_P(idx); | ||
1856 | |||
1857 | asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); | ||
1858 | |||
1859 | return idx; | ||
1860 | } | ||
1861 | |||
1862 | static inline u32 armv7_pmnc_enable_intens(unsigned int idx) | ||
1863 | { | ||
1864 | u32 val; | ||
1865 | |||
1866 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
1867 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
1868 | pr_err("CPU%u enabling wrong PMNC counter" | ||
1869 | " interrupt enable %d\n", smp_processor_id(), idx); | ||
1870 | return -1; | ||
1871 | } | ||
1872 | |||
1873 | if (idx == ARMV7_CYCLE_COUNTER) | ||
1874 | val = ARMV7_INTENS_C; | ||
1875 | else | ||
1876 | val = ARMV7_INTENS_P(idx); | ||
1877 | |||
1878 | asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); | ||
1879 | |||
1880 | return idx; | ||
1881 | } | ||
1882 | |||
1883 | static inline u32 armv7_pmnc_disable_intens(unsigned int idx) | ||
1884 | { | ||
1885 | u32 val; | ||
1886 | |||
1887 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
1888 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
1889 | pr_err("CPU%u disabling wrong PMNC counter" | ||
1890 | " interrupt enable %d\n", smp_processor_id(), idx); | ||
1891 | return -1; | ||
1892 | } | ||
1893 | |||
1894 | if (idx == ARMV7_CYCLE_COUNTER) | ||
1895 | val = ARMV7_INTENC_C; | ||
1896 | else | ||
1897 | val = ARMV7_INTENC_P(idx); | ||
1898 | |||
1899 | asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); | ||
1900 | |||
1901 | return idx; | ||
1902 | } | ||
1903 | |||
1904 | static inline u32 armv7_pmnc_getreset_flags(void) | ||
1905 | { | ||
1906 | u32 val; | ||
1907 | |||
1908 | /* Read */ | ||
1909 | asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); | ||
1910 | |||
1911 | /* Write to clear flags */ | ||
1912 | val &= ARMV7_FLAG_MASK; | ||
1913 | asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); | ||
1914 | |||
1915 | return val; | ||
1916 | } | ||
1917 | |||
#ifdef DEBUG
/*
 * Debug helper: print the PMU control registers, the cycle counter and,
 * for every event counter, its current count and event selection.
 *
 * Note that dumping the event counters changes the counter selection
 * register as a side effect.
 */
static void armv7_pmnc_dump_regs(void)
{
	u32 val;
	unsigned int cnt;

	printk(KERN_INFO "PMNC registers dump:\n");

	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
	printk(KERN_INFO "PMNC  =0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
	printk(KERN_INFO "CNTENS=0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
	printk(KERN_INFO "INTENS=0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
	printk(KERN_INFO "FLAGS =0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
	printk(KERN_INFO "SELECT=0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
	printk(KERN_INFO "CCNT  =0x%08x\n", val);

	/*
	 * ARMV7_COUNTER_LAST is itself a valid counter index (every range
	 * check in this file treats it as inclusive), so the loop bound
	 * must be inclusive too or the last counter is never dumped.
	 */
	for (cnt = ARMV7_COUNTER0; cnt <= ARMV7_COUNTER_LAST; cnt++) {
		armv7_pmnc_select_counter(cnt);
		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
			cnt-ARMV7_EVENT_CNT_TO_CNTx, val);
	}
}
#endif
1955 | |||
1956 | void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1957 | { | ||
1958 | unsigned long flags; | ||
1959 | |||
1960 | /* | ||
1961 | * Enable counter and interrupt, and set the counter to count | ||
1962 | * the event that we're interested in. | ||
1963 | */ | ||
1964 | spin_lock_irqsave(&pmu_lock, flags); | ||
1965 | |||
1966 | /* | ||
1967 | * Disable counter | ||
1968 | */ | ||
1969 | armv7_pmnc_disable_counter(idx); | ||
1970 | |||
1971 | /* | ||
1972 | * Set event (if destined for PMNx counters) | ||
1973 | * We don't need to set the event if it's a cycle count | ||
1974 | */ | ||
1975 | if (idx != ARMV7_CYCLE_COUNTER) | ||
1976 | armv7_pmnc_write_evtsel(idx, hwc->config_base); | ||
1977 | |||
1978 | /* | ||
1979 | * Enable interrupt for this counter | ||
1980 | */ | ||
1981 | armv7_pmnc_enable_intens(idx); | ||
1982 | |||
1983 | /* | ||
1984 | * Enable counter | ||
1985 | */ | ||
1986 | armv7_pmnc_enable_counter(idx); | ||
1987 | |||
1988 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
1989 | } | ||
1990 | |||
1991 | static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1992 | { | ||
1993 | unsigned long flags; | ||
1994 | |||
1995 | /* | ||
1996 | * Disable counter and interrupt | ||
1997 | */ | ||
1998 | spin_lock_irqsave(&pmu_lock, flags); | ||
1999 | |||
2000 | /* | ||
2001 | * Disable counter | ||
2002 | */ | ||
2003 | armv7_pmnc_disable_counter(idx); | ||
2004 | |||
2005 | /* | ||
2006 | * Disable interrupt for this counter | ||
2007 | */ | ||
2008 | armv7_pmnc_disable_intens(idx); | ||
2009 | |||
2010 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2011 | } | ||
2012 | |||
2013 | static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) | ||
2014 | { | ||
2015 | unsigned long pmnc; | ||
2016 | struct perf_sample_data data; | ||
2017 | struct cpu_hw_events *cpuc; | ||
2018 | struct pt_regs *regs; | ||
2019 | int idx; | ||
2020 | |||
2021 | /* | ||
2022 | * Get and reset the IRQ flags | ||
2023 | */ | ||
2024 | pmnc = armv7_pmnc_getreset_flags(); | ||
2025 | |||
2026 | /* | ||
2027 | * Did an overflow occur? | ||
2028 | */ | ||
2029 | if (!armv7_pmnc_has_overflowed(pmnc)) | ||
2030 | return IRQ_NONE; | ||
2031 | |||
2032 | /* | ||
2033 | * Handle the counter(s) overflow(s) | ||
2034 | */ | ||
2035 | regs = get_irq_regs(); | ||
2036 | |||
2037 | perf_sample_data_init(&data, 0); | ||
2038 | |||
2039 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2040 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
2041 | struct perf_event *event = cpuc->events[idx]; | ||
2042 | struct hw_perf_event *hwc; | ||
2043 | |||
2044 | if (!test_bit(idx, cpuc->active_mask)) | ||
2045 | continue; | ||
2046 | |||
2047 | /* | ||
2048 | * We have a single interrupt for all counters. Check that | ||
2049 | * each counter has overflowed before we process it. | ||
2050 | */ | ||
2051 | if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) | ||
2052 | continue; | ||
2053 | |||
2054 | hwc = &event->hw; | ||
2055 | armpmu_event_update(event, hwc, idx); | ||
2056 | data.period = event->hw.last_period; | ||
2057 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
2058 | continue; | ||
2059 | |||
2060 | if (perf_event_overflow(event, 0, &data, regs)) | ||
2061 | armpmu->disable(hwc, idx); | ||
2062 | } | ||
2063 | |||
2064 | /* | ||
2065 | * Handle the pending perf events. | ||
2066 | * | ||
2067 | * Note: this call *must* be run with interrupts disabled. For | ||
2068 | * platforms that can have the PMU interrupts raised as an NMI, this | ||
2069 | * will not work. | ||
2070 | */ | ||
2071 | irq_work_run(); | ||
2072 | |||
2073 | return IRQ_HANDLED; | ||
2074 | } | ||
2075 | |||
2076 | static void armv7pmu_start(void) | ||
2077 | { | ||
2078 | unsigned long flags; | ||
2079 | |||
2080 | spin_lock_irqsave(&pmu_lock, flags); | ||
2081 | /* Enable all counters */ | ||
2082 | armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); | ||
2083 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2084 | } | ||
2085 | |||
2086 | static void armv7pmu_stop(void) | ||
2087 | { | ||
2088 | unsigned long flags; | ||
2089 | |||
2090 | spin_lock_irqsave(&pmu_lock, flags); | ||
2091 | /* Disable all counters */ | ||
2092 | armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); | ||
2093 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2094 | } | ||
2095 | |||
2096 | static inline int armv7_a8_pmu_event_map(int config) | ||
2097 | { | ||
2098 | int mapping = armv7_a8_perf_map[config]; | ||
2099 | if (HW_OP_UNSUPPORTED == mapping) | ||
2100 | mapping = -EOPNOTSUPP; | ||
2101 | return mapping; | ||
2102 | } | ||
2103 | |||
2104 | static inline int armv7_a9_pmu_event_map(int config) | ||
2105 | { | ||
2106 | int mapping = armv7_a9_perf_map[config]; | ||
2107 | if (HW_OP_UNSUPPORTED == mapping) | ||
2108 | mapping = -EOPNOTSUPP; | ||
2109 | return mapping; | ||
2110 | } | ||
2111 | |||
2112 | static u64 armv7pmu_raw_event(u64 config) | ||
2113 | { | ||
2114 | return config & 0xff; | ||
2115 | } | ||
2116 | |||
2117 | static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
2118 | struct hw_perf_event *event) | ||
2119 | { | ||
2120 | int idx; | ||
2121 | |||
2122 | /* Always place a cycle counter into the cycle counter. */ | ||
2123 | if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { | ||
2124 | if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) | ||
2125 | return -EAGAIN; | ||
2126 | |||
2127 | return ARMV7_CYCLE_COUNTER; | ||
2128 | } else { | ||
2129 | /* | ||
2130 | * For anything other than a cycle counter, try and use | ||
2131 | * the events counters | ||
2132 | */ | ||
2133 | for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { | ||
2134 | if (!test_and_set_bit(idx, cpuc->used_mask)) | ||
2135 | return idx; | ||
2136 | } | ||
2137 | |||
2138 | /* The counters are all in use. */ | ||
2139 | return -EAGAIN; | ||
2140 | } | ||
2141 | } | ||
2142 | |||
2143 | static struct arm_pmu armv7pmu = { | ||
2144 | .handle_irq = armv7pmu_handle_irq, | ||
2145 | .enable = armv7pmu_enable_event, | ||
2146 | .disable = armv7pmu_disable_event, | ||
2147 | .raw_event = armv7pmu_raw_event, | ||
2148 | .read_counter = armv7pmu_read_counter, | ||
2149 | .write_counter = armv7pmu_write_counter, | ||
2150 | .get_event_idx = armv7pmu_get_event_idx, | ||
2151 | .start = armv7pmu_start, | ||
2152 | .stop = armv7pmu_stop, | ||
2153 | .max_period = (1LLU << 32) - 1, | ||
2154 | }; | ||
2155 | |||
2156 | static u32 __init armv7_reset_read_pmnc(void) | ||
2157 | { | ||
2158 | u32 nb_cnt; | ||
2159 | |||
2160 | /* Initialize & Reset PMNC: C and P bits */ | ||
2161 | armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); | ||
2162 | |||
2163 | /* Read the nb of CNTx counters supported from PMNC */ | ||
2164 | nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; | ||
2165 | |||
2166 | /* Add the CPU cycles counter and return */ | ||
2167 | return nb_cnt + 1; | ||
2168 | } | ||
2169 | |||
2170 | /* | ||
2171 | * ARMv5 [xscale] Performance counter handling code. | ||
2172 | * | ||
2173 | * Based on xscale OProfile code. | ||
2174 | * | ||
2175 | * There are two variants of the xscale PMU that we support: | ||
2176 | * - xscale1pmu: 2 event counters and a cycle counter | ||
2177 | * - xscale2pmu: 4 event counters and a cycle counter | ||
2178 | * The two variants share event definitions, but have different | ||
2179 | * PMU structures. | ||
2180 | */ | ||
2181 | |||
/*
 * Event numbers for the XScale event counters.  No entries are defined
 * here for 0x0e, 0x0f and 0x13.
 */
enum xscale_perf_types {
	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
	XSCALE_PERFCTR_DATA_STALL		= 0x02,
	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
	XSCALE_PERFCTR_BRANCH			= 0x05,
	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0a,
	XSCALE_PERFCTR_DCACHE_MISS		= 0x0b,
	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0c,
	XSCALE_PERFCTR_PC_CHANGED		= 0x0d,

	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
	XSCALE_PERFCTR_BCU_FULL			= 0x11,
	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
	XSCALE_PERFCTR_RMW			= 0x16,

	/* XSCALE_PERFCTR_CCNT is not hardware defined */
	XSCALE_PERFCTR_CCNT			= 0xfe,
	XSCALE_PERFCTR_UNUSED			= 0xff,
};
2207 | |||
/*
 * Counter indices used by the XScale PMU code.  Numbering starts at 1
 * with the cycle counter; the event counters follow.
 */
enum xscale_counters {
	XSCALE_CYCLE_COUNTER	= 1,
	XSCALE_COUNTER0		= 2,
	XSCALE_COUNTER1		= 3,
	XSCALE_COUNTER2		= 4,
	XSCALE_COUNTER3		= 5,
};
2215 | |||
2216 | static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { | ||
2217 | [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, | ||
2218 | [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, | ||
2219 | [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, | ||
2220 | [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, | ||
2221 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, | ||
2222 | [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, | ||
2223 | [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, | ||
2224 | }; | ||
2225 | |||
2226 | static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
2227 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
2228 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
2229 | [C(L1D)] = { | ||
2230 | [C(OP_READ)] = { | ||
2231 | [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, | ||
2232 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, | ||
2233 | }, | ||
2234 | [C(OP_WRITE)] = { | ||
2235 | [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, | ||
2236 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, | ||
2237 | }, | ||
2238 | [C(OP_PREFETCH)] = { | ||
2239 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2240 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2241 | }, | ||
2242 | }, | ||
2243 | [C(L1I)] = { | ||
2244 | [C(OP_READ)] = { | ||
2245 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2246 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, | ||
2247 | }, | ||
2248 | [C(OP_WRITE)] = { | ||
2249 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2250 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, | ||
2251 | }, | ||
2252 | [C(OP_PREFETCH)] = { | ||
2253 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2254 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2255 | }, | ||
2256 | }, | ||
2257 | [C(LL)] = { | ||
2258 | [C(OP_READ)] = { | ||
2259 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2260 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2261 | }, | ||
2262 | [C(OP_WRITE)] = { | ||
2263 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2264 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2265 | }, | ||
2266 | [C(OP_PREFETCH)] = { | ||
2267 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2268 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2269 | }, | ||
2270 | }, | ||
2271 | [C(DTLB)] = { | ||
2272 | [C(OP_READ)] = { | ||
2273 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2274 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, | ||
2275 | }, | ||
2276 | [C(OP_WRITE)] = { | ||
2277 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2278 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, | ||
2279 | }, | ||
2280 | [C(OP_PREFETCH)] = { | ||
2281 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2282 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2283 | }, | ||
2284 | }, | ||
2285 | [C(ITLB)] = { | ||
2286 | [C(OP_READ)] = { | ||
2287 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2288 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, | ||
2289 | }, | ||
2290 | [C(OP_WRITE)] = { | ||
2291 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2292 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, | ||
2293 | }, | ||
2294 | [C(OP_PREFETCH)] = { | ||
2295 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2296 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2297 | }, | ||
2298 | }, | ||
2299 | [C(BPU)] = { | ||
2300 | [C(OP_READ)] = { | ||
2301 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2302 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2303 | }, | ||
2304 | [C(OP_WRITE)] = { | ||
2305 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2306 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2307 | }, | ||
2308 | [C(OP_PREFETCH)] = { | ||
2309 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
2310 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
2311 | }, | ||
2312 | }, | ||
2313 | }; | ||
2314 | |||
/*
 * Bits of the XScale PMU control register (PMNC) that are common to both
 * PMU variants.
 */
#define XSCALE_PMU_ENABLE	0x001
#define XSCALE_PMN_RESET	0x002
#define XSCALE_CCNT_RESET	0x004
/*
 * Both reset bits together.  This must be spelled with the XSCALE_
 * prefixed names: plain CCNT_RESET / PMN_RESET are not defined here, so
 * the macro could not be expanded by any user.
 */
#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
#define XSCALE_PMU_CNT64	0x008
2320 | |||
2321 | static inline int | ||
2322 | xscalepmu_event_map(int config) | ||
2323 | { | ||
2324 | int mapping = xscale_perf_map[config]; | ||
2325 | if (HW_OP_UNSUPPORTED == mapping) | ||
2326 | mapping = -EOPNOTSUPP; | ||
2327 | return mapping; | ||
2328 | } | ||
2329 | |||
2330 | static u64 | ||
2331 | xscalepmu_raw_event(u64 config) | ||
2332 | { | ||
2333 | return config & 0xff; | ||
2334 | } | ||
2335 | |||
/*
 * xscale1 keeps interrupt enables, overflow flags and event selection
 * all in the single PMNC register; these are the field definitions.
 */

/* interrupt enable bits */
#define XSCALE1_COUNT0_INT_EN	(1 << 4)
#define XSCALE1_COUNT1_INT_EN	(1 << 5)
#define XSCALE1_CCOUNT_INT_EN	(1 << 6)

/* overflow flag bits */
#define XSCALE1_COUNT0_OVERFLOW	(1 << 8)
#define XSCALE1_COUNT1_OVERFLOW	(1 << 9)
#define XSCALE1_CCOUNT_OVERFLOW	(1 << 10)
#define XSCALE1_OVERFLOWED_MASK	(XSCALE1_COUNT0_OVERFLOW | \
				 XSCALE1_COUNT1_OVERFLOW | \
				 XSCALE1_CCOUNT_OVERFLOW)

/* 8-bit event selection fields */
#define XSCALE1_COUNT0_EVT_SHFT	12
#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT	20
#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
2347 | |||
2348 | static inline u32 | ||
2349 | xscale1pmu_read_pmnc(void) | ||
2350 | { | ||
2351 | u32 val; | ||
2352 | asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); | ||
2353 | return val; | ||
2354 | } | ||
2355 | |||
2356 | static inline void | ||
2357 | xscale1pmu_write_pmnc(u32 val) | ||
2358 | { | ||
2359 | /* upper 4bits and 7, 11 are write-as-0 */ | ||
2360 | val &= 0xffff77f; | ||
2361 | asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); | ||
2362 | } | ||
2363 | |||
2364 | static inline int | ||
2365 | xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, | ||
2366 | enum xscale_counters counter) | ||
2367 | { | ||
2368 | int ret = 0; | ||
2369 | |||
2370 | switch (counter) { | ||
2371 | case XSCALE_CYCLE_COUNTER: | ||
2372 | ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; | ||
2373 | break; | ||
2374 | case XSCALE_COUNTER0: | ||
2375 | ret = pmnc & XSCALE1_COUNT0_OVERFLOW; | ||
2376 | break; | ||
2377 | case XSCALE_COUNTER1: | ||
2378 | ret = pmnc & XSCALE1_COUNT1_OVERFLOW; | ||
2379 | break; | ||
2380 | default: | ||
2381 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
2382 | } | ||
2383 | |||
2384 | return ret; | ||
2385 | } | ||
2386 | |||
2387 | static irqreturn_t | ||
2388 | xscale1pmu_handle_irq(int irq_num, void *dev) | ||
2389 | { | ||
2390 | unsigned long pmnc; | ||
2391 | struct perf_sample_data data; | ||
2392 | struct cpu_hw_events *cpuc; | ||
2393 | struct pt_regs *regs; | ||
2394 | int idx; | ||
2395 | |||
2396 | /* | ||
2397 | * NOTE: there's an A stepping erratum that states if an overflow | ||
2398 | * bit already exists and another occurs, the previous | ||
2399 | * Overflow bit gets cleared. There's no workaround. | ||
2400 | * Fixed in B stepping or later. | ||
2401 | */ | ||
2402 | pmnc = xscale1pmu_read_pmnc(); | ||
2403 | |||
2404 | /* | ||
2405 | * Write the value back to clear the overflow flags. Overflow | ||
2406 | * flags remain in pmnc for use below. We also disable the PMU | ||
2407 | * while we process the interrupt. | ||
2408 | */ | ||
2409 | xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); | ||
2410 | |||
2411 | if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) | ||
2412 | return IRQ_NONE; | ||
2413 | |||
2414 | regs = get_irq_regs(); | ||
2415 | |||
2416 | perf_sample_data_init(&data, 0); | ||
2417 | |||
2418 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2419 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
2420 | struct perf_event *event = cpuc->events[idx]; | ||
2421 | struct hw_perf_event *hwc; | ||
2422 | |||
2423 | if (!test_bit(idx, cpuc->active_mask)) | ||
2424 | continue; | ||
2425 | |||
2426 | if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) | ||
2427 | continue; | ||
2428 | |||
2429 | hwc = &event->hw; | ||
2430 | armpmu_event_update(event, hwc, idx); | ||
2431 | data.period = event->hw.last_period; | ||
2432 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
2433 | continue; | ||
2434 | |||
2435 | if (perf_event_overflow(event, 0, &data, regs)) | ||
2436 | armpmu->disable(hwc, idx); | ||
2437 | } | ||
2438 | |||
2439 | irq_work_run(); | ||
2440 | |||
2441 | /* | ||
2442 | * Re-enable the PMU. | ||
2443 | */ | ||
2444 | pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; | ||
2445 | xscale1pmu_write_pmnc(pmnc); | ||
2446 | |||
2447 | return IRQ_HANDLED; | ||
2448 | } | ||
2449 | |||
2450 | static void | ||
2451 | xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
2452 | { | ||
2453 | unsigned long val, mask, evt, flags; | ||
2454 | |||
2455 | switch (idx) { | ||
2456 | case XSCALE_CYCLE_COUNTER: | ||
2457 | mask = 0; | ||
2458 | evt = XSCALE1_CCOUNT_INT_EN; | ||
2459 | break; | ||
2460 | case XSCALE_COUNTER0: | ||
2461 | mask = XSCALE1_COUNT0_EVT_MASK; | ||
2462 | evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | | ||
2463 | XSCALE1_COUNT0_INT_EN; | ||
2464 | break; | ||
2465 | case XSCALE_COUNTER1: | ||
2466 | mask = XSCALE1_COUNT1_EVT_MASK; | ||
2467 | evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | | ||
2468 | XSCALE1_COUNT1_INT_EN; | ||
2469 | break; | ||
2470 | default: | ||
2471 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2472 | return; | ||
2473 | } | ||
2474 | |||
2475 | spin_lock_irqsave(&pmu_lock, flags); | ||
2476 | val = xscale1pmu_read_pmnc(); | ||
2477 | val &= ~mask; | ||
2478 | val |= evt; | ||
2479 | xscale1pmu_write_pmnc(val); | ||
2480 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2481 | } | ||
2482 | |||
2483 | static void | ||
2484 | xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
2485 | { | ||
2486 | unsigned long val, mask, evt, flags; | ||
2487 | |||
2488 | switch (idx) { | ||
2489 | case XSCALE_CYCLE_COUNTER: | ||
2490 | mask = XSCALE1_CCOUNT_INT_EN; | ||
2491 | evt = 0; | ||
2492 | break; | ||
2493 | case XSCALE_COUNTER0: | ||
2494 | mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; | ||
2495 | evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; | ||
2496 | break; | ||
2497 | case XSCALE_COUNTER1: | ||
2498 | mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; | ||
2499 | evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; | ||
2500 | break; | ||
2501 | default: | ||
2502 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2503 | return; | ||
2504 | } | ||
2505 | |||
2506 | spin_lock_irqsave(&pmu_lock, flags); | ||
2507 | val = xscale1pmu_read_pmnc(); | ||
2508 | val &= ~mask; | ||
2509 | val |= evt; | ||
2510 | xscale1pmu_write_pmnc(val); | ||
2511 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2512 | } | ||
2513 | |||
2514 | static int | ||
2515 | xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
2516 | struct hw_perf_event *event) | ||
2517 | { | ||
2518 | if (XSCALE_PERFCTR_CCNT == event->config_base) { | ||
2519 | if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) | ||
2520 | return -EAGAIN; | ||
2521 | |||
2522 | return XSCALE_CYCLE_COUNTER; | ||
2523 | } else { | ||
2524 | if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { | ||
2525 | return XSCALE_COUNTER1; | ||
2526 | } | ||
2527 | |||
2528 | if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { | ||
2529 | return XSCALE_COUNTER0; | ||
2530 | } | ||
2531 | |||
2532 | return -EAGAIN; | ||
2533 | } | ||
2534 | } | ||
2535 | |||
2536 | static void | ||
2537 | xscale1pmu_start(void) | ||
2538 | { | ||
2539 | unsigned long flags, val; | ||
2540 | |||
2541 | spin_lock_irqsave(&pmu_lock, flags); | ||
2542 | val = xscale1pmu_read_pmnc(); | ||
2543 | val |= XSCALE_PMU_ENABLE; | ||
2544 | xscale1pmu_write_pmnc(val); | ||
2545 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2546 | } | ||
2547 | |||
2548 | static void | ||
2549 | xscale1pmu_stop(void) | ||
2550 | { | ||
2551 | unsigned long flags, val; | ||
2552 | |||
2553 | spin_lock_irqsave(&pmu_lock, flags); | ||
2554 | val = xscale1pmu_read_pmnc(); | ||
2555 | val &= ~XSCALE_PMU_ENABLE; | ||
2556 | xscale1pmu_write_pmnc(val); | ||
2557 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2558 | } | ||
2559 | |||
2560 | static inline u32 | ||
2561 | xscale1pmu_read_counter(int counter) | ||
2562 | { | ||
2563 | u32 val = 0; | ||
2564 | |||
2565 | switch (counter) { | ||
2566 | case XSCALE_CYCLE_COUNTER: | ||
2567 | asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); | ||
2568 | break; | ||
2569 | case XSCALE_COUNTER0: | ||
2570 | asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); | ||
2571 | break; | ||
2572 | case XSCALE_COUNTER1: | ||
2573 | asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); | ||
2574 | break; | ||
2575 | } | ||
2576 | |||
2577 | return val; | ||
2578 | } | ||
2579 | |||
2580 | static inline void | ||
2581 | xscale1pmu_write_counter(int counter, u32 val) | ||
2582 | { | ||
2583 | switch (counter) { | ||
2584 | case XSCALE_CYCLE_COUNTER: | ||
2585 | asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); | ||
2586 | break; | ||
2587 | case XSCALE_COUNTER0: | ||
2588 | asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); | ||
2589 | break; | ||
2590 | case XSCALE_COUNTER1: | ||
2591 | asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); | ||
2592 | break; | ||
2593 | } | ||
2594 | } | ||
2595 | |||
2596 | static const struct arm_pmu xscale1pmu = { | ||
2597 | .id = ARM_PERF_PMU_ID_XSCALE1, | ||
2598 | .handle_irq = xscale1pmu_handle_irq, | ||
2599 | .enable = xscale1pmu_enable_event, | ||
2600 | .disable = xscale1pmu_disable_event, | ||
2601 | .event_map = xscalepmu_event_map, | ||
2602 | .raw_event = xscalepmu_raw_event, | ||
2603 | .read_counter = xscale1pmu_read_counter, | ||
2604 | .write_counter = xscale1pmu_write_counter, | ||
2605 | .get_event_idx = xscale1pmu_get_event_idx, | ||
2606 | .start = xscale1pmu_start, | ||
2607 | .stop = xscale1pmu_stop, | ||
2608 | .num_events = 3, | ||
2609 | .max_period = (1LLU << 32) - 1, | ||
2610 | }; | ||
2611 | |||
/*
 * xscale2 register field definitions.  Unlike xscale1, the overflow
 * flags, interrupt enables and event selections are accessed through
 * separate registers (see the xscale2pmu_read_*()/write_*() accessors).
 */

/* overflow flag bits */
#define XSCALE2_OVERFLOWED_MASK	0x01f
#define XSCALE2_CCOUNT_OVERFLOW	0x001
#define XSCALE2_COUNT0_OVERFLOW	0x002
#define XSCALE2_COUNT1_OVERFLOW	0x004
#define XSCALE2_COUNT2_OVERFLOW	0x008
#define XSCALE2_COUNT3_OVERFLOW	0x010

/* interrupt enable bits */
#define XSCALE2_CCOUNT_INT_EN	0x001
#define XSCALE2_COUNT0_INT_EN	0x002
#define XSCALE2_COUNT1_INT_EN	0x004
#define XSCALE2_COUNT2_INT_EN	0x008
#define XSCALE2_COUNT3_INT_EN	0x010

/*
 * 8-bit event selection fields.  The masks are built from an unsigned
 * long constant: with a plain int, 0xff << 24 shifts into the sign bit,
 * which is undefined behaviour and yields a negative mask.
 */
#define XSCALE2_COUNT0_EVT_SHFT	0
#define XSCALE2_COUNT0_EVT_MASK	(0xffUL << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT	8
#define XSCALE2_COUNT1_EVT_MASK	(0xffUL << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT	16
#define XSCALE2_COUNT2_EVT_MASK	(0xffUL << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT	24
#define XSCALE2_COUNT3_EVT_MASK	(0xffUL << XSCALE2_COUNT3_EVT_SHFT)
2631 | |||
2632 | static inline u32 | ||
2633 | xscale2pmu_read_pmnc(void) | ||
2634 | { | ||
2635 | u32 val; | ||
2636 | asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); | ||
2637 | /* bits 1-2 and 4-23 are read-unpredictable */ | ||
2638 | return val & 0xff000009; | ||
2639 | } | ||
2640 | |||
2641 | static inline void | ||
2642 | xscale2pmu_write_pmnc(u32 val) | ||
2643 | { | ||
2644 | /* bits 4-23 are write-as-0, 24-31 are write ignored */ | ||
2645 | val &= 0xf; | ||
2646 | asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); | ||
2647 | } | ||
2648 | |||
2649 | static inline u32 | ||
2650 | xscale2pmu_read_overflow_flags(void) | ||
2651 | { | ||
2652 | u32 val; | ||
2653 | asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); | ||
2654 | return val; | ||
2655 | } | ||
2656 | |||
2657 | static inline void | ||
2658 | xscale2pmu_write_overflow_flags(u32 val) | ||
2659 | { | ||
2660 | asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); | ||
2661 | } | ||
2662 | |||
2663 | static inline u32 | ||
2664 | xscale2pmu_read_event_select(void) | ||
2665 | { | ||
2666 | u32 val; | ||
2667 | asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); | ||
2668 | return val; | ||
2669 | } | ||
2670 | |||
2671 | static inline void | ||
2672 | xscale2pmu_write_event_select(u32 val) | ||
2673 | { | ||
2674 | asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); | ||
2675 | } | ||
2676 | |||
2677 | static inline u32 | ||
2678 | xscale2pmu_read_int_enable(void) | ||
2679 | { | ||
2680 | u32 val; | ||
2681 | asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); | ||
2682 | return val; | ||
2683 | } | ||
2684 | |||
2685 | static void | ||
2686 | xscale2pmu_write_int_enable(u32 val) | ||
2687 | { | ||
2688 | asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); | ||
2689 | } | ||
2690 | |||
2691 | static inline int | ||
2692 | xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, | ||
2693 | enum xscale_counters counter) | ||
2694 | { | ||
2695 | int ret = 0; | ||
2696 | |||
2697 | switch (counter) { | ||
2698 | case XSCALE_CYCLE_COUNTER: | ||
2699 | ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; | ||
2700 | break; | ||
2701 | case XSCALE_COUNTER0: | ||
2702 | ret = of_flags & XSCALE2_COUNT0_OVERFLOW; | ||
2703 | break; | ||
2704 | case XSCALE_COUNTER1: | ||
2705 | ret = of_flags & XSCALE2_COUNT1_OVERFLOW; | ||
2706 | break; | ||
2707 | case XSCALE_COUNTER2: | ||
2708 | ret = of_flags & XSCALE2_COUNT2_OVERFLOW; | ||
2709 | break; | ||
2710 | case XSCALE_COUNTER3: | ||
2711 | ret = of_flags & XSCALE2_COUNT3_OVERFLOW; | ||
2712 | break; | ||
2713 | default: | ||
2714 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
2715 | } | ||
2716 | |||
2717 | return ret; | ||
2718 | } | ||
2719 | |||
2720 | static irqreturn_t | ||
2721 | xscale2pmu_handle_irq(int irq_num, void *dev) | ||
2722 | { | ||
2723 | unsigned long pmnc, of_flags; | ||
2724 | struct perf_sample_data data; | ||
2725 | struct cpu_hw_events *cpuc; | ||
2726 | struct pt_regs *regs; | ||
2727 | int idx; | ||
2728 | |||
2729 | /* Disable the PMU. */ | ||
2730 | pmnc = xscale2pmu_read_pmnc(); | ||
2731 | xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); | ||
2732 | |||
2733 | /* Check the overflow flag register. */ | ||
2734 | of_flags = xscale2pmu_read_overflow_flags(); | ||
2735 | if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) | ||
2736 | return IRQ_NONE; | ||
2737 | |||
2738 | /* Clear the overflow bits. */ | ||
2739 | xscale2pmu_write_overflow_flags(of_flags); | ||
2740 | |||
2741 | regs = get_irq_regs(); | ||
2742 | |||
2743 | perf_sample_data_init(&data, 0); | ||
2744 | |||
2745 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
2746 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
2747 | struct perf_event *event = cpuc->events[idx]; | ||
2748 | struct hw_perf_event *hwc; | ||
2749 | |||
2750 | if (!test_bit(idx, cpuc->active_mask)) | ||
2751 | continue; | ||
2752 | |||
2753 | if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) | ||
2754 | continue; | ||
2755 | |||
2756 | hwc = &event->hw; | ||
2757 | armpmu_event_update(event, hwc, idx); | ||
2758 | data.period = event->hw.last_period; | ||
2759 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
2760 | continue; | ||
2761 | |||
2762 | if (perf_event_overflow(event, 0, &data, regs)) | ||
2763 | armpmu->disable(hwc, idx); | ||
2764 | } | ||
2765 | |||
2766 | irq_work_run(); | ||
2767 | |||
2768 | /* | ||
2769 | * Re-enable the PMU. | ||
2770 | */ | ||
2771 | pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; | ||
2772 | xscale2pmu_write_pmnc(pmnc); | ||
2773 | |||
2774 | return IRQ_HANDLED; | ||
2775 | } | ||
2776 | |||
2777 | static void | ||
2778 | xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
2779 | { | ||
2780 | unsigned long flags, ien, evtsel; | ||
2781 | |||
2782 | ien = xscale2pmu_read_int_enable(); | ||
2783 | evtsel = xscale2pmu_read_event_select(); | ||
2784 | |||
2785 | switch (idx) { | ||
2786 | case XSCALE_CYCLE_COUNTER: | ||
2787 | ien |= XSCALE2_CCOUNT_INT_EN; | ||
2788 | break; | ||
2789 | case XSCALE_COUNTER0: | ||
2790 | ien |= XSCALE2_COUNT0_INT_EN; | ||
2791 | evtsel &= ~XSCALE2_COUNT0_EVT_MASK; | ||
2792 | evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; | ||
2793 | break; | ||
2794 | case XSCALE_COUNTER1: | ||
2795 | ien |= XSCALE2_COUNT1_INT_EN; | ||
2796 | evtsel &= ~XSCALE2_COUNT1_EVT_MASK; | ||
2797 | evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; | ||
2798 | break; | ||
2799 | case XSCALE_COUNTER2: | ||
2800 | ien |= XSCALE2_COUNT2_INT_EN; | ||
2801 | evtsel &= ~XSCALE2_COUNT2_EVT_MASK; | ||
2802 | evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; | ||
2803 | break; | ||
2804 | case XSCALE_COUNTER3: | ||
2805 | ien |= XSCALE2_COUNT3_INT_EN; | ||
2806 | evtsel &= ~XSCALE2_COUNT3_EVT_MASK; | ||
2807 | evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; | ||
2808 | break; | ||
2809 | default: | ||
2810 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2811 | return; | ||
2812 | } | ||
2813 | |||
2814 | spin_lock_irqsave(&pmu_lock, flags); | ||
2815 | xscale2pmu_write_event_select(evtsel); | ||
2816 | xscale2pmu_write_int_enable(ien); | ||
2817 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2818 | } | ||
2819 | |||
2820 | static void | ||
2821 | xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
2822 | { | ||
2823 | unsigned long flags, ien, evtsel; | ||
2824 | |||
2825 | ien = xscale2pmu_read_int_enable(); | ||
2826 | evtsel = xscale2pmu_read_event_select(); | ||
2827 | |||
2828 | switch (idx) { | ||
2829 | case XSCALE_CYCLE_COUNTER: | ||
2830 | ien &= ~XSCALE2_CCOUNT_INT_EN; | ||
2831 | break; | ||
2832 | case XSCALE_COUNTER0: | ||
2833 | ien &= ~XSCALE2_COUNT0_INT_EN; | ||
2834 | evtsel &= ~XSCALE2_COUNT0_EVT_MASK; | ||
2835 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; | ||
2836 | break; | ||
2837 | case XSCALE_COUNTER1: | ||
2838 | ien &= ~XSCALE2_COUNT1_INT_EN; | ||
2839 | evtsel &= ~XSCALE2_COUNT1_EVT_MASK; | ||
2840 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; | ||
2841 | break; | ||
2842 | case XSCALE_COUNTER2: | ||
2843 | ien &= ~XSCALE2_COUNT2_INT_EN; | ||
2844 | evtsel &= ~XSCALE2_COUNT2_EVT_MASK; | ||
2845 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; | ||
2846 | break; | ||
2847 | case XSCALE_COUNTER3: | ||
2848 | ien &= ~XSCALE2_COUNT3_INT_EN; | ||
2849 | evtsel &= ~XSCALE2_COUNT3_EVT_MASK; | ||
2850 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; | ||
2851 | break; | ||
2852 | default: | ||
2853 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
2854 | return; | ||
2855 | } | ||
2856 | |||
2857 | spin_lock_irqsave(&pmu_lock, flags); | ||
2858 | xscale2pmu_write_event_select(evtsel); | ||
2859 | xscale2pmu_write_int_enable(ien); | ||
2860 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2861 | } | ||
2862 | |||
2863 | static int | ||
2864 | xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
2865 | struct hw_perf_event *event) | ||
2866 | { | ||
2867 | int idx = xscale1pmu_get_event_idx(cpuc, event); | ||
2868 | if (idx >= 0) | ||
2869 | goto out; | ||
2870 | |||
2871 | if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) | ||
2872 | idx = XSCALE_COUNTER3; | ||
2873 | else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) | ||
2874 | idx = XSCALE_COUNTER2; | ||
2875 | out: | ||
2876 | return idx; | ||
2877 | } | ||
2878 | |||
2879 | static void | ||
2880 | xscale2pmu_start(void) | ||
2881 | { | ||
2882 | unsigned long flags, val; | ||
2883 | |||
2884 | spin_lock_irqsave(&pmu_lock, flags); | ||
2885 | val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; | ||
2886 | val |= XSCALE_PMU_ENABLE; | ||
2887 | xscale2pmu_write_pmnc(val); | ||
2888 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2889 | } | ||
2890 | |||
2891 | static void | ||
2892 | xscale2pmu_stop(void) | ||
2893 | { | ||
2894 | unsigned long flags, val; | ||
2895 | |||
2896 | spin_lock_irqsave(&pmu_lock, flags); | ||
2897 | val = xscale2pmu_read_pmnc(); | ||
2898 | val &= ~XSCALE_PMU_ENABLE; | ||
2899 | xscale2pmu_write_pmnc(val); | ||
2900 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
2901 | } | ||
2902 | |||
2903 | static inline u32 | ||
2904 | xscale2pmu_read_counter(int counter) | ||
2905 | { | ||
2906 | u32 val = 0; | ||
2907 | |||
2908 | switch (counter) { | ||
2909 | case XSCALE_CYCLE_COUNTER: | ||
2910 | asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); | ||
2911 | break; | ||
2912 | case XSCALE_COUNTER0: | ||
2913 | asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); | ||
2914 | break; | ||
2915 | case XSCALE_COUNTER1: | ||
2916 | asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); | ||
2917 | break; | ||
2918 | case XSCALE_COUNTER2: | ||
2919 | asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); | ||
2920 | break; | ||
2921 | case XSCALE_COUNTER3: | ||
2922 | asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); | ||
2923 | break; | ||
2924 | } | ||
2925 | |||
2926 | return val; | ||
2927 | } | ||
2928 | |||
2929 | static inline void | ||
2930 | xscale2pmu_write_counter(int counter, u32 val) | ||
2931 | { | ||
2932 | switch (counter) { | ||
2933 | case XSCALE_CYCLE_COUNTER: | ||
2934 | asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); | ||
2935 | break; | ||
2936 | case XSCALE_COUNTER0: | ||
2937 | asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); | ||
2938 | break; | ||
2939 | case XSCALE_COUNTER1: | ||
2940 | asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); | ||
2941 | break; | ||
2942 | case XSCALE_COUNTER2: | ||
2943 | asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); | ||
2944 | break; | ||
2945 | case XSCALE_COUNTER3: | ||
2946 | asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); | ||
2947 | break; | ||
2948 | } | ||
2949 | } | ||
2950 | |||
2951 | static const struct arm_pmu xscale2pmu = { | ||
2952 | .id = ARM_PERF_PMU_ID_XSCALE2, | ||
2953 | .handle_irq = xscale2pmu_handle_irq, | ||
2954 | .enable = xscale2pmu_enable_event, | ||
2955 | .disable = xscale2pmu_disable_event, | ||
2956 | .event_map = xscalepmu_event_map, | ||
2957 | .raw_event = xscalepmu_raw_event, | ||
2958 | .read_counter = xscale2pmu_read_counter, | ||
2959 | .write_counter = xscale2pmu_write_counter, | ||
2960 | .get_event_idx = xscale2pmu_get_event_idx, | ||
2961 | .start = xscale2pmu_start, | ||
2962 | .stop = xscale2pmu_stop, | ||
2963 | .num_events = 5, | ||
2964 | .max_period = (1LLU << 32) - 1, | ||
2965 | }; | ||
2966 | 611 | ||
2967 | static int __init | 612 | static int __init |
2968 | init_hw_perf_events(void) | 613 | init_hw_perf_events(void) |
@@ -2977,37 +622,16 @@ init_hw_perf_events(void) | |||
2977 | case 0xB360: /* ARM1136 */ | 622 | case 0xB360: /* ARM1136 */ |
2978 | case 0xB560: /* ARM1156 */ | 623 | case 0xB560: /* ARM1156 */ |
2979 | case 0xB760: /* ARM1176 */ | 624 | case 0xB760: /* ARM1176 */ |
2980 | armpmu = &armv6pmu; | 625 | armpmu = armv6pmu_init(); |
2981 | memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, | ||
2982 | sizeof(armv6_perf_cache_map)); | ||
2983 | break; | 626 | break; |
2984 | case 0xB020: /* ARM11mpcore */ | 627 | case 0xB020: /* ARM11mpcore */ |
2985 | armpmu = &armv6mpcore_pmu; | 628 | armpmu = armv6mpcore_pmu_init(); |
2986 | memcpy(armpmu_perf_cache_map, | ||
2987 | armv6mpcore_perf_cache_map, | ||
2988 | sizeof(armv6mpcore_perf_cache_map)); | ||
2989 | break; | 629 | break; |
2990 | case 0xC080: /* Cortex-A8 */ | 630 | case 0xC080: /* Cortex-A8 */ |
2991 | armv7pmu.id = ARM_PERF_PMU_ID_CA8; | 631 | armpmu = armv7_a8_pmu_init(); |
2992 | memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, | ||
2993 | sizeof(armv7_a8_perf_cache_map)); | ||
2994 | armv7pmu.event_map = armv7_a8_pmu_event_map; | ||
2995 | armpmu = &armv7pmu; | ||
2996 | |||
2997 | /* Reset PMNC and read the nb of CNTx counters | ||
2998 | supported */ | ||
2999 | armv7pmu.num_events = armv7_reset_read_pmnc(); | ||
3000 | break; | 632 | break; |
3001 | case 0xC090: /* Cortex-A9 */ | 633 | case 0xC090: /* Cortex-A9 */ |
3002 | armv7pmu.id = ARM_PERF_PMU_ID_CA9; | 634 | armpmu = armv7_a9_pmu_init(); |
3003 | memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, | ||
3004 | sizeof(armv7_a9_perf_cache_map)); | ||
3005 | armv7pmu.event_map = armv7_a9_pmu_event_map; | ||
3006 | armpmu = &armv7pmu; | ||
3007 | |||
3008 | /* Reset PMNC and read the nb of CNTx counters | ||
3009 | supported */ | ||
3010 | armv7pmu.num_events = armv7_reset_read_pmnc(); | ||
3011 | break; | 635 | break; |
3012 | } | 636 | } |
3013 | /* Intel CPUs [xscale]. */ | 637 | /* Intel CPUs [xscale]. */ |
@@ -3015,21 +639,17 @@ init_hw_perf_events(void) | |||
3015 | part_number = (cpuid >> 13) & 0x7; | 639 | part_number = (cpuid >> 13) & 0x7; |
3016 | switch (part_number) { | 640 | switch (part_number) { |
3017 | case 1: | 641 | case 1: |
3018 | armpmu = &xscale1pmu; | 642 | armpmu = xscale1pmu_init(); |
3019 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, | ||
3020 | sizeof(xscale_perf_cache_map)); | ||
3021 | break; | 643 | break; |
3022 | case 2: | 644 | case 2: |
3023 | armpmu = &xscale2pmu; | 645 | armpmu = xscale2pmu_init(); |
3024 | memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, | ||
3025 | sizeof(xscale_perf_cache_map)); | ||
3026 | break; | 646 | break; |
3027 | } | 647 | } |
3028 | } | 648 | } |
3029 | 649 | ||
3030 | if (armpmu) { | 650 | if (armpmu) { |
3031 | pr_info("enabled with %s PMU driver, %d counters available\n", | 651 | pr_info("enabled with %s PMU driver, %d counters available\n", |
3032 | arm_pmu_names[armpmu->id], armpmu->num_events); | 652 | armpmu->name, armpmu->num_events); |
3033 | } else { | 653 | } else { |
3034 | pr_info("no hardware support available\n"); | 654 | pr_info("no hardware support available\n"); |
3035 | } | 655 | } |
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c new file mode 100644 index 000000000000..7aeb07da9076 --- /dev/null +++ b/arch/arm/kernel/perf_event_v6.c | |||
@@ -0,0 +1,672 @@ | |||
1 | /* | ||
2 | * ARMv6 Performance counter handling code. | ||
3 | * | ||
4 | * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles | ||
5 | * | ||
6 | * ARMv6 has 2 configurable performance counters and a single cycle counter. | ||
7 | * They all share a single reset bit but can be written to zero so we can use | ||
8 | * that for a reset. | ||
9 | * | ||
10 | * The counters can't be individually enabled or disabled so when we remove | ||
11 | * one event and replace it with another we could get spurious counts from the | ||
12 | * wrong event. However, we can take advantage of the fact that the | ||
13 | * performance counters can export events to the event bus, and the event bus | ||
14 | * itself can be monitored. This requires that we *don't* export the events to | ||
15 | * the event bus. The procedure for disabling a configurable counter is: | ||
16 | * - change the counter to count the ETMEXTOUT[0] signal (0x20). This | ||
17 | * effectively stops the counter from counting. | ||
18 | * - disable the counter's interrupt generation (each counter has its | ||
19 | * own interrupt enable bit). | ||
20 | * Once stopped, the counter value can be written as 0 to reset. | ||
21 | * | ||
22 | * To enable a counter: | ||
23 | * - enable the counter's interrupt generation. | ||
24 | * - set the new event type. | ||
25 | * | ||
26 | * Note: the dedicated cycle counter only counts cycles and can't be | ||
27 | * enabled/disabled independently of the others. When we want to disable the | ||
28 | * cycle counter, we have to just disable the interrupt reporting and start | ||
29 | * ignoring that counter. When re-enabling, we have to reset the value and | ||
30 | * enable the interrupt. | ||
31 | */ | ||
32 | |||
33 | #ifdef CONFIG_CPU_V6 | ||
/* ARMv6 event numbers, listed in ascending numeric order. */
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	= 0x00,
	ARMV6_PERFCTR_IBUF_STALL	= 0x01,
	ARMV6_PERFCTR_DDEP_STALL	= 0x02,
	ARMV6_PERFCTR_ITLB_MISS		= 0x03,
	ARMV6_PERFCTR_DTLB_MISS		= 0x04,
	ARMV6_PERFCTR_BR_EXEC		= 0x05,
	ARMV6_PERFCTR_BR_MISPREDICT	= 0x06,
	ARMV6_PERFCTR_INSTR_EXEC	= 0x07,
	ARMV6_PERFCTR_DCACHE_HIT	= 0x09,
	ARMV6_PERFCTR_DCACHE_ACCESS	= 0x0A,
	ARMV6_PERFCTR_DCACHE_MISS	= 0x0B,
	ARMV6_PERFCTR_DCACHE_WBACK	= 0x0C,
	ARMV6_PERFCTR_SW_PC_CHANGE	= 0x0D,
	ARMV6_PERFCTR_MAIN_TLB_MISS	= 0x0F,
	ARMV6_PERFCTR_EXPL_D_ACCESS	= 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	= 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	= 0x12,
	/* Event programmed into a counter that is being disabled. */
	ARMV6_PERFCTR_NOP		= 0x20,
	ARMV6_PERFCTR_CPU_CYCLES	= 0xFF,
};
55 | |||
/* Counter indices used by the ARMv6 PMU code; numbering starts at 1. */
enum armv6_counters {
	ARMV6_CYCLE_COUNTER	= 1,
	ARMV6_COUNTER0		= 2,
	ARMV6_COUNTER1		= 3,
};
61 | |||
62 | /* | ||
63 | * The hardware events that we support. We do support cache operations but | ||
64 | * we have harvard caches and no way to combine instruction and data | ||
65 | * accesses/misses in hardware. | ||
66 | */ | ||
67 | static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { | ||
68 | [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, | ||
69 | [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, | ||
70 | [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, | ||
71 | [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, | ||
72 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, | ||
73 | [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, | ||
74 | [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, | ||
75 | }; | ||
76 | |||
77 | static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
78 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
79 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
80 | [C(L1D)] = { | ||
81 | /* | ||
82 | * The performance counters don't differentiate between read | ||
83 | * and write accesses/misses so this isn't strictly correct, | ||
84 | * but it's the best we can do. Writes and reads get | ||
85 | * combined. | ||
86 | */ | ||
87 | [C(OP_READ)] = { | ||
88 | [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, | ||
89 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, | ||
90 | }, | ||
91 | [C(OP_WRITE)] = { | ||
92 | [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, | ||
93 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, | ||
94 | }, | ||
95 | [C(OP_PREFETCH)] = { | ||
96 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
97 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
98 | }, | ||
99 | }, | ||
100 | [C(L1I)] = { | ||
101 | [C(OP_READ)] = { | ||
102 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
103 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, | ||
104 | }, | ||
105 | [C(OP_WRITE)] = { | ||
106 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
107 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, | ||
108 | }, | ||
109 | [C(OP_PREFETCH)] = { | ||
110 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
111 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
112 | }, | ||
113 | }, | ||
114 | [C(LL)] = { | ||
115 | [C(OP_READ)] = { | ||
116 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
117 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
118 | }, | ||
119 | [C(OP_WRITE)] = { | ||
120 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
121 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
122 | }, | ||
123 | [C(OP_PREFETCH)] = { | ||
124 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
125 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
126 | }, | ||
127 | }, | ||
128 | [C(DTLB)] = { | ||
129 | /* | ||
130 | * The ARM performance counters can count micro DTLB misses, | ||
131 | * micro ITLB misses and main TLB misses. There isn't an event | ||
132 | * for TLB misses, so use the micro misses here and if users | ||
133 | * want the main TLB misses they can use a raw counter. | ||
134 | */ | ||
135 | [C(OP_READ)] = { | ||
136 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
137 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, | ||
138 | }, | ||
139 | [C(OP_WRITE)] = { | ||
140 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
141 | [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, | ||
142 | }, | ||
143 | [C(OP_PREFETCH)] = { | ||
144 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
145 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
146 | }, | ||
147 | }, | ||
148 | [C(ITLB)] = { | ||
149 | [C(OP_READ)] = { | ||
150 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
151 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, | ||
152 | }, | ||
153 | [C(OP_WRITE)] = { | ||
154 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
155 | [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, | ||
156 | }, | ||
157 | [C(OP_PREFETCH)] = { | ||
158 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
159 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
160 | }, | ||
161 | }, | ||
162 | [C(BPU)] = { | ||
163 | [C(OP_READ)] = { | ||
164 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
165 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
166 | }, | ||
167 | [C(OP_WRITE)] = { | ||
168 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
169 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
170 | }, | ||
171 | [C(OP_PREFETCH)] = { | ||
172 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
173 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
174 | }, | ||
175 | }, | ||
176 | }; | ||
177 | |||
/* ARM11 MPCore event numbers, listed in ascending numeric order. */
enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS		= 0x00,
	ARMV6MPCORE_PERFCTR_IBUF_STALL		= 0x01,
	ARMV6MPCORE_PERFCTR_DDEP_STALL		= 0x02,
	ARMV6MPCORE_PERFCTR_ITLB_MISS		= 0x03,
	ARMV6MPCORE_PERFCTR_DTLB_MISS		= 0x04,
	ARMV6MPCORE_PERFCTR_BR_EXEC		= 0x05,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT	= 0x06,
	ARMV6MPCORE_PERFCTR_BR_MISPREDICT	= 0x07,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC		= 0x08,
	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS	= 0x0A,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS	= 0x0B,
	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS	= 0x0C,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS	= 0x0D,
	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION	= 0x0E,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE	= 0x0F,
	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS	= 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS	= 0x11,
	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL	= 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED	= 0x13,
	ARMV6MPCORE_PERFCTR_CPU_CYCLES		= 0xFF,
};
200 | |||
201 | /* | ||
202 | * The hardware events that we support. We do support cache operations but | ||
203 | * we have harvard caches and no way to combine instruction and data | ||
204 | * accesses/misses in hardware. | ||
205 | */ | ||
206 | static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { | ||
207 | [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, | ||
208 | [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, | ||
209 | [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, | ||
210 | [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, | ||
211 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, | ||
212 | [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, | ||
213 | [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, | ||
214 | }; | ||
215 | |||
216 | static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
217 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
218 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
219 | [C(L1D)] = { | ||
220 | [C(OP_READ)] = { | ||
221 | [C(RESULT_ACCESS)] = | ||
222 | ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, | ||
223 | [C(RESULT_MISS)] = | ||
224 | ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, | ||
225 | }, | ||
226 | [C(OP_WRITE)] = { | ||
227 | [C(RESULT_ACCESS)] = | ||
228 | ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, | ||
229 | [C(RESULT_MISS)] = | ||
230 | ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, | ||
231 | }, | ||
232 | [C(OP_PREFETCH)] = { | ||
233 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
234 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
235 | }, | ||
236 | }, | ||
237 | [C(L1I)] = { | ||
238 | [C(OP_READ)] = { | ||
239 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
240 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, | ||
241 | }, | ||
242 | [C(OP_WRITE)] = { | ||
243 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
244 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, | ||
245 | }, | ||
246 | [C(OP_PREFETCH)] = { | ||
247 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
248 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
249 | }, | ||
250 | }, | ||
251 | [C(LL)] = { | ||
252 | [C(OP_READ)] = { | ||
253 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
254 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
255 | }, | ||
256 | [C(OP_WRITE)] = { | ||
257 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
258 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
259 | }, | ||
260 | [C(OP_PREFETCH)] = { | ||
261 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
262 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
263 | }, | ||
264 | }, | ||
265 | [C(DTLB)] = { | ||
266 | /* | ||
267 | * The ARM performance counters can count micro DTLB misses, | ||
268 | * micro ITLB misses and main TLB misses. There isn't an event | ||
269 | * for TLB misses, so use the micro misses here and if users | ||
270 | * want the main TLB misses they can use a raw counter. | ||
271 | */ | ||
272 | [C(OP_READ)] = { | ||
273 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
274 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, | ||
275 | }, | ||
276 | [C(OP_WRITE)] = { | ||
277 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
278 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, | ||
279 | }, | ||
280 | [C(OP_PREFETCH)] = { | ||
281 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
282 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
283 | }, | ||
284 | }, | ||
285 | [C(ITLB)] = { | ||
286 | [C(OP_READ)] = { | ||
287 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
288 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, | ||
289 | }, | ||
290 | [C(OP_WRITE)] = { | ||
291 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
292 | [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, | ||
293 | }, | ||
294 | [C(OP_PREFETCH)] = { | ||
295 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
296 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
297 | }, | ||
298 | }, | ||
299 | [C(BPU)] = { | ||
300 | [C(OP_READ)] = { | ||
301 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
302 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
303 | }, | ||
304 | [C(OP_WRITE)] = { | ||
305 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
306 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
307 | }, | ||
308 | [C(OP_PREFETCH)] = { | ||
309 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
310 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
311 | }, | ||
312 | }, | ||
313 | }; | ||
314 | |||
315 | static inline unsigned long | ||
316 | armv6_pmcr_read(void) | ||
317 | { | ||
318 | u32 val; | ||
319 | asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); | ||
320 | return val; | ||
321 | } | ||
322 | |||
323 | static inline void | ||
324 | armv6_pmcr_write(unsigned long val) | ||
325 | { | ||
326 | asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); | ||
327 | } | ||
328 | |||
/* PMCR control bits. */
#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)

/* PMCR per-counter interrupt enable bits. */
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)

/* PMCR per-counter overflow flags. */
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)

/* PMCR event selection fields: eight bits per configurable counter. */
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)

/* All three overflow flags together. */
#define ARMV6_PMCR_OVERFLOWED_MASK \
	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
	 ARMV6_PMCR_CCOUNT_OVERFLOW)
347 | |||
348 | static inline int | ||
349 | armv6_pmcr_has_overflowed(unsigned long pmcr) | ||
350 | { | ||
351 | return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; | ||
352 | } | ||
353 | |||
354 | static inline int | ||
355 | armv6_pmcr_counter_has_overflowed(unsigned long pmcr, | ||
356 | enum armv6_counters counter) | ||
357 | { | ||
358 | int ret = 0; | ||
359 | |||
360 | if (ARMV6_CYCLE_COUNTER == counter) | ||
361 | ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; | ||
362 | else if (ARMV6_COUNTER0 == counter) | ||
363 | ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; | ||
364 | else if (ARMV6_COUNTER1 == counter) | ||
365 | ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; | ||
366 | else | ||
367 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
368 | |||
369 | return ret; | ||
370 | } | ||
371 | |||
372 | static inline u32 | ||
373 | armv6pmu_read_counter(int counter) | ||
374 | { | ||
375 | unsigned long value = 0; | ||
376 | |||
377 | if (ARMV6_CYCLE_COUNTER == counter) | ||
378 | asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); | ||
379 | else if (ARMV6_COUNTER0 == counter) | ||
380 | asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); | ||
381 | else if (ARMV6_COUNTER1 == counter) | ||
382 | asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); | ||
383 | else | ||
384 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
385 | |||
386 | return value; | ||
387 | } | ||
388 | |||
389 | static inline void | ||
390 | armv6pmu_write_counter(int counter, | ||
391 | u32 value) | ||
392 | { | ||
393 | if (ARMV6_CYCLE_COUNTER == counter) | ||
394 | asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); | ||
395 | else if (ARMV6_COUNTER0 == counter) | ||
396 | asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); | ||
397 | else if (ARMV6_COUNTER1 == counter) | ||
398 | asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); | ||
399 | else | ||
400 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
401 | } | ||
402 | |||
403 | void | ||
404 | armv6pmu_enable_event(struct hw_perf_event *hwc, | ||
405 | int idx) | ||
406 | { | ||
407 | unsigned long val, mask, evt, flags; | ||
408 | |||
409 | if (ARMV6_CYCLE_COUNTER == idx) { | ||
410 | mask = 0; | ||
411 | evt = ARMV6_PMCR_CCOUNT_IEN; | ||
412 | } else if (ARMV6_COUNTER0 == idx) { | ||
413 | mask = ARMV6_PMCR_EVT_COUNT0_MASK; | ||
414 | evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | | ||
415 | ARMV6_PMCR_COUNT0_IEN; | ||
416 | } else if (ARMV6_COUNTER1 == idx) { | ||
417 | mask = ARMV6_PMCR_EVT_COUNT1_MASK; | ||
418 | evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | | ||
419 | ARMV6_PMCR_COUNT1_IEN; | ||
420 | } else { | ||
421 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
422 | return; | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * Mask out the current event and set the counter to count the event | ||
427 | * that we're interested in. | ||
428 | */ | ||
429 | spin_lock_irqsave(&pmu_lock, flags); | ||
430 | val = armv6_pmcr_read(); | ||
431 | val &= ~mask; | ||
432 | val |= evt; | ||
433 | armv6_pmcr_write(val); | ||
434 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
435 | } | ||
436 | |||
437 | static irqreturn_t | ||
438 | armv6pmu_handle_irq(int irq_num, | ||
439 | void *dev) | ||
440 | { | ||
441 | unsigned long pmcr = armv6_pmcr_read(); | ||
442 | struct perf_sample_data data; | ||
443 | struct cpu_hw_events *cpuc; | ||
444 | struct pt_regs *regs; | ||
445 | int idx; | ||
446 | |||
447 | if (!armv6_pmcr_has_overflowed(pmcr)) | ||
448 | return IRQ_NONE; | ||
449 | |||
450 | regs = get_irq_regs(); | ||
451 | |||
452 | /* | ||
453 | * The interrupts are cleared by writing the overflow flags back to | ||
454 | * the control register. All of the other bits don't have any effect | ||
455 | * if they are rewritten, so write the whole value back. | ||
456 | */ | ||
457 | armv6_pmcr_write(pmcr); | ||
458 | |||
459 | perf_sample_data_init(&data, 0); | ||
460 | |||
461 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
462 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
463 | struct perf_event *event = cpuc->events[idx]; | ||
464 | struct hw_perf_event *hwc; | ||
465 | |||
466 | if (!test_bit(idx, cpuc->active_mask)) | ||
467 | continue; | ||
468 | |||
469 | /* | ||
470 | * We have a single interrupt for all counters. Check that | ||
471 | * each counter has overflowed before we process it. | ||
472 | */ | ||
473 | if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) | ||
474 | continue; | ||
475 | |||
476 | hwc = &event->hw; | ||
477 | armpmu_event_update(event, hwc, idx); | ||
478 | data.period = event->hw.last_period; | ||
479 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
480 | continue; | ||
481 | |||
482 | if (perf_event_overflow(event, 0, &data, regs)) | ||
483 | armpmu->disable(hwc, idx); | ||
484 | } | ||
485 | |||
486 | /* | ||
487 | * Handle the pending perf events. | ||
488 | * | ||
489 | * Note: this call *must* be run with interrupts disabled. For | ||
490 | * platforms that can have the PMU interrupts raised as an NMI, this | ||
491 | * will not work. | ||
492 | */ | ||
493 | irq_work_run(); | ||
494 | |||
495 | return IRQ_HANDLED; | ||
496 | } | ||
497 | |||
498 | static void | ||
499 | armv6pmu_start(void) | ||
500 | { | ||
501 | unsigned long flags, val; | ||
502 | |||
503 | spin_lock_irqsave(&pmu_lock, flags); | ||
504 | val = armv6_pmcr_read(); | ||
505 | val |= ARMV6_PMCR_ENABLE; | ||
506 | armv6_pmcr_write(val); | ||
507 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
508 | } | ||
509 | |||
510 | static void | ||
511 | armv6pmu_stop(void) | ||
512 | { | ||
513 | unsigned long flags, val; | ||
514 | |||
515 | spin_lock_irqsave(&pmu_lock, flags); | ||
516 | val = armv6_pmcr_read(); | ||
517 | val &= ~ARMV6_PMCR_ENABLE; | ||
518 | armv6_pmcr_write(val); | ||
519 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
520 | } | ||
521 | |||
522 | static int | ||
523 | armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
524 | struct hw_perf_event *event) | ||
525 | { | ||
526 | /* Always place a cycle counter into the cycle counter. */ | ||
527 | if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { | ||
528 | if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) | ||
529 | return -EAGAIN; | ||
530 | |||
531 | return ARMV6_CYCLE_COUNTER; | ||
532 | } else { | ||
533 | /* | ||
534 | * For anything other than a cycle counter, try and use | ||
535 | * counter0 and counter1. | ||
536 | */ | ||
537 | if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) | ||
538 | return ARMV6_COUNTER1; | ||
539 | |||
540 | if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) | ||
541 | return ARMV6_COUNTER0; | ||
542 | |||
543 | /* The counters are all in use. */ | ||
544 | return -EAGAIN; | ||
545 | } | ||
546 | } | ||
547 | |||
548 | static void | ||
549 | armv6pmu_disable_event(struct hw_perf_event *hwc, | ||
550 | int idx) | ||
551 | { | ||
552 | unsigned long val, mask, evt, flags; | ||
553 | |||
554 | if (ARMV6_CYCLE_COUNTER == idx) { | ||
555 | mask = ARMV6_PMCR_CCOUNT_IEN; | ||
556 | evt = 0; | ||
557 | } else if (ARMV6_COUNTER0 == idx) { | ||
558 | mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; | ||
559 | evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; | ||
560 | } else if (ARMV6_COUNTER1 == idx) { | ||
561 | mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; | ||
562 | evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; | ||
563 | } else { | ||
564 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
565 | return; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Mask out the current event and set the counter to count the number | ||
570 | * of ETM bus signal assertion cycles. The external reporting should | ||
571 | * be disabled and so this should never increment. | ||
572 | */ | ||
573 | spin_lock_irqsave(&pmu_lock, flags); | ||
574 | val = armv6_pmcr_read(); | ||
575 | val &= ~mask; | ||
576 | val |= evt; | ||
577 | armv6_pmcr_write(val); | ||
578 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
579 | } | ||
580 | |||
581 | static void | ||
582 | armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, | ||
583 | int idx) | ||
584 | { | ||
585 | unsigned long val, mask, flags, evt = 0; | ||
586 | |||
587 | if (ARMV6_CYCLE_COUNTER == idx) { | ||
588 | mask = ARMV6_PMCR_CCOUNT_IEN; | ||
589 | } else if (ARMV6_COUNTER0 == idx) { | ||
590 | mask = ARMV6_PMCR_COUNT0_IEN; | ||
591 | } else if (ARMV6_COUNTER1 == idx) { | ||
592 | mask = ARMV6_PMCR_COUNT1_IEN; | ||
593 | } else { | ||
594 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
595 | return; | ||
596 | } | ||
597 | |||
598 | /* | ||
599 | * Unlike UP ARMv6, we don't have a way of stopping the counters. We | ||
600 | * simply disable the interrupt reporting. | ||
601 | */ | ||
602 | spin_lock_irqsave(&pmu_lock, flags); | ||
603 | val = armv6_pmcr_read(); | ||
604 | val &= ~mask; | ||
605 | val |= evt; | ||
606 | armv6_pmcr_write(val); | ||
607 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
608 | } | ||
609 | |||
/*
 * PMU description for uniprocessor ARMv6 cores: the callbacks and event
 * maps used to drive the hardware.
 */
static const struct arm_pmu armv6pmu = {
	.id			= ARM_PERF_PMU_ID_V6,
	.name			= "v6",
	.handle_irq		= armv6pmu_handle_irq,
	.enable			= armv6pmu_enable_event,
	.disable		= armv6pmu_disable_event,
	.read_counter		= armv6pmu_read_counter,
	.write_counter		= armv6pmu_write_counter,
	.get_event_idx		= armv6pmu_get_event_idx,
	.start			= armv6pmu_start,
	.stop			= armv6pmu_stop,
	.cache_map		= &armv6_perf_cache_map,
	.event_map		= &armv6_perf_map,
	.raw_event_mask		= 0xFF,
	/* The cycle counter plus programmable counters 0 and 1. */
	.num_events		= 3,
	/* Largest period that fits in 32 bits. */
	.max_period		= (1LLU << 32) - 1,
};

/* Return the uniprocessor ARMv6 PMU description. */
const struct arm_pmu *__init armv6pmu_init(void)
{
	return &armv6pmu;
}
632 | |||
/*
 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
 * that some of the events have different enumerations and that there is no
 * *hack* to stop the programmable counters. To stop a counter we simply
 * disable its interrupt reporting; the event selection is left untouched.
 * When unthrottling we reset the period and enable the interrupt reporting.
 *
 * Apart from the id, the name, the disable callback and the two event maps,
 * every field below matches the single core ARMv6 description.
 */
static const struct arm_pmu armv6mpcore_pmu = {
	.id			= ARM_PERF_PMU_ID_V6MP,
	.name			= "v6mpcore",
	.handle_irq		= armv6pmu_handle_irq,
	.enable			= armv6pmu_enable_event,
	.disable		= armv6mpcore_pmu_disable_event,
	.read_counter		= armv6pmu_read_counter,
	.write_counter		= armv6pmu_write_counter,
	.get_event_idx		= armv6pmu_get_event_idx,
	.start			= armv6pmu_start,
	.stop			= armv6pmu_stop,
	.cache_map		= &armv6mpcore_perf_cache_map,
	.event_map		= &armv6mpcore_perf_map,
	.raw_event_mask		= 0xFF,
	/* The cycle counter plus programmable counters 0 and 1. */
	.num_events		= 3,
	/* Largest period that fits in 32 bits. */
	.max_period		= (1LLU << 32) - 1,
};

/* Return the ARMv6 MPCore PMU description. */
const struct arm_pmu *__init armv6mpcore_pmu_init(void)
{
	return &armv6mpcore_pmu;
}
662 | #else | ||
/*
 * Stubs for kernels built without CONFIG_CPU_V6: no ARMv6 PMU description
 * is available, so both init functions return NULL.
 */
const struct arm_pmu *__init armv6pmu_init(void)
{
	return NULL;
}

const struct arm_pmu *__init armv6mpcore_pmu_init(void)
{
	return NULL;
}
672 | #endif /* CONFIG_CPU_V6 */ | ||
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c new file mode 100644 index 000000000000..4d0423969df9 --- /dev/null +++ b/arch/arm/kernel/perf_event_v7.c | |||
@@ -0,0 +1,906 @@ | |||
1 | /* | ||
2 | * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. | ||
3 | * | ||
4 | * ARMv7 support: Jean Pihet <jpihet@mvista.com> | ||
5 | * 2010 (c) MontaVista Software, LLC. | ||
6 | * | ||
7 | * Copied from ARMv6 code, with the low level code inspired | ||
8 | * by the ARMv7 Oprofile code. | ||
9 | * | ||
10 | * Cortex-A8 has up to 4 configurable performance counters and | ||
11 | * a single cycle counter. | ||
12 | * Cortex-A9 has up to 31 configurable performance counters and | ||
13 | * a single cycle counter. | ||
14 | * | ||
15 | * All counters can be enabled/disabled and IRQ masked separately. The cycle | ||
16 | * counter and all 4 performance counters together can be reset separately. | ||
17 | */ | ||
18 | |||
19 | #ifdef CONFIG_CPU_V7 | ||
/*
 * Common ARMv7 event types.
 *
 * These are the event numbers referenced by both the Cortex-A8 and the
 * Cortex-A9 event maps below.  Values 0x08 and 0x0E are not defined here;
 * they appear in the Cortex-A8 specific enum instead.
 */
enum armv7_perf_types {
	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
	ARMV7_PERFCTR_DREAD			= 0x06,
	ARMV7_PERFCTR_DWRITE			= 0x07,

	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
	/*
	 * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
	 * It counts:
	 *  - all branch instructions,
	 *  - instructions that explicitly write the PC,
	 *  - exception generating instructions.
	 */
	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,

	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,

	/*
	 * NOTE(review): presumably a pseudo event number that selects the
	 * dedicated cycle counter rather than a programmable one - confirm
	 * against the ARMv7 get_event_idx implementation.
	 */
	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
};
50 | |||
/*
 * ARMv7 Cortex-A8 specific event types.
 *
 * Several of these numbers (e.g. 0x40-0x42, 0x50, 0x51, 0x70-0x72) are
 * also used, with a different meaning, by the Cortex-A9 specific enum, so
 * a value is only meaningful together with the matching core's event maps.
 */
enum armv7_a8_perf_types {
	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,

	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,

	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
	ARMV7_PERFCTR_L2_ACCESS			= 0x43,
	ARMV7_PERFCTR_L2_CACH_MISS		= 0x44,
	ARMV7_PERFCTR_AXI_READ_CYCLES		= 0x45,
	ARMV7_PERFCTR_AXI_WRITE_CYCLES		= 0x46,
	ARMV7_PERFCTR_MEMORY_REPLAY		= 0x47,
	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY	= 0x48,
	ARMV7_PERFCTR_L1_DATA_MISS		= 0x49,
	ARMV7_PERFCTR_L1_INST_MISS		= 0x4A,
	ARMV7_PERFCTR_L1_DATA_COLORING		= 0x4B,
	ARMV7_PERFCTR_L1_NEON_DATA		= 0x4C,
	ARMV7_PERFCTR_L1_NEON_CACH_DATA		= 0x4D,
	ARMV7_PERFCTR_L2_NEON			= 0x4E,
	ARMV7_PERFCTR_L2_NEON_HIT		= 0x4F,
	ARMV7_PERFCTR_L1_INST			= 0x50,
	ARMV7_PERFCTR_PC_RETURN_MIS_PRED	= 0x51,
	ARMV7_PERFCTR_PC_BRANCH_FAILED		= 0x52,
	ARMV7_PERFCTR_PC_BRANCH_TAKEN		= 0x53,
	ARMV7_PERFCTR_PC_BRANCH_EXECUTED	= 0x54,
	ARMV7_PERFCTR_OP_EXECUTED		= 0x55,
	ARMV7_PERFCTR_CYCLES_INST_STALL		= 0x56,
	ARMV7_PERFCTR_CYCLES_INST		= 0x57,
	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL	= 0x58,
	ARMV7_PERFCTR_CYCLES_NEON_INST_STALL	= 0x59,
	ARMV7_PERFCTR_NEON_CYCLES		= 0x5A,

	ARMV7_PERFCTR_PMU0_EVENTS		= 0x70,
	ARMV7_PERFCTR_PMU1_EVENTS		= 0x71,
	ARMV7_PERFCTR_PMU_EVENTS		= 0x72,
};
89 | |||
/*
 * ARMv7 Cortex-A9 specific event types.
 *
 * Several of these numbers (e.g. 0x40-0x42, 0x50, 0x51, 0x70-0x72) are
 * also used, with a different meaning, by the Cortex-A8 specific enum, so
 * a value is only meaningful together with the matching core's event maps.
 */
enum armv7_a9_perf_types {
	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC	= 0x40,
	ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC	= 0x41,
	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC	= 0x42,

	ARMV7_PERFCTR_COHERENT_LINE_MISS	= 0x50,
	ARMV7_PERFCTR_COHERENT_LINE_HIT		= 0x51,

	ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES	= 0x60,
	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES	= 0x61,
	ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES	= 0x62,
	ARMV7_PERFCTR_STREX_EXECUTED_PASSED	= 0x63,
	ARMV7_PERFCTR_STREX_EXECUTED_FAILED	= 0x64,
	ARMV7_PERFCTR_DATA_EVICTION		= 0x65,
	ARMV7_PERFCTR_ISSUE_STAGE_NO_INST	= 0x66,
	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY		= 0x67,
	ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE	= 0x68,

	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS	= 0x6E,

	ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST	= 0x70,
	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST	= 0x71,
	ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST	= 0x72,
	ARMV7_PERFCTR_FP_EXECUTED_INST		= 0x73,
	ARMV7_PERFCTR_NEON_EXECUTED_INST	= 0x74,

	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES	= 0x80,
	ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES	= 0x81,
	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES	= 0x82,
	ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES	= 0x83,
	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
	ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES	= 0x85,
	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES	= 0x86,

	ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES	= 0x8A,
	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES	= 0x8B,

	ARMV7_PERFCTR_ISB_INST			= 0x90,
	ARMV7_PERFCTR_DSB_INST			= 0x91,
	ARMV7_PERFCTR_DMB_INST			= 0x92,
	ARMV7_PERFCTR_EXT_INTERRUPTS		= 0x93,

	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED	= 0xA0,
	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED	= 0xA1,
	ARMV7_PERFCTR_PLE_FIFO_FLUSH		= 0xA2,
	ARMV7_PERFCTR_PLE_RQST_COMPLETED	= 0xA3,
	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW		= 0xA4,
	ARMV7_PERFCTR_PLE_RQST_PROG		= 0xA5
};
140 | |||
/*
 * Cortex-A8 HW events mapping
 *
 * Maps each generic PERF_COUNT_HW_* event to the Cortex-A8 event number
 * that implements it, or to HW_OP_UNSUPPORTED.
 *
 * The hardware events that we support. We do support cache operations but
 * we have harvard caches and no way to combine instruction and data
 * accesses/misses in hardware, which is why the generic cache
 * references/misses events are marked unsupported.
 */
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = ARMV7_PERFCTR_INSTR_EXECUTED,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
};
157 | |||
158 | static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
159 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
160 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
161 | [C(L1D)] = { | ||
162 | /* | ||
163 | * The performance counters don't differentiate between read | ||
164 | * and write accesses/misses so this isn't strictly correct, | ||
165 | * but it's the best we can do. Writes and reads get | ||
166 | * combined. | ||
167 | */ | ||
168 | [C(OP_READ)] = { | ||
169 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, | ||
170 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, | ||
171 | }, | ||
172 | [C(OP_WRITE)] = { | ||
173 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, | ||
174 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, | ||
175 | }, | ||
176 | [C(OP_PREFETCH)] = { | ||
177 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
178 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
179 | }, | ||
180 | }, | ||
181 | [C(L1I)] = { | ||
182 | [C(OP_READ)] = { | ||
183 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, | ||
184 | [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, | ||
185 | }, | ||
186 | [C(OP_WRITE)] = { | ||
187 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, | ||
188 | [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, | ||
189 | }, | ||
190 | [C(OP_PREFETCH)] = { | ||
191 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
192 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
193 | }, | ||
194 | }, | ||
195 | [C(LL)] = { | ||
196 | [C(OP_READ)] = { | ||
197 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, | ||
198 | [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, | ||
199 | }, | ||
200 | [C(OP_WRITE)] = { | ||
201 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, | ||
202 | [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, | ||
203 | }, | ||
204 | [C(OP_PREFETCH)] = { | ||
205 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
206 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
207 | }, | ||
208 | }, | ||
209 | [C(DTLB)] = { | ||
210 | /* | ||
211 | * Only ITLB misses and DTLB refills are supported. | ||
212 | * If users want any other TLB event, a raw counter | ||
213 | * must be used. | ||
214 | */ | ||
215 | [C(OP_READ)] = { | ||
216 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
217 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, | ||
218 | }, | ||
219 | [C(OP_WRITE)] = { | ||
220 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
221 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, | ||
222 | }, | ||
223 | [C(OP_PREFETCH)] = { | ||
224 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
225 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
226 | }, | ||
227 | }, | ||
228 | [C(ITLB)] = { | ||
229 | [C(OP_READ)] = { | ||
230 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
231 | [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, | ||
232 | }, | ||
233 | [C(OP_WRITE)] = { | ||
234 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
235 | [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, | ||
236 | }, | ||
237 | [C(OP_PREFETCH)] = { | ||
238 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
239 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
240 | }, | ||
241 | }, | ||
242 | [C(BPU)] = { | ||
243 | [C(OP_READ)] = { | ||
244 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, | ||
245 | [C(RESULT_MISS)] | ||
246 | = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, | ||
247 | }, | ||
248 | [C(OP_WRITE)] = { | ||
249 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, | ||
250 | [C(RESULT_MISS)] | ||
251 | = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, | ||
252 | }, | ||
253 | [C(OP_PREFETCH)] = { | ||
254 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
255 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
256 | }, | ||
257 | }, | ||
258 | }; | ||
259 | |||
/*
 * Cortex-A9 HW events mapping
 *
 * Maps each generic PERF_COUNT_HW_* event to the Cortex-A9 event number
 * that implements it.  The generic cache references/misses events are
 * mapped to the coherent line hit/miss events.
 */
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = ARMV7_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS]	    =
					ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = ARMV7_PERFCTR_COHERENT_LINE_HIT,
	[PERF_COUNT_HW_CACHE_MISSES]	    = ARMV7_PERFCTR_COHERENT_LINE_MISS,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES]	    = ARMV7_PERFCTR_CLOCK_CYCLES,
};
273 | |||
274 | static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
275 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
277 | [C(L1D)] = { | ||
278 | /* | ||
279 | * The performance counters don't differentiate between read | ||
280 | * and write accesses/misses so this isn't strictly correct, | ||
281 | * but it's the best we can do. Writes and reads get | ||
282 | * combined. | ||
283 | */ | ||
284 | [C(OP_READ)] = { | ||
285 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, | ||
286 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, | ||
287 | }, | ||
288 | [C(OP_WRITE)] = { | ||
289 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, | ||
290 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, | ||
291 | }, | ||
292 | [C(OP_PREFETCH)] = { | ||
293 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
294 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
295 | }, | ||
296 | }, | ||
297 | [C(L1I)] = { | ||
298 | [C(OP_READ)] = { | ||
299 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
300 | [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, | ||
301 | }, | ||
302 | [C(OP_WRITE)] = { | ||
303 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
304 | [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, | ||
305 | }, | ||
306 | [C(OP_PREFETCH)] = { | ||
307 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
308 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
309 | }, | ||
310 | }, | ||
311 | [C(LL)] = { | ||
312 | [C(OP_READ)] = { | ||
313 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
314 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
315 | }, | ||
316 | [C(OP_WRITE)] = { | ||
317 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
318 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
319 | }, | ||
320 | [C(OP_PREFETCH)] = { | ||
321 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
322 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
323 | }, | ||
324 | }, | ||
325 | [C(DTLB)] = { | ||
326 | /* | ||
327 | * Only ITLB misses and DTLB refills are supported. | ||
328 | * If users want any other TLB event, a raw counter | ||
329 | * must be used. | ||
330 | */ | ||
331 | [C(OP_READ)] = { | ||
332 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
333 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, | ||
334 | }, | ||
335 | [C(OP_WRITE)] = { | ||
336 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
337 | [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, | ||
338 | }, | ||
339 | [C(OP_PREFETCH)] = { | ||
340 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
341 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
342 | }, | ||
343 | }, | ||
344 | [C(ITLB)] = { | ||
345 | [C(OP_READ)] = { | ||
346 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
347 | [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, | ||
348 | }, | ||
349 | [C(OP_WRITE)] = { | ||
350 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
351 | [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, | ||
352 | }, | ||
353 | [C(OP_PREFETCH)] = { | ||
354 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
355 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
356 | }, | ||
357 | }, | ||
358 | [C(BPU)] = { | ||
359 | [C(OP_READ)] = { | ||
360 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, | ||
361 | [C(RESULT_MISS)] | ||
362 | = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, | ||
363 | }, | ||
364 | [C(OP_WRITE)] = { | ||
365 | [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, | ||
366 | [C(RESULT_MISS)] | ||
367 | = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, | ||
368 | }, | ||
369 | [C(OP_PREFETCH)] = { | ||
370 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
371 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
372 | }, | ||
373 | }, | ||
374 | }; | ||
375 | |||
/*
 * Perf Events counters
 *
 * These are the counter indices used by the perf code.  They are offset
 * from the hardware counter numbers (ARMV7_CNT0 ...) by
 * ARMV7_EVENT_CNT_TO_CNTx, see below.
 */
enum armv7_counters {
	ARMV7_CYCLE_COUNTER		= 1,	/* Cycle counter */
	ARMV7_COUNTER0			= 2,	/* First event counter */
};

/*
 * The cycle counter is ARMV7_CYCLE_COUNTER.
 * The first event counter is ARMV7_COUNTER0.
 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1).
 *
 * NOTE(review): this is the index of the last event counter only if
 * armpmu->num_events counts the event counters alone, i.e. excludes the
 * cycle counter - confirm against the code that sets num_events.
 */
#define	ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)

/*
 * ARMv7 low level PMNC access
 */

/*
 * Per-CPU PMNC: config reg
 */
#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
#define	ARMV7_PMNC_N_MASK	0x1f
#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */

/*
 * Available counters
 */
#define ARMV7_CNT0		0	/* First event counter */
#define ARMV7_CCNT		31	/* Cycle counter */

/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)

/*
 * The per-counter bit macros below use unsigned constants so that the
 * cycle counter bit (bit 31) is a well-defined, positive value, and they
 * parenthesize their argument so that any expression may be passed.
 */

/*
 * CNTENS: counters enable reg
 */
#define ARMV7_CNTENS_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C		(1U << ARMV7_CCNT)

/*
 * CNTENC: counters disable reg
 */
#define ARMV7_CNTENC_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C		(1U << ARMV7_CCNT)

/*
 * INTENS: counters overflow interrupt enable reg
 */
#define ARMV7_INTENS_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C		(1U << ARMV7_CCNT)

/*
 * INTENC: counters overflow interrupt disable reg
 */
#define ARMV7_INTENC_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C		(1U << ARMV7_CCNT)

/*
 * EVTSEL: Event selection reg
 */
#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */

/*
 * SELECT: Counter selection reg
 */
#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */

/*
 * FLAG: counters overflow flag status reg
 */
#define ARMV7_FLAG_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C		(1U << ARMV7_CCNT)
#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
458 | |||
459 | static inline unsigned long armv7_pmnc_read(void) | ||
460 | { | ||
461 | u32 val; | ||
462 | asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); | ||
463 | return val; | ||
464 | } | ||
465 | |||
466 | static inline void armv7_pmnc_write(unsigned long val) | ||
467 | { | ||
468 | val &= ARMV7_PMNC_MASK; | ||
469 | asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); | ||
470 | } | ||
471 | |||
/*
 * Return non-zero if any overflow flag is set in @pmnc, a value of the
 * overflow flag status register.  The result is the masked flag bits, not
 * a normalized 0/1.
 */
static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
{
	return pmnc & ARMV7_OVERFLOWED_MASK;
}
476 | |||
477 | static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, | ||
478 | enum armv7_counters counter) | ||
479 | { | ||
480 | int ret = 0; | ||
481 | |||
482 | if (counter == ARMV7_CYCLE_COUNTER) | ||
483 | ret = pmnc & ARMV7_FLAG_C; | ||
484 | else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) | ||
485 | ret = pmnc & ARMV7_FLAG_P(counter); | ||
486 | else | ||
487 | pr_err("CPU%u checking wrong counter %d overflow status\n", | ||
488 | smp_processor_id(), counter); | ||
489 | |||
490 | return ret; | ||
491 | } | ||
492 | |||
493 | static inline int armv7_pmnc_select_counter(unsigned int idx) | ||
494 | { | ||
495 | u32 val; | ||
496 | |||
497 | if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { | ||
498 | pr_err("CPU%u selecting wrong PMNC counter" | ||
499 | " %d\n", smp_processor_id(), idx); | ||
500 | return -1; | ||
501 | } | ||
502 | |||
503 | val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; | ||
504 | asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); | ||
505 | |||
506 | return idx; | ||
507 | } | ||
508 | |||
509 | static inline u32 armv7pmu_read_counter(int idx) | ||
510 | { | ||
511 | unsigned long value = 0; | ||
512 | |||
513 | if (idx == ARMV7_CYCLE_COUNTER) | ||
514 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); | ||
515 | else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { | ||
516 | if (armv7_pmnc_select_counter(idx) == idx) | ||
517 | asm volatile("mrc p15, 0, %0, c9, c13, 2" | ||
518 | : "=r" (value)); | ||
519 | } else | ||
520 | pr_err("CPU%u reading wrong counter %d\n", | ||
521 | smp_processor_id(), idx); | ||
522 | |||
523 | return value; | ||
524 | } | ||
525 | |||
526 | static inline void armv7pmu_write_counter(int idx, u32 value) | ||
527 | { | ||
528 | if (idx == ARMV7_CYCLE_COUNTER) | ||
529 | asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); | ||
530 | else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { | ||
531 | if (armv7_pmnc_select_counter(idx) == idx) | ||
532 | asm volatile("mcr p15, 0, %0, c9, c13, 2" | ||
533 | : : "r" (value)); | ||
534 | } else | ||
535 | pr_err("CPU%u writing wrong counter %d\n", | ||
536 | smp_processor_id(), idx); | ||
537 | } | ||
538 | |||
539 | static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) | ||
540 | { | ||
541 | if (armv7_pmnc_select_counter(idx) == idx) { | ||
542 | val &= ARMV7_EVTSEL_MASK; | ||
543 | asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); | ||
544 | } | ||
545 | } | ||
546 | |||
547 | static inline u32 armv7_pmnc_enable_counter(unsigned int idx) | ||
548 | { | ||
549 | u32 val; | ||
550 | |||
551 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
552 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
553 | pr_err("CPU%u enabling wrong PMNC counter" | ||
554 | " %d\n", smp_processor_id(), idx); | ||
555 | return -1; | ||
556 | } | ||
557 | |||
558 | if (idx == ARMV7_CYCLE_COUNTER) | ||
559 | val = ARMV7_CNTENS_C; | ||
560 | else | ||
561 | val = ARMV7_CNTENS_P(idx); | ||
562 | |||
563 | asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); | ||
564 | |||
565 | return idx; | ||
566 | } | ||
567 | |||
568 | static inline u32 armv7_pmnc_disable_counter(unsigned int idx) | ||
569 | { | ||
570 | u32 val; | ||
571 | |||
572 | |||
573 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
574 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
575 | pr_err("CPU%u disabling wrong PMNC counter" | ||
576 | " %d\n", smp_processor_id(), idx); | ||
577 | return -1; | ||
578 | } | ||
579 | |||
580 | if (idx == ARMV7_CYCLE_COUNTER) | ||
581 | val = ARMV7_CNTENC_C; | ||
582 | else | ||
583 | val = ARMV7_CNTENC_P(idx); | ||
584 | |||
585 | asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); | ||
586 | |||
587 | return idx; | ||
588 | } | ||
589 | |||
590 | static inline u32 armv7_pmnc_enable_intens(unsigned int idx) | ||
591 | { | ||
592 | u32 val; | ||
593 | |||
594 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
595 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
596 | pr_err("CPU%u enabling wrong PMNC counter" | ||
597 | " interrupt enable %d\n", smp_processor_id(), idx); | ||
598 | return -1; | ||
599 | } | ||
600 | |||
601 | if (idx == ARMV7_CYCLE_COUNTER) | ||
602 | val = ARMV7_INTENS_C; | ||
603 | else | ||
604 | val = ARMV7_INTENS_P(idx); | ||
605 | |||
606 | asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); | ||
607 | |||
608 | return idx; | ||
609 | } | ||
610 | |||
611 | static inline u32 armv7_pmnc_disable_intens(unsigned int idx) | ||
612 | { | ||
613 | u32 val; | ||
614 | |||
615 | if ((idx != ARMV7_CYCLE_COUNTER) && | ||
616 | ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { | ||
617 | pr_err("CPU%u disabling wrong PMNC counter" | ||
618 | " interrupt enable %d\n", smp_processor_id(), idx); | ||
619 | return -1; | ||
620 | } | ||
621 | |||
622 | if (idx == ARMV7_CYCLE_COUNTER) | ||
623 | val = ARMV7_INTENC_C; | ||
624 | else | ||
625 | val = ARMV7_INTENC_P(idx); | ||
626 | |||
627 | asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); | ||
628 | |||
629 | return idx; | ||
630 | } | ||
631 | |||
632 | static inline u32 armv7_pmnc_getreset_flags(void) | ||
633 | { | ||
634 | u32 val; | ||
635 | |||
636 | /* Read */ | ||
637 | asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); | ||
638 | |||
639 | /* Write to clear flags */ | ||
640 | val &= ARMV7_FLAG_MASK; | ||
641 | asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); | ||
642 | |||
643 | return val; | ||
644 | } | ||
645 | |||
646 | #ifdef DEBUG | ||
647 | static void armv7_pmnc_dump_regs(void) | ||
648 | { | ||
649 | u32 val; | ||
650 | unsigned int cnt; | ||
651 | |||
652 | printk(KERN_INFO "PMNC registers dump:\n"); | ||
653 | |||
654 | asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); | ||
655 | printk(KERN_INFO "PMNC =0x%08x\n", val); | ||
656 | |||
657 | asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); | ||
658 | printk(KERN_INFO "CNTENS=0x%08x\n", val); | ||
659 | |||
660 | asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); | ||
661 | printk(KERN_INFO "INTENS=0x%08x\n", val); | ||
662 | |||
663 | asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); | ||
664 | printk(KERN_INFO "FLAGS =0x%08x\n", val); | ||
665 | |||
666 | asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); | ||
667 | printk(KERN_INFO "SELECT=0x%08x\n", val); | ||
668 | |||
669 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); | ||
670 | printk(KERN_INFO "CCNT =0x%08x\n", val); | ||
671 | |||
672 | for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { | ||
673 | armv7_pmnc_select_counter(cnt); | ||
674 | asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); | ||
675 | printk(KERN_INFO "CNT[%d] count =0x%08x\n", | ||
676 | cnt-ARMV7_EVENT_CNT_TO_CNTx, val); | ||
677 | asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); | ||
678 | printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", | ||
679 | cnt-ARMV7_EVENT_CNT_TO_CNTx, val); | ||
680 | } | ||
681 | } | ||
682 | #endif | ||
683 | |||
684 | void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
685 | { | ||
686 | unsigned long flags; | ||
687 | |||
688 | /* | ||
689 | * Enable counter and interrupt, and set the counter to count | ||
690 | * the event that we're interested in. | ||
691 | */ | ||
692 | spin_lock_irqsave(&pmu_lock, flags); | ||
693 | |||
694 | /* | ||
695 | * Disable counter | ||
696 | */ | ||
697 | armv7_pmnc_disable_counter(idx); | ||
698 | |||
699 | /* | ||
700 | * Set event (if destined for PMNx counters) | ||
701 | * We don't need to set the event if it's a cycle count | ||
702 | */ | ||
703 | if (idx != ARMV7_CYCLE_COUNTER) | ||
704 | armv7_pmnc_write_evtsel(idx, hwc->config_base); | ||
705 | |||
706 | /* | ||
707 | * Enable interrupt for this counter | ||
708 | */ | ||
709 | armv7_pmnc_enable_intens(idx); | ||
710 | |||
711 | /* | ||
712 | * Enable counter | ||
713 | */ | ||
714 | armv7_pmnc_enable_counter(idx); | ||
715 | |||
716 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
717 | } | ||
718 | |||
719 | static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
720 | { | ||
721 | unsigned long flags; | ||
722 | |||
723 | /* | ||
724 | * Disable counter and interrupt | ||
725 | */ | ||
726 | spin_lock_irqsave(&pmu_lock, flags); | ||
727 | |||
728 | /* | ||
729 | * Disable counter | ||
730 | */ | ||
731 | armv7_pmnc_disable_counter(idx); | ||
732 | |||
733 | /* | ||
734 | * Disable interrupt for this counter | ||
735 | */ | ||
736 | armv7_pmnc_disable_intens(idx); | ||
737 | |||
738 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
739 | } | ||
740 | |||
741 | static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) | ||
742 | { | ||
743 | unsigned long pmnc; | ||
744 | struct perf_sample_data data; | ||
745 | struct cpu_hw_events *cpuc; | ||
746 | struct pt_regs *regs; | ||
747 | int idx; | ||
748 | |||
749 | /* | ||
750 | * Get and reset the IRQ flags | ||
751 | */ | ||
752 | pmnc = armv7_pmnc_getreset_flags(); | ||
753 | |||
754 | /* | ||
755 | * Did an overflow occur? | ||
756 | */ | ||
757 | if (!armv7_pmnc_has_overflowed(pmnc)) | ||
758 | return IRQ_NONE; | ||
759 | |||
760 | /* | ||
761 | * Handle the counter(s) overflow(s) | ||
762 | */ | ||
763 | regs = get_irq_regs(); | ||
764 | |||
765 | perf_sample_data_init(&data, 0); | ||
766 | |||
767 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
768 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
769 | struct perf_event *event = cpuc->events[idx]; | ||
770 | struct hw_perf_event *hwc; | ||
771 | |||
772 | if (!test_bit(idx, cpuc->active_mask)) | ||
773 | continue; | ||
774 | |||
775 | /* | ||
776 | * We have a single interrupt for all counters. Check that | ||
777 | * each counter has overflowed before we process it. | ||
778 | */ | ||
779 | if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) | ||
780 | continue; | ||
781 | |||
782 | hwc = &event->hw; | ||
783 | armpmu_event_update(event, hwc, idx); | ||
784 | data.period = event->hw.last_period; | ||
785 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
786 | continue; | ||
787 | |||
788 | if (perf_event_overflow(event, 0, &data, regs)) | ||
789 | armpmu->disable(hwc, idx); | ||
790 | } | ||
791 | |||
792 | /* | ||
793 | * Handle the pending perf events. | ||
794 | * | ||
795 | * Note: this call *must* be run with interrupts disabled. For | ||
796 | * platforms that can have the PMU interrupts raised as an NMI, this | ||
797 | * will not work. | ||
798 | */ | ||
799 | irq_work_run(); | ||
800 | |||
801 | return IRQ_HANDLED; | ||
802 | } | ||
803 | |||
804 | static void armv7pmu_start(void) | ||
805 | { | ||
806 | unsigned long flags; | ||
807 | |||
808 | spin_lock_irqsave(&pmu_lock, flags); | ||
809 | /* Enable all counters */ | ||
810 | armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); | ||
811 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
812 | } | ||
813 | |||
814 | static void armv7pmu_stop(void) | ||
815 | { | ||
816 | unsigned long flags; | ||
817 | |||
818 | spin_lock_irqsave(&pmu_lock, flags); | ||
819 | /* Disable all counters */ | ||
820 | armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); | ||
821 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
822 | } | ||
823 | |||
824 | static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
825 | struct hw_perf_event *event) | ||
826 | { | ||
827 | int idx; | ||
828 | |||
829 | /* Always place a cycle counter into the cycle counter. */ | ||
830 | if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { | ||
831 | if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) | ||
832 | return -EAGAIN; | ||
833 | |||
834 | return ARMV7_CYCLE_COUNTER; | ||
835 | } else { | ||
836 | /* | ||
837 | * For anything other than a cycle counter, try and use | ||
838 | * the events counters | ||
839 | */ | ||
840 | for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { | ||
841 | if (!test_and_set_bit(idx, cpuc->used_mask)) | ||
842 | return idx; | ||
843 | } | ||
844 | |||
845 | /* The counters are all in use. */ | ||
846 | return -EAGAIN; | ||
847 | } | ||
848 | } | ||
849 | |||
850 | static struct arm_pmu armv7pmu = { | ||
851 | .handle_irq = armv7pmu_handle_irq, | ||
852 | .enable = armv7pmu_enable_event, | ||
853 | .disable = armv7pmu_disable_event, | ||
854 | .read_counter = armv7pmu_read_counter, | ||
855 | .write_counter = armv7pmu_write_counter, | ||
856 | .get_event_idx = armv7pmu_get_event_idx, | ||
857 | .start = armv7pmu_start, | ||
858 | .stop = armv7pmu_stop, | ||
859 | .raw_event_mask = 0xFF, | ||
860 | .max_period = (1LLU << 32) - 1, | ||
861 | }; | ||
862 | |||
863 | static u32 __init armv7_reset_read_pmnc(void) | ||
864 | { | ||
865 | u32 nb_cnt; | ||
866 | |||
867 | /* Initialize & Reset PMNC: C and P bits */ | ||
868 | armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); | ||
869 | |||
870 | /* Read the nb of CNTx counters supported from PMNC */ | ||
871 | nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; | ||
872 | |||
873 | /* Add the CPU cycles counter and return */ | ||
874 | return nb_cnt + 1; | ||
875 | } | ||
876 | |||
877 | const struct arm_pmu *__init armv7_a8_pmu_init(void) | ||
878 | { | ||
879 | armv7pmu.id = ARM_PERF_PMU_ID_CA8; | ||
880 | armv7pmu.name = "ARMv7 Cortex-A8"; | ||
881 | armv7pmu.cache_map = &armv7_a8_perf_cache_map; | ||
882 | armv7pmu.event_map = &armv7_a8_perf_map; | ||
883 | armv7pmu.num_events = armv7_reset_read_pmnc(); | ||
884 | return &armv7pmu; | ||
885 | } | ||
886 | |||
887 | const struct arm_pmu *__init armv7_a9_pmu_init(void) | ||
888 | { | ||
889 | armv7pmu.id = ARM_PERF_PMU_ID_CA9; | ||
890 | armv7pmu.name = "ARMv7 Cortex-A9"; | ||
891 | armv7pmu.cache_map = &armv7_a9_perf_cache_map; | ||
892 | armv7pmu.event_map = &armv7_a9_perf_map; | ||
893 | armv7pmu.num_events = armv7_reset_read_pmnc(); | ||
894 | return &armv7pmu; | ||
895 | } | ||
896 | #else | ||
897 | const struct arm_pmu *__init armv7_a8_pmu_init(void) | ||
898 | { | ||
899 | return NULL; | ||
900 | } | ||
901 | |||
902 | const struct arm_pmu *__init armv7_a9_pmu_init(void) | ||
903 | { | ||
904 | return NULL; | ||
905 | } | ||
906 | #endif /* CONFIG_CPU_V7 */ | ||
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c new file mode 100644 index 000000000000..4e9592789d40 --- /dev/null +++ b/arch/arm/kernel/perf_event_xscale.c | |||
@@ -0,0 +1,807 @@ | |||
1 | /* | ||
2 | * ARMv5 [xscale] Performance counter handling code. | ||
3 | * | ||
4 | * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com> | ||
5 | * | ||
6 | * Based on the previous xscale OProfile code. | ||
7 | * | ||
8 | * There are two variants of the xscale PMU that we support: | ||
9 | * - xscale1pmu: 2 event counters and a cycle counter | ||
10 | * - xscale2pmu: 4 event counters and a cycle counter | ||
11 | * The two variants share event definitions, but have different | ||
12 | * PMU structures. | ||
13 | */ | ||
14 | |||
15 | #ifdef CONFIG_CPU_XSCALE | ||
/*
 * Event numbers shared by both xscale PMU variants.  CCNT and UNUSED are
 * software conventions; the rest are written to the event-select fields.
 */
enum xscale_perf_types {
	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
	XSCALE_PERFCTR_DATA_STALL		= 0x02,
	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
	XSCALE_PERFCTR_BRANCH			= 0x05,
	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0a,
	XSCALE_PERFCTR_DCACHE_MISS		= 0x0b,
	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0c,
	XSCALE_PERFCTR_PC_CHANGED		= 0x0d,

	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
	XSCALE_PERFCTR_BCU_FULL			= 0x11,
	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
	XSCALE_PERFCTR_RMW			= 0x16,

	/* Not a hardware event number: marks a cycle-counter request. */
	XSCALE_PERFCTR_CCNT			= 0xfe,
	/* Written to an event-select field when its counter is disabled. */
	XSCALE_PERFCTR_UNUSED			= 0xff,
};
41 | |||
/*
 * Counter indices used in the used/active masks.  Numbering starts at 1,
 * so index 0 never names a counter.
 */
enum xscale_counters {
	XSCALE_CYCLE_COUNTER	= 1,
	XSCALE_COUNTER0		= 2,
	XSCALE_COUNTER1		= 3,
	XSCALE_COUNTER2		= 4,
	XSCALE_COUNTER3		= 5,
};
49 | |||
50 | static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { | ||
51 | [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, | ||
52 | [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, | ||
53 | [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, | ||
54 | [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, | ||
55 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, | ||
56 | [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, | ||
57 | [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, | ||
58 | }; | ||
59 | |||
60 | static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] | ||
61 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
62 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
63 | [C(L1D)] = { | ||
64 | [C(OP_READ)] = { | ||
65 | [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, | ||
66 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, | ||
67 | }, | ||
68 | [C(OP_WRITE)] = { | ||
69 | [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, | ||
70 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, | ||
71 | }, | ||
72 | [C(OP_PREFETCH)] = { | ||
73 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
74 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
75 | }, | ||
76 | }, | ||
77 | [C(L1I)] = { | ||
78 | [C(OP_READ)] = { | ||
79 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
80 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, | ||
81 | }, | ||
82 | [C(OP_WRITE)] = { | ||
83 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
84 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, | ||
85 | }, | ||
86 | [C(OP_PREFETCH)] = { | ||
87 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
88 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
89 | }, | ||
90 | }, | ||
91 | [C(LL)] = { | ||
92 | [C(OP_READ)] = { | ||
93 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
94 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
95 | }, | ||
96 | [C(OP_WRITE)] = { | ||
97 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
98 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
99 | }, | ||
100 | [C(OP_PREFETCH)] = { | ||
101 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
102 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
103 | }, | ||
104 | }, | ||
105 | [C(DTLB)] = { | ||
106 | [C(OP_READ)] = { | ||
107 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
108 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, | ||
109 | }, | ||
110 | [C(OP_WRITE)] = { | ||
111 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
112 | [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, | ||
113 | }, | ||
114 | [C(OP_PREFETCH)] = { | ||
115 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
116 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
117 | }, | ||
118 | }, | ||
119 | [C(ITLB)] = { | ||
120 | [C(OP_READ)] = { | ||
121 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
122 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, | ||
123 | }, | ||
124 | [C(OP_WRITE)] = { | ||
125 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
126 | [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, | ||
127 | }, | ||
128 | [C(OP_PREFETCH)] = { | ||
129 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
130 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
131 | }, | ||
132 | }, | ||
133 | [C(BPU)] = { | ||
134 | [C(OP_READ)] = { | ||
135 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
136 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
137 | }, | ||
138 | [C(OP_WRITE)] = { | ||
139 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
140 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
141 | }, | ||
142 | [C(OP_PREFETCH)] = { | ||
143 | [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, | ||
144 | [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, | ||
145 | }, | ||
146 | }, | ||
147 | }; | ||
148 | |||
/* PMNC control bits common to both xscale PMU variants. */
#define XSCALE_PMU_ENABLE	0x001
#define XSCALE_PMN_RESET	0x002
#define XSCALE_CCNT_RESET	0x004
/*
 * Reset both the event counters and the cycle counter.  This used to
 * expand to the undefined names CCNT_RESET | PMN_RESET, which would fail
 * to compile as soon as the macro was used.
 */
#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
#define XSCALE_PMU_CNT64	0x008

/*
 * xscale1: overflow flags, interrupt enables and event selection all
 * live in the single PMNC register.
 */
#define XSCALE1_OVERFLOWED_MASK	0x700
#define XSCALE1_CCOUNT_OVERFLOW	0x400
#define XSCALE1_COUNT0_OVERFLOW	0x100
#define XSCALE1_COUNT1_OVERFLOW	0x200
#define XSCALE1_CCOUNT_INT_EN	0x040
#define XSCALE1_COUNT0_INT_EN	0x010
#define XSCALE1_COUNT1_INT_EN	0x020
#define XSCALE1_COUNT0_EVT_SHFT	12
#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT	20
#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
166 | |||
167 | static inline u32 | ||
168 | xscale1pmu_read_pmnc(void) | ||
169 | { | ||
170 | u32 val; | ||
171 | asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); | ||
172 | return val; | ||
173 | } | ||
174 | |||
175 | static inline void | ||
176 | xscale1pmu_write_pmnc(u32 val) | ||
177 | { | ||
178 | /* upper 4bits and 7, 11 are write-as-0 */ | ||
179 | val &= 0xffff77f; | ||
180 | asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); | ||
181 | } | ||
182 | |||
183 | static inline int | ||
184 | xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, | ||
185 | enum xscale_counters counter) | ||
186 | { | ||
187 | int ret = 0; | ||
188 | |||
189 | switch (counter) { | ||
190 | case XSCALE_CYCLE_COUNTER: | ||
191 | ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; | ||
192 | break; | ||
193 | case XSCALE_COUNTER0: | ||
194 | ret = pmnc & XSCALE1_COUNT0_OVERFLOW; | ||
195 | break; | ||
196 | case XSCALE_COUNTER1: | ||
197 | ret = pmnc & XSCALE1_COUNT1_OVERFLOW; | ||
198 | break; | ||
199 | default: | ||
200 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
201 | } | ||
202 | |||
203 | return ret; | ||
204 | } | ||
205 | |||
206 | static irqreturn_t | ||
207 | xscale1pmu_handle_irq(int irq_num, void *dev) | ||
208 | { | ||
209 | unsigned long pmnc; | ||
210 | struct perf_sample_data data; | ||
211 | struct cpu_hw_events *cpuc; | ||
212 | struct pt_regs *regs; | ||
213 | int idx; | ||
214 | |||
215 | /* | ||
216 | * NOTE: there's an A stepping erratum that states if an overflow | ||
217 | * bit already exists and another occurs, the previous | ||
218 | * Overflow bit gets cleared. There's no workaround. | ||
219 | * Fixed in B stepping or later. | ||
220 | */ | ||
221 | pmnc = xscale1pmu_read_pmnc(); | ||
222 | |||
223 | /* | ||
224 | * Write the value back to clear the overflow flags. Overflow | ||
225 | * flags remain in pmnc for use below. We also disable the PMU | ||
226 | * while we process the interrupt. | ||
227 | */ | ||
228 | xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); | ||
229 | |||
230 | if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) | ||
231 | return IRQ_NONE; | ||
232 | |||
233 | regs = get_irq_regs(); | ||
234 | |||
235 | perf_sample_data_init(&data, 0); | ||
236 | |||
237 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
238 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
239 | struct perf_event *event = cpuc->events[idx]; | ||
240 | struct hw_perf_event *hwc; | ||
241 | |||
242 | if (!test_bit(idx, cpuc->active_mask)) | ||
243 | continue; | ||
244 | |||
245 | if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) | ||
246 | continue; | ||
247 | |||
248 | hwc = &event->hw; | ||
249 | armpmu_event_update(event, hwc, idx); | ||
250 | data.period = event->hw.last_period; | ||
251 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
252 | continue; | ||
253 | |||
254 | if (perf_event_overflow(event, 0, &data, regs)) | ||
255 | armpmu->disable(hwc, idx); | ||
256 | } | ||
257 | |||
258 | irq_work_run(); | ||
259 | |||
260 | /* | ||
261 | * Re-enable the PMU. | ||
262 | */ | ||
263 | pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; | ||
264 | xscale1pmu_write_pmnc(pmnc); | ||
265 | |||
266 | return IRQ_HANDLED; | ||
267 | } | ||
268 | |||
269 | static void | ||
270 | xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
271 | { | ||
272 | unsigned long val, mask, evt, flags; | ||
273 | |||
274 | switch (idx) { | ||
275 | case XSCALE_CYCLE_COUNTER: | ||
276 | mask = 0; | ||
277 | evt = XSCALE1_CCOUNT_INT_EN; | ||
278 | break; | ||
279 | case XSCALE_COUNTER0: | ||
280 | mask = XSCALE1_COUNT0_EVT_MASK; | ||
281 | evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | | ||
282 | XSCALE1_COUNT0_INT_EN; | ||
283 | break; | ||
284 | case XSCALE_COUNTER1: | ||
285 | mask = XSCALE1_COUNT1_EVT_MASK; | ||
286 | evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | | ||
287 | XSCALE1_COUNT1_INT_EN; | ||
288 | break; | ||
289 | default: | ||
290 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | spin_lock_irqsave(&pmu_lock, flags); | ||
295 | val = xscale1pmu_read_pmnc(); | ||
296 | val &= ~mask; | ||
297 | val |= evt; | ||
298 | xscale1pmu_write_pmnc(val); | ||
299 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
300 | } | ||
301 | |||
302 | static void | ||
303 | xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
304 | { | ||
305 | unsigned long val, mask, evt, flags; | ||
306 | |||
307 | switch (idx) { | ||
308 | case XSCALE_CYCLE_COUNTER: | ||
309 | mask = XSCALE1_CCOUNT_INT_EN; | ||
310 | evt = 0; | ||
311 | break; | ||
312 | case XSCALE_COUNTER0: | ||
313 | mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; | ||
314 | evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; | ||
315 | break; | ||
316 | case XSCALE_COUNTER1: | ||
317 | mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; | ||
318 | evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; | ||
319 | break; | ||
320 | default: | ||
321 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
322 | return; | ||
323 | } | ||
324 | |||
325 | spin_lock_irqsave(&pmu_lock, flags); | ||
326 | val = xscale1pmu_read_pmnc(); | ||
327 | val &= ~mask; | ||
328 | val |= evt; | ||
329 | xscale1pmu_write_pmnc(val); | ||
330 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
331 | } | ||
332 | |||
333 | static int | ||
334 | xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
335 | struct hw_perf_event *event) | ||
336 | { | ||
337 | if (XSCALE_PERFCTR_CCNT == event->config_base) { | ||
338 | if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) | ||
339 | return -EAGAIN; | ||
340 | |||
341 | return XSCALE_CYCLE_COUNTER; | ||
342 | } else { | ||
343 | if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) | ||
344 | return XSCALE_COUNTER1; | ||
345 | |||
346 | if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) | ||
347 | return XSCALE_COUNTER0; | ||
348 | |||
349 | return -EAGAIN; | ||
350 | } | ||
351 | } | ||
352 | |||
353 | static void | ||
354 | xscale1pmu_start(void) | ||
355 | { | ||
356 | unsigned long flags, val; | ||
357 | |||
358 | spin_lock_irqsave(&pmu_lock, flags); | ||
359 | val = xscale1pmu_read_pmnc(); | ||
360 | val |= XSCALE_PMU_ENABLE; | ||
361 | xscale1pmu_write_pmnc(val); | ||
362 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
363 | } | ||
364 | |||
365 | static void | ||
366 | xscale1pmu_stop(void) | ||
367 | { | ||
368 | unsigned long flags, val; | ||
369 | |||
370 | spin_lock_irqsave(&pmu_lock, flags); | ||
371 | val = xscale1pmu_read_pmnc(); | ||
372 | val &= ~XSCALE_PMU_ENABLE; | ||
373 | xscale1pmu_write_pmnc(val); | ||
374 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
375 | } | ||
376 | |||
377 | static inline u32 | ||
378 | xscale1pmu_read_counter(int counter) | ||
379 | { | ||
380 | u32 val = 0; | ||
381 | |||
382 | switch (counter) { | ||
383 | case XSCALE_CYCLE_COUNTER: | ||
384 | asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); | ||
385 | break; | ||
386 | case XSCALE_COUNTER0: | ||
387 | asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); | ||
388 | break; | ||
389 | case XSCALE_COUNTER1: | ||
390 | asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); | ||
391 | break; | ||
392 | } | ||
393 | |||
394 | return val; | ||
395 | } | ||
396 | |||
397 | static inline void | ||
398 | xscale1pmu_write_counter(int counter, u32 val) | ||
399 | { | ||
400 | switch (counter) { | ||
401 | case XSCALE_CYCLE_COUNTER: | ||
402 | asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); | ||
403 | break; | ||
404 | case XSCALE_COUNTER0: | ||
405 | asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); | ||
406 | break; | ||
407 | case XSCALE_COUNTER1: | ||
408 | asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); | ||
409 | break; | ||
410 | } | ||
411 | } | ||
412 | |||
413 | static const struct arm_pmu xscale1pmu = { | ||
414 | .id = ARM_PERF_PMU_ID_XSCALE1, | ||
415 | .name = "xscale1", | ||
416 | .handle_irq = xscale1pmu_handle_irq, | ||
417 | .enable = xscale1pmu_enable_event, | ||
418 | .disable = xscale1pmu_disable_event, | ||
419 | .read_counter = xscale1pmu_read_counter, | ||
420 | .write_counter = xscale1pmu_write_counter, | ||
421 | .get_event_idx = xscale1pmu_get_event_idx, | ||
422 | .start = xscale1pmu_start, | ||
423 | .stop = xscale1pmu_stop, | ||
424 | .cache_map = &xscale_perf_cache_map, | ||
425 | .event_map = &xscale_perf_map, | ||
426 | .raw_event_mask = 0xFF, | ||
427 | .num_events = 3, | ||
428 | .max_period = (1LLU << 32) - 1, | ||
429 | }; | ||
430 | |||
431 | const struct arm_pmu *__init xscale1pmu_init(void) | ||
432 | { | ||
433 | return &xscale1pmu; | ||
434 | } | ||
435 | |||
/*
 * xscale2: overflow flags, interrupt enables and event selection each
 * have a dedicated register; the bit positions below index into them.
 */
#define XSCALE2_OVERFLOWED_MASK	0x01f
#define XSCALE2_CCOUNT_OVERFLOW	0x001
#define XSCALE2_COUNT0_OVERFLOW	0x002
#define XSCALE2_COUNT1_OVERFLOW	0x004
#define XSCALE2_COUNT2_OVERFLOW	0x008
#define XSCALE2_COUNT3_OVERFLOW	0x010
#define XSCALE2_CCOUNT_INT_EN	0x001
#define XSCALE2_COUNT0_INT_EN	0x002
#define XSCALE2_COUNT1_INT_EN	0x004
#define XSCALE2_COUNT2_INT_EN	0x008
#define XSCALE2_COUNT3_INT_EN	0x010
/*
 * The masks use an unsigned constant: 0xff << 24 does not fit in a
 * signed int, and shifting a signed value out of range is undefined
 * behaviour in C.
 */
#define XSCALE2_COUNT0_EVT_SHFT	0
#define XSCALE2_COUNT0_EVT_MASK	(0xffUL << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT	8
#define XSCALE2_COUNT1_EVT_MASK	(0xffUL << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT	16
#define XSCALE2_COUNT2_EVT_MASK	(0xffUL << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT	24
#define XSCALE2_COUNT3_EVT_MASK	(0xffUL << XSCALE2_COUNT3_EVT_SHFT)
455 | |||
456 | static inline u32 | ||
457 | xscale2pmu_read_pmnc(void) | ||
458 | { | ||
459 | u32 val; | ||
460 | asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); | ||
461 | /* bits 1-2 and 4-23 are read-unpredictable */ | ||
462 | return val & 0xff000009; | ||
463 | } | ||
464 | |||
465 | static inline void | ||
466 | xscale2pmu_write_pmnc(u32 val) | ||
467 | { | ||
468 | /* bits 4-23 are write-as-0, 24-31 are write ignored */ | ||
469 | val &= 0xf; | ||
470 | asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); | ||
471 | } | ||
472 | |||
473 | static inline u32 | ||
474 | xscale2pmu_read_overflow_flags(void) | ||
475 | { | ||
476 | u32 val; | ||
477 | asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); | ||
478 | return val; | ||
479 | } | ||
480 | |||
481 | static inline void | ||
482 | xscale2pmu_write_overflow_flags(u32 val) | ||
483 | { | ||
484 | asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); | ||
485 | } | ||
486 | |||
487 | static inline u32 | ||
488 | xscale2pmu_read_event_select(void) | ||
489 | { | ||
490 | u32 val; | ||
491 | asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); | ||
492 | return val; | ||
493 | } | ||
494 | |||
495 | static inline void | ||
496 | xscale2pmu_write_event_select(u32 val) | ||
497 | { | ||
498 | asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); | ||
499 | } | ||
500 | |||
501 | static inline u32 | ||
502 | xscale2pmu_read_int_enable(void) | ||
503 | { | ||
504 | u32 val; | ||
505 | asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); | ||
506 | return val; | ||
507 | } | ||
508 | |||
509 | static void | ||
510 | xscale2pmu_write_int_enable(u32 val) | ||
511 | { | ||
512 | asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); | ||
513 | } | ||
514 | |||
515 | static inline int | ||
516 | xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, | ||
517 | enum xscale_counters counter) | ||
518 | { | ||
519 | int ret = 0; | ||
520 | |||
521 | switch (counter) { | ||
522 | case XSCALE_CYCLE_COUNTER: | ||
523 | ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; | ||
524 | break; | ||
525 | case XSCALE_COUNTER0: | ||
526 | ret = of_flags & XSCALE2_COUNT0_OVERFLOW; | ||
527 | break; | ||
528 | case XSCALE_COUNTER1: | ||
529 | ret = of_flags & XSCALE2_COUNT1_OVERFLOW; | ||
530 | break; | ||
531 | case XSCALE_COUNTER2: | ||
532 | ret = of_flags & XSCALE2_COUNT2_OVERFLOW; | ||
533 | break; | ||
534 | case XSCALE_COUNTER3: | ||
535 | ret = of_flags & XSCALE2_COUNT3_OVERFLOW; | ||
536 | break; | ||
537 | default: | ||
538 | WARN_ONCE(1, "invalid counter number (%d)\n", counter); | ||
539 | } | ||
540 | |||
541 | return ret; | ||
542 | } | ||
543 | |||
544 | static irqreturn_t | ||
545 | xscale2pmu_handle_irq(int irq_num, void *dev) | ||
546 | { | ||
547 | unsigned long pmnc, of_flags; | ||
548 | struct perf_sample_data data; | ||
549 | struct cpu_hw_events *cpuc; | ||
550 | struct pt_regs *regs; | ||
551 | int idx; | ||
552 | |||
553 | /* Disable the PMU. */ | ||
554 | pmnc = xscale2pmu_read_pmnc(); | ||
555 | xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); | ||
556 | |||
557 | /* Check the overflow flag register. */ | ||
558 | of_flags = xscale2pmu_read_overflow_flags(); | ||
559 | if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) | ||
560 | return IRQ_NONE; | ||
561 | |||
562 | /* Clear the overflow bits. */ | ||
563 | xscale2pmu_write_overflow_flags(of_flags); | ||
564 | |||
565 | regs = get_irq_regs(); | ||
566 | |||
567 | perf_sample_data_init(&data, 0); | ||
568 | |||
569 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
570 | for (idx = 0; idx <= armpmu->num_events; ++idx) { | ||
571 | struct perf_event *event = cpuc->events[idx]; | ||
572 | struct hw_perf_event *hwc; | ||
573 | |||
574 | if (!test_bit(idx, cpuc->active_mask)) | ||
575 | continue; | ||
576 | |||
577 | if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) | ||
578 | continue; | ||
579 | |||
580 | hwc = &event->hw; | ||
581 | armpmu_event_update(event, hwc, idx); | ||
582 | data.period = event->hw.last_period; | ||
583 | if (!armpmu_event_set_period(event, hwc, idx)) | ||
584 | continue; | ||
585 | |||
586 | if (perf_event_overflow(event, 0, &data, regs)) | ||
587 | armpmu->disable(hwc, idx); | ||
588 | } | ||
589 | |||
590 | irq_work_run(); | ||
591 | |||
592 | /* | ||
593 | * Re-enable the PMU. | ||
594 | */ | ||
595 | pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; | ||
596 | xscale2pmu_write_pmnc(pmnc); | ||
597 | |||
598 | return IRQ_HANDLED; | ||
599 | } | ||
600 | |||
601 | static void | ||
602 | xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
603 | { | ||
604 | unsigned long flags, ien, evtsel; | ||
605 | |||
606 | ien = xscale2pmu_read_int_enable(); | ||
607 | evtsel = xscale2pmu_read_event_select(); | ||
608 | |||
609 | switch (idx) { | ||
610 | case XSCALE_CYCLE_COUNTER: | ||
611 | ien |= XSCALE2_CCOUNT_INT_EN; | ||
612 | break; | ||
613 | case XSCALE_COUNTER0: | ||
614 | ien |= XSCALE2_COUNT0_INT_EN; | ||
615 | evtsel &= ~XSCALE2_COUNT0_EVT_MASK; | ||
616 | evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; | ||
617 | break; | ||
618 | case XSCALE_COUNTER1: | ||
619 | ien |= XSCALE2_COUNT1_INT_EN; | ||
620 | evtsel &= ~XSCALE2_COUNT1_EVT_MASK; | ||
621 | evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; | ||
622 | break; | ||
623 | case XSCALE_COUNTER2: | ||
624 | ien |= XSCALE2_COUNT2_INT_EN; | ||
625 | evtsel &= ~XSCALE2_COUNT2_EVT_MASK; | ||
626 | evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; | ||
627 | break; | ||
628 | case XSCALE_COUNTER3: | ||
629 | ien |= XSCALE2_COUNT3_INT_EN; | ||
630 | evtsel &= ~XSCALE2_COUNT3_EVT_MASK; | ||
631 | evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; | ||
632 | break; | ||
633 | default: | ||
634 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
635 | return; | ||
636 | } | ||
637 | |||
638 | spin_lock_irqsave(&pmu_lock, flags); | ||
639 | xscale2pmu_write_event_select(evtsel); | ||
640 | xscale2pmu_write_int_enable(ien); | ||
641 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
642 | } | ||
643 | |||
644 | static void | ||
645 | xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
646 | { | ||
647 | unsigned long flags, ien, evtsel; | ||
648 | |||
649 | ien = xscale2pmu_read_int_enable(); | ||
650 | evtsel = xscale2pmu_read_event_select(); | ||
651 | |||
652 | switch (idx) { | ||
653 | case XSCALE_CYCLE_COUNTER: | ||
654 | ien &= ~XSCALE2_CCOUNT_INT_EN; | ||
655 | break; | ||
656 | case XSCALE_COUNTER0: | ||
657 | ien &= ~XSCALE2_COUNT0_INT_EN; | ||
658 | evtsel &= ~XSCALE2_COUNT0_EVT_MASK; | ||
659 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; | ||
660 | break; | ||
661 | case XSCALE_COUNTER1: | ||
662 | ien &= ~XSCALE2_COUNT1_INT_EN; | ||
663 | evtsel &= ~XSCALE2_COUNT1_EVT_MASK; | ||
664 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; | ||
665 | break; | ||
666 | case XSCALE_COUNTER2: | ||
667 | ien &= ~XSCALE2_COUNT2_INT_EN; | ||
668 | evtsel &= ~XSCALE2_COUNT2_EVT_MASK; | ||
669 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; | ||
670 | break; | ||
671 | case XSCALE_COUNTER3: | ||
672 | ien &= ~XSCALE2_COUNT3_INT_EN; | ||
673 | evtsel &= ~XSCALE2_COUNT3_EVT_MASK; | ||
674 | evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; | ||
675 | break; | ||
676 | default: | ||
677 | WARN_ONCE(1, "invalid counter number (%d)\n", idx); | ||
678 | return; | ||
679 | } | ||
680 | |||
681 | spin_lock_irqsave(&pmu_lock, flags); | ||
682 | xscale2pmu_write_event_select(evtsel); | ||
683 | xscale2pmu_write_int_enable(ien); | ||
684 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
685 | } | ||
686 | |||
687 | static int | ||
688 | xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, | ||
689 | struct hw_perf_event *event) | ||
690 | { | ||
691 | int idx = xscale1pmu_get_event_idx(cpuc, event); | ||
692 | if (idx >= 0) | ||
693 | goto out; | ||
694 | |||
695 | if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) | ||
696 | idx = XSCALE_COUNTER3; | ||
697 | else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) | ||
698 | idx = XSCALE_COUNTER2; | ||
699 | out: | ||
700 | return idx; | ||
701 | } | ||
702 | |||
703 | static void | ||
704 | xscale2pmu_start(void) | ||
705 | { | ||
706 | unsigned long flags, val; | ||
707 | |||
708 | spin_lock_irqsave(&pmu_lock, flags); | ||
709 | val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; | ||
710 | val |= XSCALE_PMU_ENABLE; | ||
711 | xscale2pmu_write_pmnc(val); | ||
712 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
713 | } | ||
714 | |||
715 | static void | ||
716 | xscale2pmu_stop(void) | ||
717 | { | ||
718 | unsigned long flags, val; | ||
719 | |||
720 | spin_lock_irqsave(&pmu_lock, flags); | ||
721 | val = xscale2pmu_read_pmnc(); | ||
722 | val &= ~XSCALE_PMU_ENABLE; | ||
723 | xscale2pmu_write_pmnc(val); | ||
724 | spin_unlock_irqrestore(&pmu_lock, flags); | ||
725 | } | ||
726 | |||
727 | static inline u32 | ||
728 | xscale2pmu_read_counter(int counter) | ||
729 | { | ||
730 | u32 val = 0; | ||
731 | |||
732 | switch (counter) { | ||
733 | case XSCALE_CYCLE_COUNTER: | ||
734 | asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); | ||
735 | break; | ||
736 | case XSCALE_COUNTER0: | ||
737 | asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); | ||
738 | break; | ||
739 | case XSCALE_COUNTER1: | ||
740 | asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); | ||
741 | break; | ||
742 | case XSCALE_COUNTER2: | ||
743 | asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); | ||
744 | break; | ||
745 | case XSCALE_COUNTER3: | ||
746 | asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); | ||
747 | break; | ||
748 | } | ||
749 | |||
750 | return val; | ||
751 | } | ||
752 | |||
753 | static inline void | ||
754 | xscale2pmu_write_counter(int counter, u32 val) | ||
755 | { | ||
756 | switch (counter) { | ||
757 | case XSCALE_CYCLE_COUNTER: | ||
758 | asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); | ||
759 | break; | ||
760 | case XSCALE_COUNTER0: | ||
761 | asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); | ||
762 | break; | ||
763 | case XSCALE_COUNTER1: | ||
764 | asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); | ||
765 | break; | ||
766 | case XSCALE_COUNTER2: | ||
767 | asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); | ||
768 | break; | ||
769 | case XSCALE_COUNTER3: | ||
770 | asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); | ||
771 | break; | ||
772 | } | ||
773 | } | ||
774 | |||
775 | static const struct arm_pmu xscale2pmu = { | ||
776 | .id = ARM_PERF_PMU_ID_XSCALE2, | ||
777 | .name = "xscale2", | ||
778 | .handle_irq = xscale2pmu_handle_irq, | ||
779 | .enable = xscale2pmu_enable_event, | ||
780 | .disable = xscale2pmu_disable_event, | ||
781 | .read_counter = xscale2pmu_read_counter, | ||
782 | .write_counter = xscale2pmu_write_counter, | ||
783 | .get_event_idx = xscale2pmu_get_event_idx, | ||
784 | .start = xscale2pmu_start, | ||
785 | .stop = xscale2pmu_stop, | ||
786 | .cache_map = &xscale_perf_cache_map, | ||
787 | .event_map = &xscale_perf_map, | ||
788 | .raw_event_mask = 0xFF, | ||
789 | .num_events = 5, | ||
790 | .max_period = (1LLU << 32) - 1, | ||
791 | }; | ||
792 | |||
793 | const struct arm_pmu *__init xscale2pmu_init(void) | ||
794 | { | ||
795 | return &xscale2pmu; | ||
796 | } | ||
797 | #else | ||
798 | const struct arm_pmu *__init xscale1pmu_init(void) | ||
799 | { | ||
800 | return NULL; | ||
801 | } | ||
802 | |||
803 | const struct arm_pmu *__init xscale2pmu_init(void) | ||
804 | { | ||
805 | return NULL; | ||
806 | } | ||
807 | #endif /* CONFIG_CPU_XSCALE */ | ||
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 1e4cbd4e7be9..64f6bc1a9132 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S | |||
@@ -174,8 +174,8 @@ ENDPROC(_find_next_bit_be) | |||
174 | */ | 174 | */ |
175 | .L_found: | 175 | .L_found: |
176 | #if __LINUX_ARM_ARCH__ >= 5 | 176 | #if __LINUX_ARM_ARCH__ >= 5 |
177 | rsb r1, r3, #0 | 177 | rsb r0, r3, #0 |
178 | and r3, r3, r1 | 178 | and r3, r3, r0 |
179 | clz r3, r3 | 179 | clz r3, r3 |
180 | rsb r3, r3, #31 | 180 | rsb r3, r3, #31 |
181 | add r0, r2, r3 | 181 | add r0, r2, r3 |
@@ -190,5 +190,7 @@ ENDPROC(_find_next_bit_be) | |||
190 | addeq r2, r2, #1 | 190 | addeq r2, r2, #1 |
191 | mov r0, r2 | 191 | mov r0, r2 |
192 | #endif | 192 | #endif |
193 | cmp r1, r0 @ Clamp to maxbit | ||
194 | movlo r0, r1 | ||
193 | mov pc, lr | 195 | mov pc, lr |
194 | 196 | ||
diff --git a/arch/arm/mach-aaec2000/include/mach/vmalloc.h b/arch/arm/mach-aaec2000/include/mach/vmalloc.h index cff4e0a996ce..a6299e8321bd 100644 --- a/arch/arm/mach-aaec2000/include/mach/vmalloc.h +++ b/arch/arm/mach-aaec2000/include/mach/vmalloc.h | |||
@@ -11,6 +11,6 @@ | |||
11 | #ifndef __ASM_ARCH_VMALLOC_H | 11 | #ifndef __ASM_ARCH_VMALLOC_H |
12 | #define __ASM_ARCH_VMALLOC_H | 12 | #define __ASM_ARCH_VMALLOC_H |
13 | 13 | ||
14 | #define VMALLOC_END 0xd0000000 | 14 | #define VMALLOC_END 0xd0000000UL |
15 | 15 | ||
16 | #endif /* __ASM_ARCH_VMALLOC_H */ | 16 | #endif /* __ASM_ARCH_VMALLOC_H */ |
diff --git a/arch/arm/mach-bcmring/include/mach/vmalloc.h b/arch/arm/mach-bcmring/include/mach/vmalloc.h index 3db3a09fd398..7397bd7817d9 100644 --- a/arch/arm/mach-bcmring/include/mach/vmalloc.h +++ b/arch/arm/mach-bcmring/include/mach/vmalloc.h | |||
@@ -22,4 +22,4 @@ | |||
22 | * 0xe0000000 to 0xefffffff. This gives us 256 MB of vm space and handles | 22 | * 0xe0000000 to 0xefffffff. This gives us 256 MB of vm space and handles |
23 | * larger physical memory designs better. | 23 | * larger physical memory designs better. |
24 | */ | 24 | */ |
25 | #define VMALLOC_END 0xf0000000 | 25 | #define VMALLOC_END 0xf0000000UL |
diff --git a/arch/arm/mach-clps711x/include/mach/vmalloc.h b/arch/arm/mach-clps711x/include/mach/vmalloc.h index 30b3a287ed88..467b96137e47 100644 --- a/arch/arm/mach-clps711x/include/mach/vmalloc.h +++ b/arch/arm/mach-clps711x/include/mach/vmalloc.h | |||
@@ -17,4 +17,4 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | */ | 19 | */ |
20 | #define VMALLOC_END 0xd0000000 | 20 | #define VMALLOC_END 0xd0000000UL |
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 9be261beae7d..2652af124acd 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c | |||
@@ -359,8 +359,8 @@ static struct clk_lookup dm355_clks[] = { | |||
359 | CLK(NULL, "uart1", &uart1_clk), | 359 | CLK(NULL, "uart1", &uart1_clk), |
360 | CLK(NULL, "uart2", &uart2_clk), | 360 | CLK(NULL, "uart2", &uart2_clk), |
361 | CLK("i2c_davinci.1", NULL, &i2c_clk), | 361 | CLK("i2c_davinci.1", NULL, &i2c_clk), |
362 | CLK("davinci-asp.0", NULL, &asp0_clk), | 362 | CLK("davinci-mcbsp.0", NULL, &asp0_clk), |
363 | CLK("davinci-asp.1", NULL, &asp1_clk), | 363 | CLK("davinci-mcbsp.1", NULL, &asp1_clk), |
364 | CLK("davinci_mmc.0", NULL, &mmcsd0_clk), | 364 | CLK("davinci_mmc.0", NULL, &mmcsd0_clk), |
365 | CLK("davinci_mmc.1", NULL, &mmcsd1_clk), | 365 | CLK("davinci_mmc.1", NULL, &mmcsd1_clk), |
366 | CLK("spi_davinci.0", NULL, &spi0_clk), | 366 | CLK("spi_davinci.0", NULL, &spi0_clk), |
@@ -664,7 +664,7 @@ static struct resource dm355_asp1_resources[] = { | |||
664 | }; | 664 | }; |
665 | 665 | ||
666 | static struct platform_device dm355_asp1_device = { | 666 | static struct platform_device dm355_asp1_device = { |
667 | .name = "davinci-asp", | 667 | .name = "davinci-mcbsp", |
668 | .id = 1, | 668 | .id = 1, |
669 | .num_resources = ARRAY_SIZE(dm355_asp1_resources), | 669 | .num_resources = ARRAY_SIZE(dm355_asp1_resources), |
670 | .resource = dm355_asp1_resources, | 670 | .resource = dm355_asp1_resources, |
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index a12065e87266..c466d710d3c1 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c | |||
@@ -459,7 +459,7 @@ static struct clk_lookup dm365_clks[] = { | |||
459 | CLK(NULL, "usb", &usb_clk), | 459 | CLK(NULL, "usb", &usb_clk), |
460 | CLK("davinci_emac.1", NULL, &emac_clk), | 460 | CLK("davinci_emac.1", NULL, &emac_clk), |
461 | CLK("davinci_voicecodec", NULL, &voicecodec_clk), | 461 | CLK("davinci_voicecodec", NULL, &voicecodec_clk), |
462 | CLK("davinci-asp.0", NULL, &asp0_clk), | 462 | CLK("davinci-mcbsp", NULL, &asp0_clk), |
463 | CLK(NULL, "rto", &rto_clk), | 463 | CLK(NULL, "rto", &rto_clk), |
464 | CLK(NULL, "mjcp", &mjcp_clk), | 464 | CLK(NULL, "mjcp", &mjcp_clk), |
465 | CLK(NULL, NULL, NULL), | 465 | CLK(NULL, NULL, NULL), |
@@ -922,8 +922,8 @@ static struct resource dm365_asp_resources[] = { | |||
922 | }; | 922 | }; |
923 | 923 | ||
924 | static struct platform_device dm365_asp_device = { | 924 | static struct platform_device dm365_asp_device = { |
925 | .name = "davinci-asp", | 925 | .name = "davinci-mcbsp", |
926 | .id = 0, | 926 | .id = -1, |
927 | .num_resources = ARRAY_SIZE(dm365_asp_resources), | 927 | .num_resources = ARRAY_SIZE(dm365_asp_resources), |
928 | .resource = dm365_asp_resources, | 928 | .resource = dm365_asp_resources, |
929 | }; | 929 | }; |
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index 0608dd776a16..9a2376b3137c 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c | |||
@@ -302,7 +302,7 @@ static struct clk_lookup dm644x_clks[] = { | |||
302 | CLK("davinci_emac.1", NULL, &emac_clk), | 302 | CLK("davinci_emac.1", NULL, &emac_clk), |
303 | CLK("i2c_davinci.1", NULL, &i2c_clk), | 303 | CLK("i2c_davinci.1", NULL, &i2c_clk), |
304 | CLK("palm_bk3710", NULL, &ide_clk), | 304 | CLK("palm_bk3710", NULL, &ide_clk), |
305 | CLK("davinci-asp", NULL, &asp_clk), | 305 | CLK("davinci-mcbsp", NULL, &asp_clk), |
306 | CLK("davinci_mmc.0", NULL, &mmcsd_clk), | 306 | CLK("davinci_mmc.0", NULL, &mmcsd_clk), |
307 | CLK(NULL, "spi", &spi_clk), | 307 | CLK(NULL, "spi", &spi_clk), |
308 | CLK(NULL, "gpio", &gpio_clk), | 308 | CLK(NULL, "gpio", &gpio_clk), |
@@ -580,7 +580,7 @@ static struct resource dm644x_asp_resources[] = { | |||
580 | }; | 580 | }; |
581 | 581 | ||
582 | static struct platform_device dm644x_asp_device = { | 582 | static struct platform_device dm644x_asp_device = { |
583 | .name = "davinci-asp", | 583 | .name = "davinci-mcbsp", |
584 | .id = -1, | 584 | .id = -1, |
585 | .num_resources = ARRAY_SIZE(dm644x_asp_resources), | 585 | .num_resources = ARRAY_SIZE(dm644x_asp_resources), |
586 | .resource = dm644x_asp_resources, | 586 | .resource = dm644x_asp_resources, |
diff --git a/arch/arm/mach-ebsa110/include/mach/vmalloc.h b/arch/arm/mach-ebsa110/include/mach/vmalloc.h index 60bde56fba4c..ea141b7a3e03 100644 --- a/arch/arm/mach-ebsa110/include/mach/vmalloc.h +++ b/arch/arm/mach-ebsa110/include/mach/vmalloc.h | |||
@@ -7,4 +7,4 @@ | |||
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | #define VMALLOC_END 0xdf000000 | 10 | #define VMALLOC_END 0xdf000000UL |
diff --git a/arch/arm/mach-footbridge/include/mach/vmalloc.h b/arch/arm/mach-footbridge/include/mach/vmalloc.h index 0ffbb7c85e59..40ba78e5782b 100644 --- a/arch/arm/mach-footbridge/include/mach/vmalloc.h +++ b/arch/arm/mach-footbridge/include/mach/vmalloc.h | |||
@@ -7,4 +7,4 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | 9 | ||
10 | #define VMALLOC_END 0xf0000000 | 10 | #define VMALLOC_END 0xf0000000UL |
diff --git a/arch/arm/mach-h720x/include/mach/vmalloc.h b/arch/arm/mach-h720x/include/mach/vmalloc.h index a45915b88756..8520b4a4d4e6 100644 --- a/arch/arm/mach-h720x/include/mach/vmalloc.h +++ b/arch/arm/mach-h720x/include/mach/vmalloc.h | |||
@@ -5,6 +5,6 @@ | |||
5 | #ifndef __ARCH_ARM_VMALLOC_H | 5 | #ifndef __ARCH_ARM_VMALLOC_H |
6 | #define __ARCH_ARM_VMALLOC_H | 6 | #define __ARCH_ARM_VMALLOC_H |
7 | 7 | ||
8 | #define VMALLOC_END 0xd0000000 | 8 | #define VMALLOC_END 0xd0000000UL |
9 | 9 | ||
10 | #endif | 10 | #endif |
diff --git a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c index 026263c665ca..7e1e9dc2c8fc 100644 --- a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c +++ b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c | |||
@@ -250,9 +250,6 @@ static const struct imxuart_platform_data uart_pdata __initconst = { | |||
250 | .flags = IMXUART_HAVE_RTSCTS, | 250 | .flags = IMXUART_HAVE_RTSCTS, |
251 | }; | 251 | }; |
252 | 252 | ||
253 | #if defined(CONFIG_TOUCHSCREEN_ADS7846) \ | ||
254 | || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) | ||
255 | |||
256 | #define ADS7846_PENDOWN (GPIO_PORTD | 25) | 253 | #define ADS7846_PENDOWN (GPIO_PORTD | 25) |
257 | 254 | ||
258 | static void ads7846_dev_init(void) | 255 | static void ads7846_dev_init(void) |
@@ -273,9 +270,7 @@ static struct ads7846_platform_data ads7846_config __initdata = { | |||
273 | .get_pendown_state = ads7846_get_pendown_state, | 270 | .get_pendown_state = ads7846_get_pendown_state, |
274 | .keep_vref_on = 1, | 271 | .keep_vref_on = 1, |
275 | }; | 272 | }; |
276 | #endif | ||
277 | 273 | ||
278 | #if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) | ||
279 | static struct spi_board_info eukrea_mbimx27_spi_board_info[] __initdata = { | 274 | static struct spi_board_info eukrea_mbimx27_spi_board_info[] __initdata = { |
280 | [0] = { | 275 | [0] = { |
281 | .modalias = "ads7846", | 276 | .modalias = "ads7846", |
@@ -294,7 +289,6 @@ static const struct spi_imx_master eukrea_mbimx27_spi0_data __initconst = { | |||
294 | .chipselect = eukrea_mbimx27_spi_cs, | 289 | .chipselect = eukrea_mbimx27_spi_cs, |
295 | .num_chipselect = ARRAY_SIZE(eukrea_mbimx27_spi_cs), | 290 | .num_chipselect = ARRAY_SIZE(eukrea_mbimx27_spi_cs), |
296 | }; | 291 | }; |
297 | #endif | ||
298 | 292 | ||
299 | static struct i2c_board_info eukrea_mbimx27_i2c_devices[] = { | 293 | static struct i2c_board_info eukrea_mbimx27_i2c_devices[] = { |
300 | { | 294 | { |
diff --git a/arch/arm/mach-integrator/include/mach/vmalloc.h b/arch/arm/mach-integrator/include/mach/vmalloc.h index e056e7cf5645..2f5a2bafb11f 100644 --- a/arch/arm/mach-integrator/include/mach/vmalloc.h +++ b/arch/arm/mach-integrator/include/mach/vmalloc.h | |||
@@ -17,4 +17,4 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | */ | 19 | */ |
20 | #define VMALLOC_END 0xd0000000 | 20 | #define VMALLOC_END 0xd0000000UL |
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig index dbbcfeb919db..31e5fd63ec9a 100644 --- a/arch/arm/mach-msm/Kconfig +++ b/arch/arm/mach-msm/Kconfig | |||
@@ -49,6 +49,8 @@ endchoice | |||
49 | 49 | ||
50 | config MSM_SOC_REV_A | 50 | config MSM_SOC_REV_A |
51 | bool | 51 | bool |
52 | config ARCH_MSM_SCORPIONMP | ||
53 | bool | ||
52 | 54 | ||
53 | config ARCH_MSM_ARM11 | 55 | config ARCH_MSM_ARM11 |
54 | bool | 56 | bool |
diff --git a/arch/arm/mach-msm/include/mach/vmalloc.h b/arch/arm/mach-msm/include/mach/vmalloc.h index 31a32ad062dc..d138448eff16 100644 --- a/arch/arm/mach-msm/include/mach/vmalloc.h +++ b/arch/arm/mach-msm/include/mach/vmalloc.h | |||
@@ -16,7 +16,7 @@ | |||
16 | #ifndef __ASM_ARCH_MSM_VMALLOC_H | 16 | #ifndef __ASM_ARCH_MSM_VMALLOC_H |
17 | #define __ASM_ARCH_MSM_VMALLOC_H | 17 | #define __ASM_ARCH_MSM_VMALLOC_H |
18 | 18 | ||
19 | #define VMALLOC_END 0xd0000000 | 19 | #define VMALLOC_END 0xd0000000UL |
20 | 20 | ||
21 | #endif | 21 | #endif |
22 | 22 | ||
diff --git a/arch/arm/mach-mx25/devices-imx25.h b/arch/arm/mach-mx25/devices-imx25.h index 93afa10b13cf..d94d282fa676 100644 --- a/arch/arm/mach-mx25/devices-imx25.h +++ b/arch/arm/mach-mx25/devices-imx25.h | |||
@@ -42,9 +42,9 @@ extern const struct imx_mxc_nand_data imx25_mxc_nand_data __initconst; | |||
42 | #define imx25_add_mxc_nand(pdata) \ | 42 | #define imx25_add_mxc_nand(pdata) \ |
43 | imx_add_mxc_nand(&imx25_mxc_nand_data, pdata) | 43 | imx_add_mxc_nand(&imx25_mxc_nand_data, pdata) |
44 | 44 | ||
45 | extern const struct imx_spi_imx_data imx25_spi_imx_data[] __initconst; | 45 | extern const struct imx_spi_imx_data imx25_cspi_data[] __initconst; |
46 | #define imx25_add_spi_imx(id, pdata) \ | 46 | #define imx25_add_spi_imx(id, pdata) \ |
47 | imx_add_spi_imx(&imx25_spi_imx_data[id], pdata) | 47 | imx_add_spi_imx(&imx25_cspi_data[id], pdata) |
48 | #define imx25_add_spi_imx0(pdata) imx25_add_spi_imx(0, pdata) | 48 | #define imx25_add_spi_imx0(pdata) imx25_add_spi_imx(0, pdata) |
49 | #define imx25_add_spi_imx1(pdata) imx25_add_spi_imx(1, pdata) | 49 | #define imx25_add_spi_imx1(pdata) imx25_add_spi_imx(1, pdata) |
50 | #define imx25_add_spi_imx2(pdata) imx25_add_spi_imx(2, pdata) | 50 | #define imx25_add_spi_imx2(pdata) imx25_add_spi_imx(2, pdata) |
diff --git a/arch/arm/mach-mx3/mach-pcm037_eet.c b/arch/arm/mach-mx3/mach-pcm037_eet.c index 99e0894e07db..fda56545d2fd 100644 --- a/arch/arm/mach-mx3/mach-pcm037_eet.c +++ b/arch/arm/mach-mx3/mach-pcm037_eet.c | |||
@@ -14,6 +14,7 @@ | |||
14 | 14 | ||
15 | #include <mach/common.h> | 15 | #include <mach/common.h> |
16 | #include <mach/iomux-mx3.h> | 16 | #include <mach/iomux-mx3.h> |
17 | #include <mach/spi.h> | ||
17 | 18 | ||
18 | #include <asm/mach-types.h> | 19 | #include <asm/mach-types.h> |
19 | 20 | ||
@@ -59,14 +60,12 @@ static struct spi_board_info pcm037_spi_dev[] = { | |||
59 | }; | 60 | }; |
60 | 61 | ||
61 | /* Platform Data for MXC CSPI */ | 62 | /* Platform Data for MXC CSPI */ |
62 | #if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) | ||
63 | static int pcm037_spi1_cs[] = {MXC_SPI_CS(1), IOMUX_TO_GPIO(MX31_PIN_KEY_COL7)}; | 63 | static int pcm037_spi1_cs[] = {MXC_SPI_CS(1), IOMUX_TO_GPIO(MX31_PIN_KEY_COL7)}; |
64 | 64 | ||
65 | static const struct spi_imx_master pcm037_spi1_pdata __initconst = { | 65 | static const struct spi_imx_master pcm037_spi1_pdata __initconst = { |
66 | .chipselect = pcm037_spi1_cs, | 66 | .chipselect = pcm037_spi1_cs, |
67 | .num_chipselect = ARRAY_SIZE(pcm037_spi1_cs), | 67 | .num_chipselect = ARRAY_SIZE(pcm037_spi1_cs), |
68 | }; | 68 | }; |
69 | #endif | ||
70 | 69 | ||
71 | /* GPIO-keys input device */ | 70 | /* GPIO-keys input device */ |
72 | static struct gpio_keys_button pcm037_gpio_keys[] = { | 71 | static struct gpio_keys_button pcm037_gpio_keys[] = { |
@@ -171,7 +170,7 @@ static struct platform_device pcm037_gpio_keys_device = { | |||
171 | }, | 170 | }, |
172 | }; | 171 | }; |
173 | 172 | ||
174 | static int eet_init_devices(void) | 173 | static int __init eet_init_devices(void) |
175 | { | 174 | { |
176 | if (!machine_is_pcm037() || pcm037_variant() != PCM037_EET) | 175 | if (!machine_is_pcm037() || pcm037_variant() != PCM037_EET) |
177 | return 0; | 176 | return 0; |
diff --git a/arch/arm/mach-netx/include/mach/vmalloc.h b/arch/arm/mach-netx/include/mach/vmalloc.h index 7cca3574308f..871f1ef7bff5 100644 --- a/arch/arm/mach-netx/include/mach/vmalloc.h +++ b/arch/arm/mach-netx/include/mach/vmalloc.h | |||
@@ -16,4 +16,4 @@ | |||
16 | * along with this program; if not, write to the Free Software | 16 | * along with this program; if not, write to the Free Software |
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | #define VMALLOC_END 0xd0000000 | 19 | #define VMALLOC_END 0xd0000000UL |
diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c index ea0d80a89da7..e7f9ee63dce5 100644 --- a/arch/arm/mach-omap1/devices.c +++ b/arch/arm/mach-omap1/devices.c | |||
@@ -321,10 +321,9 @@ static struct platform_device omap_wdt_device = { | |||
321 | static int __init omap_init_wdt(void) | 321 | static int __init omap_init_wdt(void) |
322 | { | 322 | { |
323 | if (!cpu_is_omap16xx()) | 323 | if (!cpu_is_omap16xx()) |
324 | return; | 324 | return -ENODEV; |
325 | 325 | ||
326 | platform_device_register(&omap_wdt_device); | 326 | return platform_device_register(&omap_wdt_device); |
327 | return 0; | ||
328 | } | 327 | } |
329 | subsys_initcall(omap_init_wdt); | 328 | subsys_initcall(omap_init_wdt); |
330 | #endif | 329 | #endif |
diff --git a/arch/arm/mach-omap1/include/mach/camera.h b/arch/arm/mach-omap1/include/mach/camera.h index fd54b452eb22..847d00f0bb0a 100644 --- a/arch/arm/mach-omap1/include/mach/camera.h +++ b/arch/arm/mach-omap1/include/mach/camera.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __ASM_ARCH_CAMERA_H_ | 1 | #ifndef __ASM_ARCH_CAMERA_H_ |
2 | #define __ASM_ARCH_CAMERA_H_ | 2 | #define __ASM_ARCH_CAMERA_H_ |
3 | 3 | ||
4 | #include <media/omap1_camera.h> | ||
5 | |||
4 | void omap1_camera_init(void *); | 6 | void omap1_camera_init(void *); |
5 | 7 | ||
6 | static inline void omap1_set_camera_info(struct omap1_cam_platform_data *info) | 8 | static inline void omap1_set_camera_info(struct omap1_cam_platform_data *info) |
diff --git a/arch/arm/mach-omap1/include/mach/vmalloc.h b/arch/arm/mach-omap1/include/mach/vmalloc.h index b001f67d695b..22ec4a479577 100644 --- a/arch/arm/mach-omap1/include/mach/vmalloc.h +++ b/arch/arm/mach-omap1/include/mach/vmalloc.h | |||
@@ -17,4 +17,4 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | */ | 19 | */ |
20 | #define VMALLOC_END 0xd8000000 | 20 | #define VMALLOC_END 0xd8000000UL |
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c index 067f4379c87f..53ac762518bd 100644 --- a/arch/arm/mach-omap2/board-devkit8000.c +++ b/arch/arm/mach-omap2/board-devkit8000.c | |||
@@ -242,9 +242,6 @@ static int devkit8000_twl_gpio_setup(struct device *dev, | |||
242 | mmc[0].gpio_cd = gpio + 0; | 242 | mmc[0].gpio_cd = gpio + 0; |
243 | omap2_hsmmc_init(mmc); | 243 | omap2_hsmmc_init(mmc); |
244 | 244 | ||
245 | /* link regulators to MMC adapters */ | ||
246 | devkit8000_vmmc1_supply.dev = mmc[0].dev; | ||
247 | |||
248 | /* TWL4030_GPIO_MAX + 1 == ledB, PMU_STAT (out, active low LED) */ | 245 | /* TWL4030_GPIO_MAX + 1 == ledB, PMU_STAT (out, active low LED) */ |
249 | gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; | 246 | gpio_leds[2].gpio = gpio + TWL4030_GPIO_MAX + 1; |
250 | 247 | ||
diff --git a/arch/arm/mach-omap2/include/mach/vmalloc.h b/arch/arm/mach-omap2/include/mach/vmalloc.h index 4da31e997efe..866319947760 100644 --- a/arch/arm/mach-omap2/include/mach/vmalloc.h +++ b/arch/arm/mach-omap2/include/mach/vmalloc.h | |||
@@ -17,4 +17,4 @@ | |||
17 | * along with this program; if not, write to the Free Software | 17 | * along with this program; if not, write to the Free Software |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | */ | 19 | */ |
20 | #define VMALLOC_END 0xf8000000 | 20 | #define VMALLOC_END 0xf8000000UL |
diff --git a/arch/arm/mach-pnx4008/include/mach/vmalloc.h b/arch/arm/mach-pnx4008/include/mach/vmalloc.h index 31b65ee07b0b..184913c71141 100644 --- a/arch/arm/mach-pnx4008/include/mach/vmalloc.h +++ b/arch/arm/mach-pnx4008/include/mach/vmalloc.h | |||
@@ -17,4 +17,4 @@ | |||
17 | * The vmalloc() routines leaves a hole of 4kB between each vmalloced | 17 | * The vmalloc() routines leaves a hole of 4kB between each vmalloced |
18 | * area for the same reason. ;) | 18 | * area for the same reason. ;) |
19 | */ | 19 | */ |
20 | #define VMALLOC_END 0xd0000000 | 20 | #define VMALLOC_END 0xd0000000UL |
diff --git a/arch/arm/mach-rpc/include/mach/vmalloc.h b/arch/arm/mach-rpc/include/mach/vmalloc.h index 3bcd86fadb81..fb700228637a 100644 --- a/arch/arm/mach-rpc/include/mach/vmalloc.h +++ b/arch/arm/mach-rpc/include/mach/vmalloc.h | |||
@@ -7,4 +7,4 @@ | |||
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
8 | * published by the Free Software Foundation. | 8 | * published by the Free Software Foundation. |
9 | */ | 9 | */ |
10 | #define VMALLOC_END 0xdc000000 | 10 | #define VMALLOC_END 0xdc000000UL |
diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 1ca7bdc6485c..579d2f0f4dd0 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig | |||
@@ -143,7 +143,7 @@ config MACH_SMDK6410 | |||
143 | select S3C_DEV_USB_HSOTG | 143 | select S3C_DEV_USB_HSOTG |
144 | select S3C_DEV_WDT | 144 | select S3C_DEV_WDT |
145 | select SAMSUNG_DEV_KEYPAD | 145 | select SAMSUNG_DEV_KEYPAD |
146 | select HAVE_S3C2410_WATCHDOG | 146 | select HAVE_S3C2410_WATCHDOG if WATCHDOG |
147 | select S3C64XX_SETUP_SDHCI | 147 | select S3C64XX_SETUP_SDHCI |
148 | select S3C64XX_SETUP_I2C1 | 148 | select S3C64XX_SETUP_I2C1 |
149 | select S3C64XX_SETUP_IDE | 149 | select S3C64XX_SETUP_IDE |
diff --git a/arch/arm/mach-shark/include/mach/vmalloc.h b/arch/arm/mach-shark/include/mach/vmalloc.h index 8e845b6a7cb5..b10df988526d 100644 --- a/arch/arm/mach-shark/include/mach/vmalloc.h +++ b/arch/arm/mach-shark/include/mach/vmalloc.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * arch/arm/mach-shark/include/mach/vmalloc.h | 2 | * arch/arm/mach-shark/include/mach/vmalloc.h |
3 | */ | 3 | */ |
4 | #define VMALLOC_END 0xd0000000 | 4 | #define VMALLOC_END 0xd0000000UL |
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 32d9e2816e56..d3260542b943 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c | |||
@@ -163,11 +163,13 @@ static struct mtd_partition nor_flash_partitions[] = { | |||
163 | .name = "loader", | 163 | .name = "loader", |
164 | .offset = 0x00000000, | 164 | .offset = 0x00000000, |
165 | .size = 512 * 1024, | 165 | .size = 512 * 1024, |
166 | .mask_flags = MTD_WRITEABLE, | ||
166 | }, | 167 | }, |
167 | { | 168 | { |
168 | .name = "bootenv", | 169 | .name = "bootenv", |
169 | .offset = MTDPART_OFS_APPEND, | 170 | .offset = MTDPART_OFS_APPEND, |
170 | .size = 512 * 1024, | 171 | .size = 512 * 1024, |
172 | .mask_flags = MTD_WRITEABLE, | ||
171 | }, | 173 | }, |
172 | { | 174 | { |
173 | .name = "kernel_ro", | 175 | .name = "kernel_ro", |
@@ -581,6 +583,10 @@ static int fsi_set_rate(int is_porta, int rate) | |||
581 | return -EINVAL; | 583 | return -EINVAL; |
582 | 584 | ||
583 | switch (rate) { | 585 | switch (rate) { |
586 | case 44100: | ||
587 | clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 11283000)); | ||
588 | ret = SH_FSI_ACKMD_256 | SH_FSI_BPFMD_64; | ||
589 | break; | ||
584 | case 48000: | 590 | case 48000: |
585 | clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000)); | 591 | clk_set_rate(fsib_clk, clk_round_rate(fsib_clk, 85428000)); |
586 | clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000)); | 592 | clk_set_rate(fdiv_clk, clk_round_rate(fdiv_clk, 12204000)); |
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 7db31e6c6bf2..b25ce90a346e 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c | |||
@@ -220,8 +220,7 @@ static void pllc2_disable(struct clk *clk) | |||
220 | __raw_writel(__raw_readl(PLLC2CR) & ~0x80000000, PLLC2CR); | 220 | __raw_writel(__raw_readl(PLLC2CR) & ~0x80000000, PLLC2CR); |
221 | } | 221 | } |
222 | 222 | ||
223 | static int pllc2_set_rate(struct clk *clk, | 223 | static int pllc2_set_rate(struct clk *clk, unsigned long rate) |
224 | unsigned long rate, int algo_id) | ||
225 | { | 224 | { |
226 | unsigned long value; | 225 | unsigned long value; |
227 | int idx; | 226 | int idx; |
@@ -463,8 +462,7 @@ static int fsidiv_enable(struct clk *clk) | |||
463 | return 0; | 462 | return 0; |
464 | } | 463 | } |
465 | 464 | ||
466 | static int fsidiv_set_rate(struct clk *clk, | 465 | static int fsidiv_set_rate(struct clk *clk, unsigned long rate) |
467 | unsigned long rate, int algo_id) | ||
468 | { | 466 | { |
469 | int idx; | 467 | int idx; |
470 | 468 | ||
diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c index 4cd3cae38e72..30b2f400666a 100644 --- a/arch/arm/mach-shmobile/intc-sh7372.c +++ b/arch/arm/mach-shmobile/intc-sh7372.c | |||
@@ -98,7 +98,7 @@ static struct intc_vect intca_vectors[] __initdata = { | |||
98 | INTC_VECT(IRQ14A, 0x03c0), INTC_VECT(IRQ15A, 0x03e0), | 98 | INTC_VECT(IRQ14A, 0x03c0), INTC_VECT(IRQ15A, 0x03e0), |
99 | INTC_VECT(IRQ16A, 0x3200), INTC_VECT(IRQ17A, 0x3220), | 99 | INTC_VECT(IRQ16A, 0x3200), INTC_VECT(IRQ17A, 0x3220), |
100 | INTC_VECT(IRQ18A, 0x3240), INTC_VECT(IRQ19A, 0x3260), | 100 | INTC_VECT(IRQ18A, 0x3240), INTC_VECT(IRQ19A, 0x3260), |
101 | INTC_VECT(IRQ20A, 0x3280), INTC_VECT(IRQ31A, 0x32a0), | 101 | INTC_VECT(IRQ20A, 0x3280), INTC_VECT(IRQ21A, 0x32a0), |
102 | INTC_VECT(IRQ22A, 0x32c0), INTC_VECT(IRQ23A, 0x32e0), | 102 | INTC_VECT(IRQ22A, 0x32c0), INTC_VECT(IRQ23A, 0x32e0), |
103 | INTC_VECT(IRQ24A, 0x3300), INTC_VECT(IRQ25A, 0x3320), | 103 | INTC_VECT(IRQ24A, 0x3300), INTC_VECT(IRQ25A, 0x3320), |
104 | INTC_VECT(IRQ26A, 0x3340), INTC_VECT(IRQ27A, 0x3360), | 104 | INTC_VECT(IRQ26A, 0x3340), INTC_VECT(IRQ27A, 0x3360), |
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index 73fb1a551ec6..608a1372b172 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c | |||
@@ -75,14 +75,14 @@ void __init ux500_init_irq(void) | |||
75 | static inline void ux500_cache_wait(void __iomem *reg, unsigned long mask) | 75 | static inline void ux500_cache_wait(void __iomem *reg, unsigned long mask) |
76 | { | 76 | { |
77 | /* wait for the operation to complete */ | 77 | /* wait for the operation to complete */ |
78 | while (readl(reg) & mask) | 78 | while (readl_relaxed(reg) & mask) |
79 | ; | 79 | ; |
80 | } | 80 | } |
81 | 81 | ||
82 | static inline void ux500_cache_sync(void) | 82 | static inline void ux500_cache_sync(void) |
83 | { | 83 | { |
84 | void __iomem *base = __io_address(UX500_L2CC_BASE); | 84 | void __iomem *base = __io_address(UX500_L2CC_BASE); |
85 | writel(0, base + L2X0_CACHE_SYNC); | 85 | writel_relaxed(0, base + L2X0_CACHE_SYNC); |
86 | ux500_cache_wait(base + L2X0_CACHE_SYNC, 1); | 86 | ux500_cache_wait(base + L2X0_CACHE_SYNC, 1); |
87 | } | 87 | } |
88 | 88 | ||
@@ -107,7 +107,7 @@ static void ux500_l2x0_inv_all(void) | |||
107 | uint32_t l2x0_way_mask = (1<<16) - 1; /* Bitmask of active ways */ | 107 | uint32_t l2x0_way_mask = (1<<16) - 1; /* Bitmask of active ways */ |
108 | 108 | ||
109 | /* invalidate all ways */ | 109 | /* invalidate all ways */ |
110 | writel(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); | 110 | writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); |
111 | ux500_cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); | 111 | ux500_cache_wait(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); |
112 | ux500_cache_sync(); | 112 | ux500_cache_sync(); |
113 | } | 113 | } |
diff --git a/arch/arm/mach-versatile/include/mach/vmalloc.h b/arch/arm/mach-versatile/include/mach/vmalloc.h index ebd8a2543d3b..7d8e069ad51b 100644 --- a/arch/arm/mach-versatile/include/mach/vmalloc.h +++ b/arch/arm/mach-versatile/include/mach/vmalloc.h | |||
@@ -18,4 +18,4 @@ | |||
18 | * along with this program; if not, write to the Free Software | 18 | * along with this program; if not, write to the Free Software |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | */ | 20 | */ |
21 | #define VMALLOC_END 0xd8000000 | 21 | #define VMALLOC_END 0xd8000000UL |
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 17e7b0b57e49..55c17a6fb22f 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c | |||
@@ -206,8 +206,8 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, | |||
206 | */ | 206 | */ |
207 | if (pfn_valid(pfn)) { | 207 | if (pfn_valid(pfn)) { |
208 | printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory. This leads\n" | 208 | printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory. This leads\n" |
209 | KERN_WARNING "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n" | 209 | "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n" |
210 | KERN_WARNING "will fail in the next kernel release. Please fix your driver.\n"); | 210 | "will fail in the next kernel release. Please fix your driver.\n"); |
211 | WARN_ON(1); | 211 | WARN_ON(1); |
212 | } | 212 | } |
213 | 213 | ||
diff --git a/arch/arm/plat-mxc/devices/platform-imx-dma.c b/arch/arm/plat-mxc/devices/platform-imx-dma.c index 02d989018059..3a705c7877dd 100644 --- a/arch/arm/plat-mxc/devices/platform-imx-dma.c +++ b/arch/arm/plat-mxc/devices/platform-imx-dma.c | |||
@@ -12,15 +12,7 @@ | |||
12 | 12 | ||
13 | #include <mach/hardware.h> | 13 | #include <mach/hardware.h> |
14 | #include <mach/devices-common.h> | 14 | #include <mach/devices-common.h> |
15 | #ifdef SDMA_IS_MERGED | ||
16 | #include <mach/sdma.h> | 15 | #include <mach/sdma.h> |
17 | #else | ||
18 | struct sdma_platform_data { | ||
19 | int sdma_version; | ||
20 | char *cpu_name; | ||
21 | int to_version; | ||
22 | }; | ||
23 | #endif | ||
24 | 16 | ||
25 | struct imx_imx_sdma_data { | 17 | struct imx_imx_sdma_data { |
26 | resource_size_t iobase; | 18 | resource_size_t iobase; |
diff --git a/arch/arm/plat-mxc/devices/platform-spi_imx.c b/arch/arm/plat-mxc/devices/platform-spi_imx.c index e48340ec331e..17f724c9452d 100644 --- a/arch/arm/plat-mxc/devices/platform-spi_imx.c +++ b/arch/arm/plat-mxc/devices/platform-spi_imx.c | |||
@@ -27,6 +27,7 @@ const struct imx_spi_imx_data imx21_cspi_data[] __initconst = { | |||
27 | imx_spi_imx_data_entry(MX21, CSPI, "imx21-cspi", _id, _hwid, SZ_4K) | 27 | imx_spi_imx_data_entry(MX21, CSPI, "imx21-cspi", _id, _hwid, SZ_4K) |
28 | imx21_cspi_data_entry(0, 1), | 28 | imx21_cspi_data_entry(0, 1), |
29 | imx21_cspi_data_entry(1, 2), | 29 | imx21_cspi_data_entry(1, 2), |
30 | }; | ||
30 | #endif | 31 | #endif |
31 | 32 | ||
32 | #ifdef CONFIG_ARCH_MX25 | 33 | #ifdef CONFIG_ARCH_MX25 |
diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index aedf9c1d645e..63cdc6025bd7 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) 2008 STMicroelectronics | 4 | * Copyright (C) 2008 STMicroelectronics |
5 | * Copyright (C) 2010 Alessandro Rubini | 5 | * Copyright (C) 2010 Alessandro Rubini |
6 | * Copyright (C) 2010 Linus Walleij for ST-Ericsson | ||
6 | * | 7 | * |
7 | * This program is free software; you can redistribute it and/or modify | 8 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2, as | 9 | * it under the terms of the GNU General Public License version 2, as |
@@ -16,11 +17,13 @@ | |||
16 | #include <linux/clk.h> | 17 | #include <linux/clk.h> |
17 | #include <linux/jiffies.h> | 18 | #include <linux/jiffies.h> |
18 | #include <linux/err.h> | 19 | #include <linux/err.h> |
20 | #include <linux/cnt32_to_63.h> | ||
21 | #include <linux/timer.h> | ||
19 | #include <asm/mach/time.h> | 22 | #include <asm/mach/time.h> |
20 | 23 | ||
21 | #include <plat/mtu.h> | 24 | #include <plat/mtu.h> |
22 | 25 | ||
23 | void __iomem *mtu_base; /* ssigned by machine code */ | 26 | void __iomem *mtu_base; /* Assigned by machine code */ |
24 | 27 | ||
25 | /* | 28 | /* |
26 | * Kernel assumes that sched_clock can be called early | 29 | * Kernel assumes that sched_clock can be called early |
@@ -48,16 +51,82 @@ static struct clocksource nmdk_clksrc = { | |||
48 | /* | 51 | /* |
49 | * Override the global weak sched_clock symbol with this | 52 | * Override the global weak sched_clock symbol with this |
50 | * local implementation which uses the clocksource to get some | 53 | * local implementation which uses the clocksource to get some |
51 | * better resolution when scheduling the kernel. We accept that | 54 | * better resolution when scheduling the kernel. |
52 | * this wraps around for now, since it is just a relative time | 55 | * |
53 | * stamp. (Inspired by OMAP implementation.) | 56 | * Because the hardware timer period may be quite short |
57 | * (32.3 secs on the 133 MHz MTU timer selection on ux500) | ||
58 | * and because cnt32_to_63() needs to be called at least once per | ||
59 | * half period to work properly, a kernel keepwarm() timer is set up | ||
60 | * to ensure this requirement is always met. | ||
61 | * | ||
62 | * Also the sched_clock timer will wrap around at some point, | ||
63 | * here we set it to run continously for a year. | ||
54 | */ | 64 | */ |
65 | #define SCHED_CLOCK_MIN_WRAP 3600*24*365 | ||
66 | static struct timer_list cnt32_to_63_keepwarm_timer; | ||
67 | static u32 sched_mult; | ||
68 | static u32 sched_shift; | ||
69 | |||
55 | unsigned long long notrace sched_clock(void) | 70 | unsigned long long notrace sched_clock(void) |
56 | { | 71 | { |
57 | return clocksource_cyc2ns(nmdk_clksrc.read( | 72 | u64 cycles; |
58 | &nmdk_clksrc), | 73 | |
59 | nmdk_clksrc.mult, | 74 | if (unlikely(!mtu_base)) |
60 | nmdk_clksrc.shift); | 75 | return 0; |
76 | |||
77 | cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0))); | ||
78 | /* | ||
79 | * sched_mult is guaranteed to be even so will | ||
80 | * shift out bit 63 | ||
81 | */ | ||
82 | return (cycles * sched_mult) >> sched_shift; | ||
83 | } | ||
84 | |||
85 | /* Just kick sched_clock every so often */ | ||
86 | static void cnt32_to_63_keepwarm(unsigned long data) | ||
87 | { | ||
88 | mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); | ||
89 | (void) sched_clock(); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm | ||
94 | * once in half a 32bit timer wrap interval. | ||
95 | */ | ||
96 | static void __init nmdk_sched_clock_init(unsigned long rate) | ||
97 | { | ||
98 | u32 v; | ||
99 | unsigned long delta; | ||
100 | u64 days; | ||
101 | |||
102 | /* Find the apropriate mult and shift factors */ | ||
103 | clocks_calc_mult_shift(&sched_mult, &sched_shift, | ||
104 | rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP); | ||
105 | /* We need to multiply by an even number to get rid of bit 63 */ | ||
106 | if (sched_mult & 1) | ||
107 | sched_mult++; | ||
108 | |||
109 | /* Let's see what we get, take max counter and scale it */ | ||
110 | days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift; | ||
111 | do_div(days, NSEC_PER_SEC); | ||
112 | do_div(days, (3600*24)); | ||
113 | |||
114 | pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n", | ||
115 | (64 - sched_shift), rate, (unsigned long) days); | ||
116 | |||
117 | /* | ||
118 | * Program a timer to kick us at half 32bit wraparound | ||
119 | * Formula: seconds per wrap = (2^32) / f | ||
120 | */ | ||
121 | v = 0xFFFFFFFFUL / rate; | ||
122 | /* We want half of the wrap time to keep cnt32_to_63 warm */ | ||
123 | v /= 2; | ||
124 | pr_debug("sched_clock: prescaled timer rate: %lu Hz, " | ||
125 | "initialize keepwarm timer every %d seconds\n", rate, v); | ||
126 | /* Convert seconds to jiffies */ | ||
127 | delta = msecs_to_jiffies(v*1000); | ||
128 | setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta); | ||
129 | mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta)); | ||
61 | } | 130 | } |
62 | 131 | ||
63 | /* Clockevent device: use one-shot mode */ | 132 | /* Clockevent device: use one-shot mode */ |
@@ -161,13 +230,15 @@ void __init nmdk_timer_init(void) | |||
161 | writel(0, mtu_base + MTU_BGLR(0)); | 230 | writel(0, mtu_base + MTU_BGLR(0)); |
162 | writel(cr | MTU_CRn_ENA, mtu_base + MTU_CR(0)); | 231 | writel(cr | MTU_CRn_ENA, mtu_base + MTU_CR(0)); |
163 | 232 | ||
164 | /* Now the scheduling clock is ready */ | 233 | /* Now the clock source is ready */ |
165 | nmdk_clksrc.read = nmdk_read_timer; | 234 | nmdk_clksrc.read = nmdk_read_timer; |
166 | 235 | ||
167 | if (clocksource_register(&nmdk_clksrc)) | 236 | if (clocksource_register(&nmdk_clksrc)) |
168 | pr_err("timer: failed to initialize clock source %s\n", | 237 | pr_err("timer: failed to initialize clock source %s\n", |
169 | nmdk_clksrc.name); | 238 | nmdk_clksrc.name); |
170 | 239 | ||
240 | nmdk_sched_clock_init(rate); | ||
241 | |||
171 | /* Timer 1 is used for events */ | 242 | /* Timer 1 is used for events */ |
172 | 243 | ||
173 | clockevents_calc_mult_shift(&nmdk_clkevt, rate, MTU_MIN_RANGE); | 244 | clockevents_calc_mult_shift(&nmdk_clkevt, rate, MTU_MIN_RANGE); |
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index f5c5b8da9a87..2c2826571d45 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c | |||
@@ -1983,6 +1983,8 @@ static int omap2_dma_handle_ch(int ch) | |||
1983 | 1983 | ||
1984 | dma_write(OMAP2_DMA_CSR_CLEAR_MASK, CSR(ch)); | 1984 | dma_write(OMAP2_DMA_CSR_CLEAR_MASK, CSR(ch)); |
1985 | dma_write(1 << ch, IRQSTATUS_L0); | 1985 | dma_write(1 << ch, IRQSTATUS_L0); |
1986 | /* read back the register to flush the write */ | ||
1987 | dma_read(IRQSTATUS_L0); | ||
1986 | 1988 | ||
1987 | /* If the ch is not chained then chain_id will be -1 */ | 1989 | /* If the ch is not chained then chain_id will be -1 */ |
1988 | if (dma_chan[ch].chain_id != -1) { | 1990 | if (dma_chan[ch].chain_id != -1) { |
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index cd0c090ebc54..b407bc8ad918 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c | |||
@@ -7,7 +7,6 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/smp_lock.h> | ||
11 | #include <linux/unistd.h> | 10 | #include <linux/unistd.h> |
12 | #include <linux/user.h> | 11 | #include <linux/user.h> |
13 | #include <linux/uaccess.h> | 12 | #include <linux/uaccess.h> |
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 2b63b0191f52..efad12071c2e 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/stddef.h> | 19 | #include <linux/stddef.h> |
21 | #include <linux/unistd.h> | 20 | #include <linux/unistd.h> |
22 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 97478138e361..933bd388efb2 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/kernel.h> | 28 | #include <linux/kernel.h> |
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/smp.h> | 30 | #include <linux/smp.h> |
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/stddef.h> | 31 | #include <linux/stddef.h> |
33 | #include <linux/unistd.h> | 32 | #include <linux/unistd.h> |
34 | #include <linux/ptrace.h> | 33 | #include <linux/ptrace.h> |
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 3a078ad3aa44..331de723c676 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c | |||
@@ -202,7 +202,7 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode) | |||
202 | } | 202 | } |
203 | 203 | ||
204 | static int | 204 | static int |
205 | simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) | 205 | simscsi_queuecommand_lck (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) |
206 | { | 206 | { |
207 | unsigned int target_id = sc->device->id; | 207 | unsigned int target_id = sc->device->id; |
208 | char fname[MAX_ROOT_LEN+16]; | 208 | char fname[MAX_ROOT_LEN+16]; |
@@ -326,6 +326,8 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) | |||
326 | return 0; | 326 | return 0; |
327 | } | 327 | } |
328 | 328 | ||
329 | static DEF_SCSI_QCMD(simscsi_queuecommand) | ||
330 | |||
329 | static int | 331 | static int |
330 | simscsi_host_reset (struct scsi_cmnd *sc) | 332 | simscsi_host_reset (struct scsi_cmnd *sc) |
331 | { | 333 | { |
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 18732ab23292..c2a1fc23dd75 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
21 | #include <linux/smp_lock.h> | ||
22 | #include <linux/stddef.h> | 21 | #include <linux/stddef.h> |
23 | #include <linux/unistd.h> | 22 | #include <linux/unistd.h> |
24 | #include <linux/ptrace.h> | 23 | #include <linux/ptrace.h> |
diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6d3390590e5b..e2a63af5d517 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/mm.h> | 20 | #include <linux/mm.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | #include <linux/smp_lock.h> | ||
23 | #include <linux/stddef.h> | 22 | #include <linux/stddef.h> |
24 | #include <linux/unistd.h> | 23 | #include <linux/unistd.h> |
25 | #include <linux/ptrace.h> | 24 | #include <linux/ptrace.h> |
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 0d0f8049a17b..e1b14a6ed544 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/smp.h> | 16 | #include <linux/smp.h> |
17 | #include <linux/smp_lock.h> | ||
18 | #include <linux/stddef.h> | 17 | #include <linux/stddef.h> |
19 | #include <linux/unistd.h> | 18 | #include <linux/unistd.h> |
20 | #include <linux/ptrace.h> | 19 | #include <linux/ptrace.h> |
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c index ba430a03bc7a..30394081d9b6 100644 --- a/arch/parisc/hpux/sys_hpux.c +++ b/arch/parisc/hpux/sys_hpux.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/namei.h> | 28 | #include <linux/namei.h> |
29 | #include <linux/sched.h> | 29 | #include <linux/sched.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/syscalls.h> | 31 | #include <linux/syscalls.h> |
33 | #include <linux/utsname.h> | 32 | #include <linux/utsname.h> |
34 | #include <linux/vfs.h> | 33 | #include <linux/vfs.h> |
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 9779ece2b070..88a0ad14a9c9 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/times.h> | 20 | #include <linux/times.h> |
21 | #include <linux/time.h> | 21 | #include <linux/time.h> |
22 | #include <linux/smp.h> | 22 | #include <linux/smp.h> |
23 | #include <linux/smp_lock.h> | ||
24 | #include <linux/sem.h> | 23 | #include <linux/sem.h> |
25 | #include <linux/msg.h> | 24 | #include <linux/msg.h> |
26 | #include <linux/shm.h> | 25 | #include <linux/shm.h> |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b6447190e1a2..e625e9e034ae 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -4,6 +4,10 @@ config PPC32 | |||
4 | bool | 4 | bool |
5 | default y if !PPC64 | 5 | default y if !PPC64 |
6 | 6 | ||
7 | config 32BIT | ||
8 | bool | ||
9 | default y if PPC32 | ||
10 | |||
7 | config 64BIT | 11 | config 64BIT |
8 | bool | 12 | bool |
9 | default y if PPC64 | 13 | default y if PPC64 |
diff --git a/arch/powerpc/boot/div64.S b/arch/powerpc/boot/div64.S index 722f360a32a9..d271ab542673 100644 --- a/arch/powerpc/boot/div64.S +++ b/arch/powerpc/boot/div64.S | |||
@@ -33,9 +33,10 @@ __div64_32: | |||
33 | cntlzw r0,r5 # we are shifting the dividend right | 33 | cntlzw r0,r5 # we are shifting the dividend right |
34 | li r10,-1 # to make it < 2^32, and shifting | 34 | li r10,-1 # to make it < 2^32, and shifting |
35 | srw r10,r10,r0 # the divisor right the same amount, | 35 | srw r10,r10,r0 # the divisor right the same amount, |
36 | add r9,r4,r10 # rounding up (so the estimate cannot | 36 | addc r9,r4,r10 # rounding up (so the estimate cannot |
37 | andc r11,r6,r10 # ever be too large, only too small) | 37 | andc r11,r6,r10 # ever be too large, only too small) |
38 | andc r9,r9,r10 | 38 | andc r9,r9,r10 |
39 | addze r9,r9 | ||
39 | or r11,r5,r11 | 40 | or r11,r5,r11 |
40 | rotlw r9,r9,r0 | 41 | rotlw r9,r9,r0 |
41 | rotlw r11,r11,r0 | 42 | rotlw r11,r11,r0 |
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7a9db64f3f04..42850ee00ada 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c | |||
@@ -337,7 +337,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) | |||
337 | /* FP registers 32 -> 63 */ | 337 | /* FP registers 32 -> 63 */ |
338 | #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) | 338 | #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) |
339 | if (current) | 339 | if (current) |
340 | memcpy(mem, current->thread.evr[regno-32], | 340 | memcpy(mem, &current->thread.evr[regno-32], |
341 | dbg_reg_def[regno].size); | 341 | dbg_reg_def[regno].size); |
342 | #else | 342 | #else |
343 | /* fp registers not used by kernel, leave zero */ | 343 | /* fp registers not used by kernel, leave zero */ |
@@ -362,7 +362,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) | |||
362 | if (regno >= 32 && regno < 64) { | 362 | if (regno >= 32 && regno < 64) { |
363 | /* FP registers 32 -> 63 */ | 363 | /* FP registers 32 -> 63 */ |
364 | #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) | 364 | #if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) |
365 | memcpy(current->thread.evr[regno-32], mem, | 365 | memcpy(&current->thread.evr[regno-32], mem, |
366 | dbg_reg_def[regno].size); | 366 | dbg_reg_def[regno].size); |
367 | #else | 367 | #else |
368 | /* fp registers not used by kernel, leave zero */ | 368 | /* fp registers not used by kernel, leave zero */ |
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a178b0ebcdf..ce6f61c6f871 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c | |||
@@ -497,9 +497,8 @@ static void __init emergency_stack_init(void) | |||
497 | } | 497 | } |
498 | 498 | ||
499 | /* | 499 | /* |
500 | * Called into from start_kernel, after lock_kernel has been called. | 500 | * Called into from start_kernel this initializes bootmem, which is used |
501 | * Initializes bootmem, which is unsed to manage page allocation until | 501 | * to manage page allocation until mem_init is called. |
502 | * mem_init is called. | ||
503 | */ | 502 | */ |
504 | void __init setup_arch(char **cmdline_p) | 503 | void __init setup_arch(char **cmdline_p) |
505 | { | 504 | { |
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b1b6043a56c4..4e5bf1edc0f2 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/resource.h> | 23 | #include <linux/resource.h> |
24 | #include <linux/times.h> | 24 | #include <linux/times.h> |
25 | #include <linux/smp.h> | 25 | #include <linux/smp.h> |
26 | #include <linux/smp_lock.h> | ||
27 | #include <linux/sem.h> | 26 | #include <linux/sem.h> |
28 | #include <linux/msg.h> | 27 | #include <linux/msg.h> |
29 | #include <linux/shm.h> | 28 | #include <linux/shm.h> |
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 83f534d862db..5e9584405c45 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c | |||
@@ -1123,7 +1123,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
1123 | else | 1123 | else |
1124 | #endif /* CONFIG_PPC_HAS_HASH_64K */ | 1124 | #endif /* CONFIG_PPC_HAS_HASH_64K */ |
1125 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, | 1125 | rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, |
1126 | subpage_protection(pgdir, ea)); | 1126 | subpage_protection(mm, ea)); |
1127 | 1127 | ||
1128 | /* Dump some info in case of hash insertion failure, they should | 1128 | /* Dump some info in case of hash insertion failure, they should |
1129 | * never happen so it is really useful to know if/when they do | 1129 | * never happen so it is really useful to know if/when they do |
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 8b04c54e596f..8526bd9d2aa3 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S | |||
@@ -138,8 +138,11 @@ | |||
138 | cmpldi cr0,r15,0 /* Check for user region */ | 138 | cmpldi cr0,r15,0 /* Check for user region */ |
139 | std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ | 139 | std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ |
140 | beq normal_tlb_miss | 140 | beq normal_tlb_miss |
141 | |||
142 | li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ | ||
143 | oris r11,r11,_PAGE_ACCESSED@h | ||
141 | /* XXX replace the RMW cycles with immediate loads + writes */ | 144 | /* XXX replace the RMW cycles with immediate loads + writes */ |
142 | 1: mfspr r10,SPRN_MAS1 | 145 | mfspr r10,SPRN_MAS1 |
143 | cmpldi cr0,r15,8 /* Check for vmalloc region */ | 146 | cmpldi cr0,r15,8 /* Check for vmalloc region */ |
144 | rlwinm r10,r10,0,16,1 /* Clear TID */ | 147 | rlwinm r10,r10,0,16,1 /* Clear TID */ |
145 | mtspr SPRN_MAS1,r10 | 148 | mtspr SPRN_MAS1,r10 |
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 36c0c449a899..2a030d89bbc6 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
@@ -585,6 +585,6 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, | |||
585 | ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); | 585 | ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); |
586 | 586 | ||
587 | /* Finally limit subsequent allocations */ | 587 | /* Finally limit subsequent allocations */ |
588 | memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size); | 588 | memblock_set_current_limit(first_memblock_base + ppc64_rma_size); |
589 | } | 589 | } |
590 | #endif /* CONFIG_PPC64 */ | 590 | #endif /* CONFIG_PPC64 */ |
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index c667f0f02c34..3139814f6439 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig | |||
@@ -47,6 +47,12 @@ config LPARCFG | |||
47 | config PPC_PSERIES_DEBUG | 47 | config PPC_PSERIES_DEBUG |
48 | depends on PPC_PSERIES && PPC_EARLY_DEBUG | 48 | depends on PPC_PSERIES && PPC_EARLY_DEBUG |
49 | bool "Enable extra debug logging in platforms/pseries" | 49 | bool "Enable extra debug logging in platforms/pseries" |
50 | help | ||
51 | Say Y here if you want the pseries core to produce a bunch of | ||
52 | debug messages to the system log. Select this if you are having a | ||
53 | problem with the pseries core and want to see more of what is | ||
54 | going on. This does not enable debugging in lpar.c, which must | ||
55 | be manually done due to its verbosity. | ||
50 | default y | 56 | default y |
51 | 57 | ||
52 | config PPC_SMLPAR | 58 | config PPC_SMLPAR |
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 34b7dc12e731..17a11c82e6f8 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c | |||
@@ -21,8 +21,6 @@ | |||
21 | * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> | 21 | * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #undef DEBUG | ||
25 | |||
26 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
27 | #include <linux/init.h> | 25 | #include <linux/init.h> |
28 | #include <linux/list.h> | 26 | #include <linux/list.h> |
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4b7a062dee15..5fcc92a12d3e 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c | |||
@@ -25,8 +25,6 @@ | |||
25 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 25 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #undef DEBUG | ||
29 | |||
30 | #include <linux/pci.h> | 28 | #include <linux/pci.h> |
31 | #include <asm/pci-bridge.h> | 29 | #include <asm/pci-bridge.h> |
32 | #include <asm/ppc-pci.h> | 30 | #include <asm/ppc-pci.h> |
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 45e0c6199f36..05221b13ffb1 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug | |||
@@ -6,6 +6,18 @@ config TRACE_IRQFLAGS_SUPPORT | |||
6 | 6 | ||
7 | source "lib/Kconfig.debug" | 7 | source "lib/Kconfig.debug" |
8 | 8 | ||
9 | config STRICT_DEVMEM | ||
10 | def_bool y | ||
11 | prompt "Filter access to /dev/mem" | ||
12 | ---help--- | ||
13 | This option restricts access to /dev/mem. If this option is | ||
14 | disabled, you allow userspace access to all memory, including | ||
15 | kernel and userspace memory. Accidental memory access is likely | ||
16 | to be disastrous. | ||
17 | Memory access is required for experts who want to debug the kernel. | ||
18 | |||
19 | If you are unsure, say Y. | ||
20 | |||
9 | config DEBUG_STRICT_USER_COPY_CHECKS | 21 | config DEBUG_STRICT_USER_COPY_CHECKS |
10 | bool "Strict user copy size checks" | 22 | bool "Strict user copy size checks" |
11 | ---help--- | 23 | ---help--- |
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a8729ea7e9ac..3c987e9ec8d6 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h | |||
@@ -130,6 +130,11 @@ struct page; | |||
130 | void arch_free_page(struct page *page, int order); | 130 | void arch_free_page(struct page *page, int order); |
131 | void arch_alloc_page(struct page *page, int order); | 131 | void arch_alloc_page(struct page *page, int order); |
132 | 132 | ||
133 | static inline int devmem_is_allowed(unsigned long pfn) | ||
134 | { | ||
135 | return 0; | ||
136 | } | ||
137 | |||
133 | #define HAVE_ARCH_FREE_PAGE | 138 | #define HAVE_ARCH_FREE_PAGE |
134 | #define HAVE_ARCH_ALLOC_PAGE | 139 | #define HAVE_ARCH_ALLOC_PAGE |
135 | 140 | ||
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 1e6449c79ab6..53acaa86dd94 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/resource.h> | 25 | #include <linux/resource.h> |
26 | #include <linux/times.h> | 26 | #include <linux/times.h> |
27 | #include <linux/smp.h> | 27 | #include <linux/smp.h> |
28 | #include <linux/smp_lock.h> | ||
29 | #include <linux/sem.h> | 28 | #include <linux/sem.h> |
30 | #include <linux/msg.h> | 29 | #include <linux/msg.h> |
31 | #include <linux/shm.h> | 30 | #include <linux/shm.h> |
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d60fc4398516..2564793ec2b6 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/sections.h> | 30 | #include <asm/sections.h> |
31 | #include <linux/module.h> | 31 | #include <linux/module.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/hardirq.h> | ||
33 | 34 | ||
34 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 35 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
35 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | 36 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); |
@@ -212,7 +213,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | |||
212 | /* Set the PER control regs, turns on single step for this address */ | 213 | /* Set the PER control regs, turns on single step for this address */ |
213 | __ctl_load(kprobe_per_regs, 9, 11); | 214 | __ctl_load(kprobe_per_regs, 9, 11); |
214 | regs->psw.mask |= PSW_MASK_PER; | 215 | regs->psw.mask |= PSW_MASK_PER; |
215 | regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); | 216 | regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); |
216 | } | 217 | } |
217 | 218 | ||
218 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | 219 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) |
@@ -239,7 +240,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
239 | __get_cpu_var(current_kprobe) = p; | 240 | __get_cpu_var(current_kprobe) = p; |
240 | /* Save the interrupt and per flags */ | 241 | /* Save the interrupt and per flags */ |
241 | kcb->kprobe_saved_imask = regs->psw.mask & | 242 | kcb->kprobe_saved_imask = regs->psw.mask & |
242 | (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK); | 243 | (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); |
243 | /* Save the control regs that govern PER */ | 244 | /* Save the control regs that govern PER */ |
244 | __ctl_store(kcb->kprobe_saved_ctl, 9, 11); | 245 | __ctl_store(kcb->kprobe_saved_ctl, 9, 11); |
245 | } | 246 | } |
@@ -316,8 +317,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
316 | return 1; | 317 | return 1; |
317 | 318 | ||
318 | ss_probe: | 319 | ss_probe: |
319 | if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) | ||
320 | local_irq_disable(); | ||
321 | prepare_singlestep(p, regs); | 320 | prepare_singlestep(p, regs); |
322 | kcb->kprobe_status = KPROBE_HIT_SS; | 321 | kcb->kprobe_status = KPROBE_HIT_SS; |
323 | return 1; | 322 | return 1; |
@@ -350,6 +349,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, | |||
350 | struct hlist_node *node, *tmp; | 349 | struct hlist_node *node, *tmp; |
351 | unsigned long flags, orig_ret_address = 0; | 350 | unsigned long flags, orig_ret_address = 0; |
352 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 351 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
352 | kprobe_opcode_t *correct_ret_addr = NULL; | ||
353 | 353 | ||
354 | INIT_HLIST_HEAD(&empty_rp); | 354 | INIT_HLIST_HEAD(&empty_rp); |
355 | kretprobe_hash_lock(current, &head, &flags); | 355 | kretprobe_hash_lock(current, &head, &flags); |
@@ -372,10 +372,32 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, | |||
372 | /* another task is sharing our hash bucket */ | 372 | /* another task is sharing our hash bucket */ |
373 | continue; | 373 | continue; |
374 | 374 | ||
375 | if (ri->rp && ri->rp->handler) | 375 | orig_ret_address = (unsigned long)ri->ret_addr; |
376 | ri->rp->handler(ri, regs); | 376 | |
377 | if (orig_ret_address != trampoline_address) | ||
378 | /* | ||
379 | * This is the real return address. Any other | ||
380 | * instances associated with this task are for | ||
381 | * other calls deeper on the call stack | ||
382 | */ | ||
383 | break; | ||
384 | } | ||
385 | |||
386 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | ||
387 | |||
388 | correct_ret_addr = ri->ret_addr; | ||
389 | hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { | ||
390 | if (ri->task != current) | ||
391 | /* another task is sharing our hash bucket */ | ||
392 | continue; | ||
377 | 393 | ||
378 | orig_ret_address = (unsigned long)ri->ret_addr; | 394 | orig_ret_address = (unsigned long)ri->ret_addr; |
395 | |||
396 | if (ri->rp && ri->rp->handler) { | ||
397 | ri->ret_addr = correct_ret_addr; | ||
398 | ri->rp->handler(ri, regs); | ||
399 | } | ||
400 | |||
379 | recycle_rp_inst(ri, &empty_rp); | 401 | recycle_rp_inst(ri, &empty_rp); |
380 | 402 | ||
381 | if (orig_ret_address != trampoline_address) { | 403 | if (orig_ret_address != trampoline_address) { |
@@ -387,7 +409,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, | |||
387 | break; | 409 | break; |
388 | } | 410 | } |
389 | } | 411 | } |
390 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | 412 | |
391 | regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; | 413 | regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; |
392 | 414 | ||
393 | reset_current_kprobe(); | 415 | reset_current_kprobe(); |
@@ -465,8 +487,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) | |||
465 | goto out; | 487 | goto out; |
466 | } | 488 | } |
467 | reset_current_kprobe(); | 489 | reset_current_kprobe(); |
468 | if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) | ||
469 | local_irq_enable(); | ||
470 | out: | 490 | out: |
471 | preempt_enable_no_resched(); | 491 | preempt_enable_no_resched(); |
472 | 492 | ||
@@ -482,7 +502,7 @@ out: | |||
482 | return 1; | 502 | return 1; |
483 | } | 503 | } |
484 | 504 | ||
485 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | 505 | static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr) |
486 | { | 506 | { |
487 | struct kprobe *cur = kprobe_running(); | 507 | struct kprobe *cur = kprobe_running(); |
488 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | 508 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
@@ -508,8 +528,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | |||
508 | restore_previous_kprobe(kcb); | 528 | restore_previous_kprobe(kcb); |
509 | else { | 529 | else { |
510 | reset_current_kprobe(); | 530 | reset_current_kprobe(); |
511 | if (regs->psw.mask & (PSW_MASK_PER | PSW_MASK_IO)) | ||
512 | local_irq_enable(); | ||
513 | } | 531 | } |
514 | preempt_enable_no_resched(); | 532 | preempt_enable_no_resched(); |
515 | break; | 533 | break; |
@@ -553,6 +571,18 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | |||
553 | return 0; | 571 | return 0; |
554 | } | 572 | } |
555 | 573 | ||
574 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | ||
575 | { | ||
576 | int ret; | ||
577 | |||
578 | if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) | ||
579 | local_irq_disable(); | ||
580 | ret = kprobe_trap_handler(regs, trapnr); | ||
581 | if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) | ||
582 | local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); | ||
583 | return ret; | ||
584 | } | ||
585 | |||
556 | /* | 586 | /* |
557 | * Wrapper routine to for handling exceptions. | 587 | * Wrapper routine to for handling exceptions. |
558 | */ | 588 | */ |
@@ -560,8 +590,12 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
560 | unsigned long val, void *data) | 590 | unsigned long val, void *data) |
561 | { | 591 | { |
562 | struct die_args *args = (struct die_args *)data; | 592 | struct die_args *args = (struct die_args *)data; |
593 | struct pt_regs *regs = args->regs; | ||
563 | int ret = NOTIFY_DONE; | 594 | int ret = NOTIFY_DONE; |
564 | 595 | ||
596 | if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) | ||
597 | local_irq_disable(); | ||
598 | |||
565 | switch (val) { | 599 | switch (val) { |
566 | case DIE_BPT: | 600 | case DIE_BPT: |
567 | if (kprobe_handler(args->regs)) | 601 | if (kprobe_handler(args->regs)) |
@@ -572,16 +606,17 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
572 | ret = NOTIFY_STOP; | 606 | ret = NOTIFY_STOP; |
573 | break; | 607 | break; |
574 | case DIE_TRAP: | 608 | case DIE_TRAP: |
575 | /* kprobe_running() needs smp_processor_id() */ | 609 | if (!preemptible() && kprobe_running() && |
576 | preempt_disable(); | 610 | kprobe_trap_handler(args->regs, args->trapnr)) |
577 | if (kprobe_running() && | ||
578 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
579 | ret = NOTIFY_STOP; | 611 | ret = NOTIFY_STOP; |
580 | preempt_enable(); | ||
581 | break; | 612 | break; |
582 | default: | 613 | default: |
583 | break; | 614 | break; |
584 | } | 615 | } |
616 | |||
617 | if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) | ||
618 | local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); | ||
619 | |||
585 | return ret; | 620 | return ret; |
586 | } | 621 | } |
587 | 622 | ||
@@ -595,6 +630,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
595 | 630 | ||
596 | /* setup return addr to the jprobe handler routine */ | 631 | /* setup return addr to the jprobe handler routine */ |
597 | regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE; | 632 | regs->psw.addr = (unsigned long)(jp->entry) | PSW_ADDR_AMODE; |
633 | regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); | ||
598 | 634 | ||
599 | /* r14 is the function return address */ | 635 | /* r14 is the function return address */ |
600 | kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14]; | 636 | kcb->jprobe_saved_r14 = (unsigned long)regs->gprs[14]; |
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 38e641cdd977..45b405ca2567 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c | |||
@@ -20,18 +20,17 @@ | |||
20 | static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, | 20 | static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, |
21 | unsigned long end, int write, struct page **pages, int *nr) | 21 | unsigned long end, int write, struct page **pages, int *nr) |
22 | { | 22 | { |
23 | unsigned long mask, result; | 23 | unsigned long mask; |
24 | pte_t *ptep, pte; | 24 | pte_t *ptep, pte; |
25 | struct page *page; | 25 | struct page *page; |
26 | 26 | ||
27 | result = write ? 0 : _PAGE_RO; | 27 | mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; |
28 | mask = result | _PAGE_INVALID | _PAGE_SPECIAL; | ||
29 | 28 | ||
30 | ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); | 29 | ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); |
31 | do { | 30 | do { |
32 | pte = *ptep; | 31 | pte = *ptep; |
33 | barrier(); | 32 | barrier(); |
34 | if ((pte_val(pte) & mask) != result) | 33 | if ((pte_val(pte) & mask) != 0) |
35 | return 0; | 34 | return 0; |
36 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | 35 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); |
37 | page = pte_page(pte); | 36 | page = pte_page(pte); |
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 46d5179c9f49..e3c73cdd8c90 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h | |||
@@ -199,10 +199,13 @@ extern unsigned long get_wchan(struct task_struct *p); | |||
199 | #define ARCH_HAS_PREFETCHW | 199 | #define ARCH_HAS_PREFETCHW |
200 | static inline void prefetch(void *x) | 200 | static inline void prefetch(void *x) |
201 | { | 201 | { |
202 | __asm__ __volatile__ ("pref @%0\n\t" : : "r" (x) : "memory"); | 202 | __builtin_prefetch(x, 0, 3); |
203 | } | 203 | } |
204 | 204 | ||
205 | #define prefetchw(x) prefetch(x) | 205 | static inline void prefetchw(void *x) |
206 | { | ||
207 | __builtin_prefetch(x, 1, 3); | ||
208 | } | ||
206 | #endif | 209 | #endif |
207 | 210 | ||
208 | #endif /* __KERNEL__ */ | 211 | #endif /* __KERNEL__ */ |
diff --git a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c index 4eabc68cd753..b601fa3978d1 100644 --- a/arch/sh/kernel/cpu/sh4/clock-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/clock-sh4-202.c | |||
@@ -110,7 +110,7 @@ static int shoc_clk_verify_rate(struct clk *clk, unsigned long rate) | |||
110 | return 0; | 110 | return 0; |
111 | } | 111 | } |
112 | 112 | ||
113 | static int shoc_clk_set_rate(struct clk *clk, unsigned long rate, int algo_id) | 113 | static int shoc_clk_set_rate(struct clk *clk, unsigned long rate) |
114 | { | 114 | { |
115 | unsigned long frqcr3; | 115 | unsigned long frqcr3; |
116 | unsigned int tmp; | 116 | unsigned int tmp; |
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index 81f58371613d..8c6a350df751 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c | |||
@@ -88,7 +88,7 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, int op) | |||
88 | } | 88 | } |
89 | 89 | ||
90 | if (op & CACHEFLUSH_I) | 90 | if (op & CACHEFLUSH_I) |
91 | flush_cache_all(); | 91 | flush_icache_range(addr, addr+len); |
92 | 92 | ||
93 | up_read(&current->mm->mmap_sem); | 93 | up_read(&current->mm->mmap_sem); |
94 | return 0; | 94 | return 0; |
diff --git a/arch/sh/kernel/vsyscall/vsyscall-trapa.S b/arch/sh/kernel/vsyscall/vsyscall-trapa.S index 3b6eb34c43fa..3e70f851cdc6 100644 --- a/arch/sh/kernel/vsyscall/vsyscall-trapa.S +++ b/arch/sh/kernel/vsyscall/vsyscall-trapa.S | |||
@@ -8,9 +8,9 @@ __kernel_vsyscall: | |||
8 | * fill out .eh_frame -- PFM. */ | 8 | * fill out .eh_frame -- PFM. */ |
9 | .LEND_vsyscall: | 9 | .LEND_vsyscall: |
10 | .size __kernel_vsyscall,.-.LSTART_vsyscall | 10 | .size __kernel_vsyscall,.-.LSTART_vsyscall |
11 | .previous | ||
12 | 11 | ||
13 | .section .eh_frame,"a",@progbits | 12 | .section .eh_frame,"a",@progbits |
13 | .previous | ||
14 | .LCIE: | 14 | .LCIE: |
15 | .ualong .LCIE_end - .LCIE_start | 15 | .ualong .LCIE_end - .LCIE_start |
16 | .LCIE_start: | 16 | .LCIE_start: |
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 7524689b03d2..16582d85368a 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
13 | #include <linux/threads.h> | 13 | #include <linux/threads.h> |
14 | #include <linux/smp.h> | 14 | #include <linux/smp.h> |
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/interrupt.h> | 15 | #include <linux/interrupt.h> |
17 | #include <linux/kernel_stat.h> | 16 | #include <linux/kernel_stat.h> |
18 | #include <linux/init.h> | 17 | #include <linux/init.h> |
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index e6375a750d9a..6db18c6927fb 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/resource.h> | 17 | #include <linux/resource.h> |
18 | #include <linux/times.h> | 18 | #include <linux/times.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <linux/smp_lock.h> | ||
21 | #include <linux/sem.h> | 20 | #include <linux/sem.h> |
22 | #include <linux/msg.h> | 21 | #include <linux/msg.h> |
23 | #include <linux/shm.h> | 22 | #include <linux/shm.h> |
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 675c9e11ada5..42b282fa6112 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/mman.h> | 19 | #include <linux/mman.h> |
20 | #include <linux/utsname.h> | 20 | #include <linux/utsname.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | #include <linux/smp_lock.h> | ||
23 | #include <linux/ipc.h> | 22 | #include <linux/ipc.h> |
24 | 23 | ||
25 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 12b9f352595f..4491f4cb2695 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <asm/system.h> | 16 | #include <asm/system.h> |
17 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/perf_event.h> | 19 | #include <linux/perf_event.h> |
21 | 20 | ||
22 | enum direction { | 21 | enum direction { |
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index b351770cbdd6..3107381e576d 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/string.h> | 9 | #include <linux/string.h> |
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
12 | #include <linux/smp_lock.h> | ||
13 | 12 | ||
14 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
15 | 14 | ||
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 07ec8a865c1d..e11b5fcb70eb 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
@@ -329,6 +329,18 @@ endmenu # Tilera-specific configuration | |||
329 | 329 | ||
330 | menu "Bus options" | 330 | menu "Bus options" |
331 | 331 | ||
332 | config PCI | ||
333 | bool "PCI support" | ||
334 | default y | ||
335 | select PCI_DOMAINS | ||
336 | ---help--- | ||
337 | Enable PCI root complex support, so PCIe endpoint devices can | ||
338 | be attached to the Tile chip. Many, but not all, PCI devices | ||
339 | are supported under Tilera's root complex driver. | ||
340 | |||
341 | config PCI_DOMAINS | ||
342 | bool | ||
343 | |||
332 | config NO_IOMEM | 344 | config NO_IOMEM |
333 | def_bool !PCI | 345 | def_bool !PCI |
334 | 346 | ||
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index c5741da4eeac..14a3f8556ace 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h | |||
@@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size) | |||
137 | mb_incoherent(); | 137 | mb_incoherent(); |
138 | } | 138 | } |
139 | 139 | ||
140 | /* | ||
141 | * Flush & invalidate a VA range that is homed remotely on a single core, | ||
142 | * waiting until the memory controller holds the flushed values. | ||
143 | */ | ||
144 | static inline void finv_buffer_remote(void *buffer, size_t size) | ||
145 | { | ||
146 | char *p; | ||
147 | int i; | ||
148 | |||
149 | /* | ||
150 | * Flush and invalidate the buffer out of the local L1/L2 | ||
151 | * and request the home cache to flush and invalidate as well. | ||
152 | */ | ||
153 | __finv_buffer(buffer, size); | ||
154 | |||
155 | /* | ||
156 | * Wait for the home cache to acknowledge that it has processed | ||
157 | * all the flush-and-invalidate requests. This does not mean | ||
158 | * that the flushed data has reached the memory controller yet, | ||
159 | * but it does mean the home cache is processing the flushes. | ||
160 | */ | ||
161 | __insn_mf(); | ||
162 | |||
163 | /* | ||
164 | * Issue a load to the last cache line, which can't complete | ||
165 | * until all the previously-issued flushes to the same memory | ||
166 | * controller have also completed. If we weren't striping | ||
167 | * memory, that one load would be sufficient, but since we may | ||
168 | * be, we also need to back up to the last load issued to | ||
169 | * another memory controller, which would be the point where | ||
170 | * we crossed an 8KB boundary (the granularity of striping | ||
171 | * across memory controllers). Keep backing up and doing this | ||
172 | * until we are before the beginning of the buffer, or have | ||
173 | * hit all the controllers. | ||
174 | */ | ||
175 | for (i = 0, p = (char *)buffer + size - 1; | ||
176 | i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer; | ||
177 | ++i) { | ||
178 | const unsigned long STRIPE_WIDTH = 8192; | ||
179 | |||
180 | /* Force a load instruction to issue. */ | ||
181 | *(volatile char *)p; | ||
182 | |||
183 | /* Jump to end of previous stripe. */ | ||
184 | p -= STRIPE_WIDTH; | ||
185 | p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1)); | ||
186 | } | ||
187 | |||
188 | /* Wait for the loads (and thus flushes) to have completed. */ | ||
189 | __insn_mf(); | ||
190 | } | ||
191 | |||
140 | #endif /* _ASM_TILE_CACHEFLUSH_H */ | 192 | #endif /* _ASM_TILE_CACHEFLUSH_H */ |
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index ee43328713ab..d3cbb9b14cbe 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h | |||
@@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr); | |||
55 | #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) | 55 | #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) |
56 | #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) | 56 | #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) |
57 | 57 | ||
58 | void __iomem *ioport_map(unsigned long port, unsigned int len); | ||
59 | extern inline void ioport_unmap(void __iomem *addr) {} | ||
60 | |||
61 | #define mmiowb() | 58 | #define mmiowb() |
62 | 59 | ||
63 | /* Conversion between virtual and physical mappings. */ | 60 | /* Conversion between virtual and physical mappings. */ |
@@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, | |||
189 | * we never run, uses them unconditionally. | 186 | * we never run, uses them unconditionally. |
190 | */ | 187 | */ |
191 | 188 | ||
192 | static inline int ioport_panic(void) | 189 | static inline long ioport_panic(void) |
193 | { | 190 | { |
194 | panic("inb/outb and friends do not exist on tile"); | 191 | panic("inb/outb and friends do not exist on tile"); |
195 | return 0; | 192 | return 0; |
196 | } | 193 | } |
197 | 194 | ||
195 | static inline void __iomem *ioport_map(unsigned long port, unsigned int len) | ||
196 | { | ||
197 | return (void __iomem *) ioport_panic(); | ||
198 | } | ||
199 | |||
200 | static inline void ioport_unmap(void __iomem *addr) | ||
201 | { | ||
202 | ioport_panic(); | ||
203 | } | ||
204 | |||
198 | static inline u8 inb(unsigned long addr) | 205 | static inline u8 inb(unsigned long addr) |
199 | { | 206 | { |
200 | return ioport_panic(); | 207 | return ioport_panic(); |
diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h deleted file mode 100644 index e853b0e2793b..000000000000 --- a/arch/tile/include/asm/pci-bridge.h +++ /dev/null | |||
@@ -1,117 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #ifndef _ASM_TILE_PCI_BRIDGE_H | ||
16 | #define _ASM_TILE_PCI_BRIDGE_H | ||
17 | |||
18 | #include <linux/ioport.h> | ||
19 | #include <linux/pci.h> | ||
20 | |||
21 | struct device_node; | ||
22 | struct pci_controller; | ||
23 | |||
24 | /* | ||
25 | * pci_io_base returns the memory address at which you can access | ||
26 | * the I/O space for PCI bus number `bus' (or NULL on error). | ||
27 | */ | ||
28 | extern void __iomem *pci_bus_io_base(unsigned int bus); | ||
29 | extern unsigned long pci_bus_io_base_phys(unsigned int bus); | ||
30 | extern unsigned long pci_bus_mem_base_phys(unsigned int bus); | ||
31 | |||
32 | /* Allocate a new PCI host bridge structure */ | ||
33 | extern struct pci_controller *pcibios_alloc_controller(void); | ||
34 | |||
35 | /* Helper function for setting up resources */ | ||
36 | extern void pci_init_resource(struct resource *res, unsigned long start, | ||
37 | unsigned long end, int flags, char *name); | ||
38 | |||
39 | /* Get the PCI host controller for a bus */ | ||
40 | extern struct pci_controller *pci_bus_to_hose(int bus); | ||
41 | |||
42 | /* | ||
43 | * Structure of a PCI controller (host bridge) | ||
44 | */ | ||
45 | struct pci_controller { | ||
46 | int index; /* PCI domain number */ | ||
47 | struct pci_bus *root_bus; | ||
48 | |||
49 | int first_busno; | ||
50 | int last_busno; | ||
51 | |||
52 | int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ | ||
53 | int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ | ||
54 | |||
55 | struct pci_ops *ops; | ||
56 | |||
57 | int irq_base; /* Base IRQ from the Hypervisor */ | ||
58 | int plx_gen1; /* flag for PLX Gen 1 configuration */ | ||
59 | |||
60 | /* Address ranges that are routed to this controller/bridge. */ | ||
61 | struct resource mem_resources[3]; | ||
62 | }; | ||
63 | |||
64 | static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus) | ||
65 | { | ||
66 | return bus->sysdata; | ||
67 | } | ||
68 | |||
69 | extern void setup_indirect_pci_nomap(struct pci_controller *hose, | ||
70 | void __iomem *cfg_addr, void __iomem *cfg_data); | ||
71 | extern void setup_indirect_pci(struct pci_controller *hose, | ||
72 | u32 cfg_addr, u32 cfg_data); | ||
73 | extern void setup_grackle(struct pci_controller *hose); | ||
74 | |||
75 | extern unsigned char common_swizzle(struct pci_dev *, unsigned char *); | ||
76 | |||
77 | /* | ||
78 | * The following code swizzles for exactly one bridge. The routine | ||
79 | * common_swizzle below handles multiple bridges. But there are a | ||
80 | * some boards that don't follow the PCI spec's suggestion so we | ||
81 | * break this piece out separately. | ||
82 | */ | ||
83 | static inline unsigned char bridge_swizzle(unsigned char pin, | ||
84 | unsigned char idsel) | ||
85 | { | ||
86 | return (((pin-1) + idsel) % 4) + 1; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * The following macro is used to lookup irqs in a standard table | ||
91 | * format for those PPC systems that do not already have PCI | ||
92 | * interrupts properly routed. | ||
93 | */ | ||
94 | /* FIXME - double check this */ | ||
95 | #define PCI_IRQ_TABLE_LOOKUP ({ \ | ||
96 | long _ctl_ = -1; \ | ||
97 | if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \ | ||
98 | _ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \ | ||
99 | _ctl_; \ | ||
100 | }) | ||
101 | |||
102 | /* | ||
103 | * Scan the buses below a given PCI host bridge and assign suitable | ||
104 | * resources to all devices found. | ||
105 | */ | ||
106 | extern int pciauto_bus_scan(struct pci_controller *, int); | ||
107 | |||
108 | #ifdef CONFIG_PCI | ||
109 | extern unsigned long pci_address_to_pio(phys_addr_t address); | ||
110 | #else | ||
111 | static inline unsigned long pci_address_to_pio(phys_addr_t address) | ||
112 | { | ||
113 | return (unsigned long)-1; | ||
114 | } | ||
115 | #endif | ||
116 | |||
117 | #endif /* _ASM_TILE_PCI_BRIDGE_H */ | ||
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index b0c15da2d5d5..c3fc458a0d32 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h | |||
@@ -15,7 +15,29 @@ | |||
15 | #ifndef _ASM_TILE_PCI_H | 15 | #ifndef _ASM_TILE_PCI_H |
16 | #define _ASM_TILE_PCI_H | 16 | #define _ASM_TILE_PCI_H |
17 | 17 | ||
18 | #include <asm/pci-bridge.h> | 18 | #include <linux/pci.h> |
19 | |||
20 | /* | ||
21 | * Structure of a PCI controller (host bridge) | ||
22 | */ | ||
23 | struct pci_controller { | ||
24 | int index; /* PCI domain number */ | ||
25 | struct pci_bus *root_bus; | ||
26 | |||
27 | int first_busno; | ||
28 | int last_busno; | ||
29 | |||
30 | int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ | ||
31 | int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ | ||
32 | |||
33 | struct pci_ops *ops; | ||
34 | |||
35 | int irq_base; /* Base IRQ from the Hypervisor */ | ||
36 | int plx_gen1; /* flag for PLX Gen 1 configuration */ | ||
37 | |||
38 | /* Address ranges that are routed to this controller/bridge. */ | ||
39 | struct resource mem_resources[3]; | ||
40 | }; | ||
19 | 41 | ||
20 | /* | 42 | /* |
21 | * The hypervisor maps the entirety of CPA-space as bus addresses, so | 43 | * The hypervisor maps the entirety of CPA-space as bus addresses, so |
@@ -24,56 +46,12 @@ | |||
24 | */ | 46 | */ |
25 | #define PCI_DMA_BUS_IS_PHYS 1 | 47 | #define PCI_DMA_BUS_IS_PHYS 1 |
26 | 48 | ||
27 | struct pci_controller *pci_bus_to_hose(int bus); | ||
28 | unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp); | ||
29 | int __init tile_pci_init(void); | 49 | int __init tile_pci_init(void); |
30 | void pci_iounmap(struct pci_dev *dev, void __iomem *addr); | ||
31 | void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); | ||
32 | void __devinit pcibios_fixup_bus(struct pci_bus *bus); | ||
33 | 50 | ||
34 | int __devinit _tile_cfg_read(struct pci_controller *hose, | 51 | void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); |
35 | int bus, | 52 | static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} |
36 | int slot, | ||
37 | int function, | ||
38 | int offset, | ||
39 | int size, | ||
40 | u32 *val); | ||
41 | int __devinit _tile_cfg_write(struct pci_controller *hose, | ||
42 | int bus, | ||
43 | int slot, | ||
44 | int function, | ||
45 | int offset, | ||
46 | int size, | ||
47 | u32 val); | ||
48 | 53 | ||
49 | /* | 54 | void __devinit pcibios_fixup_bus(struct pci_bus *bus); |
50 | * These are used to to config reads and writes in the early stages of | ||
51 | * setup before the driver infrastructure has been set up enough to be | ||
52 | * able to do config reads and writes. | ||
53 | */ | ||
54 | #define early_cfg_read(where, size, value) \ | ||
55 | _tile_cfg_read(controller, \ | ||
56 | current_bus, \ | ||
57 | pci_slot, \ | ||
58 | pci_fn, \ | ||
59 | where, \ | ||
60 | size, \ | ||
61 | value) | ||
62 | |||
63 | #define early_cfg_write(where, size, value) \ | ||
64 | _tile_cfg_write(controller, \ | ||
65 | current_bus, \ | ||
66 | pci_slot, \ | ||
67 | pci_fn, \ | ||
68 | where, \ | ||
69 | size, \ | ||
70 | value) | ||
71 | |||
72 | |||
73 | |||
74 | #define PCICFG_BYTE 1 | ||
75 | #define PCICFG_WORD 2 | ||
76 | #define PCICFG_DWORD 4 | ||
77 | 55 | ||
78 | #define TILE_NUM_PCIE 2 | 56 | #define TILE_NUM_PCIE 2 |
79 | 57 | ||
@@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus) | |||
88 | } | 66 | } |
89 | 67 | ||
90 | /* | 68 | /* |
91 | * I/O space is currently not supported. | 69 | * pcibios_assign_all_busses() tells whether or not the bus numbers |
70 | * should be reassigned, in case the BIOS didn't do it correctly, or | ||
71 | * in case we don't have a BIOS and we want to let Linux do it. | ||
92 | */ | 72 | */ |
73 | static inline int pcibios_assign_all_busses(void) | ||
74 | { | ||
75 | return 1; | ||
76 | } | ||
93 | 77 | ||
94 | #define TILE_PCIE_LOWER_IO 0x0 | 78 | /* |
95 | #define TILE_PCIE_UPPER_IO 0x10000 | 79 | * No special bus mastering setup handling. |
96 | #define TILE_PCIE_PCIE_IO_SIZE 0x0000FFFF | 80 | */ |
97 | |||
98 | #define _PAGE_NO_CACHE 0 | ||
99 | #define _PAGE_GUARDED 0 | ||
100 | |||
101 | |||
102 | #define pcibios_assign_all_busses() pci_assign_all_buses | ||
103 | extern int pci_assign_all_buses; | ||
104 | |||
105 | static inline void pcibios_set_master(struct pci_dev *dev) | 81 | static inline void pcibios_set_master(struct pci_dev *dev) |
106 | { | 82 | { |
107 | /* No special bus mastering setup handling */ | ||
108 | } | 83 | } |
109 | 84 | ||
110 | #define PCIBIOS_MIN_MEM 0 | 85 | #define PCIBIOS_MIN_MEM 0 |
111 | #define PCIBIOS_MIN_IO TILE_PCIE_LOWER_IO | 86 | #define PCIBIOS_MIN_IO 0 |
112 | 87 | ||
113 | /* | 88 | /* |
114 | * This flag tells if the platform is TILEmpower that needs | 89 | * This flag tells if the platform is TILEmpower that needs |
115 | * special configuration for the PLX switch chip. | 90 | * special configuration for the PLX switch chip. |
116 | */ | 91 | */ |
117 | extern int blade_pci; | 92 | extern int tile_plx_gen1; |
93 | |||
94 | /* Use any cpu for PCI. */ | ||
95 | #define cpumask_of_pcibus(bus) cpu_online_mask | ||
118 | 96 | ||
119 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ | 97 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ |
120 | #include <asm-generic/pci-dma-compat.h> | 98 | #include <asm-generic/pci-dma-compat.h> |
@@ -122,7 +100,4 @@ extern int blade_pci; | |||
122 | /* generic pci stuff */ | 100 | /* generic pci stuff */ |
123 | #include <asm-generic/pci.h> | 101 | #include <asm-generic/pci.h> |
124 | 102 | ||
125 | /* Use any cpu for PCI. */ | ||
126 | #define cpumask_of_pcibus(bus) cpu_online_mask | ||
127 | |||
128 | #endif /* _ASM_TILE_PCI_H */ | 103 | #endif /* _ASM_TILE_PCI_H */ |
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 1747ff3946b2..a9e7c8760334 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h | |||
@@ -292,8 +292,18 @@ extern int kstack_hash; | |||
292 | /* Are we using huge pages in the TLB for kernel data? */ | 292 | /* Are we using huge pages in the TLB for kernel data? */ |
293 | extern int kdata_huge; | 293 | extern int kdata_huge; |
294 | 294 | ||
295 | /* Support standard Linux prefetching. */ | ||
296 | #define ARCH_HAS_PREFETCH | ||
297 | #define prefetch(x) __builtin_prefetch(x) | ||
295 | #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() | 298 | #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() |
296 | 299 | ||
300 | /* Bring a value into the L1D, faulting the TLB if necessary. */ | ||
301 | #ifdef __tilegx__ | ||
302 | #define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x)) | ||
303 | #else | ||
304 | #define prefetch_L1(x) __insn_prefetch_L1((void *)(x)) | ||
305 | #endif | ||
306 | |||
297 | #else /* __ASSEMBLY__ */ | 307 | #else /* __ASSEMBLY__ */ |
298 | 308 | ||
299 | /* Do some slow action (e.g. read a slow SPR). */ | 309 | /* Do some slow action (e.g. read a slow SPR). */ |
diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h new file mode 100644 index 000000000000..3a73b2b44913 --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_impl.h | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * @file drv_xgbe_impl.h | ||
17 | * Implementation details for the NetIO library. | ||
18 | */ | ||
19 | |||
20 | #ifndef __DRV_XGBE_IMPL_H__ | ||
21 | #define __DRV_XGBE_IMPL_H__ | ||
22 | |||
23 | #include <hv/netio_errors.h> | ||
24 | #include <hv/netio_intf.h> | ||
25 | #include <hv/drv_xgbe_intf.h> | ||
26 | |||
27 | |||
28 | /** How many groups we have (log2). */ | ||
29 | #define LOG2_NUM_GROUPS (12) | ||
30 | /** How many groups we have. */ | ||
31 | #define NUM_GROUPS (1 << LOG2_NUM_GROUPS) | ||
32 | |||
33 | /** Number of output requests we'll buffer per tile. */ | ||
34 | #define EPP_REQS_PER_TILE (32) | ||
35 | |||
36 | /** Words used in an eDMA command without checksum acceleration. */ | ||
37 | #define EDMA_WDS_NO_CSUM 8 | ||
38 | /** Words used in an eDMA command with checksum acceleration. */ | ||
39 | #define EDMA_WDS_CSUM 10 | ||
40 | /** Total available words in the eDMA command FIFO. */ | ||
41 | #define EDMA_WDS_TOTAL 128 | ||
42 | |||
43 | |||
44 | /* | ||
45 | * FIXME: These definitions are internal and should have underscores! | ||
46 | * NOTE: The actual numeric values here are intentional and allow us to | ||
47 | * optimize the concept "if small ... else if large ... else ...", by | ||
48 | * checking for the low bit being set, and then for non-zero. | ||
49 | * These are used as array indices, so they must have the values (0, 1, 2) | ||
50 | * in some order. | ||
51 | */ | ||
52 | #define SIZE_SMALL (1) /**< Small packet queue. */ | ||
53 | #define SIZE_LARGE (2) /**< Large packet queue. */ | ||
54 | #define SIZE_JUMBO (0) /**< Jumbo packet queue. */ | ||
55 | |||
56 | /** The number of "SIZE_xxx" values. */ | ||
57 | #define NETIO_NUM_SIZES 3 | ||
58 | |||
59 | |||
60 | /* | ||
61 | * Default numbers of packets for IPP drivers. These values are chosen | ||
62 | * such that CIPP1 will not overflow its L2 cache. | ||
63 | */ | ||
64 | |||
65 | /** The default number of small packets. */ | ||
66 | #define NETIO_DEFAULT_SMALL_PACKETS 2750 | ||
67 | /** The default number of large packets. */ | ||
68 | #define NETIO_DEFAULT_LARGE_PACKETS 2500 | ||
69 | /** The default number of jumbo packets. */ | ||
70 | #define NETIO_DEFAULT_JUMBO_PACKETS 250 | ||
71 | |||
72 | |||
73 | /** Log2 of the size of a memory arena. */ | ||
74 | #define NETIO_ARENA_SHIFT 24 /* 16 MB */ | ||
75 | /** Size of a memory arena. */ | ||
76 | #define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) | ||
77 | |||
78 | |||
79 | /** A queue of packets. | ||
80 | * | ||
81 | * This structure partially defines a queue of packets waiting to be | ||
82 | * processed. The queue as a whole is written to by an interrupt handler and | ||
83 | * read by non-interrupt code; this data structure is what's touched by the | ||
84 | * interrupt handler. The other part of the queue state, the read offset, is | ||
85 | * kept in user space, not in hypervisor space, so it is in a separate data | ||
86 | * structure. | ||
87 | * | ||
88 | * The read offset (__packet_receive_read in the user part of the queue | ||
89 | * structure) points to the next packet to be read. When the read offset is | ||
90 | * equal to the write offset, the queue is empty; therefore the queue must | ||
91 | * contain one more slot than the required maximum queue size. | ||
92 | * | ||
93 | * Here's an example of all 3 state variables and what they mean. All | ||
94 | * pointers move left to right. | ||
95 | * | ||
96 | * @code | ||
97 | * I I V V V V I I I I | ||
98 | * 0 1 2 3 4 5 6 7 8 9 10 | ||
99 | * ^ ^ ^ ^ | ||
100 | * | | | | ||
101 | * | | __last_packet_plus_one | ||
102 | * | __packet_write | ||
103 | * __packet_receive_read | ||
104 | * @endcode | ||
105 | * | ||
106 | * This queue has 10 slots, and thus can hold 9 packets (__last_packet_plus_one | ||
107 | * = 10). The read pointer is at 2, and the write pointer is at 6; thus, | ||
108 | * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining | ||
109 | * slots are invalid (do not contain a packet). | ||
110 | */ | ||
111 | typedef struct { | ||
112 | /** Byte offset of the next notify packet to be written: zero for the first | ||
113 | * packet on the queue, sizeof (netio_pkt_t) for the second packet on the | ||
114 | * queue, etc. */ | ||
115 | volatile uint32_t __packet_write; | ||
116 | |||
117 | /** Offset of the packet after the last valid packet (i.e., when any | ||
118 | * pointer is incremented to this value, it wraps back to zero). */ | ||
119 | uint32_t __last_packet_plus_one; | ||
120 | } | ||
121 | __netio_packet_queue_t; | ||
122 | |||
123 | |||
124 | /** A queue of buffers. | ||
125 | * | ||
126 | * This structure partially defines a queue of empty buffers which have been | ||
127 | * obtained via requests to the IPP. (The elements of the queue are packet | ||
128 | * handles, which are transformed into a full netio_pkt_t when the buffer is | ||
129 | * retrieved.) The queue as a whole is written to by an interrupt handler and | ||
130 | * read by non-interrupt code; this data structure is what's touched by the | ||
131 | * interrupt handler. The other parts of the queue state, the read offset and | ||
132 | * requested write offset, are kept in user space, not in hypervisor space, so | ||
133 | * they are in a separate data structure. | ||
134 | * | ||
135 | * The read offset (__buffer_read in the user part of the queue structure) | ||
136 | * points to the next buffer to be read. When the read offset is equal to the | ||
137 | * write offset, the queue is empty; therefore the queue must contain one more | ||
138 | * slot than the required maximum queue size. | ||
139 | * | ||
140 | * The requested write offset (__buffer_requested_write in the user part of | ||
141 | * the queue structure) points to the slot which will hold the next buffer we | ||
142 | * request from the IPP, once we get around to sending such a request. When | ||
143 | * the requested write offset is equal to the write offset, no requests for | ||
144 | * new buffers are outstanding; when the requested write offset is one greater | ||
145 | * than the read offset, no more requests may be sent. | ||
146 | * | ||
147 | * Note that, unlike the packet_queue, the buffer_queue places incoming | ||
148 | * buffers at decreasing addresses. This makes the check for "is it time to | ||
149 | * wrap the buffer pointer" cheaper in the assembly code which receives new | ||
150 | * buffers, and means that the value which defines the queue size, | ||
151 | * __last_buffer, is different than in the packet queue. Also, the offset | ||
152 | * used in the packet_queue is already scaled by the size of a packet; here we | ||
153 | * use unscaled slot indices for the offsets. (These differences are | ||
154 | * historical, and in the future it's possible that the packet_queue will look | ||
155 | * more like this queue.) | ||
156 | * | ||
157 | * @code | ||
158 | * Here's an example of all 4 state variables and what they mean. Remember: | ||
159 | * all pointers move right to left. | ||
160 | * | ||
161 | * V V V I I R R V V V | ||
162 | * 0 1 2 3 4 5 6 7 8 9 | ||
163 | * ^ ^ ^ ^ | ||
164 | * | | | | | ||
165 | * | | | __last_buffer | ||
166 | * | | __buffer_write | ||
167 | * | __buffer_requested_write | ||
168 | * __buffer_read | ||
169 | * @endcode | ||
170 | * | ||
171 | * This queue has 10 slots, and thus can hold 9 buffers (__last_buffer = 9). | ||
172 | * The read pointer is at 2, and the write pointer is at 6; thus, there are | ||
173 | * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write | ||
174 | * pointer is at 4; thus, requests have been made to the IPP for buffers which | ||
175 | * will be placed in slots 6 and 5 when they arrive. Finally, the remaining | ||
176 | * slots are invalid (do not contain a buffer). | ||
177 | */ | ||
178 | typedef struct | ||
179 | { | ||
180 | /** Ordinal number of the next buffer to be written: 0 for the first slot in | ||
181 | * the queue, 1 for the second slot in the queue, etc. */ | ||
182 | volatile uint32_t __buffer_write; | ||
183 | |||
184 | /** Ordinal number of the last buffer (i.e., when any pointer is decremented | ||
185 | * below zero, it is reloaded with this value). */ | ||
186 | uint32_t __last_buffer; | ||
187 | } | ||
188 | __netio_buffer_queue_t; | ||
189 | |||
190 | |||
191 | /** | ||
192 | * An object for providing Ethernet packets to a process. | ||
193 | */ | ||
194 | typedef struct __netio_queue_impl_t | ||
195 | { | ||
196 | /** The queue of packets waiting to be received. */ | ||
197 | __netio_packet_queue_t __packet_receive_queue; | ||
198 | /** The intr bit mask that IDs this device. */ | ||
199 | unsigned int __intr_id; | ||
200 | /** Offset to queues of empty buffers, one per size. */ | ||
201 | uint32_t __buffer_queue[NETIO_NUM_SIZES]; | ||
202 | /** The address of the first EPP tile, or -1 if no EPP. */ | ||
203 | /* ISSUE: Actually this is always "0" or "~0". */ | ||
204 | uint32_t __epp_location; | ||
205 | /** The queue ID that this queue represents. */ | ||
206 | unsigned int __queue_id; | ||
207 | /** Number of acknowledgements received. */ | ||
208 | volatile uint32_t __acks_received; | ||
209 | /** Last completion number received for packet_sendv. */ | ||
210 | volatile uint32_t __last_completion_rcv; | ||
211 | /** Number of packets allowed to be outstanding. */ | ||
212 | uint32_t __max_outstanding; | ||
213 | /** First VA available for packets. */ | ||
214 | void* __va_0; | ||
215 | /** First VA in second range available for packets. */ | ||
216 | void* __va_1; | ||
217 | /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ | ||
218 | uint32_t __padding[3]; | ||
219 | /** The packets themselves. */ | ||
220 | netio_pkt_t __packets[0]; | ||
221 | } | ||
222 | netio_queue_impl_t; | ||
223 | |||
224 | |||
225 | /** | ||
226 | * An object for managing the user end of a NetIO queue. | ||
227 | */ | ||
228 | typedef struct __netio_queue_user_impl_t | ||
229 | { | ||
230 | /** The next incoming packet to be read. */ | ||
231 | uint32_t __packet_receive_read; | ||
232 | /** The next empty buffers to be read, one index per size. */ | ||
233 | uint8_t __buffer_read[NETIO_NUM_SIZES]; | ||
234 | /** Where the empty buffer we next request from the IPP will go, one index | ||
235 | * per size. */ | ||
236 | uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; | ||
237 | /** PCIe interface flag. */ | ||
238 | uint8_t __pcie; | ||
239 | /** Number of packets left to be received before we send a credit update. */ | ||
240 | uint32_t __receive_credit_remaining; | ||
241 | /** Value placed in __receive_credit_remaining when it reaches zero. */ | ||
242 | uint32_t __receive_credit_interval; | ||
243 | /** First fast I/O routine index. */ | ||
244 | uint32_t __fastio_index; | ||
245 | /** Number of acknowledgements expected. */ | ||
246 | uint32_t __acks_outstanding; | ||
247 | /** Last completion number requested. */ | ||
248 | uint32_t __last_completion_req; | ||
249 | /** File descriptor for driver. */ | ||
250 | int __fd; | ||
251 | } | ||
252 | netio_queue_user_impl_t; | ||
253 | |||
254 | |||
255 | #define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ | ||
256 | #define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ | ||
257 | |||
258 | |||
259 | /** Internal structure used to convey packet send information to the | ||
260 | * hypervisor. FIXME: Actually, it's not used for that anymore, but | ||
261 | * netio_packet_send() still uses it internally. | ||
262 | */ | ||
263 | typedef struct | ||
264 | { | ||
265 | uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ | ||
266 | uint16_t transfer_size; /**< Size of packet */ | ||
267 | uint32_t va; /**< VA of start of packet */ | ||
268 | __netio_pkt_handle_t handle; /**< Packet handle */ | ||
269 | uint32_t csum0; /**< First checksum word */ | ||
270 | uint32_t csum1; /**< Second checksum word */ | ||
271 | } | ||
272 | __netio_send_cmd_t; | ||
273 | |||
274 | |||
275 | /** Flags used in two contexts: | ||
276 | * - As the "flags" member in the __netio_send_cmd_t, above; used only | ||
277 | * for netio_pkt_send_{prepare,commit}. | ||
278 | * - As part of the flags passed to the various send packet fast I/O calls. | ||
279 | */ | ||
280 | |||
281 | /** Need acknowledgement on this packet. Note that some code in the | ||
282 | * normal send_pkt fast I/O handler assumes that this is equal to 1. */ | ||
283 | #define __NETIO_SEND_FLG_ACK 0x1 | ||
284 | |||
285 | /** Do checksum on this packet. (Only used with the __netio_send_cmd_t; | ||
286 | * normal packet sends use a special fast I/O index to denote checksumming, | ||
287 | * and multi-segment sends test the checksum descriptor.) */ | ||
288 | #define __NETIO_SEND_FLG_CSUM 0x2 | ||
289 | |||
290 | /** Get a completion on this packet. Only used with multi-segment sends. */ | ||
291 | #define __NETIO_SEND_FLG_COMPLETION 0x4 | ||
292 | |||
293 | /** Position of the number-of-extra-segments value in the flags word. | ||
294 | Only used with multi-segment sends. */ | ||
295 | #define __NETIO_SEND_FLG_XSEG_SHIFT 3 | ||
296 | |||
297 | /** Width of the number-of-extra-segments value in the flags word. */ | ||
298 | #define __NETIO_SEND_FLG_XSEG_WIDTH 2 | ||
299 | |||
300 | #endif /* __DRV_XGBE_IMPL_H__ */ | ||
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h new file mode 100644 index 000000000000..146e47d5334b --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_intf.h | |||
@@ -0,0 +1,615 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * @file drv_xgbe_intf.h | ||
17 | * Interface to the hypervisor XGBE driver. | ||
18 | */ | ||
19 | |||
20 | #ifndef __DRV_XGBE_INTF_H__ | ||
21 | #define __DRV_XGBE_INTF_H__ | ||
22 | |||
23 | /** | ||
24 | * An object for forwarding VAs and PAs to the hypervisor. | ||
25 | * @ingroup types | ||
26 | * | ||
27 | * This allows the supervisor to specify a number of areas of memory to | ||
28 | * store packet buffers. | ||
29 | */ | ||
30 | typedef struct | ||
31 | { | ||
32 | /** The physical address of the memory. */ | ||
33 | HV_PhysAddr pa; | ||
34 | /** Page table entry for the memory. This is only used to derive the | ||
35 | * memory's caching mode; the PA bits are ignored. */ | ||
36 | HV_PTE pte; | ||
37 | /** The virtual address of the memory. */ | ||
38 | HV_VirtAddr va; | ||
39 | /** Size (in bytes) of the memory area. */ | ||
40 | int size; | ||
41 | |||
42 | } | ||
43 | netio_ipp_address_t; | ||
44 | |||
45 | /** The various pread/pwrite offsets into the hypervisor-level driver. | ||
46 | * @ingroup types | ||
47 | */ | ||
48 | typedef enum | ||
49 | { | ||
50 | /** Inform the Linux driver of the address of the NetIO arena memory. | ||
51 | * This offset is actually only used to convey information from netio | ||
52 | * to the Linux driver; it never makes it from there to the hypervisor. | ||
53 | * Write-only; takes a uint32_t specifying the VA address. */ | ||
54 | NETIO_FIXED_ADDR = 0x5000000000000000ULL, | ||
55 | |||
56 | /** Inform the Linux driver of the size of the NetIO arena memory. | ||
57 | * This offset is actually only used to convey information from netio | ||
58 | * to the Linux driver; it never makes it from there to the hypervisor. | ||
59 | * Write-only; takes a uint32_t specifying the VA size. */ | ||
60 | NETIO_FIXED_SIZE = 0x5100000000000000ULL, | ||
61 | |||
62 | /** Register current tile with IPP. Write then read: write, takes a | ||
63 | * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ | ||
64 | NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, | ||
65 | |||
66 | /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ | ||
67 | NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, | ||
68 | |||
69 | /** Start packets flowing. Write-only, takes a dummy argument. */ | ||
70 | NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, | ||
71 | |||
72 | /** Stop packets flowing. Write-only, takes a dummy argument. */ | ||
73 | NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, | ||
74 | |||
75 | /** Configure group (typically we group on VLAN). Write-only: takes an | ||
76 | * array of netio_group_t's, low 24 bits of the offset is the base group | ||
77 | * number times the size of a netio_group_t. */ | ||
78 | NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, | ||
79 | |||
80 | /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low | ||
81 | * 24 bits of the offset is the base bucket number times the size of a | ||
82 | * netio_bucket_t. */ | ||
83 | NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, | ||
84 | |||
85 | /** Get/set a parameter. Read or write: read or write data is the parameter | ||
86 | * value, low 32 bits of the offset is a __netio_getset_offset_t. */ | ||
87 | NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, | ||
88 | |||
89 | /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ | ||
90 | NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, | ||
91 | |||
92 | /** Configure hijack IP address. Packets with this IPv4 dest address | ||
93 | * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address | ||
94 | * in some standard form. FIXME: Define the form! */ | ||
95 | NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, | ||
96 | |||
97 | /** | ||
98 | * Offsets beyond this point are reserved for the supervisor (although that | ||
99 | * enforcement must be done by the supervisor driver itself). | ||
100 | */ | ||
101 | NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, | ||
102 | |||
103 | /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ | ||
104 | NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, | ||
105 | |||
106 | /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ | ||
107 | NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, | ||
108 | |||
109 | /* Offsets greater than 0x7FFFFFFFFFFFFFFF can't be used directly from Linux | ||
110 | * userspace code due to limitations in the pread/pwrite syscalls. */ | ||
111 | |||
112 | /** Drain LIPP buffers. */ | ||
113 | NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, | ||
114 | |||
115 | /** Supply a netio_ipp_address_t to be used as shared memory for the | ||
116 | * LEPP command queue. */ | ||
117 | NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, | ||
118 | |||
119 | /* 0xFC... is currently unused. */ | ||
120 | |||
121 | /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ | ||
122 | NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, | ||
123 | |||
124 | /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ | ||
125 | NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, | ||
126 | |||
127 | /** Supply packet arena. Write-only, takes an array of | ||
128 | * netio_ipp_address_t values. */ | ||
129 | NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, | ||
130 | } netio_hv_offset_t; | ||
131 | |||
132 | /** Extract the base offset from an offset */ | ||
133 | #define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) | ||
134 | /** Extract the local offset from an offset */ | ||
135 | #define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) | ||
136 | |||
137 | |||
138 | /** | ||
139 | * Get/set offset. | ||
140 | */ | ||
141 | typedef union | ||
142 | { | ||
143 | struct | ||
144 | { | ||
145 | uint64_t addr:48; /**< Class-specific address */ | ||
146 | unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ | ||
147 | unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ | ||
148 | } | ||
149 | bits; /**< Bitfields */ | ||
150 | uint64_t word; /**< Aggregated value to use as the offset */ | ||
151 | } | ||
152 | __netio_getset_offset_t; | ||
153 | |||
154 | /** | ||
155 | * Fast I/O index offsets (must be contiguous). | ||
156 | */ | ||
157 | typedef enum | ||
158 | { | ||
159 | NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ | ||
160 | NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ | ||
161 | NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ | ||
162 | NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ | ||
163 | NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ | ||
164 | NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ | ||
165 | NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ | ||
166 | NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ | ||
167 | } netio_fastio_index_t; | ||
168 | |||
169 | /** 3-word return type for Fast I/O call. */ | ||
170 | typedef struct | ||
171 | { | ||
172 | int err; /**< Error code. */ | ||
173 | uint32_t val0; /**< Value. Meaning depends upon the specific call. */ | ||
174 | uint32_t val1; /**< Value. Meaning depends upon the specific call. */ | ||
175 | } netio_fastio_rv3_t; | ||
176 | |||
177 | /** 0-argument fast I/O call */ | ||
178 | int __netio_fastio0(uint32_t fastio_index); | ||
179 | /** 1-argument fast I/O call */ | ||
180 | int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); | ||
181 | /** 3-argument fast I/O call, 3-word return value */ | ||
182 | netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, | ||
183 | uint32_t arg1, uint32_t arg2); | ||
184 | /** 4-argument fast I/O call */ | ||
185 | int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, | ||
186 | uint32_t arg2, uint32_t arg3); | ||
187 | /** 6-argument fast I/O call */ | ||
188 | int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, | ||
189 | uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); | ||
190 | /** 9-argument fast I/O call */ | ||
191 | int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, | ||
192 | uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, | ||
193 | uint32_t arg6, uint32_t arg7, uint32_t arg8); | ||
194 | |||
195 | /** Allocate an empty packet. | ||
196 | * @param fastio_index Fast I/O index. | ||
197 | * @param size Size of the packet to allocate. | ||
198 | */ | ||
199 | #define __netio_fastio_allocate(fastio_index, size) \ | ||
200 | __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) | ||
201 | |||
202 | /** Free a buffer. | ||
203 | * @param fastio_index Fast I/O index. | ||
204 | * @param handle Handle for the packet to free. | ||
205 | */ | ||
206 | #define __netio_fastio_free_buffer(fastio_index, handle) \ | ||
207 | __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) | ||
208 | |||
209 | /** Increment our receive credits. | ||
210 | * @param fastio_index Fast I/O index. | ||
211 | * @param credits Number of credits to add. | ||
212 | */ | ||
213 | #define __netio_fastio_return_credits(fastio_index, credits) \ | ||
214 | __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) | ||
215 | |||
216 | /** Send packet, no checksum. | ||
217 | * @param fastio_index Fast I/O index. | ||
218 | * @param ackflag Nonzero if we want an ack. | ||
219 | * @param size Size of the packet. | ||
220 | * @param va Virtual address of start of packet. | ||
221 | * @param handle Packet handle. | ||
222 | */ | ||
223 | #define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ | ||
224 | __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ | ||
225 | size, va, handle) | ||
226 | |||
227 | /** Send packet, calculate checksum. | ||
228 | * @param fastio_index Fast I/O index. | ||
229 | * @param ackflag Nonzero if we want an ack. | ||
230 | * @param size Size of the packet. | ||
231 | * @param va Virtual address of start of packet. | ||
232 | * @param handle Packet handle. | ||
233 | * @param csum0 Shim checksum header. | ||
234 | * @param csum1 Checksum seed. | ||
235 | */ | ||
236 | #define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ | ||
237 | csum0, csum1) \ | ||
238 | __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ | ||
239 | size, va, handle, csum0, csum1) | ||
240 | |||
241 | |||
242 | /** Format for the "csum0" argument to the __netio_fastio_send routines | ||
243 | * and LEPP. Note that this is currently exactly identical to the | ||
244 | * ShimProtocolOffloadHeader. | ||
245 | */ | ||
246 | typedef union | ||
247 | { | ||
248 | struct | ||
249 | { | ||
250 | unsigned int start_byte:7; /**< The first byte to be checksummed */ | ||
251 | unsigned int count:14; /**< Number of bytes to be checksummed. */ | ||
252 | unsigned int destination_byte:7; /**< The byte to write the checksum to. */ | ||
253 | unsigned int reserved:4; /**< Reserved. */ | ||
254 | } bits; /**< Decomposed method of access. */ | ||
255 | unsigned int word; /**< To send out the IDN. */ | ||
256 | } __netio_checksum_header_t; | ||
257 | |||
258 | |||
259 | /** Sendv packet with 1 or 2 segments. | ||
260 | * @param fastio_index Fast I/O index. | ||
261 | * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus | ||
262 | * 1 in next 2 bits; expected checksum in high 16 bits. | ||
263 | * @param confno Confirmation number to request, if notify flag set. | ||
264 | * @param csum0 Checksum descriptor; if zero, no checksum. | ||
265 | * @param va_F Virtual address of first segment. | ||
266 | * @param va_L Virtual address of last segment, if 2 segments. | ||
267 | * @param len_F_L Length of first segment in low 16 bits; length of last | ||
268 | * segment, if 2 segments, in high 16 bits. | ||
269 | */ | ||
270 | #define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ | ||
271 | va_F, va_L, len_F_L) \ | ||
272 | __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ | ||
273 | csum0, va_F, va_L, len_F_L) | ||
274 | |||
275 | /** Send packet on PCIe interface. | ||
276 | * @param fastio_index Fast I/O index. | ||
277 | * @param flags Ack/csum/notify flags in low 3 bits. | ||
278 | * @param confno Confirmation number to request, if notify flag set. | ||
279 | * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. | ||
280 | * @param va_F Virtual address of the packet buffer. | ||
281 | * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. | ||
282 | * @param len_F_L Length of the packet buffer in low 16 bits. | ||
283 | */ | ||
284 | #define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ | ||
285 | va_F, va_L, len_F_L) \ | ||
286 | __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ | ||
287 | csum0, va_F, va_L, len_F_L) | ||
288 | |||
289 | /** Sendv packet with 3 or 4 segments. | ||
290 | * @param fastio_index Fast I/O index. | ||
291 | * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus | ||
292 | * 1 in next 2 bits; expected checksum in high 16 bits. | ||
293 | * @param confno Confirmation number to request, if notify flag set. | ||
294 | * @param csum0 Checksum descriptor; if zero, no checksum. | ||
295 | * @param va_F Virtual address of first segment. | ||
296 | * @param va_L Virtual address of last segment (third segment if 3 segments, | ||
297 | * fourth segment if 4 segments). | ||
298 | * @param len_F_L Length of first segment in low 16 bits; length of last | ||
299 | * segment in high 16 bits. | ||
300 | * @param va_M0 Virtual address of "middle 0" segment; this segment is sent | ||
301 | * second when there are three segments, and third if there are four. | ||
302 | * @param va_M1 Virtual address of "middle 1" segment; this segment is sent | ||
303 | * second when there are four segments. | ||
304 | * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle | ||
305 | * 1 segment, if 4 segments, in high 16 bits. | ||
306 | */ | ||
307 | #define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ | ||
308 | va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ | ||
309 | __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ | ||
310 | csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) | ||
311 | |||
312 | /** Send vector of packets. | ||
313 | * @param fastio_index Fast I/O index. | ||
314 | * @param seqno Number of packets transmitted so far on this interface; | ||
315 | * used to decide which packets should be acknowledged. | ||
316 | * @param nentries Number of entries in vector. | ||
317 | * @param va Virtual address of start of vector entry array. | ||
318 | * @return 3-word netio_fastio_rv3_t structure. The structure's err member | ||
319 | * is an error code, or zero if no error. The val0 member is the | ||
320 | * updated value of seqno; it has been incremented by 1 for each | ||
321 | * packet sent. That increment may be less than nentries if an | ||
322 | * error occurred, or if some of the entries in the vector contain | ||
323 | * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the | ||
324 | * updated value of nentries; it has been decremented by 1 for each | ||
325 | * vector entry processed. Again, that decrement may be less than | ||
326 | * nentries (leaving the returned value positive) if an error | ||
327 | * occurred. | ||
328 | */ | ||
329 | #define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ | ||
330 | __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ | ||
331 | nentries, va) | ||
332 | |||
333 | |||
334 | /** An egress DMA command for LEPP. */ | ||
335 | typedef struct | ||
336 | { | ||
337 | /** Is this a TSO transfer? | ||
338 | * | ||
339 | * NOTE: This field is always 0, to distinguish it from | ||
340 | * lepp_tso_cmd_t. It must come first! | ||
341 | */ | ||
342 | uint8_t tso : 1; | ||
343 | |||
344 | /** Unused padding bits. */ | ||
345 | uint8_t _unused : 3; | ||
346 | |||
347 | /** Should this packet be sent directly from caches instead of DRAM, | ||
348 | * using hash-for-home to locate the packet data? | ||
349 | */ | ||
350 | uint8_t hash_for_home : 1; | ||
351 | |||
352 | /** Should we compute a checksum? */ | ||
353 | uint8_t compute_checksum : 1; | ||
354 | |||
355 | /** Is this the final buffer for this packet? | ||
356 | * | ||
357 | * A single packet can be split over several input buffers (a "gather" | ||
358 | * operation). This flag indicates that this is the last buffer | ||
359 | * in a packet. | ||
360 | */ | ||
361 | uint8_t end_of_packet : 1; | ||
362 | |||
363 | /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ | ||
364 | uint8_t send_completion : 1; | ||
365 | |||
366 | /** High bits of Client Physical Address of the start of the buffer | ||
367 | * to be egressed. | ||
368 | * | ||
369 | * NOTE: Only 6 bits are actually needed here, as CPAs are | ||
370 | * currently 38 bits. So two bits could be scavenged from this. | ||
371 | */ | ||
372 | uint8_t cpa_hi; | ||
373 | |||
374 | /** The number of bytes to be egressed. */ | ||
375 | uint16_t length; | ||
376 | |||
377 | /** Low 32 bits of Client Physical Address of the start of the buffer | ||
378 | * to be egressed. | ||
379 | */ | ||
380 | uint32_t cpa_lo; | ||
381 | |||
382 | /** Checksum information (only used if 'compute_checksum'). */ | ||
383 | __netio_checksum_header_t checksum_data; | ||
384 | |||
385 | } lepp_cmd_t; | ||
386 | |||
387 | |||
388 | /** A chunk of physical memory for a TSO egress. */ | ||
389 | typedef struct | ||
390 | { | ||
391 | /** The low bits of the CPA. */ | ||
392 | uint32_t cpa_lo; | ||
393 | /** The high bits of the CPA. */ | ||
394 | uint16_t cpa_hi : 15; | ||
395 | /** Should this packet be sent directly from caches instead of DRAM, | ||
396 | * using hash-for-home to locate the packet data? | ||
397 | */ | ||
398 | uint16_t hash_for_home : 1; | ||
399 | /** The length in bytes. */ | ||
400 | uint16_t length; | ||
401 | } lepp_frag_t; | ||
402 | |||
403 | |||
404 | /** An LEPP command that handles TSO. */ | ||
405 | typedef struct | ||
406 | { | ||
407 | /** Is this a TSO transfer? | ||
408 | * | ||
409 | * NOTE: This field is always 1, to distinguish it from | ||
410 | * lepp_cmd_t. It must come first! | ||
411 | */ | ||
412 | uint8_t tso : 1; | ||
413 | |||
414 | /** Unused padding bits. */ | ||
415 | uint8_t _unused : 7; | ||
416 | |||
417 | /** Size of the header[] array in bytes. It must be in the range | ||
418 | * [40, 127], which are the smallest header for a TCP packet over | ||
419 | * Ethernet and the maximum possible prepend size supported by | ||
420 | * hardware, respectively. Note that the array storage must be | ||
421 | * padded out to a multiple of four bytes so that the following | ||
422 | * LEPP command is aligned properly. | ||
423 | */ | ||
424 | uint8_t header_size; | ||
425 | |||
426 | /** Byte offset of the IP header in header[]. */ | ||
427 | uint8_t ip_offset; | ||
428 | |||
429 | /** Byte offset of the TCP header in header[]. */ | ||
430 | uint8_t tcp_offset; | ||
431 | |||
432 | /** The number of bytes to use for the payload of each packet, | ||
433 | * except of course the last one, which may not have enough bytes. | ||
434 | * This means that each Ethernet packet except the last will have a | ||
435 | * size of header_size + payload_size. | ||
436 | */ | ||
437 | uint16_t payload_size; | ||
438 | |||
439 | /** The length of the 'frags' array that follows this struct. */ | ||
440 | uint16_t num_frags; | ||
441 | |||
442 | /** The actual frags. */ | ||
443 | lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; | ||
444 | |||
445 | /* | ||
446 | * The packet header template logically follows frags[], | ||
447 | * but you can't declare that in C. | ||
448 | * | ||
449 | * uint32_t header[header_size_in_words_rounded_up]; | ||
450 | */ | ||
451 | |||
452 | } lepp_tso_cmd_t; | ||
453 | |||
454 | |||
455 | /** An LEPP completion ring entry. */ | ||
456 | typedef void* lepp_comp_t; | ||
457 | |||
458 | |||
459 | /** Maximum number of frags for one TSO command. This is adapted from | ||
460 | * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for | ||
461 | * our page size of exactly 65536. We add one for a "body" fragment. | ||
462 | */ | ||
463 | #define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) | ||
464 | |||
465 | /** Total number of bytes needed for an lepp_tso_cmd_t. */ | ||
466 | #define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ | ||
467 | (sizeof(lepp_tso_cmd_t) + \ | ||
468 | (num_frags) * sizeof(lepp_frag_t) + \ | ||
469 | (((header_size) + 3) & -4)) | ||
470 | |||
471 | /** The size of the lepp "cmd" queue. */ | ||
472 | #define LEPP_CMD_QUEUE_BYTES \ | ||
473 | (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ | ||
474 | (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) | ||
475 | |||
476 | /** The largest possible command that can go in lepp_queue_t::cmds[]. */ | ||
477 | #define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) | ||
478 | |||
479 | /** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). | ||
480 | */ | ||
481 | #define LEPP_CMD_LIMIT \ | ||
482 | (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) | ||
483 | |||
484 | /** The maximum number of completions in an LEPP queue. */ | ||
485 | #define LEPP_COMP_QUEUE_SIZE \ | ||
486 | ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) | ||
487 | |||
488 | /** Increment an index modulo the queue size. */ | ||
489 | #define LEPP_QINC(var) \ | ||
490 | (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) | ||
491 | |||
492 | /** A queue used to convey egress commands from the client to LEPP. */ | ||
493 | typedef struct | ||
494 | { | ||
495 | /** Index of first completion not yet processed by user code. | ||
496 | * If this is equal to comp_busy, there are no such completions. | ||
497 | * | ||
498 | * NOTE: This is only read/written by the user. | ||
499 | */ | ||
500 | unsigned int comp_head; | ||
501 | |||
502 | /** Index of first completion record not yet completed. | ||
503 | * If this is equal to comp_tail, there are no such completions. | ||
504 | * This index gets advanced (modulo LEPP_COMP_QUEUE_SIZE) whenever | ||
505 | * a command with the 'send_completion' bit set is finished. | ||
506 | * | ||
507 | * NOTE: This is only written by LEPP, only read by the user. | ||
508 | */ | ||
509 | volatile unsigned int comp_busy; | ||
510 | |||
511 | /** Index of the first empty slot in the completion ring. | ||
512 | * Entries from this up to but not including comp_head (in ring order) | ||
513 | * can be filled in with completion data. | ||
514 | * | ||
515 | * NOTE: This is only read/written by the user. | ||
516 | */ | ||
517 | unsigned int comp_tail; | ||
518 | |||
519 | /** Byte index of first command enqueued for LEPP but not yet processed. | ||
520 | * | ||
521 | * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. | ||
522 | * | ||
523 | * NOTE: LEPP advances this counter as soon as it no longer needs | ||
524 | * the cmds[] storage for this entry, but the transfer is not actually | ||
525 | * complete (i.e. the buffer pointed to by the command is no longer | ||
526 | * needed) until comp_busy advances. | ||
527 | * | ||
528 | * If this is equal to cmd_tail, the ring is empty. | ||
529 | * | ||
530 | * NOTE: This is only written by LEPP, only read by the user. | ||
531 | */ | ||
532 | volatile unsigned int cmd_head; | ||
533 | |||
534 | /** Byte index of first empty slot in the command ring. This field can | ||
535 | * be incremented up to but not equal to cmd_head (because that would | ||
536 | * mean the ring is empty). | ||
537 | * | ||
538 | * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. | ||
539 | * | ||
540 | * NOTE: This is read/written by the user, only read by LEPP. | ||
541 | */ | ||
542 | volatile unsigned int cmd_tail; | ||
543 | |||
544 | /** A ring of variable-sized egress DMA commands. | ||
545 | * | ||
546 | * NOTE: Only written by the user, only read by LEPP. | ||
547 | */ | ||
548 | char cmds[LEPP_CMD_QUEUE_BYTES] | ||
549 | __attribute__((aligned(CHIP_L2_LINE_SIZE()))); | ||
550 | |||
551 | /** A ring of user completion data. | ||
552 | * NOTE: Only read/written by the user. | ||
553 | */ | ||
554 | lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] | ||
555 | __attribute__((aligned(CHIP_L2_LINE_SIZE()))); | ||
556 | } lepp_queue_t; | ||
557 | |||
558 | |||
559 | /** An internal helper function for determining the number of entries | ||
560 | * available in a ring buffer, given that there is one sentinel. | ||
561 | */ | ||
562 | static inline unsigned int | ||
563 | _lepp_num_free_slots(unsigned int head, unsigned int tail) | ||
564 | { | ||
565 | /* | ||
566 | * One entry is reserved for use as a sentinel, to distinguish | ||
567 | * "empty" from "full". So we compute | ||
568 | * (head - tail - 1) % LEPP_COMP_QUEUE_SIZE, but without using a slow % operation. | ||
569 | */ | ||
570 | return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); | ||
571 | } | ||
572 | |||
573 | |||
574 | /** Returns how many new comp entries can be enqueued. */ | ||
575 | static inline unsigned int | ||
576 | lepp_num_free_comp_slots(const lepp_queue_t* q) | ||
577 | { | ||
578 | return _lepp_num_free_slots(q->comp_head, q->comp_tail); | ||
579 | } | ||
580 | |||
581 | static inline int | ||
582 | lepp_qsub(int v1, int v2) | ||
583 | { | ||
584 | int delta = v1 - v2; | ||
585 | return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); | ||
586 | } | ||
587 | |||
588 | |||
589 | /** FIXME: Check this from linux, via a new "pwrite()" call. */ | ||
590 | #define LIPP_VERSION 1 | ||
591 | |||
592 | |||
593 | /** We use exactly two bytes of alignment padding. */ | ||
594 | #define LIPP_PACKET_PADDING 2 | ||
595 | |||
596 | /** The minimum size of a "small" buffer (including the padding). */ | ||
597 | #define LIPP_SMALL_PACKET_SIZE 128 | ||
598 | |||
599 | /* | ||
600 | * NOTE: The following two values should total to less than around | ||
601 | * 13582, to keep the total size used for "lipp_state_t" below 64K. | ||
602 | */ | ||
603 | |||
604 | /** The maximum number of "small" buffers. | ||
605 | * This is enough for 53 network cpus with 128 credits. Note that | ||
606 | * if these are exhausted, we will fall back to using large buffers. | ||
607 | */ | ||
608 | #define LIPP_SMALL_BUFFERS 6785 | ||
609 | |||
610 | /** The maximum number of "large" buffers. | ||
611 | * This is enough for 53 network cpus with 128 credits. | ||
612 | */ | ||
613 | #define LIPP_LARGE_BUFFERS 6785 | ||
614 | |||
615 | #endif /* __DRV_XGBE_INTF_H__ */ | ||
diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h new file mode 100644 index 000000000000..e1591bff61b5 --- /dev/null +++ b/arch/tile/include/hv/netio_errors.h | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * Error codes returned from NetIO routines. | ||
17 | */ | ||
18 | |||
19 | #ifndef __NETIO_ERRORS_H__ | ||
20 | #define __NETIO_ERRORS_H__ | ||
21 | |||
22 | /** | ||
23 | * @addtogroup error | ||
24 | * | ||
25 | * @brief The error codes returned by NetIO functions. | ||
26 | * | ||
27 | * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and | ||
28 | * a negative value if an error occurs. | ||
29 | * | ||
30 | * In cases where a NetIO function failed due to an error reported by | ||
31 | * system libraries, the error code will be the negation of the | ||
32 | * system errno at the time of failure. The @ref netio_strerror() | ||
33 | * function will deliver error strings for both NetIO and system error | ||
34 | * codes. | ||
35 | * | ||
36 | * @{ | ||
37 | */ | ||
38 | |||
39 | /** The set of all NetIO errors. */ | ||
40 | typedef enum | ||
41 | { | ||
42 | /** Operation successfully completed. */ | ||
43 | NETIO_NO_ERROR = 0, | ||
44 | |||
45 | /** A packet was successfully retrieved from an input queue. */ | ||
46 | NETIO_PKT = 0, | ||
47 | |||
48 | /** Largest NetIO error number. */ | ||
49 | NETIO_ERR_MAX = -701, | ||
50 | |||
51 | /** The tile is not registered with the IPP. */ | ||
52 | NETIO_NOT_REGISTERED = -701, | ||
53 | |||
54 | /** No packet was available to retrieve from the input queue. */ | ||
55 | NETIO_NOPKT = -702, | ||
56 | |||
57 | /** The requested function is not implemented. */ | ||
58 | NETIO_NOT_IMPLEMENTED = -703, | ||
59 | |||
60 | /** On a registration operation, the target queue already has the maximum | ||
61 | * number of tiles registered for it, and no more may be added. On a | ||
62 | * packet send operation, the output queue is full and nothing more can | ||
63 | * be queued until some of the queued packets are actually transmitted. */ | ||
64 | NETIO_QUEUE_FULL = -704, | ||
65 | |||
66 | /** The calling process or thread is not bound to exactly one CPU. */ | ||
67 | NETIO_BAD_AFFINITY = -705, | ||
68 | |||
69 | /** Cannot allocate memory on requested controllers. */ | ||
70 | NETIO_CANNOT_HOME = -706, | ||
71 | |||
72 | /** On a registration operation, the IPP specified is not configured | ||
73 | * to support the options requested; for instance, the application | ||
74 | * wants a specific type of tagged headers which the configured IPP | ||
75 | * doesn't support. Or, the supplied configuration information is | ||
76 | * not self-consistent, or is out of range; for instance, specifying | ||
77 | * both NETIO_RECV and NETIO_NO_RECV, or asking for more than | ||
78 | * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket | ||
79 | * configure operation, the number of items, or the base item, was | ||
80 | * out of range. | ||
81 | */ | ||
82 | NETIO_BAD_CONFIG = -707, | ||
83 | |||
84 | /** Too many tiles have registered to transmit packets. */ | ||
85 | NETIO_TOOMANY_XMIT = -708, | ||
86 | |||
87 | /** Packet transmission was attempted on a queue which was registered | ||
88 | with transmit disabled. */ | ||
89 | NETIO_UNREG_XMIT = -709, | ||
90 | |||
91 | /** This tile is already registered with the IPP. */ | ||
92 | NETIO_ALREADY_REGISTERED = -710, | ||
93 | |||
94 | /** The Ethernet link is down. The application should try again later. */ | ||
95 | NETIO_LINK_DOWN = -711, | ||
96 | |||
97 | /** An invalid memory buffer has been specified. This may be an unmapped | ||
98 | * virtual address, or one which does not meet alignment requirements. | ||
99 | * For netio_input_register(), this error may be returned when multiple | ||
100 | * processes specify different memory regions to be used for NetIO | ||
101 | * buffers. That can happen if these processes specify explicit memory | ||
102 | * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init() | ||
103 | * has not been called by a common ancestor of the processes. | ||
104 | */ | ||
105 | NETIO_FAULT = -712, | ||
106 | |||
107 | /** Cannot combine user-managed shared memory and cache coherence. */ | ||
108 | NETIO_BAD_CACHE_CONFIG = -713, | ||
109 | |||
110 | /** Smallest NetIO error number. */ | ||
111 | NETIO_ERR_MIN = -713, | ||
112 | |||
113 | #ifndef __DOXYGEN__ | ||
114 | /** Used internally to mean that no response is needed; never returned to | ||
115 | * an application. */ | ||
116 | NETIO_NO_RESPONSE = 1 | ||
117 | #endif | ||
118 | } netio_error_t; | ||
119 | |||
120 | /** @} */ | ||
121 | |||
122 | #endif /* __NETIO_ERRORS_H__ */ | ||
diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h new file mode 100644 index 000000000000..8d20972aba2c --- /dev/null +++ b/arch/tile/include/hv/netio_intf.h | |||
@@ -0,0 +1,2975 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | /** | ||
16 | * NetIO interface structures and macros. | ||
17 | */ | ||
18 | |||
19 | #ifndef __NETIO_INTF_H__ | ||
20 | #define __NETIO_INTF_H__ | ||
21 | |||
22 | #include <hv/netio_errors.h> | ||
23 | |||
24 | #ifdef __KERNEL__ | ||
25 | #include <linux/types.h> | ||
26 | #else | ||
27 | #include <stdint.h> | ||
28 | #endif | ||
29 | |||
30 | #if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) | ||
31 | #include <assert.h> | ||
32 | #define netio_assert assert /**< Enable assertions from macros */ | ||
33 | #else | ||
34 | #define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ | ||
35 | #endif | ||
36 | |||
37 | /* | ||
38 | * If none of these symbols are defined, we're building libnetio in an | ||
39 | * environment where we have pthreads, so we'll enable locking. | ||
40 | */ | ||
41 | #if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ | ||
42 | !defined(__NEWLIB__) | ||
43 | #define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ | ||
44 | |||
45 | /* | ||
46 | * If NETIO_UNLOCKED is defined, we don't use per-cpu locks on | ||
47 | * per-packet NetIO operations. We still do pthread locking on things | ||
48 | * like netio_input_register, though. This is used for building | ||
49 | * libnetio_unlocked. | ||
50 | */ | ||
51 | #ifndef NETIO_UNLOCKED | ||
52 | |||
53 | /* Avoid PLT overhead by using our own inlined per-cpu lock. */ | ||
54 | #include <sched.h> | ||
55 | typedef int _netio_percpu_mutex_t; | ||
56 | |||
57 | static __inline int | ||
58 | _netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) | ||
59 | { | ||
60 | *lock = 0; | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static __inline int | ||
65 | _netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) | ||
66 | { | ||
67 | while (__builtin_expect(__insn_tns(lock), 0)) | ||
68 | sched_yield(); | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static __inline int | ||
73 | _netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) | ||
74 | { | ||
75 | *lock = 0; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | #else /* NETIO_UNLOCKED */ | ||
80 | |||
81 | /* Don't do any locking for per-packet NetIO operations. */ | ||
82 | typedef int _netio_percpu_mutex_t; | ||
83 | #define _netio_percpu_mutex_init(L) | ||
84 | #define _netio_percpu_mutex_lock(L) | ||
85 | #define _netio_percpu_mutex_unlock(L) | ||
86 | |||
87 | #endif /* NETIO_UNLOCKED */ | ||
88 | #endif /* !__HV__, !__BOGUX__, !__KERNEL__, !__NEWLIB__ */ | ||
89 | |||
90 | /** How many tiles can register for a given queue. | ||
91 | * @ingroup setup */ | ||
92 | #define NETIO_MAX_TILES_PER_QUEUE 64 | ||
93 | |||
94 | |||
95 | /** Largest permissible queue identifier. | ||
96 | * @ingroup setup */ | ||
97 | #define NETIO_MAX_QUEUE_ID 255 | ||
98 | |||
99 | |||
100 | #ifndef __DOXYGEN__ | ||
101 | |||
102 | /* Metadata packet checksum/ethertype flags. */ | ||
103 | |||
104 | /** The L4 checksum has not been calculated. */ | ||
105 | #define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 | ||
106 | #define _NETIO_PKT_NO_L4_CSUM_RMASK 1 | ||
107 | #define _NETIO_PKT_NO_L4_CSUM_MASK \ | ||
108 | (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) | ||
109 | |||
110 | /** The L3 checksum has not been calculated. */ | ||
111 | #define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 | ||
112 | #define _NETIO_PKT_NO_L3_CSUM_RMASK 1 | ||
113 | #define _NETIO_PKT_NO_L3_CSUM_MASK \ | ||
114 | (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) | ||
115 | |||
116 | /** The L3 checksum is incorrect (or perhaps has not been calculated). */ | ||
117 | #define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 | ||
118 | #define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 | ||
119 | #define _NETIO_PKT_BAD_L3_CSUM_MASK \ | ||
120 | (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) | ||
121 | |||
122 | /** The Ethernet packet type is unrecognized. */ | ||
123 | #define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 | ||
124 | #define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 | ||
125 | #define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ | ||
126 | (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ | ||
127 | _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) | ||
128 | |||
129 | /* Metadata packet type flags. */ | ||
130 | |||
131 | /** Where the packet type bits are; this field is the index into | ||
132 | * _netio_pkt_info. */ | ||
133 | #define _NETIO_PKT_TYPE_SHIFT 4 | ||
134 | #define _NETIO_PKT_TYPE_RMASK 0x3F | ||
135 | |||
136 | /** How many VLAN tags the packet has, and, if we have two, which one we | ||
137 | * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) | ||
138 | * tag is counted here. */ | ||
139 | #define _NETIO_PKT_VLAN_SHIFT 4 | ||
140 | #define _NETIO_PKT_VLAN_RMASK 0x3 | ||
141 | #define _NETIO_PKT_VLAN_MASK \ | ||
142 | (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) | ||
143 | #define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ | ||
144 | #define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ | ||
145 | #define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ | ||
146 | #define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ | ||
147 | |||
148 | /** Which proprietary tags the packet has. */ | ||
149 | #define _NETIO_PKT_TAG_SHIFT 6 | ||
150 | #define _NETIO_PKT_TAG_RMASK 0x3 | ||
151 | #define _NETIO_PKT_TAG_MASK \ | ||
152 | (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) | ||
153 | #define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ | ||
154 | #define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ | ||
155 | #define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ | ||
156 | #define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ | ||
157 | |||
158 | /** Whether a packet has an LLC + SNAP header. */ | ||
159 | #define _NETIO_PKT_SNAP_SHIFT 8 | ||
160 | #define _NETIO_PKT_SNAP_RMASK 0x1 | ||
161 | #define _NETIO_PKT_SNAP_MASK \ | ||
162 | (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) | ||
163 | |||
164 | /* NOTE: Bits 9 and 10 are unused. */ | ||
165 | |||
166 | /** Length of any custom data before the L2 header, in words. */ | ||
167 | #define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 | ||
168 | #define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F | ||
169 | #define _NETIO_PKT_CUSTOM_LEN_MASK \ | ||
170 | (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) | ||
171 | |||
172 | /** The L4 checksum is incorrect (or perhaps has not been calculated). */ | ||
173 | #define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 | ||
174 | #define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 | ||
175 | #define _NETIO_PKT_BAD_L4_CSUM_MASK \ | ||
176 | (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) | ||
177 | |||
178 | /** Length of the L2 header, in words. */ | ||
179 | #define _NETIO_PKT_L2_LEN_SHIFT 17 | ||
180 | #define _NETIO_PKT_L2_LEN_RMASK 0x1F | ||
181 | #define _NETIO_PKT_L2_LEN_MASK \ | ||
182 | (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) | ||
183 | |||
184 | |||
185 | /* Flags in minimal packet metadata. */ | ||
186 | |||
187 | /** We need an eDMA checksum on this packet. */ | ||
188 | #define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 | ||
189 | #define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 | ||
190 | #define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ | ||
191 | (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) | ||
192 | |||
193 | /* Data within the packet information table. */ | ||
194 | |||
195 | /* Note that, for efficiency, code which uses these fields assumes that none | ||
196 | * of the shift values below are zero. See uses below for an explanation. */ | ||
197 | |||
198 | /** Offset within the L2 header of the innermost ethertype (in halfwords). */ | ||
199 | #define _NETIO_PKT_INFO_ETYPE_SHIFT 6 | ||
200 | #define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F | ||
201 | |||
202 | /** Offset within the L2 header of the VLAN tag (in halfwords). */ | ||
203 | #define _NETIO_PKT_INFO_VLAN_SHIFT 11 | ||
204 | #define _NETIO_PKT_INFO_VLAN_RMASK 0x1F | ||
205 | |||
206 | #endif | ||
207 | |||
208 | |||
209 | /** The size of a memory buffer representing a small packet. | ||
210 | * @ingroup egress */ | ||
211 | #define SMALL_PACKET_SIZE 256 | ||
212 | |||
213 | /** The size of a memory buffer representing a large packet. | ||
214 | * @ingroup egress */ | ||
215 | #define LARGE_PACKET_SIZE 2048 | ||
216 | |||
217 | /** The size of a memory buffer representing a jumbo packet. | ||
218 | * @ingroup egress */ | ||
219 | #define JUMBO_PACKET_SIZE (12 * 1024) | ||
220 | |||
221 | |||
222 | /* Common ethertypes. | ||
223 | * @ingroup ingress */ | ||
224 | /** @{ */ | ||
225 | /** The ethertype of IPv4. */ | ||
226 | #define ETHERTYPE_IPv4 (0x0800) | ||
227 | /** The ethertype of ARP. */ | ||
228 | #define ETHERTYPE_ARP (0x0806) | ||
229 | /** The ethertype of VLANs. */ | ||
230 | #define ETHERTYPE_VLAN (0x8100) | ||
231 | /** The ethertype of a Q-in-Q header. */ | ||
232 | #define ETHERTYPE_Q_IN_Q (0x9100) | ||
233 | /** The ethertype of IPv6. */ | ||
234 | #define ETHERTYPE_IPv6 (0x86DD) | ||
235 | /** The ethertype of MPLS. */ | ||
236 | #define ETHERTYPE_MPLS (0x8847) | ||
237 | /** @} */ | ||
238 | |||
239 | |||
240 | /** The possible return values of NETIO_PKT_STATUS. | ||
241 | * @ingroup ingress | ||
242 | */ | ||
243 | typedef enum | ||
244 | { | ||
245 | /** No problems were detected with this packet. */ | ||
246 | NETIO_PKT_STATUS_OK, | ||
247 | /** The packet is undersized; this is expected behavior if the packet's | ||
248 | * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ | ||
249 | NETIO_PKT_STATUS_UNDERSIZE, | ||
250 | /** The packet is oversized and some trailing bytes have been discarded. | ||
251 | This is expected behavior for short packets, since it's impossible to | ||
252 | precisely determine the amount of padding which may have been added to | ||
253 | them to make them meet the minimum Ethernet packet size. */ | ||
254 | NETIO_PKT_STATUS_OVERSIZE, | ||
255 | /** The packet was judged to be corrupt by hardware (for instance, it had | ||
256 | a bad CRC, or part of it was discarded due to lack of buffer space in | ||
257 | the I/O shim) and should be discarded. */ | ||
258 | NETIO_PKT_STATUS_BAD | ||
259 | } netio_pkt_status_t; | ||
260 | |||
261 | |||
262 | /** Log2 of how many buckets we have. */ | ||
263 | #define NETIO_LOG2_NUM_BUCKETS (10) | ||
264 | |||
265 | /** How many buckets we have. | ||
266 | * @ingroup ingress */ | ||
267 | #define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) | ||
268 | |||
269 | |||
270 | /** | ||
271 | * @brief A group-to-bucket identifier. | ||
272 | * | ||
273 | * @ingroup setup | ||
274 | * | ||
275 | * This tells us what to do with a given group. | ||
276 | */ | ||
277 | typedef union { | ||
278 | /** The header broken down into bits. */ | ||
279 | struct { | ||
280 | /** Whether we should balance on L4, if available */ | ||
281 | unsigned int __balance_on_l4:1; | ||
282 | /** Whether we should balance on L3, if available */ | ||
283 | unsigned int __balance_on_l3:1; | ||
284 | /** Whether we should balance on L2, if available */ | ||
285 | unsigned int __balance_on_l2:1; | ||
286 | /** Reserved for future use */ | ||
287 | unsigned int __reserved:1; | ||
288 | /** The base bucket to use to send traffic */ | ||
289 | unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; | ||
290 | /** The mask to apply to the balancing value. This must be one less | ||
291 | * than a power of two, e.g. 0x3 or 0xFF. | ||
292 | */ | ||
293 | unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; | ||
294 | /** Pad to 32 bits */ | ||
295 | unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); | ||
296 | } bits; | ||
297 | /** To send out the IDN. */ | ||
298 | unsigned int word; | ||
299 | } | ||
300 | netio_group_t; | ||
301 | |||
302 | |||
303 | /** | ||
304 | * @brief A VLAN-to-bucket identifier. | ||
305 | * | ||
306 | * @ingroup setup | ||
307 | * | ||
308 | * This tells us what to do with a given VLAN. | ||
309 | */ | ||
310 | typedef netio_group_t netio_vlan_t; | ||
311 | |||
312 | |||
313 | /** | ||
314 | * A bucket-to-queue mapping. | ||
315 | * @ingroup setup | ||
316 | */ | ||
317 | typedef unsigned char netio_bucket_t; | ||
318 | |||
319 | |||
320 | /** | ||
321 | * A packet size can always fit in a netio_size_t. | ||
322 | * @ingroup setup | ||
323 | */ | ||
324 | typedef unsigned int netio_size_t; | ||
325 | |||
326 | |||
327 | /** | ||
328 | * @brief Ethernet standard (ingress) packet metadata. | ||
329 | * | ||
330 | * @ingroup ingress | ||
331 | * | ||
332 | * This is additional data associated with each packet. | ||
333 | * This structure is opaque and accessed through the @ref ingress. | ||
334 | * | ||
335 | * Also, the buffer population operation currently assumes that standard | ||
336 | * metadata is at least as large as minimal metadata, and will need to be | ||
337 | * modified if that is no longer the case. | ||
338 | */ | ||
339 | typedef struct | ||
340 | { | ||
341 | #ifdef __DOXYGEN__ | ||
342 | /** This structure is opaque. */ | ||
343 | unsigned char opaque[24]; | ||
344 | #else | ||
345 | /** The overall ordinal of the packet */ | ||
346 | unsigned int __packet_ordinal; | ||
347 | /** The ordinal of the packet within the group */ | ||
348 | unsigned int __group_ordinal; | ||
349 | /** The best flow hash IPP could compute. */ | ||
350 | unsigned int __flow_hash; | ||
351 | /** Flags pertaining to checksum calculation, packet type, etc. */ | ||
352 | unsigned int __flags; | ||
353 | /** The first word of "user data". */ | ||
354 | unsigned int __user_data_0; | ||
355 | /** The second word of "user data". */ | ||
356 | unsigned int __user_data_1; | ||
357 | #endif | ||
358 | } | ||
359 | netio_pkt_metadata_t; | ||
360 | |||
361 | |||
362 | /** To ensure that the L3 header is aligned mod 4, the L2 header should be | ||
363 | * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes | ||
364 | * long. The standard way to do this is to simply add 2 bytes of padding | ||
365 | * before the L2 header. | ||
366 | */ | ||
367 | #define NETIO_PACKET_PADDING 2 | ||
368 | |||
369 | |||
370 | |||
371 | /** | ||
372 | * @brief Ethernet minimal (egress) packet metadata. | ||
373 | * | ||
374 | * @ingroup egress | ||
375 | * | ||
376 | * This structure represents information about packets which have | ||
377 | * been processed by @ref netio_populate_buffer() or | ||
378 | * @ref netio_populate_prepend_buffer(). This structure is opaque | ||
379 | * and accessed through the @ref egress. | ||
380 | * | ||
381 | * @internal This structure is actually copied into the memory used by | ||
382 | * standard metadata, which is assumed to be large enough. | ||
383 | */ | ||
384 | typedef struct | ||
385 | { | ||
386 | #ifdef __DOXYGEN__ | ||
387 | /** This structure is opaque. */ | ||
388 | unsigned char opaque[14]; | ||
389 | #else | ||
390 | /** The offset of the L2 header from the start of the packet data. */ | ||
391 | unsigned short l2_offset; | ||
392 | /** The offset of the L3 header from the start of the packet data. */ | ||
393 | unsigned short l3_offset; | ||
394 | /** Where to write the checksum. */ | ||
395 | unsigned char csum_location; | ||
396 | /** Where to start checksumming from. */ | ||
397 | unsigned char csum_start; | ||
398 | /** Flags pertaining to checksum calculation etc. */ | ||
399 | unsigned short flags; | ||
400 | /** The L2 length of the packet. */ | ||
401 | unsigned short l2_length; | ||
402 | /** The checksum with which to seed the checksum generator. */ | ||
403 | unsigned short csum_seed; | ||
404 | /** How much to checksum. */ | ||
405 | unsigned short csum_length; | ||
406 | #endif | ||
407 | } | ||
408 | netio_pkt_minimal_metadata_t; | ||
409 | |||
410 | |||
411 | #ifndef __DOXYGEN__ | ||
412 | |||
413 | /** | ||
414 | * @brief An I/O notification header. | ||
415 | * | ||
416 | * This is the first word of data received from an I/O shim in a notification | ||
417 | * packet. It contains framing and status information. | ||
418 | */ | ||
419 | typedef union | ||
420 | { | ||
421 | unsigned int word; /**< The whole word. */ | ||
422 | /** The various fields. */ | ||
423 | struct | ||
424 | { | ||
425 | unsigned int __channel:7; /**< Resource channel. */ | ||
426 | unsigned int __type:4; /**< Type. */ | ||
427 | unsigned int __ack:1; /**< Whether an acknowledgement is needed. */ | ||
428 | unsigned int __reserved:1; /**< Reserved. */ | ||
429 | unsigned int __protocol:1; /**< A protocol-specific word is added. */ | ||
430 | unsigned int __status:2; /**< Status of the transfer. */ | ||
431 | unsigned int __framing:2; /**< Framing of the transfer. */ | ||
432 | unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */ | ||
433 | } bits; | ||
434 | } | ||
435 | __netio_pkt_notif_t; | ||
436 | |||
437 | |||
438 | /** | ||
439 | * Returns the base address of the packet. | ||
440 | */ | ||
441 | #define _NETIO_PKT_HANDLE_BASE(p) \ | ||
442 | ((unsigned char*)((p).word & 0xFFFFFFC0)) | ||
443 | |||
444 | /** | ||
445 | * Returns the base address of the packet. | ||
446 | */ | ||
447 | #define _NETIO_PKT_BASE(p) \ | ||
448 | _NETIO_PKT_HANDLE_BASE(p->__packet) | ||
449 | |||
450 | /** | ||
451 | * @brief An I/O notification packet (second word) | ||
452 | * | ||
453 | * This is the second word of data received from an I/O shim in a notification | ||
454 | * packet. This is the virtual address of the packet buffer, plus some flag | ||
455 | * bits. (The virtual address of the packet is always 256-byte aligned so we | ||
456 | * have room for 8 bits' worth of flags in the low 8 bits.) | ||
457 | * | ||
458 | * @internal | ||
459 | * NOTE: The low two bits must contain "__queue", so the "packet size" | ||
460 | * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly. | ||
461 | * | ||
462 | * If __addr or __offset are moved, _NETIO_PKT_BASE | ||
463 | * (defined right above this) must be changed. | ||
464 | */ | ||
465 | typedef union | ||
466 | { | ||
467 | unsigned int word; /**< The whole word. */ | ||
468 | /** The various fields. */ | ||
469 | struct | ||
470 | { | ||
471 | /** Which queue the packet will be returned to once it is sent back to | ||
472 | the IPP. This is one of the SIZE_xxx values. */ | ||
473 | unsigned int __queue:2; | ||
474 | |||
475 | /** The IPP handle of the sending IPP. */ | ||
476 | unsigned int __ipp_handle:2; | ||
477 | |||
478 | /** Reserved for future use. */ | ||
479 | unsigned int __reserved:1; | ||
480 | |||
481 | /** If 1, this packet has minimal (egress) metadata; otherwise, it | ||
482 | has standard (ingress) metadata. */ | ||
483 | unsigned int __minimal:1; | ||
484 | |||
485 | /** Offset of the metadata within the packet. This value is multiplied | ||
486 | * by 64 and added to the base packet address to get the metadata | ||
487 | * address. Note that this field is aligned within the word such that | ||
488 | * you can easily extract the metadata address with a 26-bit mask. */ | ||
489 | unsigned int __offset:2; | ||
490 | |||
491 | /** The top 24 bits of the packet's virtual address. */ | ||
492 | unsigned int __addr:24; | ||
493 | } bits; | ||
494 | } | ||
495 | __netio_pkt_handle_t; | ||
496 | |||
497 | #endif /* !__DOXYGEN__ */ | ||
498 | |||
499 | |||
500 | /** | ||
501 | * @brief A handle for an I/O packet's storage. | ||
502 | * @ingroup ingress | ||
503 | * | ||
504 | * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its | ||
505 | * packet metadata removed. It is a much smaller type that exists to | ||
506 | * facilitate applications where the full ::netio_pkt_t type is too | ||
507 | * large, such as those that cache enormous numbers of packets or wish | ||
508 | * to transmit packet descriptors over the UDN. | ||
509 | * | ||
510 | * Because there is no metadata, most ::netio_pkt_t operations cannot be | ||
511 | * performed on a netio_pkt_handle_t. It supports only | ||
512 | * netio_free_handle() (to free the buffer) and | ||
513 | * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). | ||
514 | * The application must acquire any additional metadata it wants from the | ||
515 | * original ::netio_pkt_t and record it separately. | ||
516 | * | ||
517 | * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling | ||
518 | * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be | ||
519 | * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can | ||
520 | * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). | ||
521 | */ | ||
522 | typedef struct | ||
523 | { | ||
524 | unsigned int word; /**< Opaque bits. */ | ||
525 | } netio_pkt_handle_t; | ||
526 | |||
527 | /** | ||
528 | * @brief A packet descriptor. | ||
529 | * | ||
530 | * @ingroup ingress | ||
531 | * @ingroup egress | ||
532 | * | ||
533 | * This data structure represents a packet. The structure is manipulated | ||
534 | * through the @ref ingress and the @ref egress. | ||
535 | * | ||
536 | * While the contents of a netio_pkt_t are opaque, the structure itself is | ||
537 | * portable. This means that it may be shared between all tiles which have | ||
538 | * done a netio_input_register() call for the interface on which the pkt_t | ||
539 | * was initially received (via netio_get_packet()) or retrieved (via | ||
540 | * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to | ||
541 | * another tile via shared memory, or via a UDN message, or by other means. | ||
542 | * The destination tile may then use the pkt_t as if it had originally been | ||
543 | * received locally; it may read or write the packet's data, read its | ||
544 | * metadata, free the packet, send the packet, transfer the netio_pkt_t to | ||
545 | * yet another tile, and so forth. | ||
546 | * | ||
547 | * Once a netio_pkt_t has been transferred to a second tile, the first tile | ||
548 | * should not reference the original copy; in particular, if more than one | ||
549 | * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will | ||
550 | * become corrupted. Note also that each tile which reads or modifies | ||
551 | * packet data must obey the memory coherency rules outlined in @ref input. | ||
552 | */ | ||
553 | typedef struct | ||
554 | { | ||
555 | #ifdef __DOXYGEN__ | ||
556 | /** This structure is opaque. */ | ||
557 | unsigned char opaque[32]; | ||
558 | #else | ||
559 | /** For an ingress packet (one with standard metadata), this is the | ||
560 | * notification header we got from the I/O shim. For an egress packet | ||
561 | * (one with minimal metadata), this word is zero if the packet has not | ||
562 | * been populated, and nonzero if it has. */ | ||
563 | __netio_pkt_notif_t __notif_header; | ||
564 | |||
565 | /** Virtual address of the packet buffer, plus state flags. */ | ||
566 | __netio_pkt_handle_t __packet; | ||
567 | |||
568 | /** Metadata associated with the packet. */ | ||
569 | netio_pkt_metadata_t __metadata; | ||
570 | #endif | ||
571 | } | ||
572 | netio_pkt_t; | ||
573 | |||
574 | |||
575 | #ifndef __DOXYGEN__ | ||
576 | |||
577 | #define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header) | ||
578 | #define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle) | ||
579 | #define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue) | ||
580 | #define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) | ||
581 | #define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) | ||
582 | #define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal) | ||
583 | #define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) | ||
584 | #define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) | ||
585 | |||
586 | /* Packet information table, used by the attribute access functions below. */ | ||
587 | extern const uint16_t _netio_pkt_info[]; | ||
588 | |||
589 | #endif /* __DOXYGEN__ */ | ||
590 | |||
591 | |||
592 | #ifndef __DOXYGEN__ | ||
593 | /* These macros are deprecated and will disappear in a future MDE release. */ | ||
594 | #define NETIO_PKT_GOOD_CHECKSUM(pkt) \ | ||
595 | NETIO_PKT_L4_CSUM_CORRECT(pkt) | ||
596 | #define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \ | ||
597 | NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt) | ||
598 | #endif /* __DOXYGEN__ */ | ||
599 | |||
600 | |||
601 | /* Packet attribute access functions. */ | ||
602 | |||
603 | /** Return a pointer to the metadata for a packet. | ||
604 | * @ingroup ingress | ||
605 | * | ||
606 | * Calling this function once and passing the result to other retrieval | ||
607 | * functions with a "_M" suffix usually improves performance. This | ||
608 | * function must be called on an 'ingress' packet (i.e. one retrieved | ||
609 | * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or | ||
610 | * @ref netio_populate_prepend_buffer() have not been called). Use of this | ||
611 | * function on an 'egress' packet will cause an assertion failure. | ||
612 | * | ||
613 | * @param[in] pkt Packet on which to operate. | ||
614 | * @return A pointer to the packet's standard metadata. | ||
615 | */ | ||
616 | static __inline netio_pkt_metadata_t* | ||
617 | NETIO_PKT_METADATA(netio_pkt_t* pkt) | ||
618 | { | ||
619 | netio_assert(!pkt->__packet.bits.__minimal); | ||
620 | return &pkt->__metadata; | ||
621 | } | ||
622 | |||
623 | |||
624 | /** Return a pointer to the minimal metadata for a packet. | ||
625 | * @ingroup egress | ||
626 | * | ||
627 | * Calling this function once and passing the result to other retrieval | ||
628 | * functions with a "_MM" suffix usually improves performance. This | ||
629 | * function must be called on an 'egress' packet (i.e. one on which | ||
630 | * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer() | ||
631 | * have been called, or one retrieved by @ref netio_get_buffer()). Use of | ||
632 | * this function on an 'ingress' packet will cause an assertion failure. | ||
633 | * | ||
634 | * @param[in] pkt Packet on which to operate. | ||
635 | * @return A pointer to the packet's minimal metadata. | ||
636 | */ | ||
637 | static __inline netio_pkt_minimal_metadata_t* | ||
638 | NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
639 | { | ||
640 | netio_assert(pkt->__packet.bits.__minimal); | ||
641 | return (netio_pkt_minimal_metadata_t*) &pkt->__metadata; | ||
642 | } | ||
643 | |||
644 | |||
645 | /** Determine whether a packet has 'minimal' metadata. | ||
646 | * @ingroup pktfuncs | ||
647 | * | ||
648 | * This function will return nonzero if the packet is an 'egress' | ||
649 | * packet (i.e. one on which @ref netio_populate_buffer() or | ||
650 | * @ref netio_populate_prepend_buffer() have been called, or one | ||
651 | * retrieved by @ref netio_get_buffer()), and zero if the packet | ||
652 | * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), | ||
653 | * which has not been converted into an 'egress' packet). | ||
654 | * | ||
655 | * @param[in] pkt Packet on which to operate. | ||
656 | * @return Nonzero if the packet has minimal metadata. | ||
657 | */ | ||
658 | static __inline unsigned int | ||
659 | NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) | ||
660 | { | ||
661 | return pkt->__packet.bits.__minimal; | ||
662 | } | ||
663 | |||
664 | |||
665 | /** Return a handle for a packet's storage. | ||
666 | * @ingroup pktfuncs | ||
667 | * | ||
668 | * @param[in] pkt Packet on which to operate. | ||
669 | * @return A handle for the packet's storage. | ||
670 | */ | ||
671 | static __inline netio_pkt_handle_t | ||
672 | NETIO_PKT_HANDLE(netio_pkt_t* pkt) | ||
673 | { | ||
674 | netio_pkt_handle_t h; | ||
675 | h.word = pkt->__packet.word; | ||
676 | return h; | ||
677 | } | ||
678 | |||
679 | |||
680 | /** A special reserved value indicating the absence of a packet handle. | ||
681 | * | ||
682 | * @ingroup pktfuncs | ||
683 | */ | ||
684 | #define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) | ||
685 | |||
686 | |||
687 | /** Test whether a packet handle is valid. | ||
688 | * | ||
689 | * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE | ||
690 | * to indicate no packet at all. This function tests to see if a packet | ||
691 | * handle is a real handle, not this special reserved value. | ||
692 | * | ||
693 | * @ingroup pktfuncs | ||
694 | * | ||
695 | * @param[in] handle Handle on which to operate. | ||
696 | * @return One if the packet handle is valid, else zero. | ||
697 | */ | ||
698 | static __inline unsigned int | ||
699 | NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) | ||
700 | { | ||
701 | return handle.word != 0; | ||
702 | } | ||
703 | |||
704 | |||
705 | |||
706 | /** Return a pointer to the start of the packet's custom header. | ||
707 | * A custom header may or may not be present, depending upon the IPP; its | ||
708 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
709 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
710 | * the custom header precedes the L2 header in the packet buffer. | ||
711 | * @ingroup ingress | ||
712 | * | ||
713 | * @param[in] handle Handle on which to operate. | ||
714 | * @return A pointer to the start of the packet's custom header. | ||
715 | */ | ||
716 | static __inline unsigned char* | ||
717 | NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) | ||
718 | { | ||
719 | return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; | ||
720 | } | ||
721 | |||
722 | |||
723 | /** Return the length of the packet's custom header. | ||
724 | * A custom header may or may not be present, depending upon the IPP; its | ||
725 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
726 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
727 | * the custom header precedes the L2 header in the packet buffer. | ||
728 | * | ||
729 | * @ingroup ingress | ||
730 | * | ||
731 | * @param[in] mda Pointer to packet's standard metadata. | ||
732 | * @param[in] pkt Packet on which to operate. | ||
733 | * @return The length of the packet's custom header, in bytes. | ||
734 | */ | ||
735 | static __inline netio_size_t | ||
736 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
737 | { | ||
738 | /* | ||
739 | * Note that we effectively need to extract a quantity from the flags word | ||
740 | * which is measured in words, and then turn it into bytes by shifting | ||
741 | * it left by 2. We do this all at once by just shifting right two fewer | ||
742 | * bits, and shifting the mask up two bits. | ||
743 | */ | ||
744 | return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & | ||
745 | (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); | ||
746 | } | ||
747 | |||
748 | |||
749 | /** Return the length of the packet, starting with the custom header. | ||
750 | * A custom header may or may not be present, depending upon the IPP; its | ||
751 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
752 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
753 | * the custom header precedes the L2 header in the packet buffer. | ||
754 | * @ingroup ingress | ||
755 | * | ||
756 | * @param[in] mda Pointer to packet's standard metadata. | ||
757 | * @param[in] pkt Packet on which to operate. | ||
758 | * @return The length of the packet, in bytes. | ||
759 | */ | ||
760 | static __inline netio_size_t | ||
761 | NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
762 | { | ||
763 | return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - | ||
764 | NETIO_PACKET_PADDING); | ||
765 | } | ||
766 | |||
767 | |||
768 | /** Return a pointer to the start of the packet's custom header. | ||
769 | * A custom header may or may not be present, depending upon the IPP; its | ||
770 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
771 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
772 | * the custom header precedes the L2 header in the packet buffer. | ||
773 | * @ingroup ingress | ||
774 | * | ||
775 | * @param[in] mda Pointer to packet's standard metadata. | ||
776 | * @param[in] pkt Packet on which to operate. | ||
777 | * @return A pointer to the start of the packet's custom header. | ||
778 | */ | ||
779 | static __inline unsigned char* | ||
780 | NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
781 | { | ||
782 | return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); | ||
783 | } | ||
784 | |||
785 | |||
786 | /** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. | ||
787 | * @ingroup ingress | ||
788 | * | ||
789 | * @param[in] mda Pointer to packet's standard metadata. | ||
790 | * @param[in] pkt Packet on which to operate. | ||
791 | * @return The length of the packet's L2 header, in bytes. | ||
792 | */ | ||
793 | static __inline netio_size_t | ||
794 | NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
795 | { | ||
796 | /* | ||
797 | * Note that we effectively need to extract a quantity from the flags word | ||
798 | * which is measured in words, and then turn it into bytes by shifting | ||
799 | * it left by 2. We do this all at once by just shifting right two fewer | ||
800 | * bits, and shifting the mask up two bits. We then add two bytes. | ||
801 | */ | ||
802 | return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & | ||
803 | (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; | ||
804 | } | ||
805 | |||
806 | |||
807 | /** Return the length of the packet, starting with the L2 (Ethernet) header. | ||
808 | * @ingroup ingress | ||
809 | * | ||
810 | * @param[in] mda Pointer to packet's standard metadata. | ||
811 | * @param[in] pkt Packet on which to operate. | ||
812 | * @return The length of the packet, in bytes. | ||
813 | */ | ||
814 | static __inline netio_size_t | ||
815 | NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
816 | { | ||
817 | return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - | ||
818 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); | ||
819 | } | ||
820 | |||
821 | |||
822 | /** Return a pointer to the start of the packet's L2 (Ethernet) header. | ||
823 | * @ingroup ingress | ||
824 | * | ||
825 | * @param[in] mda Pointer to packet's standard metadata. | ||
826 | * @param[in] pkt Packet on which to operate. | ||
827 | * @return A pointer to the start of the packet's L2 header. | ||
828 | */ | ||
829 | static __inline unsigned char* | ||
830 | NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
831 | { | ||
832 | return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + | ||
833 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); | ||
834 | } | ||
835 | |||
836 | |||
837 | /** Retrieve the length of the packet, starting with the L3 (generally, | ||
838 | * the IP) header. | ||
839 | * @ingroup ingress | ||
840 | * | ||
841 | * @param[in] mda Pointer to packet's standard metadata. | ||
842 | * @param[in] pkt Packet on which to operate. | ||
843 | * @return Length of the packet's L3 header and data, in bytes. | ||
844 | */ | ||
845 | static __inline netio_size_t | ||
846 | NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
847 | { | ||
848 | return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - | ||
849 | NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); | ||
850 | } | ||
851 | |||
852 | |||
853 | /** Return a pointer to the packet's L3 (generally, the IP) header. | ||
854 | * @ingroup ingress | ||
855 | * | ||
856 | * Note that we guarantee word alignment of the L3 header. | ||
857 | * | ||
858 | * @param[in] mda Pointer to packet's standard metadata. | ||
859 | * @param[in] pkt Packet on which to operate. | ||
860 | * @return A pointer to the packet's L3 header. | ||
861 | */ | ||
862 | static __inline unsigned char* | ||
863 | NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
864 | { | ||
865 | return (NETIO_PKT_L2_DATA_M(mda, pkt) + | ||
866 | NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); | ||
867 | } | ||
868 | |||
869 | |||
870 | /** Return the ordinal of the packet. | ||
871 | * @ingroup ingress | ||
872 | * | ||
873 | * Each packet is given an ordinal number when it is delivered by the IPP. | ||
874 | * In the medium term, the ordinal is unique and monotonically increasing, | ||
875 | * being incremented by 1 for each packet; the ordinal of the first packet | ||
876 | * delivered after the IPP starts is zero. (Since the ordinal is of finite | ||
877 | * size, given enough input packets, it will eventually wrap around to zero; | ||
878 | * in the long term, therefore, ordinals are not unique.) The ordinals | ||
879 | * handed out by different IPPs are not disjoint, so two packets from | ||
880 | * different IPPs may have identical ordinals. Packets dropped by the | ||
881 | * IPP or by the I/O shim are not assigned ordinals. | ||
882 | * | ||
883 | * @param[in] mda Pointer to packet's standard metadata. | ||
884 | * @param[in] pkt Packet on which to operate. | ||
885 | * @return The packet's per-IPP packet ordinal. | ||
886 | */ | ||
887 | static __inline unsigned int | ||
888 | NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
889 | { | ||
890 | return mda->__packet_ordinal; | ||
891 | } | ||
892 | |||
893 | |||
894 | /** Return the per-group ordinal of the packet. | ||
895 | * @ingroup ingress | ||
896 | * | ||
897 | * Each packet is given a per-group ordinal number when it is | ||
898 | * delivered by the IPP. By default, the group is the packet's VLAN, | ||
899 | * although the IPP can be recompiled to use different values. In | ||
900 | * the medium term, the ordinal is unique and monotonically | ||
901 | * increasing, being incremented by 1 for each packet; the ordinal of | ||
902 | * the first packet distributed to a particular group is zero. | ||
903 | * (Since the ordinal is of finite size, given enough input packets, | ||
904 | * it will eventually wrap around to zero; in the long term, | ||
905 | * therefore, ordinals are not unique.) The ordinals handed out by | ||
906 | * different IPPs are not disjoint, so two packets from different IPPs | ||
907 | * may have identical ordinals; similarly, packets distributed to | ||
908 | * different groups may have identical ordinals. Packets dropped by | ||
909 | * the IPP or by the I/O shim are not assigned ordinals. | ||
910 | * | ||
911 | * @param[in] mda Pointer to packet's standard metadata. | ||
912 | * @param[in] pkt Packet on which to operate. | ||
913 | * @return The packet's per-IPP, per-group ordinal. | ||
914 | */ | ||
915 | static __inline unsigned int | ||
916 | NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
917 | { | ||
918 | return mda->__group_ordinal; | ||
919 | } | ||
920 | |||
921 | |||
922 | /** Return the VLAN ID assigned to the packet. | ||
923 | * @ingroup ingress | ||
924 | * | ||
925 | * This value is usually contained within the packet header. | ||
926 | * | ||
927 | * This value will be zero if the packet does not have a VLAN tag, or if | ||
928 | * this value was not extracted from the packet. | ||
929 | * | ||
930 | * @param[in] mda Pointer to packet's standard metadata. | ||
931 | * @param[in] pkt Packet on which to operate. | ||
932 | * @return The packet's VLAN ID. | ||
933 | */ | ||
934 | static __inline unsigned short | ||
935 | NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
936 | { | ||
937 | int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; | ||
938 | unsigned short* pkt_p; | ||
939 | int index; | ||
940 | unsigned short val; | ||
941 | |||
942 | if (vl == _NETIO_PKT_VLAN_NONE) | ||
943 | return 0; | ||
944 | |||
945 | pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); | ||
946 | index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; | ||
947 | |||
948 | val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & | ||
949 | _NETIO_PKT_INFO_VLAN_RMASK]; | ||
950 | |||
951 | #ifdef __TILECC__ | ||
952 | return (__insn_bytex(val) >> 16) & 0xFFF; | ||
953 | #else | ||
954 | return (__builtin_bswap32(val) >> 16) & 0xFFF; | ||
955 | #endif | ||
956 | } | ||
957 | |||
958 | |||
959 | /** Return the ethertype of the packet. | ||
960 | * @ingroup ingress | ||
961 | * | ||
962 | * This value is usually contained within the packet header. | ||
963 | * | ||
964 | * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() | ||
965 | * returns true, and otherwise, may not be well defined. | ||
966 | * | ||
967 | * @param[in] mda Pointer to packet's standard metadata. | ||
968 | * @param[in] pkt Packet on which to operate. | ||
969 | * @return The packet's ethertype. | ||
970 | */ | ||
971 | static __inline unsigned short | ||
972 | NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
973 | { | ||
974 | unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); | ||
975 | int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; | ||
976 | |||
977 | unsigned short val = | ||
978 | pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & | ||
979 | _NETIO_PKT_INFO_ETYPE_RMASK]; | ||
980 | |||
981 | return __builtin_bswap32(val) >> 16; | ||
982 | } | ||
983 | |||
984 | |||
985 | /** Return the flow hash computed on the packet. | ||
986 | * @ingroup ingress | ||
987 | * | ||
988 | * For TCP and UDP packets, this hash is calculated by hashing together | ||
989 | * the "5-tuple" values, specifically the source IP address, destination | ||
990 | * IP address, protocol type, source port and destination port. | ||
991 | * The hash value is intended to be helpful for millions of distinct | ||
992 | * flows. | ||
993 | * | ||
994 | * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is | ||
995 | * derived by hashing together the source and destination IP addresses. | ||
996 | * | ||
997 | * For MPLS-encapsulated packets, the flow hash is derived by hashing | ||
998 | * the first MPLS label. | ||
999 | * | ||
1000 | * For all other packets the flow hash is computed from the source | ||
1001 | * and destination Ethernet addresses. | ||
1002 | * | ||
1003 | * The hash is symmetric, meaning it produces the same value if the | ||
1004 | * source and destination are swapped. The only exceptions are | ||
1005 | * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple | ||
1006 | * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 | ||
1007 | * (Encap Security Payload), which use only the destination address | ||
1008 | * since the source address is not meaningful. | ||
1009 | * | ||
1010 | * @param[in] mda Pointer to packet's standard metadata. | ||
1011 | * @param[in] pkt Packet on which to operate. | ||
1012 | * @return The packet's 32-bit flow hash. | ||
1013 | */ | ||
1014 | static __inline unsigned int | ||
1015 | NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1016 | { | ||
1017 | return mda->__flow_hash; | ||
1018 | } | ||
1019 | |||
1020 | |||
1021 | /** Return the first word of "user data" for the packet. | ||
1022 | * | ||
1023 | * The contents of the user data words depend on the IPP. | ||
1024 | * | ||
1025 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first | ||
1026 | * word of user data contains the least significant bits of the 64-bit | ||
1027 | * arrival cycle count (see @c get_cycle_count_low()). | ||
1028 | * | ||
1029 | * See the <em>System Programmer's Guide</em> for details. | ||
1030 | * | ||
1031 | * @ingroup ingress | ||
1032 | * | ||
1033 | * @param[in] mda Pointer to packet's standard metadata. | ||
1034 | * @param[in] pkt Packet on which to operate. | ||
1035 | * @return The packet's first word of "user data". | ||
1036 | */ | ||
1037 | static __inline unsigned int | ||
1038 | NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1039 | { | ||
1040 | return mda->__user_data_0; | ||
1041 | } | ||
1042 | |||
1043 | |||
1044 | /** Return the second word of "user data" for the packet. | ||
1045 | * | ||
1046 | * The contents of the user data words depend on the IPP. | ||
1047 | * | ||
1048 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second | ||
1049 | * word of user data contains the most significant bits of the 64-bit | ||
1050 | * arrival cycle count (see @c get_cycle_count_high()). | ||
1051 | * | ||
1052 | * See the <em>System Programmer's Guide</em> for details. | ||
1053 | * | ||
1054 | * @ingroup ingress | ||
1055 | * | ||
1056 | * @param[in] mda Pointer to packet's standard metadata. | ||
1057 | * @param[in] pkt Packet on which to operate. | ||
1058 | * @return The packet's second word of "user data". | ||
1059 | */ | ||
1060 | static __inline unsigned int | ||
1061 | NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1062 | { | ||
1063 | return mda->__user_data_1; | ||
1064 | } | ||
1065 | |||
1066 | |||
1067 | /** Determine whether the L4 (TCP/UDP) checksum was calculated. | ||
1068 | * @ingroup ingress | ||
1069 | * | ||
1070 | * @param[in] mda Pointer to packet's standard metadata. | ||
1071 | * @param[in] pkt Packet on which to operate. | ||
1072 | * @return Nonzero if the L4 checksum was calculated. | ||
1073 | */ | ||
1074 | static __inline unsigned int | ||
1075 | NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1076 | { | ||
1077 | return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); | ||
1078 | } | ||
1079 | |||
1080 | |||
1081 | /** Determine whether the L4 (TCP/UDP) checksum was calculated and found to | ||
1082 | * be correct. | ||
1083 | * @ingroup ingress | ||
1084 | * | ||
1085 | * @param[in] mda Pointer to packet's standard metadata. | ||
1086 | * @param[in] pkt Packet on which to operate. | ||
1087 | * @return Nonzero if the checksum was calculated and is correct. | ||
1088 | */ | ||
1089 | static __inline unsigned int | ||
1090 | NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1091 | { | ||
1092 | return !(mda->__flags & | ||
1093 | (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); | ||
1094 | } | ||
1095 | |||
1096 | |||
1097 | /** Determine whether the L3 (IP) checksum was calculated. | ||
1098 | * @ingroup ingress | ||
1099 | * | ||
1100 | * @param[in] mda Pointer to packet's standard metadata. | ||
1101 | * @param[in] pkt Packet on which to operate. | ||
1102 | * @return Nonzero if the L3 (IP) checksum was calculated. | ||
1103 | */ | ||
1104 | static __inline unsigned int | ||
1105 | NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1106 | { | ||
1107 | return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); | ||
1108 | } | ||
1109 | |||
1110 | |||
1111 | /** Determine whether the L3 (IP) checksum was calculated and found to be | ||
1112 | * correct. | ||
1113 | * @ingroup ingress | ||
1114 | * | ||
1115 | * @param[in] mda Pointer to packet's standard metadata. | ||
1116 | * @param[in] pkt Packet on which to operate. | ||
1117 | * @return Nonzero if the checksum was calculated and is correct. | ||
1118 | */ | ||
1119 | static __inline unsigned int | ||
1120 | NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1121 | { | ||
1122 | return !(mda->__flags & | ||
1123 | (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); | ||
1124 | } | ||
1125 | |||
1126 | |||
1127 | /** Determine whether the ethertype was recognized and L3 packet data was | ||
1128 | * processed. | ||
1129 | * @ingroup ingress | ||
1130 | * | ||
1131 | * @param[in] mda Pointer to packet's standard metadata. | ||
1132 | * @param[in] pkt Packet on which to operate. | ||
1133 | * @return Nonzero if the ethertype was recognized and L3 packet data was | ||
1134 | * processed. | ||
1135 | */ | ||
1136 | static __inline unsigned int | ||
1137 | NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1138 | { | ||
1139 | return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); | ||
1140 | } | ||
1141 | |||
1142 | |||
1143 | /** Retrieve the status of a packet and any errors that may have occurred | ||
1144 | * during ingress processing (length mismatches, CRC errors, etc.). | ||
1145 | * @ingroup ingress | ||
1146 | * | ||
1147 | * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() | ||
1148 | * returns zero are always reported as underlength, as there is no a priori | ||
1149 | * means to determine their length. Normally, applications should use | ||
1150 | * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this | ||
1151 | * function. | ||
1152 | * | ||
1153 | * @param[in] mda Pointer to packet's standard metadata. | ||
1154 | * @param[in] pkt Packet on which to operate. | ||
1155 | * @return The packet's status. | ||
1156 | */ | ||
1157 | static __inline netio_pkt_status_t | ||
1158 | NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1159 | { | ||
1160 | return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; | ||
1161 | } | ||
1162 | |||
1163 | |||
1164 | /** Report whether a packet is bad (i.e., was shorter than expected based on | ||
1165 | * its headers, or had a bad CRC). | ||
1166 | * @ingroup ingress | ||
1167 | * | ||
1168 | * Note that this function does not verify L3 or L4 checksums. | ||
1169 | * | ||
1170 | * @param[in] mda Pointer to packet's standard metadata. | ||
1171 | * @param[in] pkt Packet on which to operate. | ||
1172 | * @return Nonzero if the packet is bad and should be discarded. | ||
1173 | */ | ||
1174 | static __inline unsigned int | ||
1175 | NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1176 | { | ||
1177 | return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && | ||
1178 | (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || | ||
1179 | NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); | ||
1180 | } | ||
1181 | |||
1182 | |||
1183 | /** Return the length of the packet, starting with the L2 (Ethernet) header. | ||
1184 | * @ingroup egress | ||
1185 | * | ||
1186 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1187 | * @param[in] pkt Packet on which to operate. | ||
1188 | * @return The length of the packet, in bytes. | ||
1189 | */ | ||
1190 | static __inline netio_size_t | ||
1191 | NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1192 | { | ||
1193 | return mmd->l2_length; | ||
1194 | } | ||
1195 | |||
1196 | |||
1197 | /** Return the length of the L2 (Ethernet) header. | ||
1198 | * @ingroup egress | ||
1199 | * | ||
1200 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1201 | * @param[in] pkt Packet on which to operate. | ||
1202 | * @return The length of the packet's L2 header, in bytes. | ||
1203 | */ | ||
1204 | static __inline netio_size_t | ||
1205 | NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1206 | netio_pkt_t* pkt) | ||
1207 | { | ||
1208 | return mmd->l3_offset - mmd->l2_offset; | ||
1209 | } | ||
1210 | |||
1211 | |||
1212 | /** Return the length of the packet, starting with the L3 (IP) header. | ||
1213 | * @ingroup egress | ||
1214 | * | ||
1215 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1216 | * @param[in] pkt Packet on which to operate. | ||
1217 | * @return Length of the packet's L3 header and data, in bytes. | ||
1218 | */ | ||
1219 | static __inline netio_size_t | ||
1220 | NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1221 | { | ||
1222 | return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - | ||
1223 | NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); | ||
1224 | } | ||
1225 | |||
1226 | |||
1227 | /** Return a pointer to the packet's L3 (generally, the IP) header. | ||
1228 | * @ingroup egress | ||
1229 | * | ||
1230 | * Note that we guarantee word alignment of the L3 header. | ||
1231 | * | ||
1232 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1233 | * @param[in] pkt Packet on which to operate. | ||
1234 | * @return A pointer to the packet's L3 header. | ||
1235 | */ | ||
1236 | static __inline unsigned char* | ||
1237 | NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1238 | { | ||
1239 | return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; | ||
1240 | } | ||
1241 | |||
1242 | |||
1243 | /** Return a pointer to the packet's L2 (Ethernet) header. | ||
1244 | * @ingroup egress | ||
1245 | * | ||
1246 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1247 | * @param[in] pkt Packet on which to operate. | ||
1248 | * @return A pointer to the start of the packet. | ||
1249 | */ | ||
1250 | static __inline unsigned char* | ||
1251 | NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1252 | { | ||
1253 | return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; | ||
1254 | } | ||
1255 | |||
1256 | |||
1257 | /** Retrieve the status of a packet and any errors that may have occurred | ||
1258 | * during ingress processing (length mismatches, CRC errors, etc.). | ||
1259 | * @ingroup ingress | ||
1260 | * | ||
1261 | * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() | ||
1262 | * returns zero are always reported as underlength, as there is no a priori | ||
1263 | * means to determine their length. Normally, applications should use | ||
1264 | * @ref NETIO_PKT_BAD() instead of explicitly checking status with this | ||
1265 | * function. | ||
1266 | * | ||
1267 | * @param[in] pkt Packet on which to operate. | ||
1268 | * @return The packet's status. | ||
1269 | */ | ||
1270 | static __inline netio_pkt_status_t | ||
1271 | NETIO_PKT_STATUS(netio_pkt_t* pkt) | ||
1272 | { | ||
1273 | netio_assert(!pkt->__packet.bits.__minimal); | ||
1274 | |||
1275 | return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; | ||
1276 | } | ||
1277 | |||
1278 | |||
1279 | /** Report whether a packet is bad (i.e., was shorter than expected based on | ||
1280 | * its headers, or had a bad CRC). | ||
1281 | * @ingroup ingress | ||
1282 | * | ||
1283 | * Note that this function does not verify L3 or L4 checksums. | ||
1284 | * | ||
1285 | * @param[in] pkt Packet on which to operate. | ||
1286 | * @return Nonzero if the packet is bad and should be discarded. | ||
1287 | */ | ||
1288 | static __inline unsigned int | ||
1289 | NETIO_PKT_BAD(netio_pkt_t* pkt) | ||
1290 | { | ||
1291 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1292 | |||
1293 | return NETIO_PKT_BAD_M(mda, pkt); | ||
1294 | } | ||
1295 | |||
1296 | |||
1297 | /** Return the length of the packet's custom header. | ||
1298 | * A custom header may or may not be present, depending upon the IPP; its | ||
1299 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
1300 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
1301 | * the custom header precedes the L2 header in the packet buffer. | ||
1302 | * @ingroup pktfuncs | ||
1303 | * | ||
1304 | * @param[in] pkt Packet on which to operate. | ||
1305 | * @return The length of the packet's custom header, in bytes. | ||
1306 | */ | ||
1307 | static __inline netio_size_t | ||
1308 | NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) | ||
1309 | { | ||
1310 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1311 | |||
1312 | return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); | ||
1313 | } | ||
1314 | |||
1315 | |||
1316 | /** Return the length of the packet, starting with the custom header. | ||
1317 | * A custom header may or may not be present, depending upon the IPP; its | ||
1318 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
1319 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
1320 | * the custom header precedes the L2 header in the packet buffer. | ||
1321 | * @ingroup pktfuncs | ||
1322 | * | ||
1323 | * @param[in] pkt Packet on which to operate. | ||
1324 | * @return The length of the packet, in bytes. | ||
1325 | */ | ||
1326 | static __inline netio_size_t | ||
1327 | NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) | ||
1328 | { | ||
1329 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1330 | |||
1331 | return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); | ||
1332 | } | ||
1333 | |||
1334 | |||
1335 | /** Return a pointer to the packet's custom header. | ||
1336 | * A custom header may or may not be present, depending upon the IPP; its | ||
1337 | * contents and alignment are also IPP-dependent. Currently, none of the | ||
1338 | * standard IPPs supplied by Tilera produce a custom header. If present, | ||
1339 | * the custom header precedes the L2 header in the packet buffer. | ||
1340 | * @ingroup pktfuncs | ||
1341 | * | ||
1342 | * @param[in] pkt Packet on which to operate. | ||
1343 | * @return A pointer to the start of the packet. | ||
1344 | */ | ||
1345 | static __inline unsigned char* | ||
1346 | NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) | ||
1347 | { | ||
1348 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1349 | |||
1350 | return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); | ||
1351 | } | ||
1352 | |||
1353 | |||
1354 | /** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. | ||
1355 | * @ingroup pktfuncs | ||
1356 | * | ||
1357 | * @param[in] pkt Packet on which to operate. | ||
1358 | * @return The length of the packet's L2 header, in bytes. | ||
1359 | */ | ||
1360 | static __inline netio_size_t | ||
1361 | NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) | ||
1362 | { | ||
1363 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1364 | { | ||
1365 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1366 | |||
1367 | return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); | ||
1368 | } | ||
1369 | else | ||
1370 | { | ||
1371 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1372 | |||
1373 | return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); | ||
1374 | } | ||
1375 | } | ||
1376 | |||
1377 | |||
1378 | /** Return the length of the packet, starting with the L2 (Ethernet) header. | ||
1379 | * @ingroup pktfuncs | ||
1380 | * | ||
1381 | * @param[in] pkt Packet on which to operate. | ||
1382 | * @return The length of the packet, in bytes. | ||
1383 | */ | ||
1384 | static __inline netio_size_t | ||
1385 | NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) | ||
1386 | { | ||
1387 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1388 | { | ||
1389 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1390 | |||
1391 | return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); | ||
1392 | } | ||
1393 | else | ||
1394 | { | ||
1395 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1396 | |||
1397 | return NETIO_PKT_L2_LENGTH_M(mda, pkt); | ||
1398 | } | ||
1399 | } | ||
1400 | |||
1401 | |||
1402 | /** Return a pointer to the packet's L2 (Ethernet) header. | ||
1403 | * @ingroup pktfuncs | ||
1404 | * | ||
1405 | * @param[in] pkt Packet on which to operate. | ||
1406 | * @return A pointer to the start of the packet. | ||
1407 | */ | ||
1408 | static __inline unsigned char* | ||
1409 | NETIO_PKT_L2_DATA(netio_pkt_t* pkt) | ||
1410 | { | ||
1411 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1412 | { | ||
1413 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1414 | |||
1415 | return NETIO_PKT_L2_DATA_MM(mmd, pkt); | ||
1416 | } | ||
1417 | else | ||
1418 | { | ||
1419 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1420 | |||
1421 | return NETIO_PKT_L2_DATA_M(mda, pkt); | ||
1422 | } | ||
1423 | } | ||
1424 | |||
1425 | |||
1426 | /** Retrieve the length of the packet, starting with the L3 (generally, the IP) | ||
1427 | * header. | ||
1428 | * @ingroup pktfuncs | ||
1429 | * | ||
1430 | * @param[in] pkt Packet on which to operate. | ||
1431 | * @return Length of the packet's L3 header and data, in bytes. | ||
1432 | */ | ||
1433 | static __inline netio_size_t | ||
1434 | NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) | ||
1435 | { | ||
1436 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1437 | { | ||
1438 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1439 | |||
1440 | return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); | ||
1441 | } | ||
1442 | else | ||
1443 | { | ||
1444 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1445 | |||
1446 | return NETIO_PKT_L3_LENGTH_M(mda, pkt); | ||
1447 | } | ||
1448 | } | ||
1449 | |||
1450 | |||
1451 | /** Return a pointer to the packet's L3 (generally, the IP) header. | ||
1452 | * @ingroup pktfuncs | ||
1453 | * | ||
1454 | * Note that we guarantee word alignment of the L3 header. | ||
1455 | * | ||
1456 | * @param[in] pkt Packet on which to operate. | ||
1457 | * @return A pointer to the packet's L3 header. | ||
1458 | */ | ||
1459 | static __inline unsigned char* | ||
1460 | NETIO_PKT_L3_DATA(netio_pkt_t* pkt) | ||
1461 | { | ||
1462 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1463 | { | ||
1464 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1465 | |||
1466 | return NETIO_PKT_L3_DATA_MM(mmd, pkt); | ||
1467 | } | ||
1468 | else | ||
1469 | { | ||
1470 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1471 | |||
1472 | return NETIO_PKT_L3_DATA_M(mda, pkt); | ||
1473 | } | ||
1474 | } | ||
1475 | |||
1476 | |||
1477 | /** Return the ordinal of the packet. | ||
1478 | * @ingroup ingress | ||
1479 | * | ||
1480 | * Each packet is given an ordinal number when it is delivered by the IPP. | ||
1481 | * In the medium term, the ordinal is unique and monotonically increasing, | ||
1482 | * being incremented by 1 for each packet; the ordinal of the first packet | ||
1483 | * delivered after the IPP starts is zero. (Since the ordinal is of finite | ||
1484 | * size, given enough input packets, it will eventually wrap around to zero; | ||
1485 | * in the long term, therefore, ordinals are not unique.) The ordinals | ||
1486 | * handed out by different IPPs are not disjoint, so two packets from | ||
1487 | * different IPPs may have identical ordinals. Packets dropped by the | ||
1488 | * IPP or by the I/O shim are not assigned ordinals. | ||
1489 | * | ||
1490 | * | ||
1491 | * @param[in] pkt Packet on which to operate. | ||
1492 | * @return The packet's per-IPP packet ordinal. | ||
1493 | */ | ||
1494 | static __inline unsigned int | ||
1495 | NETIO_PKT_ORDINAL(netio_pkt_t* pkt) | ||
1496 | { | ||
1497 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1498 | |||
1499 | return NETIO_PKT_ORDINAL_M(mda, pkt); | ||
1500 | } | ||
1501 | |||
1502 | |||
1503 | /** Return the per-group ordinal of the packet. | ||
1504 | * @ingroup ingress | ||
1505 | * | ||
1506 | * Each packet is given a per-group ordinal number when it is | ||
1507 | * delivered by the IPP. By default, the group is the packet's VLAN, | ||
1508 | * although the IPP can be recompiled to use different values. In | ||
1509 | * the medium term, the ordinal is unique and monotonically | ||
1510 | * increasing, being incremented by 1 for each packet; the ordinal of | ||
1511 | * the first packet distributed to a particular group is zero. | ||
1512 | * (Since the ordinal is of finite size, given enough input packets, | ||
1513 | * it will eventually wrap around to zero; in the long term, | ||
1514 | * therefore, ordinals are not unique.) The ordinals handed out by | ||
1515 | * different IPPs are not disjoint, so two packets from different IPPs | ||
1516 | * may have identical ordinals; similarly, packets distributed to | ||
1517 | * different groups may have identical ordinals. Packets dropped by | ||
1518 | * the IPP or by the I/O shim are not assigned ordinals. | ||
1519 | * | ||
1520 | * @param[in] pkt Packet on which to operate. | ||
1521 | * @return The packet's per-IPP, per-group ordinal. | ||
1522 | */ | ||
1523 | static __inline unsigned int | ||
1524 | NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) | ||
1525 | { | ||
1526 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1527 | |||
1528 | return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); | ||
1529 | } | ||
1530 | |||
1531 | |||
1532 | /** Return the VLAN ID assigned to the packet. | ||
1533 | * @ingroup ingress | ||
1534 | * | ||
1535 | * This is usually also contained within the packet header. If the packet | ||
1536 | * does not have a VLAN tag, the VLAN ID returned by this function is zero. | ||
1537 | * | ||
1538 | * @param[in] pkt Packet on which to operate. | ||
1539 | * @return The packet's VLAN ID. | ||
1540 | */ | ||
1541 | static __inline unsigned short | ||
1542 | NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) | ||
1543 | { | ||
1544 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1545 | |||
1546 | return NETIO_PKT_VLAN_ID_M(mda, pkt); | ||
1547 | } | ||
1548 | |||
1549 | |||
1550 | /** Return the ethertype of the packet. | ||
1551 | * @ingroup ingress | ||
1552 | * | ||
1553 | * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() | ||
1554 | * returns true; otherwise, it may not be well defined. | ||
1555 | * | ||
1556 | * @param[in] pkt Packet on which to operate. | ||
1557 | * @return The packet's ethertype. | ||
1558 | */ | ||
1559 | static __inline unsigned short | ||
1560 | NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) | ||
1561 | { | ||
1562 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1563 | |||
1564 | return NETIO_PKT_ETHERTYPE_M(mda, pkt); | ||
1565 | } | ||
1566 | |||
1567 | |||
1568 | /** Return the flow hash computed on the packet. | ||
1569 | * @ingroup ingress | ||
1570 | * | ||
1571 | * For TCP and UDP packets, this hash is calculated by hashing together | ||
1572 | * the "5-tuple" values, specifically the source IP address, destination | ||
1573 | * IP address, protocol type, source port and destination port. | ||
1574 | * The hash value is intended to be helpful for millions of distinct | ||
1575 | * flows. | ||
1576 | * | ||
1577 | * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is | ||
1578 | * derived by hashing together the source and destination IP addresses. | ||
1579 | * | ||
1580 | * For MPLS-encapsulated packets, the flow hash is derived by hashing | ||
1581 | * the first MPLS label. | ||
1582 | * | ||
1583 | * For all other packets the flow hash is computed from the source | ||
1584 | * and destination Ethernet addresses. | ||
1585 | * | ||
1586 | * The hash is symmetric, meaning it produces the same value if the | ||
1587 | * source and destination are swapped. The only exceptions are | ||
1588 | * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple | ||
1589 | * Internet Protocol), 0x2F (Generic Routing Encapsulation) and 0x32 | ||
1590 | * (Encap Security Payload), which use only the destination address | ||
1591 | * since the source address is not meaningful. | ||
1592 | * | ||
1593 | * @param[in] pkt Packet on which to operate. | ||
1594 | * @return The packet's 32-bit flow hash. | ||
1595 | */ | ||
1596 | static __inline unsigned int | ||
1597 | NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) | ||
1598 | { | ||
1599 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1600 | |||
1601 | return NETIO_PKT_FLOW_HASH_M(mda, pkt); | ||
1602 | } | ||
1603 | |||
1604 | |||
1605 | /** Return the first word of "user data" for the packet. | ||
1606 | * | ||
1607 | * The contents of the user data words depend on the IPP. | ||
1608 | * | ||
1609 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first | ||
1610 | * word of user data contains the least significant bits of the 64-bit | ||
1611 | * arrival cycle count (see @c get_cycle_count_low()). | ||
1612 | * | ||
1613 | * See the <em>System Programmer's Guide</em> for details. | ||
1614 | * | ||
1615 | * @ingroup ingress | ||
1616 | * | ||
1617 | * @param[in] pkt Packet on which to operate. | ||
1618 | * @return The packet's first word of "user data". | ||
1619 | */ | ||
1620 | static __inline unsigned int | ||
1621 | NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) | ||
1622 | { | ||
1623 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1624 | |||
1625 | return NETIO_PKT_USER_DATA_0_M(mda, pkt); | ||
1626 | } | ||
1627 | |||
1628 | |||
1629 | /** Return the second word of "user data" for the packet. | ||
1630 | * | ||
1631 | * The contents of the user data words depend on the IPP. | ||
1632 | * | ||
1633 | * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second | ||
1634 | * word of user data contains the most significant bits of the 64-bit | ||
1635 | * arrival cycle count (see @c get_cycle_count_high()). | ||
1636 | * | ||
1637 | * See the <em>System Programmer's Guide</em> for details. | ||
1638 | * | ||
1639 | * @ingroup ingress | ||
1640 | * | ||
1641 | * @param[in] pkt Packet on which to operate. | ||
1642 | * @return The packet's second word of "user data". | ||
1643 | */ | ||
1644 | static __inline unsigned int | ||
1645 | NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) | ||
1646 | { | ||
1647 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1648 | |||
1649 | return NETIO_PKT_USER_DATA_1_M(mda, pkt); | ||
1650 | } | ||
1651 | |||
1652 | |||
1653 | /** Determine whether the L4 (TCP/UDP) checksum was calculated. | ||
1654 | * @ingroup ingress | ||
1655 | * | ||
1656 | * @param[in] pkt Packet on which to operate. | ||
1657 | * @return Nonzero if the L4 checksum was calculated. | ||
1658 | */ | ||
1659 | static __inline unsigned int | ||
1660 | NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) | ||
1661 | { | ||
1662 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1663 | |||
1664 | return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); | ||
1665 | } | ||
1666 | |||
1667 | |||
1668 | /** Determine whether the L4 (TCP/UDP) checksum was calculated and found to | ||
1669 | * be correct. | ||
1670 | * @ingroup ingress | ||
1671 | * | ||
1672 | * @param[in] pkt Packet on which to operate. | ||
1673 | * @return Nonzero if the checksum was calculated and is correct. | ||
1674 | */ | ||
1675 | static __inline unsigned int | ||
1676 | NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) | ||
1677 | { | ||
1678 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1679 | |||
1680 | return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); | ||
1681 | } | ||
1682 | |||
1683 | |||
1684 | /** Determine whether the L3 (IP) checksum was calculated. | ||
1685 | * @ingroup ingress | ||
1686 | * | ||
1687 | * @param[in] pkt Packet on which to operate. | ||
1688 | * @return Nonzero if the L3 (IP) checksum was calculated. | ||
1689 | */ | ||
1690 | static __inline unsigned int | ||
1691 | NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) | ||
1692 | { | ||
1693 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1694 | |||
1695 | return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); | ||
1696 | } | ||
1697 | |||
1698 | |||
1699 | /** Determine whether the L3 (IP) checksum was calculated and found to be | ||
1700 | * correct. | ||
1701 | * @ingroup ingress | ||
1702 | * | ||
1703 | * @param[in] pkt Packet on which to operate. | ||
1704 | * @return Nonzero if the checksum was calculated and is correct. | ||
1705 | */ | ||
1706 | static __inline unsigned int | ||
1707 | NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) | ||
1708 | { | ||
1709 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1710 | |||
1711 | return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); | ||
1712 | } | ||
1713 | |||
1714 | |||
1715 | /** Determine whether the Ethertype was recognized and L3 packet data was | ||
1716 | * processed. | ||
1717 | * @ingroup ingress | ||
1718 | * | ||
1719 | * @param[in] pkt Packet on which to operate. | ||
1720 | * @return Nonzero if the Ethertype was recognized and L3 packet data was | ||
1721 | * processed. | ||
1722 | */ | ||
1723 | static __inline unsigned int | ||
1724 | NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) | ||
1725 | { | ||
1726 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1727 | |||
1728 | return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); | ||
1729 | } | ||
1730 | |||
1731 | |||
1732 | /** Set an egress packet's L2 length, using a metadata pointer to speed the | ||
1733 | * computation. | ||
1734 | * @ingroup egress | ||
1735 | * | ||
1736 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1737 | * @param[in] pkt Packet on which to operate. | ||
1738 | * @param[in] len Packet L2 length, in bytes. | ||
1739 | */ | ||
1740 | static __inline void | ||
1741 | NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, | ||
1742 | int len) | ||
1743 | { | ||
1744 | mmd->l2_length = len; | ||
1745 | } | ||
1746 | |||
1747 | |||
1748 | /** Set an egress packet's L2 length. | ||
1749 | * @ingroup egress | ||
1750 | * | ||
1751 | * @param[in,out] pkt Packet on which to operate. | ||
1752 | * @param[in] len Packet L2 length, in bytes. | ||
1753 | */ | ||
1754 | static __inline void | ||
1755 | NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) | ||
1756 | { | ||
1757 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1758 | |||
1759 | NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); | ||
1760 | } | ||
1761 | |||
1762 | |||
1763 | /** Set an egress packet's L2 header length, using a metadata pointer to | ||
1764 | * speed the computation. | ||
1765 | * @ingroup egress | ||
1766 | * | ||
1767 | * It is not normally necessary to call this routine; only the L2 length, | ||
1768 | * not the header length, is needed to transmit a packet. It may be useful if | ||
1769 | * the egress packet will later be processed by code which expects to use | ||
1770 | * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. | ||
1771 | * | ||
1772 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1773 | * @param[in] pkt Packet on which to operate. | ||
1774 | * @param[in] len Packet L2 header length, in bytes. | ||
1775 | */ | ||
1776 | static __inline void | ||
1777 | NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1778 | netio_pkt_t* pkt, int len) | ||
1779 | { | ||
1780 | mmd->l3_offset = mmd->l2_offset + len; | ||
1781 | } | ||
1782 | |||
1783 | |||
1784 | /** Set an egress packet's L2 header length. | ||
1785 | * @ingroup egress | ||
1786 | * | ||
1787 | * It is not normally necessary to call this routine; only the L2 length, | ||
1788 | * not the header length, is needed to transmit a packet. It may be useful if | ||
1789 | * the egress packet will later be processed by code which expects to use | ||
1790 | * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. | ||
1791 | * | ||
1792 | * @param[in,out] pkt Packet on which to operate. | ||
1793 | * @param[in] len Packet L2 header length, in bytes. | ||
1794 | */ | ||
1795 | static __inline void | ||
1796 | NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len) | ||
1797 | { | ||
1798 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1799 | |||
1800 | NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len); | ||
1801 | } | ||
1802 | |||
1803 | |||
1804 | /** Set up an egress packet for hardware checksum computation, using a | ||
1805 | * metadata pointer to speed the operation. | ||
1806 | * @ingroup egress | ||
1807 | * | ||
1808 | * NetIO provides the ability to automatically calculate a standard | ||
1809 | * 16-bit Internet checksum on transmitted packets. The application | ||
1810 | * may specify the point in the packet where the checksum starts, the | ||
1811 | * number of bytes to be checksummed, and the two bytes in the packet | ||
1812 | * which will be replaced with the completed checksum. (If the range | ||
1813 | * of bytes to be checksummed includes the bytes to be replaced, the | ||
1814 | * initial values of those bytes will be included in the checksum.) | ||
1815 | * | ||
1816 | * For some protocols, the packet checksum covers data which is not present | ||
1817 | * in the packet, or is at least not contiguous to the main data payload. | ||
1818 | * For instance, the TCP checksum includes a "pseudo-header" which includes | ||
1819 | * the source and destination IP addresses of the packet. To accommodate | ||
1820 | * this, the checksum engine may be "seeded" with an initial value, which | ||
1821 | * the application would need to compute based on the specific protocol's | ||
1822 | * requirements. Note that the seed is given in host byte order (little- | ||
1823 | * endian), not network byte order (big-endian); code written to compute a | ||
1824 | * pseudo-header checksum in network byte order will need to byte-swap it | ||
1825 | * before use as the seed. | ||
1826 | * | ||
1827 | * Note that the checksum is computed as part of the transmission process, | ||
1828 | * so it will not be present in the packet upon completion of this routine. | ||
1829 | * | ||
1830 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1831 | * @param[in] pkt Packet on which to operate. | ||
1832 | * @param[in] start Offset within L2 packet of the first byte to include in | ||
1833 | * the checksum. | ||
1834 | * @param[in] length Number of bytes to include in | ||
1835 | * the checksum. | ||
1836 | * @param[in] location Offset within L2 packet of the first of the two bytes | ||
1837 | * to be replaced with the calculated checksum. | ||
1838 | * @param[in] seed Initial value of the running checksum before any of the | ||
1839 | * packet data is added. | ||
1840 | */ | ||
1841 | static __inline void | ||
1842 | NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1843 | netio_pkt_t* pkt, int start, int length, | ||
1844 | int location, uint16_t seed) | ||
1845 | { | ||
1846 | mmd->csum_start = start; | ||
1847 | mmd->csum_length = length; | ||
1848 | mmd->csum_location = location; | ||
1849 | mmd->csum_seed = seed; | ||
1850 | mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK; | ||
1851 | } | ||
1852 | |||
1853 | |||
1854 | /** Set up an egress packet for hardware checksum computation. | ||
1855 | * @ingroup egress | ||
1856 | * | ||
1857 | * NetIO provides the ability to automatically calculate a standard | ||
1858 | * 16-bit Internet checksum on transmitted packets. The application | ||
1859 | * may specify the point in the packet where the checksum starts, the | ||
1860 | * number of bytes to be checksummed, and the two bytes in the packet | ||
1861 | * which will be replaced with the completed checksum. (If the range | ||
1862 | * of bytes to be checksummed includes the bytes to be replaced, the | ||
1863 | * initial values of those bytes will be included in the checksum.) | ||
1864 | * | ||
1865 | * For some protocols, the packet checksum covers data which is not present | ||
1866 | * in the packet, or is at least not contiguous to the main data payload. | ||
1867 | * For instance, the TCP checksum includes a "pseudo-header" which includes | ||
1868 | * the source and destination IP addresses of the packet. To accommodate | ||
1869 | * this, the checksum engine may be "seeded" with an initial value, which | ||
1870 | * the application would need to compute based on the specific protocol's | ||
1871 | * requirements. Note that the seed is given in host byte order (little- | ||
1872 | * endian), not network byte order (big-endian); code written to compute a | ||
1873 | * pseudo-header checksum in network byte order will need to byte-swap it | ||
1874 | * before use as the seed. | ||
1875 | * | ||
1876 | * Note that the checksum is computed as part of the transmission process, | ||
1877 | * so it will not be present in the packet upon completion of this routine. | ||
1878 | * | ||
1879 | * @param[in,out] pkt Packet on which to operate. | ||
1880 | * @param[in] start Offset within L2 packet of the first byte to include in | ||
1881 | * the checksum. | ||
1882 | * @param[in] length Number of bytes to include in | ||
1883 | * the checksum. | ||
1884 | * @param[in] location Offset within L2 packet of the first of the two bytes | ||
1885 | * to be replaced with the calculated checksum. | ||
1886 | * @param[in] seed Initial value of the running checksum before any of the | ||
1887 | * packet data is added. | ||
1888 | */ | ||
1889 | static __inline void | ||
1890 | NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length, | ||
1891 | int location, uint16_t seed) | ||
1892 | { | ||
1893 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1894 | |||
1895 | NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed); | ||
1896 | } | ||
1897 | |||
1898 | |||
1899 | /** Return the number of bytes which could be prepended to a packet, using a | ||
1900 | * metadata pointer to speed the operation. | ||
1901 | * See @ref netio_populate_prepend_buffer() to get a full description of | ||
1902 | * prepending. | ||
1903 | * | ||
1904 | * @param[in,out] mda Pointer to packet's standard metadata. | ||
1905 | * @param[in] pkt Packet on which to operate. | ||
1906 | */ | ||
1907 | static __inline int | ||
1908 | NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
1909 | { | ||
1910 | return (pkt->__packet.bits.__offset << 6) + | ||
1911 | NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); | ||
1912 | } | ||
1913 | |||
1914 | |||
1915 | /** Return the number of bytes which could be prepended to a packet, using a | ||
1916 | * metadata pointer to speed the operation. | ||
1917 | * See @ref netio_populate_prepend_buffer() to get a full description of | ||
1918 | * prepending. | ||
1919 | * @ingroup egress | ||
1920 | * | ||
1921 | * @param[in,out] mmd Pointer to packet's minimal metadata. | ||
1922 | * @param[in] pkt Packet on which to operate. | ||
1923 | */ | ||
1924 | static __inline int | ||
1925 | NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) | ||
1926 | { | ||
1927 | return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset; | ||
1928 | } | ||
1929 | |||
1930 | |||
1931 | /** Return the number of bytes which could be prepended to a packet. | ||
1932 | * See @ref netio_populate_prepend_buffer() to get a full description of | ||
1933 | * prepending. | ||
1934 | * @ingroup egress | ||
1935 | * | ||
1936 | * @param[in] pkt Packet on which to operate. | ||
1937 | */ | ||
1938 | static __inline int | ||
1939 | NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt) | ||
1940 | { | ||
1941 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
1942 | { | ||
1943 | netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); | ||
1944 | |||
1945 | return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt); | ||
1946 | } | ||
1947 | else | ||
1948 | { | ||
1949 | netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); | ||
1950 | |||
1951 | return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt); | ||
1952 | } | ||
1953 | } | ||
1954 | |||
1955 | |||
1956 | /** Flush a packet's minimal metadata from the cache, using a metadata pointer | ||
1957 | * to speed the operation. | ||
1958 | * @ingroup egress | ||
1959 | * | ||
1960 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1961 | * @param[in] pkt Packet on which to operate. | ||
1962 | */ | ||
1963 | static __inline void | ||
1964 | NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1965 | netio_pkt_t* pkt) | ||
1966 | { | ||
1967 | } | ||
1968 | |||
1969 | |||
1970 | /** Invalidate a packet's minimal metadata from the cache, using a metadata | ||
1971 | * pointer to speed the operation. | ||
1972 | * @ingroup egress | ||
1973 | * | ||
1974 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1975 | * @param[in] pkt Packet on which to operate. | ||
1976 | */ | ||
1977 | static __inline void | ||
1978 | NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1979 | netio_pkt_t* pkt) | ||
1980 | { | ||
1981 | } | ||
1982 | |||
1983 | |||
1984 | /** Flush and then invalidate a packet's minimal metadata from the cache, | ||
1985 | * using a metadata pointer to speed the operation. | ||
1986 | * @ingroup egress | ||
1987 | * | ||
1988 | * @param[in] mmd Pointer to packet's minimal metadata. | ||
1989 | * @param[in] pkt Packet on which to operate. | ||
1990 | */ | ||
1991 | static __inline void | ||
1992 | NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, | ||
1993 | netio_pkt_t* pkt) | ||
1994 | { | ||
1995 | } | ||
1996 | |||
1997 | |||
1998 | /** Flush a packet's metadata from the cache, using a metadata pointer | ||
1999 | * to speed the operation. | ||
2000 | * @ingroup ingress | ||
2001 | * | ||
2002 | * @param[in] mda Pointer to packet's metadata. | ||
2003 | * @param[in] pkt Packet on which to operate. | ||
2004 | */ | ||
2005 | static __inline void | ||
2006 | NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
2007 | { | ||
2008 | } | ||
2009 | |||
2010 | |||
2011 | /** Invalidate a packet's metadata from the cache, using a metadata | ||
2012 | * pointer to speed the operation. | ||
2013 | * @ingroup ingress | ||
2014 | * | ||
2015 | * @param[in] mda Pointer to packet's metadata. | ||
2016 | * @param[in] pkt Packet on which to operate. | ||
2017 | */ | ||
2018 | static __inline void | ||
2019 | NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
2020 | { | ||
2021 | } | ||
2022 | |||
2023 | |||
2024 | /** Flush and then invalidate a packet's metadata from the cache, | ||
2025 | * using a metadata pointer to speed the operation. | ||
2026 | * @ingroup ingress | ||
2027 | * | ||
2028 | * @param[in] mda Pointer to packet's metadata. | ||
2029 | * @param[in] pkt Packet on which to operate. | ||
2030 | */ | ||
2031 | static __inline void | ||
2032 | NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) | ||
2033 | { | ||
2034 | } | ||
2035 | |||
2036 | |||
2037 | /** Flush a packet's minimal metadata from the cache. | ||
2038 | * @ingroup egress | ||
2039 | * | ||
2040 | * @param[in] pkt Packet on which to operate. | ||
2041 | */ | ||
2042 | static __inline void | ||
2043 | NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
2044 | { | ||
2045 | } | ||
2046 | |||
2047 | |||
2048 | /** Invalidate a packet's minimal metadata from the cache. | ||
2049 | * @ingroup egress | ||
2050 | * | ||
2051 | * @param[in] pkt Packet on which to operate. | ||
2052 | */ | ||
2053 | static __inline void | ||
2054 | NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
2055 | { | ||
2056 | } | ||
2057 | |||
2058 | |||
2059 | /** Flush and then invalidate a packet's minimal metadata from the cache. | ||
2060 | * @ingroup egress | ||
2061 | * | ||
2062 | * @param[in] pkt Packet on which to operate. | ||
2063 | */ | ||
2064 | static __inline void | ||
2065 | NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) | ||
2066 | { | ||
2067 | } | ||
2068 | |||
2069 | |||
2070 | /** Flush a packet's metadata from the cache. | ||
2071 | * @ingroup ingress | ||
2072 | * | ||
2073 | * @param[in] pkt Packet on which to operate. | ||
2074 | */ | ||
2075 | static __inline void | ||
2076 | NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) | ||
2077 | { | ||
2078 | } | ||
2079 | |||
2080 | |||
2081 | /** Invalidate a packet's metadata from the cache. | ||
2082 | * @ingroup ingress | ||
2083 | * | ||
2084 | * @param[in] pkt Packet on which to operate. | ||
2085 | */ | ||
2086 | static __inline void | ||
2087 | NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) | ||
2088 | { | ||
2089 | } | ||
2090 | |||
2091 | |||
2092 | /** Flush and then invalidate a packet's metadata from the cache. | ||
2093 | * @ingroup ingress | ||
2094 | * | ||
2095 | * @param[in] pkt Packet on which to operate. | ||
2096 | */ | ||
2097 | static __inline void | ||
2098 | NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) | ||
2099 | { | ||
2100 | } | ||
2101 | |||
2102 | /** Number of NUMA nodes we can distribute buffers to. | ||
2103 | * @ingroup setup */ | ||
2104 | #define NETIO_NUM_NODE_WEIGHTS 16 | ||
2105 | |||
2106 | /** | ||
2107 | * @brief An object for specifying the characteristics of a NetIO communication | ||
2108 | * endpoint. | ||
2109 | * | ||
2110 | * @ingroup setup | ||
2111 | * | ||
2112 | * The @ref netio_input_register() function uses this structure to define | ||
2113 | * how an application tile will communicate with an IPP. | ||
2114 | * | ||
2115 | * | ||
2116 | * Future updates to NetIO may add new members to this structure, | ||
2117 | * which can affect the success of the registration operation. Thus, | ||
2118 | * if dynamically initializing the structure, applications are urged to | ||
2119 | * zero it out first, for example: | ||
2120 | * | ||
2121 | * @code | ||
2122 | * netio_input_config_t config; | ||
2123 | * memset(&config, 0, sizeof (config)); | ||
2124 | * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; | ||
2125 | * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; | ||
2126 | * config.queue_id = 0; | ||
2127 | * . | ||
2128 | * . | ||
2129 | * . | ||
2130 | * @endcode | ||
2131 | * | ||
2132 | * since that guarantees that any unused structure members, including | ||
2133 | * members which did not exist when the application was first developed, | ||
2134 | * will not have unexpected values. | ||
2135 | * | ||
2136 | * If statically initializing the structure, we strongly recommend use of | ||
2137 | * C99-style named initializers, for example: | ||
2138 | * | ||
2139 | * @code | ||
2140 | * netio_input_config_t config = { | ||
2141 | * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, | ||
2142 | * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS, | ||
2143 | * .queue_id = 0, | ||
2144 | * }; | ||
2145 | * @endcode | ||
2146 | * | ||
2147 | * instead of the old-style structure initialization: | ||
2148 | * | ||
2149 | * @code | ||
2150 | * // Bad example! Currently equivalent to the above, but don't do this. | ||
2151 | * netio_input_config_t config = { | ||
2152 | * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0 | ||
2153 | * }; | ||
2154 | * @endcode | ||
2155 | * | ||
2156 | * since the C99 style requires no changes to the code if elements of the | ||
2157 | * config structure are rearranged. (It also makes the initialization much | ||
2158 | * easier to understand.) | ||
2159 | * | ||
2160 | * Except for items which address a particular tile's transmit or receive | ||
2161 | * characteristics, such as the ::NETIO_RECV flag, applications are advised | ||
2162 | * to specify the same set of configuration data on all registrations. | ||
2163 | * This prevents differing results if multiple tiles happen to do their | ||
2164 | * registration operations in a different order on different invocations of | ||
2165 | * the application. This is particularly important for things like link | ||
2166 | * management flags, and buffer size and homing specifications. | ||
2167 | * | ||
2168 | * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO | ||
2169 | * buffer pool is automatically created and mapped into the application's | ||
2170 | * virtual address space at an address chosen by the operating system, | ||
2171 | * using the common memory (cmem) facility in the Tilera Multicore | ||
2172 | * Components library. The cmem facility allows multiple processes to gain | ||
2173 | * access to shared memory which is mapped into each process at an | ||
2174 | * identical virtual address. In order for this to work, the processes | ||
2175 | * must have a common ancestor, which must create the common memory using | ||
2176 | * tmc_cmem_init(). | ||
2177 | * | ||
2178 | * In programs using the iLib process creation API, or in programs which use | ||
2179 | * only one process (which include programs using the pthreads library), | ||
2180 | * tmc_cmem_init() is called automatically. All other applications | ||
2181 | * must call it explicitly, before any child processes which might call | ||
2182 | * netio_input_register() are created. | ||
2183 | */ | ||
2184 | typedef struct | ||
2185 | { | ||
2186 | /** Registration characteristics. | ||
2187 | |||
2188 | This value determines several characteristics of the registration; | ||
2189 | flags for different types of behavior are ORed together to make the | ||
2190 | final flag value. Generally applications should specify exactly | ||
2191 | one flag from each of the following categories: | ||
2192 | |||
2193 | - Whether the application will be receiving packets on this queue | ||
2194 | (::NETIO_RECV or ::NETIO_NO_RECV). | ||
2195 | |||
2196 | - Whether the application will be transmitting packets on this queue, | ||
2197 | and if so, whether it will request egress checksum calculation | ||
2198 | (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is | ||
2199 | legal to call netio_get_buffer() without one of the XMIT flags, | ||
2200 | as long as ::NETIO_RECV is specified; in this case, the retrieved | ||
2201 | buffers must be passed to another tile for transmission. | ||
2202 | |||
2203 | - Whether the application expects any vendor-specific tags in | ||
2204 | its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM, | ||
2205 | or ::NETIO_TAG_MRVL). This must match the configuration of the | ||
2206 | target IPP. | ||
2207 | |||
2208 | To accommodate applications written to previous versions of the NetIO | ||
2209 | interface, none of the flags above are currently required; if omitted, | ||
2210 | NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM | | ||
2211 | ::NETIO_TAG_NONE were used. However, explicit specification of | ||
2212 | the relevant flags allows NetIO to do a better job of resource | ||
2213 | allocation, allows earlier detection of certain configuration errors, | ||
2214 | and may enable advanced features or higher performance in the future, | ||
2215 | so their use is strongly recommended. | ||
2216 | |||
2217 | Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT | ||
2218 | is a special case, intended primarily for use by programs which | ||
2219 | retrieve network statistics or do link management operations. | ||
2220 | When these flags are both specified, the resulting queue may not | ||
2221 | be used with NetIO routines other than netio_get(), netio_set(), | ||
2222 | and netio_input_unregister(). See @ref link for more information | ||
2223 | on link management. | ||
2224 | |||
2225 | Other flags are optional; their use is described below. | ||
2226 | */ | ||
2227 | int flags; | ||
2228 | |||
2229 | /** Interface name. This is a string which identifies the specific | ||
2230 | Ethernet controller hardware to be used. The format of the string | ||
2231 | is a device type and a device index, separated by a slash; so, | ||
2232 | the first 10 Gigabit Ethernet controller is named "xgbe/0", while | ||
2233 | the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". | ||
2234 | */ | ||
2235 | const char* interface; | ||
2236 | |||
2237 | /** Receive packet queue size. This specifies the maximum number | ||
2238 | of ingress packets that can be received on this queue without | ||
2239 | being retrieved by @ref netio_get_packet(). If the IPP's distribution | ||
2240 | algorithm calls for a packet to be sent to this queue, and this | ||
2241 | number of packets are already pending there, the new packet | ||
2242 | will either be discarded, or sent to another tile registered | ||
2243 | for the same queue_id (see @ref drops). This value must | ||
2244 | be at least ::NETIO_MIN_RECEIVE_PKTS, can always be as large as | ||
2245 | ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain | ||
2246 | interfaces. | ||
2247 | */ | ||
2248 | int num_receive_packets; | ||
2249 | |||
2250 | /** The queue ID being requested. Legal values for this range from 0 | ||
2251 | to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always | ||
2252 | greater than or equal to the number of tiles; this allows one queue | ||
2253 | for each tile, plus at least one additional queue. Some applications | ||
2254 | may wish to use the additional queue as a destination for unwanted | ||
2255 | packets, since packets delivered to queues for which no tiles have | ||
2256 | registered are discarded. | ||
2257 | */ | ||
2258 | unsigned int queue_id; | ||
2259 | |||
2260 | /** Maximum number of small send buffers to be held in the local empty | ||
2261 | buffer cache. This specifies the size of the area which holds | ||
2262 | empty small egress buffers requested from the IPP but not yet | ||
2263 | retrieved via @ref netio_get_buffer(). This value must be greater | ||
2264 | than zero if the application will ever use @ref netio_get_buffer() | ||
2265 | to allocate empty small egress buffers; it may be no larger than | ||
2266 | ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty | ||
2267 | buffer caching. | ||
2268 | */ | ||
2269 | int num_send_buffers_small_total; | ||
2270 | |||
2271 | /** Number of small send buffers to be preallocated at registration. | ||
2272 | If this value is nonzero, the specified number of empty small egress | ||
2273 | buffers will be requested from the IPP during the netio_input_register | ||
2274 | operation; this may speed the execution of @ref netio_get_buffer(). | ||
2275 | This may be no larger than @ref num_send_buffers_small_total. See @ref | ||
2276 | epp for more details on empty buffer caching. | ||
2277 | */ | ||
2278 | int num_send_buffers_small_prealloc; | ||
2279 | |||
2280 | /** Maximum number of large send buffers to be held in the local empty | ||
2281 | buffer cache. This specifies the size of the area which holds empty | ||
2282 | large egress buffers requested from the IPP but not yet retrieved via | ||
2283 | @ref netio_get_buffer(). This value must be greater than zero if the | ||
2284 | application will ever use @ref netio_get_buffer() to allocate empty | ||
2285 | large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. | ||
2286 | See @ref epp for more details on empty buffer caching. | ||
2287 | */ | ||
2288 | int num_send_buffers_large_total; | ||
2289 | |||
2290 | /** Number of large send buffers to be preallocated at registration. | ||
2291 | If this value is nonzero, the specified number of empty large egress | ||
2292 | buffers will be requested from the IPP during the netio_input_register | ||
2293 | operation; this may speed the execution of @ref netio_get_buffer(). | ||
2294 | This may be no larger than @ref num_send_buffers_large_total. See @ref | ||
2295 | epp for more details on empty buffer caching. | ||
2296 | */ | ||
2297 | int num_send_buffers_large_prealloc; | ||
2298 | |||
2299 | /** Maximum number of jumbo send buffers to be held in the local empty | ||
2300 | buffer cache. This specifies the size of the area which holds empty | ||
2301 | jumbo egress buffers requested from the IPP but not yet retrieved via | ||
2302 | @ref netio_get_buffer(). This value must be greater than zero if the | ||
2303 | application will ever use @ref netio_get_buffer() to allocate empty | ||
2304 | jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. | ||
2305 | See @ref epp for more details on empty buffer caching. | ||
2306 | */ | ||
2307 | int num_send_buffers_jumbo_total; | ||
2308 | |||
2309 | /** Number of jumbo send buffers to be preallocated at registration. | ||
2310 | If this value is nonzero, the specified number of empty jumbo egress | ||
2311 | buffers will be requested from the IPP during the netio_input_register | ||
2312 | operation; this may speed the execution of @ref netio_get_buffer(). | ||
2313 | This may be no larger than @ref num_send_buffers_jumbo_total. See @ref | ||
2314 | epp for more details on empty buffer caching. | ||
2315 | */ | ||
2316 | int num_send_buffers_jumbo_prealloc; | ||
2317 | |||
2318 | /** Total packet buffer size. This determines the total size, in bytes, | ||
2319 | of the NetIO buffer pool. Note that the maximum number of available | ||
2320 | buffers of each size is determined during hypervisor configuration | ||
2321 | (see the <em>System Programmer's Guide</em> for details); this just | ||
2322 | influences how much host memory is allocated for those buffers. | ||
2323 | |||
2324 | The buffer pool is allocated from common memory, which will be | ||
2325 | automatically initialized if needed. If your buffer pool is larger | ||
2326 | than 240 MB, you might need to explicitly call @c tmc_cmem_init(), | ||
2327 | as described in the Application Libraries Reference Manual (UG227). | ||
2328 | |||
2329 | Packet buffers are currently allocated in chunks of 16 MB; this | ||
2330 | value will be rounded up to the next larger multiple of 16 MB. | ||
2331 | If this value is zero, a default of 32 MB will be used; this was | ||
2332 | the value used by previous versions of NetIO. Note that taking this | ||
2333 | default also affects the placement of buffers on Linux NUMA nodes. | ||
2334 | See @ref buffer_node_weights for an explanation of buffer placement. | ||
2335 | |||
2336 | In order to successfully allocate packet buffers, Linux must have | ||
2337 | available huge pages on the relevant Linux NUMA nodes. See the | ||
2338 | <em>System Programmer's Guide</em> for information on configuring | ||
2339 | huge page support in Linux. | ||
2340 | */ | ||
2341 | uint64_t total_buffer_size; | ||
2342 | |||
2343 | /** Buffer placement weighting factors. | ||
2344 | |||
2345 | This array specifies the relative amount of buffering to place | ||
2346 | on each of the available Linux NUMA nodes. This array is | ||
2347 | indexed by the NUMA node, and the values in the array are | ||
2348 | proportional to the amount of buffer space to allocate on that | ||
2349 | node. | ||
2350 | |||
2351 | If memory striping is enabled in the Hypervisor, then there is | ||
2352 | only one logical NUMA node (node 0). In that case, NetIO will by | ||
2353 | default ignore the suggested buffer node weights, and buffers | ||
2354 | will be striped across the physical memory controllers. See | ||
2355 | UG209 System Programmer's Guide for a description of the | ||
2356 | hypervisor option that controls memory striping. | ||
2357 | |||
2358 | If memory striping is disabled, then there are up to four NUMA | ||
2359 | nodes, corresponding to the four DDRAM controllers in the TILE | ||
2360 | processor architecture. See UG100 Tile Processor Architecture | ||
2361 | Overview for a diagram showing the location of each of the DDRAM | ||
2362 | controllers relative to the tile array. | ||
2363 | |||
2364 | For instance, if memory striping is disabled, the following | ||
2365 | configuration structure: | ||
2366 | |||
2367 | @code | ||
2368 | netio_input_config_t config = { | ||
2369 | . | ||
2370 | . | ||
2371 | . | ||
2372 | .total_buffer_size = 4 * 16 * 1024 * 1024, | ||
2373 | .buffer_node_weights = { 1, 0, 1, 0 }, | ||
2374 | }; | ||
2375 | @endcode | ||
2376 | |||
2377 | would result in 32 MB of buffers being placed on controller 0, and | ||
2378 | 32 MB on controller 2. (Since buffers are allocated in units of | ||
2379 | 16 MB, some sets of weights will not be able to be matched exactly.) | ||
2380 | |||
2381 | For the weights to be effective, @ref total_buffer_size must be | ||
2382 | nonzero. If @ref total_buffer_size is zero, causing the default | ||
2383 | 32 MB of buffer space to be used, then any specified weights will | ||
2384 | be ignored, and buffers will be positioned as they were in previous | ||
2385 | versions of NetIO: | ||
2386 | |||
2387 | - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1, | ||
2388 | and the other 16 MB will be placed on controller 2. | ||
2389 | |||
2390 | - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2, | ||
2391 | and the other 16 MB will be placed on controller 3. | ||
2392 | |||
2393 | If @ref total_buffer_size is nonzero, but all weights are zero, | ||
2394 | then all buffer space will be allocated on Linux NUMA node zero. | ||
2395 | |||
2396 | By default, the specified buffer placement is treated as a hint; | ||
2397 | if sufficient free memory is not available on the specified | ||
2398 | controllers, the buffers will be allocated elsewhere. However, | ||
2399 | if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a | ||
2400 | failure to allocate buffer space exactly as requested will cause the | ||
2401 | registration operation to fail with an error of ::NETIO_CANNOT_HOME. | ||
2402 | |||
2403 | Note that maximal network performance cannot be achieved with | ||
2404 | only one memory controller. | ||
2405 | */ | ||
2406 | uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS]; | ||
2407 | |||
2408 | /** Fixed virtual address for packet buffers. Only valid when | ||
2409 | ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the | ||
2410 | description of that flag for details. | ||
2411 | */ | ||
2412 | void* fixed_buffer_va; | ||
2413 | |||
2414 | /** | ||
2415 | Maximum number of outstanding send packet requests. This value is | ||
2416 | only relevant when an EPP is in use; it determines the number of | ||
2417 | slots in the EPP's outgoing packet queue which this tile is allowed | ||
2418 | to consume, and thus the number of packets which may be sent before | ||
2419 | the sending tile must wait for an acknowledgment from the EPP. | ||
2420 | Modifying this value is generally only helpful when using @ref | ||
2421 | netio_send_packet_vector(), where it can help improve performance by | ||
2422 | allowing a single vector send operation to process more packets. | ||
2423 | Typically it is not specified, and the default, which divides the | ||
2424 | outgoing packet slots evenly between all tiles on the chip, is used. | ||
2425 | |||
2426 | If a registration asks for more outgoing packet queue slots than are | ||
2427 | available, ::NETIO_TOOMANY_XMIT will be returned. The total number | ||
2428 | of packet queue slots which are available for all tiles for each EPP | ||
2429 | is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING. | ||
2430 | |||
2431 | |||
2432 | This value is ignored if ::NETIO_XMIT is not specified in flags. | ||
2433 | If you want to specify a large value here for a specific tile, you are | ||
2434 | advised to specify ::NETIO_NO_XMIT on other, non-transmitting tiles so | ||
2435 | that they do not consume a default number of packet slots. Any tile | ||
2436 | transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING | ||
2437 | slots allocated to it; values less than that will be silently | ||
2438 | increased by the NetIO library. | ||
2439 | */ | ||
2440 | int num_sends_outstanding; | ||
2441 | } | ||
2442 | netio_input_config_t; | ||
2443 | |||
2444 | |||
2445 | /** Registration flags; used in the @ref netio_input_config_t structure. | ||
2446 | * @addtogroup setup | ||
2447 | */ | ||
2448 | /** @{ */ | ||
2449 | |||
2450 | /** Fail a registration request if we can't put packet buffers | ||
2451 | on the specified memory controllers. */ | ||
2452 | #define NETIO_STRICT_HOMING 0x00000002 | ||
2453 | |||
2454 | /** This application expects no tags on its L2 headers. */ | ||
2455 | #define NETIO_TAG_NONE 0x00000004 | ||
2456 | |||
2457 | /** This application expects Marvell extended tags on its L2 headers. */ | ||
2458 | #define NETIO_TAG_MRVL 0x00000008 | ||
2459 | |||
2460 | /** This application expects Broadcom tags on its L2 headers. */ | ||
2461 | #define NETIO_TAG_BRCM 0x00000010 | ||
2462 | |||
2463 | /** This registration may call routines which receive packets. */ | ||
2464 | #define NETIO_RECV 0x00000020 | ||
2465 | |||
2466 | /** This registration may not call routines which receive packets. */ | ||
2467 | #define NETIO_NO_RECV 0x00000040 | ||
2468 | |||
2469 | /** This registration may call routines which transmit packets. */ | ||
2470 | #define NETIO_XMIT 0x00000080 | ||
2471 | |||
2472 | /** This registration may call routines which transmit packets with | ||
2473 | checksum acceleration. */ | ||
2474 | #define NETIO_XMIT_CSUM 0x00000100 | ||
2475 | |||
2476 | /** This registration may not call routines which transmit packets. */ | ||
2477 | #define NETIO_NO_XMIT 0x00000200 | ||
2478 | |||
2479 | /** This registration wants NetIO buffers mapped at an application-specified | ||
2480 | virtual address. | ||
2481 | |||
2482 | NetIO buffers are by default created by the TMC common memory facility, | ||
2483 | which must be configured by a common ancestor of all processes sharing | ||
2484 | a network interface. When this flag is specified, NetIO buffers are | ||
2485 | instead mapped at an address chosen by the application (and specified | ||
2486 | in @ref netio_input_config_t::fixed_buffer_va). This allows multiple | ||
2487 | unrelated but cooperating processes to share a NetIO interface. | ||
2488 | All processes sharing the same interface must specify this flag, | ||
2489 | and all must specify the same fixed virtual address. | ||
2490 | |||
2491 | @ref netio_input_config_t::fixed_buffer_va must be a | ||
2492 | multiple of 16 MB, and the packet buffers will occupy @ref | ||
2493 | netio_input_config_t::total_buffer_size bytes of virtual address | ||
2494 | space, beginning at that address. If any of those virtual addresses | ||
2495 | are currently occupied by other memory objects, like application or | ||
2496 | shared library code or data, @ref netio_input_register() will return | ||
2497 | ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va | ||
2498 | which will work for all applications, a good first guess might be to | ||
2499 | use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. | ||
2500 | If that fails, it might be helpful to consult the running application's | ||
2501 | virtual address description file (/proc/<em>pid</em>/maps) to see | ||
2502 | which regions of virtual address space are available. | ||
2503 | */ | ||
2504 | #define NETIO_FIXED_BUFFER_VA 0x00000400 | ||
2505 | |||
2506 | /** This registration call will not complete unless the network link | ||
2507 | is up. The process will wait several seconds for this to happen (the | ||
2508 | precise interval is link-dependent), but if the link does not come up, | ||
2509 | ::NETIO_LINK_DOWN will be returned. This flag is the default if | ||
2510 | ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by | ||
2511 | itself does not request that the link be brought up; that can be done | ||
2512 | with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the | ||
2513 | latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), | ||
2514 | or by explicitly setting the link's desired state via netio_set(). | ||
2515 | If the link is not brought up by one of those methods, and this flag | ||
2516 | is specified, the registration operation will return ::NETIO_LINK_DOWN. | ||
2517 | This flag is ignored if it is specified along with ::NETIO_NO_XMIT and | ||
2518 | ::NETIO_NO_RECV. See @ref link for more information on link | ||
2519 | management. | ||
2520 | */ | ||
2521 | #define NETIO_REQUIRE_LINK_UP 0x00000800 | ||
2522 | |||
2523 | /** This registration call will complete even if the network link is not up. | ||
2524 | Whenever the link is not up, packets will not be sent or received: | ||
2525 | netio_get_packet() will return ::NETIO_NOPKT once all queued packets | ||
2526 | have been drained, and netio_send_packet() and similar routines will | ||
2527 | return ::NETIO_QUEUE_FULL once the outgoing packet queue in the EPP | ||
2528 | or the I/O shim is full. See @ref link for more information on link | ||
2529 | management. | ||
2530 | */ | ||
2531 | #define NETIO_NOREQUIRE_LINK_UP 0x00001000 | ||
2532 | |||
2533 | #ifndef __DOXYGEN__ | ||
2534 | /* | ||
2535 | * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, | ||
2536 | * but should not be used directly by applications, and are thus not | ||
2537 | * documented. | ||
2538 | */ | ||
2539 | #define _NETIO_AUTO_UP 0x00002000 | ||
2540 | #define _NETIO_AUTO_DN 0x00004000 | ||
2541 | #define _NETIO_AUTO_PRESENT 0x00008000 | ||
2542 | #endif | ||
2543 | |||
2544 | /** Set the desired state of the link to up, allowing any speeds which are | ||
2545 | supported by the link hardware, as part of this registration operation. | ||
2546 | Do not take down the link automatically. This is the default if | ||
2547 | no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored | ||
2548 | if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. | ||
2549 | See @ref link for more information on link management. | ||
2550 | */ | ||
2551 | #define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) | ||
2552 | |||
2553 | /** Set the desired state of the link to up, allowing any speeds which are | ||
2554 | supported by the link hardware, as part of this registration operation. | ||
2555 | Set the desired state of the link to down the next time no tiles are | ||
2556 | registered for packet reception or transmission. This flag is ignored | ||
2557 | if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. | ||
2558 | See @ref link for more information on link management. | ||
2559 | */ | ||
2560 | #define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ | ||
2561 | _NETIO_AUTO_DN) | ||
2562 | |||
2563 | /** Set the desired state of the link to down the next time no tiles are | ||
2564 | registered for packet reception or transmission. This flag is ignored | ||
2565 | if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. | ||
2566 | See @ref link for more information on link management. | ||
2567 | */ | ||
2568 | #define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) | ||
2569 | |||
2570 | /** Do not bring up the link automatically as part of this registration | ||
2571 | operation. Do not take down the link automatically. This flag | ||
2572 | is ignored if it is specified along with ::NETIO_NO_XMIT and | ||
2573 | ::NETIO_NO_RECV. See @ref link for more information on link management. | ||
2574 | */ | ||
2575 | #define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT | ||
2576 | |||
2577 | |||
2578 | /** Minimum number of receive packets. */ | ||
2579 | #define NETIO_MIN_RECEIVE_PKTS 16 | ||
2580 | |||
2581 | /** Lower bound on the maximum number of receive packets; may be higher | ||
2582 | than this on some interfaces. */ | ||
2583 | #define NETIO_MAX_RECEIVE_PKTS 128 | ||
2584 | |||
2585 | /** Maximum number of send buffers, per packet size. */ | ||
2586 | #define NETIO_MAX_SEND_BUFFERS 16 | ||
2587 | |||
2588 | /** Number of EPP queue slots, and thus outstanding sends, per EPP. */ | ||
2589 | #define NETIO_TOTAL_SENDS_OUTSTANDING 2015 | ||
2590 | |||
2591 | /** Minimum number of EPP queue slots, and thus outstanding sends, per | ||
2592 | * transmitting tile. */ | ||
2593 | #define NETIO_MIN_SENDS_OUTSTANDING 16 | ||
2594 | |||
2595 | |||
2596 | /**@}*/ | ||
2597 | |||
2598 | #ifndef __DOXYGEN__ | ||
2599 | |||
2600 | /** | ||
2601 | * An object for providing Ethernet packets to a process. | ||
2602 | */ | ||
2603 | struct __netio_queue_impl_t; | ||
2604 | |||
2605 | /** | ||
2606 | * An object for managing the user end of a NetIO queue. | ||
2607 | */ | ||
2608 | struct __netio_queue_user_impl_t; | ||
2609 | |||
2610 | #endif /* !__DOXYGEN__ */ | ||
2611 | |||
2612 | |||
2613 | /** A netio_queue_t describes a NetIO communications endpoint. | ||
2614 | * @ingroup setup | ||
2615 | */ | ||
2616 | typedef struct | ||
2617 | { | ||
2618 | #ifdef __DOXYGEN__ | ||
2619 | uint8_t opaque[8]; /**< This is an opaque structure. */ | ||
2620 | #else | ||
2621 | struct __netio_queue_impl_t* __system_part; /**< The system part. */ | ||
2622 | struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ | ||
2623 | #ifdef _NETIO_PTHREAD | ||
2624 | _netio_percpu_mutex_t lock; /**< Queue lock. */ | ||
2625 | #endif | ||
2626 | #endif | ||
2627 | } | ||
2628 | netio_queue_t; | ||
2629 | |||
2630 | |||
2631 | /** | ||
2632 | * @brief Packet send context. | ||
2633 | * | ||
2634 | * @ingroup egress | ||
2635 | * | ||
2636 | * Packet send context for use with netio_send_packet_prepare and _commit. | ||
2637 | */ | ||
2638 | typedef struct | ||
2639 | { | ||
2640 | #ifdef __DOXYGEN__ | ||
2641 | uint8_t opaque[44]; /**< This is an opaque structure. */ | ||
2642 | #else | ||
2643 | uint8_t flags; /**< Defined below */ | ||
2644 | uint8_t datalen; /**< Number of valid words pointed to by data. */ | ||
2645 | uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note | ||
2646 | that this is smaller than the 11-word maximum | ||
2647 | request size, since some constant values are | ||
2648 | not saved in the context. */ | ||
2649 | uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ | ||
2650 | #endif | ||
2651 | } | ||
2652 | netio_send_pkt_context_t; | ||
2653 | |||
2654 | |||
2655 | #ifndef __DOXYGEN__ | ||
2656 | #define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ | ||
2657 | #define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ | ||
2658 | #endif | ||
2659 | |||
2660 | /** | ||
2661 | * @brief Packet vector entry. | ||
2662 | * | ||
2663 | * @ingroup egress | ||
2664 | * | ||
2665 | * This data structure is used with netio_send_packet_vector() to send multiple | ||
2666 | * packets with one NetIO call. The structure should be initialized by | ||
2667 | * calling netio_pkt_vector_set(), rather than by setting the fields | ||
2668 | * directly. | ||
2669 | * | ||
2670 | * This structure is guaranteed to be a power of two in size, no | ||
2671 | * bigger than one L2 cache line, and to be aligned modulo its size. | ||
2672 | */ | ||
2673 | typedef struct | ||
2674 | #ifndef __DOXYGEN__ | ||
2675 | __attribute__((aligned(8))) | ||
2676 | #endif | ||
2677 | { | ||
2678 | /** Reserved for use by the user application. When initialized with | ||
2679 | * the netio_pkt_vector_set() function, this field is guaranteed | ||
2680 | * to be visible to readers only after all other fields are already | ||
2681 | * visible. This way it can be used as a valid flag or generation | ||
2682 | * counter. */ | ||
2683 | uint8_t user_data; | ||
2684 | |||
2685 | /* Structure members below this point should not be accessed directly by | ||
2686 | * applications, as they may change in the future. */ | ||
2687 | |||
2688 | /** Low 8 bits of the packet address to send. The high bits are | ||
2689 | * acquired from the 'handle' field. */ | ||
2690 | uint8_t buffer_address_low; | ||
2691 | |||
2692 | /** Number of bytes to transmit. */ | ||
2693 | uint16_t size; | ||
2694 | |||
2695 | /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, | ||
2696 | * this vector entry will be skipped and no packet will be transmitted. */ | ||
2697 | netio_pkt_handle_t handle; | ||
2698 | } | ||
2699 | netio_pkt_vector_entry_t; | ||
2700 | |||
2701 | |||
2702 | /** | ||
2703 | * @brief Initialize fields in a packet vector entry. | ||
2704 | * | ||
2705 | * @ingroup egress | ||
2706 | * | ||
2707 | * @param[out] v Pointer to the vector entry to be initialized. | ||
2708 | * @param[in] pkt Packet to be transmitted when the vector entry is passed to | ||
2709 | * netio_send_packet_vector(). Note that the packet's attributes | ||
2710 | * (e.g., its L2 offset and length) are captured at the time this | ||
2711 | * routine is called; subsequent changes in those attributes will not | ||
2712 | * be reflected in the packet which is actually transmitted. | ||
2713 | * Changes in the packet's contents, however, will be so reflected. | ||
2714 | * If this is NULL, no packet will be transmitted. | ||
2715 | * @param[in] user_data User data to be set in the vector entry. | ||
2716 | * This function guarantees that the "user_data" field will become | ||
2717 | * visible to a reader only after all other fields have become visible. | ||
2718 | * This allows a structure in a ring buffer to be written and read | ||
2719 | * by a polling reader without any locks or other synchronization. | ||
2720 | */ | ||
2721 | static __inline void | ||
2722 | netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, | ||
2723 | uint8_t user_data) | ||
2724 | { | ||
2725 | if (pkt) | ||
2726 | { | ||
2727 | if (NETIO_PKT_IS_MINIMAL(pkt)) | ||
2728 | { | ||
2729 | netio_pkt_minimal_metadata_t* mmd = | ||
2730 | (netio_pkt_minimal_metadata_t*) &pkt->__metadata; | ||
2731 | v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; | ||
2732 | v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); | ||
2733 | } | ||
2734 | else | ||
2735 | { | ||
2736 | netio_pkt_metadata_t* mda = &pkt->__metadata; | ||
2737 | v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; | ||
2738 | v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); | ||
2739 | } | ||
2740 | v->handle.word = pkt->__packet.word; | ||
2741 | } | ||
2742 | else | ||
2743 | { | ||
2744 | v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ | ||
2745 | } | ||
2746 | |||
2747 | __asm__("" : : : "memory"); | ||
2748 | |||
2749 | v->user_data = user_data; | ||
2750 | } | ||
2751 | |||
2752 | |||
2753 | /** | ||
2754 | * Flags and structures for @ref netio_get() and @ref netio_set(). | ||
2755 | * @ingroup config | ||
2756 | */ | ||
2757 | |||
2758 | /** @{ */ | ||
2759 | /** Parameter class; addr is a NETIO_PARAM_xxx value. */ | ||
2760 | #define NETIO_PARAM 0 | ||
2761 | /** Interface MAC address. This address is only valid with @ref netio_get(). | ||
2762 | * The value is a 6-byte MAC address. Depending upon the overall system | ||
2763 | * design, a MAC address may or may not be available for each interface. */ | ||
2764 | #define NETIO_PARAM_MAC 0 | ||
2765 | |||
2766 | /** Determine whether to suspend output on the receipt of pause frames. | ||
2767 | * If the value is nonzero, the I/O shim will suspend output when a pause | ||
2768 | * frame is received. If the value is zero, pause frames will be ignored. */ | ||
2769 | #define NETIO_PARAM_PAUSE_IN 1 | ||
2770 | |||
2771 | /** Determine whether to send pause frames if the I/O shim packet FIFOs are | ||
2772 | * nearly full. If the value is zero, pause frames are not sent. If | ||
2773 | * the value is nonzero, it is the delay value which will be sent in any | ||
2774 | * pause frames which are output, in units of 512 bit times. */ | ||
2775 | #define NETIO_PARAM_PAUSE_OUT 2 | ||
2776 | |||
2777 | /** Jumbo frame support. The value is a 4-byte integer. If the value is | ||
2778 | * nonzero, the MAC will accept frames of up to 10240 bytes. If the value | ||
2779 | * is zero, the MAC will only accept frames of up to 1544 bytes. */ | ||
2780 | #define NETIO_PARAM_JUMBO 3 | ||
2781 | |||
2782 | /** I/O shim's overflow statistics register. The value is two 16-bit integers. | ||
2783 | * The first 16-bit value (or the low 16 bits, if the value is treated as a | ||
2784 | * 32-bit number) is the count of packets which were completely dropped and | ||
2785 | * not delivered by the shim. The second 16-bit value (or the high 16 bits, | ||
2786 | * if the value is treated as a 32-bit number) is the count of packets | ||
2787 | * which were truncated and thus only partially delivered by the shim. This | ||
2788 | * register is automatically reset to zero after it has been read. | ||
2789 | */ | ||
2790 | #define NETIO_PARAM_OVERFLOW 4 | ||
2791 | |||
2792 | /** IPP statistics. This address is only valid with @ref netio_get(). The | ||
2793 | * value is a netio_stat_t structure. Unlike the I/O shim statistics, the | ||
2794 | * IPP statistics are not all reset to zero on read; see the description | ||
2795 | * of the netio_stat_t for details. */ | ||
2796 | #define NETIO_PARAM_STAT 5 | ||
2797 | |||
2798 | /** Possible link state. The value is a combination of "NETIO_LINK_xxx" | ||
2799 | * flags. With @ref netio_get(), this will indicate which flags are | ||
2800 | * actually supported by the hardware. | ||
2801 | * | ||
2802 | * For historical reasons, specifying this value to netio_set() will have | ||
2803 | * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is | ||
2804 | * discouraged. | ||
2805 | */ | ||
2806 | #define NETIO_PARAM_LINK_POSSIBLE_STATE 6 | ||
2807 | |||
2808 | /** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags. | ||
2809 | * With @ref netio_set(), this will attempt to immediately bring up the | ||
2810 | * link using whichever of the requested flags are supported by the | ||
2811 | * hardware, or take down the link if the flags are zero; if this is | ||
2812 | * not possible, an error will be returned. Many programs will want | ||
2813 | * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead. | ||
2814 | * | ||
2815 | * For historical reasons, specifying this value to netio_get() will | ||
2816 | * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE, | ||
2817 | * but this usage is discouraged. | ||
2818 | */ | ||
2819 | #define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE | ||
2820 | |||
2821 | /** Current link state. This address is only valid with @ref netio_get(). | ||
2822 | * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together. | ||
2823 | * If the link is down, the value ANDed with NETIO_LINK_SPEED will be | ||
2824 | * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will | ||
2825 | * result in exactly one of the NETIO_LINK_xxx values, indicating the | ||
2826 | * current speed. */ | ||
2827 | #define NETIO_PARAM_LINK_CURRENT_STATE 7 | ||
2828 | |||
2829 | /** Variant symbol for current state, retained for compatibility with | ||
2830 | * pre-MDE-2.1 programs. */ | ||
2831 | #define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE | ||
2832 | |||
2833 | /** Packet Coherence protocol. This address is only valid with @ref netio_get(). | ||
2834 | * The value is nonzero if the interface is configured for cache-coherent DMA. | ||
2835 | */ | ||
2836 | #define NETIO_PARAM_COHERENT 8 | ||
2837 | |||
2838 | /** Desired link state. The value is a combination of "NETIO_LINK_xxx" | ||
2839 | * flags, which specify the desired state for the link. With @ref | ||
2840 | * netio_set(), this will, in the background, attempt to bring up the link | ||
2841 | * using whichever of the requested flags are reasonable, or take down the | ||
2842 | * link if the flags are zero. The actual link up or down operation may | ||
2843 | * happen after this call completes. If the link state changes in the | ||
2844 | * future, the system will continue to try to get back to the desired link | ||
2845 | * state; for instance, if the link is brought up successfully, and then | ||
2846 | * the network cable is disconnected, the link will go down. However, the | ||
2847 | * desired state of the link is still up, so if the cable is reconnected, | ||
2848 | * the link will be brought up again. | ||
2849 | * | ||
2850 | * With @ref netio_get(), this will indicate the desired state for the | ||
2851 | * link, as set with a previous netio_set() call, or implicitly by a | ||
2852 | * netio_input_register() or netio_input_unregister() operation. This may | ||
2853 | * not reflect the current state of the link; to get that, use | ||
2854 | * ::NETIO_PARAM_LINK_CURRENT_STATE. */ | ||
2855 | #define NETIO_PARAM_LINK_DESIRED_STATE 9 | ||
2856 | |||
2857 | /** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT | ||
2858 | * address passed to @ref netio_get(). */ | ||
2859 | typedef struct | ||
2860 | { | ||
2861 | /** Number of packets which have been received by the IPP and forwarded | ||
2862 | * to a tile's receive queue for processing. This value wraps at its | ||
2863 | * maximum, and is not cleared upon read. */ | ||
2864 | uint32_t packets_received; | ||
2865 | |||
2866 | /** Number of packets which have been dropped by the IPP, because they could | ||
2867 | * not be received, or could not be forwarded to a tile. The former happens | ||
2868 | * when the IPP does not have a free packet buffer of suitable size for an | ||
2869 | * incoming frame. The latter happens when all potential destination tiles | ||
2870 | * for a packet, as defined by the group, bucket, and queue configuration, | ||
2871 | * have full receive queues. This value wraps at its maximum, and is not | ||
2872 | * cleared upon read. */ | ||
2873 | uint32_t packets_dropped; | ||
2874 | |||
2875 | /* | ||
2876 | * Note: the #defines after each of the following four one-byte values | ||
2877 | * denote their location within the third word of the netio_stat_t. They | ||
2878 | * are intended for use only by the IPP implementation and are thus omitted | ||
2879 | * from the Doxygen output. | ||
2880 | */ | ||
2881 | |||
2882 | /** Number of packets dropped because no worker was able to accept a new | ||
2883 | * packet. This value saturates at its maximum, and is cleared upon | ||
2884 | * read. */ | ||
2885 | uint8_t drops_no_worker; | ||
2886 | #ifndef __DOXYGEN__ | ||
2887 | #define NETIO_STAT_DROPS_NO_WORKER 0 | ||
2888 | #endif | ||
2889 | |||
2890 | /** Number of packets dropped because no small buffers were available. | ||
2891 | * This value saturates at its maximum, and is cleared upon read. */ | ||
2892 | uint8_t drops_no_smallbuf; | ||
2893 | #ifndef __DOXYGEN__ | ||
2894 | #define NETIO_STAT_DROPS_NO_SMALLBUF 1 | ||
2895 | #endif | ||
2896 | |||
2897 | /** Number of packets dropped because no large buffers were available. | ||
2898 | * This value saturates at its maximum, and is cleared upon read. */ | ||
2899 | uint8_t drops_no_largebuf; | ||
2900 | #ifndef __DOXYGEN__ | ||
2901 | #define NETIO_STAT_DROPS_NO_LARGEBUF 2 | ||
2902 | #endif | ||
2903 | |||
2904 | /** Number of packets dropped because no jumbo buffers were available. | ||
2905 | * This value saturates at its maximum, and is cleared upon read. */ | ||
2906 | uint8_t drops_no_jumbobuf; | ||
2907 | #ifndef __DOXYGEN__ | ||
2908 | #define NETIO_STAT_DROPS_NO_JUMBOBUF 3 | ||
2909 | #endif | ||
2910 | } | ||
2911 | netio_stat_t; | ||
2912 | |||
2913 | |||
2914 | /** Link can run, should run, or is running at 10 Mbps. */ | ||
2915 | #define NETIO_LINK_10M 0x01 | ||
2916 | |||
2917 | /** Link can run, should run, or is running at 100 Mbps. */ | ||
2918 | #define NETIO_LINK_100M 0x02 | ||
2919 | |||
2920 | /** Link can run, should run, or is running at 1 Gbps. */ | ||
2921 | #define NETIO_LINK_1G 0x04 | ||
2922 | |||
2923 | /** Link can run, should run, or is running at 10 Gbps. */ | ||
2924 | #define NETIO_LINK_10G 0x08 | ||
2925 | |||
2926 | /** Link should run at the highest speed supported by the link and by | ||
2927 | * the device connected to the link. Only usable as a value for | ||
2928 | * the link's desired state; never returned as a value for the current | ||
2929 | * or possible states. */ | ||
2930 | #define NETIO_LINK_ANYSPEED 0x10 | ||
2931 | |||
2932 | /** All legal link speeds. */ | ||
2933 | #define NETIO_LINK_SPEED (NETIO_LINK_10M | \ | ||
2934 | NETIO_LINK_100M | \ | ||
2935 | NETIO_LINK_1G | \ | ||
2936 | NETIO_LINK_10G | \ | ||
2937 | NETIO_LINK_ANYSPEED) | ||
2938 | |||
2939 | |||
2940 | /** MAC register class. Addr is a register offset within the MAC. | ||
2941 | * Registers within the XGbE and GbE MACs are documented in the Tile | ||
2942 | * Processor I/O Device Guide (UG104). MAC registers start at address | ||
2943 | * 0x4000, and do not include the MAC_INTERFACE registers. */ | ||
2944 | #define NETIO_MAC 1 | ||
2945 | |||
2946 | /** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" | ||
2947 | * member of a netio_mdio_addr_t structure. */ | ||
2948 | #define NETIO_MDIO 2 | ||
2949 | |||
2950 | /** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" | ||
2951 | * member of a netio_mdio_addr_t structure. */ | ||
2952 | #define NETIO_MDIO_CLAUSE45 3 | ||
2953 | |||
2954 | /** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO | ||
2955 | * address passed to @ref netio_get() or @ref netio_set(). */ | ||
2956 | typedef union | ||
2957 | { | ||
2958 | struct | ||
2959 | { | ||
2960 | unsigned int reg:16; /**< MDIO register offset. For clause 22 access, | ||
2961 | must be less than 32. */ | ||
2962 | unsigned int phy:5; /**< Which MDIO PHY to access. */ | ||
2963 | unsigned int dev:5; /**< Which MDIO device to access within that PHY. | ||
2964 | Applicable for clause 45 access only; ignored | ||
2965 | for clause 22 access. */ | ||
2966 | } | ||
2967 | bits; /**< Container for bitfields. */ | ||
2968 | uint64_t addr; /**< Value to pass to @ref netio_get() or | ||
2969 | * @ref netio_set(). */ | ||
2970 | } | ||
2971 | netio_mdio_addr_t; | ||
2972 | |||
2973 | /** @} */ | ||
2974 | |||
2975 | #endif /* __NETIO_INTF_H__ */ | ||
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 112b1e248f05..b4c8e8ec45dc 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile | |||
@@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o | |||
15 | obj-$(CONFIG_MODULES) += module.o | 15 | obj-$(CONFIG_MODULES) += module.o |
16 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 16 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
17 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o | 17 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o |
18 | obj-$(CONFIG_PCI) += pci.o | ||
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index 67617a05e602..dbc213adf5e1 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/kdev_t.h> | 21 | #include <linux/kdev_t.h> |
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/fcntl.h> | 23 | #include <linux/fcntl.h> |
24 | #include <linux/smp_lock.h> | ||
25 | #include <linux/uaccess.h> | 24 | #include <linux/uaccess.h> |
26 | #include <linux/signal.h> | 25 | #include <linux/signal.h> |
27 | #include <asm/syscalls.h> | 26 | #include <asm/syscalls.h> |
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index fb64b99959d4..543d6a33aa26 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/smp.h> | 17 | #include <linux/smp.h> |
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
20 | #include <linux/signal.h> | 19 | #include <linux/signal.h> |
21 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c new file mode 100644 index 000000000000..a1ee25be9ad9 --- /dev/null +++ b/arch/tile/kernel/pci.c | |||
@@ -0,0 +1,621 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/pci.h> | ||
17 | #include <linux/delay.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/capability.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/errno.h> | ||
23 | #include <linux/bootmem.h> | ||
24 | #include <linux/irq.h> | ||
25 | #include <linux/io.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | |||
28 | #include <asm/processor.h> | ||
29 | #include <asm/sections.h> | ||
30 | #include <asm/byteorder.h> | ||
31 | #include <asm/hv_driver.h> | ||
32 | #include <hv/drv_pcie_rc_intf.h> | ||
33 | |||
34 | |||
35 | /* | ||
36 | * Initialization flow and process | ||
37 | * ------------------------------- | ||
38 | * | ||
39 | * This file contains the routines to search for PCI buses, | ||
40 | * enumerate the buses, and configure any attached devices. | ||
41 | * | ||
42 | * There are two entry points here: | ||
43 | * 1) tile_pci_init | ||
44 | * This sets up the pci_controller structs, and opens the | ||
45 | * FDs to the hypervisor. This is called from setup_arch() early | ||
46 | * in the boot process. | ||
47 | * 2) pcibios_init | ||
48 | * This probes the PCI bus(es) for any attached hardware. It's | ||
49 | * called by subsys_initcall. All of the real work is done by the | ||
50 | * generic Linux PCI layer. | ||
51 | * | ||
52 | */ | ||
53 | |||
54 | /* | ||
55 | * This flag indicates whether the platform is a TILEmpower, which needs | ||
56 | * special configuration for the PLX switch chip. | ||
57 | */ | ||
58 | int __write_once tile_plx_gen1; | ||
59 | |||
60 | static struct pci_controller controllers[TILE_NUM_PCIE]; | ||
61 | static int num_controllers; | ||
62 | |||
63 | static struct pci_ops tile_cfg_ops; | ||
64 | |||
65 | |||
66 | /* | ||
67 | * We don't need to worry about the alignment of resources. | ||
68 | */ | ||
69 | resource_size_t pcibios_align_resource(void *data, const struct resource *res, | ||
70 | resource_size_t size, resource_size_t align) | ||
71 | { | ||
72 | return res->start; | ||
73 | } | ||
74 | EXPORT_SYMBOL(pcibios_align_resource); | ||
75 | |||
76 | /* | ||
77 | * Open a FD to the hypervisor PCI device. | ||
78 | * | ||
79 | * controller_id is the controller number; config_type is 0 or 1 for | ||
80 | * config0 or config1 operations. | ||
81 | */ | ||
82 | static int __init tile_pcie_open(int controller_id, int config_type) | ||
83 | { | ||
84 | char filename[32]; | ||
85 | int fd; | ||
86 | |||
87 | sprintf(filename, "pcie/%d/config%d", controller_id, config_type); | ||
88 | |||
89 | fd = hv_dev_open((HV_VirtAddr)filename, 0); | ||
90 | |||
91 | return fd; | ||
92 | } | ||
93 | |||
94 | |||
95 | /* | ||
96 | * Get the IRQ numbers from the HV and set up the handlers for them. | ||
97 | */ | ||
98 | static int __init tile_init_irqs(int controller_id, | ||
99 | struct pci_controller *controller) | ||
100 | { | ||
101 | char filename[32]; | ||
102 | int fd; | ||
103 | int ret; | ||
104 | int x; | ||
105 | struct pcie_rc_config rc_config; | ||
106 | |||
107 | sprintf(filename, "pcie/%d/ctl", controller_id); | ||
108 | fd = hv_dev_open((HV_VirtAddr)filename, 0); | ||
109 | if (fd < 0) { | ||
110 | pr_err("PCI: hv_dev_open(%s) failed\n", filename); | ||
111 | return -1; | ||
112 | } | ||
113 | ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config), | ||
114 | sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF); | ||
115 | hv_dev_close(fd); | ||
116 | if (ret != sizeof(rc_config)) { | ||
117 | pr_err("PCI: wanted %zd bytes, got %d\n", | ||
118 | sizeof(rc_config), ret); | ||
119 | return -1; | ||
120 | } | ||
121 | /* Record irq_base so that we can map INTx to IRQ # later. */ | ||
122 | controller->irq_base = rc_config.intr; | ||
123 | |||
124 | for (x = 0; x < 4; x++) | ||
125 | tile_irq_activate(rc_config.intr + x, | ||
126 | TILE_IRQ_HW_CLEAR); | ||
127 | |||
128 | if (rc_config.plx_gen1) | ||
129 | controller->plx_gen1 = 1; | ||
130 | |||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * First initialization entry point, called from setup_arch(). | ||
136 | * | ||
137 | * Find valid controllers and fill in pci_controller structs for each | ||
138 | * of them. | ||
139 | * | ||
140 | * Returns the number of controllers discovered. | ||
141 | */ | ||
142 | int __init tile_pci_init(void) | ||
143 | { | ||
144 | int i; | ||
145 | |||
146 | pr_info("PCI: Searching for controllers...\n"); | ||
147 | |||
148 | /* Do any configuration we need before using the PCIe */ | ||
149 | |||
150 | for (i = 0; i < TILE_NUM_PCIE; i++) { | ||
151 | int hv_cfg_fd0 = -1; | ||
152 | int hv_cfg_fd1 = -1; | ||
153 | int hv_mem_fd = -1; | ||
154 | char name[32]; | ||
155 | struct pci_controller *controller; | ||
156 | |||
157 | /* | ||
158 | * Open the fd to the HV. If it fails then this | ||
159 | * device doesn't exist. | ||
160 | */ | ||
161 | hv_cfg_fd0 = tile_pcie_open(i, 0); | ||
162 | if (hv_cfg_fd0 < 0) | ||
163 | continue; | ||
164 | hv_cfg_fd1 = tile_pcie_open(i, 1); | ||
165 | if (hv_cfg_fd1 < 0) { | ||
166 | pr_err("PCI: Couldn't open config fd to HV " | ||
167 | "for controller %d\n", i); | ||
168 | goto err_cont; | ||
169 | } | ||
170 | |||
171 | sprintf(name, "pcie/%d/mem", i); | ||
172 | hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); | ||
173 | if (hv_mem_fd < 0) { | ||
174 | pr_err("PCI: Could not open mem fd to HV!\n"); | ||
175 | goto err_cont; | ||
176 | } | ||
177 | |||
178 | pr_info("PCI: Found PCI controller #%d\n", i); | ||
179 | |||
180 | controller = &controllers[num_controllers]; | ||
181 | |||
182 | if (tile_init_irqs(i, controller)) { | ||
183 | pr_err("PCI: Could not initialize " | ||
184 | "IRQs, aborting.\n"); | ||
185 | goto err_cont; | ||
186 | } | ||
187 | |||
188 | controller->index = num_controllers; | ||
189 | controller->hv_cfg_fd[0] = hv_cfg_fd0; | ||
190 | controller->hv_cfg_fd[1] = hv_cfg_fd1; | ||
191 | controller->hv_mem_fd = hv_mem_fd; | ||
192 | controller->first_busno = 0; | ||
193 | controller->last_busno = 0xff; | ||
194 | controller->ops = &tile_cfg_ops; | ||
195 | |||
196 | num_controllers++; | ||
197 | continue; | ||
198 | |||
199 | err_cont: | ||
200 | if (hv_cfg_fd0 >= 0) | ||
201 | hv_dev_close(hv_cfg_fd0); | ||
202 | if (hv_cfg_fd1 >= 0) | ||
203 | hv_dev_close(hv_cfg_fd1); | ||
204 | if (hv_mem_fd >= 0) | ||
205 | hv_dev_close(hv_mem_fd); | ||
206 | continue; | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * Before using the PCIe, see if we need to do any platform-specific | ||
211 | * configuration, such as the PLX switch Gen 1 issue on TILEmpower. | ||
212 | */ | ||
213 | for (i = 0; i < num_controllers; i++) { | ||
214 | struct pci_controller *controller = &controllers[i]; | ||
215 | |||
216 | if (controller->plx_gen1) | ||
217 | tile_plx_gen1 = 1; | ||
218 | } | ||
219 | |||
220 | return num_controllers; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * (pin - 1) converts from the PCI standard's [1:4] convention to | ||
225 | * a normal [0:3] range. | ||
226 | */ | ||
227 | static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin) | ||
228 | { | ||
229 | struct pci_controller *controller = | ||
230 | (struct pci_controller *)dev->sysdata; | ||
231 | return (pin - 1) + controller->irq_base; | ||
232 | } | ||
233 | |||
234 | |||
235 | static void __init fixup_read_and_payload_sizes(void) | ||
236 | { | ||
237 | struct pci_dev *dev = NULL; | ||
238 | int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ | ||
239 | int max_read_size = 0x2; /* Limit to 512 byte reads. */ | ||
240 | u16 new_values; | ||
241 | |||
242 | /* Scan for the smallest maximum payload size. */ | ||
243 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
244 | int pcie_caps_offset; | ||
245 | u32 devcap; | ||
246 | int max_payload; | ||
247 | |||
248 | pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); | ||
249 | if (pcie_caps_offset == 0) | ||
250 | continue; | ||
251 | |||
252 | pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, | ||
253 | &devcap); | ||
254 | max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; | ||
255 | if (max_payload < smallest_max_payload) | ||
256 | smallest_max_payload = max_payload; | ||
257 | } | ||
258 | |||
259 | /* Now, set the max_payload_size for all devices to that value. */ | ||
260 | new_values = (max_read_size << 12) | (smallest_max_payload << 5); | ||
261 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
262 | int pcie_caps_offset; | ||
263 | u16 devctl; | ||
264 | |||
265 | pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); | ||
266 | if (pcie_caps_offset == 0) | ||
267 | continue; | ||
268 | |||
269 | pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, | ||
270 | &devctl); | ||
271 | devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); | ||
272 | devctl |= new_values; | ||
273 | pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, | ||
274 | devctl); | ||
275 | } | ||
276 | } | ||
277 | |||
278 | |||
279 | /* | ||
280 | * Second PCI initialization entry point, called by subsys_initcall. | ||
281 | * | ||
282 | * The controllers have been set up by the time we get here, by a call to | ||
283 | * tile_pci_init. | ||
284 | */ | ||
285 | static int __init pcibios_init(void) | ||
286 | { | ||
287 | int i; | ||
288 | |||
289 | pr_info("PCI: Probing PCI hardware\n"); | ||
290 | |||
291 | /* | ||
292 | * Delay a bit in case devices aren't ready. Some devices are | ||
293 | * known to require at least 20ms here, but we use a more | ||
294 | * conservative value. | ||
295 | */ | ||
296 | mdelay(250); | ||
297 | |||
298 | /* Scan all of the recorded PCI controllers. */ | ||
299 | for (i = 0; i < num_controllers; i++) { | ||
300 | struct pci_controller *controller = &controllers[i]; | ||
301 | struct pci_bus *bus; | ||
302 | |||
303 | pr_info("PCI: initializing controller #%d\n", i); | ||
304 | |||
305 | /* | ||
306 | * This comes from the generic Linux PCI driver. | ||
307 | * | ||
308 | * It reads the PCI tree for this bus into the Linux | ||
309 | * data structures. | ||
310 | * | ||
311 | * This is inlined in linux/pci.h and calls into | ||
312 | * pci_scan_bus_parented() in probe.c. | ||
313 | */ | ||
314 | bus = pci_scan_bus(0, controller->ops, controller); | ||
315 | controller->root_bus = bus; | ||
316 | controller->last_busno = bus->subordinate; | ||
317 | |||
318 | } | ||
319 | |||
320 | /* Do machine dependent PCI interrupt routing */ | ||
321 | pci_fixup_irqs(pci_common_swizzle, tile_map_irq); | ||
322 | |||
323 | /* | ||
324 | * This comes from the generic Linux PCI driver. | ||
325 | * | ||
326 | * It allocates all of the resources (I/O memory, etc) | ||
327 | * associated with the devices read in above. | ||
328 | */ | ||
329 | |||
330 | pci_assign_unassigned_resources(); | ||
331 | |||
332 | /* Configure the max_read_size and max_payload_size values. */ | ||
333 | fixup_read_and_payload_sizes(); | ||
334 | |||
335 | /* Record the I/O resources in the PCI controller structure. */ | ||
336 | for (i = 0; i < num_controllers; i++) { | ||
337 | struct pci_bus *root_bus = controllers[i].root_bus; | ||
338 | struct pci_bus *next_bus; | ||
339 | struct pci_dev *dev; | ||
340 | |||
341 | list_for_each_entry(dev, &root_bus->devices, bus_list) { | ||
342 | /* Find the PCI host controller, ie. the 1st bridge. */ | ||
343 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && | ||
344 | (PCI_SLOT(dev->devfn) == 0)) { | ||
345 | next_bus = dev->subordinate; | ||
346 | controllers[i].mem_resources[0] = | ||
347 | *next_bus->resource[0]; | ||
348 | controllers[i].mem_resources[1] = | ||
349 | *next_bus->resource[1]; | ||
350 | controllers[i].mem_resources[2] = | ||
351 | *next_bus->resource[2]; | ||
352 | |||
353 | break; | ||
354 | } | ||
355 | } | ||
356 | |||
357 | } | ||
358 | |||
359 | return 0; | ||
360 | } | ||
361 | subsys_initcall(pcibios_init); | ||
362 | |||
363 | /* | ||
364 | * No bus fixups needed. | ||
365 | */ | ||
366 | void __devinit pcibios_fixup_bus(struct pci_bus *bus) | ||
367 | { | ||
368 | /* Nothing needs to be done. */ | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * This can be called from the generic PCI layer, but doesn't need to | ||
373 | * do anything. | ||
374 | */ | ||
375 | char __devinit *pcibios_setup(char *str) | ||
376 | { | ||
377 | /* Nothing needs to be done. */ | ||
378 | return str; | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * This is called from the generic Linux layer. | ||
383 | */ | ||
384 | void __init pcibios_update_irq(struct pci_dev *dev, int irq) | ||
385 | { | ||
386 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * Enable memory and/or address decoding, as appropriate, for the | ||
391 | * device described by the 'dev' struct. | ||
392 | * | ||
393 | * This is called from the generic PCI layer, and can be called | ||
394 | * for bridges or endpoints. | ||
395 | */ | ||
396 | int pcibios_enable_device(struct pci_dev *dev, int mask) | ||
397 | { | ||
398 | u16 cmd, old_cmd; | ||
399 | u8 header_type; | ||
400 | int i; | ||
401 | struct resource *r; | ||
402 | |||
403 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
404 | |||
405 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | ||
406 | old_cmd = cmd; | ||
407 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { | ||
408 | /* | ||
409 | * For bridges, we enable both memory and I/O decoding | ||
410 | * in call cases. | ||
411 | */ | ||
412 | cmd |= PCI_COMMAND_IO; | ||
413 | cmd |= PCI_COMMAND_MEMORY; | ||
414 | } else { | ||
415 | /* | ||
416 | * For endpoints, we enable memory and/or I/O decoding | ||
417 | * only if they have a memory resource of that type. | ||
418 | */ | ||
419 | for (i = 0; i < 6; i++) { | ||
420 | r = &dev->resource[i]; | ||
421 | if (r->flags & IORESOURCE_UNSET) { | ||
422 | pr_err("PCI: Device %s not available " | ||
423 | "because of resource collisions\n", | ||
424 | pci_name(dev)); | ||
425 | return -EINVAL; | ||
426 | } | ||
427 | if (r->flags & IORESOURCE_IO) | ||
428 | cmd |= PCI_COMMAND_IO; | ||
429 | if (r->flags & IORESOURCE_MEM) | ||
430 | cmd |= PCI_COMMAND_MEMORY; | ||
431 | } | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * We only write the command if it changed. | ||
436 | */ | ||
437 | if (cmd != old_cmd) | ||
438 | pci_write_config_word(dev, PCI_COMMAND, cmd); | ||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) | ||
443 | { | ||
444 | unsigned long start = pci_resource_start(dev, bar); | ||
445 | unsigned long len = pci_resource_len(dev, bar); | ||
446 | unsigned long flags = pci_resource_flags(dev, bar); | ||
447 | |||
448 | if (!len) | ||
449 | return NULL; | ||
450 | if (max && len > max) | ||
451 | len = max; | ||
452 | |||
453 | if (!(flags & IORESOURCE_MEM)) { | ||
454 | pr_info("PCI: Trying to map invalid resource %#lx\n", flags); | ||
455 | start = 0; | ||
456 | } | ||
457 | |||
458 | return (void __iomem *)start; | ||
459 | } | ||
460 | EXPORT_SYMBOL(pci_iomap); | ||
461 | |||
462 | |||
463 | /**************************************************************** | ||
464 | * | ||
465 | * Tile PCI config space read/write routines | ||
466 | * | ||
467 | ****************************************************************/ | ||
468 | |||
469 | /* | ||
470 | * These are the normal read and write ops | ||
471 | * These are expanded with macros from pci_bus_read_config_byte() etc. | ||
472 | * | ||
473 | * devfn is the combined PCI slot & function. | ||
474 | * | ||
475 | * offset is in bytes, from the start of config space for the | ||
476 | * specified bus & slot. | ||
477 | */ | ||
478 | |||
479 | static int __devinit tile_cfg_read(struct pci_bus *bus, | ||
480 | unsigned int devfn, | ||
481 | int offset, | ||
482 | int size, | ||
483 | u32 *val) | ||
484 | { | ||
485 | struct pci_controller *controller = bus->sysdata; | ||
486 | int busnum = bus->number & 0xff; | ||
487 | int slot = (devfn >> 3) & 0x1f; | ||
488 | int function = devfn & 0x7; | ||
489 | u32 addr; | ||
490 | int config_mode = 1; | ||
491 | |||
492 | /* | ||
493 | * There is no bridge between the Tile and bus 0, so we | ||
494 | * use config0 to talk to bus 0. | ||
495 | * | ||
496 | * If we're talking to a bus other than zero then we | ||
497 | * must have found a bridge. | ||
498 | */ | ||
499 | if (busnum == 0) { | ||
500 | /* | ||
501 | * We fake an empty slot for (busnum == 0) && (slot > 0), | ||
502 | * since there is only one slot on bus 0. | ||
503 | */ | ||
504 | if (slot) { | ||
505 | *val = 0xFFFFFFFF; | ||
506 | return 0; | ||
507 | } | ||
508 | config_mode = 0; | ||
509 | } | ||
510 | |||
511 | addr = busnum << 20; /* Bus in 27:20 */ | ||
512 | addr |= slot << 15; /* Slot (device) in 19:15 */ | ||
513 | addr |= function << 12; /* Function is in 14:12 */ | ||
514 | addr |= (offset & 0xFFF); /* byte address in 0:11 */ | ||
515 | |||
516 | return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0, | ||
517 | (HV_VirtAddr)(val), size, addr); | ||
518 | } | ||
519 | |||
520 | |||
521 | /* | ||
522 | * See tile_cfg_read() for relevent comments. | ||
523 | * Note that "val" is the value to write, not a pointer to that value. | ||
524 | */ | ||
525 | static int __devinit tile_cfg_write(struct pci_bus *bus, | ||
526 | unsigned int devfn, | ||
527 | int offset, | ||
528 | int size, | ||
529 | u32 val) | ||
530 | { | ||
531 | struct pci_controller *controller = bus->sysdata; | ||
532 | int busnum = bus->number & 0xff; | ||
533 | int slot = (devfn >> 3) & 0x1f; | ||
534 | int function = devfn & 0x7; | ||
535 | u32 addr; | ||
536 | int config_mode = 1; | ||
537 | HV_VirtAddr valp = (HV_VirtAddr)&val; | ||
538 | |||
539 | /* | ||
540 | * For bus 0 slot 0 we use config 0 accesses. | ||
541 | */ | ||
542 | if (busnum == 0) { | ||
543 | /* | ||
544 | * We fake an empty slot for (busnum == 0) && (slot > 0), | ||
545 | * since there is only one slot on bus 0. | ||
546 | */ | ||
547 | if (slot) | ||
548 | return 0; | ||
549 | config_mode = 0; | ||
550 | } | ||
551 | |||
552 | addr = busnum << 20; /* Bus in 27:20 */ | ||
553 | addr |= slot << 15; /* Slot (device) in 19:15 */ | ||
554 | addr |= function << 12; /* Function is in 14:12 */ | ||
555 | addr |= (offset & 0xFFF); /* byte address in 0:11 */ | ||
556 | |||
557 | #ifdef __BIG_ENDIAN | ||
558 | /* Point to the correct part of the 32-bit "val". */ | ||
559 | valp += 4 - size; | ||
560 | #endif | ||
561 | |||
562 | return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0, | ||
563 | valp, size, addr); | ||
564 | } | ||
565 | |||
566 | |||
567 | static struct pci_ops tile_cfg_ops = { | ||
568 | .read = tile_cfg_read, | ||
569 | .write = tile_cfg_write, | ||
570 | }; | ||
571 | |||
572 | |||
573 | /* | ||
574 | * In the following, each PCI controller's mem_resources[1] | ||
575 | * represents its (non-prefetchable) PCI memory resource. | ||
576 | * mem_resources[0] and mem_resources[2] refer to its PCI I/O and | ||
577 | * prefetchable PCI memory resources, respectively. | ||
578 | * For more details, see pci_setup_bridge() in setup-bus.c. | ||
579 | * By comparing the target PCI memory address against the | ||
580 | * end address of controller 0, we can determine the controller | ||
581 | * that should accept the PCI memory access. | ||
582 | */ | ||
583 | #define TILE_READ(size, type) \ | ||
584 | type _tile_read##size(unsigned long addr) \ | ||
585 | { \ | ||
586 | type val; \ | ||
587 | int idx = 0; \ | ||
588 | if (addr > controllers[0].mem_resources[1].end && \ | ||
589 | addr > controllers[0].mem_resources[2].end) \ | ||
590 | idx = 1; \ | ||
591 | if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \ | ||
592 | (HV_VirtAddr)(&val), sizeof(type), addr)) \ | ||
593 | pr_err("PCI: read %zd bytes at 0x%lX failed\n", \ | ||
594 | sizeof(type), addr); \ | ||
595 | return val; \ | ||
596 | } \ | ||
597 | EXPORT_SYMBOL(_tile_read##size) | ||
598 | |||
599 | TILE_READ(b, u8); | ||
600 | TILE_READ(w, u16); | ||
601 | TILE_READ(l, u32); | ||
602 | TILE_READ(q, u64); | ||
603 | |||
604 | #define TILE_WRITE(size, type) \ | ||
605 | void _tile_write##size(type val, unsigned long addr) \ | ||
606 | { \ | ||
607 | int idx = 0; \ | ||
608 | if (addr > controllers[0].mem_resources[1].end && \ | ||
609 | addr > controllers[0].mem_resources[2].end) \ | ||
610 | idx = 1; \ | ||
611 | if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \ | ||
612 | (HV_VirtAddr)(&val), sizeof(type), addr)) \ | ||
613 | pr_err("PCI: write %zd bytes at 0x%lX failed\n", \ | ||
614 | sizeof(type), addr); \ | ||
615 | } \ | ||
616 | EXPORT_SYMBOL(_tile_write##size) | ||
617 | |||
618 | TILE_WRITE(b, u8); | ||
619 | TILE_WRITE(w, u16); | ||
620 | TILE_WRITE(l, u32); | ||
621 | TILE_WRITE(q, u64); | ||
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index fb0b3cbeae14..f18573643ed1 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c | |||
@@ -840,7 +840,7 @@ static int __init topology_init(void) | |||
840 | for_each_online_node(i) | 840 | for_each_online_node(i) |
841 | register_one_node(i); | 841 | register_one_node(i); |
842 | 842 | ||
843 | for_each_present_cpu(i) | 843 | for (i = 0; i < smp_height * smp_width; ++i) |
844 | register_cpu(&cpu_devices[i], i); | 844 | register_cpu(&cpu_devices[i], i); |
845 | 845 | ||
846 | return 0; | 846 | return 0; |
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 687719d4abd1..757407e36696 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include <linux/smp.h> | 18 | #include <linux/smp.h> |
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
21 | #include <linux/signal.h> | 20 | #include <linux/signal.h> |
22 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 74d62d098edf..b949edcec200 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
21 | #include <linux/smp_lock.h> | ||
22 | #include <linux/bootmem.h> | 21 | #include <linux/bootmem.h> |
23 | #include <linux/notifier.h> | 22 | #include <linux/notifier.h> |
24 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index 7e764669a022..e2187d24a9b4 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/smp.h> | 22 | #include <linux/smp.h> |
23 | #include <linux/smp_lock.h> | ||
24 | #include <linux/syscalls.h> | 23 | #include <linux/syscalls.h> |
25 | #include <linux/mman.h> | 24 | #include <linux/mman.h> |
26 | #include <linux/file.h> | 25 | #include <linux/file.h> |
diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c index 6235283b4859..cc3d9badf030 100644 --- a/arch/tile/lib/memchr_32.c +++ b/arch/tile/lib/memchr_32.c | |||
@@ -18,12 +18,24 @@ | |||
18 | 18 | ||
19 | void *memchr(const void *s, int c, size_t n) | 19 | void *memchr(const void *s, int c, size_t n) |
20 | { | 20 | { |
21 | const uint32_t *last_word_ptr; | ||
22 | const uint32_t *p; | ||
23 | const char *last_byte_ptr; | ||
24 | uintptr_t s_int; | ||
25 | uint32_t goal, before_mask, v, bits; | ||
26 | char *ret; | ||
27 | |||
28 | if (__builtin_expect(n == 0, 0)) { | ||
29 | /* Don't dereference any memory if the array is empty. */ | ||
30 | return NULL; | ||
31 | } | ||
32 | |||
21 | /* Get an aligned pointer. */ | 33 | /* Get an aligned pointer. */ |
22 | const uintptr_t s_int = (uintptr_t) s; | 34 | s_int = (uintptr_t) s; |
23 | const uint32_t *p = (const uint32_t *)(s_int & -4); | 35 | p = (const uint32_t *)(s_int & -4); |
24 | 36 | ||
25 | /* Create four copies of the byte for which we are looking. */ | 37 | /* Create four copies of the byte for which we are looking. */ |
26 | const uint32_t goal = 0x01010101 * (uint8_t) c; | 38 | goal = 0x01010101 * (uint8_t) c; |
27 | 39 | ||
28 | /* Read the first word, but munge it so that bytes before the array | 40 | /* Read the first word, but munge it so that bytes before the array |
29 | * will not match goal. | 41 | * will not match goal. |
@@ -31,23 +43,14 @@ void *memchr(const void *s, int c, size_t n) | |||
31 | * Note that this shift count expression works because we know | 43 | * Note that this shift count expression works because we know |
32 | * shift counts are taken mod 32. | 44 | * shift counts are taken mod 32. |
33 | */ | 45 | */ |
34 | const uint32_t before_mask = (1 << (s_int << 3)) - 1; | 46 | before_mask = (1 << (s_int << 3)) - 1; |
35 | uint32_t v = (*p | before_mask) ^ (goal & before_mask); | 47 | v = (*p | before_mask) ^ (goal & before_mask); |
36 | 48 | ||
37 | /* Compute the address of the last byte. */ | 49 | /* Compute the address of the last byte. */ |
38 | const char *const last_byte_ptr = (const char *)s + n - 1; | 50 | last_byte_ptr = (const char *)s + n - 1; |
39 | 51 | ||
40 | /* Compute the address of the word containing the last byte. */ | 52 | /* Compute the address of the word containing the last byte. */ |
41 | const uint32_t *const last_word_ptr = | 53 | last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4); |
42 | (const uint32_t *)((uintptr_t) last_byte_ptr & -4); | ||
43 | |||
44 | uint32_t bits; | ||
45 | char *ret; | ||
46 | |||
47 | if (__builtin_expect(n == 0, 0)) { | ||
48 | /* Don't dereference any memory if the array is empty. */ | ||
49 | return NULL; | ||
50 | } | ||
51 | 54 | ||
52 | while ((bits = __insn_seqb(v, goal)) == 0) { | 55 | while ((bits = __insn_seqb(v, goal)) == 0) { |
53 | if (__builtin_expect(p == last_word_ptr, 0)) { | 56 | if (__builtin_expect(p == last_word_ptr, 0)) { |
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 485e24d62c6b..5cd1c4004eca 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c | |||
@@ -167,23 +167,30 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
167 | * when we compare them. | 167 | * when we compare them. |
168 | */ | 168 | */ |
169 | u32 my_ticket_; | 169 | u32 my_ticket_; |
170 | u32 iterations = 0; | ||
170 | 171 | ||
171 | /* Take out the next ticket; this will also stop would-be readers. */ | 172 | /* |
172 | if (val & 1) | 173 | * Wait until there are no readers, then bump up the next |
173 | val = get_rwlock(rwlock); | 174 | * field and capture the ticket value. |
174 | rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); | 175 | */ |
176 | for (;;) { | ||
177 | if (!(val & 1)) { | ||
178 | if ((val >> RD_COUNT_SHIFT) == 0) | ||
179 | break; | ||
180 | rwlock->lock = val; | ||
181 | } | ||
182 | delay_backoff(iterations++); | ||
183 | val = __insn_tns((int *)&rwlock->lock); | ||
184 | } | ||
175 | 185 | ||
176 | /* Extract my ticket value from the original word. */ | 186 | /* Take out the next ticket and extract my ticket value. */ |
187 | rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); | ||
177 | my_ticket_ = val >> WR_NEXT_SHIFT; | 188 | my_ticket_ = val >> WR_NEXT_SHIFT; |
178 | 189 | ||
179 | /* | 190 | /* Wait until the "current" field matches our ticket. */ |
180 | * Wait until the "current" field matches our ticket, and | ||
181 | * there are no remaining readers. | ||
182 | */ | ||
183 | for (;;) { | 191 | for (;;) { |
184 | u32 curr_ = val >> WR_CURR_SHIFT; | 192 | u32 curr_ = val >> WR_CURR_SHIFT; |
185 | u32 readers = val >> RD_COUNT_SHIFT; | 193 | u32 delta = ((my_ticket_ - curr_) & WR_MASK); |
186 | u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers; | ||
187 | if (likely(delta == 0)) | 194 | if (likely(delta == 0)) |
188 | break; | 195 | break; |
189 | 196 | ||
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index f295b4ac941d..dcebfc831cd6 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/mman.h> | 24 | #include <linux/mman.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/smp.h> | 26 | #include <linux/smp.h> |
27 | #include <linux/smp_lock.h> | ||
28 | #include <linux/interrupt.h> | 27 | #include <linux/interrupt.h> |
29 | #include <linux/init.h> | 28 | #include <linux/init.h> |
30 | #include <linux/tty.h> | 29 | #include <linux/tty.h> |
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 24688b697a8d..201a582c4137 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/hugetlb.h> | 22 | #include <linux/hugetlb.h> |
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/smp_lock.h> | ||
25 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
26 | #include <linux/err.h> | 25 | #include <linux/err.h> |
27 | #include <linux/sysctl.h> | 26 | #include <linux/sysctl.h> |
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 7f7338c90784..1664cce7b0ac 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c | |||
@@ -727,6 +727,9 @@ struct winch { | |||
727 | 727 | ||
728 | static void free_winch(struct winch *winch, int free_irq_ok) | 728 | static void free_winch(struct winch *winch, int free_irq_ok) |
729 | { | 729 | { |
730 | if (free_irq_ok) | ||
731 | free_irq(WINCH_IRQ, winch); | ||
732 | |||
730 | list_del(&winch->list); | 733 | list_del(&winch->list); |
731 | 734 | ||
732 | if (winch->pid != -1) | 735 | if (winch->pid != -1) |
@@ -735,8 +738,6 @@ static void free_winch(struct winch *winch, int free_irq_ok) | |||
735 | os_close_file(winch->fd); | 738 | os_close_file(winch->fd); |
736 | if (winch->stack != 0) | 739 | if (winch->stack != 0) |
737 | free_stack(winch->stack, 0); | 740 | free_stack(winch->stack, 0); |
738 | if (free_irq_ok) | ||
739 | free_irq(WINCH_IRQ, winch); | ||
740 | kfree(winch); | 741 | kfree(winch); |
741 | } | 742 | } |
742 | 743 | ||
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 340268be00b5..09bd7b585726 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c | |||
@@ -5,7 +5,6 @@ | |||
5 | 5 | ||
6 | #include "linux/stddef.h" | 6 | #include "linux/stddef.h" |
7 | #include "linux/fs.h" | 7 | #include "linux/fs.h" |
8 | #include "linux/smp_lock.h" | ||
9 | #include "linux/ptrace.h" | 8 | #include "linux/ptrace.h" |
10 | #include "linux/sched.h" | 9 | #include "linux/sched.h" |
11 | #include "linux/slab.h" | 10 | #include "linux/slab.h" |
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 849813f398e7..5852519b2d0f 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/syscalls.h> | 28 | #include <linux/syscalls.h> |
29 | #include <linux/times.h> | 29 | #include <linux/times.h> |
30 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/mm.h> | 31 | #include <linux/mm.h> |
33 | #include <linux/uio.h> | 32 | #include <linux/uio.h> |
34 | #include <linux/poll.h> | 33 | #include <linux/poll.h> |
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 4d293dced62f..9479a037419f 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -216,8 +216,8 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) | |||
216 | } | 216 | } |
217 | 217 | ||
218 | /* Return an pointer with offset calculated */ | 218 | /* Return an pointer with offset calculated */ |
219 | static inline unsigned long __set_fixmap_offset(enum fixed_addresses idx, | 219 | static __always_inline unsigned long |
220 | phys_addr_t phys, pgprot_t flags) | 220 | __set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) |
221 | { | 221 | { |
222 | __set_fixmap(idx, phys, flags); | 222 | __set_fixmap(idx, phys, flags); |
223 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); | 223 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); |
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index e8506c1f0c55..1c10c88ee4e1 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h | |||
@@ -61,9 +61,9 @@ DEFINE_GUEST_HANDLE(void); | |||
61 | #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) | 61 | #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) |
62 | #endif | 62 | #endif |
63 | 63 | ||
64 | #ifndef machine_to_phys_mapping | 64 | #define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) |
65 | #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) | 65 | #define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) |
66 | #endif | 66 | #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT) |
67 | 67 | ||
68 | /* Maximum number of virtual CPUs in multi-processor guests. */ | 68 | /* Maximum number of virtual CPUs in multi-processor guests. */ |
69 | #define MAX_VIRT_CPUS 32 | 69 | #define MAX_VIRT_CPUS 32 |
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h index 42a7e004ae5c..8413688b2571 100644 --- a/arch/x86/include/asm/xen/interface_32.h +++ b/arch/x86/include/asm/xen/interface_32.h | |||
@@ -32,6 +32,11 @@ | |||
32 | /* And the trap vector is... */ | 32 | /* And the trap vector is... */ |
33 | #define TRAP_INSTR "int $0x82" | 33 | #define TRAP_INSTR "int $0x82" |
34 | 34 | ||
35 | #define __MACH2PHYS_VIRT_START 0xF5800000 | ||
36 | #define __MACH2PHYS_VIRT_END 0xF6800000 | ||
37 | |||
38 | #define __MACH2PHYS_SHIFT 2 | ||
39 | |||
35 | /* | 40 | /* |
36 | * Virtual addresses beyond this are not modifiable by guest OSes. The | 41 | * Virtual addresses beyond this are not modifiable by guest OSes. The |
37 | * machine->physical mapping table starts at this address, read-only. | 42 | * machine->physical mapping table starts at this address, read-only. |
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h index 100d2662b97c..839a4811cf98 100644 --- a/arch/x86/include/asm/xen/interface_64.h +++ b/arch/x86/include/asm/xen/interface_64.h | |||
@@ -39,18 +39,7 @@ | |||
39 | #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 | 39 | #define __HYPERVISOR_VIRT_END 0xFFFF880000000000 |
40 | #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 | 40 | #define __MACH2PHYS_VIRT_START 0xFFFF800000000000 |
41 | #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 | 41 | #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 |
42 | 42 | #define __MACH2PHYS_SHIFT 3 | |
43 | #ifndef HYPERVISOR_VIRT_START | ||
44 | #define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) | ||
45 | #define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) | ||
46 | #endif | ||
47 | |||
48 | #define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) | ||
49 | #define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) | ||
50 | #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) | ||
51 | #ifndef machine_to_phys_mapping | ||
52 | #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) | ||
53 | #endif | ||
54 | 43 | ||
55 | /* | 44 | /* |
56 | * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) | 45 | * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index dd8c1414b3d5..8760cc60a21c 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
7 | #include <linux/pfn.h> | 7 | #include <linux/pfn.h> |
8 | #include <linux/mm.h> | ||
8 | 9 | ||
9 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
10 | #include <asm/page.h> | 11 | #include <asm/page.h> |
@@ -35,6 +36,8 @@ typedef struct xpaddr { | |||
35 | #define MAX_DOMAIN_PAGES \ | 36 | #define MAX_DOMAIN_PAGES \ |
36 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) | 37 | ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) |
37 | 38 | ||
39 | extern unsigned long *machine_to_phys_mapping; | ||
40 | extern unsigned int machine_to_phys_order; | ||
38 | 41 | ||
39 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 42 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
40 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
@@ -69,10 +72,8 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
69 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 72 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
70 | return mfn; | 73 | return mfn; |
71 | 74 | ||
72 | #if 0 | ||
73 | if (unlikely((mfn >> machine_to_phys_order) != 0)) | 75 | if (unlikely((mfn >> machine_to_phys_order) != 0)) |
74 | return max_mapnr; | 76 | return ~0; |
75 | #endif | ||
76 | 77 | ||
77 | pfn = 0; | 78 | pfn = 0; |
78 | /* | 79 | /* |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 1b7b31ab7d86..212a6a42527c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/init.h> | 33 | #include <linux/init.h> |
34 | #include <linux/poll.h> | 34 | #include <linux/poll.h> |
35 | #include <linux/smp.h> | 35 | #include <linux/smp.h> |
36 | #include <linux/smp_lock.h> | ||
37 | #include <linux/major.h> | 36 | #include <linux/major.h> |
38 | #include <linux/fs.h> | 37 | #include <linux/fs.h> |
39 | #include <linux/device.h> | 38 | #include <linux/device.h> |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index ec592caac4b4..cd21b654dec6 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -315,14 +315,18 @@ static void kgdb_remove_all_hw_break(void) | |||
315 | if (!breakinfo[i].enabled) | 315 | if (!breakinfo[i].enabled) |
316 | continue; | 316 | continue; |
317 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 317 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
318 | if (bp->attr.disabled == 1) | 318 | if (!bp->attr.disabled) { |
319 | arch_uninstall_hw_breakpoint(bp); | ||
320 | bp->attr.disabled = 1; | ||
319 | continue; | 321 | continue; |
322 | } | ||
320 | if (dbg_is_early) | 323 | if (dbg_is_early) |
321 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, | 324 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, |
322 | breakinfo[i].type); | 325 | breakinfo[i].type); |
323 | else | 326 | else if (hw_break_release_slot(i)) |
324 | arch_uninstall_hw_breakpoint(bp); | 327 | printk(KERN_ERR "KGDB: hw bpt remove failed %lx\n", |
325 | bp->attr.disabled = 1; | 328 | breakinfo[i].addr); |
329 | breakinfo[i].enabled = 0; | ||
326 | } | 330 | } |
327 | } | 331 | } |
328 | 332 | ||
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7bf2dc4c8f70..12fcbe2c143e 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <linux/poll.h> | 31 | #include <linux/poll.h> |
32 | #include <linux/smp.h> | 32 | #include <linux/smp.h> |
33 | #include <linux/smp_lock.h> | ||
34 | #include <linux/major.h> | 33 | #include <linux/major.h> |
35 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
36 | #include <linux/device.h> | 35 | #include <linux/device.h> |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 82e144a4e514..1ca12298ffc7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -3395,6 +3395,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3395 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3395 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
3396 | 3396 | ||
3397 | load_host_msrs(vcpu); | 3397 | load_host_msrs(vcpu); |
3398 | kvm_load_ldt(ldt_selector); | ||
3398 | loadsegment(fs, fs_selector); | 3399 | loadsegment(fs, fs_selector); |
3399 | #ifdef CONFIG_X86_64 | 3400 | #ifdef CONFIG_X86_64 |
3400 | load_gs_index(gs_selector); | 3401 | load_gs_index(gs_selector); |
@@ -3402,7 +3403,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3402 | #else | 3403 | #else |
3403 | loadsegment(gs, gs_selector); | 3404 | loadsegment(gs, gs_selector); |
3404 | #endif | 3405 | #endif |
3405 | kvm_load_ldt(ldt_selector); | ||
3406 | 3406 | ||
3407 | reload_tss(vcpu); | 3407 | reload_tss(vcpu); |
3408 | 3408 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8da0e45ff7c9..ff21fdda0c53 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -821,10 +821,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
821 | #endif | 821 | #endif |
822 | 822 | ||
823 | #ifdef CONFIG_X86_64 | 823 | #ifdef CONFIG_X86_64 |
824 | if (is_long_mode(&vmx->vcpu)) { | 824 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
825 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 825 | if (is_long_mode(&vmx->vcpu)) |
826 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | 826 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
827 | } | ||
828 | #endif | 827 | #endif |
829 | for (i = 0; i < vmx->save_nmsrs; ++i) | 828 | for (i = 0; i < vmx->save_nmsrs; ++i) |
830 | kvm_set_shared_msr(vmx->guest_msrs[i].index, | 829 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
@@ -839,23 +838,23 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
839 | 838 | ||
840 | ++vmx->vcpu.stat.host_state_reload; | 839 | ++vmx->vcpu.stat.host_state_reload; |
841 | vmx->host_state.loaded = 0; | 840 | vmx->host_state.loaded = 0; |
842 | if (vmx->host_state.fs_reload_needed) | 841 | #ifdef CONFIG_X86_64 |
843 | loadsegment(fs, vmx->host_state.fs_sel); | 842 | if (is_long_mode(&vmx->vcpu)) |
843 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | ||
844 | #endif | ||
844 | if (vmx->host_state.gs_ldt_reload_needed) { | 845 | if (vmx->host_state.gs_ldt_reload_needed) { |
845 | kvm_load_ldt(vmx->host_state.ldt_sel); | 846 | kvm_load_ldt(vmx->host_state.ldt_sel); |
846 | #ifdef CONFIG_X86_64 | 847 | #ifdef CONFIG_X86_64 |
847 | load_gs_index(vmx->host_state.gs_sel); | 848 | load_gs_index(vmx->host_state.gs_sel); |
848 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | ||
849 | #else | 849 | #else |
850 | loadsegment(gs, vmx->host_state.gs_sel); | 850 | loadsegment(gs, vmx->host_state.gs_sel); |
851 | #endif | 851 | #endif |
852 | } | 852 | } |
853 | if (vmx->host_state.fs_reload_needed) | ||
854 | loadsegment(fs, vmx->host_state.fs_sel); | ||
853 | reload_tss(); | 855 | reload_tss(); |
854 | #ifdef CONFIG_X86_64 | 856 | #ifdef CONFIG_X86_64 |
855 | if (is_long_mode(&vmx->vcpu)) { | 857 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
856 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | ||
857 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
858 | } | ||
859 | #endif | 858 | #endif |
860 | if (current_thread_info()->status & TS_USEDFPU) | 859 | if (current_thread_info()->status & TS_USEDFPU) |
861 | clts(); | 860 | clts(); |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 15466c096ba5..0972315c3860 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -138,7 +138,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
138 | struct acpi_resource_address64 addr; | 138 | struct acpi_resource_address64 addr; |
139 | acpi_status status; | 139 | acpi_status status; |
140 | unsigned long flags; | 140 | unsigned long flags; |
141 | struct resource *root, *conflict; | ||
142 | u64 start, end; | 141 | u64 start, end; |
143 | 142 | ||
144 | status = resource_to_addr(acpi_res, &addr); | 143 | status = resource_to_addr(acpi_res, &addr); |
@@ -146,12 +145,10 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
146 | return AE_OK; | 145 | return AE_OK; |
147 | 146 | ||
148 | if (addr.resource_type == ACPI_MEMORY_RANGE) { | 147 | if (addr.resource_type == ACPI_MEMORY_RANGE) { |
149 | root = &iomem_resource; | ||
150 | flags = IORESOURCE_MEM; | 148 | flags = IORESOURCE_MEM; |
151 | if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) | 149 | if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY) |
152 | flags |= IORESOURCE_PREFETCH; | 150 | flags |= IORESOURCE_PREFETCH; |
153 | } else if (addr.resource_type == ACPI_IO_RANGE) { | 151 | } else if (addr.resource_type == ACPI_IO_RANGE) { |
154 | root = &ioport_resource; | ||
155 | flags = IORESOURCE_IO; | 152 | flags = IORESOURCE_IO; |
156 | } else | 153 | } else |
157 | return AE_OK; | 154 | return AE_OK; |
@@ -172,25 +169,90 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
172 | return AE_OK; | 169 | return AE_OK; |
173 | } | 170 | } |
174 | 171 | ||
175 | conflict = insert_resource_conflict(root, res); | 172 | info->res_num++; |
176 | if (conflict) { | 173 | if (addr.translation_offset) |
177 | dev_err(&info->bridge->dev, | 174 | dev_info(&info->bridge->dev, "host bridge window %pR " |
178 | "address space collision: host bridge window %pR " | 175 | "(PCI address [%#llx-%#llx])\n", |
179 | "conflicts with %s %pR\n", | 176 | res, res->start - addr.translation_offset, |
180 | res, conflict->name, conflict); | 177 | res->end - addr.translation_offset); |
181 | } else { | 178 | else |
182 | pci_bus_add_resource(info->bus, res, 0); | 179 | dev_info(&info->bridge->dev, "host bridge window %pR\n", res); |
183 | info->res_num++; | 180 | |
184 | if (addr.translation_offset) | 181 | return AE_OK; |
185 | dev_info(&info->bridge->dev, "host bridge window %pR " | 182 | } |
186 | "(PCI address [%#llx-%#llx])\n", | 183 | |
187 | res, res->start - addr.translation_offset, | 184 | static bool resource_contains(struct resource *res, resource_size_t point) |
188 | res->end - addr.translation_offset); | 185 | { |
186 | if (res->start <= point && point <= res->end) | ||
187 | return true; | ||
188 | return false; | ||
189 | } | ||
190 | |||
191 | static void coalesce_windows(struct pci_root_info *info, int type) | ||
192 | { | ||
193 | int i, j; | ||
194 | struct resource *res1, *res2; | ||
195 | |||
196 | for (i = 0; i < info->res_num; i++) { | ||
197 | res1 = &info->res[i]; | ||
198 | if (!(res1->flags & type)) | ||
199 | continue; | ||
200 | |||
201 | for (j = i + 1; j < info->res_num; j++) { | ||
202 | res2 = &info->res[j]; | ||
203 | if (!(res2->flags & type)) | ||
204 | continue; | ||
205 | |||
206 | /* | ||
207 | * I don't like throwing away windows because then | ||
208 | * our resources no longer match the ACPI _CRS, but | ||
209 | * the kernel resource tree doesn't allow overlaps. | ||
210 | */ | ||
211 | if (resource_contains(res1, res2->start) || | ||
212 | resource_contains(res1, res2->end) || | ||
213 | resource_contains(res2, res1->start) || | ||
214 | resource_contains(res2, res1->end)) { | ||
215 | res1->start = min(res1->start, res2->start); | ||
216 | res1->end = max(res1->end, res2->end); | ||
217 | dev_info(&info->bridge->dev, | ||
218 | "host bridge window expanded to %pR; %pR ignored\n", | ||
219 | res1, res2); | ||
220 | res2->flags = 0; | ||
221 | } | ||
222 | } | ||
223 | } | ||
224 | } | ||
225 | |||
226 | static void add_resources(struct pci_root_info *info) | ||
227 | { | ||
228 | int i; | ||
229 | struct resource *res, *root, *conflict; | ||
230 | |||
231 | if (!pci_use_crs) | ||
232 | return; | ||
233 | |||
234 | coalesce_windows(info, IORESOURCE_MEM); | ||
235 | coalesce_windows(info, IORESOURCE_IO); | ||
236 | |||
237 | for (i = 0; i < info->res_num; i++) { | ||
238 | res = &info->res[i]; | ||
239 | |||
240 | if (res->flags & IORESOURCE_MEM) | ||
241 | root = &iomem_resource; | ||
242 | else if (res->flags & IORESOURCE_IO) | ||
243 | root = &ioport_resource; | ||
189 | else | 244 | else |
190 | dev_info(&info->bridge->dev, | 245 | continue; |
191 | "host bridge window %pR\n", res); | 246 | |
247 | conflict = insert_resource_conflict(root, res); | ||
248 | if (conflict) | ||
249 | dev_err(&info->bridge->dev, | ||
250 | "address space collision: host bridge window %pR " | ||
251 | "conflicts with %s %pR\n", | ||
252 | res, conflict->name, conflict); | ||
253 | else | ||
254 | pci_bus_add_resource(info->bus, res, 0); | ||
192 | } | 255 | } |
193 | return AE_OK; | ||
194 | } | 256 | } |
195 | 257 | ||
196 | static void | 258 | static void |
@@ -224,6 +286,7 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
224 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, | 286 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, |
225 | &info); | 287 | &info); |
226 | 288 | ||
289 | add_resources(&info); | ||
227 | return; | 290 | return; |
228 | 291 | ||
229 | name_alloc_fail: | 292 | name_alloc_fail: |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 235c0f4d3861..02c710bebf7a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -75,6 +75,11 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | |||
75 | enum xen_domain_type xen_domain_type = XEN_NATIVE; | 75 | enum xen_domain_type xen_domain_type = XEN_NATIVE; |
76 | EXPORT_SYMBOL_GPL(xen_domain_type); | 76 | EXPORT_SYMBOL_GPL(xen_domain_type); |
77 | 77 | ||
78 | unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; | ||
79 | EXPORT_SYMBOL(machine_to_phys_mapping); | ||
80 | unsigned int machine_to_phys_order; | ||
81 | EXPORT_SYMBOL(machine_to_phys_order); | ||
82 | |||
78 | struct start_info *xen_start_info; | 83 | struct start_info *xen_start_info; |
79 | EXPORT_SYMBOL_GPL(xen_start_info); | 84 | EXPORT_SYMBOL_GPL(xen_start_info); |
80 | 85 | ||
@@ -1090,6 +1095,8 @@ static void __init xen_setup_stackprotector(void) | |||
1090 | /* First C function to be called on Xen boot */ | 1095 | /* First C function to be called on Xen boot */ |
1091 | asmlinkage void __init xen_start_kernel(void) | 1096 | asmlinkage void __init xen_start_kernel(void) |
1092 | { | 1097 | { |
1098 | struct physdev_set_iopl set_iopl; | ||
1099 | int rc; | ||
1093 | pgd_t *pgd; | 1100 | pgd_t *pgd; |
1094 | 1101 | ||
1095 | if (!xen_start_info) | 1102 | if (!xen_start_info) |
@@ -1097,6 +1104,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1097 | 1104 | ||
1098 | xen_domain_type = XEN_PV_DOMAIN; | 1105 | xen_domain_type = XEN_PV_DOMAIN; |
1099 | 1106 | ||
1107 | xen_setup_machphys_mapping(); | ||
1108 | |||
1100 | /* Install Xen paravirt ops */ | 1109 | /* Install Xen paravirt ops */ |
1101 | pv_info = xen_info; | 1110 | pv_info = xen_info; |
1102 | pv_init_ops = xen_init_ops; | 1111 | pv_init_ops = xen_init_ops; |
@@ -1191,8 +1200,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1191 | /* Allocate and initialize top and mid mfn levels for p2m structure */ | 1200 | /* Allocate and initialize top and mid mfn levels for p2m structure */ |
1192 | xen_build_mfn_list_list(); | 1201 | xen_build_mfn_list_list(); |
1193 | 1202 | ||
1194 | init_mm.pgd = pgd; | ||
1195 | |||
1196 | /* keep using Xen gdt for now; no urgent need to change it */ | 1203 | /* keep using Xen gdt for now; no urgent need to change it */ |
1197 | 1204 | ||
1198 | #ifdef CONFIG_X86_32 | 1205 | #ifdef CONFIG_X86_32 |
@@ -1202,10 +1209,18 @@ asmlinkage void __init xen_start_kernel(void) | |||
1202 | #else | 1209 | #else |
1203 | pv_info.kernel_rpl = 0; | 1210 | pv_info.kernel_rpl = 0; |
1204 | #endif | 1211 | #endif |
1205 | |||
1206 | /* set the limit of our address space */ | 1212 | /* set the limit of our address space */ |
1207 | xen_reserve_top(); | 1213 | xen_reserve_top(); |
1208 | 1214 | ||
1215 | /* We used to do this in xen_arch_setup, but that is too late on AMD | ||
1216 | * were early_cpu_init (run before ->arch_setup()) calls early_amd_init | ||
1217 | * which pokes 0xcf8 port. | ||
1218 | */ | ||
1219 | set_iopl.iopl = 1; | ||
1220 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
1221 | if (rc != 0) | ||
1222 | xen_raw_printk("physdev_op failed %d\n", rc); | ||
1223 | |||
1209 | #ifdef CONFIG_X86_32 | 1224 | #ifdef CONFIG_X86_32 |
1210 | /* set up basic CPUID stuff */ | 1225 | /* set up basic CPUID stuff */ |
1211 | cpu_detect(&new_cpu_data); | 1226 | cpu_detect(&new_cpu_data); |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 21ed8d7f75a5..a1feff9e59b6 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -2034,6 +2034,20 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
2034 | set_page_prot(pmd, PAGE_KERNEL_RO); | 2034 | set_page_prot(pmd, PAGE_KERNEL_RO); |
2035 | } | 2035 | } |
2036 | 2036 | ||
2037 | void __init xen_setup_machphys_mapping(void) | ||
2038 | { | ||
2039 | struct xen_machphys_mapping mapping; | ||
2040 | unsigned long machine_to_phys_nr_ents; | ||
2041 | |||
2042 | if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { | ||
2043 | machine_to_phys_mapping = (unsigned long *)mapping.v_start; | ||
2044 | machine_to_phys_nr_ents = mapping.max_mfn + 1; | ||
2045 | } else { | ||
2046 | machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; | ||
2047 | } | ||
2048 | machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); | ||
2049 | } | ||
2050 | |||
2037 | #ifdef CONFIG_X86_64 | 2051 | #ifdef CONFIG_X86_64 |
2038 | static void convert_pfn_mfn(void *v) | 2052 | static void convert_pfn_mfn(void *v) |
2039 | { | 2053 | { |
@@ -2119,44 +2133,83 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
2119 | return pgd; | 2133 | return pgd; |
2120 | } | 2134 | } |
2121 | #else /* !CONFIG_X86_64 */ | 2135 | #else /* !CONFIG_X86_64 */ |
2122 | static RESERVE_BRK_ARRAY(pmd_t, level2_kernel_pgt, PTRS_PER_PMD); | 2136 | static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); |
2137 | static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD); | ||
2138 | |||
2139 | static __init void xen_write_cr3_init(unsigned long cr3) | ||
2140 | { | ||
2141 | unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir)); | ||
2142 | |||
2143 | BUG_ON(read_cr3() != __pa(initial_page_table)); | ||
2144 | BUG_ON(cr3 != __pa(swapper_pg_dir)); | ||
2145 | |||
2146 | /* | ||
2147 | * We are switching to swapper_pg_dir for the first time (from | ||
2148 | * initial_page_table) and therefore need to mark that page | ||
2149 | * read-only and then pin it. | ||
2150 | * | ||
2151 | * Xen disallows sharing of kernel PMDs for PAE | ||
2152 | * guests. Therefore we must copy the kernel PMD from | ||
2153 | * initial_page_table into a new kernel PMD to be used in | ||
2154 | * swapper_pg_dir. | ||
2155 | */ | ||
2156 | swapper_kernel_pmd = | ||
2157 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | ||
2158 | memcpy(swapper_kernel_pmd, initial_kernel_pmd, | ||
2159 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
2160 | swapper_pg_dir[KERNEL_PGD_BOUNDARY] = | ||
2161 | __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); | ||
2162 | set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); | ||
2163 | |||
2164 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | ||
2165 | xen_write_cr3(cr3); | ||
2166 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn); | ||
2167 | |||
2168 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, | ||
2169 | PFN_DOWN(__pa(initial_page_table))); | ||
2170 | set_page_prot(initial_page_table, PAGE_KERNEL); | ||
2171 | set_page_prot(initial_kernel_pmd, PAGE_KERNEL); | ||
2172 | |||
2173 | pv_mmu_ops.write_cr3 = &xen_write_cr3; | ||
2174 | } | ||
2123 | 2175 | ||
2124 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | 2176 | __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
2125 | unsigned long max_pfn) | 2177 | unsigned long max_pfn) |
2126 | { | 2178 | { |
2127 | pmd_t *kernel_pmd; | 2179 | pmd_t *kernel_pmd; |
2128 | 2180 | ||
2129 | level2_kernel_pgt = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | 2181 | initial_kernel_pmd = |
2182 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | ||
2130 | 2183 | ||
2131 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + | 2184 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + |
2132 | xen_start_info->nr_pt_frames * PAGE_SIZE + | 2185 | xen_start_info->nr_pt_frames * PAGE_SIZE + |
2133 | 512*1024); | 2186 | 512*1024); |
2134 | 2187 | ||
2135 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | 2188 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
2136 | memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); | 2189 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); |
2137 | 2190 | ||
2138 | xen_map_identity_early(level2_kernel_pgt, max_pfn); | 2191 | xen_map_identity_early(initial_kernel_pmd, max_pfn); |
2139 | 2192 | ||
2140 | memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); | 2193 | memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); |
2141 | set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], | 2194 | initial_page_table[KERNEL_PGD_BOUNDARY] = |
2142 | __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); | 2195 | __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); |
2143 | 2196 | ||
2144 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 2197 | set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO); |
2145 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | 2198 | set_page_prot(initial_page_table, PAGE_KERNEL_RO); |
2146 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); | 2199 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); |
2147 | 2200 | ||
2148 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 2201 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
2149 | 2202 | ||
2150 | xen_write_cr3(__pa(swapper_pg_dir)); | 2203 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, |
2151 | 2204 | PFN_DOWN(__pa(initial_page_table))); | |
2152 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); | 2205 | xen_write_cr3(__pa(initial_page_table)); |
2153 | 2206 | ||
2154 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), | 2207 | memblock_x86_reserve_range(__pa(xen_start_info->pt_base), |
2155 | __pa(xen_start_info->pt_base + | 2208 | __pa(xen_start_info->pt_base + |
2156 | xen_start_info->nr_pt_frames * PAGE_SIZE), | 2209 | xen_start_info->nr_pt_frames * PAGE_SIZE), |
2157 | "XEN PAGETABLES"); | 2210 | "XEN PAGETABLES"); |
2158 | 2211 | ||
2159 | return swapper_pg_dir; | 2212 | return initial_page_table; |
2160 | } | 2213 | } |
2161 | #endif /* CONFIG_X86_64 */ | 2214 | #endif /* CONFIG_X86_64 */ |
2162 | 2215 | ||
@@ -2290,7 +2343,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
2290 | .write_cr2 = xen_write_cr2, | 2343 | .write_cr2 = xen_write_cr2, |
2291 | 2344 | ||
2292 | .read_cr3 = xen_read_cr3, | 2345 | .read_cr3 = xen_read_cr3, |
2346 | #ifdef CONFIG_X86_32 | ||
2347 | .write_cr3 = xen_write_cr3_init, | ||
2348 | #else | ||
2293 | .write_cr3 = xen_write_cr3, | 2349 | .write_cr3 = xen_write_cr3, |
2350 | #endif | ||
2294 | 2351 | ||
2295 | .flush_tlb_user = xen_flush_tlb, | 2352 | .flush_tlb_user = xen_flush_tlb, |
2296 | .flush_tlb_kernel = xen_flush_tlb, | 2353 | .flush_tlb_kernel = xen_flush_tlb, |
@@ -2627,7 +2684,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, | |||
2627 | 2684 | ||
2628 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); | 2685 | prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); |
2629 | 2686 | ||
2630 | vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; | 2687 | BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == |
2688 | (VM_PFNMAP | VM_RESERVED | VM_IO))); | ||
2631 | 2689 | ||
2632 | rmd.mfn = mfn; | 2690 | rmd.mfn = mfn; |
2633 | rmd.prot = prot; | 2691 | rmd.prot = prot; |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 769c4b01fa32..01afd8a94607 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <xen/interface/callback.h> | 23 | #include <xen/interface/callback.h> |
24 | #include <xen/interface/memory.h> | 24 | #include <xen/interface/memory.h> |
25 | #include <xen/interface/physdev.h> | 25 | #include <xen/interface/physdev.h> |
26 | #include <xen/interface/memory.h> | ||
27 | #include <xen/features.h> | 26 | #include <xen/features.h> |
28 | 27 | ||
29 | #include "xen-ops.h" | 28 | #include "xen-ops.h" |
@@ -248,8 +247,7 @@ char * __init xen_memory_setup(void) | |||
248 | else | 247 | else |
249 | extra_pages = 0; | 248 | extra_pages = 0; |
250 | 249 | ||
251 | if (!xen_initial_domain()) | 250 | xen_add_extra_mem(extra_pages); |
252 | xen_add_extra_mem(extra_pages); | ||
253 | 251 | ||
254 | return "Xen"; | 252 | return "Xen"; |
255 | } | 253 | } |
@@ -337,9 +335,6 @@ void __cpuinit xen_enable_syscall(void) | |||
337 | 335 | ||
338 | void __init xen_arch_setup(void) | 336 | void __init xen_arch_setup(void) |
339 | { | 337 | { |
340 | struct physdev_set_iopl set_iopl; | ||
341 | int rc; | ||
342 | |||
343 | xen_panic_handler_init(); | 338 | xen_panic_handler_init(); |
344 | 339 | ||
345 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | 340 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); |
@@ -356,11 +351,6 @@ void __init xen_arch_setup(void) | |||
356 | xen_enable_sysenter(); | 351 | xen_enable_sysenter(); |
357 | xen_enable_syscall(); | 352 | xen_enable_syscall(); |
358 | 353 | ||
359 | set_iopl.iopl = 1; | ||
360 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
361 | if (rc != 0) | ||
362 | printk(KERN_INFO "physdev_op failed %d\n", rc); | ||
363 | |||
364 | #ifdef CONFIG_ACPI | 354 | #ifdef CONFIG_ACPI |
365 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 355 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
366 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 356 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |