diff options
Diffstat (limited to 'arch/x86/kernel')
83 files changed, 3204 insertions, 1573 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d9770a56511a..c2ac1b4515a0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
58 | #ifdef CONFIG_X86_64 | 58 | #ifdef CONFIG_X86_64 |
59 | 59 | ||
60 | #include <asm/proto.h> | 60 | #include <asm/proto.h> |
61 | #include <asm/genapic.h> | ||
62 | 61 | ||
63 | #else /* X86 */ | 62 | #else /* X86 */ |
64 | 63 | ||
@@ -158,6 +157,16 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) | |||
158 | struct acpi_mcfg_allocation *pci_mmcfg_config; | 157 | struct acpi_mcfg_allocation *pci_mmcfg_config; |
159 | int pci_mmcfg_config_num; | 158 | int pci_mmcfg_config_num; |
160 | 159 | ||
160 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
161 | |||
162 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
163 | { | ||
164 | if (!strcmp(mcfg->header.oem_id, "SGI")) | ||
165 | acpi_mcfg_64bit_base_addr = TRUE; | ||
166 | |||
167 | return 0; | ||
168 | } | ||
169 | |||
161 | int __init acpi_parse_mcfg(struct acpi_table_header *header) | 170 | int __init acpi_parse_mcfg(struct acpi_table_header *header) |
162 | { | 171 | { |
163 | struct acpi_table_mcfg *mcfg; | 172 | struct acpi_table_mcfg *mcfg; |
@@ -190,8 +199,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header) | |||
190 | } | 199 | } |
191 | 200 | ||
192 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | 201 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); |
202 | |||
203 | acpi_mcfg_oem_check(mcfg); | ||
204 | |||
193 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | 205 | for (i = 0; i < pci_mmcfg_config_num; ++i) { |
194 | if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { | 206 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && |
207 | !acpi_mcfg_64bit_base_addr) { | ||
195 | printk(KERN_ERR PREFIX | 208 | printk(KERN_ERR PREFIX |
196 | "MMCONFIG not in low 4GB of memory\n"); | 209 | "MMCONFIG not in low 4GB of memory\n"); |
197 | kfree(pci_mmcfg_config); | 210 | kfree(pci_mmcfg_config); |
@@ -1589,6 +1602,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
1589 | */ | 1602 | */ |
1590 | { | 1603 | { |
1591 | .callback = dmi_ignore_irq0_timer_override, | 1604 | .callback = dmi_ignore_irq0_timer_override, |
1605 | .ident = "HP nx6115 laptop", | ||
1606 | .matches = { | ||
1607 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
1608 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), | ||
1609 | }, | ||
1610 | }, | ||
1611 | { | ||
1612 | .callback = dmi_ignore_irq0_timer_override, | ||
1592 | .ident = "HP NX6125 laptop", | 1613 | .ident = "HP NX6125 laptop", |
1593 | .matches = { | 1614 | .matches = { |
1594 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | 1615 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), |
@@ -1603,6 +1624,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
1603 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), | 1624 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), |
1604 | }, | 1625 | }, |
1605 | }, | 1626 | }, |
1627 | { | ||
1628 | .callback = dmi_ignore_irq0_timer_override, | ||
1629 | .ident = "HP 6715b laptop", | ||
1630 | .matches = { | ||
1631 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | ||
1632 | DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), | ||
1633 | }, | ||
1634 | }, | ||
1606 | {} | 1635 | {} |
1607 | }; | 1636 | }; |
1608 | 1637 | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index fa2161d5003b..426e5d91b63a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags; | |||
20 | /* address in low memory of the wakeup routine. */ | 20 | /* address in low memory of the wakeup routine. */ |
21 | static unsigned long acpi_realmode; | 21 | static unsigned long acpi_realmode; |
22 | 22 | ||
23 | #ifdef CONFIG_64BIT | 23 | #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) |
24 | static char temp_stack[10240]; | 24 | static char temp_stack[10240]; |
25 | #endif | 25 | #endif |
26 | 26 | ||
@@ -86,7 +86,7 @@ int acpi_save_state_mem(void) | |||
86 | #endif /* !CONFIG_64BIT */ | 86 | #endif /* !CONFIG_64BIT */ |
87 | 87 | ||
88 | header->pmode_cr0 = read_cr0(); | 88 | header->pmode_cr0 = read_cr0(); |
89 | header->pmode_cr4 = read_cr4(); | 89 | header->pmode_cr4 = read_cr4_safe(); |
90 | header->realmode_flags = acpi_realmode_flags; | 90 | header->realmode_flags = acpi_realmode_flags; |
91 | header->real_magic = 0x12345678; | 91 | header->real_magic = 0x12345678; |
92 | 92 | ||
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b553..fb04e49776ba 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | |||
145 | extern char __vsyscall_0; | 145 | extern char __vsyscall_0; |
146 | const unsigned char *const *find_nop_table(void) | 146 | const unsigned char *const *find_nop_table(void) |
147 | { | 147 | { |
148 | return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || | 148 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && |
149 | boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; | 149 | boot_cpu_has(X86_FEATURE_NOPL)) |
150 | return p6_nops; | ||
151 | else | ||
152 | return k8_nops; | ||
150 | } | 153 | } |
151 | 154 | ||
152 | #else /* CONFIG_X86_64 */ | 155 | #else /* CONFIG_X86_64 */ |
153 | 156 | ||
154 | static const struct nop { | ||
155 | int cpuid; | ||
156 | const unsigned char *const *noptable; | ||
157 | } noptypes[] = { | ||
158 | { X86_FEATURE_K8, k8_nops }, | ||
159 | { X86_FEATURE_K7, k7_nops }, | ||
160 | { X86_FEATURE_P4, p6_nops }, | ||
161 | { X86_FEATURE_P3, p6_nops }, | ||
162 | { -1, NULL } | ||
163 | }; | ||
164 | |||
165 | const unsigned char *const *find_nop_table(void) | 157 | const unsigned char *const *find_nop_table(void) |
166 | { | 158 | { |
167 | const unsigned char *const *noptable = intel_nops; | 159 | if (boot_cpu_has(X86_FEATURE_K8)) |
168 | int i; | 160 | return k8_nops; |
169 | 161 | else if (boot_cpu_has(X86_FEATURE_K7)) | |
170 | for (i = 0; noptypes[i].cpuid >= 0; i++) { | 162 | return k7_nops; |
171 | if (boot_cpu_has(noptypes[i].cpuid)) { | 163 | else if (boot_cpu_has(X86_FEATURE_NOPL)) |
172 | noptable = noptypes[i].noptable; | 164 | return p6_nops; |
173 | break; | 165 | else |
174 | } | 166 | return intel_nops; |
175 | } | ||
176 | return noptable; | ||
177 | } | 167 | } |
178 | 168 | ||
179 | #endif /* CONFIG_X86_64 */ | 169 | #endif /* CONFIG_X86_64 */ |
@@ -241,25 +231,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | |||
241 | continue; | 231 | continue; |
242 | if (*ptr > text_end) | 232 | if (*ptr > text_end) |
243 | continue; | 233 | continue; |
244 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ | 234 | /* turn DS segment override prefix into lock prefix */ |
235 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); | ||
245 | }; | 236 | }; |
246 | } | 237 | } |
247 | 238 | ||
248 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 239 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) |
249 | { | 240 | { |
250 | u8 **ptr; | 241 | u8 **ptr; |
251 | char insn[1]; | ||
252 | 242 | ||
253 | if (noreplace_smp) | 243 | if (noreplace_smp) |
254 | return; | 244 | return; |
255 | 245 | ||
256 | add_nops(insn, 1); | ||
257 | for (ptr = start; ptr < end; ptr++) { | 246 | for (ptr = start; ptr < end; ptr++) { |
258 | if (*ptr < text) | 247 | if (*ptr < text) |
259 | continue; | 248 | continue; |
260 | if (*ptr > text_end) | 249 | if (*ptr > text_end) |
261 | continue; | 250 | continue; |
262 | text_poke(*ptr, insn, 1); | 251 | /* turn lock prefix into DS segment override prefix */ |
252 | text_poke(*ptr, ((unsigned char []){0x3E}), 1); | ||
263 | }; | 253 | }; |
264 | } | 254 | } |
265 | 255 | ||
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 22d7d050905d..34e4d112b1ef 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -33,6 +33,10 @@ | |||
33 | 33 | ||
34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
35 | 35 | ||
36 | /* A list of preallocated protection domains */ | ||
37 | static LIST_HEAD(iommu_pd_list); | ||
38 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | ||
39 | |||
36 | /* | 40 | /* |
37 | * general struct to manage commands send to an IOMMU | 41 | * general struct to manage commands send to an IOMMU |
38 | */ | 42 | */ |
@@ -51,6 +55,102 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
51 | 55 | ||
52 | /**************************************************************************** | 56 | /**************************************************************************** |
53 | * | 57 | * |
58 | * Interrupt handling functions | ||
59 | * | ||
60 | ****************************************************************************/ | ||
61 | |||
62 | static void iommu_print_event(void *__evt) | ||
63 | { | ||
64 | u32 *event = __evt; | ||
65 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | ||
66 | int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; | ||
67 | int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; | ||
68 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | ||
69 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | ||
70 | |||
71 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | ||
72 | |||
73 | switch (type) { | ||
74 | case EVENT_TYPE_ILL_DEV: | ||
75 | printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " | ||
76 | "address=0x%016llx flags=0x%04x]\n", | ||
77 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
78 | address, flags); | ||
79 | break; | ||
80 | case EVENT_TYPE_IO_FAULT: | ||
81 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | ||
82 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
83 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
84 | domid, address, flags); | ||
85 | break; | ||
86 | case EVENT_TYPE_DEV_TAB_ERR: | ||
87 | printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
88 | "address=0x%016llx flags=0x%04x]\n", | ||
89 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
90 | address, flags); | ||
91 | break; | ||
92 | case EVENT_TYPE_PAGE_TAB_ERR: | ||
93 | printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " | ||
94 | "domain=0x%04x address=0x%016llx flags=0x%04x]\n", | ||
95 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
96 | domid, address, flags); | ||
97 | break; | ||
98 | case EVENT_TYPE_ILL_CMD: | ||
99 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | ||
100 | break; | ||
101 | case EVENT_TYPE_CMD_HARD_ERR: | ||
102 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | ||
103 | "flags=0x%04x]\n", address, flags); | ||
104 | break; | ||
105 | case EVENT_TYPE_IOTLB_INV_TO: | ||
106 | printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " | ||
107 | "address=0x%016llx]\n", | ||
108 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
109 | address); | ||
110 | break; | ||
111 | case EVENT_TYPE_INV_DEV_REQ: | ||
112 | printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " | ||
113 | "address=0x%016llx flags=0x%04x]\n", | ||
114 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | ||
115 | address, flags); | ||
116 | break; | ||
117 | default: | ||
118 | printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void iommu_poll_events(struct amd_iommu *iommu) | ||
123 | { | ||
124 | u32 head, tail; | ||
125 | unsigned long flags; | ||
126 | |||
127 | spin_lock_irqsave(&iommu->lock, flags); | ||
128 | |||
129 | head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
130 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
131 | |||
132 | while (head != tail) { | ||
133 | iommu_print_event(iommu->evt_buf + head); | ||
134 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | ||
135 | } | ||
136 | |||
137 | writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
138 | |||
139 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
140 | } | ||
141 | |||
142 | irqreturn_t amd_iommu_int_handler(int irq, void *data) | ||
143 | { | ||
144 | struct amd_iommu *iommu; | ||
145 | |||
146 | list_for_each_entry(iommu, &amd_iommu_list, list) | ||
147 | iommu_poll_events(iommu); | ||
148 | |||
149 | return IRQ_HANDLED; | ||
150 | } | ||
151 | |||
152 | /**************************************************************************** | ||
153 | * | ||
54 | * IOMMU command queuing functions | 154 | * IOMMU command queuing functions |
55 | * | 155 | * |
56 | ****************************************************************************/ | 156 | ****************************************************************************/ |
@@ -65,7 +165,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
65 | u8 *target; | 165 | u8 *target; |
66 | 166 | ||
67 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 167 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
68 | target = (iommu->cmd_buf + tail); | 168 | target = iommu->cmd_buf + tail; |
69 | memcpy_toio(target, cmd, sizeof(*cmd)); | 169 | memcpy_toio(target, cmd, sizeof(*cmd)); |
70 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | 170 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; |
71 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | 171 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); |
@@ -101,32 +201,39 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
101 | */ | 201 | */ |
102 | static int iommu_completion_wait(struct amd_iommu *iommu) | 202 | static int iommu_completion_wait(struct amd_iommu *iommu) |
103 | { | 203 | { |
104 | int ret; | 204 | int ret = 0, ready = 0; |
205 | unsigned status = 0; | ||
105 | struct iommu_cmd cmd; | 206 | struct iommu_cmd cmd; |
106 | volatile u64 ready = 0; | 207 | unsigned long flags, i = 0; |
107 | unsigned long ready_phys = virt_to_phys(&ready); | ||
108 | unsigned long i = 0; | ||
109 | 208 | ||
110 | memset(&cmd, 0, sizeof(cmd)); | 209 | memset(&cmd, 0, sizeof(cmd)); |
111 | cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; | 210 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; |
112 | cmd.data[1] = upper_32_bits(ready_phys); | ||
113 | cmd.data[2] = 1; /* value written to 'ready' */ | ||
114 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | 211 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); |
115 | 212 | ||
116 | iommu->need_sync = 0; | 213 | iommu->need_sync = 0; |
117 | 214 | ||
118 | ret = iommu_queue_command(iommu, &cmd); | 215 | spin_lock_irqsave(&iommu->lock, flags); |
216 | |||
217 | ret = __iommu_queue_command(iommu, &cmd); | ||
119 | 218 | ||
120 | if (ret) | 219 | if (ret) |
121 | return ret; | 220 | goto out; |
122 | 221 | ||
123 | while (!ready && (i < EXIT_LOOP_COUNT)) { | 222 | while (!ready && (i < EXIT_LOOP_COUNT)) { |
124 | ++i; | 223 | ++i; |
125 | cpu_relax(); | 224 | /* wait for the bit to become one */ |
225 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
226 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
126 | } | 227 | } |
127 | 228 | ||
229 | /* set bit back to zero */ | ||
230 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | ||
231 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
232 | |||
128 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) | 233 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) |
129 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); | 234 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); |
235 | out: | ||
236 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
130 | 237 | ||
131 | return 0; | 238 | return 0; |
132 | } | 239 | } |
@@ -137,6 +244,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
137 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 244 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) |
138 | { | 245 | { |
139 | struct iommu_cmd cmd; | 246 | struct iommu_cmd cmd; |
247 | int ret; | ||
140 | 248 | ||
141 | BUG_ON(iommu == NULL); | 249 | BUG_ON(iommu == NULL); |
142 | 250 | ||
@@ -144,9 +252,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | |||
144 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | 252 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); |
145 | cmd.data[0] = devid; | 253 | cmd.data[0] = devid; |
146 | 254 | ||
255 | ret = iommu_queue_command(iommu, &cmd); | ||
256 | |||
147 | iommu->need_sync = 1; | 257 | iommu->need_sync = 1; |
148 | 258 | ||
149 | return iommu_queue_command(iommu, &cmd); | 259 | return ret; |
150 | } | 260 | } |
151 | 261 | ||
152 | /* | 262 | /* |
@@ -156,21 +266,24 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
156 | u64 address, u16 domid, int pde, int s) | 266 | u64 address, u16 domid, int pde, int s) |
157 | { | 267 | { |
158 | struct iommu_cmd cmd; | 268 | struct iommu_cmd cmd; |
269 | int ret; | ||
159 | 270 | ||
160 | memset(&cmd, 0, sizeof(cmd)); | 271 | memset(&cmd, 0, sizeof(cmd)); |
161 | address &= PAGE_MASK; | 272 | address &= PAGE_MASK; |
162 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); | 273 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); |
163 | cmd.data[1] |= domid; | 274 | cmd.data[1] |= domid; |
164 | cmd.data[2] = LOW_U32(address); | 275 | cmd.data[2] = lower_32_bits(address); |
165 | cmd.data[3] = upper_32_bits(address); | 276 | cmd.data[3] = upper_32_bits(address); |
166 | if (s) /* size bit - we flush more than one 4kb page */ | 277 | if (s) /* size bit - we flush more than one 4kb page */ |
167 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | 278 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; |
168 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | 279 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ |
169 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | 280 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; |
170 | 281 | ||
282 | ret = iommu_queue_command(iommu, &cmd); | ||
283 | |||
171 | iommu->need_sync = 1; | 284 | iommu->need_sync = 1; |
172 | 285 | ||
173 | return iommu_queue_command(iommu, &cmd); | 286 | return ret; |
174 | } | 287 | } |
175 | 288 | ||
176 | /* | 289 | /* |
@@ -200,6 +313,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
200 | return 0; | 313 | return 0; |
201 | } | 314 | } |
202 | 315 | ||
316 | /* Flush the whole IO/TLB for a given protection domain */ | ||
317 | static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | ||
318 | { | ||
319 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
320 | |||
321 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | ||
322 | } | ||
323 | |||
203 | /**************************************************************************** | 324 | /**************************************************************************** |
204 | * | 325 | * |
205 | * The functions below are used the create the page table mappings for | 326 | * The functions below are used the create the page table mappings for |
@@ -359,11 +480,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
359 | * efficient allocator. | 480 | * efficient allocator. |
360 | * | 481 | * |
361 | ****************************************************************************/ | 482 | ****************************************************************************/ |
362 | static unsigned long dma_mask_to_pages(unsigned long mask) | ||
363 | { | ||
364 | return (mask >> PAGE_SHIFT) + | ||
365 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); | ||
366 | } | ||
367 | 483 | ||
368 | /* | 484 | /* |
369 | * The address allocator core function. | 485 | * The address allocator core function. |
@@ -372,25 +488,31 @@ static unsigned long dma_mask_to_pages(unsigned long mask) | |||
372 | */ | 488 | */ |
373 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | 489 | static unsigned long dma_ops_alloc_addresses(struct device *dev, |
374 | struct dma_ops_domain *dom, | 490 | struct dma_ops_domain *dom, |
375 | unsigned int pages) | 491 | unsigned int pages, |
492 | unsigned long align_mask, | ||
493 | u64 dma_mask) | ||
376 | { | 494 | { |
377 | unsigned long limit = dma_mask_to_pages(*dev->dma_mask); | 495 | unsigned long limit; |
378 | unsigned long address; | 496 | unsigned long address; |
379 | unsigned long size = dom->aperture_size >> PAGE_SHIFT; | ||
380 | unsigned long boundary_size; | 497 | unsigned long boundary_size; |
381 | 498 | ||
382 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 499 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, |
383 | PAGE_SIZE) >> PAGE_SHIFT; | 500 | PAGE_SIZE) >> PAGE_SHIFT; |
384 | limit = limit < size ? limit : size; | 501 | limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, |
502 | dma_mask >> PAGE_SHIFT); | ||
385 | 503 | ||
386 | if (dom->next_bit >= limit) | 504 | if (dom->next_bit >= limit) { |
387 | dom->next_bit = 0; | 505 | dom->next_bit = 0; |
506 | dom->need_flush = true; | ||
507 | } | ||
388 | 508 | ||
389 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, | 509 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, |
390 | 0 , boundary_size, 0); | 510 | 0 , boundary_size, align_mask); |
391 | if (address == -1) | 511 | if (address == -1) { |
392 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, | 512 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, |
393 | 0, boundary_size, 0); | 513 | 0, boundary_size, align_mask); |
514 | dom->need_flush = true; | ||
515 | } | ||
394 | 516 | ||
395 | if (likely(address != -1)) { | 517 | if (likely(address != -1)) { |
396 | dom->next_bit = address + pages; | 518 | dom->next_bit = address + pages; |
@@ -456,7 +578,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | |||
456 | if (start_page + pages > last_page) | 578 | if (start_page + pages > last_page) |
457 | pages = last_page - start_page; | 579 | pages = last_page - start_page; |
458 | 580 | ||
459 | set_bit_string(dom->bitmap, start_page, pages); | 581 | iommu_area_reserve(dom->bitmap, start_page, pages); |
460 | } | 582 | } |
461 | 583 | ||
462 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | 584 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) |
@@ -550,6 +672,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
550 | dma_dom->bitmap[0] = 1; | 672 | dma_dom->bitmap[0] = 1; |
551 | dma_dom->next_bit = 0; | 673 | dma_dom->next_bit = 0; |
552 | 674 | ||
675 | dma_dom->need_flush = false; | ||
676 | dma_dom->target_dev = 0xffff; | ||
677 | |||
553 | /* Intialize the exclusion range if necessary */ | 678 | /* Intialize the exclusion range if necessary */ |
554 | if (iommu->exclusion_start && | 679 | if (iommu->exclusion_start && |
555 | iommu->exclusion_start < dma_dom->aperture_size) { | 680 | iommu->exclusion_start < dma_dom->aperture_size) { |
@@ -620,12 +745,13 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
620 | 745 | ||
621 | u64 pte_root = virt_to_phys(domain->pt_root); | 746 | u64 pte_root = virt_to_phys(domain->pt_root); |
622 | 747 | ||
623 | pte_root |= (domain->mode & 0x07) << 9; | 748 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
624 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; | 749 | << DEV_ENTRY_MODE_SHIFT; |
750 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
625 | 751 | ||
626 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 752 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
627 | amd_iommu_dev_table[devid].data[0] = pte_root; | 753 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); |
628 | amd_iommu_dev_table[devid].data[1] = pte_root >> 32; | 754 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); |
629 | amd_iommu_dev_table[devid].data[2] = domain->id; | 755 | amd_iommu_dev_table[devid].data[2] = domain->id; |
630 | 756 | ||
631 | amd_iommu_pd_table[devid] = domain; | 757 | amd_iommu_pd_table[devid] = domain; |
@@ -643,6 +769,45 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
643 | *****************************************************************************/ | 769 | *****************************************************************************/ |
644 | 770 | ||
645 | /* | 771 | /* |
772 | * This function checks if the driver got a valid device from the caller to | ||
773 | * avoid dereferencing invalid pointers. | ||
774 | */ | ||
775 | static bool check_device(struct device *dev) | ||
776 | { | ||
777 | if (!dev || !dev->dma_mask) | ||
778 | return false; | ||
779 | |||
780 | return true; | ||
781 | } | ||
782 | |||
783 | /* | ||
784 | * In this function the list of preallocated protection domains is traversed to | ||
785 | * find the domain for a specific device | ||
786 | */ | ||
787 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
788 | { | ||
789 | struct dma_ops_domain *entry, *ret = NULL; | ||
790 | unsigned long flags; | ||
791 | |||
792 | if (list_empty(&iommu_pd_list)) | ||
793 | return NULL; | ||
794 | |||
795 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
796 | |||
797 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
798 | if (entry->target_dev == devid) { | ||
799 | ret = entry; | ||
800 | list_del(&ret->list); | ||
801 | break; | ||
802 | } | ||
803 | } | ||
804 | |||
805 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
806 | |||
807 | return ret; | ||
808 | } | ||
809 | |||
810 | /* | ||
646 | * In the dma_ops path we only have the struct device. This function | 811 | * In the dma_ops path we only have the struct device. This function |
647 | * finds the corresponding IOMMU, the protection domain and the | 812 | * finds the corresponding IOMMU, the protection domain and the |
648 | * requestor id for a given device. | 813 | * requestor id for a given device. |
@@ -658,27 +823,30 @@ static int get_device_resources(struct device *dev, | |||
658 | struct pci_dev *pcidev; | 823 | struct pci_dev *pcidev; |
659 | u16 _bdf; | 824 | u16 _bdf; |
660 | 825 | ||
661 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); | 826 | *iommu = NULL; |
827 | *domain = NULL; | ||
828 | *bdf = 0xffff; | ||
829 | |||
830 | if (dev->bus != &pci_bus_type) | ||
831 | return 0; | ||
662 | 832 | ||
663 | pcidev = to_pci_dev(dev); | 833 | pcidev = to_pci_dev(dev); |
664 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | 834 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); |
665 | 835 | ||
666 | /* device not translated by any IOMMU in the system? */ | 836 | /* device not translated by any IOMMU in the system? */ |
667 | if (_bdf > amd_iommu_last_bdf) { | 837 | if (_bdf > amd_iommu_last_bdf) |
668 | *iommu = NULL; | ||
669 | *domain = NULL; | ||
670 | *bdf = 0xffff; | ||
671 | return 0; | 838 | return 0; |
672 | } | ||
673 | 839 | ||
674 | *bdf = amd_iommu_alias_table[_bdf]; | 840 | *bdf = amd_iommu_alias_table[_bdf]; |
675 | 841 | ||
676 | *iommu = amd_iommu_rlookup_table[*bdf]; | 842 | *iommu = amd_iommu_rlookup_table[*bdf]; |
677 | if (*iommu == NULL) | 843 | if (*iommu == NULL) |
678 | return 0; | 844 | return 0; |
679 | dma_dom = (*iommu)->default_dom; | ||
680 | *domain = domain_for_device(*bdf); | 845 | *domain = domain_for_device(*bdf); |
681 | if (*domain == NULL) { | 846 | if (*domain == NULL) { |
847 | dma_dom = find_protection_domain(*bdf); | ||
848 | if (!dma_dom) | ||
849 | dma_dom = (*iommu)->default_dom; | ||
682 | *domain = &dma_dom->domain; | 850 | *domain = &dma_dom->domain; |
683 | set_device_domain(*iommu, *domain, *bdf); | 851 | set_device_domain(*iommu, *domain, *bdf); |
684 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | 852 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " |
@@ -757,17 +925,24 @@ static dma_addr_t __map_single(struct device *dev, | |||
757 | struct dma_ops_domain *dma_dom, | 925 | struct dma_ops_domain *dma_dom, |
758 | phys_addr_t paddr, | 926 | phys_addr_t paddr, |
759 | size_t size, | 927 | size_t size, |
760 | int dir) | 928 | int dir, |
929 | bool align, | ||
930 | u64 dma_mask) | ||
761 | { | 931 | { |
762 | dma_addr_t offset = paddr & ~PAGE_MASK; | 932 | dma_addr_t offset = paddr & ~PAGE_MASK; |
763 | dma_addr_t address, start; | 933 | dma_addr_t address, start; |
764 | unsigned int pages; | 934 | unsigned int pages; |
935 | unsigned long align_mask = 0; | ||
765 | int i; | 936 | int i; |
766 | 937 | ||
767 | pages = iommu_num_pages(paddr, size); | 938 | pages = iommu_num_pages(paddr, size); |
768 | paddr &= PAGE_MASK; | 939 | paddr &= PAGE_MASK; |
769 | 940 | ||
770 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); | 941 | if (align) |
942 | align_mask = (1UL << get_order(size)) - 1; | ||
943 | |||
944 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | ||
945 | dma_mask); | ||
771 | if (unlikely(address == bad_dma_address)) | 946 | if (unlikely(address == bad_dma_address)) |
772 | goto out; | 947 | goto out; |
773 | 948 | ||
@@ -779,6 +954,12 @@ static dma_addr_t __map_single(struct device *dev, | |||
779 | } | 954 | } |
780 | address += offset; | 955 | address += offset; |
781 | 956 | ||
957 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | ||
958 | iommu_flush_tlb(iommu, dma_dom->domain.id); | ||
959 | dma_dom->need_flush = false; | ||
960 | } else if (unlikely(iommu_has_npcache(iommu))) | ||
961 | iommu_flush_pages(iommu, dma_dom->domain.id, address, size); | ||
962 | |||
782 | out: | 963 | out: |
783 | return address; | 964 | return address; |
784 | } | 965 | } |
@@ -809,6 +990,9 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
809 | } | 990 | } |
810 | 991 | ||
811 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 992 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
993 | |||
994 | if (amd_iommu_unmap_flush) | ||
995 | iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); | ||
812 | } | 996 | } |
813 | 997 | ||
814 | /* | 998 | /* |
@@ -822,6 +1006,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
822 | struct protection_domain *domain; | 1006 | struct protection_domain *domain; |
823 | u16 devid; | 1007 | u16 devid; |
824 | dma_addr_t addr; | 1008 | dma_addr_t addr; |
1009 | u64 dma_mask; | ||
1010 | |||
1011 | if (!check_device(dev)) | ||
1012 | return bad_dma_address; | ||
1013 | |||
1014 | dma_mask = *dev->dma_mask; | ||
825 | 1015 | ||
826 | get_device_resources(dev, &iommu, &domain, &devid); | 1016 | get_device_resources(dev, &iommu, &domain, &devid); |
827 | 1017 | ||
@@ -830,14 +1020,12 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
830 | return (dma_addr_t)paddr; | 1020 | return (dma_addr_t)paddr; |
831 | 1021 | ||
832 | spin_lock_irqsave(&domain->lock, flags); | 1022 | spin_lock_irqsave(&domain->lock, flags); |
833 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); | 1023 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, |
1024 | dma_mask); | ||
834 | if (addr == bad_dma_address) | 1025 | if (addr == bad_dma_address) |
835 | goto out; | 1026 | goto out; |
836 | 1027 | ||
837 | if (iommu_has_npcache(iommu)) | 1028 | if (unlikely(iommu->need_sync)) |
838 | iommu_flush_pages(iommu, domain->id, addr, size); | ||
839 | |||
840 | if (iommu->need_sync) | ||
841 | iommu_completion_wait(iommu); | 1029 | iommu_completion_wait(iommu); |
842 | 1030 | ||
843 | out: | 1031 | out: |
@@ -857,7 +1045,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
857 | struct protection_domain *domain; | 1045 | struct protection_domain *domain; |
858 | u16 devid; | 1046 | u16 devid; |
859 | 1047 | ||
860 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1048 | if (!check_device(dev) || |
1049 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
861 | /* device not handled by any AMD IOMMU */ | 1050 | /* device not handled by any AMD IOMMU */ |
862 | return; | 1051 | return; |
863 | 1052 | ||
@@ -865,9 +1054,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
865 | 1054 | ||
866 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1055 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); |
867 | 1056 | ||
868 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | 1057 | if (unlikely(iommu->need_sync)) |
869 | |||
870 | if (iommu->need_sync) | ||
871 | iommu_completion_wait(iommu); | 1058 | iommu_completion_wait(iommu); |
872 | 1059 | ||
873 | spin_unlock_irqrestore(&domain->lock, flags); | 1060 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -906,6 +1093,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
906 | struct scatterlist *s; | 1093 | struct scatterlist *s; |
907 | phys_addr_t paddr; | 1094 | phys_addr_t paddr; |
908 | int mapped_elems = 0; | 1095 | int mapped_elems = 0; |
1096 | u64 dma_mask; | ||
1097 | |||
1098 | if (!check_device(dev)) | ||
1099 | return 0; | ||
1100 | |||
1101 | dma_mask = *dev->dma_mask; | ||
909 | 1102 | ||
910 | get_device_resources(dev, &iommu, &domain, &devid); | 1103 | get_device_resources(dev, &iommu, &domain, &devid); |
911 | 1104 | ||
@@ -918,19 +1111,17 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
918 | paddr = sg_phys(s); | 1111 | paddr = sg_phys(s); |
919 | 1112 | ||
920 | s->dma_address = __map_single(dev, iommu, domain->priv, | 1113 | s->dma_address = __map_single(dev, iommu, domain->priv, |
921 | paddr, s->length, dir); | 1114 | paddr, s->length, dir, false, |
1115 | dma_mask); | ||
922 | 1116 | ||
923 | if (s->dma_address) { | 1117 | if (s->dma_address) { |
924 | s->dma_length = s->length; | 1118 | s->dma_length = s->length; |
925 | mapped_elems++; | 1119 | mapped_elems++; |
926 | } else | 1120 | } else |
927 | goto unmap; | 1121 | goto unmap; |
928 | if (iommu_has_npcache(iommu)) | ||
929 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
930 | s->dma_length); | ||
931 | } | 1122 | } |
932 | 1123 | ||
933 | if (iommu->need_sync) | 1124 | if (unlikely(iommu->need_sync)) |
934 | iommu_completion_wait(iommu); | 1125 | iommu_completion_wait(iommu); |
935 | 1126 | ||
936 | out: | 1127 | out: |
@@ -964,7 +1155,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
964 | u16 devid; | 1155 | u16 devid; |
965 | int i; | 1156 | int i; |
966 | 1157 | ||
967 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 1158 | if (!check_device(dev) || |
1159 | !get_device_resources(dev, &iommu, &domain, &devid)) | ||
968 | return; | 1160 | return; |
969 | 1161 | ||
970 | spin_lock_irqsave(&domain->lock, flags); | 1162 | spin_lock_irqsave(&domain->lock, flags); |
@@ -972,12 +1164,10 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
972 | for_each_sg(sglist, s, nelems, i) { | 1164 | for_each_sg(sglist, s, nelems, i) { |
973 | __unmap_single(iommu, domain->priv, s->dma_address, | 1165 | __unmap_single(iommu, domain->priv, s->dma_address, |
974 | s->dma_length, dir); | 1166 | s->dma_length, dir); |
975 | iommu_flush_pages(iommu, domain->id, s->dma_address, | ||
976 | s->dma_length); | ||
977 | s->dma_address = s->dma_length = 0; | 1167 | s->dma_address = s->dma_length = 0; |
978 | } | 1168 | } |
979 | 1169 | ||
980 | if (iommu->need_sync) | 1170 | if (unlikely(iommu->need_sync)) |
981 | iommu_completion_wait(iommu); | 1171 | iommu_completion_wait(iommu); |
982 | 1172 | ||
983 | spin_unlock_irqrestore(&domain->lock, flags); | 1173 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -995,25 +1185,33 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
995 | struct protection_domain *domain; | 1185 | struct protection_domain *domain; |
996 | u16 devid; | 1186 | u16 devid; |
997 | phys_addr_t paddr; | 1187 | phys_addr_t paddr; |
1188 | u64 dma_mask = dev->coherent_dma_mask; | ||
1189 | |||
1190 | if (!check_device(dev)) | ||
1191 | return NULL; | ||
1192 | |||
1193 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | ||
1194 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
998 | 1195 | ||
1196 | flag |= __GFP_ZERO; | ||
999 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 1197 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
1000 | if (!virt_addr) | 1198 | if (!virt_addr) |
1001 | return 0; | 1199 | return 0; |
1002 | 1200 | ||
1003 | memset(virt_addr, 0, size); | ||
1004 | paddr = virt_to_phys(virt_addr); | 1201 | paddr = virt_to_phys(virt_addr); |
1005 | 1202 | ||
1006 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1007 | |||
1008 | if (!iommu || !domain) { | 1203 | if (!iommu || !domain) { |
1009 | *dma_addr = (dma_addr_t)paddr; | 1204 | *dma_addr = (dma_addr_t)paddr; |
1010 | return virt_addr; | 1205 | return virt_addr; |
1011 | } | 1206 | } |
1012 | 1207 | ||
1208 | if (!dma_mask) | ||
1209 | dma_mask = *dev->dma_mask; | ||
1210 | |||
1013 | spin_lock_irqsave(&domain->lock, flags); | 1211 | spin_lock_irqsave(&domain->lock, flags); |
1014 | 1212 | ||
1015 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 1213 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, |
1016 | size, DMA_BIDIRECTIONAL); | 1214 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
1017 | 1215 | ||
1018 | if (*dma_addr == bad_dma_address) { | 1216 | if (*dma_addr == bad_dma_address) { |
1019 | free_pages((unsigned long)virt_addr, get_order(size)); | 1217 | free_pages((unsigned long)virt_addr, get_order(size)); |
@@ -1021,10 +1219,7 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1021 | goto out; | 1219 | goto out; |
1022 | } | 1220 | } |
1023 | 1221 | ||
1024 | if (iommu_has_npcache(iommu)) | 1222 | if (unlikely(iommu->need_sync)) |
1025 | iommu_flush_pages(iommu, domain->id, *dma_addr, size); | ||
1026 | |||
1027 | if (iommu->need_sync) | ||
1028 | iommu_completion_wait(iommu); | 1223 | iommu_completion_wait(iommu); |
1029 | 1224 | ||
1030 | out: | 1225 | out: |
@@ -1035,8 +1230,6 @@ out: | |||
1035 | 1230 | ||
1036 | /* | 1231 | /* |
1037 | * The exported free_coherent function for dma_ops. | 1232 | * The exported free_coherent function for dma_ops. |
1038 | * FIXME: fix the generic x86 DMA layer so that it actually calls that | ||
1039 | * function. | ||
1040 | */ | 1233 | */ |
1041 | static void free_coherent(struct device *dev, size_t size, | 1234 | static void free_coherent(struct device *dev, size_t size, |
1042 | void *virt_addr, dma_addr_t dma_addr) | 1235 | void *virt_addr, dma_addr_t dma_addr) |
@@ -1046,6 +1239,9 @@ static void free_coherent(struct device *dev, size_t size, | |||
1046 | struct protection_domain *domain; | 1239 | struct protection_domain *domain; |
1047 | u16 devid; | 1240 | u16 devid; |
1048 | 1241 | ||
1242 | if (!check_device(dev)) | ||
1243 | return; | ||
1244 | |||
1049 | get_device_resources(dev, &iommu, &domain, &devid); | 1245 | get_device_resources(dev, &iommu, &domain, &devid); |
1050 | 1246 | ||
1051 | if (!iommu || !domain) | 1247 | if (!iommu || !domain) |
@@ -1054,9 +1250,8 @@ static void free_coherent(struct device *dev, size_t size, | |||
1054 | spin_lock_irqsave(&domain->lock, flags); | 1250 | spin_lock_irqsave(&domain->lock, flags); |
1055 | 1251 | ||
1056 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 1252 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
1057 | iommu_flush_pages(iommu, domain->id, dma_addr, size); | ||
1058 | 1253 | ||
1059 | if (iommu->need_sync) | 1254 | if (unlikely(iommu->need_sync)) |
1060 | iommu_completion_wait(iommu); | 1255 | iommu_completion_wait(iommu); |
1061 | 1256 | ||
1062 | spin_unlock_irqrestore(&domain->lock, flags); | 1257 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1066,6 +1261,30 @@ free_mem: | |||
1066 | } | 1261 | } |
1067 | 1262 | ||
1068 | /* | 1263 | /* |
1264 | * This function is called by the DMA layer to find out if we can handle a | ||
1265 | * particular device. It is part of the dma_ops. | ||
1266 | */ | ||
1267 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | ||
1268 | { | ||
1269 | u16 bdf; | ||
1270 | struct pci_dev *pcidev; | ||
1271 | |||
1272 | /* No device or no PCI device */ | ||
1273 | if (!dev || dev->bus != &pci_bus_type) | ||
1274 | return 0; | ||
1275 | |||
1276 | pcidev = to_pci_dev(dev); | ||
1277 | |||
1278 | bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
1279 | |||
1280 | /* Out of our scope? */ | ||
1281 | if (bdf > amd_iommu_last_bdf) | ||
1282 | return 0; | ||
1283 | |||
1284 | return 1; | ||
1285 | } | ||
1286 | |||
1287 | /* | ||
1069 | * The function for pre-allocating protection domains. | 1288 | * The function for pre-allocating protection domains. |
1070 | * | 1289 | * |
1071 | * If the driver core informs the DMA layer if a driver grabs a device | 1290 | * If the driver core informs the DMA layer if a driver grabs a device |
@@ -1094,10 +1313,9 @@ void prealloc_protection_domains(void) | |||
1094 | if (!dma_dom) | 1313 | if (!dma_dom) |
1095 | continue; | 1314 | continue; |
1096 | init_unity_mappings_for_device(dma_dom, devid); | 1315 | init_unity_mappings_for_device(dma_dom, devid); |
1097 | set_device_domain(iommu, &dma_dom->domain, devid); | 1316 | dma_dom->target_dev = devid; |
1098 | printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", | 1317 | |
1099 | dma_dom->domain.id); | 1318 | list_add_tail(&dma_dom->list, &iommu_pd_list); |
1100 | print_devid(devid, 1); | ||
1101 | } | 1319 | } |
1102 | } | 1320 | } |
1103 | 1321 | ||
@@ -1108,6 +1326,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { | |||
1108 | .unmap_single = unmap_single, | 1326 | .unmap_single = unmap_single, |
1109 | .map_sg = map_sg, | 1327 | .map_sg = map_sg, |
1110 | .unmap_sg = unmap_sg, | 1328 | .unmap_sg = unmap_sg, |
1329 | .dma_supported = amd_iommu_dma_supported, | ||
1111 | }; | 1330 | }; |
1112 | 1331 | ||
1113 | /* | 1332 | /* |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index d9a9da597e79..148fcfe22f17 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/gfp.h> | 22 | #include <linux/gfp.h> |
23 | #include <linux/list.h> | 23 | #include <linux/list.h> |
24 | #include <linux/sysdev.h> | 24 | #include <linux/sysdev.h> |
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/msi.h> | ||
25 | #include <asm/pci-direct.h> | 27 | #include <asm/pci-direct.h> |
26 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
27 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
@@ -30,7 +32,6 @@ | |||
30 | /* | 32 | /* |
31 | * definitions for the ACPI scanning code | 33 | * definitions for the ACPI scanning code |
32 | */ | 34 | */ |
33 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | ||
34 | #define IVRS_HEADER_LENGTH 48 | 35 | #define IVRS_HEADER_LENGTH 48 |
35 | 36 | ||
36 | #define ACPI_IVHD_TYPE 0x10 | 37 | #define ACPI_IVHD_TYPE 0x10 |
@@ -121,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | |||
121 | we find in ACPI */ | 122 | we find in ACPI */ |
122 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | 123 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ |
123 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ | 124 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ |
125 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | ||
124 | 126 | ||
125 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 127 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
126 | system */ | 128 | system */ |
@@ -234,7 +236,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
234 | { | 236 | { |
235 | u32 ctrl; | 237 | u32 ctrl; |
236 | 238 | ||
237 | ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); | 239 | ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); |
238 | ctrl &= ~(1 << bit); | 240 | ctrl &= ~(1 << bit); |
239 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | 241 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); |
240 | } | 242 | } |
@@ -242,13 +244,23 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
242 | /* Function to enable the hardware */ | 244 | /* Function to enable the hardware */ |
243 | void __init iommu_enable(struct amd_iommu *iommu) | 245 | void __init iommu_enable(struct amd_iommu *iommu) |
244 | { | 246 | { |
245 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); | 247 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " |
246 | print_devid(iommu->devid, 0); | 248 | "at %02x:%02x.%x cap 0x%hx\n", |
247 | printk(" cap 0x%hx\n", iommu->cap_ptr); | 249 | iommu->dev->bus->number, |
250 | PCI_SLOT(iommu->dev->devfn), | ||
251 | PCI_FUNC(iommu->dev->devfn), | ||
252 | iommu->cap_ptr); | ||
248 | 253 | ||
249 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 254 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
250 | } | 255 | } |
251 | 256 | ||
257 | /* Function to enable IOMMU event logging and event interrupts */ | ||
258 | void __init iommu_enable_event_logging(struct amd_iommu *iommu) | ||
259 | { | ||
260 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | ||
261 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | ||
262 | } | ||
263 | |||
252 | /* | 264 | /* |
253 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | 265 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in |
254 | * the system has one. | 266 | * the system has one. |
@@ -286,6 +298,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | |||
286 | ****************************************************************************/ | 298 | ****************************************************************************/ |
287 | 299 | ||
288 | /* | 300 | /* |
301 | * This function calculates the length of a given IVHD entry | ||
302 | */ | ||
303 | static inline int ivhd_entry_length(u8 *ivhd) | ||
304 | { | ||
305 | return 0x04 << (*ivhd >> 6); | ||
306 | } | ||
307 | |||
308 | /* | ||
289 | * This function reads the last device id the IOMMU has to handle from the PCI | 309 | * This function reads the last device id the IOMMU has to handle from the PCI |
290 | * capability header for this IOMMU | 310 | * capability header for this IOMMU |
291 | */ | 311 | */ |
@@ -329,7 +349,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
329 | default: | 349 | default: |
330 | break; | 350 | break; |
331 | } | 351 | } |
332 | p += 0x04 << (*p >> 6); | 352 | p += ivhd_entry_length(p); |
333 | } | 353 | } |
334 | 354 | ||
335 | WARN_ON(p != end); | 355 | WARN_ON(p != end); |
@@ -414,7 +434,32 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
414 | 434 | ||
415 | static void __init free_command_buffer(struct amd_iommu *iommu) | 435 | static void __init free_command_buffer(struct amd_iommu *iommu) |
416 | { | 436 | { |
417 | free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); | 437 | free_pages((unsigned long)iommu->cmd_buf, |
438 | get_order(iommu->cmd_buf_size)); | ||
439 | } | ||
440 | |||
441 | /* allocates the memory where the IOMMU will log its events to */ | ||
442 | static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) | ||
443 | { | ||
444 | u64 entry; | ||
445 | iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
446 | get_order(EVT_BUFFER_SIZE)); | ||
447 | |||
448 | if (iommu->evt_buf == NULL) | ||
449 | return NULL; | ||
450 | |||
451 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; | ||
452 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | ||
453 | &entry, sizeof(entry)); | ||
454 | |||
455 | iommu->evt_buf_size = EVT_BUFFER_SIZE; | ||
456 | |||
457 | return iommu->evt_buf; | ||
458 | } | ||
459 | |||
460 | static void __init free_event_buffer(struct amd_iommu *iommu) | ||
461 | { | ||
462 | free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); | ||
418 | } | 463 | } |
419 | 464 | ||
420 | /* sets a specific bit in the device table entry. */ | 465 | /* sets a specific bit in the device table entry. */ |
@@ -487,19 +532,21 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
487 | */ | 532 | */ |
488 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 533 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
489 | { | 534 | { |
490 | int bus = PCI_BUS(iommu->devid); | ||
491 | int dev = PCI_SLOT(iommu->devid); | ||
492 | int fn = PCI_FUNC(iommu->devid); | ||
493 | int cap_ptr = iommu->cap_ptr; | 535 | int cap_ptr = iommu->cap_ptr; |
494 | u32 range; | 536 | u32 range, misc; |
495 | 537 | ||
496 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); | 538 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, |
539 | &iommu->cap); | ||
540 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, | ||
541 | &range); | ||
542 | pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, | ||
543 | &misc); | ||
497 | 544 | ||
498 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | ||
499 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), | 545 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), |
500 | MMIO_GET_FD(range)); | 546 | MMIO_GET_FD(range)); |
501 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | 547 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), |
502 | MMIO_GET_LD(range)); | 548 | MMIO_GET_LD(range)); |
549 | iommu->evt_msi_num = MMIO_MSI_NUM(misc); | ||
503 | } | 550 | } |
504 | 551 | ||
505 | /* | 552 | /* |
@@ -604,7 +651,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
604 | break; | 651 | break; |
605 | } | 652 | } |
606 | 653 | ||
607 | p += 0x04 << (e->type >> 6); | 654 | p += ivhd_entry_length(p); |
608 | } | 655 | } |
609 | } | 656 | } |
610 | 657 | ||
@@ -622,6 +669,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) | |||
622 | static void __init free_iommu_one(struct amd_iommu *iommu) | 669 | static void __init free_iommu_one(struct amd_iommu *iommu) |
623 | { | 670 | { |
624 | free_command_buffer(iommu); | 671 | free_command_buffer(iommu); |
672 | free_event_buffer(iommu); | ||
625 | iommu_unmap_mmio_space(iommu); | 673 | iommu_unmap_mmio_space(iommu); |
626 | } | 674 | } |
627 | 675 | ||
@@ -649,8 +697,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
649 | /* | 697 | /* |
650 | * Copy data from ACPI table entry to the iommu struct | 698 | * Copy data from ACPI table entry to the iommu struct |
651 | */ | 699 | */ |
652 | iommu->devid = h->devid; | 700 | iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); |
701 | if (!iommu->dev) | ||
702 | return 1; | ||
703 | |||
653 | iommu->cap_ptr = h->cap_ptr; | 704 | iommu->cap_ptr = h->cap_ptr; |
705 | iommu->pci_seg = h->pci_seg; | ||
654 | iommu->mmio_phys = h->mmio_phys; | 706 | iommu->mmio_phys = h->mmio_phys; |
655 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); | 707 | iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); |
656 | if (!iommu->mmio_base) | 708 | if (!iommu->mmio_base) |
@@ -661,10 +713,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
661 | if (!iommu->cmd_buf) | 713 | if (!iommu->cmd_buf) |
662 | return -ENOMEM; | 714 | return -ENOMEM; |
663 | 715 | ||
716 | iommu->evt_buf = alloc_event_buffer(iommu); | ||
717 | if (!iommu->evt_buf) | ||
718 | return -ENOMEM; | ||
719 | |||
720 | iommu->int_enabled = false; | ||
721 | |||
664 | init_iommu_from_pci(iommu); | 722 | init_iommu_from_pci(iommu); |
665 | init_iommu_from_acpi(iommu, h); | 723 | init_iommu_from_acpi(iommu, h); |
666 | init_iommu_devices(iommu); | 724 | init_iommu_devices(iommu); |
667 | 725 | ||
726 | pci_enable_device(iommu->dev); | ||
727 | |||
668 | return 0; | 728 | return 0; |
669 | } | 729 | } |
670 | 730 | ||
@@ -706,6 +766,95 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
706 | 766 | ||
707 | /**************************************************************************** | 767 | /**************************************************************************** |
708 | * | 768 | * |
769 | * The following functions initialize the MSI interrupts for all IOMMUs | ||
770 | * in the system. Its a bit challenging because there could be multiple | ||
771 | * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per | ||
772 | * pci_dev. | ||
773 | * | ||
774 | ****************************************************************************/ | ||
775 | |||
776 | static int __init iommu_setup_msix(struct amd_iommu *iommu) | ||
777 | { | ||
778 | struct amd_iommu *curr; | ||
779 | struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ | ||
780 | int nvec = 0, i; | ||
781 | |||
782 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
783 | if (curr->dev == iommu->dev) { | ||
784 | entries[nvec].entry = curr->evt_msi_num; | ||
785 | entries[nvec].vector = 0; | ||
786 | curr->int_enabled = true; | ||
787 | nvec++; | ||
788 | } | ||
789 | } | ||
790 | |||
791 | if (pci_enable_msix(iommu->dev, entries, nvec)) { | ||
792 | pci_disable_msix(iommu->dev); | ||
793 | return 1; | ||
794 | } | ||
795 | |||
796 | for (i = 0; i < nvec; ++i) { | ||
797 | int r = request_irq(entries->vector, amd_iommu_int_handler, | ||
798 | IRQF_SAMPLE_RANDOM, | ||
799 | "AMD IOMMU", | ||
800 | NULL); | ||
801 | if (r) | ||
802 | goto out_free; | ||
803 | } | ||
804 | |||
805 | return 0; | ||
806 | |||
807 | out_free: | ||
808 | for (i -= 1; i >= 0; --i) | ||
809 | free_irq(entries->vector, NULL); | ||
810 | |||
811 | pci_disable_msix(iommu->dev); | ||
812 | |||
813 | return 1; | ||
814 | } | ||
815 | |||
816 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | ||
817 | { | ||
818 | int r; | ||
819 | struct amd_iommu *curr; | ||
820 | |||
821 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
822 | if (curr->dev == iommu->dev) | ||
823 | curr->int_enabled = true; | ||
824 | } | ||
825 | |||
826 | |||
827 | if (pci_enable_msi(iommu->dev)) | ||
828 | return 1; | ||
829 | |||
830 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | ||
831 | IRQF_SAMPLE_RANDOM, | ||
832 | "AMD IOMMU", | ||
833 | NULL); | ||
834 | |||
835 | if (r) { | ||
836 | pci_disable_msi(iommu->dev); | ||
837 | return 1; | ||
838 | } | ||
839 | |||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | static int __init iommu_init_msi(struct amd_iommu *iommu) | ||
844 | { | ||
845 | if (iommu->int_enabled) | ||
846 | return 0; | ||
847 | |||
848 | if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) | ||
849 | return iommu_setup_msix(iommu); | ||
850 | else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) | ||
851 | return iommu_setup_msi(iommu); | ||
852 | |||
853 | return 1; | ||
854 | } | ||
855 | |||
856 | /**************************************************************************** | ||
857 | * | ||
709 | * The next functions belong to the third pass of parsing the ACPI | 858 | * The next functions belong to the third pass of parsing the ACPI |
710 | * table. In this last pass the memory mapping requirements are | 859 | * table. In this last pass the memory mapping requirements are |
711 | * gathered (like exclusion and unity mapping reanges). | 860 | * gathered (like exclusion and unity mapping reanges). |
@@ -801,6 +950,20 @@ static int __init init_memory_definitions(struct acpi_table_header *table) | |||
801 | } | 950 | } |
802 | 951 | ||
803 | /* | 952 | /* |
953 | * Init the device table to not allow DMA access for devices and | ||
954 | * suppress all page faults | ||
955 | */ | ||
956 | static void init_device_table(void) | ||
957 | { | ||
958 | u16 devid; | ||
959 | |||
960 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { | ||
961 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); | ||
962 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); | ||
963 | } | ||
964 | } | ||
965 | |||
966 | /* | ||
804 | * This function finally enables all IOMMUs found in the system after | 967 | * This function finally enables all IOMMUs found in the system after |
805 | * they have been initialized | 968 | * they have been initialized |
806 | */ | 969 | */ |
@@ -810,6 +973,8 @@ static void __init enable_iommus(void) | |||
810 | 973 | ||
811 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 974 | list_for_each_entry(iommu, &amd_iommu_list, list) { |
812 | iommu_set_exclusion_range(iommu); | 975 | iommu_set_exclusion_range(iommu); |
976 | iommu_init_msi(iommu); | ||
977 | iommu_enable_event_logging(iommu); | ||
813 | iommu_enable(iommu); | 978 | iommu_enable(iommu); |
814 | } | 979 | } |
815 | } | 980 | } |
@@ -931,6 +1096,9 @@ int __init amd_iommu_init(void) | |||
931 | if (amd_iommu_pd_alloc_bitmap == NULL) | 1096 | if (amd_iommu_pd_alloc_bitmap == NULL) |
932 | goto free; | 1097 | goto free; |
933 | 1098 | ||
1099 | /* init the device table */ | ||
1100 | init_device_table(); | ||
1101 | |||
934 | /* | 1102 | /* |
935 | * let all alias entries point to itself | 1103 | * let all alias entries point to itself |
936 | */ | 1104 | */ |
@@ -954,15 +1122,15 @@ int __init amd_iommu_init(void) | |||
954 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) | 1122 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) |
955 | goto free; | 1123 | goto free; |
956 | 1124 | ||
957 | ret = amd_iommu_init_dma_ops(); | 1125 | ret = sysdev_class_register(&amd_iommu_sysdev_class); |
958 | if (ret) | 1126 | if (ret) |
959 | goto free; | 1127 | goto free; |
960 | 1128 | ||
961 | ret = sysdev_class_register(&amd_iommu_sysdev_class); | 1129 | ret = sysdev_register(&device_amd_iommu); |
962 | if (ret) | 1130 | if (ret) |
963 | goto free; | 1131 | goto free; |
964 | 1132 | ||
965 | ret = sysdev_register(&device_amd_iommu); | 1133 | ret = amd_iommu_init_dma_ops(); |
966 | if (ret) | 1134 | if (ret) |
967 | goto free; | 1135 | goto free; |
968 | 1136 | ||
@@ -977,11 +1145,17 @@ int __init amd_iommu_init(void) | |||
977 | else | 1145 | else |
978 | printk("disabled\n"); | 1146 | printk("disabled\n"); |
979 | 1147 | ||
1148 | if (amd_iommu_unmap_flush) | ||
1149 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | ||
1150 | else | ||
1151 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | ||
1152 | |||
980 | out: | 1153 | out: |
981 | return ret; | 1154 | return ret; |
982 | 1155 | ||
983 | free: | 1156 | free: |
984 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); | 1157 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, |
1158 | get_order(MAX_DOMAIN_ID/8)); | ||
985 | 1159 | ||
986 | free_pages((unsigned long)amd_iommu_pd_table, | 1160 | free_pages((unsigned long)amd_iommu_pd_table, |
987 | get_order(rlookup_table_size)); | 1161 | get_order(rlookup_table_size)); |
@@ -1039,8 +1213,10 @@ void __init amd_iommu_detect(void) | |||
1039 | static int __init parse_amd_iommu_options(char *str) | 1213 | static int __init parse_amd_iommu_options(char *str) |
1040 | { | 1214 | { |
1041 | for (; *str; ++str) { | 1215 | for (; *str; ++str) { |
1042 | if (strcmp(str, "isolate") == 0) | 1216 | if (strncmp(str, "isolate", 7) == 0) |
1043 | amd_iommu_isolate = 1; | 1217 | amd_iommu_isolate = 1; |
1218 | if (strncmp(str, "fullflush", 11) == 0) | ||
1219 | amd_iommu_unmap_flush = true; | ||
1044 | } | 1220 | } |
1045 | 1221 | ||
1046 | return 1; | 1222 | return 1; |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 44e21826db11..9a32b37ee2ee 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -455,11 +455,11 @@ out: | |||
455 | force_iommu || | 455 | force_iommu || |
456 | valid_agp || | 456 | valid_agp || |
457 | fallback_aper_force) { | 457 | fallback_aper_force) { |
458 | printk(KERN_ERR | 458 | printk(KERN_INFO |
459 | "Your BIOS doesn't leave a aperture memory hole\n"); | 459 | "Your BIOS doesn't leave a aperture memory hole\n"); |
460 | printk(KERN_ERR | 460 | printk(KERN_INFO |
461 | "Please enable the IOMMU option in the BIOS setup\n"); | 461 | "Please enable the IOMMU option in the BIOS setup\n"); |
462 | printk(KERN_ERR | 462 | printk(KERN_INFO |
463 | "This costs you %d MB of RAM\n", | 463 | "This costs you %d MB of RAM\n", |
464 | 32 << fallback_aper_order); | 464 | 32 << fallback_aper_order); |
465 | 465 | ||
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index b8d80c291650..a91c57cb666a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c | |||
@@ -114,8 +114,6 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | |||
114 | static int enabled_via_apicbase; | 114 | static int enabled_via_apicbase; |
115 | 115 | ||
116 | static unsigned long apic_phys; | 116 | static unsigned long apic_phys; |
117 | unsigned int __cpuinitdata maxcpus = NR_CPUS; | ||
118 | |||
119 | 117 | ||
120 | /* | 118 | /* |
121 | * Get the LAPIC version | 119 | * Get the LAPIC version |
@@ -1507,12 +1505,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1507 | return; | 1505 | return; |
1508 | } | 1506 | } |
1509 | 1507 | ||
1510 | if (num_processors >= maxcpus) { | ||
1511 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
1512 | " Processor ignored.\n", maxcpus); | ||
1513 | return; | ||
1514 | } | ||
1515 | |||
1516 | num_processors++; | 1508 | num_processors++; |
1517 | cpus_complement(tmp_map, cpu_present_map); | 1509 | cpus_complement(tmp_map, cpu_present_map); |
1518 | cpu = first_cpu(tmp_map); | 1510 | cpu = first_cpu(tmp_map); |
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 37e037606f30..53898b65a6ae 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c | |||
@@ -98,7 +98,6 @@ static struct clock_event_device lapic_clockevent = { | |||
98 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | 98 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); |
99 | 99 | ||
100 | static unsigned long apic_phys; | 100 | static unsigned long apic_phys; |
101 | unsigned int __cpuinitdata maxcpus = NR_CPUS; | ||
102 | 101 | ||
103 | unsigned long mp_lapic_addr; | 102 | unsigned long mp_lapic_addr; |
104 | 103 | ||
@@ -1444,12 +1443,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1444 | return; | 1443 | return; |
1445 | } | 1444 | } |
1446 | 1445 | ||
1447 | if (num_processors >= maxcpus) { | ||
1448 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
1449 | " Processor ignored.\n", maxcpus); | ||
1450 | return; | ||
1451 | } | ||
1452 | |||
1453 | num_processors++; | 1446 | num_processors++; |
1454 | cpus_complement(tmp_map, cpu_present_map); | 1447 | cpus_complement(tmp_map, cpu_present_map); |
1455 | cpu = first_cpu(tmp_map); | 1448 | cpu = first_cpu(tmp_map); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9ee24e6bc4b0..5145a6e72bbb 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -228,12 +228,12 @@ | |||
228 | #include <linux/suspend.h> | 228 | #include <linux/suspend.h> |
229 | #include <linux/kthread.h> | 229 | #include <linux/kthread.h> |
230 | #include <linux/jiffies.h> | 230 | #include <linux/jiffies.h> |
231 | #include <linux/smp_lock.h> | ||
232 | 231 | ||
233 | #include <asm/system.h> | 232 | #include <asm/system.h> |
234 | #include <asm/uaccess.h> | 233 | #include <asm/uaccess.h> |
235 | #include <asm/desc.h> | 234 | #include <asm/desc.h> |
236 | #include <asm/i8253.h> | 235 | #include <asm/i8253.h> |
236 | #include <asm/olpc.h> | ||
237 | #include <asm/paravirt.h> | 237 | #include <asm/paravirt.h> |
238 | #include <asm/reboot.h> | 238 | #include <asm/reboot.h> |
239 | 239 | ||
@@ -2217,7 +2217,7 @@ static int __init apm_init(void) | |||
2217 | 2217 | ||
2218 | dmi_check_system(apm_dmi_table); | 2218 | dmi_check_system(apm_dmi_table); |
2219 | 2219 | ||
2220 | if (apm_info.bios.version == 0 || paravirt_enabled()) { | 2220 | if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { |
2221 | printk(KERN_INFO "apm: BIOS not found.\n"); | 2221 | printk(KERN_INFO "apm: BIOS not found.\n"); |
2222 | return -ENODEV; | 2222 | return -ENODEV; |
2223 | } | 2223 | } |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index c639bd55391c..fdd585f9c53d 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -25,11 +25,11 @@ x86_bios_strerror(long status) | |||
25 | { | 25 | { |
26 | const char *str; | 26 | const char *str; |
27 | switch (status) { | 27 | switch (status) { |
28 | case 0: str = "Call completed without error"; break; | 28 | case 0: str = "Call completed without error"; break; |
29 | case -1: str = "Not implemented"; break; | 29 | case -1: str = "Not implemented"; break; |
30 | case -2: str = "Invalid argument"; break; | 30 | case -2: str = "Invalid argument"; break; |
31 | case -3: str = "Call completed with error"; break; | 31 | case -3: str = "Call completed with error"; break; |
32 | default: str = "Unknown BIOS status code"; break; | 32 | default: str = "Unknown BIOS status code"; break; |
33 | } | 33 | } |
34 | return str; | 34 | return str; |
35 | } | 35 | } |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a6072..a6ef672adbba 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) | |||
56 | 56 | ||
57 | switch (c->x86_vendor) { | 57 | switch (c->x86_vendor) { |
58 | case X86_VENDOR_INTEL: | 58 | case X86_VENDOR_INTEL: |
59 | if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) | 59 | /* |
60 | * There is a known erratum on Pentium III and Core Solo | ||
61 | * and Core Duo CPUs. | ||
62 | * " Page with PAT set to WC while associated MTRR is UC | ||
63 | * may consolidate to UC " | ||
64 | * Because of this erratum, it is better to stick with | ||
65 | * setting WC in MTRR rather than using PAT on these CPUs. | ||
66 | * | ||
67 | * Enable PAT WC only on P4, Core 2 or later CPUs. | ||
68 | */ | ||
69 | if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) | ||
60 | return; | 70 | return; |
61 | break; | 71 | |
72 | pat_disable("PAT WC disabled due to known CPU erratum."); | ||
73 | return; | ||
74 | |||
62 | case X86_VENDOR_AMD: | 75 | case X86_VENDOR_AMD: |
63 | case X86_VENDOR_CENTAUR: | 76 | case X86_VENDOR_CENTAUR: |
64 | case X86_VENDOR_TRANSMETA: | 77 | case X86_VENDOR_TRANSMETA: |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc3031..18514ed26104 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
31 | if (c->x86_power & (1<<8)) | 31 | if (c->x86_power & (1<<8)) |
32 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 32 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
33 | } | 33 | } |
34 | |||
35 | /* Set MTRR capability flag if appropriate */ | ||
36 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
37 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
38 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
34 | } | 39 | } |
35 | 40 | ||
36 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 41 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
@@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
166 | mbytes); | 171 | mbytes); |
167 | } | 172 | } |
168 | 173 | ||
169 | /* Set MTRR capability flag if appropriate */ | ||
170 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
171 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
172 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
173 | break; | 174 | break; |
174 | } | 175 | } |
175 | 176 | ||
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a55..a0534c04d38a 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -314,6 +314,16 @@ enum { | |||
314 | EAMD3D = 1<<20, | 314 | EAMD3D = 1<<20, |
315 | }; | 315 | }; |
316 | 316 | ||
317 | static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | ||
318 | { | ||
319 | switch (c->x86) { | ||
320 | case 5: | ||
321 | /* Emulate MTRRs using Centaur's MCR. */ | ||
322 | set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); | ||
323 | break; | ||
324 | } | ||
325 | } | ||
326 | |||
317 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | 327 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) |
318 | { | 328 | { |
319 | 329 | ||
@@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) | |||
462 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { | 472 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { |
463 | .c_vendor = "Centaur", | 473 | .c_vendor = "Centaur", |
464 | .c_ident = { "CentaurHauls" }, | 474 | .c_ident = { "CentaurHauls" }, |
475 | .c_early_init = early_init_centaur, | ||
465 | .c_init = init_centaur, | 476 | .c_init = init_centaur, |
466 | .c_size_cache = centaur_size_cache, | 477 | .c_size_cache = centaur_size_cache, |
467 | }; | 478 | }; |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..4e456bd955bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/mtrr.h> | 13 | #include <asm/mtrr.h> |
14 | #include <asm/mce.h> | 14 | #include <asm/mce.h> |
15 | #include <asm/pat.h> | 15 | #include <asm/pat.h> |
16 | #include <asm/asm.h> | ||
16 | #ifdef CONFIG_X86_LOCAL_APIC | 17 | #ifdef CONFIG_X86_LOCAL_APIC |
17 | #include <asm/mpspec.h> | 18 | #include <asm/mpspec.h> |
18 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
@@ -334,11 +335,24 @@ static void __init early_cpu_detect(void) | |||
334 | 335 | ||
335 | get_cpu_vendor(c, 1); | 336 | get_cpu_vendor(c, 1); |
336 | 337 | ||
338 | early_get_cap(c); | ||
339 | |||
337 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | 340 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && |
338 | cpu_devs[c->x86_vendor]->c_early_init) | 341 | cpu_devs[c->x86_vendor]->c_early_init) |
339 | cpu_devs[c->x86_vendor]->c_early_init(c); | 342 | cpu_devs[c->x86_vendor]->c_early_init(c); |
343 | } | ||
340 | 344 | ||
341 | early_get_cap(c); | 345 | /* |
346 | * The NOPL instruction is supposed to exist on all CPUs with | ||
347 | * family >= 6; unfortunately, that's not true in practice because | ||
348 | * of early VIA chips and (more importantly) broken virtualizers that | ||
349 | * are not easy to detect. In the latter case it doesn't even *fail* | ||
350 | * reliably, so probing for it doesn't even work. Disable it completely | ||
351 | * unless we can find a reliable way to detect all the broken cases. | ||
352 | */ | ||
353 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | ||
354 | { | ||
355 | clear_cpu_cap(c, X86_FEATURE_NOPL); | ||
342 | } | 356 | } |
343 | 357 | ||
344 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | 358 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
@@ -395,8 +409,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
395 | } | 409 | } |
396 | 410 | ||
397 | init_scattered_cpuid_features(c); | 411 | init_scattered_cpuid_features(c); |
412 | detect_nopl(c); | ||
398 | } | 413 | } |
399 | |||
400 | } | 414 | } |
401 | 415 | ||
402 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | 416 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index cc6efe86249d..43f1aa51da5d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/mtrr.h> | 18 | #include <asm/mtrr.h> |
19 | #include <asm/mce.h> | 19 | #include <asm/mce.h> |
20 | #include <asm/pat.h> | 20 | #include <asm/pat.h> |
21 | #include <asm/asm.h> | ||
21 | #include <asm/numa.h> | 22 | #include <asm/numa.h> |
22 | #ifdef CONFIG_X86_LOCAL_APIC | 23 | #ifdef CONFIG_X86_LOCAL_APIC |
23 | #include <asm/mpspec.h> | 24 | #include <asm/mpspec.h> |
@@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void) | |||
215 | } | 216 | } |
216 | } | 217 | } |
217 | 218 | ||
219 | /* | ||
220 | * The NOPL instruction is supposed to exist on all CPUs with | ||
221 | * family >= 6, unfortunately, that's not true in practice because | ||
222 | * of early VIA chips and (more importantly) broken virtualizers that | ||
223 | * are not easy to detect. Hence, probe for it based on first | ||
224 | * principles. | ||
225 | * | ||
226 | * Note: no 64-bit chip is known to lack these, but put the code here | ||
227 | * for consistency with 32 bits, and to make it utterly trivial to | ||
228 | * diagnose the problem should it ever surface. | ||
229 | */ | ||
230 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | ||
231 | { | ||
232 | const u32 nopl_signature = 0x888c53b1; /* Random number */ | ||
233 | u32 has_nopl = nopl_signature; | ||
234 | |||
235 | clear_cpu_cap(c, X86_FEATURE_NOPL); | ||
236 | if (c->x86 >= 6) { | ||
237 | asm volatile("\n" | ||
238 | "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ | ||
239 | "2:\n" | ||
240 | " .section .fixup,\"ax\"\n" | ||
241 | "3: xor %0,%0\n" | ||
242 | " jmp 2b\n" | ||
243 | " .previous\n" | ||
244 | _ASM_EXTABLE(1b,3b) | ||
245 | : "+a" (has_nopl)); | ||
246 | |||
247 | if (has_nopl == nopl_signature) | ||
248 | set_cpu_cap(c, X86_FEATURE_NOPL); | ||
249 | } | ||
250 | } | ||
251 | |||
218 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | 252 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); |
219 | 253 | ||
220 | void __init early_cpu_init(void) | 254 | void __init early_cpu_init(void) |
@@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
313 | c->x86_phys_bits = eax & 0xff; | 347 | c->x86_phys_bits = eax & 0xff; |
314 | } | 348 | } |
315 | 349 | ||
350 | detect_nopl(c); | ||
351 | |||
316 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | 352 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && |
317 | cpu_devs[c->x86_vendor]->c_early_init) | 353 | cpu_devs[c->x86_vendor]->c_early_init) |
318 | cpu_devs[c->x86_vendor]->c_early_init(c); | 354 | cpu_devs[c->x86_vendor]->c_early_init(c); |
@@ -394,6 +430,49 @@ static __init int setup_noclflush(char *arg) | |||
394 | } | 430 | } |
395 | __setup("noclflush", setup_noclflush); | 431 | __setup("noclflush", setup_noclflush); |
396 | 432 | ||
433 | struct msr_range { | ||
434 | unsigned min; | ||
435 | unsigned max; | ||
436 | }; | ||
437 | |||
438 | static struct msr_range msr_range_array[] __cpuinitdata = { | ||
439 | { 0x00000000, 0x00000418}, | ||
440 | { 0xc0000000, 0xc000040b}, | ||
441 | { 0xc0010000, 0xc0010142}, | ||
442 | { 0xc0011000, 0xc001103b}, | ||
443 | }; | ||
444 | |||
445 | static void __cpuinit print_cpu_msr(void) | ||
446 | { | ||
447 | unsigned index; | ||
448 | u64 val; | ||
449 | int i; | ||
450 | unsigned index_min, index_max; | ||
451 | |||
452 | for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { | ||
453 | index_min = msr_range_array[i].min; | ||
454 | index_max = msr_range_array[i].max; | ||
455 | for (index = index_min; index < index_max; index++) { | ||
456 | if (rdmsrl_amd_safe(index, &val)) | ||
457 | continue; | ||
458 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); | ||
459 | } | ||
460 | } | ||
461 | } | ||
462 | |||
463 | static int show_msr __cpuinitdata; | ||
464 | static __init int setup_show_msr(char *arg) | ||
465 | { | ||
466 | int num; | ||
467 | |||
468 | get_option(&arg, &num); | ||
469 | |||
470 | if (num > 0) | ||
471 | show_msr = num; | ||
472 | return 1; | ||
473 | } | ||
474 | __setup("show_msr=", setup_show_msr); | ||
475 | |||
397 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | 476 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) |
398 | { | 477 | { |
399 | if (c->x86_model_id[0]) | 478 | if (c->x86_model_id[0]) |
@@ -403,6 +482,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |||
403 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); | 482 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); |
404 | else | 483 | else |
405 | printk(KERN_CONT "\n"); | 484 | printk(KERN_CONT "\n"); |
485 | |||
486 | #ifdef CONFIG_SMP | ||
487 | if (c->cpu_index < show_msr) | ||
488 | print_cpu_msr(); | ||
489 | #else | ||
490 | if (show_msr) | ||
491 | print_cpu_msr(); | ||
492 | #endif | ||
406 | } | 493 | } |
407 | 494 | ||
408 | static __init int setup_disablecpuid(char *arg) | 495 | static __init int setup_disablecpuid(char *arg) |
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index f1685fb91fbd..b8e05ee4f736 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
171 | } | 171 | } |
172 | 172 | ||
173 | if (c->x86 != 0xF) { | 173 | if (c->x86 != 0xF) { |
174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n"); | 174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); |
175 | return 0; | 175 | return 0; |
176 | } | 176 | } |
177 | 177 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 4e7271999a74..84bb395038d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -737,63 +737,44 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
737 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | 737 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI |
738 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) | 738 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) |
739 | { | 739 | { |
740 | if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE)) | 740 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) |
741 | return; | 741 | return; |
742 | 742 | ||
743 | data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK; | 743 | data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; |
744 | data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK; | 744 | data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; |
745 | data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; | 745 | data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; |
746 | data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; | 746 | data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; |
747 | data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK); | 747 | data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); |
748 | data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK; | 748 | data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; |
749 | } | ||
750 | |||
751 | |||
752 | static struct acpi_processor_performance *acpi_perf_data; | ||
753 | static int preregister_valid; | ||
754 | |||
755 | static int powernow_k8_cpu_preinit_acpi(void) | ||
756 | { | ||
757 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); | ||
758 | if (!acpi_perf_data) | ||
759 | return -ENODEV; | ||
760 | |||
761 | if (acpi_processor_preregister_performance(acpi_perf_data)) | ||
762 | return -ENODEV; | ||
763 | else | ||
764 | preregister_valid = 1; | ||
765 | return 0; | ||
766 | } | 749 | } |
767 | 750 | ||
768 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | 751 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) |
769 | { | 752 | { |
770 | struct cpufreq_frequency_table *powernow_table; | 753 | struct cpufreq_frequency_table *powernow_table; |
771 | int ret_val; | 754 | int ret_val; |
772 | int cpu = 0; | ||
773 | 755 | ||
774 | data->acpi_data = percpu_ptr(acpi_perf_data, cpu); | 756 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
775 | if (acpi_processor_register_performance(data->acpi_data, data->cpu)) { | ||
776 | dprintk("register performance failed: bad ACPI data\n"); | 757 | dprintk("register performance failed: bad ACPI data\n"); |
777 | return -EIO; | 758 | return -EIO; |
778 | } | 759 | } |
779 | 760 | ||
780 | /* verify the data contained in the ACPI structures */ | 761 | /* verify the data contained in the ACPI structures */ |
781 | if (data->acpi_data->state_count <= 1) { | 762 | if (data->acpi_data.state_count <= 1) { |
782 | dprintk("No ACPI P-States\n"); | 763 | dprintk("No ACPI P-States\n"); |
783 | goto err_out; | 764 | goto err_out; |
784 | } | 765 | } |
785 | 766 | ||
786 | if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || | 767 | if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || |
787 | (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { | 768 | (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { |
788 | dprintk("Invalid control/status registers (%x - %x)\n", | 769 | dprintk("Invalid control/status registers (%x - %x)\n", |
789 | data->acpi_data->control_register.space_id, | 770 | data->acpi_data.control_register.space_id, |
790 | data->acpi_data->status_register.space_id); | 771 | data->acpi_data.status_register.space_id); |
791 | goto err_out; | 772 | goto err_out; |
792 | } | 773 | } |
793 | 774 | ||
794 | /* fill in data->powernow_table */ | 775 | /* fill in data->powernow_table */ |
795 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | 776 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) |
796 | * (data->acpi_data->state_count + 1)), GFP_KERNEL); | 777 | * (data->acpi_data.state_count + 1)), GFP_KERNEL); |
797 | if (!powernow_table) { | 778 | if (!powernow_table) { |
798 | dprintk("powernow_table memory alloc failure\n"); | 779 | dprintk("powernow_table memory alloc failure\n"); |
799 | goto err_out; | 780 | goto err_out; |
@@ -806,12 +787,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
806 | if (ret_val) | 787 | if (ret_val) |
807 | goto err_out_mem; | 788 | goto err_out_mem; |
808 | 789 | ||
809 | powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END; | 790 | powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; |
810 | powernow_table[data->acpi_data->state_count].index = 0; | 791 | powernow_table[data->acpi_data.state_count].index = 0; |
811 | data->powernow_table = powernow_table; | 792 | data->powernow_table = powernow_table; |
812 | 793 | ||
813 | /* fill in data */ | 794 | /* fill in data */ |
814 | data->numps = data->acpi_data->state_count; | 795 | data->numps = data->acpi_data.state_count; |
815 | if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) | 796 | if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) |
816 | print_basics(data); | 797 | print_basics(data); |
817 | powernow_k8_acpi_pst_values(data, 0); | 798 | powernow_k8_acpi_pst_values(data, 0); |
@@ -819,31 +800,16 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
819 | /* notify BIOS that we exist */ | 800 | /* notify BIOS that we exist */ |
820 | acpi_processor_notify_smm(THIS_MODULE); | 801 | acpi_processor_notify_smm(THIS_MODULE); |
821 | 802 | ||
822 | /* determine affinity, from ACPI if available */ | ||
823 | if (preregister_valid) { | ||
824 | if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) || | ||
825 | (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY)) | ||
826 | data->starting_core_affinity = data->acpi_data->shared_cpu_map; | ||
827 | else | ||
828 | data->starting_core_affinity = cpumask_of_cpu(data->cpu); | ||
829 | } else { | ||
830 | /* best guess from family if not */ | ||
831 | if (cpu_family == CPU_HW_PSTATE) | ||
832 | data->starting_core_affinity = cpumask_of_cpu(data->cpu); | ||
833 | else | ||
834 | data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu); | ||
835 | } | ||
836 | |||
837 | return 0; | 803 | return 0; |
838 | 804 | ||
839 | err_out_mem: | 805 | err_out_mem: |
840 | kfree(powernow_table); | 806 | kfree(powernow_table); |
841 | 807 | ||
842 | err_out: | 808 | err_out: |
843 | acpi_processor_unregister_performance(data->acpi_data, data->cpu); | 809 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); |
844 | 810 | ||
845 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ | 811 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ |
846 | data->acpi_data->state_count = 0; | 812 | data->acpi_data.state_count = 0; |
847 | 813 | ||
848 | return -ENODEV; | 814 | return -ENODEV; |
849 | } | 815 | } |
@@ -855,10 +821,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf | |||
855 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); | 821 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); |
856 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | 822 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; |
857 | 823 | ||
858 | for (i = 0; i < data->acpi_data->state_count; i++) { | 824 | for (i = 0; i < data->acpi_data.state_count; i++) { |
859 | u32 index; | 825 | u32 index; |
860 | 826 | ||
861 | index = data->acpi_data->states[i].control & HW_PSTATE_MASK; | 827 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; |
862 | if (index > data->max_hw_pstate) { | 828 | if (index > data->max_hw_pstate) { |
863 | printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); | 829 | printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); |
864 | printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); | 830 | printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); |
@@ -874,7 +840,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf | |||
874 | 840 | ||
875 | powernow_table[i].index = index; | 841 | powernow_table[i].index = index; |
876 | 842 | ||
877 | powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000; | 843 | powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; |
878 | } | 844 | } |
879 | return 0; | 845 | return 0; |
880 | } | 846 | } |
@@ -883,16 +849,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf | |||
883 | { | 849 | { |
884 | int i; | 850 | int i; |
885 | int cntlofreq = 0; | 851 | int cntlofreq = 0; |
886 | for (i = 0; i < data->acpi_data->state_count; i++) { | 852 | for (i = 0; i < data->acpi_data.state_count; i++) { |
887 | u32 fid; | 853 | u32 fid; |
888 | u32 vid; | 854 | u32 vid; |
889 | 855 | ||
890 | if (data->exttype) { | 856 | if (data->exttype) { |
891 | fid = data->acpi_data->states[i].status & EXT_FID_MASK; | 857 | fid = data->acpi_data.states[i].status & EXT_FID_MASK; |
892 | vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK; | 858 | vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; |
893 | } else { | 859 | } else { |
894 | fid = data->acpi_data->states[i].control & FID_MASK; | 860 | fid = data->acpi_data.states[i].control & FID_MASK; |
895 | vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK; | 861 | vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; |
896 | } | 862 | } |
897 | 863 | ||
898 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); | 864 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); |
@@ -933,10 +899,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf | |||
933 | cntlofreq = i; | 899 | cntlofreq = i; |
934 | } | 900 | } |
935 | 901 | ||
936 | if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { | 902 | if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { |
937 | printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", | 903 | printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", |
938 | powernow_table[i].frequency, | 904 | powernow_table[i].frequency, |
939 | (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); | 905 | (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); |
940 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; | 906 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; |
941 | continue; | 907 | continue; |
942 | } | 908 | } |
@@ -946,12 +912,11 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf | |||
946 | 912 | ||
947 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | 913 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) |
948 | { | 914 | { |
949 | if (data->acpi_data->state_count) | 915 | if (data->acpi_data.state_count) |
950 | acpi_processor_unregister_performance(data->acpi_data, data->cpu); | 916 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); |
951 | } | 917 | } |
952 | 918 | ||
953 | #else | 919 | #else |
954 | static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; } | ||
955 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } | 920 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } |
956 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } | 921 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } |
957 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } | 922 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } |
@@ -1136,7 +1101,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol) | |||
1136 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | 1101 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) |
1137 | { | 1102 | { |
1138 | struct powernow_k8_data *data; | 1103 | struct powernow_k8_data *data; |
1139 | cpumask_t oldmask = CPU_MASK_ALL; | 1104 | cpumask_t oldmask; |
1140 | int rc; | 1105 | int rc; |
1141 | 1106 | ||
1142 | if (!cpu_online(pol->cpu)) | 1107 | if (!cpu_online(pol->cpu)) |
@@ -1209,7 +1174,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1209 | /* run on any CPU again */ | 1174 | /* run on any CPU again */ |
1210 | set_cpus_allowed_ptr(current, &oldmask); | 1175 | set_cpus_allowed_ptr(current, &oldmask); |
1211 | 1176 | ||
1212 | pol->cpus = data->starting_core_affinity; | 1177 | if (cpu_family == CPU_HW_PSTATE) |
1178 | pol->cpus = cpumask_of_cpu(pol->cpu); | ||
1179 | else | ||
1180 | pol->cpus = per_cpu(cpu_core_map, pol->cpu); | ||
1213 | data->available_cores = &(pol->cpus); | 1181 | data->available_cores = &(pol->cpus); |
1214 | 1182 | ||
1215 | /* Take a crude guess here. | 1183 | /* Take a crude guess here. |
@@ -1332,7 +1300,6 @@ static int __cpuinit powernowk8_init(void) | |||
1332 | } | 1300 | } |
1333 | 1301 | ||
1334 | if (supported_cpus == num_online_cpus()) { | 1302 | if (supported_cpus == num_online_cpus()) { |
1335 | powernow_k8_cpu_preinit_acpi(); | ||
1336 | printk(KERN_INFO PFX "Found %d %s " | 1303 | printk(KERN_INFO PFX "Found %d %s " |
1337 | "processors (%d cpu cores) (" VERSION ")\n", | 1304 | "processors (%d cpu cores) (" VERSION ")\n", |
1338 | num_online_nodes(), | 1305 | num_online_nodes(), |
@@ -1349,10 +1316,6 @@ static void __exit powernowk8_exit(void) | |||
1349 | dprintk("exit\n"); | 1316 | dprintk("exit\n"); |
1350 | 1317 | ||
1351 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1318 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
1352 | |||
1353 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | ||
1354 | free_percpu(acpi_perf_data); | ||
1355 | #endif | ||
1356 | } | 1319 | } |
1357 | 1320 | ||
1358 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); | 1321 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index a62612cd4be8..ab48cfed4d96 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -33,13 +33,12 @@ struct powernow_k8_data { | |||
33 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | 33 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI |
34 | /* the acpi table needs to be kept. it's only available if ACPI was | 34 | /* the acpi table needs to be kept. it's only available if ACPI was |
35 | * used to determine valid frequency/vid/fid states */ | 35 | * used to determine valid frequency/vid/fid states */ |
36 | struct acpi_processor_performance *acpi_data; | 36 | struct acpi_processor_performance acpi_data; |
37 | #endif | 37 | #endif |
38 | /* we need to keep track of associated cores, but let cpufreq | 38 | /* we need to keep track of associated cores, but let cpufreq |
39 | * handle hotplug events - so just point at cpufreq pol->cpus | 39 | * handle hotplug events - so just point at cpufreq pol->cpus |
40 | * structure */ | 40 | * structure */ |
41 | cpumask_t *available_cores; | 41 | cpumask_t *available_cores; |
42 | cpumask_t starting_core_affinity; | ||
43 | }; | 42 | }; |
44 | 43 | ||
45 | 44 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 15e13c01cc36..3b5f06423e77 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #include <asm/cpufeature.h> | 26 | #include <asm/cpufeature.h> |
27 | 27 | ||
28 | #define PFX "speedstep-centrino: " | 28 | #define PFX "speedstep-centrino: " |
29 | #define MAINTAINER "cpufreq@lists.linux.org.uk" | 29 | #define MAINTAINER "cpufreq@vger.kernel.org" |
30 | 30 | ||
31 | #define dprintk(msg...) \ | 31 | #define dprintk(msg...) \ |
32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | 32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06a..898a5a2002ed 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -15,13 +15,11 @@ | |||
15 | /* | 15 | /* |
16 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU | 16 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU |
17 | */ | 17 | */ |
18 | static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | 18 | static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) |
19 | { | 19 | { |
20 | unsigned char ccr2, ccr3; | 20 | unsigned char ccr2, ccr3; |
21 | unsigned long flags; | ||
22 | 21 | ||
23 | /* we test for DEVID by checking whether CCR3 is writable */ | 22 | /* we test for DEVID by checking whether CCR3 is writable */ |
24 | local_irq_save(flags); | ||
25 | ccr3 = getCx86(CX86_CCR3); | 23 | ccr3 = getCx86(CX86_CCR3); |
26 | setCx86(CX86_CCR3, ccr3 ^ 0x80); | 24 | setCx86(CX86_CCR3, ccr3 ^ 0x80); |
27 | getCx86(0xc0); /* dummy to change bus */ | 25 | getCx86(0xc0); /* dummy to change bus */ |
@@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | |||
44 | *dir0 = getCx86(CX86_DIR0); | 42 | *dir0 = getCx86(CX86_DIR0); |
45 | *dir1 = getCx86(CX86_DIR1); | 43 | *dir1 = getCx86(CX86_DIR1); |
46 | } | 44 | } |
47 | local_irq_restore(flags); | ||
48 | } | 45 | } |
49 | 46 | ||
47 | static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | ||
48 | { | ||
49 | unsigned long flags; | ||
50 | |||
51 | local_irq_save(flags); | ||
52 | __do_cyrix_devid(dir0, dir1); | ||
53 | local_irq_restore(flags); | ||
54 | } | ||
50 | /* | 55 | /* |
51 | * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in | 56 | * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in |
52 | * order to identify the Cyrix CPU model after we're out of setup.c | 57 | * order to identify the Cyrix CPU model after we're out of setup.c |
@@ -134,23 +139,6 @@ static void __cpuinit set_cx86_memwb(void) | |||
134 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); | 139 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); |
135 | } | 140 | } |
136 | 141 | ||
137 | static void __cpuinit set_cx86_inc(void) | ||
138 | { | ||
139 | unsigned char ccr3; | ||
140 | |||
141 | printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); | ||
142 | |||
143 | ccr3 = getCx86(CX86_CCR3); | ||
144 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
145 | /* PCR1 -- Performance Control */ | ||
146 | /* Incrementor on, whatever that is */ | ||
147 | setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); | ||
148 | /* PCR0 -- Performance Control */ | ||
149 | /* Incrementor Margin 10 */ | ||
150 | setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); | ||
151 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
152 | } | ||
153 | |||
154 | /* | 142 | /* |
155 | * Configure later MediaGX and/or Geode processor. | 143 | * Configure later MediaGX and/or Geode processor. |
156 | */ | 144 | */ |
@@ -174,11 +162,28 @@ static void __cpuinit geode_configure(void) | |||
174 | 162 | ||
175 | set_cx86_memwb(); | 163 | set_cx86_memwb(); |
176 | set_cx86_reorder(); | 164 | set_cx86_reorder(); |
177 | set_cx86_inc(); | ||
178 | 165 | ||
179 | local_irq_restore(flags); | 166 | local_irq_restore(flags); |
180 | } | 167 | } |
181 | 168 | ||
169 | static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) | ||
170 | { | ||
171 | unsigned char dir0, dir0_msn, dir1 = 0; | ||
172 | |||
173 | __do_cyrix_devid(&dir0, &dir1); | ||
174 | dir0_msn = dir0 >> 4; /* identifies CPU "family" */ | ||
175 | |||
176 | switch (dir0_msn) { | ||
177 | case 3: /* 6x86/6x86L */ | ||
178 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
179 | set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); | ||
180 | break; | ||
181 | case 5: /* 6x86MX/M II */ | ||
182 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
183 | set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); | ||
184 | break; | ||
185 | } | ||
186 | } | ||
182 | 187 | ||
183 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | 188 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) |
184 | { | 189 | { |
@@ -434,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) | |||
434 | static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { | 439 | static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { |
435 | .c_vendor = "Cyrix", | 440 | .c_vendor = "Cyrix", |
436 | .c_ident = { "CyrixInstead" }, | 441 | .c_ident = { "CyrixInstead" }, |
442 | .c_early_init = early_init_cyrix, | ||
437 | .c_init = init_cyrix, | 443 | .c_init = init_cyrix, |
438 | .c_identify = cyrix_identify, | 444 | .c_identify = cyrix_identify, |
439 | }; | 445 | }; |
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index 0bf4d37a0483..b96b69545fbf 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c | |||
@@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { | |||
39 | NULL, NULL, NULL, NULL, | 39 | NULL, NULL, NULL, NULL, |
40 | "constant_tsc", "up", NULL, "arch_perfmon", | 40 | "constant_tsc", "up", NULL, "arch_perfmon", |
41 | "pebs", "bts", NULL, NULL, | 41 | "pebs", "bts", NULL, NULL, |
42 | "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 42 | "rep_good", NULL, NULL, NULL, |
43 | "nopl", NULL, NULL, NULL, | ||
43 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 44 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
44 | 45 | ||
45 | /* Intel-defined (#2) */ | 46 | /* Intel-defined (#2) */ |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b75f2569b8f8..f113ef4595f6 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
222 | set_cpu_cap(c, X86_FEATURE_BTS); | 222 | set_cpu_cap(c, X86_FEATURE_BTS); |
223 | if (!(l1 & (1<<12))) | 223 | if (!(l1 & (1<<12))) |
224 | set_cpu_cap(c, X86_FEATURE_PEBS); | 224 | set_cpu_cap(c, X86_FEATURE_PEBS); |
225 | ds_init_intel(c); | ||
225 | } | 226 | } |
226 | 227 | ||
227 | if (cpu_has_bts) | 228 | if (cpu_has_bts) |
228 | ds_init_intel(c); | 229 | ptrace_bts_init_intel(c); |
229 | 230 | ||
230 | /* | 231 | /* |
231 | * See if we have a good local APIC by checking for buggy Pentia, | 232 | * See if we have a good local APIC by checking for buggy Pentia, |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 65a339678ece..726a5fcdf341 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = { | |||
759 | }; | 759 | }; |
760 | 760 | ||
761 | DEFINE_PER_CPU(struct sys_device, device_mce); | 761 | DEFINE_PER_CPU(struct sys_device, device_mce); |
762 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; | ||
762 | 763 | ||
763 | /* Why are there no generic functions for this? */ | 764 | /* Why are there no generic functions for this? */ |
764 | #define ACCESSOR(name, var, start) \ | 765 | #define ACCESSOR(name, var, start) \ |
@@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | |||
883 | case CPU_ONLINE: | 884 | case CPU_ONLINE: |
884 | case CPU_ONLINE_FROZEN: | 885 | case CPU_ONLINE_FROZEN: |
885 | mce_create_device(cpu); | 886 | mce_create_device(cpu); |
887 | if (threshold_cpu_callback) | ||
888 | threshold_cpu_callback(action, cpu); | ||
886 | break; | 889 | break; |
887 | case CPU_DEAD: | 890 | case CPU_DEAD: |
888 | case CPU_DEAD_FROZEN: | 891 | case CPU_DEAD_FROZEN: |
892 | if (threshold_cpu_callback) | ||
893 | threshold_cpu_callback(action, cpu); | ||
889 | mce_remove_device(cpu); | 894 | mce_remove_device(cpu); |
890 | break; | 895 | break; |
891 | } | 896 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 88736cadbaa6..5eb390a4b2e9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
628 | deallocate_threshold_block(cpu, bank); | 628 | deallocate_threshold_block(cpu, bank); |
629 | 629 | ||
630 | free_out: | 630 | free_out: |
631 | kobject_del(b->kobj); | ||
631 | kobject_put(b->kobj); | 632 | kobject_put(b->kobj); |
632 | kfree(b); | 633 | kfree(b); |
633 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 634 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
@@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu) | |||
645 | } | 646 | } |
646 | 647 | ||
647 | /* get notified when a cpu comes on/off */ | 648 | /* get notified when a cpu comes on/off */ |
648 | static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, | 649 | static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, |
649 | unsigned long action, void *hcpu) | 650 | unsigned int cpu) |
650 | { | 651 | { |
651 | /* cpu was unsigned int to begin with */ | ||
652 | unsigned int cpu = (unsigned long)hcpu; | ||
653 | |||
654 | if (cpu >= NR_CPUS) | 652 | if (cpu >= NR_CPUS) |
655 | goto out; | 653 | return; |
656 | 654 | ||
657 | switch (action) { | 655 | switch (action) { |
658 | case CPU_ONLINE: | 656 | case CPU_ONLINE: |
@@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, | |||
666 | default: | 664 | default: |
667 | break; | 665 | break; |
668 | } | 666 | } |
669 | out: | ||
670 | return NOTIFY_OK; | ||
671 | } | 667 | } |
672 | 668 | ||
673 | static struct notifier_block threshold_cpu_notifier __cpuinitdata = { | ||
674 | .notifier_call = threshold_cpu_callback, | ||
675 | }; | ||
676 | |||
677 | static __init int threshold_init_device(void) | 669 | static __init int threshold_init_device(void) |
678 | { | 670 | { |
679 | unsigned lcpu = 0; | 671 | unsigned lcpu = 0; |
@@ -684,7 +676,7 @@ static __init int threshold_init_device(void) | |||
684 | if (err) | 676 | if (err) |
685 | return err; | 677 | return err; |
686 | } | 678 | } |
687 | register_hotcpu_notifier(&threshold_cpu_notifier); | 679 | threshold_cpu_callback = amd_64_threshold_cpu_callback; |
688 | return 0; | 680 | return 0; |
689 | } | 681 | } |
690 | 682 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d9eacd..4e8d77f01eeb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
379 | unsigned long *size, mtrr_type *type) | 379 | unsigned long *size, mtrr_type *type) |
380 | { | 380 | { |
381 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | 381 | unsigned int mask_lo, mask_hi, base_lo, base_hi; |
382 | unsigned int tmp, hi; | ||
382 | 383 | ||
383 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 384 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
384 | if ((mask_lo & 0x800) == 0) { | 385 | if ((mask_lo & 0x800) == 0) { |
@@ -392,8 +393,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
392 | rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); | 393 | rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); |
393 | 394 | ||
394 | /* Work out the shifted address mask. */ | 395 | /* Work out the shifted address mask. */ |
395 | mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) | 396 | tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; |
396 | | mask_lo >> PAGE_SHIFT; | 397 | mask_lo = size_or_mask | tmp; |
398 | /* Expand tmp with high bits to all 1s*/ | ||
399 | hi = fls(tmp); | ||
400 | if (hi > 0) { | ||
401 | tmp |= ~((1<<(hi - 1)) - 1); | ||
402 | |||
403 | if (tmp != mask_lo) { | ||
404 | WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); | ||
405 | mask_lo = tmp; | ||
406 | } | ||
407 | } | ||
397 | 408 | ||
398 | /* This works correctly if size is a power of two, i.e. a | 409 | /* This works correctly if size is a power of two, i.e. a |
399 | contiguous range. */ | 410 | contiguous range. */ |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 84c480bb3715..4c4214690dd1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
405 | } | 405 | } |
406 | /* RED-PEN: base can be > 32bit */ | 406 | /* RED-PEN: base can be > 32bit */ |
407 | len += seq_printf(seq, | 407 | len += seq_printf(seq, |
408 | "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", | 408 | "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", |
409 | i, base, base >> (20 - PAGE_SHIFT), size, factor, | 409 | i, base, base >> (20 - PAGE_SHIFT), size, factor, |
410 | mtrr_attrib_to_str(type), mtrr_usage_table[i]); | 410 | mtrr_usage_table[i], mtrr_attrib_to_str(type)); |
411 | } | 411 | } |
412 | } | 412 | } |
413 | return 0; | 413 | return 0; |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b5ade28ca8f8..c78c04821ea1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
759 | /* take out UC ranges */ | 759 | /* take out UC ranges */ |
760 | for (i = 0; i < num_var_ranges; i++) { | 760 | for (i = 0; i < num_var_ranges; i++) { |
761 | type = range_state[i].type; | 761 | type = range_state[i].type; |
762 | if (type != MTRR_TYPE_UNCACHABLE) | 762 | if (type != MTRR_TYPE_UNCACHABLE && |
763 | type != MTRR_TYPE_WRPROT) | ||
763 | continue; | 764 | continue; |
764 | size = range_state[i].size_pfn; | 765 | size = range_state[i].size_pfn; |
765 | if (!size) | 766 | if (!size) |
@@ -834,7 +835,14 @@ static int __init enable_mtrr_cleanup_setup(char *str) | |||
834 | enable_mtrr_cleanup = 1; | 835 | enable_mtrr_cleanup = 1; |
835 | return 0; | 836 | return 0; |
836 | } | 837 | } |
837 | early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); | 838 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); |
839 | |||
840 | static int __init mtrr_cleanup_debug_setup(char *str) | ||
841 | { | ||
842 | debug_print = 1; | ||
843 | return 0; | ||
844 | } | ||
845 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); | ||
838 | 846 | ||
839 | struct var_mtrr_state { | 847 | struct var_mtrr_state { |
840 | unsigned long range_startk; | 848 | unsigned long range_startk; |
@@ -898,6 +906,27 @@ set_var_mtrr_all(unsigned int address_bits) | |||
898 | } | 906 | } |
899 | } | 907 | } |
900 | 908 | ||
909 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) | ||
910 | { | ||
911 | char factor; | ||
912 | unsigned long base = sizek; | ||
913 | |||
914 | if (base & ((1<<10) - 1)) { | ||
915 | /* not MB alignment */ | ||
916 | factor = 'K'; | ||
917 | } else if (base & ((1<<20) - 1)){ | ||
918 | factor = 'M'; | ||
919 | base >>= 10; | ||
920 | } else { | ||
921 | factor = 'G'; | ||
922 | base >>= 20; | ||
923 | } | ||
924 | |||
925 | *factorp = factor; | ||
926 | |||
927 | return base; | ||
928 | } | ||
929 | |||
901 | static unsigned int __init | 930 | static unsigned int __init |
902 | range_to_mtrr(unsigned int reg, unsigned long range_startk, | 931 | range_to_mtrr(unsigned int reg, unsigned long range_startk, |
903 | unsigned long range_sizek, unsigned char type) | 932 | unsigned long range_sizek, unsigned char type) |
@@ -919,13 +948,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
919 | align = max_align; | 948 | align = max_align; |
920 | 949 | ||
921 | sizek = 1 << align; | 950 | sizek = 1 << align; |
922 | if (debug_print) | 951 | if (debug_print) { |
952 | char start_factor = 'K', size_factor = 'K'; | ||
953 | unsigned long start_base, size_base; | ||
954 | |||
955 | start_base = to_size_factor(range_startk, &start_factor), | ||
956 | size_base = to_size_factor(sizek, &size_factor), | ||
957 | |||
923 | printk(KERN_DEBUG "Setting variable MTRR %d, " | 958 | printk(KERN_DEBUG "Setting variable MTRR %d, " |
924 | "base: %ldMB, range: %ldMB, type %s\n", | 959 | "base: %ld%cB, range: %ld%cB, type %s\n", |
925 | reg, range_startk >> 10, sizek >> 10, | 960 | reg, start_base, start_factor, |
961 | size_base, size_factor, | ||
926 | (type == MTRR_TYPE_UNCACHABLE)?"UC": | 962 | (type == MTRR_TYPE_UNCACHABLE)?"UC": |
927 | ((type == MTRR_TYPE_WRBACK)?"WB":"Other") | 963 | ((type == MTRR_TYPE_WRBACK)?"WB":"Other") |
928 | ); | 964 | ); |
965 | } | ||
929 | save_var_mtrr(reg++, range_startk, sizek, type); | 966 | save_var_mtrr(reg++, range_startk, sizek, type); |
930 | range_startk += sizek; | 967 | range_startk += sizek; |
931 | range_sizek -= sizek; | 968 | range_sizek -= sizek; |
@@ -970,6 +1007,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
970 | /* try to append some small hole */ | 1007 | /* try to append some small hole */ |
971 | range0_basek = state->range_startk; | 1008 | range0_basek = state->range_startk; |
972 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | 1009 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); |
1010 | |||
1011 | /* no increase */ | ||
973 | if (range0_sizek == state->range_sizek) { | 1012 | if (range0_sizek == state->range_sizek) { |
974 | if (debug_print) | 1013 | if (debug_print) |
975 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | 1014 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", |
@@ -980,13 +1019,40 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
980 | return 0; | 1019 | return 0; |
981 | } | 1020 | } |
982 | 1021 | ||
983 | range0_sizek -= chunk_sizek; | 1022 | /* only cut back, when it is not the last */ |
984 | if (range0_sizek && sizek) { | 1023 | if (sizek) { |
985 | while (range0_basek + range0_sizek > (basek + sizek)) { | 1024 | while (range0_basek + range0_sizek > (basek + sizek)) { |
986 | range0_sizek -= chunk_sizek; | 1025 | if (range0_sizek >= chunk_sizek) |
987 | if (!range0_sizek) | 1026 | range0_sizek -= chunk_sizek; |
988 | break; | 1027 | else |
989 | } | 1028 | range0_sizek = 0; |
1029 | |||
1030 | if (!range0_sizek) | ||
1031 | break; | ||
1032 | } | ||
1033 | } | ||
1034 | |||
1035 | second_try: | ||
1036 | range_basek = range0_basek + range0_sizek; | ||
1037 | |||
1038 | /* one hole in the middle */ | ||
1039 | if (range_basek > basek && range_basek <= (basek + sizek)) | ||
1040 | second_sizek = range_basek - basek; | ||
1041 | |||
1042 | if (range0_sizek > state->range_sizek) { | ||
1043 | |||
1044 | /* one hole in middle or at end */ | ||
1045 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; | ||
1046 | |||
1047 | /* hole size should be less than half of range0 size */ | ||
1048 | if (hole_sizek >= (range0_sizek >> 1) && | ||
1049 | range0_sizek >= chunk_sizek) { | ||
1050 | range0_sizek -= chunk_sizek; | ||
1051 | second_sizek = 0; | ||
1052 | hole_sizek = 0; | ||
1053 | |||
1054 | goto second_try; | ||
1055 | } | ||
990 | } | 1056 | } |
991 | 1057 | ||
992 | if (range0_sizek) { | 1058 | if (range0_sizek) { |
@@ -996,50 +1062,28 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
996 | (range0_basek + range0_sizek)<<10); | 1062 | (range0_basek + range0_sizek)<<10); |
997 | state->reg = range_to_mtrr(state->reg, range0_basek, | 1063 | state->reg = range_to_mtrr(state->reg, range0_basek, |
998 | range0_sizek, MTRR_TYPE_WRBACK); | 1064 | range0_sizek, MTRR_TYPE_WRBACK); |
999 | |||
1000 | } | ||
1001 | |||
1002 | range_basek = range0_basek + range0_sizek; | ||
1003 | range_sizek = chunk_sizek; | ||
1004 | |||
1005 | if (range_basek + range_sizek > basek && | ||
1006 | range_basek + range_sizek <= (basek + sizek)) { | ||
1007 | /* one hole */ | ||
1008 | second_basek = basek; | ||
1009 | second_sizek = range_basek + range_sizek - basek; | ||
1010 | } | 1065 | } |
1011 | 1066 | ||
1012 | /* if last piece, only could one hole near end */ | 1067 | if (range0_sizek < state->range_sizek) { |
1013 | if ((second_basek || !basek) && | 1068 | /* need to handle left over */ |
1014 | range_sizek - (state->range_sizek - range0_sizek) - second_sizek < | ||
1015 | (chunk_sizek >> 1)) { | ||
1016 | /* | ||
1017 | * one hole in middle (second_sizek is 0) or at end | ||
1018 | * (second_sizek is 0 ) | ||
1019 | */ | ||
1020 | hole_sizek = range_sizek - (state->range_sizek - range0_sizek) | ||
1021 | - second_sizek; | ||
1022 | hole_basek = range_basek + range_sizek - hole_sizek | ||
1023 | - second_sizek; | ||
1024 | } else { | ||
1025 | /* fallback for big hole, or several holes */ | ||
1026 | range_sizek = state->range_sizek - range0_sizek; | 1069 | range_sizek = state->range_sizek - range0_sizek; |
1027 | second_basek = 0; | 1070 | |
1028 | second_sizek = 0; | 1071 | if (debug_print) |
1072 | printk(KERN_DEBUG "range: %016lx - %016lx\n", | ||
1073 | range_basek<<10, | ||
1074 | (range_basek + range_sizek)<<10); | ||
1075 | state->reg = range_to_mtrr(state->reg, range_basek, | ||
1076 | range_sizek, MTRR_TYPE_WRBACK); | ||
1029 | } | 1077 | } |
1030 | 1078 | ||
1031 | if (debug_print) | ||
1032 | printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, | ||
1033 | (range_basek + range_sizek)<<10); | ||
1034 | state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, | ||
1035 | MTRR_TYPE_WRBACK); | ||
1036 | if (hole_sizek) { | 1079 | if (hole_sizek) { |
1080 | hole_basek = range_basek - hole_sizek - second_sizek; | ||
1037 | if (debug_print) | 1081 | if (debug_print) |
1038 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | 1082 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", |
1039 | hole_basek<<10, (hole_basek + hole_sizek)<<10); | 1083 | hole_basek<<10, |
1040 | state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, | 1084 | (hole_basek + hole_sizek)<<10); |
1041 | MTRR_TYPE_UNCACHABLE); | 1085 | state->reg = range_to_mtrr(state->reg, hole_basek, |
1042 | 1086 | hole_sizek, MTRR_TYPE_UNCACHABLE); | |
1043 | } | 1087 | } |
1044 | 1088 | ||
1045 | return second_sizek; | 1089 | return second_sizek; |
@@ -1154,11 +1198,11 @@ struct mtrr_cleanup_result { | |||
1154 | }; | 1198 | }; |
1155 | 1199 | ||
1156 | /* | 1200 | /* |
1157 | * gran_size: 1M, 2M, ..., 2G | 1201 | * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G |
1158 | * chunk size: gran_size, ..., 4G | 1202 | * chunk size: gran_size, ..., 2G |
1159 | * so we need (2+13)*6 | 1203 | * so we need (1+16)*8 |
1160 | */ | 1204 | */ |
1161 | #define NUM_RESULT 90 | 1205 | #define NUM_RESULT 136 |
1162 | #define PSHIFT (PAGE_SHIFT - 10) | 1206 | #define PSHIFT (PAGE_SHIFT - 10) |
1163 | 1207 | ||
1164 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | 1208 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; |
@@ -1168,13 +1212,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | |||
1168 | static int __init mtrr_cleanup(unsigned address_bits) | 1212 | static int __init mtrr_cleanup(unsigned address_bits) |
1169 | { | 1213 | { |
1170 | unsigned long extra_remove_base, extra_remove_size; | 1214 | unsigned long extra_remove_base, extra_remove_size; |
1171 | unsigned long i, base, size, def, dummy; | 1215 | unsigned long base, size, def, dummy; |
1172 | mtrr_type type; | 1216 | mtrr_type type; |
1173 | int nr_range, nr_range_new; | 1217 | int nr_range, nr_range_new; |
1174 | u64 chunk_size, gran_size; | 1218 | u64 chunk_size, gran_size; |
1175 | unsigned long range_sums, range_sums_new; | 1219 | unsigned long range_sums, range_sums_new; |
1176 | int index_good; | 1220 | int index_good; |
1177 | int num_reg_good; | 1221 | int num_reg_good; |
1222 | int i; | ||
1178 | 1223 | ||
1179 | /* extra one for all 0 */ | 1224 | /* extra one for all 0 */ |
1180 | int num[MTRR_NUM_TYPES + 1]; | 1225 | int num[MTRR_NUM_TYPES + 1]; |
@@ -1204,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1204 | continue; | 1249 | continue; |
1205 | if (!size) | 1250 | if (!size) |
1206 | type = MTRR_NUM_TYPES; | 1251 | type = MTRR_NUM_TYPES; |
1252 | if (type == MTRR_TYPE_WRPROT) | ||
1253 | type = MTRR_TYPE_UNCACHABLE; | ||
1207 | num[type]++; | 1254 | num[type]++; |
1208 | } | 1255 | } |
1209 | 1256 | ||
@@ -1216,23 +1263,57 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1216 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1263 | num_var_ranges - num[MTRR_NUM_TYPES]) |
1217 | return 0; | 1264 | return 0; |
1218 | 1265 | ||
1266 | /* print original var MTRRs at first, for debugging: */ | ||
1267 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
1268 | for (i = 0; i < num_var_ranges; i++) { | ||
1269 | char start_factor = 'K', size_factor = 'K'; | ||
1270 | unsigned long start_base, size_base; | ||
1271 | |||
1272 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | ||
1273 | if (!size_base) | ||
1274 | continue; | ||
1275 | |||
1276 | size_base = to_size_factor(size_base, &size_factor), | ||
1277 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | ||
1278 | start_base = to_size_factor(start_base, &start_factor), | ||
1279 | type = range_state[i].type; | ||
1280 | |||
1281 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | ||
1282 | i, start_base, start_factor, | ||
1283 | size_base, size_factor, | ||
1284 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
1285 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
1286 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
1287 | ); | ||
1288 | } | ||
1289 | |||
1219 | memset(range, 0, sizeof(range)); | 1290 | memset(range, 0, sizeof(range)); |
1220 | extra_remove_size = 0; | 1291 | extra_remove_size = 0; |
1221 | if (mtrr_tom2) { | 1292 | extra_remove_base = 1 << (32 - PAGE_SHIFT); |
1222 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | 1293 | if (mtrr_tom2) |
1223 | extra_remove_size = | 1294 | extra_remove_size = |
1224 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | 1295 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; |
1225 | } | ||
1226 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | 1296 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, |
1227 | extra_remove_size); | 1297 | extra_remove_size); |
1298 | /* | ||
1299 | * [0, 1M) should always be coverred by var mtrr with WB | ||
1300 | * and fixed mtrrs should take effective before var mtrr for it | ||
1301 | */ | ||
1302 | nr_range = add_range_with_merge(range, nr_range, 0, | ||
1303 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | ||
1304 | /* sort the ranges */ | ||
1305 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
1306 | |||
1228 | range_sums = sum_ranges(range, nr_range); | 1307 | range_sums = sum_ranges(range, nr_range); |
1229 | printk(KERN_INFO "total RAM coverred: %ldM\n", | 1308 | printk(KERN_INFO "total RAM coverred: %ldM\n", |
1230 | range_sums >> (20 - PAGE_SHIFT)); | 1309 | range_sums >> (20 - PAGE_SHIFT)); |
1231 | 1310 | ||
1232 | if (mtrr_chunk_size && mtrr_gran_size) { | 1311 | if (mtrr_chunk_size && mtrr_gran_size) { |
1233 | int num_reg; | 1312 | int num_reg; |
1313 | char gran_factor, chunk_factor, lose_factor; | ||
1314 | unsigned long gran_base, chunk_base, lose_base; | ||
1234 | 1315 | ||
1235 | debug_print = 1; | 1316 | debug_print++; |
1236 | /* convert ranges to var ranges state */ | 1317 | /* convert ranges to var ranges state */ |
1237 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | 1318 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, |
1238 | mtrr_gran_size); | 1319 | mtrr_gran_size); |
@@ -1256,34 +1337,48 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1256 | result[i].lose_cover_sizek = | 1337 | result[i].lose_cover_sizek = |
1257 | (range_sums - range_sums_new) << PSHIFT; | 1338 | (range_sums - range_sums_new) << PSHIFT; |
1258 | 1339 | ||
1259 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | 1340 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
1260 | result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, | 1341 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
1261 | result[i].chunk_sizek >> 10); | 1342 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
1262 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", | 1343 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
1344 | result[i].bad?"*BAD*":" ", | ||
1345 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1346 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1263 | result[i].num_reg, result[i].bad?"-":"", | 1347 | result[i].num_reg, result[i].bad?"-":"", |
1264 | result[i].lose_cover_sizek >> 10); | 1348 | lose_base, lose_factor); |
1265 | if (!result[i].bad) { | 1349 | if (!result[i].bad) { |
1266 | set_var_mtrr_all(address_bits); | 1350 | set_var_mtrr_all(address_bits); |
1267 | return 1; | 1351 | return 1; |
1268 | } | 1352 | } |
1269 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | 1353 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " |
1270 | "will find optimal one\n"); | 1354 | "will find optimal one\n"); |
1271 | debug_print = 0; | 1355 | debug_print--; |
1272 | memset(result, 0, sizeof(result[0])); | 1356 | memset(result, 0, sizeof(result[0])); |
1273 | } | 1357 | } |
1274 | 1358 | ||
1275 | i = 0; | 1359 | i = 0; |
1276 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | 1360 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); |
1277 | memset(result, 0, sizeof(result)); | 1361 | memset(result, 0, sizeof(result)); |
1278 | for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { | 1362 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { |
1279 | for (chunk_size = gran_size; chunk_size < (1ULL<<33); | 1363 | char gran_factor; |
1364 | unsigned long gran_base; | ||
1365 | |||
1366 | if (debug_print) | ||
1367 | gran_base = to_size_factor(gran_size >> 10, &gran_factor); | ||
1368 | |||
1369 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | ||
1280 | chunk_size <<= 1) { | 1370 | chunk_size <<= 1) { |
1281 | int num_reg; | 1371 | int num_reg; |
1282 | 1372 | ||
1283 | if (debug_print) | 1373 | if (debug_print) { |
1284 | printk(KERN_INFO | 1374 | char chunk_factor; |
1285 | "\ngran_size: %lldM chunk_size_size: %lldM\n", | 1375 | unsigned long chunk_base; |
1286 | gran_size >> 20, chunk_size >> 20); | 1376 | |
1377 | chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), | ||
1378 | printk(KERN_INFO "\n"); | ||
1379 | printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", | ||
1380 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1381 | } | ||
1287 | if (i >= NUM_RESULT) | 1382 | if (i >= NUM_RESULT) |
1288 | continue; | 1383 | continue; |
1289 | 1384 | ||
@@ -1326,12 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1326 | 1421 | ||
1327 | /* print out all */ | 1422 | /* print out all */ |
1328 | for (i = 0; i < NUM_RESULT; i++) { | 1423 | for (i = 0; i < NUM_RESULT; i++) { |
1329 | printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", | 1424 | char gran_factor, chunk_factor, lose_factor; |
1330 | result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, | 1425 | unsigned long gran_base, chunk_base, lose_base; |
1331 | result[i].chunk_sizek >> 10); | 1426 | |
1332 | printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", | 1427 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
1333 | result[i].num_reg, result[i].bad?"-":"", | 1428 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
1334 | result[i].lose_cover_sizek >> 10); | 1429 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
1430 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1431 | result[i].bad?"*BAD*":" ", | ||
1432 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1433 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1434 | result[i].num_reg, result[i].bad?"-":"", | ||
1435 | lose_base, lose_factor); | ||
1335 | } | 1436 | } |
1336 | 1437 | ||
1337 | /* try to find the optimal index */ | 1438 | /* try to find the optimal index */ |
@@ -1339,10 +1440,8 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1339 | nr_mtrr_spare_reg = num_var_ranges - 1; | 1440 | nr_mtrr_spare_reg = num_var_ranges - 1; |
1340 | num_reg_good = -1; | 1441 | num_reg_good = -1; |
1341 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | 1442 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { |
1342 | if (!min_loss_pfn[i]) { | 1443 | if (!min_loss_pfn[i]) |
1343 | num_reg_good = i; | 1444 | num_reg_good = i; |
1344 | break; | ||
1345 | } | ||
1346 | } | 1445 | } |
1347 | 1446 | ||
1348 | index_good = -1; | 1447 | index_good = -1; |
@@ -1358,21 +1457,26 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1358 | } | 1457 | } |
1359 | 1458 | ||
1360 | if (index_good != -1) { | 1459 | if (index_good != -1) { |
1460 | char gran_factor, chunk_factor, lose_factor; | ||
1461 | unsigned long gran_base, chunk_base, lose_base; | ||
1462 | |||
1361 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | 1463 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); |
1362 | i = index_good; | 1464 | i = index_good; |
1363 | printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", | 1465 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
1364 | result[i].gran_sizek >> 10, | 1466 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
1365 | result[i].chunk_sizek >> 10); | 1467 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
1366 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", | 1468 | printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", |
1367 | result[i].num_reg, | 1469 | gran_base, gran_factor, chunk_base, chunk_factor); |
1368 | result[i].lose_cover_sizek >> 10); | 1470 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", |
1471 | result[i].num_reg, lose_base, lose_factor); | ||
1369 | /* convert ranges to var ranges state */ | 1472 | /* convert ranges to var ranges state */ |
1370 | chunk_size = result[i].chunk_sizek; | 1473 | chunk_size = result[i].chunk_sizek; |
1371 | chunk_size <<= 10; | 1474 | chunk_size <<= 10; |
1372 | gran_size = result[i].gran_sizek; | 1475 | gran_size = result[i].gran_sizek; |
1373 | gran_size <<= 10; | 1476 | gran_size <<= 10; |
1374 | debug_print = 1; | 1477 | debug_print++; |
1375 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | 1478 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
1479 | debug_print--; | ||
1376 | set_var_mtrr_all(address_bits); | 1480 | set_var_mtrr_all(address_bits); |
1377 | return 1; | 1481 | return 1; |
1378 | } | 1482 | } |
@@ -1496,11 +1600,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1496 | 1600 | ||
1497 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 1601 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ |
1498 | if (!highest_pfn) { | 1602 | if (!highest_pfn) { |
1499 | if (!kvm_para_available()) { | 1603 | WARN(!kvm_para_available(), KERN_WARNING |
1500 | printk(KERN_WARNING | ||
1501 | "WARNING: strange, CPU MTRRs all blank?\n"); | 1604 | "WARNING: strange, CPU MTRRs all blank?\n"); |
1502 | WARN_ON(1); | ||
1503 | } | ||
1504 | return 0; | 1605 | return 0; |
1505 | } | 1606 | } |
1506 | 1607 | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index de7439f82b92..6bff382094f5 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz) | |||
295 | /* setup the timer */ | 295 | /* setup the timer */ |
296 | wrmsr(evntsel_msr, evntsel, 0); | 296 | wrmsr(evntsel_msr, evntsel, 0); |
297 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | 297 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); |
298 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
299 | evntsel |= K7_EVNTSEL_ENABLE; | ||
300 | wrmsr(evntsel_msr, evntsel, 0); | ||
301 | 298 | ||
299 | /* initialize the wd struct before enabling */ | ||
302 | wd->perfctr_msr = perfctr_msr; | 300 | wd->perfctr_msr = perfctr_msr; |
303 | wd->evntsel_msr = evntsel_msr; | 301 | wd->evntsel_msr = evntsel_msr; |
304 | wd->cccr_msr = 0; /* unused */ | 302 | wd->cccr_msr = 0; /* unused */ |
303 | |||
304 | /* ok, everything is initialized, announce that we're set */ | ||
305 | cpu_nmi_set_wd_enabled(); | ||
306 | |||
307 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
308 | evntsel |= K7_EVNTSEL_ENABLE; | ||
309 | wrmsr(evntsel_msr, evntsel, 0); | ||
310 | |||
305 | return 1; | 311 | return 1; |
306 | } | 312 | } |
307 | 313 | ||
@@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz) | |||
379 | wrmsr(evntsel_msr, evntsel, 0); | 385 | wrmsr(evntsel_msr, evntsel, 0); |
380 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 386 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
381 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | 387 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); |
382 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
383 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
384 | wrmsr(evntsel_msr, evntsel, 0); | ||
385 | 388 | ||
389 | /* initialize the wd struct before enabling */ | ||
386 | wd->perfctr_msr = perfctr_msr; | 390 | wd->perfctr_msr = perfctr_msr; |
387 | wd->evntsel_msr = evntsel_msr; | 391 | wd->evntsel_msr = evntsel_msr; |
388 | wd->cccr_msr = 0; /* unused */ | 392 | wd->cccr_msr = 0; /* unused */ |
393 | |||
394 | /* ok, everything is initialized, announce that we're set */ | ||
395 | cpu_nmi_set_wd_enabled(); | ||
396 | |||
397 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
398 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
399 | wrmsr(evntsel_msr, evntsel, 0); | ||
400 | |||
389 | return 1; | 401 | return 1; |
390 | } | 402 | } |
391 | 403 | ||
@@ -432,6 +444,27 @@ static const struct wd_ops p6_wd_ops = { | |||
432 | #define P4_CCCR_ENABLE (1 << 12) | 444 | #define P4_CCCR_ENABLE (1 << 12) |
433 | #define P4_CCCR_OVF (1 << 31) | 445 | #define P4_CCCR_OVF (1 << 31) |
434 | 446 | ||
447 | #define P4_CONTROLS 18 | ||
448 | static unsigned int p4_controls[18] = { | ||
449 | MSR_P4_BPU_CCCR0, | ||
450 | MSR_P4_BPU_CCCR1, | ||
451 | MSR_P4_BPU_CCCR2, | ||
452 | MSR_P4_BPU_CCCR3, | ||
453 | MSR_P4_MS_CCCR0, | ||
454 | MSR_P4_MS_CCCR1, | ||
455 | MSR_P4_MS_CCCR2, | ||
456 | MSR_P4_MS_CCCR3, | ||
457 | MSR_P4_FLAME_CCCR0, | ||
458 | MSR_P4_FLAME_CCCR1, | ||
459 | MSR_P4_FLAME_CCCR2, | ||
460 | MSR_P4_FLAME_CCCR3, | ||
461 | MSR_P4_IQ_CCCR0, | ||
462 | MSR_P4_IQ_CCCR1, | ||
463 | MSR_P4_IQ_CCCR2, | ||
464 | MSR_P4_IQ_CCCR3, | ||
465 | MSR_P4_IQ_CCCR4, | ||
466 | MSR_P4_IQ_CCCR5, | ||
467 | }; | ||
435 | /* | 468 | /* |
436 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | 469 | * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter |
437 | * CRU_ESCR0 (with any non-null event selector) through a complemented | 470 | * CRU_ESCR0 (with any non-null event selector) through a complemented |
@@ -473,12 +506,38 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
473 | evntsel_msr = MSR_P4_CRU_ESCR0; | 506 | evntsel_msr = MSR_P4_CRU_ESCR0; |
474 | cccr_msr = MSR_P4_IQ_CCCR0; | 507 | cccr_msr = MSR_P4_IQ_CCCR0; |
475 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | 508 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); |
509 | |||
510 | /* | ||
511 | * If we're on the kdump kernel or other situation, we may | ||
512 | * still have other performance counter registers set to | ||
513 | * interrupt and they'll keep interrupting forever because | ||
514 | * of the P4_CCCR_OVF quirk. So we need to ACK all the | ||
515 | * pending interrupts and disable all the registers here, | ||
516 | * before reenabling the NMI delivery. Refer to p4_rearm() | ||
517 | * about the P4_CCCR_OVF quirk. | ||
518 | */ | ||
519 | if (reset_devices) { | ||
520 | unsigned int low, high; | ||
521 | int i; | ||
522 | |||
523 | for (i = 0; i < P4_CONTROLS; i++) { | ||
524 | rdmsr(p4_controls[i], low, high); | ||
525 | low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); | ||
526 | wrmsr(p4_controls[i], low, high); | ||
527 | } | ||
528 | } | ||
476 | } else { | 529 | } else { |
477 | /* logical cpu 1 */ | 530 | /* logical cpu 1 */ |
478 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | 531 | perfctr_msr = MSR_P4_IQ_PERFCTR1; |
479 | evntsel_msr = MSR_P4_CRU_ESCR0; | 532 | evntsel_msr = MSR_P4_CRU_ESCR0; |
480 | cccr_msr = MSR_P4_IQ_CCCR1; | 533 | cccr_msr = MSR_P4_IQ_CCCR1; |
481 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | 534 | |
535 | /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ | ||
536 | if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) | ||
537 | cccr_val = P4_CCCR_OVF_PMI0; | ||
538 | else | ||
539 | cccr_val = P4_CCCR_OVF_PMI1; | ||
540 | cccr_val |= P4_CCCR_ESCR_SELECT(4); | ||
482 | } | 541 | } |
483 | 542 | ||
484 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | 543 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
@@ -493,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
493 | wrmsr(evntsel_msr, evntsel, 0); | 552 | wrmsr(evntsel_msr, evntsel, 0); |
494 | wrmsr(cccr_msr, cccr_val, 0); | 553 | wrmsr(cccr_msr, cccr_val, 0); |
495 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | 554 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); |
496 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 555 | |
497 | cccr_val |= P4_CCCR_ENABLE; | ||
498 | wrmsr(cccr_msr, cccr_val, 0); | ||
499 | wd->perfctr_msr = perfctr_msr; | 556 | wd->perfctr_msr = perfctr_msr; |
500 | wd->evntsel_msr = evntsel_msr; | 557 | wd->evntsel_msr = evntsel_msr; |
501 | wd->cccr_msr = cccr_msr; | 558 | wd->cccr_msr = cccr_msr; |
559 | |||
560 | /* ok, everything is initialized, announce that we're set */ | ||
561 | cpu_nmi_set_wd_enabled(); | ||
562 | |||
563 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
564 | cccr_val |= P4_CCCR_ENABLE; | ||
565 | wrmsr(cccr_msr, cccr_val, 0); | ||
502 | return 1; | 566 | return 1; |
503 | } | 567 | } |
504 | 568 | ||
@@ -614,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
614 | wrmsr(evntsel_msr, evntsel, 0); | 678 | wrmsr(evntsel_msr, evntsel, 0); |
615 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 679 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
616 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | 680 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); |
617 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
618 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
619 | wrmsr(evntsel_msr, evntsel, 0); | ||
620 | 681 | ||
621 | wd->perfctr_msr = perfctr_msr; | 682 | wd->perfctr_msr = perfctr_msr; |
622 | wd->evntsel_msr = evntsel_msr; | 683 | wd->evntsel_msr = evntsel_msr; |
623 | wd->cccr_msr = 0; /* unused */ | 684 | wd->cccr_msr = 0; /* unused */ |
685 | |||
686 | /* ok, everything is initialized, announce that we're set */ | ||
687 | cpu_nmi_set_wd_enabled(); | ||
688 | |||
689 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
690 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
691 | wrmsr(evntsel_msr, evntsel, 0); | ||
624 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | 692 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); |
625 | return 1; | 693 | return 1; |
626 | } | 694 | } |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 14b11b3be31c..6a44d6465991 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include <linux/smp_lock.h> | 36 | #include <linux/smp_lock.h> |
37 | #include <linux/major.h> | 37 | #include <linux/major.h> |
38 | #include <linux/fs.h> | 38 | #include <linux/fs.h> |
39 | #include <linux/smp_lock.h> | ||
40 | #include <linux/device.h> | 39 | #include <linux/device.h> |
41 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
42 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
@@ -89,6 +88,8 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
89 | struct cpuid_regs cmd; | 88 | struct cpuid_regs cmd; |
90 | int cpu = iminor(file->f_path.dentry->d_inode); | 89 | int cpu = iminor(file->f_path.dentry->d_inode); |
91 | u64 pos = *ppos; | 90 | u64 pos = *ppos; |
91 | ssize_t bytes = 0; | ||
92 | int err = 0; | ||
92 | 93 | ||
93 | if (count % 16) | 94 | if (count % 16) |
94 | return -EINVAL; /* Invalid chunk size */ | 95 | return -EINVAL; /* Invalid chunk size */ |
@@ -96,14 +97,19 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
96 | for (; count; count -= 16) { | 97 | for (; count; count -= 16) { |
97 | cmd.eax = pos; | 98 | cmd.eax = pos; |
98 | cmd.ecx = pos >> 32; | 99 | cmd.ecx = pos >> 32; |
99 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); | 100 | err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); |
100 | if (copy_to_user(tmp, &cmd, 16)) | 101 | if (err) |
101 | return -EFAULT; | 102 | break; |
103 | if (copy_to_user(tmp, &cmd, 16)) { | ||
104 | err = -EFAULT; | ||
105 | break; | ||
106 | } | ||
102 | tmp += 16; | 107 | tmp += 16; |
108 | bytes += 16; | ||
103 | *ppos = ++pos; | 109 | *ppos = ++pos; |
104 | } | 110 | } |
105 | 111 | ||
106 | return tmp - buf; | 112 | return bytes ? bytes : err; |
107 | } | 113 | } |
108 | 114 | ||
109 | static int cpuid_open(struct inode *inode, struct file *file) | 115 | static int cpuid_open(struct inode *inode, struct file *file) |
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a46..e90a60ef10c2 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c | |||
@@ -7,9 +7,8 @@ | |||
7 | 7 | ||
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | #include <linux/crash_dump.h> | 9 | #include <linux/crash_dump.h> |
10 | 10 | #include <linux/uaccess.h> | |
11 | #include <asm/uaccess.h> | 11 | #include <linux/io.h> |
12 | #include <asm/io.h> | ||
13 | 12 | ||
14 | /** | 13 | /** |
15 | * copy_oldmem_page - copy one page from "oldmem" | 14 | * copy_oldmem_page - copy one page from "oldmem" |
@@ -25,7 +24,7 @@ | |||
25 | * in the current kernel. We stitch up a pte, similar to kmap_atomic. | 24 | * in the current kernel. We stitch up a pte, similar to kmap_atomic. |
26 | */ | 25 | */ |
27 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | 26 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, |
28 | size_t csize, unsigned long offset, int userbuf) | 27 | size_t csize, unsigned long offset, int userbuf) |
29 | { | 28 | { |
30 | void *vaddr; | 29 | void *vaddr; |
31 | 30 | ||
@@ -33,14 +32,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | |||
33 | return 0; | 32 | return 0; |
34 | 33 | ||
35 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); | 34 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); |
35 | if (!vaddr) | ||
36 | return -ENOMEM; | ||
36 | 37 | ||
37 | if (userbuf) { | 38 | if (userbuf) { |
38 | if (copy_to_user(buf, (vaddr + offset), csize)) { | 39 | if (copy_to_user(buf, vaddr + offset, csize)) { |
39 | iounmap(vaddr); | 40 | iounmap(vaddr); |
40 | return -EFAULT; | 41 | return -EFAULT; |
41 | } | 42 | } |
42 | } else | 43 | } else |
43 | memcpy(buf, (vaddr + offset), csize); | 44 | memcpy(buf, vaddr + offset, csize); |
44 | 45 | ||
45 | iounmap(vaddr); | 46 | iounmap(vaddr); |
46 | return csize; | 47 | return csize; |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48d..2b69994fd3a8 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -2,26 +2,49 @@ | |||
2 | * Debug Store support | 2 | * Debug Store support |
3 | * | 3 | * |
4 | * This provides a low-level interface to the hardware's Debug Store | 4 | * This provides a low-level interface to the hardware's Debug Store |
5 | * feature that is used for last branch recording (LBR) and | 5 | * feature that is used for branch trace store (BTS) and |
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * Different architectures use a different DS layout/pointer size. | 8 | * It manages: |
9 | * The below functions therefore work on a void*. | 9 | * - per-thread and per-cpu allocation of BTS and PEBS |
10 | * - buffer memory allocation (optional) | ||
11 | * - buffer overflow handling | ||
12 | * - buffer access | ||
10 | * | 13 | * |
14 | * It assumes: | ||
15 | * - get_task_struct on all parameter tasks | ||
16 | * - current is allowed to trace parameter tasks | ||
11 | * | 17 | * |
12 | * Since there is no user for PEBS, yet, only LBR (or branch | ||
13 | * trace store, BTS) is supported. | ||
14 | * | 18 | * |
15 | * | 19 | * Copyright (C) 2007-2008 Intel Corporation. |
16 | * Copyright (C) 2007 Intel Corporation. | 20 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 |
17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
18 | */ | 21 | */ |
19 | 22 | ||
23 | |||
24 | #ifdef CONFIG_X86_DS | ||
25 | |||
20 | #include <asm/ds.h> | 26 | #include <asm/ds.h> |
21 | 27 | ||
22 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
23 | #include <linux/string.h> | 29 | #include <linux/string.h> |
24 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/sched.h> | ||
32 | #include <linux/mm.h> | ||
33 | |||
34 | |||
35 | /* | ||
36 | * The configuration for a particular DS hardware implementation. | ||
37 | */ | ||
38 | struct ds_configuration { | ||
39 | /* the size of the DS structure in bytes */ | ||
40 | unsigned char sizeof_ds; | ||
41 | /* the size of one pointer-typed field in the DS structure in bytes; | ||
42 | this covers the first 8 fields related to buffer management. */ | ||
43 | unsigned char sizeof_field; | ||
44 | /* the size of a BTS/PEBS record in bytes */ | ||
45 | unsigned char sizeof_rec[2]; | ||
46 | }; | ||
47 | static struct ds_configuration ds_cfg; | ||
25 | 48 | ||
26 | 49 | ||
27 | /* | 50 | /* |
@@ -44,378 +67,747 @@ | |||
44 | * (interrupt occurs when write pointer passes interrupt pointer) | 67 | * (interrupt occurs when write pointer passes interrupt pointer) |
45 | * - value to which counter is reset following counter overflow | 68 | * - value to which counter is reset following counter overflow |
46 | * | 69 | * |
47 | * On later architectures, the last branch recording hardware uses | 70 | * Later architectures use 64bit pointers throughout, whereas earlier |
48 | * 64bit pointers even in 32bit mode. | 71 | * architectures use 32bit pointers in 32bit mode. |
49 | * | ||
50 | * | ||
51 | * Branch Trace Store (BTS) records store information about control | ||
52 | * flow changes. They at least provide the following information: | ||
53 | * - source linear address | ||
54 | * - destination linear address | ||
55 | * | 72 | * |
56 | * Netburst supported a predicated bit that had been dropped in later | ||
57 | * architectures. We do not support it. | ||
58 | * | 73 | * |
74 | * We compute the base address for the first 8 fields based on: | ||
75 | * - the field size stored in the DS configuration | ||
76 | * - the relative field position | ||
77 | * - an offset giving the start of the respective region | ||
59 | * | 78 | * |
60 | * In order to abstract from the actual DS and BTS layout, we describe | 79 | * This offset is further used to index various arrays holding |
61 | * the access to the relevant fields. | 80 | * information for BTS and PEBS at the respective index. |
62 | * Thanks to Andi Kleen for proposing this design. | ||
63 | * | 81 | * |
64 | * The implementation, however, is not as general as it might seem. In | 82 | * On later 32bit processors, we only access the lower 32bit of the |
65 | * order to stay somewhat simple and efficient, we assume an | 83 | * 64bit pointer fields. The upper halves will be zeroed out. |
66 | * underlying unsigned type (mostly a pointer type) and we expect the | ||
67 | * field to be at least as big as that type. | ||
68 | */ | 84 | */ |
69 | 85 | ||
70 | /* | 86 | enum ds_field { |
71 | * A special from_ip address to indicate that the BTS record is an | 87 | ds_buffer_base = 0, |
72 | * info record that needs to be interpreted or skipped. | 88 | ds_index, |
73 | */ | 89 | ds_absolute_maximum, |
74 | #define BTS_ESCAPE_ADDRESS (-1) | 90 | ds_interrupt_threshold, |
91 | }; | ||
75 | 92 | ||
76 | /* | 93 | enum ds_qualifier { |
77 | * A field access descriptor | 94 | ds_bts = 0, |
78 | */ | 95 | ds_pebs |
79 | struct access_desc { | ||
80 | unsigned char offset; | ||
81 | unsigned char size; | ||
82 | }; | 96 | }; |
83 | 97 | ||
98 | static inline unsigned long ds_get(const unsigned char *base, | ||
99 | enum ds_qualifier qual, enum ds_field field) | ||
100 | { | ||
101 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
102 | return *(unsigned long *)base; | ||
103 | } | ||
104 | |||
105 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | ||
106 | enum ds_field field, unsigned long value) | ||
107 | { | ||
108 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
109 | (*(unsigned long *)base) = value; | ||
110 | } | ||
111 | |||
112 | |||
84 | /* | 113 | /* |
85 | * The configuration for a particular DS/BTS hardware implementation. | 114 | * Locking is done only for allocating BTS or PEBS resources and for |
115 | * guarding context and buffer memory allocation. | ||
116 | * | ||
117 | * Most functions require the current task to own the ds context part | ||
118 | * they are going to access. All the locking is done when validating | ||
119 | * access to the context. | ||
86 | */ | 120 | */ |
87 | struct ds_configuration { | 121 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); |
88 | /* the DS configuration */ | ||
89 | unsigned char sizeof_ds; | ||
90 | struct access_desc bts_buffer_base; | ||
91 | struct access_desc bts_index; | ||
92 | struct access_desc bts_absolute_maximum; | ||
93 | struct access_desc bts_interrupt_threshold; | ||
94 | /* the BTS configuration */ | ||
95 | unsigned char sizeof_bts; | ||
96 | struct access_desc from_ip; | ||
97 | struct access_desc to_ip; | ||
98 | /* BTS variants used to store additional information like | ||
99 | timestamps */ | ||
100 | struct access_desc info_type; | ||
101 | struct access_desc info_data; | ||
102 | unsigned long debugctl_mask; | ||
103 | }; | ||
104 | 122 | ||
105 | /* | 123 | /* |
106 | * The global configuration used by the below accessor functions | 124 | * Validate that the current task is allowed to access the BTS/PEBS |
125 | * buffer of the parameter task. | ||
126 | * | ||
127 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
107 | */ | 128 | */ |
108 | static struct ds_configuration ds_cfg; | 129 | static inline int ds_validate_access(struct ds_context *context, |
130 | enum ds_qualifier qual) | ||
131 | { | ||
132 | if (!context) | ||
133 | return -EPERM; | ||
134 | |||
135 | if (context->owner[qual] == current) | ||
136 | return 0; | ||
137 | |||
138 | return -EPERM; | ||
139 | } | ||
140 | |||
109 | 141 | ||
110 | /* | 142 | /* |
111 | * Accessor functions for some DS and BTS fields using the above | 143 | * We either support (system-wide) per-cpu or per-thread allocation. |
112 | * global ptrace_bts_cfg. | 144 | * We distinguish the two based on the task_struct pointer, where a |
145 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
146 | * | ||
147 | * Allocations are use-counted. As soon as resources are allocated, | ||
148 | * further allocations must be of the same type (per-cpu or | ||
149 | * per-thread). We model this by counting allocations (i.e. the number | ||
150 | * of tracers of a certain type) for one type negatively: | ||
151 | * =0 no tracers | ||
152 | * >0 number of per-thread tracers | ||
153 | * <0 number of per-cpu tracers | ||
154 | * | ||
155 | * The below functions to get and put tracers and to check the | ||
156 | * allocation type require the ds_lock to be held by the caller. | ||
157 | * | ||
158 | * Tracers essentially gives the number of ds contexts for a certain | ||
159 | * type of allocation. | ||
113 | */ | 160 | */ |
114 | static inline unsigned long get_bts_buffer_base(char *base) | 161 | static long tracers; |
162 | |||
163 | static inline void get_tracer(struct task_struct *task) | ||
115 | { | 164 | { |
116 | return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); | 165 | tracers += (task ? 1 : -1); |
117 | } | 166 | } |
118 | static inline void set_bts_buffer_base(char *base, unsigned long value) | 167 | |
168 | static inline void put_tracer(struct task_struct *task) | ||
119 | { | 169 | { |
120 | (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; | 170 | tracers -= (task ? 1 : -1); |
121 | } | 171 | } |
122 | static inline unsigned long get_bts_index(char *base) | 172 | |
173 | static inline int check_tracer(struct task_struct *task) | ||
123 | { | 174 | { |
124 | return *(unsigned long *)(base + ds_cfg.bts_index.offset); | 175 | return (task ? (tracers >= 0) : (tracers <= 0)); |
125 | } | 176 | } |
126 | static inline void set_bts_index(char *base, unsigned long value) | 177 | |
178 | |||
179 | /* | ||
180 | * The DS context is either attached to a thread or to a cpu: | ||
181 | * - in the former case, the thread_struct contains a pointer to the | ||
182 | * attached context. | ||
183 | * - in the latter case, we use a static array of per-cpu context | ||
184 | * pointers. | ||
185 | * | ||
186 | * Contexts are use-counted. They are allocated on first access and | ||
187 | * deallocated when the last user puts the context. | ||
188 | * | ||
189 | * We distinguish between an allocating and a non-allocating get of a | ||
190 | * context: | ||
191 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
192 | * requires the caller to hold the global ds_lock. | ||
193 | * - the non-allocating get is used for all other cases. A | ||
194 | * non-existing context indicates an error. It acquires and releases | ||
195 | * the ds_lock itself for obtaining the context. | ||
196 | * | ||
197 | * A context and its DS configuration are allocated and deallocated | ||
198 | * together. A context always has a DS configuration of the | ||
199 | * appropriate size. | ||
200 | */ | ||
201 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
202 | |||
203 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
204 | |||
205 | /* | ||
206 | * Returns the pointer to the parameter task's context or to the | ||
207 | * system-wide context, if task is NULL. | ||
208 | * | ||
209 | * Increases the use count of the returned context, if not NULL. | ||
210 | */ | ||
211 | static inline struct ds_context *ds_get_context(struct task_struct *task) | ||
127 | { | 212 | { |
128 | (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; | 213 | struct ds_context *context; |
214 | |||
215 | spin_lock(&ds_lock); | ||
216 | |||
217 | context = (task ? task->thread.ds_ctx : this_system_context); | ||
218 | if (context) | ||
219 | context->count++; | ||
220 | |||
221 | spin_unlock(&ds_lock); | ||
222 | |||
223 | return context; | ||
129 | } | 224 | } |
130 | static inline unsigned long get_bts_absolute_maximum(char *base) | 225 | |
226 | /* | ||
227 | * Same as ds_get_context, but allocates the context and its DS | ||
228 | * structure, if necessary; returns NULL, if out of memory. | ||
229 | * | ||
230 | * pre: requires ds_lock to be held | ||
231 | */ | ||
232 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
131 | { | 233 | { |
132 | return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); | 234 | struct ds_context **p_context = |
235 | (task ? &task->thread.ds_ctx : &this_system_context); | ||
236 | struct ds_context *context = *p_context; | ||
237 | |||
238 | if (!context) { | ||
239 | context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
240 | |||
241 | if (!context) | ||
242 | return NULL; | ||
243 | |||
244 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
245 | if (!context->ds) { | ||
246 | kfree(context); | ||
247 | return NULL; | ||
248 | } | ||
249 | |||
250 | *p_context = context; | ||
251 | |||
252 | context->this = p_context; | ||
253 | context->task = task; | ||
254 | |||
255 | if (task) | ||
256 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
257 | |||
258 | if (!task || (task == current)) | ||
259 | wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); | ||
260 | |||
261 | get_tracer(task); | ||
262 | } | ||
263 | |||
264 | context->count++; | ||
265 | |||
266 | return context; | ||
133 | } | 267 | } |
134 | static inline void set_bts_absolute_maximum(char *base, unsigned long value) | 268 | |
269 | /* | ||
270 | * Decreases the use count of the parameter context, if not NULL. | ||
271 | * Deallocates the context, if the use count reaches zero. | ||
272 | */ | ||
273 | static inline void ds_put_context(struct ds_context *context) | ||
135 | { | 274 | { |
136 | (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; | 275 | if (!context) |
276 | return; | ||
277 | |||
278 | spin_lock(&ds_lock); | ||
279 | |||
280 | if (--context->count) | ||
281 | goto out; | ||
282 | |||
283 | *(context->this) = NULL; | ||
284 | |||
285 | if (context->task) | ||
286 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
287 | |||
288 | if (!context->task || (context->task == current)) | ||
289 | wrmsrl(MSR_IA32_DS_AREA, 0); | ||
290 | |||
291 | put_tracer(context->task); | ||
292 | |||
293 | /* free any leftover buffers from tracers that did not | ||
294 | * deallocate them properly. */ | ||
295 | kfree(context->buffer[ds_bts]); | ||
296 | kfree(context->buffer[ds_pebs]); | ||
297 | kfree(context->ds); | ||
298 | kfree(context); | ||
299 | out: | ||
300 | spin_unlock(&ds_lock); | ||
137 | } | 301 | } |
138 | static inline unsigned long get_bts_interrupt_threshold(char *base) | 302 | |
303 | |||
304 | /* | ||
305 | * Handle a buffer overflow | ||
306 | * | ||
307 | * task: the task whose buffers are overflowing; | ||
308 | * NULL for a buffer overflow on the current cpu | ||
309 | * context: the ds context | ||
310 | * qual: the buffer type | ||
311 | */ | ||
312 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | ||
313 | enum ds_qualifier qual) | ||
139 | { | 314 | { |
140 | return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); | 315 | if (!context) |
316 | return; | ||
317 | |||
318 | if (context->callback[qual]) | ||
319 | (*context->callback[qual])(task); | ||
320 | |||
321 | /* todo: do some more overflow handling */ | ||
141 | } | 322 | } |
142 | static inline void set_bts_interrupt_threshold(char *base, unsigned long value) | 323 | |
324 | |||
325 | /* | ||
326 | * Allocate a non-pageable buffer of the parameter size. | ||
327 | * Checks the memory and the locked memory rlimit. | ||
328 | * | ||
329 | * Returns the buffer, if successful; | ||
330 | * NULL, if out of memory or rlimit exceeded. | ||
331 | * | ||
332 | * size: the requested buffer size in bytes | ||
333 | * pages (out): if not NULL, contains the number of pages reserved | ||
334 | */ | ||
335 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | ||
143 | { | 336 | { |
144 | (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; | 337 | unsigned long rlim, vm, pgsz; |
338 | void *buffer; | ||
339 | |||
340 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
341 | |||
342 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
343 | vm = current->mm->total_vm + pgsz; | ||
344 | if (rlim < vm) | ||
345 | return NULL; | ||
346 | |||
347 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
348 | vm = current->mm->locked_vm + pgsz; | ||
349 | if (rlim < vm) | ||
350 | return NULL; | ||
351 | |||
352 | buffer = kzalloc(size, GFP_KERNEL); | ||
353 | if (!buffer) | ||
354 | return NULL; | ||
355 | |||
356 | current->mm->total_vm += pgsz; | ||
357 | current->mm->locked_vm += pgsz; | ||
358 | |||
359 | if (pages) | ||
360 | *pages = pgsz; | ||
361 | |||
362 | return buffer; | ||
145 | } | 363 | } |
146 | static inline unsigned long get_from_ip(char *base) | 364 | |
365 | static int ds_request(struct task_struct *task, void *base, size_t size, | ||
366 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | ||
147 | { | 367 | { |
148 | return *(unsigned long *)(base + ds_cfg.from_ip.offset); | 368 | struct ds_context *context; |
369 | unsigned long buffer, adj; | ||
370 | const unsigned long alignment = (1 << 3); | ||
371 | int error = 0; | ||
372 | |||
373 | if (!ds_cfg.sizeof_ds) | ||
374 | return -EOPNOTSUPP; | ||
375 | |||
376 | /* we require some space to do alignment adjustments below */ | ||
377 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | ||
378 | return -EINVAL; | ||
379 | |||
380 | /* buffer overflow notification is not yet implemented */ | ||
381 | if (ovfl) | ||
382 | return -EOPNOTSUPP; | ||
383 | |||
384 | |||
385 | spin_lock(&ds_lock); | ||
386 | |||
387 | if (!check_tracer(task)) | ||
388 | return -EPERM; | ||
389 | |||
390 | error = -ENOMEM; | ||
391 | context = ds_alloc_context(task); | ||
392 | if (!context) | ||
393 | goto out_unlock; | ||
394 | |||
395 | error = -EALREADY; | ||
396 | if (context->owner[qual] == current) | ||
397 | goto out_unlock; | ||
398 | error = -EPERM; | ||
399 | if (context->owner[qual] != NULL) | ||
400 | goto out_unlock; | ||
401 | context->owner[qual] = current; | ||
402 | |||
403 | spin_unlock(&ds_lock); | ||
404 | |||
405 | |||
406 | error = -ENOMEM; | ||
407 | if (!base) { | ||
408 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
409 | if (!base) | ||
410 | goto out_release; | ||
411 | |||
412 | context->buffer[qual] = base; | ||
413 | } | ||
414 | error = 0; | ||
415 | |||
416 | context->callback[qual] = ovfl; | ||
417 | |||
418 | /* adjust the buffer address and size to meet alignment | ||
419 | * constraints: | ||
420 | * - buffer is double-word aligned | ||
421 | * - size is multiple of record size | ||
422 | * | ||
423 | * We checked the size at the very beginning; we have enough | ||
424 | * space to do the adjustment. | ||
425 | */ | ||
426 | buffer = (unsigned long)base; | ||
427 | |||
428 | adj = ALIGN(buffer, alignment) - buffer; | ||
429 | buffer += adj; | ||
430 | size -= adj; | ||
431 | |||
432 | size /= ds_cfg.sizeof_rec[qual]; | ||
433 | size *= ds_cfg.sizeof_rec[qual]; | ||
434 | |||
435 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
436 | ds_set(context->ds, qual, ds_index, buffer); | ||
437 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
438 | |||
439 | if (ovfl) { | ||
440 | /* todo: select a suitable interrupt threshold */ | ||
441 | } else | ||
442 | ds_set(context->ds, qual, | ||
443 | ds_interrupt_threshold, buffer + size + 1); | ||
444 | |||
445 | /* we keep the context until ds_release */ | ||
446 | return error; | ||
447 | |||
448 | out_release: | ||
449 | context->owner[qual] = NULL; | ||
450 | ds_put_context(context); | ||
451 | return error; | ||
452 | |||
453 | out_unlock: | ||
454 | spin_unlock(&ds_lock); | ||
455 | ds_put_context(context); | ||
456 | return error; | ||
149 | } | 457 | } |
150 | static inline void set_from_ip(char *base, unsigned long value) | 458 | |
459 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
460 | ds_ovfl_callback_t ovfl) | ||
151 | { | 461 | { |
152 | (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; | 462 | return ds_request(task, base, size, ovfl, ds_bts); |
153 | } | 463 | } |
154 | static inline unsigned long get_to_ip(char *base) | 464 | |
465 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | ||
466 | ds_ovfl_callback_t ovfl) | ||
155 | { | 467 | { |
156 | return *(unsigned long *)(base + ds_cfg.to_ip.offset); | 468 | return ds_request(task, base, size, ovfl, ds_pebs); |
157 | } | 469 | } |
158 | static inline void set_to_ip(char *base, unsigned long value) | 470 | |
471 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
159 | { | 472 | { |
160 | (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; | 473 | struct ds_context *context; |
474 | int error; | ||
475 | |||
476 | context = ds_get_context(task); | ||
477 | error = ds_validate_access(context, qual); | ||
478 | if (error < 0) | ||
479 | goto out; | ||
480 | |||
481 | kfree(context->buffer[qual]); | ||
482 | context->buffer[qual] = NULL; | ||
483 | |||
484 | current->mm->total_vm -= context->pages[qual]; | ||
485 | current->mm->locked_vm -= context->pages[qual]; | ||
486 | context->pages[qual] = 0; | ||
487 | context->owner[qual] = NULL; | ||
488 | |||
489 | /* | ||
490 | * we put the context twice: | ||
491 | * once for the ds_get_context | ||
492 | * once for the corresponding ds_request | ||
493 | */ | ||
494 | ds_put_context(context); | ||
495 | out: | ||
496 | ds_put_context(context); | ||
497 | return error; | ||
161 | } | 498 | } |
162 | static inline unsigned char get_info_type(char *base) | 499 | |
500 | int ds_release_bts(struct task_struct *task) | ||
163 | { | 501 | { |
164 | return *(unsigned char *)(base + ds_cfg.info_type.offset); | 502 | return ds_release(task, ds_bts); |
165 | } | 503 | } |
166 | static inline void set_info_type(char *base, unsigned char value) | 504 | |
505 | int ds_release_pebs(struct task_struct *task) | ||
167 | { | 506 | { |
168 | (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; | 507 | return ds_release(task, ds_pebs); |
169 | } | 508 | } |
170 | static inline unsigned long get_info_data(char *base) | 509 | |
510 | static int ds_get_index(struct task_struct *task, size_t *pos, | ||
511 | enum ds_qualifier qual) | ||
171 | { | 512 | { |
172 | return *(unsigned long *)(base + ds_cfg.info_data.offset); | 513 | struct ds_context *context; |
514 | unsigned long base, index; | ||
515 | int error; | ||
516 | |||
517 | context = ds_get_context(task); | ||
518 | error = ds_validate_access(context, qual); | ||
519 | if (error < 0) | ||
520 | goto out; | ||
521 | |||
522 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
523 | index = ds_get(context->ds, qual, ds_index); | ||
524 | |||
525 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | ||
526 | if (pos) | ||
527 | *pos = error; | ||
528 | out: | ||
529 | ds_put_context(context); | ||
530 | return error; | ||
173 | } | 531 | } |
174 | static inline void set_info_data(char *base, unsigned long value) | 532 | |
533 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | ||
175 | { | 534 | { |
176 | (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; | 535 | return ds_get_index(task, pos, ds_bts); |
177 | } | 536 | } |
178 | 537 | ||
538 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
539 | { | ||
540 | return ds_get_index(task, pos, ds_pebs); | ||
541 | } | ||
179 | 542 | ||
180 | int ds_allocate(void **dsp, size_t bts_size_in_bytes) | 543 | static int ds_get_end(struct task_struct *task, size_t *pos, |
544 | enum ds_qualifier qual) | ||
181 | { | 545 | { |
182 | size_t bts_size_in_records; | 546 | struct ds_context *context; |
183 | unsigned long bts; | 547 | unsigned long base, end; |
184 | void *ds; | 548 | int error; |
549 | |||
550 | context = ds_get_context(task); | ||
551 | error = ds_validate_access(context, qual); | ||
552 | if (error < 0) | ||
553 | goto out; | ||
554 | |||
555 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
556 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
557 | |||
558 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | ||
559 | if (pos) | ||
560 | *pos = error; | ||
561 | out: | ||
562 | ds_put_context(context); | ||
563 | return error; | ||
564 | } | ||
185 | 565 | ||
186 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 566 | int ds_get_bts_end(struct task_struct *task, size_t *pos) |
187 | return -EOPNOTSUPP; | 567 | { |
568 | return ds_get_end(task, pos, ds_bts); | ||
569 | } | ||
188 | 570 | ||
189 | if (bts_size_in_bytes < 0) | 571 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) |
190 | return -EINVAL; | 572 | { |
573 | return ds_get_end(task, pos, ds_pebs); | ||
574 | } | ||
191 | 575 | ||
192 | bts_size_in_records = | 576 | static int ds_access(struct task_struct *task, size_t index, |
193 | bts_size_in_bytes / ds_cfg.sizeof_bts; | 577 | const void **record, enum ds_qualifier qual) |
194 | bts_size_in_bytes = | 578 | { |
195 | bts_size_in_records * ds_cfg.sizeof_bts; | 579 | struct ds_context *context; |
580 | unsigned long base, idx; | ||
581 | int error; | ||
196 | 582 | ||
197 | if (bts_size_in_bytes <= 0) | 583 | if (!record) |
198 | return -EINVAL; | 584 | return -EINVAL; |
199 | 585 | ||
200 | bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); | 586 | context = ds_get_context(task); |
201 | 587 | error = ds_validate_access(context, qual); | |
202 | if (!bts) | 588 | if (error < 0) |
203 | return -ENOMEM; | 589 | goto out; |
204 | 590 | ||
205 | ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | 591 | base = ds_get(context->ds, qual, ds_buffer_base); |
592 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
206 | 593 | ||
207 | if (!ds) { | 594 | error = -EINVAL; |
208 | kfree((void *)bts); | 595 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) |
209 | return -ENOMEM; | 596 | goto out; |
210 | } | ||
211 | |||
212 | set_bts_buffer_base(ds, bts); | ||
213 | set_bts_index(ds, bts); | ||
214 | set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); | ||
215 | set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); | ||
216 | 597 | ||
217 | *dsp = ds; | 598 | *record = (const void *)idx; |
218 | return 0; | 599 | error = ds_cfg.sizeof_rec[qual]; |
600 | out: | ||
601 | ds_put_context(context); | ||
602 | return error; | ||
219 | } | 603 | } |
220 | 604 | ||
221 | int ds_free(void **dsp) | 605 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) |
222 | { | 606 | { |
223 | if (*dsp) { | 607 | return ds_access(task, index, record, ds_bts); |
224 | kfree((void *)get_bts_buffer_base(*dsp)); | ||
225 | kfree(*dsp); | ||
226 | *dsp = NULL; | ||
227 | } | ||
228 | return 0; | ||
229 | } | 608 | } |
230 | 609 | ||
231 | int ds_get_bts_size(void *ds) | 610 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) |
232 | { | 611 | { |
233 | int size_in_bytes; | 612 | return ds_access(task, index, record, ds_pebs); |
234 | |||
235 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
236 | return -EOPNOTSUPP; | ||
237 | |||
238 | if (!ds) | ||
239 | return 0; | ||
240 | |||
241 | size_in_bytes = | ||
242 | get_bts_absolute_maximum(ds) - | ||
243 | get_bts_buffer_base(ds); | ||
244 | return size_in_bytes; | ||
245 | } | 613 | } |
246 | 614 | ||
247 | int ds_get_bts_end(void *ds) | 615 | static int ds_write(struct task_struct *task, const void *record, size_t size, |
616 | enum ds_qualifier qual, int force) | ||
248 | { | 617 | { |
249 | int size_in_bytes = ds_get_bts_size(ds); | 618 | struct ds_context *context; |
250 | 619 | int error; | |
251 | if (size_in_bytes <= 0) | ||
252 | return size_in_bytes; | ||
253 | 620 | ||
254 | return size_in_bytes / ds_cfg.sizeof_bts; | 621 | if (!record) |
255 | } | 622 | return -EINVAL; |
256 | 623 | ||
257 | int ds_get_bts_index(void *ds) | 624 | error = -EPERM; |
258 | { | 625 | context = ds_get_context(task); |
259 | int index_offset_in_bytes; | 626 | if (!context) |
627 | goto out; | ||
260 | 628 | ||
261 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 629 | if (!force) { |
262 | return -EOPNOTSUPP; | 630 | error = ds_validate_access(context, qual); |
631 | if (error < 0) | ||
632 | goto out; | ||
633 | } | ||
263 | 634 | ||
264 | index_offset_in_bytes = | 635 | error = 0; |
265 | get_bts_index(ds) - | 636 | while (size) { |
266 | get_bts_buffer_base(ds); | 637 | unsigned long base, index, end, write_end, int_th; |
638 | unsigned long write_size, adj_write_size; | ||
639 | |||
640 | /* | ||
641 | * write as much as possible without producing an | ||
642 | * overflow interrupt. | ||
643 | * | ||
644 | * interrupt_threshold must either be | ||
645 | * - bigger than absolute_maximum or | ||
646 | * - point to a record between buffer_base and absolute_maximum | ||
647 | * | ||
648 | * index points to a valid record. | ||
649 | */ | ||
650 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
651 | index = ds_get(context->ds, qual, ds_index); | ||
652 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
653 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
654 | |||
655 | write_end = min(end, int_th); | ||
656 | |||
657 | /* if we are already beyond the interrupt threshold, | ||
658 | * we fill the entire buffer */ | ||
659 | if (write_end <= index) | ||
660 | write_end = end; | ||
661 | |||
662 | if (write_end <= index) | ||
663 | goto out; | ||
664 | |||
665 | write_size = min((unsigned long) size, write_end - index); | ||
666 | memcpy((void *)index, record, write_size); | ||
667 | |||
668 | record = (const char *)record + write_size; | ||
669 | size -= write_size; | ||
670 | error += write_size; | ||
671 | |||
672 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
673 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
674 | |||
675 | /* zero out trailing bytes */ | ||
676 | memset((char *)index + write_size, 0, | ||
677 | adj_write_size - write_size); | ||
678 | index += adj_write_size; | ||
679 | |||
680 | if (index >= end) | ||
681 | index = base; | ||
682 | ds_set(context->ds, qual, ds_index, index); | ||
683 | |||
684 | if (index >= int_th) | ||
685 | ds_overflow(task, context, qual); | ||
686 | } | ||
267 | 687 | ||
268 | return index_offset_in_bytes / ds_cfg.sizeof_bts; | 688 | out: |
689 | ds_put_context(context); | ||
690 | return error; | ||
269 | } | 691 | } |
270 | 692 | ||
271 | int ds_set_overflow(void *ds, int method) | 693 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) |
272 | { | 694 | { |
273 | switch (method) { | 695 | return ds_write(task, record, size, ds_bts, /* force = */ 0); |
274 | case DS_O_SIGNAL: | ||
275 | return -EOPNOTSUPP; | ||
276 | case DS_O_WRAP: | ||
277 | return 0; | ||
278 | default: | ||
279 | return -EINVAL; | ||
280 | } | ||
281 | } | 696 | } |
282 | 697 | ||
283 | int ds_get_overflow(void *ds) | 698 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) |
284 | { | 699 | { |
285 | return DS_O_WRAP; | 700 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); |
286 | } | 701 | } |
287 | 702 | ||
288 | int ds_clear(void *ds) | 703 | int ds_unchecked_write_bts(struct task_struct *task, |
704 | const void *record, size_t size) | ||
289 | { | 705 | { |
290 | int bts_size = ds_get_bts_size(ds); | 706 | return ds_write(task, record, size, ds_bts, /* force = */ 1); |
291 | unsigned long bts_base; | ||
292 | |||
293 | if (bts_size <= 0) | ||
294 | return bts_size; | ||
295 | |||
296 | bts_base = get_bts_buffer_base(ds); | ||
297 | memset((void *)bts_base, 0, bts_size); | ||
298 | |||
299 | set_bts_index(ds, bts_base); | ||
300 | return 0; | ||
301 | } | 707 | } |
302 | 708 | ||
303 | int ds_read_bts(void *ds, int index, struct bts_struct *out) | 709 | int ds_unchecked_write_pebs(struct task_struct *task, |
710 | const void *record, size_t size) | ||
304 | { | 711 | { |
305 | void *bts; | 712 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); |
713 | } | ||
306 | 714 | ||
307 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 715 | static int ds_reset_or_clear(struct task_struct *task, |
308 | return -EOPNOTSUPP; | 716 | enum ds_qualifier qual, int clear) |
717 | { | ||
718 | struct ds_context *context; | ||
719 | unsigned long base, end; | ||
720 | int error; | ||
309 | 721 | ||
310 | if (index < 0) | 722 | context = ds_get_context(task); |
311 | return -EINVAL; | 723 | error = ds_validate_access(context, qual); |
724 | if (error < 0) | ||
725 | goto out; | ||
312 | 726 | ||
313 | if (index >= ds_get_bts_size(ds)) | 727 | base = ds_get(context->ds, qual, ds_buffer_base); |
314 | return -EINVAL; | 728 | end = ds_get(context->ds, qual, ds_absolute_maximum); |
315 | 729 | ||
316 | bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); | 730 | if (clear) |
731 | memset((void *)base, 0, end - base); | ||
317 | 732 | ||
318 | memset(out, 0, sizeof(*out)); | 733 | ds_set(context->ds, qual, ds_index, base); |
319 | if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { | ||
320 | out->qualifier = get_info_type(bts); | ||
321 | out->variant.jiffies = get_info_data(bts); | ||
322 | } else { | ||
323 | out->qualifier = BTS_BRANCH; | ||
324 | out->variant.lbr.from_ip = get_from_ip(bts); | ||
325 | out->variant.lbr.to_ip = get_to_ip(bts); | ||
326 | } | ||
327 | 734 | ||
328 | return sizeof(*out);; | 735 | error = 0; |
736 | out: | ||
737 | ds_put_context(context); | ||
738 | return error; | ||
329 | } | 739 | } |
330 | 740 | ||
331 | int ds_write_bts(void *ds, const struct bts_struct *in) | 741 | int ds_reset_bts(struct task_struct *task) |
332 | { | 742 | { |
333 | unsigned long bts; | 743 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); |
334 | 744 | } | |
335 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
336 | return -EOPNOTSUPP; | ||
337 | |||
338 | if (ds_get_bts_size(ds) <= 0) | ||
339 | return -ENXIO; | ||
340 | 745 | ||
341 | bts = get_bts_index(ds); | 746 | int ds_reset_pebs(struct task_struct *task) |
747 | { | ||
748 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | ||
749 | } | ||
342 | 750 | ||
343 | memset((void *)bts, 0, ds_cfg.sizeof_bts); | 751 | int ds_clear_bts(struct task_struct *task) |
344 | switch (in->qualifier) { | 752 | { |
345 | case BTS_INVALID: | 753 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); |
346 | break; | 754 | } |
347 | 755 | ||
348 | case BTS_BRANCH: | 756 | int ds_clear_pebs(struct task_struct *task) |
349 | set_from_ip((void *)bts, in->variant.lbr.from_ip); | 757 | { |
350 | set_to_ip((void *)bts, in->variant.lbr.to_ip); | 758 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); |
351 | break; | 759 | } |
352 | 760 | ||
353 | case BTS_TASK_ARRIVES: | 761 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) |
354 | case BTS_TASK_DEPARTS: | 762 | { |
355 | set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); | 763 | struct ds_context *context; |
356 | set_info_type((void *)bts, in->qualifier); | 764 | int error; |
357 | set_info_data((void *)bts, in->variant.jiffies); | ||
358 | break; | ||
359 | 765 | ||
360 | default: | 766 | if (!value) |
361 | return -EINVAL; | 767 | return -EINVAL; |
362 | } | ||
363 | 768 | ||
364 | bts = bts + ds_cfg.sizeof_bts; | 769 | context = ds_get_context(task); |
365 | if (bts >= get_bts_absolute_maximum(ds)) | 770 | error = ds_validate_access(context, ds_pebs); |
366 | bts = get_bts_buffer_base(ds); | 771 | if (error < 0) |
367 | set_bts_index(ds, bts); | 772 | goto out; |
368 | 773 | ||
369 | return ds_cfg.sizeof_bts; | 774 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); |
775 | |||
776 | error = 0; | ||
777 | out: | ||
778 | ds_put_context(context); | ||
779 | return error; | ||
370 | } | 780 | } |
371 | 781 | ||
372 | unsigned long ds_debugctl_mask(void) | 782 | int ds_set_pebs_reset(struct task_struct *task, u64 value) |
373 | { | 783 | { |
374 | return ds_cfg.debugctl_mask; | 784 | struct ds_context *context; |
375 | } | 785 | int error; |
376 | 786 | ||
377 | #ifdef __i386__ | 787 | context = ds_get_context(task); |
378 | static const struct ds_configuration ds_cfg_netburst = { | 788 | error = ds_validate_access(context, ds_pebs); |
379 | .sizeof_ds = 9 * 4, | 789 | if (error < 0) |
380 | .bts_buffer_base = { 0, 4 }, | 790 | goto out; |
381 | .bts_index = { 4, 4 }, | ||
382 | .bts_absolute_maximum = { 8, 4 }, | ||
383 | .bts_interrupt_threshold = { 12, 4 }, | ||
384 | .sizeof_bts = 3 * 4, | ||
385 | .from_ip = { 0, 4 }, | ||
386 | .to_ip = { 4, 4 }, | ||
387 | .info_type = { 4, 1 }, | ||
388 | .info_data = { 8, 4 }, | ||
389 | .debugctl_mask = (1<<2)|(1<<3) | ||
390 | }; | ||
391 | 791 | ||
392 | static const struct ds_configuration ds_cfg_pentium_m = { | 792 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; |
393 | .sizeof_ds = 9 * 4, | 793 | |
394 | .bts_buffer_base = { 0, 4 }, | 794 | error = 0; |
395 | .bts_index = { 4, 4 }, | 795 | out: |
396 | .bts_absolute_maximum = { 8, 4 }, | 796 | ds_put_context(context); |
397 | .bts_interrupt_threshold = { 12, 4 }, | 797 | return error; |
398 | .sizeof_bts = 3 * 4, | 798 | } |
399 | .from_ip = { 0, 4 }, | 799 | |
400 | .to_ip = { 4, 4 }, | 800 | static const struct ds_configuration ds_cfg_var = { |
401 | .info_type = { 4, 1 }, | 801 | .sizeof_ds = sizeof(long) * 12, |
402 | .info_data = { 8, 4 }, | 802 | .sizeof_field = sizeof(long), |
403 | .debugctl_mask = (1<<6)|(1<<7) | 803 | .sizeof_rec[ds_bts] = sizeof(long) * 3, |
804 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | ||
404 | }; | 805 | }; |
405 | #endif /* _i386_ */ | 806 | static const struct ds_configuration ds_cfg_64 = { |
406 | 807 | .sizeof_ds = 8 * 12, | |
407 | static const struct ds_configuration ds_cfg_core2 = { | 808 | .sizeof_field = 8, |
408 | .sizeof_ds = 9 * 8, | 809 | .sizeof_rec[ds_bts] = 8 * 3, |
409 | .bts_buffer_base = { 0, 8 }, | 810 | .sizeof_rec[ds_pebs] = 8 * 10 |
410 | .bts_index = { 8, 8 }, | ||
411 | .bts_absolute_maximum = { 16, 8 }, | ||
412 | .bts_interrupt_threshold = { 24, 8 }, | ||
413 | .sizeof_bts = 3 * 8, | ||
414 | .from_ip = { 0, 8 }, | ||
415 | .to_ip = { 8, 8 }, | ||
416 | .info_type = { 8, 1 }, | ||
417 | .info_data = { 16, 8 }, | ||
418 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
419 | }; | 811 | }; |
420 | 812 | ||
421 | static inline void | 813 | static inline void |
@@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
429 | switch (c->x86) { | 821 | switch (c->x86) { |
430 | case 0x6: | 822 | case 0x6: |
431 | switch (c->x86_model) { | 823 | switch (c->x86_model) { |
432 | #ifdef __i386__ | ||
433 | case 0xD: | 824 | case 0xD: |
434 | case 0xE: /* Pentium M */ | 825 | case 0xE: /* Pentium M */ |
435 | ds_configure(&ds_cfg_pentium_m); | 826 | ds_configure(&ds_cfg_var); |
436 | break; | 827 | break; |
437 | #endif /* _i386_ */ | ||
438 | case 0xF: /* Core2 */ | 828 | case 0xF: /* Core2 */ |
439 | ds_configure(&ds_cfg_core2); | 829 | case 0x1C: /* Atom */ |
830 | ds_configure(&ds_cfg_64); | ||
440 | break; | 831 | break; |
441 | default: | 832 | default: |
442 | /* sorry, don't know about them */ | 833 | /* sorry, don't know about them */ |
@@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
445 | break; | 836 | break; |
446 | case 0xF: | 837 | case 0xF: |
447 | switch (c->x86_model) { | 838 | switch (c->x86_model) { |
448 | #ifdef __i386__ | ||
449 | case 0x0: | 839 | case 0x0: |
450 | case 0x1: | 840 | case 0x1: |
451 | case 0x2: /* Netburst */ | 841 | case 0x2: /* Netburst */ |
452 | ds_configure(&ds_cfg_netburst); | 842 | ds_configure(&ds_cfg_var); |
453 | break; | 843 | break; |
454 | #endif /* _i386_ */ | ||
455 | default: | 844 | default: |
456 | /* sorry, don't know about them */ | 845 | /* sorry, don't know about them */ |
457 | break; | 846 | break; |
@@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
462 | break; | 851 | break; |
463 | } | 852 | } |
464 | } | 853 | } |
854 | |||
855 | void ds_free(struct ds_context *context) | ||
856 | { | ||
857 | /* This is called when the task owning the parameter context | ||
858 | * is dying. There should not be any user of that context left | ||
859 | * to disturb us, anymore. */ | ||
860 | unsigned long leftovers = context->count; | ||
861 | while (leftovers--) | ||
862 | ds_put_context(context); | ||
863 | } | ||
864 | #endif /* CONFIG_X86_DS */ | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7bb..66e48aa2dd1b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p) | |||
1203 | if (!p) | 1203 | if (!p) |
1204 | return -EINVAL; | 1204 | return -EINVAL; |
1205 | 1205 | ||
1206 | if (!strcmp(p, "exactmap")) { | 1206 | if (!strncmp(p, "exactmap", 8)) { |
1207 | #ifdef CONFIG_CRASH_DUMP | 1207 | #ifdef CONFIG_CRASH_DUMP |
1208 | /* | 1208 | /* |
1209 | * If we are doing a crash dump, we still need to know | 1209 | * If we are doing a crash dump, we still need to know |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fac..24bb5faf5efa 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
95 | 95 | ||
96 | } | 96 | } |
97 | 97 | ||
98 | #ifdef CONFIG_DMAR | ||
99 | static void __init intel_g33_dmar(int num, int slot, int func) | ||
100 | { | ||
101 | struct acpi_table_header *dmar_tbl; | ||
102 | acpi_status status; | ||
103 | |||
104 | status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); | ||
105 | if (ACPI_SUCCESS(status)) { | ||
106 | printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); | ||
107 | dmar_disabled = 1; | ||
108 | } | ||
109 | } | ||
110 | #endif | ||
111 | |||
98 | #define QFLAG_APPLY_ONCE 0x1 | 112 | #define QFLAG_APPLY_ONCE 0x1 |
99 | #define QFLAG_APPLIED 0x2 | 113 | #define QFLAG_APPLIED 0x2 |
100 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 114 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
@@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = { | |||
114 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, | 128 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, |
115 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 129 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
116 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, | 130 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, |
131 | #ifdef CONFIG_DMAR | ||
132 | { PCI_VENDOR_ID_INTEL, 0x29c0, | ||
133 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, | ||
134 | #endif | ||
117 | {} | 135 | {} |
118 | }; | 136 | }; |
119 | 137 | ||
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 06cc8d4254b1..945a31cdd81f 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -414,9 +414,11 @@ void __init efi_init(void) | |||
414 | if (memmap.map == NULL) | 414 | if (memmap.map == NULL) |
415 | printk(KERN_ERR "Could not map the EFI memory map!\n"); | 415 | printk(KERN_ERR "Could not map the EFI memory map!\n"); |
416 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); | 416 | memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); |
417 | |||
417 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) | 418 | if (memmap.desc_size != sizeof(efi_memory_desc_t)) |
418 | printk(KERN_WARNING "Kernel-defined memdesc" | 419 | printk(KERN_WARNING |
419 | "doesn't match the one from EFI!\n"); | 420 | "Kernel-defined memdesc doesn't match the one from EFI!\n"); |
421 | |||
420 | if (add_efi_memmap) | 422 | if (add_efi_memmap) |
421 | do_add_efi_memmap(); | 423 | do_add_efi_memmap(); |
422 | 424 | ||
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 4b63c8e1f13b..5cab48ee61a4 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c | |||
@@ -53,7 +53,7 @@ void efi_call_phys_prelog(void) | |||
53 | * directory. If I have PAE, I just need to duplicate one entry in | 53 | * directory. If I have PAE, I just need to duplicate one entry in |
54 | * page directory. | 54 | * page directory. |
55 | */ | 55 | */ |
56 | cr4 = read_cr4(); | 56 | cr4 = read_cr4_safe(); |
57 | 57 | ||
58 | if (cr4 & X86_CR4_PAE) { | 58 | if (cr4 & X86_CR4_PAE) { |
59 | efi_bak_pg_dir_pointer[0].pgd = | 59 | efi_bak_pg_dir_pointer[0].pgd = |
@@ -91,7 +91,7 @@ void efi_call_phys_epilog(void) | |||
91 | gdt_descr.size = GDT_SIZE - 1; | 91 | gdt_descr.size = GDT_SIZE - 1; |
92 | load_gdt(&gdt_descr); | 92 | load_gdt(&gdt_descr); |
93 | 93 | ||
94 | cr4 = read_cr4(); | 94 | cr4 = read_cr4_safe(); |
95 | 95 | ||
96 | if (cr4 & X86_CR4_PAE) { | 96 | if (cr4 & X86_CR4_PAE) { |
97 | swapper_pg_dir[pgd_index(0)].pgd = | 97 | swapper_pg_dir[pgd_index(0)].pgd = |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 16a93ed7baf1..ae2ffc8a400c 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -286,7 +286,7 @@ static __init void map_low_mmrs(void) | |||
286 | 286 | ||
287 | enum map_type {map_wb, map_uc}; | 287 | enum map_type {map_wb, map_uc}; |
288 | 288 | ||
289 | static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) | 289 | static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) |
290 | { | 290 | { |
291 | unsigned long bytes, paddr; | 291 | unsigned long bytes, paddr; |
292 | 292 | ||
@@ -357,7 +357,9 @@ static __init void uv_rtc_init(void) | |||
357 | sn_rtc_cycles_per_second = ticks_per_sec; | 357 | sn_rtc_cycles_per_second = ticks_per_sec; |
358 | } | 358 | } |
359 | 359 | ||
360 | static __init void uv_system_init(void) | 360 | static bool uv_system_inited; |
361 | |||
362 | void __init uv_system_init(void) | ||
361 | { | 363 | { |
362 | union uvh_si_addr_map_config_u m_n_config; | 364 | union uvh_si_addr_map_config_u m_n_config; |
363 | union uvh_node_id_u node_id; | 365 | union uvh_node_id_u node_id; |
@@ -447,6 +449,7 @@ static __init void uv_system_init(void) | |||
447 | map_mmr_high(max_pnode); | 449 | map_mmr_high(max_pnode); |
448 | map_config_high(max_pnode); | 450 | map_config_high(max_pnode); |
449 | map_mmioh_high(max_pnode); | 451 | map_mmioh_high(max_pnode); |
452 | uv_system_inited = true; | ||
450 | } | 453 | } |
451 | 454 | ||
452 | /* | 455 | /* |
@@ -455,8 +458,7 @@ static __init void uv_system_init(void) | |||
455 | */ | 458 | */ |
456 | void __cpuinit uv_cpu_init(void) | 459 | void __cpuinit uv_cpu_init(void) |
457 | { | 460 | { |
458 | if (!uv_node_to_blade) | 461 | BUG_ON(!uv_system_inited); |
459 | uv_system_init(); | ||
460 | 462 | ||
461 | uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; | 463 | uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; |
462 | 464 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1b318e903bf6..d16084f90649 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
88 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | 88 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); |
89 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | 89 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == |
90 | (__START_KERNEL & PGDIR_MASK))); | 90 | (__START_KERNEL & PGDIR_MASK))); |
91 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | ||
91 | 92 | ||
92 | /* clear bss before set_intr_gate with early_idt_handler */ | 93 | /* clear bss before set_intr_gate with early_idt_handler */ |
93 | clear_bss(); | 94 | clear_bss(); |
@@ -107,12 +108,11 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
107 | } | 108 | } |
108 | load_idt((const struct desc_ptr *)&idt_descr); | 109 | load_idt((const struct desc_ptr *)&idt_descr); |
109 | 110 | ||
110 | early_printk("Kernel alive\n"); | 111 | if (console_loglevel == 10) |
112 | early_printk("Kernel alive\n"); | ||
111 | 113 | ||
112 | x86_64_init_pda(); | 114 | x86_64_init_pda(); |
113 | 115 | ||
114 | early_printk("Kernel really alive\n"); | ||
115 | |||
116 | x86_64_start_reservations(real_mode_data); | 116 | x86_64_start_reservations(real_mode_data); |
117 | } | 117 | } |
118 | 118 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index a7010c3a377a..e835b4eea70b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4 | |||
172 | * | 172 | * |
173 | * Note that the stack is not yet set up! | 173 | * Note that the stack is not yet set up! |
174 | */ | 174 | */ |
175 | #define PTE_ATTR 0x007 /* PRESENT+RW+USER */ | ||
176 | #define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ | ||
177 | #define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ | ||
178 | |||
179 | default_entry: | 175 | default_entry: |
180 | #ifdef CONFIG_X86_PAE | 176 | #ifdef CONFIG_X86_PAE |
181 | 177 | ||
@@ -196,9 +192,9 @@ default_entry: | |||
196 | movl $pa(pg0), %edi | 192 | movl $pa(pg0), %edi |
197 | movl %edi, pa(init_pg_tables_start) | 193 | movl %edi, pa(init_pg_tables_start) |
198 | movl $pa(swapper_pg_pmd), %edx | 194 | movl $pa(swapper_pg_pmd), %edx |
199 | movl $PTE_ATTR, %eax | 195 | movl $PTE_IDENT_ATTR, %eax |
200 | 10: | 196 | 10: |
201 | leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ | 197 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ |
202 | movl %ecx,(%edx) /* Store PMD entry */ | 198 | movl %ecx,(%edx) /* Store PMD entry */ |
203 | /* Upper half already zero */ | 199 | /* Upper half already zero */ |
204 | addl $8,%edx | 200 | addl $8,%edx |
@@ -215,7 +211,7 @@ default_entry: | |||
215 | * End condition: we must map up to and including INIT_MAP_BEYOND_END | 211 | * End condition: we must map up to and including INIT_MAP_BEYOND_END |
216 | * bytes beyond the end of our own page tables. | 212 | * bytes beyond the end of our own page tables. |
217 | */ | 213 | */ |
218 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 214 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
219 | cmpl %ebp,%eax | 215 | cmpl %ebp,%eax |
220 | jb 10b | 216 | jb 10b |
221 | 1: | 217 | 1: |
@@ -224,7 +220,7 @@ default_entry: | |||
224 | movl %eax, pa(max_pfn_mapped) | 220 | movl %eax, pa(max_pfn_mapped) |
225 | 221 | ||
226 | /* Do early initialization of the fixmap area */ | 222 | /* Do early initialization of the fixmap area */ |
227 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 223 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
228 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) | 224 | movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) |
229 | #else /* Not PAE */ | 225 | #else /* Not PAE */ |
230 | 226 | ||
@@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
233 | movl $pa(pg0), %edi | 229 | movl $pa(pg0), %edi |
234 | movl %edi, pa(init_pg_tables_start) | 230 | movl %edi, pa(init_pg_tables_start) |
235 | movl $pa(swapper_pg_dir), %edx | 231 | movl $pa(swapper_pg_dir), %edx |
236 | movl $PTE_ATTR, %eax | 232 | movl $PTE_IDENT_ATTR, %eax |
237 | 10: | 233 | 10: |
238 | leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ | 234 | leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ |
239 | movl %ecx,(%edx) /* Store identity PDE entry */ | 235 | movl %ecx,(%edx) /* Store identity PDE entry */ |
240 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ | 236 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ |
241 | addl $4,%edx | 237 | addl $4,%edx |
@@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
249 | * bytes beyond the end of our own page tables; the +0x007 is | 245 | * bytes beyond the end of our own page tables; the +0x007 is |
250 | * the attribute bits | 246 | * the attribute bits |
251 | */ | 247 | */ |
252 | leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp | 248 | leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp |
253 | cmpl %ebp,%eax | 249 | cmpl %ebp,%eax |
254 | jb 10b | 250 | jb 10b |
255 | movl %edi,pa(init_pg_tables_end) | 251 | movl %edi,pa(init_pg_tables_end) |
@@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
257 | movl %eax, pa(max_pfn_mapped) | 253 | movl %eax, pa(max_pfn_mapped) |
258 | 254 | ||
259 | /* Do early initialization of the fixmap area */ | 255 | /* Do early initialization of the fixmap area */ |
260 | movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax | 256 | movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax |
261 | movl %eax,pa(swapper_pg_dir+0xffc) | 257 | movl %eax,pa(swapper_pg_dir+0xffc) |
262 | #endif | 258 | #endif |
263 | jmp 3f | 259 | jmp 3f |
@@ -634,19 +630,19 @@ ENTRY(empty_zero_page) | |||
634 | /* Page-aligned for the benefit of paravirt? */ | 630 | /* Page-aligned for the benefit of paravirt? */ |
635 | .align PAGE_SIZE_asm | 631 | .align PAGE_SIZE_asm |
636 | ENTRY(swapper_pg_dir) | 632 | ENTRY(swapper_pg_dir) |
637 | .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ | 633 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ |
638 | # if KPMDS == 3 | 634 | # if KPMDS == 3 |
639 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 635 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
640 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 636 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
641 | .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 | 637 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 |
642 | # elif KPMDS == 2 | 638 | # elif KPMDS == 2 |
643 | .long 0,0 | 639 | .long 0,0 |
644 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 640 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
645 | .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 | 641 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 |
646 | # elif KPMDS == 1 | 642 | # elif KPMDS == 1 |
647 | .long 0,0 | 643 | .long 0,0 |
648 | .long 0,0 | 644 | .long 0,0 |
649 | .long pa(swapper_pg_pmd+PGD_ATTR),0 | 645 | .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 |
650 | # else | 646 | # else |
651 | # error "Kernel PMDs should be 1, 2 or 3" | 647 | # error "Kernel PMDs should be 1, 2 or 3" |
652 | # endif | 648 | # endif |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index db3280afe886..26cfdc1d7c7f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -110,7 +110,7 @@ startup_64: | |||
110 | movq %rdi, %rax | 110 | movq %rdi, %rax |
111 | shrq $PMD_SHIFT, %rax | 111 | shrq $PMD_SHIFT, %rax |
112 | andq $(PTRS_PER_PMD - 1), %rax | 112 | andq $(PTRS_PER_PMD - 1), %rax |
113 | leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx | 113 | leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx |
114 | leaq level2_spare_pgt(%rip), %rbx | 114 | leaq level2_spare_pgt(%rip), %rbx |
115 | movq %rdx, 0(%rbx, %rax, 8) | 115 | movq %rdx, 0(%rbx, %rax, 8) |
116 | ident_complete: | 116 | ident_complete: |
@@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt) | |||
374 | /* Since I easily can, map the first 1G. | 374 | /* Since I easily can, map the first 1G. |
375 | * Don't set NX because code runs from these pages. | 375 | * Don't set NX because code runs from these pages. |
376 | */ | 376 | */ |
377 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) | 377 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) |
378 | 378 | ||
379 | NEXT_PAGE(level2_kernel_pgt) | 379 | NEXT_PAGE(level2_kernel_pgt) |
380 | /* | 380 | /* |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ad2b15a1334d..73deaffadd03 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void) | |||
210 | /* Calculate the min / max delta */ | 210 | /* Calculate the min / max delta */ |
211 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | 211 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, |
212 | &hpet_clockevent); | 212 | &hpet_clockevent); |
213 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, | 213 | /* 5 usec minimum reprogramming delta. */ |
214 | &hpet_clockevent); | 214 | hpet_clockevent.min_delta_ns = 5000; |
215 | 215 | ||
216 | /* | 216 | /* |
217 | * Start hpet with the boot cpu mask and make it | 217 | * Start hpet with the boot cpu mask and make it |
@@ -270,15 +270,22 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode, | |||
270 | } | 270 | } |
271 | 271 | ||
272 | static int hpet_legacy_next_event(unsigned long delta, | 272 | static int hpet_legacy_next_event(unsigned long delta, |
273 | struct clock_event_device *evt) | 273 | struct clock_event_device *evt) |
274 | { | 274 | { |
275 | unsigned long cnt; | 275 | u32 cnt; |
276 | 276 | ||
277 | cnt = hpet_readl(HPET_COUNTER); | 277 | cnt = hpet_readl(HPET_COUNTER); |
278 | cnt += delta; | 278 | cnt += (u32) delta; |
279 | hpet_writel(cnt, HPET_T0_CMP); | 279 | hpet_writel(cnt, HPET_T0_CMP); |
280 | 280 | ||
281 | return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; | 281 | /* |
282 | * We need to read back the CMP register to make sure that | ||
283 | * what we wrote hit the chip before we compare it to the | ||
284 | * counter. | ||
285 | */ | ||
286 | WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); | ||
287 | |||
288 | return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | ||
282 | } | 289 | } |
283 | 290 | ||
284 | /* | 291 | /* |
@@ -359,6 +366,7 @@ static int hpet_clocksource_register(void) | |||
359 | int __init hpet_enable(void) | 366 | int __init hpet_enable(void) |
360 | { | 367 | { |
361 | unsigned long id; | 368 | unsigned long id; |
369 | int i; | ||
362 | 370 | ||
363 | if (!is_hpet_capable()) | 371 | if (!is_hpet_capable()) |
364 | return 0; | 372 | return 0; |
@@ -369,6 +377,29 @@ int __init hpet_enable(void) | |||
369 | * Read the period and check for a sane value: | 377 | * Read the period and check for a sane value: |
370 | */ | 378 | */ |
371 | hpet_period = hpet_readl(HPET_PERIOD); | 379 | hpet_period = hpet_readl(HPET_PERIOD); |
380 | |||
381 | /* | ||
382 | * AMD SB700 based systems with spread spectrum enabled use a | ||
383 | * SMM based HPET emulation to provide proper frequency | ||
384 | * setting. The SMM code is initialized with the first HPET | ||
385 | * register access and takes some time to complete. During | ||
386 | * this time the config register reads 0xffffffff. We check | ||
387 | * for max. 1000 loops whether the config register reads a non | ||
388 | * 0xffffffff value to make sure that HPET is up and running | ||
389 | * before we go further. A counting loop is safe, as the HPET | ||
390 | * access takes thousands of CPU cycles. On non SB700 based | ||
391 | * machines this check is only done once and has no side | ||
392 | * effects. | ||
393 | */ | ||
394 | for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { | ||
395 | if (i == 1000) { | ||
396 | printk(KERN_WARNING | ||
397 | "HPET config register value = 0xFFFFFFFF. " | ||
398 | "Disabling HPET\n"); | ||
399 | goto out_nohpet; | ||
400 | } | ||
401 | } | ||
402 | |||
372 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) | 403 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) |
373 | goto out_nohpet; | 404 | goto out_nohpet; |
374 | 405 | ||
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 1c3a66a67f83..720d2607aacb 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c | |||
@@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { | |||
92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") | 92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") |
93 | } | 93 | } |
94 | }, | 94 | }, |
95 | { | ||
96 | .callback = dmi_io_delay_0xed_port, | ||
97 | .ident = "Presario F700", | ||
98 | .matches = { | ||
99 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
100 | DMI_MATCH(DMI_BOARD_NAME, "30D3") | ||
101 | } | ||
102 | }, | ||
95 | { } | 103 | { } |
96 | }; | 104 | }; |
97 | 105 | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1cf8c1fcc088..b71e02d42f4f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -325,7 +325,7 @@ skip: | |||
325 | for_each_online_cpu(j) | 325 | for_each_online_cpu(j) |
326 | seq_printf(p, "%10u ", | 326 | seq_printf(p, "%10u ", |
327 | per_cpu(irq_stat,j).irq_call_count); | 327 | per_cpu(irq_stat,j).irq_call_count); |
328 | seq_printf(p, " function call interrupts\n"); | 328 | seq_printf(p, " Function call interrupts\n"); |
329 | seq_printf(p, "TLB: "); | 329 | seq_printf(p, "TLB: "); |
330 | for_each_online_cpu(j) | 330 | for_each_online_cpu(j) |
331 | seq_printf(p, "%10u ", | 331 | seq_printf(p, "%10u ", |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1f78b238d8d2..f065fe9071b9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -129,7 +129,7 @@ skip: | |||
129 | seq_printf(p, "CAL: "); | 129 | seq_printf(p, "CAL: "); |
130 | for_each_online_cpu(j) | 130 | for_each_online_cpu(j) |
131 | seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); | 131 | seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); |
132 | seq_printf(p, " function call interrupts\n"); | 132 | seq_printf(p, " Function call interrupts\n"); |
133 | seq_printf(p, "TLB: "); | 133 | seq_printf(p, "TLB: "); |
134 | for_each_online_cpu(j) | 134 | for_each_online_cpu(j) |
135 | seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); | 135 | seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); |
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 7377ccb21335..304d8bad6559 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c | |||
@@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges); | |||
16 | static u32 *flush_words; | 16 | static u32 *flush_words; |
17 | 17 | ||
18 | struct pci_device_id k8_nb_ids[] = { | 18 | struct pci_device_id k8_nb_ids[] = { |
19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | 19 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | 20 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
21 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, | ||
21 | {} | 22 | {} |
22 | }; | 23 | }; |
23 | EXPORT_SYMBOL(k8_nb_ids); | 24 | EXPORT_SYMBOL(k8_nb_ids); |
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index f2d43bc75514..ff7d3b0124f1 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
139 | if (PageHighMem(pg)) { | 139 | if (PageHighMem(pg)) { |
140 | data = ioremap_cache(pa_data, sizeof(*data)); | 140 | data = ioremap_cache(pa_data, sizeof(*data)); |
141 | if (!data) { | 141 | if (!data) { |
142 | kfree(node); | ||
142 | error = -ENXIO; | 143 | error = -ENXIO; |
143 | goto err_dir; | 144 | goto err_dir; |
144 | } | 145 | } |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f47f0eb886b8..10435a120d22 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -69,6 +69,9 @@ static int gdb_x86vector = -1; | |||
69 | */ | 69 | */ |
70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
71 | { | 71 | { |
72 | #ifndef CONFIG_X86_32 | ||
73 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
74 | #endif | ||
72 | gdb_regs[GDB_AX] = regs->ax; | 75 | gdb_regs[GDB_AX] = regs->ax; |
73 | gdb_regs[GDB_BX] = regs->bx; | 76 | gdb_regs[GDB_BX] = regs->bx; |
74 | gdb_regs[GDB_CX] = regs->cx; | 77 | gdb_regs[GDB_CX] = regs->cx; |
@@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
76 | gdb_regs[GDB_SI] = regs->si; | 79 | gdb_regs[GDB_SI] = regs->si; |
77 | gdb_regs[GDB_DI] = regs->di; | 80 | gdb_regs[GDB_DI] = regs->di; |
78 | gdb_regs[GDB_BP] = regs->bp; | 81 | gdb_regs[GDB_BP] = regs->bp; |
79 | gdb_regs[GDB_PS] = regs->flags; | ||
80 | gdb_regs[GDB_PC] = regs->ip; | 82 | gdb_regs[GDB_PC] = regs->ip; |
81 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
84 | gdb_regs[GDB_PS] = regs->flags; | ||
82 | gdb_regs[GDB_DS] = regs->ds; | 85 | gdb_regs[GDB_DS] = regs->ds; |
83 | gdb_regs[GDB_ES] = regs->es; | 86 | gdb_regs[GDB_ES] = regs->es; |
84 | gdb_regs[GDB_CS] = regs->cs; | 87 | gdb_regs[GDB_CS] = regs->cs; |
@@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
94 | gdb_regs[GDB_R13] = regs->r13; | 97 | gdb_regs[GDB_R13] = regs->r13; |
95 | gdb_regs[GDB_R14] = regs->r14; | 98 | gdb_regs[GDB_R14] = regs->r14; |
96 | gdb_regs[GDB_R15] = regs->r15; | 99 | gdb_regs[GDB_R15] = regs->r15; |
100 | gdb_regs32[GDB_PS] = regs->flags; | ||
101 | gdb_regs32[GDB_CS] = regs->cs; | ||
102 | gdb_regs32[GDB_SS] = regs->ss; | ||
97 | #endif | 103 | #endif |
98 | gdb_regs[GDB_SP] = regs->sp; | 104 | gdb_regs[GDB_SP] = regs->sp; |
99 | } | 105 | } |
@@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
112 | */ | 118 | */ |
113 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | 119 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) |
114 | { | 120 | { |
121 | #ifndef CONFIG_X86_32 | ||
122 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
123 | #endif | ||
115 | gdb_regs[GDB_AX] = 0; | 124 | gdb_regs[GDB_AX] = 0; |
116 | gdb_regs[GDB_BX] = 0; | 125 | gdb_regs[GDB_BX] = 0; |
117 | gdb_regs[GDB_CX] = 0; | 126 | gdb_regs[GDB_CX] = 0; |
@@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
129 | gdb_regs[GDB_FS] = 0xFFFF; | 138 | gdb_regs[GDB_FS] = 0xFFFF; |
130 | gdb_regs[GDB_GS] = 0xFFFF; | 139 | gdb_regs[GDB_GS] = 0xFFFF; |
131 | #else | 140 | #else |
132 | gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); | 141 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); |
133 | gdb_regs[GDB_PC] = 0; | 142 | gdb_regs32[GDB_CS] = __KERNEL_CS; |
143 | gdb_regs32[GDB_SS] = __KERNEL_DS; | ||
144 | gdb_regs[GDB_PC] = p->thread.ip; | ||
134 | gdb_regs[GDB_R8] = 0; | 145 | gdb_regs[GDB_R8] = 0; |
135 | gdb_regs[GDB_R9] = 0; | 146 | gdb_regs[GDB_R9] = 0; |
136 | gdb_regs[GDB_R10] = 0; | 147 | gdb_regs[GDB_R10] = 0; |
@@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
153 | */ | 164 | */ |
154 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 165 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
155 | { | 166 | { |
167 | #ifndef CONFIG_X86_32 | ||
168 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
169 | #endif | ||
156 | regs->ax = gdb_regs[GDB_AX]; | 170 | regs->ax = gdb_regs[GDB_AX]; |
157 | regs->bx = gdb_regs[GDB_BX]; | 171 | regs->bx = gdb_regs[GDB_BX]; |
158 | regs->cx = gdb_regs[GDB_CX]; | 172 | regs->cx = gdb_regs[GDB_CX]; |
@@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
160 | regs->si = gdb_regs[GDB_SI]; | 174 | regs->si = gdb_regs[GDB_SI]; |
161 | regs->di = gdb_regs[GDB_DI]; | 175 | regs->di = gdb_regs[GDB_DI]; |
162 | regs->bp = gdb_regs[GDB_BP]; | 176 | regs->bp = gdb_regs[GDB_BP]; |
163 | regs->flags = gdb_regs[GDB_PS]; | ||
164 | regs->ip = gdb_regs[GDB_PC]; | 177 | regs->ip = gdb_regs[GDB_PC]; |
165 | #ifdef CONFIG_X86_32 | 178 | #ifdef CONFIG_X86_32 |
179 | regs->flags = gdb_regs[GDB_PS]; | ||
166 | regs->ds = gdb_regs[GDB_DS]; | 180 | regs->ds = gdb_regs[GDB_DS]; |
167 | regs->es = gdb_regs[GDB_ES]; | 181 | regs->es = gdb_regs[GDB_ES]; |
168 | regs->cs = gdb_regs[GDB_CS]; | 182 | regs->cs = gdb_regs[GDB_CS]; |
@@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
175 | regs->r13 = gdb_regs[GDB_R13]; | 189 | regs->r13 = gdb_regs[GDB_R13]; |
176 | regs->r14 = gdb_regs[GDB_R14]; | 190 | regs->r14 = gdb_regs[GDB_R14]; |
177 | regs->r15 = gdb_regs[GDB_R15]; | 191 | regs->r15 = gdb_regs[GDB_R15]; |
192 | regs->flags = gdb_regs32[GDB_PS]; | ||
193 | regs->cs = gdb_regs32[GDB_CS]; | ||
194 | regs->ss = gdb_regs32[GDB_SS]; | ||
178 | #endif | 195 | #endif |
179 | } | 196 | } |
180 | 197 | ||
@@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
378 | if (remcomInBuffer[0] == 's') { | 395 | if (remcomInBuffer[0] == 's') { |
379 | linux_regs->flags |= X86_EFLAGS_TF; | 396 | linux_regs->flags |= X86_EFLAGS_TF; |
380 | kgdb_single_step = 1; | 397 | kgdb_single_step = 1; |
381 | if (kgdb_contthread) { | 398 | atomic_set(&kgdb_cpu_doing_single_step, |
382 | atomic_set(&kgdb_cpu_doing_single_step, | 399 | raw_smp_processor_id()); |
383 | raw_smp_processor_id()); | ||
384 | } | ||
385 | } | 400 | } |
386 | 401 | ||
387 | get_debugreg(dr6, 6); | 402 | get_debugreg(dr6, 6); |
@@ -440,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
440 | return NOTIFY_DONE; | 455 | return NOTIFY_DONE; |
441 | 456 | ||
442 | case DIE_NMI_IPI: | 457 | case DIE_NMI_IPI: |
443 | if (atomic_read(&kgdb_active) != -1) { | 458 | /* Just ignore, we will handle the roundup on DIE_NMI. */ |
444 | /* KGDB CPU roundup */ | ||
445 | kgdb_nmicallback(raw_smp_processor_id(), regs); | ||
446 | was_in_debug_nmi[raw_smp_processor_id()] = 1; | ||
447 | touch_nmi_watchdog(); | ||
448 | } | ||
449 | return NOTIFY_DONE; | 459 | return NOTIFY_DONE; |
450 | 460 | ||
451 | case DIE_NMIUNKNOWN: | 461 | case DIE_NMIUNKNOWN: |
@@ -466,9 +476,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
466 | 476 | ||
467 | case DIE_DEBUG: | 477 | case DIE_DEBUG: |
468 | if (atomic_read(&kgdb_cpu_doing_single_step) == | 478 | if (atomic_read(&kgdb_cpu_doing_single_step) == |
469 | raw_smp_processor_id() && | 479 | raw_smp_processor_id()) { |
470 | user_mode(regs)) | 480 | if (user_mode(regs)) |
471 | return single_step_cont(regs, args); | 481 | return single_step_cont(regs, args); |
482 | break; | ||
483 | } else if (test_thread_flag(TIF_SINGLESTEP)) | ||
484 | /* This means a user thread is single stepping | ||
485 | * a system call which should be ignored | ||
486 | */ | ||
487 | return NOTIFY_DONE; | ||
472 | /* fall through */ | 488 | /* fall through */ |
473 | default: | 489 | default: |
474 | if (user_mode(regs)) | 490 | if (user_mode(regs)) |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b7a3cf37d2b..478bca986eca 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -178,7 +178,7 @@ static void kvm_flush_tlb(void) | |||
178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); | 178 | kvm_deferred_mmu_op(&ftlb, sizeof ftlb); |
179 | } | 179 | } |
180 | 180 | ||
181 | static void kvm_release_pt(u32 pfn) | 181 | static void kvm_release_pt(unsigned long pfn) |
182 | { | 182 | { |
183 | struct kvm_mmu_op_release_pt rpt = { | 183 | struct kvm_mmu_op_release_pt rpt = { |
184 | .header.op = KVM_MMU_OP_RELEASE_PT, | 184 | .header.op = KVM_MMU_OP_RELEASE_PT, |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 9fe478d98406..0732adba05ca 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
15 | #include <linux/suspend.h> | ||
15 | 16 | ||
16 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
17 | #include <asm/pgalloc.h> | 18 | #include <asm/pgalloc.h> |
@@ -78,7 +79,7 @@ static void load_segments(void) | |||
78 | /* | 79 | /* |
79 | * A architecture hook called to validate the | 80 | * A architecture hook called to validate the |
80 | * proposed image and prepare the control pages | 81 | * proposed image and prepare the control pages |
81 | * as needed. The pages for KEXEC_CONTROL_CODE_SIZE | 82 | * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE |
82 | * have been allocated, but the segments have yet | 83 | * have been allocated, but the segments have yet |
83 | * been copied into the kernel. | 84 | * been copied into the kernel. |
84 | * | 85 | * |
@@ -113,6 +114,7 @@ void machine_kexec(struct kimage *image) | |||
113 | { | 114 | { |
114 | unsigned long page_list[PAGES_NR]; | 115 | unsigned long page_list[PAGES_NR]; |
115 | void *control_page; | 116 | void *control_page; |
117 | int save_ftrace_enabled; | ||
116 | asmlinkage unsigned long | 118 | asmlinkage unsigned long |
117 | (*relocate_kernel_ptr)(unsigned long indirection_page, | 119 | (*relocate_kernel_ptr)(unsigned long indirection_page, |
118 | unsigned long control_page, | 120 | unsigned long control_page, |
@@ -120,7 +122,12 @@ void machine_kexec(struct kimage *image) | |||
120 | unsigned int has_pae, | 122 | unsigned int has_pae, |
121 | unsigned int preserve_context); | 123 | unsigned int preserve_context); |
122 | 124 | ||
123 | tracer_disable(); | 125 | #ifdef CONFIG_KEXEC_JUMP |
126 | if (kexec_image->preserve_context) | ||
127 | save_processor_state(); | ||
128 | #endif | ||
129 | |||
130 | save_ftrace_enabled = __ftrace_enabled_save(); | ||
124 | 131 | ||
125 | /* Interrupts aren't acceptable while we reboot */ | 132 | /* Interrupts aren't acceptable while we reboot */ |
126 | local_irq_disable(); | 133 | local_irq_disable(); |
@@ -138,7 +145,7 @@ void machine_kexec(struct kimage *image) | |||
138 | } | 145 | } |
139 | 146 | ||
140 | control_page = page_address(image->control_code_page); | 147 | control_page = page_address(image->control_code_page); |
141 | memcpy(control_page, relocate_kernel, PAGE_SIZE/2); | 148 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
142 | 149 | ||
143 | relocate_kernel_ptr = control_page; | 150 | relocate_kernel_ptr = control_page; |
144 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
@@ -178,6 +185,13 @@ void machine_kexec(struct kimage *image) | |||
178 | (unsigned long)page_list, | 185 | (unsigned long)page_list, |
179 | image->start, cpu_has_pae, | 186 | image->start, cpu_has_pae, |
180 | image->preserve_context); | 187 | image->preserve_context); |
188 | |||
189 | #ifdef CONFIG_KEXEC_JUMP | ||
190 | if (kexec_image->preserve_context) | ||
191 | restore_processor_state(); | ||
192 | #endif | ||
193 | |||
194 | __ftrace_enabled_restore(save_ftrace_enabled); | ||
181 | } | 195 | } |
182 | 196 | ||
183 | void arch_crash_save_vmcoreinfo(void) | 197 | void arch_crash_save_vmcoreinfo(void) |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 07c0f828f488..3b599518c322 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -33,6 +33,8 @@ | |||
33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
34 | #include <asm/geode.h> | 34 | #include <asm/geode.h> |
35 | 35 | ||
36 | #define MFGPT_DEFAULT_IRQ 7 | ||
37 | |||
36 | static struct mfgpt_timer_t { | 38 | static struct mfgpt_timer_t { |
37 | unsigned int avail:1; | 39 | unsigned int avail:1; |
38 | } mfgpt_timers[MFGPT_MAX_TIMERS]; | 40 | } mfgpt_timers[MFGPT_MAX_TIMERS]; |
@@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) | |||
157 | } | 159 | } |
158 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); | 160 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); |
159 | 161 | ||
160 | int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) | 162 | int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) |
161 | { | 163 | { |
162 | u32 val, dummy; | 164 | u32 zsel, lpc, dummy; |
163 | int offset; | 165 | int shift; |
164 | 166 | ||
165 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) | 167 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) |
166 | return -EIO; | 168 | return -EIO; |
167 | 169 | ||
168 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | 170 | /* |
171 | * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA | ||
172 | * is using the same CMP of the timer's Siamese twin, the IRQ is set to | ||
173 | * 2, and we mustn't use nor change it. | ||
174 | * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the | ||
175 | * IRQ of the 1st. This can only happen if forcing an IRQ, calling this | ||
176 | * with *irq==0 is safe. Currently there _are_ no 2 drivers. | ||
177 | */ | ||
178 | rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); | ||
179 | shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; | ||
180 | if (((zsel >> shift) & 0xF) == 2) | ||
169 | return -EIO; | 181 | return -EIO; |
170 | 182 | ||
171 | rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); | 183 | /* Choose IRQ: if none supplied, keep IRQ already set or use default */ |
184 | if (!*irq) | ||
185 | *irq = (zsel >> shift) & 0xF; | ||
186 | if (!*irq) | ||
187 | *irq = MFGPT_DEFAULT_IRQ; | ||
172 | 188 | ||
173 | offset = (timer % 4) * 4; | 189 | /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ |
174 | 190 | if (*irq < 1 || *irq == 2 || *irq > 15) | |
175 | val &= ~((0xF << offset) | (0xF << (offset + 16))); | 191 | return -EIO; |
192 | rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); | ||
193 | if (lpc & (1 << *irq)) | ||
194 | return -EIO; | ||
176 | 195 | ||
196 | /* All chosen and checked - go for it */ | ||
197 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | ||
198 | return -EIO; | ||
177 | if (enable) { | 199 | if (enable) { |
178 | val |= (irq & 0x0F) << (offset); | 200 | zsel = (zsel & ~(0xF << shift)) | (*irq << shift); |
179 | val |= (irq & 0x0F) << (offset + 16); | 201 | wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); |
180 | } | 202 | } |
181 | 203 | ||
182 | wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); | ||
183 | return 0; | 204 | return 0; |
184 | } | 205 | } |
185 | 206 | ||
@@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | |||
242 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; | 263 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; |
243 | static u16 mfgpt_event_clock; | 264 | static u16 mfgpt_event_clock; |
244 | 265 | ||
245 | static int irq = 7; | 266 | static int irq; |
246 | static int __init mfgpt_setup(char *str) | 267 | static int __init mfgpt_setup(char *str) |
247 | { | 268 | { |
248 | get_option(&str, &irq); | 269 | get_option(&str, &irq); |
@@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void) | |||
346 | mfgpt_event_clock = timer; | 367 | mfgpt_event_clock = timer; |
347 | 368 | ||
348 | /* Set up the IRQ on the MFGPT side */ | 369 | /* Set up the IRQ on the MFGPT side */ |
349 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { | 370 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { |
350 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); | 371 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); |
351 | return -EIO; | 372 | return -EIO; |
352 | } | 373 | } |
@@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void) | |||
374 | &mfgpt_clockevent); | 395 | &mfgpt_clockevent); |
375 | 396 | ||
376 | printk(KERN_INFO | 397 | printk(KERN_INFO |
377 | "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); | 398 | "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", |
399 | timer, irq); | ||
378 | clockevents_register_device(&mfgpt_clockevent); | 400 | clockevents_register_device(&mfgpt_clockevent); |
379 | 401 | ||
380 | return 0; | 402 | return 0; |
381 | 403 | ||
382 | err: | 404 | err: |
383 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); | 405 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); |
384 | printk(KERN_ERR | 406 | printk(KERN_ERR |
385 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); | 407 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); |
386 | return -EIO; | 408 | return -EIO; |
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b366..efc2f361fe85 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { | |||
238 | {} | 238 | {} |
239 | }; | 239 | }; |
240 | 240 | ||
241 | void __init check_enable_amd_mmconf_dmi(void) | 241 | void __cpuinit check_enable_amd_mmconf_dmi(void) |
242 | { | 242 | { |
243 | dmi_check_system(mmconf_dmi_table); | 243 | dmi_check_system(mmconf_dmi_table); |
244 | } | 244 | } |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e5d23675bb7c..f98f4e1dba09 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
49 | return sum & 0xFF; | 49 | return sum & 0xFF; |
50 | } | 50 | } |
51 | 51 | ||
52 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | 52 | static void __init MP_processor_info(struct mpc_config_processor *m) |
53 | { | 53 | { |
54 | int apicid; | 54 | int apicid; |
55 | char *bootup_cpu = ""; | 55 | char *bootup_cpu = ""; |
@@ -486,7 +486,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
486 | } | 486 | } |
487 | 487 | ||
488 | 488 | ||
489 | static void construct_ioapic_table(int mpc_default_type) | 489 | static void __init construct_ioapic_table(int mpc_default_type) |
490 | { | 490 | { |
491 | struct mpc_config_ioapic ioapic; | 491 | struct mpc_config_ioapic ioapic; |
492 | struct mpc_config_bus bus; | 492 | struct mpc_config_bus bus; |
@@ -531,7 +531,7 @@ static void construct_ioapic_table(int mpc_default_type) | |||
531 | construct_default_ioirq_mptable(mpc_default_type); | 531 | construct_default_ioirq_mptable(mpc_default_type); |
532 | } | 532 | } |
533 | #else | 533 | #else |
534 | static inline void construct_ioapic_table(int mpc_default_type) { } | 534 | static inline void __init construct_ioapic_table(int mpc_default_type) { } |
535 | #endif | 535 | #endif |
536 | 536 | ||
537 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | 537 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 9fd809552447..2e2af5d18191 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -72,21 +72,28 @@ static ssize_t msr_read(struct file *file, char __user *buf, | |||
72 | u32 data[2]; | 72 | u32 data[2]; |
73 | u32 reg = *ppos; | 73 | u32 reg = *ppos; |
74 | int cpu = iminor(file->f_path.dentry->d_inode); | 74 | int cpu = iminor(file->f_path.dentry->d_inode); |
75 | int err; | 75 | int err = 0; |
76 | ssize_t bytes = 0; | ||
76 | 77 | ||
77 | if (count % 8) | 78 | if (count % 8) |
78 | return -EINVAL; /* Invalid chunk size */ | 79 | return -EINVAL; /* Invalid chunk size */ |
79 | 80 | ||
80 | for (; count; count -= 8) { | 81 | for (; count; count -= 8) { |
81 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); | 82 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); |
82 | if (err) | 83 | if (err) { |
83 | return -EIO; | 84 | if (err == -EFAULT) /* Fix idiotic error code */ |
84 | if (copy_to_user(tmp, &data, 8)) | 85 | err = -EIO; |
85 | return -EFAULT; | 86 | break; |
87 | } | ||
88 | if (copy_to_user(tmp, &data, 8)) { | ||
89 | err = -EFAULT; | ||
90 | break; | ||
91 | } | ||
86 | tmp += 2; | 92 | tmp += 2; |
93 | bytes += 8; | ||
87 | } | 94 | } |
88 | 95 | ||
89 | return ((char __user *)tmp) - buf; | 96 | return bytes ? bytes : err; |
90 | } | 97 | } |
91 | 98 | ||
92 | static ssize_t msr_write(struct file *file, const char __user *buf, | 99 | static ssize_t msr_write(struct file *file, const char __user *buf, |
@@ -96,21 +103,28 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
96 | u32 data[2]; | 103 | u32 data[2]; |
97 | u32 reg = *ppos; | 104 | u32 reg = *ppos; |
98 | int cpu = iminor(file->f_path.dentry->d_inode); | 105 | int cpu = iminor(file->f_path.dentry->d_inode); |
99 | int err; | 106 | int err = 0; |
107 | ssize_t bytes = 0; | ||
100 | 108 | ||
101 | if (count % 8) | 109 | if (count % 8) |
102 | return -EINVAL; /* Invalid chunk size */ | 110 | return -EINVAL; /* Invalid chunk size */ |
103 | 111 | ||
104 | for (; count; count -= 8) { | 112 | for (; count; count -= 8) { |
105 | if (copy_from_user(&data, tmp, 8)) | 113 | if (copy_from_user(&data, tmp, 8)) { |
106 | return -EFAULT; | 114 | err = -EFAULT; |
115 | break; | ||
116 | } | ||
107 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); | 117 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); |
108 | if (err) | 118 | if (err) { |
109 | return -EIO; | 119 | if (err == -EFAULT) /* Fix idiotic error code */ |
120 | err = -EIO; | ||
121 | break; | ||
122 | } | ||
110 | tmp += 2; | 123 | tmp += 2; |
124 | bytes += 8; | ||
111 | } | 125 | } |
112 | 126 | ||
113 | return ((char __user *)tmp) - buf; | 127 | return bytes ? bytes : err; |
114 | } | 128 | } |
115 | 129 | ||
116 | static int msr_open(struct inode *inode, struct file *file) | 130 | static int msr_open(struct inode *inode, struct file *file) |
@@ -131,7 +145,7 @@ static int msr_open(struct inode *inode, struct file *file) | |||
131 | ret = -EIO; /* MSR not supported */ | 145 | ret = -EIO; /* MSR not supported */ |
132 | out: | 146 | out: |
133 | unlock_kernel(); | 147 | unlock_kernel(); |
134 | return 0; | 148 | return ret; |
135 | } | 149 | } |
136 | 150 | ||
137 | /* | 151 | /* |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ac6d51222e7d..2c97f07f1c2c 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data) | |||
114 | } | 114 | } |
115 | #endif | 115 | #endif |
116 | 116 | ||
117 | static void report_broken_nmi(int cpu, int *prev_nmi_count) | ||
118 | { | ||
119 | printk(KERN_CONT "\n"); | ||
120 | |||
121 | printk(KERN_WARNING | ||
122 | "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
123 | cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); | ||
124 | |||
125 | printk(KERN_WARNING | ||
126 | "Please report this to bugzilla.kernel.org,\n"); | ||
127 | printk(KERN_WARNING | ||
128 | "and attach the output of the 'dmesg' command.\n"); | ||
129 | |||
130 | per_cpu(wd_enabled, cpu) = 0; | ||
131 | atomic_dec(&nmi_active); | ||
132 | } | ||
133 | |||
117 | int __init check_nmi_watchdog(void) | 134 | int __init check_nmi_watchdog(void) |
118 | { | 135 | { |
119 | unsigned int *prev_nmi_count; | 136 | unsigned int *prev_nmi_count; |
@@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void) | |||
141 | for_each_online_cpu(cpu) { | 158 | for_each_online_cpu(cpu) { |
142 | if (!per_cpu(wd_enabled, cpu)) | 159 | if (!per_cpu(wd_enabled, cpu)) |
143 | continue; | 160 | continue; |
144 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 161 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) |
145 | printk(KERN_WARNING "WARNING: CPU#%d: NMI " | 162 | report_broken_nmi(cpu, prev_nmi_count); |
146 | "appears to be stuck (%d->%d)!\n", | ||
147 | cpu, | ||
148 | prev_nmi_count[cpu], | ||
149 | get_nmi_count(cpu)); | ||
150 | per_cpu(wd_enabled, cpu) = 0; | ||
151 | atomic_dec(&nmi_active); | ||
152 | } | ||
153 | } | 163 | } |
154 | endflag = 1; | 164 | endflag = 1; |
155 | if (!atomic_read(&nmi_active)) { | 165 | if (!atomic_read(&nmi_active)) { |
@@ -289,6 +299,15 @@ void acpi_nmi_disable(void) | |||
289 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | 299 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
290 | } | 300 | } |
291 | 301 | ||
302 | /* | ||
303 | * This function is called as soon the LAPIC NMI watchdog driver has everything | ||
304 | * in place and it's ready to check if the NMIs belong to the NMI watchdog | ||
305 | */ | ||
306 | void cpu_nmi_set_wd_enabled(void) | ||
307 | { | ||
308 | __get_cpu_var(wd_enabled) = 1; | ||
309 | } | ||
310 | |||
292 | void setup_apic_nmi_watchdog(void *unused) | 311 | void setup_apic_nmi_watchdog(void *unused) |
293 | { | 312 | { |
294 | if (__get_cpu_var(wd_enabled)) | 313 | if (__get_cpu_var(wd_enabled)) |
@@ -301,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused) | |||
301 | 320 | ||
302 | switch (nmi_watchdog) { | 321 | switch (nmi_watchdog) { |
303 | case NMI_LOCAL_APIC: | 322 | case NMI_LOCAL_APIC: |
304 | /* enable it before to avoid race with handler */ | ||
305 | __get_cpu_var(wd_enabled) = 1; | ||
306 | if (lapic_watchdog_init(nmi_hz) < 0) { | 323 | if (lapic_watchdog_init(nmi_hz) < 0) { |
307 | __get_cpu_var(wd_enabled) = 0; | 324 | __get_cpu_var(wd_enabled) = 0; |
308 | return; | 325 | return; |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 2434467ddf72..4caff39078e0 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -73,7 +73,7 @@ static void __init smp_dump_qct(void) | |||
73 | } | 73 | } |
74 | 74 | ||
75 | 75 | ||
76 | void __init numaq_tsc_disable(void) | 76 | void __cpuinit numaq_tsc_disable(void) |
77 | { | 77 | { |
78 | if (!found_numaq) | 78 | if (!found_numaq) |
79 | return; | 79 | return; |
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 3e6672274807..7a13fac63a1f 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
@@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd); | |||
190 | static void __init platform_detect(void) | 190 | static void __init platform_detect(void) |
191 | { | 191 | { |
192 | size_t propsize; | 192 | size_t propsize; |
193 | u32 rev; | 193 | __be32 rev; |
194 | 194 | ||
195 | if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, | 195 | if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, |
196 | &propsize) || propsize != 4) { | 196 | &propsize) || propsize != 4) { |
197 | printk(KERN_ERR "ofw: getprop call failed!\n"); | 197 | printk(KERN_ERR "ofw: getprop call failed!\n"); |
198 | rev = 0; | 198 | rev = cpu_to_be32(0); |
199 | } | 199 | } |
200 | olpc_platform_info.boardrev = be32_to_cpu(rev); | 200 | olpc_platform_info.boardrev = be32_to_cpu(rev); |
201 | } | 201 | } |
@@ -203,7 +203,7 @@ static void __init platform_detect(void) | |||
203 | static void __init platform_detect(void) | 203 | static void __init platform_detect(void) |
204 | { | 204 | { |
205 | /* stopgap until OFW support is added to the kernel */ | 205 | /* stopgap until OFW support is added to the kernel */ |
206 | olpc_platform_info.boardrev = be32_to_cpu(0xc2); | 206 | olpc_platform_info.boardrev = 0xc2; |
207 | } | 207 | } |
208 | #endif | 208 | #endif |
209 | 209 | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5744789a78f4..6b0bb73998dd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
330 | #endif | 330 | #endif |
331 | .wbinvd = native_wbinvd, | 331 | .wbinvd = native_wbinvd, |
332 | .read_msr = native_read_msr_safe, | 332 | .read_msr = native_read_msr_safe, |
333 | .read_msr_amd = native_read_msr_amd_safe, | ||
333 | .write_msr = native_write_msr_safe, | 334 | .write_msr = native_write_msr_safe, |
334 | .read_tsc = native_read_tsc, | 335 | .read_tsc = native_read_tsc, |
335 | .read_pmc = native_read_pmc, | 336 | .read_pmc = native_read_pmc, |
@@ -469,7 +470,7 @@ struct pv_lock_ops pv_lock_ops = { | |||
469 | .spin_unlock = __ticket_spin_unlock, | 470 | .spin_unlock = __ticket_spin_unlock, |
470 | #endif | 471 | #endif |
471 | }; | 472 | }; |
472 | EXPORT_SYMBOL_GPL(pv_lock_ops); | 473 | EXPORT_SYMBOL(pv_lock_ops); |
473 | 474 | ||
474 | EXPORT_SYMBOL_GPL(pv_time_ops); | 475 | EXPORT_SYMBOL_GPL(pv_time_ops); |
475 | EXPORT_SYMBOL (pv_cpu_ops); | 476 | EXPORT_SYMBOL (pv_cpu_ops); |
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 58262218781b..9fe644f4861d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c | |||
@@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | |||
23 | start = start_##ops##_##x; \ | 23 | start = start_##ops##_##x; \ |
24 | end = end_##ops##_##x; \ | 24 | end = end_##ops##_##x; \ |
25 | goto patch_site | 25 | goto patch_site |
26 | switch(type) { | 26 | switch (type) { |
27 | PATCH_SITE(pv_irq_ops, irq_disable); | 27 | PATCH_SITE(pv_irq_ops, irq_disable); |
28 | PATCH_SITE(pv_irq_ops, irq_enable); | 28 | PATCH_SITE(pv_irq_ops, irq_enable); |
29 | PATCH_SITE(pv_irq_ops, restore_fl); | 29 | PATCH_SITE(pv_irq_ops, restore_fl); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 02d19328525d..080d1d27f37a 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
261 | badbit, tbl, start_addr, npages); | 261 | badbit, tbl, start_addr, npages); |
262 | } | 262 | } |
263 | 263 | ||
264 | set_bit_string(tbl->it_map, index, npages); | 264 | iommu_area_reserve(tbl->it_map, index, npages); |
265 | 265 | ||
266 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 266 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
267 | } | 267 | } |
@@ -343,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
343 | /* were we called with bad_dma_address? */ | 343 | /* were we called with bad_dma_address? */ |
344 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); | 344 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); |
345 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { | 345 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { |
346 | printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " | 346 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " |
347 | "address 0x%Lx\n", dma_addr); | 347 | "address 0x%Lx\n", dma_addr); |
348 | WARN_ON(1); | ||
349 | return; | 348 | return; |
350 | } | 349 | } |
351 | 350 | ||
@@ -492,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
492 | npages = size >> PAGE_SHIFT; | 491 | npages = size >> PAGE_SHIFT; |
493 | order = get_order(size); | 492 | order = get_order(size); |
494 | 493 | ||
494 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
495 | |||
495 | /* alloc enough pages (and possibly more) */ | 496 | /* alloc enough pages (and possibly more) */ |
496 | ret = (void *)__get_free_pages(flag, order); | 497 | ret = (void *)__get_free_pages(flag, order); |
497 | if (!ret) | 498 | if (!ret) |
@@ -511,8 +512,22 @@ error: | |||
511 | return ret; | 512 | return ret; |
512 | } | 513 | } |
513 | 514 | ||
515 | static void calgary_free_coherent(struct device *dev, size_t size, | ||
516 | void *vaddr, dma_addr_t dma_handle) | ||
517 | { | ||
518 | unsigned int npages; | ||
519 | struct iommu_table *tbl = find_iommu_table(dev); | ||
520 | |||
521 | size = PAGE_ALIGN(size); | ||
522 | npages = size >> PAGE_SHIFT; | ||
523 | |||
524 | iommu_free(tbl, dma_handle, npages); | ||
525 | free_pages((unsigned long)vaddr, get_order(size)); | ||
526 | } | ||
527 | |||
514 | static struct dma_mapping_ops calgary_dma_ops = { | 528 | static struct dma_mapping_ops calgary_dma_ops = { |
515 | .alloc_coherent = calgary_alloc_coherent, | 529 | .alloc_coherent = calgary_alloc_coherent, |
530 | .free_coherent = calgary_free_coherent, | ||
516 | .map_single = calgary_map_single, | 531 | .map_single = calgary_map_single, |
517 | .unmap_single = calgary_unmap_single, | 532 | .unmap_single = calgary_unmap_single, |
518 | .map_sg = calgary_map_sg, | 533 | .map_sg = calgary_map_sg, |
@@ -1269,13 +1284,15 @@ static inline int __init determine_tce_table_size(u64 ram) | |||
1269 | static int __init build_detail_arrays(void) | 1284 | static int __init build_detail_arrays(void) |
1270 | { | 1285 | { |
1271 | unsigned long ptr; | 1286 | unsigned long ptr; |
1272 | int i, scal_detail_size, rio_detail_size; | 1287 | unsigned numnodes, i; |
1288 | int scal_detail_size, rio_detail_size; | ||
1273 | 1289 | ||
1274 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ | 1290 | numnodes = rio_table_hdr->num_scal_dev; |
1291 | if (numnodes > MAX_NUMNODES){ | ||
1275 | printk(KERN_WARNING | 1292 | printk(KERN_WARNING |
1276 | "Calgary: MAX_NUMNODES too low! Defined as %d, " | 1293 | "Calgary: MAX_NUMNODES too low! Defined as %d, " |
1277 | "but system has %d nodes.\n", | 1294 | "but system has %d nodes.\n", |
1278 | MAX_NUMNODES, rio_table_hdr->num_scal_dev); | 1295 | MAX_NUMNODES, numnodes); |
1279 | return -ENODEV; | 1296 | return -ENODEV; |
1280 | } | 1297 | } |
1281 | 1298 | ||
@@ -1296,8 +1313,7 @@ static int __init build_detail_arrays(void) | |||
1296 | } | 1313 | } |
1297 | 1314 | ||
1298 | ptr = ((unsigned long)rio_table_hdr) + 3; | 1315 | ptr = ((unsigned long)rio_table_hdr) + 3; |
1299 | for (i = 0; i < rio_table_hdr->num_scal_dev; | 1316 | for (i = 0; i < numnodes; i++, ptr += scal_detail_size) |
1300 | i++, ptr += scal_detail_size) | ||
1301 | scal_devs[i] = (struct scal_detail *)ptr; | 1317 | scal_devs[i] = (struct scal_detail *)ptr; |
1302 | 1318 | ||
1303 | for (i = 0; i < rio_table_hdr->num_rio_dev; | 1319 | for (i = 0; i < rio_table_hdr->num_rio_dev; |
@@ -1350,7 +1366,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | |||
1350 | * Function for kdump case. Get the tce tables from first kernel | 1366 | * Function for kdump case. Get the tce tables from first kernel |
1351 | * by reading the contents of the base adress register of calgary iommu | 1367 | * by reading the contents of the base adress register of calgary iommu |
1352 | */ | 1368 | */ |
1353 | static void get_tce_space_from_tar(void) | 1369 | static void __init get_tce_space_from_tar(void) |
1354 | { | 1370 | { |
1355 | int bus; | 1371 | int bus; |
1356 | void __iomem *target; | 1372 | void __iomem *target; |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d6964ec2..0a3824e837b4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); | |||
41 | /* Dummy device used for NULL arguments (normally ISA). Better would | 41 | /* Dummy device used for NULL arguments (normally ISA). Better would |
42 | be probably a smaller DMA mask, but this is bug-to-bug compatible | 42 | be probably a smaller DMA mask, but this is bug-to-bug compatible |
43 | to older i386. */ | 43 | to older i386. */ |
44 | struct device fallback_dev = { | 44 | struct device x86_dma_fallback_dev = { |
45 | .bus_id = "fallback device", | 45 | .bus_id = "fallback device", |
46 | .coherent_dma_mask = DMA_32BIT_MASK, | 46 | .coherent_dma_mask = DMA_32BIT_MASK, |
47 | .dma_mask = &fallback_dev.coherent_dma_mask, | 47 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, |
48 | }; | 48 | }; |
49 | EXPORT_SYMBOL(x86_dma_fallback_dev); | ||
49 | 50 | ||
50 | int dma_set_mask(struct device *dev, u64 mask) | 51 | int dma_set_mask(struct device *dev, u64 mask) |
51 | { | 52 | { |
@@ -82,7 +83,7 @@ void __init dma32_reserve_bootmem(void) | |||
82 | * using 512M as goal | 83 | * using 512M as goal |
83 | */ | 84 | */ |
84 | align = 64ULL<<20; | 85 | align = 64ULL<<20; |
85 | size = round_up(dma32_bootmem_size, align); | 86 | size = roundup(dma32_bootmem_size, align); |
86 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 87 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
87 | 512ULL<<20); | 88 | 512ULL<<20); |
88 | if (dma32_bootmem_ptr) | 89 | if (dma32_bootmem_ptr) |
@@ -133,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len) | |||
133 | EXPORT_SYMBOL(iommu_num_pages); | 134 | EXPORT_SYMBOL(iommu_num_pages); |
134 | #endif | 135 | #endif |
135 | 136 | ||
137 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | ||
138 | dma_addr_t *dma_addr, gfp_t flag) | ||
139 | { | ||
140 | unsigned long dma_mask; | ||
141 | struct page *page; | ||
142 | dma_addr_t addr; | ||
143 | |||
144 | dma_mask = dma_alloc_coherent_mask(dev, flag); | ||
145 | |||
146 | flag |= __GFP_ZERO; | ||
147 | again: | ||
148 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | ||
149 | if (!page) | ||
150 | return NULL; | ||
151 | |||
152 | addr = page_to_phys(page); | ||
153 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | ||
154 | __free_pages(page, get_order(size)); | ||
155 | |||
156 | if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { | ||
157 | flag = (flag & ~GFP_DMA32) | GFP_DMA; | ||
158 | goto again; | ||
159 | } | ||
160 | |||
161 | return NULL; | ||
162 | } | ||
163 | |||
164 | *dma_addr = addr; | ||
165 | return page_address(page); | ||
166 | } | ||
167 | |||
136 | /* | 168 | /* |
137 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter | 169 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter |
138 | * documentation. | 170 | * documentation. |
@@ -241,147 +273,6 @@ int dma_supported(struct device *dev, u64 mask) | |||
241 | } | 273 | } |
242 | EXPORT_SYMBOL(dma_supported); | 274 | EXPORT_SYMBOL(dma_supported); |
243 | 275 | ||
244 | /* Allocate DMA memory on node near device */ | ||
245 | static noinline struct page * | ||
246 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
247 | { | ||
248 | int node; | ||
249 | |||
250 | node = dev_to_node(dev); | ||
251 | |||
252 | return alloc_pages_node(node, gfp, order); | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Allocate memory for a coherent mapping. | ||
257 | */ | ||
258 | void * | ||
259 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
260 | gfp_t gfp) | ||
261 | { | ||
262 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
263 | void *memory = NULL; | ||
264 | struct page *page; | ||
265 | unsigned long dma_mask = 0; | ||
266 | dma_addr_t bus; | ||
267 | int noretry = 0; | ||
268 | |||
269 | /* ignore region specifiers */ | ||
270 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
271 | |||
272 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | ||
273 | return memory; | ||
274 | |||
275 | if (!dev) { | ||
276 | dev = &fallback_dev; | ||
277 | gfp |= GFP_DMA; | ||
278 | } | ||
279 | dma_mask = dev->coherent_dma_mask; | ||
280 | if (dma_mask == 0) | ||
281 | dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; | ||
282 | |||
283 | /* Device not DMA able */ | ||
284 | if (dev->dma_mask == NULL) | ||
285 | return NULL; | ||
286 | |||
287 | /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ | ||
288 | if (gfp & __GFP_DMA) | ||
289 | noretry = 1; | ||
290 | |||
291 | #ifdef CONFIG_X86_64 | ||
292 | /* Why <=? Even when the mask is smaller than 4GB it is often | ||
293 | larger than 16MB and in this case we have a chance of | ||
294 | finding fitting memory in the next higher zone first. If | ||
295 | not retry with true GFP_DMA. -AK */ | ||
296 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
297 | gfp |= GFP_DMA32; | ||
298 | if (dma_mask < DMA_32BIT_MASK) | ||
299 | noretry = 1; | ||
300 | } | ||
301 | #endif | ||
302 | |||
303 | again: | ||
304 | page = dma_alloc_pages(dev, | ||
305 | noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); | ||
306 | if (page == NULL) | ||
307 | return NULL; | ||
308 | |||
309 | { | ||
310 | int high, mmu; | ||
311 | bus = page_to_phys(page); | ||
312 | memory = page_address(page); | ||
313 | high = (bus + size) >= dma_mask; | ||
314 | mmu = high; | ||
315 | if (force_iommu && !(gfp & GFP_DMA)) | ||
316 | mmu = 1; | ||
317 | else if (high) { | ||
318 | free_pages((unsigned long)memory, | ||
319 | get_order(size)); | ||
320 | |||
321 | /* Don't use the 16MB ZONE_DMA unless absolutely | ||
322 | needed. It's better to use remapping first. */ | ||
323 | if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { | ||
324 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
325 | goto again; | ||
326 | } | ||
327 | |||
328 | /* Let low level make its own zone decisions */ | ||
329 | gfp &= ~(GFP_DMA32|GFP_DMA); | ||
330 | |||
331 | if (ops->alloc_coherent) | ||
332 | return ops->alloc_coherent(dev, size, | ||
333 | dma_handle, gfp); | ||
334 | return NULL; | ||
335 | } | ||
336 | |||
337 | memset(memory, 0, size); | ||
338 | if (!mmu) { | ||
339 | *dma_handle = bus; | ||
340 | return memory; | ||
341 | } | ||
342 | } | ||
343 | |||
344 | if (ops->alloc_coherent) { | ||
345 | free_pages((unsigned long)memory, get_order(size)); | ||
346 | gfp &= ~(GFP_DMA|GFP_DMA32); | ||
347 | return ops->alloc_coherent(dev, size, dma_handle, gfp); | ||
348 | } | ||
349 | |||
350 | if (ops->map_simple) { | ||
351 | *dma_handle = ops->map_simple(dev, virt_to_phys(memory), | ||
352 | size, | ||
353 | PCI_DMA_BIDIRECTIONAL); | ||
354 | if (*dma_handle != bad_dma_address) | ||
355 | return memory; | ||
356 | } | ||
357 | |||
358 | if (panic_on_overflow) | ||
359 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", | ||
360 | (unsigned long)size); | ||
361 | free_pages((unsigned long)memory, get_order(size)); | ||
362 | return NULL; | ||
363 | } | ||
364 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
365 | |||
366 | /* | ||
367 | * Unmap coherent memory. | ||
368 | * The caller must ensure that the device has finished accessing the mapping. | ||
369 | */ | ||
370 | void dma_free_coherent(struct device *dev, size_t size, | ||
371 | void *vaddr, dma_addr_t bus) | ||
372 | { | ||
373 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
374 | |||
375 | int order = get_order(size); | ||
376 | WARN_ON(irqs_disabled()); /* for portability */ | ||
377 | if (dma_release_from_coherent(dev, order, vaddr)) | ||
378 | return; | ||
379 | if (ops->unmap_single) | ||
380 | ops->unmap_single(dev, bus, size, 0); | ||
381 | free_pages((unsigned long)vaddr, order); | ||
382 | } | ||
383 | EXPORT_SYMBOL(dma_free_coherent); | ||
384 | |||
385 | static int __init pci_iommu_init(void) | 276 | static int __init pci_iommu_init(void) |
386 | { | 277 | { |
387 | calgary_iommu_init(); | 278 | calgary_iommu_init(); |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d5..145f1c83369f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -27,8 +27,8 @@ | |||
27 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
28 | #include <linux/iommu-helper.h> | 28 | #include <linux/iommu-helper.h> |
29 | #include <linux/sysdev.h> | 29 | #include <linux/sysdev.h> |
30 | #include <linux/io.h> | ||
30 | #include <asm/atomic.h> | 31 | #include <asm/atomic.h> |
31 | #include <asm/io.h> | ||
32 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
@@ -80,9 +80,10 @@ AGPEXTERN int agp_memory_reserved; | |||
80 | AGPEXTERN __u32 *agp_gatt_table; | 80 | AGPEXTERN __u32 *agp_gatt_table; |
81 | 81 | ||
82 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | 82 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ |
83 | static int need_flush; /* global flush state. set for each gart wrap */ | 83 | static bool need_flush; /* global flush state. set for each gart wrap */ |
84 | 84 | ||
85 | static unsigned long alloc_iommu(struct device *dev, int size) | 85 | static unsigned long alloc_iommu(struct device *dev, int size, |
86 | unsigned long align_mask) | ||
86 | { | 87 | { |
87 | unsigned long offset, flags; | 88 | unsigned long offset, flags; |
88 | unsigned long boundary_size; | 89 | unsigned long boundary_size; |
@@ -90,26 +91,27 @@ static unsigned long alloc_iommu(struct device *dev, int size) | |||
90 | 91 | ||
91 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), | 92 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), |
92 | PAGE_SIZE) >> PAGE_SHIFT; | 93 | PAGE_SIZE) >> PAGE_SHIFT; |
93 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 94 | boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, |
94 | PAGE_SIZE) >> PAGE_SHIFT; | 95 | PAGE_SIZE) >> PAGE_SHIFT; |
95 | 96 | ||
96 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 97 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
97 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, | 98 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, |
98 | size, base_index, boundary_size, 0); | 99 | size, base_index, boundary_size, align_mask); |
99 | if (offset == -1) { | 100 | if (offset == -1) { |
100 | need_flush = 1; | 101 | need_flush = true; |
101 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, | 102 | offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, |
102 | size, base_index, boundary_size, 0); | 103 | size, base_index, boundary_size, |
104 | align_mask); | ||
103 | } | 105 | } |
104 | if (offset != -1) { | 106 | if (offset != -1) { |
105 | next_bit = offset+size; | 107 | next_bit = offset+size; |
106 | if (next_bit >= iommu_pages) { | 108 | if (next_bit >= iommu_pages) { |
107 | next_bit = 0; | 109 | next_bit = 0; |
108 | need_flush = 1; | 110 | need_flush = true; |
109 | } | 111 | } |
110 | } | 112 | } |
111 | if (iommu_fullflush) | 113 | if (iommu_fullflush) |
112 | need_flush = 1; | 114 | need_flush = true; |
113 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 115 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
114 | 116 | ||
115 | return offset; | 117 | return offset; |
@@ -134,7 +136,7 @@ static void flush_gart(void) | |||
134 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 136 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
135 | if (need_flush) { | 137 | if (need_flush) { |
136 | k8_flush_garts(); | 138 | k8_flush_garts(); |
137 | need_flush = 0; | 139 | need_flush = false; |
138 | } | 140 | } |
139 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 141 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
140 | } | 142 | } |
@@ -173,7 +175,8 @@ static void dump_leak(void) | |||
173 | iommu_leak_pages); | 175 | iommu_leak_pages); |
174 | for (i = 0; i < iommu_leak_pages; i += 2) { | 176 | for (i = 0; i < iommu_leak_pages; i += 2) { |
175 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); | 177 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); |
176 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); | 178 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], |
179 | 0); | ||
177 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); | 180 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); |
178 | } | 181 | } |
179 | printk(KERN_DEBUG "\n"); | 182 | printk(KERN_DEBUG "\n"); |
@@ -212,34 +215,24 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
212 | static inline int | 215 | static inline int |
213 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 216 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
214 | { | 217 | { |
215 | u64 mask = *dev->dma_mask; | 218 | return force_iommu || |
216 | int high = addr + size > mask; | 219 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
217 | int mmu = high; | ||
218 | |||
219 | if (force_iommu) | ||
220 | mmu = 1; | ||
221 | |||
222 | return mmu; | ||
223 | } | 220 | } |
224 | 221 | ||
225 | static inline int | 222 | static inline int |
226 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 223 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
227 | { | 224 | { |
228 | u64 mask = *dev->dma_mask; | 225 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); |
229 | int high = addr + size > mask; | ||
230 | int mmu = high; | ||
231 | |||
232 | return mmu; | ||
233 | } | 226 | } |
234 | 227 | ||
235 | /* Map a single continuous physical area into the IOMMU. | 228 | /* Map a single continuous physical area into the IOMMU. |
236 | * Caller needs to check if the iommu is needed and flush. | 229 | * Caller needs to check if the iommu is needed and flush. |
237 | */ | 230 | */ |
238 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | 231 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
239 | size_t size, int dir) | 232 | size_t size, int dir, unsigned long align_mask) |
240 | { | 233 | { |
241 | unsigned long npages = iommu_num_pages(phys_mem, size); | 234 | unsigned long npages = iommu_num_pages(phys_mem, size); |
242 | unsigned long iommu_page = alloc_iommu(dev, npages); | 235 | unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); |
243 | int i; | 236 | int i; |
244 | 237 | ||
245 | if (iommu_page == -1) { | 238 | if (iommu_page == -1) { |
@@ -259,16 +252,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
259 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 252 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
260 | } | 253 | } |
261 | 254 | ||
262 | static dma_addr_t | ||
263 | gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) | ||
264 | { | ||
265 | dma_addr_t map = dma_map_area(dev, paddr, size, dir); | ||
266 | |||
267 | flush_gart(); | ||
268 | |||
269 | return map; | ||
270 | } | ||
271 | |||
272 | /* Map a single area into the IOMMU */ | 255 | /* Map a single area into the IOMMU */ |
273 | static dma_addr_t | 256 | static dma_addr_t |
274 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | 257 | gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) |
@@ -276,12 +259,13 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) | |||
276 | unsigned long bus; | 259 | unsigned long bus; |
277 | 260 | ||
278 | if (!dev) | 261 | if (!dev) |
279 | dev = &fallback_dev; | 262 | dev = &x86_dma_fallback_dev; |
280 | 263 | ||
281 | if (!need_iommu(dev, paddr, size)) | 264 | if (!need_iommu(dev, paddr, size)) |
282 | return paddr; | 265 | return paddr; |
283 | 266 | ||
284 | bus = gart_map_simple(dev, paddr, size, dir); | 267 | bus = dma_map_area(dev, paddr, size, dir, 0); |
268 | flush_gart(); | ||
285 | 269 | ||
286 | return bus; | 270 | return bus; |
287 | } | 271 | } |
@@ -340,7 +324,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
340 | unsigned long addr = sg_phys(s); | 324 | unsigned long addr = sg_phys(s); |
341 | 325 | ||
342 | if (nonforced_iommu(dev, addr, s->length)) { | 326 | if (nonforced_iommu(dev, addr, s->length)) { |
343 | addr = dma_map_area(dev, addr, s->length, dir); | 327 | addr = dma_map_area(dev, addr, s->length, dir, 0); |
344 | if (addr == bad_dma_address) { | 328 | if (addr == bad_dma_address) { |
345 | if (i > 0) | 329 | if (i > 0) |
346 | gart_unmap_sg(dev, sg, i, dir); | 330 | gart_unmap_sg(dev, sg, i, dir); |
@@ -362,7 +346,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
362 | int nelems, struct scatterlist *sout, | 346 | int nelems, struct scatterlist *sout, |
363 | unsigned long pages) | 347 | unsigned long pages) |
364 | { | 348 | { |
365 | unsigned long iommu_start = alloc_iommu(dev, pages); | 349 | unsigned long iommu_start = alloc_iommu(dev, pages, 0); |
366 | unsigned long iommu_page = iommu_start; | 350 | unsigned long iommu_page = iommu_start; |
367 | struct scatterlist *s; | 351 | struct scatterlist *s; |
368 | int i; | 352 | int i; |
@@ -427,7 +411,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
427 | return 0; | 411 | return 0; |
428 | 412 | ||
429 | if (!dev) | 413 | if (!dev) |
430 | dev = &fallback_dev; | 414 | dev = &x86_dma_fallback_dev; |
431 | 415 | ||
432 | out = 0; | 416 | out = 0; |
433 | start = 0; | 417 | start = 0; |
@@ -499,6 +483,46 @@ error: | |||
499 | return 0; | 483 | return 0; |
500 | } | 484 | } |
501 | 485 | ||
486 | /* allocate and map a coherent mapping */ | ||
487 | static void * | ||
488 | gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, | ||
489 | gfp_t flag) | ||
490 | { | ||
491 | dma_addr_t paddr; | ||
492 | unsigned long align_mask; | ||
493 | struct page *page; | ||
494 | |||
495 | if (force_iommu && !(flag & GFP_DMA)) { | ||
496 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
497 | page = alloc_pages(flag | __GFP_ZERO, get_order(size)); | ||
498 | if (!page) | ||
499 | return NULL; | ||
500 | |||
501 | align_mask = (1UL << get_order(size)) - 1; | ||
502 | paddr = dma_map_area(dev, page_to_phys(page), size, | ||
503 | DMA_BIDIRECTIONAL, align_mask); | ||
504 | |||
505 | flush_gart(); | ||
506 | if (paddr != bad_dma_address) { | ||
507 | *dma_addr = paddr; | ||
508 | return page_address(page); | ||
509 | } | ||
510 | __free_pages(page, get_order(size)); | ||
511 | } else | ||
512 | return dma_generic_alloc_coherent(dev, size, dma_addr, flag); | ||
513 | |||
514 | return NULL; | ||
515 | } | ||
516 | |||
517 | /* free a coherent mapping */ | ||
518 | static void | ||
519 | gart_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
520 | dma_addr_t dma_addr) | ||
521 | { | ||
522 | gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); | ||
523 | free_pages((unsigned long)vaddr, get_order(size)); | ||
524 | } | ||
525 | |||
502 | static int no_agp; | 526 | static int no_agp; |
503 | 527 | ||
504 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 528 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
@@ -626,7 +650,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
626 | struct pci_dev *dev; | 650 | struct pci_dev *dev; |
627 | void *gatt; | 651 | void *gatt; |
628 | int i, error; | 652 | int i, error; |
629 | unsigned long start_pfn, end_pfn; | ||
630 | 653 | ||
631 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 654 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
632 | aper_size = aper_base = info->aper_size = 0; | 655 | aper_size = aper_base = info->aper_size = 0; |
@@ -650,13 +673,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
650 | info->aper_size = aper_size >> 20; | 673 | info->aper_size = aper_size >> 20; |
651 | 674 | ||
652 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); | 675 | gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); |
653 | gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); | 676 | gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
677 | get_order(gatt_size)); | ||
654 | if (!gatt) | 678 | if (!gatt) |
655 | panic("Cannot allocate GATT table"); | 679 | panic("Cannot allocate GATT table"); |
656 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) | 680 | if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) |
657 | panic("Could not set GART PTEs to uncacheable pages"); | 681 | panic("Could not set GART PTEs to uncacheable pages"); |
658 | 682 | ||
659 | memset(gatt, 0, gatt_size); | ||
660 | agp_gatt_table = gatt; | 683 | agp_gatt_table = gatt; |
661 | 684 | ||
662 | enable_gart_translations(); | 685 | enable_gart_translations(); |
@@ -665,19 +688,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
665 | if (!error) | 688 | if (!error) |
666 | error = sysdev_register(&device_gart); | 689 | error = sysdev_register(&device_gart); |
667 | if (error) | 690 | if (error) |
668 | panic("Could not register gart_sysdev -- would corrupt data on next suspend"); | 691 | panic("Could not register gart_sysdev -- " |
692 | "would corrupt data on next suspend"); | ||
669 | 693 | ||
670 | flush_gart(); | 694 | flush_gart(); |
671 | 695 | ||
672 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 696 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", |
673 | aper_base, aper_size>>10); | 697 | aper_base, aper_size>>10); |
674 | 698 | ||
675 | /* need to map that range */ | ||
676 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | ||
677 | if (end_pfn > max_low_pfn_mapped) { | ||
678 | start_pfn = (aper_base>>PAGE_SHIFT); | ||
679 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
680 | } | ||
681 | return 0; | 699 | return 0; |
682 | 700 | ||
683 | nommu: | 701 | nommu: |
@@ -687,20 +705,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
687 | return -1; | 705 | return -1; |
688 | } | 706 | } |
689 | 707 | ||
690 | extern int agp_amd64_init(void); | ||
691 | |||
692 | static struct dma_mapping_ops gart_dma_ops = { | 708 | static struct dma_mapping_ops gart_dma_ops = { |
693 | .map_single = gart_map_single, | 709 | .map_single = gart_map_single, |
694 | .map_simple = gart_map_simple, | ||
695 | .unmap_single = gart_unmap_single, | 710 | .unmap_single = gart_unmap_single, |
696 | .sync_single_for_cpu = NULL, | ||
697 | .sync_single_for_device = NULL, | ||
698 | .sync_single_range_for_cpu = NULL, | ||
699 | .sync_single_range_for_device = NULL, | ||
700 | .sync_sg_for_cpu = NULL, | ||
701 | .sync_sg_for_device = NULL, | ||
702 | .map_sg = gart_map_sg, | 711 | .map_sg = gart_map_sg, |
703 | .unmap_sg = gart_unmap_sg, | 712 | .unmap_sg = gart_unmap_sg, |
713 | .alloc_coherent = gart_alloc_coherent, | ||
714 | .free_coherent = gart_free_coherent, | ||
704 | }; | 715 | }; |
705 | 716 | ||
706 | void gart_iommu_shutdown(void) | 717 | void gart_iommu_shutdown(void) |
@@ -727,7 +738,8 @@ void __init gart_iommu_init(void) | |||
727 | { | 738 | { |
728 | struct agp_kern_info info; | 739 | struct agp_kern_info info; |
729 | unsigned long iommu_start; | 740 | unsigned long iommu_start; |
730 | unsigned long aper_size; | 741 | unsigned long aper_base, aper_size; |
742 | unsigned long start_pfn, end_pfn; | ||
731 | unsigned long scratch; | 743 | unsigned long scratch; |
732 | long i; | 744 | long i; |
733 | 745 | ||
@@ -759,30 +771,35 @@ void __init gart_iommu_init(void) | |||
759 | (no_agp && init_k8_gatt(&info) < 0)) { | 771 | (no_agp && init_k8_gatt(&info) < 0)) { |
760 | if (max_pfn > MAX_DMA32_PFN) { | 772 | if (max_pfn > MAX_DMA32_PFN) { |
761 | printk(KERN_WARNING "More than 4GB of memory " | 773 | printk(KERN_WARNING "More than 4GB of memory " |
762 | "but GART IOMMU not available.\n" | 774 | "but GART IOMMU not available.\n"); |
763 | KERN_WARNING "falling back to iommu=soft.\n"); | 775 | printk(KERN_WARNING "falling back to iommu=soft.\n"); |
764 | } | 776 | } |
765 | return; | 777 | return; |
766 | } | 778 | } |
767 | 779 | ||
780 | /* need to map that range */ | ||
781 | aper_size = info.aper_size << 20; | ||
782 | aper_base = info.aper_base; | ||
783 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | ||
784 | if (end_pfn > max_low_pfn_mapped) { | ||
785 | start_pfn = (aper_base>>PAGE_SHIFT); | ||
786 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
787 | } | ||
788 | |||
768 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 789 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); |
769 | aper_size = info.aper_size * 1024 * 1024; | ||
770 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 790 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
771 | iommu_pages = iommu_size >> PAGE_SHIFT; | 791 | iommu_pages = iommu_size >> PAGE_SHIFT; |
772 | 792 | ||
773 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, | 793 | iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, |
774 | get_order(iommu_pages/8)); | 794 | get_order(iommu_pages/8)); |
775 | if (!iommu_gart_bitmap) | 795 | if (!iommu_gart_bitmap) |
776 | panic("Cannot allocate iommu bitmap\n"); | 796 | panic("Cannot allocate iommu bitmap\n"); |
777 | memset(iommu_gart_bitmap, 0, iommu_pages/8); | ||
778 | 797 | ||
779 | #ifdef CONFIG_IOMMU_LEAK | 798 | #ifdef CONFIG_IOMMU_LEAK |
780 | if (leak_trace) { | 799 | if (leak_trace) { |
781 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, | 800 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, |
782 | get_order(iommu_pages*sizeof(void *))); | 801 | get_order(iommu_pages*sizeof(void *))); |
783 | if (iommu_leak_tab) | 802 | if (!iommu_leak_tab) |
784 | memset(iommu_leak_tab, 0, iommu_pages * 8); | ||
785 | else | ||
786 | printk(KERN_DEBUG | 803 | printk(KERN_DEBUG |
787 | "PCI-DMA: Cannot allocate leak trace area\n"); | 804 | "PCI-DMA: Cannot allocate leak trace area\n"); |
788 | } | 805 | } |
@@ -792,7 +809,7 @@ void __init gart_iommu_init(void) | |||
792 | * Out of IOMMU space handling. | 809 | * Out of IOMMU space handling. |
793 | * Reserve some invalid pages at the beginning of the GART. | 810 | * Reserve some invalid pages at the beginning of the GART. |
794 | */ | 811 | */ |
795 | set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 812 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
796 | 813 | ||
797 | agp_memory_reserved = iommu_size; | 814 | agp_memory_reserved = iommu_size; |
798 | printk(KERN_INFO | 815 | printk(KERN_INFO |
@@ -850,7 +867,8 @@ void __init gart_parse_options(char *p) | |||
850 | if (!strncmp(p, "leak", 4)) { | 867 | if (!strncmp(p, "leak", 4)) { |
851 | leak_trace = 1; | 868 | leak_trace = 1; |
852 | p += 4; | 869 | p += 4; |
853 | if (*p == '=') ++p; | 870 | if (*p == '=') |
871 | ++p; | ||
854 | if (isdigit(*p) && get_option(&p, &arg)) | 872 | if (isdigit(*p) && get_option(&p, &arg)) |
855 | iommu_leak_pages = arg; | 873 | iommu_leak_pages = arg; |
856 | } | 874 | } |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3f91f71cdc3e..c70ab5a5d4c8 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -14,7 +14,7 @@ | |||
14 | static int | 14 | static int |
15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
16 | { | 16 | { |
17 | if (hwdev && bus + size > *hwdev->dma_mask) { | 17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { |
18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) | 18 | if (*hwdev->dma_mask >= DMA_32BIT_MASK) |
19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
@@ -72,7 +72,15 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
72 | return nents; | 72 | return nents; |
73 | } | 73 | } |
74 | 74 | ||
75 | static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
76 | dma_addr_t dma_addr) | ||
77 | { | ||
78 | free_pages((unsigned long)vaddr, get_order(size)); | ||
79 | } | ||
80 | |||
75 | struct dma_mapping_ops nommu_dma_ops = { | 81 | struct dma_mapping_ops nommu_dma_ops = { |
82 | .alloc_coherent = dma_generic_alloc_coherent, | ||
83 | .free_coherent = nommu_free_coherent, | ||
76 | .map_single = nommu_map_single, | 84 | .map_single = nommu_map_single, |
77 | .map_sg = nommu_map_sg, | 85 | .map_sg = nommu_map_sg, |
78 | .is_phys = 1, | 86 | .is_phys = 1, |
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index bc1f2d3ea277..a311ffcaad16 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c | |||
@@ -1,20 +1,13 @@ | |||
1 | #include <linux/platform_device.h> | 1 | #include <linux/platform_device.h> |
2 | #include <linux/errno.h> | 2 | #include <linux/err.h> |
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | 4 | ||
5 | static __init int add_pcspkr(void) | 5 | static __init int add_pcspkr(void) |
6 | { | 6 | { |
7 | struct platform_device *pd; | 7 | struct platform_device *pd; |
8 | int ret; | ||
9 | 8 | ||
10 | pd = platform_device_alloc("pcspkr", -1); | 9 | pd = platform_device_register_simple("pcspkr", -1, NULL, 0); |
11 | if (!pd) | ||
12 | return -ENOMEM; | ||
13 | 10 | ||
14 | ret = platform_device_add(pd); | 11 | return IS_ERR(pd) ? PTR_ERR(pd) : 0; |
15 | if (ret) | ||
16 | platform_device_put(pd); | ||
17 | |||
18 | return ret; | ||
19 | } | 12 | } |
20 | device_initcall(add_pcspkr); | 13 | device_initcall(add_pcspkr); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a0..ec7a2ba9bce8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -185,7 +185,8 @@ static void mwait_idle(void) | |||
185 | static void poll_idle(void) | 185 | static void poll_idle(void) |
186 | { | 186 | { |
187 | local_irq_enable(); | 187 | local_irq_enable(); |
188 | cpu_relax(); | 188 | while (!need_resched()) |
189 | cpu_relax(); | ||
189 | } | 190 | } |
190 | 191 | ||
191 | /* | 192 | /* |
@@ -246,6 +247,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
246 | return 1; | 247 | return 1; |
247 | } | 248 | } |
248 | 249 | ||
250 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
251 | static int c1e_detected; | ||
252 | |||
253 | void c1e_remove_cpu(int cpu) | ||
254 | { | ||
255 | cpu_clear(cpu, c1e_mask); | ||
256 | } | ||
257 | |||
249 | /* | 258 | /* |
250 | * C1E aware idle routine. We check for C1E active in the interrupt | 259 | * C1E aware idle routine. We check for C1E active in the interrupt |
251 | * pending message MSR. If we detect C1E, then we handle it the same | 260 | * pending message MSR. If we detect C1E, then we handle it the same |
@@ -253,9 +262,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
253 | */ | 262 | */ |
254 | static void c1e_idle(void) | 263 | static void c1e_idle(void) |
255 | { | 264 | { |
256 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
257 | static int c1e_detected; | ||
258 | |||
259 | if (need_resched()) | 265 | if (need_resched()) |
260 | return; | 266 | return; |
261 | 267 | ||
@@ -265,8 +271,10 @@ static void c1e_idle(void) | |||
265 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 271 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
266 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 272 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
267 | c1e_detected = 1; | 273 | c1e_detected = 1; |
268 | mark_tsc_unstable("TSC halt in C1E"); | 274 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
269 | printk(KERN_INFO "System has C1E enabled\n"); | 275 | mark_tsc_unstable("TSC halt in AMD C1E"); |
276 | printk(KERN_INFO "System has AMD C1E enabled\n"); | ||
277 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | ||
270 | } | 278 | } |
271 | } | 279 | } |
272 | 280 | ||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7b6e44a7c624..205188db9626 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/tick.h> | 37 | #include <linux/tick.h> |
38 | #include <linux/percpu.h> | 38 | #include <linux/percpu.h> |
39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
40 | #include <linux/dmi.h> | ||
40 | 41 | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
@@ -55,6 +56,7 @@ | |||
55 | #include <asm/tlbflush.h> | 56 | #include <asm/tlbflush.h> |
56 | #include <asm/cpu.h> | 57 | #include <asm/cpu.h> |
57 | #include <asm/kdebug.h> | 58 | #include <asm/kdebug.h> |
59 | #include <asm/idle.h> | ||
58 | #include <asm/syscalls.h> | 60 | #include <asm/syscalls.h> |
59 | #include <asm/smp.h> | 61 | #include <asm/smp.h> |
60 | 62 | ||
@@ -90,6 +92,7 @@ static void cpu_exit_clear(void) | |||
90 | cpu_clear(cpu, cpu_callin_map); | 92 | cpu_clear(cpu, cpu_callin_map); |
91 | 93 | ||
92 | numa_remove_cpu(cpu); | 94 | numa_remove_cpu(cpu); |
95 | c1e_remove_cpu(cpu); | ||
93 | } | 96 | } |
94 | 97 | ||
95 | /* We don't actually take CPU down, just spin without interrupts. */ | 98 | /* We don't actually take CPU down, just spin without interrupts. */ |
@@ -97,7 +100,6 @@ static inline void play_dead(void) | |||
97 | { | 100 | { |
98 | /* This must be done before dead CPU ack */ | 101 | /* This must be done before dead CPU ack */ |
99 | cpu_exit_clear(); | 102 | cpu_exit_clear(); |
100 | wbinvd(); | ||
101 | mb(); | 103 | mb(); |
102 | /* Ack it */ | 104 | /* Ack it */ |
103 | __get_cpu_var(cpu_state) = CPU_DEAD; | 105 | __get_cpu_var(cpu_state) = CPU_DEAD; |
@@ -106,8 +108,8 @@ static inline void play_dead(void) | |||
106 | * With physical CPU hotplug, we should halt the cpu | 108 | * With physical CPU hotplug, we should halt the cpu |
107 | */ | 109 | */ |
108 | local_irq_disable(); | 110 | local_irq_disable(); |
109 | while (1) | 111 | /* mask all interrupts, flush any and all caches, and halt */ |
110 | halt(); | 112 | wbinvd_halt(); |
111 | } | 113 | } |
112 | #else | 114 | #else |
113 | static inline void play_dead(void) | 115 | static inline void play_dead(void) |
@@ -162,6 +164,7 @@ void __show_registers(struct pt_regs *regs, int all) | |||
162 | unsigned long d0, d1, d2, d3, d6, d7; | 164 | unsigned long d0, d1, d2, d3, d6, d7; |
163 | unsigned long sp; | 165 | unsigned long sp; |
164 | unsigned short ss, gs; | 166 | unsigned short ss, gs; |
167 | const char *board; | ||
165 | 168 | ||
166 | if (user_mode_vm(regs)) { | 169 | if (user_mode_vm(regs)) { |
167 | sp = regs->sp; | 170 | sp = regs->sp; |
@@ -174,11 +177,15 @@ void __show_registers(struct pt_regs *regs, int all) | |||
174 | } | 177 | } |
175 | 178 | ||
176 | printk("\n"); | 179 | printk("\n"); |
177 | printk("Pid: %d, comm: %s %s (%s %.*s)\n", | 180 | |
181 | board = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
182 | if (!board) | ||
183 | board = ""; | ||
184 | printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", | ||
178 | task_pid_nr(current), current->comm, | 185 | task_pid_nr(current), current->comm, |
179 | print_tainted(), init_utsname()->release, | 186 | print_tainted(), init_utsname()->release, |
180 | (int)strcspn(init_utsname()->version, " "), | 187 | (int)strcspn(init_utsname()->version, " "), |
181 | init_utsname()->version); | 188 | init_utsname()->version, board); |
182 | 189 | ||
183 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | 190 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", |
184 | (u16)regs->cs, regs->ip, regs->flags, | 191 | (u16)regs->cs, regs->ip, regs->flags, |
@@ -278,6 +285,14 @@ void exit_thread(void) | |||
278 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 285 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
279 | put_cpu(); | 286 | put_cpu(); |
280 | } | 287 | } |
288 | #ifdef CONFIG_X86_DS | ||
289 | /* Free any DS contexts that have not been properly released. */ | ||
290 | if (unlikely(current->thread.ds_ctx)) { | ||
291 | /* we clear debugctl to make sure DS is not used. */ | ||
292 | update_debugctlmsr(0); | ||
293 | ds_free(current->thread.ds_ctx); | ||
294 | } | ||
295 | #endif /* CONFIG_X86_DS */ | ||
281 | } | 296 | } |
282 | 297 | ||
283 | void flush_thread(void) | 298 | void flush_thread(void) |
@@ -439,6 +454,35 @@ int set_tsc_mode(unsigned int val) | |||
439 | return 0; | 454 | return 0; |
440 | } | 455 | } |
441 | 456 | ||
457 | #ifdef CONFIG_X86_DS | ||
458 | static int update_debugctl(struct thread_struct *prev, | ||
459 | struct thread_struct *next, unsigned long debugctl) | ||
460 | { | ||
461 | unsigned long ds_prev = 0; | ||
462 | unsigned long ds_next = 0; | ||
463 | |||
464 | if (prev->ds_ctx) | ||
465 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
466 | if (next->ds_ctx) | ||
467 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
468 | |||
469 | if (ds_next != ds_prev) { | ||
470 | /* we clear debugctl to make sure DS | ||
471 | * is not in use when we change it */ | ||
472 | debugctl = 0; | ||
473 | update_debugctlmsr(0); | ||
474 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
475 | } | ||
476 | return debugctl; | ||
477 | } | ||
478 | #else | ||
479 | static int update_debugctl(struct thread_struct *prev, | ||
480 | struct thread_struct *next, unsigned long debugctl) | ||
481 | { | ||
482 | return debugctl; | ||
483 | } | ||
484 | #endif /* CONFIG_X86_DS */ | ||
485 | |||
442 | static noinline void | 486 | static noinline void |
443 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 487 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
444 | struct tss_struct *tss) | 488 | struct tss_struct *tss) |
@@ -449,14 +493,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
449 | prev = &prev_p->thread; | 493 | prev = &prev_p->thread; |
450 | next = &next_p->thread; | 494 | next = &next_p->thread; |
451 | 495 | ||
452 | debugctl = prev->debugctlmsr; | 496 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); |
453 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
454 | /* we clear debugctl to make sure DS | ||
455 | * is not in use when we change it */ | ||
456 | debugctl = 0; | ||
457 | update_debugctlmsr(0); | ||
458 | wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); | ||
459 | } | ||
460 | 497 | ||
461 | if (next->debugctlmsr != debugctl) | 498 | if (next->debugctlmsr != debugctl) |
462 | update_debugctlmsr(next->debugctlmsr); | 499 | update_debugctlmsr(next->debugctlmsr); |
@@ -480,13 +517,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
480 | hard_enable_TSC(); | 517 | hard_enable_TSC(); |
481 | } | 518 | } |
482 | 519 | ||
483 | #ifdef X86_BTS | 520 | #ifdef CONFIG_X86_PTRACE_BTS |
484 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 521 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
485 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 522 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
486 | 523 | ||
487 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 524 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
488 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 525 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
489 | #endif | 526 | #endif /* CONFIG_X86_PTRACE_BTS */ |
490 | 527 | ||
491 | 528 | ||
492 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 529 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 87d7dfdcf46c..2a8ccb9238b4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -37,11 +37,11 @@ | |||
37 | #include <linux/kdebug.h> | 37 | #include <linux/kdebug.h> |
38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
40 | #include <linux/uaccess.h> | ||
41 | #include <linux/io.h> | ||
40 | 42 | ||
41 | #include <asm/uaccess.h> | ||
42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
43 | #include <asm/system.h> | 44 | #include <asm/system.h> |
44 | #include <asm/io.h> | ||
45 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
46 | #include <asm/i387.h> | 46 | #include <asm/i387.h> |
47 | #include <asm/mmu_context.h> | 47 | #include <asm/mmu_context.h> |
@@ -89,19 +89,20 @@ void exit_idle(void) | |||
89 | #ifdef CONFIG_HOTPLUG_CPU | 89 | #ifdef CONFIG_HOTPLUG_CPU |
90 | DECLARE_PER_CPU(int, cpu_state); | 90 | DECLARE_PER_CPU(int, cpu_state); |
91 | 91 | ||
92 | #include <asm/nmi.h> | 92 | #include <linux/nmi.h> |
93 | /* We halt the CPU with physical CPU hotplug */ | 93 | /* We halt the CPU with physical CPU hotplug */ |
94 | static inline void play_dead(void) | 94 | static inline void play_dead(void) |
95 | { | 95 | { |
96 | idle_task_exit(); | 96 | idle_task_exit(); |
97 | wbinvd(); | 97 | c1e_remove_cpu(raw_smp_processor_id()); |
98 | |||
98 | mb(); | 99 | mb(); |
99 | /* Ack it */ | 100 | /* Ack it */ |
100 | __get_cpu_var(cpu_state) = CPU_DEAD; | 101 | __get_cpu_var(cpu_state) = CPU_DEAD; |
101 | 102 | ||
102 | local_irq_disable(); | 103 | local_irq_disable(); |
103 | while (1) | 104 | /* mask all interrupts, flush any and all caches, and halt */ |
104 | halt(); | 105 | wbinvd_halt(); |
105 | } | 106 | } |
106 | #else | 107 | #else |
107 | static inline void play_dead(void) | 108 | static inline void play_dead(void) |
@@ -153,7 +154,7 @@ void cpu_idle(void) | |||
153 | } | 154 | } |
154 | 155 | ||
155 | /* Prints also some state that isn't saved in the pt_regs */ | 156 | /* Prints also some state that isn't saved in the pt_regs */ |
156 | void __show_regs(struct pt_regs * regs) | 157 | void __show_regs(struct pt_regs *regs) |
157 | { | 158 | { |
158 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; | 159 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; |
159 | unsigned long d0, d1, d2, d3, d6, d7; | 160 | unsigned long d0, d1, d2, d3, d6, d7; |
@@ -162,59 +163,61 @@ void __show_regs(struct pt_regs * regs) | |||
162 | 163 | ||
163 | printk("\n"); | 164 | printk("\n"); |
164 | print_modules(); | 165 | print_modules(); |
165 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 166 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", |
166 | current->pid, current->comm, print_tainted(), | 167 | current->pid, current->comm, print_tainted(), |
167 | init_utsname()->release, | 168 | init_utsname()->release, |
168 | (int)strcspn(init_utsname()->version, " "), | 169 | (int)strcspn(init_utsname()->version, " "), |
169 | init_utsname()->version); | 170 | init_utsname()->version); |
170 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | 171 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
171 | printk_address(regs->ip, 1); | 172 | printk_address(regs->ip, 1); |
172 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, | 173 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, |
173 | regs->flags); | 174 | regs->sp, regs->flags); |
174 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 175 | printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
175 | regs->ax, regs->bx, regs->cx); | 176 | regs->ax, regs->bx, regs->cx); |
176 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", | 177 | printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
177 | regs->dx, regs->si, regs->di); | 178 | regs->dx, regs->si, regs->di); |
178 | printk("RBP: %016lx R08: %016lx R09: %016lx\n", | 179 | printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", |
179 | regs->bp, regs->r8, regs->r9); | 180 | regs->bp, regs->r8, regs->r9); |
180 | printk("R10: %016lx R11: %016lx R12: %016lx\n", | 181 | printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", |
181 | regs->r10, regs->r11, regs->r12); | 182 | regs->r10, regs->r11, regs->r12); |
182 | printk("R13: %016lx R14: %016lx R15: %016lx\n", | 183 | printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", |
183 | regs->r13, regs->r14, regs->r15); | 184 | regs->r13, regs->r14, regs->r15); |
184 | 185 | ||
185 | asm("movl %%ds,%0" : "=r" (ds)); | 186 | asm("movl %%ds,%0" : "=r" (ds)); |
186 | asm("movl %%cs,%0" : "=r" (cs)); | 187 | asm("movl %%cs,%0" : "=r" (cs)); |
187 | asm("movl %%es,%0" : "=r" (es)); | 188 | asm("movl %%es,%0" : "=r" (es)); |
188 | asm("movl %%fs,%0" : "=r" (fsindex)); | 189 | asm("movl %%fs,%0" : "=r" (fsindex)); |
189 | asm("movl %%gs,%0" : "=r" (gsindex)); | 190 | asm("movl %%gs,%0" : "=r" (gsindex)); |
190 | 191 | ||
191 | rdmsrl(MSR_FS_BASE, fs); | 192 | rdmsrl(MSR_FS_BASE, fs); |
192 | rdmsrl(MSR_GS_BASE, gs); | 193 | rdmsrl(MSR_GS_BASE, gs); |
193 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); | 194 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); |
194 | 195 | ||
195 | cr0 = read_cr0(); | 196 | cr0 = read_cr0(); |
196 | cr2 = read_cr2(); | 197 | cr2 = read_cr2(); |
197 | cr3 = read_cr3(); | 198 | cr3 = read_cr3(); |
198 | cr4 = read_cr4(); | 199 | cr4 = read_cr4(); |
199 | 200 | ||
200 | printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 201 | printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
201 | fs,fsindex,gs,gsindex,shadowgs); | 202 | fs, fsindex, gs, gsindex, shadowgs); |
202 | printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); | 203 | printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
203 | printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); | 204 | es, cr0); |
205 | printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, | ||
206 | cr4); | ||
204 | 207 | ||
205 | get_debugreg(d0, 0); | 208 | get_debugreg(d0, 0); |
206 | get_debugreg(d1, 1); | 209 | get_debugreg(d1, 1); |
207 | get_debugreg(d2, 2); | 210 | get_debugreg(d2, 2); |
208 | printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); | 211 | printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); |
209 | get_debugreg(d3, 3); | 212 | get_debugreg(d3, 3); |
210 | get_debugreg(d6, 6); | 213 | get_debugreg(d6, 6); |
211 | get_debugreg(d7, 7); | 214 | get_debugreg(d7, 7); |
212 | printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); | 215 | printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); |
213 | } | 216 | } |
214 | 217 | ||
215 | void show_regs(struct pt_regs *regs) | 218 | void show_regs(struct pt_regs *regs) |
216 | { | 219 | { |
217 | printk("CPU %d:", smp_processor_id()); | 220 | printk(KERN_INFO "CPU %d:", smp_processor_id()); |
218 | __show_regs(regs); | 221 | __show_regs(regs); |
219 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | 222 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); |
220 | } | 223 | } |
@@ -240,6 +243,14 @@ void exit_thread(void) | |||
240 | t->io_bitmap_max = 0; | 243 | t->io_bitmap_max = 0; |
241 | put_cpu(); | 244 | put_cpu(); |
242 | } | 245 | } |
246 | #ifdef CONFIG_X86_DS | ||
247 | /* Free any DS contexts that have not been properly released. */ | ||
248 | if (unlikely(t->ds_ctx)) { | ||
249 | /* we clear debugctl to make sure DS is not used. */ | ||
250 | update_debugctlmsr(0); | ||
251 | ds_free(t->ds_ctx); | ||
252 | } | ||
253 | #endif /* CONFIG_X86_DS */ | ||
243 | } | 254 | } |
244 | 255 | ||
245 | void flush_thread(void) | 256 | void flush_thread(void) |
@@ -315,10 +326,10 @@ void prepare_to_copy(struct task_struct *tsk) | |||
315 | 326 | ||
316 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | 327 | int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, |
317 | unsigned long unused, | 328 | unsigned long unused, |
318 | struct task_struct * p, struct pt_regs * regs) | 329 | struct task_struct *p, struct pt_regs *regs) |
319 | { | 330 | { |
320 | int err; | 331 | int err; |
321 | struct pt_regs * childregs; | 332 | struct pt_regs *childregs; |
322 | struct task_struct *me = current; | 333 | struct task_struct *me = current; |
323 | 334 | ||
324 | childregs = ((struct pt_regs *) | 335 | childregs = ((struct pt_regs *) |
@@ -363,10 +374,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
363 | if (test_thread_flag(TIF_IA32)) | 374 | if (test_thread_flag(TIF_IA32)) |
364 | err = do_set_thread_area(p, -1, | 375 | err = do_set_thread_area(p, -1, |
365 | (struct user_desc __user *)childregs->si, 0); | 376 | (struct user_desc __user *)childregs->si, 0); |
366 | else | 377 | else |
367 | #endif | 378 | #endif |
368 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); | 379 | err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); |
369 | if (err) | 380 | if (err) |
370 | goto out; | 381 | goto out; |
371 | } | 382 | } |
372 | err = 0; | 383 | err = 0; |
@@ -473,13 +484,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
473 | next = &next_p->thread; | 484 | next = &next_p->thread; |
474 | 485 | ||
475 | debugctl = prev->debugctlmsr; | 486 | debugctl = prev->debugctlmsr; |
476 | if (next->ds_area_msr != prev->ds_area_msr) { | 487 | |
477 | /* we clear debugctl to make sure DS | 488 | #ifdef CONFIG_X86_DS |
478 | * is not in use when we change it */ | 489 | { |
479 | debugctl = 0; | 490 | unsigned long ds_prev = 0, ds_next = 0; |
480 | update_debugctlmsr(0); | 491 | |
481 | wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); | 492 | if (prev->ds_ctx) |
493 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
494 | if (next->ds_ctx) | ||
495 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
496 | |||
497 | if (ds_next != ds_prev) { | ||
498 | /* | ||
499 | * We clear debugctl to make sure DS | ||
500 | * is not in use when we change it: | ||
501 | */ | ||
502 | debugctl = 0; | ||
503 | update_debugctlmsr(0); | ||
504 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
505 | } | ||
482 | } | 506 | } |
507 | #endif /* CONFIG_X86_DS */ | ||
483 | 508 | ||
484 | if (next->debugctlmsr != debugctl) | 509 | if (next->debugctlmsr != debugctl) |
485 | update_debugctlmsr(next->debugctlmsr); | 510 | update_debugctlmsr(next->debugctlmsr); |
@@ -517,13 +542,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
517 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 542 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
518 | } | 543 | } |
519 | 544 | ||
520 | #ifdef X86_BTS | 545 | #ifdef CONFIG_X86_PTRACE_BTS |
521 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 546 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
522 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 547 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
523 | 548 | ||
524 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 549 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
525 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 550 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
526 | #endif | 551 | #endif /* CONFIG_X86_PTRACE_BTS */ |
527 | } | 552 | } |
528 | 553 | ||
529 | /* | 554 | /* |
@@ -545,7 +570,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
545 | unsigned fsindex, gsindex; | 570 | unsigned fsindex, gsindex; |
546 | 571 | ||
547 | /* we're going to use this soon, after a few expensive things */ | 572 | /* we're going to use this soon, after a few expensive things */ |
548 | if (next_p->fpu_counter>5) | 573 | if (next_p->fpu_counter > 5) |
549 | prefetch(next->xstate); | 574 | prefetch(next->xstate); |
550 | 575 | ||
551 | /* | 576 | /* |
@@ -553,13 +578,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
553 | */ | 578 | */ |
554 | load_sp0(tss, next); | 579 | load_sp0(tss, next); |
555 | 580 | ||
556 | /* | 581 | /* |
557 | * Switch DS and ES. | 582 | * Switch DS and ES. |
558 | * This won't pick up thread selector changes, but I guess that is ok. | 583 | * This won't pick up thread selector changes, but I guess that is ok. |
559 | */ | 584 | */ |
560 | savesegment(es, prev->es); | 585 | savesegment(es, prev->es); |
561 | if (unlikely(next->es | prev->es)) | 586 | if (unlikely(next->es | prev->es)) |
562 | loadsegment(es, next->es); | 587 | loadsegment(es, next->es); |
563 | 588 | ||
564 | savesegment(ds, prev->ds); | 589 | savesegment(ds, prev->ds); |
565 | if (unlikely(next->ds | prev->ds)) | 590 | if (unlikely(next->ds | prev->ds)) |
@@ -585,7 +610,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
585 | */ | 610 | */ |
586 | arch_leave_lazy_cpu_mode(); | 611 | arch_leave_lazy_cpu_mode(); |
587 | 612 | ||
588 | /* | 613 | /* |
589 | * Switch FS and GS. | 614 | * Switch FS and GS. |
590 | * | 615 | * |
591 | * Segment register != 0 always requires a reload. Also | 616 | * Segment register != 0 always requires a reload. Also |
@@ -594,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
594 | */ | 619 | */ |
595 | if (unlikely(fsindex | next->fsindex | prev->fs)) { | 620 | if (unlikely(fsindex | next->fsindex | prev->fs)) { |
596 | loadsegment(fs, next->fsindex); | 621 | loadsegment(fs, next->fsindex); |
597 | /* | 622 | /* |
598 | * Check if the user used a selector != 0; if yes | 623 | * Check if the user used a selector != 0; if yes |
599 | * clear 64bit base, since overloaded base is always | 624 | * clear 64bit base, since overloaded base is always |
600 | * mapped to the Null selector | 625 | * mapped to the Null selector |
601 | */ | 626 | */ |
602 | if (fsindex) | 627 | if (fsindex) |
603 | prev->fs = 0; | 628 | prev->fs = 0; |
604 | } | 629 | } |
605 | /* when next process has a 64bit base use it */ | 630 | /* when next process has a 64bit base use it */ |
606 | if (next->fs) | 631 | if (next->fs) |
@@ -610,7 +635,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
610 | if (unlikely(gsindex | next->gsindex | prev->gs)) { | 635 | if (unlikely(gsindex | next->gsindex | prev->gs)) { |
611 | load_gs_index(next->gsindex); | 636 | load_gs_index(next->gsindex); |
612 | if (gsindex) | 637 | if (gsindex) |
613 | prev->gs = 0; | 638 | prev->gs = 0; |
614 | } | 639 | } |
615 | if (next->gs) | 640 | if (next->gs) |
616 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | 641 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); |
@@ -619,12 +644,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
619 | /* Must be after DS reload */ | 644 | /* Must be after DS reload */ |
620 | unlazy_fpu(prev_p); | 645 | unlazy_fpu(prev_p); |
621 | 646 | ||
622 | /* | 647 | /* |
623 | * Switch the PDA and FPU contexts. | 648 | * Switch the PDA and FPU contexts. |
624 | */ | 649 | */ |
625 | prev->usersp = read_pda(oldrsp); | 650 | prev->usersp = read_pda(oldrsp); |
626 | write_pda(oldrsp, next->usersp); | 651 | write_pda(oldrsp, next->usersp); |
627 | write_pda(pcurrent, next_p); | 652 | write_pda(pcurrent, next_p); |
628 | 653 | ||
629 | write_pda(kernelstack, | 654 | write_pda(kernelstack, |
630 | (unsigned long)task_stack_page(next_p) + | 655 | (unsigned long)task_stack_page(next_p) + |
@@ -665,7 +690,7 @@ long sys_execve(char __user *name, char __user * __user *argv, | |||
665 | char __user * __user *envp, struct pt_regs *regs) | 690 | char __user * __user *envp, struct pt_regs *regs) |
666 | { | 691 | { |
667 | long error; | 692 | long error; |
668 | char * filename; | 693 | char *filename; |
669 | 694 | ||
670 | filename = getname(name); | 695 | filename = getname(name); |
671 | error = PTR_ERR(filename); | 696 | error = PTR_ERR(filename); |
@@ -723,55 +748,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs) | |||
723 | unsigned long get_wchan(struct task_struct *p) | 748 | unsigned long get_wchan(struct task_struct *p) |
724 | { | 749 | { |
725 | unsigned long stack; | 750 | unsigned long stack; |
726 | u64 fp,ip; | 751 | u64 fp, ip; |
727 | int count = 0; | 752 | int count = 0; |
728 | 753 | ||
729 | if (!p || p == current || p->state==TASK_RUNNING) | 754 | if (!p || p == current || p->state == TASK_RUNNING) |
730 | return 0; | 755 | return 0; |
731 | stack = (unsigned long)task_stack_page(p); | 756 | stack = (unsigned long)task_stack_page(p); |
732 | if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) | 757 | if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) |
733 | return 0; | 758 | return 0; |
734 | fp = *(u64 *)(p->thread.sp); | 759 | fp = *(u64 *)(p->thread.sp); |
735 | do { | 760 | do { |
736 | if (fp < (unsigned long)stack || | 761 | if (fp < (unsigned long)stack || |
737 | fp > (unsigned long)stack+THREAD_SIZE) | 762 | fp > (unsigned long)stack+THREAD_SIZE) |
738 | return 0; | 763 | return 0; |
739 | ip = *(u64 *)(fp+8); | 764 | ip = *(u64 *)(fp+8); |
740 | if (!in_sched_functions(ip)) | 765 | if (!in_sched_functions(ip)) |
741 | return ip; | 766 | return ip; |
742 | fp = *(u64 *)fp; | 767 | fp = *(u64 *)fp; |
743 | } while (count++ < 16); | 768 | } while (count++ < 16); |
744 | return 0; | 769 | return 0; |
745 | } | 770 | } |
746 | 771 | ||
747 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | 772 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) |
748 | { | 773 | { |
749 | int ret = 0; | 774 | int ret = 0; |
750 | int doit = task == current; | 775 | int doit = task == current; |
751 | int cpu; | 776 | int cpu; |
752 | 777 | ||
753 | switch (code) { | 778 | switch (code) { |
754 | case ARCH_SET_GS: | 779 | case ARCH_SET_GS: |
755 | if (addr >= TASK_SIZE_OF(task)) | 780 | if (addr >= TASK_SIZE_OF(task)) |
756 | return -EPERM; | 781 | return -EPERM; |
757 | cpu = get_cpu(); | 782 | cpu = get_cpu(); |
758 | /* handle small bases via the GDT because that's faster to | 783 | /* handle small bases via the GDT because that's faster to |
759 | switch. */ | 784 | switch. */ |
760 | if (addr <= 0xffffffff) { | 785 | if (addr <= 0xffffffff) { |
761 | set_32bit_tls(task, GS_TLS, addr); | 786 | set_32bit_tls(task, GS_TLS, addr); |
762 | if (doit) { | 787 | if (doit) { |
763 | load_TLS(&task->thread, cpu); | 788 | load_TLS(&task->thread, cpu); |
764 | load_gs_index(GS_TLS_SEL); | 789 | load_gs_index(GS_TLS_SEL); |
765 | } | 790 | } |
766 | task->thread.gsindex = GS_TLS_SEL; | 791 | task->thread.gsindex = GS_TLS_SEL; |
767 | task->thread.gs = 0; | 792 | task->thread.gs = 0; |
768 | } else { | 793 | } else { |
769 | task->thread.gsindex = 0; | 794 | task->thread.gsindex = 0; |
770 | task->thread.gs = addr; | 795 | task->thread.gs = addr; |
771 | if (doit) { | 796 | if (doit) { |
772 | load_gs_index(0); | 797 | load_gs_index(0); |
773 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); | 798 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); |
774 | } | 799 | } |
775 | } | 800 | } |
776 | put_cpu(); | 801 | put_cpu(); |
777 | break; | 802 | break; |
@@ -825,8 +850,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
825 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 850 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
826 | else | 851 | else |
827 | base = task->thread.gs; | 852 | base = task->thread.gs; |
828 | } | 853 | } else |
829 | else | ||
830 | base = task->thread.gs; | 854 | base = task->thread.gs; |
831 | ret = put_user(base, (unsigned long __user *)addr); | 855 | ret = put_user(base, (unsigned long __user *)addr); |
832 | break; | 856 | break; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fc3e8dcd9da6..e375b658efc3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
15 | #include <linux/ptrace.h> | 15 | #include <linux/ptrace.h> |
16 | #include <linux/regset.h> | 16 | #include <linux/regset.h> |
17 | #include <linux/tracehook.h> | ||
17 | #include <linux/user.h> | 18 | #include <linux/user.h> |
18 | #include <linux/elf.h> | 19 | #include <linux/elf.h> |
19 | #include <linux/security.h> | 20 | #include <linux/security.h> |
@@ -554,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child, | |||
554 | return 0; | 555 | return 0; |
555 | } | 556 | } |
556 | 557 | ||
557 | #ifdef X86_BTS | 558 | #ifdef CONFIG_X86_PTRACE_BTS |
559 | /* | ||
560 | * The configuration for a particular BTS hardware implementation. | ||
561 | */ | ||
562 | struct bts_configuration { | ||
563 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
564 | unsigned char sizeof_bts; | ||
565 | /* the size of a field in the BTS record in bytes */ | ||
566 | unsigned char sizeof_field; | ||
567 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
568 | unsigned long debugctl_mask; | ||
569 | }; | ||
570 | static struct bts_configuration bts_cfg; | ||
571 | |||
572 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
573 | |||
574 | |||
575 | /* | ||
576 | * Branch Trace Store (BTS) uses the following format. Different | ||
577 | * architectures vary in the size of those fields. | ||
578 | * - source linear address | ||
579 | * - destination linear address | ||
580 | * - flags | ||
581 | * | ||
582 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
583 | * architectures use 32bit pointers in 32bit mode. | ||
584 | * | ||
585 | * We compute the base address for the first 8 fields based on: | ||
586 | * - the field size stored in the DS configuration | ||
587 | * - the relative field position | ||
588 | * | ||
589 | * In order to store additional information in the BTS buffer, we use | ||
590 | * a special source address to indicate that the record requires | ||
591 | * special interpretation. | ||
592 | * | ||
593 | * Netburst indicated via a bit in the flags field whether the branch | ||
594 | * was predicted; this is ignored. | ||
595 | */ | ||
596 | |||
597 | enum bts_field { | ||
598 | bts_from = 0, | ||
599 | bts_to, | ||
600 | bts_flags, | ||
601 | |||
602 | bts_escape = (unsigned long)-1, | ||
603 | bts_qual = bts_to, | ||
604 | bts_jiffies = bts_flags | ||
605 | }; | ||
606 | |||
607 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
608 | { | ||
609 | base += (bts_cfg.sizeof_field * field); | ||
610 | return *(unsigned long *)base; | ||
611 | } | ||
558 | 612 | ||
559 | static int ptrace_bts_get_size(struct task_struct *child) | 613 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
560 | { | 614 | { |
561 | if (!child->thread.ds_area_msr) | 615 | base += (bts_cfg.sizeof_field * field);; |
562 | return -ENXIO; | 616 | (*(unsigned long *)base) = val; |
617 | } | ||
563 | 618 | ||
564 | return ds_get_bts_index((void *)child->thread.ds_area_msr); | 619 | /* |
620 | * Translate a BTS record from the raw format into the bts_struct format | ||
621 | * | ||
622 | * out (out): bts_struct interpretation | ||
623 | * raw: raw BTS record | ||
624 | */ | ||
625 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
626 | { | ||
627 | memset(out, 0, sizeof(*out)); | ||
628 | if (bts_get(raw, bts_from) == bts_escape) { | ||
629 | out->qualifier = bts_get(raw, bts_qual); | ||
630 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
631 | } else { | ||
632 | out->qualifier = BTS_BRANCH; | ||
633 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
634 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
635 | } | ||
565 | } | 636 | } |
566 | 637 | ||
567 | static int ptrace_bts_read_record(struct task_struct *child, | 638 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
568 | long index, | ||
569 | struct bts_struct __user *out) | 639 | struct bts_struct __user *out) |
570 | { | 640 | { |
571 | struct bts_struct ret; | 641 | struct bts_struct ret; |
572 | int retval; | 642 | const void *bts_record; |
573 | int bts_end; | 643 | size_t bts_index, bts_end; |
574 | int bts_index; | 644 | int error; |
575 | |||
576 | if (!child->thread.ds_area_msr) | ||
577 | return -ENXIO; | ||
578 | 645 | ||
579 | if (index < 0) | 646 | error = ds_get_bts_end(child, &bts_end); |
580 | return -EINVAL; | 647 | if (error < 0) |
648 | return error; | ||
581 | 649 | ||
582 | bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); | ||
583 | if (bts_end <= index) | 650 | if (bts_end <= index) |
584 | return -EINVAL; | 651 | return -EINVAL; |
585 | 652 | ||
653 | error = ds_get_bts_index(child, &bts_index); | ||
654 | if (error < 0) | ||
655 | return error; | ||
656 | |||
586 | /* translate the ptrace bts index into the ds bts index */ | 657 | /* translate the ptrace bts index into the ds bts index */ |
587 | bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); | 658 | bts_index += bts_end - (index + 1); |
588 | bts_index -= (index + 1); | 659 | if (bts_end <= bts_index) |
589 | if (bts_index < 0) | 660 | bts_index -= bts_end; |
590 | bts_index += bts_end; | ||
591 | 661 | ||
592 | retval = ds_read_bts((void *)child->thread.ds_area_msr, | 662 | error = ds_access_bts(child, bts_index, &bts_record); |
593 | bts_index, &ret); | 663 | if (error < 0) |
594 | if (retval < 0) | 664 | return error; |
595 | return retval; | 665 | |
666 | ptrace_bts_translate_record(&ret, bts_record); | ||
596 | 667 | ||
597 | if (copy_to_user(out, &ret, sizeof(ret))) | 668 | if (copy_to_user(out, &ret, sizeof(ret))) |
598 | return -EFAULT; | 669 | return -EFAULT; |
@@ -600,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child, | |||
600 | return sizeof(ret); | 671 | return sizeof(ret); |
601 | } | 672 | } |
602 | 673 | ||
603 | static int ptrace_bts_clear(struct task_struct *child) | ||
604 | { | ||
605 | if (!child->thread.ds_area_msr) | ||
606 | return -ENXIO; | ||
607 | |||
608 | return ds_clear((void *)child->thread.ds_area_msr); | ||
609 | } | ||
610 | |||
611 | static int ptrace_bts_drain(struct task_struct *child, | 674 | static int ptrace_bts_drain(struct task_struct *child, |
612 | long size, | 675 | long size, |
613 | struct bts_struct __user *out) | 676 | struct bts_struct __user *out) |
614 | { | 677 | { |
615 | int end, i; | 678 | struct bts_struct ret; |
616 | void *ds = (void *)child->thread.ds_area_msr; | 679 | const unsigned char *raw; |
617 | 680 | size_t end, i; | |
618 | if (!ds) | 681 | int error; |
619 | return -ENXIO; | ||
620 | 682 | ||
621 | end = ds_get_bts_index(ds); | 683 | error = ds_get_bts_index(child, &end); |
622 | if (end <= 0) | 684 | if (error < 0) |
623 | return end; | 685 | return error; |
624 | 686 | ||
625 | if (size < (end * sizeof(struct bts_struct))) | 687 | if (size < (end * sizeof(struct bts_struct))) |
626 | return -EIO; | 688 | return -EIO; |
627 | 689 | ||
628 | for (i = 0; i < end; i++, out++) { | 690 | error = ds_access_bts(child, 0, (const void **)&raw); |
629 | struct bts_struct ret; | 691 | if (error < 0) |
630 | int retval; | 692 | return error; |
631 | 693 | ||
632 | retval = ds_read_bts(ds, i, &ret); | 694 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { |
633 | if (retval < 0) | 695 | ptrace_bts_translate_record(&ret, raw); |
634 | return retval; | ||
635 | 696 | ||
636 | if (copy_to_user(out, &ret, sizeof(ret))) | 697 | if (copy_to_user(out, &ret, sizeof(ret))) |
637 | return -EFAULT; | 698 | return -EFAULT; |
638 | } | 699 | } |
639 | 700 | ||
640 | ds_clear(ds); | 701 | error = ds_clear_bts(child); |
702 | if (error < 0) | ||
703 | return error; | ||
641 | 704 | ||
642 | return end; | 705 | return end; |
643 | } | 706 | } |
644 | 707 | ||
708 | static void ptrace_bts_ovfl(struct task_struct *child) | ||
709 | { | ||
710 | send_sig(child->thread.bts_ovfl_signal, child, 0); | ||
711 | } | ||
712 | |||
645 | static int ptrace_bts_config(struct task_struct *child, | 713 | static int ptrace_bts_config(struct task_struct *child, |
646 | long cfg_size, | 714 | long cfg_size, |
647 | const struct ptrace_bts_config __user *ucfg) | 715 | const struct ptrace_bts_config __user *ucfg) |
648 | { | 716 | { |
649 | struct ptrace_bts_config cfg; | 717 | struct ptrace_bts_config cfg; |
650 | int bts_size, ret = 0; | 718 | int error = 0; |
651 | void *ds; | 719 | |
720 | error = -EOPNOTSUPP; | ||
721 | if (!bts_cfg.sizeof_bts) | ||
722 | goto errout; | ||
652 | 723 | ||
724 | error = -EIO; | ||
653 | if (cfg_size < sizeof(cfg)) | 725 | if (cfg_size < sizeof(cfg)) |
654 | return -EIO; | 726 | goto errout; |
655 | 727 | ||
728 | error = -EFAULT; | ||
656 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 729 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
657 | return -EFAULT; | 730 | goto errout; |
658 | 731 | ||
659 | if ((int)cfg.size < 0) | 732 | error = -EINVAL; |
660 | return -EINVAL; | 733 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && |
734 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | ||
735 | goto errout; | ||
661 | 736 | ||
662 | bts_size = 0; | 737 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { |
663 | ds = (void *)child->thread.ds_area_msr; | 738 | ds_ovfl_callback_t ovfl = NULL; |
664 | if (ds) { | 739 | unsigned int sig = 0; |
665 | bts_size = ds_get_bts_size(ds); | 740 | |
666 | if (bts_size < 0) | 741 | /* we ignore the error in case we were not tracing child */ |
667 | return bts_size; | 742 | (void)ds_release_bts(child); |
668 | } | ||
669 | cfg.size = PAGE_ALIGN(cfg.size); | ||
670 | 743 | ||
671 | if (bts_size != cfg.size) { | 744 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
672 | ret = ptrace_bts_realloc(child, cfg.size, | 745 | if (!cfg.signal) |
673 | cfg.flags & PTRACE_BTS_O_CUT_SIZE); | 746 | goto errout; |
674 | if (ret < 0) | 747 | |
748 | sig = cfg.signal; | ||
749 | ovfl = ptrace_bts_ovfl; | ||
750 | } | ||
751 | |||
752 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); | ||
753 | if (error < 0) | ||
675 | goto errout; | 754 | goto errout; |
676 | 755 | ||
677 | ds = (void *)child->thread.ds_area_msr; | 756 | child->thread.bts_ovfl_signal = sig; |
678 | } | 757 | } |
679 | 758 | ||
680 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) | 759 | error = -EINVAL; |
681 | ret = ds_set_overflow(ds, DS_O_SIGNAL); | 760 | if (!child->thread.ds_ctx && cfg.flags) |
682 | else | ||
683 | ret = ds_set_overflow(ds, DS_O_WRAP); | ||
684 | if (ret < 0) | ||
685 | goto errout; | 761 | goto errout; |
686 | 762 | ||
687 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 763 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
688 | child->thread.debugctlmsr |= ds_debugctl_mask(); | 764 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; |
689 | else | 765 | else |
690 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 766 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
691 | 767 | ||
692 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 768 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
693 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 769 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
694 | else | 770 | else |
695 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 771 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
696 | 772 | ||
697 | ret = sizeof(cfg); | 773 | error = sizeof(cfg); |
698 | 774 | ||
699 | out: | 775 | out: |
700 | if (child->thread.debugctlmsr) | 776 | if (child->thread.debugctlmsr) |
@@ -702,10 +778,10 @@ out: | |||
702 | else | 778 | else |
703 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 779 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
704 | 780 | ||
705 | return ret; | 781 | return error; |
706 | 782 | ||
707 | errout: | 783 | errout: |
708 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 784 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
709 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 785 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
710 | goto out; | 786 | goto out; |
711 | } | 787 | } |
@@ -714,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child, | |||
714 | long cfg_size, | 790 | long cfg_size, |
715 | struct ptrace_bts_config __user *ucfg) | 791 | struct ptrace_bts_config __user *ucfg) |
716 | { | 792 | { |
717 | void *ds = (void *)child->thread.ds_area_msr; | ||
718 | struct ptrace_bts_config cfg; | 793 | struct ptrace_bts_config cfg; |
794 | size_t end; | ||
795 | const void *base, *max; | ||
796 | int error; | ||
719 | 797 | ||
720 | if (cfg_size < sizeof(cfg)) | 798 | if (cfg_size < sizeof(cfg)) |
721 | return -EIO; | 799 | return -EIO; |
722 | 800 | ||
723 | memset(&cfg, 0, sizeof(cfg)); | 801 | error = ds_get_bts_end(child, &end); |
802 | if (error < 0) | ||
803 | return error; | ||
724 | 804 | ||
725 | if (ds) { | 805 | error = ds_access_bts(child, /* index = */ 0, &base); |
726 | cfg.size = ds_get_bts_size(ds); | 806 | if (error < 0) |
807 | return error; | ||
727 | 808 | ||
728 | if (ds_get_overflow(ds) == DS_O_SIGNAL) | 809 | error = ds_access_bts(child, /* index = */ end, &max); |
729 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 810 | if (error < 0) |
811 | return error; | ||
730 | 812 | ||
731 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 813 | memset(&cfg, 0, sizeof(cfg)); |
732 | child->thread.debugctlmsr & ds_debugctl_mask()) | 814 | cfg.size = (max - base); |
733 | cfg.flags |= PTRACE_BTS_O_TRACE; | 815 | cfg.signal = child->thread.bts_ovfl_signal; |
816 | cfg.bts_size = sizeof(struct bts_struct); | ||
734 | 817 | ||
735 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 818 | if (cfg.signal) |
736 | cfg.flags |= PTRACE_BTS_O_SCHED; | 819 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
737 | } | ||
738 | 820 | ||
739 | cfg.bts_size = sizeof(struct bts_struct); | 821 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && |
822 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) | ||
823 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
824 | |||
825 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | ||
826 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
740 | 827 | ||
741 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 828 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
742 | return -EFAULT; | 829 | return -EFAULT; |
@@ -744,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child, | |||
744 | return sizeof(cfg); | 831 | return sizeof(cfg); |
745 | } | 832 | } |
746 | 833 | ||
747 | |||
748 | static int ptrace_bts_write_record(struct task_struct *child, | 834 | static int ptrace_bts_write_record(struct task_struct *child, |
749 | const struct bts_struct *in) | 835 | const struct bts_struct *in) |
750 | { | 836 | { |
751 | int retval; | 837 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; |
752 | 838 | ||
753 | if (!child->thread.ds_area_msr) | 839 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); |
754 | return -ENXIO; | ||
755 | 840 | ||
756 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | 841 | memset(bts_record, 0, bts_cfg.sizeof_bts); |
757 | if (retval) | 842 | switch (in->qualifier) { |
758 | return retval; | 843 | case BTS_INVALID: |
844 | break; | ||
759 | 845 | ||
760 | return sizeof(*in); | 846 | case BTS_BRANCH: |
761 | } | 847 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); |
848 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
849 | break; | ||
762 | 850 | ||
763 | static int ptrace_bts_realloc(struct task_struct *child, | 851 | case BTS_TASK_ARRIVES: |
764 | int size, int reduce_size) | 852 | case BTS_TASK_DEPARTS: |
765 | { | 853 | bts_set(bts_record, bts_from, bts_escape); |
766 | unsigned long rlim, vm; | 854 | bts_set(bts_record, bts_qual, in->qualifier); |
767 | int ret, old_size; | 855 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); |
856 | break; | ||
768 | 857 | ||
769 | if (size < 0) | 858 | default: |
770 | return -EINVAL; | 859 | return -EINVAL; |
771 | |||
772 | old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
773 | if (old_size < 0) | ||
774 | return old_size; | ||
775 | |||
776 | ret = ds_free((void **)&child->thread.ds_area_msr); | ||
777 | if (ret < 0) | ||
778 | goto out; | ||
779 | |||
780 | size >>= PAGE_SHIFT; | ||
781 | old_size >>= PAGE_SHIFT; | ||
782 | |||
783 | current->mm->total_vm -= old_size; | ||
784 | current->mm->locked_vm -= old_size; | ||
785 | |||
786 | if (size == 0) | ||
787 | goto out; | ||
788 | |||
789 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
790 | vm = current->mm->total_vm + size; | ||
791 | if (rlim < vm) { | ||
792 | ret = -ENOMEM; | ||
793 | |||
794 | if (!reduce_size) | ||
795 | goto out; | ||
796 | |||
797 | size = rlim - current->mm->total_vm; | ||
798 | if (size <= 0) | ||
799 | goto out; | ||
800 | } | ||
801 | |||
802 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
803 | vm = current->mm->locked_vm + size; | ||
804 | if (rlim < vm) { | ||
805 | ret = -ENOMEM; | ||
806 | |||
807 | if (!reduce_size) | ||
808 | goto out; | ||
809 | |||
810 | size = rlim - current->mm->locked_vm; | ||
811 | if (size <= 0) | ||
812 | goto out; | ||
813 | } | 860 | } |
814 | 861 | ||
815 | ret = ds_allocate((void **)&child->thread.ds_area_msr, | 862 | /* The writing task will be the switched-to task on a context |
816 | size << PAGE_SHIFT); | 863 | * switch. It needs to write into the switched-from task's BTS |
817 | if (ret < 0) | 864 | * buffer. */ |
818 | goto out; | 865 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); |
819 | |||
820 | current->mm->total_vm += size; | ||
821 | current->mm->locked_vm += size; | ||
822 | |||
823 | out: | ||
824 | if (child->thread.ds_area_msr) | ||
825 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
826 | else | ||
827 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
828 | |||
829 | return ret; | ||
830 | } | 866 | } |
831 | 867 | ||
832 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 868 | void ptrace_bts_take_timestamp(struct task_struct *tsk, |
@@ -839,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, | |||
839 | 875 | ||
840 | ptrace_bts_write_record(tsk, &rec); | 876 | ptrace_bts_write_record(tsk, &rec); |
841 | } | 877 | } |
842 | #endif /* X86_BTS */ | 878 | |
879 | static const struct bts_configuration bts_cfg_netburst = { | ||
880 | .sizeof_bts = sizeof(long) * 3, | ||
881 | .sizeof_field = sizeof(long), | ||
882 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | ||
883 | }; | ||
884 | |||
885 | static const struct bts_configuration bts_cfg_pentium_m = { | ||
886 | .sizeof_bts = sizeof(long) * 3, | ||
887 | .sizeof_field = sizeof(long), | ||
888 | .debugctl_mask = (1<<6)|(1<<7) | ||
889 | }; | ||
890 | |||
891 | static const struct bts_configuration bts_cfg_core2 = { | ||
892 | .sizeof_bts = 8 * 3, | ||
893 | .sizeof_field = 8, | ||
894 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
895 | }; | ||
896 | |||
897 | static inline void bts_configure(const struct bts_configuration *cfg) | ||
898 | { | ||
899 | bts_cfg = *cfg; | ||
900 | } | ||
901 | |||
902 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | ||
903 | { | ||
904 | switch (c->x86) { | ||
905 | case 0x6: | ||
906 | switch (c->x86_model) { | ||
907 | case 0xD: | ||
908 | case 0xE: /* Pentium M */ | ||
909 | bts_configure(&bts_cfg_pentium_m); | ||
910 | break; | ||
911 | case 0xF: /* Core2 */ | ||
912 | case 0x1C: /* Atom */ | ||
913 | bts_configure(&bts_cfg_core2); | ||
914 | break; | ||
915 | default: | ||
916 | /* sorry, don't know about them */ | ||
917 | break; | ||
918 | } | ||
919 | break; | ||
920 | case 0xF: | ||
921 | switch (c->x86_model) { | ||
922 | case 0x0: | ||
923 | case 0x1: | ||
924 | case 0x2: /* Netburst */ | ||
925 | bts_configure(&bts_cfg_netburst); | ||
926 | break; | ||
927 | default: | ||
928 | /* sorry, don't know about them */ | ||
929 | break; | ||
930 | } | ||
931 | break; | ||
932 | default: | ||
933 | /* sorry, don't know about them */ | ||
934 | break; | ||
935 | } | ||
936 | } | ||
937 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
843 | 938 | ||
844 | /* | 939 | /* |
845 | * Called by kernel/ptrace.c when detaching.. | 940 | * Called by kernel/ptrace.c when detaching.. |
@@ -852,15 +947,15 @@ void ptrace_disable(struct task_struct *child) | |||
852 | #ifdef TIF_SYSCALL_EMU | 947 | #ifdef TIF_SYSCALL_EMU |
853 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 948 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
854 | #endif | 949 | #endif |
855 | if (child->thread.ds_area_msr) { | 950 | #ifdef CONFIG_X86_PTRACE_BTS |
856 | #ifdef X86_BTS | 951 | (void)ds_release_bts(child); |
857 | ptrace_bts_realloc(child, 0, 0); | 952 | |
858 | #endif | 953 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
859 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 954 | if (!child->thread.debugctlmsr) |
860 | if (!child->thread.debugctlmsr) | 955 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
861 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 956 | |
862 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 957 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
863 | } | 958 | #endif /* CONFIG_X86_PTRACE_BTS */ |
864 | } | 959 | } |
865 | 960 | ||
866 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 961 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
@@ -980,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
980 | /* | 1075 | /* |
981 | * These bits need more cooking - not enabled yet: | 1076 | * These bits need more cooking - not enabled yet: |
982 | */ | 1077 | */ |
983 | #ifdef X86_BTS | 1078 | #ifdef CONFIG_X86_PTRACE_BTS |
984 | case PTRACE_BTS_CONFIG: | 1079 | case PTRACE_BTS_CONFIG: |
985 | ret = ptrace_bts_config | 1080 | ret = ptrace_bts_config |
986 | (child, data, (struct ptrace_bts_config __user *)addr); | 1081 | (child, data, (struct ptrace_bts_config __user *)addr); |
@@ -992,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
992 | break; | 1087 | break; |
993 | 1088 | ||
994 | case PTRACE_BTS_SIZE: | 1089 | case PTRACE_BTS_SIZE: |
995 | ret = ptrace_bts_get_size(child); | 1090 | ret = ds_get_bts_index(child, /* pos = */ NULL); |
996 | break; | 1091 | break; |
997 | 1092 | ||
998 | case PTRACE_BTS_GET: | 1093 | case PTRACE_BTS_GET: |
@@ -1001,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1001 | break; | 1096 | break; |
1002 | 1097 | ||
1003 | case PTRACE_BTS_CLEAR: | 1098 | case PTRACE_BTS_CLEAR: |
1004 | ret = ptrace_bts_clear(child); | 1099 | ret = ds_clear_bts(child); |
1005 | break; | 1100 | break; |
1006 | 1101 | ||
1007 | case PTRACE_BTS_DRAIN: | 1102 | case PTRACE_BTS_DRAIN: |
1008 | ret = ptrace_bts_drain | 1103 | ret = ptrace_bts_drain |
1009 | (child, data, (struct bts_struct __user *) addr); | 1104 | (child, data, (struct bts_struct __user *) addr); |
1010 | break; | 1105 | break; |
1011 | #endif | 1106 | #endif /* CONFIG_X86_PTRACE_BTS */ |
1012 | 1107 | ||
1013 | default: | 1108 | default: |
1014 | ret = ptrace_request(child, request, addr, data); | 1109 | ret = ptrace_request(child, request, addr, data); |
@@ -1375,30 +1470,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | |||
1375 | force_sig_info(SIGTRAP, &info, tsk); | 1470 | force_sig_info(SIGTRAP, &info, tsk); |
1376 | } | 1471 | } |
1377 | 1472 | ||
1378 | static void syscall_trace(struct pt_regs *regs) | ||
1379 | { | ||
1380 | if (!(current->ptrace & PT_PTRACED)) | ||
1381 | return; | ||
1382 | |||
1383 | #if 0 | ||
1384 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", | ||
1385 | current->comm, | ||
1386 | regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), | ||
1387 | current_thread_info()->flags, current->ptrace); | ||
1388 | #endif | ||
1389 | |||
1390 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) | ||
1391 | ? 0x80 : 0)); | ||
1392 | /* | ||
1393 | * this isn't the same as continuing with a signal, but it will do | ||
1394 | * for normal use. strace only continues with a signal if the | ||
1395 | * stopping signal is not SIGTRAP. -brl | ||
1396 | */ | ||
1397 | if (current->exit_code) { | ||
1398 | send_sig(current->exit_code, current, 1); | ||
1399 | current->exit_code = 0; | ||
1400 | } | ||
1401 | } | ||
1402 | 1473 | ||
1403 | #ifdef CONFIG_X86_32 | 1474 | #ifdef CONFIG_X86_32 |
1404 | # define IS_IA32 1 | 1475 | # define IS_IA32 1 |
@@ -1432,8 +1503,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
1432 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | 1503 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) |
1433 | ret = -1L; | 1504 | ret = -1L; |
1434 | 1505 | ||
1435 | if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) | 1506 | if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && |
1436 | syscall_trace(regs); | 1507 | tracehook_report_syscall_entry(regs)) |
1508 | ret = -1L; | ||
1437 | 1509 | ||
1438 | if (unlikely(current->audit_context)) { | 1510 | if (unlikely(current->audit_context)) { |
1439 | if (IS_IA32) | 1511 | if (IS_IA32) |
@@ -1459,7 +1531,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
1459 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1531 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1460 | 1532 | ||
1461 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | 1533 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
1462 | syscall_trace(regs); | 1534 | tracehook_report_syscall_exit(regs, 0); |
1463 | 1535 | ||
1464 | /* | 1536 | /* |
1465 | * If TIF_SYSCALL_EMU is set, we only get here because of | 1537 | * If TIF_SYSCALL_EMU is set, we only get here because of |
@@ -1475,6 +1547,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
1475 | * system call instruction. | 1547 | * system call instruction. |
1476 | */ | 1548 | */ |
1477 | if (test_thread_flag(TIF_SINGLESTEP) && | 1549 | if (test_thread_flag(TIF_SINGLESTEP) && |
1478 | (current->ptrace & PT_PTRACED)) | 1550 | tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) |
1479 | send_sigtrap(current, regs, 0); | 1551 | send_sigtrap(current, regs, 0); |
1480 | } | 1552 | } |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc63cb9..f4c93f1cfc19 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off); | |||
29 | 29 | ||
30 | static const struct desc_ptr no_idt = {}; | 30 | static const struct desc_ptr no_idt = {}; |
31 | static int reboot_mode; | 31 | static int reboot_mode; |
32 | enum reboot_type reboot_type = BOOT_KBD; | 32 | /* |
33 | * Keyboard reset and triple fault may result in INIT, not RESET, which | ||
34 | * doesn't work when we're in vmx root mode. Try ACPI first. | ||
35 | */ | ||
36 | enum reboot_type reboot_type = BOOT_ACPI; | ||
33 | int reboot_force; | 37 | int reboot_force; |
34 | 38 | ||
35 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | 39 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 703310a99023..6f50664b2ba5 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -20,10 +20,11 @@ | |||
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) |
22 | 22 | ||
23 | /* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are | 23 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE |
24 | * used to save some data for jumping back | 24 | * ~ control_page + PAGE_SIZE are used as data storage and stack for |
25 | * jumping back | ||
25 | */ | 26 | */ |
26 | #define DATA(offset) (PAGE_SIZE/2+(offset)) | 27 | #define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) |
27 | 28 | ||
28 | /* Minimal CPU state */ | 29 | /* Minimal CPU state */ |
29 | #define ESP DATA(0x0) | 30 | #define ESP DATA(0x0) |
@@ -376,3 +377,6 @@ swap_pages: | |||
376 | popl %ebx | 377 | popl %ebx |
377 | popl %ebp | 378 | popl %ebp |
378 | ret | 379 | ret |
380 | |||
381 | .globl kexec_control_code_size | ||
382 | .set kexec_control_code_size, . - relocate_kernel | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 59f07e14d083..46c98efbbf8d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -223,6 +223,9 @@ unsigned long saved_video_mode; | |||
223 | #define RAMDISK_LOAD_FLAG 0x4000 | 223 | #define RAMDISK_LOAD_FLAG 0x4000 |
224 | 224 | ||
225 | static char __initdata command_line[COMMAND_LINE_SIZE]; | 225 | static char __initdata command_line[COMMAND_LINE_SIZE]; |
226 | #ifdef CONFIG_CMDLINE_BOOL | ||
227 | static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; | ||
228 | #endif | ||
226 | 229 | ||
227 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | 230 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
228 | struct edd edd; | 231 | struct edd edd; |
@@ -445,7 +448,7 @@ static void __init reserve_early_setup_data(void) | |||
445 | * @size: Size of the crashkernel memory to reserve. | 448 | * @size: Size of the crashkernel memory to reserve. |
446 | * Returns the base address on success, and -1ULL on failure. | 449 | * Returns the base address on success, and -1ULL on failure. |
447 | */ | 450 | */ |
448 | unsigned long long find_and_reserve_crashkernel(unsigned long long size) | 451 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) |
449 | { | 452 | { |
450 | const unsigned long long alignment = 16<<20; /* 16M */ | 453 | const unsigned long long alignment = 16<<20; /* 16M */ |
451 | unsigned long long start = 0LL; | 454 | unsigned long long start = 0LL; |
@@ -604,14 +607,6 @@ void __init setup_arch(char **cmdline_p) | |||
604 | early_cpu_init(); | 607 | early_cpu_init(); |
605 | early_ioremap_init(); | 608 | early_ioremap_init(); |
606 | 609 | ||
607 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | ||
608 | /* | ||
609 | * Must be before kernel pagetables are setup | ||
610 | * or fixmap area is touched. | ||
611 | */ | ||
612 | vmi_init(); | ||
613 | #endif | ||
614 | |||
615 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 610 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
616 | screen_info = boot_params.screen_info; | 611 | screen_info = boot_params.screen_info; |
617 | edid_info = boot_params.edid_info; | 612 | edid_info = boot_params.edid_info; |
@@ -673,11 +668,36 @@ void __init setup_arch(char **cmdline_p) | |||
673 | bss_resource.start = virt_to_phys(&__bss_start); | 668 | bss_resource.start = virt_to_phys(&__bss_start); |
674 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 669 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
675 | 670 | ||
671 | #ifdef CONFIG_CMDLINE_BOOL | ||
672 | #ifdef CONFIG_CMDLINE_OVERRIDE | ||
673 | strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); | ||
674 | #else | ||
675 | if (builtin_cmdline[0]) { | ||
676 | /* append boot loader cmdline to builtin */ | ||
677 | strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); | ||
678 | strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); | ||
679 | strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); | ||
680 | } | ||
681 | #endif | ||
682 | #endif | ||
683 | |||
676 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 684 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
677 | *cmdline_p = command_line; | 685 | *cmdline_p = command_line; |
678 | 686 | ||
679 | parse_early_param(); | 687 | parse_early_param(); |
680 | 688 | ||
689 | #ifdef CONFIG_X86_64 | ||
690 | check_efer(); | ||
691 | #endif | ||
692 | |||
693 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | ||
694 | /* | ||
695 | * Must be before kernel pagetables are setup | ||
696 | * or fixmap area is touched. | ||
697 | */ | ||
698 | vmi_init(); | ||
699 | #endif | ||
700 | |||
681 | /* after early param, so could get panic from serial */ | 701 | /* after early param, so could get panic from serial */ |
682 | reserve_early_setup_data(); | 702 | reserve_early_setup_data(); |
683 | 703 | ||
@@ -738,7 +758,6 @@ void __init setup_arch(char **cmdline_p) | |||
738 | #else | 758 | #else |
739 | num_physpages = max_pfn; | 759 | num_physpages = max_pfn; |
740 | 760 | ||
741 | check_efer(); | ||
742 | if (cpu_has_x2apic) | 761 | if (cpu_has_x2apic) |
743 | check_x2apic(); | 762 | check_x2apic(); |
744 | 763 | ||
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb519d2dc..8b4956e800ac 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h | |||
@@ -24,4 +24,9 @@ struct rt_sigframe { | |||
24 | struct ucontext uc; | 24 | struct ucontext uc; |
25 | struct siginfo info; | 25 | struct siginfo info; |
26 | }; | 26 | }; |
27 | |||
28 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
29 | sigset_t *set, struct pt_regs *regs); | ||
30 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
31 | sigset_t *set, struct pt_regs *regs); | ||
27 | #endif | 32 | #endif |
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 0c727f64e79b..2a2435d3037d 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/wait.h> | 19 | #include <linux/wait.h> |
20 | #include <linux/tracehook.h> | ||
20 | #include <linux/elf.h> | 21 | #include <linux/elf.h> |
21 | #include <linux/smp.h> | 22 | #include <linux/smp.h> |
22 | #include <linux/mm.h> | 23 | #include <linux/mm.h> |
@@ -559,8 +560,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
559 | * handler too. | 560 | * handler too. |
560 | */ | 561 | */ |
561 | regs->flags &= ~X86_EFLAGS_TF; | 562 | regs->flags &= ~X86_EFLAGS_TF; |
562 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
563 | ptrace_notify(SIGTRAP); | ||
564 | 563 | ||
565 | spin_lock_irq(¤t->sighand->siglock); | 564 | spin_lock_irq(¤t->sighand->siglock); |
566 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); | 565 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); |
@@ -569,6 +568,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
569 | recalc_sigpending(); | 568 | recalc_sigpending(); |
570 | spin_unlock_irq(¤t->sighand->siglock); | 569 | spin_unlock_irq(¤t->sighand->siglock); |
571 | 570 | ||
571 | tracehook_signal_handler(sig, info, ka, regs, | ||
572 | test_thread_flag(TIF_SINGLESTEP)); | ||
573 | |||
572 | return 0; | 574 | return 0; |
573 | } | 575 | } |
574 | 576 | ||
@@ -662,5 +664,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
662 | if (thread_info_flags & _TIF_SIGPENDING) | 664 | if (thread_info_flags & _TIF_SIGPENDING) |
663 | do_signal(regs); | 665 | do_signal(regs); |
664 | 666 | ||
667 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
668 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
669 | tracehook_notify_resume(regs); | ||
670 | } | ||
671 | |||
665 | clear_thread_flag(TIF_IRET); | 672 | clear_thread_flag(TIF_IRET); |
666 | } | 673 | } |
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 2f1464050059..694aa888bb19 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
@@ -15,17 +15,20 @@ | |||
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | #include <linux/ptrace.h> | 17 | #include <linux/ptrace.h> |
18 | #include <linux/tracehook.h> | ||
18 | #include <linux/unistd.h> | 19 | #include <linux/unistd.h> |
19 | #include <linux/stddef.h> | 20 | #include <linux/stddef.h> |
20 | #include <linux/personality.h> | 21 | #include <linux/personality.h> |
21 | #include <linux/compiler.h> | 22 | #include <linux/compiler.h> |
23 | #include <linux/uaccess.h> | ||
24 | |||
22 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
23 | #include <asm/ucontext.h> | 26 | #include <asm/ucontext.h> |
24 | #include <asm/uaccess.h> | ||
25 | #include <asm/i387.h> | 27 | #include <asm/i387.h> |
26 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
27 | #include <asm/ia32_unistd.h> | 29 | #include <asm/ia32_unistd.h> |
28 | #include <asm/mce.h> | 30 | #include <asm/mce.h> |
31 | #include <asm/syscall.h> | ||
29 | #include <asm/syscalls.h> | 32 | #include <asm/syscalls.h> |
30 | #include "sigframe.h" | 33 | #include "sigframe.h" |
31 | 34 | ||
@@ -42,11 +45,6 @@ | |||
42 | # define FIX_EFLAGS __FIX_EFLAGS | 45 | # define FIX_EFLAGS __FIX_EFLAGS |
43 | #endif | 46 | #endif |
44 | 47 | ||
45 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
46 | sigset_t *set, struct pt_regs * regs); | ||
47 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
48 | sigset_t *set, struct pt_regs * regs); | ||
49 | |||
50 | asmlinkage long | 48 | asmlinkage long |
51 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 49 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
52 | struct pt_regs *regs) | 50 | struct pt_regs *regs) |
@@ -129,7 +127,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
129 | /* Always make any pending restarted system calls return -EINTR */ | 127 | /* Always make any pending restarted system calls return -EINTR */ |
130 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 128 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
131 | 129 | ||
132 | #define COPY(x) err |= __get_user(regs->x, &sc->x) | 130 | #define COPY(x) (err |= __get_user(regs->x, &sc->x)) |
133 | 131 | ||
134 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 132 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
135 | COPY(dx); COPY(cx); COPY(ip); | 133 | COPY(dx); COPY(cx); COPY(ip); |
@@ -159,7 +157,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
159 | } | 157 | } |
160 | 158 | ||
161 | { | 159 | { |
162 | struct _fpstate __user * buf; | 160 | struct _fpstate __user *buf; |
163 | err |= __get_user(buf, &sc->fpstate); | 161 | err |= __get_user(buf, &sc->fpstate); |
164 | 162 | ||
165 | if (buf) { | 163 | if (buf) { |
@@ -199,7 +197,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
199 | current->blocked = set; | 197 | current->blocked = set; |
200 | recalc_sigpending(); | 198 | recalc_sigpending(); |
201 | spin_unlock_irq(¤t->sighand->siglock); | 199 | spin_unlock_irq(¤t->sighand->siglock); |
202 | 200 | ||
203 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | 201 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) |
204 | goto badframe; | 202 | goto badframe; |
205 | 203 | ||
@@ -209,16 +207,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
209 | return ax; | 207 | return ax; |
210 | 208 | ||
211 | badframe: | 209 | badframe: |
212 | signal_fault(regs,frame,"sigreturn"); | 210 | signal_fault(regs, frame, "sigreturn"); |
213 | return 0; | 211 | return 0; |
214 | } | 212 | } |
215 | 213 | ||
216 | /* | 214 | /* |
217 | * Set up a signal frame. | 215 | * Set up a signal frame. |
218 | */ | 216 | */ |
219 | 217 | ||
220 | static inline int | 218 | static inline int |
221 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) | 219 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, |
220 | unsigned long mask, struct task_struct *me) | ||
222 | { | 221 | { |
223 | int err = 0; | 222 | int err = 0; |
224 | 223 | ||
@@ -274,35 +273,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | |||
274 | } | 273 | } |
275 | 274 | ||
276 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 275 | static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
277 | sigset_t *set, struct pt_regs * regs) | 276 | sigset_t *set, struct pt_regs *regs) |
278 | { | 277 | { |
279 | struct rt_sigframe __user *frame; | 278 | struct rt_sigframe __user *frame; |
280 | struct _fpstate __user *fp = NULL; | 279 | struct _fpstate __user *fp = NULL; |
281 | int err = 0; | 280 | int err = 0; |
282 | struct task_struct *me = current; | 281 | struct task_struct *me = current; |
283 | 282 | ||
284 | if (used_math()) { | 283 | if (used_math()) { |
285 | fp = get_stack(ka, regs, sizeof(struct _fpstate)); | 284 | fp = get_stack(ka, regs, sizeof(struct _fpstate)); |
286 | frame = (void __user *)round_down( | 285 | frame = (void __user *)round_down( |
287 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | 286 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; |
288 | 287 | ||
289 | if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) | 288 | if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) |
290 | goto give_sigsegv; | 289 | goto give_sigsegv; |
291 | 290 | ||
292 | if (save_i387(fp) < 0) | 291 | if (save_i387(fp) < 0) |
293 | err |= -1; | 292 | err |= -1; |
294 | } else | 293 | } else |
295 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; | 294 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; |
296 | 295 | ||
297 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 296 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
298 | goto give_sigsegv; | 297 | goto give_sigsegv; |
299 | 298 | ||
300 | if (ka->sa.sa_flags & SA_SIGINFO) { | 299 | if (ka->sa.sa_flags & SA_SIGINFO) { |
301 | err |= copy_siginfo_to_user(&frame->info, info); | 300 | err |= copy_siginfo_to_user(&frame->info, info); |
302 | if (err) | 301 | if (err) |
303 | goto give_sigsegv; | 302 | goto give_sigsegv; |
304 | } | 303 | } |
305 | 304 | ||
306 | /* Create the ucontext. */ | 305 | /* Create the ucontext. */ |
307 | err |= __put_user(0, &frame->uc.uc_flags); | 306 | err |= __put_user(0, &frame->uc.uc_flags); |
308 | err |= __put_user(0, &frame->uc.uc_link); | 307 | err |= __put_user(0, &frame->uc.uc_link); |
@@ -312,9 +311,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
312 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | 311 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); |
313 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | 312 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); |
314 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); | 313 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); |
315 | if (sizeof(*set) == 16) { | 314 | if (sizeof(*set) == 16) { |
316 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | 315 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); |
317 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | 316 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); |
318 | } else | 317 | } else |
319 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | 318 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); |
320 | 319 | ||
@@ -325,7 +324,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
325 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | 324 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); |
326 | } else { | 325 | } else { |
327 | /* could use a vstub here */ | 326 | /* could use a vstub here */ |
328 | goto give_sigsegv; | 327 | goto give_sigsegv; |
329 | } | 328 | } |
330 | 329 | ||
331 | if (err) | 330 | if (err) |
@@ -333,7 +332,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
333 | 332 | ||
334 | /* Set up registers for signal handler */ | 333 | /* Set up registers for signal handler */ |
335 | regs->di = sig; | 334 | regs->di = sig; |
336 | /* In case the signal handler was declared without prototypes */ | 335 | /* In case the signal handler was declared without prototypes */ |
337 | regs->ax = 0; | 336 | regs->ax = 0; |
338 | 337 | ||
339 | /* This also works for non SA_SIGINFO handlers because they expect the | 338 | /* This also works for non SA_SIGINFO handlers because they expect the |
@@ -356,37 +355,8 @@ give_sigsegv: | |||
356 | } | 355 | } |
357 | 356 | ||
358 | /* | 357 | /* |
359 | * Return -1L or the syscall number that @regs is executing. | ||
360 | */ | ||
361 | static long current_syscall(struct pt_regs *regs) | ||
362 | { | ||
363 | /* | ||
364 | * We always sign-extend a -1 value being set here, | ||
365 | * so this is always either -1L or a syscall number. | ||
366 | */ | ||
367 | return regs->orig_ax; | ||
368 | } | ||
369 | |||
370 | /* | ||
371 | * Return a value that is -EFOO if the system call in @regs->orig_ax | ||
372 | * returned an error. This only works for @regs from @current. | ||
373 | */ | ||
374 | static long current_syscall_ret(struct pt_regs *regs) | ||
375 | { | ||
376 | #ifdef CONFIG_IA32_EMULATION | ||
377 | if (test_thread_flag(TIF_IA32)) | ||
378 | /* | ||
379 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO | ||
380 | * and will match correctly in comparisons. | ||
381 | */ | ||
382 | return (int) regs->ax; | ||
383 | #endif | ||
384 | return regs->ax; | ||
385 | } | ||
386 | |||
387 | /* | ||
388 | * OK, we're invoking a handler | 358 | * OK, we're invoking a handler |
389 | */ | 359 | */ |
390 | 360 | ||
391 | static int | 361 | static int |
392 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | 362 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, |
@@ -395,9 +365,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
395 | int ret; | 365 | int ret; |
396 | 366 | ||
397 | /* Are we from a system call? */ | 367 | /* Are we from a system call? */ |
398 | if (current_syscall(regs) >= 0) { | 368 | if (syscall_get_nr(current, regs) >= 0) { |
399 | /* If so, check system call restarting.. */ | 369 | /* If so, check system call restarting.. */ |
400 | switch (current_syscall_ret(regs)) { | 370 | switch (syscall_get_error(current, regs)) { |
401 | case -ERESTART_RESTARTBLOCK: | 371 | case -ERESTART_RESTARTBLOCK: |
402 | case -ERESTARTNOHAND: | 372 | case -ERESTARTNOHAND: |
403 | regs->ax = -EINTR; | 373 | regs->ax = -EINTR; |
@@ -430,7 +400,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
430 | ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); | 400 | ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); |
431 | else | 401 | else |
432 | ret = ia32_setup_frame(sig, ka, oldset, regs); | 402 | ret = ia32_setup_frame(sig, ka, oldset, regs); |
433 | } else | 403 | } else |
434 | #endif | 404 | #endif |
435 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | 405 | ret = setup_rt_frame(sig, ka, info, oldset, regs); |
436 | 406 | ||
@@ -454,15 +424,16 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
454 | * handler too. | 424 | * handler too. |
455 | */ | 425 | */ |
456 | regs->flags &= ~X86_EFLAGS_TF; | 426 | regs->flags &= ~X86_EFLAGS_TF; |
457 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
458 | ptrace_notify(SIGTRAP); | ||
459 | 427 | ||
460 | spin_lock_irq(¤t->sighand->siglock); | 428 | spin_lock_irq(¤t->sighand->siglock); |
461 | sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); | 429 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); |
462 | if (!(ka->sa.sa_flags & SA_NODEFER)) | 430 | if (!(ka->sa.sa_flags & SA_NODEFER)) |
463 | sigaddset(¤t->blocked,sig); | 431 | sigaddset(¤t->blocked, sig); |
464 | recalc_sigpending(); | 432 | recalc_sigpending(); |
465 | spin_unlock_irq(¤t->sighand->siglock); | 433 | spin_unlock_irq(¤t->sighand->siglock); |
434 | |||
435 | tracehook_signal_handler(sig, info, ka, regs, | ||
436 | test_thread_flag(TIF_SINGLESTEP)); | ||
466 | } | 437 | } |
467 | 438 | ||
468 | return ret; | 439 | return ret; |
@@ -519,9 +490,9 @@ static void do_signal(struct pt_regs *regs) | |||
519 | } | 490 | } |
520 | 491 | ||
521 | /* Did we come from a system call? */ | 492 | /* Did we come from a system call? */ |
522 | if (current_syscall(regs) >= 0) { | 493 | if (syscall_get_nr(current, regs) >= 0) { |
523 | /* Restart the system call - no handlers present */ | 494 | /* Restart the system call - no handlers present */ |
524 | switch (current_syscall_ret(regs)) { | 495 | switch (syscall_get_error(current, regs)) { |
525 | case -ERESTARTNOHAND: | 496 | case -ERESTARTNOHAND: |
526 | case -ERESTARTSYS: | 497 | case -ERESTARTSYS: |
527 | case -ERESTARTNOINTR: | 498 | case -ERESTARTNOINTR: |
@@ -559,17 +530,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused, | |||
559 | /* deal with pending signal delivery */ | 530 | /* deal with pending signal delivery */ |
560 | if (thread_info_flags & _TIF_SIGPENDING) | 531 | if (thread_info_flags & _TIF_SIGPENDING) |
561 | do_signal(regs); | 532 | do_signal(regs); |
533 | |||
534 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
535 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
536 | tracehook_notify_resume(regs); | ||
537 | } | ||
562 | } | 538 | } |
563 | 539 | ||
564 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 540 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
565 | { | 541 | { |
566 | struct task_struct *me = current; | 542 | struct task_struct *me = current; |
567 | if (show_unhandled_signals && printk_ratelimit()) { | 543 | if (show_unhandled_signals && printk_ratelimit()) { |
568 | printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | 544 | printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
569 | me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); | 545 | me->comm, me->pid, where, frame, regs->ip, |
546 | regs->sp, regs->orig_ax); | ||
570 | print_vma_addr(" in ", regs->ip); | 547 | print_vma_addr(" in ", regs->ip); |
571 | printk("\n"); | 548 | printk("\n"); |
572 | } | 549 | } |
573 | 550 | ||
574 | force_sig(SIGSEGV, me); | 551 | force_sig(SIGSEGV, me); |
575 | } | 552 | } |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0133a952d11f..2ff0bbcd5bd1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -749,6 +749,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
749 | } | 749 | } |
750 | 750 | ||
751 | #ifdef CONFIG_X86_64 | 751 | #ifdef CONFIG_X86_64 |
752 | |||
753 | /* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ | ||
754 | static void __ref free_bootmem_pda(struct x8664_pda *oldpda) | ||
755 | { | ||
756 | if (!after_bootmem) | ||
757 | free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); | ||
758 | } | ||
759 | |||
752 | /* | 760 | /* |
753 | * Allocate node local memory for the AP pda. | 761 | * Allocate node local memory for the AP pda. |
754 | * | 762 | * |
@@ -777,8 +785,7 @@ int __cpuinit get_local_pda(int cpu) | |||
777 | 785 | ||
778 | if (oldpda) { | 786 | if (oldpda) { |
779 | memcpy(newpda, oldpda, size); | 787 | memcpy(newpda, oldpda, size); |
780 | if (!after_bootmem) | 788 | free_bootmem_pda(oldpda); |
781 | free_bootmem((unsigned long)oldpda, size); | ||
782 | } | 789 | } |
783 | 790 | ||
784 | newpda->in_bootmem = 0; | 791 | newpda->in_bootmem = 0; |
@@ -987,17 +994,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
987 | flush_tlb_all(); | 994 | flush_tlb_all(); |
988 | low_mappings = 1; | 995 | low_mappings = 1; |
989 | 996 | ||
990 | #ifdef CONFIG_X86_PC | ||
991 | if (def_to_bigsmp && apicid > 8) { | ||
992 | printk(KERN_WARNING | ||
993 | "More than 8 CPUs detected - skipping them.\n" | ||
994 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | ||
995 | err = -1; | ||
996 | } else | ||
997 | err = do_boot_cpu(apicid, cpu); | ||
998 | #else | ||
999 | err = do_boot_cpu(apicid, cpu); | 997 | err = do_boot_cpu(apicid, cpu); |
1000 | #endif | ||
1001 | 998 | ||
1002 | zap_low_mappings(); | 999 | zap_low_mappings(); |
1003 | low_mappings = 0; | 1000 | low_mappings = 0; |
@@ -1051,6 +1048,34 @@ static __init void disable_smp(void) | |||
1051 | static int __init smp_sanity_check(unsigned max_cpus) | 1048 | static int __init smp_sanity_check(unsigned max_cpus) |
1052 | { | 1049 | { |
1053 | preempt_disable(); | 1050 | preempt_disable(); |
1051 | |||
1052 | #if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | ||
1053 | if (def_to_bigsmp && nr_cpu_ids > 8) { | ||
1054 | unsigned int cpu; | ||
1055 | unsigned nr; | ||
1056 | |||
1057 | printk(KERN_WARNING | ||
1058 | "More than 8 CPUs detected - skipping them.\n" | ||
1059 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | ||
1060 | |||
1061 | nr = 0; | ||
1062 | for_each_present_cpu(cpu) { | ||
1063 | if (nr >= 8) | ||
1064 | cpu_clear(cpu, cpu_present_map); | ||
1065 | nr++; | ||
1066 | } | ||
1067 | |||
1068 | nr = 0; | ||
1069 | for_each_possible_cpu(cpu) { | ||
1070 | if (nr >= 8) | ||
1071 | cpu_clear(cpu, cpu_possible_map); | ||
1072 | nr++; | ||
1073 | } | ||
1074 | |||
1075 | nr_cpu_ids = 8; | ||
1076 | } | ||
1077 | #endif | ||
1078 | |||
1054 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 1079 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
1055 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" | 1080 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" |
1056 | "by the BIOS.\n", hard_smp_processor_id()); | 1081 | "by the BIOS.\n", hard_smp_processor_id()); |
@@ -1196,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1196 | printk(KERN_INFO "CPU%d: ", 0); | 1221 | printk(KERN_INFO "CPU%d: ", 0); |
1197 | print_cpu_info(&cpu_data(0)); | 1222 | print_cpu_info(&cpu_data(0)); |
1198 | setup_boot_clock(); | 1223 | setup_boot_clock(); |
1224 | |||
1225 | if (is_uv_system()) | ||
1226 | uv_system_init(); | ||
1199 | out: | 1227 | out: |
1200 | preempt_enable(); | 1228 | preempt_enable(); |
1201 | } | 1229 | } |
@@ -1285,16 +1313,13 @@ __init void prefill_possible_map(void) | |||
1285 | if (!num_processors) | 1313 | if (!num_processors) |
1286 | num_processors = 1; | 1314 | num_processors = 1; |
1287 | 1315 | ||
1288 | #ifdef CONFIG_HOTPLUG_CPU | ||
1289 | if (additional_cpus == -1) { | 1316 | if (additional_cpus == -1) { |
1290 | if (disabled_cpus > 0) | 1317 | if (disabled_cpus > 0) |
1291 | additional_cpus = disabled_cpus; | 1318 | additional_cpus = disabled_cpus; |
1292 | else | 1319 | else |
1293 | additional_cpus = 0; | 1320 | additional_cpus = 0; |
1294 | } | 1321 | } |
1295 | #else | 1322 | |
1296 | additional_cpus = 0; | ||
1297 | #endif | ||
1298 | possible = num_processors + additional_cpus; | 1323 | possible = num_processors + additional_cpus; |
1299 | if (possible > NR_CPUS) | 1324 | if (possible > NR_CPUS) |
1300 | possible = NR_CPUS; | 1325 | possible = NR_CPUS; |
@@ -1386,17 +1411,3 @@ void __cpu_die(unsigned int cpu) | |||
1386 | BUG(); | 1411 | BUG(); |
1387 | } | 1412 | } |
1388 | #endif | 1413 | #endif |
1389 | |||
1390 | /* | ||
1391 | * If the BIOS enumerates physical processors before logical, | ||
1392 | * maxcpus=N at enumeration-time can be used to disable HT. | ||
1393 | */ | ||
1394 | static int __init parse_maxcpus(char *arg) | ||
1395 | { | ||
1396 | extern unsigned int maxcpus; | ||
1397 | |||
1398 | if (arg) | ||
1399 | maxcpus = simple_strtoul(arg, NULL, 0); | ||
1400 | return 0; | ||
1401 | } | ||
1402 | early_param("maxcpus", parse_maxcpus); | ||
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 99941b37eca0..397e309839dd 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c | |||
@@ -8,18 +8,21 @@ | |||
8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); | 8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); |
9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | 9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); |
10 | 10 | ||
11 | /* Initialize the CPU's GDT. This is either the boot CPU doing itself | 11 | /* |
12 | (still using the master per-cpu area), or a CPU doing it for a | 12 | * Initialize the CPU's GDT. This is either the boot CPU doing itself |
13 | secondary which will soon come up. */ | 13 | * (still using the master per-cpu area), or a CPU doing it for a |
14 | * secondary which will soon come up. | ||
15 | */ | ||
14 | __cpuinit void init_gdt(int cpu) | 16 | __cpuinit void init_gdt(int cpu) |
15 | { | 17 | { |
16 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 18 | struct desc_struct gdt; |
17 | 19 | ||
18 | pack_descriptor(&gdt[GDT_ENTRY_PERCPU], | 20 | pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, |
19 | __per_cpu_offset[cpu], 0xFFFFF, | ||
20 | 0x2 | DESCTYPE_S, 0x8); | 21 | 0x2 | DESCTYPE_S, 0x8); |
22 | gdt.s = 1; | ||
21 | 23 | ||
22 | gdt[GDT_ENTRY_PERCPU].s = 1; | 24 | write_gdt_entry(get_cpu_gdt_table(cpu), |
25 | GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); | ||
23 | 26 | ||
24 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | 27 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; |
25 | per_cpu(cpu_number, cpu) = cpu; | 28 | per_cpu(cpu_number, cpu) = cpu; |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index c9288c883e20..6bc211accf08 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -13,16 +13,17 @@ | |||
13 | #include <linux/utsname.h> | 13 | #include <linux/utsname.h> |
14 | #include <linux/personality.h> | 14 | #include <linux/personality.h> |
15 | #include <linux/random.h> | 15 | #include <linux/random.h> |
16 | #include <linux/uaccess.h> | ||
16 | 17 | ||
17 | #include <asm/uaccess.h> | ||
18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
19 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
20 | 20 | ||
21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, | 21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, |
22 | unsigned long fd, unsigned long off) | 22 | unsigned long prot, unsigned long flags, |
23 | unsigned long fd, unsigned long off) | ||
23 | { | 24 | { |
24 | long error; | 25 | long error; |
25 | struct file * file; | 26 | struct file *file; |
26 | 27 | ||
27 | error = -EINVAL; | 28 | error = -EINVAL; |
28 | if (off & ~PAGE_MASK) | 29 | if (off & ~PAGE_MASK) |
@@ -57,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
57 | unmapped base down for this case. This can give | 58 | unmapped base down for this case. This can give |
58 | conflicts with the heap, but we assume that glibc | 59 | conflicts with the heap, but we assume that glibc |
59 | malloc knows how to fall back to mmap. Give it 1GB | 60 | malloc knows how to fall back to mmap. Give it 1GB |
60 | of playground for now. -AK */ | 61 | of playground for now. -AK */ |
61 | *begin = 0x40000000; | 62 | *begin = 0x40000000; |
62 | *end = 0x80000000; | 63 | *end = 0x80000000; |
63 | if (current->flags & PF_RANDOMIZE) { | 64 | if (current->flags & PF_RANDOMIZE) { |
64 | new_begin = randomize_range(*begin, *begin + 0x02000000, 0); | 65 | new_begin = randomize_range(*begin, *begin + 0x02000000, 0); |
65 | if (new_begin) | 66 | if (new_begin) |
@@ -67,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, | |||
67 | } | 68 | } |
68 | } else { | 69 | } else { |
69 | *begin = TASK_UNMAPPED_BASE; | 70 | *begin = TASK_UNMAPPED_BASE; |
70 | *end = TASK_SIZE; | 71 | *end = TASK_SIZE; |
71 | } | 72 | } |
72 | } | 73 | } |
73 | 74 | ||
74 | unsigned long | 75 | unsigned long |
75 | arch_get_unmapped_area(struct file *filp, unsigned long addr, | 76 | arch_get_unmapped_area(struct file *filp, unsigned long addr, |
@@ -79,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
79 | struct vm_area_struct *vma; | 80 | struct vm_area_struct *vma; |
80 | unsigned long start_addr; | 81 | unsigned long start_addr; |
81 | unsigned long begin, end; | 82 | unsigned long begin, end; |
82 | 83 | ||
83 | if (flags & MAP_FIXED) | 84 | if (flags & MAP_FIXED) |
84 | return addr; | 85 | return addr; |
85 | 86 | ||
86 | find_start_end(flags, &begin, &end); | 87 | find_start_end(flags, &begin, &end); |
87 | 88 | ||
88 | if (len > end) | 89 | if (len > end) |
89 | return -ENOMEM; | 90 | return -ENOMEM; |
@@ -97,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
97 | } | 98 | } |
98 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) | 99 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) |
99 | && len <= mm->cached_hole_size) { | 100 | && len <= mm->cached_hole_size) { |
100 | mm->cached_hole_size = 0; | 101 | mm->cached_hole_size = 0; |
101 | mm->free_area_cache = begin; | 102 | mm->free_area_cache = begin; |
102 | } | 103 | } |
103 | addr = mm->free_area_cache; | 104 | addr = mm->free_area_cache; |
104 | if (addr < begin) | 105 | if (addr < begin) |
105 | addr = begin; | 106 | addr = begin; |
106 | start_addr = addr; | 107 | start_addr = addr; |
107 | 108 | ||
108 | full_search: | 109 | full_search: |
@@ -128,7 +129,7 @@ full_search: | |||
128 | return addr; | 129 | return addr; |
129 | } | 130 | } |
130 | if (addr + mm->cached_hole_size < vma->vm_start) | 131 | if (addr + mm->cached_hole_size < vma->vm_start) |
131 | mm->cached_hole_size = vma->vm_start - addr; | 132 | mm->cached_hole_size = vma->vm_start - addr; |
132 | 133 | ||
133 | addr = vma->vm_end; | 134 | addr = vma->vm_end; |
134 | } | 135 | } |
@@ -178,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
178 | vma = find_vma(mm, addr-len); | 179 | vma = find_vma(mm, addr-len); |
179 | if (!vma || addr <= vma->vm_start) | 180 | if (!vma || addr <= vma->vm_start) |
180 | /* remember the address as a hint for next time */ | 181 | /* remember the address as a hint for next time */ |
181 | return (mm->free_area_cache = addr-len); | 182 | return mm->free_area_cache = addr-len; |
182 | } | 183 | } |
183 | 184 | ||
184 | if (mm->mmap_base < len) | 185 | if (mm->mmap_base < len) |
@@ -195,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
195 | vma = find_vma(mm, addr); | 196 | vma = find_vma(mm, addr); |
196 | if (!vma || addr+len <= vma->vm_start) | 197 | if (!vma || addr+len <= vma->vm_start) |
197 | /* remember the address as a hint for next time */ | 198 | /* remember the address as a hint for next time */ |
198 | return (mm->free_area_cache = addr); | 199 | return mm->free_area_cache = addr; |
199 | 200 | ||
200 | /* remember the largest hole we saw so far */ | 201 | /* remember the largest hole we saw so far */ |
201 | if (addr + mm->cached_hole_size < vma->vm_start) | 202 | if (addr + mm->cached_hole_size < vma->vm_start) |
@@ -225,13 +226,13 @@ bottomup: | |||
225 | } | 226 | } |
226 | 227 | ||
227 | 228 | ||
228 | asmlinkage long sys_uname(struct new_utsname __user * name) | 229 | asmlinkage long sys_uname(struct new_utsname __user *name) |
229 | { | 230 | { |
230 | int err; | 231 | int err; |
231 | down_read(&uts_sem); | 232 | down_read(&uts_sem); |
232 | err = copy_to_user(name, utsname(), sizeof (*name)); | 233 | err = copy_to_user(name, utsname(), sizeof(*name)); |
233 | up_read(&uts_sem); | 234 | up_read(&uts_sem); |
234 | if (personality(current->personality) == PER_LINUX32) | 235 | if (personality(current->personality) == PER_LINUX32) |
235 | err |= copy_to_user(&name->machine, "i686", 5); | 236 | err |= copy_to_user(&name->machine, "i686", 5); |
236 | return err ? -EFAULT : 0; | 237 | return err ? -EFAULT : 0; |
237 | } | 238 | } |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb7712ab0..8b8c0d6640fa 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/genapic.h> | 17 | #include <asm/genapic.h> |
18 | #include <asm/idle.h> | 18 | #include <asm/idle.h> |
19 | #include <asm/tsc.h> | 19 | #include <asm/tsc.h> |
20 | #include <asm/irq_vectors.h> | ||
20 | 21 | ||
21 | #include <mach_apic.h> | 22 | #include <mach_apic.h> |
22 | 23 | ||
@@ -783,7 +784,7 @@ static int __init uv_bau_init(void) | |||
783 | uv_init_blade(blade, node, cur_cpu); | 784 | uv_init_blade(blade, node, cur_cpu); |
784 | cur_cpu += uv_blade_nr_possible_cpus(blade); | 785 | cur_cpu += uv_blade_nr_possible_cpus(blade); |
785 | } | 786 | } |
786 | set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); | 787 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); |
787 | uv_enable_timeouts(); | 788 | uv_enable_timeouts(); |
788 | 789 | ||
789 | return 0; | 790 | return 0; |
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 513caaca7115..7a31f104bef9 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include <linux/bug.h> | 32 | #include <linux/bug.h> |
33 | #include <linux/nmi.h> | 33 | #include <linux/nmi.h> |
34 | #include <linux/mm.h> | 34 | #include <linux/mm.h> |
35 | #include <linux/smp.h> | ||
36 | #include <linux/io.h> | ||
35 | 37 | ||
36 | #if defined(CONFIG_EDAC) | 38 | #if defined(CONFIG_EDAC) |
37 | #include <linux/edac.h> | 39 | #include <linux/edac.h> |
@@ -45,9 +47,6 @@ | |||
45 | #include <asm/unwind.h> | 47 | #include <asm/unwind.h> |
46 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
47 | #include <asm/i387.h> | 49 | #include <asm/i387.h> |
48 | #include <asm/nmi.h> | ||
49 | #include <asm/smp.h> | ||
50 | #include <asm/io.h> | ||
51 | #include <asm/pgalloc.h> | 50 | #include <asm/pgalloc.h> |
52 | #include <asm/proto.h> | 51 | #include <asm/proto.h> |
53 | #include <asm/pda.h> | 52 | #include <asm/pda.h> |
@@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
85 | 84 | ||
86 | void printk_address(unsigned long address, int reliable) | 85 | void printk_address(unsigned long address, int reliable) |
87 | { | 86 | { |
88 | printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); | 87 | printk(" [<%016lx>] %s%pS\n", |
88 | address, reliable ? "" : "? ", (void *) address); | ||
89 | } | 89 | } |
90 | 90 | ||
91 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 91 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
@@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
98 | [STACKFAULT_STACK - 1] = "#SS", | 98 | [STACKFAULT_STACK - 1] = "#SS", |
99 | [MCE_STACK - 1] = "#MC", | 99 | [MCE_STACK - 1] = "#MC", |
100 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 100 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
101 | [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | 101 | [N_EXCEPTION_STACKS ... |
102 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
102 | #endif | 103 | #endif |
103 | }; | 104 | }; |
104 | unsigned k; | 105 | unsigned k; |
@@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
163 | } | 164 | } |
164 | 165 | ||
165 | /* | 166 | /* |
166 | * x86-64 can have up to three kernel stacks: | 167 | * x86-64 can have up to three kernel stacks: |
167 | * process stack | 168 | * process stack |
168 | * interrupt stack | 169 | * interrupt stack |
169 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 170 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
@@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
219 | const struct stacktrace_ops *ops, void *data) | 220 | const struct stacktrace_ops *ops, void *data) |
220 | { | 221 | { |
221 | const unsigned cpu = get_cpu(); | 222 | const unsigned cpu = get_cpu(); |
222 | unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; | 223 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
223 | unsigned used = 0; | 224 | unsigned used = 0; |
224 | struct thread_info *tinfo; | 225 | struct thread_info *tinfo; |
225 | 226 | ||
@@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
237 | if (!bp) { | 238 | if (!bp) { |
238 | if (task == current) { | 239 | if (task == current) { |
239 | /* Grab bp right from our regs */ | 240 | /* Grab bp right from our regs */ |
240 | asm("movq %%rbp, %0" : "=r" (bp) :); | 241 | asm("movq %%rbp, %0" : "=r" (bp) : ); |
241 | } else { | 242 | } else { |
242 | /* bp is the last reg pushed by switch_to */ | 243 | /* bp is the last reg pushed by switch_to */ |
243 | bp = *(unsigned long *) task->thread.sp; | 244 | bp = *(unsigned long *) task->thread.sp; |
@@ -339,9 +340,8 @@ static void | |||
339 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 340 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
340 | unsigned long *stack, unsigned long bp, char *log_lvl) | 341 | unsigned long *stack, unsigned long bp, char *log_lvl) |
341 | { | 342 | { |
342 | printk("\nCall Trace:\n"); | 343 | printk("Call Trace:\n"); |
343 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 344 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
344 | printk("\n"); | ||
345 | } | 345 | } |
346 | 346 | ||
347 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 347 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
@@ -357,11 +357,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
357 | unsigned long *stack; | 357 | unsigned long *stack; |
358 | int i; | 358 | int i; |
359 | const int cpu = smp_processor_id(); | 359 | const int cpu = smp_processor_id(); |
360 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); | 360 | unsigned long *irqstack_end = |
361 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | 361 | (unsigned long *) (cpu_pda(cpu)->irqstackptr); |
362 | unsigned long *irqstack = | ||
363 | (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); | ||
362 | 364 | ||
363 | // debugging aid: "show_stack(NULL, NULL);" prints the | 365 | /* |
364 | // back trace for this cpu. | 366 | * debugging aid: "show_stack(NULL, NULL);" prints the |
367 | * back trace for this cpu. | ||
368 | */ | ||
365 | 369 | ||
366 | if (sp == NULL) { | 370 | if (sp == NULL) { |
367 | if (task) | 371 | if (task) |
@@ -386,6 +390,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
386 | printk(" %016lx", *stack++); | 390 | printk(" %016lx", *stack++); |
387 | touch_nmi_watchdog(); | 391 | touch_nmi_watchdog(); |
388 | } | 392 | } |
393 | printk("\n"); | ||
389 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 394 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
390 | } | 395 | } |
391 | 396 | ||
@@ -404,7 +409,7 @@ void dump_stack(void) | |||
404 | 409 | ||
405 | #ifdef CONFIG_FRAME_POINTER | 410 | #ifdef CONFIG_FRAME_POINTER |
406 | if (!bp) | 411 | if (!bp) |
407 | asm("movq %%rbp, %0" : "=r" (bp):); | 412 | asm("movq %%rbp, %0" : "=r" (bp) : ); |
408 | #endif | 413 | #endif |
409 | 414 | ||
410 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 415 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
@@ -414,7 +419,6 @@ void dump_stack(void) | |||
414 | init_utsname()->version); | 419 | init_utsname()->version); |
415 | show_trace(NULL, NULL, &stack, bp); | 420 | show_trace(NULL, NULL, &stack, bp); |
416 | } | 421 | } |
417 | |||
418 | EXPORT_SYMBOL(dump_stack); | 422 | EXPORT_SYMBOL(dump_stack); |
419 | 423 | ||
420 | void show_registers(struct pt_regs *regs) | 424 | void show_registers(struct pt_regs *regs) |
@@ -443,7 +447,6 @@ void show_registers(struct pt_regs *regs) | |||
443 | printk("Stack: "); | 447 | printk("Stack: "); |
444 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 448 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
445 | regs->bp, ""); | 449 | regs->bp, ""); |
446 | printk("\n"); | ||
447 | 450 | ||
448 | printk(KERN_EMERG "Code: "); | 451 | printk(KERN_EMERG "Code: "); |
449 | 452 | ||
@@ -493,7 +496,7 @@ unsigned __kprobes long oops_begin(void) | |||
493 | raw_local_irq_save(flags); | 496 | raw_local_irq_save(flags); |
494 | cpu = smp_processor_id(); | 497 | cpu = smp_processor_id(); |
495 | if (!__raw_spin_trylock(&die_lock)) { | 498 | if (!__raw_spin_trylock(&die_lock)) { |
496 | if (cpu == die_owner) | 499 | if (cpu == die_owner) |
497 | /* nested oops. should stop eventually */; | 500 | /* nested oops. should stop eventually */; |
498 | else | 501 | else |
499 | __raw_spin_lock(&die_lock); | 502 | __raw_spin_lock(&die_lock); |
@@ -638,7 +641,7 @@ kernel_trap: | |||
638 | } | 641 | } |
639 | 642 | ||
640 | #define DO_ERROR(trapnr, signr, str, name) \ | 643 | #define DO_ERROR(trapnr, signr, str, name) \ |
641 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 644 | asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ |
642 | { \ | 645 | { \ |
643 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 646 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
644 | == NOTIFY_STOP) \ | 647 | == NOTIFY_STOP) \ |
@@ -648,7 +651,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |||
648 | } | 651 | } |
649 | 652 | ||
650 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 653 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
651 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 654 | asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ |
652 | { \ | 655 | { \ |
653 | siginfo_t info; \ | 656 | siginfo_t info; \ |
654 | info.si_signo = signr; \ | 657 | info.si_signo = signr; \ |
@@ -683,7 +686,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) | |||
683 | preempt_conditional_cli(regs); | 686 | preempt_conditional_cli(regs); |
684 | } | 687 | } |
685 | 688 | ||
686 | asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) | 689 | asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) |
687 | { | 690 | { |
688 | static const char str[] = "double fault"; | 691 | static const char str[] = "double fault"; |
689 | struct task_struct *tsk = current; | 692 | struct task_struct *tsk = current; |
@@ -778,9 +781,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
778 | } | 781 | } |
779 | 782 | ||
780 | static notrace __kprobes void | 783 | static notrace __kprobes void |
781 | unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | 784 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) |
782 | { | 785 | { |
783 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 786 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == |
787 | NOTIFY_STOP) | ||
784 | return; | 788 | return; |
785 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | 789 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
786 | reason); | 790 | reason); |
@@ -882,7 +886,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
882 | else if (user_mode(eregs)) | 886 | else if (user_mode(eregs)) |
883 | regs = task_pt_regs(current); | 887 | regs = task_pt_regs(current); |
884 | /* Exception from kernel and interrupts are enabled. Move to | 888 | /* Exception from kernel and interrupts are enabled. Move to |
885 | kernel process stack. */ | 889 | kernel process stack. */ |
886 | else if (eregs->flags & X86_EFLAGS_IF) | 890 | else if (eregs->flags & X86_EFLAGS_IF) |
887 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | 891 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); |
888 | if (eregs != regs) | 892 | if (eregs != regs) |
@@ -891,7 +895,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
891 | } | 895 | } |
892 | 896 | ||
893 | /* runs on IST stack. */ | 897 | /* runs on IST stack. */ |
894 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, | 898 | asmlinkage void __kprobes do_debug(struct pt_regs *regs, |
895 | unsigned long error_code) | 899 | unsigned long error_code) |
896 | { | 900 | { |
897 | struct task_struct *tsk = current; | 901 | struct task_struct *tsk = current; |
@@ -1035,7 +1039,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) | |||
1035 | 1039 | ||
1036 | asmlinkage void bad_intr(void) | 1040 | asmlinkage void bad_intr(void) |
1037 | { | 1041 | { |
1038 | printk("bad interrupt"); | 1042 | printk("bad interrupt"); |
1039 | } | 1043 | } |
1040 | 1044 | ||
1041 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | 1045 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) |
@@ -1047,7 +1051,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |||
1047 | 1051 | ||
1048 | conditional_sti(regs); | 1052 | conditional_sti(regs); |
1049 | if (!user_mode(regs) && | 1053 | if (!user_mode(regs) && |
1050 | kernel_math_error(regs, "kernel simd math error", 19)) | 1054 | kernel_math_error(regs, "kernel simd math error", 19)) |
1051 | return; | 1055 | return; |
1052 | 1056 | ||
1053 | /* | 1057 | /* |
@@ -1092,7 +1096,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |||
1092 | force_sig_info(SIGFPE, &info, task); | 1096 | force_sig_info(SIGFPE, &info, task); |
1093 | } | 1097 | } |
1094 | 1098 | ||
1095 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) | 1099 | asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) |
1096 | { | 1100 | { |
1097 | } | 1101 | } |
1098 | 1102 | ||
@@ -1149,8 +1153,10 @@ void __init trap_init(void) | |||
1149 | set_intr_gate(0, &divide_error); | 1153 | set_intr_gate(0, &divide_error); |
1150 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | 1154 | set_intr_gate_ist(1, &debug, DEBUG_STACK); |
1151 | set_intr_gate_ist(2, &nmi, NMI_STACK); | 1155 | set_intr_gate_ist(2, &nmi, NMI_STACK); |
1152 | set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ | 1156 | /* int3 can be called from all */ |
1153 | set_system_gate(4, &overflow); /* int4 can be called from all */ | 1157 | set_system_gate_ist(3, &int3, DEBUG_STACK); |
1158 | /* int4 can be called from all */ | ||
1159 | set_system_gate(4, &overflow); | ||
1154 | set_intr_gate(5, &bounds); | 1160 | set_intr_gate(5, &bounds); |
1155 | set_intr_gate(6, &invalid_op); | 1161 | set_intr_gate(6, &invalid_op); |
1156 | set_intr_gate(7, &device_not_available); | 1162 | set_intr_gate(7, &device_not_available); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c0553909..161bb850fc47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); | |||
104 | /* | 104 | /* |
105 | * Read TSC and the reference counters. Take care of SMI disturbance | 105 | * Read TSC and the reference counters. Take care of SMI disturbance |
106 | */ | 106 | */ |
107 | static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | 107 | static u64 tsc_read_refs(u64 *p, int hpet) |
108 | { | 108 | { |
109 | u64 t1, t2; | 109 | u64 t1, t2; |
110 | int i; | 110 | int i; |
@@ -112,9 +112,9 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | |||
112 | for (i = 0; i < MAX_RETRIES; i++) { | 112 | for (i = 0; i < MAX_RETRIES; i++) { |
113 | t1 = get_cycles(); | 113 | t1 = get_cycles(); |
114 | if (hpet) | 114 | if (hpet) |
115 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | 115 | *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; |
116 | else | 116 | else |
117 | *pm = acpi_pm_read_early(); | 117 | *p = acpi_pm_read_early(); |
118 | t2 = get_cycles(); | 118 | t2 = get_cycles(); |
119 | if ((t2 - t1) < SMI_TRESHOLD) | 119 | if ((t2 - t1) < SMI_TRESHOLD) |
120 | return t2; | 120 | return t2; |
@@ -122,80 +122,390 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | |||
122 | return ULLONG_MAX; | 122 | return ULLONG_MAX; |
123 | } | 123 | } |
124 | 124 | ||
125 | /** | 125 | /* |
126 | * native_calibrate_tsc - calibrate the tsc on boot | 126 | * Calculate the TSC frequency from HPET reference |
127 | */ | 127 | */ |
128 | unsigned long native_calibrate_tsc(void) | 128 | static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) |
129 | { | 129 | { |
130 | unsigned long flags; | 130 | u64 tmp; |
131 | u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; | ||
132 | int hpet = is_hpet_enabled(); | ||
133 | unsigned int tsc_khz_val = 0; | ||
134 | 131 | ||
135 | local_irq_save(flags); | 132 | if (hpet2 < hpet1) |
133 | hpet2 += 0x100000000ULL; | ||
134 | hpet2 -= hpet1; | ||
135 | tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
136 | do_div(tmp, 1000000); | ||
137 | do_div(deltatsc, tmp); | ||
138 | |||
139 | return (unsigned long) deltatsc; | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Calculate the TSC frequency from PMTimer reference | ||
144 | */ | ||
145 | static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) | ||
146 | { | ||
147 | u64 tmp; | ||
136 | 148 | ||
137 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | 149 | if (!pm1 && !pm2) |
150 | return ULONG_MAX; | ||
151 | |||
152 | if (pm2 < pm1) | ||
153 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
154 | pm2 -= pm1; | ||
155 | tmp = pm2 * 1000000000LL; | ||
156 | do_div(tmp, PMTMR_TICKS_PER_SEC); | ||
157 | do_div(deltatsc, tmp); | ||
158 | |||
159 | return (unsigned long) deltatsc; | ||
160 | } | ||
161 | |||
162 | #define CAL_MS 10 | ||
163 | #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) | ||
164 | #define CAL_PIT_LOOPS 1000 | ||
165 | |||
166 | #define CAL2_MS 50 | ||
167 | #define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) | ||
168 | #define CAL2_PIT_LOOPS 5000 | ||
169 | |||
170 | |||
171 | /* | ||
172 | * Try to calibrate the TSC against the Programmable | ||
173 | * Interrupt Timer and return the frequency of the TSC | ||
174 | * in kHz. | ||
175 | * | ||
176 | * Return ULONG_MAX on failure to calibrate. | ||
177 | */ | ||
178 | static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) | ||
179 | { | ||
180 | u64 tsc, t1, t2, delta; | ||
181 | unsigned long tscmin, tscmax; | ||
182 | int pitcnt; | ||
138 | 183 | ||
184 | /* Set the Gate high, disable speaker */ | ||
139 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | 185 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); |
140 | 186 | ||
187 | /* | ||
188 | * Setup CTC channel 2 for mode 0, (interrupt on terminal | ||
189 | * count mode), binary count. Set the latch register to the | ||
190 | * requested latch value (LSB then MSB) to begin countdown. | ||
191 | */ | ||
141 | outb(0xb0, 0x43); | 192 | outb(0xb0, 0x43); |
142 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 193 | outb(latch & 0xff, 0x42); |
143 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 194 | outb(latch >> 8, 0x42); |
144 | tr1 = get_cycles(); | 195 | |
145 | while ((inb(0x61) & 0x20) == 0); | 196 | tsc = t1 = t2 = get_cycles(); |
146 | tr2 = get_cycles(); | 197 | |
198 | pitcnt = 0; | ||
199 | tscmax = 0; | ||
200 | tscmin = ULONG_MAX; | ||
201 | while ((inb(0x61) & 0x20) == 0) { | ||
202 | t2 = get_cycles(); | ||
203 | delta = t2 - tsc; | ||
204 | tsc = t2; | ||
205 | if ((unsigned long) delta < tscmin) | ||
206 | tscmin = (unsigned int) delta; | ||
207 | if ((unsigned long) delta > tscmax) | ||
208 | tscmax = (unsigned int) delta; | ||
209 | pitcnt++; | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * Sanity checks: | ||
214 | * | ||
215 | * If we were not able to read the PIT more than loopmin | ||
216 | * times, then we have been hit by a massive SMI | ||
217 | * | ||
218 | * If the maximum is 10 times larger than the minimum, | ||
219 | * then we got hit by an SMI as well. | ||
220 | */ | ||
221 | if (pitcnt < loopmin || tscmax > 10 * tscmin) | ||
222 | return ULONG_MAX; | ||
223 | |||
224 | /* Calculate the PIT value */ | ||
225 | delta = t2 - t1; | ||
226 | do_div(delta, ms); | ||
227 | return delta; | ||
228 | } | ||
147 | 229 | ||
148 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | 230 | /* |
231 | * This reads the current MSB of the PIT counter, and | ||
232 | * checks if we are running on sufficiently fast and | ||
233 | * non-virtualized hardware. | ||
234 | * | ||
235 | * Our expectations are: | ||
236 | * | ||
237 | * - the PIT is running at roughly 1.19MHz | ||
238 | * | ||
239 | * - each IO is going to take about 1us on real hardware, | ||
240 | * but we allow it to be much faster (by a factor of 10) or | ||
241 | * _slightly_ slower (ie we allow up to a 2us read+counter | ||
242 | * update - anything else implies an unacceptably slow CPU | ||
243 | * or PIT for the fast calibration to work). | ||
244 | * | ||
245 | * - with 256 PIT ticks to read the value, we have 214us to | ||
246 | * see the same MSB (and overhead like doing a single TSC | ||
247 | * read per MSB value etc). | ||
248 | * | ||
249 | * - We're doing 2 reads per loop (LSB, MSB), and we expect | ||
250 | * them each to take about a microsecond on real hardware. | ||
251 | * So we expect a count value of around 100. But we'll be | ||
252 | * generous, and accept anything over 50. | ||
253 | * | ||
254 | * - if the PIT is stuck, and we see *many* more reads, we | ||
255 | * return early (and the next caller of pit_expect_msb() | ||
256 | * then consider it a failure when they don't see the | ||
257 | * next expected value). | ||
258 | * | ||
259 | * These expectations mean that we know that we have seen the | ||
260 | * transition from one expected value to another with a fairly | ||
261 | * high accuracy, and we didn't miss any events. We can thus | ||
262 | * use the TSC value at the transitions to calculate a pretty | ||
263 | * good value for the TSC frequency. | ||
264 | */ | ||
265 | static inline int pit_expect_msb(unsigned char val) | ||
266 | { | ||
267 | int count = 0; | ||
149 | 268 | ||
269 | for (count = 0; count < 50000; count++) { | ||
270 | /* Ignore LSB */ | ||
271 | inb(0x42); | ||
272 | if (inb(0x42) != val) | ||
273 | break; | ||
274 | } | ||
275 | return count > 50; | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * How many MSB values do we want to see? We aim for a | ||
280 | * 15ms calibration, which assuming a 2us counter read | ||
281 | * error should give us roughly 150 ppm precision for | ||
282 | * the calibration. | ||
283 | */ | ||
284 | #define QUICK_PIT_MS 15 | ||
285 | #define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) | ||
286 | |||
287 | static unsigned long quick_pit_calibrate(void) | ||
288 | { | ||
289 | /* Set the Gate high, disable speaker */ | ||
290 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | ||
291 | |||
292 | /* | ||
293 | * Counter 2, mode 0 (one-shot), binary count | ||
294 | * | ||
295 | * NOTE! Mode 2 decrements by two (and then the | ||
296 | * output is flipped each time, giving the same | ||
297 | * final output frequency as a decrement-by-one), | ||
298 | * so mode 0 is much better when looking at the | ||
299 | * individual counts. | ||
300 | */ | ||
301 | outb(0xb0, 0x43); | ||
302 | |||
303 | /* Start at 0xffff */ | ||
304 | outb(0xff, 0x42); | ||
305 | outb(0xff, 0x42); | ||
306 | |||
307 | if (pit_expect_msb(0xff)) { | ||
308 | int i; | ||
309 | u64 t1, t2, delta; | ||
310 | unsigned char expect = 0xfe; | ||
311 | |||
312 | t1 = get_cycles(); | ||
313 | for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { | ||
314 | if (!pit_expect_msb(expect)) | ||
315 | goto failed; | ||
316 | } | ||
317 | t2 = get_cycles(); | ||
318 | |||
319 | /* | ||
320 | * Make sure we can rely on the second TSC timestamp: | ||
321 | */ | ||
322 | if (!pit_expect_msb(expect)) | ||
323 | goto failed; | ||
324 | |||
325 | /* | ||
326 | * Ok, if we get here, then we've seen the | ||
327 | * MSB of the PIT decrement QUICK_PIT_ITERATIONS | ||
328 | * times, and each MSB had many hits, so we never | ||
329 | * had any sudden jumps. | ||
330 | * | ||
331 | * As a result, we can depend on there not being | ||
332 | * any odd delays anywhere, and the TSC reads are | ||
333 | * reliable. | ||
334 | * | ||
335 | * kHz = ticks / time-in-seconds / 1000; | ||
336 | * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 | ||
337 | * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) | ||
338 | */ | ||
339 | delta = (t2 - t1)*PIT_TICK_RATE; | ||
340 | do_div(delta, QUICK_PIT_ITERATIONS*256*1000); | ||
341 | printk("Fast TSC calibration using PIT\n"); | ||
342 | return delta; | ||
343 | } | ||
344 | failed: | ||
345 | return 0; | ||
346 | } | ||
347 | |||
348 | /** | ||
349 | * native_calibrate_tsc - calibrate the tsc on boot | ||
350 | */ | ||
351 | unsigned long native_calibrate_tsc(void) | ||
352 | { | ||
353 | u64 tsc1, tsc2, delta, ref1, ref2; | ||
354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | ||
355 | unsigned long flags, latch, ms, fast_calibrate; | ||
356 | int hpet = is_hpet_enabled(), i, loopmin; | ||
357 | |||
358 | local_irq_save(flags); | ||
359 | fast_calibrate = quick_pit_calibrate(); | ||
150 | local_irq_restore(flags); | 360 | local_irq_restore(flags); |
361 | if (fast_calibrate) | ||
362 | return fast_calibrate; | ||
151 | 363 | ||
152 | /* | 364 | /* |
153 | * Preset the result with the raw and inaccurate PIT | 365 | * Run up to 3 calibration loops to get the lowest frequency value |
154 | * calibration value | 366 | * (the best estimate). We use two different calibration modes |
367 | * here: | ||
368 | * | ||
369 | * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and | ||
370 | * load a timeout of 50ms. We read the time right after we | ||
371 | * started the timer and wait until the PIT count down reaches | ||
372 | * zero. In each wait loop iteration we read the TSC and check | ||
373 | * the delta to the previous read. We keep track of the min | ||
374 | * and max values of that delta. The delta is mostly defined | ||
375 | * by the IO time of the PIT access, so we can detect when a | ||
376 | * SMI/SMM disturbance happened between the two reads. If the | ||
377 | * maximum time is significantly larger than the minimum time, | ||
378 | * then we discard the result and have another try. | ||
379 | * | ||
380 | * 2) Reference counter. If available we use the HPET or the | ||
381 | * PMTIMER as a reference to check the sanity of that value. | ||
382 | * We use separate TSC readouts and check inside of the | ||
383 | * reference read for a SMI/SMM disturbance. We discard | ||
384 | * disturbed values here as well. We do that around the PIT | ||
385 | * calibration delay loop as we have to wait for a certain | ||
386 | * amount of time anyway. | ||
155 | */ | 387 | */ |
156 | delta = (tr2 - tr1); | 388 | |
157 | do_div(delta, 50); | 389 | /* Preset PIT loop values */ |
158 | tsc_khz_val = delta; | 390 | latch = CAL_LATCH; |
159 | 391 | ms = CAL_MS; | |
160 | /* hpet or pmtimer available ? */ | 392 | loopmin = CAL_PIT_LOOPS; |
161 | if (!hpet && !pm1 && !pm2) { | 393 | |
162 | printk(KERN_INFO "TSC calibrated against PIT\n"); | 394 | for (i = 0; i < 3; i++) { |
163 | goto out; | 395 | unsigned long tsc_pit_khz; |
396 | |||
397 | /* | ||
398 | * Read the start value and the reference count of | ||
399 | * hpet/pmtimer when available. Then do the PIT | ||
400 | * calibration, which will take at least 50ms, and | ||
401 | * read the end value. | ||
402 | */ | ||
403 | local_irq_save(flags); | ||
404 | tsc1 = tsc_read_refs(&ref1, hpet); | ||
405 | tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); | ||
406 | tsc2 = tsc_read_refs(&ref2, hpet); | ||
407 | local_irq_restore(flags); | ||
408 | |||
409 | /* Pick the lowest PIT TSC calibration so far */ | ||
410 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); | ||
411 | |||
412 | /* hpet or pmtimer available ? */ | ||
413 | if (!hpet && !ref1 && !ref2) | ||
414 | continue; | ||
415 | |||
416 | /* Check, whether the sampling was disturbed by an SMI */ | ||
417 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) | ||
418 | continue; | ||
419 | |||
420 | tsc2 = (tsc2 - tsc1) * 1000000LL; | ||
421 | if (hpet) | ||
422 | tsc2 = calc_hpet_ref(tsc2, ref1, ref2); | ||
423 | else | ||
424 | tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); | ||
425 | |||
426 | tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); | ||
427 | |||
428 | /* Check the reference deviation */ | ||
429 | delta = ((u64) tsc_pit_min) * 100; | ||
430 | do_div(delta, tsc_ref_min); | ||
431 | |||
432 | /* | ||
433 | * If both calibration results are inside a 10% window | ||
434 | * then we can be sure, that the calibration | ||
435 | * succeeded. We break out of the loop right away. We | ||
436 | * use the reference value, as it is more precise. | ||
437 | */ | ||
438 | if (delta >= 90 && delta <= 110) { | ||
439 | printk(KERN_INFO | ||
440 | "TSC: PIT calibration matches %s. %d loops\n", | ||
441 | hpet ? "HPET" : "PMTIMER", i + 1); | ||
442 | return tsc_ref_min; | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * Check whether PIT failed more than once. This | ||
447 | * happens in virtualized environments. We need to | ||
448 | * give the virtual PC a slightly longer timeframe for | ||
449 | * the HPET/PMTIMER to make the result precise. | ||
450 | */ | ||
451 | if (i == 1 && tsc_pit_min == ULONG_MAX) { | ||
452 | latch = CAL2_LATCH; | ||
453 | ms = CAL2_MS; | ||
454 | loopmin = CAL2_PIT_LOOPS; | ||
455 | } | ||
164 | } | 456 | } |
165 | 457 | ||
166 | /* Check, whether the sampling was disturbed by an SMI */ | 458 | /* |
167 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { | 459 | * Now check the results. |
168 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " | 460 | */ |
169 | "using PIT calibration result\n"); | 461 | if (tsc_pit_min == ULONG_MAX) { |
170 | goto out; | 462 | /* PIT gave no useful value */ |
463 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); | ||
464 | |||
465 | /* We don't have an alternative source, disable TSC */ | ||
466 | if (!hpet && !ref1 && !ref2) { | ||
467 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | /* The alternative source failed as well, disable TSC */ | ||
472 | if (tsc_ref_min == ULONG_MAX) { | ||
473 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | ||
474 | "failed.\n"); | ||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | /* Use the alternative source */ | ||
479 | printk(KERN_INFO "TSC: using %s reference calibration\n", | ||
480 | hpet ? "HPET" : "PMTIMER"); | ||
481 | |||
482 | return tsc_ref_min; | ||
171 | } | 483 | } |
172 | 484 | ||
173 | tsc2 = (tsc2 - tsc1) * 1000000LL; | 485 | /* We don't have an alternative source, use the PIT calibration value */ |
174 | 486 | if (!hpet && !ref1 && !ref2) { | |
175 | if (hpet) { | 487 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); |
176 | printk(KERN_INFO "TSC calibrated against HPET\n"); | 488 | return tsc_pit_min; |
177 | if (hpet2 < hpet1) | ||
178 | hpet2 += 0x100000000ULL; | ||
179 | hpet2 -= hpet1; | ||
180 | tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
181 | do_div(tsc1, 1000000); | ||
182 | } else { | ||
183 | printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); | ||
184 | if (pm2 < pm1) | ||
185 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
186 | pm2 -= pm1; | ||
187 | tsc1 = pm2 * 1000000000LL; | ||
188 | do_div(tsc1, PMTMR_TICKS_PER_SEC); | ||
189 | } | 489 | } |
190 | 490 | ||
191 | do_div(tsc2, tsc1); | 491 | /* The alternative source failed, use the PIT calibration value */ |
192 | tsc_khz_val = tsc2; | 492 | if (tsc_ref_min == ULONG_MAX) { |
493 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " | ||
494 | "Using PIT calibration\n"); | ||
495 | return tsc_pit_min; | ||
496 | } | ||
193 | 497 | ||
194 | out: | 498 | /* |
195 | return tsc_khz_val; | 499 | * The calibration values differ too much. In doubt, we use |
500 | * the PIT value as we know that there are PMTIMERs around | ||
501 | * running at double speed. At least we let the user know: | ||
502 | */ | ||
503 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", | ||
504 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); | ||
505 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | ||
506 | return tsc_pit_min; | ||
196 | } | 507 | } |
197 | 508 | ||
198 | |||
199 | #ifdef CONFIG_X86_32 | 509 | #ifdef CONFIG_X86_32 |
200 | /* Only called from the Powernow K7 cpu freq driver */ | 510 | /* Only called from the Powernow K7 cpu freq driver */ |
201 | int recalibrate_cpu_khz(void) | 511 | int recalibrate_cpu_khz(void) |
@@ -314,7 +624,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
314 | mark_tsc_unstable("cpufreq changes"); | 624 | mark_tsc_unstable("cpufreq changes"); |
315 | } | 625 | } |
316 | 626 | ||
317 | set_cyc2ns_scale(tsc_khz_ref, freq->cpu); | 627 | set_cyc2ns_scale(tsc_khz, freq->cpu); |
318 | 628 | ||
319 | return 0; | 629 | return 0; |
320 | } | 630 | } |
@@ -325,6 +635,10 @@ static struct notifier_block time_cpufreq_notifier_block = { | |||
325 | 635 | ||
326 | static int __init cpufreq_tsc(void) | 636 | static int __init cpufreq_tsc(void) |
327 | { | 637 | { |
638 | if (!cpu_has_tsc) | ||
639 | return 0; | ||
640 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
641 | return 0; | ||
328 | cpufreq_register_notifier(&time_cpufreq_notifier_block, | 642 | cpufreq_register_notifier(&time_cpufreq_notifier_block, |
329 | CPUFREQ_TRANSITION_NOTIFIER); | 643 | CPUFREQ_TRANSITION_NOTIFIER); |
330 | return 0; | 644 | return 0; |
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825cf89b..9ffb01c31c40 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) | |||
88 | __raw_spin_unlock(&sync_lock); | 88 | __raw_spin_unlock(&sync_lock); |
89 | } | 89 | } |
90 | } | 90 | } |
91 | if (!(now-start)) { | 91 | WARN(!(now-start), |
92 | printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", | 92 | "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", |
93 | now-start, end-start); | 93 | now-start, end-start); |
94 | WARN_ON(1); | ||
95 | } | ||
96 | } | 94 | } |
97 | 95 | ||
98 | /* | 96 | /* |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 41e01b145c48..61a97e616f70 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -25,45 +25,31 @@ | |||
25 | #include <asm/visws/cobalt.h> | 25 | #include <asm/visws/cobalt.h> |
26 | #include <asm/visws/piix4.h> | 26 | #include <asm/visws/piix4.h> |
27 | #include <asm/arch_hooks.h> | 27 | #include <asm/arch_hooks.h> |
28 | #include <asm/io_apic.h> | ||
28 | #include <asm/fixmap.h> | 29 | #include <asm/fixmap.h> |
29 | #include <asm/reboot.h> | 30 | #include <asm/reboot.h> |
30 | #include <asm/setup.h> | 31 | #include <asm/setup.h> |
31 | #include <asm/e820.h> | 32 | #include <asm/e820.h> |
32 | #include <asm/smp.h> | ||
33 | #include <asm/io.h> | 33 | #include <asm/io.h> |
34 | 34 | ||
35 | #include <mach_ipi.h> | 35 | #include <mach_ipi.h> |
36 | 36 | ||
37 | #include "mach_apic.h" | 37 | #include "mach_apic.h" |
38 | 38 | ||
39 | #include <linux/init.h> | ||
40 | #include <linux/smp.h> | ||
41 | |||
42 | #include <linux/kernel_stat.h> | 39 | #include <linux/kernel_stat.h> |
43 | #include <linux/interrupt.h> | ||
44 | #include <linux/init.h> | ||
45 | 40 | ||
46 | #include <asm/io.h> | ||
47 | #include <asm/apic.h> | ||
48 | #include <asm/i8259.h> | 41 | #include <asm/i8259.h> |
49 | #include <asm/irq_vectors.h> | 42 | #include <asm/irq_vectors.h> |
50 | #include <asm/visws/cobalt.h> | ||
51 | #include <asm/visws/lithium.h> | 43 | #include <asm/visws/lithium.h> |
52 | #include <asm/visws/piix4.h> | ||
53 | 44 | ||
54 | #include <linux/sched.h> | 45 | #include <linux/sched.h> |
55 | #include <linux/kernel.h> | 46 | #include <linux/kernel.h> |
56 | #include <linux/init.h> | ||
57 | #include <linux/pci.h> | 47 | #include <linux/pci.h> |
58 | #include <linux/pci_ids.h> | 48 | #include <linux/pci_ids.h> |
59 | 49 | ||
60 | extern int no_broadcast; | 50 | extern int no_broadcast; |
61 | 51 | ||
62 | #include <asm/io.h> | ||
63 | #include <asm/apic.h> | 52 | #include <asm/apic.h> |
64 | #include <asm/arch_hooks.h> | ||
65 | #include <asm/visws/cobalt.h> | ||
66 | #include <asm/visws/lithium.h> | ||
67 | 53 | ||
68 | char visws_board_type = -1; | 54 | char visws_board_type = -1; |
69 | char visws_board_rev = -1; | 55 | char visws_board_rev = -1; |
@@ -184,8 +170,6 @@ static int __init visws_get_smp_config(unsigned int early) | |||
184 | return 1; | 170 | return 1; |
185 | } | 171 | } |
186 | 172 | ||
187 | extern unsigned int __cpuinitdata maxcpus; | ||
188 | |||
189 | /* | 173 | /* |
190 | * The Visual Workstation is Intel MP compliant in the hardware | 174 | * The Visual Workstation is Intel MP compliant in the hardware |
191 | * sense, but it doesn't have a BIOS(-configuration table). | 175 | * sense, but it doesn't have a BIOS(-configuration table). |
@@ -244,8 +228,8 @@ static int __init visws_find_smp_config(unsigned int reserve) | |||
244 | ncpus = CO_CPU_MAX; | 228 | ncpus = CO_CPU_MAX; |
245 | } | 229 | } |
246 | 230 | ||
247 | if (ncpus > maxcpus) | 231 | if (ncpus > setup_max_cpus) |
248 | ncpus = maxcpus; | 232 | ncpus = setup_max_cpus; |
249 | 233 | ||
250 | #ifdef CONFIG_X86_LOCAL_APIC | 234 | #ifdef CONFIG_X86_LOCAL_APIC |
251 | smp_found_config = 1; | 235 | smp_found_config = 1; |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 61531d5c9507..8b6c393ab9fd 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, | |||
235 | const void *desc) | 235 | const void *desc) |
236 | { | 236 | { |
237 | u32 *ldt_entry = (u32 *)desc; | 237 | u32 *ldt_entry = (u32 *)desc; |
238 | vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); | 238 | vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); |
239 | } | 239 | } |
240 | 240 | ||
241 | static void vmi_load_sp0(struct tss_struct *tss, | 241 | static void vmi_load_sp0(struct tss_struct *tss, |
@@ -393,13 +393,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
393 | } | 393 | } |
394 | #endif | 394 | #endif |
395 | 395 | ||
396 | static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) | 396 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
397 | { | 397 | { |
398 | vmi_set_page_type(pfn, VMI_PAGE_L1); | 398 | vmi_set_page_type(pfn, VMI_PAGE_L1); |
399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
400 | } | 400 | } |
401 | 401 | ||
402 | static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) | 402 | static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) |
403 | { | 403 | { |
404 | /* | 404 | /* |
405 | * This call comes in very early, before mem_map is setup. | 405 | * This call comes in very early, before mem_map is setup. |
@@ -410,20 +410,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) | |||
410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
411 | } | 411 | } |
412 | 412 | ||
413 | static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) | 413 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) |
414 | { | 414 | { |
415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | 415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); |
416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | 416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); |
417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
418 | } | 418 | } |
419 | 419 | ||
420 | static void vmi_release_pte(u32 pfn) | 420 | static void vmi_release_pte(unsigned long pfn) |
421 | { | 421 | { |
422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
424 | } | 424 | } |
425 | 425 | ||
426 | static void vmi_release_pmd(u32 pfn) | 426 | static void vmi_release_pmd(unsigned long pfn) |
427 | { | 427 | { |
428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | 429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); |
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index cdb2363697d2..af5bdad84604 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -209,3 +209,11 @@ SECTIONS | |||
209 | 209 | ||
210 | DWARF_DEBUG | 210 | DWARF_DEBUG |
211 | } | 211 | } |
212 | |||
213 | #ifdef CONFIG_KEXEC | ||
214 | /* Link time checks */ | ||
215 | #include <asm/kexec.h> | ||
216 | |||
217 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
218 | "kexec control code size is too big") | ||
219 | #endif | ||
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 0c029e8959c7..7766d36983fc 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -61,7 +61,7 @@ static void vsmp_irq_enable(void) | |||
61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); | 61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
62 | } | 62 | } |
63 | 63 | ||
64 | static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, | 64 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, |
65 | unsigned long addr, unsigned len) | 65 | unsigned long addr, unsigned len) |
66 | { | 66 | { |
67 | switch (type) { | 67 | switch (type) { |