Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/apic/apic.c            |  31
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c  |  13
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c     |  42
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c    |   9
-rw-r--r--  arch/x86/kernel/fpu/xstate.c           | 138
-rw-r--r--  arch/x86/kernel/head32.c               |   2
-rw-r--r--  arch/x86/kernel/head64.c               |   1
-rw-r--r--  arch/x86/kernel/hpet.c                 |   2
-rw-r--r--  arch/x86/kernel/irq.c                  |   3
-rw-r--r--  arch/x86/kernel/setup.c                |  27
-rw-r--r--  arch/x86/kernel/smpboot.c              |  25
-rw-r--r--  arch/x86/kernel/tsc.c                  |   4
-rw-r--r--  arch/x86/kernel/uprobes.c              |  22
13 files changed, 133 insertions, 186 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 20abd912f0e4..50c95af0f017 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -313,7 +313,7 @@ int lapic_get_maxlvt(void)
 
 /* Clock divisor */
 #define APIC_DIVISOR 16
-#define TSC_DIVISOR 32
+#define TSC_DIVISOR 8
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -565,13 +565,37 @@ static void setup_APIC_timer(void)
					CLOCK_EVT_FEAT_DUMMY);
		levt->set_next_event = lapic_next_deadline;
		clockevents_config_and_register(levt,
-				(tsc_khz / TSC_DIVISOR) * 1000,
+				tsc_khz * (1000 / TSC_DIVISOR),
				0xF, ~0UL);
	} else
		clockevents_register_device(levt);
 }
 
 /*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+		return;
+
+	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+	/*
+	 * The clockevent device's ->mult and ->shift can both be
+	 * changed. In order to avoid races, schedule the frequency
+	 * update code on each CPU.
+	 */
+	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
+}
+
+/*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
  * We want to do the calibration only once since we want to have local timer
@@ -1599,6 +1623,9 @@ void __init enable_IR_x2apic(void)
	unsigned long flags;
	int ret, ir_stat;
 
+	if (skip_ioapic_setup)
+		return;
+
	ir_stat = irq_remapping_prepare();
	if (ir_stat < 0 && !x2apic_supported())
		return;
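A side note on the arithmetic behind the TSC_DIVISOR change above: with the old divisor of 32, the clockevent frequency was computed as (tsc_khz / 32) * 1000, so the integer division threw away up to 31 kHz of the calibrated rate before the *1000 scaling amplified that error; with a divisor of 8 the scale factor 1000 / 8 = 125 is exact and no roundoff remains. The new __lapic_update_tsc_freq()/lapic_update_tsc_freq() pair then lets the refined TSC calibration (see the tsc.c hunk further down) push the corrected frequency to each CPU's clockevent. The snippet below is a standalone userspace illustration of the arithmetic only, not kernel code, and the tsc_khz value is made up:

#include <stdio.h>

int main(void)
{
	unsigned long tsc_khz = 2893299;	/* hypothetical calibration result */

	/* Old formula, TSC_DIVISOR == 32: divide first, losing up to 31 kHz. */
	unsigned long old_hz = (tsc_khz / 32) * 1000;
	double exact_hz = tsc_khz * 1000.0 / 32;

	/* New formula, TSC_DIVISOR == 8: 1000 / 8 == 125 is exact,
	 * so the registered frequency is simply tsc_khz * 125. */
	unsigned long new_hz = tsc_khz * (1000 / 8);

	printf("old: %lu Hz, exact: %.2f Hz, error: %.2f Hz\n",
	       old_hz, exact_hz, exact_hz - old_hz);
	printf("new: %lu Hz (tsc_khz * 125, exact by construction)\n", new_hz);
	return 0;
}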
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6368fa69d2af..54f35d988025 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
 /*
  * At CPU state changes, update the x2apic cluster sibling info.
  */
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
 {
	if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
		return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
	return 0;
 }
 
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
 {
	int cpu;
 
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
 static int x2apic_cluster_probe(void)
 {
	int cpu = smp_processor_id();
+	int ret;
 
	if (!x2apic_mode)
		return 0;
 
+	ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+				x2apic_prepare_cpu, x2apic_dead_cpu);
+	if (ret < 0) {
+		pr_err("Failed to register X2APIC_PREPARE\n");
+		return 0;
+	}
	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-	cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
-			  x2apic_prepare_cpu, x2apic_dead_cpu);
	return 1;
 }
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 09b59adaea3f..cb0673c1e940 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
	if (strncmp(oem_id, "SGI", 3) != 0)
		return 0;
 
+	if (numa_off) {
+		pr_err("UV: NUMA is off, disabling UV support\n");
+		return 0;
+	}
+
	/* Setup early hub type field in uv_hub_info for Node 0 */
	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
 
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
	struct uv_gam_range_entry *gre = uv_gre_table;
	struct uv_gam_range_s *grt;
	unsigned long last_limit = 0, ram_limit = 0;
-	int bytes, i, sid, lsid = -1;
+	int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
 
	if (!gre)
		return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
		}
		sid = gre->sockid - _min_socket;
		if (lsid < sid) { /* new range */
-			grt = &_gr_table[sid];
-			grt->base = lsid;
+			grt = &_gr_table[indx];
+			grt->base = lindx;
			grt->nasid = gre->nasid;
			grt->limit = last_limit = gre->limit;
			lsid = sid;
+			lindx = indx++;
			continue;
		}
		if (lsid == sid && !ram_limit) { /* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
		}
		if (!ram_limit) { /* non-contiguous ram range */
			grt++;
-			grt->base = sid - 1;
+			grt->base = lindx;
			grt->nasid = gre->nasid;
			grt->limit = last_limit = gre->limit;
			continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
		if (!index) {
			pr_info("UV: GAM Range Table...\n");
-			pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+			pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
				"Range", "", "Size", "Type", "NASID",
-				"SID", "PN", "PXM");
+				"SID", "PN");
		}
		pr_info(
-		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
+		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
			index++,
			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
			((unsigned long)(gre->limit - lgre)) >>
				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
-			gre->type, gre->nasid, gre->sockid,
-			gre->pnode, gre->pxm);
+			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
		lgre = gre->limit;
		if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
		_pnode_to_socket[i] = SOCK_EMPTY;
 
	/* fill in pnode/node/addr conversion list values */
-	pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+	pr_info("UV: GAM Building socket/pnode conversion tables\n");
	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
		if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
			continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
		if (_socket_to_pnode[i] != SOCK_EMPTY)
			continue; /* duplicate */
		_socket_to_pnode[i] = gre->pnode;
-		_socket_to_node[i] = gre->pxm;
 
		i = gre->pnode - minpnode;
		_pnode_to_socket[i] = gre->sockid;
 
		pr_info(
-		"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+		"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
			gre->sockid, gre->type, gre->nasid,
			_socket_to_pnode[gre->sockid - minsock],
-			_socket_to_node[gre->sockid - minsock],
			_pnode_to_socket[gre->pnode - minpnode]);
	}
 
-	/* check socket -> node values */
+	/* Set socket -> node values */
	lnid = -1;
	for_each_present_cpu(cpu) {
		int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
		lnid = nid;
		apicid = per_cpu(x86_cpu_to_apicid, cpu);
		sockid = apicid >> uv_cpuid.socketid_shift;
-		i = sockid - minsock;
-
-		if (nid != _socket_to_node[i]) {
-			pr_warn(
-			"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
-			i, sockid, gre->type, _socket_to_node[i], nid);
-			_socket_to_node[i] = nid;
-		}
+		_socket_to_node[sockid - minsock] = nid;
+		pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+			sockid, apicid, nid);
	}
 
	/* Setup physical blade to pnode translation from GAM Range Table */
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 27a0228c9cae..b816971f5da4 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -355,6 +355,7 @@ void load_ucode_amd_ap(void)
	unsigned int cpu = smp_processor_id();
	struct equiv_cpu_entry *eq;
	struct microcode_amd *mc;
+	u8 *cont = container;
	u32 rev, eax;
	u16 eq_id;
 
@@ -371,8 +372,11 @@ void load_ucode_amd_ap(void)
	if (check_current_patch_level(&rev, false))
		return;
 
+	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
+	cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
	eax = cpuid_eax(0x00000001);
-	eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+	eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
 
	eq_id = find_equiv_id(eq, eax);
	if (!eq_id)
@@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void)
	else
		container = cont_va;
 
+	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
+	container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
	eax = cpuid_eax(0x00000001);
	eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
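Both hunks above apply the same correction: the saved container pointer was computed against the compile-time direct-map base, so once CONFIG_RANDOMIZE_MEMORY has moved PAGE_OFFSET it must be rebased by the delta to the randomized base, exactly as the added comments say. Below is a standalone userspace sketch of that pointer arithmetic only; the base addresses and the physical address are made up and merely stand in for __PAGE_OFFSET_BASE, the randomized PAGE_OFFSET, and the initrd location:

#include <stdio.h>

#define COMPILE_TIME_BASE 0xffff880000000000UL	/* stand-in for __PAGE_OFFSET_BASE */
#define RANDOMIZED_BASE   0xffff9a8000000000UL	/* stand-in for PAGE_OFFSET after KASLR */

int main(void)
{
	unsigned long phys  = 0x36e54000UL;			/* hypothetical initrd page */
	unsigned long stale = COMPILE_TIME_BASE + phys;		/* pointer saved early */

	/* Rebase as the patch does: add the randomized-minus-default delta. */
	unsigned long fixed = stale + (RANDOMIZED_BASE - COMPILE_TIME_BASE);

	printf("stale VA 0x%lx -> fixed VA 0x%lx\n", stale, fixed);
	return 0;
}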
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 680049aa4593..01567aa87503 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
	return get_xsave_addr(&fpu->state.xsave, xsave_state);
 }
 
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state". This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
-		int xstate_feature_mask)
-{
-	xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- *	@xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- *	XFEATURE_MASK_SSE, etc...)
- *	@xsave_state_ptr: a pointer to a copy of the state that you would
- *	like written in to the current task's FPU xsave state. This pointer
- *	must not be located in the current tasks's xsave area.
- * Output:
- *	address of the state in the xsave area or NULL if the state
- *	is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
-		void *xstate_feature_src, size_t len)
-{
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
-	struct fpu *fpu = &current->thread.fpu;
-	void *dst;
-
-	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
-		WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
-		return;
-	}
-
-	/*
-	 * Tell the FPU code that we need the FPU state to be in
-	 * 'fpu' (not in the registers), and that we need it to
-	 * be stable while we write to it.
-	 */
-	fpu__current_fpstate_write_begin();
-
-	/*
-	 * This method *WILL* *NOT* work for compact-format
-	 * buffers. If the 'xstate_feature_mask' is unset in
-	 * xcomp_bv then we may need to move other feature state
-	 * "up" in the buffer.
-	 */
-	if (xsave->header.xcomp_bv & xstate_feature_mask) {
-		WARN_ON_ONCE(1);
-		goto out;
-	}
-
-	/* find the location in the xsave buffer of the desired state */
-	dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
-	/*
-	 * Make sure that the pointer being passed in did not
-	 * come from the xsave buffer itself.
-	 */
-	WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
-	/* put the caller-provided data in the location */
-	memcpy(dst, xstate_feature_src, len);
-
-	/*
-	 * Mark the xfeature so that the CPU knows there is state
-	 * in the buffer now.
-	 */
-	fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
-	/*
-	 * We are done writing to the 'fpu'. Reenable preeption
-	 * and (possibly) move the fpstate back in to the fpregs.
-	 */
-	fpu__current_fpstate_write_end();
-}
-
 #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
 #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
 
 /*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory. We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify PKRU register to set the access
+ * rights for @pkey to @init_val.
  */
 int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
		unsigned long init_val)
 {
-	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
-	struct pkru_state *old_pkru_state;
-	struct pkru_state new_pkru_state;
+	u32 old_pkru;
	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
	u32 new_pkru_bits = 0;
 
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
	 */
	if (!boot_cpu_has(X86_FEATURE_OSPKE))
		return -EINVAL;
+	/*
+	 * For most XSAVE components, this would be an arduous task:
+	 * brining fpstate up to date with fpregs, updating fpstate,
+	 * then re-populating fpregs. But, for components that are
+	 * never lazily managed, we can just access the fpregs
+	 * directly. PKRU is never managed lazily, so we can just
+	 * manipulate it directly. Make sure it stays that way.
+	 */
+	WARN_ON_ONCE(!use_eager_fpu());
 
	/* Set the bits we need in PKRU: */
	if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
	/* Shift the bits in to the correct place in PKRU for pkey: */
	new_pkru_bits <<= pkey_shift;
 
-	/* Locate old copy of the state in the xsave buffer: */
-	old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
-
-	/*
-	 * When state is not in the buffer, it is in the init
-	 * state, set it manually. Otherwise, copy out the old
-	 * state.
-	 */
-	if (!old_pkru_state)
-		new_pkru_state.pkru = 0;
-	else
-		new_pkru_state.pkru = old_pkru_state->pkru;
-
-	/* Mask off any old bits in place: */
-	new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
-	/* Set the newly-requested bits: */
-	new_pkru_state.pkru |= new_pkru_bits;
-
-	/*
-	 * We could theoretically live without zeroing pkru.pad.
-	 * The current XSAVE feature state definition says that
-	 * only bytes 0->3 are used. But we do not want to
-	 * chance leaking kernel stack out to userspace in case a
-	 * memcpy() of the whole xsave buffer was done.
-	 *
-	 * They're in the same cacheline anyway.
-	 */
-	new_pkru_state.pad = 0;
+	/* Get old PKRU and mask off any old bits in place: */
+	old_pkru = read_pkru();
+	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
 
-	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+	/* Write old part along with new part: */
+	write_pkru(old_pkru | new_pkru_bits);
 
	return 0;
 }
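The rewritten arch_set_user_pkey_access() no longer touches the XSAVE buffer at all: because PKRU is never managed lazily, it reads the register, clears the two bits belonging to the key, and writes the merged value back. Below is a standalone userspace sketch of just that bit manipulation; the constants mirror the kernel's PKRU layout (two bits per key, AD = 0x1, WD = 0x2) and the starting PKRU value is made up:

#include <stdio.h>

#define PKRU_BITS_PER_PKEY 2
#define PKRU_AD_BIT 0x1	/* access-disable */
#define PKRU_WD_BIT 0x2	/* write-disable  */

static unsigned int set_pkey_bits(unsigned int pkru, int pkey, unsigned int new_bits)
{
	int shift = pkey * PKRU_BITS_PER_PKEY;

	/* Mask off the two bits belonging to this key, then OR in the
	 * requested disable bits - the same steps as the new code path. */
	pkru &= ~((PKRU_AD_BIT | PKRU_WD_BIT) << shift);
	return pkru | (new_bits << shift);
}

int main(void)
{
	unsigned int pkru = 0x00000055;	/* made-up starting value */

	/* Example: make protection key 4 fully inaccessible. */
	pkru = set_pkey_bits(pkru, 4, PKRU_AD_BIT | PKRU_WD_BIT);
	printf("new PKRU: 0x%08x\n", pkru);	/* prints 0x00000355 */
	return 0;
}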
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2dda0bc4576e..f16c55bfc090 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
	/* Initialize 32bit specific setup functions */
	x86_init.resources.reserve_resources = i386_reserve_resources;
	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
-	reserve_bios_regions();
 }
 
 asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 99d48e7d2974..54a2372f5dbb 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
	copy_bootdata(__va(real_mode_data));
 
	x86_early_init_platform_quirks();
-	reserve_bios_regions();
 
	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ed16e58658a4..c6dfd801df97 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1242,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
		memset(&curr_time, 0, sizeof(struct rtc_time));
 
		if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
-			mc146818_set_time(&curr_time);
+			mc146818_get_time(&curr_time);
 
		if (hpet_rtc_flags & RTC_UIE &&
		    curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc19c10..9f669fdd2010 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
	seq_puts(p, " Rescheduling interrupts\n");
	seq_printf(p, "%*s: ", prec, "CAL");
	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
-					irq_stats(j)->irq_tlb_count);
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
	seq_puts(p, " Function call interrupts\n");
	seq_printf(p, "%*s: ", prec, "TLB");
	for_each_online_cpu(j)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 991b77986d57..0fa60f5f5a16 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
 
	x86_init.oem.arch_setup();
 
-	kernel_randomize_memory();
-
	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
	setup_memory_map();
	parse_setup_data();
@@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
 
	max_possible_pfn = max_pfn;
 
+	/*
+	 * Define random base addresses for memory sections after max_pfn is
+	 * defined and before each memory section base is used.
+	 */
+	kernel_randomize_memory();
+
 #ifdef CONFIG_X86_32
	/* max_low_pfn get updated here */
	find_low_pfn_range();
@@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
		efi_find_mirror();
	}
 
+	reserve_bios_regions();
+
	/*
	 * The EFI specification says that boot service code won't be called
	 * after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
 
	early_trap_pf_init();
 
-	setup_real_mode();
+	/*
+	 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+	 * with the current CR4 value. This may not be necessary, but
+	 * auditing all the early-boot CR4 manipulation would be needed to
+	 * rule it out.
+	 */
+	if (boot_cpu_data.cpuid_level >= 0)
+		/* A CPU has %cr4 if and only if it has CPUID. */
+		mmu_cr4_features = __read_cr4();
 
	memblock_set_current_limit(get_max_mapped());
 
@@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
 
	kasan_init();
 
-	if (boot_cpu_data.cpuid_level >= 0) {
-		/* A CPU has %cr4 if and only if it has CPUID */
-		mmu_cr4_features = __read_cr4();
-		if (trampoline_cr4_features)
-			*trampoline_cr4_features = mmu_cr4_features;
-	}
-
 #ifdef CONFIG_X86_32
	/* sync back kernel address range */
	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 067de612d3fa..26b473dc3f82 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 /* Logical package management. We might want to allocate that dynamically */
 static int *physical_to_logical_pkg __read_mostly;
 static unsigned long *physical_package_map __read_mostly;;
-static unsigned long *logical_package_map __read_mostly;
 static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
	if (test_and_set_bit(pkg, physical_package_map))
		goto found;
 
-	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
-	if (new >= __max_logical_packages) {
+	if (logical_packages_frozen) {
		physical_to_logical_pkg[pkg] = -1;
-		pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+		pr_warn("APIC(%x) Package %u exceeds logical package max\n",
			apicid, pkg);
		return -ENOSPC;
	}
-	set_bit(new, logical_package_map);
+
+	new = logical_packages++;
	pr_info("APIC(%x) Converting physical %u to logical package %u\n",
		apicid, pkg, new);
	physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
	}
 
	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+	logical_packages = 0;
 
	/*
	 * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
	memset(physical_to_logical_pkg, 0xff, size);
	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
	physical_package_map = kzalloc(size, GFP_KERNEL);
-	size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
-	logical_package_map = kzalloc(size, GFP_KERNEL);
-
-	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
	for_each_present_cpu(cpu) {
		unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
		set_cpu_possible(cpu, false);
		set_cpu_present(cpu, false);
	}
+
+	if (logical_packages > __max_logical_packages) {
+		pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
+			logical_packages, __max_logical_packages);
+		logical_packages_frozen = true;
+		__max_logical_packages = logical_packages;
+	}
+
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
 }
 
 void __init smp_store_boot_cpu_info(void)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1ef87e887051..78b9cb5a26af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,7 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/geode.h>
+#include <asm/apic.h>
 
 unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -1249,6 +1250,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);
 
+	/* Inform the TSC deadline clockevent devices about the recalibration */
+	lapic_update_tsc_freq();
+
 out:
	if (boot_cpu_has(X86_FEATURE_ART))
		art_related_clocksource = &clocksource_tsc;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c1ff31d99ff..495c776de4b4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
		*cursor &= 0xfe;
	}
	/*
-	 * Similar treatment for VEX3 prefix.
-	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 * Similar treatment for VEX3/EVEX prefix.
+	 * TODO: add XOP treatment when insn decoder supports them
	 */
-	if (insn->vex_prefix.nbytes == 3) {
+	if (insn->vex_prefix.nbytes >= 3) {
		/*
		 * vex2: c5 rvvvvLpp (has no b bit)
		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
		 * evex: 62 rxbR00mm wvvvv1pp zllBVaaa
-		 * (evex will need setting of both b and x since
-		 * in non-sib encoding evex.x is 4th bit of MODRM.rm)
-		 * Setting VEX3.b (setting because it has inverted meaning):
+		 * Setting VEX3.b (setting because it has inverted meaning).
+		 * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+		 * is the 4th bit of MODRM.rm, and needs the same treatment.
+		 * For VEX3-encoded insns, VEX3.x value has no effect in
+		 * non-SIB encoding, the change is superfluous but harmless.
		 */
		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
-		*cursor |= 0x20;
+		*cursor |= 0x60;
	}
 
	/*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 
	reg = MODRM_REG(insn); /* Fetch modrm.reg */
	reg2 = 0xff; /* Fetch vex.vvvv */
-	if (insn->vex_prefix.nbytes == 2)
-		reg2 = insn->vex_prefix.bytes[1];
-	else if (insn->vex_prefix.nbytes == 3)
+	if (insn->vex_prefix.nbytes)
		reg2 = insn->vex_prefix.bytes[2];
	/*
-	 * TODO: add XOP, EXEV vvvv reading.
+	 * TODO: add XOP vvvv reading.
	 *
	 * vex.vvvv field is in bits 6-3, bits are inverted.
	 * But in 32-bit mode, high-order bit may be ignored.
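For reference on the *cursor |= 0x60 change above: per the rxbmmmmm layout quoted in the comment, the second byte of a VEX3 (c4) or EVEX (62) prefix carries the inverted R/X/B register-extension bits in bits 7..5, so 0x20 forces only the inverted B bit while 0x60 forces both X and B; as the new comment explains, EVEX.x doubles as the 4th bit of MODRM.rm in non-SIB encodings and so needs the same treatment once the operand has been rewritten to a plain scratch register. A small standalone illustration with a made-up prefix byte (userspace C, not kernel code):

#include <stdio.h>

int main(void)
{
	/* Hypothetical second prefix byte whose inverted X/B bits
	 * (bits 6 and 5) happen to be clear, as for an operand that
	 * originally used an extended register. */
	unsigned char byte1 = 0x81;

	printf("original byte:       0x%02x\n", byte1);
	printf("old fixup (|= 0x20): 0x%02x (inverted B only)\n",
	       (unsigned char)(byte1 | 0x20));
	printf("new fixup (|= 0x60): 0x%02x (inverted X and B)\n",
	       (unsigned char)(byte1 | 0x60));
	return 0;
}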