Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/Kconfig        |   4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/elanfreq.c     |   2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c  | 112
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.h  |   3
-rw-r--r--  arch/x86/kernel/genapic_64.c               |   1
-rw-r--r--  arch/x86/kvm/mmu.c                         | 100
-rw-r--r--  arch/x86/kvm/paging_tmpl.h                 |  12
-rw-r--r--  arch/x86/kvm/x86.c                         |  24
-rw-r--r--  arch/x86/lib/copy_user_64.S                |   2
-rw-r--r--  arch/x86/lib/copy_user_nocache_64.S        |   3
10 files changed, 210 insertions, 53 deletions
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596d..efae3b22a0ff 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
 	  If in doubt, say N.
 
 config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+	tristate "VIA C7 Enhanced PowerSaver"
 	select CPU_FREQ_TABLE
-	depends on X86_32 && EXPERIMENTAL
+	depends on X86_32
 	help
 	  This adds the CPUFreq driver for VIA C7 processors.
 
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f563..e4a4bf870e94 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
  * It is important that the frequencies
  * are listed in ascending order here!
  */
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
 	{1000,	0x02,	0x18},
 	{2000,	0x02,	0x10},
 	{4000,	0x02,	0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index c45ca6d4dce1..4e7271999a74 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
 	return 800 + (fid * 100);
 }
 
-
 /* Return a frequency in KHz, given an input fid */
 static u32 find_khz_freq_from_fid(u32 fid)
 {
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
 	return data[pstate].frequency;
 }
 
-
 /* Return the vco fid for an input fid
  *
  * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
 	wrmsr(MSR_FIDVID_CTL, lo, hi);
 }
 
-
 /* write the new fid value along with the other control fields to the msr */
 static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 {
@@ -740,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data)
 #ifdef CONFIG_X86_POWERNOW_K8_ACPI
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
 {
-	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+	if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
 		return;
 
-	data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
-	data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
-	data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
-	data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
-	data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+	data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+
+static struct acpi_processor_performance *acpi_perf_data;
+static int preregister_valid;
+
+static int powernow_k8_cpu_preinit_acpi(void)
+{
+	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+	if (!acpi_perf_data)
+		return -ENODEV;
+
+	if (acpi_processor_preregister_performance(acpi_perf_data))
+		return -ENODEV;
+	else
+		preregister_valid = 1;
+	return 0;
 }
 
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val;
+	int cpu = 0;
 
-	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+	if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
 		return -EIO;
 	}
 
 	/* verify the data contained in the ACPI structures */
-	if (data->acpi_data.state_count <= 1) {
+	if (data->acpi_data->state_count <= 1) {
 		dprintk("No ACPI P-States\n");
 		goto err_out;
 	}
 
-	if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-	    (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+	if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+	    (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
 		dprintk("Invalid control/status registers (%x - %x)\n",
-			data->acpi_data.control_register.space_id,
-			data->acpi_data.status_register.space_id);
+			data->acpi_data->control_register.space_id,
+			data->acpi_data->status_register.space_id);
 		goto err_out;
 	}
 
 	/* fill in data->powernow_table */
 	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
-		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
+		* (data->acpi_data->state_count + 1)), GFP_KERNEL);
 	if (!powernow_table) {
 		dprintk("powernow_table memory alloc failure\n");
 		goto err_out;
@@ -790,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	if (ret_val)
 		goto err_out_mem;
 
-	powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
-	powernow_table[data->acpi_data.state_count].index = 0;
+	powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->acpi_data->state_count].index = 0;
 	data->powernow_table = powernow_table;
 
 	/* fill in data */
-	data->numps = data->acpi_data.state_count;
+	data->numps = data->acpi_data->state_count;
 	if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
 		print_basics(data);
 	powernow_k8_acpi_pst_values(data, 0);
@@ -803,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
+	/* determine affinity, from ACPI if available */
+	if (preregister_valid) {
+		if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
+		    (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
+			data->starting_core_affinity = data->acpi_data->shared_cpu_map;
+		else
+			data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+	} else {
+		/* best guess from family if not */
+		if (cpu_family == CPU_HW_PSTATE)
+			data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+		else
+			data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
+	}
+
 	return 0;
 
 err_out_mem:
 	kfree(powernow_table);
 
 err_out:
-	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	acpi_processor_unregister_performance(data->acpi_data, data->cpu);
 
 	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
-	data->acpi_data.state_count = 0;
+	data->acpi_data->state_count = 0;
 
 	return -ENODEV;
 }
@@ -824,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
 	rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
 	data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
 
-	for (i = 0; i < data->acpi_data.state_count; i++) {
+	for (i = 0; i < data->acpi_data->state_count; i++) {
 		u32 index;
 
-		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+		index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
 		if (index > data->max_hw_pstate) {
 			printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
 			printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -843,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
 
 		powernow_table[i].index = index;
 
-		powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+		powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
 	}
 	return 0;
 }
@@ -852,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 {
 	int i;
 	int cntlofreq = 0;
-	for (i = 0; i < data->acpi_data.state_count; i++) {
+	for (i = 0; i < data->acpi_data->state_count; i++) {
 		u32 fid;
 		u32 vid;
 
 		if (data->exttype) {
-			fid = data->acpi_data.states[i].status & EXT_FID_MASK;
-			vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+			fid = data->acpi_data->states[i].status & EXT_FID_MASK;
+			vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
 		} else {
-			fid = data->acpi_data.states[i].control & FID_MASK;
-			vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+			fid = data->acpi_data->states[i].control & FID_MASK;
+			vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
 		}
 
 		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -902,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 			cntlofreq = i;
 		}
 
-		if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+		if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
 			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
 				powernow_table[i].frequency,
-				(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+				(unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
 			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
 			continue;
 		}
@@ -915,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
-	if (data->acpi_data.state_count)
-		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	if (data->acpi_data->state_count)
+		acpi_processor_unregister_performance(data->acpi_data, data->cpu);
 }
 
 #else
+static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -1104,7 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
-	cpumask_t oldmask;
+	cpumask_t oldmask = CPU_MASK_ALL;
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1177,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	/* run on any CPU again */
 	set_cpus_allowed_ptr(current, &oldmask);
 
-	if (cpu_family == CPU_HW_PSTATE)
-		pol->cpus = cpumask_of_cpu(pol->cpu);
-	else
-		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
+	pol->cpus = data->starting_core_affinity;
 	data->available_cores = &(pol->cpus);
 
 	/* Take a crude guess here.
@@ -1303,6 +1332,7 @@ static int __cpuinit powernowk8_init(void)
 	}
 
 	if (supported_cpus == num_online_cpus()) {
+		powernow_k8_cpu_preinit_acpi();
 		printk(KERN_INFO PFX "Found %d %s "
 			"processors (%d cpu cores) (" VERSION ")\n",
 			num_online_nodes(),
@@ -1319,6 +1349,10 @@ static void __exit powernowk8_exit(void)
 	dprintk("exit\n");
 
 	cpufreq_unregister_driver(&cpufreq_amd64_driver);
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+	free_percpu(acpi_perf_data);
+#endif
 }
 
 MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
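
The powernow-k8 hunks above replace the embedded per-driver ACPI performance
struct with a shared per-CPU allocation, so that
acpi_processor_preregister_performance() can resolve shared P-state (_PSD)
dependency domains once, before any individual CPU registers. Below is a
minimal sketch of that allocation lifecycle, assuming the 2008-era percpu API
the diff itself uses (alloc_percpu/percpu_ptr/free_percpu); the example_*
names are invented for illustration and are not part of the driver.

#include <linux/percpu.h>
#include <acpi/processor.h>

static struct acpi_processor_performance *acpi_perf_data;

/* called once at driver init, before any per-CPU setup runs */
static int example_preinit(void)
{
	/* one zeroed struct per possible CPU */
	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
	if (!acpi_perf_data)
		return -ENOMEM;

	/* lets ACPI work out shared P-state domains up front */
	if (acpi_processor_preregister_performance(acpi_perf_data))
		return -ENODEV;
	return 0;
}

/* per-CPU init: each CPU is handed its own slot of the allocation */
static void example_cpu_init(struct powernow_k8_data *data, int cpu)
{
	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
}

/* module exit: one call releases every CPU's slot */
static void example_exit(void)
{
	free_percpu(acpi_perf_data);
}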
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..a62612cd4be8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,12 +33,13 @@ struct powernow_k8_data {
 #ifdef CONFIG_X86_POWERNOW_K8_ACPI
 	/* the acpi table needs to be kept. it's only available if ACPI was
 	 * used to determine valid frequency/vid/fid states */
-	struct acpi_processor_performance acpi_data;
+	struct acpi_processor_performance *acpi_data;
 #endif
 	/* we need to keep track of associated cores, but let cpufreq
 	 * handle hotplug events - so just point at cpufreq pol->cpus
 	 * structure */
 	cpumask_t *available_cores;
+	cpumask_t starting_core_affinity;
 };
 
 
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd217..eaff0bbb1444 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -99,3 +99,4 @@ int is_uv_system(void)
 {
 	return uv_system_type != UV_NONE;
 }
+EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2fa231923cf7..0bfe2bd305eb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		account_shadowed(kvm, gfn);
 }
 
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte;
+	int need_tlb_flush = 0;
+
+	while ((spte = rmap_next(kvm, rmapp, NULL))) {
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
+		rmap_remove(kvm, spte);
+		set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+		need_tlb_flush = 1;
+	}
+	return need_tlb_flush;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+{
+	int i;
+	int retval = 0;
+
+	/*
+	 * If mmap_sem isn't taken, we can look the memslots with only
+	 * the mmu_lock by skipping over the slots with userspace_addr == 0.
+	 */
+	for (i = 0; i < kvm->nmemslots; i++) {
+		struct kvm_memory_slot *memslot = &kvm->memslots[i];
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		/* mmu_lock protects userspace_addr */
+		if (!start)
+			continue;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+			retval |= handler(kvm,
+					  &memslot->lpage_info[
+						  gfn_offset /
+						  KVM_PAGES_PER_HPAGE].rmap_pde);
+		}
+	}
+
+	return retval;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+	u64 *spte;
+	int young = 0;
+
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		int _young;
+		u64 _spte = *spte;
+		BUG_ON(!(_spte & PT_PRESENT_MASK));
+		_young = _spte & PT_ACCESSED_MASK;
+		if (_young) {
+			young = 1;
+			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+		}
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+	return young;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+}
+
 #ifdef MMU_DEBUG
 static int is_empty_shadow_page(u64 *spt)
 {
@@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	int r;
 	int largepage = 0;
 	pfn_t pfn;
+	unsigned long mmu_seq;
 
 	down_read(&current->mm->mmap_sem);
 	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 		largepage = 1;
 	}
 
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
@@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
 			 PT32E_ROOT_LEVEL);
@@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 
 	return r;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 
@@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	int r;
 	int largepage = 0;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
+	unsigned long mmu_seq;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		largepage = 1;
 	}
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 	if (is_error_pfn(pfn)) {
@@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 		return 1;
 	}
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
 			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
@@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
 		vcpu->arch.update_pte.largepage = 1;
 	}
+	vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
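
Every mmu_seq hunk in this file (and in paging_tmpl.h below) applies the same
retry discipline: sample kvm->mmu_notifier_seq before the sleepy gfn_to_pfn()
lookup, then re-check it under mmu_lock with mmu_notifier_retry() and drop the
page so the guest simply re-faults if an MMU-notifier invalidation ran in
between. Here is a self-contained userland sketch of that handshake; all
names are invented for the illustration, and "retry" is modeled as returning
0, just as the out_unlock paths do.

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t notifier_seq;	/* bumped by every invalidation */
static uintptr_t shadow_entry;	/* stands in for a shadow pte */

/* invalidation path: the kvm_unmap_hva() analogue */
void invalidate_page(void)
{
	pthread_mutex_lock(&mmu_lock);
	shadow_entry = 0;	/* tear down the mapping */
	notifier_seq++;		/* lookups sampled before this are stale */
	pthread_mutex_unlock(&mmu_lock);
}

/* fault path: returns 1 on success, 0 if the caller must re-fault */
int try_install(uintptr_t page)
{
	uint64_t seq = notifier_seq;	/* sample BEFORE the slow lookup */

	/* ... sleepy, lockless page lookup happens here (gfn_to_pfn) ... */

	pthread_mutex_lock(&mmu_lock);
	if (seq != notifier_seq) {	/* mmu_notifier_retry() analogue */
		pthread_mutex_unlock(&mmu_lock);
		return 0;		/* page may already be invalidated */
	}
	shadow_entry = page;	/* safe: nothing invalidated since sample */
	pthread_mutex_unlock(&mmu_lock);
	return 1;
}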
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4d918220baeb..f72ac1fa35f0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
 	pfn = vcpu->arch.update_pte.pfn;
 	if (is_error_pfn(pfn))
 		return;
+	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
+		return;
 	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
 		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	int r;
 	pfn_t pfn;
 	int largepage = 0;
+	unsigned long mmu_seq;
 
 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 	kvm_mmu_audit(vcpu, "pre page fault");
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 			largepage = 1;
 		}
 	}
+	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+	/* implicit mb(), we'll read before PT lock is unlocked */
 	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 	up_read(&current->mm->mmap_sem);
 
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (mmu_notifier_retry(vcpu, mmu_seq))
+		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
 				  largepage, &write_pt, pfn);
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return write_pt;
+
+out_unlock:
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	kvm_release_pfn_clean(pfn);
+	return 0;
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5916191420c7..0d682fc6aeb3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_SYNC_MMU:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 		goto out;
 
 	down_write(&kvm->slots_lock);
+	spin_lock(&kvm->mmu_lock);
 
 	p = &kvm->arch.aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 			break;
 	kvm->arch.naliases = n;
 
+	spin_unlock(&kvm->mmu_lock);
 	kvm_mmu_zap_all(kvm);
 
 	up_write(&kvm->slots_lock);
@@ -3972,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
+			unsigned long userspace_addr;
+
 			down_write(&current->mm->mmap_sem);
-			memslot->userspace_addr = do_mmap(NULL, 0,
-						npages * PAGE_SIZE,
-						PROT_READ | PROT_WRITE,
-						MAP_SHARED | MAP_ANONYMOUS,
-						0);
+			userspace_addr = do_mmap(NULL, 0,
+						 npages * PAGE_SIZE,
+						 PROT_READ | PROT_WRITE,
+						 MAP_SHARED | MAP_ANONYMOUS,
+						 0);
 			up_write(&current->mm->mmap_sem);
 
-			if (IS_ERR((void *)memslot->userspace_addr))
-				return PTR_ERR((void *)memslot->userspace_addr);
+			if (IS_ERR((void *)userspace_addr))
+				return PTR_ERR((void *)userspace_addr);
+
+			/* set userspace_addr atomically for kvm_hva_to_rmapp */
+			spin_lock(&kvm->mmu_lock);
+			memslot->userspace_addr = userspace_addr;
+			spin_unlock(&kvm->mmu_lock);
 		} else {
 			if (!old.user_alloc && old.rmap) {
 				int ret;
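
The last hunk stages do_mmap()'s result in a local variable and publishes
memslot->userspace_addr only under mmu_lock because kvm_handle_hva(), added
in mmu.c above, walks the memslots with just mmu_lock held and skips any slot
whose userspace_addr is still 0. A compressed sketch of that publish/skip
protocol follows; the names are invented, and PAGE_SHIFT is hard-coded as 12
for the example.

#include <pthread.h>

static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;

struct slot {
	unsigned long userspace_addr;	/* 0 means "not visible yet" */
	unsigned long npages;
};

/* writer: expose the slot only once it is fully prepared */
void publish_slot(struct slot *s, unsigned long addr, unsigned long npages)
{
	s->npages = npages;		/* prepared before publication */
	pthread_mutex_lock(&mmu_lock);
	s->userspace_addr = addr;	/* the single store readers key off */
	pthread_mutex_unlock(&mmu_lock);
}

/* reader: the kvm_handle_hva() analogue, holding only mmu_lock */
int slot_contains(struct slot *s, unsigned long hva)
{
	int hit = 0;

	pthread_mutex_lock(&mmu_lock);
	if (s->userspace_addr &&	/* skip unpublished slots */
	    hva >= s->userspace_addr &&
	    hva < s->userspace_addr + (s->npages << 12))
		hit = 1;
	pthread_mutex_unlock(&mmu_lock);
	return hit;
}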
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dfdf428975c0..f118c110af32 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -52,7 +52,7 @@
 	jnz 100b
 102:
 	.section .fixup,"ax"
-103:	addl %r8d,%edx			/* ecx is zerorest also */
+103:	addl %ecx,%edx			/* ecx is zerorest also */
 	jmp copy_user_handle_tail
 	.previous
 
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 40e0e309d27e..cb0c112386fb 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -32,7 +32,7 @@
 	jnz 100b
 102:
 	.section .fixup,"ax"
-103:	addl %r8d,%edx			/* ecx is zerorest also */
+103:	addl %ecx,%edx			/* ecx is zerorest also */
 	jmp copy_user_handle_tail
 	.previous
 
@@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache)
 	jmp 60f
 50:	movl %ecx,%edx
 60:	sfence
-	movl %r8d,%ecx
 	jmp copy_user_handle_tail
 	.previous
 