diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 14:41:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-19 14:41:26 -0400 |
commit | b5d72dda8976e878be47415b94bca8465d1fa22d (patch) | |
tree | 67dfaf0120febff148ec18824bf1d08ac9ecf7e5 | |
parent | 26473f83703e6bc56114ce4b045000de6efcfff7 (diff) | |
parent | a1078e821b605813b63bf6bca414a85f804d5c66 (diff) |
Merge tag 'for-linus-5.3a-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
"Fixes and features:
- A series to introduce a common command line parameter for disabling
paravirtual extensions when running as a guest in virtualized
environment
- A fix for int3 handling in Xen pv guests
- Removal of the Xen-specific tmem driver as support of tmem in Xen
has been dropped (and it was experimental only)
- A security fix for running as Xen dom0 (XSA-300)
- A fix for IRQ handling when offlining cpus in Xen guests
- Some small cleanups"
* tag 'for-linus-5.3a-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen: let alloc_xenballooned_pages() fail if not enough memory free
xen/pv: Fix a boot up hang revealed by int3 self test
x86/xen: Add "nopv" support for HVM guest
x86/paravirt: Remove const mark from x86_hyper_xen_hvm variable
xen: Map "xen_nopv" parameter to "nopv" and mark it obsolete
x86: Add "nopv" parameter to disable PV extensions
x86/xen: Mark xen_hvm_need_lapic() and xen_x2apic_para_available() as __init
xen: remove tmem driver
Revert "x86/paravirt: Set up the virt_spin_lock_key after static keys get initialized"
xen/events: fix binding user event channels to cpus
-rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 28 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/hypervisor.h | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/traps.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/x86_init.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/xen/hypervisor.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/hypervisor.c | 19 | ||||
-rw-r--r-- | arch/x86/kernel/jailhouse.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/x86_init.c | 4 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_hvm.c | 58 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pv.c | 3 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 6 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_64.S | 1 | ||||
-rw-r--r-- | drivers/xen/Kconfig | 23 | ||||
-rw-r--r-- | drivers/xen/Makefile | 2 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 16 | ||||
-rw-r--r-- | drivers/xen/events/events_base.c | 12 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 2 | ||||
-rw-r--r-- | drivers/xen/tmem.c | 419 | ||||
-rw-r--r-- | drivers/xen/xen-balloon.c | 2 | ||||
-rw-r--r-- | drivers/xen/xen-selfballoon.c | 579 | ||||
-rw-r--r-- | include/xen/balloon.h | 10 | ||||
-rw-r--r-- | include/xen/events.h | 3 | ||||
-rw-r--r-- | include/xen/tmem.h | 18 |
25 files changed, 112 insertions, 1120 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f0461456d910..46b826fcb5ad 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
@@ -4698,27 +4698,6 @@ | |||
4698 | Force threading of all interrupt handlers except those | 4698 | Force threading of all interrupt handlers except those |
4699 | marked explicitly IRQF_NO_THREAD. | 4699 | marked explicitly IRQF_NO_THREAD. |
4700 | 4700 | ||
4701 | tmem [KNL,XEN] | ||
4702 | Enable the Transcendent memory driver if built-in. | ||
4703 | |||
4704 | tmem.cleancache=0|1 [KNL, XEN] | ||
4705 | Default is on (1). Disable the usage of the cleancache | ||
4706 | API to send anonymous pages to the hypervisor. | ||
4707 | |||
4708 | tmem.frontswap=0|1 [KNL, XEN] | ||
4709 | Default is on (1). Disable the usage of the frontswap | ||
4710 | API to send swap pages to the hypervisor. If disabled | ||
4711 | the selfballooning and selfshrinking are force disabled. | ||
4712 | |||
4713 | tmem.selfballooning=0|1 [KNL, XEN] | ||
4714 | Default is on (1). Disable the driving of swap pages | ||
4715 | to the hypervisor. | ||
4716 | |||
4717 | tmem.selfshrinking=0|1 [KNL, XEN] | ||
4718 | Default is on (1). Partial swapoff that immediately | ||
4719 | transfers pages from Xen hypervisor back to the | ||
4720 | kernel based on different criteria. | ||
4721 | |||
4722 | topology= [S390] | 4701 | topology= [S390] |
4723 | Format: {off | on} | 4702 | Format: {off | on} |
4724 | Specify if the kernel should make use of the cpu | 4703 | Specify if the kernel should make use of the cpu |
@@ -5288,6 +5267,8 @@ | |||
5288 | xen_nopv [X86] | 5267 | xen_nopv [X86] |
5289 | Disables the PV optimizations forcing the HVM guest to | 5268 | Disables the PV optimizations forcing the HVM guest to |
5290 | run as generic HVM guest with no PV drivers. | 5269 | run as generic HVM guest with no PV drivers. |
5270 | This option is obsoleted by the "nopv" option, which | ||
5271 | has equivalent effect for XEN platform. | ||
5291 | 5272 | ||
5292 | xen_scrub_pages= [XEN] | 5273 | xen_scrub_pages= [XEN] |
5293 | Boolean option to control scrubbing pages before giving them back | 5274 | Boolean option to control scrubbing pages before giving them back |
@@ -5302,6 +5283,11 @@ | |||
5302 | improve timer resolution at the expense of processing | 5283 | improve timer resolution at the expense of processing |
5303 | more timer interrupts. | 5284 | more timer interrupts. |
5304 | 5285 | ||
5286 | nopv= [X86,XEN,KVM,HYPER_V,VMWARE] | ||
5287 | Disables the PV optimizations forcing the guest to run | ||
5288 | as generic guest with no PV drivers. Currently support | ||
5289 | XEN HVM, KVM, HYPER_V and VMWARE guest. | ||
5290 | |||
5305 | xirc2ps_cs= [NET,PCMCIA] | 5291 | xirc2ps_cs= [NET,PCMCIA] |
5306 | Format: | 5292 | Format: |
5307 | <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] | 5293 | <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 0ea4831a72a4..35a66fcfcb91 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
@@ -1176,7 +1176,6 @@ idtentry stack_segment do_stack_segment has_error_code=1 | |||
1176 | #ifdef CONFIG_XEN_PV | 1176 | #ifdef CONFIG_XEN_PV |
1177 | idtentry xennmi do_nmi has_error_code=0 | 1177 | idtentry xennmi do_nmi has_error_code=0 |
1178 | idtentry xendebug do_debug has_error_code=0 | 1178 | idtentry xendebug do_debug has_error_code=0 |
1179 | idtentry xenint3 do_int3 has_error_code=0 | ||
1180 | #endif | 1179 | #endif |
1181 | 1180 | ||
1182 | idtentry general_protection do_general_protection has_error_code=1 | 1181 | idtentry general_protection do_general_protection has_error_code=1 |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 50a30f6c668b..e41cbf2ec41d 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -53,8 +53,20 @@ struct hypervisor_x86 { | |||
53 | 53 | ||
54 | /* runtime callbacks */ | 54 | /* runtime callbacks */ |
55 | struct x86_hyper_runtime runtime; | 55 | struct x86_hyper_runtime runtime; |
56 | |||
57 | /* ignore nopv parameter */ | ||
58 | bool ignore_nopv; | ||
56 | }; | 59 | }; |
57 | 60 | ||
61 | extern const struct hypervisor_x86 x86_hyper_vmware; | ||
62 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
63 | extern const struct hypervisor_x86 x86_hyper_xen_pv; | ||
64 | extern const struct hypervisor_x86 x86_hyper_kvm; | ||
65 | extern const struct hypervisor_x86 x86_hyper_jailhouse; | ||
66 | extern const struct hypervisor_x86 x86_hyper_acrn; | ||
67 | extern struct hypervisor_x86 x86_hyper_xen_hvm; | ||
68 | |||
69 | extern bool nopv; | ||
58 | extern enum x86_hypervisor_type x86_hyper_type; | 70 | extern enum x86_hypervisor_type x86_hyper_type; |
59 | extern void init_hypervisor_platform(void); | 71 | extern void init_hypervisor_platform(void); |
60 | static inline bool hypervisor_is_type(enum x86_hypervisor_type type) | 72 | static inline bool hypervisor_is_type(enum x86_hypervisor_type type) |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 7d6f3f3fad78..f2bd284abc16 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -40,7 +40,7 @@ asmlinkage void simd_coprocessor_error(void); | |||
40 | asmlinkage void xen_divide_error(void); | 40 | asmlinkage void xen_divide_error(void); |
41 | asmlinkage void xen_xennmi(void); | 41 | asmlinkage void xen_xennmi(void); |
42 | asmlinkage void xen_xendebug(void); | 42 | asmlinkage void xen_xendebug(void); |
43 | asmlinkage void xen_xenint3(void); | 43 | asmlinkage void xen_int3(void); |
44 | asmlinkage void xen_overflow(void); | 44 | asmlinkage void xen_overflow(void); |
45 | asmlinkage void xen_bounds(void); | 45 | asmlinkage void xen_bounds(void); |
46 | asmlinkage void xen_invalid_op(void); | 46 | asmlinkage void xen_invalid_op(void); |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b85a7c54c6a1..ac0934189017 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -301,6 +301,8 @@ extern struct x86_apic_ops x86_apic_ops; | |||
301 | extern void x86_early_init_platform_quirks(void); | 301 | extern void x86_early_init_platform_quirks(void); |
302 | extern void x86_init_noop(void); | 302 | extern void x86_init_noop(void); |
303 | extern void x86_init_uint_noop(unsigned int unused); | 303 | extern void x86_init_uint_noop(unsigned int unused); |
304 | extern bool bool_x86_init_noop(void); | ||
305 | extern void x86_op_int_noop(int cpu); | ||
304 | extern bool x86_pnpbios_disabled(void); | 306 | extern bool x86_pnpbios_disabled(void); |
305 | 307 | ||
306 | #endif | 308 | #endif |
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 39171b3646bb..42e1245af0d8 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h | |||
@@ -44,14 +44,14 @@ static inline uint32_t xen_cpuid_base(void) | |||
44 | } | 44 | } |
45 | 45 | ||
46 | #ifdef CONFIG_XEN | 46 | #ifdef CONFIG_XEN |
47 | extern bool xen_hvm_need_lapic(void); | 47 | extern bool __init xen_hvm_need_lapic(void); |
48 | 48 | ||
49 | static inline bool xen_x2apic_para_available(void) | 49 | static inline bool __init xen_x2apic_para_available(void) |
50 | { | 50 | { |
51 | return xen_hvm_need_lapic(); | 51 | return xen_hvm_need_lapic(); |
52 | } | 52 | } |
53 | #else | 53 | #else |
54 | static inline bool xen_x2apic_para_available(void) | 54 | static inline bool __init xen_x2apic_para_available(void) |
55 | { | 55 | { |
56 | return (xen_cpuid_base() != 0); | 56 | return (xen_cpuid_base() != 0); |
57 | } | 57 | } |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 87e39ad8d873..553bfbfc3a1b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -26,14 +26,6 @@ | |||
26 | #include <asm/processor.h> | 26 | #include <asm/processor.h> |
27 | #include <asm/hypervisor.h> | 27 | #include <asm/hypervisor.h> |
28 | 28 | ||
29 | extern const struct hypervisor_x86 x86_hyper_vmware; | ||
30 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
31 | extern const struct hypervisor_x86 x86_hyper_xen_pv; | ||
32 | extern const struct hypervisor_x86 x86_hyper_xen_hvm; | ||
33 | extern const struct hypervisor_x86 x86_hyper_kvm; | ||
34 | extern const struct hypervisor_x86 x86_hyper_jailhouse; | ||
35 | extern const struct hypervisor_x86 x86_hyper_acrn; | ||
36 | |||
37 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | 29 | static const __initconst struct hypervisor_x86 * const hypervisors[] = |
38 | { | 30 | { |
39 | #ifdef CONFIG_XEN_PV | 31 | #ifdef CONFIG_XEN_PV |
@@ -58,6 +50,14 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = | |||
58 | enum x86_hypervisor_type x86_hyper_type; | 50 | enum x86_hypervisor_type x86_hyper_type; |
59 | EXPORT_SYMBOL(x86_hyper_type); | 51 | EXPORT_SYMBOL(x86_hyper_type); |
60 | 52 | ||
53 | bool __initdata nopv; | ||
54 | static __init int parse_nopv(char *arg) | ||
55 | { | ||
56 | nopv = true; | ||
57 | return 0; | ||
58 | } | ||
59 | early_param("nopv", parse_nopv); | ||
60 | |||
61 | static inline const struct hypervisor_x86 * __init | 61 | static inline const struct hypervisor_x86 * __init |
62 | detect_hypervisor_vendor(void) | 62 | detect_hypervisor_vendor(void) |
63 | { | 63 | { |
@@ -65,6 +65,9 @@ detect_hypervisor_vendor(void) | |||
65 | uint32_t pri, max_pri = 0; | 65 | uint32_t pri, max_pri = 0; |
66 | 66 | ||
67 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { | 67 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
68 | if (unlikely(nopv) && !(*p)->ignore_nopv) | ||
69 | continue; | ||
70 | |||
68 | pri = (*p)->detect(); | 71 | pri = (*p)->detect(); |
69 | if (pri > max_pri) { | 72 | if (pri > max_pri) { |
70 | max_pri = pri; | 73 | max_pri = pri; |
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 6857b4577f17..3ad34f01de2a 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c | |||
@@ -217,4 +217,5 @@ const struct hypervisor_x86 x86_hyper_jailhouse __refconst = { | |||
217 | .detect = jailhouse_detect, | 217 | .detect = jailhouse_detect, |
218 | .init.init_platform = jailhouse_init_platform, | 218 | .init.init_platform = jailhouse_init_platform, |
219 | .init.x2apic_available = jailhouse_x2apic_available, | 219 | .init.x2apic_available = jailhouse_x2apic_available, |
220 | .ignore_nopv = true, | ||
220 | }; | 221 | }; |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 259d1d2be076..fdbd47ceb84d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1368,8 +1368,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1368 | pr_info("CPU0: "); | 1368 | pr_info("CPU0: "); |
1369 | print_cpu_info(&cpu_data(0)); | 1369 | print_cpu_info(&cpu_data(0)); |
1370 | 1370 | ||
1371 | native_pv_lock_init(); | ||
1372 | |||
1373 | uv_system_init(); | 1371 | uv_system_init(); |
1374 | 1372 | ||
1375 | set_mtrr_aps_delayed_init(); | 1373 | set_mtrr_aps_delayed_init(); |
@@ -1399,6 +1397,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1399 | /* already set me in cpu_online_mask in boot_cpu_init() */ | 1397 | /* already set me in cpu_online_mask in boot_cpu_init() */ |
1400 | cpumask_set_cpu(me, cpu_callout_mask); | 1398 | cpumask_set_cpu(me, cpu_callout_mask); |
1401 | cpu_set_state_online(me); | 1399 | cpu_set_state_online(me); |
1400 | native_pv_lock_init(); | ||
1402 | } | 1401 | } |
1403 | 1402 | ||
1404 | void __init calculate_max_logical_packages(void) | 1403 | void __init calculate_max_logical_packages(void) |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 50a2b492fdd6..1bef687faf22 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -29,8 +29,8 @@ void x86_init_noop(void) { } | |||
29 | void __init x86_init_uint_noop(unsigned int unused) { } | 29 | void __init x86_init_uint_noop(unsigned int unused) { } |
30 | static int __init iommu_init_noop(void) { return 0; } | 30 | static int __init iommu_init_noop(void) { return 0; } |
31 | static void iommu_shutdown_noop(void) { } | 31 | static void iommu_shutdown_noop(void) { } |
32 | static bool __init bool_x86_init_noop(void) { return false; } | 32 | bool __init bool_x86_init_noop(void) { return false; } |
33 | static void x86_op_int_noop(int cpu) { } | 33 | void x86_op_int_noop(int cpu) { } |
34 | 34 | ||
35 | /* | 35 | /* |
36 | * The platform setup functions are preset with the default functions | 36 | * The platform setup functions are preset with the default functions |
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 0e75642d42a3..e138f7de52d2 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c | |||
@@ -210,18 +210,18 @@ static void __init xen_hvm_guest_init(void) | |||
210 | #endif | 210 | #endif |
211 | } | 211 | } |
212 | 212 | ||
213 | static bool xen_nopv; | ||
214 | static __init int xen_parse_nopv(char *arg) | 213 | static __init int xen_parse_nopv(char *arg) |
215 | { | 214 | { |
216 | xen_nopv = true; | 215 | pr_notice("\"xen_nopv\" is deprecated, please use \"nopv\" instead\n"); |
217 | return 0; | 216 | |
217 | if (xen_cpuid_base()) | ||
218 | nopv = true; | ||
219 | return 0; | ||
218 | } | 220 | } |
219 | early_param("xen_nopv", xen_parse_nopv); | 221 | early_param("xen_nopv", xen_parse_nopv); |
220 | 222 | ||
221 | bool xen_hvm_need_lapic(void) | 223 | bool __init xen_hvm_need_lapic(void) |
222 | { | 224 | { |
223 | if (xen_nopv) | ||
224 | return false; | ||
225 | if (xen_pv_domain()) | 225 | if (xen_pv_domain()) |
226 | return false; | 226 | return false; |
227 | if (!xen_hvm_domain()) | 227 | if (!xen_hvm_domain()) |
@@ -230,15 +230,6 @@ bool xen_hvm_need_lapic(void) | |||
230 | return false; | 230 | return false; |
231 | return true; | 231 | return true; |
232 | } | 232 | } |
233 | EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); | ||
234 | |||
235 | static uint32_t __init xen_platform_hvm(void) | ||
236 | { | ||
237 | if (xen_pv_domain() || xen_nopv) | ||
238 | return 0; | ||
239 | |||
240 | return xen_cpuid_base(); | ||
241 | } | ||
242 | 233 | ||
243 | static __init void xen_hvm_guest_late_init(void) | 234 | static __init void xen_hvm_guest_late_init(void) |
244 | { | 235 | { |
@@ -251,6 +242,9 @@ static __init void xen_hvm_guest_late_init(void) | |||
251 | /* PVH detected. */ | 242 | /* PVH detected. */ |
252 | xen_pvh = true; | 243 | xen_pvh = true; |
253 | 244 | ||
245 | if (nopv) | ||
246 | panic("\"nopv\" and \"xen_nopv\" parameters are unsupported in PVH guest."); | ||
247 | |||
254 | /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */ | 248 | /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */ |
255 | if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC) | 249 | if (!nr_ioapics && acpi_irq_model == ACPI_IRQ_MODEL_PIC) |
256 | acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; | 250 | acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; |
@@ -260,7 +254,38 @@ static __init void xen_hvm_guest_late_init(void) | |||
260 | #endif | 254 | #endif |
261 | } | 255 | } |
262 | 256 | ||
263 | const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = { | 257 | static uint32_t __init xen_platform_hvm(void) |
258 | { | ||
259 | uint32_t xen_domain = xen_cpuid_base(); | ||
260 | struct x86_hyper_init *h = &x86_hyper_xen_hvm.init; | ||
261 | |||
262 | if (xen_pv_domain()) | ||
263 | return 0; | ||
264 | |||
265 | if (xen_pvh_domain() && nopv) { | ||
266 | /* Guest booting via the Xen-PVH boot entry goes here */ | ||
267 | pr_info("\"nopv\" parameter is ignored in PVH guest\n"); | ||
268 | nopv = false; | ||
269 | } else if (nopv && xen_domain) { | ||
270 | /* | ||
271 | * Guest booting via normal boot entry (like via grub2) goes | ||
272 | * here. | ||
273 | * | ||
274 | * Use interface functions for bare hardware if nopv, | ||
275 | * xen_hvm_guest_late_init is an exception as we need to | ||
276 | * detect PVH and panic there. | ||
277 | */ | ||
278 | h->init_platform = x86_init_noop; | ||
279 | h->x2apic_available = bool_x86_init_noop; | ||
280 | h->init_mem_mapping = x86_init_noop; | ||
281 | h->init_after_bootmem = x86_init_noop; | ||
282 | h->guest_late_init = xen_hvm_guest_late_init; | ||
283 | x86_hyper_xen_hvm.runtime.pin_vcpu = x86_op_int_noop; | ||
284 | } | ||
285 | return xen_domain; | ||
286 | } | ||
287 | |||
288 | struct hypervisor_x86 x86_hyper_xen_hvm __initdata = { | ||
264 | .name = "Xen HVM", | 289 | .name = "Xen HVM", |
265 | .detect = xen_platform_hvm, | 290 | .detect = xen_platform_hvm, |
266 | .type = X86_HYPER_XEN_HVM, | 291 | .type = X86_HYPER_XEN_HVM, |
@@ -269,4 +294,5 @@ const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = { | |||
269 | .init.init_mem_mapping = xen_hvm_init_mem_mapping, | 294 | .init.init_mem_mapping = xen_hvm_init_mem_mapping, |
270 | .init.guest_late_init = xen_hvm_guest_late_init, | 295 | .init.guest_late_init = xen_hvm_guest_late_init, |
271 | .runtime.pin_vcpu = xen_pin_vcpu, | 296 | .runtime.pin_vcpu = xen_pin_vcpu, |
297 | .ignore_nopv = true, | ||
272 | }; | 298 | }; |
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4722ba2966ac..bed6bb93c965 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c | |||
@@ -596,12 +596,12 @@ struct trap_array_entry { | |||
596 | 596 | ||
597 | static struct trap_array_entry trap_array[] = { | 597 | static struct trap_array_entry trap_array[] = { |
598 | { debug, xen_xendebug, true }, | 598 | { debug, xen_xendebug, true }, |
599 | { int3, xen_xenint3, true }, | ||
600 | { double_fault, xen_double_fault, true }, | 599 | { double_fault, xen_double_fault, true }, |
601 | #ifdef CONFIG_X86_MCE | 600 | #ifdef CONFIG_X86_MCE |
602 | { machine_check, xen_machine_check, true }, | 601 | { machine_check, xen_machine_check, true }, |
603 | #endif | 602 | #endif |
604 | { nmi, xen_xennmi, true }, | 603 | { nmi, xen_xennmi, true }, |
604 | { int3, xen_int3, false }, | ||
605 | { overflow, xen_overflow, false }, | 605 | { overflow, xen_overflow, false }, |
606 | #ifdef CONFIG_IA32_EMULATION | 606 | #ifdef CONFIG_IA32_EMULATION |
607 | { entry_INT80_compat, xen_entry_INT80_compat, false }, | 607 | { entry_INT80_compat, xen_entry_INT80_compat, false }, |
@@ -1463,4 +1463,5 @@ const __initconst struct hypervisor_x86 x86_hyper_xen_pv = { | |||
1463 | .detect = xen_platform_pv, | 1463 | .detect = xen_platform_pv, |
1464 | .type = X86_HYPER_XEN_PV, | 1464 | .type = X86_HYPER_XEN_PV, |
1465 | .runtime.pin_vcpu = xen_pin_vcpu, | 1465 | .runtime.pin_vcpu = xen_pin_vcpu, |
1466 | .ignore_nopv = true, | ||
1466 | }; | 1467 | }; |
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 3776122c87cc..6deb49094c60 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -68,11 +68,8 @@ void xen_init_lock_cpu(int cpu) | |||
68 | int irq; | 68 | int irq; |
69 | char *name; | 69 | char *name; |
70 | 70 | ||
71 | if (!xen_pvspin) { | 71 | if (!xen_pvspin) |
72 | if (cpu == 0) | ||
73 | static_branch_disable(&virt_spin_lock_key); | ||
74 | return; | 72 | return; |
75 | } | ||
76 | 73 | ||
77 | WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", | 74 | WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", |
78 | cpu, per_cpu(lock_kicker_irq, cpu)); | 75 | cpu, per_cpu(lock_kicker_irq, cpu)); |
@@ -124,6 +121,7 @@ void __init xen_init_spinlocks(void) | |||
124 | 121 | ||
125 | if (!xen_pvspin) { | 122 | if (!xen_pvspin) { |
126 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); | 123 | printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); |
124 | static_branch_disable(&virt_spin_lock_key); | ||
127 | return; | 125 | return; |
128 | } | 126 | } |
129 | printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); | 127 | printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); |
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 1e9ef0ba30a5..ebf610b49c06 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -32,7 +32,6 @@ xen_pv_trap divide_error | |||
32 | xen_pv_trap debug | 32 | xen_pv_trap debug |
33 | xen_pv_trap xendebug | 33 | xen_pv_trap xendebug |
34 | xen_pv_trap int3 | 34 | xen_pv_trap int3 |
35 | xen_pv_trap xenint3 | ||
36 | xen_pv_trap xennmi | 35 | xen_pv_trap xennmi |
37 | xen_pv_trap overflow | 36 | xen_pv_trap overflow |
38 | xen_pv_trap bounds | 37 | xen_pv_trap bounds |
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index ec6558b79e9d..79cc75096f42 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig | |||
@@ -10,21 +10,6 @@ config XEN_BALLOON | |||
10 | the system to expand the domain's memory allocation, or alternatively | 10 | the system to expand the domain's memory allocation, or alternatively |
11 | return unneeded memory to the system. | 11 | return unneeded memory to the system. |
12 | 12 | ||
13 | config XEN_SELFBALLOONING | ||
14 | bool "Dynamically self-balloon kernel memory to target" | ||
15 | depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP && XEN_TMEM | ||
16 | help | ||
17 | Self-ballooning dynamically balloons available kernel memory driven | ||
18 | by the current usage of anonymous memory ("committed AS") and | ||
19 | controlled by various sysfs-settable parameters. Configuring | ||
20 | FRONTSWAP is highly recommended; if it is not configured, self- | ||
21 | ballooning is disabled by default. If FRONTSWAP is configured, | ||
22 | frontswap-selfshrinking is enabled by default but can be disabled | ||
23 | with the 'tmem.selfshrink=0' kernel boot parameter; and self-ballooning | ||
24 | is enabled by default but can be disabled with the 'tmem.selfballooning=0' | ||
25 | kernel boot parameter. Note that systems without a sufficiently | ||
26 | large swap device should not enable self-ballooning. | ||
27 | |||
28 | config XEN_BALLOON_MEMORY_HOTPLUG | 13 | config XEN_BALLOON_MEMORY_HOTPLUG |
29 | bool "Memory hotplug support for Xen balloon driver" | 14 | bool "Memory hotplug support for Xen balloon driver" |
30 | depends on XEN_BALLOON && MEMORY_HOTPLUG | 15 | depends on XEN_BALLOON && MEMORY_HOTPLUG |
@@ -191,14 +176,6 @@ config SWIOTLB_XEN | |||
191 | def_bool y | 176 | def_bool y |
192 | select SWIOTLB | 177 | select SWIOTLB |
193 | 178 | ||
194 | config XEN_TMEM | ||
195 | tristate | ||
196 | depends on !ARM && !ARM64 | ||
197 | default m if (CLEANCACHE || FRONTSWAP) | ||
198 | help | ||
199 | Shim to interface in-kernel Transcendent Memory hooks | ||
200 | (e.g. cleancache and frontswap) to Xen tmem hypercalls. | ||
201 | |||
202 | config XEN_PCIDEV_BACKEND | 179 | config XEN_PCIDEV_BACKEND |
203 | tristate "Xen PCI-device backend driver" | 180 | tristate "Xen PCI-device backend driver" |
204 | depends on PCI && X86 && XEN | 181 | depends on PCI && X86 && XEN |
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ad3844d9f876..0c4efa6fe450 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile | |||
@@ -17,14 +17,12 @@ dom0-$(CONFIG_X86) += pcpu.o | |||
17 | obj-$(CONFIG_XEN_DOM0) += $(dom0-y) | 17 | obj-$(CONFIG_XEN_DOM0) += $(dom0-y) |
18 | obj-$(CONFIG_BLOCK) += biomerge.o | 18 | obj-$(CONFIG_BLOCK) += biomerge.o |
19 | obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o | 19 | obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o |
20 | obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o | ||
21 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o | 20 | obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o |
22 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o | 21 | obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o |
23 | obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o | 22 | obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o |
24 | obj-$(CONFIG_XENFS) += xenfs/ | 23 | obj-$(CONFIG_XENFS) += xenfs/ |
25 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o | 24 | obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o |
26 | obj-$(CONFIG_XEN_PVHVM) += platform-pci.o | 25 | obj-$(CONFIG_XEN_PVHVM) += platform-pci.o |
27 | obj-$(CONFIG_XEN_TMEM) += tmem.o | ||
28 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o | 26 | obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o |
29 | obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o | 27 | obj-$(CONFIG_XEN_MCE_LOG) += mcelog.o |
30 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ | 28 | obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ |
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 37a36c6b9f93..4e11de6cde81 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c | |||
@@ -535,8 +535,15 @@ static void balloon_process(struct work_struct *work) | |||
535 | state = reserve_additional_memory(); | 535 | state = reserve_additional_memory(); |
536 | } | 536 | } |
537 | 537 | ||
538 | if (credit < 0) | 538 | if (credit < 0) { |
539 | state = decrease_reservation(-credit, GFP_BALLOON); | 539 | long n_pages; |
540 | |||
541 | n_pages = min(-credit, si_mem_available()); | ||
542 | state = decrease_reservation(n_pages, GFP_BALLOON); | ||
543 | if (state == BP_DONE && n_pages != -credit && | ||
544 | n_pages < totalreserve_pages) | ||
545 | state = BP_EAGAIN; | ||
546 | } | ||
540 | 547 | ||
541 | state = update_schedule(state); | 548 | state = update_schedule(state); |
542 | 549 | ||
@@ -575,6 +582,9 @@ static int add_ballooned_pages(int nr_pages) | |||
575 | } | 582 | } |
576 | } | 583 | } |
577 | 584 | ||
585 | if (si_mem_available() < nr_pages) | ||
586 | return -ENOMEM; | ||
587 | |||
578 | st = decrease_reservation(nr_pages, GFP_USER); | 588 | st = decrease_reservation(nr_pages, GFP_USER); |
579 | if (st != BP_DONE) | 589 | if (st != BP_DONE) |
580 | return -ENOMEM; | 590 | return -ENOMEM; |
@@ -707,7 +717,7 @@ static int __init balloon_init(void) | |||
707 | balloon_stats.schedule_delay = 1; | 717 | balloon_stats.schedule_delay = 1; |
708 | balloon_stats.max_schedule_delay = 32; | 718 | balloon_stats.max_schedule_delay = 32; |
709 | balloon_stats.retry_count = 1; | 719 | balloon_stats.retry_count = 1; |
710 | balloon_stats.max_retry_count = RETRY_UNLIMITED; | 720 | balloon_stats.max_retry_count = 4; |
711 | 721 | ||
712 | #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG | 722 | #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG |
713 | set_online_page_callback(&xen_online_page); | 723 | set_online_page_callback(&xen_online_page); |
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index ff9b51055b14..2e8570c09789 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c | |||
@@ -1294,7 +1294,7 @@ void rebind_evtchn_irq(int evtchn, int irq) | |||
1294 | } | 1294 | } |
1295 | 1295 | ||
1296 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ | 1296 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ |
1297 | int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu) | 1297 | static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu) |
1298 | { | 1298 | { |
1299 | struct evtchn_bind_vcpu bind_vcpu; | 1299 | struct evtchn_bind_vcpu bind_vcpu; |
1300 | int masked; | 1300 | int masked; |
@@ -1328,7 +1328,6 @@ int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu) | |||
1328 | 1328 | ||
1329 | return 0; | 1329 | return 0; |
1330 | } | 1330 | } |
1331 | EXPORT_SYMBOL_GPL(xen_rebind_evtchn_to_cpu); | ||
1332 | 1331 | ||
1333 | static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, | 1332 | static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, |
1334 | bool force) | 1333 | bool force) |
@@ -1342,6 +1341,15 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, | |||
1342 | return ret; | 1341 | return ret; |
1343 | } | 1342 | } |
1344 | 1343 | ||
1344 | /* To be called with desc->lock held. */ | ||
1345 | int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu) | ||
1346 | { | ||
1347 | struct irq_data *d = irq_desc_get_irq_data(desc); | ||
1348 | |||
1349 | return set_affinity_irq(d, cpumask_of(tcpu), false); | ||
1350 | } | ||
1351 | EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn); | ||
1352 | |||
1345 | static void enable_dynirq(struct irq_data *data) | 1353 | static void enable_dynirq(struct irq_data *data) |
1346 | { | 1354 | { |
1347 | int evtchn = evtchn_from_irq(data->irq); | 1355 | int evtchn = evtchn_from_irq(data->irq); |
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index f341b016672f..052b55a14ebc 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c | |||
@@ -447,7 +447,7 @@ static void evtchn_bind_interdom_next_vcpu(int evtchn) | |||
447 | this_cpu_write(bind_last_selected_cpu, selected_cpu); | 447 | this_cpu_write(bind_last_selected_cpu, selected_cpu); |
448 | 448 | ||
449 | /* unmask expects irqs to be disabled */ | 449 | /* unmask expects irqs to be disabled */ |
450 | xen_rebind_evtchn_to_cpu(evtchn, selected_cpu); | 450 | xen_set_affinity_evtchn(desc, selected_cpu); |
451 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 451 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
452 | } | 452 | } |
453 | 453 | ||
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c deleted file mode 100644 index 64d7479ad5ad..000000000000 --- a/drivers/xen/tmem.c +++ /dev/null | |||
@@ -1,419 +0,0 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-only | ||
2 | /* | ||
3 | * Xen implementation for transcendent memory (tmem) | ||
4 | * | ||
5 | * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. | ||
6 | * Author: Dan Magenheimer | ||
7 | */ | ||
8 | |||
9 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/types.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/pagemap.h> | ||
16 | #include <linux/cleancache.h> | ||
17 | #include <linux/frontswap.h> | ||
18 | |||
19 | #include <xen/xen.h> | ||
20 | #include <xen/interface/xen.h> | ||
21 | #include <xen/page.h> | ||
22 | #include <asm/xen/hypercall.h> | ||
23 | #include <asm/xen/hypervisor.h> | ||
24 | #include <xen/tmem.h> | ||
25 | |||
26 | #ifndef CONFIG_XEN_TMEM_MODULE | ||
27 | bool __read_mostly tmem_enabled = false; | ||
28 | |||
29 | static int __init enable_tmem(char *s) | ||
30 | { | ||
31 | tmem_enabled = true; | ||
32 | return 1; | ||
33 | } | ||
34 | __setup("tmem", enable_tmem); | ||
35 | #endif | ||
36 | |||
37 | #ifdef CONFIG_CLEANCACHE | ||
38 | static bool cleancache __read_mostly = true; | ||
39 | module_param(cleancache, bool, S_IRUGO); | ||
40 | static bool selfballooning __read_mostly = true; | ||
41 | module_param(selfballooning, bool, S_IRUGO); | ||
42 | #endif /* CONFIG_CLEANCACHE */ | ||
43 | |||
44 | #ifdef CONFIG_FRONTSWAP | ||
45 | static bool frontswap __read_mostly = true; | ||
46 | module_param(frontswap, bool, S_IRUGO); | ||
47 | #else /* CONFIG_FRONTSWAP */ | ||
48 | #define frontswap (0) | ||
49 | #endif /* CONFIG_FRONTSWAP */ | ||
50 | |||
51 | #ifdef CONFIG_XEN_SELFBALLOONING | ||
52 | static bool selfshrinking __read_mostly = true; | ||
53 | module_param(selfshrinking, bool, S_IRUGO); | ||
54 | #endif /* CONFIG_XEN_SELFBALLOONING */ | ||
55 | |||
56 | #define TMEM_CONTROL 0 | ||
57 | #define TMEM_NEW_POOL 1 | ||
58 | #define TMEM_DESTROY_POOL 2 | ||
59 | #define TMEM_NEW_PAGE 3 | ||
60 | #define TMEM_PUT_PAGE 4 | ||
61 | #define TMEM_GET_PAGE 5 | ||
62 | #define TMEM_FLUSH_PAGE 6 | ||
63 | #define TMEM_FLUSH_OBJECT 7 | ||
64 | #define TMEM_READ 8 | ||
65 | #define TMEM_WRITE 9 | ||
66 | #define TMEM_XCHG 10 | ||
67 | |||
68 | /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */ | ||
69 | #define TMEM_POOL_PERSIST 1 | ||
70 | #define TMEM_POOL_SHARED 2 | ||
71 | #define TMEM_POOL_PAGESIZE_SHIFT 4 | ||
72 | #define TMEM_VERSION_SHIFT 24 | ||
73 | |||
74 | |||
75 | struct tmem_pool_uuid { | ||
76 | u64 uuid_lo; | ||
77 | u64 uuid_hi; | ||
78 | }; | ||
79 | |||
80 | struct tmem_oid { | ||
81 | u64 oid[3]; | ||
82 | }; | ||
83 | |||
84 | #define TMEM_POOL_PRIVATE_UUID { 0, 0 } | ||
85 | |||
86 | /* flags for tmem_ops.new_pool */ | ||
87 | #define TMEM_POOL_PERSIST 1 | ||
88 | #define TMEM_POOL_SHARED 2 | ||
89 | |||
90 | /* xen tmem foundation ops/hypercalls */ | ||
91 | |||
92 | static inline int xen_tmem_op(u32 tmem_cmd, u32 tmem_pool, struct tmem_oid oid, | ||
93 | u32 index, unsigned long gmfn, u32 tmem_offset, u32 pfn_offset, u32 len) | ||
94 | { | ||
95 | struct tmem_op op; | ||
96 | int rc = 0; | ||
97 | |||
98 | op.cmd = tmem_cmd; | ||
99 | op.pool_id = tmem_pool; | ||
100 | op.u.gen.oid[0] = oid.oid[0]; | ||
101 | op.u.gen.oid[1] = oid.oid[1]; | ||
102 | op.u.gen.oid[2] = oid.oid[2]; | ||
103 | op.u.gen.index = index; | ||
104 | op.u.gen.tmem_offset = tmem_offset; | ||
105 | op.u.gen.pfn_offset = pfn_offset; | ||
106 | op.u.gen.len = len; | ||
107 | set_xen_guest_handle(op.u.gen.gmfn, (void *)gmfn); | ||
108 | rc = HYPERVISOR_tmem_op(&op); | ||
109 | return rc; | ||
110 | } | ||
111 | |||
112 | static int xen_tmem_new_pool(struct tmem_pool_uuid uuid, | ||
113 | u32 flags, unsigned long pagesize) | ||
114 | { | ||
115 | struct tmem_op op; | ||
116 | int rc = 0, pageshift; | ||
117 | |||
118 | for (pageshift = 0; pagesize != 1; pageshift++) | ||
119 | pagesize >>= 1; | ||
120 | flags |= (pageshift - 12) << TMEM_POOL_PAGESIZE_SHIFT; | ||
121 | flags |= TMEM_SPEC_VERSION << TMEM_VERSION_SHIFT; | ||
122 | op.cmd = TMEM_NEW_POOL; | ||
123 | op.u.new.uuid[0] = uuid.uuid_lo; | ||
124 | op.u.new.uuid[1] = uuid.uuid_hi; | ||
125 | op.u.new.flags = flags; | ||
126 | rc = HYPERVISOR_tmem_op(&op); | ||
127 | return rc; | ||
128 | } | ||
129 | |||
130 | /* xen generic tmem ops */ | ||
131 | |||
132 | static int xen_tmem_put_page(u32 pool_id, struct tmem_oid oid, | ||
133 | u32 index, struct page *page) | ||
134 | { | ||
135 | return xen_tmem_op(TMEM_PUT_PAGE, pool_id, oid, index, | ||
136 | xen_page_to_gfn(page), 0, 0, 0); | ||
137 | } | ||
138 | |||
139 | static int xen_tmem_get_page(u32 pool_id, struct tmem_oid oid, | ||
140 | u32 index, struct page *page) | ||
141 | { | ||
142 | return xen_tmem_op(TMEM_GET_PAGE, pool_id, oid, index, | ||
143 | xen_page_to_gfn(page), 0, 0, 0); | ||
144 | } | ||
145 | |||
146 | static int xen_tmem_flush_page(u32 pool_id, struct tmem_oid oid, u32 index) | ||
147 | { | ||
148 | return xen_tmem_op(TMEM_FLUSH_PAGE, pool_id, oid, index, | ||
149 | 0, 0, 0, 0); | ||
150 | } | ||
151 | |||
152 | static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) | ||
153 | { | ||
154 | return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); | ||
155 | } | ||
156 | |||
157 | |||
158 | #ifdef CONFIG_CLEANCACHE | ||
159 | static int xen_tmem_destroy_pool(u32 pool_id) | ||
160 | { | ||
161 | struct tmem_oid oid = { { 0 } }; | ||
162 | |||
163 | return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); | ||
164 | } | ||
165 | |||
166 | /* cleancache ops */ | ||
167 | |||
168 | static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, | ||
169 | pgoff_t index, struct page *page) | ||
170 | { | ||
171 | u32 ind = (u32) index; | ||
172 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
173 | |||
174 | if (pool < 0) | ||
175 | return; | ||
176 | if (ind != index) | ||
177 | return; | ||
178 | mb(); /* ensure page is quiescent; tmem may address it with an alias */ | ||
179 | (void)xen_tmem_put_page((u32)pool, oid, ind, page); | ||
180 | } | ||
181 | |||
182 | static int tmem_cleancache_get_page(int pool, struct cleancache_filekey key, | ||
183 | pgoff_t index, struct page *page) | ||
184 | { | ||
185 | u32 ind = (u32) index; | ||
186 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
187 | int ret; | ||
188 | |||
189 | /* translate return values to linux semantics */ | ||
190 | if (pool < 0) | ||
191 | return -1; | ||
192 | if (ind != index) | ||
193 | return -1; | ||
194 | ret = xen_tmem_get_page((u32)pool, oid, ind, page); | ||
195 | if (ret == 1) | ||
196 | return 0; | ||
197 | else | ||
198 | return -1; | ||
199 | } | ||
200 | |||
201 | static void tmem_cleancache_flush_page(int pool, struct cleancache_filekey key, | ||
202 | pgoff_t index) | ||
203 | { | ||
204 | u32 ind = (u32) index; | ||
205 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
206 | |||
207 | if (pool < 0) | ||
208 | return; | ||
209 | if (ind != index) | ||
210 | return; | ||
211 | (void)xen_tmem_flush_page((u32)pool, oid, ind); | ||
212 | } | ||
213 | |||
214 | static void tmem_cleancache_flush_inode(int pool, struct cleancache_filekey key) | ||
215 | { | ||
216 | struct tmem_oid oid = *(struct tmem_oid *)&key; | ||
217 | |||
218 | if (pool < 0) | ||
219 | return; | ||
220 | (void)xen_tmem_flush_object((u32)pool, oid); | ||
221 | } | ||
222 | |||
223 | static void tmem_cleancache_flush_fs(int pool) | ||
224 | { | ||
225 | if (pool < 0) | ||
226 | return; | ||
227 | (void)xen_tmem_destroy_pool((u32)pool); | ||
228 | } | ||
229 | |||
230 | static int tmem_cleancache_init_fs(size_t pagesize) | ||
231 | { | ||
232 | struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID; | ||
233 | |||
234 | return xen_tmem_new_pool(uuid_private, 0, pagesize); | ||
235 | } | ||
236 | |||
237 | static int tmem_cleancache_init_shared_fs(uuid_t *uuid, size_t pagesize) | ||
238 | { | ||
239 | struct tmem_pool_uuid shared_uuid; | ||
240 | |||
241 | shared_uuid.uuid_lo = *(u64 *)&uuid->b[0]; | ||
242 | shared_uuid.uuid_hi = *(u64 *)&uuid->b[8]; | ||
243 | return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); | ||
244 | } | ||
245 | |||
246 | static const struct cleancache_ops tmem_cleancache_ops = { | ||
247 | .put_page = tmem_cleancache_put_page, | ||
248 | .get_page = tmem_cleancache_get_page, | ||
249 | .invalidate_page = tmem_cleancache_flush_page, | ||
250 | .invalidate_inode = tmem_cleancache_flush_inode, | ||
251 | .invalidate_fs = tmem_cleancache_flush_fs, | ||
252 | .init_shared_fs = tmem_cleancache_init_shared_fs, | ||
253 | .init_fs = tmem_cleancache_init_fs | ||
254 | }; | ||
255 | #endif | ||
256 | |||
257 | #ifdef CONFIG_FRONTSWAP | ||
258 | /* frontswap tmem operations */ | ||
259 | |||
260 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | ||
261 | static int tmem_frontswap_poolid; | ||
262 | |||
263 | /* | ||
264 | * Swizzling increases objects per swaptype, increasing tmem concurrency | ||
265 | * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS | ||
266 | */ | ||
267 | #define SWIZ_BITS 4 | ||
268 | #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) | ||
269 | #define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) | ||
270 | #define iswiz(_ind) (_ind >> SWIZ_BITS) | ||
271 | |||
272 | static inline struct tmem_oid oswiz(unsigned type, u32 ind) | ||
273 | { | ||
274 | struct tmem_oid oid = { .oid = { 0 } }; | ||
275 | oid.oid[0] = _oswiz(type, ind); | ||
276 | return oid; | ||
277 | } | ||
278 | |||
279 | /* returns 0 if the page was successfully put into frontswap, -1 if not */ | ||
280 | static int tmem_frontswap_store(unsigned type, pgoff_t offset, | ||
281 | struct page *page) | ||
282 | { | ||
283 | u64 ind64 = (u64)offset; | ||
284 | u32 ind = (u32)offset; | ||
285 | int pool = tmem_frontswap_poolid; | ||
286 | int ret; | ||
287 | |||
288 | /* THP isn't supported */ | ||
289 | if (PageTransHuge(page)) | ||
290 | return -1; | ||
291 | |||
292 | if (pool < 0) | ||
293 | return -1; | ||
294 | if (ind64 != ind) | ||
295 | return -1; | ||
296 | mb(); /* ensure page is quiescent; tmem may address it with an alias */ | ||
297 | ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), page); | ||
298 | /* translate Xen tmem return values to linux semantics */ | ||
299 | if (ret == 1) | ||
300 | return 0; | ||
301 | else | ||
302 | return -1; | ||
303 | } | ||
304 | |||
305 | /* | ||
306 | * returns 0 if the page was successfully gotten from frontswap, -1 if | ||
307 | * was not present (should never happen!) | ||
308 | */ | ||
309 | static int tmem_frontswap_load(unsigned type, pgoff_t offset, | ||
310 | struct page *page) | ||
311 | { | ||
312 | u64 ind64 = (u64)offset; | ||
313 | u32 ind = (u32)offset; | ||
314 | int pool = tmem_frontswap_poolid; | ||
315 | int ret; | ||
316 | |||
317 | if (pool < 0) | ||
318 | return -1; | ||
319 | if (ind64 != ind) | ||
320 | return -1; | ||
321 | ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), page); | ||
322 | /* translate Xen tmem return values to linux semantics */ | ||
323 | if (ret == 1) | ||
324 | return 0; | ||
325 | else | ||
326 | return -1; | ||
327 | } | ||
328 | |||
329 | /* flush a single page from frontswap */ | ||
330 | static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) | ||
331 | { | ||
332 | u64 ind64 = (u64)offset; | ||
333 | u32 ind = (u32)offset; | ||
334 | int pool = tmem_frontswap_poolid; | ||
335 | |||
336 | if (pool < 0) | ||
337 | return; | ||
338 | if (ind64 != ind) | ||
339 | return; | ||
340 | (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); | ||
341 | } | ||
342 | |||
343 | /* flush all pages from the passed swaptype */ | ||
344 | static void tmem_frontswap_flush_area(unsigned type) | ||
345 | { | ||
346 | int pool = tmem_frontswap_poolid; | ||
347 | int ind; | ||
348 | |||
349 | if (pool < 0) | ||
350 | return; | ||
351 | for (ind = SWIZ_MASK; ind >= 0; ind--) | ||
352 | (void)xen_tmem_flush_object(pool, oswiz(type, ind)); | ||
353 | } | ||
354 | |||
355 | static void tmem_frontswap_init(unsigned ignored) | ||
356 | { | ||
357 | struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; | ||
358 | |||
359 | /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ | ||
360 | if (tmem_frontswap_poolid < 0) | ||
361 | tmem_frontswap_poolid = | ||
362 | xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); | ||
363 | } | ||
364 | |||
365 | static struct frontswap_ops tmem_frontswap_ops = { | ||
366 | .store = tmem_frontswap_store, | ||
367 | .load = tmem_frontswap_load, | ||
368 | .invalidate_page = tmem_frontswap_flush_page, | ||
369 | .invalidate_area = tmem_frontswap_flush_area, | ||
370 | .init = tmem_frontswap_init | ||
371 | }; | ||
372 | #endif | ||
373 | |||
374 | static int __init xen_tmem_init(void) | ||
375 | { | ||
376 | if (!xen_domain()) | ||
377 | return 0; | ||
378 | #ifdef CONFIG_FRONTSWAP | ||
379 | if (tmem_enabled && frontswap) { | ||
380 | char *s = ""; | ||
381 | |||
382 | tmem_frontswap_poolid = -1; | ||
383 | frontswap_register_ops(&tmem_frontswap_ops); | ||
384 | pr_info("frontswap enabled, RAM provided by Xen Transcendent Memory%s\n", | ||
385 | s); | ||
386 | } | ||
387 | #endif | ||
388 | #ifdef CONFIG_CLEANCACHE | ||
389 | BUILD_BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); | ||
390 | if (tmem_enabled && cleancache) { | ||
391 | int err; | ||
392 | |||
393 | err = cleancache_register_ops(&tmem_cleancache_ops); | ||
394 | if (err) | ||
395 | pr_warn("xen-tmem: failed to enable cleancache: %d\n", | ||
396 | err); | ||
397 | else | ||
398 | pr_info("cleancache enabled, RAM provided by " | ||
399 | "Xen Transcendent Memory\n"); | ||
400 | } | ||
401 | #endif | ||
402 | #ifdef CONFIG_XEN_SELFBALLOONING | ||
403 | /* | ||
404 | * There is no point of driving pages to the swap system if they | ||
405 | * aren't going anywhere in tmem universe. | ||
406 | */ | ||
407 | if (!frontswap) { | ||
408 | selfshrinking = false; | ||
409 | selfballooning = false; | ||
410 | } | ||
411 | xen_selfballoon_init(selfballooning, selfshrinking); | ||
412 | #endif | ||
413 | return 0; | ||
414 | } | ||
415 | |||
416 | module_init(xen_tmem_init) | ||
417 | MODULE_LICENSE("GPL"); | ||
418 | MODULE_AUTHOR("Dan Magenheimer <dan.magenheimer@oracle.com>"); | ||
419 | MODULE_DESCRIPTION("Shim to Xen transcendent memory"); | ||
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index a67236b02452..6d12fc368210 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c | |||
@@ -129,8 +129,6 @@ void xen_balloon_init(void) | |||
129 | { | 129 | { |
130 | register_balloon(&balloon_dev); | 130 | register_balloon(&balloon_dev); |
131 | 131 | ||
132 | register_xen_selfballooning(&balloon_dev); | ||
133 | |||
134 | register_xenstore_notifier(&xenstore_notifier); | 132 | register_xenstore_notifier(&xenstore_notifier); |
135 | } | 133 | } |
136 | EXPORT_SYMBOL_GPL(xen_balloon_init); | 134 | EXPORT_SYMBOL_GPL(xen_balloon_init); |
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c deleted file mode 100644 index 246f6122c9ee..000000000000 --- a/drivers/xen/xen-selfballoon.c +++ /dev/null | |||
@@ -1,579 +0,0 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /****************************************************************************** | ||
3 | * Xen selfballoon driver (and optional frontswap self-shrinking driver) | ||
4 | * | ||
5 | * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. | ||
6 | * | ||
7 | * This code complements the cleancache and frontswap patchsets to optimize | ||
8 | * support for Xen Transcendent Memory ("tmem"). The policy it implements | ||
9 | * is rudimentary and will likely improve over time, but it does work well | ||
10 | * enough today. | ||
11 | * | ||
12 | * Two functionalities are implemented here which both use "control theory" | ||
13 | * (feedback) to optimize memory utilization. In a virtualized environment | ||
14 | * such as Xen, RAM is often a scarce resource and we would like to ensure | ||
15 | * that each of a possibly large number of virtual machines is using RAM | ||
16 | * efficiently, i.e. using as little as possible when under light load | ||
17 | * and obtaining as much as possible when memory demands are high. | ||
18 | * Since RAM needs vary highly dynamically and sometimes dramatically, | ||
19 | * "hysteresis" is used, that is, memory target is determined not just | ||
20 | * on current data but also on past data stored in the system. | ||
21 | * | ||
22 | * "Selfballooning" creates memory pressure by managing the Xen balloon | ||
23 | * driver to decrease and increase available kernel memory, driven | ||
24 | * largely by the target value of "Committed_AS" (see /proc/meminfo). | ||
25 | * Since Committed_AS does not account for clean mapped pages (i.e. pages | ||
26 | * in RAM that are identical to pages on disk), selfballooning has the | ||
27 | * affect of pushing less frequently used clean pagecache pages out of | ||
28 | * kernel RAM and, presumably using cleancache, into Xen tmem where | ||
29 | * Xen can more efficiently optimize RAM utilization for such pages. | ||
30 | * | ||
31 | * When kernel memory demand unexpectedly increases faster than Xen, via | ||
32 | * the selfballoon driver, is able to (or chooses to) provide usable RAM, | ||
33 | * the kernel may invoke swapping. In most cases, frontswap is able | ||
34 | * to absorb this swapping into Xen tmem. However, due to the fact | ||
35 | * that the kernel swap subsystem assumes swapping occurs to a disk, | ||
36 | * swapped pages may sit on the disk for a very long time; even if | ||
37 | * the kernel knows the page will never be used again. This is because | ||
38 | * the disk space costs very little and can be overwritten when | ||
39 | * necessary. When such stale pages are in frontswap, however, they | ||
40 | * are taking up valuable real estate. "Frontswap selfshrinking" works | ||
41 | * to resolve this: When frontswap activity is otherwise stable | ||
42 | * and the guest kernel is not under memory pressure, the "frontswap | ||
43 | * selfshrinking" accounts for this by providing pressure to remove some | ||
44 | * pages from frontswap and return them to kernel memory. | ||
45 | * | ||
46 | * For both "selfballooning" and "frontswap-selfshrinking", a worker | ||
47 | * thread is used and sysfs tunables are provided to adjust the frequency | ||
48 | * and rate of adjustments to achieve the goal, as well as to disable one | ||
49 | * or both functions independently. | ||
50 | * | ||
51 | * While some argue that this functionality can and should be implemented | ||
52 | * in userspace, it has been observed that bad things happen (e.g. OOMs). | ||
53 | * | ||
54 | * System configuration note: Selfballooning should not be enabled on | ||
55 | * systems without a sufficiently large swap device configured; for best | ||
56 | * results, it is recommended that total swap be increased by the size | ||
57 | * of the guest memory. Note, that selfballooning should be disabled by default | ||
58 | * if frontswap is not configured. Similarly selfballooning should be enabled | ||
59 | * by default if frontswap is configured and can be disabled with the | ||
60 | * "tmem.selfballooning=0" kernel boot option. Finally, when frontswap is | ||
61 | * configured, frontswap-selfshrinking can be disabled with the | ||
62 | * "tmem.selfshrink=0" kernel boot option. | ||
63 | * | ||
64 | * Selfballooning is disallowed in domain0 and force-disabled. | ||
65 | * | ||
66 | */ | ||
67 | |||
68 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
69 | |||
70 | #include <linux/kernel.h> | ||
71 | #include <linux/memblock.h> | ||
72 | #include <linux/swap.h> | ||
73 | #include <linux/mm.h> | ||
74 | #include <linux/mman.h> | ||
75 | #include <linux/workqueue.h> | ||
76 | #include <linux/device.h> | ||
77 | #include <xen/balloon.h> | ||
78 | #include <xen/tmem.h> | ||
79 | #include <xen/xen.h> | ||
80 | |||
81 | /* Enable/disable with sysfs. */ | ||
82 | static int xen_selfballooning_enabled __read_mostly; | ||
83 | |||
84 | /* | ||
85 | * Controls rate at which memory target (this iteration) approaches | ||
86 | * ultimate goal when memory need is increasing (up-hysteresis) or | ||
87 | * decreasing (down-hysteresis). Higher values of hysteresis cause | ||
88 | * slower increases/decreases. The default values for the various | ||
89 | * parameters were deemed reasonable by experimentation, may be | ||
90 | * workload-dependent, and can all be adjusted via sysfs. | ||
91 | */ | ||
92 | static unsigned int selfballoon_downhysteresis __read_mostly = 8; | ||
93 | static unsigned int selfballoon_uphysteresis __read_mostly = 1; | ||
94 | |||
95 | /* In HZ, controls frequency of worker invocation. */ | ||
96 | static unsigned int selfballoon_interval __read_mostly = 5; | ||
97 | |||
98 | /* | ||
99 | * Minimum usable RAM in MB for selfballooning target for balloon. | ||
100 | * If non-zero, it is added to totalreserve_pages and self-ballooning | ||
101 | * will not balloon below the sum. If zero, a piecewise linear function | ||
102 | * is calculated as a minimum and added to totalreserve_pages. Note that | ||
103 | * setting this value indiscriminately may cause OOMs and crashes. | ||
104 | */ | ||
105 | static unsigned int selfballoon_min_usable_mb; | ||
106 | |||
107 | /* | ||
108 | * Amount of RAM in MB to add to the target number of pages. | ||
109 | * Can be used to reserve some more room for caches and the like. | ||
110 | */ | ||
111 | static unsigned int selfballoon_reserved_mb; | ||
112 | |||
113 | static void selfballoon_process(struct work_struct *work); | ||
114 | static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); | ||
115 | |||
116 | #ifdef CONFIG_FRONTSWAP | ||
117 | #include <linux/frontswap.h> | ||
118 | |||
119 | /* Enable/disable with sysfs. */ | ||
120 | static bool frontswap_selfshrinking __read_mostly; | ||
121 | |||
122 | /* | ||
123 | * The default values for the following parameters were deemed reasonable | ||
124 | * by experimentation, may be workload-dependent, and can all be | ||
125 | * adjusted via sysfs. | ||
126 | */ | ||
127 | |||
128 | /* Control rate for frontswap shrinking. Higher hysteresis is slower. */ | ||
129 | static unsigned int frontswap_hysteresis __read_mostly = 20; | ||
130 | |||
131 | /* | ||
132 | * Number of selfballoon worker invocations to wait before observing that | ||
133 | * frontswap selfshrinking should commence. Note that selfshrinking does | ||
134 | * not use a separate worker thread. | ||
135 | */ | ||
136 | static unsigned int frontswap_inertia __read_mostly = 3; | ||
137 | |||
138 | /* Countdown to next invocation of frontswap_shrink() */ | ||
139 | static unsigned long frontswap_inertia_counter; | ||
140 | |||
141 | /* | ||
142 | * Invoked by the selfballoon worker thread, uses current number of pages | ||
143 | * in frontswap (frontswap_curr_pages()), previous status, and control | ||
144 | * values (hysteresis and inertia) to determine if frontswap should be | ||
145 | * shrunk and what the new frontswap size should be. Note that | ||
146 | * frontswap_shrink is essentially a partial swapoff that immediately | ||
147 | * transfers pages from the "swap device" (frontswap) back into kernel | ||
148 | * RAM; despite the name, frontswap "shrinking" is very different from | ||
149 | * the "shrinker" interface used by the kernel MM subsystem to reclaim | ||
150 | * memory. | ||
151 | */ | ||
152 | static void frontswap_selfshrink(void) | ||
153 | { | ||
154 | static unsigned long cur_frontswap_pages; | ||
155 | unsigned long last_frontswap_pages; | ||
156 | unsigned long tgt_frontswap_pages; | ||
157 | |||
158 | last_frontswap_pages = cur_frontswap_pages; | ||
159 | cur_frontswap_pages = frontswap_curr_pages(); | ||
160 | if (!cur_frontswap_pages || | ||
161 | (cur_frontswap_pages > last_frontswap_pages)) { | ||
162 | frontswap_inertia_counter = frontswap_inertia; | ||
163 | return; | ||
164 | } | ||
165 | if (frontswap_inertia_counter && --frontswap_inertia_counter) | ||
166 | return; | ||
167 | if (cur_frontswap_pages <= frontswap_hysteresis) | ||
168 | tgt_frontswap_pages = 0; | ||
169 | else | ||
170 | tgt_frontswap_pages = cur_frontswap_pages - | ||
171 | (cur_frontswap_pages / frontswap_hysteresis); | ||
172 | frontswap_shrink(tgt_frontswap_pages); | ||
173 | frontswap_inertia_counter = frontswap_inertia; | ||
174 | } | ||
175 | |||
176 | #endif /* CONFIG_FRONTSWAP */ | ||
177 | |||
178 | #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) | ||
179 | #define PAGES2MB(pages) ((pages) >> (20 - PAGE_SHIFT)) | ||
180 | |||
181 | /* | ||
182 | * Use current balloon size, the goal (vm_committed_as), and hysteresis | ||
183 | * parameters to set a new target balloon size | ||
184 | */ | ||
185 | static void selfballoon_process(struct work_struct *work) | ||
186 | { | ||
187 | unsigned long cur_pages, goal_pages, tgt_pages, floor_pages; | ||
188 | unsigned long useful_pages; | ||
189 | bool reset_timer = false; | ||
190 | |||
191 | if (xen_selfballooning_enabled) { | ||
192 | cur_pages = totalram_pages(); | ||
193 | tgt_pages = cur_pages; /* default is no change */ | ||
194 | goal_pages = vm_memory_committed() + | ||
195 | totalreserve_pages + | ||
196 | MB2PAGES(selfballoon_reserved_mb); | ||
197 | #ifdef CONFIG_FRONTSWAP | ||
198 | /* allow space for frontswap pages to be repatriated */ | ||
199 | if (frontswap_selfshrinking) | ||
200 | goal_pages += frontswap_curr_pages(); | ||
201 | #endif | ||
202 | if (cur_pages > goal_pages) | ||
203 | tgt_pages = cur_pages - | ||
204 | ((cur_pages - goal_pages) / | ||
205 | selfballoon_downhysteresis); | ||
206 | else if (cur_pages < goal_pages) | ||
207 | tgt_pages = cur_pages + | ||
208 | ((goal_pages - cur_pages) / | ||
209 | selfballoon_uphysteresis); | ||
210 | /* else if cur_pages == goal_pages, no change */ | ||
211 | useful_pages = max_pfn - totalreserve_pages; | ||
212 | if (selfballoon_min_usable_mb != 0) | ||
213 | floor_pages = totalreserve_pages + | ||
214 | MB2PAGES(selfballoon_min_usable_mb); | ||
215 | /* piecewise linear function ending in ~3% slope */ | ||
216 | else if (useful_pages < MB2PAGES(16)) | ||
217 | floor_pages = max_pfn; /* not worth ballooning */ | ||
218 | else if (useful_pages < MB2PAGES(64)) | ||
219 | floor_pages = totalreserve_pages + MB2PAGES(16) + | ||
220 | ((useful_pages - MB2PAGES(16)) >> 1); | ||
221 | else if (useful_pages < MB2PAGES(512)) | ||
222 | floor_pages = totalreserve_pages + MB2PAGES(40) + | ||
223 | ((useful_pages - MB2PAGES(40)) >> 3); | ||
224 | else /* useful_pages >= MB2PAGES(512) */ | ||
225 | floor_pages = totalreserve_pages + MB2PAGES(99) + | ||
226 | ((useful_pages - MB2PAGES(99)) >> 5); | ||
227 | if (tgt_pages < floor_pages) | ||
228 | tgt_pages = floor_pages; | ||
229 | balloon_set_new_target(tgt_pages + | ||
230 | balloon_stats.current_pages - totalram_pages()); | ||
231 | reset_timer = true; | ||
232 | } | ||
233 | #ifdef CONFIG_FRONTSWAP | ||
234 | if (frontswap_selfshrinking) { | ||
235 | frontswap_selfshrink(); | ||
236 | reset_timer = true; | ||
237 | } | ||
238 | #endif | ||
239 | if (reset_timer) | ||
240 | schedule_delayed_work(&selfballoon_worker, | ||
241 | selfballoon_interval * HZ); | ||
242 | } | ||
243 | |||
244 | #ifdef CONFIG_SYSFS | ||
245 | |||
246 | #include <linux/capability.h> | ||
247 | |||
248 | #define SELFBALLOON_SHOW(name, format, args...) \ | ||
249 | static ssize_t show_##name(struct device *dev, \ | ||
250 | struct device_attribute *attr, \ | ||
251 | char *buf) \ | ||
252 | { \ | ||
253 | return sprintf(buf, format, ##args); \ | ||
254 | } | ||
255 | |||
256 | SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); | ||
257 | |||
258 | static ssize_t store_selfballooning(struct device *dev, | ||
259 | struct device_attribute *attr, | ||
260 | const char *buf, | ||
261 | size_t count) | ||
262 | { | ||
263 | bool was_enabled = xen_selfballooning_enabled; | ||
264 | unsigned long tmp; | ||
265 | int err; | ||
266 | |||
267 | if (!capable(CAP_SYS_ADMIN)) | ||
268 | return -EPERM; | ||
269 | |||
270 | err = kstrtoul(buf, 10, &tmp); | ||
271 | if (err) | ||
272 | return err; | ||
273 | if ((tmp != 0) && (tmp != 1)) | ||
274 | return -EINVAL; | ||
275 | |||
276 | xen_selfballooning_enabled = !!tmp; | ||
277 | if (!was_enabled && xen_selfballooning_enabled) | ||
278 | schedule_delayed_work(&selfballoon_worker, | ||
279 | selfballoon_interval * HZ); | ||
280 | |||
281 | return count; | ||
282 | } | ||
283 | |||
284 | static DEVICE_ATTR(selfballooning, S_IRUGO | S_IWUSR, | ||
285 | show_selfballooning, store_selfballooning); | ||
286 | |||
287 | SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); | ||
288 | |||
289 | static ssize_t store_selfballoon_interval(struct device *dev, | ||
290 | struct device_attribute *attr, | ||
291 | const char *buf, | ||
292 | size_t count) | ||
293 | { | ||
294 | unsigned long val; | ||
295 | int err; | ||
296 | |||
297 | if (!capable(CAP_SYS_ADMIN)) | ||
298 | return -EPERM; | ||
299 | err = kstrtoul(buf, 10, &val); | ||
300 | if (err) | ||
301 | return err; | ||
302 | if (val == 0) | ||
303 | return -EINVAL; | ||
304 | selfballoon_interval = val; | ||
305 | return count; | ||
306 | } | ||
307 | |||
308 | static DEVICE_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, | ||
309 | show_selfballoon_interval, store_selfballoon_interval); | ||
310 | |||
311 | SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); | ||
312 | |||
313 | static ssize_t store_selfballoon_downhys(struct device *dev, | ||
314 | struct device_attribute *attr, | ||
315 | const char *buf, | ||
316 | size_t count) | ||
317 | { | ||
318 | unsigned long val; | ||
319 | int err; | ||
320 | |||
321 | if (!capable(CAP_SYS_ADMIN)) | ||
322 | return -EPERM; | ||
323 | err = kstrtoul(buf, 10, &val); | ||
324 | if (err) | ||
325 | return err; | ||
326 | if (val == 0) | ||
327 | return -EINVAL; | ||
328 | selfballoon_downhysteresis = val; | ||
329 | return count; | ||
330 | } | ||
331 | |||
332 | static DEVICE_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, | ||
333 | show_selfballoon_downhys, store_selfballoon_downhys); | ||
334 | |||
335 | |||
336 | SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); | ||
337 | |||
338 | static ssize_t store_selfballoon_uphys(struct device *dev, | ||
339 | struct device_attribute *attr, | ||
340 | const char *buf, | ||
341 | size_t count) | ||
342 | { | ||
343 | unsigned long val; | ||
344 | int err; | ||
345 | |||
346 | if (!capable(CAP_SYS_ADMIN)) | ||
347 | return -EPERM; | ||
348 | err = kstrtoul(buf, 10, &val); | ||
349 | if (err) | ||
350 | return err; | ||
351 | if (val == 0) | ||
352 | return -EINVAL; | ||
353 | selfballoon_uphysteresis = val; | ||
354 | return count; | ||
355 | } | ||
356 | |||
357 | static DEVICE_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, | ||
358 | show_selfballoon_uphys, store_selfballoon_uphys); | ||
359 | |||
360 | SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n", | ||
361 | selfballoon_min_usable_mb); | ||
362 | |||
363 | static ssize_t store_selfballoon_min_usable_mb(struct device *dev, | ||
364 | struct device_attribute *attr, | ||
365 | const char *buf, | ||
366 | size_t count) | ||
367 | { | ||
368 | unsigned long val; | ||
369 | int err; | ||
370 | |||
371 | if (!capable(CAP_SYS_ADMIN)) | ||
372 | return -EPERM; | ||
373 | err = kstrtoul(buf, 10, &val); | ||
374 | if (err) | ||
375 | return err; | ||
376 | if (val == 0) | ||
377 | return -EINVAL; | ||
378 | selfballoon_min_usable_mb = val; | ||
379 | return count; | ||
380 | } | ||
381 | |||
382 | static DEVICE_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR, | ||
383 | show_selfballoon_min_usable_mb, | ||
384 | store_selfballoon_min_usable_mb); | ||
385 | |||
386 | SELFBALLOON_SHOW(selfballoon_reserved_mb, "%d\n", | ||
387 | selfballoon_reserved_mb); | ||
388 | |||
389 | static ssize_t store_selfballoon_reserved_mb(struct device *dev, | ||
390 | struct device_attribute *attr, | ||
391 | const char *buf, | ||
392 | size_t count) | ||
393 | { | ||
394 | unsigned long val; | ||
395 | int err; | ||
396 | |||
397 | if (!capable(CAP_SYS_ADMIN)) | ||
398 | return -EPERM; | ||
399 | err = kstrtoul(buf, 10, &val); | ||
400 | if (err) | ||
401 | return err; | ||
402 | if (val == 0) | ||
403 | return -EINVAL; | ||
404 | selfballoon_reserved_mb = val; | ||
405 | return count; | ||
406 | } | ||
407 | |||
408 | static DEVICE_ATTR(selfballoon_reserved_mb, S_IRUGO | S_IWUSR, | ||
409 | show_selfballoon_reserved_mb, | ||
410 | store_selfballoon_reserved_mb); | ||
411 | |||
412 | |||
413 | #ifdef CONFIG_FRONTSWAP | ||
414 | SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); | ||
415 | |||
416 | static ssize_t store_frontswap_selfshrinking(struct device *dev, | ||
417 | struct device_attribute *attr, | ||
418 | const char *buf, | ||
419 | size_t count) | ||
420 | { | ||
421 | bool was_enabled = frontswap_selfshrinking; | ||
422 | unsigned long tmp; | ||
423 | int err; | ||
424 | |||
425 | if (!capable(CAP_SYS_ADMIN)) | ||
426 | return -EPERM; | ||
427 | err = kstrtoul(buf, 10, &tmp); | ||
428 | if (err) | ||
429 | return err; | ||
430 | if ((tmp != 0) && (tmp != 1)) | ||
431 | return -EINVAL; | ||
432 | frontswap_selfshrinking = !!tmp; | ||
433 | if (!was_enabled && !xen_selfballooning_enabled && | ||
434 | frontswap_selfshrinking) | ||
435 | schedule_delayed_work(&selfballoon_worker, | ||
436 | selfballoon_interval * HZ); | ||
437 | |||
438 | return count; | ||
439 | } | ||
440 | |||
441 | static DEVICE_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, | ||
442 | show_frontswap_selfshrinking, store_frontswap_selfshrinking); | ||
443 | |||
444 | SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); | ||
445 | |||
446 | static ssize_t store_frontswap_inertia(struct device *dev, | ||
447 | struct device_attribute *attr, | ||
448 | const char *buf, | ||
449 | size_t count) | ||
450 | { | ||
451 | unsigned long val; | ||
452 | int err; | ||
453 | |||
454 | if (!capable(CAP_SYS_ADMIN)) | ||
455 | return -EPERM; | ||
456 | err = kstrtoul(buf, 10, &val); | ||
457 | if (err) | ||
458 | return err; | ||
459 | if (val == 0) | ||
460 | return -EINVAL; | ||
461 | frontswap_inertia = val; | ||
462 | frontswap_inertia_counter = val; | ||
463 | return count; | ||
464 | } | ||
465 | |||
466 | static DEVICE_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, | ||
467 | show_frontswap_inertia, store_frontswap_inertia); | ||
468 | |||
469 | SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); | ||
470 | |||
471 | static ssize_t store_frontswap_hysteresis(struct device *dev, | ||
472 | struct device_attribute *attr, | ||
473 | const char *buf, | ||
474 | size_t count) | ||
475 | { | ||
476 | unsigned long val; | ||
477 | int err; | ||
478 | |||
479 | if (!capable(CAP_SYS_ADMIN)) | ||
480 | return -EPERM; | ||
481 | err = kstrtoul(buf, 10, &val); | ||
482 | if (err) | ||
483 | return err; | ||
484 | if (val == 0) | ||
485 | return -EINVAL; | ||
486 | frontswap_hysteresis = val; | ||
487 | return count; | ||
488 | } | ||
489 | |||
490 | static DEVICE_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, | ||
491 | show_frontswap_hysteresis, store_frontswap_hysteresis); | ||
492 | |||
493 | #endif /* CONFIG_FRONTSWAP */ | ||
494 | |||
/*
 * All tunables exposed under /sys/.../selfballoon.  The frontswap
 * entries exist only when the kernel is built with CONFIG_FRONTSWAP.
 */
static struct attribute *selfballoon_attrs[] = {
	&dev_attr_selfballooning.attr,
	&dev_attr_selfballoon_interval.attr,
	&dev_attr_selfballoon_downhysteresis.attr,
	&dev_attr_selfballoon_uphysteresis.attr,
	&dev_attr_selfballoon_min_usable_mb.attr,
	&dev_attr_selfballoon_reserved_mb.attr,
#ifdef CONFIG_FRONTSWAP
	&dev_attr_frontswap_selfshrinking.attr,
	&dev_attr_frontswap_hysteresis.attr,
	&dev_attr_frontswap_inertia.attr,
#endif
	NULL	/* sysfs requires a NULL-terminated attribute list */
};

/* Groups the attributes above into a "selfballoon" sysfs subdirectory. */
static const struct attribute_group selfballoon_group = {
	.name = "selfballoon",
	.attrs = selfballoon_attrs
};
514 | #endif | ||
515 | |||
516 | int register_xen_selfballooning(struct device *dev) | ||
517 | { | ||
518 | int error = -1; | ||
519 | |||
520 | #ifdef CONFIG_SYSFS | ||
521 | error = sysfs_create_group(&dev->kobj, &selfballoon_group); | ||
522 | #endif | ||
523 | return error; | ||
524 | } | ||
525 | EXPORT_SYMBOL(register_xen_selfballooning); | ||
526 | |||
/*
 * Initialize the selfballooning and/or frontswap-selfshrinking machinery.
 *
 * @use_selfballooning:		enable the selfballoon worker (also gated
 *				on tmem being enabled)
 * @use_frontswap_selfshrink:	enable frontswap selfshrinking (only when
 *				built with CONFIG_FRONTSWAP; also gated on
 *				tmem being enabled)
 *
 * Returns 0 and schedules the periodic worker if at least one feature
 * ends up enabled; -ENODEV when not running as a Xen guest, when running
 * as dom0, or when neither feature is enabled.
 */
int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
{
	bool enable = false;
	unsigned long reserve_pages;

	if (!xen_domain())
		return -ENODEV;

	/* dom0 owns the machine's memory; selfballooning makes no sense there. */
	if (xen_initial_domain()) {
		pr_info("Xen selfballooning driver disabled for domain0\n");
		return -ENODEV;
	}

	xen_selfballooning_enabled = tmem_enabled && use_selfballooning;
	if (xen_selfballooning_enabled) {
		pr_info("Initializing Xen selfballooning driver\n");
		enable = true;
	}
#ifdef CONFIG_FRONTSWAP
	frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink;
	if (frontswap_selfshrinking) {
		pr_info("Initializing frontswap selfshrinking driver\n");
		enable = true;
	}
#endif
	if (!enable)
		return -ENODEV;

	/*
	 * Give selfballoon_reserved_mb a default value (10% of total ram
	 * pages) to make selfballoon not so aggressive.
	 *
	 * There are mainly two reasons:
	 * 1) The original goal_page didn't consider some pages used by kernel
	 *    space, like slab pages and memory used by device drivers.
	 *
	 * 2) The balloon driver may not give back memory to guest OS fast
	 *    enough when the workload suddenly acquires a lot of physical
	 *    memory.
	 *
	 * In both cases, the guest OS will suffer from memory pressure and
	 * OOM killer may be triggered.
	 * By reserving an extra 10% of total ram pages, we can keep the
	 * system much more reliable and make it respond faster in some cases.
	 */
	if (!selfballoon_reserved_mb) {
		reserve_pages = totalram_pages() / 10;
		selfballoon_reserved_mb = PAGES2MB(reserve_pages);
	}
	schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);

	return 0;
}
EXPORT_SYMBOL(xen_selfballoon_init);
diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 4914b93a23f2..6fb95aa19405 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h | |||
@@ -27,16 +27,6 @@ void balloon_set_new_target(unsigned long target); | |||
27 | int alloc_xenballooned_pages(int nr_pages, struct page **pages); | 27 | int alloc_xenballooned_pages(int nr_pages, struct page **pages); |
28 | void free_xenballooned_pages(int nr_pages, struct page **pages); | 28 | void free_xenballooned_pages(int nr_pages, struct page **pages); |
29 | 29 | ||
30 | struct device; | ||
31 | #ifdef CONFIG_XEN_SELFBALLOONING | ||
32 | extern int register_xen_selfballooning(struct device *dev); | ||
33 | #else | ||
34 | static inline int register_xen_selfballooning(struct device *dev) | ||
35 | { | ||
36 | return -ENOSYS; | ||
37 | } | ||
38 | #endif | ||
39 | |||
40 | #ifdef CONFIG_XEN_BALLOON | 30 | #ifdef CONFIG_XEN_BALLOON |
41 | void xen_balloon_init(void); | 31 | void xen_balloon_init(void); |
42 | #else | 32 | #else |
diff --git a/include/xen/events.h b/include/xen/events.h index a48897199975..c0e6a0598397 100644 --- a/include/xen/events.h +++ b/include/xen/events.h | |||
@@ -3,6 +3,7 @@ | |||
3 | #define _XEN_EVENTS_H | 3 | #define _XEN_EVENTS_H |
4 | 4 | ||
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/irq.h> | ||
6 | #ifdef CONFIG_PCI_MSI | 7 | #ifdef CONFIG_PCI_MSI |
7 | #include <linux/msi.h> | 8 | #include <linux/msi.h> |
8 | #endif | 9 | #endif |
@@ -59,7 +60,7 @@ void evtchn_put(unsigned int evtchn); | |||
59 | 60 | ||
60 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); | 61 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); |
61 | void rebind_evtchn_irq(int evtchn, int irq); | 62 | void rebind_evtchn_irq(int evtchn, int irq); |
62 | int xen_rebind_evtchn_to_cpu(int evtchn, unsigned tcpu); | 63 | int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu); |
63 | 64 | ||
64 | static inline void notify_remote_via_evtchn(int port) | 65 | static inline void notify_remote_via_evtchn(int port) |
65 | { | 66 | { |
diff --git a/include/xen/tmem.h b/include/xen/tmem.h deleted file mode 100644 index c80bafe31f14..000000000000 --- a/include/xen/tmem.h +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _XEN_TMEM_H | ||
3 | #define _XEN_TMEM_H | ||
4 | |||
5 | #include <linux/types.h> | ||
6 | |||
7 | #ifdef CONFIG_XEN_TMEM_MODULE | ||
8 | #define tmem_enabled true | ||
9 | #else | ||
10 | /* defined in drivers/xen/tmem.c */ | ||
11 | extern bool tmem_enabled; | ||
12 | #endif | ||
13 | |||
14 | #ifdef CONFIG_XEN_SELFBALLOONING | ||
15 | extern int xen_selfballoon_init(bool, bool); | ||
16 | #endif | ||
17 | |||
18 | #endif /* _XEN_TMEM_H */ | ||