diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2008-05-26 18:31:27 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2008-05-27 04:11:38 -0400 |
commit | 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 (patch) | |
tree | c6a3b31b7bcbbfb55bb2304d8651abdd28cdad54 /drivers | |
parent | 7d88d32a4670af583c896e5ecd3929b78538ca62 (diff) |
xen: implement save/restore
This patch implements Xen save/restore and migration.
Saving is triggered via xenbus, which is watched in
drivers/xen/manage.c. When a suspend request comes in, the kernel
prepares itself for saving by:
1 - Freeze all processes. This is primarily to prevent any
partially-completed pagetable updates from confusing the suspend
process. If CONFIG_PREEMPT isn't defined, then this isn't necessary.
2 - Suspend xenbus and other devices
3 - Stop_machine, to make sure all the other vcpus are quiescent. The
Xen tools require the domain to run its save off vcpu0.
4 - Within the stop_machine state, it pins any unpinned pgds (under
construction or destruction), canonicalizes various other
pieces of state (mostly converting mfns to pfns), and finally
5 - Suspends the domain.
Restore reverses the steps used to save the domain, ending when all
the frozen processes are thawed.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/xen/events.c | 83 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 4 | ||||
-rw-r--r-- | drivers/xen/manage.c | 126 |
3 files changed, 197 insertions, 16 deletions
diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 70375a690761..73d78dc9b875 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c | |||
@@ -674,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq) | |||
674 | return ret; | 674 | return ret; |
675 | } | 675 | } |
676 | 676 | ||
677 | static void restore_cpu_virqs(unsigned int cpu) | ||
678 | { | ||
679 | struct evtchn_bind_virq bind_virq; | ||
680 | int virq, irq, evtchn; | ||
681 | |||
682 | for (virq = 0; virq < NR_VIRQS; virq++) { | ||
683 | if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) | ||
684 | continue; | ||
685 | |||
686 | BUG_ON(irq_info[irq].type != IRQT_VIRQ); | ||
687 | BUG_ON(irq_info[irq].index != virq); | ||
688 | |||
689 | /* Get a new binding from Xen. */ | ||
690 | bind_virq.virq = virq; | ||
691 | bind_virq.vcpu = cpu; | ||
692 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, | ||
693 | &bind_virq) != 0) | ||
694 | BUG(); | ||
695 | evtchn = bind_virq.port; | ||
696 | |||
697 | /* Record the new mapping. */ | ||
698 | evtchn_to_irq[evtchn] = irq; | ||
699 | irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); | ||
700 | bind_evtchn_to_cpu(evtchn, cpu); | ||
701 | |||
702 | /* Ready for use. */ | ||
703 | unmask_evtchn(evtchn); | ||
704 | } | ||
705 | } | ||
706 | |||
707 | static void restore_cpu_ipis(unsigned int cpu) | ||
708 | { | ||
709 | struct evtchn_bind_ipi bind_ipi; | ||
710 | int ipi, irq, evtchn; | ||
711 | |||
712 | for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { | ||
713 | if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) | ||
714 | continue; | ||
715 | |||
716 | BUG_ON(irq_info[irq].type != IRQT_IPI); | ||
717 | BUG_ON(irq_info[irq].index != ipi); | ||
718 | |||
719 | /* Get a new binding from Xen. */ | ||
720 | bind_ipi.vcpu = cpu; | ||
721 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, | ||
722 | &bind_ipi) != 0) | ||
723 | BUG(); | ||
724 | evtchn = bind_ipi.port; | ||
725 | |||
726 | /* Record the new mapping. */ | ||
727 | evtchn_to_irq[evtchn] = irq; | ||
728 | irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); | ||
729 | bind_evtchn_to_cpu(evtchn, cpu); | ||
730 | |||
731 | /* Ready for use. */ | ||
732 | unmask_evtchn(evtchn); | ||
733 | |||
734 | } | ||
735 | } | ||
736 | |||
737 | void xen_irq_resume(void) | ||
738 | { | ||
739 | unsigned int cpu, irq, evtchn; | ||
740 | |||
741 | init_evtchn_cpu_bindings(); | ||
742 | |||
743 | /* New event-channel space is not 'live' yet. */ | ||
744 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) | ||
745 | mask_evtchn(evtchn); | ||
746 | |||
747 | /* No IRQ <-> event-channel mappings. */ | ||
748 | for (irq = 0; irq < NR_IRQS; irq++) | ||
749 | irq_info[irq].evtchn = 0; /* zap event-channel binding */ | ||
750 | |||
751 | for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) | ||
752 | evtchn_to_irq[evtchn] = -1; | ||
753 | |||
754 | for_each_possible_cpu(cpu) { | ||
755 | restore_cpu_virqs(cpu); | ||
756 | restore_cpu_ipis(cpu); | ||
757 | } | ||
758 | } | ||
759 | |||
677 | static struct irq_chip xen_dynamic_chip __read_mostly = { | 760 | static struct irq_chip xen_dynamic_chip __read_mostly = { |
678 | .name = "xen-dyn", | 761 | .name = "xen-dyn", |
679 | .mask = disable_dynirq, | 762 | .mask = disable_dynirq, |
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 52b6b41b909d..e9e11168616a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c | |||
@@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) | |||
471 | return 0; | 471 | return 0; |
472 | } | 472 | } |
473 | 473 | ||
474 | static int gnttab_resume(void) | 474 | int gnttab_resume(void) |
475 | { | 475 | { |
476 | if (max_nr_grant_frames() < nr_grant_frames) | 476 | if (max_nr_grant_frames() < nr_grant_frames) |
477 | return -ENOSYS; | 477 | return -ENOSYS; |
478 | return gnttab_map(0, nr_grant_frames - 1); | 478 | return gnttab_map(0, nr_grant_frames - 1); |
479 | } | 479 | } |
480 | 480 | ||
481 | static int gnttab_suspend(void) | 481 | int gnttab_suspend(void) |
482 | { | 482 | { |
483 | arch_gnttab_unmap_shared(shared, nr_grant_frames); | 483 | arch_gnttab_unmap_shared(shared, nr_grant_frames); |
484 | return 0; | 484 | return 0; |
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index aa7af9e6abc0..ba85fa2cff33 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c | |||
@@ -5,21 +5,113 @@ | |||
5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/reboot.h> | 6 | #include <linux/reboot.h> |
7 | #include <linux/sysrq.h> | 7 | #include <linux/sysrq.h> |
8 | #include <linux/stop_machine.h> | ||
9 | #include <linux/freezer.h> | ||
8 | 10 | ||
9 | #include <xen/xenbus.h> | 11 | #include <xen/xenbus.h> |
10 | 12 | #include <xen/grant_table.h> | |
11 | #define SHUTDOWN_INVALID -1 | 13 | #include <xen/events.h> |
12 | #define SHUTDOWN_POWEROFF 0 | 14 | #include <xen/hvc-console.h> |
13 | #define SHUTDOWN_SUSPEND 2 | 15 | #include <xen/xen-ops.h> |
14 | /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only | 16 | |
15 | * report a crash, not be instructed to crash! | 17 | #include <asm/xen/hypercall.h> |
16 | * HALT is the same as POWEROFF, as far as we're concerned. The tools use | 18 | #include <asm/xen/page.h> |
17 | * the distinction when we return the reason code to them. | 19 | |
18 | */ | 20 | enum shutdown_state { |
19 | #define SHUTDOWN_HALT 4 | 21 | SHUTDOWN_INVALID = -1, |
22 | SHUTDOWN_POWEROFF = 0, | ||
23 | SHUTDOWN_SUSPEND = 2, | ||
24 | /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only | ||
25 | report a crash, not be instructed to crash! | ||
26 | HALT is the same as POWEROFF, as far as we're concerned. The tools use | ||
27 | the distinction when we return the reason code to them. */ | ||
28 | SHUTDOWN_HALT = 4, | ||
29 | }; | ||
20 | 30 | ||
21 | /* Ignore multiple shutdown requests. */ | 31 | /* Ignore multiple shutdown requests. */ |
22 | static int shutting_down = SHUTDOWN_INVALID; | 32 | static enum shutdown_state shutting_down = SHUTDOWN_INVALID; |
33 | |||
34 | static int xen_suspend(void *data) | ||
35 | { | ||
36 | int *cancelled = data; | ||
37 | |||
38 | BUG_ON(!irqs_disabled()); | ||
39 | |||
40 | load_cr3(swapper_pg_dir); | ||
41 | |||
42 | xen_mm_pin_all(); | ||
43 | gnttab_suspend(); | ||
44 | xen_time_suspend(); | ||
45 | xen_pre_suspend(); | ||
46 | |||
47 | /* | ||
48 | * This hypercall returns 1 if suspend was cancelled | ||
49 | * or the domain was merely checkpointed, and 0 if it | ||
50 | * is resuming in a new domain. | ||
51 | */ | ||
52 | *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); | ||
53 | |||
54 | xen_post_suspend(*cancelled); | ||
55 | xen_time_resume(); | ||
56 | gnttab_resume(); | ||
57 | xen_mm_unpin_all(); | ||
58 | |||
59 | if (!*cancelled) { | ||
60 | xen_irq_resume(); | ||
61 | xen_console_resume(); | ||
62 | } | ||
63 | |||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | static void do_suspend(void) | ||
68 | { | ||
69 | int err; | ||
70 | int cancelled = 1; | ||
71 | |||
72 | shutting_down = SHUTDOWN_SUSPEND; | ||
73 | |||
74 | #ifdef CONFIG_PREEMPT | ||
75 | /* If the kernel is preemptible, we need to freeze all the processes | ||
76 | to prevent them from being in the middle of a pagetable update | ||
77 | during suspend. */ | ||
78 | err = freeze_processes(); | ||
79 | if (err) { | ||
80 | printk(KERN_ERR "xen suspend: freeze failed %d\n", err); | ||
81 | return; | ||
82 | } | ||
83 | #endif | ||
84 | |||
85 | err = device_suspend(PMSG_SUSPEND); | ||
86 | if (err) { | ||
87 | printk(KERN_ERR "xen suspend: device_suspend %d\n", err); | ||
88 | goto out; | ||
89 | } | ||
90 | |||
91 | printk("suspending xenbus...\n"); | ||
92 | /* XXX use normal device tree? */ | ||
93 | xenbus_suspend(); | ||
94 | |||
95 | err = stop_machine_run(xen_suspend, &cancelled, 0); | ||
96 | if (err) { | ||
97 | printk(KERN_ERR "failed to start xen_suspend: %d\n", err); | ||
98 | goto out; | ||
99 | } | ||
100 | |||
101 | if (!cancelled) | ||
102 | xenbus_resume(); | ||
103 | else | ||
104 | xenbus_suspend_cancel(); | ||
105 | |||
106 | device_resume(); | ||
107 | |||
108 | |||
109 | out: | ||
110 | #ifdef CONFIG_PREEMPT | ||
111 | thaw_processes(); | ||
112 | #endif | ||
113 | shutting_down = SHUTDOWN_INVALID; | ||
114 | } | ||
23 | 115 | ||
24 | static void shutdown_handler(struct xenbus_watch *watch, | 116 | static void shutdown_handler(struct xenbus_watch *watch, |
25 | const char **vec, unsigned int len) | 117 | const char **vec, unsigned int len) |
@@ -52,11 +144,17 @@ static void shutdown_handler(struct xenbus_watch *watch, | |||
52 | } | 144 | } |
53 | 145 | ||
54 | if (strcmp(str, "poweroff") == 0 || | 146 | if (strcmp(str, "poweroff") == 0 || |
55 | strcmp(str, "halt") == 0) | 147 | strcmp(str, "halt") == 0) { |
148 | shutting_down = SHUTDOWN_POWEROFF; | ||
56 | orderly_poweroff(false); | 149 | orderly_poweroff(false); |
57 | else if (strcmp(str, "reboot") == 0) | 150 | } else if (strcmp(str, "reboot") == 0) { |
151 | shutting_down = SHUTDOWN_POWEROFF; /* ? */ | ||
58 | ctrl_alt_del(); | 152 | ctrl_alt_del(); |
59 | else { | 153 | #ifdef CONFIG_PM_SLEEP |
154 | } else if (strcmp(str, "suspend") == 0) { | ||
155 | do_suspend(); | ||
156 | #endif | ||
157 | } else { | ||
60 | printk(KERN_INFO "Ignoring shutdown request: %s\n", str); | 158 | printk(KERN_INFO "Ignoring shutdown request: %s\n", str); |
61 | shutting_down = SHUTDOWN_INVALID; | 159 | shutting_down = SHUTDOWN_INVALID; |
62 | } | 160 | } |