Diffstat (limited to 'drivers')
-rw-r--r--  drivers/input/keyboard/atakbd.c | 157
-rw-r--r--  drivers/input/mouse/atarimouse.c | 18
-rw-r--r--  drivers/kvm/Kconfig | 1
-rw-r--r--  drivers/kvm/Makefile | 2
-rw-r--r--  drivers/kvm/i8259.c | 450
-rw-r--r--  drivers/kvm/ioapic.c | 388
-rw-r--r--  drivers/kvm/irq.c | 98
-rw-r--r--  drivers/kvm/irq.h | 165
-rw-r--r--  drivers/kvm/kvm.h | 201
-rw-r--r--  drivers/kvm/kvm_main.c | 1486
-rw-r--r--  drivers/kvm/kvm_svm.h | 3
-rw-r--r--  drivers/kvm/lapic.c | 1064
-rw-r--r--  drivers/kvm/mmu.c | 51
-rw-r--r--  drivers/kvm/paging_tmpl.h | 84
-rw-r--r--  drivers/kvm/svm.c | 1046
-rw-r--r--  drivers/kvm/vmx.c | 1034
-rw-r--r--  drivers/kvm/vmx.h | 73
-rw-r--r--  drivers/kvm/x86_emulate.c | 411
-rw-r--r--  drivers/kvm/x86_emulate.h | 20
-rw-r--r--  drivers/md/dm-emc.c | 2
-rw-r--r--  drivers/media/video/videobuf-core.c | 2
-rw-r--r--  drivers/media/video/videobuf-dma-sg.c | 2
-rw-r--r--  drivers/media/video/videobuf-vmalloc.c | 2
-rw-r--r--  drivers/mtd/Kconfig | 8
-rw-r--r--  drivers/mtd/Makefile | 1
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0001.c | 38
-rw-r--r--  drivers/mtd/chips/cfi_cmdset_0002.c | 2
-rw-r--r--  drivers/mtd/chips/jedec_probe.c | 37
-rw-r--r--  drivers/mtd/devices/Kconfig | 25
-rw-r--r--  drivers/mtd/devices/Makefile | 1
-rw-r--r--  drivers/mtd/devices/at91_dataflash26.c | 485
-rw-r--r--  drivers/mtd/devices/docprobe.c | 4
-rw-r--r--  drivers/mtd/devices/m25p80.c | 271
-rw-r--r--  drivers/mtd/devices/mtd_dataflash.c | 17
-rw-r--r--  drivers/mtd/devices/pmc551.c | 27
-rw-r--r--  drivers/mtd/inftlmount.c | 3
-rw-r--r--  drivers/mtd/maps/Kconfig | 43
-rw-r--r--  drivers/mtd/maps/Makefile | 6
-rw-r--r--  drivers/mtd/maps/alchemy-flash.c | 14
-rw-r--r--  drivers/mtd/maps/intel_vr_nor.c | 298
-rw-r--r--  drivers/mtd/maps/lubbock-flash.c | 168
-rw-r--r--  drivers/mtd/maps/mainstone-flash.c | 180
-rw-r--r--  drivers/mtd/maps/nettel.c | 65
-rw-r--r--  drivers/mtd/maps/ocelot.c | 175
-rw-r--r--  drivers/mtd/maps/physmap_of.c | 1
-rw-r--r--  drivers/mtd/maps/pmcmsp-flash.c | 22
-rw-r--r--  drivers/mtd/maps/pmcmsp-ramroot.c | 1
-rw-r--r--  drivers/mtd/maps/pq2fads.c | 88
-rw-r--r--  drivers/mtd/maps/pxa2xx-flash.c | 200
-rw-r--r--  drivers/mtd/maps/tqm834x.c | 286
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 7
-rw-r--r--  drivers/mtd/mtdchar.c | 3
-rw-r--r--  drivers/mtd/mtdconcat.c | 2
-rw-r--r--  drivers/mtd/mtdcore.c | 2
-rw-r--r--  drivers/mtd/mtdcore.h | 11
-rw-r--r--  drivers/mtd/mtdoops.c | 376
-rw-r--r--  drivers/mtd/nand/Kconfig | 31
-rw-r--r--  drivers/mtd/nand/Makefile | 2
-rw-r--r--  drivers/mtd/nand/alauda.c | 742
-rw-r--r--  drivers/mtd/nand/bf5xx_nand.c | 788
-rw-r--r--  drivers/mtd/nand/cafe_nand.c | 51
-rw-r--r--  drivers/mtd/nand/diskonchip.c | 2
-rw-r--r--  drivers/mtd/nand/excite_nandflash.c | 1
-rw-r--r--  drivers/mtd/nand/nand_base.c | 9
-rw-r--r--  drivers/mtd/nand/nand_ids.c | 1
-rw-r--r--  drivers/mtd/nand/nandsim.c | 8
-rw-r--r--  drivers/mtd/nand/ndfc.c | 8
-rw-r--r--  drivers/mtd/nand/s3c2410.c | 4
-rw-r--r--  drivers/mtd/onenand/Kconfig | 23
-rw-r--r--  drivers/mtd/onenand/Makefile | 3
-rw-r--r--  drivers/mtd/onenand/onenand_base.c | 665
-rw-r--r--  drivers/mtd/onenand/onenand_sim.c | 495
-rw-r--r--  drivers/mtd/rfd_ftl.c | 8
-rw-r--r--  drivers/mtd/ubi/scan.c | 17
-rw-r--r--  drivers/net/atarilance.c | 2
-rw-r--r--  drivers/net/macmace.c | 6
-rw-r--r--  drivers/net/mv643xx_eth.c | 2
-rw-r--r--  drivers/net/mvme147.c | 1
-rw-r--r--  drivers/net/wireless/b43/phy.c | 1
-rw-r--r--  drivers/net/wireless/b43/pio.h | 1
-rw-r--r--  drivers/net/wireless/b43/sysfs.c | 5
-rw-r--r--  drivers/rtc/rtc-sh.c | 51
-rw-r--r--  drivers/serial/sh-sci.c | 39
-rw-r--r--  drivers/serial/sh-sci.h | 34
-rw-r--r--  drivers/sh/Makefile | 4
-rw-r--r--  drivers/sh/maple/Makefile | 3
-rw-r--r--  drivers/sh/maple/maple.c | 735
-rw-r--r--  drivers/ssb/main.c | 1
-rw-r--r--  drivers/video/backlight/hp680_bl.c | 4
-rw-r--r--  drivers/video/pvr2fb.c | 4
90 files changed, 9582 insertions, 3796 deletions
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index ded1d6ac6ff3..f948d3a14a93 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -55,7 +55,140 @@ MODULE_AUTHOR("Michael Schmitz <schmitz@biophys.uni-duesseldorf.de>");
 MODULE_DESCRIPTION("Atari keyboard driver");
 MODULE_LICENSE("GPL");
 
-static unsigned char atakbd_keycode[0x72];
+/*
+ 0x47: KP_7	71
+ 0x48: KP_8	72
+ 0x49: KP_9	73
+ 0x62: KP_/	98
+ 0x4b: KP_4	75
+ 0x4c: KP_5	76
+ 0x4d: KP_6	77
+ 0x37: KP_*	55
+ 0x4f: KP_1	79
+ 0x50: KP_2	80
+ 0x51: KP_3	81
+ 0x4a: KP_-	74
+ 0x52: KP_0	82
+ 0x53: KP_.	83
+ 0x4e: KP_+	78
+
+ 0x67: Up	103
+ 0x6c: Down	108
+ 0x69: Left	105
+ 0x6a: Right	106
+ */
+
+
+static unsigned char atakbd_keycode[0x72] = {	/* American layout */
+	[0]	= KEY_GRAVE,
+	[1]	= KEY_ESC,
+	[2]	= KEY_1,
+	[3]	= KEY_2,
+	[4]	= KEY_3,
+	[5]	= KEY_4,
+	[6]	= KEY_5,
+	[7]	= KEY_6,
+	[8]	= KEY_7,
+	[9]	= KEY_8,
+	[10]	= KEY_9,
+	[11]	= KEY_0,
+	[12]	= KEY_MINUS,
+	[13]	= KEY_EQUAL,
+	[14]	= KEY_BACKSPACE,
+	[15]	= KEY_TAB,
+	[16]	= KEY_Q,
+	[17]	= KEY_W,
+	[18]	= KEY_E,
+	[19]	= KEY_R,
+	[20]	= KEY_T,
+	[21]	= KEY_Y,
+	[22]	= KEY_U,
+	[23]	= KEY_I,
+	[24]	= KEY_O,
+	[25]	= KEY_P,
+	[26]	= KEY_LEFTBRACE,
+	[27]	= KEY_RIGHTBRACE,
+	[28]	= KEY_ENTER,
+	[29]	= KEY_LEFTCTRL,
+	[30]	= KEY_A,
+	[31]	= KEY_S,
+	[32]	= KEY_D,
+	[33]	= KEY_F,
+	[34]	= KEY_G,
+	[35]	= KEY_H,
+	[36]	= KEY_J,
+	[37]	= KEY_K,
+	[38]	= KEY_L,
+	[39]	= KEY_SEMICOLON,
+	[40]	= KEY_APOSTROPHE,
+	[41]	= KEY_BACKSLASH,	/* FIXME, '#' */
+	[42]	= KEY_LEFTSHIFT,
+	[43]	= KEY_GRAVE,	/* FIXME: '~' */
+	[44]	= KEY_Z,
+	[45]	= KEY_X,
+	[46]	= KEY_C,
+	[47]	= KEY_V,
+	[48]	= KEY_B,
+	[49]	= KEY_N,
+	[50]	= KEY_M,
+	[51]	= KEY_COMMA,
+	[52]	= KEY_DOT,
+	[53]	= KEY_SLASH,
+	[54]	= KEY_RIGHTSHIFT,
+	[55]	= KEY_KPASTERISK,
+	[56]	= KEY_LEFTALT,
+	[57]	= KEY_SPACE,
+	[58]	= KEY_CAPSLOCK,
+	[59]	= KEY_F1,
+	[60]	= KEY_F2,
+	[61]	= KEY_F3,
+	[62]	= KEY_F4,
+	[63]	= KEY_F5,
+	[64]	= KEY_F6,
+	[65]	= KEY_F7,
+	[66]	= KEY_F8,
+	[67]	= KEY_F9,
+	[68]	= KEY_F10,
+	[69]	= KEY_ESC,
+	[70]	= KEY_DELETE,
+	[71]	= KEY_KP7,
+	[72]	= KEY_KP8,
+	[73]	= KEY_KP9,
+	[74]	= KEY_KPMINUS,
+	[75]	= KEY_KP4,
+	[76]	= KEY_KP5,
+	[77]	= KEY_KP6,
+	[78]	= KEY_KPPLUS,
+	[79]	= KEY_KP1,
+	[80]	= KEY_KP2,
+	[81]	= KEY_KP3,
+	[82]	= KEY_KP0,
+	[83]	= KEY_KPDOT,
+	[90]	= KEY_KPLEFTPAREN,
+	[91]	= KEY_KPRIGHTPAREN,
+	[92]	= KEY_KPASTERISK,	/* FIXME */
+	[93]	= KEY_KPASTERISK,
+	[94]	= KEY_KPPLUS,
+	[95]	= KEY_HELP,
+	[96]	= KEY_BACKSLASH,	/* FIXME: '<' */
+	[97]	= KEY_KPASTERISK,	/* FIXME */
+	[98]	= KEY_KPSLASH,
+	[99]	= KEY_KPLEFTPAREN,
+	[100]	= KEY_KPRIGHTPAREN,
+	[101]	= KEY_KPSLASH,
+	[102]	= KEY_KPASTERISK,
+	[103]	= KEY_UP,
+	[104]	= KEY_KPASTERISK,	/* FIXME */
+	[105]	= KEY_LEFT,
+	[106]	= KEY_RIGHT,
+	[107]	= KEY_KPASTERISK,	/* FIXME */
+	[108]	= KEY_DOWN,
+	[109]	= KEY_KPASTERISK,	/* FIXME */
+	[110]	= KEY_KPASTERISK,	/* FIXME */
+	[111]	= KEY_KPASTERISK,	/* FIXME */
+	[112]	= KEY_KPASTERISK,	/* FIXME */
+	[113]	= KEY_KPASTERISK	/* FIXME */
+};
 
 static struct input_dev *atakbd_dev;
 
@@ -86,21 +219,20 @@ static int __init atakbd_init(void)
 {
 	int i;
 
-	if (!ATARIHW_PRESENT(ST_MFP))
+	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ST_MFP))
 		return -EIO;
 
-	// TODO: request_mem_region if not done in arch code
-
-	if (!(atakbd_dev = input_allocate_device()))
-		return -ENOMEM;
-
 	// need to init core driver if not already done so
 	if (atari_keyb_init())
 		return -ENODEV;
 
+	atakbd_dev = input_allocate_device();
+	if (!atakbd_dev)
+		return -ENOMEM;
+
 	atakbd_dev->name = "Atari Keyboard";
 	atakbd_dev->phys = "atakbd/input0";
-	atakbd_dev->id.bustype = BUS_ATARI;
+	atakbd_dev->id.bustype = BUS_HOST;
 	atakbd_dev->id.vendor = 0x0001;
 	atakbd_dev->id.product = 0x0001;
 	atakbd_dev->id.version = 0x0100;
@@ -111,16 +243,17 @@ static int __init atakbd_init(void)
 	atakbd_dev->keycodemax = ARRAY_SIZE(atakbd_keycode);
 
 	for (i = 1; i < 0x72; i++) {
-		atakbd_keycode[i] = i;
 		set_bit(atakbd_keycode[i], atakbd_dev->keybit);
 	}
 
-	input_register_device(atakbd_dev);
+	/* error check */
+	if (input_register_device(atakbd_dev)) {
+		input_free_device(atakbd_dev);
+		return -ENOMEM;
+	}
 
 	atari_input_keyboard_interrupt_hook = atakbd_interrupt;
 
-	printk(KERN_INFO "input: %s at IKBD ACIA\n", atakbd_dev->name);
-
 	return 0;
 }
 
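The keycode table above is consumed by atakbd_interrupt(), which this patch leaves untouched. As a rough illustration of the idiom (a hypothetical helper, not code from this driver), the interrupt path reports a translated key through the standard input API roughly like so:

	/* Hypothetical sketch: reporting a scancode via atakbd_keycode[]. */
	static void atakbd_report(unsigned char scancode, int down)
	{
		if (scancode < ARRAY_SIZE(atakbd_keycode) &&
		    atakbd_keycode[scancode]) {
			input_report_key(atakbd_dev, atakbd_keycode[scancode],
					 down);
			input_sync(atakbd_dev);
		}
	}
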
diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c
index 43ab6566fb65..c8c7244b48a1 100644
--- a/drivers/input/mouse/atarimouse.c
+++ b/drivers/input/mouse/atarimouse.c
@@ -73,14 +73,11 @@ static void atamouse_interrupt(char *buf)
 {
 	int buttons, dx, dy;
 
-/*	ikbd_mouse_disable(); */
-
 	buttons = (buf[0] & 1) | ((buf[0] & 2) << 1);
 #ifdef FIXED_ATARI_JOYSTICK
 	buttons |= atari_mouse_buttons & 2;
 	atari_mouse_buttons = buttons;
 #endif
-/*	ikbd_mouse_rel_pos(); */
 
 	/* only relative events get here */
 	dx = buf[1];
@@ -126,15 +123,16 @@ static int __init atamouse_init(void)
 	if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ST_MFP))
 		return -ENODEV;
 
-	if (!(atamouse_dev = input_allocate_device()))
-		return -ENOMEM;
-
 	if (!(atari_keyb_init()))
 		return -ENODEV;
 
+	atamouse_dev = input_allocate_device();
+	if (!atamouse_dev)
+		return -ENOMEM;
+
 	atamouse_dev->name = "Atari mouse";
 	atamouse_dev->phys = "atamouse/input0";
-	atamouse_dev->id.bustype = BUS_ATARI;
+	atamouse_dev->id.bustype = BUS_HOST;
 	atamouse_dev->id.vendor = 0x0001;
 	atamouse_dev->id.product = 0x0002;
 	atamouse_dev->id.version = 0x0100;
@@ -145,9 +143,11 @@ static int __init atamouse_init(void)
 	atamouse_dev->open = atamouse_open;
 	atamouse_dev->close = atamouse_close;
 
-	input_register_device(atamouse_dev);
+	if (input_register_device(atamouse_dev)) {
+		input_free_device(atamouse_dev);
+		return -ENOMEM;
+	}
 
-	printk(KERN_INFO "input: %s at keyboard ACIA\n", atamouse_dev->name);
 	return 0;
 }
 
diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig
index 0a419a0de603..8749fa4ffcee 100644
--- a/drivers/kvm/Kconfig
+++ b/drivers/kvm/Kconfig
@@ -17,6 +17,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on X86 && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index c0a789fa9d65..e5a8f4d3e973 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c
new file mode 100644
index 000000000000..a679157bc599
--- /dev/null
+++ b/drivers/kvm/i8259.c
@@ -0,0 +1,450 @@
+/*
+ * 8259 interrupt controller emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *   Port from Qemu.
+ */
+#include <linux/mm.h>
+#include "irq.h"
+
+/*
+ * set irq level. If an edge is detected, then the IRR is set to 1
+ */
+static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level)
+{
+	int mask;
+	mask = 1 << irq;
+	if (s->elcr & mask)	/* level triggered */
+		if (level) {
+			s->irr |= mask;
+			s->last_irr |= mask;
+		} else {
+			s->irr &= ~mask;
+			s->last_irr &= ~mask;
+		}
+	else	/* edge triggered */
+		if (level) {
+			if ((s->last_irr & mask) == 0)
+				s->irr |= mask;
+			s->last_irr |= mask;
+		} else
+			s->last_irr &= ~mask;
+}
+
+/*
+ * return the highest priority found in mask (highest = smallest
+ * number). Return 8 if no irq
+ */
+static inline int get_priority(struct kvm_kpic_state *s, int mask)
+{
+	int priority;
+	if (mask == 0)
+		return 8;
+	priority = 0;
+	while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0)
+		priority++;
+	return priority;
+}
+
+/*
+ * return the pic wanted interrupt. return -1 if none
+ */
+static int pic_get_irq(struct kvm_kpic_state *s)
+{
+	int mask, cur_priority, priority;
+
+	mask = s->irr & ~s->imr;
+	priority = get_priority(s, mask);
+	if (priority == 8)
+		return -1;
+	/*
+	 * compute current priority. If special fully nested mode on the
+	 * master, the IRQ coming from the slave is not taken into account
+	 * for the priority computation.
+	 */
+	mask = s->isr;
+	if (s->special_fully_nested_mode && s == &s->pics_state->pics[0])
+		mask &= ~(1 << 2);
+	cur_priority = get_priority(s, mask);
+	if (priority < cur_priority)
+		/*
+		 * higher priority found: an irq should be generated
+		 */
+		return (priority + s->priority_add) & 7;
+	else
+		return -1;
+}
+
+/*
+ * raise irq to CPU if necessary. must be called every time the active
+ * irq may change
+ */
+static void pic_update_irq(struct kvm_pic *s)
+{
+	int irq2, irq;
+
+	irq2 = pic_get_irq(&s->pics[1]);
+	if (irq2 >= 0) {
+		/*
+		 * if irq request by slave pic, signal master PIC
+		 */
+		pic_set_irq1(&s->pics[0], 2, 1);
+		pic_set_irq1(&s->pics[0], 2, 0);
+	}
+	irq = pic_get_irq(&s->pics[0]);
+	if (irq >= 0)
+		s->irq_request(s->irq_request_opaque, 1);
+	else
+		s->irq_request(s->irq_request_opaque, 0);
+}
+
+void kvm_pic_update_irq(struct kvm_pic *s)
+{
+	pic_update_irq(s);
+}
+
+void kvm_pic_set_irq(void *opaque, int irq, int level)
+{
+	struct kvm_pic *s = opaque;
+
+	pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+	pic_update_irq(s);
+}
+
+/*
+ * acknowledge interrupt 'irq'
+ */
+static inline void pic_intack(struct kvm_kpic_state *s, int irq)
+{
+	if (s->auto_eoi) {
+		if (s->rotate_on_auto_eoi)
+			s->priority_add = (irq + 1) & 7;
+	} else
+		s->isr |= (1 << irq);
+	/*
+	 * We don't clear a level sensitive interrupt here
+	 */
+	if (!(s->elcr & (1 << irq)))
+		s->irr &= ~(1 << irq);
+}
+
+int kvm_pic_read_irq(struct kvm_pic *s)
+{
+	int irq, irq2, intno;
+
+	irq = pic_get_irq(&s->pics[0]);
+	if (irq >= 0) {
+		pic_intack(&s->pics[0], irq);
+		if (irq == 2) {
+			irq2 = pic_get_irq(&s->pics[1]);
+			if (irq2 >= 0)
+				pic_intack(&s->pics[1], irq2);
+			else
+				/*
+				 * spurious IRQ on slave controller
+				 */
+				irq2 = 7;
+			intno = s->pics[1].irq_base + irq2;
+			irq = irq2 + 8;
+		} else
+			intno = s->pics[0].irq_base + irq;
+	} else {
+		/*
+		 * spurious IRQ on host controller
+		 */
+		irq = 7;
+		intno = s->pics[0].irq_base + irq;
+	}
+	pic_update_irq(s);
+
+	return intno;
+}
+
+static void pic_reset(void *opaque)
+{
+	struct kvm_kpic_state *s = opaque;
+
+	s->last_irr = 0;
+	s->irr = 0;
+	s->imr = 0;
+	s->isr = 0;
+	s->priority_add = 0;
+	s->irq_base = 0;
+	s->read_reg_select = 0;
+	s->poll = 0;
+	s->special_mask = 0;
+	s->init_state = 0;
+	s->auto_eoi = 0;
+	s->rotate_on_auto_eoi = 0;
+	s->special_fully_nested_mode = 0;
+	s->init4 = 0;
+}
+
+static void pic_ioport_write(void *opaque, u32 addr, u32 val)
+{
+	struct kvm_kpic_state *s = opaque;
+	int priority, cmd, irq;
+
+	addr &= 1;
+	if (addr == 0) {
+		if (val & 0x10) {
+			pic_reset(s);	/* init */
+			/*
+			 * deassert a pending interrupt
+			 */
+			s->pics_state->irq_request(s->pics_state->
+						   irq_request_opaque, 0);
+			s->init_state = 1;
+			s->init4 = val & 1;
+			if (val & 0x02)
+				printk(KERN_ERR "single mode not supported");
+			if (val & 0x08)
+				printk(KERN_ERR
+				       "level sensitive irq not supported");
+		} else if (val & 0x08) {
+			if (val & 0x04)
+				s->poll = 1;
+			if (val & 0x02)
+				s->read_reg_select = val & 1;
+			if (val & 0x40)
+				s->special_mask = (val >> 5) & 1;
+		} else {
+			cmd = val >> 5;
+			switch (cmd) {
+			case 0:
+			case 4:
+				s->rotate_on_auto_eoi = cmd >> 2;
+				break;
+			case 1:	/* end of interrupt */
+			case 5:
+				priority = get_priority(s, s->isr);
+				if (priority != 8) {
+					irq = (priority + s->priority_add) & 7;
+					s->isr &= ~(1 << irq);
+					if (cmd == 5)
+						s->priority_add = (irq + 1) & 7;
+					pic_update_irq(s->pics_state);
+				}
+				break;
+			case 3:
+				irq = val & 7;
+				s->isr &= ~(1 << irq);
+				pic_update_irq(s->pics_state);
+				break;
+			case 6:
+				s->priority_add = (val + 1) & 7;
+				pic_update_irq(s->pics_state);
+				break;
+			case 7:
+				irq = val & 7;
+				s->isr &= ~(1 << irq);
+				s->priority_add = (irq + 1) & 7;
+				pic_update_irq(s->pics_state);
+				break;
+			default:
+				break;	/* no operation */
+			}
+		}
+	} else
+		switch (s->init_state) {
+		case 0:	/* normal mode */
+			s->imr = val;
+			pic_update_irq(s->pics_state);
+			break;
+		case 1:
+			s->irq_base = val & 0xf8;
+			s->init_state = 2;
+			break;
+		case 2:
+			if (s->init4)
+				s->init_state = 3;
+			else
+				s->init_state = 0;
+			break;
+		case 3:
+			s->special_fully_nested_mode = (val >> 4) & 1;
+			s->auto_eoi = (val >> 1) & 1;
+			s->init_state = 0;
+			break;
+		}
+}
+
+static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
+{
+	int ret;
+
+	ret = pic_get_irq(s);
+	if (ret >= 0) {
+		if (addr1 >> 7) {
+			s->pics_state->pics[0].isr &= ~(1 << 2);
+			s->pics_state->pics[0].irr &= ~(1 << 2);
+		}
+		s->irr &= ~(1 << ret);
+		s->isr &= ~(1 << ret);
+		if (addr1 >> 7 || ret != 2)
+			pic_update_irq(s->pics_state);
+	} else {
+		ret = 0x07;
+		pic_update_irq(s->pics_state);
+	}
+
+	return ret;
+}
+
+static u32 pic_ioport_read(void *opaque, u32 addr1)
+{
+	struct kvm_kpic_state *s = opaque;
+	unsigned int addr;
+	int ret;
+
+	addr = addr1;
+	addr &= 1;
+	if (s->poll) {
+		ret = pic_poll_read(s, addr1);
+		s->poll = 0;
+	} else
+		if (addr == 0)
+			if (s->read_reg_select)
+				ret = s->isr;
+			else
+				ret = s->irr;
+		else
+			ret = s->imr;
+	return ret;
+}
+
+static void elcr_ioport_write(void *opaque, u32 addr, u32 val)
+{
+	struct kvm_kpic_state *s = opaque;
+	s->elcr = val & s->elcr_mask;
+}
+
+static u32 elcr_ioport_read(void *opaque, u32 addr1)
+{
+	struct kvm_kpic_state *s = opaque;
+	return s->elcr;
+}
+
+static int picdev_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+	switch (addr) {
+	case 0x20:
+	case 0x21:
+	case 0xa0:
+	case 0xa1:
+	case 0x4d0:
+	case 0x4d1:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static void picdev_write(struct kvm_io_device *this,
+			 gpa_t addr, int len, const void *val)
+{
+	struct kvm_pic *s = this->private;
+	unsigned char data = *(unsigned char *)val;
+
+	if (len != 1) {
+		if (printk_ratelimit())
+			printk(KERN_ERR "PIC: non byte write\n");
+		return;
+	}
+	switch (addr) {
+	case 0x20:
+	case 0x21:
+	case 0xa0:
+	case 0xa1:
+		pic_ioport_write(&s->pics[addr >> 7], addr, data);
+		break;
+	case 0x4d0:
+	case 0x4d1:
+		elcr_ioport_write(&s->pics[addr & 1], addr, data);
+		break;
+	}
+}
+
+static void picdev_read(struct kvm_io_device *this,
+			gpa_t addr, int len, void *val)
+{
+	struct kvm_pic *s = this->private;
+	unsigned char data = 0;
+
+	if (len != 1) {
+		if (printk_ratelimit())
+			printk(KERN_ERR "PIC: non byte read\n");
+		return;
+	}
+	switch (addr) {
+	case 0x20:
+	case 0x21:
+	case 0xa0:
+	case 0xa1:
+		data = pic_ioport_read(&s->pics[addr >> 7], addr);
+		break;
+	case 0x4d0:
+	case 0x4d1:
+		data = elcr_ioport_read(&s->pics[addr & 1], addr);
+		break;
+	}
+	*(unsigned char *)val = data;
+}
+
+/*
+ * callback when PIC0 irq status changed
+ */
+static void pic_irq_request(void *opaque, int level)
+{
+	struct kvm *kvm = opaque;
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
+
+	pic_irqchip(kvm)->output = level;
+	if (vcpu)
+		kvm_vcpu_kick(vcpu);
+}
+
+struct kvm_pic *kvm_create_pic(struct kvm *kvm)
+{
+	struct kvm_pic *s;
+	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
+	if (!s)
+		return NULL;
+	s->pics[0].elcr_mask = 0xf8;
+	s->pics[1].elcr_mask = 0xde;
+	s->irq_request = pic_irq_request;
+	s->irq_request_opaque = kvm;
+	s->pics[0].pics_state = s;
+	s->pics[1].pics_state = s;
+
+	/*
+	 * Initialize PIO device
+	 */
+	s->dev.read = picdev_read;
+	s->dev.write = picdev_write;
+	s->dev.in_range = picdev_in_range;
+	s->dev.private = s;
+	kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev);
+	return s;
+}
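The subtle part of the PIC model above is the rotation in get_priority(): priorities are computed relative to the rotating base priority_add rather than from IRQ0. A standalone worked example (plain userspace C mirroring the emulator's arithmetic, not kernel code): with priority_add = 3 and IRQ0 and IRQ3 pending, the scan starts at IRQ3, so IRQ3 wins even though IRQ0 is numerically lower.

	#include <stdio.h>

	/* Same scan as the emulator's get_priority(), with priority_add
	 * passed in explicitly. */
	static int get_priority(int priority_add, int mask)
	{
		int priority = 0;

		if (mask == 0)
			return 8;	/* no pending irq */
		while ((mask & (1 << ((priority + priority_add) & 7))) == 0)
			priority++;
		return priority;
	}

	int main(void)
	{
		/* IRQ0 and IRQ3 pending, base priority rotated to IRQ3 */
		int p = get_priority(3, 0x09);

		printf("priority=%d -> irq=%d\n", p, (p + 3) & 7); /* irq 3 */
		return 0;
	}
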
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
new file mode 100644
index 000000000000..c7992e667fdb
--- /dev/null
+++ b/drivers/kvm/ioapic.c
@@ -0,0 +1,388 @@
+/*
+ *  Copyright (C) 2001  MandrakeSoft S.A.
+ *
+ *    MandrakeSoft S.A.
+ *    43, rue d'Aboukir
+ *    75002 Paris - France
+ *    http://www.linux-mandrake.com/
+ *    http://www.mandrakesoft.com/
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *  Yunhong Jiang <yunhong.jiang@intel.com>
+ *  Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *  Based on Xen 3.1 code.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include <asm/io_apic.h>
+#include "irq.h"
+/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define ioapic_debug(fmt, arg...)
+static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+					  unsigned long addr,
+					  unsigned long length)
+{
+	unsigned long result = 0;
+
+	switch (ioapic->ioregsel) {
+	case IOAPIC_REG_VERSION:
+		result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+			  | (IOAPIC_VERSION_ID & 0xff));
+		break;
+
+	case IOAPIC_REG_APIC_ID:
+	case IOAPIC_REG_ARB_ID:
+		result = ((ioapic->id & 0xf) << 24);
+		break;
+
+	default:
+		{
+			u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
+			u64 redir_content;
+
+			ASSERT(redir_index < IOAPIC_NUM_PINS);
+
+			redir_content = ioapic->redirtbl[redir_index].bits;
+			result = (ioapic->ioregsel & 0x1) ?
+			    (redir_content >> 32) & 0xffffffff :
+			    redir_content & 0xffffffff;
+			break;
+		}
+	}
+
+	return result;
+}
+
+static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+{
+	union ioapic_redir_entry *pent;
+
+	pent = &ioapic->redirtbl[idx];
+
+	if (!pent->fields.mask) {
+		ioapic_deliver(ioapic, idx);
+		if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+			pent->fields.remote_irr = 1;
+	}
+	if (!pent->fields.trig_mode)
+		ioapic->irr &= ~(1 << idx);
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+	unsigned index;
+
+	switch (ioapic->ioregsel) {
+	case IOAPIC_REG_VERSION:
+		/* Writes are ignored. */
+		break;
+
+	case IOAPIC_REG_APIC_ID:
+		ioapic->id = (val >> 24) & 0xf;
+		break;
+
+	case IOAPIC_REG_ARB_ID:
+		break;
+
+	default:
+		index = (ioapic->ioregsel - 0x10) >> 1;
+
+		ioapic_debug("change redir index %x val %x", index, val);
+		if (index >= IOAPIC_NUM_PINS)
+			return;
+		if (ioapic->ioregsel & 1) {
+			ioapic->redirtbl[index].bits &= 0xffffffff;
+			ioapic->redirtbl[index].bits |= (u64) val << 32;
+		} else {
+			ioapic->redirtbl[index].bits &= ~0xffffffffULL;
+			ioapic->redirtbl[index].bits |= (u32) val;
+			ioapic->redirtbl[index].fields.remote_irr = 0;
+		}
+		if (ioapic->irr & (1 << index))
+			ioapic_service(ioapic, index);
+		break;
+	}
+}
+
+static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
+			   struct kvm_lapic *target,
+			   u8 vector, u8 trig_mode, u8 delivery_mode)
+{
+	ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+		     delivery_mode);
+
+	ASSERT((delivery_mode == dest_Fixed) ||
+	       (delivery_mode == dest_LowestPrio));
+
+	kvm_apic_set_irq(target, vector, trig_mode);
+}
+
+static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+				       u8 dest_mode)
+{
+	u32 mask = 0;
+	int i;
+	struct kvm *kvm = ioapic->kvm;
+	struct kvm_vcpu *vcpu;
+
+	ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+
+	if (dest_mode == 0) {	/* Physical mode. */
+		if (dest == 0xFF) {	/* Broadcast. */
+			for (i = 0; i < KVM_MAX_VCPUS; ++i)
+				if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
+					mask |= 1 << i;
+			return mask;
+		}
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
+			if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
+				if (vcpu->apic)
+					mask = 1 << i;
+				break;
+			}
+		}
+	} else if (dest != 0)	/* Logical mode, MDA non-zero. */
+		for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+			vcpu = kvm->vcpus[i];
+			if (!vcpu)
+				continue;
+			if (vcpu->apic &&
+			    kvm_apic_match_logical_addr(vcpu->apic, dest))
+				mask |= 1 << vcpu->vcpu_id;
+		}
+	ioapic_debug("mask %x", mask);
+	return mask;
+}
+
+static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+	u8 dest = ioapic->redirtbl[irq].fields.dest_id;
+	u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
+	u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
+	u8 vector = ioapic->redirtbl[irq].fields.vector;
+	u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
+	u32 deliver_bitmask;
+	struct kvm_lapic *target;
+	struct kvm_vcpu *vcpu;
+	int vcpu_id;
+
+	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+		     "vector=%x trig_mode=%x",
+		     dest, dest_mode, delivery_mode, vector, trig_mode);
+
+	deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
+	if (!deliver_bitmask) {
+		ioapic_debug("no target on destination");
+		return;
+	}
+
+	switch (delivery_mode) {
+	case dest_LowestPrio:
+		target =
+		    kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
+		if (target != NULL)
+			ioapic_inj_irq(ioapic, target, vector,
+				       trig_mode, delivery_mode);
+		else
+			ioapic_debug("null round robin: "
+				     "mask=%x vector=%x delivery_mode=%x",
+				     deliver_bitmask, vector, dest_LowestPrio);
+		break;
+	case dest_Fixed:
+		for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+			if (!(deliver_bitmask & (1 << vcpu_id)))
+				continue;
+			deliver_bitmask &= ~(1 << vcpu_id);
+			vcpu = ioapic->kvm->vcpus[vcpu_id];
+			if (vcpu) {
+				target = vcpu->apic;
+				ioapic_inj_irq(ioapic, target, vector,
+					       trig_mode, delivery_mode);
+			}
+		}
+		break;
+
+		/* TODO: NMI */
+	default:
+		printk(KERN_WARNING "Unsupported delivery mode %d\n",
+		       delivery_mode);
+		break;
+	}
+}
+
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+{
+	u32 old_irr = ioapic->irr;
+	u32 mask = 1 << irq;
+	union ioapic_redir_entry entry;
+
+	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
+		entry = ioapic->redirtbl[irq];
+		level ^= entry.fields.polarity;
+		if (!level)
+			ioapic->irr &= ~mask;
+		else {
+			ioapic->irr |= mask;
+			if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
+			    || !entry.fields.remote_irr)
+				ioapic_service(ioapic, irq);
+		}
+	}
+}
+
+static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+{
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		if (ioapic->redirtbl[i].fields.vector == vector)
+			return i;
+	return -1;
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+	struct kvm_ioapic *ioapic = kvm->vioapic;
+	union ioapic_redir_entry *ent;
+	int gsi;
+
+	gsi = get_eoi_gsi(ioapic, vector);
+	if (gsi == -1) {
+		printk(KERN_WARNING "Can't find redir item for %d EOI\n",
+		       vector);
+		return;
+	}
+
+	ent = &ioapic->redirtbl[gsi];
+	ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+
+	ent->fields.remote_irr = 0;
+	if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+		ioapic_deliver(ioapic, gsi);
+}
+
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+
+	return ((addr >= ioapic->base_address &&
+		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+			     void *val)
+{
+	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	u32 result;
+
+	ioapic_debug("addr %lx", (unsigned long)addr);
+	ASSERT(!(addr & 0xf));	/* check alignment */
+
+	addr &= 0xff;
+	switch (addr) {
+	case IOAPIC_REG_SELECT:
+		result = ioapic->ioregsel;
+		break;
+
+	case IOAPIC_REG_WINDOW:
+		result = ioapic_read_indirect(ioapic, addr, len);
+		break;
+
+	default:
+		result = 0;
+		break;
+	}
+	switch (len) {
+	case 8:
+		*(u64 *) val = result;
+		break;
+	case 1:
+	case 2:
+	case 4:
+		memcpy(val, (char *)&result, len);
+		break;
+	default:
+		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+	}
+}
+
+static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+			      const void *val)
+{
+	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+	u32 data;
+
+	ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
+		     addr, len, val);
+	ASSERT(!(addr & 0xf));	/* check alignment */
+	if (len == 4 || len == 8)
+		data = *(u32 *) val;
+	else {
+		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
+		return;
+	}
+
+	addr &= 0xff;
+	switch (addr) {
+	case IOAPIC_REG_SELECT:
+		ioapic->ioregsel = data;
+		break;
+
+	case IOAPIC_REG_WINDOW:
+		ioapic_write_indirect(ioapic, data);
+		break;
+
+	default:
+		break;
+	}
+}
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+	struct kvm_ioapic *ioapic;
+	int i;
+
+	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+	if (!ioapic)
+		return -ENOMEM;
+	kvm->vioapic = ioapic;
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		ioapic->redirtbl[i].fields.mask = 1;
+	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+	ioapic->dev.read = ioapic_mmio_read;
+	ioapic->dev.write = ioapic_mmio_write;
+	ioapic->dev.in_range = ioapic_in_range;
+	ioapic->dev.private = ioapic;
+	ioapic->kvm = kvm;
+	kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
+	return 0;
+}
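Every redirection-table access above manipulates one 64-bit register whose bit layout matches union ioapic_redir_entry, defined in irq.h below. A standalone sketch of that packing (userspace C; the vector, delivery mode, and destination values are arbitrary illustrations, not values from this commit):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t bits = 0;

		bits |= 0x31;			/* vector (bits 0-7) */
		bits |= (uint64_t)1 << 8;	/* delivery_mode = lowest prio */
		bits |= (uint64_t)1 << 15;	/* trig_mode = level */
		bits |= (uint64_t)3 << 56;	/* dest_id in the top byte */

		/* The ioregsel-indexed window reads these two halves. */
		printf("low dword %08x, high dword %08x\n",
		       (uint32_t)bits, (uint32_t)(bits >> 32));
		return 0;
	}
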
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
new file mode 100644
index 000000000000..7628c7ff628f
--- /dev/null
+++ b/drivers/kvm/irq.c
@@ -0,0 +1,98 @@
+/*
+ * irq.c: API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#include <linux/module.h>
+
+#include "kvm.h"
+#include "irq.h"
+
+/*
+ * check if there is pending interrupt without
+ * intack.
+ */
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
+{
+	struct kvm_pic *s;
+
+	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
+		if (kvm_apic_accept_pic_intr(v)) {
+			s = pic_irqchip(v->kvm);	/* PIC */
+			return s->output;
+		} else
+			return 0;
+	}
+	return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
+
+/*
+ * Read pending interrupt vector and intack.
+ */
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
+{
+	struct kvm_pic *s;
+	int vector;
+
+	vector = kvm_get_apic_interrupt(v);	/* APIC */
+	if (vector == -1) {
+		if (kvm_apic_accept_pic_intr(v)) {
+			s = pic_irqchip(v->kvm);
+			s->output = 0;		/* PIC */
+			vector = kvm_pic_read_irq(s);
+		}
+	}
+	return vector;
+}
+EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+	printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int ipi_pcpu = vcpu->cpu;
+
+	if (waitqueue_active(&vcpu->wq)) {
+		wake_up_interruptible(&vcpu->wq);
+		++vcpu->stat.halt_wakeup;
+	}
+	if (vcpu->guest_mode)
+		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+}
+
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
+{
+	kvm_inject_apic_timer_irqs(vcpu);
+	/* TODO: PIT, RTC etc. */
+}
+EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
+
+void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+	kvm_apic_timer_intr_post(vcpu, vec);
+	/* TODO: PIT, RTC etc. */
+}
+EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
new file mode 100644
index 000000000000..11fc014e2b30
--- /dev/null
+++ b/drivers/kvm/irq.h
@@ -0,0 +1,165 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include "kvm.h"
+
+typedef void irq_request_func(void *opaque, int level);
+
+struct kvm_kpic_state {
+	u8 last_irr;	/* edge detection */
+	u8 irr;		/* interrupt request register */
+	u8 imr;		/* interrupt mask register */
+	u8 isr;		/* interrupt service register */
+	u8 priority_add;	/* highest irq priority */
+	u8 irq_base;
+	u8 read_reg_select;
+	u8 poll;
+	u8 special_mask;
+	u8 init_state;
+	u8 auto_eoi;
+	u8 rotate_on_auto_eoi;
+	u8 special_fully_nested_mode;
+	u8 init4;	/* true if 4 byte init */
+	u8 elcr;	/* PIIX edge/trigger selection */
+	u8 elcr_mask;
+	struct kvm_pic *pics_state;
+};
+
+struct kvm_pic {
+	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
+	irq_request_func *irq_request;
+	void *irq_request_opaque;
+	int output;		/* intr from master PIC */
+	struct kvm_io_device dev;
+};
+
+struct kvm_pic *kvm_create_pic(struct kvm *kvm);
+void kvm_pic_set_irq(void *opaque, int irq, int level);
+int kvm_pic_read_irq(struct kvm_pic *s);
+int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+void kvm_pic_update_irq(struct kvm_pic *s);
+
+#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
+#define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
+#define IOAPIC_EDGE_TRIG 0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
+#define IOAPIC_MEM_LENGTH 0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT 0x00
+#define IOAPIC_REG_WINDOW 0x10
+#define IOAPIC_REG_EOI 0x40	/* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID 0x02	/* x86 IOAPIC only */
+
+struct kvm_ioapic {
+	u64 base_address;
+	u32 ioregsel;
+	u32 id;
+	u32 irr;
+	u32 pad;
+	union ioapic_redir_entry {
+		u64 bits;
+		struct {
+			u8 vector;
+			u8 delivery_mode:3;
+			u8 dest_mode:1;
+			u8 delivery_status:1;
+			u8 polarity:1;
+			u8 remote_irr:1;
+			u8 trig_mode:1;
+			u8 mask:1;
+			u8 reserve:7;
+			u8 reserved[4];
+			u8 dest_id;
+		} fields;
+	} redirtbl[IOAPIC_NUM_PINS];
+	struct kvm_io_device dev;
+	struct kvm *kvm;
+};
+
+struct kvm_lapic {
+	unsigned long base_address;
+	struct kvm_io_device dev;
+	struct {
+		atomic_t pending;
+		s64 period;	/* unit: ns */
+		u32 divide_count;
+		ktime_t last_update;
+		struct hrtimer dev;
+	} timer;
+	struct kvm_vcpu *vcpu;
+	struct page *regs_page;
+	void *regs;
+};
+
+#ifdef DEBUG
+#define ASSERT(x)							\
+do {									\
+	if (!(x)) {							\
+		printk(KERN_EMERG "assertion failed %s: %d: %s\n",	\
+		       __FILE__, __LINE__, #x);				\
+		BUG();							\
+	}								\
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_lapic_reset(struct kvm_vcpu *vcpu);
+void kvm_free_apic(struct kvm_lapic *apic);
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
+				       unsigned long bitmap);
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
+int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 336be86c6f5a..ad0813843adc 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -13,60 +13,38 @@
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/preempt.h>
 #include <asm/signal.h>
 
-#include "vmx.h"
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
 
-#define CR0_PE_MASK (1ULL << 0)
-#define CR0_MP_MASK (1ULL << 1)
-#define CR0_TS_MASK (1ULL << 3)
-#define CR0_NE_MASK (1ULL << 5)
-#define CR0_WP_MASK (1ULL << 16)
-#define CR0_NW_MASK (1ULL << 29)
-#define CR0_CD_MASK (1ULL << 30)
-#define CR0_PG_MASK (1ULL << 31)
-
-#define CR3_WPT_MASK (1ULL << 3)
-#define CR3_PCD_MASK (1ULL << 4)
-
-#define CR3_RESEVED_BITS 0x07ULL
-#define CR3_L_MODE_RESEVED_BITS (~((1ULL << 40) - 1) | 0x0fe7ULL)
-#define CR3_FLAGS_MASK ((1ULL << 5) - 1)
-
-#define CR4_VME_MASK (1ULL << 0)
-#define CR4_PSE_MASK (1ULL << 4)
-#define CR4_PAE_MASK (1ULL << 5)
-#define CR4_PGE_MASK (1ULL << 7)
-#define CR4_VMXE_MASK (1ULL << 13)
+#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
+#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
+#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL)
 
 #define KVM_GUEST_CR0_MASK \
-	(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \
-	 | CR0_NW_MASK | CR0_CD_MASK)
+	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \
+	 | X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON \
-	(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \
-	 | CR0_MP_MASK)
+	(X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \
+	 | X86_CR0_MP)
 #define KVM_GUEST_CR4_MASK \
-	(CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK)
-#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK)
-#define KVM_RMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK | CR4_VME_MASK)
+	(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
+#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
+#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
 
 #define INVALID_PAGE (~(hpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)
 
 #define KVM_MAX_VCPUS 4
 #define KVM_ALIAS_SLOTS 4
-#define KVM_MEMORY_SLOTS 4
+#define KVM_MEMORY_SLOTS 8
 #define KVM_NUM_MMU_PAGES 1024
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 40
 
-#define FX_IMAGE_SIZE 512
-#define FX_IMAGE_ALIGN 16
-#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
-
 #define DE_VECTOR 0
 #define NM_VECTOR 7
 #define DF_VECTOR 8
@@ -158,15 +136,8 @@ struct kvm_mmu_page {
 	};
 };
 
-struct vmcs {
-	u32 revision_id;
-	u32 abort;
-	char data[0];
-};
-
-#define vmx_msr_entry kvm_msr_entry
-
 struct kvm_vcpu;
+extern struct kmem_cache *kvm_vcpu_cache;
 
 /*
  * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
@@ -260,6 +231,7 @@ struct kvm_stat {
 	u32 signal_exits;
 	u32 irq_window_exits;
 	u32 halt_exits;
+	u32 halt_wakeup;
 	u32 request_irq_exits;
 	u32 irq_exits;
 	u32 light_exits;
@@ -328,21 +300,17 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 
 struct kvm_vcpu {
 	struct kvm *kvm;
-	union {
-		struct vmcs *vmcs;
-		struct vcpu_svm *svm;
-	};
+	struct preempt_notifier preempt_notifier;
+	int vcpu_id;
 	struct mutex mutex;
 	int cpu;
-	int launched;
 	u64 host_tsc;
 	struct kvm_run *run;
 	int interrupt_window_open;
 	int guest_mode;
 	unsigned long requests;
 	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
-	unsigned long irq_pending[NR_IRQ_WORDS];
+	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
 	unsigned long rip;	/* needs vcpu_load_rsp_rip() */
 
@@ -357,15 +325,15 @@ struct kvm_vcpu {
 	u64 pdptrs[4]; /* pae */
 	u64 shadow_efer;
 	u64 apic_base;
+	struct kvm_lapic *apic;    /* kernel irqchip context */
+#define VCPU_MP_STATE_RUNNABLE          0
+#define VCPU_MP_STATE_UNINITIALIZED    1
+#define VCPU_MP_STATE_INIT_RECEIVED    2
+#define VCPU_MP_STATE_SIPI_RECEIVED    3
+#define VCPU_MP_STATE_HALTED           4
+	int mp_state;
+	int sipi_vector;
 	u64 ia32_misc_enable_msr;
-	int nmsrs;
-	int save_nmsrs;
-	int msr_offset_efer;
-#ifdef CONFIG_X86_64
-	int msr_offset_kernel_gs_base;
-#endif
-	struct vmx_msr_entry *guest_msrs;
-	struct vmx_msr_entry *host_msrs;
 
 	struct kvm_mmu mmu;
 
@@ -379,16 +347,10 @@ struct kvm_vcpu {
 
 	struct kvm_guest_debug guest_debug;
 
-	char fx_buf[FX_BUF_SIZE];
-	char *host_fx_image;
-	char *guest_fx_image;
+	struct i387_fxsave_struct host_fx_image;
+	struct i387_fxsave_struct guest_fx_image;
 	int fpu_active;
 	int guest_fpu_loaded;
-	struct vmx_host_state {
-		int loaded;
-		u16 fs_sel, gs_sel, ldt_sel;
-		int fs_gs_ldt_reload_needed;
-	} vmx_host_state;
 
 	int mmio_needed;
 	int mmio_read_completed;
@@ -399,6 +361,7 @@ struct kvm_vcpu {
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
+	wait_queue_head_t wq;
 
 	int sigset_active;
 	sigset_t sigset;
@@ -436,7 +399,7 @@ struct kvm_memory_slot {
 };
 
 struct kvm {
-	spinlock_t lock; /* protects everything except vcpus */
+	struct mutex lock; /* protects everything except vcpus */
 	int naliases;
 	struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
 	int nmemslots;
@@ -447,39 +410,59 @@ struct kvm {
 	struct list_head active_mmu_pages;
 	int n_free_mmu_pages;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
-	int nvcpus;
-	struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
-	int memory_config_version;
-	int busy;
+	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	unsigned long rmap_overflow;
 	struct list_head vm_list;
 	struct file *filp;
 	struct kvm_io_bus mmio_bus;
 	struct kvm_io_bus pio_bus;
+	struct kvm_pic *vpic;
+	struct kvm_ioapic *vioapic;
+	int round_robin_prev_vcpu;
 };
 
+static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
+{
+	return kvm->vpic;
+}
+
+static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
+{
+	return kvm->vioapic;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return pic_irqchip(kvm) != 0;
+}
+
 struct descriptor_table {
 	u16 limit;
 	unsigned long base;
 } __attribute__((packed));
 
-struct kvm_arch_ops {
+struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void); /* __init */
 	int (*disabled_by_bios)(void); /* __init */
 	void (*hardware_enable)(void *dummy); /* __init */
 	void (*hardware_disable)(void *dummy);
+	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void); /* __init */
 	void (*hardware_unsetup)(void); /* __exit */
 
-	int (*vcpu_create)(struct kvm_vcpu *vcpu);
+	/* Create, but do not attach this VCPU */
+	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
+	void (*vcpu_reset)(struct kvm_vcpu *vcpu);
 
-	void (*vcpu_load)(struct kvm_vcpu *vcpu);
+	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
+	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 	void (*vcpu_decache)(struct kvm_vcpu *vcpu);
 
 	int (*set_guest_debug)(struct kvm_vcpu *vcpu,
 			       struct kvm_debug_guest *dbg);
+	void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -505,27 +488,43 @@ struct kvm_arch_ops {
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
-	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t addr);
 	void (*tlb_flush)(struct kvm_vcpu *vcpu);
 	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
 				  unsigned long addr, u32 err_code);
 
 	void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code);
 
-	int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
-	int (*vcpu_setup)(struct kvm_vcpu *vcpu);
+	void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
+	int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
+	int (*get_irq)(struct kvm_vcpu *vcpu);
+	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
+	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
+				       struct kvm_run *run);
 };
 
-extern struct kvm_arch_ops *kvm_arch_ops;
+extern struct kvm_x86_ops *kvm_x86_ops;
+
+/* The guest did something we don't support. */
+#define pr_unimpl(vcpu, fmt, ...)					\
+ do {									\
+	if (printk_ratelimit())						\
+		printk(KERN_ERR "kvm: %i: cpu%i " fmt,			\
+		       current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
+ } while(0)
 
 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
 #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
 
-int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
-void kvm_exit_arch(void);
+int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
+void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
+
+int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
+		  struct module *module);
+void kvm_exit_x86(void);
 
 int kvm_mmu_module_init(void);
 void kvm_mmu_module_exit(void);
@@ -545,8 +544,6 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
 hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
-void kvm_emulator_want_group7_invlpg(void);
-
 extern hpa_t bad_page_address;
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
@@ -561,6 +558,7 @@ enum emulation_result {
 
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			unsigned long cr2, u16 error_code);
+void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
@@ -574,9 +572,11 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
574 572
575struct x86_emulate_ctxt; 573struct x86_emulate_ctxt;
576 574
577int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 575int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
578 int size, unsigned long count, int string, int down, 576 int size, unsigned port);
579 gva_t address, int rep, unsigned port); 577int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
578 int size, unsigned long count, int down,
579 gva_t address, int rep, unsigned port);
580void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); 580void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
581int kvm_emulate_halt(struct kvm_vcpu *vcpu); 581int kvm_emulate_halt(struct kvm_vcpu *vcpu);
582int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); 582int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
@@ -590,34 +590,33 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
590void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); 590void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
591void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); 591void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
592void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); 592void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
593unsigned long get_cr8(struct kvm_vcpu *vcpu);
593void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); 594void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
595void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
594 596
595int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); 597int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
596int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); 598int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
597 599
598void fx_init(struct kvm_vcpu *vcpu); 600void fx_init(struct kvm_vcpu *vcpu);
599 601
600void load_msrs(struct vmx_msr_entry *e, int n);
601void save_msrs(struct vmx_msr_entry *e, int n);
602void kvm_resched(struct kvm_vcpu *vcpu); 602void kvm_resched(struct kvm_vcpu *vcpu);
603void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); 603void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
604void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); 604void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
605void kvm_flush_remote_tlbs(struct kvm *kvm); 605void kvm_flush_remote_tlbs(struct kvm *kvm);
606 606
607int kvm_read_guest(struct kvm_vcpu *vcpu, 607int emulator_read_std(unsigned long addr,
608 gva_t addr, 608 void *val,
609 unsigned long size, 609 unsigned int bytes,
610 void *dest); 610 struct kvm_vcpu *vcpu);
611 611int emulator_write_emulated(unsigned long addr,
612int kvm_write_guest(struct kvm_vcpu *vcpu, 612 const void *val,
613 gva_t addr, 613 unsigned int bytes,
614 unsigned long size, 614 struct kvm_vcpu *vcpu);
615 void *data);
616 615
617unsigned long segment_base(u16 selector); 616unsigned long segment_base(u16 selector);
618 617
619void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 618void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
620 const u8 *old, const u8 *new, int bytes); 619 const u8 *new, int bytes);
621int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); 620int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
622void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); 621void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
623int kvm_mmu_load(struct kvm_vcpu *vcpu); 622int kvm_mmu_load(struct kvm_vcpu *vcpu);
@@ -656,17 +655,17 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu)
656 655
657static inline int is_pae(struct kvm_vcpu *vcpu) 656static inline int is_pae(struct kvm_vcpu *vcpu)
658{ 657{
659 return vcpu->cr4 & CR4_PAE_MASK; 658 return vcpu->cr4 & X86_CR4_PAE;
660} 659}
661 660
662static inline int is_pse(struct kvm_vcpu *vcpu) 661static inline int is_pse(struct kvm_vcpu *vcpu)
663{ 662{
664 return vcpu->cr4 & CR4_PSE_MASK; 663 return vcpu->cr4 & X86_CR4_PSE;
665} 664}
666 665
667static inline int is_paging(struct kvm_vcpu *vcpu) 666static inline int is_paging(struct kvm_vcpu *vcpu)
668{ 667{
669 return vcpu->cr0 & CR0_PG_MASK; 668 return vcpu->cr0 & X86_CR0_PG;
670} 669}
671 670
672static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) 671static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
@@ -746,12 +745,12 @@ static inline unsigned long read_msr(unsigned long msr)
746} 745}
747#endif 746#endif
748 747
749static inline void fx_save(void *image) 748static inline void fx_save(struct i387_fxsave_struct *image)
750{ 749{
751 asm ("fxsave (%0)":: "r" (image)); 750 asm ("fxsave (%0)":: "r" (image));
752} 751}
753 752
754static inline void fx_restore(void *image) 753static inline void fx_restore(struct i387_fxsave_struct *image)
755{ 754{
756 asm ("fxrstor (%0)":: "r" (image)); 755 asm ("fxrstor (%0)":: "r" (image));
757} 756}
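Giving fx_save()/fx_restore() a concrete struct i387_fxsave_struct * argument (instead of void *) lets the compiler reject callers that hand over an arbitrary buffer. That matters because fxsave/fxrstor silently require a 512-byte area aligned to 16 bytes, a contract the struct type carries. A minimal sketch:

	/* Sketch: the fxsave area must be 512 bytes and 16-byte aligned;
	 * struct i387_fxsave_struct satisfies both. */
	struct i387_fxsave_struct image __attribute__((aligned(16)));

	fx_save(&image);	/* fxsave (%0) */
	fx_restore(&image);	/* fxrstor (%0) */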
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index cd0557954e50..353e58527d15 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -18,6 +18,7 @@
18#include "kvm.h" 18#include "kvm.h"
19#include "x86_emulate.h" 19#include "x86_emulate.h"
20#include "segment_descriptor.h" 20#include "segment_descriptor.h"
21#include "irq.h"
21 22
22#include <linux/kvm.h> 23#include <linux/kvm.h>
23#include <linux/module.h> 24#include <linux/module.h>
@@ -37,6 +38,7 @@
37#include <linux/cpumask.h> 38#include <linux/cpumask.h>
38#include <linux/smp.h> 39#include <linux/smp.h>
39#include <linux/anon_inodes.h> 40#include <linux/anon_inodes.h>
41#include <linux/profile.h>
40 42
41#include <asm/processor.h> 43#include <asm/processor.h>
42#include <asm/msr.h> 44#include <asm/msr.h>
@@ -52,9 +54,11 @@ static LIST_HEAD(vm_list);
52 54
53static cpumask_t cpus_hardware_enabled; 55static cpumask_t cpus_hardware_enabled;
54 56
55struct kvm_arch_ops *kvm_arch_ops; 57struct kvm_x86_ops *kvm_x86_ops;
58struct kmem_cache *kvm_vcpu_cache;
59EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
56 60
57static void hardware_disable(void *ignored); 61static __read_mostly struct preempt_ops kvm_preempt_ops;
58 62
59#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) 63#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
60 64
@@ -73,6 +77,7 @@ static struct kvm_stats_debugfs_item {
73 { "signal_exits", STAT_OFFSET(signal_exits) }, 77 { "signal_exits", STAT_OFFSET(signal_exits) },
74 { "irq_window", STAT_OFFSET(irq_window_exits) }, 78 { "irq_window", STAT_OFFSET(irq_window_exits) },
75 { "halt_exits", STAT_OFFSET(halt_exits) }, 79 { "halt_exits", STAT_OFFSET(halt_exits) },
80 { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
76 { "request_irq", STAT_OFFSET(request_irq_exits) }, 81 { "request_irq", STAT_OFFSET(request_irq_exits) },
77 { "irq_exits", STAT_OFFSET(irq_exits) }, 82 { "irq_exits", STAT_OFFSET(irq_exits) },
78 { "light_exits", STAT_OFFSET(light_exits) }, 83 { "light_exits", STAT_OFFSET(light_exits) },
@@ -84,10 +89,17 @@ static struct dentry *debugfs_dir;
84 89
85#define MAX_IO_MSRS 256 90#define MAX_IO_MSRS 256
86 91
87#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL 92#define CR0_RESERVED_BITS \
88#define LMSW_GUEST_MASK 0x0eULL 93 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
89#define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) 94 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
90#define CR8_RESEVED_BITS (~0x0fULL) 95 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
96#define CR4_RESERVED_BITS \
97 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
98 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
99 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
100 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
101
102#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
91#define EFER_RESERVED_BITS 0xfffffffffffff2fe 103#define EFER_RESERVED_BITS 0xfffffffffffff2fe
92 104
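The rewritten masks invert an OR of the architectural X86_CR* flag bits, so the reserved set is derived from the list of known-good bits rather than hand-maintained as a hex constant (fixing the old RESEVED spelling along the way). Any attempt to set a bit outside the allowed set then trips a single AND, as set_cr0()/set_cr4() below do:

	/* Sketch; bit 20 stands in for any bit outside the allowed set. */
	unsigned long cr4 = X86_CR4_PAE | (1UL << 20);

	if (cr4 & CR4_RESERVED_BITS)
		inject_gp(vcpu);	/* taken: bit 20 is reserved here */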
93#ifdef CONFIG_X86_64 105#ifdef CONFIG_X86_64
@@ -139,82 +151,14 @@ static inline int valid_vcpu(int n)
139 return likely(n >= 0 && n < KVM_MAX_VCPUS); 151 return likely(n >= 0 && n < KVM_MAX_VCPUS);
140} 152}
141 153
142int kvm_read_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
143 void *dest)
144{
145 unsigned char *host_buf = dest;
146 unsigned long req_size = size;
147
148 while (size) {
149 hpa_t paddr;
150 unsigned now;
151 unsigned offset;
152 hva_t guest_buf;
153
154 paddr = gva_to_hpa(vcpu, addr);
155
156 if (is_error_hpa(paddr))
157 break;
158
159 guest_buf = (hva_t)kmap_atomic(
160 pfn_to_page(paddr >> PAGE_SHIFT),
161 KM_USER0);
162 offset = addr & ~PAGE_MASK;
163 guest_buf |= offset;
164 now = min(size, PAGE_SIZE - offset);
165 memcpy(host_buf, (void*)guest_buf, now);
166 host_buf += now;
167 addr += now;
168 size -= now;
169 kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0);
170 }
171 return req_size - size;
172}
173EXPORT_SYMBOL_GPL(kvm_read_guest);
174
175int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size,
176 void *data)
177{
178 unsigned char *host_buf = data;
179 unsigned long req_size = size;
180
181 while (size) {
182 hpa_t paddr;
183 unsigned now;
184 unsigned offset;
185 hva_t guest_buf;
186 gfn_t gfn;
187
188 paddr = gva_to_hpa(vcpu, addr);
189
190 if (is_error_hpa(paddr))
191 break;
192
193 gfn = vcpu->mmu.gva_to_gpa(vcpu, addr) >> PAGE_SHIFT;
194 mark_page_dirty(vcpu->kvm, gfn);
195 guest_buf = (hva_t)kmap_atomic(
196 pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0);
197 offset = addr & ~PAGE_MASK;
198 guest_buf |= offset;
199 now = min(size, PAGE_SIZE - offset);
200 memcpy((void*)guest_buf, host_buf, now);
201 host_buf += now;
202 addr += now;
203 size -= now;
204 kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0);
205 }
206 return req_size - size;
207}
208EXPORT_SYMBOL_GPL(kvm_write_guest);
209
210void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 154void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
211{ 155{
212 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) 156 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
213 return; 157 return;
214 158
215 vcpu->guest_fpu_loaded = 1; 159 vcpu->guest_fpu_loaded = 1;
216 fx_save(vcpu->host_fx_image); 160 fx_save(&vcpu->host_fx_image);
217 fx_restore(vcpu->guest_fx_image); 161 fx_restore(&vcpu->guest_fx_image);
218} 162}
219EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); 163EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
220 164
@@ -224,8 +168,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
224 return; 168 return;
225 169
226 vcpu->guest_fpu_loaded = 0; 170 vcpu->guest_fpu_loaded = 0;
227 fx_save(vcpu->guest_fx_image); 171 fx_save(&vcpu->guest_fx_image);
228 fx_restore(vcpu->host_fx_image); 172 fx_restore(&vcpu->host_fx_image);
229} 173}
230EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); 174EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
231 175
@@ -234,13 +178,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
234 */ 178 */
235static void vcpu_load(struct kvm_vcpu *vcpu) 179static void vcpu_load(struct kvm_vcpu *vcpu)
236{ 180{
181 int cpu;
182
237 mutex_lock(&vcpu->mutex); 183 mutex_lock(&vcpu->mutex);
238 kvm_arch_ops->vcpu_load(vcpu); 184 cpu = get_cpu();
185 preempt_notifier_register(&vcpu->preempt_notifier);
186 kvm_x86_ops->vcpu_load(vcpu, cpu);
187 put_cpu();
239} 188}
240 189
241static void vcpu_put(struct kvm_vcpu *vcpu) 190static void vcpu_put(struct kvm_vcpu *vcpu)
242{ 191{
243 kvm_arch_ops->vcpu_put(vcpu); 192 preempt_disable();
193 kvm_x86_ops->vcpu_put(vcpu);
194 preempt_notifier_unregister(&vcpu->preempt_notifier);
195 preempt_enable();
244 mutex_unlock(&vcpu->mutex); 196 mutex_unlock(&vcpu->mutex);
245} 197}
246 198
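vcpu_load() no longer keeps the task pinned for the whole ioctl: it registers a preempt notifier, so the arch backend's load/put hooks run automatically every time the scheduler switches the task out and back in, and get_cpu()/put_cpu() only cover the window between registering the notifier and the initial load. A sketch of how kvm_preempt_ops (declared above) is presumably wired up later in this file:

	static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
	{
		struct kvm_vcpu *vcpu = container_of(pn, struct kvm_vcpu,
						     preempt_notifier);

		kvm_x86_ops->vcpu_load(vcpu, cpu);	/* reload per-cpu state */
	}

	static void kvm_sched_out(struct preempt_notifier *pn,
				  struct task_struct *next)
	{
		struct kvm_vcpu *vcpu = container_of(pn, struct kvm_vcpu,
						     preempt_notifier);

		kvm_x86_ops->vcpu_put(vcpu);		/* flush it back out */
	}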
@@ -261,8 +213,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
261 atomic_set(&completed, 0); 213 atomic_set(&completed, 0);
262 cpus_clear(cpus); 214 cpus_clear(cpus);
263 needed = 0; 215 needed = 0;
264 for (i = 0; i < kvm->nvcpus; ++i) { 216 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
265 vcpu = &kvm->vcpus[i]; 217 vcpu = kvm->vcpus[i];
218 if (!vcpu)
219 continue;
266 if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) 220 if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
267 continue; 221 continue;
268 cpu = vcpu->cpu; 222 cpu = vcpu->cpu;
@@ -286,37 +240,79 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
286 } 240 }
287} 241}
288 242
243int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
244{
245 struct page *page;
246 int r;
247
248 mutex_init(&vcpu->mutex);
249 vcpu->cpu = -1;
250 vcpu->mmu.root_hpa = INVALID_PAGE;
251 vcpu->kvm = kvm;
252 vcpu->vcpu_id = id;
253 if (!irqchip_in_kernel(kvm) || id == 0)
254 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
255 else
256 vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED;
257 init_waitqueue_head(&vcpu->wq);
258
259 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
260 if (!page) {
261 r = -ENOMEM;
262 goto fail;
263 }
264 vcpu->run = page_address(page);
265
266 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
267 if (!page) {
268 r = -ENOMEM;
269 goto fail_free_run;
270 }
271 vcpu->pio_data = page_address(page);
272
273 r = kvm_mmu_create(vcpu);
274 if (r < 0)
275 goto fail_free_pio_data;
276
277 return 0;
278
279fail_free_pio_data:
280 free_page((unsigned long)vcpu->pio_data);
281fail_free_run:
282 free_page((unsigned long)vcpu->run);
283fail:
284 return -ENOMEM;
285}
286EXPORT_SYMBOL_GPL(kvm_vcpu_init);
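kvm_vcpu_init() hoists allocations every arch backend previously duplicated, using the standard goto-unwind error pattern: each label frees exactly what was live when its goto fired, in reverse allocation order. (The fail path flattens every error to -ENOMEM, which happens to be harmless here since kvm_mmu_create()'s failures are allocation failures too.) The shape in miniature:

	struct page *p1, *p2;

	p1 = alloc_page(GFP_KERNEL);
	if (!p1)
		goto fail;
	p2 = alloc_page(GFP_KERNEL);
	if (!p2)
		goto fail_free_p1;
	return 0;

fail_free_p1:
	__free_page(p1);
fail:
	return -ENOMEM;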
287
288void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
289{
290 kvm_mmu_destroy(vcpu);
291 if (vcpu->apic)
292 hrtimer_cancel(&vcpu->apic->timer.dev);
293 kvm_free_apic(vcpu->apic);
294 free_page((unsigned long)vcpu->pio_data);
295 free_page((unsigned long)vcpu->run);
296}
297EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
298
289static struct kvm *kvm_create_vm(void) 299static struct kvm *kvm_create_vm(void)
290{ 300{
291 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); 301 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
292 int i;
293 302
294 if (!kvm) 303 if (!kvm)
295 return ERR_PTR(-ENOMEM); 304 return ERR_PTR(-ENOMEM);
296 305
297 kvm_io_bus_init(&kvm->pio_bus); 306 kvm_io_bus_init(&kvm->pio_bus);
298 spin_lock_init(&kvm->lock); 307 mutex_init(&kvm->lock);
299 INIT_LIST_HEAD(&kvm->active_mmu_pages); 308 INIT_LIST_HEAD(&kvm->active_mmu_pages);
300 kvm_io_bus_init(&kvm->mmio_bus); 309 kvm_io_bus_init(&kvm->mmio_bus);
301 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
302 struct kvm_vcpu *vcpu = &kvm->vcpus[i];
303
304 mutex_init(&vcpu->mutex);
305 vcpu->cpu = -1;
306 vcpu->kvm = kvm;
307 vcpu->mmu.root_hpa = INVALID_PAGE;
308 }
309 spin_lock(&kvm_lock); 310 spin_lock(&kvm_lock);
310 list_add(&kvm->vm_list, &vm_list); 311 list_add(&kvm->vm_list, &vm_list);
311 spin_unlock(&kvm_lock); 312 spin_unlock(&kvm_lock);
312 return kvm; 313 return kvm;
313} 314}
314 315
315static int kvm_dev_open(struct inode *inode, struct file *filp)
316{
317 return 0;
318}
319
320/* 316/*
321 * Free any memory in @free but not in @dont. 317 * Free any memory in @free but not in @dont.
322 */ 318 */
@@ -353,7 +349,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
353{ 349{
354 int i; 350 int i;
355 351
356 for (i = 0; i < 2; ++i) 352 for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
357 if (vcpu->pio.guest_pages[i]) { 353 if (vcpu->pio.guest_pages[i]) {
358 __free_page(vcpu->pio.guest_pages[i]); 354 __free_page(vcpu->pio.guest_pages[i]);
359 vcpu->pio.guest_pages[i] = NULL; 355 vcpu->pio.guest_pages[i] = NULL;
@@ -362,30 +358,11 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
362 358
363static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) 359static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
364{ 360{
365 if (!vcpu->vmcs)
366 return;
367
368 vcpu_load(vcpu); 361 vcpu_load(vcpu);
369 kvm_mmu_unload(vcpu); 362 kvm_mmu_unload(vcpu);
370 vcpu_put(vcpu); 363 vcpu_put(vcpu);
371} 364}
372 365
373static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
374{
375 if (!vcpu->vmcs)
376 return;
377
378 vcpu_load(vcpu);
379 kvm_mmu_destroy(vcpu);
380 vcpu_put(vcpu);
381 kvm_arch_ops->vcpu_free(vcpu);
382 free_page((unsigned long)vcpu->run);
383 vcpu->run = NULL;
384 free_page((unsigned long)vcpu->pio_data);
385 vcpu->pio_data = NULL;
386 free_pio_guest_pages(vcpu);
387}
388
389static void kvm_free_vcpus(struct kvm *kvm) 366static void kvm_free_vcpus(struct kvm *kvm)
390{ 367{
391 unsigned int i; 368 unsigned int i;
@@ -394,14 +371,15 @@ static void kvm_free_vcpus(struct kvm *kvm)
394 * Unpin any mmu pages first. 371 * Unpin any mmu pages first.
395 */ 372 */
396 for (i = 0; i < KVM_MAX_VCPUS; ++i) 373 for (i = 0; i < KVM_MAX_VCPUS; ++i)
397 kvm_unload_vcpu_mmu(&kvm->vcpus[i]); 374 if (kvm->vcpus[i])
398 for (i = 0; i < KVM_MAX_VCPUS; ++i) 375 kvm_unload_vcpu_mmu(kvm->vcpus[i]);
399 kvm_free_vcpu(&kvm->vcpus[i]); 376 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
400} 377 if (kvm->vcpus[i]) {
378 kvm_x86_ops->vcpu_free(kvm->vcpus[i]);
379 kvm->vcpus[i] = NULL;
380 }
381 }
401 382
402static int kvm_dev_release(struct inode *inode, struct file *filp)
403{
404 return 0;
405} 383}
406 384
407static void kvm_destroy_vm(struct kvm *kvm) 385static void kvm_destroy_vm(struct kvm *kvm)
@@ -411,6 +389,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
411 spin_unlock(&kvm_lock); 389 spin_unlock(&kvm_lock);
412 kvm_io_bus_destroy(&kvm->pio_bus); 390 kvm_io_bus_destroy(&kvm->pio_bus);
413 kvm_io_bus_destroy(&kvm->mmio_bus); 391 kvm_io_bus_destroy(&kvm->mmio_bus);
392 kfree(kvm->vpic);
393 kfree(kvm->vioapic);
414 kvm_free_vcpus(kvm); 394 kvm_free_vcpus(kvm);
415 kvm_free_physmem(kvm); 395 kvm_free_physmem(kvm);
416 kfree(kvm); 396 kfree(kvm);
@@ -426,7 +406,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
426 406
427static void inject_gp(struct kvm_vcpu *vcpu) 407static void inject_gp(struct kvm_vcpu *vcpu)
428{ 408{
429 kvm_arch_ops->inject_gp(vcpu, 0); 409 kvm_x86_ops->inject_gp(vcpu, 0);
430} 410}
431 411
432/* 412/*
@@ -437,58 +417,60 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
437 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; 417 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
438 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; 418 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
439 int i; 419 int i;
440 u64 pdpte;
441 u64 *pdpt; 420 u64 *pdpt;
442 int ret; 421 int ret;
443 struct page *page; 422 struct page *page;
423 u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
444 424
445 spin_lock(&vcpu->kvm->lock); 425 mutex_lock(&vcpu->kvm->lock);
446 page = gfn_to_page(vcpu->kvm, pdpt_gfn); 426 page = gfn_to_page(vcpu->kvm, pdpt_gfn);
447 /* FIXME: !page - emulate? 0xff? */ 427 if (!page) {
428 ret = 0;
429 goto out;
430 }
431
448 pdpt = kmap_atomic(page, KM_USER0); 432 pdpt = kmap_atomic(page, KM_USER0);
433 memcpy(pdpte, pdpt+offset, sizeof(pdpte));
434 kunmap_atomic(pdpt, KM_USER0);
449 435
450 ret = 1; 436 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
451 for (i = 0; i < 4; ++i) { 437 if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
452 pdpte = pdpt[offset + i];
453 if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) {
454 ret = 0; 438 ret = 0;
455 goto out; 439 goto out;
456 } 440 }
457 } 441 }
442 ret = 1;
458 443
459 for (i = 0; i < 4; ++i) 444 memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
460 vcpu->pdptrs[i] = pdpt[offset + i];
461
462out: 445out:
463 kunmap_atomic(pdpt, KM_USER0); 446 mutex_unlock(&vcpu->kvm->lock);
464 spin_unlock(&vcpu->kvm->lock);
465 447
466 return ret; 448 return ret;
467} 449}
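load_pdptrs() now copies all four PDPTEs into a local array inside one short kmap_atomic() window and validates afterwards, and the old FIXME for a missing page becomes a clean failure. The validity rule, pulled out as a sketch (mask taken from the code above; the helper name is hypothetical): a present entry must have every must-be-zero bit clear -- bits 1-2, 5-8 and the bits above the supported physical-address range:

	static inline int pdpte_valid(u64 pdpte)
	{
		return !(pdpte & 1) ||				/* not present: fine */
		       !(pdpte & 0xfffffff0000001e6ull);	/* no reserved bits */
	}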
468 450
469void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 451void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
470{ 452{
471 if (cr0 & CR0_RESEVED_BITS) { 453 if (cr0 & CR0_RESERVED_BITS) {
472 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 454 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
473 cr0, vcpu->cr0); 455 cr0, vcpu->cr0);
474 inject_gp(vcpu); 456 inject_gp(vcpu);
475 return; 457 return;
476 } 458 }
477 459
478 if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) { 460 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
479 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); 461 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
480 inject_gp(vcpu); 462 inject_gp(vcpu);
481 return; 463 return;
482 } 464 }
483 465
484 if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) { 466 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
485 printk(KERN_DEBUG "set_cr0: #GP, set PG flag " 467 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
486 "and a clear PE flag\n"); 468 "and a clear PE flag\n");
487 inject_gp(vcpu); 469 inject_gp(vcpu);
488 return; 470 return;
489 } 471 }
490 472
491 if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { 473 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
492#ifdef CONFIG_X86_64 474#ifdef CONFIG_X86_64
493 if ((vcpu->shadow_efer & EFER_LME)) { 475 if ((vcpu->shadow_efer & EFER_LME)) {
494 int cs_db, cs_l; 476 int cs_db, cs_l;
@@ -499,7 +481,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
499 inject_gp(vcpu); 481 inject_gp(vcpu);
500 return; 482 return;
501 } 483 }
502 kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 484 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
503 if (cs_l) { 485 if (cs_l) {
504 printk(KERN_DEBUG "set_cr0: #GP, start paging " 486 printk(KERN_DEBUG "set_cr0: #GP, start paging "
505 "in long mode while CS.L == 1\n"); 487 "in long mode while CS.L == 1\n");
@@ -518,12 +500,12 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
518 500
519 } 501 }
520 502
521 kvm_arch_ops->set_cr0(vcpu, cr0); 503 kvm_x86_ops->set_cr0(vcpu, cr0);
522 vcpu->cr0 = cr0; 504 vcpu->cr0 = cr0;
523 505
524 spin_lock(&vcpu->kvm->lock); 506 mutex_lock(&vcpu->kvm->lock);
525 kvm_mmu_reset_context(vcpu); 507 kvm_mmu_reset_context(vcpu);
526 spin_unlock(&vcpu->kvm->lock); 508 mutex_unlock(&vcpu->kvm->lock);
527 return; 509 return;
528} 510}
529EXPORT_SYMBOL_GPL(set_cr0); 511EXPORT_SYMBOL_GPL(set_cr0);
@@ -536,62 +518,72 @@ EXPORT_SYMBOL_GPL(lmsw);
536 518
537void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 519void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
538{ 520{
539 if (cr4 & CR4_RESEVED_BITS) { 521 if (cr4 & CR4_RESERVED_BITS) {
540 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); 522 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
541 inject_gp(vcpu); 523 inject_gp(vcpu);
542 return; 524 return;
543 } 525 }
544 526
545 if (is_long_mode(vcpu)) { 527 if (is_long_mode(vcpu)) {
546 if (!(cr4 & CR4_PAE_MASK)) { 528 if (!(cr4 & X86_CR4_PAE)) {
547 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " 529 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
548 "in long mode\n"); 530 "in long mode\n");
549 inject_gp(vcpu); 531 inject_gp(vcpu);
550 return; 532 return;
551 } 533 }
552 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) 534 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
553 && !load_pdptrs(vcpu, vcpu->cr3)) { 535 && !load_pdptrs(vcpu, vcpu->cr3)) {
554 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); 536 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
555 inject_gp(vcpu); 537 inject_gp(vcpu);
538 return;
556 } 539 }
557 540
558 if (cr4 & CR4_VMXE_MASK) { 541 if (cr4 & X86_CR4_VMXE) {
559 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); 542 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
560 inject_gp(vcpu); 543 inject_gp(vcpu);
561 return; 544 return;
562 } 545 }
563 kvm_arch_ops->set_cr4(vcpu, cr4); 546 kvm_x86_ops->set_cr4(vcpu, cr4);
564 spin_lock(&vcpu->kvm->lock); 547 vcpu->cr4 = cr4;
548 mutex_lock(&vcpu->kvm->lock);
565 kvm_mmu_reset_context(vcpu); 549 kvm_mmu_reset_context(vcpu);
566 spin_unlock(&vcpu->kvm->lock); 550 mutex_unlock(&vcpu->kvm->lock);
567} 551}
568EXPORT_SYMBOL_GPL(set_cr4); 552EXPORT_SYMBOL_GPL(set_cr4);
569 553
570void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 554void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
571{ 555{
572 if (is_long_mode(vcpu)) { 556 if (is_long_mode(vcpu)) {
573 if (cr3 & CR3_L_MODE_RESEVED_BITS) { 557 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
574 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); 558 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
575 inject_gp(vcpu); 559 inject_gp(vcpu);
576 return; 560 return;
577 } 561 }
578 } else { 562 } else {
579 if (cr3 & CR3_RESEVED_BITS) { 563 if (is_pae(vcpu)) {
580 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); 564 if (cr3 & CR3_PAE_RESERVED_BITS) {
581 inject_gp(vcpu); 565 printk(KERN_DEBUG
582 return; 566 "set_cr3: #GP, reserved bits\n");
583 } 567 inject_gp(vcpu);
584 if (is_paging(vcpu) && is_pae(vcpu) && 568 return;
585 !load_pdptrs(vcpu, cr3)) { 569 }
586 printk(KERN_DEBUG "set_cr3: #GP, pdptrs " 570 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
587 "reserved bits\n"); 571 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
588 inject_gp(vcpu); 572 "reserved bits\n");
589 return; 573 inject_gp(vcpu);
574 return;
575 }
576 } else {
577 if (cr3 & CR3_NONPAE_RESERVED_BITS) {
578 printk(KERN_DEBUG
579 "set_cr3: #GP, reserved bits\n");
580 inject_gp(vcpu);
581 return;
582 }
590 } 583 }
591 } 584 }
592 585
593 vcpu->cr3 = cr3; 586 mutex_lock(&vcpu->kvm->lock);
594 spin_lock(&vcpu->kvm->lock);
595 /* 587 /*
596 * Does the new cr3 value map to physical memory? (Note, we 588 * Does the new cr3 value map to physical memory? (Note, we
597 * catch an invalid cr3 even in real-mode, because it would 589 * catch an invalid cr3 even in real-mode, because it would
@@ -603,46 +595,73 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
603 */ 595 */
604 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) 596 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
605 inject_gp(vcpu); 597 inject_gp(vcpu);
606 else 598 else {
599 vcpu->cr3 = cr3;
607 vcpu->mmu.new_cr3(vcpu); 600 vcpu->mmu.new_cr3(vcpu);
608 spin_unlock(&vcpu->kvm->lock); 601 }
602 mutex_unlock(&vcpu->kvm->lock);
609} 603}
610EXPORT_SYMBOL_GPL(set_cr3); 604EXPORT_SYMBOL_GPL(set_cr3);
611 605
612void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 606void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
613{ 607{
614 if ( cr8 & CR8_RESEVED_BITS) { 608 if (cr8 & CR8_RESERVED_BITS) {
615 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); 609 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
616 inject_gp(vcpu); 610 inject_gp(vcpu);
617 return; 611 return;
618 } 612 }
619 vcpu->cr8 = cr8; 613 if (irqchip_in_kernel(vcpu->kvm))
614 kvm_lapic_set_tpr(vcpu, cr8);
615 else
616 vcpu->cr8 = cr8;
620} 617}
621EXPORT_SYMBOL_GPL(set_cr8); 618EXPORT_SYMBOL_GPL(set_cr8);
622 619
623void fx_init(struct kvm_vcpu *vcpu) 620unsigned long get_cr8(struct kvm_vcpu *vcpu)
621{
622 if (irqchip_in_kernel(vcpu->kvm))
623 return kvm_lapic_get_cr8(vcpu);
624 else
625 return vcpu->cr8;
626}
627EXPORT_SYMBOL_GPL(get_cr8);
628
629u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
624{ 630{
625 struct __attribute__ ((__packed__)) fx_image_s { 631 if (irqchip_in_kernel(vcpu->kvm))
626 u16 control; //fcw 632 return vcpu->apic_base;
627 u16 status; //fsw 633 else
628 u16 tag; // ftw 634 return vcpu->apic_base;
629 u16 opcode; //fop 635}
630 u64 ip; // fpu ip 636EXPORT_SYMBOL_GPL(kvm_get_apic_base);
631 u64 operand;// fpu dp
632 u32 mxcsr;
633 u32 mxcsr_mask;
634 637
635 } *fx_image; 638void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
639{
640 /* TODO: reserve bits check */
641 if (irqchip_in_kernel(vcpu->kvm))
642 kvm_lapic_set_base(vcpu, data);
643 else
644 vcpu->apic_base = data;
645}
646EXPORT_SYMBOL_GPL(kvm_set_apic_base);
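With the in-kernel irqchip, CR8 is just another view of the local APIC's task-priority register, so set_cr8()/get_cr8() forward to the lapic helpers; without it, userspace models the APIC and vcpu->cr8 is plain state. Both branches of kvm_get_apic_base() read vcpu->apic_base, which only works if kvm_lapic_set_base() keeps that field in sync -- this series appears to arrange exactly that. The architectural mapping the lapic helpers are assumed to implement:

	/* CR8 carries TPR[7:4]. */
	static inline u32 cr8_to_tpr(unsigned long cr8)
	{
		return (cr8 & 0x0ful) << 4;
	}

	static inline unsigned long tpr_to_cr8(u32 tpr)
	{
		return (tpr >> 4) & 0x0f;
	}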
647
648void fx_init(struct kvm_vcpu *vcpu)
649{
650 unsigned after_mxcsr_mask;
636 651
637 fx_save(vcpu->host_fx_image); 652 /* Initialize guest FPU by resetting ours and saving into guest's */
653 preempt_disable();
654 fx_save(&vcpu->host_fx_image);
638 fpu_init(); 655 fpu_init();
639 fx_save(vcpu->guest_fx_image); 656 fx_save(&vcpu->guest_fx_image);
640 fx_restore(vcpu->host_fx_image); 657 fx_restore(&vcpu->host_fx_image);
658 preempt_enable();
641 659
642 fx_image = (struct fx_image_s *)vcpu->guest_fx_image; 660 vcpu->cr0 |= X86_CR0_ET;
643 fx_image->mxcsr = 0x1f80; 661 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
644 memset(vcpu->guest_fx_image + sizeof(struct fx_image_s), 662 vcpu->guest_fx_image.mxcsr = 0x1f80;
645 0, FX_IMAGE_SIZE - sizeof(struct fx_image_s)); 663 memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask,
664 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
646} 665}
647EXPORT_SYMBOL_GPL(fx_init); 666EXPORT_SYMBOL_GPL(fx_init);
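fx_init() now builds the guest image by round-tripping through the real FPU -- save the host image, fpu_init(), save the freshly reset state as the guest image, restore the host -- with preemption disabled so a context switch cannot interleave with the sequence. 0x1f80 is MXCSR's power-on value (all SIMD exceptions masked), and the memset clears everything from st_space onward while keeping the control words fpu_init() produced. For reference, the fxsave image layout the offsetof() arithmetic relies on:

	/* First bytes of the fxsave area, per the SDM:
	 *   0: fcw    2: fsw    4: ftw    6: fop
	 *   8: fpu ip           16: fpu dp
	 *  24: mxcsr            28: mxcsr_mask
	 *  32: st_space[] ...   <- zeroed from here down
	 */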
648 667
@@ -661,7 +680,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
661 unsigned long i; 680 unsigned long i;
662 struct kvm_memory_slot *memslot; 681 struct kvm_memory_slot *memslot;
663 struct kvm_memory_slot old, new; 682 struct kvm_memory_slot old, new;
664 int memory_config_version;
665 683
666 r = -EINVAL; 684 r = -EINVAL;
667 /* General sanity checks */ 685 /* General sanity checks */
@@ -681,10 +699,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
681 if (!npages) 699 if (!npages)
682 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; 700 mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
683 701
684raced: 702 mutex_lock(&kvm->lock);
685 spin_lock(&kvm->lock);
686 703
687 memory_config_version = kvm->memory_config_version;
688 new = old = *memslot; 704 new = old = *memslot;
689 705
690 new.base_gfn = base_gfn; 706 new.base_gfn = base_gfn;
@@ -707,11 +723,6 @@ raced:
707 (base_gfn >= s->base_gfn + s->npages))) 723 (base_gfn >= s->base_gfn + s->npages)))
708 goto out_unlock; 724 goto out_unlock;
709 } 725 }
710 /*
711 * Do memory allocations outside lock. memory_config_version will
712 * detect any races.
713 */
714 spin_unlock(&kvm->lock);
715 726
716 /* Deallocate if slot is being removed */ 727 /* Deallocate if slot is being removed */
717 if (!npages) 728 if (!npages)
@@ -728,14 +739,14 @@ raced:
728 new.phys_mem = vmalloc(npages * sizeof(struct page *)); 739 new.phys_mem = vmalloc(npages * sizeof(struct page *));
729 740
730 if (!new.phys_mem) 741 if (!new.phys_mem)
731 goto out_free; 742 goto out_unlock;
732 743
733 memset(new.phys_mem, 0, npages * sizeof(struct page *)); 744 memset(new.phys_mem, 0, npages * sizeof(struct page *));
734 for (i = 0; i < npages; ++i) { 745 for (i = 0; i < npages; ++i) {
735 new.phys_mem[i] = alloc_page(GFP_HIGHUSER 746 new.phys_mem[i] = alloc_page(GFP_HIGHUSER
736 | __GFP_ZERO); 747 | __GFP_ZERO);
737 if (!new.phys_mem[i]) 748 if (!new.phys_mem[i])
738 goto out_free; 749 goto out_unlock;
739 set_page_private(new.phys_mem[i],0); 750 set_page_private(new.phys_mem[i],0);
740 } 751 }
741 } 752 }
@@ -746,39 +757,25 @@ raced:
746 757
747 new.dirty_bitmap = vmalloc(dirty_bytes); 758 new.dirty_bitmap = vmalloc(dirty_bytes);
748 if (!new.dirty_bitmap) 759 if (!new.dirty_bitmap)
749 goto out_free; 760 goto out_unlock;
750 memset(new.dirty_bitmap, 0, dirty_bytes); 761 memset(new.dirty_bitmap, 0, dirty_bytes);
751 } 762 }
752 763
753 spin_lock(&kvm->lock);
754
755 if (memory_config_version != kvm->memory_config_version) {
756 spin_unlock(&kvm->lock);
757 kvm_free_physmem_slot(&new, &old);
758 goto raced;
759 }
760
761 r = -EAGAIN;
762 if (kvm->busy)
763 goto out_unlock;
764
765 if (mem->slot >= kvm->nmemslots) 764 if (mem->slot >= kvm->nmemslots)
766 kvm->nmemslots = mem->slot + 1; 765 kvm->nmemslots = mem->slot + 1;
767 766
768 *memslot = new; 767 *memslot = new;
769 ++kvm->memory_config_version;
770 768
771 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 769 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
772 kvm_flush_remote_tlbs(kvm); 770 kvm_flush_remote_tlbs(kvm);
773 771
774 spin_unlock(&kvm->lock); 772 mutex_unlock(&kvm->lock);
775 773
776 kvm_free_physmem_slot(&old, &new); 774 kvm_free_physmem_slot(&old, &new);
777 return 0; 775 return 0;
778 776
779out_unlock: 777out_unlock:
780 spin_unlock(&kvm->lock); 778 mutex_unlock(&kvm->lock);
781out_free:
782 kvm_free_physmem_slot(&new, &old); 779 kvm_free_physmem_slot(&new, &old);
783out: 780out:
784 return r; 781 return r;
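The deleted raced:/memory_config_version/kvm->busy machinery existed because memory allocation was illegal under the old spinlock; with kvm->lock a mutex, vmalloc() and alloc_page() may sleep while it is held, so the slot configuration cannot change underneath them and every failure path collapses into a single unlock:

	mutex_lock(&kvm->lock);
	/* validate, vmalloc() the page array, alloc_page() each page --
	 * all of which may sleep -- then install the new slot */
	mutex_unlock(&kvm->lock);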
@@ -795,14 +792,8 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
795 int n; 792 int n;
796 unsigned long any = 0; 793 unsigned long any = 0;
797 794
798 spin_lock(&kvm->lock); 795 mutex_lock(&kvm->lock);
799 796
800 /*
801 * Prevent changes to guest memory configuration even while the lock
802 * is not taken.
803 */
804 ++kvm->busy;
805 spin_unlock(&kvm->lock);
806 r = -EINVAL; 797 r = -EINVAL;
807 if (log->slot >= KVM_MEMORY_SLOTS) 798 if (log->slot >= KVM_MEMORY_SLOTS)
808 goto out; 799 goto out;
@@ -821,18 +812,17 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
821 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 812 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
822 goto out; 813 goto out;
823 814
824 spin_lock(&kvm->lock); 815 /* If nothing is dirty, don't bother messing with page tables. */
825 kvm_mmu_slot_remove_write_access(kvm, log->slot); 816 if (any) {
826 kvm_flush_remote_tlbs(kvm); 817 kvm_mmu_slot_remove_write_access(kvm, log->slot);
827 memset(memslot->dirty_bitmap, 0, n); 818 kvm_flush_remote_tlbs(kvm);
828 spin_unlock(&kvm->lock); 819 memset(memslot->dirty_bitmap, 0, n);
820 }
829 821
830 r = 0; 822 r = 0;
831 823
832out: 824out:
833 spin_lock(&kvm->lock); 825 mutex_unlock(&kvm->lock);
834 --kvm->busy;
835 spin_unlock(&kvm->lock);
836 return r; 826 return r;
837} 827}
838 828
@@ -862,7 +852,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
862 < alias->target_phys_addr) 852 < alias->target_phys_addr)
863 goto out; 853 goto out;
864 854
865 spin_lock(&kvm->lock); 855 mutex_lock(&kvm->lock);
866 856
867 p = &kvm->aliases[alias->slot]; 857 p = &kvm->aliases[alias->slot];
868 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 858 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -876,7 +866,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
876 866
877 kvm_mmu_zap_all(kvm); 867 kvm_mmu_zap_all(kvm);
878 868
879 spin_unlock(&kvm->lock); 869 mutex_unlock(&kvm->lock);
880 870
881 return 0; 871 return 0;
882 872
@@ -884,6 +874,63 @@ out:
884 return r; 874 return r;
885} 875}
886 876
877static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
878{
879 int r;
880
881 r = 0;
882 switch (chip->chip_id) {
883 case KVM_IRQCHIP_PIC_MASTER:
884 memcpy (&chip->chip.pic,
885 &pic_irqchip(kvm)->pics[0],
886 sizeof(struct kvm_pic_state));
887 break;
888 case KVM_IRQCHIP_PIC_SLAVE:
889 memcpy (&chip->chip.pic,
890 &pic_irqchip(kvm)->pics[1],
891 sizeof(struct kvm_pic_state));
892 break;
893 case KVM_IRQCHIP_IOAPIC:
894 memcpy (&chip->chip.ioapic,
895 ioapic_irqchip(kvm),
896 sizeof(struct kvm_ioapic_state));
897 break;
898 default:
899 r = -EINVAL;
900 break;
901 }
902 return r;
903}
904
905static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
906{
907 int r;
908
909 r = 0;
910 switch (chip->chip_id) {
911 case KVM_IRQCHIP_PIC_MASTER:
912 memcpy (&pic_irqchip(kvm)->pics[0],
913 &chip->chip.pic,
914 sizeof(struct kvm_pic_state));
915 break;
916 case KVM_IRQCHIP_PIC_SLAVE:
917 memcpy (&pic_irqchip(kvm)->pics[1],
918 &chip->chip.pic,
919 sizeof(struct kvm_pic_state));
920 break;
921 case KVM_IRQCHIP_IOAPIC:
922 memcpy (ioapic_irqchip(kvm),
923 &chip->chip.ioapic,
924 sizeof(struct kvm_ioapic_state));
925 break;
926 default:
927 r = -EINVAL;
928 break;
929 }
930 kvm_pic_update_irq(pic_irqchip(kvm));
931 return r;
932}
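These helpers back the new KVM_GET_IRQCHIP/KVM_SET_IRQCHIP ioctls (the dispatch itself is wired up elsewhere in this patch), letting userspace snapshot and restore PIC and IOAPIC state, e.g. across save/restore. A userspace sketch, assuming a VM fd on which KVM_CREATE_IRQCHIP already succeeded:

	/* userspace; needs <linux/kvm.h> and <sys/ioctl.h> */
	struct kvm_irqchip chip;

	memset(&chip, 0, sizeof(chip));
	chip.chip_id = KVM_IRQCHIP_PIC_MASTER;
	if (ioctl(vm_fd, KVM_GET_IRQCHIP, &chip) < 0)
		perror("KVM_GET_IRQCHIP");
	/* ... save or edit chip.chip.pic ... */
	if (ioctl(vm_fd, KVM_SET_IRQCHIP, &chip) < 0)
		perror("KVM_SET_IRQCHIP");

Note that the set path finishes with kvm_pic_update_irq(), so a restored pending interrupt is re-evaluated immediately.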
933
887static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 934static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
888{ 935{
889 int i; 936 int i;
@@ -930,37 +977,26 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
930} 977}
931EXPORT_SYMBOL_GPL(gfn_to_page); 978EXPORT_SYMBOL_GPL(gfn_to_page);
932 979
980/* WARNING: Does not work on aliased pages. */
933void mark_page_dirty(struct kvm *kvm, gfn_t gfn) 981void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
934{ 982{
935 int i;
936 struct kvm_memory_slot *memslot; 983 struct kvm_memory_slot *memslot;
937 unsigned long rel_gfn;
938 984
939 for (i = 0; i < kvm->nmemslots; ++i) { 985 memslot = __gfn_to_memslot(kvm, gfn);
940 memslot = &kvm->memslots[i]; 986 if (memslot && memslot->dirty_bitmap) {
941 987 unsigned long rel_gfn = gfn - memslot->base_gfn;
942 if (gfn >= memslot->base_gfn
943 && gfn < memslot->base_gfn + memslot->npages) {
944 988
945 if (!memslot->dirty_bitmap) 989 /* avoid RMW */
946 return; 990 if (!test_bit(rel_gfn, memslot->dirty_bitmap))
947 991 set_bit(rel_gfn, memslot->dirty_bitmap);
948 rel_gfn = gfn - memslot->base_gfn;
949
950 /* avoid RMW */
951 if (!test_bit(rel_gfn, memslot->dirty_bitmap))
952 set_bit(rel_gfn, memslot->dirty_bitmap);
953 return;
954 }
955 } 992 }
956} 993}
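mark_page_dirty() swaps the open-coded slot scan for __gfn_to_memslot() and keeps the test-before-set: set_bit() is a locked read-modify-write on x86, so checking first skips the atomic in the common case of an already-dirty page. The new WARNING comment is load-bearing -- __gfn_to_memslot() does not resolve aliases, so a call site is expected to look roughly like:

	/* Sketch: resolve aliases first (unalias_gfn() is file-local). */
	gfn = unalias_gfn(kvm, gfn);
	mark_page_dirty(kvm, gfn);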
957 994
958static int emulator_read_std(unsigned long addr, 995int emulator_read_std(unsigned long addr,
959 void *val, 996 void *val,
960 unsigned int bytes, 997 unsigned int bytes,
961 struct x86_emulate_ctxt *ctxt) 998 struct kvm_vcpu *vcpu)
962{ 999{
963 struct kvm_vcpu *vcpu = ctxt->vcpu;
964 void *data = val; 1000 void *data = val;
965 1001
966 while (bytes) { 1002 while (bytes) {
@@ -990,26 +1026,42 @@ static int emulator_read_std(unsigned long addr,
990 1026
991 return X86EMUL_CONTINUE; 1027 return X86EMUL_CONTINUE;
992} 1028}
1029EXPORT_SYMBOL_GPL(emulator_read_std);
993 1030
994static int emulator_write_std(unsigned long addr, 1031static int emulator_write_std(unsigned long addr,
995 const void *val, 1032 const void *val,
996 unsigned int bytes, 1033 unsigned int bytes,
997 struct x86_emulate_ctxt *ctxt) 1034 struct kvm_vcpu *vcpu)
998{ 1035{
999 printk(KERN_ERR "emulator_write_std: addr %lx n %d\n", 1036 pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes);
1000 addr, bytes);
1001 return X86EMUL_UNHANDLEABLE; 1037 return X86EMUL_UNHANDLEABLE;
1002} 1038}
1003 1039
1040/*
 1041 * Only the local APIC needs an MMIO device hook, so shortcut to it for now.
1042 */
1043static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
1044 gpa_t addr)
1045{
1046 struct kvm_io_device *dev;
1047
1048 if (vcpu->apic) {
1049 dev = &vcpu->apic->dev;
1050 if (dev->in_range(dev, addr))
1051 return dev;
1052 }
1053 return NULL;
1054}
1055
1004static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, 1056static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
1005 gpa_t addr) 1057 gpa_t addr)
1006{ 1058{
1007 /* 1059 struct kvm_io_device *dev;
1008 * Note that its important to have this wrapper function because 1060
1009 * in the very near future we will be checking for MMIOs against 1061 dev = vcpu_find_pervcpu_dev(vcpu, addr);
1010 * the LAPIC as well as the general MMIO bus 1062 if (dev == NULL)
1011 */ 1063 dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
1012 return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); 1064 return dev;
1013} 1065}
1014 1066
1015static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, 1067static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -1021,9 +1073,8 @@ static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
1021static int emulator_read_emulated(unsigned long addr, 1073static int emulator_read_emulated(unsigned long addr,
1022 void *val, 1074 void *val,
1023 unsigned int bytes, 1075 unsigned int bytes,
1024 struct x86_emulate_ctxt *ctxt) 1076 struct kvm_vcpu *vcpu)
1025{ 1077{
1026 struct kvm_vcpu *vcpu = ctxt->vcpu;
1027 struct kvm_io_device *mmio_dev; 1078 struct kvm_io_device *mmio_dev;
1028 gpa_t gpa; 1079 gpa_t gpa;
1029 1080
@@ -1031,7 +1082,7 @@ static int emulator_read_emulated(unsigned long addr,
1031 memcpy(val, vcpu->mmio_data, bytes); 1082 memcpy(val, vcpu->mmio_data, bytes);
1032 vcpu->mmio_read_completed = 0; 1083 vcpu->mmio_read_completed = 0;
1033 return X86EMUL_CONTINUE; 1084 return X86EMUL_CONTINUE;
1034 } else if (emulator_read_std(addr, val, bytes, ctxt) 1085 } else if (emulator_read_std(addr, val, bytes, vcpu)
1035 == X86EMUL_CONTINUE) 1086 == X86EMUL_CONTINUE)
1036 return X86EMUL_CONTINUE; 1087 return X86EMUL_CONTINUE;
1037 1088
@@ -1061,7 +1112,6 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1061{ 1112{
1062 struct page *page; 1113 struct page *page;
1063 void *virt; 1114 void *virt;
1064 unsigned offset = offset_in_page(gpa);
1065 1115
1066 if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) 1116 if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
1067 return 0; 1117 return 0;
@@ -1070,7 +1120,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1070 return 0; 1120 return 0;
1071 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); 1121 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
1072 virt = kmap_atomic(page, KM_USER0); 1122 virt = kmap_atomic(page, KM_USER0);
1073 kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); 1123 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
1074 memcpy(virt + offset_in_page(gpa), val, bytes); 1124 memcpy(virt + offset_in_page(gpa), val, bytes);
1075 kunmap_atomic(virt, KM_USER0); 1125 kunmap_atomic(virt, KM_USER0);
1076 return 1; 1126 return 1;
@@ -1079,14 +1129,13 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
1079static int emulator_write_emulated_onepage(unsigned long addr, 1129static int emulator_write_emulated_onepage(unsigned long addr,
1080 const void *val, 1130 const void *val,
1081 unsigned int bytes, 1131 unsigned int bytes,
1082 struct x86_emulate_ctxt *ctxt) 1132 struct kvm_vcpu *vcpu)
1083{ 1133{
1084 struct kvm_vcpu *vcpu = ctxt->vcpu;
1085 struct kvm_io_device *mmio_dev; 1134 struct kvm_io_device *mmio_dev;
1086 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); 1135 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
1087 1136
1088 if (gpa == UNMAPPED_GVA) { 1137 if (gpa == UNMAPPED_GVA) {
1089 kvm_arch_ops->inject_page_fault(vcpu, addr, 2); 1138 kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
1090 return X86EMUL_PROPAGATE_FAULT; 1139 return X86EMUL_PROPAGATE_FAULT;
1091 } 1140 }
1092 1141
@@ -1111,31 +1160,32 @@ static int emulator_write_emulated_onepage(unsigned long addr,
1111 return X86EMUL_CONTINUE; 1160 return X86EMUL_CONTINUE;
1112} 1161}
1113 1162
1114static int emulator_write_emulated(unsigned long addr, 1163int emulator_write_emulated(unsigned long addr,
1115 const void *val, 1164 const void *val,
1116 unsigned int bytes, 1165 unsigned int bytes,
1117 struct x86_emulate_ctxt *ctxt) 1166 struct kvm_vcpu *vcpu)
1118{ 1167{
1119 /* Crossing a page boundary? */ 1168 /* Crossing a page boundary? */
1120 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { 1169 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
1121 int rc, now; 1170 int rc, now;
1122 1171
1123 now = -addr & ~PAGE_MASK; 1172 now = -addr & ~PAGE_MASK;
1124 rc = emulator_write_emulated_onepage(addr, val, now, ctxt); 1173 rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
1125 if (rc != X86EMUL_CONTINUE) 1174 if (rc != X86EMUL_CONTINUE)
1126 return rc; 1175 return rc;
1127 addr += now; 1176 addr += now;
1128 val += now; 1177 val += now;
1129 bytes -= now; 1178 bytes -= now;
1130 } 1179 }
1131 return emulator_write_emulated_onepage(addr, val, bytes, ctxt); 1180 return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
1132} 1181}
1182EXPORT_SYMBOL_GPL(emulator_write_emulated);
1133 1183
1134static int emulator_cmpxchg_emulated(unsigned long addr, 1184static int emulator_cmpxchg_emulated(unsigned long addr,
1135 const void *old, 1185 const void *old,
1136 const void *new, 1186 const void *new,
1137 unsigned int bytes, 1187 unsigned int bytes,
1138 struct x86_emulate_ctxt *ctxt) 1188 struct kvm_vcpu *vcpu)
1139{ 1189{
1140 static int reported; 1190 static int reported;
1141 1191
@@ -1143,12 +1193,12 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
1143 reported = 1; 1193 reported = 1;
1144 printk(KERN_WARNING "kvm: emulating exchange as write\n"); 1194 printk(KERN_WARNING "kvm: emulating exchange as write\n");
1145 } 1195 }
1146 return emulator_write_emulated(addr, new, bytes, ctxt); 1196 return emulator_write_emulated(addr, new, bytes, vcpu);
1147} 1197}
1148 1198
1149static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 1199static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
1150{ 1200{
1151 return kvm_arch_ops->get_segment_base(vcpu, seg); 1201 return kvm_x86_ops->get_segment_base(vcpu, seg);
1152} 1202}
1153 1203
1154int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) 1204int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
@@ -1158,10 +1208,8 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
1158 1208
1159int emulate_clts(struct kvm_vcpu *vcpu) 1209int emulate_clts(struct kvm_vcpu *vcpu)
1160{ 1210{
1161 unsigned long cr0; 1211 vcpu->cr0 &= ~X86_CR0_TS;
1162 1212 kvm_x86_ops->set_cr0(vcpu, vcpu->cr0);
1163 cr0 = vcpu->cr0 & ~CR0_TS_MASK;
1164 kvm_arch_ops->set_cr0(vcpu, cr0);
1165 return X86EMUL_CONTINUE; 1213 return X86EMUL_CONTINUE;
1166} 1214}
1167 1215
@@ -1171,11 +1219,10 @@ int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest)
1171 1219
1172 switch (dr) { 1220 switch (dr) {
1173 case 0 ... 3: 1221 case 0 ... 3:
1174 *dest = kvm_arch_ops->get_dr(vcpu, dr); 1222 *dest = kvm_x86_ops->get_dr(vcpu, dr);
1175 return X86EMUL_CONTINUE; 1223 return X86EMUL_CONTINUE;
1176 default: 1224 default:
1177 printk(KERN_DEBUG "%s: unexpected dr %u\n", 1225 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
1178 __FUNCTION__, dr);
1179 return X86EMUL_UNHANDLEABLE; 1226 return X86EMUL_UNHANDLEABLE;
1180 } 1227 }
1181} 1228}
@@ -1185,7 +1232,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
1185 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 1232 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
1186 int exception; 1233 int exception;
1187 1234
1188 kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); 1235 kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
1189 if (exception) { 1236 if (exception) {
1190 /* FIXME: better handling */ 1237 /* FIXME: better handling */
1191 return X86EMUL_UNHANDLEABLE; 1238 return X86EMUL_UNHANDLEABLE;
@@ -1193,25 +1240,25 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
1193 return X86EMUL_CONTINUE; 1240 return X86EMUL_CONTINUE;
1194} 1241}
1195 1242
1196static void report_emulation_failure(struct x86_emulate_ctxt *ctxt) 1243void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
1197{ 1244{
1198 static int reported; 1245 static int reported;
1199 u8 opcodes[4]; 1246 u8 opcodes[4];
1200 unsigned long rip = ctxt->vcpu->rip; 1247 unsigned long rip = vcpu->rip;
1201 unsigned long rip_linear; 1248 unsigned long rip_linear;
1202 1249
1203 rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS); 1250 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
1204 1251
1205 if (reported) 1252 if (reported)
1206 return; 1253 return;
1207 1254
1208 emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt); 1255 emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
1209 1256
1210 printk(KERN_ERR "emulation failed but !mmio_needed?" 1257 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
1211 " rip %lx %02x %02x %02x %02x\n", 1258 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
1212 rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
1213 reported = 1; 1259 reported = 1;
1214} 1260}
1261EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
1215 1262
1216struct x86_emulate_ops emulate_ops = { 1263struct x86_emulate_ops emulate_ops = {
1217 .read_std = emulator_read_std, 1264 .read_std = emulator_read_std,
@@ -1231,12 +1278,12 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
1231 int cs_db, cs_l; 1278 int cs_db, cs_l;
1232 1279
1233 vcpu->mmio_fault_cr2 = cr2; 1280 vcpu->mmio_fault_cr2 = cr2;
1234 kvm_arch_ops->cache_regs(vcpu); 1281 kvm_x86_ops->cache_regs(vcpu);
1235 1282
1236 kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 1283 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
1237 1284
1238 emulate_ctxt.vcpu = vcpu; 1285 emulate_ctxt.vcpu = vcpu;
1239 emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu); 1286 emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
1240 emulate_ctxt.cr2 = cr2; 1287 emulate_ctxt.cr2 = cr2;
1241 emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) 1288 emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
1242 ? X86EMUL_MODE_REAL : cs_l 1289 ? X86EMUL_MODE_REAL : cs_l
@@ -1259,9 +1306,13 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
1259 emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); 1306 emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);
1260 1307
1261 vcpu->mmio_is_write = 0; 1308 vcpu->mmio_is_write = 0;
1309 vcpu->pio.string = 0;
1262 r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); 1310 r = x86_emulate_memop(&emulate_ctxt, &emulate_ops);
1311 if (vcpu->pio.string)
1312 return EMULATE_DO_MMIO;
1263 1313
1264 if ((r || vcpu->mmio_is_write) && run) { 1314 if ((r || vcpu->mmio_is_write) && run) {
1315 run->exit_reason = KVM_EXIT_MMIO;
1265 run->mmio.phys_addr = vcpu->mmio_phys_addr; 1316 run->mmio.phys_addr = vcpu->mmio_phys_addr;
1266 memcpy(run->mmio.data, vcpu->mmio_data, 8); 1317 memcpy(run->mmio.data, vcpu->mmio_data, 8);
1267 run->mmio.len = vcpu->mmio_size; 1318 run->mmio.len = vcpu->mmio_size;
@@ -1272,14 +1323,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
1272 if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) 1323 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1273 return EMULATE_DONE; 1324 return EMULATE_DONE;
1274 if (!vcpu->mmio_needed) { 1325 if (!vcpu->mmio_needed) {
1275 report_emulation_failure(&emulate_ctxt); 1326 kvm_report_emulation_failure(vcpu, "mmio");
1276 return EMULATE_FAIL; 1327 return EMULATE_FAIL;
1277 } 1328 }
1278 return EMULATE_DO_MMIO; 1329 return EMULATE_DO_MMIO;
1279 } 1330 }
1280 1331
1281 kvm_arch_ops->decache_regs(vcpu); 1332 kvm_x86_ops->decache_regs(vcpu);
1282 kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); 1333 kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags);
1283 1334
1284 if (vcpu->mmio_is_write) { 1335 if (vcpu->mmio_is_write) {
1285 vcpu->mmio_needed = 0; 1336 vcpu->mmio_needed = 0;
@@ -1290,14 +1341,45 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
1290} 1341}
1291EXPORT_SYMBOL_GPL(emulate_instruction); 1342EXPORT_SYMBOL_GPL(emulate_instruction);
1292 1343
1293int kvm_emulate_halt(struct kvm_vcpu *vcpu) 1344/*
 1345 * The vCPU has executed a HLT instruction with the in-kernel irqchip enabled.
1346 */
1347static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1294{ 1348{
1295 if (vcpu->irq_summary) 1349 DECLARE_WAITQUEUE(wait, current);
1296 return 1;
1297 1350
1298 vcpu->run->exit_reason = KVM_EXIT_HLT; 1351 add_wait_queue(&vcpu->wq, &wait);
1352
1353 /*
1354 * We will block until either an interrupt or a signal wakes us up
1355 */
1356 while (!kvm_cpu_has_interrupt(vcpu)
1357 && !signal_pending(current)
1358 && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE
1359 && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) {
1360 set_current_state(TASK_INTERRUPTIBLE);
1361 vcpu_put(vcpu);
1362 schedule();
1363 vcpu_load(vcpu);
1364 }
1365
1366 __set_current_state(TASK_RUNNING);
1367 remove_wait_queue(&vcpu->wq, &wait);
1368}
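kvm_vcpu_block() is the classic waitqueue sleep: park on the vcpu's queue until an interrupt is pending, a signal arrives, or an IPI/SIPI marks the vcpu runnable; the vcpu_put()/vcpu_load() around schedule() release the hardware context so a halted vcpu does not pin per-cpu state while asleep. The idiom in isolation, in the canonical ordering (task state set before the condition test, so a wake_up() landing between test and schedule() cannot be lost):

	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(&vcpu->wq, &wait);
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (wakeup_condition(vcpu))	/* hypothetical predicate */
			break;
		schedule();			/* wakers: wake_up(&vcpu->wq) */
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&vcpu->wq, &wait);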
1369
1370int kvm_emulate_halt(struct kvm_vcpu *vcpu)
1371{
1299 ++vcpu->stat.halt_exits; 1372 ++vcpu->stat.halt_exits;
1300 return 0; 1373 if (irqchip_in_kernel(vcpu->kvm)) {
1374 vcpu->mp_state = VCPU_MP_STATE_HALTED;
1375 kvm_vcpu_block(vcpu);
1376 if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
1377 return -EINTR;
1378 return 1;
1379 } else {
1380 vcpu->run->exit_reason = KVM_EXIT_HLT;
1381 return 0;
1382 }
1301} 1383}
1302EXPORT_SYMBOL_GPL(kvm_emulate_halt); 1384EXPORT_SYMBOL_GPL(kvm_emulate_halt);
1303 1385
@@ -1305,7 +1387,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1305{ 1387{
1306 unsigned long nr, a0, a1, a2, a3, a4, a5, ret; 1388 unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
1307 1389
1308 kvm_arch_ops->cache_regs(vcpu); 1390 kvm_x86_ops->cache_regs(vcpu);
1309 ret = -KVM_EINVAL; 1391 ret = -KVM_EINVAL;
1310#ifdef CONFIG_X86_64 1392#ifdef CONFIG_X86_64
1311 if (is_long_mode(vcpu)) { 1393 if (is_long_mode(vcpu)) {
@@ -1329,6 +1411,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1329 } 1411 }
1330 switch (nr) { 1412 switch (nr) {
1331 default: 1413 default:
1414 run->hypercall.nr = nr;
1332 run->hypercall.args[0] = a0; 1415 run->hypercall.args[0] = a0;
1333 run->hypercall.args[1] = a1; 1416 run->hypercall.args[1] = a1;
1334 run->hypercall.args[2] = a2; 1417 run->hypercall.args[2] = a2;
@@ -1337,11 +1420,11 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1337 run->hypercall.args[5] = a5; 1420 run->hypercall.args[5] = a5;
1338 run->hypercall.ret = ret; 1421 run->hypercall.ret = ret;
1339 run->hypercall.longmode = is_long_mode(vcpu); 1422 run->hypercall.longmode = is_long_mode(vcpu);
1340 kvm_arch_ops->decache_regs(vcpu); 1423 kvm_x86_ops->decache_regs(vcpu);
1341 return 0; 1424 return 0;
1342 } 1425 }
1343 vcpu->regs[VCPU_REGS_RAX] = ret; 1426 vcpu->regs[VCPU_REGS_RAX] = ret;
1344 kvm_arch_ops->decache_regs(vcpu); 1427 kvm_x86_ops->decache_regs(vcpu);
1345 return 1; 1428 return 1;
1346} 1429}
1347EXPORT_SYMBOL_GPL(kvm_hypercall); 1430EXPORT_SYMBOL_GPL(kvm_hypercall);
@@ -1355,26 +1438,26 @@ void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1355{ 1438{
1356 struct descriptor_table dt = { limit, base }; 1439 struct descriptor_table dt = { limit, base };
1357 1440
1358 kvm_arch_ops->set_gdt(vcpu, &dt); 1441 kvm_x86_ops->set_gdt(vcpu, &dt);
1359} 1442}
1360 1443
1361void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) 1444void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
1362{ 1445{
1363 struct descriptor_table dt = { limit, base }; 1446 struct descriptor_table dt = { limit, base };
1364 1447
1365 kvm_arch_ops->set_idt(vcpu, &dt); 1448 kvm_x86_ops->set_idt(vcpu, &dt);
1366} 1449}
1367 1450
1368void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, 1451void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
1369 unsigned long *rflags) 1452 unsigned long *rflags)
1370{ 1453{
1371 lmsw(vcpu, msw); 1454 lmsw(vcpu, msw);
1372 *rflags = kvm_arch_ops->get_rflags(vcpu); 1455 *rflags = kvm_x86_ops->get_rflags(vcpu);
1373} 1456}
1374 1457
1375unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 1458unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
1376{ 1459{
1377 kvm_arch_ops->decache_cr4_guest_bits(vcpu); 1460 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
1378 switch (cr) { 1461 switch (cr) {
1379 case 0: 1462 case 0:
1380 return vcpu->cr0; 1463 return vcpu->cr0;
@@ -1396,7 +1479,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
1396 switch (cr) { 1479 switch (cr) {
1397 case 0: 1480 case 0:
1398 set_cr0(vcpu, mk_cr_64(vcpu->cr0, val)); 1481 set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
1399 *rflags = kvm_arch_ops->get_rflags(vcpu); 1482 *rflags = kvm_x86_ops->get_rflags(vcpu);
1400 break; 1483 break;
1401 case 2: 1484 case 2:
1402 vcpu->cr2 = val; 1485 vcpu->cr2 = val;
@@ -1439,7 +1522,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
1439 1522
1440 mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); 1523 mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
1441 para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); 1524 para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
1442 para_state = kmap_atomic(para_state_page, KM_USER0); 1525 para_state = kmap(para_state_page);
1443 1526
1444 printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); 1527 printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
1445 printk(KERN_DEBUG ".... size: %d\n", para_state->size); 1528 printk(KERN_DEBUG ".... size: %d\n", para_state->size);
@@ -1470,12 +1553,12 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
1470 mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); 1553 mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
1471 hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), 1554 hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
1472 KM_USER1) + (hypercall_hpa & ~PAGE_MASK); 1555 KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
1473 kvm_arch_ops->patch_hypercall(vcpu, hypercall); 1556 kvm_x86_ops->patch_hypercall(vcpu, hypercall);
1474 kunmap_atomic(hypercall, KM_USER1); 1557 kunmap_atomic(hypercall, KM_USER1);
1475 1558
1476 para_state->ret = 0; 1559 para_state->ret = 0;
1477err_kunmap_skip: 1560err_kunmap_skip:
1478 kunmap_atomic(para_state, KM_USER0); 1561 kunmap(para_state_page);
1479 return 0; 1562 return 0;
1480err_gp: 1563err_gp:
1481 return 1; 1564 return 1;
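vcpu_register_para() switches the para_state mapping from kmap_atomic() to kmap(), presumably because this path may now sleep (kvm->lock is a mutex elsewhere in this patch): kmap_atomic() forbids sleeping until the matching kunmap_atomic(), while kmap()/kunmap() allow it. The nested hypercall-page mapping stays atomic in KM_USER1 since nothing sleeps inside it.

	/* Rule of thumb:
	 *   kmap_atomic(page, slot) .. kunmap_atomic()  -- no sleeping inside
	 *   kmap(page) .. kunmap(page)                  -- sleeping allowed
	 */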
@@ -1511,7 +1594,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1511 data = 3; 1594 data = 3;
1512 break; 1595 break;
1513 case MSR_IA32_APICBASE: 1596 case MSR_IA32_APICBASE:
1514 data = vcpu->apic_base; 1597 data = kvm_get_apic_base(vcpu);
1515 break; 1598 break;
1516 case MSR_IA32_MISC_ENABLE: 1599 case MSR_IA32_MISC_ENABLE:
1517 data = vcpu->ia32_misc_enable_msr; 1600 data = vcpu->ia32_misc_enable_msr;
@@ -1522,7 +1605,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1522 break; 1605 break;
1523#endif 1606#endif
1524 default: 1607 default:
1525 printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr); 1608 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
1526 return 1; 1609 return 1;
1527 } 1610 }
1528 *pdata = data; 1611 *pdata = data;
@@ -1537,7 +1620,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common);
1537 */ 1620 */
1538int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 1621int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1539{ 1622{
1540 return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); 1623 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
1541} 1624}
1542 1625
1543#ifdef CONFIG_X86_64 1626#ifdef CONFIG_X86_64
@@ -1558,7 +1641,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
1558 return; 1641 return;
1559 } 1642 }
1560 1643
1561 kvm_arch_ops->set_efer(vcpu, efer); 1644 kvm_x86_ops->set_efer(vcpu, efer);
1562 1645
1563 efer &= ~EFER_LMA; 1646 efer &= ~EFER_LMA;
1564 efer |= vcpu->shadow_efer & EFER_LMA; 1647 efer |= vcpu->shadow_efer & EFER_LMA;
@@ -1577,11 +1660,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1577 break; 1660 break;
1578#endif 1661#endif
1579 case MSR_IA32_MC0_STATUS: 1662 case MSR_IA32_MC0_STATUS:
1580 printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", 1663 pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
1581 __FUNCTION__, data); 1664 __FUNCTION__, data);
1582 break; 1665 break;
1583 case MSR_IA32_MCG_STATUS: 1666 case MSR_IA32_MCG_STATUS:
1584 printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", 1667 pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
1585 __FUNCTION__, data); 1668 __FUNCTION__, data);
1586 break; 1669 break;
1587 case MSR_IA32_UCODE_REV: 1670 case MSR_IA32_UCODE_REV:
@@ -1589,7 +1672,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1589 case 0x200 ... 0x2ff: /* MTRRs */ 1672 case 0x200 ... 0x2ff: /* MTRRs */
1590 break; 1673 break;
1591 case MSR_IA32_APICBASE: 1674 case MSR_IA32_APICBASE:
1592 vcpu->apic_base = data; 1675 kvm_set_apic_base(vcpu, data);
1593 break; 1676 break;
1594 case MSR_IA32_MISC_ENABLE: 1677 case MSR_IA32_MISC_ENABLE:
1595 vcpu->ia32_misc_enable_msr = data; 1678 vcpu->ia32_misc_enable_msr = data;
@@ -1601,7 +1684,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1601 return vcpu_register_para(vcpu, data); 1684 return vcpu_register_para(vcpu, data);
1602 1685
1603 default: 1686 default:
1604 printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr); 1687 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
1605 return 1; 1688 return 1;
1606 } 1689 }
1607 return 0; 1690 return 0;
@@ -1615,44 +1698,24 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1615 */ 1698 */
1616int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 1699int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1617{ 1700{
1618 return kvm_arch_ops->set_msr(vcpu, msr_index, data); 1701 return kvm_x86_ops->set_msr(vcpu, msr_index, data);
1619} 1702}
1620 1703
1621void kvm_resched(struct kvm_vcpu *vcpu) 1704void kvm_resched(struct kvm_vcpu *vcpu)
1622{ 1705{
1623 if (!need_resched()) 1706 if (!need_resched())
1624 return; 1707 return;
1625 vcpu_put(vcpu);
1626 cond_resched(); 1708 cond_resched();
1627 vcpu_load(vcpu);
1628} 1709}
1629EXPORT_SYMBOL_GPL(kvm_resched); 1710EXPORT_SYMBOL_GPL(kvm_resched);
1630 1711
1631void load_msrs(struct vmx_msr_entry *e, int n)
1632{
1633 int i;
1634
1635 for (i = 0; i < n; ++i)
1636 wrmsrl(e[i].index, e[i].data);
1637}
1638EXPORT_SYMBOL_GPL(load_msrs);
1639
1640void save_msrs(struct vmx_msr_entry *e, int n)
1641{
1642 int i;
1643
1644 for (i = 0; i < n; ++i)
1645 rdmsrl(e[i].index, e[i].data);
1646}
1647EXPORT_SYMBOL_GPL(save_msrs);
1648
1649void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) 1712void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1650{ 1713{
1651 int i; 1714 int i;
1652 u32 function; 1715 u32 function;
1653 struct kvm_cpuid_entry *e, *best; 1716 struct kvm_cpuid_entry *e, *best;
1654 1717
1655 kvm_arch_ops->cache_regs(vcpu); 1718 kvm_x86_ops->cache_regs(vcpu);
1656 function = vcpu->regs[VCPU_REGS_RAX]; 1719 function = vcpu->regs[VCPU_REGS_RAX];
1657 vcpu->regs[VCPU_REGS_RAX] = 0; 1720 vcpu->regs[VCPU_REGS_RAX] = 0;
1658 vcpu->regs[VCPU_REGS_RBX] = 0; 1721 vcpu->regs[VCPU_REGS_RBX] = 0;
@@ -1678,8 +1741,8 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1678 vcpu->regs[VCPU_REGS_RCX] = best->ecx; 1741 vcpu->regs[VCPU_REGS_RCX] = best->ecx;
1679 vcpu->regs[VCPU_REGS_RDX] = best->edx; 1742 vcpu->regs[VCPU_REGS_RDX] = best->edx;
1680 } 1743 }
1681 kvm_arch_ops->decache_regs(vcpu); 1744 kvm_x86_ops->decache_regs(vcpu);
1682 kvm_arch_ops->skip_emulated_instruction(vcpu); 1745 kvm_x86_ops->skip_emulated_instruction(vcpu);
1683} 1746}
1684EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); 1747EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
1685 1748
@@ -1690,11 +1753,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
1690 unsigned bytes; 1753 unsigned bytes;
1691 int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; 1754 int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
1692 1755
1693 kvm_arch_ops->vcpu_put(vcpu);
1694 q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, 1756 q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
1695 PAGE_KERNEL); 1757 PAGE_KERNEL);
1696 if (!q) { 1758 if (!q) {
1697 kvm_arch_ops->vcpu_load(vcpu);
1698 free_pio_guest_pages(vcpu); 1759 free_pio_guest_pages(vcpu);
1699 return -ENOMEM; 1760 return -ENOMEM;
1700 } 1761 }
@@ -1706,7 +1767,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
1706 memcpy(p, q, bytes); 1767 memcpy(p, q, bytes);
1707 q -= vcpu->pio.guest_page_offset; 1768 q -= vcpu->pio.guest_page_offset;
1708 vunmap(q); 1769 vunmap(q);
1709 kvm_arch_ops->vcpu_load(vcpu);
1710 free_pio_guest_pages(vcpu); 1770 free_pio_guest_pages(vcpu);
1711 return 0; 1771 return 0;
1712} 1772}
@@ -1717,7 +1777,7 @@ static int complete_pio(struct kvm_vcpu *vcpu)
1717 long delta; 1777 long delta;
1718 int r; 1778 int r;
1719 1779
1720 kvm_arch_ops->cache_regs(vcpu); 1780 kvm_x86_ops->cache_regs(vcpu);
1721 1781
1722 if (!io->string) { 1782 if (!io->string) {
1723 if (io->in) 1783 if (io->in)
@@ -1727,7 +1787,7 @@ static int complete_pio(struct kvm_vcpu *vcpu)
1727 if (io->in) { 1787 if (io->in) {
1728 r = pio_copy_data(vcpu); 1788 r = pio_copy_data(vcpu);
1729 if (r) { 1789 if (r) {
1730 kvm_arch_ops->cache_regs(vcpu); 1790 kvm_x86_ops->cache_regs(vcpu);
1731 return r; 1791 return r;
1732 } 1792 }
1733 } 1793 }
@@ -1750,79 +1810,109 @@ static int complete_pio(struct kvm_vcpu *vcpu)
1750 vcpu->regs[VCPU_REGS_RSI] += delta; 1810 vcpu->regs[VCPU_REGS_RSI] += delta;
1751 } 1811 }
1752 1812
1753 kvm_arch_ops->decache_regs(vcpu); 1813 kvm_x86_ops->decache_regs(vcpu);
1754 1814
1755 io->count -= io->cur_count; 1815 io->count -= io->cur_count;
1756 io->cur_count = 0; 1816 io->cur_count = 0;
1757 1817
1758 if (!io->count)
1759 kvm_arch_ops->skip_emulated_instruction(vcpu);
1760 return 0; 1818 return 0;
1761} 1819}
1762 1820
1763void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu) 1821static void kernel_pio(struct kvm_io_device *pio_dev,
1822 struct kvm_vcpu *vcpu,
1823 void *pd)
1764{ 1824{
1765 /* TODO: String I/O for in kernel device */ 1825 /* TODO: String I/O for in kernel device */
1766 1826
1827 mutex_lock(&vcpu->kvm->lock);
1767 if (vcpu->pio.in) 1828 if (vcpu->pio.in)
1768 kvm_iodevice_read(pio_dev, vcpu->pio.port, 1829 kvm_iodevice_read(pio_dev, vcpu->pio.port,
1769 vcpu->pio.size, 1830 vcpu->pio.size,
1770 vcpu->pio_data); 1831 pd);
1771 else 1832 else
1772 kvm_iodevice_write(pio_dev, vcpu->pio.port, 1833 kvm_iodevice_write(pio_dev, vcpu->pio.port,
1773 vcpu->pio.size, 1834 vcpu->pio.size,
1774 vcpu->pio_data); 1835 pd);
1836 mutex_unlock(&vcpu->kvm->lock);
1837}
1838
1839static void pio_string_write(struct kvm_io_device *pio_dev,
1840 struct kvm_vcpu *vcpu)
1841{
1842 struct kvm_pio_request *io = &vcpu->pio;
1843 void *pd = vcpu->pio_data;
1844 int i;
1845
1846 mutex_lock(&vcpu->kvm->lock);
1847 for (i = 0; i < io->cur_count; i++) {
1848 kvm_iodevice_write(pio_dev, io->port,
1849 io->size,
1850 pd);
1851 pd += io->size;
1852 }
1853 mutex_unlock(&vcpu->kvm->lock);
1775} 1854}
1776 1855
1777int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, 1856int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1778 int size, unsigned long count, int string, int down, 1857 int size, unsigned port)
1858{
1859 struct kvm_io_device *pio_dev;
1860
1861 vcpu->run->exit_reason = KVM_EXIT_IO;
1862 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1863 vcpu->run->io.size = vcpu->pio.size = size;
1864 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1865 vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1;
1866 vcpu->run->io.port = vcpu->pio.port = port;
1867 vcpu->pio.in = in;
1868 vcpu->pio.string = 0;
1869 vcpu->pio.down = 0;
1870 vcpu->pio.guest_page_offset = 0;
1871 vcpu->pio.rep = 0;
1872
1873 kvm_x86_ops->cache_regs(vcpu);
1874 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1875 kvm_x86_ops->decache_regs(vcpu);
1876
1877 kvm_x86_ops->skip_emulated_instruction(vcpu);
1878
1879 pio_dev = vcpu_find_pio_dev(vcpu, port);
1880 if (pio_dev) {
1881 kernel_pio(pio_dev, vcpu, vcpu->pio_data);
1882 complete_pio(vcpu);
1883 return 1;
1884 }
1885 return 0;
1886}
1887EXPORT_SYMBOL_GPL(kvm_emulate_pio);
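
When no in-kernel device claims the port, kvm_emulate_pio() returns 0 and the
vcpu thread exits to userspace with KVM_EXIT_IO; the bounce buffer is the page
mmap()ed at KVM_PIO_PAGE_OFFSET. A minimal sketch of the userspace side (the
handle_out()/fill_in() helpers and the surrounding setup are assumptions, not
part of this patch):

	/* vcpu_fd from KVM_CREATE_VCPU, mmap_size from KVM_GET_VCPU_MMAP_SIZE */
	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu_fd, 0);

	ioctl(vcpu_fd, KVM_RUN, 0);
	if (run->exit_reason == KVM_EXIT_IO) {
		__u8 *data = (__u8 *)run + run->io.data_offset;

		if (run->io.direction == KVM_EXIT_IO_OUT)
			handle_out(run->io.port, data, run->io.size * run->io.count);
		else	/* fill data; complete_pio() consumes it on re-entry */
			fill_in(run->io.port, data, run->io.size * run->io.count);
	}
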
1888
1889int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1890 int size, unsigned long count, int down,
1779 gva_t address, int rep, unsigned port) 1891 gva_t address, int rep, unsigned port)
1780{ 1892{
1781 unsigned now, in_page; 1893 unsigned now, in_page;
1782 int i; 1894 int i, ret = 0;
1783 int nr_pages = 1; 1895 int nr_pages = 1;
1784 struct page *page; 1896 struct page *page;
1785 struct kvm_io_device *pio_dev; 1897 struct kvm_io_device *pio_dev;
1786 1898
1787 vcpu->run->exit_reason = KVM_EXIT_IO; 1899 vcpu->run->exit_reason = KVM_EXIT_IO;
1788 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 1900 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
1789 vcpu->run->io.size = size; 1901 vcpu->run->io.size = vcpu->pio.size = size;
1790 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; 1902 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
1791 vcpu->run->io.count = count; 1903 vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count;
1792 vcpu->run->io.port = port; 1904 vcpu->run->io.port = vcpu->pio.port = port;
1793 vcpu->pio.count = count;
1794 vcpu->pio.cur_count = count;
1795 vcpu->pio.size = size;
1796 vcpu->pio.in = in; 1905 vcpu->pio.in = in;
1797 vcpu->pio.port = port; 1906 vcpu->pio.string = 1;
1798 vcpu->pio.string = string;
1799 vcpu->pio.down = down; 1907 vcpu->pio.down = down;
1800 vcpu->pio.guest_page_offset = offset_in_page(address); 1908 vcpu->pio.guest_page_offset = offset_in_page(address);
1801 vcpu->pio.rep = rep; 1909 vcpu->pio.rep = rep;
1802 1910
1803 pio_dev = vcpu_find_pio_dev(vcpu, port);
1804 if (!string) {
1805 kvm_arch_ops->cache_regs(vcpu);
1806 memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
1807 kvm_arch_ops->decache_regs(vcpu);
1808 if (pio_dev) {
1809 kernel_pio(pio_dev, vcpu);
1810 complete_pio(vcpu);
1811 return 1;
1812 }
1813 return 0;
1814 }
1815 /* TODO: String I/O for in kernel device */
1816 if (pio_dev)
1817 printk(KERN_ERR "kvm_setup_pio: no string io support\n");
1818
1819 if (!count) { 1911 if (!count) {
1820 kvm_arch_ops->skip_emulated_instruction(vcpu); 1912 kvm_x86_ops->skip_emulated_instruction(vcpu);
1821 return 1; 1913 return 1;
1822 } 1914 }
1823 1915
1824 now = min(count, PAGE_SIZE / size);
1825
1826 if (!down) 1916 if (!down)
1827 in_page = PAGE_SIZE - offset_in_page(address); 1917 in_page = PAGE_SIZE - offset_in_page(address);
1828 else 1918 else
@@ -1841,20 +1931,23 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1841 /* 1931 /*
1842 * String I/O in reverse. Yuck. Kill the guest, fix later. 1932 * String I/O in reverse. Yuck. Kill the guest, fix later.
1843 */ 1933 */
1844 printk(KERN_ERR "kvm: guest string pio down\n"); 1934 pr_unimpl(vcpu, "guest string pio down\n");
1845 inject_gp(vcpu); 1935 inject_gp(vcpu);
1846 return 1; 1936 return 1;
1847 } 1937 }
1848 vcpu->run->io.count = now; 1938 vcpu->run->io.count = now;
1849 vcpu->pio.cur_count = now; 1939 vcpu->pio.cur_count = now;
1850 1940
1941 if (vcpu->pio.cur_count == vcpu->pio.count)
1942 kvm_x86_ops->skip_emulated_instruction(vcpu);
1943
1851 for (i = 0; i < nr_pages; ++i) { 1944 for (i = 0; i < nr_pages; ++i) {
1852 spin_lock(&vcpu->kvm->lock); 1945 mutex_lock(&vcpu->kvm->lock);
1853 page = gva_to_page(vcpu, address + i * PAGE_SIZE); 1946 page = gva_to_page(vcpu, address + i * PAGE_SIZE);
1854 if (page) 1947 if (page)
1855 get_page(page); 1948 get_page(page);
1856 vcpu->pio.guest_pages[i] = page; 1949 vcpu->pio.guest_pages[i] = page;
1857 spin_unlock(&vcpu->kvm->lock); 1950 mutex_unlock(&vcpu->kvm->lock);
1858 if (!page) { 1951 if (!page) {
1859 inject_gp(vcpu); 1952 inject_gp(vcpu);
1860 free_pio_guest_pages(vcpu); 1953 free_pio_guest_pages(vcpu);
@@ -1862,11 +1955,145 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
1862 } 1955 }
1863 } 1956 }
1864 1957
1865 if (!vcpu->pio.in) 1958 pio_dev = vcpu_find_pio_dev(vcpu, port);
1866 return pio_copy_data(vcpu); 1959 if (!vcpu->pio.in) {
1867 return 0; 1960 /* string PIO write */
1961 ret = pio_copy_data(vcpu);
1962 if (ret >= 0 && pio_dev) {
1963 pio_string_write(pio_dev, vcpu);
1964 complete_pio(vcpu);
1965 if (vcpu->pio.count == 0)
1966 ret = 1;
1967 }
1968 } else if (pio_dev)
1969 pr_unimpl(vcpu, "no string pio read support yet, "
1970 "port %x size %d count %ld\n",
1971 port, size, count);
1972
1973 return ret;
1974}
1975EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
1976
1977/*
1978 * Check if userspace requested an interrupt window, and that the
1979 * interrupt window is open.
1980 *
1981 * No need to exit to userspace if we already have an interrupt queued.
1982 */
1983static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1984 struct kvm_run *kvm_run)
1985{
1986 return (!vcpu->irq_summary &&
1987 kvm_run->request_interrupt_window &&
1988 vcpu->interrupt_window_open &&
1989 (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
1990}
1991
1992static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1993 struct kvm_run *kvm_run)
1994{
1995 kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
1996 kvm_run->cr8 = get_cr8(vcpu);
1997 kvm_run->apic_base = kvm_get_apic_base(vcpu);
1998 if (irqchip_in_kernel(vcpu->kvm))
1999 kvm_run->ready_for_interrupt_injection = 1;
2000 else
2001 kvm_run->ready_for_interrupt_injection =
2002 (vcpu->interrupt_window_open &&
2003 vcpu->irq_summary == 0);
2004}
2005
2006static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2007{
2008 int r;
2009
2010 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
 2011 printk(KERN_DEBUG "vcpu %d received sipi with vector # %x\n",
2012 vcpu->vcpu_id, vcpu->sipi_vector);
2013 kvm_lapic_reset(vcpu);
2014 kvm_x86_ops->vcpu_reset(vcpu);
2015 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
2016 }
2017
2018preempted:
2019 if (vcpu->guest_debug.enabled)
2020 kvm_x86_ops->guest_debug_pre(vcpu);
2021
2022again:
2023 r = kvm_mmu_reload(vcpu);
2024 if (unlikely(r))
2025 goto out;
2026
2027 preempt_disable();
2028
2029 kvm_x86_ops->prepare_guest_switch(vcpu);
2030 kvm_load_guest_fpu(vcpu);
2031
2032 local_irq_disable();
2033
2034 if (signal_pending(current)) {
2035 local_irq_enable();
2036 preempt_enable();
2037 r = -EINTR;
2038 kvm_run->exit_reason = KVM_EXIT_INTR;
2039 ++vcpu->stat.signal_exits;
2040 goto out;
2041 }
2042
2043 if (irqchip_in_kernel(vcpu->kvm))
2044 kvm_x86_ops->inject_pending_irq(vcpu);
2045 else if (!vcpu->mmio_read_completed)
2046 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
2047
2048 vcpu->guest_mode = 1;
2049
2050 if (vcpu->requests)
2051 if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
2052 kvm_x86_ops->tlb_flush(vcpu);
2053
2054 kvm_x86_ops->run(vcpu, kvm_run);
2055
2056 vcpu->guest_mode = 0;
2057 local_irq_enable();
2058
2059 ++vcpu->stat.exits;
2060
2061 preempt_enable();
2062
2063 /*
2064 * Profile KVM exit RIPs:
2065 */
2066 if (unlikely(prof_on == KVM_PROFILING)) {
2067 kvm_x86_ops->cache_regs(vcpu);
2068 profile_hit(KVM_PROFILING, (void *)vcpu->rip);
2069 }
2070
2071 r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
2072
2073 if (r > 0) {
2074 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
2075 r = -EINTR;
2076 kvm_run->exit_reason = KVM_EXIT_INTR;
2077 ++vcpu->stat.request_irq_exits;
2078 goto out;
2079 }
2080 if (!need_resched()) {
2081 ++vcpu->stat.light_exits;
2082 goto again;
2083 }
2084 }
2085
2086out:
2087 if (r > 0) {
2088 kvm_resched(vcpu);
2089 goto preempted;
2090 }
2091
2092 post_kvm_run_save(vcpu, kvm_run);
2093
2094 return r;
1868} 2095}
1869EXPORT_SYMBOL_GPL(kvm_setup_pio); 2096
1870 2097
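For orientation, the shape of __vcpu_run() above in one glance (comment-only
summary; the behaviour is exactly as coded):

	/*
	 * preempted: re-arm guest debug registers if enabled
	 * again:     reload MMU, disable irqs, bail on signals,
	 *            inject pending irqs, kvm_x86_ops->run(), handle exit
	 *            - handled in kernel, no resched pending -> goto again
	 *              (a "light exit": userspace never sees it)
	 *            - signal or irq-window request -> r = -EINTR
	 * out:       r > 0 -> kvm_resched() and goto preempted,
	 *            otherwise post_kvm_run_save() and return to userspace
	 */
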
1871static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2098static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1872{ 2099{
@@ -1875,11 +2102,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1875 2102
1876 vcpu_load(vcpu); 2103 vcpu_load(vcpu);
1877 2104
2105 if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
2106 kvm_vcpu_block(vcpu);
2107 vcpu_put(vcpu);
2108 return -EAGAIN;
2109 }
2110
1878 if (vcpu->sigset_active) 2111 if (vcpu->sigset_active)
1879 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 2112 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1880 2113
1881 /* re-sync apic's tpr */ 2114 /* re-sync apic's tpr */
1882 vcpu->cr8 = kvm_run->cr8; 2115 if (!irqchip_in_kernel(vcpu->kvm))
2116 set_cr8(vcpu, kvm_run->cr8);
1883 2117
1884 if (vcpu->pio.cur_count) { 2118 if (vcpu->pio.cur_count) {
1885 r = complete_pio(vcpu); 2119 r = complete_pio(vcpu);
@@ -1897,19 +2131,18 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1897 /* 2131 /*
1898 * Read-modify-write. Back to userspace. 2132 * Read-modify-write. Back to userspace.
1899 */ 2133 */
1900 kvm_run->exit_reason = KVM_EXIT_MMIO;
1901 r = 0; 2134 r = 0;
1902 goto out; 2135 goto out;
1903 } 2136 }
1904 } 2137 }
1905 2138
1906 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { 2139 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
1907 kvm_arch_ops->cache_regs(vcpu); 2140 kvm_x86_ops->cache_regs(vcpu);
1908 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; 2141 vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
1909 kvm_arch_ops->decache_regs(vcpu); 2142 kvm_x86_ops->decache_regs(vcpu);
1910 } 2143 }
1911 2144
1912 r = kvm_arch_ops->run(vcpu, kvm_run); 2145 r = __vcpu_run(vcpu, kvm_run);
1913 2146
1914out: 2147out:
1915 if (vcpu->sigset_active) 2148 if (vcpu->sigset_active)
@@ -1924,7 +2157,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
1924{ 2157{
1925 vcpu_load(vcpu); 2158 vcpu_load(vcpu);
1926 2159
1927 kvm_arch_ops->cache_regs(vcpu); 2160 kvm_x86_ops->cache_regs(vcpu);
1928 2161
1929 regs->rax = vcpu->regs[VCPU_REGS_RAX]; 2162 regs->rax = vcpu->regs[VCPU_REGS_RAX];
1930 regs->rbx = vcpu->regs[VCPU_REGS_RBX]; 2163 regs->rbx = vcpu->regs[VCPU_REGS_RBX];
@@ -1946,7 +2179,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu,
1946#endif 2179#endif
1947 2180
1948 regs->rip = vcpu->rip; 2181 regs->rip = vcpu->rip;
1949 regs->rflags = kvm_arch_ops->get_rflags(vcpu); 2182 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
1950 2183
1951 /* 2184 /*
1952 * Don't leak debug flags in case they were set for guest debugging 2185 * Don't leak debug flags in case they were set for guest debugging
@@ -1984,9 +2217,9 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1984#endif 2217#endif
1985 2218
1986 vcpu->rip = regs->rip; 2219 vcpu->rip = regs->rip;
1987 kvm_arch_ops->set_rflags(vcpu, regs->rflags); 2220 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
1988 2221
1989 kvm_arch_ops->decache_regs(vcpu); 2222 kvm_x86_ops->decache_regs(vcpu);
1990 2223
1991 vcpu_put(vcpu); 2224 vcpu_put(vcpu);
1992 2225
@@ -1996,13 +2229,14 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu,
1996static void get_segment(struct kvm_vcpu *vcpu, 2229static void get_segment(struct kvm_vcpu *vcpu,
1997 struct kvm_segment *var, int seg) 2230 struct kvm_segment *var, int seg)
1998{ 2231{
1999 return kvm_arch_ops->get_segment(vcpu, var, seg); 2232 return kvm_x86_ops->get_segment(vcpu, var, seg);
2000} 2233}
2001 2234
2002static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, 2235static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2003 struct kvm_sregs *sregs) 2236 struct kvm_sregs *sregs)
2004{ 2237{
2005 struct descriptor_table dt; 2238 struct descriptor_table dt;
2239 int pending_vec;
2006 2240
2007 vcpu_load(vcpu); 2241 vcpu_load(vcpu);
2008 2242
@@ -2016,24 +2250,31 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2016 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); 2250 get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
2017 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); 2251 get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
2018 2252
2019 kvm_arch_ops->get_idt(vcpu, &dt); 2253 kvm_x86_ops->get_idt(vcpu, &dt);
2020 sregs->idt.limit = dt.limit; 2254 sregs->idt.limit = dt.limit;
2021 sregs->idt.base = dt.base; 2255 sregs->idt.base = dt.base;
2022 kvm_arch_ops->get_gdt(vcpu, &dt); 2256 kvm_x86_ops->get_gdt(vcpu, &dt);
2023 sregs->gdt.limit = dt.limit; 2257 sregs->gdt.limit = dt.limit;
2024 sregs->gdt.base = dt.base; 2258 sregs->gdt.base = dt.base;
2025 2259
2026 kvm_arch_ops->decache_cr4_guest_bits(vcpu); 2260 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2027 sregs->cr0 = vcpu->cr0; 2261 sregs->cr0 = vcpu->cr0;
2028 sregs->cr2 = vcpu->cr2; 2262 sregs->cr2 = vcpu->cr2;
2029 sregs->cr3 = vcpu->cr3; 2263 sregs->cr3 = vcpu->cr3;
2030 sregs->cr4 = vcpu->cr4; 2264 sregs->cr4 = vcpu->cr4;
2031 sregs->cr8 = vcpu->cr8; 2265 sregs->cr8 = get_cr8(vcpu);
2032 sregs->efer = vcpu->shadow_efer; 2266 sregs->efer = vcpu->shadow_efer;
2033 sregs->apic_base = vcpu->apic_base; 2267 sregs->apic_base = kvm_get_apic_base(vcpu);
2034 2268
2035 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, 2269 if (irqchip_in_kernel(vcpu->kvm)) {
2036 sizeof sregs->interrupt_bitmap); 2270 memset(sregs->interrupt_bitmap, 0,
2271 sizeof sregs->interrupt_bitmap);
2272 pending_vec = kvm_x86_ops->get_irq(vcpu);
2273 if (pending_vec >= 0)
2274 set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap);
2275 } else
2276 memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
2277 sizeof sregs->interrupt_bitmap);
2037 2278
2038 vcpu_put(vcpu); 2279 vcpu_put(vcpu);
2039 2280
@@ -2043,56 +2284,69 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2043static void set_segment(struct kvm_vcpu *vcpu, 2284static void set_segment(struct kvm_vcpu *vcpu,
2044 struct kvm_segment *var, int seg) 2285 struct kvm_segment *var, int seg)
2045{ 2286{
2046 return kvm_arch_ops->set_segment(vcpu, var, seg); 2287 return kvm_x86_ops->set_segment(vcpu, var, seg);
2047} 2288}
2048 2289
2049static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, 2290static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2050 struct kvm_sregs *sregs) 2291 struct kvm_sregs *sregs)
2051{ 2292{
2052 int mmu_reset_needed = 0; 2293 int mmu_reset_needed = 0;
2053 int i; 2294 int i, pending_vec, max_bits;
2054 struct descriptor_table dt; 2295 struct descriptor_table dt;
2055 2296
2056 vcpu_load(vcpu); 2297 vcpu_load(vcpu);
2057 2298
2058 dt.limit = sregs->idt.limit; 2299 dt.limit = sregs->idt.limit;
2059 dt.base = sregs->idt.base; 2300 dt.base = sregs->idt.base;
2060 kvm_arch_ops->set_idt(vcpu, &dt); 2301 kvm_x86_ops->set_idt(vcpu, &dt);
2061 dt.limit = sregs->gdt.limit; 2302 dt.limit = sregs->gdt.limit;
2062 dt.base = sregs->gdt.base; 2303 dt.base = sregs->gdt.base;
2063 kvm_arch_ops->set_gdt(vcpu, &dt); 2304 kvm_x86_ops->set_gdt(vcpu, &dt);
2064 2305
2065 vcpu->cr2 = sregs->cr2; 2306 vcpu->cr2 = sregs->cr2;
2066 mmu_reset_needed |= vcpu->cr3 != sregs->cr3; 2307 mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
2067 vcpu->cr3 = sregs->cr3; 2308 vcpu->cr3 = sregs->cr3;
2068 2309
2069 vcpu->cr8 = sregs->cr8; 2310 set_cr8(vcpu, sregs->cr8);
2070 2311
2071 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; 2312 mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
2072#ifdef CONFIG_X86_64 2313#ifdef CONFIG_X86_64
2073 kvm_arch_ops->set_efer(vcpu, sregs->efer); 2314 kvm_x86_ops->set_efer(vcpu, sregs->efer);
2074#endif 2315#endif
2075 vcpu->apic_base = sregs->apic_base; 2316 kvm_set_apic_base(vcpu, sregs->apic_base);
2076 2317
2077 kvm_arch_ops->decache_cr4_guest_bits(vcpu); 2318 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
2078 2319
2079 mmu_reset_needed |= vcpu->cr0 != sregs->cr0; 2320 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
2080 kvm_arch_ops->set_cr0(vcpu, sregs->cr0); 2321 vcpu->cr0 = sregs->cr0;
2322 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
2081 2323
2082 mmu_reset_needed |= vcpu->cr4 != sregs->cr4; 2324 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
2083 kvm_arch_ops->set_cr4(vcpu, sregs->cr4); 2325 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
2084 if (!is_long_mode(vcpu) && is_pae(vcpu)) 2326 if (!is_long_mode(vcpu) && is_pae(vcpu))
2085 load_pdptrs(vcpu, vcpu->cr3); 2327 load_pdptrs(vcpu, vcpu->cr3);
2086 2328
2087 if (mmu_reset_needed) 2329 if (mmu_reset_needed)
2088 kvm_mmu_reset_context(vcpu); 2330 kvm_mmu_reset_context(vcpu);
2089 2331
2090 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, 2332 if (!irqchip_in_kernel(vcpu->kvm)) {
2091 sizeof vcpu->irq_pending); 2333 memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
2092 vcpu->irq_summary = 0; 2334 sizeof vcpu->irq_pending);
2093 for (i = 0; i < NR_IRQ_WORDS; ++i) 2335 vcpu->irq_summary = 0;
2094 if (vcpu->irq_pending[i]) 2336 for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i)
2095 __set_bit(i, &vcpu->irq_summary); 2337 if (vcpu->irq_pending[i])
2338 __set_bit(i, &vcpu->irq_summary);
2339 } else {
2340 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
2341 pending_vec = find_first_bit(
2342 (const unsigned long *)sregs->interrupt_bitmap,
2343 max_bits);
2344 /* Only pending external irq is handled here */
2345 if (pending_vec < max_bits) {
2346 kvm_x86_ops->set_irq(vcpu, pending_vec);
 2347 printk(KERN_DEBUG "Set back pending irq %d\n", pending_vec);
2348 }
2349 }
2096 2350
2097 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); 2351 set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
2098 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); 2352 set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -2109,6 +2363,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2109 return 0; 2363 return 0;
2110} 2364}
2111 2365
2366void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
2367{
2368 struct kvm_segment cs;
2369
2370 get_segment(vcpu, &cs, VCPU_SREG_CS);
2371 *db = cs.db;
2372 *l = cs.l;
2373}
2374EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
2375
2112/* 2376/*
2113 * List of msr numbers which we expose to userspace through KVM_GET_MSRS 2377 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
2114 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 2378 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -2236,13 +2500,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2236 gpa_t gpa; 2500 gpa_t gpa;
2237 2501
2238 vcpu_load(vcpu); 2502 vcpu_load(vcpu);
2239 spin_lock(&vcpu->kvm->lock); 2503 mutex_lock(&vcpu->kvm->lock);
2240 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); 2504 gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
2241 tr->physical_address = gpa; 2505 tr->physical_address = gpa;
2242 tr->valid = gpa != UNMAPPED_GVA; 2506 tr->valid = gpa != UNMAPPED_GVA;
2243 tr->writeable = 1; 2507 tr->writeable = 1;
2244 tr->usermode = 0; 2508 tr->usermode = 0;
2245 spin_unlock(&vcpu->kvm->lock); 2509 mutex_unlock(&vcpu->kvm->lock);
2246 vcpu_put(vcpu); 2510 vcpu_put(vcpu);
2247 2511
2248 return 0; 2512 return 0;
@@ -2253,6 +2517,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2253{ 2517{
2254 if (irq->irq < 0 || irq->irq >= 256) 2518 if (irq->irq < 0 || irq->irq >= 256)
2255 return -EINVAL; 2519 return -EINVAL;
2520 if (irqchip_in_kernel(vcpu->kvm))
2521 return -ENXIO;
2256 vcpu_load(vcpu); 2522 vcpu_load(vcpu);
2257 2523
2258 set_bit(irq->irq, vcpu->irq_pending); 2524 set_bit(irq->irq, vcpu->irq_pending);
@@ -2270,7 +2536,7 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
2270 2536
2271 vcpu_load(vcpu); 2537 vcpu_load(vcpu);
2272 2538
2273 r = kvm_arch_ops->set_guest_debug(vcpu, dbg); 2539 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
2274 2540
2275 vcpu_put(vcpu); 2541 vcpu_put(vcpu);
2276 2542
@@ -2285,7 +2551,6 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
2285 unsigned long pgoff; 2551 unsigned long pgoff;
2286 struct page *page; 2552 struct page *page;
2287 2553
2288 *type = VM_FAULT_MINOR;
2289 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 2554 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2290 if (pgoff == 0) 2555 if (pgoff == 0)
2291 page = virt_to_page(vcpu->run); 2556 page = virt_to_page(vcpu->run);
@@ -2294,6 +2559,9 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
2294 else 2559 else
2295 return NOPAGE_SIGBUS; 2560 return NOPAGE_SIGBUS;
2296 get_page(page); 2561 get_page(page);
2562 if (type != NULL)
2563 *type = VM_FAULT_MINOR;
2564
2297 return page; 2565 return page;
2298} 2566}
2299 2567
@@ -2346,74 +2614,52 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
2346{ 2614{
2347 int r; 2615 int r;
2348 struct kvm_vcpu *vcpu; 2616 struct kvm_vcpu *vcpu;
2349 struct page *page;
2350 2617
2351 r = -EINVAL;
2352 if (!valid_vcpu(n)) 2618 if (!valid_vcpu(n))
2353 goto out; 2619 return -EINVAL;
2354
2355 vcpu = &kvm->vcpus[n];
2356
2357 mutex_lock(&vcpu->mutex);
2358
2359 if (vcpu->vmcs) {
2360 mutex_unlock(&vcpu->mutex);
2361 return -EEXIST;
2362 }
2363
2364 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2365 r = -ENOMEM;
2366 if (!page)
2367 goto out_unlock;
2368 vcpu->run = page_address(page);
2369
2370 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
2371 r = -ENOMEM;
2372 if (!page)
2373 goto out_free_run;
2374 vcpu->pio_data = page_address(page);
2375 2620
2376 vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, 2621 vcpu = kvm_x86_ops->vcpu_create(kvm, n);
2377 FX_IMAGE_ALIGN); 2622 if (IS_ERR(vcpu))
2378 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; 2623 return PTR_ERR(vcpu);
2379 vcpu->cr0 = 0x10;
2380 2624
2381 r = kvm_arch_ops->vcpu_create(vcpu); 2625 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
2382 if (r < 0)
2383 goto out_free_vcpus;
2384 2626
2385 r = kvm_mmu_create(vcpu); 2627 /* We do fxsave: this must be aligned. */
2386 if (r < 0) 2628 BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
2387 goto out_free_vcpus;
2388 2629
2389 kvm_arch_ops->vcpu_load(vcpu); 2630 vcpu_load(vcpu);
2390 r = kvm_mmu_setup(vcpu); 2631 r = kvm_mmu_setup(vcpu);
2391 if (r >= 0)
2392 r = kvm_arch_ops->vcpu_setup(vcpu);
2393 vcpu_put(vcpu); 2632 vcpu_put(vcpu);
2394
2395 if (r < 0) 2633 if (r < 0)
2396 goto out_free_vcpus; 2634 goto free_vcpu;
2397 2635
2636 mutex_lock(&kvm->lock);
2637 if (kvm->vcpus[n]) {
2638 r = -EEXIST;
2639 mutex_unlock(&kvm->lock);
2640 goto mmu_unload;
2641 }
2642 kvm->vcpus[n] = vcpu;
2643 mutex_unlock(&kvm->lock);
2644
2645 /* Now it's all set up, let userspace reach it */
2398 r = create_vcpu_fd(vcpu); 2646 r = create_vcpu_fd(vcpu);
2399 if (r < 0) 2647 if (r < 0)
2400 goto out_free_vcpus; 2648 goto unlink;
2649 return r;
2401 2650
2402 spin_lock(&kvm_lock); 2651unlink:
2403 if (n >= kvm->nvcpus) 2652 mutex_lock(&kvm->lock);
2404 kvm->nvcpus = n + 1; 2653 kvm->vcpus[n] = NULL;
2405 spin_unlock(&kvm_lock); 2654 mutex_unlock(&kvm->lock);
2406 2655
2407 return r; 2656mmu_unload:
2657 vcpu_load(vcpu);
2658 kvm_mmu_unload(vcpu);
2659 vcpu_put(vcpu);
2408 2660
2409out_free_vcpus: 2661free_vcpu:
2410 kvm_free_vcpu(vcpu); 2662 kvm_x86_ops->vcpu_free(vcpu);
2411out_free_run:
2412 free_page((unsigned long)vcpu->run);
2413 vcpu->run = NULL;
2414out_unlock:
2415 mutex_unlock(&vcpu->mutex);
2416out:
2417 return r; 2663 return r;
2418} 2664}
2419 2665
@@ -2493,7 +2739,7 @@ struct fxsave {
2493 2739
2494static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2740static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2495{ 2741{
2496 struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; 2742 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2497 2743
2498 vcpu_load(vcpu); 2744 vcpu_load(vcpu);
2499 2745
@@ -2513,7 +2759,7 @@ static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2513 2759
2514static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 2760static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2515{ 2761{
2516 struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; 2762 struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
2517 2763
2518 vcpu_load(vcpu); 2764 vcpu_load(vcpu);
2519 2765
@@ -2531,6 +2777,27 @@ static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2531 return 0; 2777 return 0;
2532} 2778}
2533 2779
2780static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2781 struct kvm_lapic_state *s)
2782{
2783 vcpu_load(vcpu);
2784 memcpy(s->regs, vcpu->apic->regs, sizeof *s);
2785 vcpu_put(vcpu);
2786
2787 return 0;
2788}
2789
2790static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2791 struct kvm_lapic_state *s)
2792{
2793 vcpu_load(vcpu);
2794 memcpy(vcpu->apic->regs, s->regs, sizeof *s);
2795 kvm_apic_post_state_restore(vcpu);
2796 vcpu_put(vcpu);
2797
2798 return 0;
2799}
2800
2534static long kvm_vcpu_ioctl(struct file *filp, 2801static long kvm_vcpu_ioctl(struct file *filp,
2535 unsigned int ioctl, unsigned long arg) 2802 unsigned int ioctl, unsigned long arg)
2536{ 2803{
@@ -2700,6 +2967,31 @@ static long kvm_vcpu_ioctl(struct file *filp,
2700 r = 0; 2967 r = 0;
2701 break; 2968 break;
2702 } 2969 }
2970 case KVM_GET_LAPIC: {
2971 struct kvm_lapic_state lapic;
2972
2973 memset(&lapic, 0, sizeof lapic);
2974 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
2975 if (r)
2976 goto out;
2977 r = -EFAULT;
2978 if (copy_to_user(argp, &lapic, sizeof lapic))
2979 goto out;
2980 r = 0;
2981 break;
2982 }
2983 case KVM_SET_LAPIC: {
2984 struct kvm_lapic_state lapic;
2985
2986 r = -EFAULT;
2987 if (copy_from_user(&lapic, argp, sizeof lapic))
2988 goto out;
 2989 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
2990 if (r)
2991 goto out;
2992 r = 0;
2993 break;
2994 }
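
Together these two ioctls let userspace snapshot and restore the in-kernel
APIC register state, e.g. across a save/restore or migration. A hedged sketch
of the round trip (vcpu_fd assumed, error handling omitted):

	struct kvm_lapic_state lapic;

	ioctl(vcpu_fd, KVM_GET_LAPIC, &lapic);	/* snapshot the register window */
	/* ... checkpoint or migrate ... */
	ioctl(vcpu_fd, KVM_SET_LAPIC, &lapic);	/* kvm_apic_post_state_restore()
						   re-derives timer/PPR state */
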
2703 default: 2995 default:
2704 ; 2996 ;
2705 } 2997 }
@@ -2753,6 +3045,75 @@ static long kvm_vm_ioctl(struct file *filp,
2753 goto out; 3045 goto out;
2754 break; 3046 break;
2755 } 3047 }
3048 case KVM_CREATE_IRQCHIP:
3049 r = -ENOMEM;
3050 kvm->vpic = kvm_create_pic(kvm);
3051 if (kvm->vpic) {
3052 r = kvm_ioapic_init(kvm);
3053 if (r) {
3054 kfree(kvm->vpic);
3055 kvm->vpic = NULL;
3056 goto out;
3057 }
3058 }
3059 else
3060 goto out;
3061 break;
3062 case KVM_IRQ_LINE: {
3063 struct kvm_irq_level irq_event;
3064
3065 r = -EFAULT;
3066 if (copy_from_user(&irq_event, argp, sizeof irq_event))
3067 goto out;
3068 if (irqchip_in_kernel(kvm)) {
3069 mutex_lock(&kvm->lock);
3070 if (irq_event.irq < 16)
3071 kvm_pic_set_irq(pic_irqchip(kvm),
3072 irq_event.irq,
3073 irq_event.level);
3074 kvm_ioapic_set_irq(kvm->vioapic,
3075 irq_event.irq,
3076 irq_event.level);
3077 mutex_unlock(&kvm->lock);
3078 r = 0;
3079 }
3080 break;
3081 }
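
The expected call sequence from userspace is one KVM_CREATE_IRQCHIP at VM
setup, then KVM_IRQ_LINE whenever the emulated board toggles a wire; a sketch
(vm_fd assumed):

	struct kvm_irq_level event = { .irq = 4, .level = 1 };

	ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);	/* in-kernel PIC + IOAPIC */
	ioctl(vm_fd, KVM_IRQ_LINE, &event);	/* assert IRQ 4; irqs < 16 hit
						   both the PIC and the IOAPIC */
	event.level = 0;
	ioctl(vm_fd, KVM_IRQ_LINE, &event);	/* de-assert */
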
3082 case KVM_GET_IRQCHIP: {
3083 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3084 struct kvm_irqchip chip;
3085
3086 r = -EFAULT;
3087 if (copy_from_user(&chip, argp, sizeof chip))
3088 goto out;
3089 r = -ENXIO;
3090 if (!irqchip_in_kernel(kvm))
3091 goto out;
3092 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
3093 if (r)
3094 goto out;
3095 r = -EFAULT;
3096 if (copy_to_user(argp, &chip, sizeof chip))
3097 goto out;
3098 r = 0;
3099 break;
3100 }
3101 case KVM_SET_IRQCHIP: {
3102 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3103 struct kvm_irqchip chip;
3104
3105 r = -EFAULT;
3106 if (copy_from_user(&chip, argp, sizeof chip))
3107 goto out;
3108 r = -ENXIO;
3109 if (!irqchip_in_kernel(kvm))
3110 goto out;
3111 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
3112 if (r)
3113 goto out;
3114 r = 0;
3115 break;
3116 }
2756 default: 3117 default:
2757 ; 3118 ;
2758 } 3119 }
@@ -2768,12 +3129,14 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
2768 unsigned long pgoff; 3129 unsigned long pgoff;
2769 struct page *page; 3130 struct page *page;
2770 3131
2771 *type = VM_FAULT_MINOR;
2772 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; 3132 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
2773 page = gfn_to_page(kvm, pgoff); 3133 page = gfn_to_page(kvm, pgoff);
2774 if (!page) 3134 if (!page)
2775 return NOPAGE_SIGBUS; 3135 return NOPAGE_SIGBUS;
2776 get_page(page); 3136 get_page(page);
3137 if (type != NULL)
3138 *type = VM_FAULT_MINOR;
3139
2777 return page; 3140 return page;
2778} 3141}
2779 3142
@@ -2861,12 +3224,20 @@ static long kvm_dev_ioctl(struct file *filp,
2861 r = 0; 3224 r = 0;
2862 break; 3225 break;
2863 } 3226 }
2864 case KVM_CHECK_EXTENSION: 3227 case KVM_CHECK_EXTENSION: {
2865 /* 3228 int ext = (long)argp;
2866 * No extensions defined at present. 3229
2867 */ 3230 switch (ext) {
2868 r = 0; 3231 case KVM_CAP_IRQCHIP:
3232 case KVM_CAP_HLT:
3233 r = 1;
3234 break;
3235 default:
3236 r = 0;
3237 break;
3238 }
2869 break; 3239 break;
3240 }
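
Userspace should probe before depending on either capability; a sketch:

	int kvm_fd = open("/dev/kvm", O_RDWR);

	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) > 0)
		ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);	/* safe to rely on it */
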
2870 case KVM_GET_VCPU_MMAP_SIZE: 3241 case KVM_GET_VCPU_MMAP_SIZE:
2871 r = -EINVAL; 3242 r = -EINVAL;
2872 if (arg) 3243 if (arg)
@@ -2881,8 +3252,6 @@ out:
2881} 3252}
2882 3253
2883static struct file_operations kvm_chardev_ops = { 3254static struct file_operations kvm_chardev_ops = {
2884 .open = kvm_dev_open,
2885 .release = kvm_dev_release,
2886 .unlocked_ioctl = kvm_dev_ioctl, 3255 .unlocked_ioctl = kvm_dev_ioctl,
2887 .compat_ioctl = kvm_dev_ioctl, 3256 .compat_ioctl = kvm_dev_ioctl,
2888}; 3257};
@@ -2893,25 +3262,6 @@ static struct miscdevice kvm_dev = {
2893 &kvm_chardev_ops, 3262 &kvm_chardev_ops,
2894}; 3263};
2895 3264
2896static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
2897 void *v)
2898{
2899 if (val == SYS_RESTART) {
2900 /*
2901 * Some (well, at least mine) BIOSes hang on reboot if
2902 * in vmx root mode.
2903 */
2904 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
2905 on_each_cpu(hardware_disable, NULL, 0, 1);
2906 }
2907 return NOTIFY_OK;
2908}
2909
2910static struct notifier_block kvm_reboot_notifier = {
2911 .notifier_call = kvm_reboot,
2912 .priority = 0,
2913};
2914
2915/* 3265/*
2916 * Make sure that a cpu that is being hot-unplugged does not have any vcpus 3266 * Make sure that a cpu that is being hot-unplugged does not have any vcpus
2917 * cached on it. 3267 * cached on it.
@@ -2925,7 +3275,9 @@ static void decache_vcpus_on_cpu(int cpu)
2925 spin_lock(&kvm_lock); 3275 spin_lock(&kvm_lock);
2926 list_for_each_entry(vm, &vm_list, vm_list) 3276 list_for_each_entry(vm, &vm_list, vm_list)
2927 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 3277 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
2928 vcpu = &vm->vcpus[i]; 3278 vcpu = vm->vcpus[i];
3279 if (!vcpu)
3280 continue;
2929 /* 3281 /*
2930 * If the vcpu is locked, then it is running on some 3282 * If the vcpu is locked, then it is running on some
2931 * other cpu and therefore it is not cached on the 3283 * other cpu and therefore it is not cached on the
@@ -2936,7 +3288,7 @@ static void decache_vcpus_on_cpu(int cpu)
2936 */ 3288 */
2937 if (mutex_trylock(&vcpu->mutex)) { 3289 if (mutex_trylock(&vcpu->mutex)) {
2938 if (vcpu->cpu == cpu) { 3290 if (vcpu->cpu == cpu) {
2939 kvm_arch_ops->vcpu_decache(vcpu); 3291 kvm_x86_ops->vcpu_decache(vcpu);
2940 vcpu->cpu = -1; 3292 vcpu->cpu = -1;
2941 } 3293 }
2942 mutex_unlock(&vcpu->mutex); 3294 mutex_unlock(&vcpu->mutex);
@@ -2952,7 +3304,7 @@ static void hardware_enable(void *junk)
2952 if (cpu_isset(cpu, cpus_hardware_enabled)) 3304 if (cpu_isset(cpu, cpus_hardware_enabled))
2953 return; 3305 return;
2954 cpu_set(cpu, cpus_hardware_enabled); 3306 cpu_set(cpu, cpus_hardware_enabled);
2955 kvm_arch_ops->hardware_enable(NULL); 3307 kvm_x86_ops->hardware_enable(NULL);
2956} 3308}
2957 3309
2958static void hardware_disable(void *junk) 3310static void hardware_disable(void *junk)
@@ -2963,7 +3315,7 @@ static void hardware_disable(void *junk)
2963 return; 3315 return;
2964 cpu_clear(cpu, cpus_hardware_enabled); 3316 cpu_clear(cpu, cpus_hardware_enabled);
2965 decache_vcpus_on_cpu(cpu); 3317 decache_vcpus_on_cpu(cpu);
2966 kvm_arch_ops->hardware_disable(NULL); 3318 kvm_x86_ops->hardware_disable(NULL);
2967} 3319}
2968 3320
2969static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, 3321static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
@@ -2994,6 +3346,25 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2994 return NOTIFY_OK; 3346 return NOTIFY_OK;
2995} 3347}
2996 3348
3349static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
3350 void *v)
3351{
3352 if (val == SYS_RESTART) {
3353 /*
3354 * Some (well, at least mine) BIOSes hang on reboot if
3355 * in vmx root mode.
3356 */
3357 printk(KERN_INFO "kvm: exiting hardware virtualization\n");
3358 on_each_cpu(hardware_disable, NULL, 0, 1);
3359 }
3360 return NOTIFY_OK;
3361}
3362
3363static struct notifier_block kvm_reboot_notifier = {
3364 .notifier_call = kvm_reboot,
3365 .priority = 0,
3366};
3367
2997void kvm_io_bus_init(struct kvm_io_bus *bus) 3368void kvm_io_bus_init(struct kvm_io_bus *bus)
2998{ 3369{
2999 memset(bus, 0, sizeof(*bus)); 3370 memset(bus, 0, sizeof(*bus));
@@ -3047,18 +3418,15 @@ static u64 stat_get(void *_offset)
3047 spin_lock(&kvm_lock); 3418 spin_lock(&kvm_lock);
3048 list_for_each_entry(kvm, &vm_list, vm_list) 3419 list_for_each_entry(kvm, &vm_list, vm_list)
3049 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 3420 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
3050 vcpu = &kvm->vcpus[i]; 3421 vcpu = kvm->vcpus[i];
3051 total += *(u32 *)((void *)vcpu + offset); 3422 if (vcpu)
3423 total += *(u32 *)((void *)vcpu + offset);
3052 } 3424 }
3053 spin_unlock(&kvm_lock); 3425 spin_unlock(&kvm_lock);
3054 return total; 3426 return total;
3055} 3427}
3056 3428
3057static void stat_set(void *offset, u64 val) 3429DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n");
3058{
3059}
3060
3061DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n");
3062 3430
3063static __init void kvm_init_debug(void) 3431static __init void kvm_init_debug(void)
3064{ 3432{
@@ -3105,11 +3473,34 @@ static struct sys_device kvm_sysdev = {
3105 3473
3106hpa_t bad_page_address; 3474hpa_t bad_page_address;
3107 3475
3108int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) 3476static inline
3477struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
3478{
3479 return container_of(pn, struct kvm_vcpu, preempt_notifier);
3480}
3481
3482static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3483{
3484 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3485
3486 kvm_x86_ops->vcpu_load(vcpu, cpu);
3487}
3488
3489static void kvm_sched_out(struct preempt_notifier *pn,
3490 struct task_struct *next)
3491{
3492 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3493
3494 kvm_x86_ops->vcpu_put(vcpu);
3495}
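
These hooks are why kvm_resched() above no longer needs an explicit
vcpu_put()/vcpu_load() pair: once a vcpu is loaded, the scheduler itself
saves and restores the hardware context around every preemption. Roughly,
the vcpu_load() elsewhere in this file pairs with the hooks like this (a
sketch, not a verbatim quote of that function):

	static void vcpu_load(struct kvm_vcpu *vcpu)
	{
		int cpu;

		mutex_lock(&vcpu->mutex);
		cpu = get_cpu();
		preempt_notifier_register(&vcpu->preempt_notifier);
		kvm_x86_ops->vcpu_load(vcpu, cpu);	/* from here on the
							   notifier tracks us */
		put_cpu();
	}
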
3496
3497int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
3498 struct module *module)
3109{ 3499{
3110 int r; 3500 int r;
3501 int cpu;
3111 3502
3112 if (kvm_arch_ops) { 3503 if (kvm_x86_ops) {
3113 printk(KERN_ERR "kvm: already loaded the other module\n"); 3504 printk(KERN_ERR "kvm: already loaded the other module\n");
3114 return -EEXIST; 3505 return -EEXIST;
3115 } 3506 }
@@ -3123,12 +3514,20 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3123 return -EOPNOTSUPP; 3514 return -EOPNOTSUPP;
3124 } 3515 }
3125 3516
3126 kvm_arch_ops = ops; 3517 kvm_x86_ops = ops;
3127 3518
3128 r = kvm_arch_ops->hardware_setup(); 3519 r = kvm_x86_ops->hardware_setup();
3129 if (r < 0) 3520 if (r < 0)
3130 goto out; 3521 goto out;
3131 3522
3523 for_each_online_cpu(cpu) {
3524 smp_call_function_single(cpu,
3525 kvm_x86_ops->check_processor_compatibility,
3526 &r, 0, 1);
3527 if (r < 0)
3528 goto out_free_0;
3529 }
3530
3132 on_each_cpu(hardware_enable, NULL, 0, 1); 3531 on_each_cpu(hardware_enable, NULL, 0, 1);
3133 r = register_cpu_notifier(&kvm_cpu_notifier); 3532 r = register_cpu_notifier(&kvm_cpu_notifier);
3134 if (r) 3533 if (r)
@@ -3143,6 +3542,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3143 if (r) 3542 if (r)
3144 goto out_free_3; 3543 goto out_free_3;
3145 3544
3545 /* A kmem cache lets us meet the alignment requirements of fx_save. */
3546 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
3547 __alignof__(struct kvm_vcpu), 0, 0);
3548 if (!kvm_vcpu_cache) {
3549 r = -ENOMEM;
3550 goto out_free_4;
3551 }
3552
3146 kvm_chardev_ops.owner = module; 3553 kvm_chardev_ops.owner = module;
3147 3554
3148 r = misc_register(&kvm_dev); 3555 r = misc_register(&kvm_dev);
@@ -3151,9 +3558,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
3151 goto out_free; 3558 goto out_free;
3152 } 3559 }
3153 3560
3561 kvm_preempt_ops.sched_in = kvm_sched_in;
3562 kvm_preempt_ops.sched_out = kvm_sched_out;
3563
3154 return r; 3564 return r;
3155 3565
3156out_free: 3566out_free:
3567 kmem_cache_destroy(kvm_vcpu_cache);
3568out_free_4:
3157 sysdev_unregister(&kvm_sysdev); 3569 sysdev_unregister(&kvm_sysdev);
3158out_free_3: 3570out_free_3:
3159 sysdev_class_unregister(&kvm_sysdev_class); 3571 sysdev_class_unregister(&kvm_sysdev_class);
@@ -3162,22 +3574,24 @@ out_free_2:
3162 unregister_cpu_notifier(&kvm_cpu_notifier); 3574 unregister_cpu_notifier(&kvm_cpu_notifier);
3163out_free_1: 3575out_free_1:
3164 on_each_cpu(hardware_disable, NULL, 0, 1); 3576 on_each_cpu(hardware_disable, NULL, 0, 1);
3165 kvm_arch_ops->hardware_unsetup(); 3577out_free_0:
3578 kvm_x86_ops->hardware_unsetup();
3166out: 3579out:
3167 kvm_arch_ops = NULL; 3580 kvm_x86_ops = NULL;
3168 return r; 3581 return r;
3169} 3582}
3170 3583
3171void kvm_exit_arch(void) 3584void kvm_exit_x86(void)
3172{ 3585{
3173 misc_deregister(&kvm_dev); 3586 misc_deregister(&kvm_dev);
3587 kmem_cache_destroy(kvm_vcpu_cache);
3174 sysdev_unregister(&kvm_sysdev); 3588 sysdev_unregister(&kvm_sysdev);
3175 sysdev_class_unregister(&kvm_sysdev_class); 3589 sysdev_class_unregister(&kvm_sysdev_class);
3176 unregister_reboot_notifier(&kvm_reboot_notifier); 3590 unregister_reboot_notifier(&kvm_reboot_notifier);
3177 unregister_cpu_notifier(&kvm_cpu_notifier); 3591 unregister_cpu_notifier(&kvm_cpu_notifier);
3178 on_each_cpu(hardware_disable, NULL, 0, 1); 3592 on_each_cpu(hardware_disable, NULL, 0, 1);
3179 kvm_arch_ops->hardware_unsetup(); 3593 kvm_x86_ops->hardware_unsetup();
3180 kvm_arch_ops = NULL; 3594 kvm_x86_ops = NULL;
3181} 3595}
3182 3596
3183static __init int kvm_init(void) 3597static __init int kvm_init(void)
@@ -3220,5 +3634,5 @@ static __exit void kvm_exit(void)
3220module_init(kvm_init) 3634module_init(kvm_init)
3221module_exit(kvm_exit) 3635module_exit(kvm_exit)
3222 3636
3223EXPORT_SYMBOL_GPL(kvm_init_arch); 3637EXPORT_SYMBOL_GPL(kvm_init_x86);
3224EXPORT_SYMBOL_GPL(kvm_exit_arch); 3638EXPORT_SYMBOL_GPL(kvm_exit_x86);
diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h
index a869983d683d..a0e415daef5b 100644
--- a/drivers/kvm/kvm_svm.h
+++ b/drivers/kvm/kvm_svm.h
@@ -20,7 +20,10 @@ static const u32 host_save_user_msrs[] = {
20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) 20#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
21#define NUM_DB_REGS 4 21#define NUM_DB_REGS 4
22 22
23struct kvm_vcpu;
24
23struct vcpu_svm { 25struct vcpu_svm {
26 struct kvm_vcpu vcpu;
24 struct vmcb *vmcb; 27 struct vmcb *vmcb;
25 unsigned long vmcb_pa; 28 unsigned long vmcb_pa;
26 struct svm_cpu_data *svm_data; 29 struct svm_cpu_data *svm_data;
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
new file mode 100644
index 000000000000..a190587cf6a5
--- /dev/null
+++ b/drivers/kvm/lapic.c
@@ -0,0 +1,1064 @@
1
2/*
3 * Local APIC virtualization
4 *
5 * Copyright (C) 2006 Qumranet, Inc.
6 * Copyright (C) 2007 Novell
7 * Copyright (C) 2007 Intel
8 *
9 * Authors:
10 * Dor Laor <dor.laor@qumranet.com>
11 * Gregory Haskins <ghaskins@novell.com>
12 * Yaozu (Eddie) Dong <eddie.dong@intel.com>
13 *
14 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
15 *
16 * This work is licensed under the terms of the GNU GPL, version 2. See
17 * the COPYING file in the top-level directory.
18 */
19
20#include "kvm.h"
21#include <linux/kvm.h>
22#include <linux/mm.h>
23#include <linux/highmem.h>
24#include <linux/smp.h>
25#include <linux/hrtimer.h>
26#include <linux/io.h>
27#include <linux/module.h>
28#include <asm/processor.h>
29#include <asm/msr.h>
30#include <asm/page.h>
31#include <asm/current.h>
32#include <asm/apicdef.h>
33#include <asm/atomic.h>
34#include <asm/div64.h>
35#include "irq.h"
36
37#define PRId64 "d"
38#define PRIx64 "llx"
39#define PRIu64 "u"
40#define PRIo64 "o"
41
42#define APIC_BUS_CYCLE_NS 1
43
44/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
45#define apic_debug(fmt, arg...)
46
47#define APIC_LVT_NUM 6
 48/* 0x14 is the APIC version on Xeon and Pentium 4 (Intel SDM vol 3, 8.4.8) */
49#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16))
50#define LAPIC_MMIO_LENGTH (1 << 12)
 51/* the following defines are not in apicdef.h */
52#define APIC_SHORT_MASK 0xc0000
53#define APIC_DEST_NOSHORT 0x0
54#define APIC_DEST_MASK 0x800
55#define MAX_APIC_VECTOR 256
56
57#define VEC_POS(v) ((v) & (32 - 1))
58#define REG_POS(v) (((v) >> 5) << 4)
59static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
60{
61 return *((u32 *) (apic->regs + reg_off));
62}
63
64static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
65{
66 *((u32 *) (apic->regs + reg_off)) = val;
67}
68
69static inline int apic_test_and_set_vector(int vec, void *bitmap)
70{
71 return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
72}
73
74static inline int apic_test_and_clear_vector(int vec, void *bitmap)
75{
76 return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
77}
78
79static inline void apic_set_vector(int vec, void *bitmap)
80{
81 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
82}
83
84static inline void apic_clear_vector(int vec, void *bitmap)
85{
86 clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
87}
88
89static inline int apic_hw_enabled(struct kvm_lapic *apic)
90{
91 return (apic)->vcpu->apic_base & MSR_IA32_APICBASE_ENABLE;
92}
93
94static inline int apic_sw_enabled(struct kvm_lapic *apic)
95{
96 return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
97}
98
99static inline int apic_enabled(struct kvm_lapic *apic)
100{
101 return apic_sw_enabled(apic) && apic_hw_enabled(apic);
102}
103
104#define LVT_MASK \
105 (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
106
107#define LINT_MASK \
108 (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
109 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
110
111static inline int kvm_apic_id(struct kvm_lapic *apic)
112{
113 return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
114}
115
116static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
117{
118 return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
119}
120
121static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
122{
123 return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
124}
125
126static inline int apic_lvtt_period(struct kvm_lapic *apic)
127{
128 return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
129}
130
131static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
132 LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
133 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
134 LVT_MASK | APIC_MODE_MASK, /* LVTPC */
135 LINT_MASK, LINT_MASK, /* LVT0-1 */
136 LVT_MASK /* LVTERR */
137};
138
139static int find_highest_vector(void *bitmap)
140{
141 u32 *word = bitmap;
142 int word_offset = MAX_APIC_VECTOR >> 5;
143
144 while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
145 continue;
146
147 if (likely(!word_offset && !word[0]))
148 return -1;
149 else
150 return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
151}
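
The IRR/ISR/TMR banks hold 256 vector bits in eight 32-bit registers, each
sitting in its own 16-byte-aligned slot of the APIC page; hence the << 4 in
REG_POS() and the u32 stride of 4 (<< 2) in find_highest_vector(). A worked
example:

	/* vector 0x31 (decimal 49):
	 *   REG_POS(0x31) = (0x31 >> 5) << 4 = 0x10  -> second register slot
	 *   VEC_POS(0x31) = 0x31 & 31       = 17     -> bit 17 within it
	 */
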
152
153static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
154{
155 return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
156}
157
158static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
159{
160 apic_clear_vector(vec, apic->regs + APIC_IRR);
161}
162
163static inline int apic_find_highest_irr(struct kvm_lapic *apic)
164{
165 int result;
166
167 result = find_highest_vector(apic->regs + APIC_IRR);
168 ASSERT(result == -1 || result >= 16);
169
170 return result;
171}
172
173int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
174{
175 struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
176 int highest_irr;
177
178 if (!apic)
179 return 0;
180 highest_irr = apic_find_highest_irr(apic);
181
182 return highest_irr;
183}
184EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
185
186int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
187{
188 if (!apic_test_and_set_irr(vec, apic)) {
189 /* a new pending irq is set in IRR */
190 if (trig)
191 apic_set_vector(vec, apic->regs + APIC_TMR);
192 else
193 apic_clear_vector(vec, apic->regs + APIC_TMR);
194 kvm_vcpu_kick(apic->vcpu);
195 return 1;
196 }
197 return 0;
198}
199
200static inline int apic_find_highest_isr(struct kvm_lapic *apic)
201{
202 int result;
203
204 result = find_highest_vector(apic->regs + APIC_ISR);
205 ASSERT(result == -1 || result >= 16);
206
207 return result;
208}
209
210static void apic_update_ppr(struct kvm_lapic *apic)
211{
212 u32 tpr, isrv, ppr;
213 int isr;
214
215 tpr = apic_get_reg(apic, APIC_TASKPRI);
216 isr = apic_find_highest_isr(apic);
217 isrv = (isr != -1) ? isr : 0;
218
219 if ((tpr & 0xf0) >= (isrv & 0xf0))
220 ppr = tpr & 0xff;
221 else
222 ppr = isrv & 0xf0;
223
224 apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
225 apic, ppr, isr, isrv);
226
227 apic_set_reg(apic, APIC_PROCPRI, ppr);
228}
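
In words: the processor priority is the task priority or the class (high
nibble) of the highest in-service vector, whichever is higher. A worked
example with assumed values:

	/* TPR = 0x30, highest ISR vector = 0x41:
	 *   tpr & 0xf0 = 0x30 < isrv & 0xf0 = 0x40, so PPR = 0x40;
	 *   pending vectors of class 4 and below stay masked.
	 */
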
229
230static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
231{
232 apic_set_reg(apic, APIC_TASKPRI, tpr);
233 apic_update_ppr(apic);
234}
235
236int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
237{
238 return kvm_apic_id(apic) == dest;
239}
240
241int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
242{
243 int result = 0;
244 u8 logical_id;
245
246 logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
247
248 switch (apic_get_reg(apic, APIC_DFR)) {
249 case APIC_DFR_FLAT:
250 if (logical_id & mda)
251 result = 1;
252 break;
253 case APIC_DFR_CLUSTER:
254 if (((logical_id >> 4) == (mda >> 0x4))
255 && (logical_id & mda & 0xf))
256 result = 1;
257 break;
258 default:
259 printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
260 apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
261 break;
262 }
263
264 return result;
265}
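
Flat mode treats the 8-bit logical ID as a bitmask (any overlap matches);
cluster mode matches the cluster nibble exactly and the member nibble as a
mask. Worked examples with assumed register values:

	/* Flat (APIC_DFR_FLAT):       id 0x02, mda 0x06 -> 0x02 & 0x06 != 0 -> match
	 * Cluster (APIC_DFR_CLUSTER): id 0x21, mda 0x23 -> cluster 2 == 2 and
	 *                             member bits 0x1 & 0x3 != 0           -> match
	 */
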
266
267static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
268 int short_hand, int dest, int dest_mode)
269{
270 int result = 0;
271 struct kvm_lapic *target = vcpu->apic;
272
273 apic_debug("target %p, source %p, dest 0x%x, "
274 "dest_mode 0x%x, short_hand 0x%x",
275 target, source, dest, dest_mode, short_hand);
276
 277 ASSERT(target);	/* target is dereferenced in every branch below */
278 switch (short_hand) {
279 case APIC_DEST_NOSHORT:
280 if (dest_mode == 0) {
281 /* Physical mode. */
282 if ((dest == 0xFF) || (dest == kvm_apic_id(target)))
283 result = 1;
284 } else
285 /* Logical mode. */
286 result = kvm_apic_match_logical_addr(target, dest);
287 break;
288 case APIC_DEST_SELF:
289 if (target == source)
290 result = 1;
291 break;
292 case APIC_DEST_ALLINC:
293 result = 1;
294 break;
295 case APIC_DEST_ALLBUT:
296 if (target != source)
297 result = 1;
298 break;
299 default:
300 printk(KERN_WARNING "Bad dest shorthand value %x\n",
301 short_hand);
302 break;
303 }
304
305 return result;
306}
307
308/*
309 * Add a pending IRQ into lapic.
310 * Return 1 if successfully added and 0 if discarded.
311 */
312static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
313 int vector, int level, int trig_mode)
314{
315 int orig_irr, result = 0;
316 struct kvm_vcpu *vcpu = apic->vcpu;
317
318 switch (delivery_mode) {
319 case APIC_DM_FIXED:
320 case APIC_DM_LOWEST:
321 /* FIXME add logic for vcpu on reset */
322 if (unlikely(!apic_enabled(apic)))
323 break;
324
325 orig_irr = apic_test_and_set_irr(vector, apic);
326 if (orig_irr && trig_mode) {
327 apic_debug("repeated level-triggered irq for vector %d",
328 vector);
329 break;
330 }
331
332 if (trig_mode) {
333 apic_debug("level-triggered irq for vector %d", vector);
334 apic_set_vector(vector, apic->regs + APIC_TMR);
335 } else
336 apic_clear_vector(vector, apic->regs + APIC_TMR);
337
338 if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
339 kvm_vcpu_kick(vcpu);
340 else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) {
341 vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
342 if (waitqueue_active(&vcpu->wq))
343 wake_up_interruptible(&vcpu->wq);
344 }
345
346 result = (orig_irr == 0);
347 break;
348
349 case APIC_DM_REMRD:
350 printk(KERN_DEBUG "Ignoring delivery mode 3\n");
351 break;
352
353 case APIC_DM_SMI:
354 printk(KERN_DEBUG "Ignoring guest SMI\n");
355 break;
356 case APIC_DM_NMI:
357 printk(KERN_DEBUG "Ignoring guest NMI\n");
358 break;
359
360 case APIC_DM_INIT:
361 if (level) {
362 if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE)
363 printk(KERN_DEBUG
364 "INIT on a runnable vcpu %d\n",
365 vcpu->vcpu_id);
366 vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED;
367 kvm_vcpu_kick(vcpu);
368 } else {
369 printk(KERN_DEBUG
370 "Ignoring de-assert INIT to vcpu %d\n",
371 vcpu->vcpu_id);
372 }
373
374 break;
375
376 case APIC_DM_STARTUP:
377 printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
378 vcpu->vcpu_id, vector);
379 if (vcpu->mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
380 vcpu->sipi_vector = vector;
381 vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
382 if (waitqueue_active(&vcpu->wq))
383 wake_up_interruptible(&vcpu->wq);
384 }
385 break;
386
387 default:
388 printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
389 delivery_mode);
390 break;
391 }
392 return result;
393}
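/*
 * Fixed and lowest-priority deliveries latch the vector in the IRR and
 * wake the target vcpu; INIT and STARTUP (SIPI) instead drive the
 * mp_state machine that emulates the INIT-SIPI-SIPI bootstrap sequence
 * for application processors.
 */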
394
395struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
396 unsigned long bitmap)
397{
398 int vcpu_id;
399 int last;
400 int next;
401 struct kvm_lapic *apic = NULL;
402
403 last = kvm->round_robin_prev_vcpu;
404 next = last;
405
406 do {
407 if (++next == KVM_MAX_VCPUS)
408 next = 0;
409 if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap))
410 continue;
411 apic = kvm->vcpus[next]->apic;
412 if (apic && apic_enabled(apic))
413 break;
414 apic = NULL;
415 } while (next != last);
416 kvm->round_robin_prev_vcpu = next;
417
418 if (!apic) {
419 vcpu_id = ffs(bitmap) - 1;
420 if (vcpu_id < 0) {
421 vcpu_id = 0;
422 printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
423 }
424 apic = kvm->vcpus[vcpu_id]->apic;
425 }
426
427 return apic;
428}
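/*
 * 'bitmap' holds the vcpu ids that matched a lowest-priority
 * destination; arbitration rotates through them, resuming after the
 * vcpu that won the previous round.
 */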
429
430static void apic_set_eoi(struct kvm_lapic *apic)
431{
432 int vector = apic_find_highest_isr(apic);
433
434 /*
435 * Not every EOI write has a corresponding bit set in the ISR;
436 * one example is when the kernel checks the timer during setup_IO_APIC().
437 */
438 if (vector == -1)
439 return;
440
441 apic_clear_vector(vector, apic->regs + APIC_ISR);
442 apic_update_ppr(apic);
443
444 if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
445 kvm_ioapic_update_eoi(apic->vcpu->kvm, vector);
446}
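/*
 * For level-triggered vectors (TMR bit set) the EOI is also forwarded
 * to the in-kernel IOAPIC, which may re-inject the interrupt if the
 * line is still asserted.
 */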
447
448static void apic_send_ipi(struct kvm_lapic *apic)
449{
450 u32 icr_low = apic_get_reg(apic, APIC_ICR);
451 u32 icr_high = apic_get_reg(apic, APIC_ICR2);
452
453 unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
454 unsigned int short_hand = icr_low & APIC_SHORT_MASK;
455 unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
456 unsigned int level = icr_low & APIC_INT_ASSERT;
457 unsigned int dest_mode = icr_low & APIC_DEST_MASK;
458 unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
459 unsigned int vector = icr_low & APIC_VECTOR_MASK;
460
461 struct kvm_lapic *target;
462 struct kvm_vcpu *vcpu;
463 unsigned long lpr_map = 0;
464 int i;
465
466 apic_debug("icr_high 0x%x, icr_low 0x%x, "
467 "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
468 "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
469 icr_high, icr_low, short_hand, dest,
470 trig_mode, level, dest_mode, delivery_mode, vector);
471
472 for (i = 0; i < KVM_MAX_VCPUS; i++) {
473 vcpu = apic->vcpu->kvm->vcpus[i];
474 if (!vcpu)
475 continue;
476
477 if (vcpu->apic &&
478 apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
479 if (delivery_mode == APIC_DM_LOWEST)
480 set_bit(vcpu->vcpu_id, &lpr_map);
481 else
482 __apic_accept_irq(vcpu->apic, delivery_mode,
483 vector, level, trig_mode);
484 }
485 }
486
487 if (delivery_mode == APIC_DM_LOWEST) {
488 target = kvm_apic_round_robin(apic->vcpu->kvm, vector, lpr_map);
489 if (target != NULL)
490 __apic_accept_irq(target, delivery_mode,
491 vector, level, trig_mode);
492 }
493}
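/*
 * Example ICR decode: icr_low = 0x000040f1 with ICR2 bits 31:24 = 3 is
 * an asserted, edge-triggered, fixed-delivery, physical-mode IPI with
 * vector 0xf1 aimed at the vcpu whose APIC ID is 3.
 */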
494
495static u32 apic_get_tmcct(struct kvm_lapic *apic)
496{
497 u32 counter_passed;
498 ktime_t passed, now = apic->timer.dev.base->get_time();
499 u32 tmcct = apic_get_reg(apic, APIC_TMICT);
500
501 ASSERT(apic != NULL);
502
503 if (unlikely(ktime_to_ns(now) <=
504 ktime_to_ns(apic->timer.last_update))) {
505 /* Wrap around */
506 passed = ktime_add(( {
507 (ktime_t) {
508 .tv64 = KTIME_MAX -
509 (apic->timer.last_update).tv64}; }
510 ), now);
511 apic_debug("time elapsed\n");
512 } else
513 passed = ktime_sub(now, apic->timer.last_update);
514
515 counter_passed = div64_64(ktime_to_ns(passed),
516 (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
517 tmcct -= counter_passed;
518
519 if (tmcct <= 0) {
520 if (unlikely(!apic_lvtt_period(apic)))
521 tmcct = 0;
522 else
523 do {
524 tmcct += apic_get_reg(apic, APIC_TMICT);
525 } while (tmcct <= 0);
526 }
527
528 return tmcct;
529}
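/*
 * The current count is the initial count minus the bus cycles elapsed
 * since the timer was armed, one count taking APIC_BUS_CYCLE_NS *
 * divide_count nanoseconds.  Note that tmcct is a u32, so the
 * "tmcct <= 0" tests above only ever match an exact zero and an
 * underflow is reported as a large positive count.
 */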
530
531static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
532{
533 u32 val = 0;
534
535 if (offset >= LAPIC_MMIO_LENGTH)
536 return 0;
537
538 switch (offset) {
539 case APIC_ARBPRI:
540 printk(KERN_WARNING "Access to APIC ARBPRI register, "
541 "which exists only on P6-class CPUs\n");
542 break;
543
544 case APIC_TMCCT: /* Timer CCR */
545 val = apic_get_tmcct(apic);
546 break;
547
548 default:
549 apic_update_ppr(apic);
550 val = apic_get_reg(apic, offset);
551 break;
552 }
553
554 return val;
555}
556
557static void apic_mmio_read(struct kvm_io_device *this,
558 gpa_t address, int len, void *data)
559{
560 struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
561 unsigned int offset = address - apic->base_address;
562 unsigned char alignment = offset & 0xf;
563 u32 result;
564
565 if ((alignment + len) > 4) {
566 printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d\n",
567 (unsigned long)address, len);
568 return;
569 }
570 result = __apic_read(apic, offset & ~0xf);
571
572 switch (len) {
573 case 1:
574 case 2:
575 case 4:
576 memcpy(data, (char *)&result + alignment, len);
577 break;
578 default:
579 printk(KERN_ERR "Local APIC read with len = %x, "
580 "should be 1,2, or 4 instead\n", len);
581 break;
582 }
583}
584
585static void update_divide_count(struct kvm_lapic *apic)
586{
587 u32 tmp1, tmp2, tdcr;
588
589 tdcr = apic_get_reg(apic, APIC_TDCR);
590 tmp1 = tdcr & 0xf;
591 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
592 apic->timer.divide_count = 0x1 << (tmp2 & 0x7);
593
594 apic_debug("timer divide count is 0x%x\n",
595 apic->timer.divide_count);
596}
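/*
 * The divide value is 2^(encoding + 1), with the 3-bit encoding taken
 * from TDCR bits 0, 1 and 3 and 0b111 wrapping around to divide-by-1:
 * TDCR = 0x0 gives divide_count 2, TDCR = 0xb gives 1.
 */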
597
598static void start_apic_timer(struct kvm_lapic *apic)
599{
600 ktime_t now = apic->timer.dev.base->get_time();
601
602 apic->timer.last_update = now;
603
604 apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
605 APIC_BUS_CYCLE_NS * apic->timer.divide_count;
606 atomic_set(&apic->timer.pending, 0);
607 hrtimer_start(&apic->timer.dev,
608 ktime_add_ns(now, apic->timer.period),
609 HRTIMER_MODE_ABS);
610
611 apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
612 PRIx64 ", "
613 "timer initial count 0x%x, period %lldns, "
614 "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__,
615 APIC_BUS_CYCLE_NS, ktime_to_ns(now),
616 apic_get_reg(apic, APIC_TMICT),
617 apic->timer.period,
618 ktime_to_ns(ktime_add_ns(now,
619 apic->timer.period)));
620}
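/*
 * The timer is always armed as a one-shot here; in periodic mode the
 * hrtimer callback (__apic_timer_fn) re-arms it by pushing the expiry
 * forward by one period.
 */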
621
622static void apic_mmio_write(struct kvm_io_device *this,
623 gpa_t address, int len, const void *data)
624{
625 struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
626 unsigned int offset = address - apic->base_address;
627 unsigned char alignment = offset & 0xf;
628 u32 val;
629
630 /*
631 * APIC registers must be aligned on a 128-bit boundary, and
632 * 32/64/128-bit registers must be accessed through 32-bit loads
633 * and stores; see Intel SDM 8.4.1.
634 */
635 if (len != 4 || alignment) {
636 if (printk_ratelimit())
637 printk(KERN_ERR "apic write: bad size=%d %lx\n",
638 len, (long)address);
639 return;
640 }
641
642 val = *(u32 *) data;
643
644 /* EOI writes are far too frequent to be worth logging */
645 if (offset != APIC_EOI)
646 apic_debug("%s: offset 0x%x with length 0x%x, and value is "
647 "0x%x\n", __FUNCTION__, offset, len, val);
648
649 offset &= 0xff0;
650
651 switch (offset) {
652 case APIC_ID: /* Local APIC ID */
653 apic_set_reg(apic, APIC_ID, val);
654 break;
655
656 case APIC_TASKPRI:
657 apic_set_tpr(apic, val & 0xff);
658 break;
659
660 case APIC_EOI:
661 apic_set_eoi(apic);
662 break;
663
664 case APIC_LDR:
665 apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
666 break;
667
668 case APIC_DFR:
669 apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
670 break;
671
672 case APIC_SPIV:
673 apic_set_reg(apic, APIC_SPIV, val & 0x3ff);
674 if (!(val & APIC_SPIV_APIC_ENABLED)) {
675 int i;
676 u32 lvt_val;
677
678 for (i = 0; i < APIC_LVT_NUM; i++) {
679 lvt_val = apic_get_reg(apic,
680 APIC_LVTT + 0x10 * i);
681 apic_set_reg(apic, APIC_LVTT + 0x10 * i,
682 lvt_val | APIC_LVT_MASKED);
683 }
684 atomic_set(&apic->timer.pending, 0);
685
686 }
687 break;
688
689 case APIC_ICR:
690 /* No delay here, so we always clear the pending bit */
691 apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
692 apic_send_ipi(apic);
693 break;
694
695 case APIC_ICR2:
696 apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
697 break;
698
699 case APIC_LVTT:
700 case APIC_LVTTHMR:
701 case APIC_LVTPC:
702 case APIC_LVT0:
703 case APIC_LVT1:
704 case APIC_LVTERR:
705 /* TODO: Check vector */
706 if (!apic_sw_enabled(apic))
707 val |= APIC_LVT_MASKED;
708
709 val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
710 apic_set_reg(apic, offset, val);
711
712 break;
713
714 case APIC_TMICT:
715 hrtimer_cancel(&apic->timer.dev);
716 apic_set_reg(apic, APIC_TMICT, val);
717 start_apic_timer(apic);
718 return;
719
720 case APIC_TDCR:
721 if (val & 4)
722 printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val);
723 apic_set_reg(apic, APIC_TDCR, val);
724 update_divide_count(apic);
725 break;
726
727 default:
728 apic_debug("Local APIC Write to read-only register %x\n",
729 offset);
730 break;
731 }
732
733}
734
735static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
736{
737 struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
738 int ret = 0;
739
740
741 if (apic_hw_enabled(apic) &&
742 (addr >= apic->base_address) &&
743 (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
744 ret = 1;
745
746 return ret;
747}
748
749void kvm_free_apic(struct kvm_lapic *apic)
750{
751 if (!apic)
752 return;
753
754 hrtimer_cancel(&apic->timer.dev);
755
756 if (apic->regs_page) {
757 __free_page(apic->regs_page);
758 apic->regs_page = NULL;
759 }
760
761 kfree(apic);
762}
763
764/*
765 *----------------------------------------------------------------------
766 * LAPIC interface
767 *----------------------------------------------------------------------
768 */
769
770void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
771{
772 struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
773
774 if (!apic)
775 return;
776 apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
777}
778
779u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
780{
781 struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
782 u64 tpr;
783
784 if (!apic)
785 return 0;
786 tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
787
788 return (tpr & 0xf0) >> 4;
789}
790EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
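/*
 * CR8 maps to the upper four bits of the task priority, so a guest
 * "mov $3, %cr8" corresponds to TPR = 0x30 and vice versa.
 */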
791
792void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
793{
794 struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
795
796 if (!apic) {
797 value |= MSR_IA32_APICBASE_BSP;
798 vcpu->apic_base = value;
799 return;
800 }
801 if (apic->vcpu->vcpu_id)
802 value &= ~MSR_IA32_APICBASE_BSP;
803
804 vcpu->apic_base = value;
805 apic->base_address = apic->vcpu->apic_base &
806 MSR_IA32_APICBASE_BASE;
807
808 /* with FSB delivery interrupt, we can restart APIC functionality */
809 apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
810 "0x%lx.\n", apic->apic_base, apic->base_address);
811
812}
813
814u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
815{
816 return vcpu->apic_base;
817}
818EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
819
820void kvm_lapic_reset(struct kvm_vcpu *vcpu)
821{
822 struct kvm_lapic *apic;
823 int i;
824
825 apic_debug("%s\n", __FUNCTION__);
826
827 ASSERT(vcpu);
828 apic = vcpu->apic;
829 ASSERT(apic != NULL);
830
831 /* Stop the timer in case it's a reset to an active apic */
832 hrtimer_cancel(&apic->timer.dev);
833
834 apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
835 apic_set_reg(apic, APIC_LVR, APIC_VERSION);
836
837 for (i = 0; i < APIC_LVT_NUM; i++)
838 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
839 apic_set_reg(apic, APIC_LVT0,
840 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
841
842 apic_set_reg(apic, APIC_DFR, 0xffffffffU);
843 apic_set_reg(apic, APIC_SPIV, 0xff);
844 apic_set_reg(apic, APIC_TASKPRI, 0);
845 apic_set_reg(apic, APIC_LDR, 0);
846 apic_set_reg(apic, APIC_ESR, 0);
847 apic_set_reg(apic, APIC_ICR, 0);
848 apic_set_reg(apic, APIC_ICR2, 0);
849 apic_set_reg(apic, APIC_TDCR, 0);
850 apic_set_reg(apic, APIC_TMICT, 0);
851 for (i = 0; i < 8; i++) {
852 apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
853 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
854 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
855 }
856 apic->timer.divide_count = 0;
857 atomic_set(&apic->timer.pending, 0);
858 if (vcpu->vcpu_id == 0)
859 vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
860 apic_update_ppr(apic);
861
862 apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
863 "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
864 vcpu, kvm_apic_id(apic),
865 vcpu->apic_base, apic->base_address);
866}
867EXPORT_SYMBOL_GPL(kvm_lapic_reset);
868
869int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
870{
871 struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
872 int ret = 0;
873
874 if (!apic)
875 return 0;
876 ret = apic_enabled(apic);
877
878 return ret;
879}
880EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
881
882/*
883 *----------------------------------------------------------------------
884 * timer interface
885 *----------------------------------------------------------------------
886 */
887
888/* TODO: make sure __apic_timer_fn runs on the current pCPU */
889static int __apic_timer_fn(struct kvm_lapic *apic)
890{
891 int result = 0;
892 wait_queue_head_t *q = &apic->vcpu->wq;
893
894 atomic_inc(&apic->timer.pending);
895 if (waitqueue_active(q))
896 {
897 apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
898 wake_up_interruptible(q);
899 }
900 if (apic_lvtt_period(apic)) {
901 result = 1;
902 apic->timer.dev.expires = ktime_add_ns(
903 apic->timer.dev.expires,
904 apic->timer.period);
905 }
906 return result;
907}
908
909static int __inject_apic_timer_irq(struct kvm_lapic *apic)
910{
911 int vector;
912
913 vector = apic_lvt_vector(apic, APIC_LVTT);
914 return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
915}
916
917static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
918{
919 struct kvm_lapic *apic;
920 int restart_timer = 0;
921
922 apic = container_of(data, struct kvm_lapic, timer.dev);
923
924 restart_timer = __apic_timer_fn(apic);
925
926 if (restart_timer)
927 return HRTIMER_RESTART;
928 else
929 return HRTIMER_NORESTART;
930}
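/*
 * The hrtimer callback only accounts the tick in timer.pending and
 * wakes the vcpu; the interrupt itself is injected later, in vcpu
 * context, by kvm_inject_apic_timer_irqs() below.
 */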
931
932int kvm_create_lapic(struct kvm_vcpu *vcpu)
933{
934 struct kvm_lapic *apic;
935
936 ASSERT(vcpu != NULL);
937 apic_debug("apic_init %d\n", vcpu->vcpu_id);
938
939 apic = kzalloc(sizeof(*apic), GFP_KERNEL);
940 if (!apic)
941 goto nomem;
942
943 vcpu->apic = apic;
944
945 apic->regs_page = alloc_page(GFP_KERNEL);
946 if (apic->regs_page == NULL) {
947 printk(KERN_ERR "failed to allocate APIC register page for vcpu %x\n",
948 vcpu->vcpu_id);
949 goto nomem;
950 }
951 apic->regs = page_address(apic->regs_page);
952 memset(apic->regs, 0, PAGE_SIZE);
953 apic->vcpu = vcpu;
954
955 hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
956 apic->timer.dev.function = apic_timer_fn;
957 apic->base_address = APIC_DEFAULT_PHYS_BASE;
958 vcpu->apic_base = APIC_DEFAULT_PHYS_BASE;
959
960 kvm_lapic_reset(vcpu);
961 apic->dev.read = apic_mmio_read;
962 apic->dev.write = apic_mmio_write;
963 apic->dev.in_range = apic_mmio_range;
964 apic->dev.private = apic;
965
966 return 0;
967nomem:
968 kvm_free_apic(apic);
969 return -ENOMEM;
970}
971EXPORT_SYMBOL_GPL(kvm_create_lapic);
972
973int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
974{
975 struct kvm_lapic *apic = vcpu->apic;
976 int highest_irr;
977
978 if (!apic || !apic_enabled(apic))
979 return -1;
980
981 apic_update_ppr(apic);
982 highest_irr = apic_find_highest_irr(apic);
983 if ((highest_irr == -1) ||
984 ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
985 return -1;
986 return highest_irr;
987}
988
989int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
990{
991 u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0);
992 int r = 0;
993
994 if (vcpu->vcpu_id == 0) {
995 if (!apic_hw_enabled(vcpu->apic))
996 r = 1;
997 if ((lvt0 & APIC_LVT_MASKED) == 0 &&
998 GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
999 r = 1;
1000 }
1001 return r;
1002}
1003
1004void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1005{
1006 struct kvm_lapic *apic = vcpu->apic;
1007
1008 if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
1009 atomic_read(&apic->timer.pending) > 0) {
1010 if (__inject_apic_timer_irq(apic))
1011 atomic_dec(&apic->timer.pending);
1012 }
1013}
1014
1015void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
1016{
1017 struct kvm_lapic *apic = vcpu->apic;
1018
1019 if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
1020 apic->timer.last_update = ktime_add_ns(
1021 apic->timer.last_update,
1022 apic->timer.period);
1023}
1024
1025int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1026{
1027 int vector = kvm_apic_has_interrupt(vcpu);
1028 struct kvm_lapic *apic = vcpu->apic;
1029
1030 if (vector == -1)
1031 return -1;
1032
1033 apic_set_vector(vector, apic->regs + APIC_ISR);
1034 apic_update_ppr(apic);
1035 apic_clear_irr(vector, apic);
1036 return vector;
1037}
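/*
 * This is the equivalent of an INTA cycle: the highest pending vector
 * moves from the IRR to the ISR and the processor priority is
 * recomputed before the vector is returned for injection.
 */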
1038
1039void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
1040{
1041 struct kvm_lapic *apic = vcpu->apic;
1042
1043 apic->base_address = vcpu->apic_base &
1044 MSR_IA32_APICBASE_BASE;
1045 apic_set_reg(apic, APIC_LVR, APIC_VERSION);
1046 apic_update_ppr(apic);
1047 hrtimer_cancel(&apic->timer.dev);
1048 update_divide_count(apic);
1049 start_apic_timer(apic);
1050}
1051
1052void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
1053{
1054 struct kvm_lapic *apic = vcpu->apic;
1055 struct hrtimer *timer;
1056
1057 if (!apic)
1058 return;
1059
1060 timer = &apic->timer.dev;
1061 if (hrtimer_cancel(timer))
1062 hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
1063}
1064EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer);
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 23965aa5ee78..6d84d30f5ed0 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -158,7 +158,7 @@ static struct kmem_cache *mmu_page_header_cache;
158 158
159static int is_write_protection(struct kvm_vcpu *vcpu) 159static int is_write_protection(struct kvm_vcpu *vcpu)
160{ 160{
161 return vcpu->cr0 & CR0_WP_MASK; 161 return vcpu->cr0 & X86_CR0_WP;
162} 162}
163 163
164static int is_cpuid_PSE36(void) 164static int is_cpuid_PSE36(void)
@@ -202,15 +202,14 @@ static void set_shadow_pte(u64 *sptep, u64 spte)
202} 202}
203 203
204static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 204static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
205 struct kmem_cache *base_cache, int min, 205 struct kmem_cache *base_cache, int min)
206 gfp_t gfp_flags)
207{ 206{
208 void *obj; 207 void *obj;
209 208
210 if (cache->nobjs >= min) 209 if (cache->nobjs >= min)
211 return 0; 210 return 0;
212 while (cache->nobjs < ARRAY_SIZE(cache->objects)) { 211 while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
213 obj = kmem_cache_zalloc(base_cache, gfp_flags); 212 obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
214 if (!obj) 213 if (!obj)
215 return -ENOMEM; 214 return -ENOMEM;
216 cache->objects[cache->nobjs++] = obj; 215 cache->objects[cache->nobjs++] = obj;
@@ -225,14 +224,14 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
225} 224}
226 225
227static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, 226static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
228 int min, gfp_t gfp_flags) 227 int min)
229{ 228{
230 struct page *page; 229 struct page *page;
231 230
232 if (cache->nobjs >= min) 231 if (cache->nobjs >= min)
233 return 0; 232 return 0;
234 while (cache->nobjs < ARRAY_SIZE(cache->objects)) { 233 while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
235 page = alloc_page(gfp_flags); 234 page = alloc_page(GFP_KERNEL);
236 if (!page) 235 if (!page)
237 return -ENOMEM; 236 return -ENOMEM;
238 set_page_private(page, 0); 237 set_page_private(page, 0);
@@ -247,44 +246,28 @@ static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc)
247 free_page((unsigned long)mc->objects[--mc->nobjs]); 246 free_page((unsigned long)mc->objects[--mc->nobjs]);
248} 247}
249 248
250static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) 249static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
251{ 250{
252 int r; 251 int r;
253 252
253 kvm_mmu_free_some_pages(vcpu);
254 r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, 254 r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
255 pte_chain_cache, 4, gfp_flags); 255 pte_chain_cache, 4);
256 if (r) 256 if (r)
257 goto out; 257 goto out;
258 r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, 258 r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
259 rmap_desc_cache, 1, gfp_flags); 259 rmap_desc_cache, 1);
260 if (r) 260 if (r)
261 goto out; 261 goto out;
262 r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4, gfp_flags); 262 r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
263 if (r) 263 if (r)
264 goto out; 264 goto out;
265 r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, 265 r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
266 mmu_page_header_cache, 4, gfp_flags); 266 mmu_page_header_cache, 4);
267out: 267out:
268 return r; 268 return r;
269} 269}
270 270
271static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
272{
273 int r;
274
275 r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
276 kvm_mmu_free_some_pages(vcpu);
277 if (r < 0) {
278 spin_unlock(&vcpu->kvm->lock);
279 kvm_arch_ops->vcpu_put(vcpu);
280 r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
281 kvm_arch_ops->vcpu_load(vcpu);
282 spin_lock(&vcpu->kvm->lock);
283 kvm_mmu_free_some_pages(vcpu);
284 }
285 return r;
286}
287
288static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) 271static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
289{ 272{
290 mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); 273 mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
@@ -969,7 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
969static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 952static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
970{ 953{
971 ++vcpu->stat.tlb_flush; 954 ++vcpu->stat.tlb_flush;
972 kvm_arch_ops->tlb_flush(vcpu); 955 kvm_x86_ops->tlb_flush(vcpu);
973} 956}
974 957
975static void paging_new_cr3(struct kvm_vcpu *vcpu) 958static void paging_new_cr3(struct kvm_vcpu *vcpu)
@@ -982,7 +965,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
982 u64 addr, 965 u64 addr,
983 u32 err_code) 966 u32 err_code)
984{ 967{
985 kvm_arch_ops->inject_page_fault(vcpu, addr, err_code); 968 kvm_x86_ops->inject_page_fault(vcpu, addr, err_code);
986} 969}
987 970
988static void paging_free(struct kvm_vcpu *vcpu) 971static void paging_free(struct kvm_vcpu *vcpu)
@@ -1071,15 +1054,15 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
1071{ 1054{
1072 int r; 1055 int r;
1073 1056
1074 spin_lock(&vcpu->kvm->lock); 1057 mutex_lock(&vcpu->kvm->lock);
1075 r = mmu_topup_memory_caches(vcpu); 1058 r = mmu_topup_memory_caches(vcpu);
1076 if (r) 1059 if (r)
1077 goto out; 1060 goto out;
1078 mmu_alloc_roots(vcpu); 1061 mmu_alloc_roots(vcpu);
1079 kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); 1062 kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
1080 kvm_mmu_flush_tlb(vcpu); 1063 kvm_mmu_flush_tlb(vcpu);
1081out: 1064out:
1082 spin_unlock(&vcpu->kvm->lock); 1065 mutex_unlock(&vcpu->kvm->lock);
1083 return r; 1066 return r;
1084} 1067}
1085EXPORT_SYMBOL_GPL(kvm_mmu_load); 1068EXPORT_SYMBOL_GPL(kvm_mmu_load);
@@ -1124,7 +1107,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
1124} 1107}
1125 1108
1126void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 1109void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1127 const u8 *old, const u8 *new, int bytes) 1110 const u8 *new, int bytes)
1128{ 1111{
1129 gfn_t gfn = gpa >> PAGE_SHIFT; 1112 gfn_t gfn = gpa >> PAGE_SHIFT;
1130 struct kvm_mmu_page *page; 1113 struct kvm_mmu_page *page;
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 4b5391c717f8..6b094b44f8fb 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -58,7 +58,10 @@ struct guest_walker {
58 int level; 58 int level;
59 gfn_t table_gfn[PT_MAX_FULL_LEVELS]; 59 gfn_t table_gfn[PT_MAX_FULL_LEVELS];
60 pt_element_t *table; 60 pt_element_t *table;
61 pt_element_t pte;
61 pt_element_t *ptep; 62 pt_element_t *ptep;
63 struct page *page;
64 int index;
62 pt_element_t inherited_ar; 65 pt_element_t inherited_ar;
63 gfn_t gfn; 66 gfn_t gfn;
64 u32 error_code; 67 u32 error_code;
@@ -80,11 +83,14 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
80 pgprintk("%s: addr %lx\n", __FUNCTION__, addr); 83 pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
81 walker->level = vcpu->mmu.root_level; 84 walker->level = vcpu->mmu.root_level;
82 walker->table = NULL; 85 walker->table = NULL;
86 walker->page = NULL;
87 walker->ptep = NULL;
83 root = vcpu->cr3; 88 root = vcpu->cr3;
84#if PTTYPE == 64 89#if PTTYPE == 64
85 if (!is_long_mode(vcpu)) { 90 if (!is_long_mode(vcpu)) {
86 walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; 91 walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
87 root = *walker->ptep; 92 root = *walker->ptep;
93 walker->pte = root;
88 if (!(root & PT_PRESENT_MASK)) 94 if (!(root & PT_PRESENT_MASK))
89 goto not_present; 95 goto not_present;
90 --walker->level; 96 --walker->level;
@@ -96,10 +102,11 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
96 walker->level - 1, table_gfn); 102 walker->level - 1, table_gfn);
97 slot = gfn_to_memslot(vcpu->kvm, table_gfn); 103 slot = gfn_to_memslot(vcpu->kvm, table_gfn);
98 hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK); 104 hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
99 walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0); 105 walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
106 walker->table = kmap_atomic(walker->page, KM_USER0);
100 107
101 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || 108 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
102 (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0); 109 (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
103 110
104 walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; 111 walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
105 112
@@ -108,6 +115,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
108 hpa_t paddr; 115 hpa_t paddr;
109 116
110 ptep = &walker->table[index]; 117 ptep = &walker->table[index];
118 walker->index = index;
111 ASSERT(((unsigned long)walker->table & PAGE_MASK) == 119 ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
112 ((unsigned long)ptep & PAGE_MASK)); 120 ((unsigned long)ptep & PAGE_MASK));
113 121
@@ -148,16 +156,20 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
148 156
149 walker->inherited_ar &= walker->table[index]; 157 walker->inherited_ar &= walker->table[index];
150 table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; 158 table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
151 paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
152 kunmap_atomic(walker->table, KM_USER0); 159 kunmap_atomic(walker->table, KM_USER0);
153 walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT), 160 paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT);
154 KM_USER0); 161 walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
162 walker->table = kmap_atomic(walker->page, KM_USER0);
155 --walker->level; 163 --walker->level;
156 walker->table_gfn[walker->level - 1 ] = table_gfn; 164 walker->table_gfn[walker->level - 1 ] = table_gfn;
157 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, 165 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
158 walker->level - 1, table_gfn); 166 walker->level - 1, table_gfn);
159 } 167 }
160 walker->ptep = ptep; 168 walker->pte = *ptep;
169 if (walker->page)
170 walker->ptep = NULL;
171 if (walker->table)
172 kunmap_atomic(walker->table, KM_USER0);
161 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); 173 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
162 return 1; 174 return 1;
163 175
@@ -175,13 +187,9 @@ err:
175 walker->error_code |= PFERR_USER_MASK; 187 walker->error_code |= PFERR_USER_MASK;
176 if (fetch_fault) 188 if (fetch_fault)
177 walker->error_code |= PFERR_FETCH_MASK; 189 walker->error_code |= PFERR_FETCH_MASK;
178 return 0;
179}
180
181static void FNAME(release_walker)(struct guest_walker *walker)
182{
183 if (walker->table) 190 if (walker->table)
184 kunmap_atomic(walker->table, KM_USER0); 191 kunmap_atomic(walker->table, KM_USER0);
192 return 0;
185} 193}
186 194
187static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, 195static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
@@ -193,7 +201,7 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
193static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, 201static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
194 u64 *shadow_pte, 202 u64 *shadow_pte,
195 gpa_t gaddr, 203 gpa_t gaddr,
196 pt_element_t *gpte, 204 pt_element_t gpte,
197 u64 access_bits, 205 u64 access_bits,
198 int user_fault, 206 int user_fault,
199 int write_fault, 207 int write_fault,
@@ -202,23 +210,34 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
202 gfn_t gfn) 210 gfn_t gfn)
203{ 211{
204 hpa_t paddr; 212 hpa_t paddr;
205 int dirty = *gpte & PT_DIRTY_MASK; 213 int dirty = gpte & PT_DIRTY_MASK;
206 u64 spte = *shadow_pte; 214 u64 spte = *shadow_pte;
207 int was_rmapped = is_rmap_pte(spte); 215 int was_rmapped = is_rmap_pte(spte);
208 216
209 pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" 217 pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d"
210 " user_fault %d gfn %lx\n", 218 " user_fault %d gfn %lx\n",
211 __FUNCTION__, spte, (u64)*gpte, access_bits, 219 __FUNCTION__, spte, (u64)gpte, access_bits,
212 write_fault, user_fault, gfn); 220 write_fault, user_fault, gfn);
213 221
214 if (write_fault && !dirty) { 222 if (write_fault && !dirty) {
215 *gpte |= PT_DIRTY_MASK; 223 pt_element_t *guest_ent, *tmp = NULL;
224
225 if (walker->ptep)
226 guest_ent = walker->ptep;
227 else {
228 tmp = kmap_atomic(walker->page, KM_USER0);
229 guest_ent = &tmp[walker->index];
230 }
231
232 *guest_ent |= PT_DIRTY_MASK;
233 if (!walker->ptep)
234 kunmap_atomic(tmp, KM_USER0);
216 dirty = 1; 235 dirty = 1;
217 FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); 236 FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
218 } 237 }
219 238
220 spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; 239 spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
221 spte |= *gpte & PT64_NX_MASK; 240 spte |= gpte & PT64_NX_MASK;
222 if (!dirty) 241 if (!dirty)
223 access_bits &= ~PT_WRITABLE_MASK; 242 access_bits &= ~PT_WRITABLE_MASK;
224 243
@@ -255,7 +274,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
255 access_bits &= ~PT_WRITABLE_MASK; 274 access_bits &= ~PT_WRITABLE_MASK;
256 if (is_writeble_pte(spte)) { 275 if (is_writeble_pte(spte)) {
257 spte &= ~PT_WRITABLE_MASK; 276 spte &= ~PT_WRITABLE_MASK;
258 kvm_arch_ops->tlb_flush(vcpu); 277 kvm_x86_ops->tlb_flush(vcpu);
259 } 278 }
260 if (write_fault) 279 if (write_fault)
261 *ptwrite = 1; 280 *ptwrite = 1;
@@ -273,13 +292,13 @@ unshadowed:
273 rmap_add(vcpu, shadow_pte); 292 rmap_add(vcpu, shadow_pte);
274} 293}
275 294
276static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, 295static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
277 u64 *shadow_pte, u64 access_bits, 296 u64 *shadow_pte, u64 access_bits,
278 int user_fault, int write_fault, int *ptwrite, 297 int user_fault, int write_fault, int *ptwrite,
279 struct guest_walker *walker, gfn_t gfn) 298 struct guest_walker *walker, gfn_t gfn)
280{ 299{
281 access_bits &= *gpte; 300 access_bits &= gpte;
282 FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, 301 FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK,
283 gpte, access_bits, user_fault, write_fault, 302 gpte, access_bits, user_fault, write_fault,
284 ptwrite, walker, gfn); 303 ptwrite, walker, gfn);
285} 304}
@@ -295,22 +314,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
295 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) 314 if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK))
296 return; 315 return;
297 pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); 316 pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
298 FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, 317 FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0,
299 0, NULL, NULL, 318 0, NULL, NULL,
300 (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); 319 (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT);
301} 320}
302 321
303static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, 322static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde,
304 u64 *shadow_pte, u64 access_bits, 323 u64 *shadow_pte, u64 access_bits,
305 int user_fault, int write_fault, int *ptwrite, 324 int user_fault, int write_fault, int *ptwrite,
306 struct guest_walker *walker, gfn_t gfn) 325 struct guest_walker *walker, gfn_t gfn)
307{ 326{
308 gpa_t gaddr; 327 gpa_t gaddr;
309 328
310 access_bits &= *gpde; 329 access_bits &= gpde;
311 gaddr = (gpa_t)gfn << PAGE_SHIFT; 330 gaddr = (gpa_t)gfn << PAGE_SHIFT;
312 if (PTTYPE == 32 && is_cpuid_PSE36()) 331 if (PTTYPE == 32 && is_cpuid_PSE36())
313 gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << 332 gaddr |= (gpde & PT32_DIR_PSE36_MASK) <<
314 (32 - PT32_DIR_PSE36_SHIFT); 333 (32 - PT32_DIR_PSE36_SHIFT);
315 FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, 334 FNAME(set_pte_common)(vcpu, shadow_pte, gaddr,
316 gpde, access_bits, user_fault, write_fault, 335 gpde, access_bits, user_fault, write_fault,
@@ -328,9 +347,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
328 int level; 347 int level;
329 u64 *shadow_ent; 348 u64 *shadow_ent;
330 u64 *prev_shadow_ent = NULL; 349 u64 *prev_shadow_ent = NULL;
331 pt_element_t *guest_ent = walker->ptep;
332 350
333 if (!is_present_pte(*guest_ent)) 351 if (!is_present_pte(walker->pte))
334 return NULL; 352 return NULL;
335 353
336 shadow_addr = vcpu->mmu.root_hpa; 354 shadow_addr = vcpu->mmu.root_hpa;
@@ -364,12 +382,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
364 if (level - 1 == PT_PAGE_TABLE_LEVEL 382 if (level - 1 == PT_PAGE_TABLE_LEVEL
365 && walker->level == PT_DIRECTORY_LEVEL) { 383 && walker->level == PT_DIRECTORY_LEVEL) {
366 metaphysical = 1; 384 metaphysical = 1;
367 hugepage_access = *guest_ent; 385 hugepage_access = walker->pte;
368 hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; 386 hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
369 if (*guest_ent & PT64_NX_MASK) 387 if (walker->pte & PT64_NX_MASK)
370 hugepage_access |= (1 << 2); 388 hugepage_access |= (1 << 2);
371 hugepage_access >>= PT_WRITABLE_SHIFT; 389 hugepage_access >>= PT_WRITABLE_SHIFT;
372 table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) 390 table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
373 >> PAGE_SHIFT; 391 >> PAGE_SHIFT;
374 } else { 392 } else {
375 metaphysical = 0; 393 metaphysical = 0;
@@ -386,12 +404,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
386 } 404 }
387 405
388 if (walker->level == PT_DIRECTORY_LEVEL) { 406 if (walker->level == PT_DIRECTORY_LEVEL) {
389 FNAME(set_pde)(vcpu, guest_ent, shadow_ent, 407 FNAME(set_pde)(vcpu, walker->pte, shadow_ent,
390 walker->inherited_ar, user_fault, write_fault, 408 walker->inherited_ar, user_fault, write_fault,
391 ptwrite, walker, walker->gfn); 409 ptwrite, walker, walker->gfn);
392 } else { 410 } else {
393 ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); 411 ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
394 FNAME(set_pte)(vcpu, guest_ent, shadow_ent, 412 FNAME(set_pte)(vcpu, walker->pte, shadow_ent,
395 walker->inherited_ar, user_fault, write_fault, 413 walker->inherited_ar, user_fault, write_fault,
396 ptwrite, walker, walker->gfn); 414 ptwrite, walker, walker->gfn);
397 } 415 }
@@ -442,7 +460,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
442 if (!r) { 460 if (!r) {
443 pgprintk("%s: guest page fault\n", __FUNCTION__); 461 pgprintk("%s: guest page fault\n", __FUNCTION__);
444 inject_page_fault(vcpu, addr, walker.error_code); 462 inject_page_fault(vcpu, addr, walker.error_code);
445 FNAME(release_walker)(&walker);
446 vcpu->last_pt_write_count = 0; /* reset fork detector */ 463 vcpu->last_pt_write_count = 0; /* reset fork detector */
447 return 0; 464 return 0;
448 } 465 }
@@ -452,8 +469,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
452 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, 469 pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
453 shadow_pte, *shadow_pte, write_pt); 470 shadow_pte, *shadow_pte, write_pt);
454 471
455 FNAME(release_walker)(&walker);
456
457 if (!write_pt) 472 if (!write_pt)
458 vcpu->last_pt_write_count = 0; /* reset fork detector */ 473 vcpu->last_pt_write_count = 0; /* reset fork detector */
459 474
@@ -482,7 +497,6 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
482 gpa |= vaddr & ~PAGE_MASK; 497 gpa |= vaddr & ~PAGE_MASK;
483 } 498 }
484 499
485 FNAME(release_walker)(&walker);
486 return gpa; 500 return gpa;
487} 501}
488 502
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index bc818cc126e3..729f1cd93606 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -16,12 +16,12 @@
16 16
17#include "kvm_svm.h" 17#include "kvm_svm.h"
18#include "x86_emulate.h" 18#include "x86_emulate.h"
19#include "irq.h"
19 20
20#include <linux/module.h> 21#include <linux/module.h>
21#include <linux/kernel.h> 22#include <linux/kernel.h>
22#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
23#include <linux/highmem.h> 24#include <linux/highmem.h>
24#include <linux/profile.h>
25#include <linux/sched.h> 25#include <linux/sched.h>
26 26
27#include <asm/desc.h> 27#include <asm/desc.h>
@@ -38,7 +38,6 @@ MODULE_LICENSE("GPL");
38 38
39#define DR7_GD_MASK (1 << 13) 39#define DR7_GD_MASK (1 << 13)
40#define DR6_BD_MASK (1 << 13) 40#define DR6_BD_MASK (1 << 13)
41#define CR4_DE_MASK (1UL << 3)
42 41
43#define SEG_TYPE_LDT 2 42#define SEG_TYPE_LDT 2
44#define SEG_TYPE_BUSY_TSS16 3 43#define SEG_TYPE_BUSY_TSS16 3
@@ -50,6 +49,13 @@ MODULE_LICENSE("GPL");
50#define SVM_FEATURE_LBRV (1 << 1) 49#define SVM_FEATURE_LBRV (1 << 1)
51#define SVM_DEATURE_SVML (1 << 2) 50#define SVM_DEATURE_SVML (1 << 2)
52 51
52static void kvm_reput_irq(struct vcpu_svm *svm);
53
54static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
55{
56 return container_of(vcpu, struct vcpu_svm, vcpu);
57}
58
53unsigned long iopm_base; 59unsigned long iopm_base;
54unsigned long msrpm_base; 60unsigned long msrpm_base;
55 61
@@ -94,20 +100,6 @@ static inline u32 svm_has(u32 feat)
94 return svm_features & feat; 100 return svm_features & feat;
95} 101}
96 102
97static unsigned get_addr_size(struct kvm_vcpu *vcpu)
98{
99 struct vmcb_save_area *sa = &vcpu->svm->vmcb->save;
100 u16 cs_attrib;
101
102 if (!(sa->cr0 & CR0_PE_MASK) || (sa->rflags & X86_EFLAGS_VM))
103 return 2;
104
105 cs_attrib = sa->cs.attrib;
106
107 return (cs_attrib & SVM_SELECTOR_L_MASK) ? 8 :
108 (cs_attrib & SVM_SELECTOR_DB_MASK) ? 4 : 2;
109}
110
111static inline u8 pop_irq(struct kvm_vcpu *vcpu) 103static inline u8 pop_irq(struct kvm_vcpu *vcpu)
112{ 104{
113 int word_index = __ffs(vcpu->irq_summary); 105 int word_index = __ffs(vcpu->irq_summary);
@@ -182,7 +174,7 @@ static inline void write_dr7(unsigned long val)
182 174
183static inline void force_new_asid(struct kvm_vcpu *vcpu) 175static inline void force_new_asid(struct kvm_vcpu *vcpu)
184{ 176{
185 vcpu->svm->asid_generation--; 177 to_svm(vcpu)->asid_generation--;
186} 178}
187 179
188static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) 180static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
@@ -195,22 +187,24 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
195 if (!(efer & KVM_EFER_LMA)) 187 if (!(efer & KVM_EFER_LMA))
196 efer &= ~KVM_EFER_LME; 188 efer &= ~KVM_EFER_LME;
197 189
198 vcpu->svm->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; 190 to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
199 vcpu->shadow_efer = efer; 191 vcpu->shadow_efer = efer;
200} 192}
201 193
202static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) 194static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
203{ 195{
204 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 196 struct vcpu_svm *svm = to_svm(vcpu);
197
198 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
205 SVM_EVTINJ_VALID_ERR | 199 SVM_EVTINJ_VALID_ERR |
206 SVM_EVTINJ_TYPE_EXEPT | 200 SVM_EVTINJ_TYPE_EXEPT |
207 GP_VECTOR; 201 GP_VECTOR;
208 vcpu->svm->vmcb->control.event_inj_err = error_code; 202 svm->vmcb->control.event_inj_err = error_code;
209} 203}
210 204
211static void inject_ud(struct kvm_vcpu *vcpu) 205static void inject_ud(struct kvm_vcpu *vcpu)
212{ 206{
213 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 207 to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID |
214 SVM_EVTINJ_TYPE_EXEPT | 208 SVM_EVTINJ_TYPE_EXEPT |
215 UD_VECTOR; 209 UD_VECTOR;
216} 210}
@@ -229,19 +223,21 @@ static int is_external_interrupt(u32 info)
229 223
230static void skip_emulated_instruction(struct kvm_vcpu *vcpu) 224static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
231{ 225{
232 if (!vcpu->svm->next_rip) { 226 struct vcpu_svm *svm = to_svm(vcpu);
227
228 if (!svm->next_rip) {
233 printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); 229 printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
234 return; 230 return;
235 } 231 }
236 if (vcpu->svm->next_rip - vcpu->svm->vmcb->save.rip > 15) { 232 if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) {
237 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", 233 printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
238 __FUNCTION__, 234 __FUNCTION__,
239 vcpu->svm->vmcb->save.rip, 235 svm->vmcb->save.rip,
240 vcpu->svm->next_rip); 236 svm->next_rip);
241 } 237 }
242 238
243 vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; 239 vcpu->rip = svm->vmcb->save.rip = svm->next_rip;
244 vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 240 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
245 241
246 vcpu->interrupt_window_open = 1; 242 vcpu->interrupt_window_open = 1;
247} 243}
@@ -351,8 +347,8 @@ err_1:
351 347
352} 348}
353 349
354static int set_msr_interception(u32 *msrpm, unsigned msr, 350static void set_msr_interception(u32 *msrpm, unsigned msr,
355 int read, int write) 351 int read, int write)
356{ 352{
357 int i; 353 int i;
358 354
@@ -367,11 +363,10 @@ static int set_msr_interception(u32 *msrpm, unsigned msr,
367 u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); 363 u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1);
368 *base = (*base & ~(0x3 << msr_shift)) | 364 *base = (*base & ~(0x3 << msr_shift)) |
369 (mask << msr_shift); 365 (mask << msr_shift);
370 return 1; 366 return;
371 } 367 }
372 } 368 }
373 printk(KERN_DEBUG "%s: not found 0x%x\n", __FUNCTION__, msr); 369 BUG();
374 return 0;
375} 370}
376 371
377static __init int svm_hardware_setup(void) 372static __init int svm_hardware_setup(void)
@@ -382,8 +377,6 @@ static __init int svm_hardware_setup(void)
382 void *iopm_va, *msrpm_va; 377 void *iopm_va, *msrpm_va;
383 int r; 378 int r;
384 379
385 kvm_emulator_want_group7_invlpg();
386
387 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); 380 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
388 381
389 if (!iopm_pages) 382 if (!iopm_pages)
@@ -458,11 +451,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
458 seg->base = 0; 451 seg->base = 0;
459} 452}
460 453
461static int svm_vcpu_setup(struct kvm_vcpu *vcpu)
462{
463 return 0;
464}
465
466static void init_vmcb(struct vmcb *vmcb) 454static void init_vmcb(struct vmcb *vmcb)
467{ 455{
468 struct vmcb_control_area *control = &vmcb->control; 456 struct vmcb_control_area *control = &vmcb->control;
@@ -563,59 +551,83 @@ static void init_vmcb(struct vmcb *vmcb)
563 * cr0 val on cpu init should be 0x60000010, we enable cpu 551 * cr0 val on cpu init should be 0x60000010, we enable cpu
564 * cache by default. the orderly way is to enable cache in bios. 552 * cache by default. the orderly way is to enable cache in bios.
565 */ 553 */
566 save->cr0 = 0x00000010 | CR0_PG_MASK | CR0_WP_MASK; 554 save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
567 save->cr4 = CR4_PAE_MASK; 555 save->cr4 = X86_CR4_PAE;
568 /* rdx = ?? */ 556 /* rdx = ?? */
569} 557}
570 558
571static int svm_create_vcpu(struct kvm_vcpu *vcpu) 559static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
560{
561 struct vcpu_svm *svm = to_svm(vcpu);
562
563 init_vmcb(svm->vmcb);
564}
565
566static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
572{ 567{
568 struct vcpu_svm *svm;
573 struct page *page; 569 struct page *page;
574 int r; 570 int err;
571
572 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
573 if (!svm) {
574 err = -ENOMEM;
575 goto out;
576 }
577
578 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
579 if (err)
580 goto free_svm;
581
582 if (irqchip_in_kernel(kvm)) {
583 err = kvm_create_lapic(&svm->vcpu);
584 if (err < 0)
585 goto free_svm;
586 }
575 587
576 r = -ENOMEM;
577 vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL);
578 if (!vcpu->svm)
579 goto out1;
580 page = alloc_page(GFP_KERNEL); 588 page = alloc_page(GFP_KERNEL);
581 if (!page) 589 if (!page) {
582 goto out2; 590 err = -ENOMEM;
583 591 goto uninit;
584 vcpu->svm->vmcb = page_address(page); 592 }
585 clear_page(vcpu->svm->vmcb);
586 vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
587 vcpu->svm->asid_generation = 0;
588 memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
589 init_vmcb(vcpu->svm->vmcb);
590
591 fx_init(vcpu);
592 vcpu->fpu_active = 1;
593 vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
594 if (vcpu == &vcpu->kvm->vcpus[0])
595 vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
596 593
597 return 0; 594 svm->vmcb = page_address(page);
595 clear_page(svm->vmcb);
596 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
597 svm->asid_generation = 0;
598 memset(svm->db_regs, 0, sizeof(svm->db_regs));
599 init_vmcb(svm->vmcb);
598 600
599out2: 601 fx_init(&svm->vcpu);
600 kfree(vcpu->svm); 602 svm->vcpu.fpu_active = 1;
601out1: 603 svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
602 return r; 604 if (svm->vcpu.vcpu_id == 0)
605 svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP;
606
607 return &svm->vcpu;
608
609uninit:
610 kvm_vcpu_uninit(&svm->vcpu);
611free_svm:
612 kmem_cache_free(kvm_vcpu_cache, svm);
613out:
614 return ERR_PTR(err);
603} 615}
604 616
605static void svm_free_vcpu(struct kvm_vcpu *vcpu) 617static void svm_free_vcpu(struct kvm_vcpu *vcpu)
606{ 618{
607 if (!vcpu->svm) 619 struct vcpu_svm *svm = to_svm(vcpu);
608 return; 620
609 if (vcpu->svm->vmcb) 621 __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
610 __free_page(pfn_to_page(vcpu->svm->vmcb_pa >> PAGE_SHIFT)); 622 kvm_vcpu_uninit(vcpu);
611 kfree(vcpu->svm); 623 kmem_cache_free(kvm_vcpu_cache, svm);
612} 624}
613 625
614static void svm_vcpu_load(struct kvm_vcpu *vcpu) 626static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
615{ 627{
616 int cpu, i; 628 struct vcpu_svm *svm = to_svm(vcpu);
629 int i;
617 630
618 cpu = get_cpu();
619 if (unlikely(cpu != vcpu->cpu)) { 631 if (unlikely(cpu != vcpu->cpu)) {
620 u64 tsc_this, delta; 632 u64 tsc_this, delta;
621 633
@@ -625,23 +637,24 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu)
625 */ 637 */
626 rdtscll(tsc_this); 638 rdtscll(tsc_this);
627 delta = vcpu->host_tsc - tsc_this; 639 delta = vcpu->host_tsc - tsc_this;
628 vcpu->svm->vmcb->control.tsc_offset += delta; 640 svm->vmcb->control.tsc_offset += delta;
629 vcpu->cpu = cpu; 641 vcpu->cpu = cpu;
642 kvm_migrate_apic_timer(vcpu);
630 } 643 }
631 644
632 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 645 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
633 rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); 646 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
634} 647}
635 648
636static void svm_vcpu_put(struct kvm_vcpu *vcpu) 649static void svm_vcpu_put(struct kvm_vcpu *vcpu)
637{ 650{
651 struct vcpu_svm *svm = to_svm(vcpu);
638 int i; 652 int i;
639 653
640 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 654 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
641 wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); 655 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
642 656
643 rdtscll(vcpu->host_tsc); 657 rdtscll(vcpu->host_tsc);
644 put_cpu();
645} 658}
646 659
647static void svm_vcpu_decache(struct kvm_vcpu *vcpu) 660static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
@@ -650,31 +663,34 @@ static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
650 663
651static void svm_cache_regs(struct kvm_vcpu *vcpu) 664static void svm_cache_regs(struct kvm_vcpu *vcpu)
652{ 665{
653 vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax; 666 struct vcpu_svm *svm = to_svm(vcpu);
654 vcpu->regs[VCPU_REGS_RSP] = vcpu->svm->vmcb->save.rsp; 667
655 vcpu->rip = vcpu->svm->vmcb->save.rip; 668 vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
669 vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
670 vcpu->rip = svm->vmcb->save.rip;
656} 671}
657 672
658static void svm_decache_regs(struct kvm_vcpu *vcpu) 673static void svm_decache_regs(struct kvm_vcpu *vcpu)
659{ 674{
660 vcpu->svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; 675 struct vcpu_svm *svm = to_svm(vcpu);
661 vcpu->svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; 676 svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
662 vcpu->svm->vmcb->save.rip = vcpu->rip; 677 svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
678 svm->vmcb->save.rip = vcpu->rip;
663} 679}
664 680
665static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) 681static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
666{ 682{
667 return vcpu->svm->vmcb->save.rflags; 683 return to_svm(vcpu)->vmcb->save.rflags;
668} 684}
669 685
670static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 686static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
671{ 687{
672 vcpu->svm->vmcb->save.rflags = rflags; 688 to_svm(vcpu)->vmcb->save.rflags = rflags;
673} 689}
674 690
675static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) 691static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
676{ 692{
677 struct vmcb_save_area *save = &vcpu->svm->vmcb->save; 693 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
678 694
679 switch (seg) { 695 switch (seg) {
680 case VCPU_SREG_CS: return &save->cs; 696 case VCPU_SREG_CS: return &save->cs;
@@ -716,36 +732,36 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
716 var->unusable = !var->present; 732 var->unusable = !var->present;
717} 733}
718 734
719static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
720{
721 struct vmcb_seg *s = svm_seg(vcpu, VCPU_SREG_CS);
722
723 *db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
724 *l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
725}
726
727static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 735static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
728{ 736{
729 dt->limit = vcpu->svm->vmcb->save.idtr.limit; 737 struct vcpu_svm *svm = to_svm(vcpu);
730 dt->base = vcpu->svm->vmcb->save.idtr.base; 738
739 dt->limit = svm->vmcb->save.idtr.limit;
740 dt->base = svm->vmcb->save.idtr.base;
731} 741}
732 742
733static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 743static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
734{ 744{
735 vcpu->svm->vmcb->save.idtr.limit = dt->limit; 745 struct vcpu_svm *svm = to_svm(vcpu);
736 vcpu->svm->vmcb->save.idtr.base = dt->base ; 746
747 svm->vmcb->save.idtr.limit = dt->limit;
748 svm->vmcb->save.idtr.base = dt->base ;
737} 749}
738 750
739static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 751static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
740{ 752{
741 dt->limit = vcpu->svm->vmcb->save.gdtr.limit; 753 struct vcpu_svm *svm = to_svm(vcpu);
742 dt->base = vcpu->svm->vmcb->save.gdtr.base; 754
755 dt->limit = svm->vmcb->save.gdtr.limit;
756 dt->base = svm->vmcb->save.gdtr.base;
743} 757}
744 758
745static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) 759static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
746{ 760{
747 vcpu->svm->vmcb->save.gdtr.limit = dt->limit; 761 struct vcpu_svm *svm = to_svm(vcpu);
748 vcpu->svm->vmcb->save.gdtr.base = dt->base ; 762
763 svm->vmcb->save.gdtr.limit = dt->limit;
764 svm->vmcb->save.gdtr.base = dt->base ;
749} 765}
750 766
751static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 767static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
@@ -754,39 +770,42 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
754 770
755static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 771static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
756{ 772{
773 struct vcpu_svm *svm = to_svm(vcpu);
774
757#ifdef CONFIG_X86_64 775#ifdef CONFIG_X86_64
758 if (vcpu->shadow_efer & KVM_EFER_LME) { 776 if (vcpu->shadow_efer & KVM_EFER_LME) {
759 if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { 777 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
760 vcpu->shadow_efer |= KVM_EFER_LMA; 778 vcpu->shadow_efer |= KVM_EFER_LMA;
761 vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; 779 svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME;
762 } 780 }
763 781
764 if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK) ) { 782 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) {
765 vcpu->shadow_efer &= ~KVM_EFER_LMA; 783 vcpu->shadow_efer &= ~KVM_EFER_LMA;
766 vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); 784 svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME);
767 } 785 }
768 } 786 }
769#endif 787#endif
770 if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) { 788 if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
771 vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 789 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
772 vcpu->fpu_active = 1; 790 vcpu->fpu_active = 1;
773 } 791 }
774 792
775 vcpu->cr0 = cr0; 793 vcpu->cr0 = cr0;
776 cr0 |= CR0_PG_MASK | CR0_WP_MASK; 794 cr0 |= X86_CR0_PG | X86_CR0_WP;
777 cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); 795 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
778 vcpu->svm->vmcb->save.cr0 = cr0; 796 svm->vmcb->save.cr0 = cr0;
779} 797}
780 798
781static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 799static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
782{ 800{
783 vcpu->cr4 = cr4; 801 vcpu->cr4 = cr4;
784 vcpu->svm->vmcb->save.cr4 = cr4 | CR4_PAE_MASK; 802 to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
785} 803}
786 804
787static void svm_set_segment(struct kvm_vcpu *vcpu, 805static void svm_set_segment(struct kvm_vcpu *vcpu,
788 struct kvm_segment *var, int seg) 806 struct kvm_segment *var, int seg)
789{ 807{
808 struct vcpu_svm *svm = to_svm(vcpu);
790 struct vmcb_seg *s = svm_seg(vcpu, seg); 809 struct vmcb_seg *s = svm_seg(vcpu, seg);
791 810
792 s->base = var->base; 811 s->base = var->base;
@@ -805,16 +824,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
805 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; 824 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
806 } 825 }
807 if (seg == VCPU_SREG_CS) 826 if (seg == VCPU_SREG_CS)
808 vcpu->svm->vmcb->save.cpl 827 svm->vmcb->save.cpl
809 = (vcpu->svm->vmcb->save.cs.attrib 828 = (svm->vmcb->save.cs.attrib
810 >> SVM_SELECTOR_DPL_SHIFT) & 3; 829 >> SVM_SELECTOR_DPL_SHIFT) & 3;
811 830
812} 831}
813 832
814/* FIXME: 833/* FIXME:
815 834
816 vcpu->svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 835 svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK;
817 vcpu->svm->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); 836 svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
818 837
819*/ 838*/
820 839
@@ -823,61 +842,68 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
823 return -EOPNOTSUPP; 842 return -EOPNOTSUPP;
824} 843}
825 844
845static int svm_get_irq(struct kvm_vcpu *vcpu)
846{
847 struct vcpu_svm *svm = to_svm(vcpu);
848 u32 exit_int_info = svm->vmcb->control.exit_int_info;
849
850 if (is_external_interrupt(exit_int_info))
851 return exit_int_info & SVM_EVTINJ_VEC_MASK;
852 return -1;
853}
854
826static void load_host_msrs(struct kvm_vcpu *vcpu) 855static void load_host_msrs(struct kvm_vcpu *vcpu)
827{ 856{
828#ifdef CONFIG_X86_64 857#ifdef CONFIG_X86_64
829 wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); 858 wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
830#endif 859#endif
831} 860}
832 861
833static void save_host_msrs(struct kvm_vcpu *vcpu) 862static void save_host_msrs(struct kvm_vcpu *vcpu)
834{ 863{
835#ifdef CONFIG_X86_64 864#ifdef CONFIG_X86_64
836 rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); 865 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
837#endif 866#endif
838} 867}
839 868
840static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) 869static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
841{ 870{
842 if (svm_data->next_asid > svm_data->max_asid) { 871 if (svm_data->next_asid > svm_data->max_asid) {
843 ++svm_data->asid_generation; 872 ++svm_data->asid_generation;
844 svm_data->next_asid = 1; 873 svm_data->next_asid = 1;
845 vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 874 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
846 } 875 }
847 876
848 vcpu->cpu = svm_data->cpu; 877 svm->vcpu.cpu = svm_data->cpu;
849 vcpu->svm->asid_generation = svm_data->asid_generation; 878 svm->asid_generation = svm_data->asid_generation;
850 vcpu->svm->vmcb->control.asid = svm_data->next_asid++; 879 svm->vmcb->control.asid = svm_data->next_asid++;
851}
852
853static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address)
854{
855 invlpga(address, vcpu->svm->vmcb->control.asid); // is needed?
856} 880}
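
new_asid() hands out per-guest ASIDs from a per-CPU counter and recycles the whole space by bumping a generation number once the counter passes max_asid, requesting a full ASID flush at that point. The same scheme as a runnable, self-contained model (all types invented for the sketch):

	#include <stdint.h>
	#include <stdio.h>

	struct cpu_asids {
		uint32_t next_asid;
		uint32_t max_asid;
		uint32_t generation;
	};

	struct guest {
		uint32_t asid;
		uint32_t generation;
		int needs_full_flush;
	};

	static void new_asid(struct guest *g, struct cpu_asids *c)
	{
		if (c->next_asid > c->max_asid) {
			/* ASID space exhausted: a new generation invalidates
			 * every guest tagged with the old one, and stale TLB
			 * entries get flushed once */
			++c->generation;
			c->next_asid = 1;
			g->needs_full_flush = 1;
		}
		g->generation = c->generation;
		g->asid = c->next_asid++;
	}

	int main(void)
	{
		struct cpu_asids c = { .next_asid = 1, .max_asid = 2, .generation = 0 };
		struct guest g = { 0 };
		int i;

		for (i = 0; i < 4; i++) {
			new_asid(&g, &c);
			printf("asid=%u gen=%u flush=%d\n", (unsigned)g.asid,
			       (unsigned)g.generation, g.needs_full_flush);
			g.needs_full_flush = 0;
		}
		return 0;
	}

pre_svm_run() later compares the guest's recorded generation against the CPU's to decide when a fresh ASID is needed.
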
857 881
858static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 882static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
859{ 883{
860 return vcpu->svm->db_regs[dr]; 884 return to_svm(vcpu)->db_regs[dr];
861} 885}
862 886
863static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, 887static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
864 int *exception) 888 int *exception)
865{ 889{
890 struct vcpu_svm *svm = to_svm(vcpu);
891
866 *exception = 0; 892 *exception = 0;
867 893
868 if (vcpu->svm->vmcb->save.dr7 & DR7_GD_MASK) { 894 if (svm->vmcb->save.dr7 & DR7_GD_MASK) {
869 vcpu->svm->vmcb->save.dr7 &= ~DR7_GD_MASK; 895 svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
870 vcpu->svm->vmcb->save.dr6 |= DR6_BD_MASK; 896 svm->vmcb->save.dr6 |= DR6_BD_MASK;
871 *exception = DB_VECTOR; 897 *exception = DB_VECTOR;
872 return; 898 return;
873 } 899 }
874 900
875 switch (dr) { 901 switch (dr) {
876 case 0 ... 3: 902 case 0 ... 3:
877 vcpu->svm->db_regs[dr] = value; 903 svm->db_regs[dr] = value;
878 return; 904 return;
879 case 4 ... 5: 905 case 4 ... 5:
880 if (vcpu->cr4 & CR4_DE_MASK) { 906 if (vcpu->cr4 & X86_CR4_DE) {
881 *exception = UD_VECTOR; 907 *exception = UD_VECTOR;
882 return; 908 return;
883 } 909 }
@@ -886,7 +912,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
886 *exception = GP_VECTOR; 912 *exception = GP_VECTOR;
887 return; 913 return;
888 } 914 }
889 vcpu->svm->vmcb->save.dr7 = value; 915 svm->vmcb->save.dr7 = value;
890 return; 916 return;
891 } 917 }
892 default: 918 default:
@@ -897,42 +923,44 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
897 } 923 }
898} 924}
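
svm_set_dr() mirrors the architectural rules for debug-register writes: a set DR7.GD bit turns any DR access into #DB with DR6.BD set, DR4/DR5 raise #UD when CR4.DE is on, and a DR7 write faults with #GP if the upper 32 bits are nonzero. A simplified runnable model of that policy (the GD/BD bit positions are architectural; the types are invented, the DR4/DR5 handling is reduced to the cases visible in the hunk, and the case ranges are the same GCC extension the kernel source uses):

	#include <stdint.h>
	#include <stdio.h>

	#define DR7_GD (1u << 13)	/* general-detect enable */
	#define DR6_BD (1u << 13)	/* debug-register access detected */

	enum fault { OK, DB_FAULT, UD_FAULT, GP_FAULT };

	static enum fault set_dr(uint32_t *dr7, uint32_t *dr6, int dr,
				 uint64_t value, int cr4_de)
	{
		if (*dr7 & DR7_GD) {
			/* general detect armed: the access itself raises #DB */
			*dr7 &= ~DR7_GD;
			*dr6 |= DR6_BD;
			return DB_FAULT;
		}
		switch (dr) {
		case 0 ... 3:
			return OK;			/* breakpoint address regs */
		case 4 ... 5:
			return cr4_de ? UD_FAULT : OK;	/* legacy aliases */
		case 7:
			if (value >> 32)
				return GP_FAULT;	/* upper bits must be zero */
			*dr7 = (uint32_t)value;
			return OK;
		default:
			return GP_FAULT;
		}
	}

	int main(void)
	{
		uint32_t dr7 = DR7_GD, dr6 = 0;

		printf("%d\n", set_dr(&dr7, &dr6, 0, 0, 0));		/* #DB */
		printf("%d\n", set_dr(&dr7, &dr6, 7, 1ull << 33, 0));	/* #GP */
		return 0;
	}
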
899 925
900static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 926static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
901{ 927{
902 u32 exit_int_info = vcpu->svm->vmcb->control.exit_int_info; 928 u32 exit_int_info = svm->vmcb->control.exit_int_info;
929 struct kvm *kvm = svm->vcpu.kvm;
903 u64 fault_address; 930 u64 fault_address;
904 u32 error_code; 931 u32 error_code;
905 enum emulation_result er; 932 enum emulation_result er;
906 int r; 933 int r;
907 934
908 if (is_external_interrupt(exit_int_info)) 935 if (!irqchip_in_kernel(kvm) &&
909 push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); 936 is_external_interrupt(exit_int_info))
937 push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
910 938
911 spin_lock(&vcpu->kvm->lock); 939 mutex_lock(&kvm->lock);
912 940
913 fault_address = vcpu->svm->vmcb->control.exit_info_2; 941 fault_address = svm->vmcb->control.exit_info_2;
914 error_code = vcpu->svm->vmcb->control.exit_info_1; 942 error_code = svm->vmcb->control.exit_info_1;
915 r = kvm_mmu_page_fault(vcpu, fault_address, error_code); 943 r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
916 if (r < 0) { 944 if (r < 0) {
917 spin_unlock(&vcpu->kvm->lock); 945 mutex_unlock(&kvm->lock);
918 return r; 946 return r;
919 } 947 }
920 if (!r) { 948 if (!r) {
921 spin_unlock(&vcpu->kvm->lock); 949 mutex_unlock(&kvm->lock);
922 return 1; 950 return 1;
923 } 951 }
924 er = emulate_instruction(vcpu, kvm_run, fault_address, error_code); 952 er = emulate_instruction(&svm->vcpu, kvm_run, fault_address,
925 spin_unlock(&vcpu->kvm->lock); 953 error_code);
954 mutex_unlock(&kvm->lock);
926 955
927 switch (er) { 956 switch (er) {
928 case EMULATE_DONE: 957 case EMULATE_DONE:
929 return 1; 958 return 1;
930 case EMULATE_DO_MMIO: 959 case EMULATE_DO_MMIO:
931 ++vcpu->stat.mmio_exits; 960 ++svm->vcpu.stat.mmio_exits;
932 kvm_run->exit_reason = KVM_EXIT_MMIO;
933 return 0; 961 return 0;
934 case EMULATE_FAIL: 962 case EMULATE_FAIL:
935 vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); 963 kvm_report_emulation_failure(&svm->vcpu, "pagetable");
936 break; 964 break;
937 default: 965 default:
938 BUG(); 966 BUG();
@@ -942,252 +970,142 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
942 return 0; 970 return 0;
943} 971}
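
Besides threading svm through, this hunk swaps kvm->lock from a spinlock to a mutex: the fault path may sleep (shadow page-table allocation, guest memory access), which a spinlock forbids. The handler's return-value protocol is also worth spelling out; a runnable model of it (all names invented for the sketch):

	#include <stdio.h>

	enum emu { EMU_DONE, EMU_DO_MMIO, EMU_FAIL };

	static int pf_exit(int mmu_result, enum emu er)
	{
		if (mmu_result < 0)
			return mmu_result;	/* hard error: bubble up */
		if (mmu_result == 0)
			return 1;		/* shadow MMU fixed it: re-enter guest */
		switch (er) {			/* had to emulate the instruction */
		case EMU_DONE:    return 1;	/* emulated fully, re-enter guest */
		case EMU_DO_MMIO: return 0;	/* userspace must complete the MMIO */
		default:          return 0;	/* emulation failed: exit to userspace */
		}
	}

	int main(void)
	{
		printf("%d %d %d\n", pf_exit(0, EMU_DONE),
		       pf_exit(1, EMU_DO_MMIO), pf_exit(-12, EMU_DONE));
		return 0;
	}
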
944 972
945static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 973static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
946{ 974{
947 vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 975 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
948 if (!(vcpu->cr0 & CR0_TS_MASK)) 976 if (!(svm->vcpu.cr0 & X86_CR0_TS))
949 vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK; 977 svm->vmcb->save.cr0 &= ~X86_CR0_TS;
950 vcpu->fpu_active = 1; 978 svm->vcpu.fpu_active = 1;
951 979
952 return 1; 980 return 1;
953} 981}
954 982
955static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 983static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
956{ 984{
957 /* 985 /*
958 * VMCB is undefined after a SHUTDOWN intercept 986 * VMCB is undefined after a SHUTDOWN intercept
959 * so reinitialize it. 987 * so reinitialize it.
960 */ 988 */
961 clear_page(vcpu->svm->vmcb); 989 clear_page(svm->vmcb);
962 init_vmcb(vcpu->svm->vmcb); 990 init_vmcb(svm->vmcb);
963 991
964 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 992 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
965 return 0; 993 return 0;
966} 994}
967 995
968static int io_get_override(struct kvm_vcpu *vcpu, 996static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
969 struct vmcb_seg **seg,
970 int *addr_override)
971{
972 u8 inst[MAX_INST_SIZE];
973 unsigned ins_length;
974 gva_t rip;
975 int i;
976
977 rip = vcpu->svm->vmcb->save.rip;
978 ins_length = vcpu->svm->next_rip - rip;
979 rip += vcpu->svm->vmcb->save.cs.base;
980
981 if (ins_length > MAX_INST_SIZE)
982 printk(KERN_DEBUG
983 "%s: inst length err, cs base 0x%llx rip 0x%llx "
984 "next rip 0x%llx ins_length %u\n",
985 __FUNCTION__,
986 vcpu->svm->vmcb->save.cs.base,
987 vcpu->svm->vmcb->save.rip,
988 vcpu->svm->vmcb->control.exit_info_2,
989 ins_length);
990
991 if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length)
992 /* #PF */
993 return 0;
994
995 *addr_override = 0;
996 *seg = NULL;
997 for (i = 0; i < ins_length; i++)
998 switch (inst[i]) {
999 case 0xf0:
1000 case 0xf2:
1001 case 0xf3:
1002 case 0x66:
1003 continue;
1004 case 0x67:
1005 *addr_override = 1;
1006 continue;
1007 case 0x2e:
1008 *seg = &vcpu->svm->vmcb->save.cs;
1009 continue;
1010 case 0x36:
1011 *seg = &vcpu->svm->vmcb->save.ss;
1012 continue;
1013 case 0x3e:
1014 *seg = &vcpu->svm->vmcb->save.ds;
1015 continue;
1016 case 0x26:
1017 *seg = &vcpu->svm->vmcb->save.es;
1018 continue;
1019 case 0x64:
1020 *seg = &vcpu->svm->vmcb->save.fs;
1021 continue;
1022 case 0x65:
1023 *seg = &vcpu->svm->vmcb->save.gs;
1024 continue;
1025 default:
1026 return 1;
1027 }
1028 printk(KERN_DEBUG "%s: unexpected\n", __FUNCTION__);
1029 return 0;
1030}
1031
1032static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address)
1033{ 997{
1034 unsigned long addr_mask; 998 u32 io_info = svm->vmcb->control.exit_info_1; //address size bug?
1035 unsigned long *reg; 999 int size, down, in, string, rep;
1036 struct vmcb_seg *seg; 1000 unsigned port;
1037 int addr_override;
1038 struct vmcb_save_area *save_area = &vcpu->svm->vmcb->save;
1039 u16 cs_attrib = save_area->cs.attrib;
1040 unsigned addr_size = get_addr_size(vcpu);
1041
1042 if (!io_get_override(vcpu, &seg, &addr_override))
1043 return 0;
1044
1045 if (addr_override)
1046 addr_size = (addr_size == 2) ? 4: (addr_size >> 1);
1047 1001
1048 if (ins) { 1002 ++svm->vcpu.stat.io_exits;
1049 reg = &vcpu->regs[VCPU_REGS_RDI];
1050 seg = &vcpu->svm->vmcb->save.es;
1051 } else {
1052 reg = &vcpu->regs[VCPU_REGS_RSI];
1053 seg = (seg) ? seg : &vcpu->svm->vmcb->save.ds;
1054 }
1055 1003
1056 addr_mask = ~0ULL >> (64 - (addr_size * 8)); 1004 svm->next_rip = svm->vmcb->control.exit_info_2;
1057 1005
1058 if ((cs_attrib & SVM_SELECTOR_L_MASK) && 1006 string = (io_info & SVM_IOIO_STR_MASK) != 0;
1059 !(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_VM)) {
1060 *address = (*reg & addr_mask);
1061 return addr_mask;
1062 }
1063 1007
1064 if (!(seg->attrib & SVM_SELECTOR_P_SHIFT)) { 1008 if (string) {
1065 svm_inject_gp(vcpu, 0); 1009 if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO)
1066 return 0; 1010 return 0;
1011 return 1;
1067 } 1012 }
1068 1013
1069 *address = (*reg & addr_mask) + seg->base;
1070 return addr_mask;
1071}
1072
1073static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1074{
1075 u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug?
1076 int size, down, in, string, rep;
1077 unsigned port;
1078 unsigned long count;
1079 gva_t address = 0;
1080
1081 ++vcpu->stat.io_exits;
1082
1083 vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2;
1084
1085 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1014 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1086 port = io_info >> 16; 1015 port = io_info >> 16;
1087 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1016 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1088 string = (io_info & SVM_IOIO_STR_MASK) != 0;
1089 rep = (io_info & SVM_IOIO_REP_MASK) != 0; 1017 rep = (io_info & SVM_IOIO_REP_MASK) != 0;
1090 count = 1; 1018 down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1091 down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
1092 1019
1093 if (string) { 1020 return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
1094 unsigned addr_mask;
1095
1096 addr_mask = io_adress(vcpu, in, &address);
1097 if (!addr_mask) {
1098 printk(KERN_DEBUG "%s: get io address failed\n",
1099 __FUNCTION__);
1100 return 1;
1101 }
1102
1103 if (rep)
1104 count = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
1105 }
1106 return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
1107 address, rep, port);
1108} 1021}
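
The rewritten io_interception() no longer decodes the instruction stream by hand: string operations go straight to the emulator, and simple ones to kvm_emulate_pio(), with everything needed pulled out of the IOIO exit information word. Decoding that word, with the bit layout implied by the SVM_IOIO_* masks used above (type in bit 0, string in bit 2, rep in bit 3, size in bits 4-6, port in bits 16-31; treat the exact positions as an assumption of this sketch):

	#include <stdint.h>
	#include <stdio.h>

	#define IOIO_TYPE_MASK  (1u << 0)	/* 1 = IN, 0 = OUT */
	#define IOIO_STR_MASK   (1u << 2)	/* string (INS/OUTS) */
	#define IOIO_REP_MASK   (1u << 3)	/* REP prefix present */
	#define IOIO_SIZE_MASK  (7u << 4)	/* operand size in bytes */
	#define IOIO_SIZE_SHIFT 4

	int main(void)
	{
		/* an IN of one byte from port 0x3f8, as hardware might report it */
		uint32_t io_info = (0x3f8u << 16) | (1u << 4) | IOIO_TYPE_MASK;

		printf("port=%#x in=%d size=%u string=%d rep=%d\n",
		       io_info >> 16,
		       !!(io_info & IOIO_TYPE_MASK),
		       (io_info & IOIO_SIZE_MASK) >> IOIO_SIZE_SHIFT,
		       !!(io_info & IOIO_STR_MASK),
		       !!(io_info & IOIO_REP_MASK));
		return 0;
	}
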
1109 1022
1110static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1023static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1111{ 1024{
1112 return 1; 1025 return 1;
1113} 1026}
1114 1027
1115static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1028static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1116{ 1029{
1117 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; 1030 svm->next_rip = svm->vmcb->save.rip + 1;
1118 skip_emulated_instruction(vcpu); 1031 skip_emulated_instruction(&svm->vcpu);
1119 return kvm_emulate_halt(vcpu); 1032 return kvm_emulate_halt(&svm->vcpu);
1120} 1033}
1121 1034
1122static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1035static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1123{ 1036{
1124 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3; 1037 svm->next_rip = svm->vmcb->save.rip + 3;
1125 skip_emulated_instruction(vcpu); 1038 skip_emulated_instruction(&svm->vcpu);
1126 return kvm_hypercall(vcpu, kvm_run); 1039 return kvm_hypercall(&svm->vcpu, kvm_run);
1127} 1040}
1128 1041
1129static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1042static int invalid_op_interception(struct vcpu_svm *svm,
1043 struct kvm_run *kvm_run)
1130{ 1044{
1131 inject_ud(vcpu); 1045 inject_ud(&svm->vcpu);
1132 return 1; 1046 return 1;
1133} 1047}
1134 1048
1135static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1049static int task_switch_interception(struct vcpu_svm *svm,
1050 struct kvm_run *kvm_run)
1136{ 1051{
1137 printk(KERN_DEBUG "%s: task swiche is unsupported\n", __FUNCTION__); 1052 pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__);
1138 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 1053 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
1139 return 0; 1054 return 0;
1140} 1055}
1141 1056
1142static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1057static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1143{ 1058{
1144 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; 1059 svm->next_rip = svm->vmcb->save.rip + 2;
1145 kvm_emulate_cpuid(vcpu); 1060 kvm_emulate_cpuid(&svm->vcpu);
1146 return 1; 1061 return 1;
1147} 1062}
1148 1063
1149static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1064static int emulate_on_interception(struct vcpu_svm *svm,
1065 struct kvm_run *kvm_run)
1150{ 1066{
1151 if (emulate_instruction(vcpu, NULL, 0, 0) != EMULATE_DONE) 1067 if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE)
1152 printk(KERN_ERR "%s: failed\n", __FUNCTION__); 1068 pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
1153 return 1; 1069 return 1;
1154} 1070}
1155 1071
1156static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) 1072static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1157{ 1073{
1074 struct vcpu_svm *svm = to_svm(vcpu);
1075
1158 switch (ecx) { 1076 switch (ecx) {
1159 case MSR_IA32_TIME_STAMP_COUNTER: { 1077 case MSR_IA32_TIME_STAMP_COUNTER: {
1160 u64 tsc; 1078 u64 tsc;
1161 1079
1162 rdtscll(tsc); 1080 rdtscll(tsc);
1163 *data = vcpu->svm->vmcb->control.tsc_offset + tsc; 1081 *data = svm->vmcb->control.tsc_offset + tsc;
1164 break; 1082 break;
1165 } 1083 }
1166 case MSR_K6_STAR: 1084 case MSR_K6_STAR:
1167 *data = vcpu->svm->vmcb->save.star; 1085 *data = svm->vmcb->save.star;
1168 break; 1086 break;
1169#ifdef CONFIG_X86_64 1087#ifdef CONFIG_X86_64
1170 case MSR_LSTAR: 1088 case MSR_LSTAR:
1171 *data = vcpu->svm->vmcb->save.lstar; 1089 *data = svm->vmcb->save.lstar;
1172 break; 1090 break;
1173 case MSR_CSTAR: 1091 case MSR_CSTAR:
1174 *data = vcpu->svm->vmcb->save.cstar; 1092 *data = svm->vmcb->save.cstar;
1175 break; 1093 break;
1176 case MSR_KERNEL_GS_BASE: 1094 case MSR_KERNEL_GS_BASE:
1177 *data = vcpu->svm->vmcb->save.kernel_gs_base; 1095 *data = svm->vmcb->save.kernel_gs_base;
1178 break; 1096 break;
1179 case MSR_SYSCALL_MASK: 1097 case MSR_SYSCALL_MASK:
1180 *data = vcpu->svm->vmcb->save.sfmask; 1098 *data = svm->vmcb->save.sfmask;
1181 break; 1099 break;
1182#endif 1100#endif
1183 case MSR_IA32_SYSENTER_CS: 1101 case MSR_IA32_SYSENTER_CS:
1184 *data = vcpu->svm->vmcb->save.sysenter_cs; 1102 *data = svm->vmcb->save.sysenter_cs;
1185 break; 1103 break;
1186 case MSR_IA32_SYSENTER_EIP: 1104 case MSR_IA32_SYSENTER_EIP:
1187 *data = vcpu->svm->vmcb->save.sysenter_eip; 1105 *data = svm->vmcb->save.sysenter_eip;
1188 break; 1106 break;
1189 case MSR_IA32_SYSENTER_ESP: 1107 case MSR_IA32_SYSENTER_ESP:
1190 *data = vcpu->svm->vmcb->save.sysenter_esp; 1108 *data = svm->vmcb->save.sysenter_esp;
1191 break; 1109 break;
1192 default: 1110 default:
1193 return kvm_get_msr_common(vcpu, ecx, data); 1111 return kvm_get_msr_common(vcpu, ecx, data);
@@ -1195,57 +1113,59 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
1195 return 0; 1113 return 0;
1196} 1114}
1197 1115
1198static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1116static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1199{ 1117{
1200 u32 ecx = vcpu->regs[VCPU_REGS_RCX]; 1118 u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
1201 u64 data; 1119 u64 data;
1202 1120
1203 if (svm_get_msr(vcpu, ecx, &data)) 1121 if (svm_get_msr(&svm->vcpu, ecx, &data))
1204 svm_inject_gp(vcpu, 0); 1122 svm_inject_gp(&svm->vcpu, 0);
1205 else { 1123 else {
1206 vcpu->svm->vmcb->save.rax = data & 0xffffffff; 1124 svm->vmcb->save.rax = data & 0xffffffff;
1207 vcpu->regs[VCPU_REGS_RDX] = data >> 32; 1125 svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32;
1208 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; 1126 svm->next_rip = svm->vmcb->save.rip + 2;
1209 skip_emulated_instruction(vcpu); 1127 skip_emulated_instruction(&svm->vcpu);
1210 } 1128 }
1211 return 1; 1129 return 1;
1212} 1130}
1213 1131
1214static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) 1132static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1215{ 1133{
1134 struct vcpu_svm *svm = to_svm(vcpu);
1135
1216 switch (ecx) { 1136 switch (ecx) {
1217 case MSR_IA32_TIME_STAMP_COUNTER: { 1137 case MSR_IA32_TIME_STAMP_COUNTER: {
1218 u64 tsc; 1138 u64 tsc;
1219 1139
1220 rdtscll(tsc); 1140 rdtscll(tsc);
1221 vcpu->svm->vmcb->control.tsc_offset = data - tsc; 1141 svm->vmcb->control.tsc_offset = data - tsc;
1222 break; 1142 break;
1223 } 1143 }
1224 case MSR_K6_STAR: 1144 case MSR_K6_STAR:
1225 vcpu->svm->vmcb->save.star = data; 1145 svm->vmcb->save.star = data;
1226 break; 1146 break;
1227#ifdef CONFIG_X86_64 1147#ifdef CONFIG_X86_64
1228 case MSR_LSTAR: 1148 case MSR_LSTAR:
1229 vcpu->svm->vmcb->save.lstar = data; 1149 svm->vmcb->save.lstar = data;
1230 break; 1150 break;
1231 case MSR_CSTAR: 1151 case MSR_CSTAR:
1232 vcpu->svm->vmcb->save.cstar = data; 1152 svm->vmcb->save.cstar = data;
1233 break; 1153 break;
1234 case MSR_KERNEL_GS_BASE: 1154 case MSR_KERNEL_GS_BASE:
1235 vcpu->svm->vmcb->save.kernel_gs_base = data; 1155 svm->vmcb->save.kernel_gs_base = data;
1236 break; 1156 break;
1237 case MSR_SYSCALL_MASK: 1157 case MSR_SYSCALL_MASK:
1238 vcpu->svm->vmcb->save.sfmask = data; 1158 svm->vmcb->save.sfmask = data;
1239 break; 1159 break;
1240#endif 1160#endif
1241 case MSR_IA32_SYSENTER_CS: 1161 case MSR_IA32_SYSENTER_CS:
1242 vcpu->svm->vmcb->save.sysenter_cs = data; 1162 svm->vmcb->save.sysenter_cs = data;
1243 break; 1163 break;
1244 case MSR_IA32_SYSENTER_EIP: 1164 case MSR_IA32_SYSENTER_EIP:
1245 vcpu->svm->vmcb->save.sysenter_eip = data; 1165 svm->vmcb->save.sysenter_eip = data;
1246 break; 1166 break;
1247 case MSR_IA32_SYSENTER_ESP: 1167 case MSR_IA32_SYSENTER_ESP:
1248 vcpu->svm->vmcb->save.sysenter_esp = data; 1168 svm->vmcb->save.sysenter_esp = data;
1249 break; 1169 break;
1250 default: 1170 default:
1251 return kvm_set_msr_common(vcpu, ecx, data); 1171 return kvm_set_msr_common(vcpu, ecx, data);
@@ -1253,37 +1173,39 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
1253 return 0; 1173 return 0;
1254} 1174}
1255 1175
1256static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1176static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1257{ 1177{
1258 u32 ecx = vcpu->regs[VCPU_REGS_RCX]; 1178 u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX];
1259 u64 data = (vcpu->svm->vmcb->save.rax & -1u) 1179 u64 data = (svm->vmcb->save.rax & -1u)
1260 | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); 1180 | ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32);
1261 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; 1181 svm->next_rip = svm->vmcb->save.rip + 2;
1262 if (svm_set_msr(vcpu, ecx, data)) 1182 if (svm_set_msr(&svm->vcpu, ecx, data))
1263 svm_inject_gp(vcpu, 0); 1183 svm_inject_gp(&svm->vcpu, 0);
1264 else 1184 else
1265 skip_emulated_instruction(vcpu); 1185 skip_emulated_instruction(&svm->vcpu);
1266 return 1; 1186 return 1;
1267} 1187}
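
Both handlers encode the x86 convention that a 64-bit MSR value travels split across EDX:EAX (here the low half lives in the VMCB's save.rax). The arithmetic in isolation:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t msr = 0x1122334455667788ull;

		/* rdmsr_interception: split the value for the guest */
		uint32_t rax = msr & 0xffffffffu;
		uint32_t rdx = msr >> 32;

		/* wrmsr_interception: reassemble what the guest wrote */
		uint64_t data = (uint64_t)rax | ((uint64_t)rdx << 32);

		printf("%#llx -> eax=%#x edx=%#x -> %#llx\n",
		       (unsigned long long)msr, rax, rdx,
		       (unsigned long long)data);
		return 0;
	}
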
1268 1188
1269static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1189static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
1270{ 1190{
1271 if (vcpu->svm->vmcb->control.exit_info_1) 1191 if (svm->vmcb->control.exit_info_1)
1272 return wrmsr_interception(vcpu, kvm_run); 1192 return wrmsr_interception(svm, kvm_run);
1273 else 1193 else
1274 return rdmsr_interception(vcpu, kvm_run); 1194 return rdmsr_interception(svm, kvm_run);
1275} 1195}
1276 1196
1277static int interrupt_window_interception(struct kvm_vcpu *vcpu, 1197static int interrupt_window_interception(struct vcpu_svm *svm,
1278 struct kvm_run *kvm_run) 1198 struct kvm_run *kvm_run)
1279{ 1199{
1200 svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
1201 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1280 /* 1202 /*
1281 * If the user space waits to inject interrupts, exit as soon as 1203 * If the user space waits to inject interrupts, exit as soon as
1282 * possible 1204 * possible
1283 */ 1205 */
1284 if (kvm_run->request_interrupt_window && 1206 if (kvm_run->request_interrupt_window &&
1285 !vcpu->irq_summary) { 1207 !svm->vcpu.irq_summary) {
1286 ++vcpu->stat.irq_window_exits; 1208 ++svm->vcpu.stat.irq_window_exits;
1287 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 1209 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
1288 return 0; 1210 return 0;
1289 } 1211 }
@@ -1291,7 +1213,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
1291 return 1; 1213 return 1;
1292} 1214}
1293 1215
1294static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, 1216static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
1295 struct kvm_run *kvm_run) = { 1217 struct kvm_run *kvm_run) = {
1296 [SVM_EXIT_READ_CR0] = emulate_on_interception, 1218 [SVM_EXIT_READ_CR0] = emulate_on_interception,
1297 [SVM_EXIT_READ_CR3] = emulate_on_interception, 1219 [SVM_EXIT_READ_CR3] = emulate_on_interception,
@@ -1338,15 +1260,25 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
1338}; 1260};
1339 1261
1340 1262
1341static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1263static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1342{ 1264{
1343 u32 exit_code = vcpu->svm->vmcb->control.exit_code; 1265 struct vcpu_svm *svm = to_svm(vcpu);
1266 u32 exit_code = svm->vmcb->control.exit_code;
1267
1268 kvm_reput_irq(svm);
1344 1269
1345 if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && 1270 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
1271 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
1272 kvm_run->fail_entry.hardware_entry_failure_reason
1273 = svm->vmcb->control.exit_code;
1274 return 0;
1275 }
1276
1277 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
1346 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) 1278 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR)
1347 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " 1279 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
1348 "exit_code 0x%x\n", 1280 "exit_code 0x%x\n",
1349 __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info, 1281 __FUNCTION__, svm->vmcb->control.exit_int_info,
1350 exit_code); 1282 exit_code);
1351 1283
1352 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 1284 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
@@ -1356,7 +1288,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1356 return 0; 1288 return 0;
1357 } 1289 }
1358 1290
1359 return svm_exit_handlers[exit_code](vcpu, kvm_run); 1291 return svm_exit_handlers[exit_code](svm, kvm_run);
1360} 1292}
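
handle_exit() is a plain table dispatch: the hardware exit code indexes svm_exit_handlers[], with a bounds-and-NULL check before the call, and after this patch every handler takes the vcpu_svm directly. A toy, self-contained version of the same shape (exit codes and handlers invented):

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	static int handle_io(void)   { puts("io");   return 1; }
	static int handle_halt(void) { puts("halt"); return 1; }

	static int (*handlers[])(void) = {
		[1] = handle_io,
		[3] = handle_halt,
	};

	static int dispatch(unsigned exit_code)
	{
		if (exit_code >= ARRAY_SIZE(handlers) || !handlers[exit_code]) {
			fprintf(stderr, "unknown exit %u\n", exit_code);
			return 0;	/* bail out to userspace */
		}
		return handlers[exit_code]();
	}

	int main(void)
	{
		dispatch(1);	/* handled in the table */
		dispatch(7);	/* out of range: rejected */
		return 0;
	}
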
1361 1293
1362static void reload_tss(struct kvm_vcpu *vcpu) 1294static void reload_tss(struct kvm_vcpu *vcpu)
@@ -1368,93 +1300,126 @@ static void reload_tss(struct kvm_vcpu *vcpu)
1368 load_TR_desc(); 1300 load_TR_desc();
1369} 1301}
1370 1302
1371static void pre_svm_run(struct kvm_vcpu *vcpu) 1303static void pre_svm_run(struct vcpu_svm *svm)
1372{ 1304{
1373 int cpu = raw_smp_processor_id(); 1305 int cpu = raw_smp_processor_id();
1374 1306
1375 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 1307 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
1376 1308
1377 vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 1309 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
1378 if (vcpu->cpu != cpu || 1310 if (svm->vcpu.cpu != cpu ||
1379 vcpu->svm->asid_generation != svm_data->asid_generation) 1311 svm->asid_generation != svm_data->asid_generation)
1380 new_asid(vcpu, svm_data); 1312 new_asid(svm, svm_data);
1381} 1313}
1382 1314
1383 1315
1384static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 1316static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
1385{ 1317{
1386 struct vmcb_control_area *control; 1318 struct vmcb_control_area *control;
1387 1319
1388 control = &vcpu->svm->vmcb->control; 1320 control = &svm->vmcb->control;
1389 control->int_vector = pop_irq(vcpu); 1321 control->int_vector = irq;
1390 control->int_ctl &= ~V_INTR_PRIO_MASK; 1322 control->int_ctl &= ~V_INTR_PRIO_MASK;
1391 control->int_ctl |= V_IRQ_MASK | 1323 control->int_ctl |= V_IRQ_MASK |
1392 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); 1324 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
1393} 1325}
1394 1326
1395static void kvm_reput_irq(struct kvm_vcpu *vcpu) 1327static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
1328{
1329 struct vcpu_svm *svm = to_svm(vcpu);
1330
1331 svm_inject_irq(svm, irq);
1332}
1333
1334static void svm_intr_assist(struct kvm_vcpu *vcpu)
1335{
1336 struct vcpu_svm *svm = to_svm(vcpu);
1337 struct vmcb *vmcb = svm->vmcb;
1338 int intr_vector = -1;
1339
1340 kvm_inject_pending_timer_irqs(vcpu);
1341 if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
1342 ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
1343 intr_vector = vmcb->control.exit_int_info &
1344 SVM_EVTINJ_VEC_MASK;
1345 vmcb->control.exit_int_info = 0;
1346 svm_inject_irq(svm, intr_vector);
1347 return;
1348 }
1349
1350 if (vmcb->control.int_ctl & V_IRQ_MASK)
1351 return;
1352
1353 if (!kvm_cpu_has_interrupt(vcpu))
1354 return;
1355
1356 if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
1357 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
1358 (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
1359 /* unable to deliver irq, set pending irq */
1360 vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
1361 svm_inject_irq(svm, 0x0);
1362 return;
1363 }
1364 /* Okay, we can deliver the interrupt: grab it and update PIC state. */
1365 intr_vector = kvm_cpu_get_interrupt(vcpu);
1366 svm_inject_irq(svm, intr_vector);
1367 kvm_timer_intr_post(vcpu, intr_vector);
1368}
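
svm_intr_assist() is the injection path for the in-kernel irqchip, and it walks a fixed ladder: re-inject an interrupt the exit cut short, do nothing if a virtual IRQ is already queued or nothing is pending, arm the VINTR intercept when the delivery window is closed (IF clear, interrupt shadow, or event injection busy), and otherwise deliver. The ladder as a pure function over invented flags:

	#include <stdio.h>

	enum action { REINJECT, NOTHING, DEFER_VINTR, INJECT };

	static enum action intr_assist(int exitint_valid_ext, int virq_pending,
				       int has_interrupt, int if_set,
				       int intr_shadow, int evtinj_busy)
	{
		if (exitint_valid_ext)
			return REINJECT;	/* interrupt cut short by the exit */
		if (virq_pending)
			return NOTHING;		/* a virtual IRQ is already queued */
		if (!has_interrupt)
			return NOTHING;		/* nothing to deliver */
		if (!if_set || intr_shadow || evtinj_busy)
			return DEFER_VINTR;	/* window closed: arm VINTR */
		return INJECT;			/* deliver now */
	}

	int main(void)
	{
		printf("%d\n", intr_assist(0, 0, 1, 1, 0, 0));	/* INJECT */
		printf("%d\n", intr_assist(0, 0, 1, 0, 0, 0));	/* DEFER_VINTR */
		return 0;
	}
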
1369
1370static void kvm_reput_irq(struct vcpu_svm *svm)
1396{ 1371{
1397 struct vmcb_control_area *control = &vcpu->svm->vmcb->control; 1372 struct vmcb_control_area *control = &svm->vmcb->control;
1398 1373
1399 if (control->int_ctl & V_IRQ_MASK) { 1374 if ((control->int_ctl & V_IRQ_MASK)
1375 && !irqchip_in_kernel(svm->vcpu.kvm)) {
1400 control->int_ctl &= ~V_IRQ_MASK; 1376 control->int_ctl &= ~V_IRQ_MASK;
1401 push_irq(vcpu, control->int_vector); 1377 push_irq(&svm->vcpu, control->int_vector);
1402 } 1378 }
1403 1379
1404 vcpu->interrupt_window_open = 1380 svm->vcpu.interrupt_window_open =
1405 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); 1381 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
1406} 1382}
1407 1383
1384static void svm_do_inject_vector(struct vcpu_svm *svm)
1385{
1386 struct kvm_vcpu *vcpu = &svm->vcpu;
1387 int word_index = __ffs(vcpu->irq_summary);
1388 int bit_index = __ffs(vcpu->irq_pending[word_index]);
1389 int irq = word_index * BITS_PER_LONG + bit_index;
1390
1391 clear_bit(bit_index, &vcpu->irq_pending[word_index]);
1392 if (!vcpu->irq_pending[word_index])
1393 clear_bit(word_index, &vcpu->irq_summary);
1394 svm_inject_irq(svm, irq);
1395}
1396
1408static void do_interrupt_requests(struct kvm_vcpu *vcpu, 1397static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1409 struct kvm_run *kvm_run) 1398 struct kvm_run *kvm_run)
1410{ 1399{
1411 struct vmcb_control_area *control = &vcpu->svm->vmcb->control; 1400 struct vcpu_svm *svm = to_svm(vcpu);
1401 struct vmcb_control_area *control = &svm->vmcb->control;
1412 1402
1413 vcpu->interrupt_window_open = 1403 svm->vcpu.interrupt_window_open =
1414 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && 1404 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
1415 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); 1405 (svm->vmcb->save.rflags & X86_EFLAGS_IF));
1416 1406
1417 if (vcpu->interrupt_window_open && vcpu->irq_summary) 1407 if (svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary)
1418 /* 1408 /*
1419 * If interrupts enabled, and not blocked by sti or mov ss. Good. 1409 * If interrupts enabled, and not blocked by sti or mov ss. Good.
1420 */ 1410 */
1421 kvm_do_inject_irq(vcpu); 1411 svm_do_inject_vector(svm);
1422 1412
1423 /* 1413 /*
1424 * Interrupts blocked. Wait for unblock. 1414 * Interrupts blocked. Wait for unblock.
1425 */ 1415 */
1426 if (!vcpu->interrupt_window_open && 1416 if (!svm->vcpu.interrupt_window_open &&
1427 (vcpu->irq_summary || kvm_run->request_interrupt_window)) { 1417 (svm->vcpu.irq_summary || kvm_run->request_interrupt_window)) {
1428 control->intercept |= 1ULL << INTERCEPT_VINTR; 1418 control->intercept |= 1ULL << INTERCEPT_VINTR;
1429 } else 1419 } else
1430 control->intercept &= ~(1ULL << INTERCEPT_VINTR); 1420 control->intercept &= ~(1ULL << INTERCEPT_VINTR);
1431} 1421}
1432 1422
1433static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1434 struct kvm_run *kvm_run)
1435{
1436 kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
1437 vcpu->irq_summary == 0);
1438 kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
1439 kvm_run->cr8 = vcpu->cr8;
1440 kvm_run->apic_base = vcpu->apic_base;
1441}
1442
1443/*
1444 * Check if userspace requested an interrupt window, and that the
1445 * interrupt window is open.
1446 *
1447 * No need to exit to userspace if we already have an interrupt queued.
1448 */
1449static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1450 struct kvm_run *kvm_run)
1451{
1452 return (!vcpu->irq_summary &&
1453 kvm_run->request_interrupt_window &&
1454 vcpu->interrupt_window_open &&
1455 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
1456}
1457
1458static void save_db_regs(unsigned long *db_regs) 1423static void save_db_regs(unsigned long *db_regs)
1459{ 1424{
1460 asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0])); 1425 asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
@@ -1476,49 +1441,37 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu)
1476 force_new_asid(vcpu); 1441 force_new_asid(vcpu);
1477} 1442}
1478 1443
1479static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1444static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
1445{
1446}
1447
1448static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1480{ 1449{
1450 struct vcpu_svm *svm = to_svm(vcpu);
1481 u16 fs_selector; 1451 u16 fs_selector;
1482 u16 gs_selector; 1452 u16 gs_selector;
1483 u16 ldt_selector; 1453 u16 ldt_selector;
1484 int r;
1485
1486again:
1487 r = kvm_mmu_reload(vcpu);
1488 if (unlikely(r))
1489 return r;
1490
1491 if (!vcpu->mmio_read_completed)
1492 do_interrupt_requests(vcpu, kvm_run);
1493 1454
1494 clgi(); 1455 pre_svm_run(svm);
1495
1496 vcpu->guest_mode = 1;
1497 if (vcpu->requests)
1498 if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
1499 svm_flush_tlb(vcpu);
1500
1501 pre_svm_run(vcpu);
1502 1456
1503 save_host_msrs(vcpu); 1457 save_host_msrs(vcpu);
1504 fs_selector = read_fs(); 1458 fs_selector = read_fs();
1505 gs_selector = read_gs(); 1459 gs_selector = read_gs();
1506 ldt_selector = read_ldt(); 1460 ldt_selector = read_ldt();
1507 vcpu->svm->host_cr2 = kvm_read_cr2(); 1461 svm->host_cr2 = kvm_read_cr2();
1508 vcpu->svm->host_dr6 = read_dr6(); 1462 svm->host_dr6 = read_dr6();
1509 vcpu->svm->host_dr7 = read_dr7(); 1463 svm->host_dr7 = read_dr7();
1510 vcpu->svm->vmcb->save.cr2 = vcpu->cr2; 1464 svm->vmcb->save.cr2 = vcpu->cr2;
1511 1465
1512 if (vcpu->svm->vmcb->save.dr7 & 0xff) { 1466 if (svm->vmcb->save.dr7 & 0xff) {
1513 write_dr7(0); 1467 write_dr7(0);
1514 save_db_regs(vcpu->svm->host_db_regs); 1468 save_db_regs(svm->host_db_regs);
1515 load_db_regs(vcpu->svm->db_regs); 1469 load_db_regs(svm->db_regs);
1516 } 1470 }
1517 1471
1518 if (vcpu->fpu_active) { 1472 clgi();
1519 fx_save(vcpu->host_fx_image); 1473
1520 fx_restore(vcpu->guest_fx_image); 1474 local_irq_enable();
1521 }
1522 1475
1523 asm volatile ( 1476 asm volatile (
1524#ifdef CONFIG_X86_64 1477#ifdef CONFIG_X86_64
@@ -1532,34 +1485,33 @@ again:
1532#endif 1485#endif
1533 1486
1534#ifdef CONFIG_X86_64 1487#ifdef CONFIG_X86_64
1535 "mov %c[rbx](%[vcpu]), %%rbx \n\t" 1488 "mov %c[rbx](%[svm]), %%rbx \n\t"
1536 "mov %c[rcx](%[vcpu]), %%rcx \n\t" 1489 "mov %c[rcx](%[svm]), %%rcx \n\t"
1537 "mov %c[rdx](%[vcpu]), %%rdx \n\t" 1490 "mov %c[rdx](%[svm]), %%rdx \n\t"
1538 "mov %c[rsi](%[vcpu]), %%rsi \n\t" 1491 "mov %c[rsi](%[svm]), %%rsi \n\t"
1539 "mov %c[rdi](%[vcpu]), %%rdi \n\t" 1492 "mov %c[rdi](%[svm]), %%rdi \n\t"
1540 "mov %c[rbp](%[vcpu]), %%rbp \n\t" 1493 "mov %c[rbp](%[svm]), %%rbp \n\t"
1541 "mov %c[r8](%[vcpu]), %%r8 \n\t" 1494 "mov %c[r8](%[svm]), %%r8 \n\t"
1542 "mov %c[r9](%[vcpu]), %%r9 \n\t" 1495 "mov %c[r9](%[svm]), %%r9 \n\t"
1543 "mov %c[r10](%[vcpu]), %%r10 \n\t" 1496 "mov %c[r10](%[svm]), %%r10 \n\t"
1544 "mov %c[r11](%[vcpu]), %%r11 \n\t" 1497 "mov %c[r11](%[svm]), %%r11 \n\t"
1545 "mov %c[r12](%[vcpu]), %%r12 \n\t" 1498 "mov %c[r12](%[svm]), %%r12 \n\t"
1546 "mov %c[r13](%[vcpu]), %%r13 \n\t" 1499 "mov %c[r13](%[svm]), %%r13 \n\t"
1547 "mov %c[r14](%[vcpu]), %%r14 \n\t" 1500 "mov %c[r14](%[svm]), %%r14 \n\t"
1548 "mov %c[r15](%[vcpu]), %%r15 \n\t" 1501 "mov %c[r15](%[svm]), %%r15 \n\t"
1549#else 1502#else
1550 "mov %c[rbx](%[vcpu]), %%ebx \n\t" 1503 "mov %c[rbx](%[svm]), %%ebx \n\t"
1551 "mov %c[rcx](%[vcpu]), %%ecx \n\t" 1504 "mov %c[rcx](%[svm]), %%ecx \n\t"
1552 "mov %c[rdx](%[vcpu]), %%edx \n\t" 1505 "mov %c[rdx](%[svm]), %%edx \n\t"
1553 "mov %c[rsi](%[vcpu]), %%esi \n\t" 1506 "mov %c[rsi](%[svm]), %%esi \n\t"
1554 "mov %c[rdi](%[vcpu]), %%edi \n\t" 1507 "mov %c[rdi](%[svm]), %%edi \n\t"
1555 "mov %c[rbp](%[vcpu]), %%ebp \n\t" 1508 "mov %c[rbp](%[svm]), %%ebp \n\t"
1556#endif 1509#endif
1557 1510
1558#ifdef CONFIG_X86_64 1511#ifdef CONFIG_X86_64
1559 /* Enter guest mode */ 1512 /* Enter guest mode */
1560 "push %%rax \n\t" 1513 "push %%rax \n\t"
1561 "mov %c[svm](%[vcpu]), %%rax \n\t" 1514 "mov %c[vmcb](%[svm]), %%rax \n\t"
1562 "mov %c[vmcb](%%rax), %%rax \n\t"
1563 SVM_VMLOAD "\n\t" 1515 SVM_VMLOAD "\n\t"
1564 SVM_VMRUN "\n\t" 1516 SVM_VMRUN "\n\t"
1565 SVM_VMSAVE "\n\t" 1517 SVM_VMSAVE "\n\t"
@@ -1567,8 +1519,7 @@ again:
1567#else 1519#else
1568 /* Enter guest mode */ 1520 /* Enter guest mode */
1569 "push %%eax \n\t" 1521 "push %%eax \n\t"
1570 "mov %c[svm](%[vcpu]), %%eax \n\t" 1522 "mov %c[vmcb](%[svm]), %%eax \n\t"
1571 "mov %c[vmcb](%%eax), %%eax \n\t"
1572 SVM_VMLOAD "\n\t" 1523 SVM_VMLOAD "\n\t"
1573 SVM_VMRUN "\n\t" 1524 SVM_VMRUN "\n\t"
1574 SVM_VMSAVE "\n\t" 1525 SVM_VMSAVE "\n\t"
@@ -1577,73 +1528,69 @@ again:
1577 1528
1578 /* Save guest registers, load host registers */ 1529 /* Save guest registers, load host registers */
1579#ifdef CONFIG_X86_64 1530#ifdef CONFIG_X86_64
1580 "mov %%rbx, %c[rbx](%[vcpu]) \n\t" 1531 "mov %%rbx, %c[rbx](%[svm]) \n\t"
1581 "mov %%rcx, %c[rcx](%[vcpu]) \n\t" 1532 "mov %%rcx, %c[rcx](%[svm]) \n\t"
1582 "mov %%rdx, %c[rdx](%[vcpu]) \n\t" 1533 "mov %%rdx, %c[rdx](%[svm]) \n\t"
1583 "mov %%rsi, %c[rsi](%[vcpu]) \n\t" 1534 "mov %%rsi, %c[rsi](%[svm]) \n\t"
1584 "mov %%rdi, %c[rdi](%[vcpu]) \n\t" 1535 "mov %%rdi, %c[rdi](%[svm]) \n\t"
1585 "mov %%rbp, %c[rbp](%[vcpu]) \n\t" 1536 "mov %%rbp, %c[rbp](%[svm]) \n\t"
1586 "mov %%r8, %c[r8](%[vcpu]) \n\t" 1537 "mov %%r8, %c[r8](%[svm]) \n\t"
1587 "mov %%r9, %c[r9](%[vcpu]) \n\t" 1538 "mov %%r9, %c[r9](%[svm]) \n\t"
1588 "mov %%r10, %c[r10](%[vcpu]) \n\t" 1539 "mov %%r10, %c[r10](%[svm]) \n\t"
1589 "mov %%r11, %c[r11](%[vcpu]) \n\t" 1540 "mov %%r11, %c[r11](%[svm]) \n\t"
1590 "mov %%r12, %c[r12](%[vcpu]) \n\t" 1541 "mov %%r12, %c[r12](%[svm]) \n\t"
1591 "mov %%r13, %c[r13](%[vcpu]) \n\t" 1542 "mov %%r13, %c[r13](%[svm]) \n\t"
1592 "mov %%r14, %c[r14](%[vcpu]) \n\t" 1543 "mov %%r14, %c[r14](%[svm]) \n\t"
1593 "mov %%r15, %c[r15](%[vcpu]) \n\t" 1544 "mov %%r15, %c[r15](%[svm]) \n\t"
1594 1545
1595 "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" 1546 "pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
1596 "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" 1547 "pop %%r11; pop %%r10; pop %%r9; pop %%r8;"
1597 "pop %%rbp; pop %%rdi; pop %%rsi;" 1548 "pop %%rbp; pop %%rdi; pop %%rsi;"
1598 "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" 1549 "pop %%rdx; pop %%rcx; pop %%rbx; \n\t"
1599#else 1550#else
1600 "mov %%ebx, %c[rbx](%[vcpu]) \n\t" 1551 "mov %%ebx, %c[rbx](%[svm]) \n\t"
1601 "mov %%ecx, %c[rcx](%[vcpu]) \n\t" 1552 "mov %%ecx, %c[rcx](%[svm]) \n\t"
1602 "mov %%edx, %c[rdx](%[vcpu]) \n\t" 1553 "mov %%edx, %c[rdx](%[svm]) \n\t"
1603 "mov %%esi, %c[rsi](%[vcpu]) \n\t" 1554 "mov %%esi, %c[rsi](%[svm]) \n\t"
1604 "mov %%edi, %c[rdi](%[vcpu]) \n\t" 1555 "mov %%edi, %c[rdi](%[svm]) \n\t"
1605 "mov %%ebp, %c[rbp](%[vcpu]) \n\t" 1556 "mov %%ebp, %c[rbp](%[svm]) \n\t"
1606 1557
1607 "pop %%ebp; pop %%edi; pop %%esi;" 1558 "pop %%ebp; pop %%edi; pop %%esi;"
1608 "pop %%edx; pop %%ecx; pop %%ebx; \n\t" 1559 "pop %%edx; pop %%ecx; pop %%ebx; \n\t"
1609#endif 1560#endif
1610 : 1561 :
1611 : [vcpu]"a"(vcpu), 1562 : [svm]"a"(svm),
1612 [svm]"i"(offsetof(struct kvm_vcpu, svm)),
1613 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), 1563 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
1614 [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), 1564 [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])),
1615 [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), 1565 [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])),
1616 [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), 1566 [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])),
1617 [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), 1567 [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])),
1618 [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), 1568 [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])),
1619 [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])) 1569 [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP]))
1620#ifdef CONFIG_X86_64 1570#ifdef CONFIG_X86_64
1621 ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), 1571 ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])),
1622 [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), 1572 [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])),
1623 [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), 1573 [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])),
1624 [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), 1574 [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])),
1625 [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), 1575 [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])),
1626 [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), 1576 [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])),
1627 [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), 1577 [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])),
1628 [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])) 1578 [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15]))
1629#endif 1579#endif
1630 : "cc", "memory" ); 1580 : "cc", "memory" );
1631 1581
1632 vcpu->guest_mode = 0; 1582 local_irq_disable();
1633 1583
1634 if (vcpu->fpu_active) { 1584 stgi();
1635 fx_save(vcpu->guest_fx_image);
1636 fx_restore(vcpu->host_fx_image);
1637 }
1638 1585
1639 if ((vcpu->svm->vmcb->save.dr7 & 0xff)) 1586 if ((svm->vmcb->save.dr7 & 0xff))
1640 load_db_regs(vcpu->svm->host_db_regs); 1587 load_db_regs(svm->host_db_regs);
1641 1588
1642 vcpu->cr2 = vcpu->svm->vmcb->save.cr2; 1589 vcpu->cr2 = svm->vmcb->save.cr2;
1643 1590
1644 write_dr6(vcpu->svm->host_dr6); 1591 write_dr6(svm->host_dr6);
1645 write_dr7(vcpu->svm->host_dr7); 1592 write_dr7(svm->host_dr7);
1646 kvm_write_cr2(vcpu->svm->host_cr2); 1593 kvm_write_cr2(svm->host_cr2);
1647 1594
1648 load_fs(fs_selector); 1595 load_fs(fs_selector);
1649 load_gs(gs_selector); 1596 load_gs(gs_selector);
@@ -1652,57 +1599,19 @@ again:
1652 1599
1653 reload_tss(vcpu); 1600 reload_tss(vcpu);
1654 1601
1655 /* 1602 svm->next_rip = 0;
1656 * Profile KVM exit RIPs:
1657 */
1658 if (unlikely(prof_on == KVM_PROFILING))
1659 profile_hit(KVM_PROFILING,
1660 (void *)(unsigned long)vcpu->svm->vmcb->save.rip);
1661
1662 stgi();
1663
1664 kvm_reput_irq(vcpu);
1665
1666 vcpu->svm->next_rip = 0;
1667
1668 if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
1669 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
1670 kvm_run->fail_entry.hardware_entry_failure_reason
1671 = vcpu->svm->vmcb->control.exit_code;
1672 post_kvm_run_save(vcpu, kvm_run);
1673 return 0;
1674 }
1675
1676 r = handle_exit(vcpu, kvm_run);
1677 if (r > 0) {
1678 if (signal_pending(current)) {
1679 ++vcpu->stat.signal_exits;
1680 post_kvm_run_save(vcpu, kvm_run);
1681 kvm_run->exit_reason = KVM_EXIT_INTR;
1682 return -EINTR;
1683 }
1684
1685 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
1686 ++vcpu->stat.request_irq_exits;
1687 post_kvm_run_save(vcpu, kvm_run);
1688 kvm_run->exit_reason = KVM_EXIT_INTR;
1689 return -EINTR;
1690 }
1691 kvm_resched(vcpu);
1692 goto again;
1693 }
1694 post_kvm_run_save(vcpu, kvm_run);
1695 return r;
1696} 1603}
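
The register save/restore asm above now addresses guest registers relative to the vcpu_svm pointer with a single "i"(offsetof(...)) constraint per register, printed via %c so the offset appears as a bare displacement; the old code needed an extra indirection through the kvm_vcpu's svm member. A minimal demo of the idiom (assumes GCC or Clang on x86-64):

	#include <stddef.h>
	#include <stdio.h>

	struct regs { unsigned long rbx_save; };

	int main(void)
	{
		struct regs r = { 0 };
		unsigned long v = 42;

		/* %c[off] prints the offsetof constant without the '$'
		 * prefix, so it can serve as a memory displacement */
		asm volatile("mov %[val], %c[off](%[base])"
			     : /* memory written through the pointer */
			     : [base]"r"(&r),
			       [off]"i"(offsetof(struct regs, rbx_save)),
			       [val]"r"(v)
			     : "memory");
		printf("%lu\n", r.rbx_save);
		return 0;
	}

Note also that the old "again:" re-entry loop, profiling hook, and post_kvm_run_save() bookkeeping are gone from this function: with .handle_exit split out into the ops table, the run loop lives in common code and svm_vcpu_run() only does the world switch.
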
1697 1604
1698static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) 1605static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
1699{ 1606{
1700 vcpu->svm->vmcb->save.cr3 = root; 1607 struct vcpu_svm *svm = to_svm(vcpu);
1608
1609 svm->vmcb->save.cr3 = root;
1701 force_new_asid(vcpu); 1610 force_new_asid(vcpu);
1702 1611
1703 if (vcpu->fpu_active) { 1612 if (vcpu->fpu_active) {
1704 vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); 1613 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
1705 vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK; 1614 svm->vmcb->save.cr0 |= X86_CR0_TS;
1706 vcpu->fpu_active = 0; 1615 vcpu->fpu_active = 0;
1707 } 1616 }
1708} 1617}
@@ -1711,26 +1620,27 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
1711 unsigned long addr, 1620 unsigned long addr,
1712 uint32_t err_code) 1621 uint32_t err_code)
1713{ 1622{
1714 uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; 1623 struct vcpu_svm *svm = to_svm(vcpu);
1624 uint32_t exit_int_info = svm->vmcb->control.exit_int_info;
1715 1625
1716 ++vcpu->stat.pf_guest; 1626 ++vcpu->stat.pf_guest;
1717 1627
1718 if (is_page_fault(exit_int_info)) { 1628 if (is_page_fault(exit_int_info)) {
1719 1629
1720 vcpu->svm->vmcb->control.event_inj_err = 0; 1630 svm->vmcb->control.event_inj_err = 0;
1721 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 1631 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
1722 SVM_EVTINJ_VALID_ERR | 1632 SVM_EVTINJ_VALID_ERR |
1723 SVM_EVTINJ_TYPE_EXEPT | 1633 SVM_EVTINJ_TYPE_EXEPT |
1724 DF_VECTOR; 1634 DF_VECTOR;
1725 return; 1635 return;
1726 } 1636 }
1727 vcpu->cr2 = addr; 1637 vcpu->cr2 = addr;
1728 vcpu->svm->vmcb->save.cr2 = addr; 1638 svm->vmcb->save.cr2 = addr;
1729 vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | 1639 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID |
1730 SVM_EVTINJ_VALID_ERR | 1640 SVM_EVTINJ_VALID_ERR |
1731 SVM_EVTINJ_TYPE_EXEPT | 1641 SVM_EVTINJ_TYPE_EXEPT |
1732 PF_VECTOR; 1642 PF_VECTOR;
1733 vcpu->svm->vmcb->control.event_inj_err = err_code; 1643 svm->vmcb->control.event_inj_err = err_code;
1734} 1644}
1735 1645
1736 1646
@@ -1757,17 +1667,25 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
1757 hypercall[3] = 0xc3; 1667 hypercall[3] = 0xc3;
1758} 1668}
1759 1669
1760static struct kvm_arch_ops svm_arch_ops = { 1670static void svm_check_processor_compat(void *rtn)
1671{
1672 *(int *)rtn = 0;
1673}
1674
1675static struct kvm_x86_ops svm_x86_ops = {
1761 .cpu_has_kvm_support = has_svm, 1676 .cpu_has_kvm_support = has_svm,
1762 .disabled_by_bios = is_disabled, 1677 .disabled_by_bios = is_disabled,
1763 .hardware_setup = svm_hardware_setup, 1678 .hardware_setup = svm_hardware_setup,
1764 .hardware_unsetup = svm_hardware_unsetup, 1679 .hardware_unsetup = svm_hardware_unsetup,
1680 .check_processor_compatibility = svm_check_processor_compat,
1765 .hardware_enable = svm_hardware_enable, 1681 .hardware_enable = svm_hardware_enable,
1766 .hardware_disable = svm_hardware_disable, 1682 .hardware_disable = svm_hardware_disable,
1767 1683
1768 .vcpu_create = svm_create_vcpu, 1684 .vcpu_create = svm_create_vcpu,
1769 .vcpu_free = svm_free_vcpu, 1685 .vcpu_free = svm_free_vcpu,
1686 .vcpu_reset = svm_vcpu_reset,
1770 1687
1688 .prepare_guest_switch = svm_prepare_guest_switch,
1771 .vcpu_load = svm_vcpu_load, 1689 .vcpu_load = svm_vcpu_load,
1772 .vcpu_put = svm_vcpu_put, 1690 .vcpu_put = svm_vcpu_put,
1773 .vcpu_decache = svm_vcpu_decache, 1691 .vcpu_decache = svm_vcpu_decache,
@@ -1778,7 +1696,7 @@ static struct kvm_arch_ops svm_arch_ops = {
1778 .get_segment_base = svm_get_segment_base, 1696 .get_segment_base = svm_get_segment_base,
1779 .get_segment = svm_get_segment, 1697 .get_segment = svm_get_segment,
1780 .set_segment = svm_set_segment, 1698 .set_segment = svm_set_segment,
1781 .get_cs_db_l_bits = svm_get_cs_db_l_bits, 1699 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
1782 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 1700 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
1783 .set_cr0 = svm_set_cr0, 1701 .set_cr0 = svm_set_cr0,
1784 .set_cr3 = svm_set_cr3, 1702 .set_cr3 = svm_set_cr3,
@@ -1795,26 +1713,30 @@ static struct kvm_arch_ops svm_arch_ops = {
1795 .get_rflags = svm_get_rflags, 1713 .get_rflags = svm_get_rflags,
1796 .set_rflags = svm_set_rflags, 1714 .set_rflags = svm_set_rflags,
1797 1715
1798 .invlpg = svm_invlpg,
1799 .tlb_flush = svm_flush_tlb, 1716 .tlb_flush = svm_flush_tlb,
1800 .inject_page_fault = svm_inject_page_fault, 1717 .inject_page_fault = svm_inject_page_fault,
1801 1718
1802 .inject_gp = svm_inject_gp, 1719 .inject_gp = svm_inject_gp,
1803 1720
1804 .run = svm_vcpu_run, 1721 .run = svm_vcpu_run,
1722 .handle_exit = handle_exit,
1805 .skip_emulated_instruction = skip_emulated_instruction, 1723 .skip_emulated_instruction = skip_emulated_instruction,
1806 .vcpu_setup = svm_vcpu_setup,
1807 .patch_hypercall = svm_patch_hypercall, 1724 .patch_hypercall = svm_patch_hypercall,
1725 .get_irq = svm_get_irq,
1726 .set_irq = svm_set_irq,
1727 .inject_pending_irq = svm_intr_assist,
1728 .inject_pending_vectors = do_interrupt_requests,
1808}; 1729};
1809 1730
1810static int __init svm_init(void) 1731static int __init svm_init(void)
1811{ 1732{
1812 return kvm_init_arch(&svm_arch_ops, THIS_MODULE); 1733 return kvm_init_x86(&svm_x86_ops, sizeof(struct vcpu_svm),
1734 THIS_MODULE);
1813} 1735}
1814 1736
1815static void __exit svm_exit(void) 1737static void __exit svm_exit(void)
1816{ 1738{
1817 kvm_exit_arch(); 1739 kvm_exit_x86();
1818} 1740}
1819 1741
1820module_init(svm_init) 1742module_init(svm_init)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 80628f69916d..4f115a8e45ef 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -16,6 +16,8 @@
16 */ 16 */
17 17
18#include "kvm.h" 18#include "kvm.h"
19#include "x86_emulate.h"
20#include "irq.h"
19#include "vmx.h" 21#include "vmx.h"
20#include "segment_descriptor.h" 22#include "segment_descriptor.h"
21 23
@@ -23,7 +25,6 @@
23#include <linux/kernel.h> 25#include <linux/kernel.h>
24#include <linux/mm.h> 26#include <linux/mm.h>
25#include <linux/highmem.h> 27#include <linux/highmem.h>
26#include <linux/profile.h>
27#include <linux/sched.h> 28#include <linux/sched.h>
28 29
29#include <asm/io.h> 30#include <asm/io.h>
@@ -32,6 +33,39 @@
32MODULE_AUTHOR("Qumranet"); 33MODULE_AUTHOR("Qumranet");
33MODULE_LICENSE("GPL"); 34MODULE_LICENSE("GPL");
34 35
36struct vmcs {
37 u32 revision_id;
38 u32 abort;
39 char data[0];
40};
41
42struct vcpu_vmx {
43 struct kvm_vcpu vcpu;
44 int launched;
45 u8 fail;
46 struct kvm_msr_entry *guest_msrs;
47 struct kvm_msr_entry *host_msrs;
48 int nmsrs;
49 int save_nmsrs;
50 int msr_offset_efer;
51#ifdef CONFIG_X86_64
52 int msr_offset_kernel_gs_base;
53#endif
54 struct vmcs *vmcs;
55 struct {
56 int loaded;
57 u16 fs_sel, gs_sel, ldt_sel;
58 int gs_ldt_reload_needed;
59 int fs_reload_needed;
60 }host_state;
61
62};
63
64static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
65{
66 return container_of(vcpu, struct vcpu_vmx, vcpu);
67}
68
35static int init_rmode_tss(struct kvm *kvm); 69static int init_rmode_tss(struct kvm *kvm);
36 70
37static DEFINE_PER_CPU(struct vmcs *, vmxarea); 71static DEFINE_PER_CPU(struct vmcs *, vmxarea);
@@ -40,18 +74,17 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
40static struct page *vmx_io_bitmap_a; 74static struct page *vmx_io_bitmap_a;
41static struct page *vmx_io_bitmap_b; 75static struct page *vmx_io_bitmap_b;
42 76
43#ifdef CONFIG_X86_64
44#define HOST_IS_64 1
45#else
46#define HOST_IS_64 0
47#endif
48#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) 77#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE)
49 78
50static struct vmcs_descriptor { 79static struct vmcs_config {
51 int size; 80 int size;
52 int order; 81 int order;
53 u32 revision_id; 82 u32 revision_id;
54} vmcs_descriptor; 83 u32 pin_based_exec_ctrl;
84 u32 cpu_based_exec_ctrl;
85 u32 vmexit_ctrl;
86 u32 vmentry_ctrl;
87} vmcs_config;
55 88
56#define VMX_SEGMENT_FIELD(seg) \ 89#define VMX_SEGMENT_FIELD(seg) \
57 [VCPU_SREG_##seg] = { \ 90 [VCPU_SREG_##seg] = { \
@@ -89,16 +122,32 @@ static const u32 vmx_msr_index[] = {
89}; 122};
90#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 123#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
91 124
92static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) 125static void load_msrs(struct kvm_msr_entry *e, int n)
126{
127 int i;
128
129 for (i = 0; i < n; ++i)
130 wrmsrl(e[i].index, e[i].data);
131}
132
133static void save_msrs(struct kvm_msr_entry *e, int n)
134{
135 int i;
136
137 for (i = 0; i < n; ++i)
138 rdmsrl(e[i].index, e[i].data);
139}
140
141static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr)
93{ 142{
94 return (u64)msr.data & EFER_SAVE_RESTORE_BITS; 143 return (u64)msr.data & EFER_SAVE_RESTORE_BITS;
95} 144}
96 145
97static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) 146static inline int msr_efer_need_save_restore(struct vcpu_vmx *vmx)
98{ 147{
99 int efer_offset = vcpu->msr_offset_efer; 148 int efer_offset = vmx->msr_offset_efer;
100 return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != 149 return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) !=
101 msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); 150 msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
102} 151}
103 152
104static inline int is_page_fault(u32 intr_info) 153static inline int is_page_fault(u32 intr_info)
@@ -121,23 +170,33 @@ static inline int is_external_interrupt(u32 intr_info)
121 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 170 == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
122} 171}
123 172
124static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) 173static inline int cpu_has_vmx_tpr_shadow(void)
174{
175 return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
176}
177
178static inline int vm_need_tpr_shadow(struct kvm *kvm)
179{
180 return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
181}
182
183static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
125{ 184{
126 int i; 185 int i;
127 186
128 for (i = 0; i < vcpu->nmsrs; ++i) 187 for (i = 0; i < vmx->nmsrs; ++i)
129 if (vcpu->guest_msrs[i].index == msr) 188 if (vmx->guest_msrs[i].index == msr)
130 return i; 189 return i;
131 return -1; 190 return -1;
132} 191}
133 192
134static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) 193static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
135{ 194{
136 int i; 195 int i;
137 196
138 i = __find_msr_index(vcpu, msr); 197 i = __find_msr_index(vmx, msr);
139 if (i >= 0) 198 if (i >= 0)
140 return &vcpu->guest_msrs[i]; 199 return &vmx->guest_msrs[i];
141 return NULL; 200 return NULL;
142} 201}
143 202
@@ -156,23 +215,24 @@ static void vmcs_clear(struct vmcs *vmcs)
156 215
157static void __vcpu_clear(void *arg) 216static void __vcpu_clear(void *arg)
158{ 217{
159 struct kvm_vcpu *vcpu = arg; 218 struct vcpu_vmx *vmx = arg;
160 int cpu = raw_smp_processor_id(); 219 int cpu = raw_smp_processor_id();
161 220
162 if (vcpu->cpu == cpu) 221 if (vmx->vcpu.cpu == cpu)
163 vmcs_clear(vcpu->vmcs); 222 vmcs_clear(vmx->vmcs);
164 if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) 223 if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
165 per_cpu(current_vmcs, cpu) = NULL; 224 per_cpu(current_vmcs, cpu) = NULL;
166 rdtscll(vcpu->host_tsc); 225 rdtscll(vmx->vcpu.host_tsc);
167} 226}
168 227
169static void vcpu_clear(struct kvm_vcpu *vcpu) 228static void vcpu_clear(struct vcpu_vmx *vmx)
170{ 229{
171 if (vcpu->cpu != raw_smp_processor_id() && vcpu->cpu != -1) 230 if (vmx->vcpu.cpu != raw_smp_processor_id() && vmx->vcpu.cpu != -1)
172 smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, 0, 1); 231 smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear,
232 vmx, 0, 1);
173 else 233 else
174 __vcpu_clear(vcpu); 234 __vcpu_clear(vmx);
175 vcpu->launched = 0; 235 vmx->launched = 0;
176} 236}
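
A VMCS may only be safely cleared on the CPU it was last loaded on, so vcpu_clear() either runs __vcpu_clear() locally or ships it to the owning CPU via smp_call_function_single(). The control flow, modeled with a stand-in for the cross-CPU call (helpers invented for the sketch):

	#include <stdio.h>

	static void do_clear(void) { puts("vmcs cleared"); }

	/* stand-in for smp_call_function_single(): in the kernel this runs
	 * the callback on the named CPU via IPI; here we just call it */
	static void run_on_cpu(int cpu, void (*fn)(void))
	{
		printf("IPI to cpu %d: ", cpu);
		fn();
	}

	static void vcpu_clear_model(int vmcs_cpu, int this_cpu)
	{
		if (vmcs_cpu != this_cpu && vmcs_cpu != -1)
			run_on_cpu(vmcs_cpu, do_clear);
		else
			do_clear();	/* local, or never loaded anywhere */
	}

	int main(void)
	{
		vcpu_clear_model(2, 0);		/* remote clear */
		vcpu_clear_model(-1, 0);	/* never loaded: clear locally */
		return 0;
	}
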
177 237
178static unsigned long vmcs_readl(unsigned long field) 238static unsigned long vmcs_readl(unsigned long field)
@@ -282,121 +342,122 @@ static void reload_tss(void)
282#endif 342#endif
283} 343}
284 344
285static void load_transition_efer(struct kvm_vcpu *vcpu) 345static void load_transition_efer(struct vcpu_vmx *vmx)
286{ 346{
287 u64 trans_efer; 347 u64 trans_efer;
288 int efer_offset = vcpu->msr_offset_efer; 348 int efer_offset = vmx->msr_offset_efer;
289 349
290 trans_efer = vcpu->host_msrs[efer_offset].data; 350 trans_efer = vmx->host_msrs[efer_offset].data;
291 trans_efer &= ~EFER_SAVE_RESTORE_BITS; 351 trans_efer &= ~EFER_SAVE_RESTORE_BITS;
292 trans_efer |= msr_efer_save_restore_bits( 352 trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
293 vcpu->guest_msrs[efer_offset]);
294 wrmsrl(MSR_EFER, trans_efer); 353 wrmsrl(MSR_EFER, trans_efer);
295 vcpu->stat.efer_reload++; 354 vmx->vcpu.stat.efer_reload++;
296} 355}
297 356
298static void vmx_save_host_state(struct kvm_vcpu *vcpu) 357static void vmx_save_host_state(struct kvm_vcpu *vcpu)
299{ 358{
300 struct vmx_host_state *hs = &vcpu->vmx_host_state; 359 struct vcpu_vmx *vmx = to_vmx(vcpu);
301 360
302 if (hs->loaded) 361 if (vmx->host_state.loaded)
303 return; 362 return;
304 363
305 hs->loaded = 1; 364 vmx->host_state.loaded = 1;
306 /* 365 /*
307 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not 366 * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
308 * allow segment selectors with cpl > 0 or ti == 1. 367 * allow segment selectors with cpl > 0 or ti == 1.
309 */ 368 */
310 hs->ldt_sel = read_ldt(); 369 vmx->host_state.ldt_sel = read_ldt();
311 hs->fs_gs_ldt_reload_needed = hs->ldt_sel; 370 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
312 hs->fs_sel = read_fs(); 371 vmx->host_state.fs_sel = read_fs();
313 if (!(hs->fs_sel & 7)) 372 if (!(vmx->host_state.fs_sel & 7)) {
314 vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel); 373 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
315 else { 374 vmx->host_state.fs_reload_needed = 0;
375 } else {
316 vmcs_write16(HOST_FS_SELECTOR, 0); 376 vmcs_write16(HOST_FS_SELECTOR, 0);
317 hs->fs_gs_ldt_reload_needed = 1; 377 vmx->host_state.fs_reload_needed = 1;
318 } 378 }
319 hs->gs_sel = read_gs(); 379 vmx->host_state.gs_sel = read_gs();
320 if (!(hs->gs_sel & 7)) 380 if (!(vmx->host_state.gs_sel & 7))
321 vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel); 381 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
322 else { 382 else {
323 vmcs_write16(HOST_GS_SELECTOR, 0); 383 vmcs_write16(HOST_GS_SELECTOR, 0);
324 hs->fs_gs_ldt_reload_needed = 1; 384 vmx->host_state.gs_ldt_reload_needed = 1;
325 } 385 }
326 386
327#ifdef CONFIG_X86_64 387#ifdef CONFIG_X86_64
328 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); 388 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
329 vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); 389 vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
330#else 390#else
331 vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); 391 vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
332 vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); 392 vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
333#endif 393#endif
334 394
335#ifdef CONFIG_X86_64 395#ifdef CONFIG_X86_64
336 if (is_long_mode(vcpu)) { 396 if (is_long_mode(&vmx->vcpu)) {
337 save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); 397 save_msrs(vmx->host_msrs +
398 vmx->msr_offset_kernel_gs_base, 1);
338 } 399 }
339#endif 400#endif
340 load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); 401 load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
341 if (msr_efer_need_save_restore(vcpu)) 402 if (msr_efer_need_save_restore(vmx))
342 load_transition_efer(vcpu); 403 load_transition_efer(vmx);
343} 404}
344 405
345static void vmx_load_host_state(struct kvm_vcpu *vcpu) 406static void vmx_load_host_state(struct vcpu_vmx *vmx)
346{ 407{
347 struct vmx_host_state *hs = &vcpu->vmx_host_state; 408 unsigned long flags;
348 409
349 if (!hs->loaded) 410 if (!vmx->host_state.loaded)
350 return; 411 return;
351 412
352 hs->loaded = 0; 413 vmx->host_state.loaded = 0;
353 if (hs->fs_gs_ldt_reload_needed) { 414 if (vmx->host_state.fs_reload_needed)
354 load_ldt(hs->ldt_sel); 415 load_fs(vmx->host_state.fs_sel);
355 load_fs(hs->fs_sel); 416 if (vmx->host_state.gs_ldt_reload_needed) {
417 load_ldt(vmx->host_state.ldt_sel);
356 /* 418 /*
357 * If we have to reload gs, we must take care to 419 * If we have to reload gs, we must take care to
358 * preserve our gs base. 420 * preserve our gs base.
359 */ 421 */
360 local_irq_disable(); 422 local_irq_save(flags);
361 load_gs(hs->gs_sel); 423 load_gs(vmx->host_state.gs_sel);
362#ifdef CONFIG_X86_64 424#ifdef CONFIG_X86_64
363 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); 425 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
364#endif 426#endif
365 local_irq_enable(); 427 local_irq_restore(flags);
366
367 reload_tss();
368 } 428 }
369 save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); 429 reload_tss();
370 load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); 430 save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
371 if (msr_efer_need_save_restore(vcpu)) 431 load_msrs(vmx->host_msrs, vmx->save_nmsrs);
372 load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); 432 if (msr_efer_need_save_restore(vmx))
433 load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
373} 434}
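
Two things change in the host-state restore path above: the single fs_gs_ldt_reload_needed flag is split per register, and the irq guard around load_gs() becomes save/restore so the function stays correct when entered with interrupts already disabled. A sketch of the host_state cache this assumes, embedded in vcpu_vmx (field names from this diff; the exact types are a guess):

    struct vmx_host_state {
        int loaded;                   /* set by vmx_save_host_state()       */
        u16 fs_sel, gs_sel, ldt_sel;
        int fs_reload_needed;         /* fs selector had TI/RPL bits set    */
        int gs_ldt_reload_needed;     /* gs or ldt must be reloaded on exit */
    };

The irq-off window matters because reloading the gs selector clobbers the hidden MSR_GS_BASE that the kernel's per-cpu data sits behind; the wrmsrl() must land before any interrupt can run.
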
374 435
375/* 436/*
376 * Switches to specified vcpu, until a matching vcpu_put(), but assumes 437 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
377 * vcpu mutex is already taken. 438 * vcpu mutex is already taken.
378 */ 439 */
379static void vmx_vcpu_load(struct kvm_vcpu *vcpu) 440static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
380{ 441{
381 u64 phys_addr = __pa(vcpu->vmcs); 442 struct vcpu_vmx *vmx = to_vmx(vcpu);
382 int cpu; 443 u64 phys_addr = __pa(vmx->vmcs);
383 u64 tsc_this, delta; 444 u64 tsc_this, delta;
384 445
385 cpu = get_cpu(); 446 if (vcpu->cpu != cpu) {
386 447 vcpu_clear(vmx);
387 if (vcpu->cpu != cpu) 448 kvm_migrate_apic_timer(vcpu);
388 vcpu_clear(vcpu); 449 }
389 450
390 if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { 451 if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
391 u8 error; 452 u8 error;
392 453
393 per_cpu(current_vmcs, cpu) = vcpu->vmcs; 454 per_cpu(current_vmcs, cpu) = vmx->vmcs;
394 asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" 455 asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
395 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) 456 : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
396 : "cc"); 457 : "cc");
397 if (error) 458 if (error)
398 printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", 459 printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
399 vcpu->vmcs, phys_addr); 460 vmx->vmcs, phys_addr);
400 } 461 }
401 462
402 if (vcpu->cpu != cpu) { 463 if (vcpu->cpu != cpu) {
@@ -426,9 +487,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
426 487
427static void vmx_vcpu_put(struct kvm_vcpu *vcpu) 488static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
428{ 489{
429 vmx_load_host_state(vcpu); 490 vmx_load_host_state(to_vmx(vcpu));
430 kvm_put_guest_fpu(vcpu); 491 kvm_put_guest_fpu(vcpu);
431 put_cpu();
432} 492}
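
vmx_vcpu_load() now receives the target cpu and vmx_vcpu_put() drops the get_cpu()/put_cpu() pair: pinning preemption becomes the caller's job. The usage pattern, mirroring the vmx_create_vcpu() hunk later in this patch:

    int cpu, err;

    cpu = get_cpu();                 /* pin to the current cpu       */
    vmx_vcpu_load(&vmx->vcpu, cpu);  /* vmptrld + migration handling */
    err = vmx_vcpu_setup(vmx);
    vmx_vcpu_put(&vmx->vcpu);
    put_cpu();
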
433 493
434static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 494static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -436,9 +496,9 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
436 if (vcpu->fpu_active) 496 if (vcpu->fpu_active)
437 return; 497 return;
438 vcpu->fpu_active = 1; 498 vcpu->fpu_active = 1;
439 vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); 499 vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
440 if (vcpu->cr0 & CR0_TS_MASK) 500 if (vcpu->cr0 & X86_CR0_TS)
441 vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); 501 vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
442 update_exception_bitmap(vcpu); 502 update_exception_bitmap(vcpu);
443} 503}
444 504
@@ -447,13 +507,13 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
447 if (!vcpu->fpu_active) 507 if (!vcpu->fpu_active)
448 return; 508 return;
449 vcpu->fpu_active = 0; 509 vcpu->fpu_active = 0;
450 vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); 510 vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
451 update_exception_bitmap(vcpu); 511 update_exception_bitmap(vcpu);
452} 512}
453 513
454static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) 514static void vmx_vcpu_decache(struct kvm_vcpu *vcpu)
455{ 515{
456 vcpu_clear(vcpu); 516 vcpu_clear(to_vmx(vcpu));
457} 517}
458 518
459static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 519static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
@@ -501,59 +561,62 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
501/* 561/*
502 * Swap MSR entry in host/guest MSR entry array. 562 * Swap MSR entry in host/guest MSR entry array.
503 */ 563 */
504void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) 564#ifdef CONFIG_X86_64
565static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
505{ 566{
506 struct vmx_msr_entry tmp; 567 struct kvm_msr_entry tmp;
507 tmp = vcpu->guest_msrs[to]; 568
508 vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; 569 tmp = vmx->guest_msrs[to];
509 vcpu->guest_msrs[from] = tmp; 570 vmx->guest_msrs[to] = vmx->guest_msrs[from];
510 tmp = vcpu->host_msrs[to]; 571 vmx->guest_msrs[from] = tmp;
511 vcpu->host_msrs[to] = vcpu->host_msrs[from]; 572 tmp = vmx->host_msrs[to];
512 vcpu->host_msrs[from] = tmp; 573 vmx->host_msrs[to] = vmx->host_msrs[from];
574 vmx->host_msrs[from] = tmp;
513} 575}
576#endif
514 577
515/* 578/*
516 * Set up the vmcs to automatically save and restore system 579 * Set up the vmcs to automatically save and restore system
517 * msrs. Don't touch the 64-bit msrs if the guest is in legacy 580 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
518 * mode, as fiddling with msrs is very expensive. 581 * mode, as fiddling with msrs is very expensive.
519 */ 582 */
520static void setup_msrs(struct kvm_vcpu *vcpu) 583static void setup_msrs(struct vcpu_vmx *vmx)
521{ 584{
522 int save_nmsrs; 585 int save_nmsrs;
523 586
524 save_nmsrs = 0; 587 save_nmsrs = 0;
525#ifdef CONFIG_X86_64 588#ifdef CONFIG_X86_64
526 if (is_long_mode(vcpu)) { 589 if (is_long_mode(&vmx->vcpu)) {
527 int index; 590 int index;
528 591
529 index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); 592 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
530 if (index >= 0) 593 if (index >= 0)
531 move_msr_up(vcpu, index, save_nmsrs++); 594 move_msr_up(vmx, index, save_nmsrs++);
532 index = __find_msr_index(vcpu, MSR_LSTAR); 595 index = __find_msr_index(vmx, MSR_LSTAR);
533 if (index >= 0) 596 if (index >= 0)
534 move_msr_up(vcpu, index, save_nmsrs++); 597 move_msr_up(vmx, index, save_nmsrs++);
535 index = __find_msr_index(vcpu, MSR_CSTAR); 598 index = __find_msr_index(vmx, MSR_CSTAR);
536 if (index >= 0) 599 if (index >= 0)
537 move_msr_up(vcpu, index, save_nmsrs++); 600 move_msr_up(vmx, index, save_nmsrs++);
538 index = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); 601 index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
539 if (index >= 0) 602 if (index >= 0)
540 move_msr_up(vcpu, index, save_nmsrs++); 603 move_msr_up(vmx, index, save_nmsrs++);
541 /* 604 /*
542 * MSR_K6_STAR is only needed on long mode guests, and only 605 * MSR_K6_STAR is only needed on long mode guests, and only
543 * if efer.sce is enabled. 606 * if efer.sce is enabled.
544 */ 607 */
545 index = __find_msr_index(vcpu, MSR_K6_STAR); 608 index = __find_msr_index(vmx, MSR_K6_STAR);
546 if ((index >= 0) && (vcpu->shadow_efer & EFER_SCE)) 609 if ((index >= 0) && (vmx->vcpu.shadow_efer & EFER_SCE))
547 move_msr_up(vcpu, index, save_nmsrs++); 610 move_msr_up(vmx, index, save_nmsrs++);
548 } 611 }
549#endif 612#endif
550 vcpu->save_nmsrs = save_nmsrs; 613 vmx->save_nmsrs = save_nmsrs;
551 614
552#ifdef CONFIG_X86_64 615#ifdef CONFIG_X86_64
553 vcpu->msr_offset_kernel_gs_base = 616 vmx->msr_offset_kernel_gs_base =
554 __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); 617 __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
555#endif 618#endif
556 vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); 619 vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
557} 620}
558 621
559/* 622/*
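
setup_msrs() above packs the MSRs that must be switched on every host/guest transition into the front of the parallel guest_msrs[]/host_msrs[] arrays, so the load_msrs()/save_msrs() calls seen earlier only walk the first save_nmsrs entries. A plausible shape for those helpers, assuming the kvm_msr_entry layout used elsewhere in this patch:

    static void load_msrs(struct kvm_msr_entry *e, int n)
    {
        int i;

        for (i = 0; i < n; ++i)
            wrmsrl(e[i].index, e[i].data);
    }

    static void save_msrs(struct kvm_msr_entry *e, int n)
    {
        int i;

        for (i = 0; i < n; ++i)
            rdmsrl(e[i].index, e[i].data);
    }
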
@@ -589,7 +652,7 @@ static void guest_write_tsc(u64 guest_tsc)
589static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) 652static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
590{ 653{
591 u64 data; 654 u64 data;
592 struct vmx_msr_entry *msr; 655 struct kvm_msr_entry *msr;
593 656
594 if (!pdata) { 657 if (!pdata) {
595 printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); 658 printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
@@ -620,7 +683,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
620 data = vmcs_readl(GUEST_SYSENTER_ESP); 683 data = vmcs_readl(GUEST_SYSENTER_ESP);
621 break; 684 break;
622 default: 685 default:
623 msr = find_msr_entry(vcpu, msr_index); 686 msr = find_msr_entry(to_vmx(vcpu), msr_index);
624 if (msr) { 687 if (msr) {
625 data = msr->data; 688 data = msr->data;
626 break; 689 break;
@@ -639,15 +702,16 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
639 */ 702 */
640static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) 703static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
641{ 704{
642 struct vmx_msr_entry *msr; 705 struct vcpu_vmx *vmx = to_vmx(vcpu);
706 struct kvm_msr_entry *msr;
643 int ret = 0; 707 int ret = 0;
644 708
645 switch (msr_index) { 709 switch (msr_index) {
646#ifdef CONFIG_X86_64 710#ifdef CONFIG_X86_64
647 case MSR_EFER: 711 case MSR_EFER:
648 ret = kvm_set_msr_common(vcpu, msr_index, data); 712 ret = kvm_set_msr_common(vcpu, msr_index, data);
649 if (vcpu->vmx_host_state.loaded) 713 if (vmx->host_state.loaded)
650 load_transition_efer(vcpu); 714 load_transition_efer(vmx);
651 break; 715 break;
652 case MSR_FS_BASE: 716 case MSR_FS_BASE:
653 vmcs_writel(GUEST_FS_BASE, data); 717 vmcs_writel(GUEST_FS_BASE, data);
@@ -669,11 +733,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
669 guest_write_tsc(data); 733 guest_write_tsc(data);
670 break; 734 break;
671 default: 735 default:
672 msr = find_msr_entry(vcpu, msr_index); 736 msr = find_msr_entry(vmx, msr_index);
673 if (msr) { 737 if (msr) {
674 msr->data = data; 738 msr->data = data;
675 if (vcpu->vmx_host_state.loaded) 739 if (vmx->host_state.loaded)
676 load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); 740 load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
677 break; 741 break;
678 } 742 }
679 ret = kvm_set_msr_common(vcpu, msr_index, data); 743 ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -740,6 +804,20 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
740 return 0; 804 return 0;
741} 805}
742 806
807static int vmx_get_irq(struct kvm_vcpu *vcpu)
808{
809 u32 idtv_info_field;
810
811 idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
812 if (idtv_info_field & INTR_INFO_VALID_MASK) {
813 if (is_external_interrupt(idtv_info_field))
814 return idtv_info_field & VECTORING_INFO_VECTOR_MASK;
815 else
816 printk(KERN_ERR "pending exception: not handled yet\n");
817 }
818 return -1;
819}
820
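
vmx_get_irq() recovers the vector of an interrupt whose delivery was cut short by a VM exit. The decode it relies on (a hedged summary of the VMX interruption-information format: [7:0] vector, [10:8] type with 0 meaning external interrupt, [31] valid):

    static inline int is_external_interrupt(u32 info)
    {
        return (info & (0x700 /* type bits 10:8 */ | INTR_INFO_VALID_MASK))
            == INTR_INFO_VALID_MASK;    /* valid set, type == external */
    }
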
743static __init int cpu_has_kvm_support(void) 821static __init int cpu_has_kvm_support(void)
744{ 822{
745 unsigned long ecx = cpuid_ecx(1); 823 unsigned long ecx = cpuid_ecx(1);
@@ -751,7 +829,10 @@ static __init int vmx_disabled_by_bios(void)
751 u64 msr; 829 u64 msr;
752 830
753 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); 831 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
754 return (msr & 5) == 1; /* locked but not enabled */ 832 return (msr & (MSR_IA32_FEATURE_CONTROL_LOCKED |
833 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED))
834 == MSR_IA32_FEATURE_CONTROL_LOCKED;
835 /* locked but not enabled */
755} 836}
756 837
757static void hardware_enable(void *garbage) 838static void hardware_enable(void *garbage)
@@ -761,10 +842,15 @@ static void hardware_enable(void *garbage)
761 u64 old; 842 u64 old;
762 843
763 rdmsrl(MSR_IA32_FEATURE_CONTROL, old); 844 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
764 if ((old & 5) != 5) 845 if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED |
846 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED))
847 != (MSR_IA32_FEATURE_CONTROL_LOCKED |
848 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED))
765 /* enable and lock */ 849 /* enable and lock */
766 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 5); 850 wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
767 write_cr4(read_cr4() | CR4_VMXE); /* FIXME: not cpu hotplug safe */ 851 MSR_IA32_FEATURE_CONTROL_LOCKED |
852 MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED);
853 write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
768 asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) 854 asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr)
769 : "memory", "cc"); 855 : "memory", "cc");
770} 856}
@@ -774,14 +860,102 @@ static void hardware_disable(void *garbage)
774 asm volatile (ASM_VMX_VMXOFF : : : "cc"); 860 asm volatile (ASM_VMX_VMXOFF : : : "cc");
775} 861}
776 862
777static __init void setup_vmcs_descriptor(void) 863static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
864 u32 msr, u32* result)
865{
866 u32 vmx_msr_low, vmx_msr_high;
867 u32 ctl = ctl_min | ctl_opt;
868
869 rdmsr(msr, vmx_msr_low, vmx_msr_high);
870
871 ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
872 ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
873
874 /* Ensure minimum (required) set of control bits are supported. */
875 if (ctl_min & ~ctl)
876 return -EIO;
877
878 *result = ctl;
879 return 0;
880}
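
adjust_vmx_controls() reconciles what the caller wants with what the CPU reports: the capability MSR's high word clears unsupported bits, the low word forces must-be-one bits. A worked example with illustrative values (not from any specific CPU):

    u32 min = 0x09, opt = 0;    /* e.g. EXT-INTR exiting | NMI exiting */
    u32 lo = 0x16, hi = 0x3f;   /* hypothetical rdmsr() halves         */
    u32 ctl = ((min | opt) & hi) | lo;   /* = 0x1f                     */
    BUG_ON(min & ~ctl);         /* both required bits survived -> ok   */

If a required min bit is cleared by the high mask, min & ~ctl is non-zero and the function returns -EIO, which ultimately fails hardware_setup().
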
881
882static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
778{ 883{
779 u32 vmx_msr_low, vmx_msr_high; 884 u32 vmx_msr_low, vmx_msr_high;
885 u32 min, opt;
886 u32 _pin_based_exec_control = 0;
887 u32 _cpu_based_exec_control = 0;
888 u32 _vmexit_control = 0;
889 u32 _vmentry_control = 0;
890
891 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
892 opt = 0;
893 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
894 &_pin_based_exec_control) < 0)
895 return -EIO;
896
897 min = CPU_BASED_HLT_EXITING |
898#ifdef CONFIG_X86_64
899 CPU_BASED_CR8_LOAD_EXITING |
900 CPU_BASED_CR8_STORE_EXITING |
901#endif
902 CPU_BASED_USE_IO_BITMAPS |
903 CPU_BASED_MOV_DR_EXITING |
904 CPU_BASED_USE_TSC_OFFSETING;
905#ifdef CONFIG_X86_64
906 opt = CPU_BASED_TPR_SHADOW;
907#else
908 opt = 0;
909#endif
910 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
911 &_cpu_based_exec_control) < 0)
912 return -EIO;
913#ifdef CONFIG_X86_64
914 if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
915 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
916 ~CPU_BASED_CR8_STORE_EXITING;
917#endif
918
919 min = 0;
920#ifdef CONFIG_X86_64
921 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
922#endif
923 opt = 0;
924 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
925 &_vmexit_control) < 0)
926 return -EIO;
927
928 min = opt = 0;
929 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
930 &_vmentry_control) < 0)
931 return -EIO;
780 932
781 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); 933 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
782 vmcs_descriptor.size = vmx_msr_high & 0x1fff; 934
783 vmcs_descriptor.order = get_order(vmcs_descriptor.size); 935 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
784 vmcs_descriptor.revision_id = vmx_msr_low; 936 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
937 return -EIO;
938
939#ifdef CONFIG_X86_64
940 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
941 if (vmx_msr_high & (1u<<16))
942 return -EIO;
943#endif
944
945 /* Require Write-Back (WB) memory type for VMCS accesses. */
946 if (((vmx_msr_high >> 18) & 15) != 6)
947 return -EIO;
948
949 vmcs_conf->size = vmx_msr_high & 0x1fff;
950 vmcs_conf->order = get_order(vmcs_conf->size);
951 vmcs_conf->revision_id = vmx_msr_low;
952
953 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
954 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
955 vmcs_conf->vmexit_ctrl = _vmexit_control;
956 vmcs_conf->vmentry_ctrl = _vmentry_control;
957
958 return 0;
785} 959}
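
The three sanity checks above decode the high dword of MSR_IA32_VMX_BASIC; a hedged summary of the layout they assume, per the SDM sections cited in the comments:

    u32 size  = vmx_msr_high & 0x1fff;     /* MSR bits 44:32 - VMCS region size     */
    int pa32  = (vmx_msr_high >> 16) & 1;  /* MSR bit  48    - 32-bit address limit */
    int mtype = (vmx_msr_high >> 18) & 15; /* MSR bits 53:50 - memory type, 6 == WB */
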
786 960
787static struct vmcs *alloc_vmcs_cpu(int cpu) 961static struct vmcs *alloc_vmcs_cpu(int cpu)
@@ -790,12 +964,12 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
790 struct page *pages; 964 struct page *pages;
791 struct vmcs *vmcs; 965 struct vmcs *vmcs;
792 966
793 pages = alloc_pages_node(node, GFP_KERNEL, vmcs_descriptor.order); 967 pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order);
794 if (!pages) 968 if (!pages)
795 return NULL; 969 return NULL;
796 vmcs = page_address(pages); 970 vmcs = page_address(pages);
797 memset(vmcs, 0, vmcs_descriptor.size); 971 memset(vmcs, 0, vmcs_config.size);
798 vmcs->revision_id = vmcs_descriptor.revision_id; /* vmcs revision id */ 972 vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */
799 return vmcs; 973 return vmcs;
800} 974}
801 975
@@ -806,7 +980,7 @@ static struct vmcs *alloc_vmcs(void)
806 980
807static void free_vmcs(struct vmcs *vmcs) 981static void free_vmcs(struct vmcs *vmcs)
808{ 982{
809 free_pages((unsigned long)vmcs, vmcs_descriptor.order); 983 free_pages((unsigned long)vmcs, vmcs_config.order);
810} 984}
811 985
812static void free_kvm_area(void) 986static void free_kvm_area(void)
@@ -817,8 +991,6 @@ static void free_kvm_area(void)
817 free_vmcs(per_cpu(vmxarea, cpu)); 991 free_vmcs(per_cpu(vmxarea, cpu));
818} 992}
819 993
820extern struct vmcs *alloc_vmcs_cpu(int cpu);
821
822static __init int alloc_kvm_area(void) 994static __init int alloc_kvm_area(void)
823{ 995{
824 int cpu; 996 int cpu;
@@ -839,7 +1011,8 @@ static __init int alloc_kvm_area(void)
839 1011
840static __init int hardware_setup(void) 1012static __init int hardware_setup(void)
841{ 1013{
842 setup_vmcs_descriptor(); 1014 if (setup_vmcs_config(&vmcs_config) < 0)
1015 return -EIO;
843 return alloc_kvm_area(); 1016 return alloc_kvm_area();
844} 1017}
845 1018
@@ -879,8 +1052,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
879 flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT); 1052 flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT);
880 vmcs_writel(GUEST_RFLAGS, flags); 1053 vmcs_writel(GUEST_RFLAGS, flags);
881 1054
882 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) | 1055 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
883 (vmcs_readl(CR4_READ_SHADOW) & CR4_VME_MASK)); 1056 (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
884 1057
885 update_exception_bitmap(vcpu); 1058 update_exception_bitmap(vcpu);
886 1059
@@ -897,7 +1070,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
897 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); 1070 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
898} 1071}
899 1072
900static int rmode_tss_base(struct kvm* kvm) 1073static gva_t rmode_tss_base(struct kvm* kvm)
901{ 1074{
902 gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3; 1075 gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3;
903 return base_gfn << PAGE_SHIFT; 1076 return base_gfn << PAGE_SHIFT;
@@ -937,7 +1110,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
937 flags |= IOPL_MASK | X86_EFLAGS_VM; 1110 flags |= IOPL_MASK | X86_EFLAGS_VM;
938 1111
939 vmcs_writel(GUEST_RFLAGS, flags); 1112 vmcs_writel(GUEST_RFLAGS, flags);
940 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | CR4_VME_MASK); 1113 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
941 update_exception_bitmap(vcpu); 1114 update_exception_bitmap(vcpu);
942 1115
943 vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); 1116 vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
@@ -975,10 +1148,10 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
975 1148
976 vcpu->shadow_efer |= EFER_LMA; 1149 vcpu->shadow_efer |= EFER_LMA;
977 1150
978 find_msr_entry(vcpu, MSR_EFER)->data |= EFER_LMA | EFER_LME; 1151 find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME;
979 vmcs_write32(VM_ENTRY_CONTROLS, 1152 vmcs_write32(VM_ENTRY_CONTROLS,
980 vmcs_read32(VM_ENTRY_CONTROLS) 1153 vmcs_read32(VM_ENTRY_CONTROLS)
981 | VM_ENTRY_CONTROLS_IA32E_MASK); 1154 | VM_ENTRY_IA32E_MODE);
982} 1155}
983 1156
984static void exit_lmode(struct kvm_vcpu *vcpu) 1157static void exit_lmode(struct kvm_vcpu *vcpu)
@@ -987,7 +1160,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
987 1160
988 vmcs_write32(VM_ENTRY_CONTROLS, 1161 vmcs_write32(VM_ENTRY_CONTROLS,
989 vmcs_read32(VM_ENTRY_CONTROLS) 1162 vmcs_read32(VM_ENTRY_CONTROLS)
990 & ~VM_ENTRY_CONTROLS_IA32E_MASK); 1163 & ~VM_ENTRY_IA32E_MODE);
991} 1164}
992 1165
993#endif 1166#endif
@@ -1002,17 +1175,17 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1002{ 1175{
1003 vmx_fpu_deactivate(vcpu); 1176 vmx_fpu_deactivate(vcpu);
1004 1177
1005 if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) 1178 if (vcpu->rmode.active && (cr0 & X86_CR0_PE))
1006 enter_pmode(vcpu); 1179 enter_pmode(vcpu);
1007 1180
1008 if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) 1181 if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE))
1009 enter_rmode(vcpu); 1182 enter_rmode(vcpu);
1010 1183
1011#ifdef CONFIG_X86_64 1184#ifdef CONFIG_X86_64
1012 if (vcpu->shadow_efer & EFER_LME) { 1185 if (vcpu->shadow_efer & EFER_LME) {
1013 if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) 1186 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
1014 enter_lmode(vcpu); 1187 enter_lmode(vcpu);
1015 if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK)) 1188 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
1016 exit_lmode(vcpu); 1189 exit_lmode(vcpu);
1017 } 1190 }
1018#endif 1191#endif
@@ -1022,14 +1195,14 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1022 (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); 1195 (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
1023 vcpu->cr0 = cr0; 1196 vcpu->cr0 = cr0;
1024 1197
1025 if (!(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK)) 1198 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
1026 vmx_fpu_activate(vcpu); 1199 vmx_fpu_activate(vcpu);
1027} 1200}
1028 1201
1029static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) 1202static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1030{ 1203{
1031 vmcs_writel(GUEST_CR3, cr3); 1204 vmcs_writel(GUEST_CR3, cr3);
1032 if (vcpu->cr0 & CR0_PE_MASK) 1205 if (vcpu->cr0 & X86_CR0_PE)
1033 vmx_fpu_deactivate(vcpu); 1206 vmx_fpu_deactivate(vcpu);
1034} 1207}
1035 1208
@@ -1045,23 +1218,24 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1045 1218
1046static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) 1219static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1047{ 1220{
1048 struct vmx_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); 1221 struct vcpu_vmx *vmx = to_vmx(vcpu);
1222 struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
1049 1223
1050 vcpu->shadow_efer = efer; 1224 vcpu->shadow_efer = efer;
1051 if (efer & EFER_LMA) { 1225 if (efer & EFER_LMA) {
1052 vmcs_write32(VM_ENTRY_CONTROLS, 1226 vmcs_write32(VM_ENTRY_CONTROLS,
1053 vmcs_read32(VM_ENTRY_CONTROLS) | 1227 vmcs_read32(VM_ENTRY_CONTROLS) |
1054 VM_ENTRY_CONTROLS_IA32E_MASK); 1228 VM_ENTRY_IA32E_MODE);
1055 msr->data = efer; 1229 msr->data = efer;
1056 1230
1057 } else { 1231 } else {
1058 vmcs_write32(VM_ENTRY_CONTROLS, 1232 vmcs_write32(VM_ENTRY_CONTROLS,
1059 vmcs_read32(VM_ENTRY_CONTROLS) & 1233 vmcs_read32(VM_ENTRY_CONTROLS) &
1060 ~VM_ENTRY_CONTROLS_IA32E_MASK); 1234 ~VM_ENTRY_IA32E_MODE);
1061 1235
1062 msr->data = efer & ~EFER_LME; 1236 msr->data = efer & ~EFER_LME;
1063 } 1237 }
1064 setup_msrs(vcpu); 1238 setup_msrs(vmx);
1065} 1239}
1066 1240
1067#endif 1241#endif
@@ -1210,17 +1384,6 @@ static int init_rmode_tss(struct kvm* kvm)
1210 return 1; 1384 return 1;
1211} 1385}
1212 1386
1213static void vmcs_write32_fixedbits(u32 msr, u32 vmcs_field, u32 val)
1214{
1215 u32 msr_high, msr_low;
1216
1217 rdmsr(msr, msr_low, msr_high);
1218
1219 val &= msr_high;
1220 val |= msr_low;
1221 vmcs_write32(vmcs_field, val);
1222}
1223
1224static void seg_setup(int seg) 1387static void seg_setup(int seg)
1225{ 1388{
1226 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; 1389 struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1234,7 +1397,7 @@ static void seg_setup(int seg)
1234/* 1397/*
1235 * Sets up the vmcs for emulated real mode. 1398 * Sets up the vmcs for emulated real mode.
1236 */ 1399 */
1237static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) 1400static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
1238{ 1401{
1239 u32 host_sysenter_cs; 1402 u32 host_sysenter_cs;
1240 u32 junk; 1403 u32 junk;
@@ -1243,27 +1406,36 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1243 int i; 1406 int i;
1244 int ret = 0; 1407 int ret = 0;
1245 unsigned long kvm_vmx_return; 1408 unsigned long kvm_vmx_return;
1409 u64 msr;
1410 u32 exec_control;
1246 1411
1247 if (!init_rmode_tss(vcpu->kvm)) { 1412 if (!init_rmode_tss(vmx->vcpu.kvm)) {
1248 ret = -ENOMEM; 1413 ret = -ENOMEM;
1249 goto out; 1414 goto out;
1250 } 1415 }
1251 1416
1252 memset(vcpu->regs, 0, sizeof(vcpu->regs)); 1417 vmx->vcpu.rmode.active = 0;
1253 vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
1254 vcpu->cr8 = 0;
1255 vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1256 if (vcpu == &vcpu->kvm->vcpus[0])
1257 vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
1258 1418
1259 fx_init(vcpu); 1419 vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
1420 set_cr8(&vmx->vcpu, 0);
1421 msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1422 if (vmx->vcpu.vcpu_id == 0)
1423 msr |= MSR_IA32_APICBASE_BSP;
1424 kvm_set_apic_base(&vmx->vcpu, msr);
1425
1426 fx_init(&vmx->vcpu);
1260 1427
1261 /* 1428 /*
1262 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode 1429 * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
1263 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. 1430 * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
1264 */ 1431 */
1265 vmcs_write16(GUEST_CS_SELECTOR, 0xf000); 1432 if (vmx->vcpu.vcpu_id == 0) {
1266 vmcs_writel(GUEST_CS_BASE, 0x000f0000); 1433 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
1434 vmcs_writel(GUEST_CS_BASE, 0x000f0000);
1435 } else {
1436 vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
1437 vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
1438 }
1267 vmcs_write32(GUEST_CS_LIMIT, 0xffff); 1439 vmcs_write32(GUEST_CS_LIMIT, 0xffff);
1268 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); 1440 vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
1269 1441
@@ -1288,7 +1460,10 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1288 vmcs_writel(GUEST_SYSENTER_EIP, 0); 1460 vmcs_writel(GUEST_SYSENTER_EIP, 0);
1289 1461
1290 vmcs_writel(GUEST_RFLAGS, 0x02); 1462 vmcs_writel(GUEST_RFLAGS, 0x02);
1291 vmcs_writel(GUEST_RIP, 0xfff0); 1463 if (vmx->vcpu.vcpu_id == 0)
1464 vmcs_writel(GUEST_RIP, 0xfff0);
1465 else
1466 vmcs_writel(GUEST_RIP, 0);
1292 vmcs_writel(GUEST_RSP, 0); 1467 vmcs_writel(GUEST_RSP, 0);
1293 1468
1294 //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 1469 //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
@@ -1316,20 +1491,18 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1316 vmcs_write64(GUEST_IA32_DEBUGCTL, 0); 1491 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
1317 1492
1318 /* Control */ 1493 /* Control */
1319 vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS, 1494 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
1320 PIN_BASED_VM_EXEC_CONTROL, 1495 vmcs_config.pin_based_exec_ctrl);
1321 PIN_BASED_EXT_INTR_MASK /* 20.6.1 */ 1496
1322 | PIN_BASED_NMI_EXITING /* 20.6.1 */ 1497 exec_control = vmcs_config.cpu_based_exec_ctrl;
1323 ); 1498 if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
1324 vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS, 1499 exec_control &= ~CPU_BASED_TPR_SHADOW;
1325 CPU_BASED_VM_EXEC_CONTROL, 1500#ifdef CONFIG_X86_64
1326 CPU_BASED_HLT_EXITING /* 20.6.2 */ 1501 exec_control |= CPU_BASED_CR8_STORE_EXITING |
1327 | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ 1502 CPU_BASED_CR8_LOAD_EXITING;
1328 | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ 1503#endif
1329 | CPU_BASED_ACTIVATE_IO_BITMAP /* 20.6.2 */ 1504 }
1330 | CPU_BASED_MOV_DR_EXITING 1505 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
1331 | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */
1332 );
1333 1506
1334 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); 1507 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
1335 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); 1508 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
@@ -1377,46 +1550,48 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1377 u32 index = vmx_msr_index[i]; 1550 u32 index = vmx_msr_index[i];
1378 u32 data_low, data_high; 1551 u32 data_low, data_high;
1379 u64 data; 1552 u64 data;
1380 int j = vcpu->nmsrs; 1553 int j = vmx->nmsrs;
1381 1554
1382 if (rdmsr_safe(index, &data_low, &data_high) < 0) 1555 if (rdmsr_safe(index, &data_low, &data_high) < 0)
1383 continue; 1556 continue;
1384 if (wrmsr_safe(index, data_low, data_high) < 0) 1557 if (wrmsr_safe(index, data_low, data_high) < 0)
1385 continue; 1558 continue;
1386 data = data_low | ((u64)data_high << 32); 1559 data = data_low | ((u64)data_high << 32);
1387 vcpu->host_msrs[j].index = index; 1560 vmx->host_msrs[j].index = index;
1388 vcpu->host_msrs[j].reserved = 0; 1561 vmx->host_msrs[j].reserved = 0;
1389 vcpu->host_msrs[j].data = data; 1562 vmx->host_msrs[j].data = data;
1390 vcpu->guest_msrs[j] = vcpu->host_msrs[j]; 1563 vmx->guest_msrs[j] = vmx->host_msrs[j];
1391 ++vcpu->nmsrs; 1564 ++vmx->nmsrs;
1392 } 1565 }
1393 1566
1394 setup_msrs(vcpu); 1567 setup_msrs(vmx);
1395 1568
1396 vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS, 1569 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
1397 (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */
1398 1570
1399 /* 22.2.1, 20.8.1 */ 1571 /* 22.2.1, 20.8.1 */
1400 vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS, 1572 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
1401 VM_ENTRY_CONTROLS, 0); 1573
1402 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ 1574 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
1403 1575
1404#ifdef CONFIG_X86_64 1576#ifdef CONFIG_X86_64
1405 vmcs_writel(VIRTUAL_APIC_PAGE_ADDR, 0); 1577 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
1406 vmcs_writel(TPR_THRESHOLD, 0); 1578 if (vm_need_tpr_shadow(vmx->vcpu.kvm))
1579 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
1580 page_to_phys(vmx->vcpu.apic->regs_page));
1581 vmcs_write32(TPR_THRESHOLD, 0);
1407#endif 1582#endif
1408 1583
1409 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 1584 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
1410 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 1585 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
1411 1586
1412 vcpu->cr0 = 0x60000010; 1587 vmx->vcpu.cr0 = 0x60000010;
1413 vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode 1588 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.cr0); // enter rmode
1414 vmx_set_cr4(vcpu, 0); 1589 vmx_set_cr4(&vmx->vcpu, 0);
1415#ifdef CONFIG_X86_64 1590#ifdef CONFIG_X86_64
1416 vmx_set_efer(vcpu, 0); 1591 vmx_set_efer(&vmx->vcpu, 0);
1417#endif 1592#endif
1418 vmx_fpu_activate(vcpu); 1593 vmx_fpu_activate(&vmx->vcpu);
1419 update_exception_bitmap(vcpu); 1594 update_exception_bitmap(&vmx->vcpu);
1420 1595
1421 return 0; 1596 return 0;
1422 1597
@@ -1424,6 +1599,13 @@ out:
1424 return ret; 1599 return ret;
1425} 1600}
1426 1601
1602static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
1603{
1604 struct vcpu_vmx *vmx = to_vmx(vcpu);
1605
1606 vmx_vcpu_setup(vmx);
1607}
1608
1427static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) 1609static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
1428{ 1610{
1429 u16 ent[2]; 1611 u16 ent[2];
@@ -1443,8 +1625,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
1443 return; 1625 return;
1444 } 1626 }
1445 1627
1446 if (kvm_read_guest(vcpu, irq * sizeof(ent), sizeof(ent), &ent) != 1628 if (emulator_read_std(irq * sizeof(ent), &ent, sizeof(ent), vcpu) !=
1447 sizeof(ent)) { 1629 X86EMUL_CONTINUE) {
1448 vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__); 1630 vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__);
1449 return; 1631 return;
1450 } 1632 }
@@ -1454,9 +1636,9 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
1454 ip = vmcs_readl(GUEST_RIP); 1636 ip = vmcs_readl(GUEST_RIP);
1455 1637
1456 1638
1457 if (kvm_write_guest(vcpu, ss_base + sp - 2, 2, &flags) != 2 || 1639 if (emulator_write_emulated(ss_base + sp - 2, &flags, 2, vcpu) != X86EMUL_CONTINUE ||
1458 kvm_write_guest(vcpu, ss_base + sp - 4, 2, &cs) != 2 || 1640 emulator_write_emulated(ss_base + sp - 4, &cs, 2, vcpu) != X86EMUL_CONTINUE ||
1459 kvm_write_guest(vcpu, ss_base + sp - 6, 2, &ip) != 2) { 1641 emulator_write_emulated(ss_base + sp - 6, &ip, 2, vcpu) != X86EMUL_CONTINUE) {
1460 vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__); 1642 vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__);
1461 return; 1643 return;
1462 } 1644 }
@@ -1469,6 +1651,16 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
1469 vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6)); 1651 vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
1470} 1652}
1471 1653
1654static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
1655{
1656 if (vcpu->rmode.active) {
1657 inject_rmode_irq(vcpu, irq);
1658 return;
1659 }
1660 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
1661 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
1662}
1663
1472static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) 1664static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
1473{ 1665{
1474 int word_index = __ffs(vcpu->irq_summary); 1666 int word_index = __ffs(vcpu->irq_summary);
@@ -1478,13 +1670,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
1478 clear_bit(bit_index, &vcpu->irq_pending[word_index]); 1670 clear_bit(bit_index, &vcpu->irq_pending[word_index]);
1479 if (!vcpu->irq_pending[word_index]) 1671 if (!vcpu->irq_pending[word_index])
1480 clear_bit(word_index, &vcpu->irq_summary); 1672 clear_bit(word_index, &vcpu->irq_summary);
1481 1673 vmx_inject_irq(vcpu, irq);
1482 if (vcpu->rmode.active) {
1483 inject_rmode_irq(vcpu, irq);
1484 return;
1485 }
1486 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
1487 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
1488} 1674}
1489 1675
1490 1676
@@ -1568,7 +1754,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1568 "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); 1754 "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
1569 } 1755 }
1570 1756
1571 if (is_external_interrupt(vect_info)) { 1757 if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
1572 int irq = vect_info & VECTORING_INFO_VECTOR_MASK; 1758 int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
1573 set_bit(irq, vcpu->irq_pending); 1759 set_bit(irq, vcpu->irq_pending);
1574 set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); 1760 set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary);
@@ -1591,29 +1777,28 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1591 if (is_page_fault(intr_info)) { 1777 if (is_page_fault(intr_info)) {
1592 cr2 = vmcs_readl(EXIT_QUALIFICATION); 1778 cr2 = vmcs_readl(EXIT_QUALIFICATION);
1593 1779
1594 spin_lock(&vcpu->kvm->lock); 1780 mutex_lock(&vcpu->kvm->lock);
1595 r = kvm_mmu_page_fault(vcpu, cr2, error_code); 1781 r = kvm_mmu_page_fault(vcpu, cr2, error_code);
1596 if (r < 0) { 1782 if (r < 0) {
1597 spin_unlock(&vcpu->kvm->lock); 1783 mutex_unlock(&vcpu->kvm->lock);
1598 return r; 1784 return r;
1599 } 1785 }
1600 if (!r) { 1786 if (!r) {
1601 spin_unlock(&vcpu->kvm->lock); 1787 mutex_unlock(&vcpu->kvm->lock);
1602 return 1; 1788 return 1;
1603 } 1789 }
1604 1790
1605 er = emulate_instruction(vcpu, kvm_run, cr2, error_code); 1791 er = emulate_instruction(vcpu, kvm_run, cr2, error_code);
1606 spin_unlock(&vcpu->kvm->lock); 1792 mutex_unlock(&vcpu->kvm->lock);
1607 1793
1608 switch (er) { 1794 switch (er) {
1609 case EMULATE_DONE: 1795 case EMULATE_DONE:
1610 return 1; 1796 return 1;
1611 case EMULATE_DO_MMIO: 1797 case EMULATE_DO_MMIO:
1612 ++vcpu->stat.mmio_exits; 1798 ++vcpu->stat.mmio_exits;
1613 kvm_run->exit_reason = KVM_EXIT_MMIO;
1614 return 0; 1799 return 0;
1615 case EMULATE_FAIL: 1800 case EMULATE_FAIL:
1616 vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); 1801 kvm_report_emulation_failure(vcpu, "pagetable");
1617 break; 1802 break;
1618 default: 1803 default:
1619 BUG(); 1804 BUG();
@@ -1653,80 +1838,29 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1653 return 0; 1838 return 0;
1654} 1839}
1655 1840
1656static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count)
1657{
1658 u64 inst;
1659 gva_t rip;
1660 int countr_size;
1661 int i, n;
1662
1663 if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) {
1664 countr_size = 2;
1665 } else {
1666 u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
1667
1668 countr_size = (cs_ar & AR_L_MASK) ? 8:
1669 (cs_ar & AR_DB_MASK) ? 4: 2;
1670 }
1671
1672 rip = vmcs_readl(GUEST_RIP);
1673 if (countr_size != 8)
1674 rip += vmcs_readl(GUEST_CS_BASE);
1675
1676 n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst);
1677
1678 for (i = 0; i < n; i++) {
1679 switch (((u8*)&inst)[i]) {
1680 case 0xf0:
1681 case 0xf2:
1682 case 0xf3:
1683 case 0x2e:
1684 case 0x36:
1685 case 0x3e:
1686 case 0x26:
1687 case 0x64:
1688 case 0x65:
1689 case 0x66:
1690 break;
1691 case 0x67:
1692 countr_size = (countr_size == 2) ? 4: (countr_size >> 1);
1693 default:
1694 goto done;
1695 }
1696 }
1697 return 0;
1698done:
1699 countr_size *= 8;
1700 *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
1701 //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]);
1702 return 1;
1703}
1704
1705static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1841static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1706{ 1842{
1707 u64 exit_qualification; 1843 unsigned long exit_qualification;
1708 int size, down, in, string, rep; 1844 int size, down, in, string, rep;
1709 unsigned port; 1845 unsigned port;
1710 unsigned long count;
1711 gva_t address;
1712 1846
1713 ++vcpu->stat.io_exits; 1847 ++vcpu->stat.io_exits;
1714 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 1848 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
1715 in = (exit_qualification & 8) != 0;
1716 size = (exit_qualification & 7) + 1;
1717 string = (exit_qualification & 16) != 0; 1849 string = (exit_qualification & 16) != 0;
1850
1851 if (string) {
1852 if (emulate_instruction(vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO)
1853 return 0;
1854 return 1;
1855 }
1856
1857 size = (exit_qualification & 7) + 1;
1858 in = (exit_qualification & 8) != 0;
1718 down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; 1859 down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
1719 count = 1;
1720 rep = (exit_qualification & 32) != 0; 1860 rep = (exit_qualification & 32) != 0;
1721 port = exit_qualification >> 16; 1861 port = exit_qualification >> 16;
1722 address = 0; 1862
1723 if (string) { 1863 return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
1724 if (rep && !get_io_count(vcpu, &count))
1725 return 1;
1726 address = vmcs_readl(GUEST_LINEAR_ADDRESS);
1727 }
1728 return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
1729 address, rep, port);
1730} 1864}
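
handle_io() now decodes only the fields it needs and punts string operations to the full emulator. A worked decode of the exit-qualification layout used above, with a hypothetical value standing in for "rep insb from port 0x79":

    unsigned long qual = 0x00790038;  /* hypothetical                          */
    int size   = (qual & 7) + 1;      /* 1 byte                                */
    int in     = (qual & 8) != 0;     /* 1: IN/INS                             */
    int string = (qual & 16) != 0;    /* 1: string op -> emulate_instruction() */
    int rep    = (qual & 32) != 0;    /* 1: REP prefixed                       */
    unsigned port = qual >> 16;       /* 0x79                                  */
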
1731 1865
1732static void 1866static void
@@ -1743,11 +1877,11 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
1743 1877
1744static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1878static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1745{ 1879{
1746 u64 exit_qualification; 1880 unsigned long exit_qualification;
1747 int cr; 1881 int cr;
1748 int reg; 1882 int reg;
1749 1883
1750 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 1884 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
1751 cr = exit_qualification & 15; 1885 cr = exit_qualification & 15;
1752 reg = (exit_qualification >> 8) & 15; 1886 reg = (exit_qualification >> 8) & 15;
1753 switch ((exit_qualification >> 4) & 3) { 1887 switch ((exit_qualification >> 4) & 3) {
@@ -1772,13 +1906,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1772 vcpu_load_rsp_rip(vcpu); 1906 vcpu_load_rsp_rip(vcpu);
1773 set_cr8(vcpu, vcpu->regs[reg]); 1907 set_cr8(vcpu, vcpu->regs[reg]);
1774 skip_emulated_instruction(vcpu); 1908 skip_emulated_instruction(vcpu);
1775 return 1; 1909 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
1910 return 0;
1776 }; 1911 };
1777 break; 1912 break;
1778 case 2: /* clts */ 1913 case 2: /* clts */
1779 vcpu_load_rsp_rip(vcpu); 1914 vcpu_load_rsp_rip(vcpu);
1780 vmx_fpu_deactivate(vcpu); 1915 vmx_fpu_deactivate(vcpu);
1781 vcpu->cr0 &= ~CR0_TS_MASK; 1916 vcpu->cr0 &= ~X86_CR0_TS;
1782 vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); 1917 vmcs_writel(CR0_READ_SHADOW, vcpu->cr0);
1783 vmx_fpu_activate(vcpu); 1918 vmx_fpu_activate(vcpu);
1784 skip_emulated_instruction(vcpu); 1919 skip_emulated_instruction(vcpu);
@@ -1793,7 +1928,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1793 return 1; 1928 return 1;
1794 case 8: 1929 case 8:
1795 vcpu_load_rsp_rip(vcpu); 1930 vcpu_load_rsp_rip(vcpu);
1796 vcpu->regs[reg] = vcpu->cr8; 1931 vcpu->regs[reg] = get_cr8(vcpu);
1797 vcpu_put_rsp_rip(vcpu); 1932 vcpu_put_rsp_rip(vcpu);
1798 skip_emulated_instruction(vcpu); 1933 skip_emulated_instruction(vcpu);
1799 return 1; 1934 return 1;
@@ -1808,14 +1943,14 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1808 break; 1943 break;
1809 } 1944 }
1810 kvm_run->exit_reason = 0; 1945 kvm_run->exit_reason = 0;
1811 printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n", 1946 pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
1812 (int)(exit_qualification >> 4) & 3, cr); 1947 (int)(exit_qualification >> 4) & 3, cr);
1813 return 0; 1948 return 0;
1814} 1949}
1815 1950
1816static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1951static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1817{ 1952{
1818 u64 exit_qualification; 1953 unsigned long exit_qualification;
1819 unsigned long val; 1954 unsigned long val;
1820 int dr, reg; 1955 int dr, reg;
1821 1956
@@ -1823,7 +1958,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1823 * FIXME: this code assumes the host is debugging the guest. 1958 * FIXME: this code assumes the host is debugging the guest.
1824 * need to deal with guest debugging itself too. 1959 * need to deal with guest debugging itself too.
1825 */ 1960 */
1826 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); 1961 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
1827 dr = exit_qualification & 7; 1962 dr = exit_qualification & 7;
1828 reg = (exit_qualification >> 8) & 15; 1963 reg = (exit_qualification >> 8) & 15;
1829 vcpu_load_rsp_rip(vcpu); 1964 vcpu_load_rsp_rip(vcpu);
@@ -1886,19 +2021,21 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1886 return 1; 2021 return 1;
1887} 2022}
1888 2023
1889static void post_kvm_run_save(struct kvm_vcpu *vcpu, 2024static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu,
1890 struct kvm_run *kvm_run) 2025 struct kvm_run *kvm_run)
1891{ 2026{
1892 kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; 2027 return 1;
1893 kvm_run->cr8 = vcpu->cr8;
1894 kvm_run->apic_base = vcpu->apic_base;
1895 kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
1896 vcpu->irq_summary == 0);
1897} 2028}
1898 2029
1899static int handle_interrupt_window(struct kvm_vcpu *vcpu, 2030static int handle_interrupt_window(struct kvm_vcpu *vcpu,
1900 struct kvm_run *kvm_run) 2031 struct kvm_run *kvm_run)
1901{ 2032{
2033 u32 cpu_based_vm_exec_control;
2034
2035 /* clear pending irq */
2036 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
2037 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
2038 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
1902 /* 2039 /*
1903 * If the user space waits to inject interrupts, exit as soon as 2040 * If the user space waits to inject interrupts, exit as soon as
1904 * possible 2041 * possible
@@ -1943,6 +2080,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
1943 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, 2080 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
1944 [EXIT_REASON_HLT] = handle_halt, 2081 [EXIT_REASON_HLT] = handle_halt,
1945 [EXIT_REASON_VMCALL] = handle_vmcall, 2082 [EXIT_REASON_VMCALL] = handle_vmcall,
2083 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold
1946}; 2084};
1947 2085
1948static const int kvm_vmx_max_exit_handlers = 2086static const int kvm_vmx_max_exit_handlers =
@@ -1956,6 +2094,14 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1956{ 2094{
1957 u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 2095 u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
1958 u32 exit_reason = vmcs_read32(VM_EXIT_REASON); 2096 u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
2097 struct vcpu_vmx *vmx = to_vmx(vcpu);
2098
2099 if (unlikely(vmx->fail)) {
2100 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2101 kvm_run->fail_entry.hardware_entry_failure_reason
2102 = vmcs_read32(VM_INSTRUCTION_ERROR);
2103 return 0;
2104 }
1959 2105
1960 if ( (vectoring_info & VECTORING_INFO_VALID_MASK) && 2106 if ( (vectoring_info & VECTORING_INFO_VALID_MASK) &&
1961 exit_reason != EXIT_REASON_EXCEPTION_NMI ) 2107 exit_reason != EXIT_REASON_EXCEPTION_NMI )
@@ -1971,57 +2117,91 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1971 return 0; 2117 return 0;
1972} 2118}
1973 2119
1974/* 2120static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1975 * Check if userspace requested an interrupt window, and that the
1976 * interrupt window is open.
1977 *
1978 * No need to exit to userspace if we already have an interrupt queued.
1979 */
1980static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1981 struct kvm_run *kvm_run)
1982{ 2121{
1983 return (!vcpu->irq_summary &&
1984 kvm_run->request_interrupt_window &&
1985 vcpu->interrupt_window_open &&
1986 (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
1987} 2122}
1988 2123
1989static void vmx_flush_tlb(struct kvm_vcpu *vcpu) 2124static void update_tpr_threshold(struct kvm_vcpu *vcpu)
1990{ 2125{
2126 int max_irr, tpr;
2127
2128 if (!vm_need_tpr_shadow(vcpu->kvm))
2129 return;
2130
2131 if (!kvm_lapic_enabled(vcpu) ||
2132 ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) {
2133 vmcs_write32(TPR_THRESHOLD, 0);
2134 return;
2135 }
2136
2137 tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4;
2138 vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
1991} 2139}
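
The threshold write above arms the TPR shadow; a worked example with hypothetical values shows the intent:

    int max_irr = 0x61;               /* highest pending vector (class 6) */
    int cr8     = 5;                  /* guest task priority              */
    int tpr     = (cr8 & 0x0f) << 4;  /* = 0x50, the VTPR image of CR8    */
    u32 thresh  = (max_irr > tpr) ? tpr >> 4 : max_irr >> 4;   /* = 5    */

With TPR_THRESHOLD set to 5, the guest lowering CR8 below the pending vector's priority class forces a TPR-below-threshold exit, giving vmx_intr_assist() a chance to inject vector 0x61.
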
1992 2140
1993static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 2141static void enable_irq_window(struct kvm_vcpu *vcpu)
1994{ 2142{
1995 u8 fail; 2143 u32 cpu_based_vm_exec_control;
1996 int r;
1997 2144
1998preempted: 2145 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
1999 if (vcpu->guest_debug.enabled) 2146 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
2000 kvm_guest_debug_pre(vcpu); 2147 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
2148}
2001 2149
2002again: 2150static void vmx_intr_assist(struct kvm_vcpu *vcpu)
2003 if (!vcpu->mmio_read_completed) 2151{
2004 do_interrupt_requests(vcpu, kvm_run); 2152 u32 idtv_info_field, intr_info_field;
2153 int has_ext_irq, interrupt_window_open;
2154 int vector;
2005 2155
2006 vmx_save_host_state(vcpu); 2156 kvm_inject_pending_timer_irqs(vcpu);
2007 kvm_load_guest_fpu(vcpu); 2157 update_tpr_threshold(vcpu);
2008 2158
2009 r = kvm_mmu_reload(vcpu); 2159 has_ext_irq = kvm_cpu_has_interrupt(vcpu);
2010 if (unlikely(r)) 2160 intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
2011 goto out; 2161 idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
2162 if (intr_info_field & INTR_INFO_VALID_MASK) {
2163 if (idtv_info_field & INTR_INFO_VALID_MASK) {
2164 /* TODO: fault when IDT_Vectoring */
2165 printk(KERN_ERR "Fault when IDT_Vectoring\n");
2166 }
2167 if (has_ext_irq)
2168 enable_irq_window(vcpu);
2169 return;
2170 }
2171 if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
2172 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
2173 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2174 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
2175
2176 if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK))
2177 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
2178 vmcs_read32(IDT_VECTORING_ERROR_CODE));
2179 if (unlikely(has_ext_irq))
2180 enable_irq_window(vcpu);
2181 return;
2182 }
2183 if (!has_ext_irq)
2184 return;
2185 interrupt_window_open =
2186 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
2187 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
2188 if (interrupt_window_open) {
2189 vector = kvm_cpu_get_interrupt(vcpu);
2190 vmx_inject_irq(vcpu, vector);
2191 kvm_timer_intr_post(vcpu, vector);
2192 } else
2193 enable_irq_window(vcpu);
2194}
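
The window test in vmx_intr_assist() mirrors the architectural injectability rule: RFLAGS.IF must be set and neither of the low two interruptibility bits (blocking by STI, blocking by MOV SS) may be active —

    int window_open = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)
                   && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;

When the window is closed, enable_irq_window() requests a VM exit as soon as it opens, rather than spinning.
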
2195
2196static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2197{
2198 struct vcpu_vmx *vmx = to_vmx(vcpu);
2012 2199
2013 /* 2200 /*
2014 * Loading guest fpu may have cleared host cr0.ts 2201 * Loading guest fpu may have cleared host cr0.ts
2015 */ 2202 */
2016 vmcs_writel(HOST_CR0, read_cr0()); 2203 vmcs_writel(HOST_CR0, read_cr0());
2017 2204
2018 local_irq_disable();
2019
2020 vcpu->guest_mode = 1;
2021 if (vcpu->requests)
2022 if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
2023 vmx_flush_tlb(vcpu);
2024
2025 asm ( 2205 asm (
2026 /* Store host registers */ 2206 /* Store host registers */
2027#ifdef CONFIG_X86_64 2207#ifdef CONFIG_X86_64
@@ -2115,8 +2295,8 @@ again:
2115 "pop %%ecx; popa \n\t" 2295 "pop %%ecx; popa \n\t"
2116#endif 2296#endif
2117 "setbe %0 \n\t" 2297 "setbe %0 \n\t"
2118 : "=q" (fail) 2298 : "=q" (vmx->fail)
2119 : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), 2299 : "r"(vmx->launched), "d"((unsigned long)HOST_RSP),
2120 "c"(vcpu), 2300 "c"(vcpu),
2121 [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), 2301 [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])),
2122 [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), 2302 [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])),
@@ -2138,59 +2318,10 @@ again:
2138 [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) 2318 [cr2]"i"(offsetof(struct kvm_vcpu, cr2))
2139 : "cc", "memory" ); 2319 : "cc", "memory" );
2140 2320
2141 vcpu->guest_mode = 0;
2142 local_irq_enable();
2143
2144 ++vcpu->stat.exits;
2145
2146 vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; 2321 vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
2147 2322
2148 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 2323 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
2149 2324 vmx->launched = 1;
2150 if (unlikely(fail)) {
2151 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2152 kvm_run->fail_entry.hardware_entry_failure_reason
2153 = vmcs_read32(VM_INSTRUCTION_ERROR);
2154 r = 0;
2155 goto out;
2156 }
2157 /*
2158 * Profile KVM exit RIPs:
2159 */
2160 if (unlikely(prof_on == KVM_PROFILING))
2161 profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
2162
2163 vcpu->launched = 1;
2164 r = kvm_handle_exit(kvm_run, vcpu);
2165 if (r > 0) {
2166 /* Give scheduler a chance to reschedule. */
2167 if (signal_pending(current)) {
2168 r = -EINTR;
2169 kvm_run->exit_reason = KVM_EXIT_INTR;
2170 ++vcpu->stat.signal_exits;
2171 goto out;
2172 }
2173
2174 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
2175 r = -EINTR;
2176 kvm_run->exit_reason = KVM_EXIT_INTR;
2177 ++vcpu->stat.request_irq_exits;
2178 goto out;
2179 }
2180 if (!need_resched()) {
2181 ++vcpu->stat.light_exits;
2182 goto again;
2183 }
2184 }
2185
2186out:
2187 if (r > 0) {
2188 kvm_resched(vcpu);
2189 goto preempted;
2190 }
2191
2192 post_kvm_run_save(vcpu, kvm_run);
2193 return r;
2194} 2325}
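
vmx_vcpu_run() shrinks to a single entry/exit pass: the retry loop, signal handling and run-state bookkeeping move to common code via the ->run()/->handle_exit() split wired up below. A hypothetical sketch of the generic loop this implies (names and error handling abridged, not the actual kvm_main.c code):

    static int hypothetical_run_loop(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
    {
        int r = 1;

        while (r > 0) {
            kvm_x86_ops->inject_pending_irq(vcpu);  /* vmx_intr_assist      */
            kvm_x86_ops->run(vcpu, kvm_run);        /* one guest entry/exit */
            r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
            if (r > 0 && signal_pending(current)) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                r = -EINTR;
            }
        }
        return r;
    }
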
2195 2326
2196static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, 2327static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
@@ -2225,67 +2356,118 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
2225 2356
2226static void vmx_free_vmcs(struct kvm_vcpu *vcpu) 2357static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
2227{ 2358{
2228 if (vcpu->vmcs) { 2359 struct vcpu_vmx *vmx = to_vmx(vcpu);
2229 on_each_cpu(__vcpu_clear, vcpu, 0, 1); 2360
2230 free_vmcs(vcpu->vmcs); 2361 if (vmx->vmcs) {
2231 vcpu->vmcs = NULL; 2362 on_each_cpu(__vcpu_clear, vmx, 0, 1);
2363 free_vmcs(vmx->vmcs);
2364 vmx->vmcs = NULL;
2232 } 2365 }
2233} 2366}
2234 2367
2235static void vmx_free_vcpu(struct kvm_vcpu *vcpu) 2368static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
2236{ 2369{
2370 struct vcpu_vmx *vmx = to_vmx(vcpu);
2371
2237 vmx_free_vmcs(vcpu); 2372 vmx_free_vmcs(vcpu);
2373 kfree(vmx->host_msrs);
2374 kfree(vmx->guest_msrs);
2375 kvm_vcpu_uninit(vcpu);
2376 kmem_cache_free(kvm_vcpu_cache, vmx);
2238} 2377}
2239 2378
2240static int vmx_create_vcpu(struct kvm_vcpu *vcpu) 2379static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
2241{ 2380{
2242 struct vmcs *vmcs; 2381 int err;
2382 struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2383 int cpu;
2243 2384
2244 vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); 2385 if (!vmx)
2245 if (!vcpu->guest_msrs) 2386 return ERR_PTR(-ENOMEM);
2246 return -ENOMEM;
2247 2387
2248 vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); 2388 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
2249 if (!vcpu->host_msrs) 2389 if (err)
2250 goto out_free_guest_msrs; 2390 goto free_vcpu;
2251 2391
2252 vmcs = alloc_vmcs(); 2392 if (irqchip_in_kernel(kvm)) {
2253 if (!vmcs) 2393 err = kvm_create_lapic(&vmx->vcpu);
2254 goto out_free_msrs; 2394 if (err < 0)
2395 goto free_vcpu;
2396 }
2255 2397
2256 vmcs_clear(vmcs); 2398 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
2257 vcpu->vmcs = vmcs; 2399 if (!vmx->guest_msrs) {
2258 vcpu->launched = 0; 2400 err = -ENOMEM;
2401 goto uninit_vcpu;
2402 }
2259 2403
2260 return 0; 2404 vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
2405 if (!vmx->host_msrs)
2406 goto free_guest_msrs;
2261 2407
2262out_free_msrs: 2408 vmx->vmcs = alloc_vmcs();
2263 kfree(vcpu->host_msrs); 2409 if (!vmx->vmcs)
2264 vcpu->host_msrs = NULL; 2410 goto free_msrs;
2265 2411
2266out_free_guest_msrs: 2412 vmcs_clear(vmx->vmcs);
2267 kfree(vcpu->guest_msrs);
2268 vcpu->guest_msrs = NULL;
2269 2413
2270 return -ENOMEM; 2414 cpu = get_cpu();
2415 vmx_vcpu_load(&vmx->vcpu, cpu);
2416 err = vmx_vcpu_setup(vmx);
2417 vmx_vcpu_put(&vmx->vcpu);
2418 put_cpu();
2419 if (err)
2420 goto free_vmcs;
2421
2422 return &vmx->vcpu;
2423
2424free_vmcs:
2425 free_vmcs(vmx->vmcs);
2426free_msrs:
2427 kfree(vmx->host_msrs);
2428free_guest_msrs:
2429 kfree(vmx->guest_msrs);
2430uninit_vcpu:
2431 kvm_vcpu_uninit(&vmx->vcpu);
2432free_vcpu:
2433 kmem_cache_free(kvm_vcpu_cache, vmx);
2434 return ERR_PTR(err);
2435}
2436
2437static void __init vmx_check_processor_compat(void *rtn)
2438{
2439 struct vmcs_config vmcs_conf;
2440
2441 *(int *)rtn = 0;
2442 if (setup_vmcs_config(&vmcs_conf) < 0)
2443 *(int *)rtn = -EIO;
2444 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
2445 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
2446 smp_processor_id());
2447 *(int *)rtn = -EIO;
2448 }
2271} 2449}
2272 2450
2273static struct kvm_arch_ops vmx_arch_ops = { 2451static struct kvm_x86_ops vmx_x86_ops = {
2274 .cpu_has_kvm_support = cpu_has_kvm_support, 2452 .cpu_has_kvm_support = cpu_has_kvm_support,
2275 .disabled_by_bios = vmx_disabled_by_bios, 2453 .disabled_by_bios = vmx_disabled_by_bios,
2276 .hardware_setup = hardware_setup, 2454 .hardware_setup = hardware_setup,
2277 .hardware_unsetup = hardware_unsetup, 2455 .hardware_unsetup = hardware_unsetup,
2456 .check_processor_compatibility = vmx_check_processor_compat,
2278 .hardware_enable = hardware_enable, 2457 .hardware_enable = hardware_enable,
2279 .hardware_disable = hardware_disable, 2458 .hardware_disable = hardware_disable,
2280 2459
2281 .vcpu_create = vmx_create_vcpu, 2460 .vcpu_create = vmx_create_vcpu,
2282 .vcpu_free = vmx_free_vcpu, 2461 .vcpu_free = vmx_free_vcpu,
2462 .vcpu_reset = vmx_vcpu_reset,
2283 2463
2464 .prepare_guest_switch = vmx_save_host_state,
2284 .vcpu_load = vmx_vcpu_load, 2465 .vcpu_load = vmx_vcpu_load,
2285 .vcpu_put = vmx_vcpu_put, 2466 .vcpu_put = vmx_vcpu_put,
2286 .vcpu_decache = vmx_vcpu_decache, 2467 .vcpu_decache = vmx_vcpu_decache,
2287 2468
2288 .set_guest_debug = set_guest_debug, 2469 .set_guest_debug = set_guest_debug,
2470 .guest_debug_pre = kvm_guest_debug_pre,
2289 .get_msr = vmx_get_msr, 2471 .get_msr = vmx_get_msr,
2290 .set_msr = vmx_set_msr, 2472 .set_msr = vmx_set_msr,
2291 .get_segment_base = vmx_get_segment_base, 2473 .get_segment_base = vmx_get_segment_base,
@@ -2314,9 +2496,13 @@ static struct kvm_arch_ops vmx_arch_ops = {
2314 .inject_gp = vmx_inject_gp, 2496 .inject_gp = vmx_inject_gp,
2315 2497
2316 .run = vmx_vcpu_run, 2498 .run = vmx_vcpu_run,
2499 .handle_exit = kvm_handle_exit,
2317 .skip_emulated_instruction = skip_emulated_instruction, 2500 .skip_emulated_instruction = skip_emulated_instruction,
2318 .vcpu_setup = vmx_vcpu_setup,
2319 .patch_hypercall = vmx_patch_hypercall, 2501 .patch_hypercall = vmx_patch_hypercall,
2502 .get_irq = vmx_get_irq,
2503 .set_irq = vmx_inject_irq,
2504 .inject_pending_irq = vmx_intr_assist,
2505 .inject_pending_vectors = do_interrupt_requests,
2320}; 2506};
2321 2507
2322static int __init vmx_init(void) 2508static int __init vmx_init(void)
@@ -2347,7 +2533,7 @@ static int __init vmx_init(void)
2347 memset(iova, 0xff, PAGE_SIZE); 2533 memset(iova, 0xff, PAGE_SIZE);
2348 kunmap(vmx_io_bitmap_b); 2534 kunmap(vmx_io_bitmap_b);
2349 2535
2350 r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); 2536 r = kvm_init_x86(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
2351 if (r) 2537 if (r)
2352 goto out1; 2538 goto out1;
2353 2539
@@ -2365,7 +2551,7 @@ static void __exit vmx_exit(void)
2365 __free_page(vmx_io_bitmap_b); 2551 __free_page(vmx_io_bitmap_b);
2366 __free_page(vmx_io_bitmap_a); 2552 __free_page(vmx_io_bitmap_a);
2367 2553
2368 kvm_exit_arch(); 2554 kvm_exit_x86();
2369} 2555}
2370 2556
2371module_init(vmx_init) 2557module_init(vmx_init)
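
The rewritten vmx_create_vcpu() above is a textbook instance of the kernel's goto-based unwind idiom: each acquisition gets a cleanup label, a failure jumps to the label covering everything acquired so far, and the labels run in reverse acquisition order. A minimal user-space sketch of the same shape (names and sizes here are illustrative, not from the patch):

#include <stdlib.h>

struct vcpu_demo {
	void *guest_msrs;
	void *host_msrs;
	void *vmcs;
};

/* Allocate in order; on failure, fall through the labels in reverse. */
static struct vcpu_demo *vcpu_demo_create(void)
{
	struct vcpu_demo *v = calloc(1, sizeof(*v));

	if (!v)
		return NULL;
	v->guest_msrs = malloc(4096);
	if (!v->guest_msrs)
		goto free_vcpu;
	v->host_msrs = malloc(4096);
	if (!v->host_msrs)
		goto free_guest_msrs;
	v->vmcs = malloc(4096);
	if (!v->vmcs)
		goto free_host_msrs;
	return v;

free_host_msrs:
	free(v->host_msrs);
free_guest_msrs:
	free(v->guest_msrs);
free_vcpu:
	free(v);
	return NULL;
}

int main(void)
{
	struct vcpu_demo *v = vcpu_demo_create();

	if (!v)
		return 1;
	free(v->vmcs);
	free(v->host_msrs);
	free(v->guest_msrs);
	free(v);
	return 0;
}

Adding a fourth resource means adding one allocation, one label, and one free; the unwind stays the mirror image of the setup, which is why vmx_free_vcpu() above reads like the error path.
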
diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h
index d0dc93df411b..fd4e14666088 100644
--- a/drivers/kvm/vmx.h
+++ b/drivers/kvm/vmx.h
@@ -25,29 +25,36 @@
25 * 25 *
26 */ 26 */
27 27
28#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 28#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
29#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 29#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
30#define CPU_BASED_HLT_EXITING 0x00000080 30#define CPU_BASED_HLT_EXITING 0x00000080
31#define CPU_BASED_INVDPG_EXITING 0x00000200 31#define CPU_BASED_INVLPG_EXITING 0x00000200
32#define CPU_BASED_MWAIT_EXITING 0x00000400 32#define CPU_BASED_MWAIT_EXITING 0x00000400
33#define CPU_BASED_RDPMC_EXITING 0x00000800 33#define CPU_BASED_RDPMC_EXITING 0x00000800
34#define CPU_BASED_RDTSC_EXITING 0x00001000 34#define CPU_BASED_RDTSC_EXITING 0x00001000
35#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 35#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
36#define CPU_BASED_CR8_STORE_EXITING 0x00100000 36#define CPU_BASED_CR8_STORE_EXITING 0x00100000
37#define CPU_BASED_TPR_SHADOW 0x00200000 37#define CPU_BASED_TPR_SHADOW 0x00200000
38#define CPU_BASED_MOV_DR_EXITING 0x00800000 38#define CPU_BASED_MOV_DR_EXITING 0x00800000
39#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 39#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
40#define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000 40#define CPU_BASED_USE_IO_BITMAPS 0x02000000
41#define CPU_BASED_MSR_BITMAPS 0x10000000 41#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
42#define CPU_BASED_MONITOR_EXITING 0x20000000 42#define CPU_BASED_MONITOR_EXITING 0x20000000
43#define CPU_BASED_PAUSE_EXITING 0x40000000 43#define CPU_BASED_PAUSE_EXITING 0x40000000
44#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
44 45
45#define PIN_BASED_EXT_INTR_MASK 0x1 46#define PIN_BASED_EXT_INTR_MASK 0x00000001
46#define PIN_BASED_NMI_EXITING 0x8 47#define PIN_BASED_NMI_EXITING 0x00000008
48#define PIN_BASED_VIRTUAL_NMIS 0x00000020
47 49
48#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 50#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
49#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200 51#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
50 52
53#define VM_ENTRY_IA32E_MODE 0x00000200
54#define VM_ENTRY_SMM 0x00000400
55#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
56
57#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
51 58
52/* VMCS Encodings */ 59/* VMCS Encodings */
53enum vmcs_field { 60enum vmcs_field {
@@ -206,6 +213,7 @@ enum vmcs_field {
206#define EXIT_REASON_MSR_READ 31 213#define EXIT_REASON_MSR_READ 31
207#define EXIT_REASON_MSR_WRITE 32 214#define EXIT_REASON_MSR_WRITE 32
208#define EXIT_REASON_MWAIT_INSTRUCTION 36 215#define EXIT_REASON_MWAIT_INSTRUCTION 36
216#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
209 217
210/* 218/*
211 * Interruption-information format 219 * Interruption-information format
@@ -261,9 +269,6 @@ enum vmcs_field {
261/* segment AR */ 269/* segment AR */
262#define SEGMENT_AR_L_MASK (1 << 13) 270#define SEGMENT_AR_L_MASK (1 << 13)
263 271
264/* entry controls */
265#define VM_ENTRY_CONTROLS_IA32E_MASK (1 << 9)
266
267#define AR_TYPE_ACCESSES_MASK 1 272#define AR_TYPE_ACCESSES_MASK 1
268#define AR_TYPE_READABLE_MASK (1 << 1) 273#define AR_TYPE_READABLE_MASK (1 << 1)
269#define AR_TYPE_WRITEABLE_MASK (1 << 2) 274#define AR_TYPE_WRITEABLE_MASK (1 << 2)
@@ -285,13 +290,21 @@ enum vmcs_field {
285 290
286#define AR_RESERVD_MASK 0xfffe0f00 291#define AR_RESERVD_MASK 0xfffe0f00
287 292
288#define CR4_VMXE 0x2000 293#define MSR_IA32_VMX_BASIC 0x480
294#define MSR_IA32_VMX_PINBASED_CTLS 0x481
295#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
296#define MSR_IA32_VMX_EXIT_CTLS 0x483
297#define MSR_IA32_VMX_ENTRY_CTLS 0x484
298#define MSR_IA32_VMX_MISC 0x485
299#define MSR_IA32_VMX_CR0_FIXED0 0x486
300#define MSR_IA32_VMX_CR0_FIXED1 0x487
301#define MSR_IA32_VMX_CR4_FIXED0 0x488
302#define MSR_IA32_VMX_CR4_FIXED1 0x489
303#define MSR_IA32_VMX_VMCS_ENUM 0x48a
304#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
289 305
290#define MSR_IA32_VMX_BASIC 0x480 306#define MSR_IA32_FEATURE_CONTROL 0x3a
291#define MSR_IA32_FEATURE_CONTROL 0x03a 307#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
292#define MSR_IA32_VMX_PINBASED_CTLS 0x481 308#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
293#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
294#define MSR_IA32_VMX_EXIT_CTLS 0x483
295#define MSR_IA32_VMX_ENTRY_CTLS 0x484
296 309
297#endif 310#endif
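
The MSR_IA32_VMX_* capability MSRs listed above each describe one class of controls: by the architecture's convention the low 32 bits are the allowed-0 settings (bits the CPU forces to 1) and the high 32 bits are the allowed-1 settings (bits the CPU permits to be 1). Code such as setup_vmcs_config() folds the controls it wants through both masks. A stand-alone sketch of that arithmetic (the sample MSR value is plausible but invented):

#include <stdint.h>
#include <stdio.h>

/*
 * Fold a desired control word through a VMX capability MSR value:
 * result = (desired | must_be_one) & may_be_one.
 */
static uint32_t adjust_vmx_controls(uint32_t desired, uint64_t cap_msr)
{
	uint32_t must_be_one = (uint32_t)cap_msr;	  /* allowed-0 */
	uint32_t may_be_one  = (uint32_t)(cap_msr >> 32); /* allowed-1 */

	return (desired | must_be_one) & may_be_one;
}

int main(void)
{
	/* Invented MSR_IA32_VMX_PROCBASED_CTLS-style reading. */
	uint64_t cap = 0xfff9fffe0401e172ULL;
	uint32_t want = 0x00000080;	/* CPU_BASED_HLT_EXITING */

	printf("final controls: %#x\n", adjust_vmx_controls(want, cap));
	return 0;
}

This is also what the new vmx_check_processor_compat() guards: every CPU must derive the same vmcs_config from its own capability MSRs.
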
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 4b8a0cc9665e..9737c3b2f48c 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -6,7 +6,7 @@
6 * Copyright (c) 2005 Keir Fraser 6 * Copyright (c) 2005 Keir Fraser
7 * 7 *
8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode 8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9 * privieged instructions: 9 * privileged instructions:
10 * 10 *
11 * Copyright (C) 2006 Qumranet 11 * Copyright (C) 2006 Qumranet
12 * 12 *
@@ -83,7 +83,7 @@ static u8 opcode_table[256] = {
83 /* 0x20 - 0x27 */ 83 /* 0x20 - 0x27 */
84 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 84 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
85 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 85 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
86 0, 0, 0, 0, 86 SrcImmByte, SrcImm, 0, 0,
87 /* 0x28 - 0x2F */ 87 /* 0x28 - 0x2F */
88 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 88 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
89 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 89 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -99,15 +99,24 @@ static u8 opcode_table[256] = {
99 /* 0x40 - 0x4F */ 99 /* 0x40 - 0x4F */
100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
101 /* 0x50 - 0x57 */ 101 /* 0x50 - 0x57 */
102 0, 0, 0, 0, 0, 0, 0, 0, 102 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
103 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
103 /* 0x58 - 0x5F */ 104 /* 0x58 - 0x5F */
104 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 105 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
105 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, 106 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
106 /* 0x60 - 0x6F */ 107 /* 0x60 - 0x67 */
107 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , 108 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109 0, 0, 0, 0,
109 /* 0x70 - 0x7F */ 110 /* 0x68 - 0x6F */
110 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 111 0, 0, ImplicitOps|Mov, 0,
112 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
113 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
114 /* 0x70 - 0x77 */
115 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
116 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
117 /* 0x78 - 0x7F */
118 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
119 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
111 /* 0x80 - 0x87 */ 120 /* 0x80 - 0x87 */
112 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 121 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
113 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 122 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
@@ -116,9 +125,9 @@ static u8 opcode_table[256] = {
116 /* 0x88 - 0x8F */ 125 /* 0x88 - 0x8F */
117 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 126 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
118 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 127 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
119 0, 0, 0, DstMem | SrcNone | ModRM | Mov, 128 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov,
120 /* 0x90 - 0x9F */ 129 /* 0x90 - 0x9F */
121 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0,
122 /* 0xA0 - 0xA7 */ 131 /* 0xA0 - 0xA7 */
123 ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov, 132 ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov,
124 ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov, 133 ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov,
@@ -142,8 +151,10 @@ static u8 opcode_table[256] = {
142 0, 0, 0, 0, 151 0, 0, 0, 0,
143 /* 0xD8 - 0xDF */ 152 /* 0xD8 - 0xDF */
144 0, 0, 0, 0, 0, 0, 0, 0, 153 0, 0, 0, 0, 0, 0, 0, 0,
145 /* 0xE0 - 0xEF */ 154 /* 0xE0 - 0xE7 */
146 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 155 0, 0, 0, 0, 0, 0, 0, 0,
156 /* 0xE8 - 0xEF */
157 ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0,
147 /* 0xF0 - 0xF7 */ 158 /* 0xF0 - 0xF7 */
148 0, 0, 0, 0, 159 0, 0, 0, 0,
149 ImplicitOps, 0, 160 ImplicitOps, 0,
@@ -181,7 +192,10 @@ static u16 twobyte_table[256] = {
181 /* 0x70 - 0x7F */ 192 /* 0x70 - 0x7F */
182 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 193 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
183 /* 0x80 - 0x8F */ 194 /* 0x80 - 0x8F */
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 195 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
196 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
197 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
198 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
185 /* 0x90 - 0x9F */ 199 /* 0x90 - 0x9F */
186 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 200 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
187 /* 0xA0 - 0xA7 */ 201 /* 0xA0 - 0xA7 */
@@ -207,19 +221,6 @@ static u16 twobyte_table[256] = {
207 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 221 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
208}; 222};
209 223
210/*
211 * Tell the emulator that of the Group 7 instructions (sgdt, lidt, etc.) we
212 * are interested only in invlpg and not in any of the rest.
213 *
214 * invlpg is a special instruction in that the data it references may not
215 * be mapped.
216 */
217void kvm_emulator_want_group7_invlpg(void)
218{
219 twobyte_table[1] &= ~SrcMem;
220}
221EXPORT_SYMBOL_GPL(kvm_emulator_want_group7_invlpg);
222
223/* Type, address-of, and value of an instruction's operand. */ 224/* Type, address-of, and value of an instruction's operand. */
224struct operand { 225struct operand {
225 enum { OP_REG, OP_MEM, OP_IMM } type; 226 enum { OP_REG, OP_MEM, OP_IMM } type;
@@ -420,7 +421,7 @@ struct operand {
420#define insn_fetch(_type, _size, _eip) \ 421#define insn_fetch(_type, _size, _eip) \
421({ unsigned long _x; \ 422({ unsigned long _x; \
422 rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x, \ 423 rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x, \
423 (_size), ctxt); \ 424 (_size), ctxt->vcpu); \
424 if ( rc != 0 ) \ 425 if ( rc != 0 ) \
425 goto done; \ 426 goto done; \
426 (_eip) += (_size); \ 427 (_eip) += (_size); \
@@ -428,10 +429,11 @@ struct operand {
428}) 429})
429 430
430/* Access/update address held in a register, based on addressing mode. */ 431/* Access/update address held in a register, based on addressing mode. */
432#define address_mask(reg) \
433 ((ad_bytes == sizeof(unsigned long)) ? \
434 (reg) : ((reg) & ((1UL << (ad_bytes << 3)) - 1)))
431#define register_address(base, reg) \ 435#define register_address(base, reg) \
432 ((base) + ((ad_bytes == sizeof(unsigned long)) ? (reg) : \ 436 ((base) + address_mask(reg))
433 ((reg) & ((1UL << (ad_bytes << 3)) - 1))))
434
435#define register_address_increment(reg, inc) \ 437#define register_address_increment(reg, inc) \
436 do { \ 438 do { \
437 /* signed type ensures sign extension to long */ \ 439 /* signed type ensures sign extension to long */ \
@@ -443,8 +445,19 @@ struct operand {
443 (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ 445 (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \
444 } while (0) 446 } while (0)
445 447
446void *decode_register(u8 modrm_reg, unsigned long *regs, 448#define JMP_REL(rel) \
447 int highbyte_regs) 449 do { \
450 _eip += (int)(rel); \
451 _eip = ((op_bytes == 2) ? (uint16_t)_eip : (uint32_t)_eip); \
452 } while (0)
453
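
address_mask() and the new JMP_REL() macro encode the same architectural rule from different angles: effective addresses are truncated to the current address size (ad_bytes) and instruction pointers to the current operand size (op_bytes), so 16-bit code wraps inside its segment rather than escaping it. A stand-alone illustration of the wrap:

#include <stdint.h>
#include <stdio.h>

/* Truncate a register to the address size, as address_mask() does. */
static unsigned long demo_address_mask(unsigned long reg, int ad_bytes)
{
	if (ad_bytes == sizeof(unsigned long))
		return reg;
	return reg & ((1UL << (ad_bytes << 3)) - 1);
}

int main(void)
{
	unsigned long eip = 0xfffe;	/* near the top of a 16-bit segment */
	int op_bytes = 2;

	/* JMP_REL(5): add the displacement, then truncate. */
	eip += 5;
	eip = (op_bytes == 2) ? (uint16_t)eip : (uint32_t)eip;
	printf("16-bit ip wraps to %#lx\n", eip);		/* 0x3 */

	printf("masked 16-bit si: %#lx\n",
	       demo_address_mask(0x12345678UL, 2));		/* 0x5678 */
	return 0;
}
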
454/*
455 * Given the 'reg' portion of a ModRM byte, and a register block, return a
456 * pointer into the block that addresses the relevant register.
457 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
458 */
459static void *decode_register(u8 modrm_reg, unsigned long *regs,
460 int highbyte_regs)
448{ 461{
449 void *p; 462 void *p;
450 463
@@ -464,13 +477,50 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
464 if (op_bytes == 2) 477 if (op_bytes == 2)
465 op_bytes = 3; 478 op_bytes = 3;
466 *address = 0; 479 *address = 0;
467 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, ctxt); 480 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
481 ctxt->vcpu);
468 if (rc) 482 if (rc)
469 return rc; 483 return rc;
470 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, ctxt); 484 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
485 ctxt->vcpu);
471 return rc; 486 return rc;
472} 487}
473 488
489static int test_cc(unsigned int condition, unsigned int flags)
490{
491 int rc = 0;
492
493 switch ((condition & 15) >> 1) {
494 case 0: /* o */
495 rc |= (flags & EFLG_OF);
496 break;
497 case 1: /* b/c/nae */
498 rc |= (flags & EFLG_CF);
499 break;
500 case 2: /* z/e */
501 rc |= (flags & EFLG_ZF);
502 break;
503 case 3: /* be/na */
504 rc |= (flags & (EFLG_CF|EFLG_ZF));
505 break;
506 case 4: /* s */
507 rc |= (flags & EFLG_SF);
508 break;
509 case 5: /* p/pe */
510 rc |= (flags & EFLG_PF);
511 break;
512 case 7: /* le/ng */
513 rc |= (flags & EFLG_ZF);
514 /* fall through */
515 case 6: /* l/nge */
516 rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
517 break;
518 }
519
520 /* Odd condition identifiers (lsb == 1) have inverted sense. */
521 return (!!rc ^ (condition & 1));
522}
523
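
test_cc() collapses the sixteen condition-code encodings used by jcc, and later by the 0x80-0x8f long jumps, into eight flag tests plus an invert bit in the encoding's lsb. A quick stand-alone check of two familiar rows, with the EFLG_* constants written out at their architectural FLAGS bit positions:

#include <stdio.h>

#define EFLG_ZF 0x040	/* bit 6 */
#define EFLG_SF 0x080	/* bit 7 */
#define EFLG_OF 0x800	/* bit 11 */

/* The two table rows exercised below, same logic as test_cc(). */
static int demo_test_cc(unsigned int condition, unsigned int flags)
{
	int rc = 0;

	switch ((condition & 15) >> 1) {
	case 2: /* z/e */
		rc |= flags & EFLG_ZF;
		break;
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}
	return !!rc ^ (condition & 1);	/* lsb inverts the sense */
}

int main(void)
{
	printf("je  (0x74), ZF set: %d\n", demo_test_cc(0x74, EFLG_ZF)); /* 1 */
	printf("jne (0x75), ZF set: %d\n", demo_test_cc(0x75, EFLG_ZF)); /* 0 */
	printf("jl  (0x7c), SF!=OF: %d\n", demo_test_cc(0x7c, EFLG_SF)); /* 1 */
	return 0;
}
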
474int 524int
475x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) 525x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
476{ 526{
@@ -771,11 +821,15 @@ done_prefixes:
771 goto srcmem_common; 821 goto srcmem_common;
772 case SrcMem: 822 case SrcMem:
773 src.bytes = (d & ByteOp) ? 1 : op_bytes; 823 src.bytes = (d & ByteOp) ? 1 : op_bytes;
824 /* Don't fetch the address for invlpg: it could be unmapped. */
825 if (twobyte && b == 0x01 && modrm_reg == 7)
826 break;
774 srcmem_common: 827 srcmem_common:
775 src.type = OP_MEM; 828 src.type = OP_MEM;
776 src.ptr = (unsigned long *)cr2; 829 src.ptr = (unsigned long *)cr2;
830 src.val = 0;
777 if ((rc = ops->read_emulated((unsigned long)src.ptr, 831 if ((rc = ops->read_emulated((unsigned long)src.ptr,
778 &src.val, src.bytes, ctxt)) != 0) 832 &src.val, src.bytes, ctxt->vcpu)) != 0)
779 goto done; 833 goto done;
780 src.orig_val = src.val; 834 src.orig_val = src.val;
781 break; 835 break;
@@ -814,7 +868,7 @@ done_prefixes:
814 case DstReg: 868 case DstReg:
815 dst.type = OP_REG; 869 dst.type = OP_REG;
816 if ((d & ByteOp) 870 if ((d & ByteOp)
817 && !(twobyte_table && (b == 0xb6 || b == 0xb7))) { 871 && !(twobyte && (b == 0xb6 || b == 0xb7))) {
818 dst.ptr = decode_register(modrm_reg, _regs, 872 dst.ptr = decode_register(modrm_reg, _regs,
819 (rex_prefix == 0)); 873 (rex_prefix == 0));
820 dst.val = *(u8 *) dst.ptr; 874 dst.val = *(u8 *) dst.ptr;
@@ -838,6 +892,7 @@ done_prefixes:
838 dst.type = OP_MEM; 892 dst.type = OP_MEM;
839 dst.ptr = (unsigned long *)cr2; 893 dst.ptr = (unsigned long *)cr2;
840 dst.bytes = (d & ByteOp) ? 1 : op_bytes; 894 dst.bytes = (d & ByteOp) ? 1 : op_bytes;
895 dst.val = 0;
841 if (d & BitOp) { 896 if (d & BitOp) {
842 unsigned long mask = ~(dst.bytes * 8 - 1); 897 unsigned long mask = ~(dst.bytes * 8 - 1);
843 898
@@ -845,7 +900,7 @@ done_prefixes:
845 } 900 }
846 if (!(d & Mov) && /* optimisation - avoid slow emulated read */ 901 if (!(d & Mov) && /* optimisation - avoid slow emulated read */
847 ((rc = ops->read_emulated((unsigned long)dst.ptr, 902 ((rc = ops->read_emulated((unsigned long)dst.ptr,
848 &dst.val, dst.bytes, ctxt)) != 0)) 903 &dst.val, dst.bytes, ctxt->vcpu)) != 0))
849 goto done; 904 goto done;
850 break; 905 break;
851 } 906 }
@@ -871,10 +926,27 @@ done_prefixes:
871 sbb: /* sbb */ 926 sbb: /* sbb */
872 emulate_2op_SrcV("sbb", src, dst, _eflags); 927 emulate_2op_SrcV("sbb", src, dst, _eflags);
873 break; 928 break;
874 case 0x20 ... 0x25: 929 case 0x20 ... 0x23:
875 and: /* and */ 930 and: /* and */
876 emulate_2op_SrcV("and", src, dst, _eflags); 931 emulate_2op_SrcV("and", src, dst, _eflags);
877 break; 932 break;
933 case 0x24: /* and al imm8 */
934 dst.type = OP_REG;
935 dst.ptr = &_regs[VCPU_REGS_RAX];
936 dst.val = *(u8 *)dst.ptr;
937 dst.bytes = 1;
938 dst.orig_val = dst.val;
939 goto and;
940 case 0x25: /* and ax imm16, or eax imm32 */
941 dst.type = OP_REG;
942 dst.bytes = op_bytes;
943 dst.ptr = &_regs[VCPU_REGS_RAX];
944 if (op_bytes == 2)
945 dst.val = *(u16 *)dst.ptr;
946 else
947 dst.val = *(u32 *)dst.ptr;
948 dst.orig_val = dst.val;
949 goto and;
878 case 0x28 ... 0x2d: 950 case 0x28 ... 0x2d:
879 sub: /* sub */ 951 sub: /* sub */
880 emulate_2op_SrcV("sub", src, dst, _eflags); 952 emulate_2op_SrcV("sub", src, dst, _eflags);
@@ -892,6 +964,17 @@ done_prefixes:
892 goto cannot_emulate; 964 goto cannot_emulate;
893 dst.val = (s32) src.val; 965 dst.val = (s32) src.val;
894 break; 966 break;
967 case 0x6a: /* push imm8 */
968 src.val = 0L;
969 src.val = insn_fetch(s8, 1, _eip);
970push:
971 dst.type = OP_MEM;
972 dst.bytes = op_bytes;
973 dst.val = src.val;
974 register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
975 dst.ptr = (void *) register_address(ctxt->ss_base,
976 _regs[VCPU_REGS_RSP]);
977 break;
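
The shared push: label above fixes the ordering every push-class instruction uses: decrement the stack pointer by the operand size first, then write the value at ss:rsp (pushf at 0x9c and the near call at 0xe8 both jump here). The same ordering in a flat, self-contained model:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t stack_mem[64];	/* toy stack segment, rsp starts at the top */

/* Decrement rsp by the operand size, then store at the new top. */
static void push(uint64_t *rsp, const void *val, int op_bytes)
{
	*rsp -= op_bytes;
	memcpy(&stack_mem[*rsp], val, op_bytes);
}

int main(void)
{
	uint64_t rsp = sizeof(stack_mem);
	uint32_t ret_eip = 0x1234;	/* what the near call saves */
	uint32_t top;

	push(&rsp, &ret_eip, sizeof(ret_eip));
	memcpy(&top, &stack_mem[rsp], sizeof(top));
	printf("rsp=%llu, top of stack=%#x\n",
	       (unsigned long long)rsp, top);	/* rsp=60, 0x1234 */
	return 0;
}
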
895 case 0x80 ... 0x83: /* Grp1 */ 978 case 0x80 ... 0x83: /* Grp1 */
896 switch (modrm_reg) { 979 switch (modrm_reg) {
897 case 0: 980 case 0:
@@ -939,18 +1022,10 @@ done_prefixes:
939 dst.val = src.val; 1022 dst.val = src.val;
940 lock_prefix = 1; 1023 lock_prefix = 1;
941 break; 1024 break;
942 case 0xa0 ... 0xa1: /* mov */
943 dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
944 dst.val = src.val;
945 _eip += ad_bytes; /* skip src displacement */
946 break;
947 case 0xa2 ... 0xa3: /* mov */
948 dst.val = (unsigned long)_regs[VCPU_REGS_RAX];
949 _eip += ad_bytes; /* skip dst displacement */
950 break;
951 case 0x88 ... 0x8b: /* mov */ 1025 case 0x88 ... 0x8b: /* mov */
952 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ 1026 goto mov;
953 dst.val = src.val; 1027 case 0x8d: /* lea r16/r32, m */
1028 dst.val = modrm_val;
954 break; 1029 break;
955 case 0x8f: /* pop (sole member of Grp1a) */ 1030 case 0x8f: /* pop (sole member of Grp1a) */
956 /* 64-bit mode: POP always pops a 64-bit operand. */ 1031 /* 64-bit mode: POP always pops a 64-bit operand. */
@@ -958,10 +1033,19 @@ done_prefixes:
958 dst.bytes = 8; 1033 dst.bytes = 8;
959 if ((rc = ops->read_std(register_address(ctxt->ss_base, 1034 if ((rc = ops->read_std(register_address(ctxt->ss_base,
960 _regs[VCPU_REGS_RSP]), 1035 _regs[VCPU_REGS_RSP]),
961 &dst.val, dst.bytes, ctxt)) != 0) 1036 &dst.val, dst.bytes, ctxt->vcpu)) != 0)
962 goto done; 1037 goto done;
963 register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes); 1038 register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes);
964 break; 1039 break;
1040 case 0xa0 ... 0xa1: /* mov */
1041 dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
1042 dst.val = src.val;
1043 _eip += ad_bytes; /* skip src displacement */
1044 break;
1045 case 0xa2 ... 0xa3: /* mov */
1046 dst.val = (unsigned long)_regs[VCPU_REGS_RAX];
1047 _eip += ad_bytes; /* skip dst displacement */
1048 break;
965 case 0xc0 ... 0xc1: 1049 case 0xc0 ... 0xc1:
966 grp2: /* Grp2 */ 1050 grp2: /* Grp2 */
967 switch (modrm_reg) { 1051 switch (modrm_reg) {
@@ -989,12 +1073,41 @@ done_prefixes:
989 break; 1073 break;
990 } 1074 }
991 break; 1075 break;
1076 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
1077 mov:
1078 dst.val = src.val;
1079 break;
992 case 0xd0 ... 0xd1: /* Grp2 */ 1080 case 0xd0 ... 0xd1: /* Grp2 */
993 src.val = 1; 1081 src.val = 1;
994 goto grp2; 1082 goto grp2;
995 case 0xd2 ... 0xd3: /* Grp2 */ 1083 case 0xd2 ... 0xd3: /* Grp2 */
996 src.val = _regs[VCPU_REGS_RCX]; 1084 src.val = _regs[VCPU_REGS_RCX];
997 goto grp2; 1085 goto grp2;
1086 case 0xe8: /* call (near) */ {
1087 long int rel;
1088 switch (op_bytes) {
1089 case 2:
1090 rel = insn_fetch(s16, 2, _eip);
1091 break;
1092 case 4:
1093 rel = insn_fetch(s32, 4, _eip);
1094 break;
1095 case 8:
1096 rel = insn_fetch(s64, 8, _eip);
1097 break;
1098 default:
1099 DPRINTF("Call: Invalid op_bytes\n");
1100 goto cannot_emulate;
1101 }
1102 src.val = (unsigned long) _eip;
1103 JMP_REL(rel);
1104 goto push;
1105 }
1106 case 0xe9: /* jmp rel */
1107 case 0xeb: /* jmp rel short */
1108 JMP_REL(src.val);
1109 no_wb = 1; /* Disable writeback. */
1110 break;
998 case 0xf6 ... 0xf7: /* Grp3 */ 1111 case 0xf6 ... 0xf7: /* Grp3 */
999 switch (modrm_reg) { 1112 switch (modrm_reg) {
1000 case 0 ... 1: /* test */ 1113 case 0 ... 1: /* test */
@@ -1037,13 +1150,19 @@ done_prefixes:
1037 case 1: /* dec */ 1150 case 1: /* dec */
1038 emulate_1op("dec", dst, _eflags); 1151 emulate_1op("dec", dst, _eflags);
1039 break; 1152 break;
1153 case 4: /* jmp abs */
1154 if (b == 0xff)
1155 _eip = dst.val;
1156 else
1157 goto cannot_emulate;
1158 break;
1040 case 6: /* push */ 1159 case 6: /* push */
1041 /* 64-bit mode: PUSH always pushes a 64-bit operand. */ 1160 /* 64-bit mode: PUSH always pushes a 64-bit operand. */
1042 if (mode == X86EMUL_MODE_PROT64) { 1161 if (mode == X86EMUL_MODE_PROT64) {
1043 dst.bytes = 8; 1162 dst.bytes = 8;
1044 if ((rc = ops->read_std((unsigned long)dst.ptr, 1163 if ((rc = ops->read_std((unsigned long)dst.ptr,
1045 &dst.val, 8, 1164 &dst.val, 8,
1046 ctxt)) != 0) 1165 ctxt->vcpu)) != 0)
1047 goto done; 1166 goto done;
1048 } 1167 }
1049 register_address_increment(_regs[VCPU_REGS_RSP], 1168 register_address_increment(_regs[VCPU_REGS_RSP],
@@ -1051,7 +1170,7 @@ done_prefixes:
1051 if ((rc = ops->write_std( 1170 if ((rc = ops->write_std(
1052 register_address(ctxt->ss_base, 1171 register_address(ctxt->ss_base,
1053 _regs[VCPU_REGS_RSP]), 1172 _regs[VCPU_REGS_RSP]),
1054 &dst.val, dst.bytes, ctxt)) != 0) 1173 &dst.val, dst.bytes, ctxt->vcpu)) != 0)
1055 goto done; 1174 goto done;
1056 no_wb = 1; 1175 no_wb = 1;
1057 break; 1176 break;
@@ -1086,11 +1205,11 @@ writeback:
1086 rc = ops->cmpxchg_emulated((unsigned long)dst. 1205 rc = ops->cmpxchg_emulated((unsigned long)dst.
1087 ptr, &dst.orig_val, 1206 ptr, &dst.orig_val,
1088 &dst.val, dst.bytes, 1207 &dst.val, dst.bytes,
1089 ctxt); 1208 ctxt->vcpu);
1090 else 1209 else
1091 rc = ops->write_emulated((unsigned long)dst.ptr, 1210 rc = ops->write_emulated((unsigned long)dst.ptr,
1092 &dst.val, dst.bytes, 1211 &dst.val, dst.bytes,
1093 ctxt); 1212 ctxt->vcpu);
1094 if (rc != 0) 1213 if (rc != 0)
1095 goto done; 1214 goto done;
1096 default: 1215 default:
@@ -1109,6 +1228,81 @@ done:
1109special_insn: 1228special_insn:
1110 if (twobyte) 1229 if (twobyte)
1111 goto twobyte_special_insn; 1230 goto twobyte_special_insn;
1231 switch(b) {
1232 case 0x50 ... 0x57: /* push reg */
1233 if (op_bytes == 2)
1234 src.val = (u16) _regs[b & 0x7];
1235 else
1236 src.val = (u32) _regs[b & 0x7];
1237 dst.type = OP_MEM;
1238 dst.bytes = op_bytes;
1239 dst.val = src.val;
1240 register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes);
1241 dst.ptr = (void *) register_address(
1242 ctxt->ss_base, _regs[VCPU_REGS_RSP]);
1243 break;
1244 case 0x58 ... 0x5f: /* pop reg */
1245 dst.ptr = (unsigned long *)&_regs[b & 0x7];
1246 pop_instruction:
1247 if ((rc = ops->read_std(register_address(ctxt->ss_base,
1248 _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu))
1249 != 0)
1250 goto done;
1251
1252 register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
1253 no_wb = 1; /* Disable writeback. */
1254 break;
1255 case 0x6c: /* insb */
1256 case 0x6d: /* insw/insd */
1257 if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
1258 1, /* in */
1259 (d & ByteOp) ? 1 : op_bytes, /* size */
1260 rep_prefix ?
1261 address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */
1262 (_eflags & EFLG_DF), /* down */
1263 register_address(ctxt->es_base,
1264 _regs[VCPU_REGS_RDI]), /* address */
1265 rep_prefix,
1266 _regs[VCPU_REGS_RDX] /* port */
1267 ) == 0)
1268 return -1;
1269 return 0;
1270 case 0x6e: /* outsb */
1271 case 0x6f: /* outsw/outsd */
1272 if (kvm_emulate_pio_string(ctxt->vcpu, NULL,
1273 0, /* in */
1274 (d & ByteOp) ? 1 : op_bytes, /* size */
1275 rep_prefix ?
1276 address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */
1277 (_eflags & EFLG_DF), /* down */
1278 register_address(override_base ?
1279 *override_base : ctxt->ds_base,
1280 _regs[VCPU_REGS_RSI]), /* address */
1281 rep_prefix,
1282 _regs[VCPU_REGS_RDX] /* port */
1283 ) == 0)
1284 return -1;
1285 return 0;
1286 case 0x70 ... 0x7f: /* jcc (short) */ {
1287 int rel = insn_fetch(s8, 1, _eip);
1288
1289 if (test_cc(b, _eflags))
1290 JMP_REL(rel);
1291 break;
1292 }
1293 case 0x9c: /* pushf */
1294 src.val = (unsigned long) _eflags;
1295 goto push;
1296 case 0x9d: /* popf */
1297 dst.ptr = (unsigned long *) &_eflags;
1298 goto pop_instruction;
1299 case 0xc3: /* ret */
1300 dst.ptr = &_eip;
1301 goto pop_instruction;
1302 case 0xf4: /* hlt */
1303 ctxt->vcpu->halt_request = 1;
1304 goto done;
1305 }
1112 if (rep_prefix) { 1306 if (rep_prefix) {
1113 if (_regs[VCPU_REGS_RCX] == 0) { 1307 if (_regs[VCPU_REGS_RCX] == 0) {
1114 ctxt->vcpu->rip = _eip; 1308 ctxt->vcpu->rip = _eip;
@@ -1125,7 +1319,7 @@ special_insn:
1125 _regs[VCPU_REGS_RDI]); 1319 _regs[VCPU_REGS_RDI]);
1126 if ((rc = ops->read_emulated(register_address( 1320 if ((rc = ops->read_emulated(register_address(
1127 override_base ? *override_base : ctxt->ds_base, 1321 override_base ? *override_base : ctxt->ds_base,
1128 _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt)) != 0) 1322 _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt->vcpu)) != 0)
1129 goto done; 1323 goto done;
1130 register_address_increment(_regs[VCPU_REGS_RSI], 1324 register_address_increment(_regs[VCPU_REGS_RSI],
1131 (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); 1325 (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
@@ -1147,7 +1341,8 @@ special_insn:
1147 dst.type = OP_REG; 1341 dst.type = OP_REG;
1148 dst.bytes = (d & ByteOp) ? 1 : op_bytes; 1342 dst.bytes = (d & ByteOp) ? 1 : op_bytes;
1149 dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; 1343 dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
1150 if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0) 1344 if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes,
1345 ctxt->vcpu)) != 0)
1151 goto done; 1346 goto done;
1152 register_address_increment(_regs[VCPU_REGS_RSI], 1347 register_address_increment(_regs[VCPU_REGS_RSI],
1153 (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); 1348 (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
@@ -1155,23 +1350,7 @@ special_insn:
1155 case 0xae ... 0xaf: /* scas */ 1350 case 0xae ... 0xaf: /* scas */
1156 DPRINTF("Urk! I don't handle SCAS.\n"); 1351 DPRINTF("Urk! I don't handle SCAS.\n");
1157 goto cannot_emulate; 1352 goto cannot_emulate;
1158 case 0xf4: /* hlt */
1159 ctxt->vcpu->halt_request = 1;
1160 goto done;
1161 case 0xc3: /* ret */
1162 dst.ptr = &_eip;
1163 goto pop_instruction;
1164 case 0x58 ... 0x5f: /* pop reg */
1165 dst.ptr = (unsigned long *)&_regs[b & 0x7];
1166 1353
1167pop_instruction:
1168 if ((rc = ops->read_std(register_address(ctxt->ss_base,
1169 _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt)) != 0)
1170 goto done;
1171
1172 register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
1173 no_wb = 1; /* Disable writeback. */
1174 break;
1175 } 1354 }
1176 goto writeback; 1355 goto writeback;
1177 1356
@@ -1230,40 +1409,50 @@ twobyte_insn:
1230 break; 1409 break;
1231 case 0x40 ... 0x4f: /* cmov */ 1410 case 0x40 ... 0x4f: /* cmov */
1232 dst.val = dst.orig_val = src.val; 1411 dst.val = dst.orig_val = src.val;
1233 d &= ~Mov; /* default to no move */ 1412 no_wb = 1;
1234 /* 1413 /*
1235 * First, assume we're decoding an even cmov opcode 1414 * First, assume we're decoding an even cmov opcode
1236 * (lsb == 0). 1415 * (lsb == 0).
1237 */ 1416 */
1238 switch ((b & 15) >> 1) { 1417 switch ((b & 15) >> 1) {
1239 case 0: /* cmovo */ 1418 case 0: /* cmovo */
1240 d |= (_eflags & EFLG_OF) ? Mov : 0; 1419 no_wb = (_eflags & EFLG_OF) ? 0 : 1;
1241 break; 1420 break;
1242 case 1: /* cmovb/cmovc/cmovnae */ 1421 case 1: /* cmovb/cmovc/cmovnae */
1243 d |= (_eflags & EFLG_CF) ? Mov : 0; 1422 no_wb = (_eflags & EFLG_CF) ? 0 : 1;
1244 break; 1423 break;
1245 case 2: /* cmovz/cmove */ 1424 case 2: /* cmovz/cmove */
1246 d |= (_eflags & EFLG_ZF) ? Mov : 0; 1425 no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
1247 break; 1426 break;
1248 case 3: /* cmovbe/cmovna */ 1427 case 3: /* cmovbe/cmovna */
1249 d |= (_eflags & (EFLG_CF | EFLG_ZF)) ? Mov : 0; 1428 no_wb = (_eflags & (EFLG_CF | EFLG_ZF)) ? 0 : 1;
1250 break; 1429 break;
1251 case 4: /* cmovs */ 1430 case 4: /* cmovs */
1252 d |= (_eflags & EFLG_SF) ? Mov : 0; 1431 no_wb = (_eflags & EFLG_SF) ? 0 : 1;
1253 break; 1432 break;
1254 case 5: /* cmovp/cmovpe */ 1433 case 5: /* cmovp/cmovpe */
1255 d |= (_eflags & EFLG_PF) ? Mov : 0; 1434 no_wb = (_eflags & EFLG_PF) ? 0 : 1;
1256 break; 1435 break;
1257 case 7: /* cmovle/cmovng */ 1436 case 7: /* cmovle/cmovng */
1258 d |= (_eflags & EFLG_ZF) ? Mov : 0; 1437 no_wb = (_eflags & EFLG_ZF) ? 0 : 1;
1259 /* fall through */ 1438 /* fall through */
1260 case 6: /* cmovl/cmovnge */ 1439 case 6: /* cmovl/cmovnge */
1261 d |= (!(_eflags & EFLG_SF) != 1440 no_wb &= (!(_eflags & EFLG_SF) !=
1262 !(_eflags & EFLG_OF)) ? Mov : 0; 1441 !(_eflags & EFLG_OF)) ? 0 : 1;
1263 break; 1442 break;
1264 } 1443 }
1265 /* Odd cmov opcodes (lsb == 1) have inverted sense. */ 1444 /* Odd cmov opcodes (lsb == 1) have inverted sense. */
1266 d ^= (b & 1) ? Mov : 0; 1445 no_wb ^= b & 1;
1446 break;
1447 case 0xa3:
1448 bt: /* bt */
1449 src.val &= (dst.bytes << 3) - 1; /* only subword offset */
1450 emulate_2op_SrcV_nobyte("bt", src, dst, _eflags);
1451 break;
1452 case 0xab:
1453 bts: /* bts */
1454 src.val &= (dst.bytes << 3) - 1; /* only subword offset */
1455 emulate_2op_SrcV_nobyte("bts", src, dst, _eflags);
1267 break; 1456 break;
1268 case 0xb0 ... 0xb1: /* cmpxchg */ 1457 case 0xb0 ... 0xb1: /* cmpxchg */
1269 /* 1458 /*
@@ -1273,8 +1462,6 @@ twobyte_insn:
1273 src.orig_val = src.val; 1462 src.orig_val = src.val;
1274 src.val = _regs[VCPU_REGS_RAX]; 1463 src.val = _regs[VCPU_REGS_RAX];
1275 emulate_2op_SrcV("cmp", src, dst, _eflags); 1464 emulate_2op_SrcV("cmp", src, dst, _eflags);
1276 /* Always write back. The question is: where to? */
1277 d |= Mov;
1278 if (_eflags & EFLG_ZF) { 1465 if (_eflags & EFLG_ZF) {
1279 /* Success: write back to memory. */ 1466 /* Success: write back to memory. */
1280 dst.val = src.orig_val; 1467 dst.val = src.orig_val;
@@ -1284,30 +1471,15 @@ twobyte_insn:
1284 dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; 1471 dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX];
1285 } 1472 }
1286 break; 1473 break;
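
The cmpxchg arm just above (0xb0-0xb1) follows the architectural contract: compare the accumulator with the destination; on match (ZF set) write the source to the destination, otherwise load the destination back into the accumulator, which is why the failure path retargets dst.ptr at RAX. The contract in miniature:

#include <stdint.h>
#include <stdio.h>

/* CMPXCHG: returns the ZF result; *rax and *dst updated per the ISA. */
static int cmpxchg_model(uint64_t *rax, uint64_t *dst, uint64_t src)
{
	if (*rax == *dst) {
		*dst = src;	/* success: write source to memory */
		return 1;	/* ZF = 1 */
	}
	*rax = *dst;		/* failure: load memory into rax */
	return 0;		/* ZF = 0 */
}

int main(void)
{
	uint64_t rax = 7, mem = 7;

	printf("zf=%d mem=%llu\n", cmpxchg_model(&rax, &mem, 9),
	       (unsigned long long)mem);	/* zf=1 mem=9 */
	printf("zf=%d rax=%llu\n", cmpxchg_model(&rax, &mem, 5),
	       (unsigned long long)rax);	/* zf=0 rax=9 */
	return 0;
}
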
1287 case 0xa3:
1288 bt: /* bt */
1289 src.val &= (dst.bytes << 3) - 1; /* only subword offset */
1290 emulate_2op_SrcV_nobyte("bt", src, dst, _eflags);
1291 break;
1292 case 0xb3: 1474 case 0xb3:
1293 btr: /* btr */ 1475 btr: /* btr */
1294 src.val &= (dst.bytes << 3) - 1; /* only subword offset */ 1476 src.val &= (dst.bytes << 3) - 1; /* only subword offset */
1295 emulate_2op_SrcV_nobyte("btr", src, dst, _eflags); 1477 emulate_2op_SrcV_nobyte("btr", src, dst, _eflags);
1296 break; 1478 break;
1297 case 0xab:
1298 bts: /* bts */
1299 src.val &= (dst.bytes << 3) - 1; /* only subword offset */
1300 emulate_2op_SrcV_nobyte("bts", src, dst, _eflags);
1301 break;
1302 case 0xb6 ... 0xb7: /* movzx */ 1479 case 0xb6 ... 0xb7: /* movzx */
1303 dst.bytes = op_bytes; 1480 dst.bytes = op_bytes;
1304 dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val; 1481 dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val;
1305 break; 1482 break;
1306 case 0xbb:
1307 btc: /* btc */
1308 src.val &= (dst.bytes << 3) - 1; /* only subword offset */
1309 emulate_2op_SrcV_nobyte("btc", src, dst, _eflags);
1310 break;
1311 case 0xba: /* Grp8 */ 1483 case 0xba: /* Grp8 */
1312 switch (modrm_reg & 3) { 1484 switch (modrm_reg & 3) {
1313 case 0: 1485 case 0:
@@ -1320,6 +1492,11 @@ twobyte_insn:
1320 goto btc; 1492 goto btc;
1321 } 1493 }
1322 break; 1494 break;
1495 case 0xbb:
1496 btc: /* btc */
1497 src.val &= (dst.bytes << 3) - 1; /* only subword offset */
1498 emulate_2op_SrcV_nobyte("btc", src, dst, _eflags);
1499 break;
1323 case 0xbe ... 0xbf: /* movsx */ 1500 case 0xbe ... 0xbf: /* movsx */
1324 dst.bytes = op_bytes; 1501 dst.bytes = op_bytes;
1325 dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; 1502 dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val;
@@ -1331,14 +1508,14 @@ twobyte_special_insn:
1331 /* Disable writeback. */ 1508 /* Disable writeback. */
1332 no_wb = 1; 1509 no_wb = 1;
1333 switch (b) { 1510 switch (b) {
1511 case 0x06:
1512 emulate_clts(ctxt->vcpu);
1513 break;
1334 case 0x09: /* wbinvd */ 1514 case 0x09: /* wbinvd */
1335 break; 1515 break;
1336 case 0x0d: /* GrpP (prefetch) */ 1516 case 0x0d: /* GrpP (prefetch) */
1337 case 0x18: /* Grp16 (prefetch/nop) */ 1517 case 0x18: /* Grp16 (prefetch/nop) */
1338 break; 1518 break;
1339 case 0x06:
1340 emulate_clts(ctxt->vcpu);
1341 break;
1342 case 0x20: /* mov cr, reg */ 1519 case 0x20: /* mov cr, reg */
1343 if (modrm_mod != 3) 1520 if (modrm_mod != 3)
1344 goto cannot_emulate; 1521 goto cannot_emulate;
@@ -1355,7 +1532,7 @@ twobyte_special_insn:
1355 | ((u64)_regs[VCPU_REGS_RDX] << 32); 1532 | ((u64)_regs[VCPU_REGS_RDX] << 32);
1356 rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data); 1533 rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data);
1357 if (rc) { 1534 if (rc) {
1358 kvm_arch_ops->inject_gp(ctxt->vcpu, 0); 1535 kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
1359 _eip = ctxt->vcpu->rip; 1536 _eip = ctxt->vcpu->rip;
1360 } 1537 }
1361 rc = X86EMUL_CONTINUE; 1538 rc = X86EMUL_CONTINUE;
@@ -1364,7 +1541,7 @@ twobyte_special_insn:
1364 /* rdmsr */ 1541 /* rdmsr */
1365 rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data); 1542 rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data);
1366 if (rc) { 1543 if (rc) {
1367 kvm_arch_ops->inject_gp(ctxt->vcpu, 0); 1544 kvm_x86_ops->inject_gp(ctxt->vcpu, 0);
1368 _eip = ctxt->vcpu->rip; 1545 _eip = ctxt->vcpu->rip;
1369 } else { 1546 } else {
1370 _regs[VCPU_REGS_RAX] = (u32)msr_data; 1547 _regs[VCPU_REGS_RAX] = (u32)msr_data;
@@ -1372,10 +1549,32 @@ twobyte_special_insn:
1372 } 1549 }
1373 rc = X86EMUL_CONTINUE; 1550 rc = X86EMUL_CONTINUE;
1374 break; 1551 break;
1552 case 0x80 ... 0x8f: /* jnz rel, etc. */ {
1553 long int rel;
1554
1555 switch (op_bytes) {
1556 case 2:
1557 rel = insn_fetch(s16, 2, _eip);
1558 break;
1559 case 4:
1560 rel = insn_fetch(s32, 4, _eip);
1561 break;
1562 case 8:
1563 rel = insn_fetch(s64, 8, _eip);
1564 break;
1565 default:
1566 DPRINTF("jnz: Invalid op_bytes\n");
1567 goto cannot_emulate;
1568 }
1569 if (test_cc(b, _eflags))
1570 JMP_REL(rel);
1571 break;
1572 }
1375 case 0xc7: /* Grp9 (cmpxchg8b) */ 1573 case 0xc7: /* Grp9 (cmpxchg8b) */
1376 { 1574 {
1377 u64 old, new; 1575 u64 old, new;
1378 if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) 1576 if ((rc = ops->read_emulated(cr2, &old, 8, ctxt->vcpu))
1577 != 0)
1379 goto done; 1578 goto done;
1380 if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || 1579 if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) ||
1381 ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) { 1580 ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) {
@@ -1386,7 +1585,7 @@ twobyte_special_insn:
1386 new = ((u64)_regs[VCPU_REGS_RCX] << 32) 1585 new = ((u64)_regs[VCPU_REGS_RCX] << 32)
1387 | (u32) _regs[VCPU_REGS_RBX]; 1586 | (u32) _regs[VCPU_REGS_RBX];
1388 if ((rc = ops->cmpxchg_emulated(cr2, &old, 1587 if ((rc = ops->cmpxchg_emulated(cr2, &old,
1389 &new, 8, ctxt)) != 0) 1588 &new, 8, ctxt->vcpu)) != 0)
1390 goto done; 1589 goto done;
1391 _eflags |= EFLG_ZF; 1590 _eflags |= EFLG_ZF;
1392 } 1591 }
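
cmpxchg8b, in the last hunk above, extends the same contract to a 64-bit pair: EDX:EAX is compared with the memory operand; on match ECX:EBX is stored there and ZF is set, otherwise the operand is loaded into EDX:EAX. A sketch:

#include <stdint.h>
#include <stdio.h>

/* CMPXCHG8B: compare EDX:EAX with a 64-bit memory operand; on match
 * store ECX:EBX there and set ZF, else load the operand into EDX:EAX. */
static int cmpxchg8b_model(uint32_t *eax, uint32_t *edx,
			   uint32_t ebx, uint32_t ecx, uint64_t *mem)
{
	uint64_t expect = ((uint64_t)*edx << 32) | *eax;

	if (*mem == expect) {
		*mem = ((uint64_t)ecx << 32) | ebx;
		return 1;			/* ZF = 1 */
	}
	*eax = (uint32_t)*mem;
	*edx = (uint32_t)(*mem >> 32);
	return 0;				/* ZF = 0 */
}

int main(void)
{
	uint32_t eax = 0x1111, edx = 0x2222;
	uint64_t mem = ((uint64_t)0x2222 << 32) | 0x1111;

	printf("zf=%d mem=%#llx\n",
	       cmpxchg8b_model(&eax, &edx, 0xaaaa, 0xbbbb, &mem),
	       (unsigned long long)mem);	/* zf=1 mem=0xbbbb0000aaaa */
	return 0;
}
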
diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h
index ea3407d7feee..92c73aa7f9ac 100644
--- a/drivers/kvm/x86_emulate.h
+++ b/drivers/kvm/x86_emulate.h
@@ -60,7 +60,7 @@ struct x86_emulate_ops {
60 * @bytes: [IN ] Number of bytes to read from memory. 60 * @bytes: [IN ] Number of bytes to read from memory.
61 */ 61 */
62 int (*read_std)(unsigned long addr, void *val, 62 int (*read_std)(unsigned long addr, void *val,
63 unsigned int bytes, struct x86_emulate_ctxt * ctxt); 63 unsigned int bytes, struct kvm_vcpu *vcpu);
64 64
65 /* 65 /*
66 * write_std: Write bytes of standard (non-emulated/special) memory. 66 * write_std: Write bytes of standard (non-emulated/special) memory.
@@ -71,7 +71,7 @@ struct x86_emulate_ops {
71 * @bytes: [IN ] Number of bytes to write to memory. 71 * @bytes: [IN ] Number of bytes to write to memory.
72 */ 72 */
73 int (*write_std)(unsigned long addr, const void *val, 73 int (*write_std)(unsigned long addr, const void *val,
74 unsigned int bytes, struct x86_emulate_ctxt * ctxt); 74 unsigned int bytes, struct kvm_vcpu *vcpu);
75 75
76 /* 76 /*
77 * read_emulated: Read bytes from emulated/special memory area. 77 * read_emulated: Read bytes from emulated/special memory area.
@@ -82,7 +82,7 @@ struct x86_emulate_ops {
82 int (*read_emulated) (unsigned long addr, 82 int (*read_emulated) (unsigned long addr,
83 void *val, 83 void *val,
84 unsigned int bytes, 84 unsigned int bytes,
85 struct x86_emulate_ctxt * ctxt); 85 struct kvm_vcpu *vcpu);
86 86
87 /* 87 /*
88 * write_emulated: Write bytes to emulated/special memory area. 88 * write_emulated: Write bytes to emulated/special memory area.
@@ -94,7 +94,7 @@ struct x86_emulate_ops {
94 int (*write_emulated) (unsigned long addr, 94 int (*write_emulated) (unsigned long addr,
95 const void *val, 95 const void *val,
96 unsigned int bytes, 96 unsigned int bytes,
97 struct x86_emulate_ctxt * ctxt); 97 struct kvm_vcpu *vcpu);
98 98
99 /* 99 /*
100 * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an 100 * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
@@ -108,12 +108,10 @@ struct x86_emulate_ops {
108 const void *old, 108 const void *old,
109 const void *new, 109 const void *new,
110 unsigned int bytes, 110 unsigned int bytes,
111 struct x86_emulate_ctxt * ctxt); 111 struct kvm_vcpu *vcpu);
112 112
113}; 113};
114 114
115struct cpu_user_regs;
116
117struct x86_emulate_ctxt { 115struct x86_emulate_ctxt {
118 /* Register state before/after emulation. */ 116 /* Register state before/after emulation. */
119 struct kvm_vcpu *vcpu; 117 struct kvm_vcpu *vcpu;
@@ -154,12 +152,4 @@ struct x86_emulate_ctxt {
154int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, 152int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
155 struct x86_emulate_ops *ops); 153 struct x86_emulate_ops *ops);
156 154
157/*
158 * Given the 'reg' portion of a ModRM byte, and a register block, return a
159 * pointer into the block that addresses the relevant register.
160 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
161 */
162void *decode_register(u8 modrm_reg, unsigned long *regs,
163 int highbyte_regs);
164
165#endif /* __X86_EMULATE_H__ */ 155#endif /* __X86_EMULATE_H__ */
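
The signature change above threads the kvm_vcpu through every callback in place of the emulate context, so host memory accessors no longer need to dig the vcpu out of ctxt. A minimal sketch of a read_std implementation against the new prototype (the flat backing store and helper name are invented for illustration; struct kvm_vcpu stays opaque):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct kvm_vcpu;		/* opaque to the emulator core */

static uint8_t flat_mem[4096];	/* invented flat guest "memory" */

/* Matches the new prototype: the vcpu, not x86_emulate_ctxt, is passed.
 * Returns 0 on success, as insn_fetch() and friends expect. */
static int flat_read_std(unsigned long addr, void *val,
			 unsigned int bytes, struct kvm_vcpu *vcpu)
{
	(void)vcpu;	/* a real host would translate through the vcpu */
	if (addr + bytes > sizeof(flat_mem))
		return -1;
	memcpy(val, &flat_mem[addr], bytes);
	return 0;
}

int main(void)
{
	unsigned long word = 0;

	flat_mem[0] = 0x90;	/* a single nop */
	return (flat_read_std(0, &word, 1, NULL) == 0 && word == 0x90) ? 0 : 1;
}
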
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index a2191a4fcf77..342517261ece 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -54,8 +54,6 @@ static void emc_endio(struct bio *bio, int error)
54 54
55 /* request is freed in block layer */ 55 /* request is freed in block layer */
56 free_bio(bio); 56 free_bio(bio);
57
58 return 0;
59} 57}
60 58
61static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size) 59static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size)
diff --git a/drivers/media/video/videobuf-core.c b/drivers/media/video/videobuf-core.c
index c606332512b6..5599a36490fc 100644
--- a/drivers/media/video/videobuf-core.c
+++ b/drivers/media/video/videobuf-core.c
@@ -674,7 +674,7 @@ ssize_t videobuf_read_one(struct videobuf_queue *q,
674 } 674 }
675 675
676 /* Copy to userspace */ 676 /* Copy to userspace */
677 retval=CALL(q,copy_to_user,q,data,count,nonblocking); 677 retval=CALL(q,video_copy_to_user,q,data,count,nonblocking);
678 if (retval<0) 678 if (retval<0)
679 goto done; 679 goto done;
680 680
diff --git a/drivers/media/video/videobuf-dma-sg.c b/drivers/media/video/videobuf-dma-sg.c
index 8bb7fdd306d6..3eb6123227b2 100644
--- a/drivers/media/video/videobuf-dma-sg.c
+++ b/drivers/media/video/videobuf-dma-sg.c
@@ -670,7 +670,7 @@ static struct videobuf_qtype_ops pci_ops = {
670 .sync = __videobuf_sync, 670 .sync = __videobuf_sync,
671 .mmap_free = __videobuf_mmap_free, 671 .mmap_free = __videobuf_mmap_free,
672 .mmap_mapper = __videobuf_mmap_mapper, 672 .mmap_mapper = __videobuf_mmap_mapper,
673 .copy_to_user = __videobuf_copy_to_user, 673 .video_copy_to_user = __videobuf_copy_to_user,
674 .copy_stream = __videobuf_copy_stream, 674 .copy_stream = __videobuf_copy_stream,
675}; 675};
676 676
diff --git a/drivers/media/video/videobuf-vmalloc.c b/drivers/media/video/videobuf-vmalloc.c
index 2e3689a12a28..cd74341c984f 100644
--- a/drivers/media/video/videobuf-vmalloc.c
+++ b/drivers/media/video/videobuf-vmalloc.c
@@ -320,7 +320,7 @@ static struct videobuf_qtype_ops qops = {
320 .sync = __videobuf_sync, 320 .sync = __videobuf_sync,
321 .mmap_free = __videobuf_mmap_free, 321 .mmap_free = __videobuf_mmap_free,
322 .mmap_mapper = __videobuf_mmap_mapper, 322 .mmap_mapper = __videobuf_mmap_mapper,
323 .copy_to_user = __videobuf_copy_to_user, 323 .video_copy_to_user = __videobuf_copy_to_user,
324 .copy_stream = __videobuf_copy_stream, 324 .copy_stream = __videobuf_copy_stream,
325}; 325};
326 326
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index fbec8cd55e38..8848e8ac705d 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -278,6 +278,14 @@ config SSFDC
278 This enables read only access to SmartMedia formatted NAND 278 This enables read only access to SmartMedia formatted NAND
279 flash. You can mount it with FAT file system. 279 flash. You can mount it with FAT file system.
280 280
281config MTD_OOPS
282 tristate "Log panic/oops to an MTD buffer"
283 depends on MTD
284 help
285 This enables panic and oops messages to be logged to a circular
286 buffer in a flash partition where they can be read back at some
287 later point.
288
281source "drivers/mtd/chips/Kconfig" 289source "drivers/mtd/chips/Kconfig"
282 290
283source "drivers/mtd/maps/Kconfig" 291source "drivers/mtd/maps/Kconfig"
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 6d958a4566ff..7f0b04b4caa7 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_NFTL) += nftl.o
22obj-$(CONFIG_INFTL) += inftl.o 22obj-$(CONFIG_INFTL) += inftl.o
23obj-$(CONFIG_RFD_FTL) += rfd_ftl.o 23obj-$(CONFIG_RFD_FTL) += rfd_ftl.o
24obj-$(CONFIG_SSFDC) += ssfdc.o 24obj-$(CONFIG_SSFDC) += ssfdc.o
25obj-$(CONFIG_MTD_OOPS) += mtdoops.o
25 26
26nftl-objs := nftlcore.o nftlmount.o 27nftl-objs := nftlcore.o nftlmount.o
27inftl-objs := inftlcore.o inftlmount.o 28inftl-objs := inftlcore.o inftlmount.o
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 2f19fa78d24a..3aa3dca56ae6 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -526,7 +526,7 @@ static int cfi_intelext_partition_fixup(struct mtd_info *mtd,
526 struct cfi_pri_intelext *extp = cfi->cmdset_priv; 526 struct cfi_pri_intelext *extp = cfi->cmdset_priv;
527 527
528 /* 528 /*
529 * Probing of multi-partition flash ships. 529 * Probing of multi-partition flash chips.
530 * 530 *
531 * To support multiple partitions when available, we simply arrange 531 * To support multiple partitions when available, we simply arrange
532 * for each of them to have their own flchip structure even if they 532 * for each of them to have their own flchip structure even if they
@@ -653,7 +653,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
653 resettime: 653 resettime:
654 timeo = jiffies + HZ; 654 timeo = jiffies + HZ;
655 retry: 655 retry:
656 if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE)) { 656 if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE || mode == FL_SHUTDOWN)) {
657 /* 657 /*
658 * OK. We have the possibility of contention on the write/erase 658 * OK. We have the possibility of contention on the write/erase
659 * operations which are global to the real chip and not per 659 * operations which are global to the real chip and not per
@@ -798,6 +798,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
798 if (mode == FL_READY && chip->oldstate == FL_READY) 798 if (mode == FL_READY && chip->oldstate == FL_READY)
799 return 0; 799 return 0;
800 800
801 case FL_SHUTDOWN:
802 /* The machine is rebooting now,so no one can get chip anymore */
803 return -EIO;
801 default: 804 default:
802 sleep: 805 sleep:
803 set_current_state(TASK_UNINTERRUPTIBLE); 806 set_current_state(TASK_UNINTERRUPTIBLE);
@@ -1166,28 +1169,34 @@ static int cfi_intelext_point (struct mtd_info *mtd, loff_t from, size_t len, si
1166{ 1169{
1167 struct map_info *map = mtd->priv; 1170 struct map_info *map = mtd->priv;
1168 struct cfi_private *cfi = map->fldrv_priv; 1171 struct cfi_private *cfi = map->fldrv_priv;
1169 unsigned long ofs; 1172 unsigned long ofs, last_end = 0;
1170 int chipnum; 1173 int chipnum;
1171 int ret = 0; 1174 int ret = 0;
1172 1175
1173 if (!map->virt || (from + len > mtd->size)) 1176 if (!map->virt || (from + len > mtd->size))
1174 return -EINVAL; 1177 return -EINVAL;
1175 1178
1176 *mtdbuf = (void *)map->virt + from;
1177 *retlen = 0;
1178
1179 /* Now lock the chip(s) to POINT state */ 1179 /* Now lock the chip(s) to POINT state */
1180 1180
1181 /* ofs: offset within the first chip that the first read should start */ 1181 /* ofs: offset within the first chip that the first read should start */
1182 chipnum = (from >> cfi->chipshift); 1182 chipnum = (from >> cfi->chipshift);
1183 ofs = from - (chipnum << cfi->chipshift); 1183 ofs = from - (chipnum << cfi->chipshift);
1184 1184
1185 *mtdbuf = (void *)map->virt + cfi->chips[chipnum].start + ofs;
1186 *retlen = 0;
1187
1185 while (len) { 1188 while (len) {
1186 unsigned long thislen; 1189 unsigned long thislen;
1187 1190
1188 if (chipnum >= cfi->numchips) 1191 if (chipnum >= cfi->numchips)
1189 break; 1192 break;
1190 1193
1194 /* We cannot point across chips that are virtually disjoint */
1195 if (!last_end)
1196 last_end = cfi->chips[chipnum].start;
1197 else if (cfi->chips[chipnum].start != last_end)
1198 break;
1199
1191 if ((len + ofs -1) >> cfi->chipshift) 1200 if ((len + ofs -1) >> cfi->chipshift)
1192 thislen = (1<<cfi->chipshift) - ofs; 1201 thislen = (1<<cfi->chipshift) - ofs;
1193 else 1202 else
@@ -1201,6 +1210,7 @@ static int cfi_intelext_point (struct mtd_info *mtd, loff_t from, size_t len, si
1201 len -= thislen; 1210 len -= thislen;
1202 1211
1203 ofs = 0; 1212 ofs = 0;
1213 last_end += 1 << cfi->chipshift;
1204 chipnum++; 1214 chipnum++;
1205 } 1215 }
1206 return 0; 1216 return 0;
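
The two corrections in this hunk are easiest to see as address arithmetic: the returned pointer must include cfi->chips[chipnum].start, and pointing may only continue while each chip begins exactly where the previous one ended (last_end). A sketch with two 4 MiB chips that sit disjoint within the map (start values invented):

#include <stdio.h>

static const unsigned long chip_start[] = { 0x000000, 0x800000 };
static const int chipshift = 22;	/* 4 MiB per chip */

int main(void)
{
	unsigned long from = 0x500000;	/* logical offset: lands in chip 1 */
	int chipnum = from >> chipshift;
	unsigned long ofs = from - ((unsigned long)chipnum << chipshift);

	/* Old code returned virt + from; the fix adds the chip's start. */
	printf("old: virt + %#lx, fixed: virt + %#lx\n",
	       from, chip_start[chipnum] + ofs);	/* 0x500000 vs 0x900000 */
	return 0;
}

With contiguous chips (start == chipnum << chipshift) the two agree, which is why the bug only bit multi-chip maps with gaps.
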
@@ -1780,7 +1790,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
1780 return ret; 1790 return ret;
1781} 1791}
1782 1792
1783int cfi_intelext_erase_varsize(struct mtd_info *mtd, struct erase_info *instr) 1793static int cfi_intelext_erase_varsize(struct mtd_info *mtd, struct erase_info *instr)
1784{ 1794{
1785 unsigned long ofs, len; 1795 unsigned long ofs, len;
1786 int ret; 1796 int ret;
@@ -1930,7 +1940,7 @@ static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
1930 printk(KERN_DEBUG "%s: lock status before, ofs=0x%08llx, len=0x%08X\n", 1940 printk(KERN_DEBUG "%s: lock status before, ofs=0x%08llx, len=0x%08X\n",
1931 __FUNCTION__, ofs, len); 1941 __FUNCTION__, ofs, len);
1932 cfi_varsize_frob(mtd, do_printlockstatus_oneblock, 1942 cfi_varsize_frob(mtd, do_printlockstatus_oneblock,
1933 ofs, len, 0); 1943 ofs, len, NULL);
1934#endif 1944#endif
1935 1945
1936 ret = cfi_varsize_frob(mtd, do_xxlock_oneblock, 1946 ret = cfi_varsize_frob(mtd, do_xxlock_oneblock,
@@ -1940,7 +1950,7 @@ static int cfi_intelext_lock(struct mtd_info *mtd, loff_t ofs, size_t len)
1940 printk(KERN_DEBUG "%s: lock status after, ret=%d\n", 1950 printk(KERN_DEBUG "%s: lock status after, ret=%d\n",
1941 __FUNCTION__, ret); 1951 __FUNCTION__, ret);
1942 cfi_varsize_frob(mtd, do_printlockstatus_oneblock, 1952 cfi_varsize_frob(mtd, do_printlockstatus_oneblock,
1943 ofs, len, 0); 1953 ofs, len, NULL);
1944#endif 1954#endif
1945 1955
1946 return ret; 1956 return ret;
@@ -1954,7 +1964,7 @@ static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, size_t len)
1954 printk(KERN_DEBUG "%s: lock status before, ofs=0x%08llx, len=0x%08X\n", 1964 printk(KERN_DEBUG "%s: lock status before, ofs=0x%08llx, len=0x%08X\n",
1955 __FUNCTION__, ofs, len); 1965 __FUNCTION__, ofs, len);
1956 cfi_varsize_frob(mtd, do_printlockstatus_oneblock, 1966 cfi_varsize_frob(mtd, do_printlockstatus_oneblock,
1957 ofs, len, 0); 1967 ofs, len, NULL);
1958#endif 1968#endif
1959 1969
1960 ret = cfi_varsize_frob(mtd, do_xxlock_oneblock, 1970 ret = cfi_varsize_frob(mtd, do_xxlock_oneblock,
@@ -1964,7 +1974,7 @@ static int cfi_intelext_unlock(struct mtd_info *mtd, loff_t ofs, size_t len)
1964 printk(KERN_DEBUG "%s: lock status after, ret=%d\n", 1974 printk(KERN_DEBUG "%s: lock status after, ret=%d\n",
1965 __FUNCTION__, ret); 1975 __FUNCTION__, ret);
1966 cfi_varsize_frob(mtd, do_printlockstatus_oneblock, 1976 cfi_varsize_frob(mtd, do_printlockstatus_oneblock,
1967 ofs, len, 0); 1977 ofs, len, NULL);
1968#endif 1978#endif
1969 1979
1970 return ret; 1980 return ret;
@@ -2255,7 +2265,7 @@ static void cfi_intelext_save_locks(struct mtd_info *mtd)
2255 adr = region->offset + block * len; 2265 adr = region->offset + block * len;
2256 2266
2257 status = cfi_varsize_frob(mtd, 2267 status = cfi_varsize_frob(mtd,
2258 do_getlockstatus_oneblock, adr, len, 0); 2268 do_getlockstatus_oneblock, adr, len, NULL);
2259 if (status) 2269 if (status)
2260 set_bit(block, region->lockmap); 2270 set_bit(block, region->lockmap);
2261 else 2271 else
@@ -2402,10 +2412,10 @@ static int cfi_intelext_reset(struct mtd_info *mtd)
2402 and switch to array mode so any bootloader in 2412 and switch to array mode so any bootloader in
2403 flash is accessible for soft reboot. */ 2413 flash is accessible for soft reboot. */
2404 spin_lock(chip->mutex); 2414 spin_lock(chip->mutex);
2405 ret = get_chip(map, chip, chip->start, FL_SYNCING); 2415 ret = get_chip(map, chip, chip->start, FL_SHUTDOWN);
2406 if (!ret) { 2416 if (!ret) {
2407 map_write(map, CMD(0xff), chip->start); 2417 map_write(map, CMD(0xff), chip->start);
2408 chip->state = FL_READY; 2418 chip->state = FL_SHUTDOWN;
2409 } 2419 }
2410 spin_unlock(chip->mutex); 2420 spin_unlock(chip->mutex);
2411 } 2421 }
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 1f6445840461..389acc600f5e 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1609,7 +1609,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
1609} 1609}
1610 1610
1611 1611
1612int cfi_amdstd_erase_varsize(struct mtd_info *mtd, struct erase_info *instr) 1612static int cfi_amdstd_erase_varsize(struct mtd_info *mtd, struct erase_info *instr)
1613{ 1613{
1614 unsigned long ofs, len; 1614 unsigned long ofs, len;
1615 int ret; 1615 int ret;
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index 58e561e87699..a67b23b87fc0 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -17,7 +17,6 @@
17#include <linux/errno.h> 17#include <linux/errno.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/init.h>
21 20
22#include <linux/mtd/mtd.h> 21#include <linux/mtd/mtd.h>
23#include <linux/mtd/map.h> 22#include <linux/mtd/map.h>
@@ -70,6 +69,7 @@
70 69
71/* Fujitsu */ 70/* Fujitsu */
72#define MBM29F040C 0x00A4 71#define MBM29F040C 0x00A4
72#define MBM29F800BA 0x2258
73#define MBM29LV650UE 0x22D7 73#define MBM29LV650UE 0x22D7
74#define MBM29LV320TE 0x22F6 74#define MBM29LV320TE 0x22F6
75#define MBM29LV320BE 0x22F9 75#define MBM29LV320BE 0x22F9
@@ -129,6 +129,7 @@
129#define LH28F640BF 0x00b0 129#define LH28F640BF 0x00b0
130 130
131/* ST - www.st.com */ 131/* ST - www.st.com */
132#define M29F800AB 0x0058
132#define M29W800DT 0x00D7 133#define M29W800DT 0x00D7
133#define M29W800DB 0x005B 134#define M29W800DB 0x005B
134#define M29W160DT 0x22C4 135#define M29W160DT 0x22C4
@@ -646,6 +647,23 @@ static const struct amd_flash_info jedec_table[] = {
646 } 647 }
647 }, { 648 }, {
648 .mfr_id = MANUFACTURER_FUJITSU, 649 .mfr_id = MANUFACTURER_FUJITSU,
650 .dev_id = MBM29F800BA,
651 .name = "Fujitsu MBM29F800BA",
652 .uaddr = {
653 [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */
654 [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */
655 },
656 .DevSize = SIZE_1MiB,
657 .CmdSet = P_ID_AMD_STD,
658 .NumEraseRegions= 4,
659 .regions = {
660 ERASEINFO(0x04000,1),
661 ERASEINFO(0x02000,2),
662 ERASEINFO(0x08000,1),
663 ERASEINFO(0x10000,15),
664 }
665 }, {
666 .mfr_id = MANUFACTURER_FUJITSU,
649 .dev_id = MBM29LV650UE, 667 .dev_id = MBM29LV650UE,
650 .name = "Fujitsu MBM29LV650UE", 668 .name = "Fujitsu MBM29LV650UE",
651 .uaddr = { 669 .uaddr = {
@@ -1510,6 +1528,23 @@ static const struct amd_flash_info jedec_table[] = {
1510 ERASEINFO(0x1000,256) 1528 ERASEINFO(0x1000,256)
1511 } 1529 }
1512 1530
1531 }, {
1532 .mfr_id = MANUFACTURER_ST,
1533 .dev_id = M29F800AB,
1534 .name = "ST M29F800AB",
1535 .uaddr = {
1536 [0] = MTD_UADDR_0x0AAA_0x0555, /* x8 */
1537 [1] = MTD_UADDR_0x0555_0x02AA, /* x16 */
1538 },
1539 .DevSize = SIZE_1MiB,
1540 .CmdSet = P_ID_AMD_STD,
1541 .NumEraseRegions= 4,
1542 .regions = {
1543 ERASEINFO(0x04000,1),
1544 ERASEINFO(0x02000,2),
1545 ERASEINFO(0x08000,1),
1546 ERASEINFO(0x10000,15),
1547 }
1513 }, { 1548 }, {
1514 .mfr_id = MANUFACTURER_ST, /* FIXME - CFI device? */ 1549 .mfr_id = MANUFACTURER_ST, /* FIXME - CFI device? */
1515 .dev_id = M29W800DT, 1550 .dev_id = M29W800DT,
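Both new table entries (Fujitsu MBM29F800BA and ST M29F800AB) declare the same 1 MiB layout, and the regions check out: 1*0x4000 + 2*0x2000 + 1*0x8000 + 15*0x10000 = 0x100000 = SIZE_1MiB. The .uaddr values select the AMD-style unlock addresses for x8 versus x16 bus wiring. A decode sketch for the packed region entries, assuming ERASEINFO(size, blocks) expands to ((size) << 8) | ((blocks) - 1) as in jedec_probe.c (check the macro before relying on this):

	/* hypothetical helpers, only valid under the assumption above */
	static inline unsigned long eraseinfo_size(unsigned long ei)
	{
		return ei >> 8;		/* region sizes are multiples of 0x100 */
	}

	static inline unsigned int eraseinfo_blocks(unsigned long ei)
	{
		return (ei & 0xff) + 1;	/* stored as blocks - 1 */
	}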
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index ff642f8fbee7..811d56fd890f 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -60,21 +60,22 @@ config MTD_DATAFLASH
60 Sometimes DataFlash chips are packaged inside MMC-format 60 Sometimes DataFlash chips are packaged inside MMC-format
61 cards; at this writing, the MMC stack won't handle those. 61 cards; at this writing, the MMC stack won't handle those.
62 62
63config MTD_DATAFLASH26
64 tristate "AT91RM9200 DataFlash AT26xxx"
65 depends on MTD && ARCH_AT91RM9200 && AT91_SPI
66 help
67 This enables access to the DataFlash chip (AT26xxx) on an
68 AT91RM9200-based board.
69 If you have such a board and such a DataFlash, say 'Y'.
70
71config MTD_M25P80 63config MTD_M25P80
72 tristate "Support for M25 SPI Flash" 64 tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
73 depends on SPI_MASTER && EXPERIMENTAL 65 depends on SPI_MASTER && EXPERIMENTAL
74 help 66 help
75 This enables access to ST M25P80 and similar SPI flash chips, 67 This enables access to most modern SPI flash chips, used for
76 used for program and data storage. Set up your spi devices 68 program and data storage. Series supported include Atmel AT26DF,
77 with the right board-specific platform data. 69 Spansion S25SL, SST 25VF, ST M25P, and Winbond W25X. Other chips
 70 are supported as well. See the driver source for the current list,
 71 or to add support for other chips.
72
 73 Note that the original DataFlash chips (AT45 series, not AT26DF)
 74 need an entirely different driver.
75
76 Set up your spi devices with the right board-specific platform data,
77 if you want to specify device partitioning or to use a device which
78 doesn't support the JEDEC ID instruction.
78 79
79config MTD_SLRAM 80config MTD_SLRAM
80 tristate "Uncached system RAM" 81 tristate "Uncached system RAM"
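The last paragraph of the new help text is what board code acts on; a minimal board-file sketch (all names and numbers hypothetical; flash_platform_data comes from <linux/spi/flash.h>):

	#include <linux/spi/spi.h>
	#include <linux/spi/flash.h>
	#include <linux/mtd/partitions.h>

	/* one partition spanning the whole (hypothetical) chip */
	static struct mtd_partition board_flash_parts[] = {
		{
			.name	= "data",
			.offset	= 0,
			.size	= MTDPART_SIZ_FULL,
		},
	};

	static struct flash_platform_data board_flash_data = {
		.name		= "board-flash",
		.parts		= board_flash_parts,
		.nr_parts	= ARRAY_SIZE(board_flash_parts),
		.type		= "m25p80",	/* may be omitted if the chip answers RDID */
	};

	static struct spi_board_info board_spi_devices[] __initdata = {
		{
			.modalias	= "m25p80",	/* binds to this driver */
			.platform_data	= &board_flash_data,
			.max_speed_hz	= 25 * 1000 * 1000,
			.bus_num	= 0,
			.chip_select	= 0,
		},
	};

	/* ... spi_register_board_info(board_spi_devices,
	 *			      ARRAY_SIZE(board_spi_devices)); */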
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 8ab568b3f533..0f788d5c4bf8 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -16,5 +16,4 @@ obj-$(CONFIG_MTD_MTDRAM) += mtdram.o
16obj-$(CONFIG_MTD_LART) += lart.o 16obj-$(CONFIG_MTD_LART) += lart.o
17obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o 17obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o
18obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o 18obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o
19obj-$(CONFIG_MTD_DATAFLASH26) += at91_dataflash26.o
20obj-$(CONFIG_MTD_M25P80) += m25p80.o 19obj-$(CONFIG_MTD_M25P80) += m25p80.o
diff --git a/drivers/mtd/devices/at91_dataflash26.c b/drivers/mtd/devices/at91_dataflash26.c
deleted file mode 100644
index 64ce37f986fc..000000000000
--- a/drivers/mtd/devices/at91_dataflash26.c
+++ /dev/null
@@ -1,485 +0,0 @@
1/*
2 * Atmel DataFlash driver for Atmel AT91RM9200 (Thunder)
3 * This is a largely modified version of at91_dataflash.c that
4 * supports AT26xxx dataflash chips. The original driver supports
5 * AT45xxx chips.
6 *
7 * Note: This driver was only tested with an AT26F004. It should be
8 * easy to make it work with other AT26xxx dataflash devices, though.
9 *
10 * Copyright (C) 2007 Hans J. Koch <hjk@linutronix.de>
11 * original Copyright (C) SAN People (Pty) Ltd
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * version 2 as published by the Free Software Foundation.
16*/
17
18#include <linux/config.h>
19#include <linux/module.h>
20#include <linux/init.h>
21#include <linux/mtd/mtd.h>
22
23#include <asm/arch/at91_spi.h>
24
25#define DATAFLASH_MAX_DEVICES 4 /* max number of dataflash devices */
26
27#define MANUFACTURER_ID_ATMEL 0x1F
28
29/* command codes */
30
31#define AT26_OP_READ_STATUS 0x05
32#define AT26_OP_READ_DEV_ID 0x9F
33#define AT26_OP_ERASE_PAGE_4K 0x20
34#define AT26_OP_READ_ARRAY_FAST 0x0B
35#define AT26_OP_SEQUENTIAL_WRITE 0xAF
36#define AT26_OP_WRITE_ENABLE 0x06
37#define AT26_OP_WRITE_DISABLE 0x04
38#define AT26_OP_SECTOR_PROTECT 0x36
39#define AT26_OP_SECTOR_UNPROTECT 0x39
40
41/* status register bits */
42
43#define AT26_STATUS_BUSY 0x01
44#define AT26_STATUS_WRITE_ENABLE 0x02
45
46struct dataflash_local
47{
48 int spi; /* SPI chip-select number */
49 unsigned int page_size; /* number of bytes per page */
50};
51
52
53/* Detected DataFlash devices */
54static struct mtd_info* mtd_devices[DATAFLASH_MAX_DEVICES];
55static int nr_devices = 0;
56
57/* Allocate a single SPI transfer descriptor. We're assuming that if multiple
58 SPI transfers occur at the same time, spi_access_bus() will serialize them.
59 If this is not valid, then either (i) each dataflash 'priv' structure
60 needs it's own transfer descriptor, (ii) we lock this one, or (iii) use
61 another mechanism. */
62static struct spi_transfer_list* spi_transfer_desc;
63
64/*
65 * Perform a SPI transfer to access the DataFlash device.
66 */
67static int do_spi_transfer(int nr, char* tx, int tx_len, char* rx, int rx_len,
68 char* txnext, int txnext_len, char* rxnext, int rxnext_len)
69{
70 struct spi_transfer_list* list = spi_transfer_desc;
71
72 list->tx[0] = tx; list->txlen[0] = tx_len;
73 list->rx[0] = rx; list->rxlen[0] = rx_len;
74
75 list->tx[1] = txnext; list->txlen[1] = txnext_len;
76 list->rx[1] = rxnext; list->rxlen[1] = rxnext_len;
77
78 list->nr_transfers = nr;
79 /* Note: spi_transfer() always returns 0, there are no error checks */
80 return spi_transfer(list);
81}
82
83/*
84 * Return the status of the DataFlash device.
85 */
86static unsigned char at91_dataflash26_status(void)
87{
88 unsigned char command[2];
89
90 command[0] = AT26_OP_READ_STATUS;
91 command[1] = 0;
92
93 do_spi_transfer(1, command, 2, command, 2, NULL, 0, NULL, 0);
94
95 return command[1];
96}
97
98/*
99 * Poll the DataFlash device until it is READY.
100 */
101static unsigned char at91_dataflash26_waitready(void)
102{
103 unsigned char status;
104
105 while (1) {
106 status = at91_dataflash26_status();
107 if (!(status & AT26_STATUS_BUSY))
108 return status;
109 }
110}
111
112/*
113 * Enable/disable write access
114 */
115 static void at91_dataflash26_write_enable(int enable)
116{
117 unsigned char cmd[2];
118
119 DEBUG(MTD_DEBUG_LEVEL3, "write_enable: enable=%i\n", enable);
120
121 if (enable)
122 cmd[0] = AT26_OP_WRITE_ENABLE;
123 else
124 cmd[0] = AT26_OP_WRITE_DISABLE;
125 cmd[1] = 0;
126
127 do_spi_transfer(1, cmd, 2, cmd, 2, NULL, 0, NULL, 0);
128}
129
130/*
131 * Protect/unprotect sector
132 */
133 static void at91_dataflash26_sector_protect(loff_t addr, int protect)
134{
135 unsigned char cmd[4];
136
137 DEBUG(MTD_DEBUG_LEVEL3, "sector_protect: addr=0x%06x prot=%d\n",
138 addr, protect);
139
140 if (protect)
141 cmd[0] = AT26_OP_SECTOR_PROTECT;
142 else
143 cmd[0] = AT26_OP_SECTOR_UNPROTECT;
144 cmd[1] = (addr & 0x00FF0000) >> 16;
145 cmd[2] = (addr & 0x0000FF00) >> 8;
146 cmd[3] = (addr & 0x000000FF);
147
148 do_spi_transfer(1, cmd, 4, cmd, 4, NULL, 0, NULL, 0);
149}
150
151/*
152 * Erase blocks of flash.
153 */
154static int at91_dataflash26_erase(struct mtd_info *mtd,
155 struct erase_info *instr)
156{
157 struct dataflash_local *priv = (struct dataflash_local *) mtd->priv;
158 unsigned char cmd[4];
159
160 DEBUG(MTD_DEBUG_LEVEL1, "dataflash_erase: addr=0x%06x len=%i\n",
161 instr->addr, instr->len);
162
163 /* Sanity checks */
164 if (priv->page_size != 4096)
165 return -EINVAL; /* Can't handle other sizes at the moment */
166
167 if ( ((instr->len % mtd->erasesize) != 0)
168 || ((instr->len % priv->page_size) != 0)
169 || ((instr->addr % priv->page_size) != 0)
170 || ((instr->addr + instr->len) > mtd->size))
171 return -EINVAL;
172
173 spi_access_bus(priv->spi);
174
175 while (instr->len > 0) {
176 at91_dataflash26_write_enable(1);
177 at91_dataflash26_sector_protect(instr->addr, 0);
178 at91_dataflash26_write_enable(1);
179 cmd[0] = AT26_OP_ERASE_PAGE_4K;
180 cmd[1] = (instr->addr & 0x00FF0000) >> 16;
181 cmd[2] = (instr->addr & 0x0000FF00) >> 8;
182 cmd[3] = (instr->addr & 0x000000FF);
183
184 DEBUG(MTD_DEBUG_LEVEL3, "ERASE: (0x%02x) 0x%02x 0x%02x"
185 "0x%02x\n",
186 cmd[0], cmd[1], cmd[2], cmd[3]);
187
188 do_spi_transfer(1, cmd, 4, cmd, 4, NULL, 0, NULL, 0);
189 at91_dataflash26_waitready();
190
191 instr->addr += priv->page_size; /* next page */
192 instr->len -= priv->page_size;
193 }
194
195 at91_dataflash26_write_enable(0);
196 spi_release_bus(priv->spi);
197
198 /* Inform MTD subsystem that erase is complete */
199 instr->state = MTD_ERASE_DONE;
200 if (instr->callback)
201 instr->callback(instr);
202
203 return 0;
204}
205
206/*
207 * Read from the DataFlash device.
208 * from : Start offset in flash device
209 * len : Number of bytes to read
210 * retlen : Number of bytes actually read
211 * buf : Buffer that will receive data
212 */
213static int at91_dataflash26_read(struct mtd_info *mtd, loff_t from, size_t len,
214 size_t *retlen, u_char *buf)
215{
216 struct dataflash_local *priv = (struct dataflash_local *) mtd->priv;
217 unsigned char cmd[5];
218
219 DEBUG(MTD_DEBUG_LEVEL1, "dataflash_read: %lli .. %lli\n",
220 from, from+len);
221
222 *retlen = 0;
223
224 /* Sanity checks */
225 if (!len)
226 return 0;
227 if (from + len > mtd->size)
228 return -EINVAL;
229
230 cmd[0] = AT26_OP_READ_ARRAY_FAST;
231 cmd[1] = (from & 0x00FF0000) >> 16;
232 cmd[2] = (from & 0x0000FF00) >> 8;
233 cmd[3] = (from & 0x000000FF);
234 /* cmd[4] is a "Don't care" byte */
235
236 DEBUG(MTD_DEBUG_LEVEL3, "READ: (0x%02x) 0x%02x 0x%02x 0x%02x\n",
237 cmd[0], cmd[1], cmd[2], cmd[3]);
238
239 spi_access_bus(priv->spi);
240 do_spi_transfer(2, cmd, 5, cmd, 5, buf, len, buf, len);
241 spi_release_bus(priv->spi);
242
243 *retlen = len;
244 return 0;
245}
246
247/*
248 * Write to the DataFlash device.
249 * to : Start offset in flash device
250 * len : Number of bytes to write
251 * retlen : Number of bytes actually written
252 * buf : Buffer containing the data
253 */
254static int at91_dataflash26_write(struct mtd_info *mtd, loff_t to, size_t len,
255 size_t *retlen, const u_char *buf)
256{
257 struct dataflash_local *priv = (struct dataflash_local *) mtd->priv;
258 unsigned int addr, buf_index = 0;
259 int ret = -EIO, sector, last_sector;
260 unsigned char status, cmd[5];
261
262 DEBUG(MTD_DEBUG_LEVEL1, "dataflash_write: %lli .. %lli\n", to, to+len);
263
264 *retlen = 0;
265
266 /* Sanity checks */
267 if (!len)
268 return 0;
269 if (to + len > mtd->size)
270 return -EINVAL;
271
272 spi_access_bus(priv->spi);
273
274 addr = to;
275 last_sector = -1;
276
277 while (buf_index < len) {
278 sector = addr / priv->page_size;
279 /* Write first byte if a new sector begins */
280 if (sector != last_sector) {
281 at91_dataflash26_write_enable(1);
282 at91_dataflash26_sector_protect(addr, 0);
283 at91_dataflash26_write_enable(1);
284
285 /* Program first byte of a new sector */
286 cmd[0] = AT26_OP_SEQUENTIAL_WRITE;
287 cmd[1] = (addr & 0x00FF0000) >> 16;
288 cmd[2] = (addr & 0x0000FF00) >> 8;
289 cmd[3] = (addr & 0x000000FF);
290 cmd[4] = buf[buf_index++];
291 do_spi_transfer(1, cmd, 5, cmd, 5, NULL, 0, NULL, 0);
292 status = at91_dataflash26_waitready();
293 addr++;
294 /* On write errors, the chip resets the write enable
295 flag. This also happens after the last byte of a
296 sector is successfully programmed. */
297 if ( ( !(status & AT26_STATUS_WRITE_ENABLE))
298 && ((addr % priv->page_size) != 0) ) {
299 DEBUG(MTD_DEBUG_LEVEL1,
300 "write error1: addr=0x%06x, "
301 "status=0x%02x\n", addr, status);
302 goto write_err;
303 }
304 (*retlen)++;
305 last_sector = sector;
306 }
307
308 /* Write subsequent bytes in the same sector */
309 cmd[0] = AT26_OP_SEQUENTIAL_WRITE;
310 cmd[1] = buf[buf_index++];
311 do_spi_transfer(1, cmd, 2, cmd, 2, NULL, 0, NULL, 0);
312 status = at91_dataflash26_waitready();
313 addr++;
314
315 if ( ( !(status & AT26_STATUS_WRITE_ENABLE))
316 && ((addr % priv->page_size) != 0) ) {
317 DEBUG(MTD_DEBUG_LEVEL1, "write error2: addr=0x%06x, "
318 "status=0x%02x\n", addr, status);
319 goto write_err;
320 }
321
322 (*retlen)++;
323 }
324
325 ret = 0;
326 at91_dataflash26_write_enable(0);
327write_err:
328 spi_release_bus(priv->spi);
329 return ret;
330}
331
332/*
333 * Initialize and register DataFlash device with MTD subsystem.
334 */
335static int __init add_dataflash(int channel, char *name, int nr_pages,
336 int pagesize)
337{
338 struct mtd_info *device;
339 struct dataflash_local *priv;
340
341 if (nr_devices >= DATAFLASH_MAX_DEVICES) {
342 printk(KERN_ERR "at91_dataflash26: Too many devices "
343 "detected\n");
344 return 0;
345 }
346
347 device = kzalloc(sizeof(struct mtd_info) + strlen(name) + 8,
348 GFP_KERNEL);
349 if (!device)
350 return -ENOMEM;
351
352 device->name = (char *)&device[1];
353 sprintf(device->name, "%s.spi%d", name, channel);
354 device->size = nr_pages * pagesize;
355 device->erasesize = pagesize;
356 device->owner = THIS_MODULE;
357 device->type = MTD_DATAFLASH;
358 device->flags = MTD_CAP_NORFLASH;
359 device->erase = at91_dataflash26_erase;
360 device->read = at91_dataflash26_read;
361 device->write = at91_dataflash26_write;
362
363 priv = (struct dataflash_local *)kzalloc(sizeof(struct dataflash_local),
364 GFP_KERNEL);
365 if (!priv) {
366 kfree(device);
367 return -ENOMEM;
368 }
369
370 priv->spi = channel;
371 priv->page_size = pagesize;
372 device->priv = priv;
373
374 mtd_devices[nr_devices] = device;
375 nr_devices++;
376 printk(KERN_INFO "at91_dataflash26: %s detected [spi%i] (%i bytes)\n",
377 name, channel, device->size);
378
379 return add_mtd_device(device);
380}
381
382/*
383 * Detect and initialize DataFlash device connected to specified SPI channel.
384 *
385 */
386
387struct dataflash26_types {
388 unsigned char id0;
389 unsigned char id1;
390 char *name;
391 int pagesize;
392 int nr_pages;
393};
394
395struct dataflash26_types df26_types[] = {
396 {
397 .id0 = 0x04,
398 .id1 = 0x00,
399 .name = "AT26F004",
400 .pagesize = 4096,
401 .nr_pages = 128,
402 },
403 {
404 .id0 = 0x45,
405 .id1 = 0x01,
406 .name = "AT26DF081A", /* Not tested ! */
407 .pagesize = 4096,
408 .nr_pages = 256,
409 },
410};
411
412static int __init at91_dataflash26_detect(int channel)
413{
414 unsigned char status, cmd[5];
415 int i;
416
417 spi_access_bus(channel);
418 status = at91_dataflash26_status();
419
420 if (status == 0 || status == 0xff) {
421 printk(KERN_ERR "at91_dataflash26_detect: status error %d\n",
422 status);
423 spi_release_bus(channel);
424 return -ENODEV;
425 }
426
427 cmd[0] = AT26_OP_READ_DEV_ID;
428 do_spi_transfer(1, cmd, 5, cmd, 5, NULL, 0, NULL, 0);
429 spi_release_bus(channel);
430
431 if (cmd[1] != MANUFACTURER_ID_ATMEL)
432 return -ENODEV;
433
434 for (i = 0; i < ARRAY_SIZE(df26_types); i++) {
435 if ( cmd[2] == df26_types[i].id0
436 && cmd[3] == df26_types[i].id1)
437 return add_dataflash(channel,
438 df26_types[i].name,
439 df26_types[i].nr_pages,
440 df26_types[i].pagesize);
441 }
442
443 printk(KERN_ERR "at91_dataflash26_detect: Unsupported device "
444 "(0x%02x/0x%02x)\n", cmd[2], cmd[3]);
445 return -ENODEV;
446}
447
448static int __init at91_dataflash26_init(void)
449{
450 spi_transfer_desc = kmalloc(sizeof(struct spi_transfer_list),
451 GFP_KERNEL);
452 if (!spi_transfer_desc)
453 return -ENOMEM;
454
455 /* DataFlash (SPI chip select 0) */
456 at91_dataflash26_detect(0);
457
458#ifdef CONFIG_MTD_AT91_DATAFLASH_CARD
459 /* DataFlash card (SPI chip select 3) */
460 at91_dataflash26_detect(3);
461#endif
462 return 0;
463}
464
465static void __exit at91_dataflash26_exit(void)
466{
467 int i;
468
469 for (i = 0; i < DATAFLASH_MAX_DEVICES; i++) {
470 if (mtd_devices[i]) {
471 del_mtd_device(mtd_devices[i]);
472 kfree(mtd_devices[i]->priv);
473 kfree(mtd_devices[i]);
474 }
475 }
476 nr_devices = 0;
477 kfree(spi_transfer_desc);
478}
479
480module_init(at91_dataflash26_init);
481module_exit(at91_dataflash26_exit);
482
483MODULE_LICENSE("GPL");
484MODULE_AUTHOR("Hans J. Koch");
485MODULE_DESCRIPTION("DataFlash AT26xxx driver for Atmel AT91RM9200");
diff --git a/drivers/mtd/devices/docprobe.c b/drivers/mtd/devices/docprobe.c
index 54aa75907640..d8cc94ec4e50 100644
--- a/drivers/mtd/devices/docprobe.c
+++ b/drivers/mtd/devices/docprobe.c
@@ -81,9 +81,7 @@ static unsigned long __initdata doc_locations[] = {
81#endif /* CONFIG_MTD_DOCPROBE_HIGH */ 81#endif /* CONFIG_MTD_DOCPROBE_HIGH */
82#elif defined(__PPC__) 82#elif defined(__PPC__)
83 0xe4000000, 83 0xe4000000,
84#elif defined(CONFIG_MOMENCO_OCELOT_G) 84#else
85 0xff000000,
86##else
87#warning Unknown architecture for DiskOnChip. No default probe locations defined 85#warning Unknown architecture for DiskOnChip. No default probe locations defined
88#endif 86#endif
89 0xffffffff }; 87 0xffffffff };
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 78c2511ae9e0..98df5bcc02f3 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * MTD SPI driver for ST M25Pxx flash chips 2 * MTD SPI driver for ST M25Pxx (and similar) serial flash chips
3 * 3 *
4 * Author: Mike Lavender, mike@steroidmicros.com 4 * Author: Mike Lavender, mike@steroidmicros.com
5 * 5 *
@@ -19,33 +19,32 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/interrupt.h> 22#include <linux/mutex.h>
23
23#include <linux/mtd/mtd.h> 24#include <linux/mtd/mtd.h>
24#include <linux/mtd/partitions.h> 25#include <linux/mtd/partitions.h>
26
25#include <linux/spi/spi.h> 27#include <linux/spi/spi.h>
26#include <linux/spi/flash.h> 28#include <linux/spi/flash.h>
27 29
28#include <asm/semaphore.h>
29
30
31/* NOTE: AT 25F and SST 25LF series are very similar,
32 * but commands for sector erase and chip id differ...
33 */
34 30
35#define FLASH_PAGESIZE 256 31#define FLASH_PAGESIZE 256
36 32
37/* Flash opcodes. */ 33/* Flash opcodes. */
38#define OPCODE_WREN 6 /* Write enable */ 34#define OPCODE_WREN 0x06 /* Write enable */
39#define OPCODE_RDSR 5 /* Read status register */ 35#define OPCODE_RDSR 0x05 /* Read status register */
40#define OPCODE_READ 3 /* Read data bytes */ 36#define OPCODE_READ 0x03 /* Read data bytes (low frequency) */
41#define OPCODE_PP 2 /* Page program */ 37#define OPCODE_FAST_READ 0x0b /* Read data bytes (high frequency) */
42#define OPCODE_SE 0xd8 /* Sector erase */ 38#define OPCODE_PP 0x02 /* Page program (up to 256 bytes) */
43#define OPCODE_RES 0xab /* Read Electronic Signature */ 39#define OPCODE_BE_4K 0x20 /* Erase 4KiB block */
40#define OPCODE_BE_32K 0x52 /* Erase 32KiB block */
41#define OPCODE_SE 0xd8 /* Sector erase (usually 64KiB) */
44#define OPCODE_RDID 0x9f /* Read JEDEC ID */ 42#define OPCODE_RDID 0x9f /* Read JEDEC ID */
45 43
46/* Status Register bits. */ 44/* Status Register bits. */
47#define SR_WIP 1 /* Write in progress */ 45#define SR_WIP 1 /* Write in progress */
48#define SR_WEL 2 /* Write enable latch */ 46#define SR_WEL 2 /* Write enable latch */
47/* meaning of other SR_* bits may differ between vendors */
49#define SR_BP0 4 /* Block protect 0 */ 48#define SR_BP0 4 /* Block protect 0 */
50#define SR_BP1 8 /* Block protect 1 */ 49#define SR_BP1 8 /* Block protect 1 */
51#define SR_BP2 0x10 /* Block protect 2 */ 50#define SR_BP2 0x10 /* Block protect 2 */
@@ -65,9 +64,10 @@
65 64
66struct m25p { 65struct m25p {
67 struct spi_device *spi; 66 struct spi_device *spi;
68 struct semaphore lock; 67 struct mutex lock;
69 struct mtd_info mtd; 68 struct mtd_info mtd;
70 unsigned partitioned; 69 unsigned partitioned:1;
70 u8 erase_opcode;
71 u8 command[4]; 71 u8 command[4];
72}; 72};
73 73
@@ -150,8 +150,9 @@ static int wait_till_ready(struct m25p *flash)
150 */ 150 */
151static int erase_sector(struct m25p *flash, u32 offset) 151static int erase_sector(struct m25p *flash, u32 offset)
152{ 152{
153 DEBUG(MTD_DEBUG_LEVEL3, "%s: %s at 0x%08x\n", flash->spi->dev.bus_id, 153 DEBUG(MTD_DEBUG_LEVEL3, "%s: %s %dKiB at 0x%08x\n",
154 __FUNCTION__, offset); 154 flash->spi->dev.bus_id, __FUNCTION__,
155 flash->mtd.erasesize / 1024, offset);
155 156
156 /* Wait until finished previous write command. */ 157 /* Wait until finished previous write command. */
157 if (wait_till_ready(flash)) 158 if (wait_till_ready(flash))
@@ -161,7 +162,7 @@ static int erase_sector(struct m25p *flash, u32 offset)
161 write_enable(flash); 162 write_enable(flash);
162 163
163 /* Set up command buffer. */ 164 /* Set up command buffer. */
164 flash->command[0] = OPCODE_SE; 165 flash->command[0] = flash->erase_opcode;
165 flash->command[1] = offset >> 16; 166 flash->command[1] = offset >> 16;
166 flash->command[2] = offset >> 8; 167 flash->command[2] = offset >> 8;
167 flash->command[3] = offset; 168 flash->command[3] = offset;
@@ -201,13 +202,17 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
201 addr = instr->addr; 202 addr = instr->addr;
202 len = instr->len; 203 len = instr->len;
203 204
204 down(&flash->lock); 205 mutex_lock(&flash->lock);
206
207 /* REVISIT in some cases we could speed up erasing large regions
208 * by using OPCODE_SE instead of OPCODE_BE_4K
209 */
205 210
206 /* now erase those sectors */ 211 /* now erase those sectors */
207 while (len) { 212 while (len) {
208 if (erase_sector(flash, addr)) { 213 if (erase_sector(flash, addr)) {
209 instr->state = MTD_ERASE_FAILED; 214 instr->state = MTD_ERASE_FAILED;
210 up(&flash->lock); 215 mutex_unlock(&flash->lock);
211 return -EIO; 216 return -EIO;
212 } 217 }
213 218
@@ -215,7 +220,7 @@ static int m25p80_erase(struct mtd_info *mtd, struct erase_info *instr)
215 len -= mtd->erasesize; 220 len -= mtd->erasesize;
216 } 221 }
217 222
218 up(&flash->lock); 223 mutex_unlock(&flash->lock);
219 224
220 instr->state = MTD_ERASE_DONE; 225 instr->state = MTD_ERASE_DONE;
221 mtd_erase_callback(instr); 226 mtd_erase_callback(instr);
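The REVISIT note added above invites mixing opcodes; a hypothetical sketch of what that loop could become (illustration only: in the driver as patched, the opcode and erasesize are fixed at probe time, and info is not in scope here):

	while (len) {
		u32 step;

		/* big aligned chunk: use the large-sector erase */
		if ((addr % info->sector_size) == 0 && len >= info->sector_size) {
			flash->erase_opcode = OPCODE_SE;	/* usually 64 KiB */
			step = info->sector_size;
		} else {
			flash->erase_opcode = OPCODE_BE_4K;	/* 4 KiB at the edges */
			step = 4096;
		}

		if (erase_sector(flash, addr)) {
			instr->state = MTD_ERASE_FAILED;
			mutex_unlock(&flash->lock);
			return -EIO;
		}

		addr += step;
		len -= step;
	}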
@@ -260,16 +265,19 @@ static int m25p80_read(struct mtd_info *mtd, loff_t from, size_t len,
260 if (retlen) 265 if (retlen)
261 *retlen = 0; 266 *retlen = 0;
262 267
263 down(&flash->lock); 268 mutex_lock(&flash->lock);
264 269
265 /* Wait till previous write/erase is done. */ 270 /* Wait till previous write/erase is done. */
266 if (wait_till_ready(flash)) { 271 if (wait_till_ready(flash)) {
267 /* REVISIT status return?? */ 272 /* REVISIT status return?? */
268 up(&flash->lock); 273 mutex_unlock(&flash->lock);
269 return 1; 274 return 1;
270 } 275 }
271 276
272 /* NOTE: OPCODE_FAST_READ (if available) is faster... */ 277 /* FIXME switch to OPCODE_FAST_READ. It's required for higher
278 * clocks; and at this writing, every chip this driver handles
279 * supports that opcode.
280 */
273 281
274 /* Set up the write data buffer. */ 282 /* Set up the write data buffer. */
275 flash->command[0] = OPCODE_READ; 283 flash->command[0] = OPCODE_READ;
@@ -281,7 +289,7 @@ static int m25p80_read(struct mtd_info *mtd, loff_t from, size_t len,
281 289
282 *retlen = m.actual_length - sizeof(flash->command); 290 *retlen = m.actual_length - sizeof(flash->command);
283 291
284 up(&flash->lock); 292 mutex_unlock(&flash->lock);
285 293
286 return 0; 294 return 0;
287} 295}
@@ -323,7 +331,7 @@ static int m25p80_write(struct mtd_info *mtd, loff_t to, size_t len,
323 t[1].tx_buf = buf; 331 t[1].tx_buf = buf;
324 spi_message_add_tail(&t[1], &m); 332 spi_message_add_tail(&t[1], &m);
325 333
326 down(&flash->lock); 334 mutex_lock(&flash->lock);
327 335
328 /* Wait until finished previous write command. */ 336 /* Wait until finished previous write command. */
329 if (wait_till_ready(flash)) 337 if (wait_till_ready(flash))
@@ -381,10 +389,10 @@ static int m25p80_write(struct mtd_info *mtd, loff_t to, size_t len,
381 if (retlen) 389 if (retlen)
382 *retlen += m.actual_length 390 *retlen += m.actual_length
383 - sizeof(flash->command); 391 - sizeof(flash->command);
384 } 392 }
385 } 393 }
386 394
387 up(&flash->lock); 395 mutex_unlock(&flash->lock);
388 396
389 return 0; 397 return 0;
390} 398}
@@ -398,24 +406,118 @@ static int m25p80_write(struct mtd_info *mtd, loff_t to, size_t len,
398 406
399struct flash_info { 407struct flash_info {
400 char *name; 408 char *name;
401 u8 id; 409
402 u16 jedec_id; 410 /* JEDEC id zero means "no ID" (most older chips); otherwise it has
411 * a high byte of zero plus three data bytes: the manufacturer id,
412 * then a two byte device id.
413 */
414 u32 jedec_id;
415
416 /* The size listed here is what works with OPCODE_SE, which isn't
417 * necessarily called a "sector" by the vendor.
418 */
403 unsigned sector_size; 419 unsigned sector_size;
404 unsigned n_sectors; 420 u16 n_sectors;
421
422 u16 flags;
423#define SECT_4K 0x01 /* OPCODE_BE_4K works uniformly */
405}; 424};
406 425
426
427/* NOTE: double check command sets and memory organization when you add
 428 * more flash chips. This current list focuses on newer chips, which
 429 * have been converging on command sets which include JEDEC ID.
430 */
407static struct flash_info __devinitdata m25p_data [] = { 431static struct flash_info __devinitdata m25p_data [] = {
408 /* REVISIT: fill in JEDEC ids, for parts that have them */ 432
409 { "m25p05", 0x05, 0x2010, 32 * 1024, 2 }, 433 /* Atmel -- some are (confusingly) marketed as "DataFlash" */
410 { "m25p10", 0x10, 0x2011, 32 * 1024, 4 }, 434 { "at25fs010", 0x1f6601, 32 * 1024, 4, SECT_4K, },
411 { "m25p20", 0x11, 0x2012, 64 * 1024, 4 }, 435 { "at25fs040", 0x1f6604, 64 * 1024, 8, SECT_4K, },
412 { "m25p40", 0x12, 0x2013, 64 * 1024, 8 }, 436
413 { "m25p80", 0x13, 0x0000, 64 * 1024, 16 }, 437 { "at25df041a", 0x1f4401, 64 * 1024, 8, SECT_4K, },
414 { "m25p16", 0x14, 0x2015, 64 * 1024, 32 }, 438
415 { "m25p32", 0x15, 0x2016, 64 * 1024, 64 }, 439 { "at26f004", 0x1f0400, 64 * 1024, 8, SECT_4K, },
416 { "m25p64", 0x16, 0x2017, 64 * 1024, 128 }, 440 { "at26df081a", 0x1f4501, 64 * 1024, 16, SECT_4K, },
441 { "at26df161a", 0x1f4601, 64 * 1024, 32, SECT_4K, },
442 { "at26df321", 0x1f4701, 64 * 1024, 64, SECT_4K, },
443
444 /* Spansion -- single (large) sector size only, at least
445 * for the chips listed here (without boot sectors).
446 */
447 { "s25sl004a", 0x010212, 64 * 1024, 8, },
448 { "s25sl008a", 0x010213, 64 * 1024, 16, },
449 { "s25sl016a", 0x010214, 64 * 1024, 32, },
450 { "s25sl032a", 0x010215, 64 * 1024, 64, },
451 { "s25sl064a", 0x010216, 64 * 1024, 128, },
452
453 /* SST -- large erase sizes are "overlays", "sectors" are 4K */
454 { "sst25vf040b", 0xbf258d, 64 * 1024, 8, SECT_4K, },
455 { "sst25vf080b", 0xbf258e, 64 * 1024, 16, SECT_4K, },
456 { "sst25vf016b", 0xbf2541, 64 * 1024, 32, SECT_4K, },
457 { "sst25vf032b", 0xbf254a, 64 * 1024, 64, SECT_4K, },
458
459 /* ST Microelectronics -- newer production may have feature updates */
460 { "m25p05", 0x202010, 32 * 1024, 2, },
461 { "m25p10", 0x202011, 32 * 1024, 4, },
462 { "m25p20", 0x202012, 64 * 1024, 4, },
463 { "m25p40", 0x202013, 64 * 1024, 8, },
464 { "m25p80", 0, 64 * 1024, 16, },
465 { "m25p16", 0x202015, 64 * 1024, 32, },
466 { "m25p32", 0x202016, 64 * 1024, 64, },
467 { "m25p64", 0x202017, 64 * 1024, 128, },
468 { "m25p128", 0x202018, 256 * 1024, 64, },
469
470 { "m45pe80", 0x204014, 64 * 1024, 16, },
471 { "m45pe16", 0x204015, 64 * 1024, 32, },
472
473 { "m25pe80", 0x208014, 64 * 1024, 16, },
474 { "m25pe16", 0x208015, 64 * 1024, 32, SECT_4K, },
475
476 /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
477 { "w25x10", 0xef3011, 64 * 1024, 2, SECT_4K, },
478 { "w25x20", 0xef3012, 64 * 1024, 4, SECT_4K, },
479 { "w25x40", 0xef3013, 64 * 1024, 8, SECT_4K, },
480 { "w25x80", 0xef3014, 64 * 1024, 16, SECT_4K, },
481 { "w25x16", 0xef3015, 64 * 1024, 32, SECT_4K, },
482 { "w25x32", 0xef3016, 64 * 1024, 64, SECT_4K, },
483 { "w25x64", 0xef3017, 64 * 1024, 128, SECT_4K, },
417}; 484};
418 485
486static struct flash_info *__devinit jedec_probe(struct spi_device *spi)
487{
488 int tmp;
489 u8 code = OPCODE_RDID;
490 u8 id[3];
491 u32 jedec;
492 struct flash_info *info;
493
494 /* JEDEC also defines an optional "extended device information"
495 * string for after vendor-specific data, after the three bytes
496 * we use here. Supporting some chips might require using it.
497 */
498 tmp = spi_write_then_read(spi, &code, 1, id, 3);
499 if (tmp < 0) {
500 DEBUG(MTD_DEBUG_LEVEL0, "%s: error %d reading JEDEC ID\n",
501 spi->dev.bus_id, tmp);
502 return NULL;
503 }
504 jedec = id[0];
505 jedec = jedec << 8;
506 jedec |= id[1];
507 jedec = jedec << 8;
508 jedec |= id[2];
509
510 for (tmp = 0, info = m25p_data;
511 tmp < ARRAY_SIZE(m25p_data);
512 tmp++, info++) {
513 if (info->jedec_id == jedec)
514 return info;
515 }
516 dev_err(&spi->dev, "unrecognized JEDEC id %06x\n", jedec);
517 return NULL;
518}
519
520
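The shift-and-or sequence above just assembles the three ID bytes big-endian; as one expression:

	jedec = (id[0] << 16) | (id[1] << 8) | id[2];
	/* e.g. a w25x80 answers ef 30 14, so jedec == 0xef3014:
	 * 0xef is the JEDEC manufacturer byte (Winbond), 0x3014 the device id */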
419/* 521/*
420 * board specific setup should have ensured the SPI clock used here 522 * board specific setup should have ensured the SPI clock used here
421 * matches what the READ command supports, at least until this driver 523 * matches what the READ command supports, at least until this driver
@@ -429,37 +531,51 @@ static int __devinit m25p_probe(struct spi_device *spi)
429 unsigned i; 531 unsigned i;
430 532
431 /* Platform data helps sort out which chip type we have, as 533 /* Platform data helps sort out which chip type we have, as
432 * well as how this board partitions it. 534 * well as how this board partitions it. If we don't have
535 * a chip ID, try the JEDEC id commands; they'll work for most
536 * newer chips, even if we don't recognize the particular chip.
433 */ 537 */
434 data = spi->dev.platform_data; 538 data = spi->dev.platform_data;
435 if (!data || !data->type) { 539 if (data && data->type) {
436 /* FIXME some chips can identify themselves with RES 540 for (i = 0, info = m25p_data;
437 * or JEDEC get-id commands. Try them ... 541 i < ARRAY_SIZE(m25p_data);
438 */ 542 i++, info++) {
439 DEBUG(MTD_DEBUG_LEVEL1, "%s: no chip id\n", 543 if (strcmp(data->type, info->name) == 0)
440 spi->dev.bus_id); 544 break;
441 return -ENODEV; 545 }
442 }
443 546
444 for (i = 0, info = m25p_data; i < ARRAY_SIZE(m25p_data); i++, info++) { 547 /* unrecognized chip? */
445 if (strcmp(data->type, info->name) == 0) 548 if (i == ARRAY_SIZE(m25p_data)) {
446 break; 549 DEBUG(MTD_DEBUG_LEVEL0, "%s: unrecognized id %s\n",
447 } 550 spi->dev.bus_id, data->type);
448 if (i == ARRAY_SIZE(m25p_data)) { 551 info = NULL;
449 DEBUG(MTD_DEBUG_LEVEL1, "%s: unrecognized id %s\n", 552
450 spi->dev.bus_id, data->type); 553 /* recognized; is that chip really what's there? */
554 } else if (info->jedec_id) {
555 struct flash_info *chip = jedec_probe(spi);
556
557 if (!chip || chip != info) {
558 dev_warn(&spi->dev, "found %s, expected %s\n",
559 chip ? chip->name : "UNKNOWN",
560 info->name);
561 info = NULL;
562 }
563 }
564 } else
565 info = jedec_probe(spi);
566
567 if (!info)
451 return -ENODEV; 568 return -ENODEV;
452 }
453 569
454 flash = kzalloc(sizeof *flash, GFP_KERNEL); 570 flash = kzalloc(sizeof *flash, GFP_KERNEL);
455 if (!flash) 571 if (!flash)
456 return -ENOMEM; 572 return -ENOMEM;
457 573
458 flash->spi = spi; 574 flash->spi = spi;
459 init_MUTEX(&flash->lock); 575 mutex_init(&flash->lock);
460 dev_set_drvdata(&spi->dev, flash); 576 dev_set_drvdata(&spi->dev, flash);
461 577
462 if (data->name) 578 if (data && data->name)
463 flash->mtd.name = data->name; 579 flash->mtd.name = data->name;
464 else 580 else
465 flash->mtd.name = spi->dev.bus_id; 581 flash->mtd.name = spi->dev.bus_id;
@@ -468,17 +584,25 @@ static int __devinit m25p_probe(struct spi_device *spi)
468 flash->mtd.writesize = 1; 584 flash->mtd.writesize = 1;
469 flash->mtd.flags = MTD_CAP_NORFLASH; 585 flash->mtd.flags = MTD_CAP_NORFLASH;
470 flash->mtd.size = info->sector_size * info->n_sectors; 586 flash->mtd.size = info->sector_size * info->n_sectors;
471 flash->mtd.erasesize = info->sector_size;
472 flash->mtd.erase = m25p80_erase; 587 flash->mtd.erase = m25p80_erase;
473 flash->mtd.read = m25p80_read; 588 flash->mtd.read = m25p80_read;
474 flash->mtd.write = m25p80_write; 589 flash->mtd.write = m25p80_write;
475 590
591 /* prefer "small sector" erase if possible */
592 if (info->flags & SECT_4K) {
593 flash->erase_opcode = OPCODE_BE_4K;
594 flash->mtd.erasesize = 4096;
595 } else {
596 flash->erase_opcode = OPCODE_SE;
597 flash->mtd.erasesize = info->sector_size;
598 }
599
476 dev_info(&spi->dev, "%s (%d Kbytes)\n", info->name, 600 dev_info(&spi->dev, "%s (%d Kbytes)\n", info->name,
477 flash->mtd.size / 1024); 601 flash->mtd.size / 1024);
478 602
479 DEBUG(MTD_DEBUG_LEVEL2, 603 DEBUG(MTD_DEBUG_LEVEL2,
480 "mtd .name = %s, .size = 0x%.8x (%uM) " 604 "mtd .name = %s, .size = 0x%.8x (%uMiB) "
481 ".erasesize = 0x%.8x (%uK) .numeraseregions = %d\n", 605 ".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n",
482 flash->mtd.name, 606 flash->mtd.name,
483 flash->mtd.size, flash->mtd.size / (1024*1024), 607 flash->mtd.size, flash->mtd.size / (1024*1024),
484 flash->mtd.erasesize, flash->mtd.erasesize / 1024, 608 flash->mtd.erasesize, flash->mtd.erasesize / 1024,
@@ -488,7 +612,7 @@ static int __devinit m25p_probe(struct spi_device *spi)
488 for (i = 0; i < flash->mtd.numeraseregions; i++) 612 for (i = 0; i < flash->mtd.numeraseregions; i++)
489 DEBUG(MTD_DEBUG_LEVEL2, 613 DEBUG(MTD_DEBUG_LEVEL2,
490 "mtd.eraseregions[%d] = { .offset = 0x%.8x, " 614 "mtd.eraseregions[%d] = { .offset = 0x%.8x, "
491 ".erasesize = 0x%.8x (%uK), " 615 ".erasesize = 0x%.8x (%uKiB), "
492 ".numblocks = %d }\n", 616 ".numblocks = %d }\n",
493 i, flash->mtd.eraseregions[i].offset, 617 i, flash->mtd.eraseregions[i].offset,
494 flash->mtd.eraseregions[i].erasesize, 618 flash->mtd.eraseregions[i].erasesize,
@@ -516,14 +640,14 @@ static int __devinit m25p_probe(struct spi_device *spi)
516 } 640 }
517 641
518 if (nr_parts > 0) { 642 if (nr_parts > 0) {
519 for (i = 0; i < data->nr_parts; i++) { 643 for (i = 0; i < nr_parts; i++) {
520 DEBUG(MTD_DEBUG_LEVEL2, "partitions[%d] = " 644 DEBUG(MTD_DEBUG_LEVEL2, "partitions[%d] = "
521 "{.name = %s, .offset = 0x%.8x, " 645 "{.name = %s, .offset = 0x%.8x, "
522 ".size = 0x%.8x (%uK) }\n", 646 ".size = 0x%.8x (%uKiB) }\n",
523 i, data->parts[i].name, 647 i, parts[i].name,
524 data->parts[i].offset, 648 parts[i].offset,
525 data->parts[i].size, 649 parts[i].size,
526 data->parts[i].size / 1024); 650 parts[i].size / 1024);
527 } 651 }
528 flash->partitioned = 1; 652 flash->partitioned = 1;
529 return add_mtd_partitions(&flash->mtd, parts, nr_parts); 653 return add_mtd_partitions(&flash->mtd, parts, nr_parts);
@@ -560,6 +684,11 @@ static struct spi_driver m25p80_driver = {
560 }, 684 },
561 .probe = m25p_probe, 685 .probe = m25p_probe,
562 .remove = __devexit_p(m25p_remove), 686 .remove = __devexit_p(m25p_remove),
687
688 /* REVISIT: many of these chips have deep power-down modes, which
689 * should clearly be entered on suspend() to minimize power use.
690 * And also when they're otherwise idle...
691 */
563}; 692};
564 693
565 694
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index a987e917f4e0..a5ed6d232c35 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -14,6 +14,7 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/device.h> 16#include <linux/device.h>
17#include <linux/mutex.h>
17#include <linux/spi/spi.h> 18#include <linux/spi/spi.h>
18#include <linux/spi/flash.h> 19#include <linux/spi/flash.h>
19 20
@@ -89,7 +90,7 @@ struct dataflash {
89 unsigned short page_offset; /* offset in flash address */ 90 unsigned short page_offset; /* offset in flash address */
90 unsigned int page_size; /* of bytes per page */ 91 unsigned int page_size; /* of bytes per page */
91 92
92 struct semaphore lock; 93 struct mutex lock;
93 struct spi_device *spi; 94 struct spi_device *spi;
94 95
95 struct mtd_info mtd; 96 struct mtd_info mtd;
@@ -167,7 +168,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
167 x.len = 4; 168 x.len = 4;
168 spi_message_add_tail(&x, &msg); 169 spi_message_add_tail(&x, &msg);
169 170
170 down(&priv->lock); 171 mutex_lock(&priv->lock);
171 while (instr->len > 0) { 172 while (instr->len > 0) {
172 unsigned int pageaddr; 173 unsigned int pageaddr;
173 int status; 174 int status;
@@ -210,7 +211,7 @@ static int dataflash_erase(struct mtd_info *mtd, struct erase_info *instr)
210 instr->len -= priv->page_size; 211 instr->len -= priv->page_size;
211 } 212 }
212 } 213 }
213 up(&priv->lock); 214 mutex_unlock(&priv->lock);
214 215
215 /* Inform MTD subsystem that erase is complete */ 216 /* Inform MTD subsystem that erase is complete */
216 instr->state = MTD_ERASE_DONE; 217 instr->state = MTD_ERASE_DONE;
@@ -266,7 +267,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
266 x[1].len = len; 267 x[1].len = len;
267 spi_message_add_tail(&x[1], &msg); 268 spi_message_add_tail(&x[1], &msg);
268 269
269 down(&priv->lock); 270 mutex_lock(&priv->lock);
270 271
271 /* Continuous read, max clock = f(car) which may be less than 272 /* Continuous read, max clock = f(car) which may be less than
272 * the peak rate available. Some chips support commands with 273 * the peak rate available. Some chips support commands with
@@ -279,7 +280,7 @@ static int dataflash_read(struct mtd_info *mtd, loff_t from, size_t len,
279 /* plus 4 "don't care" bytes */ 280 /* plus 4 "don't care" bytes */
280 281
281 status = spi_sync(priv->spi, &msg); 282 status = spi_sync(priv->spi, &msg);
282 up(&priv->lock); 283 mutex_unlock(&priv->lock);
283 284
284 if (status >= 0) { 285 if (status >= 0) {
285 *retlen = msg.actual_length - 8; 286 *retlen = msg.actual_length - 8;
@@ -336,7 +337,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
336 else 337 else
337 writelen = len; 338 writelen = len;
338 339
339 down(&priv->lock); 340 mutex_lock(&priv->lock);
340 while (remaining > 0) { 341 while (remaining > 0) {
341 DEBUG(MTD_DEBUG_LEVEL3, "write @ %i:%i len=%i\n", 342 DEBUG(MTD_DEBUG_LEVEL3, "write @ %i:%i len=%i\n",
342 pageaddr, offset, writelen); 343 pageaddr, offset, writelen);
@@ -441,7 +442,7 @@ static int dataflash_write(struct mtd_info *mtd, loff_t to, size_t len,
441 else 442 else
442 writelen = remaining; 443 writelen = remaining;
443 } 444 }
444 up(&priv->lock); 445 mutex_unlock(&priv->lock);
445 446
446 return status; 447 return status;
447} 448}
@@ -463,7 +464,7 @@ add_dataflash(struct spi_device *spi, char *name,
463 if (!priv) 464 if (!priv)
464 return -ENOMEM; 465 return -ENOMEM;
465 466
466 init_MUTEX(&priv->lock); 467 mutex_init(&priv->lock);
467 priv->spi = spi; 468 priv->spi = spi;
468 priv->page_size = pagesize; 469 priv->page_size = pagesize;
469 priv->page_offset = pageoffset; 470 priv->page_offset = pageoffset;
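This file gets the same mechanical locking conversion as m25p80.c above; the whole pattern in one place:

	struct mutex lock;		/* was: struct semaphore lock; */
	mutex_init(&lock);		/* was: init_MUTEX(&lock);     */
	mutex_lock(&lock);		/* was: down(&lock);           */
	mutex_unlock(&lock);		/* was: up(&lock);             */

	/* plus #include <linux/mutex.h> in place of <asm/semaphore.h>,
	 * as both hunks add */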
diff --git a/drivers/mtd/devices/pmc551.c b/drivers/mtd/devices/pmc551.c
index e8f686f7a357..7060a0895ce2 100644
--- a/drivers/mtd/devices/pmc551.c
+++ b/drivers/mtd/devices/pmc551.c
@@ -30,8 +30,8 @@
30 * 30 *
31 * Notes: 31 * Notes:
32 * Due to what I assume is more buggy SROM, the 64M PMC551 I 32 * Due to what I assume is more buggy SROM, the 64M PMC551 I
33 * have available claims that all 4 of it's DRAM banks have 64M 33 * have available claims that all 4 of its DRAM banks have 64MiB
34 * of ram configured (making a grand total of 256M onboard). 34 * of ram configured (making a grand total of 256MiB onboard).
35 * This is slightly annoying since the BAR0 size reflects the 35 * This is slightly annoying since the BAR0 size reflects the
36 * aperture size, not the dram size, and the V370PDC supplies no 36 * aperture size, not the dram size, and the V370PDC supplies no
37 * other method for memory size discovery. This problem is 37 * other method for memory size discovery. This problem is
@@ -70,7 +70,7 @@
70 * made the memory unusable, added a fix to code to touch up 70 * made the memory unusable, added a fix to code to touch up
71 * the DRAM some. 71 * the DRAM some.
72 * 72 *
73 * Bugs/FIXME's: 73 * Bugs/FIXMEs:
74 * * MUST fix the init function to not spin on a register 74 * * MUST fix the init function to not spin on a register
75 * waiting for it to set .. this does not safely handle busted 75 * waiting for it to set .. this does not safely handle busted
76 * devices that never reset the register correctly which will 76 * devices that never reset the register correctly which will
@@ -562,10 +562,10 @@ static u32 fixup_pmc551(struct pci_dev *dev)
562 /* 562 /*
563 * Some screen fun 563 * Some screen fun
564 */ 564 */
565 printk(KERN_DEBUG "pmc551: %d%c (0x%x) of %sprefetchable memory at " 565 printk(KERN_DEBUG "pmc551: %d%sB (0x%x) of %sprefetchable memory at "
566 "0x%llx\n", (size < 1024) ? size : (size < 1048576) ? 566 "0x%llx\n", (size < 1024) ? size : (size < 1048576) ?
567 size >> 10 : size >> 20, 567 size >> 10 : size >> 20,
568 (size < 1024) ? 'B' : (size < 1048576) ? 'K' : 'M', size, 568 (size < 1024) ? "" : (size < 1048576) ? "Ki" : "Mi", size,
569 ((dcmd & (0x1 << 3)) == 0) ? "non-" : "", 569 ((dcmd & (0x1 << 3)) == 0) ? "non-" : "",
570 (unsigned long long)pci_resource_start(dev, 0)); 570 (unsigned long long)pci_resource_start(dev, 0));
571 571
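The ternary ladder now yields a binary-prefix string rather than a single character; a hypothetical pair of helpers that would express the ladder once (names invented here for illustration):

	static const char *pmc551_size_suffix(u32 n)
	{
		return (n < 1024) ? "" : (n < 1048576) ? "Ki" : "Mi";
	}

	static u32 pmc551_size_scaled(u32 n)
	{
		return (n < 1024) ? n : (n < 1048576) ? n >> 10 : n >> 20;
	}

	/* printk(KERN_DEBUG "pmc551: %d%sB ...\n",
	 *        pmc551_size_scaled(size), pmc551_size_suffix(size), ...); */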
@@ -649,14 +649,10 @@ MODULE_DESCRIPTION(PMC551_VERSION);
649 * Stuff these outside the ifdef so as to not bust compiled in driver support 649 * Stuff these outside the ifdef so as to not bust compiled in driver support
650 */ 650 */
651static int msize = 0; 651static int msize = 0;
652#if defined(CONFIG_MTD_PMC551_APERTURE_SIZE)
653static int asize = CONFIG_MTD_PMC551_APERTURE_SIZE;
654#else
655static int asize = 0; 652static int asize = 0;
656#endif
657 653
658module_param(msize, int, 0); 654module_param(msize, int, 0);
659MODULE_PARM_DESC(msize, "memory size in Megabytes [1 - 1024]"); 655MODULE_PARM_DESC(msize, "memory size in MiB [1 - 1024]");
660module_param(asize, int, 0); 656module_param(asize, int, 0);
661MODULE_PARM_DESC(asize, "aperture size, must be <= memsize [1-1024]"); 657MODULE_PARM_DESC(asize, "aperture size, must be <= memsize [1-1024]");
662 658
@@ -799,8 +795,7 @@ static int __init init_pmc551(void)
799 mtd->owner = THIS_MODULE; 795 mtd->owner = THIS_MODULE;
800 796
801 if (add_mtd_device(mtd)) { 797 if (add_mtd_device(mtd)) {
802 printk(KERN_NOTICE "pmc551: Failed to register new " 798 printk(KERN_NOTICE "pmc551: Failed to register new device\n");
803 "device\n");
804 pci_iounmap(PCI_Device, priv->start); 799 pci_iounmap(PCI_Device, priv->start);
805 kfree(mtd->priv); 800 kfree(mtd->priv);
806 kfree(mtd); 801 kfree(mtd);
@@ -811,13 +806,13 @@ static int __init init_pmc551(void)
811 pci_dev_get(PCI_Device); 806 pci_dev_get(PCI_Device);
812 807
813 printk(KERN_NOTICE "Registered pmc551 memory device.\n"); 808 printk(KERN_NOTICE "Registered pmc551 memory device.\n");
814 printk(KERN_NOTICE "Mapped %dM of memory from 0x%p to 0x%p\n", 809 printk(KERN_NOTICE "Mapped %dMiB of memory from 0x%p to 0x%p\n",
815 priv->asize >> 20, 810 priv->asize >> 20,
816 priv->start, priv->start + priv->asize); 811 priv->start, priv->start + priv->asize);
817 printk(KERN_NOTICE "Total memory is %d%c\n", 812 printk(KERN_NOTICE "Total memory is %d%sB\n",
818 (length < 1024) ? length : 813 (length < 1024) ? length :
819 (length < 1048576) ? length >> 10 : length >> 20, 814 (length < 1048576) ? length >> 10 : length >> 20,
820 (length < 1024) ? 'B' : (length < 1048576) ? 'K' : 'M'); 815 (length < 1024) ? "" : (length < 1048576) ? "Ki" : "Mi");
821 priv->nextpmc551 = pmc551list; 816 priv->nextpmc551 = pmc551list;
822 pmc551list = mtd; 817 pmc551list = mtd;
823 found++; 818 found++;
@@ -850,7 +845,7 @@ static void __exit cleanup_pmc551(void)
850 pmc551list = priv->nextpmc551; 845 pmc551list = priv->nextpmc551;
851 846
852 if (priv->start) { 847 if (priv->start) {
853 printk(KERN_DEBUG "pmc551: unmapping %dM starting at " 848 printk(KERN_DEBUG "pmc551: unmapping %dMiB starting at "
854 "0x%p\n", priv->asize >> 20, priv->start); 849 "0x%p\n", priv->asize >> 20, priv->start);
855 pci_iounmap(priv->dev, priv->start); 850 pci_iounmap(priv->dev, priv->start);
856 } 851 }
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index ecac0e438f49..b8917beeb650 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -580,14 +580,13 @@ int INFTL_mount(struct INFTLrecord *s)
580 logical_block = block = BLOCK_NIL; 580 logical_block = block = BLOCK_NIL;
581 581
582 /* Temporary buffer to store ANAC numbers. */ 582 /* Temporary buffer to store ANAC numbers. */
583 ANACtable = kmalloc(s->nb_blocks * sizeof(u8), GFP_KERNEL); 583 ANACtable = kcalloc(s->nb_blocks, sizeof(u8), GFP_KERNEL);
584 if (!ANACtable) { 584 if (!ANACtable) {
585 printk(KERN_WARNING "INFTL: allocation of ANACtable " 585 printk(KERN_WARNING "INFTL: allocation of ANACtable "
586 "failed (%zd bytes)\n", 586 "failed (%zd bytes)\n",
587 s->nb_blocks * sizeof(u8)); 587 s->nb_blocks * sizeof(u8));
588 return -ENOMEM; 588 return -ENOMEM;
589 } 589 }
590 memset(ANACtable, 0, s->nb_blocks);
591 590
592 /* 591 /*
593 * First pass is to explore each physical unit, and construct the 592 * First pass is to explore each physical unit, and construct the
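kcalloc() folds the allocate-then-zero pair into one call and also overflow-checks the n * size multiplication; the removed memset was correct only because sizeof(u8) == 1 made its length equal the allocation:

	/* before: two steps, length tied implicitly to sizeof(u8) == 1 */
	ANACtable = kmalloc(s->nb_blocks * sizeof(u8), GFP_KERNEL);
	memset(ANACtable, 0, s->nb_blocks);

	/* after: one overflow-checked, zeroing allocation */
	ANACtable = kcalloc(s->nb_blocks, sizeof(u8), GFP_KERNEL);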
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 6cd132c75187..2a2a125b0c76 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -163,20 +163,12 @@ config MTD_SBC_GXX
163 More info at 163 More info at
164 <http://www.arcomcontrols.com/products/icp/pc104/processors/SBC_GX1.htm>. 164 <http://www.arcomcontrols.com/products/icp/pc104/processors/SBC_GX1.htm>.
165 165
166config MTD_LUBBOCK 166config MTD_PXA2XX
167 tristate "CFI Flash device mapped on Intel Lubbock XScale eval board" 167 tristate "CFI Flash device mapped on Intel XScale PXA2xx based boards"
168 depends on ARCH_LUBBOCK && MTD_CFI_INTELEXT && MTD_PARTITIONS 168 depends on (PXA25x || PXA27x) && MTD_CFI_INTELEXT
169 help
170 This provides a driver for the on-board flash of the Intel
171 'Lubbock' XScale evaluation board.
172
173config MTD_MAINSTONE
174 tristate "CFI Flash device mapped on Intel Mainstone XScale eval board"
175 depends on MACH_MAINSTONE && MTD_CFI_INTELEXT
176 select MTD_PARTITIONS 169 select MTD_PARTITIONS
177 help 170 help
178 This provides a driver for the on-board flash of the Intel 171 This provides a driver for the NOR flash attached to a PXA2xx chip.
179 'Mainstone PXA27x evaluation board.
180 172
181config MTD_OCTAGON 173config MTD_OCTAGON
182 tristate "JEDEC Flash device mapped on Octagon 5066 SBC" 174 tristate "JEDEC Flash device mapped on Octagon 5066 SBC"
@@ -354,7 +346,7 @@ config MTD_CFI_FLAGADM
354 346
355config MTD_WALNUT 347config MTD_WALNUT
356 tristate "Flash device mapped on IBM 405GP Walnut" 348 tristate "Flash device mapped on IBM 405GP Walnut"
357 depends on MTD_JEDECPROBE && WALNUT 349 depends on MTD_JEDECPROBE && WALNUT && !PPC_MERGE
358 help 350 help
359 This enables access routines for the flash chips on the IBM 405GP 351 This enables access routines for the flash chips on the IBM 405GP
360 Walnut board. If you have one of these boards and would like to 352 Walnut board. If you have one of these boards and would like to
@@ -370,7 +362,7 @@ config MTD_EBONY
370 362
371config MTD_OCOTEA 363config MTD_OCOTEA
372 tristate "Flash devices mapped on IBM 440GX Ocotea" 364 tristate "Flash devices mapped on IBM 440GX Ocotea"
373 depends on MTD_CFI && OCOTEA 365 depends on MTD_CFI && OCOTEA && !PPC_MERGE
374 help 366 help
375 This enables access routines for the flash chips on the IBM 440GX 367 This enables access routines for the flash chips on the IBM 440GX
376 Ocotea board. If you have one of these boards and would like to 368 Ocotea board. If you have one of these boards and would like to
@@ -384,22 +376,6 @@ config MTD_REDWOOD
384 Redwood board. If you have one of these boards and would like to 376 Redwood board. If you have one of these boards and would like to
385 use the flash chips on it, say 'Y'. 377 use the flash chips on it, say 'Y'.
386 378
387config MTD_TQM834x
388 tristate "Flash device mapped on TQ Components TQM834x Boards"
389 depends on MTD_CFI && TQM834x
390 help
391 This enables access routines for the flash chips on the
392 TQ Components TQM834x boards. If you have one of these boards
393 and would like to use the flash chips on it, say 'Y'.
394
395config MTD_OCELOT
396 tristate "Momenco Ocelot boot flash device"
397 depends on MOMENCO_OCELOT
398 help
399 This enables access routines for the boot flash device and for the
400 NVRAM on the Momenco Ocelot board. If you have one of these boards
401 and would like access to either of these, say 'Y'.
402
403config MTD_SOLUTIONENGINE 379config MTD_SOLUTIONENGINE
404 tristate "CFI Flash device mapped on Hitachi SolutionEngine" 380 tristate "CFI Flash device mapped on Hitachi SolutionEngine"
405 depends on SUPERH && MTD_CFI && MTD_REDBOOT_PARTS 381 depends on SUPERH && MTD_CFI && MTD_REDBOOT_PARTS
@@ -605,6 +581,13 @@ config MTD_SHARP_SL
605 help 581 help
606 This enables access to the flash chip on the Sharp SL Series of PDAs. 582 This enables access to the flash chip on the Sharp SL Series of PDAs.
607 583
584config MTD_INTEL_VR_NOR
585 tristate "NOR flash on Intel Vermilion Range Expansion Bus CS0"
586 depends on PCI
587 help
588 Map driver for a NOR flash bank located on the Expansion Bus of the
589 Intel Vermilion Range chipset.
590
608config MTD_PLATRAM 591config MTD_PLATRAM
609 tristate "Map driver for platform device RAM (mtd-ram)" 592 tristate "Map driver for platform device RAM (mtd-ram)"
610 select MTD_RAM 593 select MTD_RAM
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 970b189271a2..316382a1401b 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -20,8 +20,7 @@ obj-$(CONFIG_MTD_ESB2ROM) += esb2rom.o
20obj-$(CONFIG_MTD_ICHXROM) += ichxrom.o 20obj-$(CONFIG_MTD_ICHXROM) += ichxrom.o
21obj-$(CONFIG_MTD_CK804XROM) += ck804xrom.o 21obj-$(CONFIG_MTD_CK804XROM) += ck804xrom.o
22obj-$(CONFIG_MTD_TSUNAMI) += tsunami_flash.o 22obj-$(CONFIG_MTD_TSUNAMI) += tsunami_flash.o
23obj-$(CONFIG_MTD_LUBBOCK) += lubbock-flash.o 23obj-$(CONFIG_MTD_PXA2XX) += pxa2xx-flash.o
24obj-$(CONFIG_MTD_MAINSTONE) += mainstone-flash.o
25obj-$(CONFIG_MTD_MBX860) += mbx860.o 24obj-$(CONFIG_MTD_MBX860) += mbx860.o
26obj-$(CONFIG_MTD_CEIVA) += ceiva.o 25obj-$(CONFIG_MTD_CEIVA) += ceiva.o
27obj-$(CONFIG_MTD_OCTAGON) += octagon-5066.o 26obj-$(CONFIG_MTD_OCTAGON) += octagon-5066.o
@@ -43,7 +42,6 @@ obj-$(CONFIG_MTD_SUN_UFLASH) += sun_uflash.o
43obj-$(CONFIG_MTD_VMAX) += vmax301.o 42obj-$(CONFIG_MTD_VMAX) += vmax301.o
44obj-$(CONFIG_MTD_SCx200_DOCFLASH)+= scx200_docflash.o 43obj-$(CONFIG_MTD_SCx200_DOCFLASH)+= scx200_docflash.o
45obj-$(CONFIG_MTD_DBOX2) += dbox2-flash.o 44obj-$(CONFIG_MTD_DBOX2) += dbox2-flash.o
46obj-$(CONFIG_MTD_OCELOT) += ocelot.o
47obj-$(CONFIG_MTD_SOLUTIONENGINE)+= solutionengine.o 45obj-$(CONFIG_MTD_SOLUTIONENGINE)+= solutionengine.o
48obj-$(CONFIG_MTD_PCI) += pci.o 46obj-$(CONFIG_MTD_PCI) += pci.o
49obj-$(CONFIG_MTD_ALCHEMY) += alchemy-flash.o 47obj-$(CONFIG_MTD_ALCHEMY) += alchemy-flash.o
@@ -70,4 +68,4 @@ obj-$(CONFIG_MTD_SHARP_SL) += sharpsl-flash.o
70obj-$(CONFIG_MTD_PLATRAM) += plat-ram.o 68obj-$(CONFIG_MTD_PLATRAM) += plat-ram.o
71obj-$(CONFIG_MTD_OMAP_NOR) += omap_nor.o 69obj-$(CONFIG_MTD_OMAP_NOR) += omap_nor.o
72obj-$(CONFIG_MTD_MTX1) += mtx-1_flash.o 70obj-$(CONFIG_MTD_MTX1) += mtx-1_flash.o
73obj-$(CONFIG_MTD_TQM834x) += tqm834x.o 71obj-$(CONFIG_MTD_INTEL_VR_NOR) += intel_vr_nor.o
diff --git a/drivers/mtd/maps/alchemy-flash.c b/drivers/mtd/maps/alchemy-flash.c
index 84fbe0e8c47e..82811bcb0436 100644
--- a/drivers/mtd/maps/alchemy-flash.c
+++ b/drivers/mtd/maps/alchemy-flash.c
@@ -75,13 +75,6 @@
75#define BOARD_FLASH_WIDTH 2 /* 16-bits */ 75#define BOARD_FLASH_WIDTH 2 /* 16-bits */
76#endif 76#endif
77 77
78#ifdef CONFIG_MIPS_HYDROGEN3
79#define BOARD_MAP_NAME "Hydrogen3 Flash"
80#define BOARD_FLASH_SIZE 0x02000000 /* 32MB */
81#define BOARD_FLASH_WIDTH 4 /* 32-bits */
82#define USE_LOCAL_ACCESSORS /* why? */
83#endif
84
85#ifdef CONFIG_MIPS_BOSPORUS 78#ifdef CONFIG_MIPS_BOSPORUS
86#define BOARD_MAP_NAME "Bosporus Flash" 79#define BOARD_MAP_NAME "Bosporus Flash"
87#define BOARD_FLASH_SIZE 0x01000000 /* 16MB */ 80#define BOARD_FLASH_SIZE 0x01000000 /* 16MB */
@@ -130,13 +123,6 @@ int __init alchemy_mtd_init(void)
130 123
131 window_addr = 0x20000000 - BOARD_FLASH_SIZE; 124 window_addr = 0x20000000 - BOARD_FLASH_SIZE;
132 window_size = BOARD_FLASH_SIZE; 125 window_size = BOARD_FLASH_SIZE;
133#ifdef CONFIG_MIPS_MIRAGE_WHY
134 /* Boot ROM flash bank only; no user bank */
135 window_addr = 0x1C000000;
136 window_size = 0x04000000;
137 /* USERFS from 0x1C00 0000 to 0x1FC00000 */
138 alchemy_partitions[0].size = 0x03C00000;
139#endif
140 126
141 /* 127 /*
142 * Static partition definition selection 128 * Static partition definition selection
diff --git a/drivers/mtd/maps/intel_vr_nor.c b/drivers/mtd/maps/intel_vr_nor.c
new file mode 100644
index 000000000000..1e7814ae212a
--- /dev/null
+++ b/drivers/mtd/maps/intel_vr_nor.c
@@ -0,0 +1,298 @@
1/*
2 * drivers/mtd/maps/intel_vr_nor.c
3 *
4 * An MTD map driver for a NOR flash bank on the Expansion Bus of the Intel
5 * Vermilion Range chipset.
6 *
7 * The Vermilion Range Expansion Bus supports four chip selects, each of which
8 * has 64MiB of address space. The 2nd BAR of the Expansion Bus PCI Device
9 * is a 256MiB memory region containing the address spaces for all four of the
10 * chip selects, with start addresses hardcoded on 64MiB boundaries.
11 *
12 * This map driver only supports NOR flash on chip select 0. The buswidth
13 * (either 8 bits or 16 bits) is determined by reading the Expansion Bus Timing
14 * and Control Register for Chip Select 0 (EXP_TIMING_CS0). This driver does
15 * not modify the value in the EXP_TIMING_CS0 register except to enable writing
16 * and disable boot acceleration. The timing parameters in the register are
17 * assumed to have been properly initialized by the BIOS. The reset default
18 * timing parameters are maximally conservative (slow), so access to the flash
19 * will be slower than it should be if the BIOS has not initialized the timing
20 * parameters.
21 *
22 * Author: Andy Lowe <alowe@mvista.com>
23 *
24 * 2006 (c) MontaVista Software, Inc. This file is licensed under
25 * the terms of the GNU General Public License version 2. This program
26 * is licensed "as is" without any warranty of any kind, whether express
27 * or implied.
28 */
29
30#include <linux/module.h>
31#include <linux/kernel.h>
32#include <linux/pci.h>
33#include <linux/init.h>
34#include <linux/mtd/mtd.h>
35#include <linux/mtd/map.h>
36#include <linux/mtd/partitions.h>
37#include <linux/mtd/cfi.h>
38#include <linux/mtd/flashchip.h>
39
40#define DRV_NAME "vr_nor"
41
42struct vr_nor_mtd {
43 void __iomem *csr_base;
44 struct map_info map;
45 struct mtd_info *info;
46 int nr_parts;
47 struct pci_dev *dev;
48};
49
50/* Expansion Bus Configuration and Status Registers are in BAR 0 */
51#define EXP_CSR_MBAR 0
52/* Expansion Bus Memory Window is BAR 1 */
53#define EXP_WIN_MBAR 1
54/* Maximum address space for Chip Select 0 is 64MiB */
55#define CS0_SIZE 0x04000000
56/* Chip Select 0 is at offset 0 in the Memory Window */
57#define CS0_START 0x0
58/* Chip Select 0 Timing Register is at offset 0 in CSR */
59#define EXP_TIMING_CS0 0x00
60#define TIMING_CS_EN (1 << 31) /* Chip Select Enable */
61#define TIMING_BOOT_ACCEL_DIS (1 << 8) /* Boot Acceleration Disable */
62#define TIMING_WR_EN (1 << 1) /* Write Enable */
63#define TIMING_BYTE_EN (1 << 0) /* 8-bit vs 16-bit bus */
64#define TIMING_MASK 0x3FFF0000
65
66static void __devexit vr_nor_destroy_partitions(struct vr_nor_mtd *p)
67{
68 if (p->nr_parts > 0) {
69#if defined(CONFIG_MTD_PARTITIONS) || defined(CONFIG_MTD_PARTITIONS_MODULE)
70 del_mtd_partitions(p->info);
71#endif
72 } else
73 del_mtd_device(p->info);
74}
75
76static int __devinit vr_nor_init_partitions(struct vr_nor_mtd *p)
77{
78 int err = 0;
79#if defined(CONFIG_MTD_PARTITIONS) || defined(CONFIG_MTD_PARTITIONS_MODULE)
80 struct mtd_partition *parts;
81 static const char *part_probes[] = { "cmdlinepart", NULL };
82#endif
83
84 /* register the flash bank */
85#if defined(CONFIG_MTD_PARTITIONS) || defined(CONFIG_MTD_PARTITIONS_MODULE)
86 /* partition the flash bank */
87 p->nr_parts = parse_mtd_partitions(p->info, part_probes, &parts, 0);
88 if (p->nr_parts > 0)
89 err = add_mtd_partitions(p->info, parts, p->nr_parts);
90#endif
91 if (p->nr_parts <= 0)
92 err = add_mtd_device(p->info);
93
94 return err;
95}
96
97static void __devexit vr_nor_destroy_mtd_setup(struct vr_nor_mtd *p)
98{
99 map_destroy(p->info);
100}
101
102static int __devinit vr_nor_mtd_setup(struct vr_nor_mtd *p)
103{
104 static const char *probe_types[] =
105 { "cfi_probe", "jedec_probe", NULL };
106 const char **type;
107
108 for (type = probe_types; !p->info && *type; type++)
109 p->info = do_map_probe(*type, &p->map);
110 if (!p->info)
111 return -ENODEV;
112
113 p->info->owner = THIS_MODULE;
114
115 return 0;
116}
117
118static void __devexit vr_nor_destroy_maps(struct vr_nor_mtd *p)
119{
120 unsigned int exp_timing_cs0;
121
122 /* write-protect the flash bank */
123 exp_timing_cs0 = readl(p->csr_base + EXP_TIMING_CS0);
124 exp_timing_cs0 &= ~TIMING_WR_EN;
125 writel(exp_timing_cs0, p->csr_base + EXP_TIMING_CS0);
126
127 /* unmap the flash window */
128 iounmap(p->map.virt);
129
130 /* unmap the csr window */
131 iounmap(p->csr_base);
132}
133
134/*
135 * Initialize the map_info structure and map the flash.
136 * Returns 0 on success, nonzero otherwise.
137 */
138static int __devinit vr_nor_init_maps(struct vr_nor_mtd *p)
139{
140 unsigned long csr_phys, csr_len;
141 unsigned long win_phys, win_len;
142 unsigned int exp_timing_cs0;
143 int err;
144
145 csr_phys = pci_resource_start(p->dev, EXP_CSR_MBAR);
146 csr_len = pci_resource_len(p->dev, EXP_CSR_MBAR);
147 win_phys = pci_resource_start(p->dev, EXP_WIN_MBAR);
148 win_len = pci_resource_len(p->dev, EXP_WIN_MBAR);
149
150 if (!csr_phys || !csr_len || !win_phys || !win_len)
151 return -ENODEV;
152
153 if (win_len < (CS0_START + CS0_SIZE))
154 return -ENXIO;
155
156 p->csr_base = ioremap_nocache(csr_phys, csr_len);
157 if (!p->csr_base)
158 return -ENOMEM;
159
160 exp_timing_cs0 = readl(p->csr_base + EXP_TIMING_CS0);
161 if (!(exp_timing_cs0 & TIMING_CS_EN)) {
162 dev_warn(&p->dev->dev, "Expansion Bus Chip Select 0 "
163 "is disabled.\n");
164 err = -ENODEV;
165 goto release;
166 }
167 if ((exp_timing_cs0 & TIMING_MASK) == TIMING_MASK) {
168 dev_warn(&p->dev->dev, "Expansion Bus Chip Select 0 "
169 "is configured for maximally slow access times.\n");
170 }
171 p->map.name = DRV_NAME;
172 p->map.bankwidth = (exp_timing_cs0 & TIMING_BYTE_EN) ? 1 : 2;
173 p->map.phys = win_phys + CS0_START;
174 p->map.size = CS0_SIZE;
175 p->map.virt = ioremap_nocache(p->map.phys, p->map.size);
176 if (!p->map.virt) {
177 err = -ENOMEM;
178 goto release;
179 }
180 simple_map_init(&p->map);
181
182 /* Enable writes to flash bank */
183 exp_timing_cs0 |= TIMING_BOOT_ACCEL_DIS | TIMING_WR_EN;
184 writel(exp_timing_cs0, p->csr_base + EXP_TIMING_CS0);
185
186 return 0;
187
188 release:
189 iounmap(p->csr_base);
190 return err;
191}
192
193static struct pci_device_id vr_nor_pci_ids[] = {
194 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x500D)},
195 {0,}
196};
197
198static void __devexit vr_nor_pci_remove(struct pci_dev *dev)
199{
200 struct vr_nor_mtd *p = pci_get_drvdata(dev);
201
202 pci_set_drvdata(dev, NULL);
203 vr_nor_destroy_partitions(p);
204 vr_nor_destroy_mtd_setup(p);
205 vr_nor_destroy_maps(p);
206 kfree(p);
207 pci_release_regions(dev);
208 pci_disable_device(dev);
209}
210
211static int __devinit
212vr_nor_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
213{
214 struct vr_nor_mtd *p = NULL;
215 unsigned int exp_timing_cs0;
216 int err;
217
218 err = pci_enable_device(dev);
219 if (err)
220 goto out;
221
222 err = pci_request_regions(dev, DRV_NAME);
223 if (err)
224 goto disable_dev;
225
226 p = kzalloc(sizeof(*p), GFP_KERNEL);
227 err = -ENOMEM;
228 if (!p)
229 goto release;
230
231 p->dev = dev;
232
233 err = vr_nor_init_maps(p);
234 if (err)
235 goto release;
236
237 err = vr_nor_mtd_setup(p);
238 if (err)
239 goto destroy_maps;
240
241 err = vr_nor_init_partitions(p);
242 if (err)
243 goto destroy_mtd_setup;
244
245 pci_set_drvdata(dev, p);
246
247 return 0;
248
249 destroy_mtd_setup:
250 map_destroy(p->info);
251
252 destroy_maps:
253 /* write-protect the flash bank */
254 exp_timing_cs0 = readl(p->csr_base + EXP_TIMING_CS0);
255 exp_timing_cs0 &= ~TIMING_WR_EN;
256 writel(exp_timing_cs0, p->csr_base + EXP_TIMING_CS0);
257
258 /* unmap the flash window */
259 iounmap(p->map.virt);
260
261 /* unmap the csr window */
262 iounmap(p->csr_base);
263
264 release:
265 kfree(p);
266 pci_release_regions(dev);
267
268 disable_dev:
269 pci_disable_device(dev);
270
271 out:
272 return err;
273}
274
275static struct pci_driver vr_nor_pci_driver = {
276 .name = DRV_NAME,
277 .probe = vr_nor_pci_probe,
278 .remove = __devexit_p(vr_nor_pci_remove),
279 .id_table = vr_nor_pci_ids,
280};
281
282static int __init vr_nor_mtd_init(void)
283{
284 return pci_register_driver(&vr_nor_pci_driver);
285}
286
287static void __exit vr_nor_mtd_exit(void)
288{
289 pci_unregister_driver(&vr_nor_pci_driver);
290}
291
292module_init(vr_nor_mtd_init);
293module_exit(vr_nor_mtd_exit);
294
295MODULE_AUTHOR("Andy Lowe");
296MODULE_DESCRIPTION("MTD map driver for NOR flash on Intel Vermilion Range");
297MODULE_LICENSE("GPL");
298MODULE_DEVICE_TABLE(pci, vr_nor_pci_ids);
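A note on the register handling this new driver depends on: everything is driven off EXP_TIMING_CS0. A minimal sketch (illustration only, built from the #defines in the file above; the driver's own vr_nor_init_maps() is the authoritative version) of how the bits decode:

	/* Sketch: decode the EXP_TIMING_CS0 bits the driver cares about. */
	unsigned int cs0 = readl(csr_base + EXP_TIMING_CS0);

	if (!(cs0 & TIMING_CS_EN))
		return -ENODEV;			/* chip select disabled by firmware */

	/* TIMING_BYTE_EN set means an 8-bit bus, otherwise 16-bit. */
	bankwidth = (cs0 & TIMING_BYTE_EN) ? 1 : 2;

	/* The only modification the driver makes: allow writes and
	 * disable boot acceleration in one update. */
	cs0 |= TIMING_BOOT_ACCEL_DIS | TIMING_WR_EN;
	writel(cs0, csr_base + EXP_TIMING_CS0);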
diff --git a/drivers/mtd/maps/lubbock-flash.c b/drivers/mtd/maps/lubbock-flash.c
deleted file mode 100644
index e8560683b973..000000000000
--- a/drivers/mtd/maps/lubbock-flash.c
+++ /dev/null
@@ -1,168 +0,0 @@
1/*
2 * $Id: lubbock-flash.c,v 1.21 2005/11/07 11:14:27 gleixner Exp $
3 *
4 * Map driver for the Lubbock developer platform.
5 *
6 * Author: Nicolas Pitre
7 * Copyright: (C) 2001 MontaVista Software Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18
19#include <linux/mtd/mtd.h>
20#include <linux/mtd/map.h>
21#include <linux/mtd/partitions.h>
22
23#include <asm/io.h>
24#include <asm/hardware.h>
25#include <asm/arch/pxa-regs.h>
26#include <asm/arch/lubbock.h>
27#include <asm/cacheflush.h>
28
29#define ROM_ADDR 0x00000000
30#define FLASH_ADDR 0x04000000
31
32#define WINDOW_SIZE 64*1024*1024
33
34static void lubbock_map_inval_cache(struct map_info *map, unsigned long from, ssize_t len)
35{
36 flush_ioremap_region(map->phys, map->cached, from, len);
37}
38
39static struct map_info lubbock_maps[2] = { {
40 .size = WINDOW_SIZE,
41 .phys = 0x00000000,
42 .inval_cache = lubbock_map_inval_cache,
43}, {
44 .size = WINDOW_SIZE,
45 .phys = 0x04000000,
46 .inval_cache = lubbock_map_inval_cache,
47} };
48
49static struct mtd_partition lubbock_partitions[] = {
50 {
51 .name = "Bootloader",
52 .size = 0x00040000,
53 .offset = 0,
54 .mask_flags = MTD_WRITEABLE /* force read-only */
55 },{
56 .name = "Kernel",
57 .size = 0x00100000,
58 .offset = 0x00040000,
59 },{
60 .name = "Filesystem",
61 .size = MTDPART_SIZ_FULL,
62 .offset = 0x00140000
63 }
64};
65
66static struct mtd_info *mymtds[2];
67static struct mtd_partition *parsed_parts[2];
68static int nr_parsed_parts[2];
69
70static const char *probes[] = { "RedBoot", "cmdlinepart", NULL };
71
72static int __init init_lubbock(void)
73{
74 int flashboot = (LUB_CONF_SWITCHES & 1);
75 int ret = 0, i;
76
77 lubbock_maps[0].bankwidth = lubbock_maps[1].bankwidth =
78 (BOOT_DEF & 1) ? 2 : 4;
79
80 /* Compensate for the nROMBT switch which swaps the flash banks */
81 printk(KERN_NOTICE "Lubbock configured to boot from %s (bank %d)\n",
82 flashboot?"Flash":"ROM", flashboot);
83
84 lubbock_maps[flashboot^1].name = "Lubbock Application Flash";
85 lubbock_maps[flashboot].name = "Lubbock Boot ROM";
86
87 for (i = 0; i < 2; i++) {
88 lubbock_maps[i].virt = ioremap(lubbock_maps[i].phys, WINDOW_SIZE);
89 if (!lubbock_maps[i].virt) {
90 printk(KERN_WARNING "Failed to ioremap %s\n", lubbock_maps[i].name);
91 if (!ret)
92 ret = -ENOMEM;
93 continue;
94 }
95 lubbock_maps[i].cached = ioremap_cached(lubbock_maps[i].phys, WINDOW_SIZE);
96 if (!lubbock_maps[i].cached)
97 printk(KERN_WARNING "Failed to ioremap cached %s\n", lubbock_maps[i].name);
98 simple_map_init(&lubbock_maps[i]);
99
100 printk(KERN_NOTICE "Probing %s at physical address 0x%08lx (%d-bit bankwidth)\n",
101 lubbock_maps[i].name, lubbock_maps[i].phys,
102 lubbock_maps[i].bankwidth * 8);
103
104 mymtds[i] = do_map_probe("cfi_probe", &lubbock_maps[i]);
105
106 if (!mymtds[i]) {
107 iounmap((void *)lubbock_maps[i].virt);
108 if (lubbock_maps[i].cached)
109 iounmap(lubbock_maps[i].cached);
110 if (!ret)
111 ret = -EIO;
112 continue;
113 }
114 mymtds[i]->owner = THIS_MODULE;
115
116 ret = parse_mtd_partitions(mymtds[i], probes,
117 &parsed_parts[i], 0);
118
119 if (ret > 0)
120 nr_parsed_parts[i] = ret;
121 }
122
123 if (!mymtds[0] && !mymtds[1])
124 return ret;
125
126 for (i = 0; i < 2; i++) {
127 if (!mymtds[i]) {
128 printk(KERN_WARNING "%s is absent. Skipping\n", lubbock_maps[i].name);
129 } else if (nr_parsed_parts[i]) {
130 add_mtd_partitions(mymtds[i], parsed_parts[i], nr_parsed_parts[i]);
131 } else if (!i) {
132 printk("Using static partitions on %s\n", lubbock_maps[i].name);
133 add_mtd_partitions(mymtds[i], lubbock_partitions, ARRAY_SIZE(lubbock_partitions));
134 } else {
135 printk("Registering %s as whole device\n", lubbock_maps[i].name);
136 add_mtd_device(mymtds[i]);
137 }
138 }
139 return 0;
140}
141
142static void __exit cleanup_lubbock(void)
143{
144 int i;
145 for (i = 0; i < 2; i++) {
146 if (!mymtds[i])
147 continue;
148
149 if (nr_parsed_parts[i] || !i)
150 del_mtd_partitions(mymtds[i]);
151 else
152 del_mtd_device(mymtds[i]);
153
154 map_destroy(mymtds[i]);
155 iounmap((void *)lubbock_maps[i].virt);
156 if (lubbock_maps[i].cached)
157 iounmap(lubbock_maps[i].cached);
158
159 kfree(parsed_parts[i]);
160 }
161}
162
163module_init(init_lubbock);
164module_exit(cleanup_lubbock);
165
166MODULE_LICENSE("GPL");
167MODULE_AUTHOR("Nicolas Pitre <nico@cam.org>");
168MODULE_DESCRIPTION("MTD map driver for Intel Lubbock");
diff --git a/drivers/mtd/maps/mainstone-flash.c b/drivers/mtd/maps/mainstone-flash.c
deleted file mode 100644
index d76487d82dcd..000000000000
--- a/drivers/mtd/maps/mainstone-flash.c
+++ /dev/null
@@ -1,180 +0,0 @@
1/*
2 * $Id: $
3 *
4 * Map driver for the Mainstone developer platform.
5 *
6 * Author: Nicolas Pitre
7 * Copyright: (C) 2001 MontaVista Software Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18
19#include <linux/mtd/mtd.h>
20#include <linux/mtd/map.h>
21#include <linux/mtd/partitions.h>
22
23#include <asm/io.h>
24#include <asm/hardware.h>
25#include <asm/arch/pxa-regs.h>
26#include <asm/arch/mainstone.h>
27#include <asm/cacheflush.h>
28
29
30#define ROM_ADDR 0x00000000
31#define FLASH_ADDR 0x04000000
32
33#define WINDOW_SIZE 0x04000000
34
35static void mainstone_map_inval_cache(struct map_info *map, unsigned long from,
36 ssize_t len)
37{
38 flush_ioremap_region(map->phys, map->cached, from, len);
39}
40
41static struct map_info mainstone_maps[2] = { {
42 .size = WINDOW_SIZE,
43 .phys = PXA_CS0_PHYS,
44 .inval_cache = mainstone_map_inval_cache,
45}, {
46 .size = WINDOW_SIZE,
47 .phys = PXA_CS1_PHYS,
48 .inval_cache = mainstone_map_inval_cache,
49} };
50
51static struct mtd_partition mainstone_partitions[] = {
52 {
53 .name = "Bootloader",
54 .size = 0x00040000,
55 .offset = 0,
56 .mask_flags = MTD_WRITEABLE /* force read-only */
57 },{
58 .name = "Kernel",
59 .size = 0x00400000,
60 .offset = 0x00040000,
61 },{
62 .name = "Filesystem",
63 .size = MTDPART_SIZ_FULL,
64 .offset = 0x00440000
65 }
66};
67
68static struct mtd_info *mymtds[2];
69static struct mtd_partition *parsed_parts[2];
70static int nr_parsed_parts[2];
71
72static const char *probes[] = { "RedBoot", "cmdlinepart", NULL };
73
74static int __init init_mainstone(void)
75{
76 int SW7 = 0; /* FIXME: get from SCR (Mst doc section 3.2.1.1) */
77 int ret = 0, i;
78
79 mainstone_maps[0].bankwidth = (BOOT_DEF & 1) ? 2 : 4;
80 mainstone_maps[1].bankwidth = 4;
81
82 /* Compensate for SW7 which swaps the flash banks */
83 mainstone_maps[SW7].name = "processor flash";
84 mainstone_maps[SW7 ^ 1].name = "main board flash";
85
86 printk(KERN_NOTICE "Mainstone configured to boot from %s\n",
87 mainstone_maps[0].name);
88
89 for (i = 0; i < 2; i++) {
90 mainstone_maps[i].virt = ioremap(mainstone_maps[i].phys,
91 WINDOW_SIZE);
92 if (!mainstone_maps[i].virt) {
93 printk(KERN_WARNING "Failed to ioremap %s\n",
94 mainstone_maps[i].name);
95 if (!ret)
96 ret = -ENOMEM;
97 continue;
98 }
99 mainstone_maps[i].cached =
100 ioremap_cached(mainstone_maps[i].phys, WINDOW_SIZE);
101 if (!mainstone_maps[i].cached)
102 printk(KERN_WARNING "Failed to ioremap cached %s\n",
103 mainstone_maps[i].name);
104 simple_map_init(&mainstone_maps[i]);
105
106 printk(KERN_NOTICE
107 "Probing %s at physical address 0x%08lx"
108 " (%d-bit bankwidth)\n",
109 mainstone_maps[i].name, mainstone_maps[i].phys,
110 mainstone_maps[i].bankwidth * 8);
111
112 mymtds[i] = do_map_probe("cfi_probe", &mainstone_maps[i]);
113
114 if (!mymtds[i]) {
115 iounmap((void *)mainstone_maps[i].virt);
116 if (mainstone_maps[i].cached)
117 iounmap(mainstone_maps[i].cached);
118 if (!ret)
119 ret = -EIO;
120 continue;
121 }
122 mymtds[i]->owner = THIS_MODULE;
123
124 ret = parse_mtd_partitions(mymtds[i], probes,
125 &parsed_parts[i], 0);
126
127 if (ret > 0)
128 nr_parsed_parts[i] = ret;
129 }
130
131 if (!mymtds[0] && !mymtds[1])
132 return ret;
133
134 for (i = 0; i < 2; i++) {
135 if (!mymtds[i]) {
136 printk(KERN_WARNING "%s is absent. Skipping\n",
137 mainstone_maps[i].name);
138 } else if (nr_parsed_parts[i]) {
139 add_mtd_partitions(mymtds[i], parsed_parts[i],
140 nr_parsed_parts[i]);
141 } else if (!i) {
142 printk("Using static partitions on %s\n",
143 mainstone_maps[i].name);
144 add_mtd_partitions(mymtds[i], mainstone_partitions,
145 ARRAY_SIZE(mainstone_partitions));
146 } else {
147 printk("Registering %s as whole device\n",
148 mainstone_maps[i].name);
149 add_mtd_device(mymtds[i]);
150 }
151 }
152 return 0;
153}
154
155static void __exit cleanup_mainstone(void)
156{
157 int i;
158 for (i = 0; i < 2; i++) {
159 if (!mymtds[i])
160 continue;
161
162 if (nr_parsed_parts[i] || !i)
163 del_mtd_partitions(mymtds[i]);
164 else
165 del_mtd_device(mymtds[i]);
166
167 map_destroy(mymtds[i]);
168 iounmap((void *)mainstone_maps[i].virt);
169 if (mainstone_maps[i].cached)
170 iounmap(mainstone_maps[i].cached);
171 kfree(parsed_parts[i]);
172 }
173}
174
175module_init(init_mainstone);
176module_exit(cleanup_mainstone);
177
178MODULE_LICENSE("GPL");
179MODULE_AUTHOR("Nicolas Pitre <nico@cam.org>");
180MODULE_DESCRIPTION("MTD map driver for Intel Mainstone");
diff --git a/drivers/mtd/maps/nettel.c b/drivers/mtd/maps/nettel.c
index 7b96cd02f82b..0c9b305a72e0 100644
--- a/drivers/mtd/maps/nettel.c
+++ b/drivers/mtd/maps/nettel.c
@@ -158,68 +158,11 @@ static struct notifier_block nettel_notifier_block = {
158 nettel_reboot_notifier, NULL, 0 158 nettel_reboot_notifier, NULL, 0
159}; 159};
160 160
161/*
162 * Erase the configuration file system.
163 * Used to support the software reset button.
164 */
165static void nettel_erasecallback(struct erase_info *done)
166{
167 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv;
168 wake_up(wait_q);
169}
170
171static struct erase_info nettel_erase;
172
173int nettel_eraseconfig(void)
174{
175 struct mtd_info *mtd;
176 DECLARE_WAITQUEUE(wait, current);
177 wait_queue_head_t wait_q;
178 int ret;
179
180 init_waitqueue_head(&wait_q);
181 mtd = get_mtd_device(NULL, 2);
182 if (!IS_ERR(mtd)) {
183 nettel_erase.mtd = mtd;
184 nettel_erase.callback = nettel_erasecallback;
186 nettel_erase.addr = 0;
187 nettel_erase.len = mtd->size;
188 nettel_erase.priv = (u_long) &wait_q;
190
191 set_current_state(TASK_INTERRUPTIBLE);
192 add_wait_queue(&wait_q, &wait);
193
194 ret = mtd->erase(mtd, &nettel_erase);
195 if (ret) {
196 set_current_state(TASK_RUNNING);
197 remove_wait_queue(&wait_q, &wait);
198 put_mtd_device(mtd);
199 return(ret);
200 }
201
202 schedule(); /* Wait for erase to finish. */
203 remove_wait_queue(&wait_q, &wait);
204
205 put_mtd_device(mtd);
206 }
207
208 return(0);
209}
210
211#else
212
213int nettel_eraseconfig(void)
214{
215 return(0);
216}
217
218#endif 161#endif
219 162
220/****************************************************************************/ 163/****************************************************************************/
221 164
222int __init nettel_init(void) 165static int __init nettel_init(void)
223{ 166{
224 volatile unsigned long *amdpar; 167 volatile unsigned long *amdpar;
225 unsigned long amdaddr, maxsize; 168 unsigned long amdaddr, maxsize;
@@ -421,10 +364,6 @@ int __init nettel_init(void)
421 364
422 intel_mtd->owner = THIS_MODULE; 365 intel_mtd->owner = THIS_MODULE;
423 366
424#ifndef CONFIG_BLK_DEV_INITRD
425 ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, 1);
426#endif
427
428 num_intel_partitions = sizeof(nettel_intel_partitions) / 367 num_intel_partitions = sizeof(nettel_intel_partitions) /
429 sizeof(nettel_intel_partitions[0]); 368 sizeof(nettel_intel_partitions[0]);
430 369
@@ -477,7 +416,7 @@ out_unmap2:
477 416
478/****************************************************************************/ 417/****************************************************************************/
479 418
480void __exit nettel_cleanup(void) 419static void __exit nettel_cleanup(void)
481{ 420{
482#ifdef CONFIG_MTD_CFI_INTELEXT 421#ifdef CONFIG_MTD_CFI_INTELEXT
483 unregister_reboot_notifier(&nettel_notifier_block); 422 unregister_reboot_notifier(&nettel_notifier_block);
diff --git a/drivers/mtd/maps/ocelot.c b/drivers/mtd/maps/ocelot.c
deleted file mode 100644
index 6977963d7897..000000000000
--- a/drivers/mtd/maps/ocelot.c
+++ /dev/null
@@ -1,175 +0,0 @@
1/*
2 * $Id: ocelot.c,v 1.17 2005/11/07 11:14:27 gleixner Exp $
3 *
4 * Flash on Momenco Ocelot
5 */
6
7#include <linux/module.h>
8#include <linux/types.h>
9#include <linux/kernel.h>
10#include <linux/init.h>
11#include <asm/io.h>
12#include <linux/mtd/mtd.h>
13#include <linux/mtd/map.h>
14#include <linux/mtd/partitions.h>
15
16#define OCELOT_PLD 0x2c000000
17#define FLASH_WINDOW_ADDR 0x2fc00000
18#define FLASH_WINDOW_SIZE 0x00080000
19#define FLASH_BUSWIDTH 1
20#define NVRAM_WINDOW_ADDR 0x2c800000
21#define NVRAM_WINDOW_SIZE 0x00007FF0
22#define NVRAM_BUSWIDTH 1
23
24static unsigned int cacheflush = 0;
25
26static struct mtd_info *flash_mtd;
27static struct mtd_info *nvram_mtd;
28
29static int ocelot_ram_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf)
30{
31 struct map_info *map = mtd->priv;
32 size_t done = 0;
33
34 /* If we use memcpy, it does word-wide writes. Even though we told the
35 GT64120A that it's an 8-bit wide region, word-wide writes don't work.
36 We end up just writing the first byte of the four to all four bytes.
37 So we have this loop instead */
38 *retlen = len;
39 while(len) {
40 __raw_writeb(*buf, map->virt + to);
41 buf++;
42 to++;
43 len--;
44 }
45 return 0;
46}
47static struct mtd_partition *parsed_parts;
48
49struct map_info ocelot_flash_map = {
50 .name = "Ocelot boot flash",
51 .size = FLASH_WINDOW_SIZE,
52 .bankwidth = FLASH_BUSWIDTH,
53 .phys = FLASH_WINDOW_ADDR,
54};
55
56struct map_info ocelot_nvram_map = {
57 .name = "Ocelot NVRAM",
58 .size = NVRAM_WINDOW_SIZE,
59 .bankwidth = NVRAM_BUSWIDTH,
60 .phys = NVRAM_WINDOW_ADDR,
61};
62
63static const char *probes[] = { "RedBoot", NULL };
64
65static int __init init_ocelot_maps(void)
66{
67 void *pld;
68 int nr_parts;
69 unsigned char brd_status;
70
71 printk(KERN_INFO "Momenco Ocelot MTD mappings: Flash 0x%x at 0x%x, NVRAM 0x%x at 0x%x\n",
72 FLASH_WINDOW_SIZE, FLASH_WINDOW_ADDR, NVRAM_WINDOW_SIZE, NVRAM_WINDOW_ADDR);
73
74 /* First check whether the flash jumper is present */
75 pld = ioremap(OCELOT_PLD, 0x10);
76 if (!pld) {
77 printk(KERN_NOTICE "Failed to ioremap Ocelot PLD\n");
78 return -EIO;
79 }
80 brd_status = readb(pld+4);
81 iounmap(pld);
82
83 /* Now ioremap the NVRAM space */
84 ocelot_nvram_map.virt = ioremap_nocache(NVRAM_WINDOW_ADDR, NVRAM_WINDOW_SIZE);
85 if (!ocelot_nvram_map.virt) {
86 printk(KERN_NOTICE "Failed to ioremap Ocelot NVRAM space\n");
87 return -EIO;
88 }
89
90 simple_map_init(&ocelot_nvram_map);
91
92 /* And do the RAM probe on it to get an MTD device */
93 nvram_mtd = do_map_probe("map_ram", &ocelot_nvram_map);
94 if (!nvram_mtd) {
95 printk("NVRAM probe failed\n");
96 goto fail_1;
97 }
98 nvram_mtd->owner = THIS_MODULE;
99 nvram_mtd->erasesize = 16;
100 /* Override the write() method */
101 nvram_mtd->write = ocelot_ram_write;
102
103 /* Now map the flash space */
104 ocelot_flash_map.virt = ioremap_nocache(FLASH_WINDOW_ADDR, FLASH_WINDOW_SIZE);
105 if (!ocelot_flash_map.virt) {
106 printk(KERN_NOTICE "Failed to ioremap Ocelot flash space\n");
107 goto fail_2;
108 }
109 /* Now the cached version */
110 ocelot_flash_map.cached = (unsigned long)__ioremap(FLASH_WINDOW_ADDR, FLASH_WINDOW_SIZE, 0);
111
112 simple_map_init(&ocelot_flash_map);
113
114 /* Only probe for flash if the write jumper is present */
115 if (brd_status & 0x40) {
116 flash_mtd = do_map_probe("jedec", &ocelot_flash_map);
117 } else {
118 printk(KERN_NOTICE "Ocelot flash write jumper not present. Treating as ROM\n");
119 }
120 /* If that failed or the jumper's absent, pretend it's ROM */
121 if (!flash_mtd) {
122 flash_mtd = do_map_probe("map_rom", &ocelot_flash_map);
123 /* If we're treating it as ROM, set the erase size */
124 if (flash_mtd)
125 flash_mtd->erasesize = 0x10000;
126 }
127 if (!flash_mtd)
128 goto fail3;
129
130 add_mtd_device(nvram_mtd);
131
132 flash_mtd->owner = THIS_MODULE;
133 nr_parts = parse_mtd_partitions(flash_mtd, probes, &parsed_parts, 0);
134
135 if (nr_parts > 0)
136 add_mtd_partitions(flash_mtd, parsed_parts, nr_parts);
137 else
138 add_mtd_device(flash_mtd);
139
140 return 0;
141
142 fail3:
143 iounmap((void *)ocelot_flash_map.virt);
144 if (ocelot_flash_map.cached)
145 iounmap((void *)ocelot_flash_map.cached);
146 fail_2:
147 map_destroy(nvram_mtd);
148 fail_1:
149 iounmap((void *)ocelot_nvram_map.virt);
150
151 return -ENXIO;
152}
153
154static void __exit cleanup_ocelot_maps(void)
155{
156 del_mtd_device(nvram_mtd);
157 map_destroy(nvram_mtd);
158 iounmap((void *)ocelot_nvram_map.virt);
159
160 if (parsed_parts)
161 del_mtd_partitions(flash_mtd);
162 else
163 del_mtd_device(flash_mtd);
164 map_destroy(flash_mtd);
165 iounmap((void *)ocelot_flash_map.virt);
166 if (ocelot_flash_map.cached)
167 iounmap((void *)ocelot_flash_map.cached);
168}
169
170module_init(init_ocelot_maps);
171module_exit(cleanup_ocelot_maps);
172
173MODULE_LICENSE("GPL");
174MODULE_AUTHOR("Red Hat, Inc. - David Woodhouse <dwmw2@cambridge.redhat.com>");
175MODULE_DESCRIPTION("MTD map driver for Momenco Ocelot board");
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index cf75a566442e..aeed9ea79714 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -232,7 +232,6 @@ static int __devinit of_flash_probe(struct of_device *dev,
232 info = kzalloc(sizeof(*info), GFP_KERNEL); 232 info = kzalloc(sizeof(*info), GFP_KERNEL);
233 if (!info) 233 if (!info)
234 goto err_out; 234 goto err_out;
235 memset(info, 0, sizeof(*info));
236 235
237 dev_set_drvdata(&dev->dev, info); 236 dev_set_drvdata(&dev->dev, info);
238 237
diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c
index 7e0377ec1c40..02bde8c982ec 100644
--- a/drivers/mtd/maps/pmcmsp-flash.c
+++ b/drivers/mtd/maps/pmcmsp-flash.c
@@ -73,13 +73,16 @@ int __init init_msp_flash(void)
73 return -ENXIO; 73 return -ENXIO;
74 74
75 printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt); 75 printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt);
76 msp_flash = (struct mtd_info **)kmalloc( 76
77 fcnt * sizeof(struct map_info *), GFP_KERNEL); 77 msp_flash = kmalloc(fcnt * sizeof(struct map_info *), GFP_KERNEL);
78 msp_parts = (struct mtd_partition **)kmalloc( 78 msp_parts = kmalloc(fcnt * sizeof(struct mtd_partition *), GFP_KERNEL);
79 fcnt * sizeof(struct mtd_partition *), GFP_KERNEL); 79 msp_maps = kcalloc(fcnt, sizeof(struct mtd_info), GFP_KERNEL);
80 msp_maps = (struct map_info *)kmalloc( 80 if (!msp_flash || !msp_parts || !msp_maps) {
81 fcnt * sizeof(struct mtd_info), GFP_KERNEL); 81 kfree(msp_maps);
82 memset(msp_maps, 0, fcnt * sizeof(struct mtd_info)); 82 kfree(msp_parts);
83 kfree(msp_flash);
84 return -ENOMEM;
85 }
83 86
84 /* loop over the flash devices, initializing each */ 87 /* loop over the flash devices, initializing each */
85 for (i = 0; i < fcnt; i++) { 88 for (i = 0; i < fcnt; i++) {
@@ -95,9 +98,8 @@ int __init init_msp_flash(void)
95 continue; 98 continue;
96 } 99 }
97 100
98 msp_parts[i] = (struct mtd_partition *)kmalloc( 101 msp_parts[i] = kcalloc(pcnt, sizeof(struct mtd_partition),
99 pcnt * sizeof(struct mtd_partition), GFP_KERNEL); 102 GFP_KERNEL);
100 memset(msp_parts[i], 0, pcnt * sizeof(struct mtd_partition));
101 103
102 /* now initialize the devices proper */ 104 /* now initialize the devices proper */
103 flash_name[5] = '0' + i; 105 flash_name[5] = '0' + i;
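The kcalloc() conversions above buy more than brevity: kcalloc(n, size, flags) returns zeroed memory and fails cleanly when n * size would overflow, which the open-coded kmalloc() + memset() pair never checked. A generic before/after sketch (names are illustrative, not from the patch):

	/* before: zero by hand, no overflow check on the multiplication */
	p = kmalloc(n * sizeof(*p), GFP_KERNEL);
	if (p)
		memset(p, 0, n * sizeof(*p));

	/* after: zeroed allocation, n * sizeof(*p) checked for overflow */
	p = kcalloc(n, sizeof(*p), GFP_KERNEL);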
diff --git a/drivers/mtd/maps/pmcmsp-ramroot.c b/drivers/mtd/maps/pmcmsp-ramroot.c
index 18049bceba8d..30de5c0c09a9 100644
--- a/drivers/mtd/maps/pmcmsp-ramroot.c
+++ b/drivers/mtd/maps/pmcmsp-ramroot.c
@@ -79,7 +79,6 @@ static int __init init_rrmap(void)
79 rr_mtd->owner = THIS_MODULE; 79 rr_mtd->owner = THIS_MODULE;
80 80
81 add_mtd_device(rr_mtd); 81 add_mtd_device(rr_mtd);
82 ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, rr_mtd->index);
83 82
84 return 0; 83 return 0;
85 } 84 }
diff --git a/drivers/mtd/maps/pq2fads.c b/drivers/mtd/maps/pq2fads.c
deleted file mode 100644
index fb78d87cc130..000000000000
--- a/drivers/mtd/maps/pq2fads.c
+++ /dev/null
@@ -1,88 +0,0 @@
1/*
2 * drivers/mtd/maps/pq2fads.c
3 *
4 * Mapping for the flash SIMM on 8272ADS and PQ2FADS board
5 *
6 * Author: Vitaly Bordug <vbordug@ru.mvista.com>
7 *
8 * 2005 (c) MontaVista Software, Inc. This file is licensed under
9 * the terms of the GNU General Public License version 2. This program
10 * is licensed "as is" without any warranty of any kind, whether express
11 * or implied.
12 */
13
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
17#include <linux/init.h>
18#include <asm/io.h>
19#include <asm/ppcboot.h>
20#include <linux/mtd/mtd.h>
21#include <linux/mtd/map.h>
22#include <linux/mtd/partitions.h>
23#include <linux/mtd/physmap.h>
24
25/*
26 NOTE: bank width and interleave relative to the installed flash
27 should have been chosen within MTD_CFI_GEOMETRY options.
28 */
29#define PQ2FADS_BANK_WIDTH 4
30
31static struct mtd_partition pq2fads_partitions[] = {
32 {
33#ifdef CONFIG_ADS8272
34 .name = "HRCW",
35 .size = 0x40000,
36 .offset = 0,
37 .mask_flags = MTD_WRITEABLE, /* force read-only */
38 }, {
39 .name = "User FS",
40 .size = 0x5c0000,
41 .offset = 0x40000,
42#else
43 .name = "User FS",
44 .size = 0x600000,
45 .offset = 0,
46#endif
47 }, {
48 .name = "uImage",
49 .size = 0x100000,
50 .offset = 0x600000,
51 .mask_flags = MTD_WRITEABLE, /* force read-only */
52 }, {
53 .name = "bootloader",
54 .size = 0x40000,
55 .offset = 0x700000,
56 .mask_flags = MTD_WRITEABLE, /* force read-only */
57 }, {
58 .name = "bootloader env",
59 .size = 0x40000,
60 .offset = 0x740000,
61 .mask_flags = MTD_WRITEABLE, /* force read-only */
62 }
63};
64
65
66/* pointer to the 8272ADS/PQ2FADS board info data */
67extern unsigned char __res[];
68
69static int __init init_pq2fads_mtd(void)
70{
71 bd_t *bd = (bd_t *)__res;
72 physmap_configure(bd->bi_flashstart, bd->bi_flashsize, PQ2FADS_BANK_WIDTH, NULL);
73
74 physmap_set_partitions(pq2fads_partitions,
75 sizeof (pq2fads_partitions) /
76 sizeof (pq2fads_partitions[0]));
77 return 0;
78}
79
80static void __exit cleanup_pq2fads_mtd(void)
81{
82}
83
84module_init(init_pq2fads_mtd);
85module_exit(cleanup_pq2fads_mtd);
86
87MODULE_LICENSE("GPL");
88MODULE_DESCRIPTION("MTD map and partitions for MPC8272ADS boards");
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
new file mode 100644
index 000000000000..cb933ac475d5
--- /dev/null
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -0,0 +1,200 @@
1/*
2 * Map driver for Intel XScale PXA2xx platforms.
3 *
4 * Author: Nicolas Pitre
5 * Copyright: (C) 2001 MontaVista Software Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/init.h>
16#include <linux/platform_device.h>
17#include <linux/dma-mapping.h>
18#include <linux/mtd/mtd.h>
19#include <linux/mtd/map.h>
20#include <linux/mtd/partitions.h>
21
22#include <asm/io.h>
23#include <asm/hardware.h>
24
25#include <asm/mach/flash.h>
26
27static void pxa2xx_map_inval_cache(struct map_info *map, unsigned long from,
28 ssize_t len)
29{
30 consistent_sync((char *)map->cached + from, len, DMA_FROM_DEVICE);
31}
32
33struct pxa2xx_flash_info {
34 struct mtd_partition *parts;
35 int nr_parts;
36 struct mtd_info *mtd;
37 struct map_info map;
38};
39
40
41static const char *probes[] = { "RedBoot", "cmdlinepart", NULL };
42
43
44static int __init pxa2xx_flash_probe(struct device *dev)
45{
46 struct platform_device *pdev = to_platform_device(dev);
47 struct flash_platform_data *flash = pdev->dev.platform_data;
48 struct pxa2xx_flash_info *info;
49 struct mtd_partition *parts;
50 struct resource *res;
51 int ret = 0;
52
53 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
54 if (!res)
55 return -ENODEV;
56
57 info = kmalloc(sizeof(struct pxa2xx_flash_info), GFP_KERNEL);
58 if (!info)
59 return -ENOMEM;
60
61 memset(info, 0, sizeof(struct pxa2xx_flash_info));
62 info->map.name = (char *) flash->name;
63 info->map.bankwidth = flash->width;
64 info->map.phys = res->start;
65 info->map.size = res->end - res->start + 1;
66 info->parts = flash->parts;
67 info->nr_parts = flash->nr_parts;
68
69 info->map.virt = ioremap(info->map.phys, info->map.size);
70 if (!info->map.virt) {
71 printk(KERN_WARNING "Failed to ioremap %s\n",
72 info->map.name);
73 return -ENOMEM;
74 }
75 info->map.cached =
76 ioremap_cached(info->map.phys, info->map.size);
77 if (!info->map.cached)
78 printk(KERN_WARNING "Failed to ioremap cached %s\n",
79 info->map.name);
80 info->map.inval_cache = pxa2xx_map_inval_cache;
81 simple_map_init(&info->map);
82
83 printk(KERN_NOTICE
84 "Probing %s at physical address 0x%08lx"
85 " (%d-bit bankwidth)\n",
86 info->map.name, (unsigned long)info->map.phys,
87 info->map.bankwidth * 8);
88
89 info->mtd = do_map_probe(flash->map_name, &info->map);
90
91 if (!info->mtd) {
92 iounmap((void *)info->map.virt);
93 if (info->map.cached)
94 iounmap(info->map.cached);
95 return -EIO;
96 }
97 info->mtd->owner = THIS_MODULE;
98
99#ifdef CONFIG_MTD_PARTITIONS
100 ret = parse_mtd_partitions(info->mtd, probes, &parts, 0);
101
102 if (ret > 0) {
103 info->nr_parts = ret;
104 info->parts = parts;
105 }
106#endif
107
108 if (info->nr_parts) {
109 add_mtd_partitions(info->mtd, info->parts,
110 info->nr_parts);
111 } else {
112 printk("Registering %s as whole device\n",
113 info->map.name);
114 add_mtd_device(info->mtd);
115 }
116
117 dev_set_drvdata(dev, info);
118 return 0;
119}
120
121static int __exit pxa2xx_flash_remove(struct device *dev)
122{
123 struct pxa2xx_flash_info *info = dev_get_drvdata(dev);
124
125 dev_set_drvdata(dev, NULL);
126
127#ifdef CONFIG_MTD_PARTITIONS
128 if (info->nr_parts)
129 del_mtd_partitions(info->mtd);
130 else
131#endif
132 del_mtd_device(info->mtd);
133
134 map_destroy(info->mtd);
135 iounmap(info->map.virt);
136 if (info->map.cached)
137 iounmap(info->map.cached);
138 kfree(info->parts);
139 kfree(info);
140 return 0;
141}
142
143#ifdef CONFIG_PM
144static int pxa2xx_flash_suspend(struct device *dev, pm_message_t state)
145{
146 struct pxa2xx_flash_info *info = dev_get_drvdata(dev);
147 int ret = 0;
148
149 if (info->mtd && info->mtd->suspend)
150 ret = info->mtd->suspend(info->mtd);
151 return ret;
152}
153
154static int pxa2xx_flash_resume(struct device *dev)
155{
156 struct pxa2xx_flash_info *info = dev_get_drvdata(dev);
157
158 if (info->mtd && info->mtd->resume)
159 info->mtd->resume(info->mtd);
160 return 0;
161}
162static void pxa2xx_flash_shutdown(struct device *dev)
163{
164 struct pxa2xx_flash_info *info = dev_get_drvdata(dev);
165
166 if (info && info->mtd->suspend(info->mtd) == 0)
167 info->mtd->resume(info->mtd);
168}
169#else
170#define pxa2xx_flash_suspend NULL
171#define pxa2xx_flash_resume NULL
172#define pxa2xx_flash_shutdown NULL
173#endif
174
175static struct device_driver pxa2xx_flash_driver = {
176 .name = "pxa2xx-flash",
177 .bus = &platform_bus_type,
178 .probe = pxa2xx_flash_probe,
179 .remove = __exit_p(pxa2xx_flash_remove),
180 .suspend = pxa2xx_flash_suspend,
181 .resume = pxa2xx_flash_resume,
182 .shutdown = pxa2xx_flash_shutdown,
183};
184
185static int __init init_pxa2xx_flash(void)
186{
187 return driver_register(&pxa2xx_flash_driver);
188}
189
190static void __exit cleanup_pxa2xx_flash(void)
191{
192 driver_unregister(&pxa2xx_flash_driver);
193}
194
195module_init(init_pxa2xx_flash);
196module_exit(cleanup_pxa2xx_flash);
197
198MODULE_LICENSE("GPL");
199MODULE_AUTHOR("Nicolas Pitre <nico@cam.org>");
200MODULE_DESCRIPTION("MTD map driver for Intel XScale PXA2xx");
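Since the driver binds by name and takes all board specifics from platform data, wiring a board up to it needs only a platform_device. A hypothetical sketch of such board code (every name, address, and size below is invented for illustration):

	#include <linux/platform_device.h>
	#include <asm/mach/flash.h>

	static struct resource board_flash_resource = {
		.start	= 0x00000000,			/* static chip select 0 */
		.end	= 0x04000000 - 1,		/* 64MiB window */
		.flags	= IORESOURCE_MEM,
	};

	static struct flash_platform_data board_flash_data = {
		.map_name	= "cfi_probe",		/* handed to do_map_probe() */
		.name		= "board-flash",
		.width		= 4,			/* bankwidth in bytes */
	};

	static struct platform_device board_flash_device = {
		.name		= "pxa2xx-flash",	/* must match the driver name */
		.id		= 0,
		.dev		= { .platform_data = &board_flash_data, },
		.num_resources	= 1,
		.resource	= &board_flash_resource,
	};

	/* from board init code: platform_device_register(&board_flash_device); */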
diff --git a/drivers/mtd/maps/tqm834x.c b/drivers/mtd/maps/tqm834x.c
deleted file mode 100644
index 9adc970e55e6..000000000000
--- a/drivers/mtd/maps/tqm834x.c
+++ /dev/null
@@ -1,286 +0,0 @@
1/*
2 * drivers/mtd/maps/tqm834x.c
3 *
4 * MTD mapping driver for TQM834x boards
5 *
6 * Copyright 2005 Wolfgang Denk, DENX Software Engineering, <wd@denx.de>.
7 *
8 * This file is licensed under the terms of the GNU General Public License
9 * version 2. This program is licensed "as is" without any warranty of any
10 * kind, whether express or implied.
11 *
12 */
13
14#include <linux/init.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <asm/io.h>
20#include <asm/ppcboot.h>
21
22#include <linux/mtd/mtd.h>
23#include <linux/mtd/map.h>
24#include <linux/mtd/partitions.h>
25
26#define FLASH_BANK_MAX 2
27
28extern unsigned char __res[];
29
30/* trivial struct to describe partition information */
31struct mtd_part_def
32{
33 int nums;
34 unsigned char *type;
35 struct mtd_partition* mtd_part;
36};
37
38static struct mtd_info* mtd_banks[FLASH_BANK_MAX];
39static struct map_info* map_banks[FLASH_BANK_MAX];
40static struct mtd_part_def part_banks[FLASH_BANK_MAX];
41
42static unsigned long num_banks;
43static unsigned long start_scan_addr;
44
45#ifdef CONFIG_MTD_PARTITIONS
46/*
47 * The following defines the partition layout of TQM834x boards.
48 *
49 * See include/linux/mtd/partitions.h for definition of the
50 * mtd_partition structure.
51 *
52 * Assume minimal initial size of 4 MiB per bank, will be updated
53 * later in init_tqm834x_mtd() routine.
54 */
55
56/* Partition definition for the first flash bank which is always present. */
57static struct mtd_partition tqm834x_partitions_bank1[] = {
58 {
59 .name = "u-boot", /* u-boot firmware */
60 .offset = 0x00000000,
61 .size = 0x00040000, /* 256 KiB */
62 /*mask_flags: MTD_WRITEABLE, * force read-only */
63 },
64 {
65 .name = "env", /* u-boot environment */
66 .offset = 0x00040000,
67 .size = 0x00020000, /* 128 KiB */
68 /*mask_flags: MTD_WRITEABLE, * force read-only */
69 },
70 {
71 .name = "kernel", /* linux kernel image */
72 .offset = 0x00060000,
73 .size = 0x00100000, /* 1 MiB */
74 /*mask_flags: MTD_WRITEABLE, * force read-only */
75 },
76 {
77 .name = "initrd", /* ramdisk image */
78 .offset = 0x00160000,
79 .size = 0x00200000, /* 2 MiB */
80 },
81 {
82 .name = "user", /* user data */
83 .offset = 0x00360000,
84 .size = 0x000a0000, /* remaining space */
85 /* NOTE: this partition size is re-calculated in */
86 /* init_tqm834x_mtd() to cover actual remaining space. */
87 },
88};
89
90/* Partition definition for the second flash bank which may be present on some
91 * TQM834x boards.
92 */
93static struct mtd_partition tqm834x_partitions_bank2[] = {
94 {
95 .name = "jffs2", /* jffs2 filesystem */
96 .offset = 0x00000000,
97 .size = 0x00400000, /* whole device */
98 /* NOTE: this partition size is re-calculated in */
99 /* init_tqm834x_mtd() to cover actual device size. */
100 },
101};
102
103#endif /* CONFIG_MTD_PARTITIONS */
104
105static int __init init_tqm834x_mtd(void)
106{
107 int idx = 0, ret = 0;
108 unsigned long flash_addr, flash_size, mtd_size = 0;
109
110 /* pointer to TQM834x board info data */
111 bd_t *bd = (bd_t *)__res;
112#ifdef CONFIG_MTD_CMDLINE_PARTS
113 int n;
114 char mtdid[4];
115 const char *part_probes[] = { "cmdlinepart", NULL };
116#endif
117
118 flash_addr = bd->bi_flashstart;
119 flash_size = bd->bi_flashsize;
120
121 /* request maximum flash size address space */
122 start_scan_addr = (unsigned long)ioremap(flash_addr, flash_size);
123 if (!start_scan_addr) {
124 printk("%s: Failed to ioremap address: 0x%lx\n",
125 __FUNCTION__, flash_addr);
126 return -EIO;
127 }
128
129 for(idx = 0 ; idx < FLASH_BANK_MAX ; idx++) {
130 if (mtd_size >= flash_size)
131 break;
132
133 pr_debug("%s: chip probing count %d\n", __FUNCTION__, idx);
134
135 map_banks[idx] = kzalloc(sizeof(struct map_info), GFP_KERNEL);
136 if (map_banks[idx] == NULL) {
137 ret = -ENOMEM;
138 goto error_mem;
139 }
140 map_banks[idx]->name = kzalloc(16, GFP_KERNEL);
141 if (map_banks[idx]->name == NULL) {
142 ret = -ENOMEM;
143 goto error_mem;
144 }
145
146 sprintf(map_banks[idx]->name, "TQM834x-%d", idx);
147 map_banks[idx]->size = flash_size;
148 map_banks[idx]->bankwidth = 4;
149
150 simple_map_init(map_banks[idx]);
151
152 map_banks[idx]->virt = (void __iomem *)
153 (start_scan_addr + ((idx > 0) ?
154 (mtd_banks[idx-1] ? mtd_banks[idx-1]->size : 0) : 0));
155 map_banks[idx]->phys =
156 flash_addr + ((idx > 0) ?
157 (mtd_banks[idx-1] ? mtd_banks[idx-1]->size : 0) : 0);
158
159 /* start to probe flash chips */
160 mtd_banks[idx] = do_map_probe("cfi_probe", map_banks[idx]);
161 if (mtd_banks[idx]) {
162 mtd_banks[idx]->owner = THIS_MODULE;
163 mtd_size += mtd_banks[idx]->size;
164 num_banks++;
165 pr_debug("%s: bank %ld, name: %s, size: %d bytes \n",
166 __FUNCTION__, num_banks,
167 mtd_banks[idx]->name, mtd_banks[idx]->size);
168 }
169 }
170
171 /* no supported flash chips found */
172 if (!num_banks) {
173 printk("TQM834x: No supported flash chips found!\n");
174 ret = -ENXIO;
175 goto error_mem;
176 }
177
178#ifdef CONFIG_MTD_PARTITIONS
179 /*
180 * Select static partition definitions
181 */
182 n = ARRAY_SIZE(tqm834x_partitions_bank1);
183 part_banks[0].mtd_part = tqm834x_partitions_bank1;
184 part_banks[0].type = "static image bank1";
185 part_banks[0].nums = n;
186
187 /* update last partition size to cover actual remaining space */
188 tqm834x_partitions_bank1[n - 1].size =
189 mtd_banks[0]->size -
190 tqm834x_partitions_bank1[n - 1].offset;
191
192 /* check if we have a second bank */
193 if (num_banks == 2) {
194 n = ARRAY_SIZE(tqm834x_partitions_bank2);
195 part_banks[1].mtd_part = tqm834x_partitions_bank2;
196 part_banks[1].type = "static image bank2";
197 part_banks[1].nums = n;
198
199 /* update last partition size to cover actual remaining space */
200 tqm834x_partitions_bank2[n - 1].size =
201 mtd_banks[1]->size -
202 tqm834x_partitions_bank2[n - 1].offset;
203 }
204
205 for(idx = 0; idx < num_banks ; idx++) {
206#ifdef CONFIG_MTD_CMDLINE_PARTS
207 sprintf(mtdid, "%d", idx);
208 n = parse_mtd_partitions(mtd_banks[idx],
209 part_probes,
210 &part_banks[idx].mtd_part,
211 0);
212 pr_debug("%s: %d command line partitions on bank %s\n",
213 __FUNCTION__, n, mtdid);
214 if (n > 0) {
215 part_banks[idx].type = "command line";
216 part_banks[idx].nums = n;
217 }
218#endif /* CONFIG_MTD_CMDLINE_PARTS */
219 if (part_banks[idx].nums == 0) {
220 printk(KERN_NOTICE
221 "TQM834x flash bank %d: no partition info "
222 "available, registering whole device\n", idx);
223 add_mtd_device(mtd_banks[idx]);
224 } else {
225 printk(KERN_NOTICE
226 "TQM834x flash bank %d: Using %s partition "
227 "definition\n", idx, part_banks[idx].type);
228 add_mtd_partitions(mtd_banks[idx],
229 part_banks[idx].mtd_part,
230 part_banks[idx].nums);
231 }
232 }
233#else /* ! CONFIG_MTD_PARTITIONS */
234 printk(KERN_NOTICE "TQM834x flash: registering %d flash banks "
235 "at once\n", num_banks);
236
237 for(idx = 0 ; idx < num_banks ; idx++)
238 add_mtd_device(mtd_banks[idx]);
239
240#endif /* CONFIG_MTD_PARTITIONS */
241
242 return 0;
243error_mem:
244 for (idx = 0 ; idx < FLASH_BANK_MAX ; idx++) {
245 if (map_banks[idx] != NULL) {
246 if (map_banks[idx]->name != NULL) {
247 kfree(map_banks[idx]->name);
248 map_banks[idx]->name = NULL;
249 }
250 kfree(map_banks[idx]);
251 map_banks[idx] = NULL;
252 }
253 }
254
255 iounmap((void *)start_scan_addr);
256
257 return ret;
258}
259
260static void __exit cleanup_tqm834x_mtd(void)
261{
262 unsigned int idx = 0;
263 for(idx = 0 ; idx < num_banks ; idx++) {
264 /* destroy mtd_info previously allocated */
265 if (mtd_banks[idx]) {
266 del_mtd_partitions(mtd_banks[idx]);
267 map_destroy(mtd_banks[idx]);
268 }
269
270 /* release map_info not used anymore */
271 kfree(map_banks[idx]->name);
272 kfree(map_banks[idx]);
273 }
274
275 if (start_scan_addr) {
276 iounmap((void *)start_scan_addr);
277 start_scan_addr = 0;
278 }
279}
280
281module_init(init_tqm834x_mtd);
282module_exit(cleanup_tqm834x_mtd);
283
284MODULE_LICENSE("GPL");
285MODULE_AUTHOR("Wolfgang Denk <wd@denx.de>");
286MODULE_DESCRIPTION("MTD map driver for TQM834x boards");
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index ef89780eb9d6..74d9d30edabd 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -24,10 +24,9 @@
24#include <linux/kthread.h> 24#include <linux/kthread.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26 26
27static LIST_HEAD(blktrans_majors); 27#include "mtdcore.h"
28 28
29extern struct mutex mtd_table_mutex; 29static LIST_HEAD(blktrans_majors);
30extern struct mtd_info *mtd_table[];
31 30
32struct mtd_blkcore_priv { 31struct mtd_blkcore_priv {
33 struct task_struct *thread; 32 struct task_struct *thread;
@@ -202,7 +201,7 @@ static int blktrans_ioctl(struct inode *inode, struct file *file,
202 } 201 }
203} 202}
204 203
205struct block_device_operations mtd_blktrans_ops = { 204static struct block_device_operations mtd_blktrans_ops = {
206 .owner = THIS_MODULE, 205 .owner = THIS_MODULE,
207 .open = blktrans_open, 206 .open = blktrans_open,
208 .release = blktrans_release, 207 .release = blktrans_release,
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index d091b2430b48..22ed96c4b7bd 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -136,7 +136,8 @@ static int mtd_close(struct inode *inode, struct file *file)
136 136
137 DEBUG(MTD_DEBUG_LEVEL0, "MTD_close\n"); 137 DEBUG(MTD_DEBUG_LEVEL0, "MTD_close\n");
138 138
139 if (mtd->sync) 139 /* Only sync if opened RW */
140 if ((file->f_mode & 2) && mtd->sync)
140 mtd->sync(mtd); 141 mtd->sync(mtd);
141 142
142 put_mtd_device(mtd); 143 put_mtd_device(mtd);
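The bare 2 in the new condition is the value of FMODE_WRITE from <linux/fs.h>, so an equivalent, more self-documenting spelling of the same test would be:

	/* Only sync if the file was opened for writing */
	if ((file->f_mode & FMODE_WRITE) && mtd->sync)
		mtd->sync(mtd);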
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 41844ea02462..96be7ef62f35 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -178,7 +178,7 @@ concat_writev(struct mtd_info *mtd, const struct kvec *vecs,
178 178
179 /* Check alignment */ 179 /* Check alignment */
180 if (mtd->writesize > 1) { 180 if (mtd->writesize > 1) {
181 loff_t __to = to; 181 uint64_t __to = to;
182 if (do_div(__to, mtd->writesize) || (total_len % mtd->writesize)) 182 if (do_div(__to, mtd->writesize) || (total_len % mtd->writesize))
183 return -EINVAL; 183 return -EINVAL;
184 } 184 }
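The loff_t to uint64_t change is needed because do_div() is specified for an unsigned 64-bit dividend: it divides its first argument in place and returns the 32-bit remainder, and a signed type can misbehave with some architectures' implementations. The alignment test therefore works on a scratch copy, roughly:

	uint64_t ofs = to;			/* copy, since do_div() clobbers it */
	if (do_div(ofs, mtd->writesize) ||	/* nonzero remainder: 'to' misaligned */
	    (total_len % mtd->writesize))	/* total length must be aligned too */
		return -EINVAL;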
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index c153b64a8300..6c2645e28371 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -22,6 +22,8 @@
22 22
23#include <linux/mtd/mtd.h> 23#include <linux/mtd/mtd.h>
24 24
25#include "mtdcore.h"
26
25/* These are exported solely for the purpose of mtd_blkdevs.c. You 27/* These are exported solely for the purpose of mtd_blkdevs.c. You
26 should not use them for _anything_ else */ 28 should not use them for _anything_ else */
27DEFINE_MUTEX(mtd_table_mutex); 29DEFINE_MUTEX(mtd_table_mutex);
diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
new file mode 100644
index 000000000000..a33251f4b872
--- /dev/null
+++ b/drivers/mtd/mtdcore.h
@@ -0,0 +1,11 @@
1/* linux/drivers/mtd/mtdcore.h
2 *
3 * Header file for driver private mtdcore exports
4 *
5 */
6
7/* These are exported solely for the purpose of mtd_blkdevs.c. You
8 should not use them for _anything_ else */
9
10extern struct mutex mtd_table_mutex;
11extern struct mtd_info *mtd_table[MAX_MTD_DEVICES];
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
new file mode 100644
index 000000000000..f8af627f0b98
--- /dev/null
+++ b/drivers/mtd/mtdoops.c
@@ -0,0 +1,376 @@
1/*
2 * MTD Oops/Panic logger
3 *
4 * Copyright (C) 2007 Nokia Corporation. All rights reserved.
5 *
6 * Author: Richard Purdie <rpurdie@openedhand.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
20 * 02110-1301 USA
21 *
22 */
23
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/console.h>
27#include <linux/vmalloc.h>
28#include <linux/workqueue.h>
29#include <linux/sched.h>
30#include <linux/wait.h>
31#include <linux/mtd/mtd.h>
32
33#define OOPS_PAGE_SIZE 4096
34
35static struct mtdoops_context {
36 int mtd_index;
37 struct work_struct work;
38 struct mtd_info *mtd;
39 int oops_pages;
40 int nextpage;
41 int nextcount;
42
43 void *oops_buf;
44 int ready;
45 int writecount;
46} oops_cxt;
47
48static void mtdoops_erase_callback(struct erase_info *done)
49{
50 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv;
51 wake_up(wait_q);
52}
53
54static int mtdoops_erase_block(struct mtd_info *mtd, int offset)
55{
56 struct erase_info erase;
57 DECLARE_WAITQUEUE(wait, current);
58 wait_queue_head_t wait_q;
59 int ret;
60
61 init_waitqueue_head(&wait_q);
62 erase.mtd = mtd;
63 erase.callback = mtdoops_erase_callback;
64 erase.addr = offset;
65 if (mtd->erasesize < OOPS_PAGE_SIZE)
66 erase.len = OOPS_PAGE_SIZE;
67 else
68 erase.len = mtd->erasesize;
69 erase.priv = (u_long)&wait_q;
70
71 set_current_state(TASK_INTERRUPTIBLE);
72 add_wait_queue(&wait_q, &wait);
73
74 ret = mtd->erase(mtd, &erase);
75 if (ret) {
76 set_current_state(TASK_RUNNING);
77 remove_wait_queue(&wait_q, &wait);
78 printk (KERN_WARNING "mtdoops: erase of region [0x%x, 0x%x] "
79 "on \"%s\" failed\n",
80 erase.addr, erase.len, mtd->name);
81 return ret;
82 }
83
84 schedule(); /* Wait for erase to finish. */
85 remove_wait_queue(&wait_q, &wait);
86
87 return 0;
88}
89
90static int mtdoops_inc_counter(struct mtdoops_context *cxt)
91{
92 struct mtd_info *mtd = cxt->mtd;
93 size_t retlen;
94 u32 count;
95 int ret;
96
97 cxt->nextpage++;
98 if (cxt->nextpage > cxt->oops_pages)
99 cxt->nextpage = 0;
100 cxt->nextcount++;
101 if (cxt->nextcount == 0xffffffff)
102 cxt->nextcount = 0;
103
104 ret = mtd->read(mtd, cxt->nextpage * OOPS_PAGE_SIZE, 4,
105 &retlen, (u_char *) &count);
106 if ((retlen != 4) || (ret < 0)) {
107 printk(KERN_ERR "mtdoops: Read failure at %d (%td of 4 read)"
108 ", err %d.\n", cxt->nextpage * OOPS_PAGE_SIZE,
109 retlen, ret);
110 return 1;
111 }
112
113 /* See if we need to erase the next block */
114 if (count != 0xffffffff)
115 return 1;
116
117 printk(KERN_DEBUG "mtdoops: Ready %d, %d (no erase)\n",
118 cxt->nextpage, cxt->nextcount);
119 cxt->ready = 1;
120 return 0;
121}
122
123static void mtdoops_prepare(struct mtdoops_context *cxt)
124{
125 struct mtd_info *mtd = cxt->mtd;
126 int i = 0, j, ret, mod;
127
128 /* We were unregistered */
129 if (!mtd)
130 return;
131
132 mod = (cxt->nextpage * OOPS_PAGE_SIZE) % mtd->erasesize;
133 if (mod != 0) {
134 cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / OOPS_PAGE_SIZE);
135 if (cxt->nextpage > cxt->oops_pages)
136 cxt->nextpage = 0;
137 }
138
139 while (mtd->block_isbad &&
140 mtd->block_isbad(mtd, cxt->nextpage * OOPS_PAGE_SIZE)) {
141badblock:
142 printk(KERN_WARNING "mtdoops: Bad block at %08x\n",
143 cxt->nextpage * OOPS_PAGE_SIZE);
144 i++;
145 cxt->nextpage = cxt->nextpage + (mtd->erasesize / OOPS_PAGE_SIZE);
146 if (cxt->nextpage > cxt->oops_pages)
147 cxt->nextpage = 0;
148 if (i == (cxt->oops_pages / (mtd->erasesize / OOPS_PAGE_SIZE))) {
149 printk(KERN_ERR "mtdoops: All blocks bad!\n");
150 return;
151 }
152 }
153
154 for (j = 0, ret = -1; (j < 3) && (ret < 0); j++)
155 ret = mtdoops_erase_block(mtd, cxt->nextpage * OOPS_PAGE_SIZE);
156
157 if (ret < 0) {
158 if (mtd->block_markbad)
159 mtd->block_markbad(mtd, cxt->nextpage * OOPS_PAGE_SIZE);
160 goto badblock;
161 }
162
163 printk(KERN_DEBUG "mtdoops: Ready %d, %d \n", cxt->nextpage, cxt->nextcount);
164
165 cxt->ready = 1;
166}
167
168static void mtdoops_workfunc(struct work_struct *work)
169{
170 struct mtdoops_context *cxt =
171 container_of(work, struct mtdoops_context, work);
172
173 mtdoops_prepare(cxt);
174}
175
176static int find_next_position(struct mtdoops_context *cxt)
177{
178 struct mtd_info *mtd = cxt->mtd;
179 int page, maxpos = 0;
180 u32 count, maxcount = 0xffffffff;
181 size_t retlen;
182
183 for (page = 0; page < cxt->oops_pages; page++) {
184 mtd->read(mtd, page * OOPS_PAGE_SIZE, 4, &retlen, (u_char *) &count);
185 if (count == 0xffffffff)
186 continue;
187 if (maxcount == 0xffffffff) {
188 maxcount = count;
189 maxpos = page;
190 } else if ((count < 0x40000000) && (maxcount > 0xc0000000)) {
191 maxcount = count;
192 maxpos = page;
193 } else if ((count > maxcount) && (count < 0xc0000000)) {
194 maxcount = count;
195 maxpos = page;
196 } else if ((count > maxcount) && (count > 0xc0000000)
197 && (maxcount > 0x80000000)) {
198 maxcount = count;
199 maxpos = page;
200 }
201 }
202 if (maxcount == 0xffffffff) {
203 cxt->nextpage = 0;
204 cxt->nextcount = 1;
205 cxt->ready = 1;
206 printk(KERN_DEBUG "mtdoops: Ready %d, %d (first init)\n",
207 cxt->nextpage, cxt->nextcount);
208 return 0;
209 }
210
211 cxt->nextpage = maxpos;
212 cxt->nextcount = maxcount;
213
214 return mtdoops_inc_counter(cxt);
215}
216
217
218static void mtdoops_notify_add(struct mtd_info *mtd)
219{
220 struct mtdoops_context *cxt = &oops_cxt;
221 int ret;
222
223 if ((mtd->index != cxt->mtd_index) || cxt->mtd_index < 0)
224 return;
225
226 if (mtd->size < (mtd->erasesize * 2)) {
227 printk(KERN_ERR "MTD partition %d not big enough for mtdoops\n",
228 mtd->index);
229 return;
230 }
231
232 cxt->mtd = mtd;
233 cxt->oops_pages = mtd->size / OOPS_PAGE_SIZE;
234
235 ret = find_next_position(cxt);
236 if (ret == 1)
237 mtdoops_prepare(cxt);
238
239 printk(KERN_DEBUG "mtdoops: Attached to MTD device %d\n", mtd->index);
240}
241
242static void mtdoops_notify_remove(struct mtd_info *mtd)
243{
244 struct mtdoops_context *cxt = &oops_cxt;
245
246 if ((mtd->index != cxt->mtd_index) || cxt->mtd_index < 0)
247 return;
248
249 cxt->mtd = NULL;
250 flush_scheduled_work();
251}
252
253static void mtdoops_console_sync(void)
254{
255 struct mtdoops_context *cxt = &oops_cxt;
256 struct mtd_info *mtd = cxt->mtd;
257 size_t retlen;
258 int ret;
259
260 if (!cxt->ready || !mtd)
261 return;
262
263 if (cxt->writecount == 0)
264 return;
265
266 if (cxt->writecount < OOPS_PAGE_SIZE)
267 memset(cxt->oops_buf + cxt->writecount, 0xff,
268 OOPS_PAGE_SIZE - cxt->writecount);
269
270 ret = mtd->write(mtd, cxt->nextpage * OOPS_PAGE_SIZE,
271 OOPS_PAGE_SIZE, &retlen, cxt->oops_buf);
272 cxt->ready = 0;
273 cxt->writecount = 0;
274
275 if ((retlen != OOPS_PAGE_SIZE) || (ret < 0))
276 printk(KERN_ERR "mtdoops: Write failure at %d (%td of %d written), err %d.\n",
277 cxt->nextpage * OOPS_PAGE_SIZE, retlen, OOPS_PAGE_SIZE, ret);
278
279 ret = mtdoops_inc_counter(cxt);
280 if (ret == 1)
281 schedule_work(&cxt->work);
282}
283
284static void
285mtdoops_console_write(struct console *co, const char *s, unsigned int count)
286{
287 struct mtdoops_context *cxt = co->data;
288 struct mtd_info *mtd = cxt->mtd;
289 int i;
290
291 if (!oops_in_progress) {
292 mtdoops_console_sync();
293 return;
294 }
295
296 if (!cxt->ready || !mtd)
297 return;
298
299 if (cxt->writecount == 0) {
300 u32 *stamp = cxt->oops_buf;
301 *stamp = cxt->nextcount;
302 cxt->writecount = 4;
303 }
304
305 if ((count + cxt->writecount) > OOPS_PAGE_SIZE)
306 count = OOPS_PAGE_SIZE - cxt->writecount;
307
308 for (i = 0; i < count; i++, s++)
309 *((char *)(cxt->oops_buf) + cxt->writecount + i) = *s;
310
311 cxt->writecount = cxt->writecount + count;
312}
313
314static int __init mtdoops_console_setup(struct console *co, char *options)
315{
316 struct mtdoops_context *cxt = co->data;
317
318 if (cxt->mtd_index != -1)
319 return -EBUSY;
320 if (co->index == -1)
321 return -EINVAL;
322
323 cxt->mtd_index = co->index;
324 return 0;
325}
326
327static struct mtd_notifier mtdoops_notifier = {
328 .add = mtdoops_notify_add,
329 .remove = mtdoops_notify_remove,
330};
331
332static struct console mtdoops_console = {
333 .name = "ttyMTD",
334 .write = mtdoops_console_write,
335 .setup = mtdoops_console_setup,
336 .unblank = mtdoops_console_sync,
337 .flags = CON_PRINTBUFFER,
338 .index = -1,
339 .data = &oops_cxt,
340};
341
342static int __init mtdoops_console_init(void)
343{
344 struct mtdoops_context *cxt = &oops_cxt;
345
346 cxt->mtd_index = -1;
347 cxt->oops_buf = vmalloc(OOPS_PAGE_SIZE);
348
349 if (!cxt->oops_buf) {
350 printk(KERN_ERR "Failed to allocate oops buffer workspace\n");
351 return -ENOMEM;
352 }
353
354 INIT_WORK(&cxt->work, mtdoops_workfunc);
355
356 register_console(&mtdoops_console);
357 register_mtd_user(&mtdoops_notifier);
358 return 0;
359}
360
361static void __exit mtdoops_console_exit(void)
362{
363 struct mtdoops_context *cxt = &oops_cxt;
364
365 unregister_mtd_user(&mtdoops_notifier);
366 unregister_console(&mtdoops_console);
367 vfree(cxt->oops_buf);
368}
369
370
371subsys_initcall(mtdoops_console_init);
372module_exit(mtdoops_console_exit);
373
374MODULE_LICENSE("GPL");
375MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>");
376MODULE_DESCRIPTION("MTD Oops/Panic console logger/driver");
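One subtlety worth spelling out is the page selection in find_next_position(): every record begins with a 32-bit sequence counter, erased pages read back as 0xffffffff, and the counter wraps, so the newest record cannot be found by taking a plain maximum. The quadrant comparisons above approximate ordering modulo 2^32; a compact equivalent predicate, shown purely as an illustration (not the driver's code) and valid while live counters span less than 2^31, is:

	/* Nonzero if counter a is more recent than b under mod-2^32 wraparound;
	 * the same signed-difference idiom as the kernel's time_after(). */
	static inline int counter_after(u32 a, u32 b)
	{
		return (s32)(a - b) > 0;
	}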
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index f1d60b6f048e..8f9c3baeb38e 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -91,6 +91,25 @@ config MTD_NAND_AU1550
91 This enables the driver for the NAND flash controller on the 91 This enables the driver for the NAND flash controller on the
92 AMD/Alchemy 1550 SOC. 92 AMD/Alchemy 1550 SOC.
93 93
94config MTD_NAND_BF5XX
95 tristate "Blackfin on-chip NAND Flash Controller driver"
96 depends on BF54x && MTD_NAND
97 help
98 This enables the Blackfin on-chip NAND flash controller.
99
100 No board specific support is done by this driver; each board
101 must advertise a platform_device for the driver to attach.
102
103 This driver can also be built as a module. If so, the module
104 will be called bf5xx-nand.
105
106config MTD_NAND_BF5XX_HWECC
107 bool "BF5XX NAND Hardware ECC"
108 depends on MTD_NAND_BF5XX
109 help
110 Enable the use of the BF5XX's internal ECC generator when
111 using NAND.
112
94config MTD_NAND_RTC_FROM4 113config MTD_NAND_RTC_FROM4
95 tristate "Renesas Flash ROM 4-slot interface board (FROM_BOARD4)" 114 tristate "Renesas Flash ROM 4-slot interface board (FROM_BOARD4)"
96 depends on SH_SOLUTION_ENGINE 115 depends on SH_SOLUTION_ENGINE
@@ -134,10 +153,10 @@ config MTD_NAND_S3C2410_HWECC
134 153
135config MTD_NAND_NDFC 154config MTD_NAND_NDFC
136	tristate "NDFC NAND Flash Controller" 155	tristate "NDFC NAND Flash Controller"
137 depends on 44x 156 depends on 4xx && !PPC_MERGE
138 select MTD_NAND_ECC_SMC 157 select MTD_NAND_ECC_SMC
139 help 158 help
140	  NDFC NAND Flash Controllers are integrated in EP44x SoCs 159	  NDFC NAND Flash Controllers are integrated in IBM/AMCC's 4xx SoCs
141 160
142config MTD_NAND_S3C2410_CLKSTOP 161config MTD_NAND_S3C2410_CLKSTOP
143 bool "S3C2410 NAND IDLE clock stop" 162 bool "S3C2410 NAND IDLE clock stop"
@@ -237,7 +256,7 @@ config MTD_NAND_CAFE
237 select REED_SOLOMON 256 select REED_SOLOMON
238 select REED_SOLOMON_DEC16 257 select REED_SOLOMON_DEC16
239 help 258 help
240 Use NAND flash attached to the CAFÉ chip designed for the $100 259 Use NAND flash attached to the CAFÉ chip designed for the OLPC
241 laptop. 260 laptop.
242 261
243config MTD_NAND_CS553X 262config MTD_NAND_CS553X
@@ -280,5 +299,11 @@ config MTD_NAND_PLATFORM
280 devices. You will need to provide platform-specific functions 299 devices. You will need to provide platform-specific functions
281 via platform_data. 300 via platform_data.
282 301
302config MTD_ALAUDA
303	tristate "MTD driver for Olympus MAUSB-10 and Fujifilm DPC-R1"
304 depends on MTD_NAND && USB
305 help
306	  These two (and possibly other) Alauda-based card readers for
307 SmartMedia and xD allow raw flash access.
283 308
284endif # MTD_NAND 309endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index edba1db14bfa..3ad6c0165da3 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_MTD_NAND_TOTO) += toto.o
13obj-$(CONFIG_MTD_NAND_AUTCPU12) += autcpu12.o 13obj-$(CONFIG_MTD_NAND_AUTCPU12) += autcpu12.o
14obj-$(CONFIG_MTD_NAND_EDB7312) += edb7312.o 14obj-$(CONFIG_MTD_NAND_EDB7312) += edb7312.o
15obj-$(CONFIG_MTD_NAND_AU1550) += au1550nd.o 15obj-$(CONFIG_MTD_NAND_AU1550) += au1550nd.o
16obj-$(CONFIG_MTD_NAND_BF5XX) += bf5xx_nand.o
16obj-$(CONFIG_MTD_NAND_PPCHAMELEONEVB) += ppchameleonevb.o 17obj-$(CONFIG_MTD_NAND_PPCHAMELEONEVB) += ppchameleonevb.o
17obj-$(CONFIG_MTD_NAND_S3C2410) += s3c2410.o 18obj-$(CONFIG_MTD_NAND_S3C2410) += s3c2410.o
18obj-$(CONFIG_MTD_NAND_DISKONCHIP) += diskonchip.o 19obj-$(CONFIG_MTD_NAND_DISKONCHIP) += diskonchip.o
@@ -27,5 +28,6 @@ obj-$(CONFIG_MTD_NAND_AT91) += at91_nand.o
27obj-$(CONFIG_MTD_NAND_CM_X270) += cmx270_nand.o 28obj-$(CONFIG_MTD_NAND_CM_X270) += cmx270_nand.o
28obj-$(CONFIG_MTD_NAND_BASLER_EXCITE) += excite_nandflash.o 29obj-$(CONFIG_MTD_NAND_BASLER_EXCITE) += excite_nandflash.o
29obj-$(CONFIG_MTD_NAND_PLATFORM) += plat_nand.o 30obj-$(CONFIG_MTD_NAND_PLATFORM) += plat_nand.o
31obj-$(CONFIG_MTD_ALAUDA) += alauda.o
30 32
31nand-objs := nand_base.o nand_bbt.o 33nand-objs := nand_base.o nand_bbt.o
diff --git a/drivers/mtd/nand/alauda.c b/drivers/mtd/nand/alauda.c
new file mode 100644
index 000000000000..257937cd99bf
--- /dev/null
+++ b/drivers/mtd/nand/alauda.c
@@ -0,0 +1,742 @@
1/*
2 * MTD driver for Alauda chips
3 *
4 * Copyright (C) 2007 Joern Engel <joern@logfs.org>
5 *
6 * Based on drivers/usb/usb-skeleton.c which is:
7 * Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
8 * and on drivers/usb/storage/alauda.c, which is:
9 * (c) 2005 Daniel Drake <dsd@gentoo.org>
10 *
11 * Idea and initial work by Arnd Bergmann <arnd@arndb.de>
12 */
13#include <linux/kernel.h>
14#include <linux/errno.h>
15#include <linux/init.h>
16#include <linux/slab.h>
17#include <linux/module.h>
18#include <linux/kref.h>
19#include <linux/usb.h>
20#include <linux/mutex.h>
21#include <linux/mtd/mtd.h>
22#include <linux/mtd/nand_ecc.h>
23
24/* Control commands */
25#define ALAUDA_GET_XD_MEDIA_STATUS 0x08
26#define ALAUDA_ACK_XD_MEDIA_CHANGE 0x0a
27#define ALAUDA_GET_XD_MEDIA_SIG 0x86
28
29/* Common prefix */
30#define ALAUDA_BULK_CMD 0x40
31
32/* The two ports */
33#define ALAUDA_PORT_XD 0x00
34#define ALAUDA_PORT_SM 0x01
35
36/* Bulk commands */
37#define ALAUDA_BULK_READ_PAGE 0x84
38#define ALAUDA_BULK_READ_OOB 0x85 /* don't use, there's a chip bug */
39#define ALAUDA_BULK_READ_BLOCK 0x94
40#define ALAUDA_BULK_ERASE_BLOCK 0xa3
41#define ALAUDA_BULK_WRITE_PAGE 0xa4
42#define ALAUDA_BULK_WRITE_BLOCK 0xb4
43#define ALAUDA_BULK_RESET_MEDIA 0xe0
44
45/* Address shifting */
46#define PBA_LO(pba) ((pba & 0xF) << 5)
47#define PBA_HI(pba) (pba >> 3)
48#define PBA_ZONE(pba) (pba >> 11)
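/*
 * The 9-byte bulk commands below split the physical block address (pba)
 * across three command bytes; PBA_LO shares its byte with the
 * page-within-block index (see __alauda_read_page).
 */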
49
50#define TIMEOUT HZ
51
52static struct usb_device_id alauda_table [] = {
53 { USB_DEVICE(0x0584, 0x0008) }, /* Fujifilm DPC-R1 */
54 { USB_DEVICE(0x07b4, 0x010a) }, /* Olympus MAUSB-10 */
55 { }
56};
57MODULE_DEVICE_TABLE(usb, alauda_table);
58
59struct alauda_card {
60 u8 id; /* id byte */
61 u8 chipshift; /* 1<<chipshift total size */
62 u8 pageshift; /* 1<<pageshift page size */
63 u8 blockshift; /* 1<<blockshift block size */
64};
65
66struct alauda {
67 struct usb_device *dev;
68 struct usb_interface *interface;
69 struct mtd_info *mtd;
70 struct alauda_card *card;
71 struct mutex card_mutex;
72 u32 pagemask;
73 u32 bytemask;
74 u32 blockmask;
75 unsigned int write_out;
76 unsigned int bulk_in;
77 unsigned int bulk_out;
78 u8 port;
79 struct kref kref;
80};
81
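/*
 * Geometry is stored as log2 sizes, per the struct above: e.g.
 * { 0x73, 24, 9, 14 } is a 16 MiB chip (1 << 24) with 512-byte
 * pages (1 << 9) and 16 KiB blocks (1 << 14).
 */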
82static struct alauda_card alauda_card_ids[] = {
83 /* NAND flash */
84 { 0x6e, 20, 8, 12}, /* 1 MB */
85 { 0xe8, 20, 8, 12}, /* 1 MB */
86 { 0xec, 20, 8, 12}, /* 1 MB */
87 { 0x64, 21, 8, 12}, /* 2 MB */
88 { 0xea, 21, 8, 12}, /* 2 MB */
89 { 0x6b, 22, 9, 13}, /* 4 MB */
90 { 0xe3, 22, 9, 13}, /* 4 MB */
91 { 0xe5, 22, 9, 13}, /* 4 MB */
92 { 0xe6, 23, 9, 13}, /* 8 MB */
93 { 0x73, 24, 9, 14}, /* 16 MB */
94 { 0x75, 25, 9, 14}, /* 32 MB */
95 { 0x76, 26, 9, 14}, /* 64 MB */
96 { 0x79, 27, 9, 14}, /* 128 MB */
97 { 0x71, 28, 9, 14}, /* 256 MB */
98
99 /* MASK ROM */
100 { 0x5d, 21, 9, 13}, /* 2 MB */
101 { 0xd5, 22, 9, 13}, /* 4 MB */
102 { 0xd6, 23, 9, 13}, /* 8 MB */
103 { 0x57, 24, 9, 13}, /* 16 MB */
104 { 0x58, 25, 9, 13}, /* 32 MB */
105 { }
106};
107
108static struct alauda_card *get_card(u8 id)
109{
110 struct alauda_card *card;
111
112 for (card = alauda_card_ids; card->id; card++)
113 if (card->id == id)
114 return card;
115 return NULL;
116}
117
118static void alauda_delete(struct kref *kref)
119{
120 struct alauda *al = container_of(kref, struct alauda, kref);
121
122 if (al->mtd) {
123 del_mtd_device(al->mtd);
124 kfree(al->mtd);
125 }
126 usb_put_dev(al->dev);
127 kfree(al);
128}
129
130static int alauda_get_media_status(struct alauda *al, void *buf)
131{
132 int ret;
133
134 mutex_lock(&al->card_mutex);
135 ret = usb_control_msg(al->dev, usb_rcvctrlpipe(al->dev, 0),
136 ALAUDA_GET_XD_MEDIA_STATUS, 0xc0, 0, 1, buf, 2, HZ);
137 mutex_unlock(&al->card_mutex);
138 return ret;
139}
140
141static int alauda_ack_media(struct alauda *al)
142{
143 int ret;
144
145 mutex_lock(&al->card_mutex);
146 ret = usb_control_msg(al->dev, usb_sndctrlpipe(al->dev, 0),
147 ALAUDA_ACK_XD_MEDIA_CHANGE, 0x40, 0, 1, NULL, 0, HZ);
148 mutex_unlock(&al->card_mutex);
149 return ret;
150}
151
152static int alauda_get_media_signatures(struct alauda *al, void *buf)
153{
154 int ret;
155
156 mutex_lock(&al->card_mutex);
157 ret = usb_control_msg(al->dev, usb_rcvctrlpipe(al->dev, 0),
158 ALAUDA_GET_XD_MEDIA_SIG, 0xc0, 0, 0, buf, 4, HZ);
159 mutex_unlock(&al->card_mutex);
160 return ret;
161}
162
163static void alauda_reset(struct alauda *al)
164{
165 u8 command[] = {
166 ALAUDA_BULK_CMD, ALAUDA_BULK_RESET_MEDIA, 0, 0,
167 0, 0, 0, 0, al->port
168 };
169 mutex_lock(&al->card_mutex);
170 usb_bulk_msg(al->dev, al->bulk_out, command, 9, NULL, HZ);
171 mutex_unlock(&al->card_mutex);
172}
173
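/*
 * Software ECC check over one 256-byte chunk: recompute the 3-byte ECC
 * and compare it with the one read from OOB.  nand_correct_data()
 * returns a positive value for a repaired single-bit error and a
 * negative one when the chunk is unrecoverable.
 */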
174static void correct_data(void *buf, void *read_ecc,
175 int *corrected, int *uncorrected)
176{
177 u8 calc_ecc[3];
178 int err;
179
180 nand_calculate_ecc(NULL, buf, calc_ecc);
181 err = nand_correct_data(NULL, buf, read_ecc, calc_ecc);
182 if (err) {
183 if (err > 0)
184 (*corrected)++;
185 else
186 (*uncorrected)++;
187 }
188}
189
190struct alauda_sg_request {
191 struct urb *urb[3];
192 struct completion comp;
193};
194
195static void alauda_complete(struct urb *urb)
196{
197 struct completion *comp = urb->context;
198
199 if (comp)
200 complete(comp);
201}
202
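/*
 * Page I/O is issued as three chained URBs: the 9-byte command on the
 * bulk-out pipe, then the page data and the 16-byte OOB on the data
 * pipe.  Only the last URB carries the completion, so waiting on it
 * covers the whole transfer.
 */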
203static int __alauda_read_page(struct mtd_info *mtd, loff_t from, void *buf,
204 void *oob)
205{
206 struct alauda_sg_request sg;
207 struct alauda *al = mtd->priv;
208 u32 pba = from >> al->card->blockshift;
209 u32 page = (from >> al->card->pageshift) & al->pagemask;
210 u8 command[] = {
211 ALAUDA_BULK_CMD, ALAUDA_BULK_READ_PAGE, PBA_HI(pba),
212 PBA_ZONE(pba), 0, PBA_LO(pba) + page, 1, 0, al->port
213 };
214 int i, err;
215
216 for (i=0; i<3; i++)
217 sg.urb[i] = NULL;
218
219 err = -ENOMEM;
220 for (i=0; i<3; i++) {
221 sg.urb[i] = usb_alloc_urb(0, GFP_NOIO);
222 if (!sg.urb[i])
223 goto out;
224 }
225 init_completion(&sg.comp);
226 usb_fill_bulk_urb(sg.urb[0], al->dev, al->bulk_out, command, 9,
227 alauda_complete, NULL);
228 usb_fill_bulk_urb(sg.urb[1], al->dev, al->bulk_in, buf, mtd->writesize,
229 alauda_complete, NULL);
230 usb_fill_bulk_urb(sg.urb[2], al->dev, al->bulk_in, oob, 16,
231 alauda_complete, &sg.comp);
232
233 mutex_lock(&al->card_mutex);
234 for (i=0; i<3; i++) {
235 err = usb_submit_urb(sg.urb[i], GFP_NOIO);
236 if (err)
237 goto cancel;
238 }
239 if (!wait_for_completion_timeout(&sg.comp, TIMEOUT)) {
240 err = -ETIMEDOUT;
241cancel:
242 for (i=0; i<3; i++) {
243 usb_kill_urb(sg.urb[i]);
244 }
245 }
246 mutex_unlock(&al->card_mutex);
247
248out:
249 usb_free_urb(sg.urb[0]);
250 usb_free_urb(sg.urb[1]);
251 usb_free_urb(sg.urb[2]);
252 return err;
253}
254
255static int alauda_read_page(struct mtd_info *mtd, loff_t from,
256 void *buf, u8 *oob, int *corrected, int *uncorrected)
257{
258 int err;
259
260 err = __alauda_read_page(mtd, from, buf, oob);
261 if (err)
262 return err;
263 correct_data(buf, oob+13, corrected, uncorrected);
264 correct_data(buf+256, oob+8, corrected, uncorrected);
265 return 0;
266}
267
268static int alauda_write_page(struct mtd_info *mtd, loff_t to, void *buf,
269 void *oob)
270{
271 struct alauda_sg_request sg;
272 struct alauda *al = mtd->priv;
273 u32 pba = to >> al->card->blockshift;
274 u32 page = (to >> al->card->pageshift) & al->pagemask;
275 u8 command[] = {
276 ALAUDA_BULK_CMD, ALAUDA_BULK_WRITE_PAGE, PBA_HI(pba),
277 PBA_ZONE(pba), 0, PBA_LO(pba) + page, 32, 0, al->port
278 };
279 int i, err;
280
281 for (i=0; i<3; i++)
282 sg.urb[i] = NULL;
283
284 err = -ENOMEM;
285 for (i=0; i<3; i++) {
286 sg.urb[i] = usb_alloc_urb(0, GFP_NOIO);
287 if (!sg.urb[i])
288 goto out;
289 }
290 init_completion(&sg.comp);
291 usb_fill_bulk_urb(sg.urb[0], al->dev, al->bulk_out, command, 9,
292 alauda_complete, NULL);
293	usb_fill_bulk_urb(sg.urb[1], al->dev, al->write_out, buf, mtd->writesize,
294 alauda_complete, NULL);
295 usb_fill_bulk_urb(sg.urb[2], al->dev, al->write_out, oob, 16,
296 alauda_complete, &sg.comp);
297
298 mutex_lock(&al->card_mutex);
299 for (i=0; i<3; i++) {
300 err = usb_submit_urb(sg.urb[i], GFP_NOIO);
301 if (err)
302 goto cancel;
303 }
304 if (!wait_for_completion_timeout(&sg.comp, TIMEOUT)) {
305 err = -ETIMEDOUT;
306cancel:
307 for (i=0; i<3; i++) {
308 usb_kill_urb(sg.urb[i]);
309 }
310 }
311 mutex_unlock(&al->card_mutex);
312
313out:
314 usb_free_urb(sg.urb[0]);
315 usb_free_urb(sg.urb[1]);
316 usb_free_urb(sg.urb[2]);
317 return err;
318}
319
320static int alauda_erase_block(struct mtd_info *mtd, loff_t ofs)
321{
322 struct alauda_sg_request sg;
323 struct alauda *al = mtd->priv;
324 u32 pba = ofs >> al->card->blockshift;
325 u8 command[] = {
326 ALAUDA_BULK_CMD, ALAUDA_BULK_ERASE_BLOCK, PBA_HI(pba),
327 PBA_ZONE(pba), 0, PBA_LO(pba), 0x02, 0, al->port
328 };
329 u8 buf[2];
330 int i, err;
331
332 for (i=0; i<2; i++)
333 sg.urb[i] = NULL;
334
335 err = -ENOMEM;
336 for (i=0; i<2; i++) {
337 sg.urb[i] = usb_alloc_urb(0, GFP_NOIO);
338 if (!sg.urb[i])
339 goto out;
340 }
341 init_completion(&sg.comp);
342 usb_fill_bulk_urb(sg.urb[0], al->dev, al->bulk_out, command, 9,
343 alauda_complete, NULL);
344 usb_fill_bulk_urb(sg.urb[1], al->dev, al->bulk_in, buf, 2,
345 alauda_complete, &sg.comp);
346
347 mutex_lock(&al->card_mutex);
348 for (i=0; i<2; i++) {
349 err = usb_submit_urb(sg.urb[i], GFP_NOIO);
350 if (err)
351 goto cancel;
352 }
353 if (!wait_for_completion_timeout(&sg.comp, TIMEOUT)) {
354 err = -ETIMEDOUT;
355cancel:
356 for (i=0; i<2; i++) {
357 usb_kill_urb(sg.urb[i]);
358 }
359 }
360 mutex_unlock(&al->card_mutex);
361
362out:
363 usb_free_urb(sg.urb[0]);
364 usb_free_urb(sg.urb[1]);
365 return err;
366}
367
368static int alauda_read_oob(struct mtd_info *mtd, loff_t from, void *oob)
369{
370 static u8 ignore_buf[512]; /* write only */
371
372 return __alauda_read_page(mtd, from, ignore_buf, oob);
373}
374
375static int popcount8(u8 c)
376{
377 int ret = 0;
378
379 for ( ; c; c>>=1)
380 ret += c & 1;
381 return ret;
382}
383
384static int alauda_isbad(struct mtd_info *mtd, loff_t ofs)
385{
386 u8 oob[16];
387 int err;
388
389 err = alauda_read_oob(mtd, ofs, oob);
390 if (err)
391 return err;
392
393 /* A block is marked bad if two or more bits are zero */
394 return popcount8(oob[5]) >= 7 ? 0 : 1;
395}
396
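/*
 * Reads that are not page-aligned go through a page-sized bounce
 * buffer; alauda_read() below sends aligned requests straight into
 * the caller's buffer.
 */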
397static int alauda_bounce_read(struct mtd_info *mtd, loff_t from, size_t len,
398 size_t *retlen, u_char *buf)
399{
400 struct alauda *al = mtd->priv;
401 void *bounce_buf;
402 int err, corrected=0, uncorrected=0;
403
404 bounce_buf = kmalloc(mtd->writesize, GFP_KERNEL);
405 if (!bounce_buf)
406 return -ENOMEM;
407
408 *retlen = len;
409 while (len) {
410 u8 oob[16];
411 size_t byte = from & al->bytemask;
412 size_t cplen = min(len, mtd->writesize - byte);
413
414 err = alauda_read_page(mtd, from, bounce_buf, oob,
415 &corrected, &uncorrected);
416 if (err)
417 goto out;
418
419 memcpy(buf, bounce_buf + byte, cplen);
420 buf += cplen;
421 from += cplen;
422 len -= cplen;
423 }
424 err = 0;
425 if (corrected)
426 err = -EUCLEAN;
427 if (uncorrected)
428 err = -EBADMSG;
429out:
430 kfree(bounce_buf);
431 return err;
432}
433
434static int alauda_read(struct mtd_info *mtd, loff_t from, size_t len,
435 size_t *retlen, u_char *buf)
436{
437 struct alauda *al = mtd->priv;
438 int err, corrected=0, uncorrected=0;
439
440 if ((from & al->bytemask) || (len & al->bytemask))
441 return alauda_bounce_read(mtd, from, len, retlen, buf);
442
443 *retlen = len;
444 while (len) {
445 u8 oob[16];
446
447 err = alauda_read_page(mtd, from, buf, oob,
448 &corrected, &uncorrected);
449 if (err)
450 return err;
451
452 buf += mtd->writesize;
453 from += mtd->writesize;
454 len -= mtd->writesize;
455 }
456 err = 0;
457 if (corrected)
458 err = -EUCLEAN;
459 if (uncorrected)
460 err = -EBADMSG;
461 return err;
462}
463
464static int alauda_write(struct mtd_info *mtd, loff_t to, size_t len,
465 size_t *retlen, const u_char *buf)
466{
467 struct alauda *al = mtd->priv;
468 int err;
469
470 if ((to & al->bytemask) || (len & al->bytemask))
471 return -EINVAL;
472
473 *retlen = len;
474 while (len) {
475 u32 page = (to >> al->card->pageshift) & al->pagemask;
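		/* OOB layout: byte 5 is the bad-block marker tested in
		 * alauda_isbad(), bytes 8-10 and 13-15 receive the two
		 * 3-byte ECCs computed below. */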
476 u8 oob[16] = { 'h', 'e', 'l', 'l', 'o', 0xff, 0xff, 0xff,
477 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
478
479 /* don't write to bad blocks */
480 if (page == 0) {
481 err = alauda_isbad(mtd, to);
482 if (err) {
483 return -EIO;
484 }
485 }
486 nand_calculate_ecc(mtd, buf, &oob[13]);
487 nand_calculate_ecc(mtd, buf+256, &oob[8]);
488
489 err = alauda_write_page(mtd, to, (void*)buf, oob);
490 if (err)
491 return err;
492
493 buf += mtd->writesize;
494 to += mtd->writesize;
495 len -= mtd->writesize;
496 }
497 return 0;
498}
499
500static int __alauda_erase(struct mtd_info *mtd, struct erase_info *instr)
501{
502 struct alauda *al = mtd->priv;
503 u32 ofs = instr->addr;
504 u32 len = instr->len;
505 int err;
506
507 if ((ofs & al->blockmask) || (len & al->blockmask))
508 return -EINVAL;
509
510 while (len) {
511 /* don't erase bad blocks */
512 err = alauda_isbad(mtd, ofs);
513 if (err > 0)
514 err = -EIO;
515 if (err < 0)
516 return err;
517
518 err = alauda_erase_block(mtd, ofs);
519 if (err < 0)
520 return err;
521
522 ofs += mtd->erasesize;
523 len -= mtd->erasesize;
524 }
525 return 0;
526}
527
528static int alauda_erase(struct mtd_info *mtd, struct erase_info *instr)
529{
530 int err;
531
532 err = __alauda_erase(mtd, instr);
533 instr->state = err ? MTD_ERASE_FAILED : MTD_ERASE_DONE;
534 mtd_erase_callback(instr);
535 return err;
536}
537
538static int alauda_init_media(struct alauda *al)
539{
540 u8 buf[4], *b0=buf, *b1=buf+1;
541 struct alauda_card *card;
542 struct mtd_info *mtd;
543 int err;
544
545 mtd = kzalloc(sizeof(*mtd), GFP_KERNEL);
546 if (!mtd)
547 return -ENOMEM;
548
549 for (;;) {
550 err = alauda_get_media_status(al, buf);
551 if (err < 0)
552 goto error;
553 if (*b0 & 0x10)
554 break;
555 msleep(20);
556 }
557
558 err = alauda_ack_media(al);
559 if (err)
560 goto error;
561
562 msleep(10);
563
564 err = alauda_get_media_status(al, buf);
565 if (err < 0)
566 goto error;
567
568 if (*b0 != 0x14) {
569 /* media not ready */
570 err = -EIO;
571 goto error;
572 }
573 err = alauda_get_media_signatures(al, buf);
574 if (err < 0)
575 goto error;
576
577 card = get_card(*b1);
578 if (!card) {
579		printk(KERN_ERR "Alauda: unknown card id %02x\n", *b1);
580 err = -EIO;
581 goto error;
582 }
583	printk(KERN_INFO "pagesize=%x\nerasesize=%x\nsize=%xMiB\n",
584 1<<card->pageshift, 1<<card->blockshift,
585 1<<(card->chipshift-20));
586 al->card = card;
587 al->pagemask = (1 << (card->blockshift - card->pageshift)) - 1;
588 al->bytemask = (1 << card->pageshift) - 1;
589 al->blockmask = (1 << card->blockshift) - 1;
590
591 mtd->name = "alauda";
592 mtd->size = 1<<card->chipshift;
593 mtd->erasesize = 1<<card->blockshift;
594 mtd->writesize = 1<<card->pageshift;
595 mtd->type = MTD_NANDFLASH;
596 mtd->flags = MTD_CAP_NANDFLASH;
597 mtd->read = alauda_read;
598 mtd->write = alauda_write;
599 mtd->erase = alauda_erase;
600 mtd->block_isbad = alauda_isbad;
601 mtd->priv = al;
602 mtd->owner = THIS_MODULE;
603
604 err = add_mtd_device(mtd);
605 if (err) {
606 err = -ENFILE;
607 goto error;
608 }
609
610 al->mtd = mtd;
611 alauda_reset(al); /* no clue whether this is necessary */
612 return 0;
613error:
614 kfree(mtd);
615 return err;
616}
617
618static int alauda_check_media(struct alauda *al)
619{
620 u8 buf[2], *b0 = buf, *b1 = buf+1;
621 int err;
622
623 err = alauda_get_media_status(al, buf);
624 if (err < 0)
625 return err;
626
627 if ((*b1 & 0x01) == 0) {
628 /* door open */
629 return -EIO;
630 }
631 if ((*b0 & 0x80) || ((*b0 & 0x1F) == 0x10)) {
632 /* no media ? */
633 return -EIO;
634 }
635 if (*b0 & 0x08) {
636 /* media change ? */
637 return alauda_init_media(al);
638 }
639 return 0;
640}
641
642static int alauda_probe(struct usb_interface *interface,
643 const struct usb_device_id *id)
644{
645 struct alauda *al;
646 struct usb_host_interface *iface;
647 struct usb_endpoint_descriptor *ep,
648 *ep_in=NULL, *ep_out=NULL, *ep_wr=NULL;
649 int i, err = -ENOMEM;
650
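	/* Two instances are allocated back to back, one per media port
	 * (xD and SmartMedia); the second is cloned from the first once
	 * the endpoints are set up below. */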
651 al = kzalloc(2*sizeof(*al), GFP_KERNEL);
652 if (!al)
653 goto error;
654
655 kref_init(&al->kref);
656 usb_set_intfdata(interface, al);
657
658 al->dev = usb_get_dev(interface_to_usbdev(interface));
659 al->interface = interface;
660
661 iface = interface->cur_altsetting;
662 for (i = 0; i < iface->desc.bNumEndpoints; ++i) {
663 ep = &iface->endpoint[i].desc;
664
665 if (usb_endpoint_is_bulk_in(ep)) {
666 ep_in = ep;
667 } else if (usb_endpoint_is_bulk_out(ep)) {
668 if (i==0)
669 ep_wr = ep;
670 else
671 ep_out = ep;
672 }
673 }
674 err = -EIO;
675 if (!ep_wr || !ep_in || !ep_out)
676 goto error;
677
678 al->write_out = usb_sndbulkpipe(al->dev,
679 ep_wr->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
680 al->bulk_in = usb_rcvbulkpipe(al->dev,
681 ep_in->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
682 al->bulk_out = usb_sndbulkpipe(al->dev,
683 ep_out->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
684
685 /* second device is identical up to now */
686 memcpy(al+1, al, sizeof(*al));
687
688 mutex_init(&al[0].card_mutex);
689 mutex_init(&al[1].card_mutex);
690
691 al[0].port = ALAUDA_PORT_XD;
692 al[1].port = ALAUDA_PORT_SM;
693
694 info("alauda probed");
695 alauda_check_media(al);
696 alauda_check_media(al+1);
697
698 return 0;
699
700error:
701 if (al)
702 kref_put(&al->kref, alauda_delete);
703 return err;
704}
705
706static void alauda_disconnect(struct usb_interface *interface)
707{
708 struct alauda *al;
709
710 al = usb_get_intfdata(interface);
711 usb_set_intfdata(interface, NULL);
712
713 /* FIXME: prevent more I/O from starting */
714
715 /* decrement our usage count */
716 if (al)
717 kref_put(&al->kref, alauda_delete);
718
719 info("alauda gone");
720}
721
722static struct usb_driver alauda_driver = {
723 .name = "alauda",
724 .probe = alauda_probe,
725 .disconnect = alauda_disconnect,
726 .id_table = alauda_table,
727};
728
729static int __init alauda_init(void)
730{
731 return usb_register(&alauda_driver);
732}
733
734static void __exit alauda_exit(void)
735{
736 usb_deregister(&alauda_driver);
737}
738
739module_init(alauda_init);
740module_exit(alauda_exit);
741
742MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c
new file mode 100644
index 000000000000..1657ecd74881
--- /dev/null
+++ b/drivers/mtd/nand/bf5xx_nand.c
@@ -0,0 +1,788 @@
1/* linux/drivers/mtd/nand/bf5xx_nand.c
2 *
3 * Copyright 2006-2007 Analog Devices Inc.
4 * http://blackfin.uclinux.org/
5 * Bryan Wu <bryan.wu@analog.com>
6 *
7 * Blackfin BF5xx on-chip NAND flash controller driver
8 *
9 * Derived from drivers/mtd/nand/s3c2410.c
10 * Copyright (c) 2007 Ben Dooks <ben@simtec.co.uk>
11 *
12 * Derived from drivers/mtd/nand/cafe.c
13 * Copyright © 2006 Red Hat, Inc.
14 * Copyright © 2006 David Woodhouse <dwmw2@infradead.org>
15 *
16 * Changelog:
17 * 12-Jun-2007 Bryan Wu: Initial version
18 * 18-Jul-2007 Bryan Wu:
19 * - ECC_HW and ECC_SW supported
20 * - DMA supported in ECC_HW
21 * - YAFFS tested as rootfs in both ECC_HW and ECC_SW
22 *
23 * TODO:
24 * Enable JFFS2 over NAND as rootfs
25 *
26 * This program is free software; you can redistribute it and/or modify
27 * it under the terms of the GNU General Public License as published by
28 * the Free Software Foundation; either version 2 of the License, or
29 * (at your option) any later version.
30 *
31 * This program is distributed in the hope that it will be useful,
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 * GNU General Public License for more details.
35 *
36 * You should have received a copy of the GNU General Public License
37 * along with this program; if not, write to the Free Software
38 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39*/
40
41#include <linux/module.h>
42#include <linux/types.h>
43#include <linux/init.h>
44#include <linux/kernel.h>
45#include <linux/string.h>
46#include <linux/ioport.h>
47#include <linux/platform_device.h>
48#include <linux/delay.h>
49#include <linux/dma-mapping.h>
50#include <linux/err.h>
51#include <linux/slab.h>
52#include <linux/io.h>
53#include <linux/bitops.h>
54
55#include <linux/mtd/mtd.h>
56#include <linux/mtd/nand.h>
57#include <linux/mtd/nand_ecc.h>
58#include <linux/mtd/partitions.h>
59
60#include <asm/blackfin.h>
61#include <asm/dma.h>
62#include <asm/cacheflush.h>
63#include <asm/nand.h>
64#include <asm/portmux.h>
65
66#define DRV_NAME "bf5xx-nand"
67#define DRV_VERSION "1.2"
68#define DRV_AUTHOR "Bryan Wu <bryan.wu@analog.com>"
69#define DRV_DESC "BF5xx on-chip NAND Flash Controller Driver"
70
71#ifdef CONFIG_MTD_NAND_BF5XX_HWECC
72static int hardware_ecc = 1;
73#else
74static int hardware_ecc;
75#endif
76
77static unsigned short bfin_nfc_pin_req[] = {P_NAND_CE, P_NAND_RB, 0};
78
79/*
80 * Data structures for bf5xx nand flash controller driver
81 */
82
83/* bf5xx nand info */
84struct bf5xx_nand_info {
85 /* mtd info */
86 struct nand_hw_control controller;
87 struct mtd_info mtd;
88 struct nand_chip chip;
89
90 /* platform info */
91 struct bf5xx_nand_platform *platform;
92
93 /* device info */
94 struct device *device;
95
96 /* DMA stuff */
97 struct completion dma_completion;
98};
99
100/*
101 * Conversion functions
102 */
103static struct bf5xx_nand_info *mtd_to_nand_info(struct mtd_info *mtd)
104{
105 return container_of(mtd, struct bf5xx_nand_info, mtd);
106}
107
108static struct bf5xx_nand_info *to_nand_info(struct platform_device *pdev)
109{
110 return platform_get_drvdata(pdev);
111}
112
113static struct bf5xx_nand_platform *to_nand_plat(struct platform_device *pdev)
114{
115 return pdev->dev.platform_data;
116}
117
118/*
119 * struct nand_chip interface function pointers
120 */
121
122/*
123 * bf5xx_nand_hwcontrol
124 *
125 * Issue command and address cycles to the chip
126 */
127static void bf5xx_nand_hwcontrol(struct mtd_info *mtd, int cmd,
128 unsigned int ctrl)
129{
130 if (cmd == NAND_CMD_NONE)
131 return;
132
133 while (bfin_read_NFC_STAT() & WB_FULL)
134 cpu_relax();
135
136 if (ctrl & NAND_CLE)
137 bfin_write_NFC_CMD(cmd);
138 else
139 bfin_write_NFC_ADDR(cmd);
140 SSYNC();
141}
142
143/*
144 * bf5xx_nand_devready()
145 *
146 * returns 0 if the NAND is busy, 1 if it is ready
147 */
148static int bf5xx_nand_devready(struct mtd_info *mtd)
149{
150 unsigned short val = bfin_read_NFC_IRQSTAT();
151
152 if ((val & NBUSYIRQ) == NBUSYIRQ)
153 return 1;
154 else
155 return 0;
156}
157
158/*
159 * ECC functions
160 * These allow the bf5xx to use the controller's ECC
161 * generator block to compute ECC over the data as it passes through
162 */
163
164/*
165 * ECC error correction function
166 */
167static int bf5xx_nand_correct_data_256(struct mtd_info *mtd, u_char *dat,
168 u_char *read_ecc, u_char *calc_ecc)
169{
170 struct bf5xx_nand_info *info = mtd_to_nand_info(mtd);
171 u32 syndrome[5];
172 u32 calced, stored;
173 int i;
174 unsigned short failing_bit, failing_byte;
175 u_char data;
176
177 calced = calc_ecc[0] | (calc_ecc[1] << 8) | (calc_ecc[2] << 16);
178 stored = read_ecc[0] | (read_ecc[1] << 8) | (read_ecc[2] << 16);
179
180 syndrome[0] = (calced ^ stored);
181
182 /*
183	 * syndrome[0]: all zero
184 * No error in data
185 * No action
186 */
187 if (!syndrome[0] || !calced || !stored)
188 return 0;
189
190 /*
191	 * syndrome[0]: only one bit is one
192 * ECC data was incorrect
193 * No action
194 */
195 if (hweight32(syndrome[0]) == 1) {
196 dev_err(info->device, "ECC data was incorrect!\n");
197 return 1;
198 }
199
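	/*
	 * syndrome[1] XORs the low parity halves of the calculated and
	 * stored ECC words; for a single-bit error its low three bits
	 * give the failing bit and the remaining bits the failing byte
	 * (used in the correction branch below).
	 */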
200 syndrome[1] = (calced & 0x7FF) ^ (stored & 0x7FF);
201 syndrome[2] = (calced & 0x7FF) ^ ((calced >> 11) & 0x7FF);
202 syndrome[3] = (stored & 0x7FF) ^ ((stored >> 11) & 0x7FF);
203 syndrome[4] = syndrome[2] ^ syndrome[3];
204
205 for (i = 0; i < 5; i++)
206 dev_info(info->device, "syndrome[%d] 0x%08x\n", i, syndrome[i]);
207
208 dev_info(info->device,
209 "calced[0x%08x], stored[0x%08x]\n",
210 calced, stored);
211
212 /*
213	 * syndrome[0]: exactly 11 bits are one, each parity
214 * and parity' pair is 1 & 0 or 0 & 1.
215 * 1-bit correctable error
216 * Correct the error
217 */
218 if (hweight32(syndrome[0]) == 11 && syndrome[4] == 0x7FF) {
219 dev_info(info->device,
220 "1-bit correctable error, correct it.\n");
221 dev_info(info->device,
222 "syndrome[1] 0x%08x\n", syndrome[1]);
223
224 failing_bit = syndrome[1] & 0x7;
225 failing_byte = syndrome[1] >> 0x3;
226 data = *(dat + failing_byte);
227 data = data ^ (0x1 << failing_bit);
228 *(dat + failing_byte) = data;
229
230 return 0;
231 }
232
233 /*
234	 * syndrome[0]: random data
235 * More than 1-bit error, non-correctable error
236 * Discard data, mark bad block
237 */
238 dev_err(info->device,
239 "More than 1-bit error, non-correctable error.\n");
240 dev_err(info->device,
241 "Please discard data, mark bad block\n");
242
243 return 1;
244}
245
246static int bf5xx_nand_correct_data(struct mtd_info *mtd, u_char *dat,
247 u_char *read_ecc, u_char *calc_ecc)
248{
249 struct bf5xx_nand_info *info = mtd_to_nand_info(mtd);
250 struct bf5xx_nand_platform *plat = info->platform;
251 unsigned short page_size = (plat->page_size ? 512 : 256);
252 int ret;
253
254 ret = bf5xx_nand_correct_data_256(mtd, dat, read_ecc, calc_ecc);
255
256 /* If page size is 512, correct second 256 bytes */
257 if (page_size == 512) {
258 dat += 256;
259 read_ecc += 8;
260 calc_ecc += 8;
261 ret = bf5xx_nand_correct_data_256(mtd, dat, read_ecc, calc_ecc);
262 }
263
264 return ret;
265}
266
267static void bf5xx_nand_enable_hwecc(struct mtd_info *mtd, int mode)
268{
269 return;
270}
271
272static int bf5xx_nand_calculate_ecc(struct mtd_info *mtd,
273 const u_char *dat, u_char *ecc_code)
274{
275 struct bf5xx_nand_info *info = mtd_to_nand_info(mtd);
276 struct bf5xx_nand_platform *plat = info->platform;
277 u16 page_size = (plat->page_size ? 512 : 256);
278 u16 ecc0, ecc1;
279 u32 code[2];
280 u8 *p;
281 int bytes = 3, i;
282
283 /* first 4 bytes ECC code for 256 page size */
284 ecc0 = bfin_read_NFC_ECC0();
285 ecc1 = bfin_read_NFC_ECC1();
286
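	/* Pack the two parity registers into the 22-bit word layout that
	 * bf5xx_nand_correct_data_256() unpacks when checking a read. */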
287 code[0] = (ecc0 & 0x3FF) | ((ecc1 & 0x3FF) << 11);
288
289 dev_dbg(info->device, "returning ecc 0x%08x\n", code[0]);
290
291 /* second 4 bytes ECC code for 512 page size */
292 if (page_size == 512) {
293 ecc0 = bfin_read_NFC_ECC2();
294 ecc1 = bfin_read_NFC_ECC3();
295 code[1] = (ecc0 & 0x3FF) | ((ecc1 & 0x3FF) << 11);
296 bytes = 6;
297 dev_dbg(info->device, "returning ecc 0x%08x\n", code[1]);
298 }
299
300 p = (u8 *)code;
301 for (i = 0; i < bytes; i++)
302 ecc_code[i] = p[i];
303
304 return 0;
305}
306
307/*
308 * PIO mode for buffer writing and reading
309 */
310static void bf5xx_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
311{
312 int i;
313 unsigned short val;
314
315 /*
316 * Data reads are requested by first writing to NFC_DATA_RD
317 * and then reading back from NFC_READ.
318 */
319 for (i = 0; i < len; i++) {
320 while (bfin_read_NFC_STAT() & WB_FULL)
321 cpu_relax();
322
323 /* Contents do not matter */
324 bfin_write_NFC_DATA_RD(0x0000);
325 SSYNC();
326
327 while ((bfin_read_NFC_IRQSTAT() & RD_RDY) != RD_RDY)
328 cpu_relax();
329
330 buf[i] = bfin_read_NFC_READ();
331
332 val = bfin_read_NFC_IRQSTAT();
333 val |= RD_RDY;
334 bfin_write_NFC_IRQSTAT(val);
335 SSYNC();
336 }
337}
338
339static uint8_t bf5xx_nand_read_byte(struct mtd_info *mtd)
340{
341 uint8_t val;
342
343 bf5xx_nand_read_buf(mtd, &val, 1);
344
345 return val;
346}
347
348static void bf5xx_nand_write_buf(struct mtd_info *mtd,
349 const uint8_t *buf, int len)
350{
351 int i;
352
353 for (i = 0; i < len; i++) {
354 while (bfin_read_NFC_STAT() & WB_FULL)
355 cpu_relax();
356
357 bfin_write_NFC_DATA_WR(buf[i]);
358 SSYNC();
359 }
360}
361
362static void bf5xx_nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
363{
364 int i;
365 u16 *p = (u16 *) buf;
366 len >>= 1;
367
368 /*
369 * Data reads are requested by first writing to NFC_DATA_RD
370 * and then reading back from NFC_READ.
371 */
372 bfin_write_NFC_DATA_RD(0x5555);
373
374 SSYNC();
375
376 for (i = 0; i < len; i++)
377 p[i] = bfin_read_NFC_READ();
378}
379
380static void bf5xx_nand_write_buf16(struct mtd_info *mtd,
381 const uint8_t *buf, int len)
382{
383 int i;
384 u16 *p = (u16 *) buf;
385 len >>= 1;
386
387 for (i = 0; i < len; i++)
388 bfin_write_NFC_DATA_WR(p[i]);
389
390 SSYNC();
391}
392
393/*
394 * DMA functions for buffer writing and reading
395 */
396static irqreturn_t bf5xx_nand_dma_irq(int irq, void *dev_id)
397{
398 struct bf5xx_nand_info *info = dev_id;
399
400 clear_dma_irqstat(CH_NFC);
401 disable_dma(CH_NFC);
402 complete(&info->dma_completion);
403
404 return IRQ_HANDLED;
405}
406
407static int bf5xx_nand_dma_rw(struct mtd_info *mtd,
408 uint8_t *buf, int is_read)
409{
410 struct bf5xx_nand_info *info = mtd_to_nand_info(mtd);
411 struct bf5xx_nand_platform *plat = info->platform;
412 unsigned short page_size = (plat->page_size ? 512 : 256);
413 unsigned short val;
414
415	dev_dbg(info->device, "mtd->%p, buf->%p, is_read %d\n",
416 mtd, buf, is_read);
417
418 /*
419 * Before starting a dma transfer, be sure to invalidate/flush
420 * the cache over the address range of your DMA buffer to
421 * prevent cache coherency problems. Otherwise very subtle bugs
422 * can be introduced to your driver.
423 */
424 if (is_read)
425 invalidate_dcache_range((unsigned int)buf,
426 (unsigned int)(buf + page_size));
427 else
428 flush_dcache_range((unsigned int)buf,
429 (unsigned int)(buf + page_size));
430
431 /*
432 * This register must be written before each page is
433 * transferred to generate the correct ECC register
434 * values.
435 */
436 bfin_write_NFC_RST(0x1);
437 SSYNC();
438
439 disable_dma(CH_NFC);
440 clear_dma_irqstat(CH_NFC);
441
442 /* setup DMA register with Blackfin DMA API */
443 set_dma_config(CH_NFC, 0x0);
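	/* The transfer is programmed in 32-bit units: page_size / 4 words
	 * with a 4-byte stride, matching WDSIZE_32 configured below. */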
444 set_dma_start_addr(CH_NFC, (unsigned long) buf);
445 set_dma_x_count(CH_NFC, (page_size >> 2));
446 set_dma_x_modify(CH_NFC, 4);
447
448 /* setup write or read operation */
449 val = DI_EN | WDSIZE_32;
450 if (is_read)
451 val |= WNR;
452 set_dma_config(CH_NFC, val);
453 enable_dma(CH_NFC);
454
455 /* Start PAGE read/write operation */
456 if (is_read)
457 bfin_write_NFC_PGCTL(0x1);
458 else
459 bfin_write_NFC_PGCTL(0x2);
460 wait_for_completion(&info->dma_completion);
461
462 return 0;
463}
464
465static void bf5xx_nand_dma_read_buf(struct mtd_info *mtd,
466 uint8_t *buf, int len)
467{
468 struct bf5xx_nand_info *info = mtd_to_nand_info(mtd);
469 struct bf5xx_nand_platform *plat = info->platform;
470 unsigned short page_size = (plat->page_size ? 512 : 256);
471
472	dev_dbg(info->device, "mtd->%p, buf->%p, len %d\n", mtd, buf, len);
473
474 if (len == page_size)
475 bf5xx_nand_dma_rw(mtd, buf, 1);
476 else
477 bf5xx_nand_read_buf(mtd, buf, len);
478}
479
480static void bf5xx_nand_dma_write_buf(struct mtd_info *mtd,
481 const uint8_t *buf, int len)
482{
483 struct bf5xx_nand_info *info = mtd_to_nand_info(mtd);
484 struct bf5xx_nand_platform *plat = info->platform;
485 unsigned short page_size = (plat->page_size ? 512 : 256);
486
487 dev_dbg(info->device, "mtd->%p, buf->%p, len %d\n", mtd, buf, len);
488
489 if (len == page_size)
490 bf5xx_nand_dma_rw(mtd, (uint8_t *)buf, 0);
491 else
492 bf5xx_nand_write_buf(mtd, buf, len);
493}
494
495/*
496 * System initialization functions
497 */
498
499static int bf5xx_nand_dma_init(struct bf5xx_nand_info *info)
500{
501 int ret;
502 unsigned short val;
503
504 /* Do not use dma */
505 if (!hardware_ecc)
506 return 0;
507
508 init_completion(&info->dma_completion);
509
510	/* Set up the DMAC1 channel mux for the NFC, which is shared with the SDH */
511 val = bfin_read_DMAC1_PERIMUX();
512 val &= 0xFFFE;
513 bfin_write_DMAC1_PERIMUX(val);
514 SSYNC();
515
516 /* Request NFC DMA channel */
517 ret = request_dma(CH_NFC, "BF5XX NFC driver");
518 if (ret < 0) {
519		dev_err(info->device, "unable to get DMA channel\n");
520 return ret;
521 }
522
523 set_dma_callback(CH_NFC, (void *) bf5xx_nand_dma_irq, (void *) info);
524
525 /* Turn off the DMA channel first */
526 disable_dma(CH_NFC);
527 return 0;
528}
529
530/*
531 * BF5XX NFC hardware initialization
532 * - pin mux setup
533 * - clear interrupt status
534 */
535static int bf5xx_nand_hw_init(struct bf5xx_nand_info *info)
536{
537 int err = 0;
538 unsigned short val;
539 struct bf5xx_nand_platform *plat = info->platform;
540
541 /* setup NFC_CTL register */
542 dev_info(info->device,
543 "page_size=%d, data_width=%d, wr_dly=%d, rd_dly=%d\n",
544 (plat->page_size ? 512 : 256),
545 (plat->data_width ? 16 : 8),
546 plat->wr_dly, plat->rd_dly);
547
548 val = (plat->page_size << NFC_PG_SIZE_OFFSET) |
549 (plat->data_width << NFC_NWIDTH_OFFSET) |
550 (plat->rd_dly << NFC_RDDLY_OFFSET) |
551		(plat->wr_dly << NFC_WRDLY_OFFSET);
552 dev_dbg(info->device, "NFC_CTL is 0x%04x\n", val);
553
554 bfin_write_NFC_CTL(val);
555 SSYNC();
556
557 /* clear interrupt status */
558 bfin_write_NFC_IRQMASK(0x0);
559 SSYNC();
560 val = bfin_read_NFC_IRQSTAT();
561 bfin_write_NFC_IRQSTAT(val);
562 SSYNC();
563
564 if (peripheral_request_list(bfin_nfc_pin_req, DRV_NAME)) {
565 printk(KERN_ERR DRV_NAME
566 ": Requesting Peripherals failed\n");
567 return -EFAULT;
568 }
569
570 /* DMA initialization */
571 if (bf5xx_nand_dma_init(info))
572 err = -ENXIO;
573
574 return err;
575}
576
577/*
578 * Device management interface
579 */
580static int bf5xx_nand_add_partition(struct bf5xx_nand_info *info)
581{
582 struct mtd_info *mtd = &info->mtd;
583
584#ifdef CONFIG_MTD_PARTITIONS
585 struct mtd_partition *parts = info->platform->partitions;
586 int nr = info->platform->nr_partitions;
587
588 return add_mtd_partitions(mtd, parts, nr);
589#else
590 return add_mtd_device(mtd);
591#endif
592}
593
594static int bf5xx_nand_remove(struct platform_device *pdev)
595{
596 struct bf5xx_nand_info *info = to_nand_info(pdev);
597 struct mtd_info *mtd = NULL;
598
599 platform_set_drvdata(pdev, NULL);
600
601 /* first thing we need to do is release all our mtds
602 * and their partitions, then go through freeing the
603 * resources used
604 */
605 mtd = &info->mtd;
606 if (mtd) {
607 nand_release(mtd);
608 kfree(mtd);
609 }
610
611 peripheral_free_list(bfin_nfc_pin_req);
612
613 /* free the common resources */
614 kfree(info);
615
616 return 0;
617}
618
619/*
620 * bf5xx_nand_probe
621 *
622 * called by the device layer when it finds a device matching
623 * one our driver can handle. This code checks to see if
624 * it can allocate all the necessary resources, then calls the
625 * nand layer to look for devices
626 */
627static int bf5xx_nand_probe(struct platform_device *pdev)
628{
629 struct bf5xx_nand_platform *plat = to_nand_plat(pdev);
630 struct bf5xx_nand_info *info = NULL;
631 struct nand_chip *chip = NULL;
632 struct mtd_info *mtd = NULL;
633 int err = 0;
634
635 dev_dbg(&pdev->dev, "(%p)\n", pdev);
636
637 if (!plat) {
638 dev_err(&pdev->dev, "no platform specific information\n");
639 goto exit_error;
640 }
641
642 info = kzalloc(sizeof(*info), GFP_KERNEL);
643 if (info == NULL) {
644 dev_err(&pdev->dev, "no memory for flash info\n");
645 err = -ENOMEM;
646 goto exit_error;
647 }
648
649 platform_set_drvdata(pdev, info);
650
651 spin_lock_init(&info->controller.lock);
652 init_waitqueue_head(&info->controller.wq);
653
654 info->device = &pdev->dev;
655 info->platform = plat;
656
657 /* initialise chip data struct */
658 chip = &info->chip;
659
660 if (plat->data_width)
661 chip->options |= NAND_BUSWIDTH_16;
662
663 chip->options |= NAND_CACHEPRG | NAND_SKIP_BBTSCAN;
664
665 chip->read_buf = (plat->data_width) ?
666 bf5xx_nand_read_buf16 : bf5xx_nand_read_buf;
667 chip->write_buf = (plat->data_width) ?
668 bf5xx_nand_write_buf16 : bf5xx_nand_write_buf;
669
670 chip->read_byte = bf5xx_nand_read_byte;
671
672 chip->cmd_ctrl = bf5xx_nand_hwcontrol;
673 chip->dev_ready = bf5xx_nand_devready;
674
675 chip->priv = &info->mtd;
676 chip->controller = &info->controller;
677
678 chip->IO_ADDR_R = (void __iomem *) NFC_READ;
679 chip->IO_ADDR_W = (void __iomem *) NFC_DATA_WR;
680
681 chip->chip_delay = 0;
682
683 /* initialise mtd info data struct */
684 mtd = &info->mtd;
685 mtd->priv = chip;
686 mtd->owner = THIS_MODULE;
687
688 /* initialise the hardware */
689 err = bf5xx_nand_hw_init(info);
690 if (err != 0)
691 goto exit_error;
692
693 /* setup hardware ECC data struct */
694 if (hardware_ecc) {
695 if (plat->page_size == NFC_PG_SIZE_256) {
696 chip->ecc.bytes = 3;
697 chip->ecc.size = 256;
698 } else if (plat->page_size == NFC_PG_SIZE_512) {
699 chip->ecc.bytes = 6;
700 chip->ecc.size = 512;
701 }
702
703 chip->read_buf = bf5xx_nand_dma_read_buf;
704 chip->write_buf = bf5xx_nand_dma_write_buf;
705 chip->ecc.calculate = bf5xx_nand_calculate_ecc;
706 chip->ecc.correct = bf5xx_nand_correct_data;
707 chip->ecc.mode = NAND_ECC_HW;
708 chip->ecc.hwctl = bf5xx_nand_enable_hwecc;
709 } else {
710 chip->ecc.mode = NAND_ECC_SOFT;
711 }
712
713 /* scan hardware nand chip and setup mtd info data struct */
714 if (nand_scan(mtd, 1)) {
715 err = -ENXIO;
716 goto exit_error;
717 }
718
719 /* add NAND partition */
720 bf5xx_nand_add_partition(info);
721
722 dev_dbg(&pdev->dev, "initialised ok\n");
723 return 0;
724
725exit_error:
726 bf5xx_nand_remove(pdev);
727
728 if (err == 0)
729 err = -EINVAL;
730 return err;
731}
732
733/* PM Support */
734#ifdef CONFIG_PM
735
736static int bf5xx_nand_suspend(struct platform_device *dev, pm_message_t pm)
737{
738 struct bf5xx_nand_info *info = platform_get_drvdata(dev);
739
740 return 0;
741}
742
743static int bf5xx_nand_resume(struct platform_device *dev)
744{
745 struct bf5xx_nand_info *info = platform_get_drvdata(dev);
746
747 if (info)
748 bf5xx_nand_hw_init(info);
749
750 return 0;
751}
752
753#else
754#define bf5xx_nand_suspend NULL
755#define bf5xx_nand_resume NULL
756#endif
757
758/* driver device registration */
759static struct platform_driver bf5xx_nand_driver = {
760 .probe = bf5xx_nand_probe,
761 .remove = bf5xx_nand_remove,
762 .suspend = bf5xx_nand_suspend,
763 .resume = bf5xx_nand_resume,
764 .driver = {
765 .name = DRV_NAME,
766 .owner = THIS_MODULE,
767 },
768};
769
770static int __init bf5xx_nand_init(void)
771{
772 printk(KERN_INFO "%s, Version %s (c) 2007 Analog Devices, Inc.\n",
773 DRV_DESC, DRV_VERSION);
774
775 return platform_driver_register(&bf5xx_nand_driver);
776}
777
778static void __exit bf5xx_nand_exit(void)
779{
780 platform_driver_unregister(&bf5xx_nand_driver);
781}
782
783module_init(bf5xx_nand_init);
784module_exit(bf5xx_nand_exit);
785
786MODULE_LICENSE("GPL");
787MODULE_AUTHOR(DRV_AUTHOR);
788MODULE_DESCRIPTION(DRV_DESC);
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 6f32a35eb106..e2832d0b9899 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -623,6 +623,11 @@ static int __devinit cafe_nand_probe(struct pci_dev *pdev,
623 uint32_t ctrl; 623 uint32_t ctrl;
624 int err = 0; 624 int err = 0;
625 625
626 /* Very old versions shared the same PCI ident for all three
627 functions on the chip. Verify the class too... */
628 if ((pdev->class >> 8) != PCI_CLASS_MEMORY_FLASH)
629 return -ENODEV;
630
626 err = pci_enable_device(pdev); 631 err = pci_enable_device(pdev);
627 if (err) 632 if (err)
628 return err; 633 return err;
@@ -816,21 +821,57 @@ static void __devexit cafe_nand_remove(struct pci_dev *pdev)
816} 821}
817 822
818static struct pci_device_id cafe_nand_tbl[] = { 823static struct pci_device_id cafe_nand_tbl[] = {
819 { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 }, 824 { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID },
820 { 0, } 825 { }
821}; 826};
822 827
823MODULE_DEVICE_TABLE(pci, cafe_nand_tbl); 828MODULE_DEVICE_TABLE(pci, cafe_nand_tbl);
824 829
830static int cafe_nand_resume(struct pci_dev *pdev)
831{
832 uint32_t ctrl;
833 struct mtd_info *mtd = pci_get_drvdata(pdev);
834 struct cafe_priv *cafe = mtd->priv;
835
836 /* Start off by resetting the NAND controller completely */
837 cafe_writel(cafe, 1, NAND_RESET);
838 cafe_writel(cafe, 0, NAND_RESET);
839 cafe_writel(cafe, 0xffffffff, NAND_IRQ_MASK);
840
841 /* Restore timing configuration */
842 cafe_writel(cafe, timing[0], NAND_TIMING1);
843 cafe_writel(cafe, timing[1], NAND_TIMING2);
844 cafe_writel(cafe, timing[2], NAND_TIMING3);
845
846 /* Disable master reset, enable NAND clock */
847 ctrl = cafe_readl(cafe, GLOBAL_CTRL);
848 ctrl &= 0xffffeff0;
849 ctrl |= 0x00007000;
850 cafe_writel(cafe, ctrl | 0x05, GLOBAL_CTRL);
851 cafe_writel(cafe, ctrl | 0x0a, GLOBAL_CTRL);
852 cafe_writel(cafe, 0, NAND_DMA_CTRL);
853 cafe_writel(cafe, 0x7006, GLOBAL_CTRL);
854 cafe_writel(cafe, 0x700a, GLOBAL_CTRL);
855
856 /* Set up DMA address */
857 cafe_writel(cafe, cafe->dmaaddr & 0xffffffff, NAND_DMA_ADDR0);
858 if (sizeof(cafe->dmaaddr) > 4)
859 /* Shift in two parts to shut the compiler up */
860 cafe_writel(cafe, (cafe->dmaaddr >> 16) >> 16, NAND_DMA_ADDR1);
861 else
862 cafe_writel(cafe, 0, NAND_DMA_ADDR1);
863
864 /* Enable NAND IRQ in global IRQ mask register */
865 cafe_writel(cafe, 0x80000007, GLOBAL_IRQ_MASK);
866 return 0;
867}
868
825static struct pci_driver cafe_nand_pci_driver = { 869static struct pci_driver cafe_nand_pci_driver = {
826 .name = "CAFÉ NAND", 870 .name = "CAFÉ NAND",
827 .id_table = cafe_nand_tbl, 871 .id_table = cafe_nand_tbl,
828 .probe = cafe_nand_probe, 872 .probe = cafe_nand_probe,
829 .remove = __devexit_p(cafe_nand_remove), 873 .remove = __devexit_p(cafe_nand_remove),
830#ifdef CONFIG_PMx
831 .suspend = cafe_nand_suspend,
832 .resume = cafe_nand_resume, 874 .resume = cafe_nand_resume,
833#endif
834}; 875};
835 876
836static int cafe_nand_init(void) 877static int cafe_nand_init(void)
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index e96259f22cca..ab9f5c5db38d 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -56,8 +56,6 @@ static unsigned long __initdata doc_locations[] = {
56#endif /* CONFIG_MTD_DOCPROBE_HIGH */ 56#endif /* CONFIG_MTD_DOCPROBE_HIGH */
57#elif defined(__PPC__) 57#elif defined(__PPC__)
58 0xe4000000, 58 0xe4000000,
59#elif defined(CONFIG_MOMENCO_OCELOT_G)
60 0xff000000,
61#else 59#else
62#warning Unknown architecture for DiskOnChip. No default probe locations defined 60#warning Unknown architecture for DiskOnChip. No default probe locations defined
63#endif 61#endif
diff --git a/drivers/mtd/nand/excite_nandflash.c b/drivers/mtd/nand/excite_nandflash.c
index 7e9afc4c7757..bed87290decc 100644
--- a/drivers/mtd/nand/excite_nandflash.c
+++ b/drivers/mtd/nand/excite_nandflash.c
@@ -27,7 +27,6 @@
27#include <linux/platform_device.h> 27#include <linux/platform_device.h>
28#include <linux/delay.h> 28#include <linux/delay.h>
29#include <linux/err.h> 29#include <linux/err.h>
30#include <linux/kernel.h>
31 30
32#include <linux/mtd/mtd.h> 31#include <linux/mtd/mtd.h>
33#include <linux/mtd/nand.h> 32#include <linux/mtd/nand.h>
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 24ac6778b1a8..b4e0e7723894 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -7,7 +7,7 @@
7 * Basic support for AG-AND chips is provided. 7 * Basic support for AG-AND chips is provided.
8 * 8 *
9 * Additional technical information is available on 9 * Additional technical information is available on
10 * http://www.linux-mtd.infradead.org/tech/nand.html 10 * http://www.linux-mtd.infradead.org/doc/nand.html
11 * 11 *
12 * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com) 12 * Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com)
13 * 2002-2006 Thomas Gleixner (tglx@linutronix.de) 13 * 2002-2006 Thomas Gleixner (tglx@linutronix.de)
@@ -2069,13 +2069,14 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
2069 erase_exit: 2069 erase_exit:
2070 2070
2071 ret = instr->state == MTD_ERASE_DONE ? 0 : -EIO; 2071 ret = instr->state == MTD_ERASE_DONE ? 0 : -EIO;
2072 /* Do call back function */
2073 if (!ret)
2074 mtd_erase_callback(instr);
2075 2072
2076 /* Deselect and wake up anyone waiting on the device */ 2073 /* Deselect and wake up anyone waiting on the device */
2077 nand_release_device(mtd); 2074 nand_release_device(mtd);
2078 2075
2076 /* Do call back function */
2077 if (!ret)
2078 mtd_erase_callback(instr);
2079
2079 /* 2080 /*
2080 * If BBT requires refresh and erase was successful, rewrite any 2081 * If BBT requires refresh and erase was successful, rewrite any
2081 * selected bad block tables 2082 * selected bad block tables
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 2fc674a190cf..a3e3ab0185d5 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -141,6 +141,7 @@ struct nand_manufacturers nand_manuf_ids[] = {
141 {NAND_MFR_STMICRO, "ST Micro"}, 141 {NAND_MFR_STMICRO, "ST Micro"},
142 {NAND_MFR_HYNIX, "Hynix"}, 142 {NAND_MFR_HYNIX, "Hynix"},
143 {NAND_MFR_MICRON, "Micron"}, 143 {NAND_MFR_MICRON, "Micron"},
144 {NAND_MFR_AMD, "AMD"},
144 {0x0, "Unknown"} 145 {0x0, "Unknown"}
145}; 146};
146 147
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 205df0f771fe..a7574807dc46 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -1272,7 +1272,13 @@ static int prog_page(struct nandsim *ns, int num)
1272 mypage = NS_GET_PAGE(ns); 1272 mypage = NS_GET_PAGE(ns);
1273 if (mypage->byte == NULL) { 1273 if (mypage->byte == NULL) {
1274 NS_DBG("prog_page: allocating page %d\n", ns->regs.row); 1274 NS_DBG("prog_page: allocating page %d\n", ns->regs.row);
1275 mypage->byte = kmalloc(ns->geom.pgszoob, GFP_KERNEL); 1275 /*
1276 * We allocate memory with GFP_NOFS because a flash FS may
1277 * utilize this. If it is holding an FS lock, then gets here,
1278 * then kmalloc runs writeback which goes to the FS again
1279 * and deadlocks. This was seen in practice.
1280 */
1281 mypage->byte = kmalloc(ns->geom.pgszoob, GFP_NOFS);
1276 if (mypage->byte == NULL) { 1282 if (mypage->byte == NULL) {
1277 NS_ERR("prog_page: error allocating memory for page %d\n", ns->regs.row); 1283 NS_ERR("prog_page: error allocating memory for page %d\n", ns->regs.row);
1278 return -1; 1284 return -1;
diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c
index fd7a8d5ba29a..1c0e89f00e8d 100644
--- a/drivers/mtd/nand/ndfc.c
+++ b/drivers/mtd/nand/ndfc.c
@@ -24,7 +24,11 @@
24#include <linux/platform_device.h> 24#include <linux/platform_device.h>
25 25
26#include <asm/io.h> 26#include <asm/io.h>
27#ifdef CONFIG_40x
28#include <asm/ibm405.h>
29#else
27#include <asm/ibm44x.h> 30#include <asm/ibm44x.h>
31#endif
28 32
29struct ndfc_nand_mtd { 33struct ndfc_nand_mtd {
30 struct mtd_info mtd; 34 struct mtd_info mtd;
@@ -230,7 +234,11 @@ static int ndfc_nand_probe(struct platform_device *pdev)
230 struct ndfc_controller *ndfc = &ndfc_ctrl; 234 struct ndfc_controller *ndfc = &ndfc_ctrl;
231 unsigned long long phys = settings->ndfc_erpn | res->start; 235 unsigned long long phys = settings->ndfc_erpn | res->start;
232 236
237#ifndef CONFIG_PHYS_64BIT
238 ndfc->ndfcbase = ioremap((phys_addr_t)phys, res->end - res->start + 1);
239#else
233 ndfc->ndfcbase = ioremap64(phys, res->end - res->start + 1); 240 ndfc->ndfcbase = ioremap64(phys, res->end - res->start + 1);
241#endif
234 if (!ndfc->ndfcbase) { 242 if (!ndfc->ndfcbase) {
235 printk(KERN_ERR "NDFC: ioremap failed\n"); 243 printk(KERN_ERR "NDFC: ioremap failed\n");
236 return -EIO; 244 return -EIO;
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 5fac4c421a20..b79a9cf2d162 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -60,8 +60,8 @@
60 60
61#include <asm/io.h> 61#include <asm/io.h>
62 62
63#include <asm/arch/regs-nand.h> 63#include <asm/plat-s3c/regs-nand.h>
64#include <asm/arch/nand.h> 64#include <asm/plat-s3c/nand.h>
65 65
66#ifdef CONFIG_MTD_NAND_S3C2410_HWECC 66#ifdef CONFIG_MTD_NAND_S3C2410_HWECC
67static int hardware_ecc = 1; 67static int hardware_ecc = 1;
diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig
index c257d397d08a..cb41cbca64f7 100644
--- a/drivers/mtd/onenand/Kconfig
+++ b/drivers/mtd/onenand/Kconfig
@@ -40,4 +40,27 @@ config MTD_ONENAND_OTP
40 40
41 OTP block is fully-guaranteed to be a valid block. 41 OTP block is fully-guaranteed to be a valid block.
42 42
43config MTD_ONENAND_2X_PROGRAM
44 bool "OneNAND 2X program support"
45 help
46 The 2X Program is an extension of Program Operation.
47	  Since the device is equipped with two DataRAMs and a two-plane NAND
48	  Flash memory array, these two components enable the simultaneous
49	  programming of 4KiB. Plane1 has only even blocks such as block0, block2,
50	  block4 while Plane2 has only odd blocks such as block1, block3, block5.
51	  So MTD regards it as a 4KiB page size and a 256KiB block size.
52
53	  The following chips currently support it (KFXXX16Q2M):
54	  Demux: KFG2G16Q2M, KFH4G16Q2M, KFW8G16Q2M,
55	  Mux: KFM2G16Q2M, KFN4G16Q2M,
56
57	  and more recent chips.
58
59config MTD_ONENAND_SIM
60 tristate "OneNAND simulator support"
61 depends on MTD_PARTITIONS
62 help
63 The simulator may simulate various OneNAND flash chips for the
64 OneNAND MTD layer.
65
43endif # MTD_ONENAND 66endif # MTD_ONENAND
diff --git a/drivers/mtd/onenand/Makefile b/drivers/mtd/onenand/Makefile
index 269cfe467345..4d2eacfd7e11 100644
--- a/drivers/mtd/onenand/Makefile
+++ b/drivers/mtd/onenand/Makefile
@@ -8,4 +8,7 @@ obj-$(CONFIG_MTD_ONENAND) += onenand.o
8# Board specific. 8# Board specific.
9obj-$(CONFIG_MTD_ONENAND_GENERIC) += generic.o 9obj-$(CONFIG_MTD_ONENAND_GENERIC) += generic.o
10 10
11# Simulator
12obj-$(CONFIG_MTD_ONENAND_SIM) += onenand_sim.o
13
11onenand-objs = onenand_base.o onenand_bbt.o 14onenand-objs = onenand_base.o onenand_bbt.o
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 0537fac8de74..b2c40f67db83 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -206,6 +206,15 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
206 default: 206 default:
207 block = (int) (addr >> this->erase_shift); 207 block = (int) (addr >> this->erase_shift);
208 page = (int) (addr >> this->page_shift); 208 page = (int) (addr >> this->page_shift);
209
210 if (ONENAND_IS_2PLANE(this)) {
211 /* Make the even block number */
212 block &= ~1;
213 /* Is it the odd plane? */
214 if (addr & this->writesize)
215 block++;
216 page >>= 1;
217 }
209 page &= this->page_mask; 218 page &= this->page_mask;
210 break; 219 break;
211 } 220 }
@@ -216,8 +225,12 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
216 value = onenand_bufferram_address(this, block); 225 value = onenand_bufferram_address(this, block);
217 this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2); 226 this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2);
218 227
219 /* Switch to the next data buffer */ 228 if (ONENAND_IS_2PLANE(this))
220 ONENAND_SET_NEXT_BUFFERRAM(this); 229 /* It is always BufferRAM0 */
230 ONENAND_SET_BUFFERRAM0(this);
231 else
232 /* Switch to the next data buffer */
233 ONENAND_SET_NEXT_BUFFERRAM(this);
221 234
222 return 0; 235 return 0;
223 } 236 }
@@ -247,6 +260,8 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
247 break; 260 break;
248 261
249 default: 262 default:
263 if (ONENAND_IS_2PLANE(this) && cmd == ONENAND_CMD_PROG)
264 cmd = ONENAND_CMD_2X_PROG;
250 dataram = ONENAND_CURRENT_BUFFERRAM(this); 265 dataram = ONENAND_CURRENT_BUFFERRAM(this);
251 break; 266 break;
252 } 267 }
@@ -318,12 +333,14 @@ static int onenand_wait(struct mtd_info *mtd, int state)
318 if (interrupt & ONENAND_INT_READ) { 333 if (interrupt & ONENAND_INT_READ) {
319 int ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS); 334 int ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS);
320 if (ecc) { 335 if (ecc) {
321 printk(KERN_ERR "onenand_wait: ECC error = 0x%04x\n", ecc);
322 if (ecc & ONENAND_ECC_2BIT_ALL) { 336 if (ecc & ONENAND_ECC_2BIT_ALL) {
337 printk(KERN_ERR "onenand_wait: ECC error = 0x%04x\n", ecc);
323 mtd->ecc_stats.failed++; 338 mtd->ecc_stats.failed++;
324 return ecc; 339 return ecc;
325 } else if (ecc & ONENAND_ECC_1BIT_ALL) 340 } else if (ecc & ONENAND_ECC_1BIT_ALL) {
341 printk(KERN_INFO "onenand_wait: correctable ECC error = 0x%04x\n", ecc);
326 mtd->ecc_stats.corrected++; 342 mtd->ecc_stats.corrected++;
343 }
327 } 344 }
328 } else if (state == FL_READING) { 345 } else if (state == FL_READING) {
329 printk(KERN_ERR "onenand_wait: read timeout! ctrl=0x%04x intr=0x%04x\n", ctrl, interrupt); 346 printk(KERN_ERR "onenand_wait: read timeout! ctrl=0x%04x intr=0x%04x\n", ctrl, interrupt);
@@ -445,8 +462,9 @@ static inline int onenand_bufferram_offset(struct mtd_info *mtd, int area)
445 struct onenand_chip *this = mtd->priv; 462 struct onenand_chip *this = mtd->priv;
446 463
447 if (ONENAND_CURRENT_BUFFERRAM(this)) { 464 if (ONENAND_CURRENT_BUFFERRAM(this)) {
465		/* Note: 'this->writesize' is the real page size */
448 if (area == ONENAND_DATARAM) 466 if (area == ONENAND_DATARAM)
449 return mtd->writesize; 467 return this->writesize;
450 if (area == ONENAND_SPARERAM) 468 if (area == ONENAND_SPARERAM)
451 return mtd->oobsize; 469 return mtd->oobsize;
452 } 470 }
@@ -572,6 +590,30 @@ static int onenand_write_bufferram(struct mtd_info *mtd, int area,
572} 590}
573 591
574/** 592/**
593 * onenand_get_2x_blockpage - [GENERIC] Get blockpage at 2x program mode
594 * @param mtd MTD data structure
595 * @param addr address to check
596 * @return blockpage address
597 *
598 * Get blockpage address at 2x program mode
599 */
600static int onenand_get_2x_blockpage(struct mtd_info *mtd, loff_t addr)
601{
602 struct onenand_chip *this = mtd->priv;
603 int blockpage, block, page;
604
605 /* Calculate the even block number */
606 block = (int) (addr >> this->erase_shift) & ~1;
607 /* Is it the odd plane? */
608 if (addr & this->writesize)
609 block++;
610 page = (int) (addr >> (this->page_shift + 1)) & this->page_mask;
611 blockpage = (block << 7) | page;
612
613 return blockpage;
614}
615
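onenand_get_2x_blockpage() folds the two planes back into the device's real geometry: the block index is forced even, bumped to the odd plane when the address carries the real-page bit, and the page index is halved because consecutive emulated pages alternate planes. The same arithmetic in a standalone sketch, with illustrative geometry (2KiB real pages, 128KiB real blocks, 64 pages per block) rather than values probed from a chip:

#include <stdio.h>

/* Same math as onenand_get_2x_blockpage() above; shift and mask values
 * are assumptions for the example, not read from a device. */
static int get_2x_blockpage(unsigned long long addr)
{
	int erase_shift = 17, page_shift = 11, page_mask = 0x3f;
	int writesize = 1 << page_shift;
	int block, page;

	block = (int) (addr >> erase_shift) & ~1;	/* even block number */
	if (addr & writesize)				/* odd plane? */
		block++;
	page = (int) (addr >> (page_shift + 1)) & page_mask;
	return (block << 7) | page;
}

int main(void)
{
	/* the two 2KiB halves of one emulated 4KiB page hit paired planes */
	printf("0x00000 -> 0x%04x\n", (unsigned) get_2x_blockpage(0x00000ULL));
	printf("0x00800 -> 0x%04x\n", (unsigned) get_2x_blockpage(0x00800ULL));
	printf("0x01000 -> 0x%04x\n", (unsigned) get_2x_blockpage(0x01000ULL));
	return 0;
}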
616/**
575 * onenand_check_bufferram - [GENERIC] Check BufferRAM information 617 * onenand_check_bufferram - [GENERIC] Check BufferRAM information
576 * @param mtd MTD data structure 618 * @param mtd MTD data structure
577 * @param addr address to check 619 * @param addr address to check
@@ -585,7 +627,10 @@ static int onenand_check_bufferram(struct mtd_info *mtd, loff_t addr)
585 int blockpage, found = 0; 627 int blockpage, found = 0;
586 unsigned int i; 628 unsigned int i;
587 629
588 blockpage = (int) (addr >> this->page_shift); 630 if (ONENAND_IS_2PLANE(this))
631 blockpage = onenand_get_2x_blockpage(mtd, addr);
632 else
633 blockpage = (int) (addr >> this->page_shift);
589 634
590 /* Is there valid data? */ 635 /* Is there valid data? */
591 i = ONENAND_CURRENT_BUFFERRAM(this); 636 i = ONENAND_CURRENT_BUFFERRAM(this);
@@ -625,7 +670,10 @@ static void onenand_update_bufferram(struct mtd_info *mtd, loff_t addr,
625 int blockpage; 670 int blockpage;
626 unsigned int i; 671 unsigned int i;
627 672
628 blockpage = (int) (addr >> this->page_shift); 673 if (ONENAND_IS_2PLANE(this))
674 blockpage = onenand_get_2x_blockpage(mtd, addr);
675 else
676 blockpage = (int) (addr >> this->page_shift);
629 677
630 /* Invalidate another BufferRAM */ 678 /* Invalidate another BufferRAM */
631 i = ONENAND_NEXT_BUFFERRAM(this); 679 i = ONENAND_NEXT_BUFFERRAM(this);
@@ -717,36 +765,86 @@ static void onenand_release_device(struct mtd_info *mtd)
717} 765}
718 766
719/** 767/**
720 * onenand_read - [MTD Interface] Read data from flash 768 * onenand_transfer_auto_oob - [Internal] oob auto-placement transfer
769 * @param mtd MTD device structure
770 * @param buf destination address
771 * @param column oob offset to read from
772 * @param thislen oob length to read
773 */
774static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int column,
775 int thislen)
776{
777 struct onenand_chip *this = mtd->priv;
778 struct nand_oobfree *free;
779 int readcol = column;
780 int readend = column + thislen;
781 int lastgap = 0;
782 unsigned int i;
783 uint8_t *oob_buf = this->oob_buf;
784
785 free = this->ecclayout->oobfree;
786 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
787 if (readcol >= lastgap)
788 readcol += free->offset - lastgap;
789 if (readend >= lastgap)
790 readend += free->offset - lastgap;
791 lastgap = free->offset + free->length;
792 }
793 this->read_bufferram(mtd, ONENAND_SPARERAM, oob_buf, 0, mtd->oobsize);
794 free = this->ecclayout->oobfree;
795 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
796 int free_end = free->offset + free->length;
797 if (free->offset < readend && free_end > readcol) {
 798 int st = max_t(int, free->offset, readcol);
 799 int ed = min_t(int, free_end, readend);
800 int n = ed - st;
801 memcpy(buf, oob_buf + st, n);
802 buf += n;
803 } else if (column == 0)
804 break;
805 }
806 return 0;
807}
808
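The first loop above converts a column in the concatenated free area into a raw spare-area offset; the second copies only the bytes that fall inside oobfree segments. A standalone sketch of the translation step, using a made-up two-segment layout (free bytes 2-3 and 8-15) in place of this->ecclayout:

#include <stdio.h>

struct oobfree { int offset, length; };

/* Hypothetical free-area layout: bytes 2-3 and 8-15 of the spare area */
static const struct oobfree layout[] = { { 2, 2 }, { 8, 8 }, { 0, 0 } };

/* Translate a logical column in the concatenated free area into a raw
 * spare-area offset, mirroring the first loop of the transfer helpers. */
static int auto_to_raw(int col)
{
	const struct oobfree *f;
	int lastgap = 0;

	for (f = layout; f->length; f++) {
		if (col >= lastgap)
			col += f->offset - lastgap;
		lastgap = f->offset + f->length;
	}
	return col;
}

int main(void)
{
	int col;

	for (col = 0; col < 10; col++)
		printf("auto col %d -> raw offset %d\n", col, auto_to_raw(col));
	return 0;
}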
809/**
810 * onenand_read_ops_nolock - [OneNAND Interface] OneNAND read main and/or out-of-band
721 * @param mtd MTD device structure 811 * @param mtd MTD device structure
722 * @param from offset to read from 812 * @param from offset to read from
723 * @param len number of bytes to read 813 * @param ops: oob operation description structure
724 * @param retlen pointer to variable to store the number of read bytes
725 * @param buf the databuffer to put data
726 * 814 *
727 * Read with ecc 815 * OneNAND read main and/or out-of-band data
728*/ 816 */
729static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len, 817static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from,
730 size_t *retlen, u_char *buf) 818 struct mtd_oob_ops *ops)
731{ 819{
732 struct onenand_chip *this = mtd->priv; 820 struct onenand_chip *this = mtd->priv;
733 struct mtd_ecc_stats stats; 821 struct mtd_ecc_stats stats;
734 int read = 0, column; 822 size_t len = ops->len;
735 int thislen; 823 size_t ooblen = ops->ooblen;
824 u_char *buf = ops->datbuf;
825 u_char *oobbuf = ops->oobbuf;
826 int read = 0, column, thislen;
827 int oobread = 0, oobcolumn, thisooblen, oobsize;
736 int ret = 0, boundary = 0; 828 int ret = 0, boundary = 0;
829 int writesize = this->writesize;
737 830
738 DEBUG(MTD_DEBUG_LEVEL3, "onenand_read: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len); 831 DEBUG(MTD_DEBUG_LEVEL3, "onenand_read_ops_nolock: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len);
832
833 if (ops->mode == MTD_OOB_AUTO)
834 oobsize = this->ecclayout->oobavail;
835 else
836 oobsize = mtd->oobsize;
837
838 oobcolumn = from & (mtd->oobsize - 1);
739 839
740 /* Do not allow reads past end of device */ 840 /* Do not allow reads past end of device */
741 if ((from + len) > mtd->size) { 841 if ((from + len) > mtd->size) {
742 printk(KERN_ERR "onenand_read: Attempt read beyond end of device\n"); 842 printk(KERN_ERR "onenand_read_ops_nolock: Attempt read beyond end of device\n");
743 *retlen = 0; 843 ops->retlen = 0;
844 ops->oobretlen = 0;
744 return -EINVAL; 845 return -EINVAL;
745 } 846 }
746 847
747 /* Grab the lock and see if the device is available */
748 onenand_get_device(mtd, FL_READING);
749
750 stats = mtd->ecc_stats; 848 stats = mtd->ecc_stats;
751 849
752 /* Read-while-load method */ 850 /* Read-while-load method */
@@ -754,22 +852,22 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
754 /* Do first load to bufferRAM */ 852 /* Do first load to bufferRAM */
755 if (read < len) { 853 if (read < len) {
756 if (!onenand_check_bufferram(mtd, from)) { 854 if (!onenand_check_bufferram(mtd, from)) {
757 this->command(mtd, ONENAND_CMD_READ, from, mtd->writesize); 855 this->command(mtd, ONENAND_CMD_READ, from, writesize);
758 ret = this->wait(mtd, FL_READING); 856 ret = this->wait(mtd, FL_READING);
759 onenand_update_bufferram(mtd, from, !ret); 857 onenand_update_bufferram(mtd, from, !ret);
760 } 858 }
761 } 859 }
762 860
763 thislen = min_t(int, mtd->writesize, len - read); 861 thislen = min_t(int, writesize, len - read);
764 column = from & (mtd->writesize - 1); 862 column = from & (writesize - 1);
765 if (column + thislen > mtd->writesize) 863 if (column + thislen > writesize)
766 thislen = mtd->writesize - column; 864 thislen = writesize - column;
767 865
768 while (!ret) { 866 while (!ret) {
769 /* If there is more to load then start next load */ 867 /* If there is more to load then start next load */
770 from += thislen; 868 from += thislen;
771 if (read + thislen < len) { 869 if (read + thislen < len) {
772 this->command(mtd, ONENAND_CMD_READ, from, mtd->writesize); 870 this->command(mtd, ONENAND_CMD_READ, from, writesize);
773 /* 871 /*
774 * Chip boundary handling in DDP 872 * Chip boundary handling in DDP
775 * Now we issued chip 1 read and pointed chip 1 873 * Now we issued chip 1 read and pointed chip 1
@@ -785,6 +883,21 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
785 } 883 }
786 /* While load is going, read from last bufferRAM */ 884 /* While load is going, read from last bufferRAM */
787 this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen); 885 this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen);
886
887 /* Read oob area if needed */
888 if (oobbuf) {
889 thisooblen = oobsize - oobcolumn;
890 thisooblen = min_t(int, thisooblen, ooblen - oobread);
891
892 if (ops->mode == MTD_OOB_AUTO)
893 onenand_transfer_auto_oob(mtd, oobbuf, oobcolumn, thisooblen);
894 else
895 this->read_bufferram(mtd, ONENAND_SPARERAM, oobbuf, oobcolumn, thisooblen);
896 oobread += thisooblen;
897 oobbuf += thisooblen;
898 oobcolumn = 0;
899 }
900
788 /* See if we are done */ 901 /* See if we are done */
789 read += thislen; 902 read += thislen;
790 if (read == len) 903 if (read == len)
@@ -794,7 +907,7 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
794 this->write_word(ONENAND_DDP_CHIP1, this->base + ONENAND_REG_START_ADDRESS2); 907 this->write_word(ONENAND_DDP_CHIP1, this->base + ONENAND_REG_START_ADDRESS2);
795 ONENAND_SET_NEXT_BUFFERRAM(this); 908 ONENAND_SET_NEXT_BUFFERRAM(this);
796 buf += thislen; 909 buf += thislen;
797 thislen = min_t(int, mtd->writesize, len - read); 910 thislen = min_t(int, writesize, len - read);
798 column = 0; 911 column = 0;
799 cond_resched(); 912 cond_resched();
800 /* Now wait for load */ 913 /* Now wait for load */
@@ -802,15 +915,13 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
802 onenand_update_bufferram(mtd, from, !ret); 915 onenand_update_bufferram(mtd, from, !ret);
803 } 916 }
804 917
805 /* Deselect and wake up anyone waiting on the device */
806 onenand_release_device(mtd);
807
808 /* 918 /*
809 * Return success, if no ECC failures, else -EBADMSG 919 * Return success, if no ECC failures, else -EBADMSG
810 * fs driver will take care of that, because 920 * fs driver will take care of that, because
811 * retlen == desired len and result == -EBADMSG 921 * retlen == desired len and result == -EBADMSG
812 */ 922 */
813 *retlen = read; 923 ops->retlen = read;
924 ops->oobretlen = oobread;
814 925
815 if (mtd->ecc_stats.failed - stats.failed) 926 if (mtd->ecc_stats.failed - stats.failed)
816 return -EBADMSG; 927 return -EBADMSG;
@@ -822,69 +933,29 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
822} 933}
823 934
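The read loop above is a classic double-buffer pipeline: it issues the load of page N+1 into one BufferRAM, then drains page N from the other while the array access runs. The same shape in a tiny standalone sketch, with the device access simulated by memcpy:

#include <stdio.h>
#include <string.h>

#define PAGE 3
static const char core[3][PAGE + 1] = { "AAA", "BBB", "CCC" }; /* fake flash */
static char bufferram[2][PAGE];		/* the two on-chip buffers */

static void issue_load(int page, int buf)	/* start a load (instant here) */
{
	memcpy(bufferram[buf], core[page], PAGE);
}

int main(void)
{
	char out[3 * PAGE];
	int cur = 0;
	int page;

	issue_load(0, cur);				/* prime the pipeline */
	for (page = 0; page < 3; page++) {
		if (page + 1 < 3)			/* kick off the next load */
			issue_load(page + 1, cur ^ 1);
		memcpy(out + page * PAGE, bufferram[cur], PAGE); /* drain */
		cur ^= 1;				/* switch BufferRAM */
	}
	printf("%.9s\n", out);
	return 0;
}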
824/** 935/**
825 * onenand_transfer_auto_oob - [Internal] oob auto-placement transfer 936 * onenand_read_oob_nolock - [MTD Interface] OneNAND read out-of-band
826 * @param mtd MTD device structure
827 * @param buf destination address
828 * @param column oob offset to read from
829 * @param thislen oob length to read
830 */
831static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int column,
832 int thislen)
833{
834 struct onenand_chip *this = mtd->priv;
835 struct nand_oobfree *free;
836 int readcol = column;
837 int readend = column + thislen;
838 int lastgap = 0;
839 unsigned int i;
840 uint8_t *oob_buf = this->oob_buf;
841
842 free = this->ecclayout->oobfree;
843 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
844 if (readcol >= lastgap)
845 readcol += free->offset - lastgap;
846 if (readend >= lastgap)
847 readend += free->offset - lastgap;
848 lastgap = free->offset + free->length;
849 }
850 this->read_bufferram(mtd, ONENAND_SPARERAM, oob_buf, 0, mtd->oobsize);
851 free = this->ecclayout->oobfree;
852 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
853 int free_end = free->offset + free->length;
854 if (free->offset < readend && free_end > readcol) {
855 int st = max_t(int,free->offset,readcol);
856 int ed = min_t(int,free_end,readend);
857 int n = ed - st;
858 memcpy(buf, oob_buf + st, n);
859 buf += n;
860 } else if (column == 0)
861 break;
862 }
863 return 0;
864}
865
866/**
867 * onenand_do_read_oob - [MTD Interface] OneNAND read out-of-band
868 * @param mtd MTD device structure 937 * @param mtd MTD device structure
869 * @param from offset to read from 938 * @param from offset to read from
870 * @param len number of bytes to read 939 * @param ops: oob operation description structure
871 * @param retlen pointer to variable to store the number of read bytes
872 * @param buf the databuffer to put data
873 * @param mode operation mode
874 * 940 *
875 * OneNAND read out-of-band data from the spare area 941 * OneNAND read out-of-band data from the spare area
876 */ 942 */
877static int onenand_do_read_oob(struct mtd_info *mtd, loff_t from, size_t len, 943static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from,
878 size_t *retlen, u_char *buf, mtd_oob_mode_t mode) 944 struct mtd_oob_ops *ops)
879{ 945{
880 struct onenand_chip *this = mtd->priv; 946 struct onenand_chip *this = mtd->priv;
881 int read = 0, thislen, column, oobsize; 947 int read = 0, thislen, column, oobsize;
948 size_t len = ops->ooblen;
949 mtd_oob_mode_t mode = ops->mode;
950 u_char *buf = ops->oobbuf;
882 int ret = 0; 951 int ret = 0;
883 952
884 DEBUG(MTD_DEBUG_LEVEL3, "onenand_read_oob: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len); 953 from += ops->ooboffs;
954
955 DEBUG(MTD_DEBUG_LEVEL3, "onenand_read_oob_nolock: from = 0x%08x, len = %i\n", (unsigned int) from, (int) len);
885 956
886 /* Initialize return length value */ 957 /* Initialize return length value */
887 *retlen = 0; 958 ops->oobretlen = 0;
888 959
889 if (mode == MTD_OOB_AUTO) 960 if (mode == MTD_OOB_AUTO)
890 oobsize = this->ecclayout->oobavail; 961 oobsize = this->ecclayout->oobavail;
@@ -894,7 +965,7 @@ static int onenand_do_read_oob(struct mtd_info *mtd, loff_t from, size_t len,
894 column = from & (mtd->oobsize - 1); 965 column = from & (mtd->oobsize - 1);
895 966
896 if (unlikely(column >= oobsize)) { 967 if (unlikely(column >= oobsize)) {
897 printk(KERN_ERR "onenand_read_oob: Attempted to start read outside oob\n"); 968 printk(KERN_ERR "onenand_read_oob_nolock: Attempted to start read outside oob\n");
898 return -EINVAL; 969 return -EINVAL;
899 } 970 }
900 971
@@ -902,13 +973,10 @@ static int onenand_do_read_oob(struct mtd_info *mtd, loff_t from, size_t len,
902 if (unlikely(from >= mtd->size || 973 if (unlikely(from >= mtd->size ||
903 column + len > ((mtd->size >> this->page_shift) - 974 column + len > ((mtd->size >> this->page_shift) -
904 (from >> this->page_shift)) * oobsize)) { 975 (from >> this->page_shift)) * oobsize)) {
905 printk(KERN_ERR "onenand_read_oob: Attempted to read beyond end of device\n"); 976 printk(KERN_ERR "onenand_read_oob_nolock: Attempted to read beyond end of device\n");
906 return -EINVAL; 977 return -EINVAL;
907 } 978 }
908 979
909 /* Grab the lock and see if the device is available */
910 onenand_get_device(mtd, FL_READING);
911
912 while (read < len) { 980 while (read < len) {
913 cond_resched(); 981 cond_resched();
914 982
@@ -928,7 +996,7 @@ static int onenand_do_read_oob(struct mtd_info *mtd, loff_t from, size_t len,
928 this->read_bufferram(mtd, ONENAND_SPARERAM, buf, column, thislen); 996 this->read_bufferram(mtd, ONENAND_SPARERAM, buf, column, thislen);
929 997
930 if (ret) { 998 if (ret) {
931 printk(KERN_ERR "onenand_read_oob: read failed = 0x%x\n", ret); 999 printk(KERN_ERR "onenand_read_oob_nolock: read failed = 0x%x\n", ret);
932 break; 1000 break;
933 } 1001 }
934 1002
@@ -947,22 +1015,52 @@ static int onenand_do_read_oob(struct mtd_info *mtd, loff_t from, size_t len,
947 } 1015 }
948 } 1016 }
949 1017
950 /* Deselect and wake up anyone waiting on the device */ 1018 ops->oobretlen = read;
1019 return ret;
1020}
1021
1022/**
1023 * onenand_read - [MTD Interface] Read data from flash
1024 * @param mtd MTD device structure
1025 * @param from offset to read from
1026 * @param len number of bytes to read
1027 * @param retlen pointer to variable to store the number of read bytes
1028 * @param buf the databuffer to put data
1029 *
1030 * Read with ecc
1031*/
1032static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
1033 size_t *retlen, u_char *buf)
1034{
1035 struct mtd_oob_ops ops = {
1036 .len = len,
1037 .ooblen = 0,
1038 .datbuf = buf,
1039 .oobbuf = NULL,
1040 };
1041 int ret;
1042
1043 onenand_get_device(mtd, FL_READING);
1044 ret = onenand_read_ops_nolock(mtd, from, &ops);
951 onenand_release_device(mtd); 1045 onenand_release_device(mtd);
952 1046
953 *retlen = read; 1047 *retlen = ops.retlen;
954 return ret; 1048 return ret;
955} 1049}
956 1050
957/** 1051/**
958 * onenand_read_oob - [MTD Interface] NAND write data and/or out-of-band 1052 * onenand_read_oob - [MTD Interface] Read main and/or out-of-band
959 * @param mtd: MTD device structure 1053 * @param mtd: MTD device structure
960 * @param from: offset to read from 1054 * @param from: offset to read from
961 * @param ops: oob operation description structure 1055 * @param ops: oob operation description structure
1056
1057 * Read main and/or out-of-band
962 */ 1058 */
963static int onenand_read_oob(struct mtd_info *mtd, loff_t from, 1059static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
964 struct mtd_oob_ops *ops) 1060 struct mtd_oob_ops *ops)
965{ 1061{
1062 int ret;
1063
966 switch (ops->mode) { 1064 switch (ops->mode) {
967 case MTD_OOB_PLACE: 1065 case MTD_OOB_PLACE:
968 case MTD_OOB_AUTO: 1066 case MTD_OOB_AUTO:
@@ -972,8 +1070,15 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
972 default: 1070 default:
973 return -EINVAL; 1071 return -EINVAL;
974 } 1072 }
975 return onenand_do_read_oob(mtd, from + ops->ooboffs, ops->ooblen, 1073
976 &ops->oobretlen, ops->oobbuf, ops->mode); 1074 onenand_get_device(mtd, FL_READING);
1075 if (ops->datbuf)
1076 ret = onenand_read_ops_nolock(mtd, from, ops);
1077 else
1078 ret = onenand_read_oob_nolock(mtd, from, ops);
1079 onenand_release_device(mtd);
1080
1081 return ret;
977} 1082}
978 1083
979/** 1084/**
@@ -1079,7 +1184,7 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from,
1079 /* Read more? */ 1184 /* Read more? */
1080 if (read < len) { 1185 if (read < len) {
1081 /* Update Page size */ 1186 /* Update Page size */
1082 from += mtd->writesize; 1187 from += this->writesize;
1083 column = 0; 1188 column = 0;
1084 } 1189 }
1085 } 1190 }
@@ -1097,7 +1202,6 @@ int onenand_bbt_read_oob(struct mtd_info *mtd, loff_t from,
1097 * @param mtd MTD device structure 1202 * @param mtd MTD device structure
1098 * @param buf the databuffer to verify 1203 * @param buf the databuffer to verify
1099 * @param to offset to read from 1204 * @param to offset to read from
1100 *
1101 */ 1205 */
1102static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to) 1206static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to)
1103{ 1207{
@@ -1125,7 +1229,6 @@ static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to
1125 * @param buf the databuffer to verify 1229 * @param buf the databuffer to verify
1126 * @param addr offset to read from 1230 * @param addr offset to read from
1127 * @param len number of bytes to read and compare 1231 * @param len number of bytes to read and compare
1128 *
1129 */ 1232 */
1130static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr, size_t len) 1233static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr, size_t len)
1131{ 1234{
@@ -1135,12 +1238,12 @@ static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr,
1135 int thislen, column; 1238 int thislen, column;
1136 1239
1137 while (len != 0) { 1240 while (len != 0) {
1138 thislen = min_t(int, mtd->writesize, len); 1241 thislen = min_t(int, this->writesize, len);
1139 column = addr & (mtd->writesize - 1); 1242 column = addr & (this->writesize - 1);
1140 if (column + thislen > mtd->writesize) 1243 if (column + thislen > this->writesize)
1141 thislen = mtd->writesize - column; 1244 thislen = this->writesize - column;
1142 1245
1143 this->command(mtd, ONENAND_CMD_READ, addr, mtd->writesize); 1246 this->command(mtd, ONENAND_CMD_READ, addr, this->writesize);
1144 1247
1145 onenand_update_bufferram(mtd, addr, 0); 1248 onenand_update_bufferram(mtd, addr, 0);
1146 1249
@@ -1171,50 +1274,101 @@ static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr,
1171#define NOTALIGNED(x) ((x & (this->subpagesize - 1)) != 0) 1274#define NOTALIGNED(x) ((x & (this->subpagesize - 1)) != 0)
1172 1275
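NOTALIGNED() masks against subpagesize - 1, so it rejects any offset or length that is not a whole number of subpages. A quick standalone check, assuming a 512-byte subpage:

#include <stdio.h>

#define SUBPAGESIZE 512		/* assumed this->subpagesize */
#define NOTALIGNED(x)	(((x) & (SUBPAGESIZE - 1)) != 0)

int main(void)
{
	unsigned long offs[] = { 0, 512, 1000, 2048, 2049 };
	unsigned int i;

	for (i = 0; i < sizeof(offs) / sizeof(offs[0]); i++)
		printf("%4lu -> %s\n", offs[i],
		       NOTALIGNED(offs[i]) ? "rejected" : "ok");
	return 0;
}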
1173/** 1276/**
1174 * onenand_write - [MTD Interface] write buffer to FLASH 1277 * onenand_fill_auto_oob - [Internal] oob auto-placement transfer
1278 * @param mtd MTD device structure
1279 * @param oob_buf oob buffer
1280 * @param buf source address
1281 * @param column oob offset to write to
1282 * @param thislen oob length to write
1283 */
1284static int onenand_fill_auto_oob(struct mtd_info *mtd, u_char *oob_buf,
1285 const u_char *buf, int column, int thislen)
1286{
1287 struct onenand_chip *this = mtd->priv;
1288 struct nand_oobfree *free;
1289 int writecol = column;
1290 int writeend = column + thislen;
1291 int lastgap = 0;
1292 unsigned int i;
1293
1294 free = this->ecclayout->oobfree;
1295 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
1296 if (writecol >= lastgap)
1297 writecol += free->offset - lastgap;
1298 if (writeend >= lastgap)
1299 writeend += free->offset - lastgap;
1300 lastgap = free->offset + free->length;
1301 }
1302 free = this->ecclayout->oobfree;
1303 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
1304 int free_end = free->offset + free->length;
1305 if (free->offset < writeend && free_end > writecol) {
 1306 int st = max_t(int, free->offset, writecol);
 1307 int ed = min_t(int, free_end, writeend);
1308 int n = ed - st;
1309 memcpy(oob_buf + st, buf, n);
1310 buf += n;
1311 } else if (column == 0)
1312 break;
1313 }
1314 return 0;
1315}
1316
1317/**
1318 * onenand_write_ops_nolock - [OneNAND Interface] write main and/or out-of-band
1175 * @param mtd MTD device structure 1319 * @param mtd MTD device structure
1176 * @param to offset to write to 1320 * @param to offset to write to
1177 * @param len number of bytes to write 1321 * @param ops oob operation description structure
1178 * @param retlen pointer to variable to store the number of written bytes
1179 * @param buf the data to write
1180 * 1322 *
1181 * Write with ECC 1323 * Write main and/or oob with ECC
1182 */ 1324 */
1183static int onenand_write(struct mtd_info *mtd, loff_t to, size_t len, 1325static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
1184 size_t *retlen, const u_char *buf) 1326 struct mtd_oob_ops *ops)
1185{ 1327{
1186 struct onenand_chip *this = mtd->priv; 1328 struct onenand_chip *this = mtd->priv;
1187 int written = 0; 1329 int written = 0, column, thislen, subpage;
1330 int oobwritten = 0, oobcolumn, thisooblen, oobsize;
1331 size_t len = ops->len;
1332 size_t ooblen = ops->ooblen;
1333 const u_char *buf = ops->datbuf;
1334 const u_char *oob = ops->oobbuf;
1335 u_char *oobbuf;
1188 int ret = 0; 1336 int ret = 0;
1189 int column, subpage;
1190 1337
1191 DEBUG(MTD_DEBUG_LEVEL3, "onenand_write: to = 0x%08x, len = %i\n", (unsigned int) to, (int) len); 1338 DEBUG(MTD_DEBUG_LEVEL3, "onenand_write_ops_nolock: to = 0x%08x, len = %i\n", (unsigned int) to, (int) len);
1192 1339
1193 /* Initialize retlen, in case of early exit */ 1340 /* Initialize retlen, in case of early exit */
1194 *retlen = 0; 1341 ops->retlen = 0;
1342 ops->oobretlen = 0;
1195 1343
1196 /* Do not allow writes past end of device */ 1344 /* Do not allow writes past end of device */
1197 if (unlikely((to + len) > mtd->size)) { 1345 if (unlikely((to + len) > mtd->size)) {
 1198 printk(KERN_ERR "onenand_write: Attempt to write past end of device\n"); 1346 printk(KERN_ERR "onenand_write_ops_nolock: Attempt to write past end of device\n");
1199 return -EINVAL; 1347 return -EINVAL;
1200 } 1348 }
1201 1349
 1202 /* Reject writes that are not page aligned */ 1350 /* Reject writes that are not page aligned */
1203 if (unlikely(NOTALIGNED(to)) || unlikely(NOTALIGNED(len))) { 1351 if (unlikely(NOTALIGNED(to)) || unlikely(NOTALIGNED(len))) {
 1204 printk(KERN_ERR "onenand_write: Attempt to write data that is not page aligned\n"); 1352 printk(KERN_ERR "onenand_write_ops_nolock: Attempt to write data that is not page aligned\n");
1205 return -EINVAL; 1353 return -EINVAL;
1206 } 1354 }
1207 1355
1208 column = to & (mtd->writesize - 1); 1356 if (ops->mode == MTD_OOB_AUTO)
1357 oobsize = this->ecclayout->oobavail;
1358 else
1359 oobsize = mtd->oobsize;
1209 1360
1210 /* Grab the lock and see if the device is available */ 1361 oobcolumn = to & (mtd->oobsize - 1);
1211 onenand_get_device(mtd, FL_WRITING); 1362
1363 column = to & (mtd->writesize - 1);
1212 1364
1213 /* Loop until all data write */ 1365 /* Loop until all data write */
1214 while (written < len) { 1366 while (written < len) {
1215 int thislen = min_t(int, mtd->writesize - column, len - written);
1216 u_char *wbuf = (u_char *) buf; 1367 u_char *wbuf = (u_char *) buf;
1217 1368
1369 thislen = min_t(int, mtd->writesize - column, len - written);
1370 thisooblen = min_t(int, oobsize - oobcolumn, ooblen - oobwritten);
1371
1218 cond_resched(); 1372 cond_resched();
1219 1373
1220 this->command(mtd, ONENAND_CMD_BUFFERRAM, to, thislen); 1374 this->command(mtd, ONENAND_CMD_BUFFERRAM, to, thislen);
@@ -1228,7 +1382,25 @@ static int onenand_write(struct mtd_info *mtd, loff_t to, size_t len,
1228 } 1382 }
1229 1383
1230 this->write_bufferram(mtd, ONENAND_DATARAM, wbuf, 0, mtd->writesize); 1384 this->write_bufferram(mtd, ONENAND_DATARAM, wbuf, 0, mtd->writesize);
1231 this->write_bufferram(mtd, ONENAND_SPARERAM, ffchars, 0, mtd->oobsize); 1385
1386 if (oob) {
1387 oobbuf = this->oob_buf;
1388
 1389 /* We send data to the spare RAM with oobsize
1390 * to prevent byte access */
1391 memset(oobbuf, 0xff, mtd->oobsize);
1392 if (ops->mode == MTD_OOB_AUTO)
1393 onenand_fill_auto_oob(mtd, oobbuf, oob, oobcolumn, thisooblen);
1394 else
1395 memcpy(oobbuf + oobcolumn, oob, thisooblen);
1396
1397 oobwritten += thisooblen;
1398 oob += thisooblen;
1399 oobcolumn = 0;
1400 } else
1401 oobbuf = (u_char *) ffchars;
1402
1403 this->write_bufferram(mtd, ONENAND_SPARERAM, oobbuf, 0, mtd->oobsize);
1232 1404
1233 this->command(mtd, ONENAND_CMD_PROG, to, mtd->writesize); 1405 this->command(mtd, ONENAND_CMD_PROG, to, mtd->writesize);
1234 1406
@@ -1236,16 +1408,20 @@ static int onenand_write(struct mtd_info *mtd, loff_t to, size_t len,
1236 1408
1237 /* In partial page write we don't update bufferram */ 1409 /* In partial page write we don't update bufferram */
1238 onenand_update_bufferram(mtd, to, !ret && !subpage); 1410 onenand_update_bufferram(mtd, to, !ret && !subpage);
1411 if (ONENAND_IS_2PLANE(this)) {
1412 ONENAND_SET_BUFFERRAM1(this);
1413 onenand_update_bufferram(mtd, to + this->writesize, !ret && !subpage);
1414 }
1239 1415
1240 if (ret) { 1416 if (ret) {
 1241 printk(KERN_ERR "onenand_write: write failed %d\n", ret); 1417 printk(KERN_ERR "onenand_write_ops_nolock: write failed %d\n", ret);
1242 break; 1418 break;
1243 } 1419 }
1244 1420
1245 /* Only check verify write turn on */ 1421 /* Only check verify write turn on */
1246 ret = onenand_verify(mtd, (u_char *) wbuf, to, thislen); 1422 ret = onenand_verify(mtd, (u_char *) wbuf, to, thislen);
1247 if (ret) { 1423 if (ret) {
1248 printk(KERN_ERR "onenand_write: verify failed %d\n", ret); 1424 printk(KERN_ERR "onenand_write_ops_nolock: verify failed %d\n", ret);
1249 break; 1425 break;
1250 } 1426 }
1251 1427
@@ -1262,54 +1438,14 @@ static int onenand_write(struct mtd_info *mtd, loff_t to, size_t len,
1262 /* Deselect and wake up anyone waiting on the device */ 1438 /* Deselect and wake up anyone waiting on the device */
1263 onenand_release_device(mtd); 1439 onenand_release_device(mtd);
1264 1440
1265 *retlen = written; 1441 ops->retlen = written;
1266 1442
1267 return ret; 1443 return ret;
1268} 1444}
1269 1445
1270/**
1271 * onenand_fill_auto_oob - [Internal] oob auto-placement transfer
1272 * @param mtd MTD device structure
1273 * @param oob_buf oob buffer
1274 * @param buf source address
1275 * @param column oob offset to write to
1276 * @param thislen oob length to write
1277 */
1278static int onenand_fill_auto_oob(struct mtd_info *mtd, u_char *oob_buf,
1279 const u_char *buf, int column, int thislen)
1280{
1281 struct onenand_chip *this = mtd->priv;
1282 struct nand_oobfree *free;
1283 int writecol = column;
1284 int writeend = column + thislen;
1285 int lastgap = 0;
1286 unsigned int i;
1287
1288 free = this->ecclayout->oobfree;
1289 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
1290 if (writecol >= lastgap)
1291 writecol += free->offset - lastgap;
1292 if (writeend >= lastgap)
1293 writeend += free->offset - lastgap;
1294 lastgap = free->offset + free->length;
1295 }
1296 free = this->ecclayout->oobfree;
1297 for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) {
1298 int free_end = free->offset + free->length;
1299 if (free->offset < writeend && free_end > writecol) {
1300 int st = max_t(int,free->offset,writecol);
1301 int ed = min_t(int,free_end,writeend);
1302 int n = ed - st;
1303 memcpy(oob_buf + st, buf, n);
1304 buf += n;
1305 } else if (column == 0)
1306 break;
1307 }
1308 return 0;
1309}
1310 1446
1311/** 1447/**
1312 * onenand_do_write_oob - [Internal] OneNAND write out-of-band 1448 * onenand_write_oob_nolock - [Internal] OneNAND write out-of-band
1313 * @param mtd MTD device structure 1449 * @param mtd MTD device structure
1314 * @param to offset to write to 1450 * @param to offset to write to
1315 * @param len number of bytes to write 1451 * @param len number of bytes to write
@@ -1319,18 +1455,23 @@ static int onenand_fill_auto_oob(struct mtd_info *mtd, u_char *oob_buf,
1319 * 1455 *
1320 * OneNAND write out-of-band 1456 * OneNAND write out-of-band
1321 */ 1457 */
1322static int onenand_do_write_oob(struct mtd_info *mtd, loff_t to, size_t len, 1458static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
1323 size_t *retlen, const u_char *buf, mtd_oob_mode_t mode) 1459 struct mtd_oob_ops *ops)
1324{ 1460{
1325 struct onenand_chip *this = mtd->priv; 1461 struct onenand_chip *this = mtd->priv;
1326 int column, ret = 0, oobsize; 1462 int column, ret = 0, oobsize;
1327 int written = 0; 1463 int written = 0;
1328 u_char *oobbuf; 1464 u_char *oobbuf;
1465 size_t len = ops->ooblen;
1466 const u_char *buf = ops->oobbuf;
1467 mtd_oob_mode_t mode = ops->mode;
1329 1468
1330 DEBUG(MTD_DEBUG_LEVEL3, "onenand_write_oob: to = 0x%08x, len = %i\n", (unsigned int) to, (int) len); 1469 to += ops->ooboffs;
1470
1471 DEBUG(MTD_DEBUG_LEVEL3, "onenand_write_oob_nolock: to = 0x%08x, len = %i\n", (unsigned int) to, (int) len);
1331 1472
1332 /* Initialize retlen, in case of early exit */ 1473 /* Initialize retlen, in case of early exit */
1333 *retlen = 0; 1474 ops->oobretlen = 0;
1334 1475
1335 if (mode == MTD_OOB_AUTO) 1476 if (mode == MTD_OOB_AUTO)
1336 oobsize = this->ecclayout->oobavail; 1477 oobsize = this->ecclayout->oobavail;
@@ -1340,13 +1481,13 @@ static int onenand_do_write_oob(struct mtd_info *mtd, loff_t to, size_t len,
1340 column = to & (mtd->oobsize - 1); 1481 column = to & (mtd->oobsize - 1);
1341 1482
1342 if (unlikely(column >= oobsize)) { 1483 if (unlikely(column >= oobsize)) {
1343 printk(KERN_ERR "onenand_write_oob: Attempted to start write outside oob\n"); 1484 printk(KERN_ERR "onenand_write_oob_nolock: Attempted to start write outside oob\n");
1344 return -EINVAL; 1485 return -EINVAL;
1345 } 1486 }
1346 1487
1347 /* For compatibility with NAND: Do not allow write past end of page */ 1488 /* For compatibility with NAND: Do not allow write past end of page */
1348 if (unlikely(column + len > oobsize)) { 1489 if (unlikely(column + len > oobsize)) {
1349 printk(KERN_ERR "onenand_write_oob: " 1490 printk(KERN_ERR "onenand_write_oob_nolock: "
1350 "Attempt to write past end of page\n"); 1491 "Attempt to write past end of page\n");
1351 return -EINVAL; 1492 return -EINVAL;
1352 } 1493 }
@@ -1355,13 +1496,10 @@ static int onenand_do_write_oob(struct mtd_info *mtd, loff_t to, size_t len,
1355 if (unlikely(to >= mtd->size || 1496 if (unlikely(to >= mtd->size ||
1356 column + len > ((mtd->size >> this->page_shift) - 1497 column + len > ((mtd->size >> this->page_shift) -
1357 (to >> this->page_shift)) * oobsize)) { 1498 (to >> this->page_shift)) * oobsize)) {
1358 printk(KERN_ERR "onenand_write_oob: Attempted to write past end of device\n"); 1499 printk(KERN_ERR "onenand_write_oob_nolock: Attempted to write past end of device\n");
1359 return -EINVAL; 1500 return -EINVAL;
1360 } 1501 }
1361 1502
1362 /* Grab the lock and see if the device is available */
1363 onenand_get_device(mtd, FL_WRITING);
1364
1365 oobbuf = this->oob_buf; 1503 oobbuf = this->oob_buf;
1366 1504
1367 /* Loop until all data write */ 1505 /* Loop until all data write */
@@ -1384,16 +1522,20 @@ static int onenand_do_write_oob(struct mtd_info *mtd, loff_t to, size_t len,
1384 this->command(mtd, ONENAND_CMD_PROGOOB, to, mtd->oobsize); 1522 this->command(mtd, ONENAND_CMD_PROGOOB, to, mtd->oobsize);
1385 1523
1386 onenand_update_bufferram(mtd, to, 0); 1524 onenand_update_bufferram(mtd, to, 0);
1525 if (ONENAND_IS_2PLANE(this)) {
1526 ONENAND_SET_BUFFERRAM1(this);
1527 onenand_update_bufferram(mtd, to + this->writesize, 0);
1528 }
1387 1529
1388 ret = this->wait(mtd, FL_WRITING); 1530 ret = this->wait(mtd, FL_WRITING);
1389 if (ret) { 1531 if (ret) {
1390 printk(KERN_ERR "onenand_write_oob: write failed %d\n", ret); 1532 printk(KERN_ERR "onenand_write_oob_nolock: write failed %d\n", ret);
1391 break; 1533 break;
1392 } 1534 }
1393 1535
1394 ret = onenand_verify_oob(mtd, oobbuf, to); 1536 ret = onenand_verify_oob(mtd, oobbuf, to);
1395 if (ret) { 1537 if (ret) {
1396 printk(KERN_ERR "onenand_write_oob: verify failed %d\n", ret); 1538 printk(KERN_ERR "onenand_write_oob_nolock: verify failed %d\n", ret);
1397 break; 1539 break;
1398 } 1540 }
1399 1541
@@ -1406,11 +1548,37 @@ static int onenand_do_write_oob(struct mtd_info *mtd, loff_t to, size_t len,
1406 column = 0; 1548 column = 0;
1407 } 1549 }
1408 1550
1409 /* Deselect and wake up anyone waiting on the device */ 1551 ops->oobretlen = written;
1410 onenand_release_device(mtd); 1552
1553 return ret;
1554}
1555
1556/**
1557 * onenand_write - [MTD Interface] write buffer to FLASH
1558 * @param mtd MTD device structure
1559 * @param to offset to write to
1560 * @param len number of bytes to write
1561 * @param retlen pointer to variable to store the number of written bytes
1562 * @param buf the data to write
1563 *
1564 * Write with ECC
1565 */
1566static int onenand_write(struct mtd_info *mtd, loff_t to, size_t len,
1567 size_t *retlen, const u_char *buf)
1568{
1569 struct mtd_oob_ops ops = {
1570 .len = len,
1571 .ooblen = 0,
1572 .datbuf = (u_char *) buf,
1573 .oobbuf = NULL,
1574 };
1575 int ret;
1411 1576
1412 *retlen = written; 1577 onenand_get_device(mtd, FL_WRITING);
1578 ret = onenand_write_ops_nolock(mtd, to, &ops);
1579 onenand_release_device(mtd);
1413 1580
1581 *retlen = ops.retlen;
1414 return ret; 1582 return ret;
1415} 1583}
1416 1584
@@ -1423,6 +1591,8 @@ static int onenand_do_write_oob(struct mtd_info *mtd, loff_t to, size_t len,
1423static int onenand_write_oob(struct mtd_info *mtd, loff_t to, 1591static int onenand_write_oob(struct mtd_info *mtd, loff_t to,
1424 struct mtd_oob_ops *ops) 1592 struct mtd_oob_ops *ops)
1425{ 1593{
1594 int ret;
1595
1426 switch (ops->mode) { 1596 switch (ops->mode) {
1427 case MTD_OOB_PLACE: 1597 case MTD_OOB_PLACE:
1428 case MTD_OOB_AUTO: 1598 case MTD_OOB_AUTO:
@@ -1432,21 +1602,27 @@ static int onenand_write_oob(struct mtd_info *mtd, loff_t to,
1432 default: 1602 default:
1433 return -EINVAL; 1603 return -EINVAL;
1434 } 1604 }
1435 return onenand_do_write_oob(mtd, to + ops->ooboffs, ops->ooblen, 1605
1436 &ops->oobretlen, ops->oobbuf, ops->mode); 1606 onenand_get_device(mtd, FL_WRITING);
1607 if (ops->datbuf)
1608 ret = onenand_write_ops_nolock(mtd, to, ops);
1609 else
1610 ret = onenand_write_oob_nolock(mtd, to, ops);
1611 onenand_release_device(mtd);
1612
1613 return ret;
1437} 1614}
1438 1615
1439/** 1616/**
1440 * onenand_block_checkbad - [GENERIC] Check if a block is marked bad 1617 * onenand_block_isbad_nolock - [GENERIC] Check if a block is marked bad
1441 * @param mtd MTD device structure 1618 * @param mtd MTD device structure
1442 * @param ofs offset from device start 1619 * @param ofs offset from device start
1443 * @param getchip 0, if the chip is already selected
1444 * @param allowbbt 1, if its allowed to access the bbt area 1620 * @param allowbbt 1, if its allowed to access the bbt area
1445 * 1621 *
1446 * Check, if the block is bad. Either by reading the bad block table or 1622 * Check, if the block is bad. Either by reading the bad block table or
1447 * calling of the scan function. 1623 * calling of the scan function.
1448 */ 1624 */
1449static int onenand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int getchip, int allowbbt) 1625static int onenand_block_isbad_nolock(struct mtd_info *mtd, loff_t ofs, int allowbbt)
1450{ 1626{
1451 struct onenand_chip *this = mtd->priv; 1627 struct onenand_chip *this = mtd->priv;
1452 struct bbm_info *bbm = this->bbm; 1628 struct bbm_info *bbm = this->bbm;
@@ -1507,7 +1683,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr)
1507 cond_resched(); 1683 cond_resched();
1508 1684
1509 /* Check if we have a bad block, we do not erase bad blocks */ 1685 /* Check if we have a bad block, we do not erase bad blocks */
1510 if (onenand_block_checkbad(mtd, addr, 0, 0)) { 1686 if (onenand_block_isbad_nolock(mtd, addr, 0)) {
1511 printk (KERN_WARNING "onenand_erase: attempt to erase a bad block at addr 0x%08x\n", (unsigned int) addr); 1687 printk (KERN_WARNING "onenand_erase: attempt to erase a bad block at addr 0x%08x\n", (unsigned int) addr);
1512 instr->state = MTD_ERASE_FAILED; 1688 instr->state = MTD_ERASE_FAILED;
1513 goto erase_exit; 1689 goto erase_exit;
@@ -1571,11 +1747,16 @@ static void onenand_sync(struct mtd_info *mtd)
1571 */ 1747 */
1572static int onenand_block_isbad(struct mtd_info *mtd, loff_t ofs) 1748static int onenand_block_isbad(struct mtd_info *mtd, loff_t ofs)
1573{ 1749{
1750 int ret;
1751
1574 /* Check for invalid offset */ 1752 /* Check for invalid offset */
1575 if (ofs > mtd->size) 1753 if (ofs > mtd->size)
1576 return -EINVAL; 1754 return -EINVAL;
1577 1755
1578 return onenand_block_checkbad(mtd, ofs, 1, 0); 1756 onenand_get_device(mtd, FL_READING);
1757 ret = onenand_block_isbad_nolock(mtd, ofs, 0);
1758 onenand_release_device(mtd);
1759 return ret;
1579} 1760}
1580 1761
1581/** 1762/**
@@ -1591,7 +1772,12 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
1591 struct onenand_chip *this = mtd->priv; 1772 struct onenand_chip *this = mtd->priv;
1592 struct bbm_info *bbm = this->bbm; 1773 struct bbm_info *bbm = this->bbm;
1593 u_char buf[2] = {0, 0}; 1774 u_char buf[2] = {0, 0};
1594 size_t retlen; 1775 struct mtd_oob_ops ops = {
1776 .mode = MTD_OOB_PLACE,
1777 .ooblen = 2,
1778 .oobbuf = buf,
1779 .ooboffs = 0,
1780 };
1595 int block; 1781 int block;
1596 1782
1597 /* Get block number */ 1783 /* Get block number */
@@ -1601,7 +1787,7 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
1601 1787
 1602 /* We write two bytes, so we don't have to mess with 16 bit access */ 1788 /* We write two bytes, so we don't have to mess with 16 bit access */
1603 ofs += mtd->oobsize + (bbm->badblockpos & ~0x01); 1789 ofs += mtd->oobsize + (bbm->badblockpos & ~0x01);
1604 return onenand_do_write_oob(mtd, ofs , 2, &retlen, buf, MTD_OOB_PLACE); 1790 return onenand_write_oob_nolock(mtd, ofs, &ops);
1605} 1791}
1606 1792
1607/** 1793/**
@@ -1624,7 +1810,10 @@ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
1624 return ret; 1810 return ret;
1625 } 1811 }
1626 1812
1627 return this->block_markbad(mtd, ofs); 1813 onenand_get_device(mtd, FL_WRITING);
1814 ret = this->block_markbad(mtd, ofs);
1815 onenand_release_device(mtd);
1816 return ret;
1628} 1817}
1629 1818
1630/** 1819/**
@@ -1823,13 +2012,19 @@ static int do_otp_read(struct mtd_info *mtd, loff_t from, size_t len,
1823 size_t *retlen, u_char *buf) 2012 size_t *retlen, u_char *buf)
1824{ 2013{
1825 struct onenand_chip *this = mtd->priv; 2014 struct onenand_chip *this = mtd->priv;
2015 struct mtd_oob_ops ops = {
2016 .len = len,
2017 .ooblen = 0,
2018 .datbuf = buf,
2019 .oobbuf = NULL,
2020 };
1826 int ret; 2021 int ret;
1827 2022
1828 /* Enter OTP access mode */ 2023 /* Enter OTP access mode */
1829 this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0); 2024 this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
1830 this->wait(mtd, FL_OTPING); 2025 this->wait(mtd, FL_OTPING);
1831 2026
1832 ret = mtd->read(mtd, from, len, retlen, buf); 2027 ret = onenand_read_ops_nolock(mtd, from, &ops);
1833 2028
1834 /* Exit OTP access mode */ 2029 /* Exit OTP access mode */
1835 this->command(mtd, ONENAND_CMD_RESET, 0, 0); 2030 this->command(mtd, ONENAND_CMD_RESET, 0, 0);
@@ -1841,19 +2036,20 @@ static int do_otp_read(struct mtd_info *mtd, loff_t from, size_t len,
1841/** 2036/**
1842 * do_otp_write - [DEFAULT] Write OTP block area 2037 * do_otp_write - [DEFAULT] Write OTP block area
1843 * @param mtd MTD device structure 2038 * @param mtd MTD device structure
1844 * @param from The offset to write 2039 * @param to The offset to write
1845 * @param len number of bytes to write 2040 * @param len number of bytes to write
1846 * @param retlen pointer to variable to store the number of write bytes 2041 * @param retlen pointer to variable to store the number of write bytes
1847 * @param buf the databuffer to put/get data 2042 * @param buf the databuffer to put/get data
1848 * 2043 *
1849 * Write OTP block area. 2044 * Write OTP block area.
1850 */ 2045 */
1851static int do_otp_write(struct mtd_info *mtd, loff_t from, size_t len, 2046static int do_otp_write(struct mtd_info *mtd, loff_t to, size_t len,
1852 size_t *retlen, u_char *buf) 2047 size_t *retlen, u_char *buf)
1853{ 2048{
1854 struct onenand_chip *this = mtd->priv; 2049 struct onenand_chip *this = mtd->priv;
1855 unsigned char *pbuf = buf; 2050 unsigned char *pbuf = buf;
1856 int ret; 2051 int ret;
2052 struct mtd_oob_ops ops;
1857 2053
1858 /* Force buffer page aligned */ 2054 /* Force buffer page aligned */
1859 if (len < mtd->writesize) { 2055 if (len < mtd->writesize) {
@@ -1867,7 +2063,12 @@ static int do_otp_write(struct mtd_info *mtd, loff_t from, size_t len,
1867 this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0); 2063 this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
1868 this->wait(mtd, FL_OTPING); 2064 this->wait(mtd, FL_OTPING);
1869 2065
1870 ret = mtd->write(mtd, from, len, retlen, pbuf); 2066 ops.len = len;
2067 ops.ooblen = 0;
2068 ops.datbuf = pbuf;
2069 ops.oobbuf = NULL;
2070 ret = onenand_write_ops_nolock(mtd, to, &ops);
2071 *retlen = ops.retlen;
1871 2072
1872 /* Exit OTP access mode */ 2073 /* Exit OTP access mode */
1873 this->command(mtd, ONENAND_CMD_RESET, 0, 0); 2074 this->command(mtd, ONENAND_CMD_RESET, 0, 0);
@@ -1890,13 +2091,21 @@ static int do_otp_lock(struct mtd_info *mtd, loff_t from, size_t len,
1890 size_t *retlen, u_char *buf) 2091 size_t *retlen, u_char *buf)
1891{ 2092{
1892 struct onenand_chip *this = mtd->priv; 2093 struct onenand_chip *this = mtd->priv;
2094 struct mtd_oob_ops ops = {
2095 .mode = MTD_OOB_PLACE,
2096 .ooblen = len,
2097 .oobbuf = buf,
2098 .ooboffs = 0,
2099 };
1893 int ret; 2100 int ret;
1894 2101
1895 /* Enter OTP access mode */ 2102 /* Enter OTP access mode */
1896 this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0); 2103 this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
1897 this->wait(mtd, FL_OTPING); 2104 this->wait(mtd, FL_OTPING);
1898 2105
1899 ret = onenand_do_write_oob(mtd, from, len, retlen, buf, MTD_OOB_PLACE); 2106 ret = onenand_write_oob_nolock(mtd, from, &ops);
2107
2108 *retlen = ops.oobretlen;
1900 2109
1901 /* Exit OTP access mode */ 2110 /* Exit OTP access mode */
1902 this->command(mtd, ONENAND_CMD_RESET, 0, 0); 2111 this->command(mtd, ONENAND_CMD_RESET, 0, 0);
@@ -1943,13 +2152,16 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
1943 if (((mtd->writesize * otp_pages) - (from + len)) < 0) 2152 if (((mtd->writesize * otp_pages) - (from + len)) < 0)
1944 return 0; 2153 return 0;
1945 2154
2155 onenand_get_device(mtd, FL_OTPING);
1946 while (len > 0 && otp_pages > 0) { 2156 while (len > 0 && otp_pages > 0) {
1947 if (!action) { /* OTP Info functions */ 2157 if (!action) { /* OTP Info functions */
1948 struct otp_info *otpinfo; 2158 struct otp_info *otpinfo;
1949 2159
1950 len -= sizeof(struct otp_info); 2160 len -= sizeof(struct otp_info);
1951 if (len <= 0) 2161 if (len <= 0) {
1952 return -ENOSPC; 2162 ret = -ENOSPC;
2163 break;
2164 }
1953 2165
1954 otpinfo = (struct otp_info *) buf; 2166 otpinfo = (struct otp_info *) buf;
1955 otpinfo->start = from; 2167 otpinfo->start = from;
@@ -1969,13 +2181,14 @@ static int onenand_otp_walk(struct mtd_info *mtd, loff_t from, size_t len,
1969 len -= size; 2181 len -= size;
1970 *retlen += size; 2182 *retlen += size;
1971 2183
1972 if (ret < 0) 2184 if (ret)
1973 return ret; 2185 break;
1974 } 2186 }
1975 otp_pages--; 2187 otp_pages--;
1976 } 2188 }
2189 onenand_release_device(mtd);
1977 2190
1978 return 0; 2191 return ret;
1979} 2192}
1980 2193
1981/** 2194/**
@@ -2107,6 +2320,7 @@ static int onenand_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
2107 * 2320 *
2108 * Check and set OneNAND features 2321 * Check and set OneNAND features
2109 * - lock scheme 2322 * - lock scheme
2323 * - two plane
2110 */ 2324 */
2111static void onenand_check_features(struct mtd_info *mtd) 2325static void onenand_check_features(struct mtd_info *mtd)
2112{ 2326{
@@ -2118,19 +2332,35 @@ static void onenand_check_features(struct mtd_info *mtd)
2118 process = this->version_id >> ONENAND_VERSION_PROCESS_SHIFT; 2332 process = this->version_id >> ONENAND_VERSION_PROCESS_SHIFT;
2119 2333
2120 /* Lock scheme */ 2334 /* Lock scheme */
2121 if (density >= ONENAND_DEVICE_DENSITY_1Gb) { 2335 switch (density) {
2336 case ONENAND_DEVICE_DENSITY_4Gb:
2337 this->options |= ONENAND_HAS_2PLANE;
2338
2339 case ONENAND_DEVICE_DENSITY_2Gb:
 2340 /* 2Gb DDP doesn't have 2 planes */
2341 if (!ONENAND_IS_DDP(this))
2342 this->options |= ONENAND_HAS_2PLANE;
2343 this->options |= ONENAND_HAS_UNLOCK_ALL;
2344
2345 case ONENAND_DEVICE_DENSITY_1Gb:
2122 /* A-Die has all block unlock */ 2346 /* A-Die has all block unlock */
2123 if (process) { 2347 if (process)
2124 printk(KERN_DEBUG "Chip support all block unlock\n");
2125 this->options |= ONENAND_HAS_UNLOCK_ALL; 2348 this->options |= ONENAND_HAS_UNLOCK_ALL;
2126 } 2349 break;
2127 } else { 2350
2128 /* Some OneNAND has continues lock scheme */ 2351 default:
2129 if (!process) { 2352 /* Some OneNAND has continuous lock scheme */
2130 printk(KERN_DEBUG "Lock scheme is Continues Lock\n"); 2353 if (!process)
2131 this->options |= ONENAND_HAS_CONT_LOCK; 2354 this->options |= ONENAND_HAS_CONT_LOCK;
2132 } 2355 break;
2133 } 2356 }
2357
2358 if (this->options & ONENAND_HAS_CONT_LOCK)
2359 printk(KERN_DEBUG "Lock scheme is Continuous Lock\n");
2360 if (this->options & ONENAND_HAS_UNLOCK_ALL)
 2361 printk(KERN_DEBUG "Chip supports all block unlock\n");
2362 if (this->options & ONENAND_HAS_2PLANE)
 2363 printk(KERN_DEBUG "Chip has 2 planes\n");
2134} 2364}
2135 2365
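The density switch above relies on deliberate fall-through: a 4Gb part collects ONENAND_HAS_2PLANE and then everything the 2Gb and 1Gb cases add, while 2Gb DDP parts skip the second plane. A standalone sketch of the cascade with stand-in density codes and option bits (the values are illustrative, not the onenand.h constants):

#include <stdio.h>

enum { DENSITY_1Gb = 2, DENSITY_2Gb = 3, DENSITY_4Gb = 4 };	/* stand-ins */
#define HAS_CONT_LOCK	(1 << 0)
#define HAS_UNLOCK_ALL	(1 << 1)
#define HAS_2PLANE	(1 << 2)

static int features(int density, int is_ddp, int process)
{
	int options = 0;

	switch (density) {
	case DENSITY_4Gb:
		options |= HAS_2PLANE;
		/* fall through */
	case DENSITY_2Gb:
		if (!is_ddp)			/* 2Gb DDP has no 2nd plane */
			options |= HAS_2PLANE;
		options |= HAS_UNLOCK_ALL;
		/* fall through */
	case DENSITY_1Gb:
		if (process)			/* A-die: all block unlock */
			options |= HAS_UNLOCK_ALL;
		break;
	default:
		if (!process)
			options |= HAS_CONT_LOCK;
		break;
	}
	return options;
}

int main(void)
{
	printf("4Gb:     0x%x\n", features(DENSITY_4Gb, 0, 0));
	printf("2Gb DDP: 0x%x\n", features(DENSITY_2Gb, 1, 0));
	printf("1Gb A:   0x%x\n", features(DENSITY_1Gb, 0, 1));
	return 0;
}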
2136/** 2366/**
@@ -2154,7 +2384,7 @@ static void onenand_print_device_info(int device, int version)
2154 (16 << density), 2384 (16 << density),
2155 vcc ? "2.65/3.3" : "1.8", 2385 vcc ? "2.65/3.3" : "1.8",
2156 device); 2386 device);
2157 printk(KERN_DEBUG "OneNAND version = 0x%04x\n", version); 2387 printk(KERN_INFO "OneNAND version = 0x%04x\n", version);
2158} 2388}
2159 2389
2160static const struct onenand_manufacturers onenand_manuf_ids[] = { 2390static const struct onenand_manufacturers onenand_manuf_ids[] = {
@@ -2257,6 +2487,8 @@ static int onenand_probe(struct mtd_info *mtd)
2257 this->erase_shift = ffs(mtd->erasesize) - 1; 2487 this->erase_shift = ffs(mtd->erasesize) - 1;
2258 this->page_shift = ffs(mtd->writesize) - 1; 2488 this->page_shift = ffs(mtd->writesize) - 1;
2259 this->page_mask = (1 << (this->erase_shift - this->page_shift)) - 1; 2489 this->page_mask = (1 << (this->erase_shift - this->page_shift)) - 1;
 2490 /* It's the real page size */
2491 this->writesize = mtd->writesize;
2260 2492
2261 /* REVIST: Multichip handling */ 2493 /* REVIST: Multichip handling */
2262 2494
@@ -2265,6 +2497,17 @@ static int onenand_probe(struct mtd_info *mtd)
2265 /* Check OneNAND features */ 2497 /* Check OneNAND features */
2266 onenand_check_features(mtd); 2498 onenand_check_features(mtd);
2267 2499
2500 /*
 2501 * We emulate the 4KiB page and 256KiB erase block size,
 2502 * but oobsize is still 64 bytes.
 2503 * This is only valid when 2X program support is turned on;
 2504 * otherwise it is ignored by the compiler.
2505 */
2506 if (ONENAND_IS_2PLANE(this)) {
2507 mtd->writesize <<= 1;
2508 mtd->erasesize <<= 1;
2509 }
2510
2268 return 0; 2511 return 0;
2269} 2512}
2270 2513
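The net effect of the 2-plane probe path: this->writesize keeps the chip's real page size while mtd->writesize and mtd->erasesize advertise doubled, emulated geometry to MTD. The bookkeeping, checked standalone with an assumed 2KiB/128KiB part:

#include <stdio.h>

int main(void)
{
	int mtd_writesize = 2048, mtd_erasesize = 128 * 1024;
	int this_writesize = mtd_writesize;	/* keep the real page size */
	int is_2plane = 1;			/* assume a 2-plane chip */

	if (is_2plane) {			/* advertise doubled geometry */
		mtd_writesize <<= 1;
		mtd_erasesize <<= 1;
	}
	printf("real page %d, emulated page %d, emulated block %d\n",
	       this_writesize, mtd_writesize, mtd_erasesize);
	return 0;
}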
diff --git a/drivers/mtd/onenand/onenand_sim.c b/drivers/mtd/onenand/onenand_sim.c
new file mode 100644
index 000000000000..0d89ad5776fa
--- /dev/null
+++ b/drivers/mtd/onenand/onenand_sim.c
@@ -0,0 +1,495 @@
1/*
2 * linux/drivers/mtd/onenand/onenand_sim.c
3 *
4 * The OneNAND simulator
5 *
6 * Copyright © 2005-2007 Samsung Electronics
7 * Kyungmin Park <kyungmin.park@samsung.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/vmalloc.h>
18#include <linux/mtd/mtd.h>
19#include <linux/mtd/partitions.h>
20#include <linux/mtd/onenand.h>
21
22#include <linux/io.h>
23
24#ifndef CONFIG_ONENAND_SIM_MANUFACTURER
25#define CONFIG_ONENAND_SIM_MANUFACTURER 0xec
26#endif
27#ifndef CONFIG_ONENAND_SIM_DEVICE_ID
28#define CONFIG_ONENAND_SIM_DEVICE_ID 0x04
29#endif
30#ifndef CONFIG_ONENAND_SIM_VERSION_ID
31#define CONFIG_ONENAND_SIM_VERSION_ID 0x1e
32#endif
33
34static int manuf_id = CONFIG_ONENAND_SIM_MANUFACTURER;
35static int device_id = CONFIG_ONENAND_SIM_DEVICE_ID;
36static int version_id = CONFIG_ONENAND_SIM_VERSION_ID;
37
38struct onenand_flash {
39 void __iomem *base;
40 void __iomem *data;
41};
42
43#define ONENAND_CORE(flash) (flash->data)
44#define ONENAND_CORE_SPARE(flash, this, offset) \
45 ((flash->data) + (this->chipsize) + (offset >> 5))
46
47#define ONENAND_MAIN_AREA(this, offset) \
48 (this->base + ONENAND_DATARAM + offset)
49
50#define ONENAND_SPARE_AREA(this, offset) \
51 (this->base + ONENAND_SPARERAM + offset)
52
53#define ONENAND_GET_WP_STATUS(this) \
54 (readw(this->base + ONENAND_REG_WP_STATUS))
55
56#define ONENAND_SET_WP_STATUS(v, this) \
57 (writew(v, this->base + ONENAND_REG_WP_STATUS))
58
59/* It has all 0xff chars */
60#define MAX_ONENAND_PAGESIZE (2048 + 64)
61static unsigned char *ffchars;
62
63static struct mtd_partition os_partitions[] = {
64 {
65 .name = "OneNAND simulator partition",
66 .offset = 0,
67 .size = MTDPART_SIZ_FULL,
68 },
69};
70
71/*
72 * OneNAND simulator mtd
73 */
74struct onenand_info {
75 struct mtd_info mtd;
76 struct mtd_partition *parts;
77 struct onenand_chip onenand;
78 struct onenand_flash flash;
79};
80
81static struct onenand_info *info;
82
83#define DPRINTK(format, args...) \
84do { \
85 printk(KERN_DEBUG "%s[%d]: " format "\n", __func__, \
86 __LINE__, ##args); \
87} while (0)
88
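DPRINTK() wraps its body in do { ... } while (0) so the expansion behaves as a single statement, which keeps an unbraced if/else around the macro well-formed. A minimal illustration of the idiom, using the same GNU named-variadic ##args trick (LOG is a stand-in name):

#include <stdio.h>

#define LOG(fmt, args...) \
do { \
	printf("%s[%d]: " fmt "\n", __func__, __LINE__, ##args); \
} while (0)

int main(void)
{
	int verbose = 1;

	if (verbose)
		LOG("value = %d", 42);	/* expands to one statement */
	else
		LOG("quiet");
	return 0;
}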
89/**
90 * onenand_lock_handle - Handle Lock scheme
91 * @param this OneNAND device structure
92 * @param cmd The command to be sent
93 *
94 * Send lock command to OneNAND device.
 95 * The lock scheme depends on the chip type.
96 */
97static void onenand_lock_handle(struct onenand_chip *this, int cmd)
98{
99 int block_lock_scheme;
100 int status;
101
102 status = ONENAND_GET_WP_STATUS(this);
103 block_lock_scheme = !(this->options & ONENAND_HAS_CONT_LOCK);
104
105 switch (cmd) {
106 case ONENAND_CMD_UNLOCK:
107 if (block_lock_scheme)
108 ONENAND_SET_WP_STATUS(ONENAND_WP_US, this);
109 else
110 ONENAND_SET_WP_STATUS(status | ONENAND_WP_US, this);
111 break;
112
113 case ONENAND_CMD_LOCK:
114 if (block_lock_scheme)
115 ONENAND_SET_WP_STATUS(ONENAND_WP_LS, this);
116 else
117 ONENAND_SET_WP_STATUS(status | ONENAND_WP_LS, this);
118 break;
119
120 case ONENAND_CMD_LOCK_TIGHT:
121 if (block_lock_scheme)
122 ONENAND_SET_WP_STATUS(ONENAND_WP_LTS, this);
123 else
124 ONENAND_SET_WP_STATUS(status | ONENAND_WP_LTS, this);
125 break;
126
127 default:
128 break;
129 }
130}
131
132/**
133 * onenand_bootram_handle - Handle BootRAM area
134 * @param this OneNAND device structure
135 * @param cmd The command to be sent
136 *
 137 * Emulate the BootRAM area. It is possible to do basic operations using BootRAM.
138 */
139static void onenand_bootram_handle(struct onenand_chip *this, int cmd)
140{
141 switch (cmd) {
142 case ONENAND_CMD_READID:
143 writew(manuf_id, this->base);
144 writew(device_id, this->base + 2);
145 writew(version_id, this->base + 4);
146 break;
147
148 default:
 149 /* REVISIT: Handle other commands */
150 break;
151 }
152}
153
154/**
155 * onenand_update_interrupt - Set interrupt register
156 * @param this OneNAND device structure
157 * @param cmd The command to be sent
158 *
 159 * Update the interrupt register. The status depends on the command.
160 */
161static void onenand_update_interrupt(struct onenand_chip *this, int cmd)
162{
163 int interrupt = ONENAND_INT_MASTER;
164
165 switch (cmd) {
166 case ONENAND_CMD_READ:
167 case ONENAND_CMD_READOOB:
168 interrupt |= ONENAND_INT_READ;
169 break;
170
171 case ONENAND_CMD_PROG:
172 case ONENAND_CMD_PROGOOB:
173 interrupt |= ONENAND_INT_WRITE;
174 break;
175
176 case ONENAND_CMD_ERASE:
177 interrupt |= ONENAND_INT_ERASE;
178 break;
179
180 case ONENAND_CMD_RESET:
181 interrupt |= ONENAND_INT_RESET;
182 break;
183
184 default:
185 break;
186 }
187
188 writew(interrupt, this->base + ONENAND_REG_INTERRUPT);
189}
190
191/**
 192 * onenand_check_overwrite - Check whether an over-write happened
 193 * @param dest The destination pointer
 194 * @param src The source pointer
 195 * @param count The length to be checked
 196 * @return 0 on same, otherwise 1
 197 *
 198 * Compare the source with the destination
199 */
200static int onenand_check_overwrite(void *dest, void *src, size_t count)
201{
202 unsigned int *s = (unsigned int *) src;
203 unsigned int *d = (unsigned int *) dest;
204 int i;
205
206 count >>= 2;
207 for (i = 0; i < count; i++)
208 if ((*s++ ^ *d++) != 0)
209 return 1;
210
211 return 0;
212}
213
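The simulator uses this word-wise compare to detect a program operation that would have to flip bits back to 1, which real flash cannot do without an erase. A standalone sketch of how the callers below use it: re-programming identical data passes, changed data is flagged:

#include <stdio.h>
#include <string.h>

/* Word-wise compare, as in onenand_check_overwrite(): returns 1 as soon
 * as destination and source differ. */
static int check_overwrite(const void *dest, const void *src, size_t count)
{
	const unsigned int *d = dest, *s = src;

	for (count >>= 2; count; count--)
		if (*s++ ^ *d++)
			return 1;
	return 0;
}

int main(void)
{
	unsigned char core[16], ram[16];

	memset(ram, 0xa5, sizeof(ram));
	memcpy(core, ram, sizeof(core));	/* page already programmed */
	printf("same data:    %d\n", check_overwrite(core, ram, 16));
	ram[0] = 0x5a;				/* would need bits back to 1 */
	printf("changed data: %d\n", check_overwrite(core, ram, 16));
	return 0;
}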
214/**
215 * onenand_data_handle - Handle OneNAND Core and DataRAM
216 * @param this OneNAND device structure
217 * @param cmd The command to be sent
218 * @param dataram Which dataram used
219 * @param offset The offset to OneNAND Core
220 *
221 * Copy data from OneNAND Core to DataRAM (read)
222 * Copy data from DataRAM to OneNAND Core (write)
223 * Erase the OneNAND Core (erase)
224 */
225static void onenand_data_handle(struct onenand_chip *this, int cmd,
226 int dataram, unsigned int offset)
227{
228 struct mtd_info *mtd = &info->mtd;
229 struct onenand_flash *flash = this->priv;
230 int main_offset, spare_offset;
231 void __iomem *src;
232 void __iomem *dest;
233 unsigned int i;
234
235 if (dataram) {
236 main_offset = mtd->writesize;
237 spare_offset = mtd->oobsize;
238 } else {
239 main_offset = 0;
240 spare_offset = 0;
241 }
242
243 switch (cmd) {
244 case ONENAND_CMD_READ:
245 src = ONENAND_CORE(flash) + offset;
246 dest = ONENAND_MAIN_AREA(this, main_offset);
247 memcpy(dest, src, mtd->writesize);
248 /* Fall through */
249
250 case ONENAND_CMD_READOOB:
251 src = ONENAND_CORE_SPARE(flash, this, offset);
252 dest = ONENAND_SPARE_AREA(this, spare_offset);
253 memcpy(dest, src, mtd->oobsize);
254 break;
255
256 case ONENAND_CMD_PROG:
257 src = ONENAND_MAIN_AREA(this, main_offset);
258 dest = ONENAND_CORE(flash) + offset;
259 /* To handle partial write */
260 for (i = 0; i < (1 << mtd->subpage_sft); i++) {
261 int off = i * this->subpagesize;
262 if (!memcmp(src + off, ffchars, this->subpagesize))
263 continue;
264 if (memcmp(dest + off, ffchars, this->subpagesize) &&
265 onenand_check_overwrite(dest + off, src + off, this->subpagesize))
 266 printk(KERN_ERR "over-write happened at 0x%08x\n", offset);
267 memcpy(dest + off, src + off, this->subpagesize);
268 }
269 /* Fall through */
270
271 case ONENAND_CMD_PROGOOB:
272 src = ONENAND_SPARE_AREA(this, spare_offset);
273 /* Check all data is 0xff chars */
274 if (!memcmp(src, ffchars, mtd->oobsize))
275 break;
276
277 dest = ONENAND_CORE_SPARE(flash, this, offset);
278 if (memcmp(dest, ffchars, mtd->oobsize) &&
279 onenand_check_overwrite(dest, src, mtd->oobsize))
 280 printk(KERN_ERR "OOB: over-write happened at 0x%08x\n",
281 offset);
282 memcpy(dest, src, mtd->oobsize);
283 break;
284
285 case ONENAND_CMD_ERASE:
286 memset(ONENAND_CORE(flash) + offset, 0xff, mtd->erasesize);
287 memset(ONENAND_CORE_SPARE(flash, this, offset), 0xff,
288 (mtd->erasesize >> 5));
289 break;
290
291 default:
292 break;
293 }
294}
295
296/**
297 * onenand_command_handle - Handle command
298 * @param this OneNAND device structure
299 * @param cmd The command to be sent
300 *
301 * Emulate OneNAND command.
302 */
303static void onenand_command_handle(struct onenand_chip *this, int cmd)
304{
305 unsigned long offset = 0;
306 int block = -1, page = -1, bufferram = -1;
307 int dataram = 0;
308
309 switch (cmd) {
310 case ONENAND_CMD_UNLOCK:
311 case ONENAND_CMD_LOCK:
312 case ONENAND_CMD_LOCK_TIGHT:
313 case ONENAND_CMD_UNLOCK_ALL:
314 onenand_lock_handle(this, cmd);
315 break;
316
317 case ONENAND_CMD_BUFFERRAM:
318 /* Do nothing */
319 return;
320
321 default:
322 block = (int) readw(this->base + ONENAND_REG_START_ADDRESS1);
323 if (block & (1 << ONENAND_DDP_SHIFT)) {
324 block &= ~(1 << ONENAND_DDP_SHIFT);
325 /* The half of chip block */
326 block += this->chipsize >> (this->erase_shift + 1);
327 }
328 if (cmd == ONENAND_CMD_ERASE)
329 break;
330
331 page = (int) readw(this->base + ONENAND_REG_START_ADDRESS8);
332 page = (page >> ONENAND_FPA_SHIFT);
333 bufferram = (int) readw(this->base + ONENAND_REG_START_BUFFER);
334 bufferram >>= ONENAND_BSA_SHIFT;
335 bufferram &= ONENAND_BSA_DATARAM1;
336 dataram = (bufferram == ONENAND_BSA_DATARAM1) ? 1 : 0;
337 break;
338 }
339
340 if (block != -1)
341 offset += block << this->erase_shift;
342
343 if (page != -1)
344 offset += page << this->page_shift;
345
346 onenand_data_handle(this, cmd, dataram, offset);
347
348 onenand_update_interrupt(this, cmd);
349}
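/*
 * Address decode sketch (example values are hypothetical): with
 * erase_shift = 17 (128KiB blocks) and page_shift = 11 (2KiB pages),
 * block 3 and page 5 give offset = (3 << 17) + (5 << 11) = 0x62800
 * into the simulated core.  When the DDP bit of START_ADDRESS1 is set,
 * the access is redirected to the second die by adding half of the
 * chip's total block count to the block number.
 */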
350
351/**
352 * onenand_writew - [OneNAND Interface] Emulate write operation
353 * @param value value to write
354 * @param addr address to write
355 *
356 * Write OneNAND register with value
357 */
358static void onenand_writew(unsigned short value, void __iomem * addr)
359{
360 struct onenand_chip *this = info->mtd.priv;
361
362 /* BootRAM handling */
363 if (addr < this->base + ONENAND_DATARAM) {
364 onenand_bootram_handle(this, value);
365 return;
366 }
367 /* Command handling */
368 if (addr == this->base + ONENAND_REG_COMMAND)
369 onenand_command_handle(this, value);
370
371 writew(value, addr);
372}
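/*
 * Emulated command flow, e.g. for a page read (a sketch using only the
 * registers referenced above): the host writes the block number to
 * ONENAND_REG_START_ADDRESS1, the page to ONENAND_REG_START_ADDRESS8,
 * selects a DataRAM via ONENAND_REG_START_BUFFER and finally writes
 * ONENAND_CMD_READ to ONENAND_REG_COMMAND.  That last writew() lands
 * here, onenand_command_handle() copies the page into the DataRAM, and
 * the interrupt bits are raised before the command value is stored.
 */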
373
374/**
375 * flash_init - Initialize OneNAND simulator
376 * @param flash		OneNAND simulator data structures
377 *
378 * Initialize OneNAND simulator.
379 */
380static int __init flash_init(struct onenand_flash *flash)
381{
382 int density, size;
383 int buffer_size;
384
385	flash->base = kzalloc(131072, GFP_KERNEL);	/* 128KiB memory-mapped window */
386 if (!flash->base) {
387 printk(KERN_ERR "Unable to allocate base address.\n");
388 return -ENOMEM;
389 }
390
391 density = device_id >> ONENAND_DEVICE_DENSITY_SHIFT;
392 size = ((16 << 20) << density);
393
394 ONENAND_CORE(flash) = vmalloc(size + (size >> 5));
395 if (!ONENAND_CORE(flash)) {
396 printk(KERN_ERR "Unable to allocate nand core address.\n");
397 kfree(flash->base);
398 return -ENOMEM;
399 }
400
401 memset(ONENAND_CORE(flash), 0xff, size + (size >> 5));
402
403 /* Setup registers */
404 writew(manuf_id, flash->base + ONENAND_REG_MANUFACTURER_ID);
405 writew(device_id, flash->base + ONENAND_REG_DEVICE_ID);
406 writew(version_id, flash->base + ONENAND_REG_VERSION_ID);
407
408 if (density < 2)
409 buffer_size = 0x0400; /* 1KiB page */
410 else
411 buffer_size = 0x0800; /* 2KiB page */
412 writew(buffer_size, flash->base + ONENAND_REG_DATA_BUFFER_SIZE);
413
414 return 0;
415}
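/*
 * Sizing example (depends entirely on the configured device_id): a
 * density field of 1 gives size = (16 << 20) << 1 = 32MiB of core plus
 * size >> 5 = 1MiB of spare, vmalloc'ed and filled with 0xff to model
 * an erased part.  Densities below 2 advertise a 1KiB page buffer,
 * larger devices a 2KiB one.
 */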
416
417/**
418 * flash_exit - Clean up OneNAND simulator
419 * @param flash		OneNAND simulator data structures
420 *
421 * Clean up OneNAND simulator.
422 */
423static void flash_exit(struct onenand_flash *flash)
424{
425 vfree(ONENAND_CORE(flash));
426 kfree(flash->base);
427 kfree(flash);
428}
429
430static int __init onenand_sim_init(void)
431{
432	/* Allocate a buffer of all 0xff chars */
433 ffchars = kmalloc(MAX_ONENAND_PAGESIZE, GFP_KERNEL);
434 if (!ffchars) {
435 printk(KERN_ERR "Unable to allocate ff chars.\n");
436 return -ENOMEM;
437 }
438 memset(ffchars, 0xff, MAX_ONENAND_PAGESIZE);
439
440	/* Allocate the OneNAND simulator info structure */
441 info = kzalloc(sizeof(struct onenand_info), GFP_KERNEL);
442 if (!info) {
443 printk(KERN_ERR "Unable to allocate core structures.\n");
444 kfree(ffchars);
445 return -ENOMEM;
446 }
447
448 /* Override write_word function */
449 info->onenand.write_word = onenand_writew;
450
451 if (flash_init(&info->flash)) {
452		printk(KERN_ERR "Unable to allocate flash.\n");
453 kfree(ffchars);
454 kfree(info);
455 return -ENOMEM;
456 }
457
458 info->parts = os_partitions;
459
460 info->onenand.base = info->flash.base;
461 info->onenand.priv = &info->flash;
462
463 info->mtd.name = "OneNAND simulator";
464 info->mtd.priv = &info->onenand;
465 info->mtd.owner = THIS_MODULE;
466
467 if (onenand_scan(&info->mtd, 1)) {
468 flash_exit(&info->flash);
469 kfree(ffchars);
470 kfree(info);
471 return -ENXIO;
472 }
473
474 add_mtd_partitions(&info->mtd, info->parts, ARRAY_SIZE(os_partitions));
475
476 return 0;
477}
478
479static void __exit onenand_sim_exit(void)
480{
481 struct onenand_chip *this = info->mtd.priv;
482 struct onenand_flash *flash = this->priv;
483
484 onenand_release(&info->mtd);
485 flash_exit(flash);
486 kfree(ffchars);
487 kfree(info);
488}
489
490module_init(onenand_sim_init);
491module_exit(onenand_sim_exit);
492
493MODULE_AUTHOR("Kyungmin Park <kyungmin.park@samsung.com>");
494MODULE_DESCRIPTION("The OneNAND flash simulator");
495MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 006c03aacb55..823fba4e6d2f 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -779,10 +779,8 @@ static void rfd_ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
779 else { 779 else {
780 if (!mtd->erasesize) { 780 if (!mtd->erasesize) {
781 printk(KERN_WARNING PREFIX "please provide block_size"); 781 printk(KERN_WARNING PREFIX "please provide block_size");
782 kfree(part); 782 goto out;
783 return; 783 } else
784 }
785 else
786 part->block_size = mtd->erasesize; 784 part->block_size = mtd->erasesize;
787 } 785 }
788 786
@@ -804,7 +802,7 @@ static void rfd_ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
804 if (!add_mtd_blktrans_dev((void*)part)) 802 if (!add_mtd_blktrans_dev((void*)part))
805 return; 803 return;
806 } 804 }
807 805out:
808 kfree(part); 806 kfree(part);
809} 807}
810 808
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 94ee54934411..29c41eeb09fe 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -1314,11 +1314,10 @@ static int paranoid_check_si(const struct ubi_device *ubi,
1314 * Make sure that all the physical eraseblocks are in one of the lists 1314 * Make sure that all the physical eraseblocks are in one of the lists
1315 * or trees. 1315 * or trees.
1316 */ 1316 */
1317 buf = kmalloc(ubi->peb_count, GFP_KERNEL); 1317 buf = kzalloc(ubi->peb_count, GFP_KERNEL);
1318 if (!buf) 1318 if (!buf)
1319 return -ENOMEM; 1319 return -ENOMEM;
1320 1320
1321 memset(buf, 1, ubi->peb_count);
1322 for (pnum = 0; pnum < ubi->peb_count; pnum++) { 1321 for (pnum = 0; pnum < ubi->peb_count; pnum++) {
1323 err = ubi_io_is_bad(ubi, pnum); 1322 err = ubi_io_is_bad(ubi, pnum);
1324 if (err < 0) { 1323 if (err < 0) {
@@ -1326,28 +1325,28 @@ static int paranoid_check_si(const struct ubi_device *ubi,
1326 return err; 1325 return err;
1327 } 1326 }
1328 else if (err) 1327 else if (err)
1329 buf[pnum] = 0; 1328 buf[pnum] = 1;
1330 } 1329 }
1331 1330
1332 ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) 1331 ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb)
1333 ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) 1332 ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb)
1334 buf[seb->pnum] = 0; 1333 buf[seb->pnum] = 1;
1335 1334
1336 list_for_each_entry(seb, &si->free, u.list) 1335 list_for_each_entry(seb, &si->free, u.list)
1337 buf[seb->pnum] = 0; 1336 buf[seb->pnum] = 1;
1338 1337
1339 list_for_each_entry(seb, &si->corr, u.list) 1338 list_for_each_entry(seb, &si->corr, u.list)
1340 buf[seb->pnum] = 0; 1339 buf[seb->pnum] = 1;
1341 1340
1342 list_for_each_entry(seb, &si->erase, u.list) 1341 list_for_each_entry(seb, &si->erase, u.list)
1343 buf[seb->pnum] = 0; 1342 buf[seb->pnum] = 1;
1344 1343
1345 list_for_each_entry(seb, &si->alien, u.list) 1344 list_for_each_entry(seb, &si->alien, u.list)
1346 buf[seb->pnum] = 0; 1345 buf[seb->pnum] = 1;
1347 1346
1348 err = 0; 1347 err = 0;
1349 for (pnum = 0; pnum < ubi->peb_count; pnum++) 1348 for (pnum = 0; pnum < ubi->peb_count; pnum++)
1350 if (buf[pnum]) { 1349 if (!buf[pnum]) {
1351 ubi_err("PEB %d is not referred", pnum); 1350 ubi_err("PEB %d is not referred", pnum);
1352 err = 1; 1351 err = 1;
1353 } 1352 }
diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c
index ebf1a3a88e15..b74dbeef8050 100644
--- a/drivers/net/atarilance.c
+++ b/drivers/net/atarilance.c
@@ -1023,7 +1023,7 @@ static int lance_rx( struct net_device *dev )
1023 DECLARE_MAC_BUF(mac); 1023 DECLARE_MAC_BUF(mac);
1024 DECLARE_MAC_BUF(mac2); 1024 DECLARE_MAC_BUF(mac2);
1025 1025
1026 printk(KERN_DEBUG "%s: RX pkt type 0x%04x from %s to %s ", 1026 printk(KERN_DEBUG "%s: RX pkt type 0x%04x from %s to %s "
1027 "data %02x %02x %02x %02x %02x %02x %02x %02x " 1027 "data %02x %02x %02x %02x %02x %02x %02x %02x "
1028 "len %d\n", 1028 "len %d\n",
1029 dev->name, ((u_short *)data)[6], 1029 dev->name, ((u_short *)data)[6],
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 6589239b79ee..18770527df99 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -538,8 +538,9 @@ static void mace_set_multicast(struct net_device *dev)
538 local_irq_restore(flags); 538 local_irq_restore(flags);
539} 539}
540 540
541static void mace_handle_misc_intrs(struct mace_data *mp, int intr) 541static void mace_handle_misc_intrs(struct net_device *dev, int intr)
542{ 542{
543 struct mace_data *mp = netdev_priv(dev);
543 volatile struct mace *mb = mp->mace; 544 volatile struct mace *mb = mp->mace;
544 static int mace_babbles, mace_jabbers; 545 static int mace_babbles, mace_jabbers;
545 546
@@ -571,7 +572,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
571 local_irq_save(flags); 572 local_irq_save(flags);
572 573
573 intr = mb->ir; /* read interrupt register */ 574 intr = mb->ir; /* read interrupt register */
574 mace_handle_misc_intrs(mp, intr); 575 mace_handle_misc_intrs(dev, intr);
575 576
576 if (intr & XMTINT) { 577 if (intr & XMTINT) {
577 fs = mb->xmtfs; 578 fs = mb->xmtfs;
@@ -645,7 +646,6 @@ static void mace_tx_timeout(struct net_device *dev)
645 646
646static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf) 647static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
647{ 648{
648 struct mace_data *mp = netdev_priv(dev);
649 struct sk_buff *skb; 649 struct sk_buff *skb;
650 unsigned int frame_status = mf->rcvsts; 650 unsigned int frame_status = mf->rcvsts;
651 651
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index b33d21f4efff..84f2d6382f1e 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -784,7 +784,6 @@ static int mv643xx_eth_open(struct net_device *dev)
784 unsigned int port_num = mp->port_num; 784 unsigned int port_num = mp->port_num;
785 unsigned int size; 785 unsigned int size;
786 int err; 786 int err;
787 DECLARE_MAC_BUF(mac);
788 787
789 /* Clear any pending ethernet port interrupts */ 788 /* Clear any pending ethernet port interrupts */
790 mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), 0); 789 mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), 0);
@@ -1296,6 +1295,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
1296 struct ethtool_cmd cmd; 1295 struct ethtool_cmd cmd;
1297 int duplex = DUPLEX_HALF; 1296 int duplex = DUPLEX_HALF;
1298 int speed = 0; /* default to auto-negotiation */ 1297 int speed = 0; /* default to auto-negotiation */
1298 DECLARE_MAC_BUF(mac);
1299 1299
1300 pd = pdev->dev.platform_data; 1300 pd = pdev->dev.platform_data;
1301 if (pd == NULL) { 1301 if (pd == NULL) {
diff --git a/drivers/net/mvme147.c b/drivers/net/mvme147.c
index 86c9c06433cb..06ca4252155f 100644
--- a/drivers/net/mvme147.c
+++ b/drivers/net/mvme147.c
@@ -85,7 +85,6 @@ struct net_device * __init mvme147lance_probe(int unit)
85 dev->open = &m147lance_open; 85 dev->open = &m147lance_open;
86 dev->stop = &m147lance_close; 86 dev->stop = &m147lance_close;
87 dev->hard_start_xmit = &lance_start_xmit; 87 dev->hard_start_xmit = &lance_start_xmit;
88 dev->get_stats = &lance_get_stats;
89 dev->set_multicast_list = &lance_set_multicast; 88 dev->set_multicast_list = &lance_set_multicast;
90 dev->tx_timeout = &lance_tx_timeout; 89 dev->tx_timeout = &lance_tx_timeout;
91 dev->dma = 0; 90 dev->dma = 0;
diff --git a/drivers/net/wireless/b43/phy.c b/drivers/net/wireless/b43/phy.c
index 5f7ffa0a76c0..3d4ed647c311 100644
--- a/drivers/net/wireless/b43/phy.c
+++ b/drivers/net/wireless/b43/phy.c
@@ -26,6 +26,7 @@
26*/ 26*/
27 27
28#include <linux/delay.h> 28#include <linux/delay.h>
29#include <linux/io.h>
29#include <linux/types.h> 30#include <linux/types.h>
30 31
31#include "b43.h" 32#include "b43.h"
diff --git a/drivers/net/wireless/b43/pio.h b/drivers/net/wireless/b43/pio.h
index 34a44c1b6314..3488f2447bbf 100644
--- a/drivers/net/wireless/b43/pio.h
+++ b/drivers/net/wireless/b43/pio.h
@@ -4,6 +4,7 @@
4#include "b43.h" 4#include "b43.h"
5 5
6#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/io.h>
7#include <linux/list.h> 8#include <linux/list.h>
8#include <linux/skbuff.h> 9#include <linux/skbuff.h>
9 10
diff --git a/drivers/net/wireless/b43/sysfs.c b/drivers/net/wireless/b43/sysfs.c
index fcb777383e70..f4faff6a7d6c 100644
--- a/drivers/net/wireless/b43/sysfs.c
+++ b/drivers/net/wireless/b43/sysfs.c
@@ -23,13 +23,14 @@
23 23
24*/ 24*/
25 25
26#include <linux/capability.h>
27#include <linux/io.h>
28
26#include "b43.h" 29#include "b43.h"
27#include "sysfs.h" 30#include "sysfs.h"
28#include "main.h" 31#include "main.h"
29#include "phy.h" 32#include "phy.h"
30 33
31#include <linux/capability.h>
32
33#define GENERIC_FILESIZE 64 34#define GENERIC_FILESIZE 64
34 35
35static int get_integer(const char *buf, size_t count) 36static int get_integer(const char *buf, size_t count)
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 93ee05eeaeba..78277a118b67 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * SuperH On-Chip RTC Support 2 * SuperH On-Chip RTC Support
3 * 3 *
4 * Copyright (C) 2006 Paul Mundt 4 * Copyright (C) 2006, 2007 Paul Mundt
5 * Copyright (C) 2006 Jamie Lenehan 5 * Copyright (C) 2006 Jamie Lenehan
6 * 6 *
7 * Based on the old arch/sh/kernel/cpu/rtc.c by: 7 * Based on the old arch/sh/kernel/cpu/rtc.c by:
@@ -23,16 +23,19 @@
23#include <linux/interrupt.h> 23#include <linux/interrupt.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/io.h> 25#include <linux/io.h>
26#include <asm/rtc.h>
26 27
27#define DRV_NAME "sh-rtc" 28#define DRV_NAME "sh-rtc"
28#define DRV_VERSION "0.1.2" 29#define DRV_VERSION "0.1.3"
29 30
30#ifdef CONFIG_CPU_SH3 31#ifdef CONFIG_CPU_SH3
31#define rtc_reg_size sizeof(u16) 32#define rtc_reg_size sizeof(u16)
32#define RTC_BIT_INVERTED 0 /* No bug on SH7708, SH7709A */ 33#define RTC_BIT_INVERTED 0 /* No bug on SH7708, SH7709A */
34#define RTC_DEF_CAPABILITIES 0UL
33#elif defined(CONFIG_CPU_SH4) 35#elif defined(CONFIG_CPU_SH4)
34#define rtc_reg_size sizeof(u32) 36#define rtc_reg_size sizeof(u32)
35#define RTC_BIT_INVERTED 0x40 /* bug on SH7750, SH7750S */ 37#define RTC_BIT_INVERTED 0x40 /* bug on SH7750, SH7750S */
38#define RTC_DEF_CAPABILITIES RTC_CAP_4_DIGIT_YEAR
36#endif 39#endif
37 40
38#define RTC_REG(r) ((r) * rtc_reg_size) 41#define RTC_REG(r) ((r) * rtc_reg_size)
@@ -80,6 +83,7 @@ struct sh_rtc {
80 struct rtc_device *rtc_dev; 83 struct rtc_device *rtc_dev;
81 spinlock_t lock; 84 spinlock_t lock;
82 int rearm_aie; 85 int rearm_aie;
86 unsigned long capabilities; /* See asm-sh/rtc.h for cap bits */
83}; 87};
84 88
85static irqreturn_t sh_rtc_interrupt(int irq, void *dev_id) 89static irqreturn_t sh_rtc_interrupt(int irq, void *dev_id)
@@ -319,14 +323,14 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
319 tm->tm_mday = BCD2BIN(readb(rtc->regbase + RDAYCNT)); 323 tm->tm_mday = BCD2BIN(readb(rtc->regbase + RDAYCNT));
320 tm->tm_mon = BCD2BIN(readb(rtc->regbase + RMONCNT)) - 1; 324 tm->tm_mon = BCD2BIN(readb(rtc->regbase + RMONCNT)) - 1;
321 325
322#if defined(CONFIG_CPU_SH4) 326 if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
323 yr = readw(rtc->regbase + RYRCNT); 327 yr = readw(rtc->regbase + RYRCNT);
324 yr100 = BCD2BIN(yr >> 8); 328 yr100 = BCD2BIN(yr >> 8);
325 yr &= 0xff; 329 yr &= 0xff;
326#else 330 } else {
327 yr = readb(rtc->regbase + RYRCNT); 331 yr = readb(rtc->regbase + RYRCNT);
328 yr100 = BCD2BIN((yr == 0x99) ? 0x19 : 0x20); 332 yr100 = BCD2BIN((yr == 0x99) ? 0x19 : 0x20);
329#endif 333 }
330 334
331 tm->tm_year = (yr100 * 100 + BCD2BIN(yr)) - 1900; 335 tm->tm_year = (yr100 * 100 + BCD2BIN(yr)) - 1900;
332 336
@@ -375,14 +379,14 @@ static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
375 writeb(BIN2BCD(tm->tm_mday), rtc->regbase + RDAYCNT); 379 writeb(BIN2BCD(tm->tm_mday), rtc->regbase + RDAYCNT);
376 writeb(BIN2BCD(tm->tm_mon + 1), rtc->regbase + RMONCNT); 380 writeb(BIN2BCD(tm->tm_mon + 1), rtc->regbase + RMONCNT);
377 381
378#ifdef CONFIG_CPU_SH3 382 if (rtc->capabilities & RTC_CAP_4_DIGIT_YEAR) {
379 year = tm->tm_year % 100; 383 year = (BIN2BCD((tm->tm_year + 1900) / 100) << 8) |
380 writeb(BIN2BCD(year), rtc->regbase + RYRCNT); 384 BIN2BCD(tm->tm_year % 100);
381#else 385 writew(year, rtc->regbase + RYRCNT);
382 year = (BIN2BCD((tm->tm_year + 1900) / 100) << 8) | 386 } else {
383 BIN2BCD(tm->tm_year % 100); 387 year = tm->tm_year % 100;
384 writew(year, rtc->regbase + RYRCNT); 388 writeb(BIN2BCD(year), rtc->regbase + RYRCNT);
385#endif 389 }
386 390
387 /* Start RTC */ 391 /* Start RTC */
388 tmp = readb(rtc->regbase + RCR2); 392 tmp = readb(rtc->regbase + RCR2);
@@ -589,6 +593,17 @@ static int __devinit sh_rtc_probe(struct platform_device *pdev)
589 goto err_badmap; 593 goto err_badmap;
590 } 594 }
591 595
596 rtc->capabilities = RTC_DEF_CAPABILITIES;
597 if (pdev->dev.platform_data) {
598 struct sh_rtc_platform_info *pinfo = pdev->dev.platform_data;
599
600 /*
601 * Some CPUs have special capabilities in addition to the
602 * default set. Add those in here.
603 */
604 rtc->capabilities |= pinfo->capabilities;
605 }
606
592 platform_set_drvdata(pdev, rtc); 607 platform_set_drvdata(pdev, rtc);
593 608
594 return 0; 609 return 0;
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 053fca41b08a..73440e26834b 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -4,6 +4,7 @@
4 * SuperH on-chip serial module support. (SCI with no FIFO / with FIFO) 4 * SuperH on-chip serial module support. (SCI with no FIFO / with FIFO)
5 * 5 *
6 * Copyright (C) 2002 - 2006 Paul Mundt 6 * Copyright (C) 2002 - 2006 Paul Mundt
7 * Modified to support SH7720 SCIF. Markus Brunner, Mark Jonas (Jul 2007).
7 * 8 *
8 * based off of the old drivers/char/sh-sci.c by: 9 * based off of the old drivers/char/sh-sci.c by:
9 * 10 *
@@ -301,6 +302,38 @@ static void sci_init_pins_scif(struct uart_port* port, unsigned int cflag)
301 } 302 }
302 sci_out(port, SCFCR, fcr_val); 303 sci_out(port, SCFCR, fcr_val);
303} 304}
305#elif defined(CONFIG_CPU_SUBTYPE_SH7720)
306static void sci_init_pins_scif(struct uart_port *port, unsigned int cflag)
307{
308 unsigned int fcr_val = 0;
309 unsigned short data;
310
311 if (cflag & CRTSCTS) {
312 /* enable RTS/CTS */
313 if (port->mapbase == 0xa4430000) { /* SCIF0 */
314 /* Clear PTCR bit 9-2; enable all scif pins but sck */
315 data = ctrl_inw(PORT_PTCR);
316 ctrl_outw((data & 0xfc03), PORT_PTCR);
317 } else if (port->mapbase == 0xa4438000) { /* SCIF1 */
318 /* Clear PVCR bit 9-2 */
319 data = ctrl_inw(PORT_PVCR);
320 ctrl_outw((data & 0xfc03), PORT_PVCR);
321 }
322 fcr_val |= SCFCR_MCE;
323 } else {
324 if (port->mapbase == 0xa4430000) { /* SCIF0 */
325 /* Clear PTCR bit 5-2; enable only tx and rx */
326 data = ctrl_inw(PORT_PTCR);
327 ctrl_outw((data & 0xffc3), PORT_PTCR);
328 } else if (port->mapbase == 0xa4438000) { /* SCIF1 */
329 /* Clear PVCR bit 5-2 */
330 data = ctrl_inw(PORT_PVCR);
331 ctrl_outw((data & 0xffc3), PORT_PVCR);
332 }
333 }
334 sci_out(port, SCFCR, fcr_val);
335}
336
304#elif defined(CONFIG_CPU_SH3) 337#elif defined(CONFIG_CPU_SH3)
305/* For SH7705, SH7706, SH7707, SH7709, SH7709A, SH7729 */ 338/* For SH7705, SH7706, SH7707, SH7709, SH7709A, SH7729 */
306static void sci_init_pins_scif(struct uart_port *port, unsigned int cflag) 339static void sci_init_pins_scif(struct uart_port *port, unsigned int cflag)
@@ -1276,7 +1309,7 @@ static int __init sci_console_init(void)
1276console_initcall(sci_console_init); 1309console_initcall(sci_console_init);
1277#endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */ 1310#endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */
1278 1311
1279#ifdef CONFIG_SH_KGDB 1312#ifdef CONFIG_SH_KGDB_CONSOLE
1280/* 1313/*
1281 * FIXME: Most of this can go away.. at the moment, we rely on 1314 * FIXME: Most of this can go away.. at the moment, we rely on
1282 * arch/sh/kernel/setup.c to do the command line parsing for kgdb, though 1315 * arch/sh/kernel/setup.c to do the command line parsing for kgdb, though
@@ -1334,9 +1367,7 @@ int __init kgdb_console_setup(struct console *co, char *options)
1334 1367
1335 return uart_set_options(port, co, baud, parity, bits, flow); 1368 return uart_set_options(port, co, baud, parity, bits, flow);
1336} 1369}
1337#endif /* CONFIG_SH_KGDB */
1338 1370
1339#ifdef CONFIG_SH_KGDB_CONSOLE
1340static struct console kgdb_console = { 1371static struct console kgdb_console = {
1341 .name = "ttySC", 1372 .name = "ttySC",
1342 .device = uart_console_device, 1373 .device = uart_console_device,
@@ -1432,7 +1463,7 @@ static int __devinit sci_probe(struct platform_device *dev)
1432 1463
1433#ifdef CONFIG_CPU_FREQ 1464#ifdef CONFIG_CPU_FREQ
1434 cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER); 1465 cpufreq_register_notifier(&sci_nb, CPUFREQ_TRANSITION_NOTIFIER);
1435 dev_info(&dev->dev, "sci: CPU frequency notifier registered\n"); 1466 dev_info(&dev->dev, "CPU frequency notifier registered\n");
1436#endif 1467#endif
1437 1468
1438#ifdef CONFIG_SH_STANDARD_BIOS 1469#ifdef CONFIG_SH_STANDARD_BIOS
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index cf75466ebf57..e89ae29645d6 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -10,19 +10,19 @@
10 * Modified to support SH7300(SH-Mobile) SCIF. Takashi Kusuda (Jun 2003). 10 * Modified to support SH7300(SH-Mobile) SCIF. Takashi Kusuda (Jun 2003).
11 * Modified to support H8/300 Series Yoshinori Sato (Feb 2004). 11 * Modified to support H8/300 Series Yoshinori Sato (Feb 2004).
12 * Removed SH7300 support (Jul 2007). 12 * Removed SH7300 support (Jul 2007).
13 * Modified to support SH7720 SCIF. Markus Brunner, Mark Jonas (Aug 2007).
13 */ 14 */
14#include <linux/serial_core.h> 15#include <linux/serial_core.h>
15#include <asm/io.h> 16#include <asm/io.h>
16 17
17#if defined(__H8300H__) || defined(__H8300S__)
18#include <asm/gpio.h> 18#include <asm/gpio.h>
19
19#if defined(CONFIG_H83007) || defined(CONFIG_H83068) 20#if defined(CONFIG_H83007) || defined(CONFIG_H83068)
20#include <asm/regs306x.h> 21#include <asm/regs306x.h>
21#endif 22#endif
22#if defined(CONFIG_H8S2678) 23#if defined(CONFIG_H8S2678)
23#include <asm/regs267x.h> 24#include <asm/regs267x.h>
24#endif 25#endif
25#endif
26 26
27#if defined(CONFIG_CPU_SUBTYPE_SH7706) || \ 27#if defined(CONFIG_CPU_SUBTYPE_SH7706) || \
28 defined(CONFIG_CPU_SUBTYPE_SH7707) || \ 28 defined(CONFIG_CPU_SUBTYPE_SH7707) || \
@@ -46,6 +46,10 @@
46 */ 46 */
47# define SCSCR_INIT(port) (port->mapbase == SCIF2) ? 0xF3 : 0xF0 47# define SCSCR_INIT(port) (port->mapbase == SCIF2) ? 0xF3 : 0xF0
48# define SCIF_ONLY 48# define SCIF_ONLY
49#elif defined(CONFIG_CPU_SUBTYPE_SH7720)
50# define SCSCR_INIT(port) 0x0030 /* TIE=0,RIE=0,TE=1,RE=1 */
51# define SCIF_ONLY
52#define SCIF_ORER 0x0200 /* overrun error bit */
49#elif defined(CONFIG_SH_RTS7751R2D) 53#elif defined(CONFIG_SH_RTS7751R2D)
50# define SCSPTR2 0xFFE80020 /* 16 bit SCIF */ 54# define SCSPTR2 0xFFE80020 /* 16 bit SCIF */
51# define SCIF_ORER 0x0001 /* overrun error bit */ 55# define SCIF_ORER 0x0001 /* overrun error bit */
@@ -217,7 +221,8 @@
217#define SCIF_RDF 0x0002 /* 7705 SCIF, 7707 SCIF, 7709 SCIF, 7750 SCIF */ 221#define SCIF_RDF 0x0002 /* 7705 SCIF, 7707 SCIF, 7709 SCIF, 7750 SCIF */
218#define SCIF_DR 0x0001 /* 7705 SCIF, 7707 SCIF, 7709 SCIF, 7750 SCIF */ 222#define SCIF_DR 0x0001 /* 7705 SCIF, 7707 SCIF, 7709 SCIF, 7750 SCIF */
219 223
220#if defined(CONFIG_CPU_SUBTYPE_SH7705) 224#if defined(CONFIG_CPU_SUBTYPE_SH7705) || \
225 defined(CONFIG_CPU_SUBTYPE_SH7720)
221#define SCIF_ORER 0x0200 226#define SCIF_ORER 0x0200
222#define SCIF_ERRORS ( SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK | SCIF_ORER) 227#define SCIF_ERRORS ( SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK | SCIF_ORER)
223#define SCIF_RFDC_MASK 0x007f 228#define SCIF_RFDC_MASK 0x007f
@@ -254,7 +259,8 @@
254# define SCxSR_FER(port) SCIF_FER 259# define SCxSR_FER(port) SCIF_FER
255# define SCxSR_PER(port) SCIF_PER 260# define SCxSR_PER(port) SCIF_PER
256# define SCxSR_BRK(port) SCIF_BRK 261# define SCxSR_BRK(port) SCIF_BRK
257#if defined(CONFIG_CPU_SUBTYPE_SH7705) 262#if defined(CONFIG_CPU_SUBTYPE_SH7705) || \
263 defined(CONFIG_CPU_SUBTYPE_SH7720)
258# define SCxSR_RDxF_CLEAR(port) (sci_in(port,SCxSR)&0xfffc) 264# define SCxSR_RDxF_CLEAR(port) (sci_in(port,SCxSR)&0xfffc)
259# define SCxSR_ERROR_CLEAR(port) (sci_in(port,SCxSR)&0xfd73) 265# define SCxSR_ERROR_CLEAR(port) (sci_in(port,SCxSR)&0xfd73)
260# define SCxSR_TDxE_CLEAR(port) (sci_in(port,SCxSR)&0xffdf) 266# define SCxSR_TDxE_CLEAR(port) (sci_in(port,SCxSR)&0xffdf)
@@ -362,7 +368,8 @@
362 CPU_SCIx_FNS(name, sh4_sci_offset, sh4_sci_size, sh4_scif_offset, sh4_scif_size) 368 CPU_SCIx_FNS(name, sh4_sci_offset, sh4_sci_size, sh4_scif_offset, sh4_scif_size)
363#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \ 369#define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) \
364 CPU_SCIF_FNS(name, sh4_scif_offset, sh4_scif_size) 370 CPU_SCIF_FNS(name, sh4_scif_offset, sh4_scif_size)
365#elif defined(CONFIG_CPU_SUBTYPE_SH7705) 371#elif defined(CONFIG_CPU_SUBTYPE_SH7705) || \
372 defined(CONFIG_CPU_SUBTYPE_SH7720)
366#define SCIF_FNS(name, scif_offset, scif_size) \ 373#define SCIF_FNS(name, scif_offset, scif_size) \
367 CPU_SCIF_FNS(name, scif_offset, scif_size) 374 CPU_SCIF_FNS(name, scif_offset, scif_size)
368#else 375#else
@@ -388,7 +395,8 @@
388 CPU_SCIF_FNS(name, sh4_scif_offset, sh4_scif_size) 395 CPU_SCIF_FNS(name, sh4_scif_offset, sh4_scif_size)
389#endif 396#endif
390 397
391#if defined(CONFIG_CPU_SUBTYPE_SH7705) 398#if defined(CONFIG_CPU_SUBTYPE_SH7705) || \
399 defined(CONFIG_CPU_SUBTYPE_SH7720)
392 400
393SCIF_FNS(SCSMR, 0x00, 16) 401SCIF_FNS(SCSMR, 0x00, 16)
394SCIF_FNS(SCBRR, 0x04, 8) 402SCIF_FNS(SCBRR, 0x04, 8)
@@ -510,7 +518,15 @@ static inline void set_sh771x_scif_pfc(struct uart_port *port)
510 return; 518 return;
511 } 519 }
512} 520}
513 521#elif defined(CONFIG_CPU_SUBTYPE_SH7720)
522static inline int sci_rxd_in(struct uart_port *port)
523{
524 if (port->mapbase == 0xa4430000)
525 return sci_in(port, SCxSR) & 0x0003 ? 1 : 0;
526 else if (port->mapbase == 0xa4438000)
527 return sci_in(port, SCxSR) & 0x0003 ? 1 : 0;
528 return 1;
529}
514#elif defined(CONFIG_CPU_SUBTYPE_SH7750) || \ 530#elif defined(CONFIG_CPU_SUBTYPE_SH7750) || \
515 defined(CONFIG_CPU_SUBTYPE_SH7751) || \ 531 defined(CONFIG_CPU_SUBTYPE_SH7751) || \
516 defined(CONFIG_CPU_SUBTYPE_SH7751R) || \ 532 defined(CONFIG_CPU_SUBTYPE_SH7751R) || \
@@ -653,6 +669,7 @@ static inline int sci_rxd_in(struct uart_port *port)
653 return ctrl_inw(SCSPTR2) & 0x0001 ? 1 : 0; /* SCIF */ 669 return ctrl_inw(SCSPTR2) & 0x0001 ? 1 : 0; /* SCIF */
654 if (port->mapbase == 0xffc60000) 670 if (port->mapbase == 0xffc60000)
655 return ctrl_inw(SCSPTR3) & 0x0001 ? 1 : 0; /* SCIF */ 671 return ctrl_inw(SCSPTR3) & 0x0001 ? 1 : 0; /* SCIF */
672 return 1;
656} 673}
657#endif 674#endif
658 675
@@ -691,7 +708,8 @@ static inline int sci_rxd_in(struct uart_port *port)
691#if defined(CONFIG_CPU_SUBTYPE_SH7780) || \ 708#if defined(CONFIG_CPU_SUBTYPE_SH7780) || \
692 defined(CONFIG_CPU_SUBTYPE_SH7785) 709 defined(CONFIG_CPU_SUBTYPE_SH7785)
693#define SCBRR_VALUE(bps, clk) ((clk+16*bps)/(16*bps)-1) 710#define SCBRR_VALUE(bps, clk) ((clk+16*bps)/(16*bps)-1)
694#elif defined(CONFIG_CPU_SUBTYPE_SH7705) 711#elif defined(CONFIG_CPU_SUBTYPE_SH7705) || \
712 defined(CONFIG_CPU_SUBTYPE_SH7720)
695#define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(32*bps)-1) 713#define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(32*bps)-1)
696#elif defined(__H8300H__) || defined(__H8300S__) 714#elif defined(__H8300H__) || defined(__H8300S__)
697#define SCBRR_VALUE(bps) (((CONFIG_CPU_CLOCK*1000/32)/bps)-1) 715#define SCBRR_VALUE(bps) (((CONFIG_CPU_CLOCK*1000/32)/bps)-1)
diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile
index 8a143894e33f..a96f4a8cfeb8 100644
--- a/drivers/sh/Makefile
+++ b/drivers/sh/Makefile
@@ -2,5 +2,5 @@
2# Makefile for the SuperH specific drivers. 2# Makefile for the SuperH specific drivers.
3# 3#
4 4
5obj-$(CONFIG_SUPERHYWAY) += superhyway/ 5obj-$(CONFIG_SUPERHYWAY) += superhyway/
6 6obj-$(CONFIG_MAPLE) += maple/
diff --git a/drivers/sh/maple/Makefile b/drivers/sh/maple/Makefile
new file mode 100644
index 000000000000..65dfeeb610ef
--- /dev/null
+++ b/drivers/sh/maple/Makefile
@@ -0,0 +1,3 @@
1# Makefile for Maple Bus
2
3obj-$(CONFIG_MAPLE) := maple.o
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
new file mode 100644
index 000000000000..161d1021b7eb
--- /dev/null
+++ b/drivers/sh/maple/maple.c
@@ -0,0 +1,735 @@
1/*
2 * Core maple bus functionality
3 *
4 * Copyright (C) 2007 Adrian McMenamin
5 *
6 * Based on 2.4 code by:
7 *
8 * Copyright (C) 2000-2001 YAEGASHI Takeshi
9 * Copyright (C) 2001 M. R. Brown
10 * Copyright (C) 2001 Paul Mundt
11 *
12 * and others.
13 *
14 * This file is subject to the terms and conditions of the GNU General Public
15 * License. See the file "COPYING" in the main directory of this archive
16 * for more details.
17 */
18#include <linux/init.h>
19#include <linux/kernel.h>
20#include <linux/device.h>
21#include <linux/module.h>
22#include <linux/interrupt.h>
23#include <linux/list.h>
24#include <linux/io.h>
25#include <linux/slab.h>
26#include <linux/maple.h>
27#include <linux/dma-mapping.h>
28#include <asm/cacheflush.h>
29#include <asm/dma.h>
30#include <asm/io.h>
31#include <asm/mach/dma.h>
32#include <asm/mach/sysasic.h>
33#include <asm/mach/maple.h>
34
35MODULE_AUTHOR("Yaegashi Takeshi, Paul Mundt, M. R. Brown, Adrian McMenamin");
36MODULE_DESCRIPTION("Maple bus driver for Dreamcast");
37MODULE_LICENSE("GPL v2");
38MODULE_SUPPORTED_DEVICE("{{SEGA, Dreamcast/Maple}}");
39
40static void maple_dma_handler(struct work_struct *work);
41static void maple_vblank_handler(struct work_struct *work);
42
43static DECLARE_WORK(maple_dma_process, maple_dma_handler);
44static DECLARE_WORK(maple_vblank_process, maple_vblank_handler);
45
46static LIST_HEAD(maple_waitq);
47static LIST_HEAD(maple_sentq);
48
49static DEFINE_MUTEX(maple_list_lock);
50
51static struct maple_driver maple_dummy_driver;
52static struct device maple_bus;
53static int subdevice_map[MAPLE_PORTS];
54static unsigned long *maple_sendbuf, *maple_sendptr, *maple_lastptr;
55static unsigned long maple_pnp_time;
56static int started, scanning, liststatus;
57static struct kmem_cache *maple_queue_cache;
58
59struct maple_device_specify {
60 int port;
61 int unit;
62};
63
64/**
65 * maple_driver_register - register a device driver
66 * automatically sets the driver's bus to the maple bus
67 * @drv: the driver to be registered
68 */
69int maple_driver_register(struct device_driver *drv)
70{
71 if (!drv)
72 return -EINVAL;
73 drv->bus = &maple_bus_type;
74 return driver_register(drv);
75}
76EXPORT_SYMBOL_GPL(maple_driver_register);
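/*
 * Minimal registration sketch (the driver name, function mask and
 * callbacks below are hypothetical, not part of this file):
 *
 *	static struct maple_driver example_driver = {
 *		.function = EXAMPLE_FUNCTION,	   /* hypothetical mask */
 *		.connect = example_connect,	   /* hypothetical */
 *		.disconnect = example_disconnect,  /* hypothetical */
 *		.drv = { .name = "example_maple" },
 *	};
 *
 *	maple_driver_register(&example_driver.drv);
 *
 * The bus pointer is filled in here, so a driver only supplies its
 * name, its function mask and the connect/disconnect callbacks.
 */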
77
78/* set hardware registers to enable next round of dma */
79static void maplebus_dma_reset(void)
80{
81 ctrl_outl(MAPLE_MAGIC, MAPLE_RESET);
82 /* set trig type to 0 for software trigger, 1 for hardware (VBLANK) */
83 ctrl_outl(1, MAPLE_TRIGTYPE);
84 ctrl_outl(MAPLE_2MBPS | MAPLE_TIMEOUT(50000), MAPLE_SPEED);
85 ctrl_outl(PHYSADDR(maple_sendbuf), MAPLE_DMAADDR);
86 ctrl_outl(1, MAPLE_ENABLE);
87}
88
89/**
90 * maple_getcond_callback - set up handling of MAPLE_COMMAND_GETCOND
91 * @dev: device responding
92 * @callback: handler callback
93 * @interval: interval in jiffies between callbacks
94 * @function: the function code for the device
95 */
96void maple_getcond_callback(struct maple_device *dev,
97 void (*callback) (struct mapleq * mq),
98 unsigned long interval, unsigned long function)
99{
100 dev->callback = callback;
101 dev->interval = interval;
102 dev->function = cpu_to_be32(function);
103 dev->when = jiffies;
104}
105EXPORT_SYMBOL_GPL(maple_getcond_callback);
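/*
 * Usage sketch (hypothetical consumer; the handler name and function
 * code are illustrative): an input driver that wants the pad state
 * polled every 20ms could hook in with
 *
 *	maple_getcond_callback(mdev, example_button_handler,
 *			       HZ / 50, EXAMPLE_FUNCTION);
 *
 * where example_button_handler(struct mapleq *mq) inspects mq->recvbuf.
 * Note the function code is stored with cpu_to_be32(), ready for the
 * wire format used when the GETCOND packet is built.
 */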
106
107static int maple_dma_done(void)
108{
109 return (ctrl_inl(MAPLE_STATE) & 1) == 0;
110}
111
112static void maple_release_device(struct device *dev)
113{
114 if (dev->type) {
115 kfree(dev->type->name);
116 kfree(dev->type);
117 }
118}
119
120/**
121 * maple_add_packet - add a single instruction to the queue
122 * @mq: instruction to add to waiting queue
123 */
124void maple_add_packet(struct mapleq *mq)
125{
126 mutex_lock(&maple_list_lock);
127 list_add(&mq->list, &maple_waitq);
128 mutex_unlock(&maple_list_lock);
129}
130EXPORT_SYMBOL_GPL(maple_add_packet);
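/*
 * Queueing sketch (mirrors how setup_maple_commands() below uses it):
 * a driver normally reuses the mapleq embedded in its maple_device
 * rather than allocating a fresh one:
 *
 *	mdev->mq->command = MAPLE_COMMAND_GETCOND;
 *	mdev->mq->sendbuf = &mdev->function;
 *	mdev->mq->length = 1;
 *	maple_add_packet(mdev->mq);
 *
 * The packet then sits on maple_waitq until maple_send() folds it into
 * the next DMA block.
 */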
131
132static struct mapleq *maple_allocq(struct maple_device *dev)
133{
134 struct mapleq *mq;
135
136 mq = kmalloc(sizeof(*mq), GFP_KERNEL);
137 if (!mq)
138 return NULL;
139
140 mq->dev = dev;
141 mq->recvbufdcsp = kmem_cache_zalloc(maple_queue_cache, GFP_KERNEL);
142 mq->recvbuf = (void *) P2SEGADDR(mq->recvbufdcsp);
143 if (!mq->recvbuf) {
144 kfree(mq);
145 return NULL;
146 }
147
148 return mq;
149}
150
151static struct maple_device *maple_alloc_dev(int port, int unit)
152{
153 struct maple_device *dev;
154
155 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
156 if (!dev)
157 return NULL;
158
159 dev->port = port;
160 dev->unit = unit;
161 dev->mq = maple_allocq(dev);
162
163 if (!dev->mq) {
164 kfree(dev);
165 return NULL;
166 }
167
168 return dev;
169}
170
171static void maple_free_dev(struct maple_device *mdev)
172{
173 if (!mdev)
174 return;
175 if (mdev->mq) {
176 kmem_cache_free(maple_queue_cache, mdev->mq->recvbufdcsp);
177 kfree(mdev->mq);
178 }
179 kfree(mdev);
180}
181
182/* process the command queue into a maple command block;
183 * the final command in a block has bit 31 of its first long set,
184 * all earlier ones have it cleared */
185static void maple_build_block(struct mapleq *mq)
186{
187 int port, unit, from, to, len;
188 unsigned long *lsendbuf = mq->sendbuf;
189
190 port = mq->dev->port & 3;
191 unit = mq->dev->unit;
192 len = mq->length;
193 from = port << 6;
194 to = (port << 6) | (unit > 0 ? (1 << (unit - 1)) & 0x1f : 0x20);
195
196 *maple_lastptr &= 0x7fffffff;
197 maple_lastptr = maple_sendptr;
198
199 *maple_sendptr++ = (port << 16) | len | 0x80000000;
200 *maple_sendptr++ = PHYSADDR(mq->recvbuf);
201 *maple_sendptr++ =
202 mq->command | (to << 8) | (from << 16) | (len << 24);
203
204 while (len-- > 0)
205 *maple_sendptr++ = *lsendbuf++;
206}
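/*
 * Frame layout example (values hypothetical): for port 1, unit 2,
 * from = 1 << 6 = 0x40 and to = 0x40 | (1 << (2 - 1)) = 0x42.  Each
 * packet is three header longs - the DMA control word (port, length
 * and the bit-31 last-packet flag), the physical receive-buffer
 * address, and the command word packing command/to/from/length -
 * followed by 'length' longs of payload.
 */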
207
208/* build up command queue */
209static void maple_send(void)
210{
211 int i;
212 int maple_packets;
213 struct mapleq *mq, *nmq;
214
215 if (!list_empty(&maple_sentq))
216 return;
217 if (list_empty(&maple_waitq) || !maple_dma_done())
218 return;
219 maple_packets = 0;
220 maple_sendptr = maple_lastptr = maple_sendbuf;
221 list_for_each_entry_safe(mq, nmq, &maple_waitq, list) {
222 maple_build_block(mq);
223 list_move(&mq->list, &maple_sentq);
224 if (maple_packets++ > MAPLE_MAXPACKETS)
225 break;
226 }
227 if (maple_packets > 0) {
228 for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
229 dma_cache_sync(0, maple_sendbuf + i * PAGE_SIZE,
230 PAGE_SIZE, DMA_BIDIRECTIONAL);
231 }
232}
233
234static int attach_matching_maple_driver(struct device_driver *driver,
235 void *devptr)
236{
237 struct maple_driver *maple_drv;
238 struct maple_device *mdev;
239
240 mdev = devptr;
241 maple_drv = to_maple_driver(driver);
242 if (mdev->devinfo.function & be32_to_cpu(maple_drv->function)) {
243 if (maple_drv->connect(mdev) == 0) {
244 mdev->driver = maple_drv;
245 return 1;
246 }
247 }
248 return 0;
249}
250
251static void maple_detach_driver(struct maple_device *mdev)
252{
253 if (!mdev)
254 return;
255 if (mdev->driver) {
256 if (mdev->driver->disconnect)
257 mdev->driver->disconnect(mdev);
258 }
259 mdev->driver = NULL;
260 if (mdev->registered) {
261 maple_release_device(&mdev->dev);
262 device_unregister(&mdev->dev);
263 }
264 mdev->registered = 0;
265 maple_free_dev(mdev);
266}
267
268/* process initial MAPLE_COMMAND_DEVINFO for each device or port */
269static void maple_attach_driver(struct maple_device *dev)
270{
271 char *p;
272
273 char *recvbuf;
274 unsigned long function;
275 int matched, retval;
276
277 recvbuf = dev->mq->recvbuf;
278 memcpy(&dev->devinfo, recvbuf + 4, sizeof(dev->devinfo));
279 memcpy(dev->product_name, dev->devinfo.product_name, 30);
280 memcpy(dev->product_licence, dev->devinfo.product_licence, 60);
281 dev->product_name[30] = '\0';
282 dev->product_licence[60] = '\0';
283
284 for (p = dev->product_name + 29; dev->product_name <= p; p--)
285 if (*p == ' ')
286 *p = '\0';
287 else
288 break;
289
290 for (p = dev->product_licence + 59; dev->product_licence <= p; p--)
291 if (*p == ' ')
292 *p = '\0';
293 else
294 break;
295
296 function = be32_to_cpu(dev->devinfo.function);
297
298 if (function > 0x200) {
299 /* Do this silently - as not a real device */
300 function = 0;
301 dev->driver = &maple_dummy_driver;
302 sprintf(dev->dev.bus_id, "%d:0.port", dev->port);
303 } else {
304 printk(KERN_INFO
305 "Maple bus at (%d, %d): Connected function 0x%lX\n",
306 dev->port, dev->unit, function);
307
308 matched =
309 bus_for_each_drv(&maple_bus_type, NULL, dev,
310 attach_matching_maple_driver);
311
312 if (matched == 0) {
313 /* Driver does not exist yet */
314 printk(KERN_INFO
315 "No maple driver found for this device\n");
316 dev->driver = &maple_dummy_driver;
317 }
318
319 sprintf(dev->dev.bus_id, "%d:0%d.%lX", dev->port,
320 dev->unit, function);
321 }
322 dev->function = function;
323 dev->dev.bus = &maple_bus_type;
324 dev->dev.parent = &maple_bus;
325 dev->dev.release = &maple_release_device;
326 retval = device_register(&dev->dev);
327 if (retval) {
328 printk(KERN_INFO
329 "Maple bus: Attempt to register device (%x, %x) failed.\n",
330 dev->port, dev->unit);
331		maple_free_dev(dev);
		return;		/* dev is freed; do not touch it below */
332	}
333	dev->registered = 1;
334}
335
336/*
337 * return 1 if a device has been registered for the given
338 * port and unit - this allows identification of which
339 * devices need to be attached or detached
340 */
341static int detach_maple_device(struct device *device, void *portptr)
342{
343 struct maple_device_specify *ds;
344 struct maple_device *mdev;
345
346 ds = portptr;
347 mdev = to_maple_dev(device);
348 if (mdev->port == ds->port && mdev->unit == ds->unit)
349 return 1;
350 return 0;
351}
352
353static int setup_maple_commands(struct device *device, void *ignored)
354{
355 struct maple_device *maple_dev = to_maple_dev(device);
356
357 if ((maple_dev->interval > 0)
358 && time_after(jiffies, maple_dev->when)) {
359 maple_dev->when = jiffies + maple_dev->interval;
360 maple_dev->mq->command = MAPLE_COMMAND_GETCOND;
361 maple_dev->mq->sendbuf = &maple_dev->function;
362 maple_dev->mq->length = 1;
363 maple_add_packet(maple_dev->mq);
364 liststatus++;
365 } else {
366 if (time_after(jiffies, maple_pnp_time)) {
367 maple_dev->mq->command = MAPLE_COMMAND_DEVINFO;
368 maple_dev->mq->length = 0;
369 maple_add_packet(maple_dev->mq);
370 liststatus++;
371 }
372 }
373
374 return 0;
375}
376
377/* VBLANK bottom half - implemented via workqueue */
378static void maple_vblank_handler(struct work_struct *work)
379{
380 if (!maple_dma_done())
381 return;
382 if (!list_empty(&maple_sentq))
383 return;
384 ctrl_outl(0, MAPLE_ENABLE);
385 liststatus = 0;
386 bus_for_each_dev(&maple_bus_type, NULL, NULL,
387 setup_maple_commands);
388 if (time_after(jiffies, maple_pnp_time))
389 maple_pnp_time = jiffies + MAPLE_PNP_INTERVAL;
390 if (liststatus && list_empty(&maple_sentq)) {
391 INIT_LIST_HEAD(&maple_sentq);
392 maple_send();
393 }
394 maplebus_dma_reset();
395}
396
397/* handle devices added via hotplug - place them on the queue for DEVINFO */
398static void maple_map_subunits(struct maple_device *mdev, int submask)
399{
400 int retval, k, devcheck;
401 struct maple_device *mdev_add;
402 struct maple_device_specify ds;
403
404 for (k = 0; k < 5; k++) {
405 ds.port = mdev->port;
406 ds.unit = k + 1;
407 retval =
408 bus_for_each_dev(&maple_bus_type, NULL, &ds,
409 detach_maple_device);
410 if (retval) {
411 submask = submask >> 1;
412 continue;
413 }
414 devcheck = submask & 0x01;
415 if (devcheck) {
416 mdev_add = maple_alloc_dev(mdev->port, k + 1);
417 if (!mdev_add)
418 return;
419 mdev_add->mq->command = MAPLE_COMMAND_DEVINFO;
420 mdev_add->mq->length = 0;
421 maple_add_packet(mdev_add->mq);
422 scanning = 1;
423 }
424 submask = submask >> 1;
425 }
426}
427
428/* mark a device as removed */
429static void maple_clean_submap(struct maple_device *mdev)
430{
431 int killbit;
432
433 killbit = (mdev->unit > 0 ? (1 << (mdev->unit - 1)) & 0x1f : 0x20);
434 killbit = ~killbit;
435 killbit &= 0xFF;
436 subdevice_map[mdev->port] = subdevice_map[mdev->port] & killbit;
437}
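/*
 * Example (illustrative): unit 3 occupies bit 1 << (3 - 1) = 0x04 of
 * its port's subdevice map, so removing it ANDs the map with
 * ~0x04 & 0xff = 0xfb.  The port itself (unit 0) is tracked with the
 * out-of-band marker 0x20.
 */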
438
439/* handle empty port or hotplug removal */
440static void maple_response_none(struct maple_device *mdev,
441 struct mapleq *mq)
442{
443 if (mdev->unit != 0) {
444 list_del(&mq->list);
445 maple_clean_submap(mdev);
446 printk(KERN_INFO
447 "Maple bus device detaching at (%d, %d)\n",
448 mdev->port, mdev->unit);
449 maple_detach_driver(mdev);
450 return;
451 }
452 if (!started) {
453 printk(KERN_INFO "No maple devices attached to port %d\n",
454 mdev->port);
455 return;
456 }
457 maple_clean_submap(mdev);
458}
459
460/* preprocess hotplugs or scans */
461static void maple_response_devinfo(struct maple_device *mdev,
462 char *recvbuf)
463{
464 char submask;
465 if ((!started) || (scanning == 2)) {
466 maple_attach_driver(mdev);
467 return;
468 }
469 if (mdev->unit == 0) {
470 submask = recvbuf[2] & 0x1F;
471 if (submask ^ subdevice_map[mdev->port]) {
472 maple_map_subunits(mdev, submask);
473 subdevice_map[mdev->port] = submask;
474 }
475 }
476}
477
478/* maple dma end bottom half - implemented via workqueue */
479static void maple_dma_handler(struct work_struct *work)
480{
481 struct mapleq *mq, *nmq;
482 struct maple_device *dev;
483 char *recvbuf;
484 enum maple_code code;
485
486 if (!maple_dma_done())
487 return;
488 ctrl_outl(0, MAPLE_ENABLE);
489 if (!list_empty(&maple_sentq)) {
490 list_for_each_entry_safe(mq, nmq, &maple_sentq, list) {
491 recvbuf = mq->recvbuf;
492 code = recvbuf[0];
493 dev = mq->dev;
494 switch (code) {
495 case MAPLE_RESPONSE_NONE:
496 maple_response_none(dev, mq);
497 break;
498
499 case MAPLE_RESPONSE_DEVINFO:
500 maple_response_devinfo(dev, recvbuf);
501 break;
502
503 case MAPLE_RESPONSE_DATATRF:
504 if (dev->callback)
505 dev->callback(mq);
506 break;
507
508 case MAPLE_RESPONSE_FILEERR:
509 case MAPLE_RESPONSE_AGAIN:
510 case MAPLE_RESPONSE_BADCMD:
511 case MAPLE_RESPONSE_BADFUNC:
512 printk(KERN_DEBUG
513 "Maple non-fatal error 0x%X\n",
514 code);
515 break;
516
517 case MAPLE_RESPONSE_ALLINFO:
518 printk(KERN_DEBUG
519 "Maple - extended device information not supported\n");
520 break;
521
522 case MAPLE_RESPONSE_OK:
523 break;
524
525 default:
526 break;
527 }
528 }
529 INIT_LIST_HEAD(&maple_sentq);
530 if (scanning == 1) {
531 maple_send();
532 scanning = 2;
533 } else
534 scanning = 0;
535
536 if (started == 0)
537 started = 1;
538 }
539 maplebus_dma_reset();
540}
541
542static irqreturn_t maplebus_dma_interrupt(int irq, void *dev_id)
543{
544 /* Load everything into the bottom half */
545 schedule_work(&maple_dma_process);
546 return IRQ_HANDLED;
547}
548
549static irqreturn_t maplebus_vblank_interrupt(int irq, void *dev_id)
550{
551 schedule_work(&maple_vblank_process);
552 return IRQ_HANDLED;
553}
554
555static struct irqaction maple_dma_irq = {
556 .name = "maple bus DMA handler",
557 .handler = maplebus_dma_interrupt,
558 .flags = IRQF_SHARED,
559};
560
561static struct irqaction maple_vblank_irq = {
562 .name = "maple bus VBLANK handler",
563 .handler = maplebus_vblank_interrupt,
564 .flags = IRQF_SHARED,
565};
566
567static int maple_set_dma_interrupt_handler(void)
568{
569 return setup_irq(HW_EVENT_MAPLE_DMA, &maple_dma_irq);
570}
571
572static int maple_set_vblank_interrupt_handler(void)
573{
574 return setup_irq(HW_EVENT_VSYNC, &maple_vblank_irq);
575}
576
577static int maple_get_dma_buffer(void)
578{
579 maple_sendbuf =
580 (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
581 MAPLE_DMA_PAGES);
582 if (!maple_sendbuf)
583 return -ENOMEM;
584 return 0;
585}
586
587static int match_maple_bus_driver(struct device *devptr,
588 struct device_driver *drvptr)
589{
590 struct maple_driver *maple_drv;
591 struct maple_device *maple_dev;
592
593 maple_drv = container_of(drvptr, struct maple_driver, drv);
594 maple_dev = container_of(devptr, struct maple_device, dev);
595 /* Trap empty port case */
596 if (maple_dev->devinfo.function == 0xFFFFFFFF)
597 return 0;
598 else if (maple_dev->devinfo.function &
599 be32_to_cpu(maple_drv->function))
600 return 1;
601 return 0;
602}
603
604static int maple_bus_uevent(struct device *dev, char **envp,
605 int num_envp, char *buffer, int buffer_size)
606{
607 return 0;
608}
609
610static void maple_bus_release(struct device *dev)
611{
612}
613
614static struct maple_driver maple_dummy_driver = {
615 .drv = {
616 .name = "maple_dummy_driver",
617 .bus = &maple_bus_type,
618 },
619};
620
621struct bus_type maple_bus_type = {
622 .name = "maple",
623 .match = match_maple_bus_driver,
624 .uevent = maple_bus_uevent,
625};
626EXPORT_SYMBOL_GPL(maple_bus_type);
627
628static struct device maple_bus = {
629 .bus_id = "maple",
630 .release = maple_bus_release,
631};
632
633static int __init maple_bus_init(void)
634{
635 int retval, i;
636 struct maple_device *mdev[MAPLE_PORTS];
637 ctrl_outl(0, MAPLE_STATE);
638
639 retval = device_register(&maple_bus);
640 if (retval)
641 goto cleanup;
642
643 retval = bus_register(&maple_bus_type);
644 if (retval)
645 goto cleanup_device;
646
647 retval = driver_register(&maple_dummy_driver.drv);
648
649 if (retval)
650 goto cleanup_bus;
651
652 /* allocate memory for maple bus dma */
653 retval = maple_get_dma_buffer();
654 if (retval) {
655 printk(KERN_INFO
656 "Maple bus: Failed to allocate Maple DMA buffers\n");
657 goto cleanup_basic;
658 }
659
660 /* set up DMA interrupt handler */
661 retval = maple_set_dma_interrupt_handler();
662 if (retval) {
663 printk(KERN_INFO
664 "Maple bus: Failed to grab maple DMA IRQ\n");
665 goto cleanup_dma;
666 }
667
668 /* set up VBLANK interrupt handler */
669 retval = maple_set_vblank_interrupt_handler();
670 if (retval) {
671 printk(KERN_INFO "Maple bus: Failed to grab VBLANK IRQ\n");
672 goto cleanup_irq;
673 }
674
675 maple_queue_cache =
676 kmem_cache_create("maple_queue_cache", 0x400, 0,
677 SLAB_HWCACHE_ALIGN, NULL);
678
679 if (!maple_queue_cache)
680 goto cleanup_bothirqs;
681
682 /* setup maple ports */
683 for (i = 0; i < MAPLE_PORTS; i++) {
684 mdev[i] = maple_alloc_dev(i, 0);
685 if (!mdev[i]) {
686 while (i-- > 0)
687 maple_free_dev(mdev[i]);
688 goto cleanup_cache;
689 }
690 mdev[i]->registered = 0;
691 mdev[i]->mq->command = MAPLE_COMMAND_DEVINFO;
692 mdev[i]->mq->length = 0;
693 maple_attach_driver(mdev[i]);
694 maple_add_packet(mdev[i]->mq);
695 subdevice_map[i] = 0;
696 }
697
698 /* setup maplebus hardware */
699 maplebus_dma_reset();
700
701 /* initial detection */
702 maple_send();
703
704 maple_pnp_time = jiffies;
705
706 printk(KERN_INFO "Maple bus core now registered.\n");
707
708 return 0;
709
710cleanup_cache:
711 kmem_cache_destroy(maple_queue_cache);
712
713cleanup_bothirqs:
714 free_irq(HW_EVENT_VSYNC, 0);
715
716cleanup_irq:
717 free_irq(HW_EVENT_MAPLE_DMA, 0);
718
719cleanup_dma:
720 free_pages((unsigned long) maple_sendbuf, MAPLE_DMA_PAGES);
721
722cleanup_basic:
723 driver_unregister(&maple_dummy_driver.drv);
724
725cleanup_bus:
726 bus_unregister(&maple_bus_type);
727
728cleanup_device:
729 device_unregister(&maple_bus);
730
731cleanup:
732 printk(KERN_INFO "Maple bus registration failed\n");
733 return retval;
734}
735subsys_initcall(maple_bus_init);
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 74d5182db4b2..cfd13eb866b8 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -11,6 +11,7 @@
11#include "ssb_private.h" 11#include "ssb_private.h"
12 12
13#include <linux/delay.h> 13#include <linux/delay.h>
14#include <linux/io.h>
14#include <linux/ssb/ssb.h> 15#include <linux/ssb/ssb.h>
15#include <linux/ssb/ssb_regs.h> 16#include <linux/ssb/ssb_regs.h>
16#include <linux/dma-mapping.h> 17#include <linux/dma-mapping.h>
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index 0899fccbd570..fbea2bd129c7 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -125,8 +125,8 @@ static int hp680bl_remove(struct platform_device *pdev)
125{ 125{
126 struct backlight_device *bd = platform_get_drvdata(pdev); 126 struct backlight_device *bd = platform_get_drvdata(pdev);
127 127
128 hp680bl_data.brightness = 0; 128 bd->props.brightness = 0;
129 hp680bl_data.power = 0; 129 bd->props.power = 0;
130 hp680bl_send_intensity(bd); 130 hp680bl_send_intensity(bd);
131 131
132 backlight_device_unregister(bd); 132 backlight_device_unregister(bd);
diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c
index 7d6c29800d14..06805c9b237b 100644
--- a/drivers/video/pvr2fb.c
+++ b/drivers/video/pvr2fb.c
@@ -667,6 +667,8 @@ static int pvr2_init_cable(void)
667 related */ 667 related */
668 if (cable_type == CT_COMPOSITE) 668 if (cable_type == CT_COMPOSITE)
669 fb_writel(3 << 8, VOUTC); 669 fb_writel(3 << 8, VOUTC);
670 else if (cable_type == CT_RGB)
671 fb_writel(1 << 9, VOUTC);
670 else 672 else
671 fb_writel(0, VOUTC); 673 fb_writel(0, VOUTC);
672 674
@@ -890,7 +892,7 @@ static int __init pvr2fb_dc_init(void)
890 pvr2_fix.mmio_start = 0xa05f8000; /* registers start here */ 892 pvr2_fix.mmio_start = 0xa05f8000; /* registers start here */
891 pvr2_fix.mmio_len = 0x2000; 893 pvr2_fix.mmio_len = 0x2000;
892 894
893 if (request_irq(HW_EVENT_VSYNC, pvr2fb_interrupt, 0, 895 if (request_irq(HW_EVENT_VSYNC, pvr2fb_interrupt, IRQF_SHARED,
894 "pvr2 VBL handler", fb_info)) { 896 "pvr2 VBL handler", fb_info)) {
895 return -EBUSY; 897 return -EBUSY;
896 } 898 }