aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-i386/irq.h1
-rw-r--r--include/asm-i386/mach-default/irq_vectors_limits.h2
-rw-r--r--include/asm-i386/mmu_context.h2
-rw-r--r--include/asm-i386/paravirt.h22
-rw-r--r--include/asm-i386/pgalloc.h6
-rw-r--r--include/asm-i386/setup.h4
-rw-r--r--include/asm-i386/smp.h5
-rw-r--r--include/asm-i386/timer.h32
-rw-r--r--include/asm-i386/unistd.h3
-rw-r--r--include/asm-i386/vmi_time.h2
-rw-r--r--include/asm-i386/xen/hypercall.h413
-rw-r--r--include/asm-i386/xen/hypervisor.h73
-rw-r--r--include/asm-i386/xen/interface.h188
-rw-r--r--include/asm-powerpc/systbl.h1
-rw-r--r--include/asm-powerpc/unistd.h3
-rw-r--r--include/asm-sparc64/io.h5
-rw-r--r--include/asm-sparc64/mdesc.h10
-rw-r--r--include/asm-sparc64/vio.h2
-rw-r--r--include/asm-x86_64/unistd.h2
-rw-r--r--include/linux/elfnote.h22
-rw-r--r--include/linux/ext4_fs.h104
-rw-r--r--include/linux/ext4_fs_extents.h43
-rw-r--r--include/linux/ext4_fs_i.h5
-rw-r--r--include/linux/ext4_fs_sb.h3
-rw-r--r--include/linux/falloc.h6
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/jbd2.h6
-rw-r--r--include/linux/kmod.h52
-rw-r--r--include/linux/major.h2
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/linux/netfilter_ipv4/ipt_iprange.h2
-rw-r--r--include/linux/page-flags.h5
-rw-r--r--include/linux/reboot.h5
-rw-r--r--include/linux/string.h4
-rw-r--r--include/linux/syscalls.h1
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/mtd/ubi-header.h101
-rw-r--r--include/net/tcp.h6
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/xen/events.h48
-rw-r--r--include/xen/features.h23
-rw-r--r--include/xen/grant_table.h107
-rw-r--r--include/xen/hvc-console.h6
-rw-r--r--include/xen/interface/elfnote.h133
-rw-r--r--include/xen/interface/event_channel.h195
-rw-r--r--include/xen/interface/features.h43
-rw-r--r--include/xen/interface/grant_table.h375
-rw-r--r--include/xen/interface/io/blkif.h94
-rw-r--r--include/xen/interface/io/console.h23
-rw-r--r--include/xen/interface/io/netif.h158
-rw-r--r--include/xen/interface/io/ring.h260
-rw-r--r--include/xen/interface/io/xenbus.h44
-rw-r--r--include/xen/interface/io/xs_wire.h87
-rw-r--r--include/xen/interface/memory.h145
-rw-r--r--include/xen/interface/physdev.h145
-rw-r--r--include/xen/interface/sched.h77
-rw-r--r--include/xen/interface/vcpu.h167
-rw-r--r--include/xen/interface/version.h60
-rw-r--r--include/xen/interface/xen.h447
-rw-r--r--include/xen/page.h179
-rw-r--r--include/xen/xenbus.h234
61 files changed, 4091 insertions, 111 deletions
diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h
index 9e15ce0006eb..36f310632c49 100644
--- a/include/asm-i386/irq.h
+++ b/include/asm-i386/irq.h
@@ -41,6 +41,7 @@ extern int irqbalance_disable(char *str);
41extern void fixup_irqs(cpumask_t map); 41extern void fixup_irqs(cpumask_t map);
42#endif 42#endif
43 43
44unsigned int do_IRQ(struct pt_regs *regs);
44void init_IRQ(void); 45void init_IRQ(void);
45void __init native_init_IRQ(void); 46void __init native_init_IRQ(void);
46 47
diff --git a/include/asm-i386/mach-default/irq_vectors_limits.h b/include/asm-i386/mach-default/irq_vectors_limits.h
index 7f161e760be6..a90c7a60109f 100644
--- a/include/asm-i386/mach-default/irq_vectors_limits.h
+++ b/include/asm-i386/mach-default/irq_vectors_limits.h
@@ -1,7 +1,7 @@
1#ifndef _ASM_IRQ_VECTORS_LIMITS_H 1#ifndef _ASM_IRQ_VECTORS_LIMITS_H
2#define _ASM_IRQ_VECTORS_LIMITS_H 2#define _ASM_IRQ_VECTORS_LIMITS_H
3 3
4#ifdef CONFIG_X86_IO_APIC 4#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT)
5#define NR_IRQS 224 5#define NR_IRQS 224
6# if (224 >= 32 * NR_CPUS) 6# if (224 >= 32 * NR_CPUS)
7# define NR_IRQ_VECTORS NR_IRQS 7# define NR_IRQ_VECTORS NR_IRQS
diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h
index 8198d1cca1f3..7eb0b0b1fb3c 100644
--- a/include/asm-i386/mmu_context.h
+++ b/include/asm-i386/mmu_context.h
@@ -32,6 +32,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
32#endif 32#endif
33} 33}
34 34
35void leave_mm(unsigned long cpu);
36
35static inline void switch_mm(struct mm_struct *prev, 37static inline void switch_mm(struct mm_struct *prev,
36 struct mm_struct *next, 38 struct mm_struct *next,
37 struct task_struct *tsk) 39 struct task_struct *tsk)
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 7f846a7d6bcc..7df88be2dd9e 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -52,6 +52,8 @@ struct paravirt_ops
52 /* Basic arch-specific setup */ 52 /* Basic arch-specific setup */
53 void (*arch_setup)(void); 53 void (*arch_setup)(void);
54 char *(*memory_setup)(void); 54 char *(*memory_setup)(void);
55 void (*post_allocator_init)(void);
56
55 void (*init_IRQ)(void); 57 void (*init_IRQ)(void);
56 void (*time_init)(void); 58 void (*time_init)(void);
57 59
@@ -116,7 +118,7 @@ struct paravirt_ops
116 118
117 u64 (*read_tsc)(void); 119 u64 (*read_tsc)(void);
118 u64 (*read_pmc)(void); 120 u64 (*read_pmc)(void);
119 u64 (*get_scheduled_cycles)(void); 121 unsigned long long (*sched_clock)(void);
120 unsigned long (*get_cpu_khz)(void); 122 unsigned long (*get_cpu_khz)(void);
121 123
122 /* Segment descriptor handling */ 124 /* Segment descriptor handling */
@@ -173,7 +175,7 @@ struct paravirt_ops
173 unsigned long va); 175 unsigned long va);
174 176
175 /* Hooks for allocating/releasing pagetable pages */ 177 /* Hooks for allocating/releasing pagetable pages */
176 void (*alloc_pt)(u32 pfn); 178 void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
177 void (*alloc_pd)(u32 pfn); 179 void (*alloc_pd)(u32 pfn);
178 void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); 180 void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
179 void (*release_pt)(u32 pfn); 181 void (*release_pt)(u32 pfn);
@@ -260,6 +262,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len)
260unsigned paravirt_patch_insns(void *site, unsigned len, 262unsigned paravirt_patch_insns(void *site, unsigned len,
261 const char *start, const char *end); 263 const char *start, const char *end);
262 264
265int paravirt_disable_iospace(void);
263 266
264/* 267/*
265 * This generates an indirect call based on the operation type number. 268 * This generates an indirect call based on the operation type number.
@@ -563,7 +566,10 @@ static inline u64 paravirt_read_tsc(void)
563 566
564#define rdtscll(val) (val = paravirt_read_tsc()) 567#define rdtscll(val) (val = paravirt_read_tsc())
565 568
566#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) 569static inline unsigned long long paravirt_sched_clock(void)
570{
571 return PVOP_CALL0(unsigned long long, sched_clock);
572}
567#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) 573#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz())
568 574
569#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) 575#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
@@ -669,6 +675,12 @@ static inline void setup_secondary_clock(void)
669} 675}
670#endif 676#endif
671 677
678static inline void paravirt_post_allocator_init(void)
679{
680 if (paravirt_ops.post_allocator_init)
681 (*paravirt_ops.post_allocator_init)();
682}
683
672static inline void paravirt_pagetable_setup_start(pgd_t *base) 684static inline void paravirt_pagetable_setup_start(pgd_t *base)
673{ 685{
674 if (paravirt_ops.pagetable_setup_start) 686 if (paravirt_ops.pagetable_setup_start)
@@ -725,9 +737,9 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
725 PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); 737 PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va);
726} 738}
727 739
728static inline void paravirt_alloc_pt(unsigned pfn) 740static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
729{ 741{
730 PVOP_VCALL1(alloc_pt, pfn); 742 PVOP_VCALL2(alloc_pt, mm, pfn);
731} 743}
732static inline void paravirt_release_pt(unsigned pfn) 744static inline void paravirt_release_pt(unsigned pfn)
733{ 745{
diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h
index d07b7afc2692..f2fc33ceb9f2 100644
--- a/include/asm-i386/pgalloc.h
+++ b/include/asm-i386/pgalloc.h
@@ -7,7 +7,7 @@
7#ifdef CONFIG_PARAVIRT 7#ifdef CONFIG_PARAVIRT
8#include <asm/paravirt.h> 8#include <asm/paravirt.h>
9#else 9#else
10#define paravirt_alloc_pt(pfn) do { } while (0) 10#define paravirt_alloc_pt(mm, pfn) do { } while (0)
11#define paravirt_alloc_pd(pfn) do { } while (0) 11#define paravirt_alloc_pd(pfn) do { } while (0)
12#define paravirt_alloc_pd(pfn) do { } while (0) 12#define paravirt_alloc_pd(pfn) do { } while (0)
13#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) 13#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
@@ -17,13 +17,13 @@
17 17
18#define pmd_populate_kernel(mm, pmd, pte) \ 18#define pmd_populate_kernel(mm, pmd, pte) \
19do { \ 19do { \
20 paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ 20 paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \
21 set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ 21 set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
22} while (0) 22} while (0)
23 23
24#define pmd_populate(mm, pmd, pte) \ 24#define pmd_populate(mm, pmd, pte) \
25do { \ 25do { \
26 paravirt_alloc_pt(page_to_pfn(pte)); \ 26 paravirt_alloc_pt(mm, page_to_pfn(pte)); \
27 set_pmd(pmd, __pmd(_PAGE_TABLE + \ 27 set_pmd(pmd, __pmd(_PAGE_TABLE + \
28 ((unsigned long long)page_to_pfn(pte) << \ 28 ((unsigned long long)page_to_pfn(pte) << \
29 (unsigned long long) PAGE_SHIFT))); \ 29 (unsigned long long) PAGE_SHIFT))); \
diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h
index 0d5bff9dc4a5..7862fe858a9e 100644
--- a/include/asm-i386/setup.h
+++ b/include/asm-i386/setup.h
@@ -81,6 +81,10 @@ void __init add_memory_region(unsigned long long start,
81 81
82extern unsigned long init_pg_tables_end; 82extern unsigned long init_pg_tables_end;
83 83
84#ifndef CONFIG_PARAVIRT
85#define paravirt_post_allocator_init() do {} while (0)
86#endif
87
84#endif /* __ASSEMBLY__ */ 88#endif /* __ASSEMBLY__ */
85 89
86#endif /* __KERNEL__ */ 90#endif /* __KERNEL__ */
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index 0c7132787062..1f73bde165b1 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -43,9 +43,12 @@ extern u8 x86_cpu_to_apicid[];
43 43
44#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] 44#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
45 45
46extern void set_cpu_sibling_map(int cpu);
47
46#ifdef CONFIG_HOTPLUG_CPU 48#ifdef CONFIG_HOTPLUG_CPU
47extern void cpu_exit_clear(void); 49extern void cpu_exit_clear(void);
48extern void cpu_uninit(void); 50extern void cpu_uninit(void);
51extern void remove_siblinginfo(int cpu);
49#endif 52#endif
50 53
51struct smp_ops 54struct smp_ops
@@ -129,6 +132,8 @@ extern int __cpu_disable(void);
129extern void __cpu_die(unsigned int cpu); 132extern void __cpu_die(unsigned int cpu);
130extern unsigned int num_processors; 133extern unsigned int num_processors;
131 134
135void __cpuinit smp_store_cpu_info(int id);
136
132#endif /* !__ASSEMBLY__ */ 137#endif /* !__ASSEMBLY__ */
133 138
134#else /* CONFIG_SMP */ 139#else /* CONFIG_SMP */
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h
index 153770e25faa..51a713e33a9e 100644
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -15,8 +15,38 @@ extern int no_sync_cmos_clock;
15extern int recalibrate_cpu_khz(void); 15extern int recalibrate_cpu_khz(void);
16 16
17#ifndef CONFIG_PARAVIRT 17#ifndef CONFIG_PARAVIRT
18#define get_scheduled_cycles(val) rdtscll(val)
19#define calculate_cpu_khz() native_calculate_cpu_khz() 18#define calculate_cpu_khz() native_calculate_cpu_khz()
20#endif 19#endif
21 20
21/* Accellerators for sched_clock()
22 * convert from cycles(64bits) => nanoseconds (64bits)
23 * basic equation:
24 * ns = cycles / (freq / ns_per_sec)
25 * ns = cycles * (ns_per_sec / freq)
26 * ns = cycles * (10^9 / (cpu_khz * 10^3))
27 * ns = cycles * (10^6 / cpu_khz)
28 *
29 * Then we use scaling math (suggested by george@mvista.com) to get:
30 * ns = cycles * (10^6 * SC / cpu_khz) / SC
31 * ns = cycles * cyc2ns_scale / SC
32 *
33 * And since SC is a constant power of two, we can convert the div
34 * into a shift.
35 *
36 * We can use khz divisor instead of mhz to keep a better percision, since
37 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
38 * (mathieu.desnoyers@polymtl.ca)
39 *
40 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
41 */
42extern unsigned long cyc2ns_scale __read_mostly;
43
44#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
45
46static inline unsigned long long cycles_2_ns(unsigned long long cyc)
47{
48 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
49}
50
51
22#endif 52#endif
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index e84ace1ec8bf..9b15545eb9b5 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -329,10 +329,11 @@
329#define __NR_signalfd 321 329#define __NR_signalfd 321
330#define __NR_timerfd 322 330#define __NR_timerfd 322
331#define __NR_eventfd 323 331#define __NR_eventfd 323
332#define __NR_fallocate 324
332 333
333#ifdef __KERNEL__ 334#ifdef __KERNEL__
334 335
335#define NR_syscalls 324 336#define NR_syscalls 325
336 337
337#define __ARCH_WANT_IPC_PARSE_VERSION 338#define __ARCH_WANT_IPC_PARSE_VERSION
338#define __ARCH_WANT_OLD_READDIR 339#define __ARCH_WANT_OLD_READDIR
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h
index 213930b995cb..478188130328 100644
--- a/include/asm-i386/vmi_time.h
+++ b/include/asm-i386/vmi_time.h
@@ -49,7 +49,7 @@ extern struct vmi_timer_ops {
49extern void __init vmi_time_init(void); 49extern void __init vmi_time_init(void);
50extern unsigned long vmi_get_wallclock(void); 50extern unsigned long vmi_get_wallclock(void);
51extern int vmi_set_wallclock(unsigned long now); 51extern int vmi_set_wallclock(unsigned long now);
52extern unsigned long long vmi_get_sched_cycles(void); 52extern unsigned long long vmi_sched_clock(void);
53extern unsigned long vmi_cpu_khz(void); 53extern unsigned long vmi_cpu_khz(void);
54 54
55#ifdef CONFIG_X86_LOCAL_APIC 55#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/include/asm-i386/xen/hypercall.h b/include/asm-i386/xen/hypercall.h
new file mode 100644
index 000000000000..bc0ee7d961ca
--- /dev/null
+++ b/include/asm-i386/xen/hypercall.h
@@ -0,0 +1,413 @@
1/******************************************************************************
2 * hypercall.h
3 *
4 * Linux-specific hypervisor handling.
5 *
6 * Copyright (c) 2002-2004, K A Fraser
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#ifndef __HYPERCALL_H__
34#define __HYPERCALL_H__
35
36#include <linux/errno.h>
37#include <linux/string.h>
38
39#include <xen/interface/xen.h>
40#include <xen/interface/sched.h>
41#include <xen/interface/physdev.h>
42
43extern struct { char _entry[32]; } hypercall_page[];
44
45#define _hypercall0(type, name) \
46({ \
47 long __res; \
48 asm volatile ( \
49 "call %[call]" \
50 : "=a" (__res) \
51 : [call] "m" (hypercall_page[__HYPERVISOR_##name]) \
52 : "memory" ); \
53 (type)__res; \
54})
55
56#define _hypercall1(type, name, a1) \
57({ \
58 long __res, __ign1; \
59 asm volatile ( \
60 "call %[call]" \
61 : "=a" (__res), "=b" (__ign1) \
62 : "1" ((long)(a1)), \
63 [call] "m" (hypercall_page[__HYPERVISOR_##name]) \
64 : "memory" ); \
65 (type)__res; \
66})
67
68#define _hypercall2(type, name, a1, a2) \
69({ \
70 long __res, __ign1, __ign2; \
71 asm volatile ( \
72 "call %[call]" \
73 : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \
74 : "1" ((long)(a1)), "2" ((long)(a2)), \
75 [call] "m" (hypercall_page[__HYPERVISOR_##name]) \
76 : "memory" ); \
77 (type)__res; \
78})
79
80#define _hypercall3(type, name, a1, a2, a3) \
81({ \
82 long __res, __ign1, __ign2, __ign3; \
83 asm volatile ( \
84 "call %[call]" \
85 : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
86 "=d" (__ign3) \
87 : "1" ((long)(a1)), "2" ((long)(a2)), \
88 "3" ((long)(a3)), \
89 [call] "m" (hypercall_page[__HYPERVISOR_##name]) \
90 : "memory" ); \
91 (type)__res; \
92})
93
94#define _hypercall4(type, name, a1, a2, a3, a4) \
95({ \
96 long __res, __ign1, __ign2, __ign3, __ign4; \
97 asm volatile ( \
98 "call %[call]" \
99 : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
100 "=d" (__ign3), "=S" (__ign4) \
101 : "1" ((long)(a1)), "2" ((long)(a2)), \
102 "3" ((long)(a3)), "4" ((long)(a4)), \
103 [call] "m" (hypercall_page[__HYPERVISOR_##name]) \
104 : "memory" ); \
105 (type)__res; \
106})
107
108#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
109({ \
110 long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \
111 asm volatile ( \
112 "call %[call]" \
113 : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
114 "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \
115 : "1" ((long)(a1)), "2" ((long)(a2)), \
116 "3" ((long)(a3)), "4" ((long)(a4)), \
117 "5" ((long)(a5)), \
118 [call] "m" (hypercall_page[__HYPERVISOR_##name]) \
119 : "memory" ); \
120 (type)__res; \
121})
122
123static inline int
124HYPERVISOR_set_trap_table(struct trap_info *table)
125{
126 return _hypercall1(int, set_trap_table, table);
127}
128
129static inline int
130HYPERVISOR_mmu_update(struct mmu_update *req, int count,
131 int *success_count, domid_t domid)
132{
133 return _hypercall4(int, mmu_update, req, count, success_count, domid);
134}
135
136static inline int
137HYPERVISOR_mmuext_op(struct mmuext_op *op, int count,
138 int *success_count, domid_t domid)
139{
140 return _hypercall4(int, mmuext_op, op, count, success_count, domid);
141}
142
143static inline int
144HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
145{
146 return _hypercall2(int, set_gdt, frame_list, entries);
147}
148
149static inline int
150HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp)
151{
152 return _hypercall2(int, stack_switch, ss, esp);
153}
154
155static inline int
156HYPERVISOR_set_callbacks(unsigned long event_selector,
157 unsigned long event_address,
158 unsigned long failsafe_selector,
159 unsigned long failsafe_address)
160{
161 return _hypercall4(int, set_callbacks,
162 event_selector, event_address,
163 failsafe_selector, failsafe_address);
164}
165
166static inline int
167HYPERVISOR_fpu_taskswitch(int set)
168{
169 return _hypercall1(int, fpu_taskswitch, set);
170}
171
172static inline int
173HYPERVISOR_sched_op(int cmd, unsigned long arg)
174{
175 return _hypercall2(int, sched_op, cmd, arg);
176}
177
178static inline long
179HYPERVISOR_set_timer_op(u64 timeout)
180{
181 unsigned long timeout_hi = (unsigned long)(timeout>>32);
182 unsigned long timeout_lo = (unsigned long)timeout;
183 return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
184}
185
186static inline int
187HYPERVISOR_set_debugreg(int reg, unsigned long value)
188{
189 return _hypercall2(int, set_debugreg, reg, value);
190}
191
192static inline unsigned long
193HYPERVISOR_get_debugreg(int reg)
194{
195 return _hypercall1(unsigned long, get_debugreg, reg);
196}
197
198static inline int
199HYPERVISOR_update_descriptor(u64 ma, u64 desc)
200{
201 return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
202}
203
204static inline int
205HYPERVISOR_memory_op(unsigned int cmd, void *arg)
206{
207 return _hypercall2(int, memory_op, cmd, arg);
208}
209
210static inline int
211HYPERVISOR_multicall(void *call_list, int nr_calls)
212{
213 return _hypercall2(int, multicall, call_list, nr_calls);
214}
215
216static inline int
217HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val,
218 unsigned long flags)
219{
220 unsigned long pte_hi = 0;
221#ifdef CONFIG_X86_PAE
222 pte_hi = new_val.pte_high;
223#endif
224 return _hypercall4(int, update_va_mapping, va,
225 new_val.pte_low, pte_hi, flags);
226}
227
228static inline int
229HYPERVISOR_event_channel_op(int cmd, void *arg)
230{
231 int rc = _hypercall2(int, event_channel_op, cmd, arg);
232 if (unlikely(rc == -ENOSYS)) {
233 struct evtchn_op op;
234 op.cmd = cmd;
235 memcpy(&op.u, arg, sizeof(op.u));
236 rc = _hypercall1(int, event_channel_op_compat, &op);
237 memcpy(arg, &op.u, sizeof(op.u));
238 }
239 return rc;
240}
241
242static inline int
243HYPERVISOR_xen_version(int cmd, void *arg)
244{
245 return _hypercall2(int, xen_version, cmd, arg);
246}
247
248static inline int
249HYPERVISOR_console_io(int cmd, int count, char *str)
250{
251 return _hypercall3(int, console_io, cmd, count, str);
252}
253
254static inline int
255HYPERVISOR_physdev_op(int cmd, void *arg)
256{
257 int rc = _hypercall2(int, physdev_op, cmd, arg);
258 if (unlikely(rc == -ENOSYS)) {
259 struct physdev_op op;
260 op.cmd = cmd;
261 memcpy(&op.u, arg, sizeof(op.u));
262 rc = _hypercall1(int, physdev_op_compat, &op);
263 memcpy(arg, &op.u, sizeof(op.u));
264 }
265 return rc;
266}
267
268static inline int
269HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
270{
271 return _hypercall3(int, grant_table_op, cmd, uop, count);
272}
273
274static inline int
275HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val,
276 unsigned long flags, domid_t domid)
277{
278 unsigned long pte_hi = 0;
279#ifdef CONFIG_X86_PAE
280 pte_hi = new_val.pte_high;
281#endif
282 return _hypercall5(int, update_va_mapping_otherdomain, va,
283 new_val.pte_low, pte_hi, flags, domid);
284}
285
286static inline int
287HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type)
288{
289 return _hypercall2(int, vm_assist, cmd, type);
290}
291
292static inline int
293HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
294{
295 return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
296}
297
298static inline int
299HYPERVISOR_suspend(unsigned long srec)
300{
301 return _hypercall3(int, sched_op, SCHEDOP_shutdown,
302 SHUTDOWN_suspend, srec);
303}
304
305static inline int
306HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
307{
308 return _hypercall2(int, nmi_op, op, arg);
309}
310
311static inline void
312MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
313 pte_t new_val, unsigned long flags)
314{
315 mcl->op = __HYPERVISOR_update_va_mapping;
316 mcl->args[0] = va;
317#ifdef CONFIG_X86_PAE
318 mcl->args[1] = new_val.pte_low;
319 mcl->args[2] = new_val.pte_high;
320#else
321 mcl->args[1] = new_val.pte_low;
322 mcl->args[2] = 0;
323#endif
324 mcl->args[3] = flags;
325}
326
327static inline void
328MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd,
329 void *uop, unsigned int count)
330{
331 mcl->op = __HYPERVISOR_grant_table_op;
332 mcl->args[0] = cmd;
333 mcl->args[1] = (unsigned long)uop;
334 mcl->args[2] = count;
335}
336
337static inline void
338MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va,
339 pte_t new_val, unsigned long flags,
340 domid_t domid)
341{
342 mcl->op = __HYPERVISOR_update_va_mapping_otherdomain;
343 mcl->args[0] = va;
344#ifdef CONFIG_X86_PAE
345 mcl->args[1] = new_val.pte_low;
346 mcl->args[2] = new_val.pte_high;
347#else
348 mcl->args[1] = new_val.pte_low;
349 mcl->args[2] = 0;
350#endif
351 mcl->args[3] = flags;
352 mcl->args[4] = domid;
353}
354
355static inline void
356MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
357 struct desc_struct desc)
358{
359 mcl->op = __HYPERVISOR_update_descriptor;
360 mcl->args[0] = maddr;
361 mcl->args[1] = maddr >> 32;
362 mcl->args[2] = desc.a;
363 mcl->args[3] = desc.b;
364}
365
366static inline void
367MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg)
368{
369 mcl->op = __HYPERVISOR_memory_op;
370 mcl->args[0] = cmd;
371 mcl->args[1] = (unsigned long)arg;
372}
373
374static inline void
375MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
376 int count, int *success_count, domid_t domid)
377{
378 mcl->op = __HYPERVISOR_mmu_update;
379 mcl->args[0] = (unsigned long)req;
380 mcl->args[1] = count;
381 mcl->args[2] = (unsigned long)success_count;
382 mcl->args[3] = domid;
383}
384
385static inline void
386MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count,
387 int *success_count, domid_t domid)
388{
389 mcl->op = __HYPERVISOR_mmuext_op;
390 mcl->args[0] = (unsigned long)op;
391 mcl->args[1] = count;
392 mcl->args[2] = (unsigned long)success_count;
393 mcl->args[3] = domid;
394}
395
396static inline void
397MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries)
398{
399 mcl->op = __HYPERVISOR_set_gdt;
400 mcl->args[0] = (unsigned long)frames;
401 mcl->args[1] = entries;
402}
403
404static inline void
405MULTI_stack_switch(struct multicall_entry *mcl,
406 unsigned long ss, unsigned long esp)
407{
408 mcl->op = __HYPERVISOR_stack_switch;
409 mcl->args[0] = ss;
410 mcl->args[1] = esp;
411}
412
413#endif /* __HYPERCALL_H__ */
diff --git a/include/asm-i386/xen/hypervisor.h b/include/asm-i386/xen/hypervisor.h
new file mode 100644
index 000000000000..8e15dd28c91f
--- /dev/null
+++ b/include/asm-i386/xen/hypervisor.h
@@ -0,0 +1,73 @@
1/******************************************************************************
2 * hypervisor.h
3 *
4 * Linux-specific hypervisor handling.
5 *
6 * Copyright (c) 2002-2004, K A Fraser
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
32
33#ifndef __HYPERVISOR_H__
34#define __HYPERVISOR_H__
35
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/version.h>
39
40#include <xen/interface/xen.h>
41#include <xen/interface/version.h>
42
43#include <asm/ptrace.h>
44#include <asm/page.h>
45#include <asm/desc.h>
46#if defined(__i386__)
47# ifdef CONFIG_X86_PAE
48# include <asm-generic/pgtable-nopud.h>
49# else
50# include <asm-generic/pgtable-nopmd.h>
51# endif
52#endif
53#include <asm/xen/hypercall.h>
54
55/* arch/i386/kernel/setup.c */
56extern struct shared_info *HYPERVISOR_shared_info;
57extern struct start_info *xen_start_info;
58#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
59
60/* arch/i386/mach-xen/evtchn.c */
61/* Force a proper event-channel callback from Xen. */
62extern void force_evtchn_callback(void);
63
64/* Turn jiffies into Xen system time. */
65u64 jiffies_to_st(unsigned long jiffies);
66
67
68#define MULTI_UVMFLAGS_INDEX 3
69#define MULTI_UVMDOMID_INDEX 4
70
71#define is_running_on_xen() (xen_start_info ? 1 : 0)
72
73#endif /* __HYPERVISOR_H__ */
diff --git a/include/asm-i386/xen/interface.h b/include/asm-i386/xen/interface.h
new file mode 100644
index 000000000000..165c3968e138
--- /dev/null
+++ b/include/asm-i386/xen/interface.h
@@ -0,0 +1,188 @@
1/******************************************************************************
2 * arch-x86_32.h
3 *
4 * Guest OS interface to x86 32-bit Xen.
5 *
6 * Copyright (c) 2004, K A Fraser
7 */
8
9#ifndef __XEN_PUBLIC_ARCH_X86_32_H__
10#define __XEN_PUBLIC_ARCH_X86_32_H__
11
12#ifdef __XEN__
13#define __DEFINE_GUEST_HANDLE(name, type) \
14 typedef struct { type *p; } __guest_handle_ ## name
15#else
16#define __DEFINE_GUEST_HANDLE(name, type) \
17 typedef type * __guest_handle_ ## name
18#endif
19
20#define DEFINE_GUEST_HANDLE_STRUCT(name) \
21 __DEFINE_GUEST_HANDLE(name, struct name)
22#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
23#define GUEST_HANDLE(name) __guest_handle_ ## name
24
25#ifndef __ASSEMBLY__
26/* Guest handles for primitive C types. */
27__DEFINE_GUEST_HANDLE(uchar, unsigned char);
28__DEFINE_GUEST_HANDLE(uint, unsigned int);
29__DEFINE_GUEST_HANDLE(ulong, unsigned long);
30DEFINE_GUEST_HANDLE(char);
31DEFINE_GUEST_HANDLE(int);
32DEFINE_GUEST_HANDLE(long);
33DEFINE_GUEST_HANDLE(void);
34#endif
35
36/*
37 * SEGMENT DESCRIPTOR TABLES
38 */
39/*
40 * A number of GDT entries are reserved by Xen. These are not situated at the
41 * start of the GDT because some stupid OSes export hard-coded selector values
42 * in their ABI. These hard-coded values are always near the start of the GDT,
43 * so Xen places itself out of the way, at the far end of the GDT.
44 */
45#define FIRST_RESERVED_GDT_PAGE 14
46#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096)
47#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
48
49/*
50 * These flat segments are in the Xen-private section of every GDT. Since these
51 * are also present in the initial GDT, many OSes will be able to avoid
52 * installing their own GDT.
53 */
54#define FLAT_RING1_CS 0xe019 /* GDT index 259 */
55#define FLAT_RING1_DS 0xe021 /* GDT index 260 */
56#define FLAT_RING1_SS 0xe021 /* GDT index 260 */
57#define FLAT_RING3_CS 0xe02b /* GDT index 261 */
58#define FLAT_RING3_DS 0xe033 /* GDT index 262 */
59#define FLAT_RING3_SS 0xe033 /* GDT index 262 */
60
61#define FLAT_KERNEL_CS FLAT_RING1_CS
62#define FLAT_KERNEL_DS FLAT_RING1_DS
63#define FLAT_KERNEL_SS FLAT_RING1_SS
64#define FLAT_USER_CS FLAT_RING3_CS
65#define FLAT_USER_DS FLAT_RING3_DS
66#define FLAT_USER_SS FLAT_RING3_SS
67
68/* And the trap vector is... */
69#define TRAP_INSTR "int $0x82"
70
71/*
72 * Virtual addresses beyond this are not modifiable by guest OSes. The
73 * machine->physical mapping table starts at this address, read-only.
74 */
75#ifdef CONFIG_X86_PAE
76#define __HYPERVISOR_VIRT_START 0xF5800000
77#else
78#define __HYPERVISOR_VIRT_START 0xFC000000
79#endif
80
81#ifndef HYPERVISOR_VIRT_START
82#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
83#endif
84
85#ifndef machine_to_phys_mapping
86#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
87#endif
88
89/* Maximum number of virtual CPUs in multi-processor guests. */
90#define MAX_VIRT_CPUS 32
91
92#ifndef __ASSEMBLY__
93
94/*
95 * Send an array of these to HYPERVISOR_set_trap_table()
96 */
97#define TI_GET_DPL(_ti) ((_ti)->flags & 3)
98#define TI_GET_IF(_ti) ((_ti)->flags & 4)
99#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl))
100#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2))
101
102struct trap_info {
103 uint8_t vector; /* exception vector */
104 uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
105 uint16_t cs; /* code selector */
106 unsigned long address; /* code offset */
107};
108DEFINE_GUEST_HANDLE_STRUCT(trap_info);
109
110struct cpu_user_regs {
111 uint32_t ebx;
112 uint32_t ecx;
113 uint32_t edx;
114 uint32_t esi;
115 uint32_t edi;
116 uint32_t ebp;
117 uint32_t eax;
118 uint16_t error_code; /* private */
119 uint16_t entry_vector; /* private */
120 uint32_t eip;
121 uint16_t cs;
122 uint8_t saved_upcall_mask;
123 uint8_t _pad0;
124 uint32_t eflags; /* eflags.IF == !saved_upcall_mask */
125 uint32_t esp;
126 uint16_t ss, _pad1;
127 uint16_t es, _pad2;
128 uint16_t ds, _pad3;
129 uint16_t fs, _pad4;
130 uint16_t gs, _pad5;
131};
132DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
133
134typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
135
136/*
137 * The following is all CPU context. Note that the fpu_ctxt block is filled
138 * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
139 */
140struct vcpu_guest_context {
141 /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
142 struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */
143#define VGCF_I387_VALID (1<<0)
144#define VGCF_HVM_GUEST (1<<1)
145#define VGCF_IN_KERNEL (1<<2)
146 unsigned long flags; /* VGCF_* flags */
147 struct cpu_user_regs user_regs; /* User-level CPU registers */
148 struct trap_info trap_ctxt[256]; /* Virtual IDT */
149 unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
150 unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
151 unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
152 unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
153 unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
154 unsigned long event_callback_cs; /* CS:EIP of event callback */
155 unsigned long event_callback_eip;
156 unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
157 unsigned long failsafe_callback_eip;
158 unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
159};
160DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
161
162struct arch_shared_info {
163 unsigned long max_pfn; /* max pfn that appears in table */
164 /* Frame containing list of mfns containing list of mfns containing p2m. */
165 unsigned long pfn_to_mfn_frame_list_list;
166 unsigned long nmi_reason;
167};
168
169struct arch_vcpu_info {
170 unsigned long cr2;
171 unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
172};
173
174#endif /* !__ASSEMBLY__ */
175
176/*
177 * Prefix forces emulation of some non-trapping instructions.
178 * Currently only CPUID.
179 */
180#ifdef __ASSEMBLY__
181#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
182#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
183#else
184#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
185#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
186#endif
187
188#endif
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index 1cc3f9cb6f4e..cc6d87228258 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -308,6 +308,7 @@ COMPAT_SYS_SPU(move_pages)
308SYSCALL_SPU(getcpu) 308SYSCALL_SPU(getcpu)
309COMPAT_SYS(epoll_pwait) 309COMPAT_SYS(epoll_pwait)
310COMPAT_SYS_SPU(utimensat) 310COMPAT_SYS_SPU(utimensat)
311COMPAT_SYS(fallocate)
311COMPAT_SYS_SPU(signalfd) 312COMPAT_SYS_SPU(signalfd)
312COMPAT_SYS_SPU(timerfd) 313COMPAT_SYS_SPU(timerfd)
313SYSCALL_SPU(eventfd) 314SYSCALL_SPU(eventfd)
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index f71c6061f1ec..97d82b6a9406 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -331,10 +331,11 @@
331#define __NR_timerfd 306 331#define __NR_timerfd 306
332#define __NR_eventfd 307 332#define __NR_eventfd 307
333#define __NR_sync_file_range2 308 333#define __NR_sync_file_range2 308
334#define __NR_fallocate 309
334 335
335#ifdef __KERNEL__ 336#ifdef __KERNEL__
336 337
337#define __NR_syscalls 309 338#define __NR_syscalls 310
338 339
339#define __NR__exit __NR_exit 340#define __NR__exit __NR_exit
340#define NR_syscalls __NR_syscalls 341#define NR_syscalls __NR_syscalls
diff --git a/include/asm-sparc64/io.h b/include/asm-sparc64/io.h
index ad595b679842..9565a892801e 100644
--- a/include/asm-sparc64/io.h
+++ b/include/asm-sparc64/io.h
@@ -14,11 +14,6 @@
14#define __SLOW_DOWN_IO do { } while (0) 14#define __SLOW_DOWN_IO do { } while (0)
15#define SLOW_DOWN_IO do { } while (0) 15#define SLOW_DOWN_IO do { } while (0)
16 16
17extern unsigned long virt_to_bus_not_defined_use_pci_map(volatile void *addr);
18#define virt_to_bus virt_to_bus_not_defined_use_pci_map
19extern unsigned long bus_to_virt_not_defined_use_pci_map(volatile void *addr);
20#define bus_to_virt bus_to_virt_not_defined_use_pci_map
21
22/* BIO layer definitions. */ 17/* BIO layer definitions. */
23extern unsigned long kern_base, kern_size; 18extern unsigned long kern_base, kern_size;
24#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) 19#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h
index e97c43133752..1acc7272e537 100644
--- a/include/asm-sparc64/mdesc.h
+++ b/include/asm-sparc64/mdesc.h
@@ -61,6 +61,16 @@ extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc);
61 61
62extern void mdesc_update(void); 62extern void mdesc_update(void);
63 63
64struct mdesc_notifier_client {
65 void (*add)(struct mdesc_handle *handle, u64 node);
66 void (*remove)(struct mdesc_handle *handle, u64 node);
67
68 const char *node_name;
69 struct mdesc_notifier_client *next;
70};
71
72extern void mdesc_register_notifier(struct mdesc_notifier_client *client);
73
64extern void mdesc_fill_in_cpu_data(cpumask_t mask); 74extern void mdesc_fill_in_cpu_data(cpumask_t mask);
65 75
66extern void sun4v_mdesc_init(void); 76extern void sun4v_mdesc_init(void);
diff --git a/include/asm-sparc64/vio.h b/include/asm-sparc64/vio.h
index 83c96422e9d6..c0a8d4ed5bcb 100644
--- a/include/asm-sparc64/vio.h
+++ b/include/asm-sparc64/vio.h
@@ -264,7 +264,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr,
264 ((dr->prod - dr->cons) & (ring_size - 1))); 264 ((dr->prod - dr->cons) & (ring_size - 1)));
265} 265}
266 266
267#define VIO_MAX_TYPE_LEN 64 267#define VIO_MAX_TYPE_LEN 32
268#define VIO_MAX_COMPAT_LEN 64 268#define VIO_MAX_COMPAT_LEN 64
269 269
270struct vio_dev { 270struct vio_dev {
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 8696f8ad401e..fc4e73f5f1fa 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -630,6 +630,8 @@ __SYSCALL(__NR_signalfd, sys_signalfd)
630__SYSCALL(__NR_timerfd, sys_timerfd) 630__SYSCALL(__NR_timerfd, sys_timerfd)
631#define __NR_eventfd 284 631#define __NR_eventfd 284
632__SYSCALL(__NR_eventfd, sys_eventfd) 632__SYSCALL(__NR_eventfd, sys_eventfd)
633#define __NR_fallocate 285
634__SYSCALL(__NR_fallocate, sys_fallocate)
633 635
634#ifndef __NO_STUBS 636#ifndef __NO_STUBS
635#define __ARCH_WANT_OLD_READDIR 637#define __ARCH_WANT_OLD_READDIR
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
index 9a1e0674e56c..e831759b2fb5 100644
--- a/include/linux/elfnote.h
+++ b/include/linux/elfnote.h
@@ -38,17 +38,25 @@
38 * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") 38 * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
39 * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) 39 * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
40 */ 40 */
41#define ELFNOTE(name, type, desctype, descdata) \ 41#define ELFNOTE_START(name, type, flags) \
42.pushsection .note.name, "",@note ; \ 42.pushsection .note.name, flags,@note ; \
43 .align 4 ; \ 43 .balign 4 ; \
44 .long 2f - 1f /* namesz */ ; \ 44 .long 2f - 1f /* namesz */ ; \
45 .long 4f - 3f /* descsz */ ; \ 45 .long 4484f - 3f /* descsz */ ; \
46 .long type ; \ 46 .long type ; \
471:.asciz #name ; \ 471:.asciz #name ; \
482:.align 4 ; \ 482:.balign 4 ; \
493:desctype descdata ; \ 493:
504:.align 4 ; \ 50
51#define ELFNOTE_END \
524484:.balign 4 ; \
51.popsection ; 53.popsection ;
54
55#define ELFNOTE(name, type, desc) \
56 ELFNOTE_START(name, type, "") \
57 desc ; \
58 ELFNOTE_END
59
52#else /* !__ASSEMBLER__ */ 60#else /* !__ASSEMBLER__ */
53#include <linux/elf.h> 61#include <linux/elf.h>
54/* 62/*
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index de1f9f78625a..cdee7aaa57aa 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -71,7 +71,7 @@
71/* 71/*
72 * Maximal count of links to a file 72 * Maximal count of links to a file
73 */ 73 */
74#define EXT4_LINK_MAX 32000 74#define EXT4_LINK_MAX 65000
75 75
76/* 76/*
77 * Macro-instructions used to manage several block sizes 77 * Macro-instructions used to manage several block sizes
@@ -102,6 +102,7 @@
102 EXT4_GOOD_OLD_FIRST_INO : \ 102 EXT4_GOOD_OLD_FIRST_INO : \
103 (s)->s_first_ino) 103 (s)->s_first_ino)
104#endif 104#endif
105#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
105 106
106/* 107/*
107 * Macro-instructions used to manage fragments 108 * Macro-instructions used to manage fragments
@@ -201,6 +202,7 @@ struct ext4_group_desc
201#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ 202#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
202#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ 203#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
203#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ 204#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
205#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
204 206
205/* Used to pass group descriptor data when online resize is done */ 207/* Used to pass group descriptor data when online resize is done */
206struct ext4_new_group_input { 208struct ext4_new_group_input {
@@ -225,6 +227,11 @@ struct ext4_new_group_data {
225 __u32 free_blocks_count; 227 __u32 free_blocks_count;
226}; 228};
227 229
230/*
231 * Following is used by preallocation code to tell get_blocks() that we
232 * want uninitialzed extents.
233 */
234#define EXT4_CREATE_UNINITIALIZED_EXT 2
228 235
229/* 236/*
230 * ioctl commands 237 * ioctl commands
@@ -237,7 +244,7 @@ struct ext4_new_group_data {
237#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) 244#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
238#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION 245#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
239#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION 246#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
240#ifdef CONFIG_JBD_DEBUG 247#ifdef CONFIG_JBD2_DEBUG
241#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) 248#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
242#endif 249#endif
243#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) 250#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
@@ -253,7 +260,7 @@ struct ext4_new_group_data {
253#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) 260#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
254#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) 261#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
255#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) 262#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
256#ifdef CONFIG_JBD_DEBUG 263#ifdef CONFIG_JBD2_DEBUG
257#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) 264#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
258#endif 265#endif
259#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION 266#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
@@ -282,7 +289,7 @@ struct ext4_inode {
282 __le16 i_uid; /* Low 16 bits of Owner Uid */ 289 __le16 i_uid; /* Low 16 bits of Owner Uid */
283 __le32 i_size; /* Size in bytes */ 290 __le32 i_size; /* Size in bytes */
284 __le32 i_atime; /* Access time */ 291 __le32 i_atime; /* Access time */
285 __le32 i_ctime; /* Creation time */ 292 __le32 i_ctime; /* Inode Change time */
286 __le32 i_mtime; /* Modification time */ 293 __le32 i_mtime; /* Modification time */
287 __le32 i_dtime; /* Deletion Time */ 294 __le32 i_dtime; /* Deletion Time */
288 __le16 i_gid; /* Low 16 bits of Group Id */ 295 __le16 i_gid; /* Low 16 bits of Group Id */
@@ -331,10 +338,85 @@ struct ext4_inode {
331 } osd2; /* OS dependent 2 */ 338 } osd2; /* OS dependent 2 */
332 __le16 i_extra_isize; 339 __le16 i_extra_isize;
333 __le16 i_pad1; 340 __le16 i_pad1;
341 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
342 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
343 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
344 __le32 i_crtime; /* File Creation time */
345 __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
334}; 346};
335 347
336#define i_size_high i_dir_acl 348#define i_size_high i_dir_acl
337 349
350#define EXT4_EPOCH_BITS 2
351#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
352#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
353
354/*
355 * Extended fields will fit into an inode if the filesystem was formatted
356 * with large inodes (-I 256 or larger) and there are not currently any EAs
357 * consuming all of the available space. For new inodes we always reserve
358 * enough space for the kernel's known extended fields, but for inodes
359 * created with an old kernel this might not have been the case. None of
360 * the extended inode fields is critical for correct filesystem operation.
361 * This macro checks if a certain field fits in the inode. Note that
362 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
363 */
364#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
365 ((offsetof(typeof(*ext4_inode), field) + \
366 sizeof((ext4_inode)->field)) \
367 <= (EXT4_GOOD_OLD_INODE_SIZE + \
368 (einode)->i_extra_isize)) \
369
370static inline __le32 ext4_encode_extra_time(struct timespec *time)
371{
372 return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
373 time->tv_sec >> 32 : 0) |
374 ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
375}
376
377static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
378{
379 if (sizeof(time->tv_sec) > 4)
380 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
381 << 32;
382 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
383}
384
385#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
386do { \
387 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
388 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
389 (raw_inode)->xtime ## _extra = \
390 ext4_encode_extra_time(&(inode)->xtime); \
391} while (0)
392
393#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
394do { \
395 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
396 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
397 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
398 (raw_inode)->xtime ## _extra = \
399 ext4_encode_extra_time(&(einode)->xtime); \
400} while (0)
401
402#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
403do { \
404 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
405 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
406 ext4_decode_extra_time(&(inode)->xtime, \
407 raw_inode->xtime ## _extra); \
408} while (0)
409
410#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
411do { \
412 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
413 (einode)->xtime.tv_sec = \
414 (signed)le32_to_cpu((raw_inode)->xtime); \
415 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
416 ext4_decode_extra_time(&(einode)->xtime, \
417 raw_inode->xtime ## _extra); \
418} while (0)
419
338#if defined(__KERNEL__) || defined(__linux__) 420#if defined(__KERNEL__) || defined(__linux__)
339#define i_reserved1 osd1.linux1.l_i_reserved1 421#define i_reserved1 osd1.linux1.l_i_reserved1
340#define i_frag osd2.linux2.l_i_frag 422#define i_frag osd2.linux2.l_i_frag
@@ -533,6 +615,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
533 return container_of(inode, struct ext4_inode_info, vfs_inode); 615 return container_of(inode, struct ext4_inode_info, vfs_inode);
534} 616}
535 617
618static inline struct timespec ext4_current_time(struct inode *inode)
619{
620 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
621 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
622}
623
624
536static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) 625static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
537{ 626{
538 return ino == EXT4_ROOT_INO || 627 return ino == EXT4_ROOT_INO ||
@@ -603,6 +692,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
603#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 692#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
604#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 693#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
605#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 694#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
695#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
696#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
606 697
607#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 698#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
608#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 699#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -620,6 +711,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
620 EXT4_FEATURE_INCOMPAT_64BIT) 711 EXT4_FEATURE_INCOMPAT_64BIT)
621#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 712#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
622 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 713 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
714 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
715 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
623 EXT4_FEATURE_RO_COMPAT_BTREE_DIR) 716 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
624 717
625/* 718/*
@@ -862,6 +955,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
862extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); 955extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
863extern void ext4_truncate (struct inode *); 956extern void ext4_truncate (struct inode *);
864extern void ext4_set_inode_flags(struct inode *); 957extern void ext4_set_inode_flags(struct inode *);
958extern void ext4_get_inode_flags(struct ext4_inode_info *);
865extern void ext4_set_aops(struct inode *inode); 959extern void ext4_set_aops(struct inode *inode);
866extern int ext4_writepage_trans_blocks(struct inode *); 960extern int ext4_writepage_trans_blocks(struct inode *);
867extern int ext4_block_truncate_page(handle_t *handle, struct page *page, 961extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
@@ -983,6 +1077,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
983extern void ext4_ext_truncate(struct inode *, struct page *); 1077extern void ext4_ext_truncate(struct inode *, struct page *);
984extern void ext4_ext_init(struct super_block *); 1078extern void ext4_ext_init(struct super_block *);
985extern void ext4_ext_release(struct super_block *); 1079extern void ext4_ext_release(struct super_block *);
1080extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1081 loff_t len);
986static inline int 1082static inline int
987ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, 1083ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
988 unsigned long max_blocks, struct buffer_head *bh, 1084 unsigned long max_blocks, struct buffer_head *bh,
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index acfe59740b03..81406f3655d4 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
141 141
142#define EXT_MAX_BLOCK 0xffffffff 142#define EXT_MAX_BLOCK 0xffffffff
143 143
144#define EXT_MAX_LEN ((1UL << 15) - 1) 144/*
145 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
146 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
147 * MSB of ee_len field in the extent datastructure to signify if this
148 * particular extent is an initialized extent or an uninitialized (i.e.
149 * preallocated).
150 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
151 * uninitialized extent.
152 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
153 * uninitialized one. In other words, if MSB of ee_len is set, it is an
154 * uninitialized extent with only one special scenario when ee_len = 0x8000.
155 * In this case we can not have an uninitialized extent of zero length and
156 * thus we make it as a special case of initialized extent with 0x8000 length.
157 * This way we get better extent-to-group alignment for initialized extents.
158 * Hence, the maximum number of blocks we can have in an *initialized*
159 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
160 */
161#define EXT_INIT_MAX_LEN (1UL << 15)
162#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
145 163
146 164
147#define EXT_FIRST_EXTENT(__hdr__) \ 165#define EXT_FIRST_EXTENT(__hdr__) \
@@ -188,8 +206,31 @@ ext4_ext_invalidate_cache(struct inode *inode)
188 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; 206 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
189} 207}
190 208
209static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
210{
211 /* We can not have an uninitialized extent of zero length! */
212 BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
213 ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
214}
215
216static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
217{
218 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
219 return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
220}
221
222static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
223{
224 return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
225 le16_to_cpu(ext->ee_len) :
226 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
227}
228
191extern int ext4_extent_tree_init(handle_t *, struct inode *); 229extern int ext4_extent_tree_init(handle_t *, struct inode *);
192extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); 230extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
231extern int ext4_ext_try_to_merge(struct inode *inode,
232 struct ext4_ext_path *path,
233 struct ext4_extent *);
193extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); 234extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
194extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); 235extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
195extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); 236extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h
index 9de494406995..1a511e9905aa 100644
--- a/include/linux/ext4_fs_i.h
+++ b/include/linux/ext4_fs_i.h
@@ -153,6 +153,11 @@ struct ext4_inode_info {
153 153
154 unsigned long i_ext_generation; 154 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent; 155 struct ext4_ext_cache i_cached_extent;
156 /*
157 * File creation time. Its function is same as that of
158 * struct timespec i_{a,c,m}time in the generic inode.
159 */
160 struct timespec i_crtime;
156}; 161};
157 162
158#endif /* _LINUX_EXT4_FS_I */ 163#endif /* _LINUX_EXT4_FS_I */
diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h
index 2347557a327a..1b2ffee12be9 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/include/linux/ext4_fs_sb.h
@@ -73,7 +73,7 @@ struct ext4_sb_info {
73 struct list_head s_orphan; 73 struct list_head s_orphan;
74 unsigned long s_commit_interval; 74 unsigned long s_commit_interval;
75 struct block_device *journal_bdev; 75 struct block_device *journal_bdev;
76#ifdef CONFIG_JBD_DEBUG 76#ifdef CONFIG_JBD2_DEBUG
77 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ 77 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
78 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ 78 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
79#endif 79#endif
@@ -81,6 +81,7 @@ struct ext4_sb_info {
81 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ 81 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
82 int s_jquota_fmt; /* Format of quota to use */ 82 int s_jquota_fmt; /* Format of quota to use */
83#endif 83#endif
84 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
84 85
85#ifdef EXTENTS_STATS 86#ifdef EXTENTS_STATS
86 /* ext4 extents stats */ 87 /* ext4 extents stats */
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
new file mode 100644
index 000000000000..8e912ab6a072
--- /dev/null
+++ b/include/linux/falloc.h
@@ -0,0 +1,6 @@
1#ifndef _FALLOC_H_
2#define _FALLOC_H_
3
4#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
5
6#endif /* _FALLOC_H_ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98205f680476..0b806c5e32eb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1147,6 +1147,8 @@ struct inode_operations {
1147 ssize_t (*listxattr) (struct dentry *, char *, size_t); 1147 ssize_t (*listxattr) (struct dentry *, char *, size_t);
1148 int (*removexattr) (struct dentry *, const char *); 1148 int (*removexattr) (struct dentry *, const char *);
1149 void (*truncate_range)(struct inode *, loff_t, loff_t); 1149 void (*truncate_range)(struct inode *, loff_t, loff_t);
1150 long (*fallocate)(struct inode *inode, int mode, loff_t offset,
1151 loff_t len);
1150}; 1152};
1151 1153
1152struct seq_file; 1154struct seq_file;
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0e0fedd2039a..260d6d76c5f3 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -50,14 +50,14 @@
50 */ 50 */
51#define JBD_DEFAULT_MAX_COMMIT_AGE 5 51#define JBD_DEFAULT_MAX_COMMIT_AGE 5
52 52
53#ifdef CONFIG_JBD_DEBUG 53#ifdef CONFIG_JBD2_DEBUG
54/* 54/*
55 * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal 55 * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
56 * consistency checks. By default we don't do this unless 56 * consistency checks. By default we don't do this unless
57 * CONFIG_JBD_DEBUG is on. 57 * CONFIG_JBD2_DEBUG is on.
58 */ 58 */
59#define JBD_EXPENSIVE_CHECKING 59#define JBD_EXPENSIVE_CHECKING
60extern int jbd2_journal_enable_debug; 60extern u8 jbd2_journal_enable_debug;
61 61
62#define jbd_debug(n, f, a...) \ 62#define jbd_debug(n, f, a...) \
63 do { \ 63 do { \
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 10f505c8431d..5dc13848891b 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -36,13 +36,57 @@ static inline int request_module(const char * name, ...) { return -ENOSYS; }
36#define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x))) 36#define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x)))
37 37
38struct key; 38struct key;
39extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[], 39struct file;
40 struct key *session_keyring, int wait); 40struct subprocess_info;
41
42/* Allocate a subprocess_info structure */
43struct subprocess_info *call_usermodehelper_setup(char *path,
44 char **argv, char **envp);
45
46/* Set various pieces of state into the subprocess_info structure */
47void call_usermodehelper_setkeys(struct subprocess_info *info,
48 struct key *session_keyring);
49int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
50 struct file **filp);
51void call_usermodehelper_setcleanup(struct subprocess_info *info,
52 void (*cleanup)(char **argv, char **envp));
53
54enum umh_wait {
55 UMH_NO_WAIT = -1, /* don't wait at all */
56 UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */
57 UMH_WAIT_PROC = 1, /* wait for the process to complete */
58};
59
60/* Actually execute the sub-process */
61int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait);
62
63/* Free the subprocess_info. This is only needed if you're not going
64 to call call_usermodehelper_exec */
65void call_usermodehelper_freeinfo(struct subprocess_info *info);
41 66
42static inline int 67static inline int
43call_usermodehelper(char *path, char **argv, char **envp, int wait) 68call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
44{ 69{
45 return call_usermodehelper_keys(path, argv, envp, NULL, wait); 70 struct subprocess_info *info;
71
72 info = call_usermodehelper_setup(path, argv, envp);
73 if (info == NULL)
74 return -ENOMEM;
75 return call_usermodehelper_exec(info, wait);
76}
77
78static inline int
79call_usermodehelper_keys(char *path, char **argv, char **envp,
80 struct key *session_keyring, enum umh_wait wait)
81{
82 struct subprocess_info *info;
83
84 info = call_usermodehelper_setup(path, argv, envp);
85 if (info == NULL)
86 return -ENOMEM;
87
88 call_usermodehelper_setkeys(info, session_keyring);
89 return call_usermodehelper_exec(info, wait);
46} 90}
47 91
48extern void usermodehelper_init(void); 92extern void usermodehelper_init(void);
diff --git a/include/linux/major.h b/include/linux/major.h
index 7e7c9093919a..0cb98053537a 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -158,6 +158,8 @@
158#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */ 158#define VXSPEC_MAJOR 200 /* VERITAS volume config driver */
159#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */ 159#define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */
160 160
161#define XENVBD_MAJOR 202 /* Xen virtual block device */
162
161#define MSR_MAJOR 202 163#define MSR_MAJOR 202
162#define CPUID_MAJOR 203 164#define CPUID_MAJOR 203
163 165
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index da7a13c97eb8..9820ca1e45e2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1098,10 +1098,8 @@ extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all
1098extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); 1098extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
1099extern int dev_mc_sync(struct net_device *to, struct net_device *from); 1099extern int dev_mc_sync(struct net_device *to, struct net_device *from);
1100extern void dev_mc_unsync(struct net_device *to, struct net_device *from); 1100extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
1101extern void dev_mc_discard(struct net_device *dev);
1102extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); 1101extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
1103extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); 1102extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
1104extern void __dev_addr_discard(struct dev_addr_list **list);
1105extern void dev_set_promiscuity(struct net_device *dev, int inc); 1103extern void dev_set_promiscuity(struct net_device *dev, int inc);
1106extern void dev_set_allmulti(struct net_device *dev, int inc); 1104extern void dev_set_allmulti(struct net_device *dev, int inc);
1107extern void netdev_state_change(struct net_device *dev); 1105extern void netdev_state_change(struct net_device *dev);
diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h
index 34ab0fb736e2..a92fefc3c7ec 100644
--- a/include/linux/netfilter_ipv4/ipt_iprange.h
+++ b/include/linux/netfilter_ipv4/ipt_iprange.h
@@ -1,6 +1,8 @@
1#ifndef _IPT_IPRANGE_H 1#ifndef _IPT_IPRANGE_H
2#define _IPT_IPRANGE_H 2#define _IPT_IPRANGE_H
3 3
4#include <linux/types.h>
5
4#define IPRANGE_SRC 0x01 /* Match source IP address */ 6#define IPRANGE_SRC 0x01 /* Match source IP address */
5#define IPRANGE_DST 0x02 /* Match destination IP address */ 7#define IPRANGE_DST 0x02 /* Match destination IP address */
6#define IPRANGE_SRC_INV 0x10 /* Negate the condition */ 8#define IPRANGE_SRC_INV 0x10 /* Negate the condition */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index ae2d79f2107e..731cd2ac3227 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -92,6 +92,7 @@
92 92
93/* PG_owner_priv_1 users should have descriptive aliases */ 93/* PG_owner_priv_1 users should have descriptive aliases */
94#define PG_checked PG_owner_priv_1 /* Used by some filesystems */ 94#define PG_checked PG_owner_priv_1 /* Used by some filesystems */
95#define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */
95 96
96#if (BITS_PER_LONG > 32) 97#if (BITS_PER_LONG > 32)
97/* 98/*
@@ -170,6 +171,10 @@ static inline void SetPageUptodate(struct page *page)
170#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) 171#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
171#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) 172#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags)
172 173
174#define PagePinned(page) test_bit(PG_pinned, &(page)->flags)
175#define SetPagePinned(page) set_bit(PG_pinned, &(page)->flags)
176#define ClearPagePinned(page) clear_bit(PG_pinned, &(page)->flags)
177
173#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) 178#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
174#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) 179#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags)
175#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) 180#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags)
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 1dd1c707311f..85ea63f462af 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -67,6 +67,11 @@ extern void kernel_power_off(void);
67 67
68void ctrl_alt_del(void); 68void ctrl_alt_del(void);
69 69
70#define POWEROFF_CMD_PATH_LEN 256
71extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN];
72
73extern int orderly_poweroff(bool force);
74
70/* 75/*
71 * Emergency restart, callable from an interrupt handler. 76 * Emergency restart, callable from an interrupt handler.
72 */ 77 */
diff --git a/include/linux/string.h b/include/linux/string.h
index 7f2eb6a477f9..836062b7582a 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -105,8 +105,12 @@ extern void * memchr(const void *,int,__kernel_size_t);
105#endif 105#endif
106 106
107extern char *kstrdup(const char *s, gfp_t gfp); 107extern char *kstrdup(const char *s, gfp_t gfp);
108extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
108extern void *kmemdup(const void *src, size_t len, gfp_t gfp); 109extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
109 110
111extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
112extern void argv_free(char **argv);
113
110#ifdef __cplusplus 114#ifdef __cplusplus
111} 115}
112#endif 116#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 83d0ec11235e..7a8b1e3322e0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
610asmlinkage long sys_timerfd(int ufd, int clockid, int flags, 610asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
611 const struct itimerspec __user *utmr); 611 const struct itimerspec __user *utmr);
612asmlinkage long sys_eventfd(unsigned int count); 612asmlinkage long sys_eventfd(unsigned int count);
613asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
613 614
614int kernel_execve(const char *filename, char *const argv[], char *const envp[]); 615int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
615 616
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 132b260aef1e..c2b10cae5da5 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -70,6 +70,10 @@ extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
70 struct page ***pages); 70 struct page ***pages);
71extern void unmap_kernel_range(unsigned long addr, unsigned long size); 71extern void unmap_kernel_range(unsigned long addr, unsigned long size);
72 72
73/* Allocate/destroy a 'vmalloc' VM area. */
74extern struct vm_struct *alloc_vm_area(size_t size);
75extern void free_vm_area(struct vm_struct *area);
76
73/* 77/*
74 * Internals. Dont't use.. 78 * Internals. Dont't use..
75 */ 79 */
diff --git a/include/mtd/ubi-header.h b/include/mtd/ubi-header.h
index fa479c71aa34..74efa7763479 100644
--- a/include/mtd/ubi-header.h
+++ b/include/mtd/ubi-header.h
@@ -74,42 +74,13 @@ enum {
74 UBI_COMPAT_REJECT = 5 74 UBI_COMPAT_REJECT = 5
75}; 75};
76 76
77/*
78 * ubi16_t/ubi32_t/ubi64_t - 16, 32, and 64-bit integers used in UBI on-flash
79 * data structures.
80 */
81typedef struct {
82 uint16_t int16;
83} __attribute__ ((packed)) ubi16_t;
84
85typedef struct {
86 uint32_t int32;
87} __attribute__ ((packed)) ubi32_t;
88
89typedef struct {
90 uint64_t int64;
91} __attribute__ ((packed)) ubi64_t;
92
93/*
94 * In this implementation of UBI uses the big-endian format for on-flash
95 * integers. The below are the corresponding conversion macros.
96 */
97#define cpu_to_ubi16(x) ((ubi16_t){__cpu_to_be16(x)})
98#define ubi16_to_cpu(x) ((uint16_t)__be16_to_cpu((x).int16))
99
100#define cpu_to_ubi32(x) ((ubi32_t){__cpu_to_be32(x)})
101#define ubi32_to_cpu(x) ((uint32_t)__be32_to_cpu((x).int32))
102
103#define cpu_to_ubi64(x) ((ubi64_t){__cpu_to_be64(x)})
104#define ubi64_to_cpu(x) ((uint64_t)__be64_to_cpu((x).int64))
105
106/* Sizes of UBI headers */ 77/* Sizes of UBI headers */
107#define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr) 78#define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr)
108#define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr) 79#define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr)
109 80
110/* Sizes of UBI headers without the ending CRC */ 81/* Sizes of UBI headers without the ending CRC */
111#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(ubi32_t)) 82#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(__be32))
112#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(ubi32_t)) 83#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(__be32))
113 84
114/** 85/**
115 * struct ubi_ec_hdr - UBI erase counter header. 86 * struct ubi_ec_hdr - UBI erase counter header.
@@ -137,14 +108,14 @@ typedef struct {
137 * eraseblocks. 108 * eraseblocks.
138 */ 109 */
139struct ubi_ec_hdr { 110struct ubi_ec_hdr {
140 ubi32_t magic; 111 __be32 magic;
141 uint8_t version; 112 __u8 version;
142 uint8_t padding1[3]; 113 __u8 padding1[3];
143 ubi64_t ec; /* Warning: the current limit is 31-bit anyway! */ 114 __be64 ec; /* Warning: the current limit is 31-bit anyway! */
144 ubi32_t vid_hdr_offset; 115 __be32 vid_hdr_offset;
145 ubi32_t data_offset; 116 __be32 data_offset;
146 uint8_t padding2[36]; 117 __u8 padding2[36];
147 ubi32_t hdr_crc; 118 __be32 hdr_crc;
148} __attribute__ ((packed)); 119} __attribute__ ((packed));
149 120
150/** 121/**
@@ -262,22 +233,22 @@ struct ubi_ec_hdr {
262 * software (say, cramfs) on top of the UBI volume. 233 * software (say, cramfs) on top of the UBI volume.
263 */ 234 */
264struct ubi_vid_hdr { 235struct ubi_vid_hdr {
265 ubi32_t magic; 236 __be32 magic;
266 uint8_t version; 237 __u8 version;
267 uint8_t vol_type; 238 __u8 vol_type;
268 uint8_t copy_flag; 239 __u8 copy_flag;
269 uint8_t compat; 240 __u8 compat;
270 ubi32_t vol_id; 241 __be32 vol_id;
271 ubi32_t lnum; 242 __be32 lnum;
272 ubi32_t leb_ver; /* obsolete, to be removed, don't use */ 243 __be32 leb_ver; /* obsolete, to be removed, don't use */
273 ubi32_t data_size; 244 __be32 data_size;
274 ubi32_t used_ebs; 245 __be32 used_ebs;
275 ubi32_t data_pad; 246 __be32 data_pad;
276 ubi32_t data_crc; 247 __be32 data_crc;
277 uint8_t padding1[4]; 248 __u8 padding1[4];
278 ubi64_t sqnum; 249 __be64 sqnum;
279 uint8_t padding2[12]; 250 __u8 padding2[12];
280 ubi32_t hdr_crc; 251 __be32 hdr_crc;
281} __attribute__ ((packed)); 252} __attribute__ ((packed));
282 253
283/* Internal UBI volumes count */ 254/* Internal UBI volumes count */
@@ -306,7 +277,7 @@ struct ubi_vid_hdr {
306#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record) 277#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record)
307 278
308/* Size of the volume table record without the ending CRC */ 279/* Size of the volume table record without the ending CRC */
309#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(ubi32_t)) 280#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(__be32))
310 281
311/** 282/**
312 * struct ubi_vtbl_record - a record in the volume table. 283 * struct ubi_vtbl_record - a record in the volume table.
@@ -346,15 +317,15 @@ struct ubi_vid_hdr {
346 * Empty records contain all zeroes and the CRC checksum of those zeroes. 317 * Empty records contain all zeroes and the CRC checksum of those zeroes.
347 */ 318 */
348struct ubi_vtbl_record { 319struct ubi_vtbl_record {
349 ubi32_t reserved_pebs; 320 __be32 reserved_pebs;
350 ubi32_t alignment; 321 __be32 alignment;
351 ubi32_t data_pad; 322 __be32 data_pad;
352 uint8_t vol_type; 323 __u8 vol_type;
353 uint8_t upd_marker; 324 __u8 upd_marker;
354 ubi16_t name_len; 325 __be16 name_len;
355 uint8_t name[UBI_VOL_NAME_MAX+1]; 326 __u8 name[UBI_VOL_NAME_MAX+1];
356 uint8_t padding2[24]; 327 __u8 padding2[24];
357 ubi32_t crc; 328 __be32 crc;
358} __attribute__ ((packed)); 329} __attribute__ ((packed));
359 330
360#endif /* !__UBI_HEADER_H__ */ 331#endif /* !__UBI_HEADER_H__ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a8af9ae00177..8b404b1ef7c8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -652,8 +652,7 @@ struct tcp_congestion_ops {
652 /* lower bound for congestion window (optional) */ 652 /* lower bound for congestion window (optional) */
653 u32 (*min_cwnd)(const struct sock *sk); 653 u32 (*min_cwnd)(const struct sock *sk);
654 /* do new cwnd calculation (required) */ 654 /* do new cwnd calculation (required) */
655 void (*cong_avoid)(struct sock *sk, u32 ack, 655 void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight, int good_ack);
656 u32 rtt, u32 in_flight, int good_ack);
657 /* call before changing ca_state (optional) */ 656 /* call before changing ca_state (optional) */
658 void (*set_state)(struct sock *sk, u8 new_state); 657 void (*set_state)(struct sock *sk, u8 new_state);
659 /* call when cwnd event occurs (optional) */ 658 /* call when cwnd event occurs (optional) */
@@ -684,8 +683,7 @@ extern void tcp_slow_start(struct tcp_sock *tp);
684 683
685extern struct tcp_congestion_ops tcp_init_congestion_ops; 684extern struct tcp_congestion_ops tcp_init_congestion_ops;
686extern u32 tcp_reno_ssthresh(struct sock *sk); 685extern u32 tcp_reno_ssthresh(struct sock *sk);
687extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, 686extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag);
688 u32 rtt, u32 in_flight, int flag);
689extern u32 tcp_reno_min_cwnd(const struct sock *sk); 687extern u32 tcp_reno_min_cwnd(const struct sock *sk);
690extern struct tcp_congestion_ops tcp_reno; 688extern struct tcp_congestion_ops tcp_reno;
691 689
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index ae959e950174..a5f80bfbaaa4 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -585,7 +585,6 @@ static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ct
585struct xfrm_dst 585struct xfrm_dst
586{ 586{
587 union { 587 union {
588 struct xfrm_dst *next;
589 struct dst_entry dst; 588 struct dst_entry dst;
590 struct rtable rt; 589 struct rtable rt;
591 struct rt6_info rt6; 590 struct rt6_info rt6;
diff --git a/include/xen/events.h b/include/xen/events.h
new file mode 100644
index 000000000000..2bde54d29be5
--- /dev/null
+++ b/include/xen/events.h
@@ -0,0 +1,48 @@
1#ifndef _XEN_EVENTS_H
2#define _XEN_EVENTS_H
3
4#include <linux/interrupt.h>
5
6#include <xen/interface/event_channel.h>
7#include <asm/xen/hypercall.h>
8
9enum ipi_vector {
10 XEN_RESCHEDULE_VECTOR,
11 XEN_CALL_FUNCTION_VECTOR,
12
13 XEN_NR_IPIS,
14};
15
16int bind_evtchn_to_irq(unsigned int evtchn);
17int bind_evtchn_to_irqhandler(unsigned int evtchn,
18 irq_handler_t handler,
19 unsigned long irqflags, const char *devname,
20 void *dev_id);
21int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
22 irq_handler_t handler,
23 unsigned long irqflags, const char *devname,
24 void *dev_id);
25int bind_ipi_to_irqhandler(enum ipi_vector ipi,
26 unsigned int cpu,
27 irq_handler_t handler,
28 unsigned long irqflags,
29 const char *devname,
30 void *dev_id);
31
32/*
33 * Common unbind function for all event sources. Takes IRQ to unbind from.
34 * Automatically closes the underlying event channel (even for bindings
35 * made with bind_evtchn_to_irqhandler()).
36 */
37void unbind_from_irqhandler(unsigned int irq, void *dev_id);
38
39void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
40
41static inline void notify_remote_via_evtchn(int port)
42{
43 struct evtchn_send send = { .port = port };
44 (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
45}
46
47extern void notify_remote_via_irq(int irq);
48#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/features.h b/include/xen/features.h
new file mode 100644
index 000000000000..27292d4d2a6a
--- /dev/null
+++ b/include/xen/features.h
@@ -0,0 +1,23 @@
1/******************************************************************************
2 * features.h
3 *
4 * Query the features reported by Xen.
5 *
6 * Copyright (c) 2006, Ian Campbell
7 */
8
9#ifndef __XEN_FEATURES_H__
10#define __XEN_FEATURES_H__
11
12#include <xen/interface/features.h>
13
14void xen_setup_features(void);
15
16extern u8 xen_features[XENFEAT_NR_SUBMAPS * 32];
17
18static inline int xen_feature(int flag)
19{
20 return xen_features[flag];
21}
22
23#endif /* __ASM_XEN_FEATURES_H__ */
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
new file mode 100644
index 000000000000..761c83498e03
--- /dev/null
+++ b/include/xen/grant_table.h
@@ -0,0 +1,107 @@
1/******************************************************************************
2 * grant_table.h
3 *
4 * Two sets of functionality:
5 * 1. Granting foreign access to our memory reservation.
6 * 2. Accessing others' memory reservations via grant references.
7 * (i.e., mechanisms for both sender and recipient of grant references)
8 *
9 * Copyright (c) 2004-2005, K A Fraser
10 * Copyright (c) 2005, Christopher Clark
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
17 *
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
24 *
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
34 * IN THE SOFTWARE.
35 */
36
37#ifndef __ASM_GNTTAB_H__
38#define __ASM_GNTTAB_H__
39
40#include <asm/xen/hypervisor.h>
41#include <xen/interface/grant_table.h>
42
43/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
44#define NR_GRANT_FRAMES 4
45
46struct gnttab_free_callback {
47 struct gnttab_free_callback *next;
48 void (*fn)(void *);
49 void *arg;
50 u16 count;
51};
52
53int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
54 int readonly);
55
56/*
57 * End access through the given grant reference, iff the grant entry is no
58 * longer in use. Return 1 if the grant entry was freed, 0 if it is still in
59 * use.
60 */
61int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly);
62
63/*
64 * Eventually end access through the given grant reference, and once that
65 * access has been ended, free the given page too. Access will be ended
66 * immediately iff the grant entry is not in use, otherwise it will happen
67 * some time later. page may be 0, in which case no freeing will occur.
68 */
69void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
70 unsigned long page);
71
72int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);
73
74unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
75unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
76
77int gnttab_query_foreign_access(grant_ref_t ref);
78
79/*
80 * operations on reserved batches of grant references
81 */
82int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
83
84void gnttab_free_grant_reference(grant_ref_t ref);
85
86void gnttab_free_grant_references(grant_ref_t head);
87
88int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
89
90int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
91
92void gnttab_release_grant_reference(grant_ref_t *private_head,
93 grant_ref_t release);
94
95void gnttab_request_free_callback(struct gnttab_free_callback *callback,
96 void (*fn)(void *), void *arg, u16 count);
97void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
98
99void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
100 unsigned long frame, int readonly);
101
102void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
103 unsigned long pfn);
104
105#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
106
107#endif /* __ASM_GNTTAB_H__ */
diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h
new file mode 100644
index 000000000000..21c0ecfd786d
--- /dev/null
+++ b/include/xen/hvc-console.h
@@ -0,0 +1,6 @@
1#ifndef XEN_HVC_CONSOLE_H
2#define XEN_HVC_CONSOLE_H
3
4extern struct console xenboot_console;
5
6#endif /* XEN_HVC_CONSOLE_H */
diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h
new file mode 100644
index 000000000000..a64d3df5bd95
--- /dev/null
+++ b/include/xen/interface/elfnote.h
@@ -0,0 +1,133 @@
1/******************************************************************************
2 * elfnote.h
3 *
4 * Definitions used for the Xen ELF notes.
5 *
6 * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
7 */
8
9#ifndef __XEN_PUBLIC_ELFNOTE_H__
10#define __XEN_PUBLIC_ELFNOTE_H__
11
12/*
13 * The notes should live in a SHT_NOTE segment and have "Xen" in the
14 * name field.
15 *
16 * Numeric types are either 4 or 8 bytes depending on the content of
17 * the desc field.
18 *
19 * LEGACY indicated the fields in the legacy __xen_guest string which
20 * this a note type replaces.
21 */
22
23/*
24 * NAME=VALUE pair (string).
25 *
26 * LEGACY: FEATURES and PAE
27 */
28#define XEN_ELFNOTE_INFO 0
29
30/*
31 * The virtual address of the entry point (numeric).
32 *
33 * LEGACY: VIRT_ENTRY
34 */
35#define XEN_ELFNOTE_ENTRY 1
36
37/* The virtual address of the hypercall transfer page (numeric).
38 *
39 * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page
40 * number not a virtual address)
41 */
42#define XEN_ELFNOTE_HYPERCALL_PAGE 2
43
44/* The virtual address where the kernel image should be mapped (numeric).
45 *
46 * Defaults to 0.
47 *
48 * LEGACY: VIRT_BASE
49 */
50#define XEN_ELFNOTE_VIRT_BASE 3
51
52/*
53 * The offset of the ELF paddr field from the acutal required
54 * psuedo-physical address (numeric).
55 *
56 * This is used to maintain backwards compatibility with older kernels
57 * which wrote __PAGE_OFFSET into that field. This field defaults to 0
58 * if not present.
59 *
60 * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE)
61 */
62#define XEN_ELFNOTE_PADDR_OFFSET 4
63
64/*
65 * The version of Xen that we work with (string).
66 *
67 * LEGACY: XEN_VER
68 */
69#define XEN_ELFNOTE_XEN_VERSION 5
70
71/*
72 * The name of the guest operating system (string).
73 *
74 * LEGACY: GUEST_OS
75 */
76#define XEN_ELFNOTE_GUEST_OS 6
77
78/*
79 * The version of the guest operating system (string).
80 *
81 * LEGACY: GUEST_VER
82 */
83#define XEN_ELFNOTE_GUEST_VERSION 7
84
85/*
86 * The loader type (string).
87 *
88 * LEGACY: LOADER
89 */
90#define XEN_ELFNOTE_LOADER 8
91
92/*
93 * The kernel supports PAE (x86/32 only, string = "yes" or "no").
94 *
95 * LEGACY: PAE (n.b. The legacy interface included a provision to
96 * indicate 'extended-cr3' support allowing L3 page tables to be
97 * placed above 4G. It is assumed that any kernel new enough to use
98 * these ELF notes will include this and therefore "yes" here is
99 * equivalent to "yes[entended-cr3]" in the __xen_guest interface.
100 */
101#define XEN_ELFNOTE_PAE_MODE 9
102
103/*
104 * The features supported/required by this kernel (string).
105 *
106 * The string must consist of a list of feature names (as given in
107 * features.h, without the "XENFEAT_" prefix) separated by '|'
108 * characters. If a feature is required for the kernel to function
109 * then the feature name must be preceded by a '!' character.
110 *
111 * LEGACY: FEATURES
112 */
113#define XEN_ELFNOTE_FEATURES 10
114
115/*
116 * The kernel requires the symbol table to be loaded (string = "yes" or "no")
117 * LEGACY: BSD_SYMTAB (n.b. The legacy treated the presence or absence
118 * of this string as a boolean flag rather than requiring "yes" or
119 * "no".
120 */
121#define XEN_ELFNOTE_BSD_SYMTAB 11
122
123#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
124
125/*
126 * Local variables:
127 * mode: C
128 * c-set-style: "BSD"
129 * c-basic-offset: 4
130 * tab-width: 4
131 * indent-tabs-mode: nil
132 * End:
133 */
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
new file mode 100644
index 000000000000..919b5bdcb2bd
--- /dev/null
+++ b/include/xen/interface/event_channel.h
@@ -0,0 +1,195 @@
1/******************************************************************************
2 * event_channel.h
3 *
4 * Event channels between domains.
5 *
6 * Copyright (c) 2003-2004, K A Fraser.
7 */
8
9#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
10#define __XEN_PUBLIC_EVENT_CHANNEL_H__
11
12typedef uint32_t evtchn_port_t;
13DEFINE_GUEST_HANDLE(evtchn_port_t);
14
15/*
16 * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
17 * accepting interdomain bindings from domain <remote_dom>. A fresh port
18 * is allocated in <dom> and returned as <port>.
19 * NOTES:
20 * 1. If the caller is unprivileged then <dom> must be DOMID_SELF.
21 * 2. <rdom> may be DOMID_SELF, allowing loopback connections.
22 */
23#define EVTCHNOP_alloc_unbound 6
24struct evtchn_alloc_unbound {
25 /* IN parameters */
26 domid_t dom, remote_dom;
27 /* OUT parameters */
28 evtchn_port_t port;
29};
30
31/*
32 * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
33 * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
34 * a port that is unbound and marked as accepting bindings from the calling
35 * domain. A fresh port is allocated in the calling domain and returned as
36 * <local_port>.
37 * NOTES:
38 * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
39 */
40#define EVTCHNOP_bind_interdomain 0
41struct evtchn_bind_interdomain {
42 /* IN parameters. */
43 domid_t remote_dom;
44 evtchn_port_t remote_port;
45 /* OUT parameters. */
46 evtchn_port_t local_port;
47};
48
49/*
50 * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
51 * vcpu.
52 * NOTES:
53 * 1. A virtual IRQ may be bound to at most one event channel per vcpu.
54 * 2. The allocated event channel is bound to the specified vcpu. The binding
55 * may not be changed.
56 */
57#define EVTCHNOP_bind_virq 1
58struct evtchn_bind_virq {
59 /* IN parameters. */
60 uint32_t virq;
61 uint32_t vcpu;
62 /* OUT parameters. */
63 evtchn_port_t port;
64};
65
66/*
67 * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
68 * NOTES:
69 * 1. A physical IRQ may be bound to at most one event channel per domain.
70 * 2. Only a sufficiently-privileged domain may bind to a physical IRQ.
71 */
72#define EVTCHNOP_bind_pirq 2
73struct evtchn_bind_pirq {
74 /* IN parameters. */
75 uint32_t pirq;
76#define BIND_PIRQ__WILL_SHARE 1
77 uint32_t flags; /* BIND_PIRQ__* */
78 /* OUT parameters. */
79 evtchn_port_t port;
80};
81
82/*
83 * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
84 * NOTES:
85 * 1. The allocated event channel is bound to the specified vcpu. The binding
86 * may not be changed.
87 */
88#define EVTCHNOP_bind_ipi 7
89struct evtchn_bind_ipi {
90 uint32_t vcpu;
91 /* OUT parameters. */
92 evtchn_port_t port;
93};
94
95/*
96 * EVTCHNOP_close: Close a local event channel <port>. If the channel is
97 * interdomain then the remote end is placed in the unbound state
98 * (EVTCHNSTAT_unbound), awaiting a new connection.
99 */
100#define EVTCHNOP_close 3
101struct evtchn_close {
102 /* IN parameters. */
103 evtchn_port_t port;
104};
105
106/*
107 * EVTCHNOP_send: Send an event to the remote end of the channel whose local
108 * endpoint is <port>.
109 */
110#define EVTCHNOP_send 4
111struct evtchn_send {
112 /* IN parameters. */
113 evtchn_port_t port;
114};
115
116/*
117 * EVTCHNOP_status: Get the current status of the communication channel which
118 * has an endpoint at <dom, port>.
119 * NOTES:
120 * 1. <dom> may be specified as DOMID_SELF.
121 * 2. Only a sufficiently-privileged domain may obtain the status of an event
122 * channel for which <dom> is not DOMID_SELF.
123 */
124#define EVTCHNOP_status 5
125struct evtchn_status {
126 /* IN parameters */
127 domid_t dom;
128 evtchn_port_t port;
129 /* OUT parameters */
130#define EVTCHNSTAT_closed 0 /* Channel is not in use. */
131#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/
132#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */
133#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */
134#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */
135#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */
136 uint32_t status;
137 uint32_t vcpu; /* VCPU to which this channel is bound. */
138 union {
139 struct {
140 domid_t dom;
141 } unbound; /* EVTCHNSTAT_unbound */
142 struct {
143 domid_t dom;
144 evtchn_port_t port;
145 } interdomain; /* EVTCHNSTAT_interdomain */
146 uint32_t pirq; /* EVTCHNSTAT_pirq */
147 uint32_t virq; /* EVTCHNSTAT_virq */
148 } u;
149};
150
151/*
152 * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
153 * event is pending.
154 * NOTES:
155 * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
156 * the binding. This binding cannot be changed.
157 * 2. All other channels notify vcpu0 by default. This default is set when
158 * the channel is allocated (a port that is freed and subsequently reused
159 * has its binding reset to vcpu0).
160 */
161#define EVTCHNOP_bind_vcpu 8
162struct evtchn_bind_vcpu {
163 /* IN parameters. */
164 evtchn_port_t port;
165 uint32_t vcpu;
166};
167
168/*
169 * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
170 * a notification to the appropriate VCPU if an event is pending.
171 */
172#define EVTCHNOP_unmask 9
173struct evtchn_unmask {
174 /* IN parameters. */
175 evtchn_port_t port;
176};
177
178struct evtchn_op {
179 uint32_t cmd; /* EVTCHNOP_* */
180 union {
181 struct evtchn_alloc_unbound alloc_unbound;
182 struct evtchn_bind_interdomain bind_interdomain;
183 struct evtchn_bind_virq bind_virq;
184 struct evtchn_bind_pirq bind_pirq;
185 struct evtchn_bind_ipi bind_ipi;
186 struct evtchn_close close;
187 struct evtchn_send send;
188 struct evtchn_status status;
189 struct evtchn_bind_vcpu bind_vcpu;
190 struct evtchn_unmask unmask;
191 } u;
192};
193DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
194
195#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
new file mode 100644
index 000000000000..d73228d16488
--- /dev/null
+++ b/include/xen/interface/features.h
@@ -0,0 +1,43 @@
1/******************************************************************************
2 * features.h
3 *
4 * Feature flags, reported by XENVER_get_features.
5 *
6 * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
7 */
8
9#ifndef __XEN_PUBLIC_FEATURES_H__
10#define __XEN_PUBLIC_FEATURES_H__
11
12/*
13 * If set, the guest does not need to write-protect its pagetables, and can
14 * update them via direct writes.
15 */
16#define XENFEAT_writable_page_tables 0
17
18/*
19 * If set, the guest does not need to write-protect its segment descriptor
20 * tables, and can update them via direct writes.
21 */
22#define XENFEAT_writable_descriptor_tables 1
23
24/*
25 * If set, translation between the guest's 'pseudo-physical' address space
26 * and the host's machine address space are handled by the hypervisor. In this
27 * mode the guest does not need to perform phys-to/from-machine translations
28 * when performing page table operations.
29 */
30#define XENFEAT_auto_translated_physmap 2
31
32/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
33#define XENFEAT_supervisor_mode_kernel 3
34
35/*
36 * If set, the guest does not need to allocate x86 PAE page directories
37 * below 4GB. This flag is usually implied by auto_translated_physmap.
38 */
39#define XENFEAT_pae_pgdir_above_4gb 4
40
41#define XENFEAT_NR_SUBMAPS 1
42
43#endif /* __XEN_PUBLIC_FEATURES_H__ */
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
new file mode 100644
index 000000000000..219049802cf2
--- /dev/null
+++ b/include/xen/interface/grant_table.h
@@ -0,0 +1,375 @@
1/******************************************************************************
2 * grant_table.h
3 *
4 * Interface for granting foreign access to page frames, and receiving
5 * page-ownership transfers.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to
9 * deal in the Software without restriction, including without limitation the
10 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 *
25 * Copyright (c) 2004, K A Fraser
26 */
27
28#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
29#define __XEN_PUBLIC_GRANT_TABLE_H__
30
31
32/***********************************
33 * GRANT TABLE REPRESENTATION
34 */
35
36/* Some rough guidelines on accessing and updating grant-table entries
37 * in a concurrency-safe manner. For more information, Linux contains a
38 * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
39 *
40 * NB. WMB is a no-op on current-generation x86 processors. However, a
41 * compiler barrier will still be required.
42 *
43 * Introducing a valid entry into the grant table:
44 * 1. Write ent->domid.
45 * 2. Write ent->frame:
46 * GTF_permit_access: Frame to which access is permitted.
47 * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
48 * frame, or zero if none.
49 * 3. Write memory barrier (WMB).
50 * 4. Write ent->flags, inc. valid type.
51 *
52 * Invalidating an unused GTF_permit_access entry:
53 * 1. flags = ent->flags.
54 * 2. Observe that !(flags & (GTF_reading|GTF_writing)).
55 * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
56 * NB. No need for WMB as reuse of entry is control-dependent on success of
57 * step 3, and all architectures guarantee ordering of ctrl-dep writes.
58 *
59 * Invalidating an in-use GTF_permit_access entry:
60 * This cannot be done directly. Request assistance from the domain controller
61 * which can set a timeout on the use of a grant entry and take necessary
62 * action. (NB. This is not yet implemented!).
63 *
64 * Invalidating an unused GTF_accept_transfer entry:
65 * 1. flags = ent->flags.
66 * 2. Observe that !(flags & GTF_transfer_committed). [*]
67 * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
68 * NB. No need for WMB as reuse of entry is control-dependent on success of
69 * step 3, and all architectures guarantee ordering of ctrl-dep writes.
70 * [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
71 * The guest must /not/ modify the grant entry until the address of the
72 * transferred frame is written. It is safe for the guest to spin waiting
73 * for this to occur (detect by observing GTF_transfer_completed in
74 * ent->flags).
75 *
76 * Invalidating a committed GTF_accept_transfer entry:
77 * 1. Wait for (ent->flags & GTF_transfer_completed).
78 *
79 * Changing a GTF_permit_access from writable to read-only:
80 * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
81 *
82 * Changing a GTF_permit_access from read-only to writable:
83 * Use SMP-safe bit-setting instruction.
84 */
85
86/*
87 * A grant table comprises a packed array of grant entries in one or more
88 * page frames shared between Xen and a guest.
89 * [XEN]: This field is written by Xen and read by the sharing guest.
90 * [GST]: This field is written by the guest and read by Xen.
91 */
92struct grant_entry {
93 /* GTF_xxx: various type and flag information. [XEN,GST] */
94 uint16_t flags;
95 /* The domain being granted foreign privileges. [GST] */
96 domid_t domid;
97 /*
98 * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
99 * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
100 */
101 uint32_t frame;
102};
103
104/*
105 * Type of grant entry.
106 * GTF_invalid: This grant entry grants no privileges.
107 * GTF_permit_access: Allow @domid to map/access @frame.
108 * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
109 * to this guest. Xen writes the page number to @frame.
110 */
111#define GTF_invalid (0U<<0)
112#define GTF_permit_access (1U<<0)
113#define GTF_accept_transfer (2U<<0)
114#define GTF_type_mask (3U<<0)
115
116/*
117 * Subflags for GTF_permit_access.
118 * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
119 * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
120 * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
121 */
122#define _GTF_readonly (2)
123#define GTF_readonly (1U<<_GTF_readonly)
124#define _GTF_reading (3)
125#define GTF_reading (1U<<_GTF_reading)
126#define _GTF_writing (4)
127#define GTF_writing (1U<<_GTF_writing)
128
129/*
130 * Subflags for GTF_accept_transfer:
131 * GTF_transfer_committed: Xen sets this flag to indicate that it is committed
132 * to transferring ownership of a page frame. When a guest sees this flag
133 * it must /not/ modify the grant entry until GTF_transfer_completed is
134 * set by Xen.
135 * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
136 * after reading GTF_transfer_committed. Xen will always write the frame
137 * address, followed by ORing this flag, in a timely manner.
138 */
139#define _GTF_transfer_committed (2)
140#define GTF_transfer_committed (1U<<_GTF_transfer_committed)
141#define _GTF_transfer_completed (3)
142#define GTF_transfer_completed (1U<<_GTF_transfer_completed)
143
144
145/***********************************
146 * GRANT TABLE QUERIES AND USES
147 */
148
149/*
150 * Reference to a grant entry in a specified domain's grant table.
151 */
152typedef uint32_t grant_ref_t;
153
154/*
155 * Handle to track a mapping created via a grant reference.
156 */
157typedef uint32_t grant_handle_t;
158
159/*
160 * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
161 * by devices and/or host CPUs. If successful, <handle> is a tracking number
162 * that must be presented later to destroy the mapping(s). On error, <handle>
163 * is a negative status code.
164 * NOTES:
165 * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
166 * via which I/O devices may access the granted frame.
167 * 2. If GNTMAP_host_map is specified then a mapping will be added at
168 * either a host virtual address in the current address space, or at
169 * a PTE at the specified machine address. The type of mapping to
170 * perform is selected through the GNTMAP_contains_pte flag, and the
171 * address is specified in <host_addr>.
172 * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
173 * host mapping is destroyed by other means then it is *NOT* guaranteed
174 * to be accounted to the correct grant reference!
175 */
176#define GNTTABOP_map_grant_ref 0
177struct gnttab_map_grant_ref {
178 /* IN parameters. */
179 uint64_t host_addr;
180 uint32_t flags; /* GNTMAP_* */
181 grant_ref_t ref;
182 domid_t dom;
183 /* OUT parameters. */
184 int16_t status; /* GNTST_* */
185 grant_handle_t handle;
186 uint64_t dev_bus_addr;
187};
188
189/*
190 * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
191 * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
192 * field is ignored. If non-zero, they must refer to a device/host mapping
193 * that is tracked by <handle>
194 * NOTES:
195 * 1. The call may fail in an undefined manner if either mapping is not
196 * tracked by <handle>.
197 * 3. After executing a batch of unmaps, it is guaranteed that no stale
198 * mappings will remain in the device or host TLBs.
199 */
200#define GNTTABOP_unmap_grant_ref 1
201struct gnttab_unmap_grant_ref {
202 /* IN parameters. */
203 uint64_t host_addr;
204 uint64_t dev_bus_addr;
205 grant_handle_t handle;
206 /* OUT parameters. */
207 int16_t status; /* GNTST_* */
208};
209
210/*
211 * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
212 * <nr_frames> pages. The frame addresses are written to the <frame_list>.
213 * Only <nr_frames> addresses are written, even if the table is larger.
214 * NOTES:
215 * 1. <dom> may be specified as DOMID_SELF.
216 * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
217 * 3. Xen may not support more than a single grant-table page per domain.
218 */
219#define GNTTABOP_setup_table 2
220struct gnttab_setup_table {
221 /* IN parameters. */
222 domid_t dom;
223 uint32_t nr_frames;
224 /* OUT parameters. */
225 int16_t status; /* GNTST_* */
226 ulong *frame_list;
227};
228
229/*
230 * GNTTABOP_dump_table: Dump the contents of the grant table to the
231 * xen console. Debugging use only.
232 */
233#define GNTTABOP_dump_table 3
234struct gnttab_dump_table {
235 /* IN parameters. */
236 domid_t dom;
237 /* OUT parameters. */
238 int16_t status; /* GNTST_* */
239};
240
241/*
242 * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
243 * foreign domain has previously registered its interest in the transfer via
244 * <domid, ref>.
245 *
246 * Note that, even if the transfer fails, the specified page no longer belongs
247 * to the calling domain *unless* the error is GNTST_bad_page.
248 */
249#define GNTTABOP_transfer 4
250struct gnttab_transfer {
251 /* IN parameters. */
252 unsigned long mfn;
253 domid_t domid;
254 grant_ref_t ref;
255 /* OUT parameters. */
256 int16_t status;
257};
258
259
260/*
261 * GNTTABOP_copy: Hypervisor based copy
262 * source and destinations can be eithers MFNs or, for foreign domains,
263 * grant references. the foreign domain has to grant read/write access
264 * in its grant table.
265 *
266 * The flags specify what type source and destinations are (either MFN
267 * or grant reference).
268 *
269 * Note that this can also be used to copy data between two domains
270 * via a third party if the source and destination domains had previously
271 * grant appropriate access to their pages to the third party.
272 *
273 * source_offset specifies an offset in the source frame, dest_offset
274 * the offset in the target frame and len specifies the number of
275 * bytes to be copied.
276 */
277
278#define _GNTCOPY_source_gref (0)
279#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref)
280#define _GNTCOPY_dest_gref (1)
281#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref)
282
283#define GNTTABOP_copy 5
284struct gnttab_copy {
285 /* IN parameters. */
286 struct {
287 union {
288 grant_ref_t ref;
289 unsigned long gmfn;
290 } u;
291 domid_t domid;
292 uint16_t offset;
293 } source, dest;
294 uint16_t len;
295 uint16_t flags; /* GNTCOPY_* */
296 /* OUT parameters. */
297 int16_t status;
298};
299
300/*
301 * GNTTABOP_query_size: Query the current and maximum sizes of the shared
302 * grant table.
303 * NOTES:
304 * 1. <dom> may be specified as DOMID_SELF.
305 * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
306 */
307#define GNTTABOP_query_size 6
308struct gnttab_query_size {
309 /* IN parameters. */
310 domid_t dom;
311 /* OUT parameters. */
312 uint32_t nr_frames;
313 uint32_t max_nr_frames;
314 int16_t status; /* GNTST_* */
315};
316
317
318/*
319 * Bitfield values for update_pin_status.flags.
320 */
321 /* Map the grant entry for access by I/O devices. */
322#define _GNTMAP_device_map (0)
323#define GNTMAP_device_map (1<<_GNTMAP_device_map)
324 /* Map the grant entry for access by host CPUs. */
325#define _GNTMAP_host_map (1)
326#define GNTMAP_host_map (1<<_GNTMAP_host_map)
327 /* Accesses to the granted frame will be restricted to read-only access. */
328#define _GNTMAP_readonly (2)
329#define GNTMAP_readonly (1<<_GNTMAP_readonly)
330 /*
331 * GNTMAP_host_map subflag:
332 * 0 => The host mapping is usable only by the guest OS.
333 * 1 => The host mapping is usable by guest OS + current application.
334 */
335#define _GNTMAP_application_map (3)
336#define GNTMAP_application_map (1<<_GNTMAP_application_map)
337
338 /*
339 * GNTMAP_contains_pte subflag:
340 * 0 => This map request contains a host virtual address.
341 * 1 => This map request contains the machine addess of the PTE to update.
342 */
343#define _GNTMAP_contains_pte (4)
344#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte)
345
346/*
347 * Values for error status returns. All errors are -ve.
348 */
349#define GNTST_okay (0) /* Normal return. */
350#define GNTST_general_error (-1) /* General undefined error. */
351#define GNTST_bad_domain (-2) /* Unrecognsed domain id. */
352#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
353#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */
354#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */
355#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/
356#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
357#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
358#define GNTST_bad_page (-9) /* Specified page was invalid for op. */
359#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */
360
361#define GNTTABOP_error_msgs { \
362 "okay", \
363 "undefined error", \
364 "unrecognised domain id", \
365 "invalid grant reference", \
366 "invalid mapping handle", \
367 "invalid virtual address", \
368 "invalid device address", \
369 "no spare translation slot in the I/O MMU", \
370 "permission denied", \
371 "bad page", \
372 "copy arguments cross page boundary" \
373}
374
375#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
new file mode 100644
index 000000000000..c2d1fa4dc1ee
--- /dev/null
+++ b/include/xen/interface/io/blkif.h
@@ -0,0 +1,94 @@
1/******************************************************************************
2 * blkif.h
3 *
4 * Unified block-device I/O interface for Xen guest OSes.
5 *
6 * Copyright (c) 2003-2004, Keir Fraser
7 */
8
9#ifndef __XEN_PUBLIC_IO_BLKIF_H__
10#define __XEN_PUBLIC_IO_BLKIF_H__
11
12#include "ring.h"
13#include "../grant_table.h"
14
15/*
16 * Front->back notifications: When enqueuing a new request, sending a
17 * notification can be made conditional on req_event (i.e., the generic
18 * hold-off mechanism provided by the ring macros). Backends must set
19 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
20 *
21 * Back->front notifications: When enqueuing a new response, sending a
22 * notification can be made conditional on rsp_event (i.e., the generic
23 * hold-off mechanism provided by the ring macros). Frontends must set
24 * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
25 */
26
27typedef uint16_t blkif_vdev_t;
28typedef uint64_t blkif_sector_t;
29
30/*
31 * REQUEST CODES.
32 */
33#define BLKIF_OP_READ 0
34#define BLKIF_OP_WRITE 1
35/*
36 * Recognised only if "feature-barrier" is present in backend xenbus info.
37 * The "feature_barrier" node contains a boolean indicating whether barrier
38 * requests are likely to succeed or fail. Either way, a barrier request
39 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
40 * the underlying block-device hardware. The boolean simply indicates whether
41 * or not it is worthwhile for the frontend to attempt barrier requests.
42 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
43 * create the "feature-barrier" node!
44 */
45#define BLKIF_OP_WRITE_BARRIER 2
46
47/*
48 * Maximum scatter/gather segments per request.
49 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
50 * NB. This could be 12 if the ring indexes weren't stored in the same page.
51 */
52#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
53
54struct blkif_request {
55 uint8_t operation; /* BLKIF_OP_??? */
56 uint8_t nr_segments; /* number of segments */
57 blkif_vdev_t handle; /* only for read/write requests */
58 uint64_t id; /* private guest value, echoed in resp */
59 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
60 struct blkif_request_segment {
61 grant_ref_t gref; /* reference to I/O buffer frame */
62 /* @first_sect: first sector in frame to transfer (inclusive). */
63 /* @last_sect: last sector in frame to transfer (inclusive). */
64 uint8_t first_sect, last_sect;
65 } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
66};
67
68struct blkif_response {
69 uint64_t id; /* copied from request */
70 uint8_t operation; /* copied from request */
71 int16_t status; /* BLKIF_RSP_??? */
72};
73
74/*
75 * STATUS RETURN CODES.
76 */
77 /* Operation not supported (only happens on barrier writes). */
78#define BLKIF_RSP_EOPNOTSUPP -2
79 /* Operation failed for some unspecified reason (-EIO). */
80#define BLKIF_RSP_ERROR -1
81 /* Operation completed successfully. */
82#define BLKIF_RSP_OKAY 0
83
84/*
85 * Generate blkif ring structures and types.
86 */
87
88DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
89
90#define VDISK_CDROM 0x1
91#define VDISK_REMOVABLE 0x2
92#define VDISK_READONLY 0x4
93
94#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h
new file mode 100644
index 000000000000..e563de70f784
--- /dev/null
+++ b/include/xen/interface/io/console.h
@@ -0,0 +1,23 @@
1/******************************************************************************
2 * console.h
3 *
4 * Console I/O interface for Xen guest OSes.
5 *
6 * Copyright (c) 2005, Keir Fraser
7 */
8
9#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
10#define __XEN_PUBLIC_IO_CONSOLE_H__
11
12typedef uint32_t XENCONS_RING_IDX;
13
14#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
15
16struct xencons_interface {
17 char in[1024];
18 char out[2048];
19 XENCONS_RING_IDX in_cons, in_prod;
20 XENCONS_RING_IDX out_cons, out_prod;
21};
22
23#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
new file mode 100644
index 000000000000..518481c95f18
--- /dev/null
+++ b/include/xen/interface/io/netif.h
@@ -0,0 +1,158 @@
1/******************************************************************************
2 * netif.h
3 *
4 * Unified network-device I/O interface for Xen guest OSes.
5 *
6 * Copyright (c) 2003-2004, Keir Fraser
7 */
8
9#ifndef __XEN_PUBLIC_IO_NETIF_H__
10#define __XEN_PUBLIC_IO_NETIF_H__
11
12#include "ring.h"
13#include "../grant_table.h"
14
15/*
16 * Notifications after enqueuing any type of message should be conditional on
17 * the appropriate req_event or rsp_event field in the shared ring.
18 * If the client sends notification for rx requests then it should specify
19 * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
20 * that it cannot safely queue packets (as it may not be kicked to send them).
21 */
22
23/*
24 * This is the 'wire' format for packets:
25 * Request 1: netif_tx_request -- NETTXF_* (any flags)
26 * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info)
27 * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE)
28 * Request 4: netif_tx_request -- NETTXF_more_data
29 * Request 5: netif_tx_request -- NETTXF_more_data
30 * ...
31 * Request N: netif_tx_request -- 0
32 */
33
34/* Protocol checksum field is blank in the packet (hardware offload)? */
35#define _NETTXF_csum_blank (0)
36#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)
37
38/* Packet data has been validated against protocol checksum. */
39#define _NETTXF_data_validated (1)
40#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
41
42/* Packet continues in the next request descriptor. */
43#define _NETTXF_more_data (2)
44#define NETTXF_more_data (1U<<_NETTXF_more_data)
45
46/* Packet to be followed by extra descriptor(s). */
47#define _NETTXF_extra_info (3)
48#define NETTXF_extra_info (1U<<_NETTXF_extra_info)
49
50struct xen_netif_tx_request {
51 grant_ref_t gref; /* Reference to buffer page */
52 uint16_t offset; /* Offset within buffer page */
53 uint16_t flags; /* NETTXF_* */
54 uint16_t id; /* Echoed in response message. */
55 uint16_t size; /* Packet size in bytes. */
56};
57
58/* Types of netif_extra_info descriptors. */
59#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
60#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
61#define XEN_NETIF_EXTRA_TYPE_MAX (2)
62
63/* netif_extra_info flags. */
64#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
65#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
66
67/* GSO types - only TCPv4 currently supported. */
68#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
69
70/*
71 * This structure needs to fit within both netif_tx_request and
72 * netif_rx_response for compatibility.
73 */
74struct xen_netif_extra_info {
75 uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */
76 uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
77
78 union {
79 struct {
80 /*
81 * Maximum payload size of each segment. For
82 * example, for TCP this is just the path MSS.
83 */
84 uint16_t size;
85
86 /*
87 * GSO type. This determines the protocol of
88 * the packet and any extra features required
89 * to segment the packet properly.
90 */
91 uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
92
93 /* Future expansion. */
94 uint8_t pad;
95
96 /*
97 * GSO features. This specifies any extra GSO
98 * features required to process this packet,
99 * such as ECN support for TCPv4.
100 */
101 uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
102 } gso;
103
104 uint16_t pad[3];
105 } u;
106};
107
108struct xen_netif_tx_response {
109 uint16_t id;
110 int16_t status; /* NETIF_RSP_* */
111};
112
113struct xen_netif_rx_request {
114 uint16_t id; /* Echoed in response message. */
115 grant_ref_t gref; /* Reference to incoming granted frame */
116};
117
118/* Packet data has been validated against protocol checksum. */
119#define _NETRXF_data_validated (0)
120#define NETRXF_data_validated (1U<<_NETRXF_data_validated)
121
122/* Protocol checksum field is blank in the packet (hardware offload)? */
123#define _NETRXF_csum_blank (1)
124#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
125
126/* Packet continues in the next request descriptor. */
127#define _NETRXF_more_data (2)
128#define NETRXF_more_data (1U<<_NETRXF_more_data)
129
130/* Packet to be followed by extra descriptor(s). */
131#define _NETRXF_extra_info (3)
132#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
133
134struct xen_netif_rx_response {
135 uint16_t id;
136 uint16_t offset; /* Offset in page of start of received packet */
137 uint16_t flags; /* NETRXF_* */
138 int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
139};
140
141/*
142 * Generate netif ring structures and types.
143 */
144
145DEFINE_RING_TYPES(xen_netif_tx,
146 struct xen_netif_tx_request,
147 struct xen_netif_tx_response);
148DEFINE_RING_TYPES(xen_netif_rx,
149 struct xen_netif_rx_request,
150 struct xen_netif_rx_response);
151
152#define NETIF_RSP_DROPPED -2
153#define NETIF_RSP_ERROR -1
154#define NETIF_RSP_OKAY 0
155/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
156#define NETIF_RSP_NULL 1
157
158#endif
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
new file mode 100644
index 000000000000..e8cbf431c8cc
--- /dev/null
+++ b/include/xen/interface/io/ring.h
@@ -0,0 +1,260 @@
1/******************************************************************************
2 * ring.h
3 *
4 * Shared producer-consumer ring macros.
5 *
6 * Tim Deegan and Andrew Warfield November 2004.
7 */
8
9#ifndef __XEN_PUBLIC_IO_RING_H__
10#define __XEN_PUBLIC_IO_RING_H__
11
12typedef unsigned int RING_IDX;
13
14/* Round a 32-bit unsigned constant down to the nearest power of two. */
15#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1))
16#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x))
17#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x))
18#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x))
19#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
20
21/*
22 * Calculate size of a shared ring, given the total available space for the
23 * ring and indexes (_sz), and the name tag of the request/response structure.
24 * A ring contains as many entries as will fit, rounded down to the nearest
25 * power of two (so we can mask with (size-1) to loop around).
26 */
27#define __RING_SIZE(_s, _sz) \
28 (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
29
30/*
31 * Macros to make the correct C datatypes for a new kind of ring.
32 *
33 * To make a new ring datatype, you need to have two message structures,
34 * let's say struct request, and struct response already defined.
35 *
36 * In a header where you want the ring datatype declared, you then do:
37 *
38 * DEFINE_RING_TYPES(mytag, struct request, struct response);
39 *
40 * These expand out to give you a set of types, as you can see below.
41 * The most important of these are:
42 *
43 * struct mytag_sring - The shared ring.
44 * struct mytag_front_ring - The 'front' half of the ring.
45 * struct mytag_back_ring - The 'back' half of the ring.
46 *
47 * To initialize a ring in your code you need to know the location and size
48 * of the shared memory area (PAGE_SIZE, for instance). To initialise
49 * the front half:
50 *
51 * struct mytag_front_ring front_ring;
52 * SHARED_RING_INIT((struct mytag_sring *)shared_page);
53 * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page,
54 * PAGE_SIZE);
55 *
56 * Initializing the back follows similarly (note that only the front
57 * initializes the shared ring):
58 *
59 * struct mytag_back_ring back_ring;
60 * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page,
61 * PAGE_SIZE);
62 */
63
64#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
65 \
66/* Shared ring entry */ \
67union __name##_sring_entry { \
68 __req_t req; \
69 __rsp_t rsp; \
70}; \
71 \
72/* Shared ring page */ \
73struct __name##_sring { \
74 RING_IDX req_prod, req_event; \
75 RING_IDX rsp_prod, rsp_event; \
76 uint8_t pad[48]; \
77 union __name##_sring_entry ring[1]; /* variable-length */ \
78}; \
79 \
80/* "Front" end's private variables */ \
81struct __name##_front_ring { \
82 RING_IDX req_prod_pvt; \
83 RING_IDX rsp_cons; \
84 unsigned int nr_ents; \
85 struct __name##_sring *sring; \
86}; \
87 \
88/* "Back" end's private variables */ \
89struct __name##_back_ring { \
90 RING_IDX rsp_prod_pvt; \
91 RING_IDX req_cons; \
92 unsigned int nr_ents; \
93 struct __name##_sring *sring; \
94};
95
96/*
97 * Macros for manipulating rings.
98 *
99 * FRONT_RING_whatever works on the "front end" of a ring: here
100 * requests are pushed on to the ring and responses taken off it.
101 *
102 * BACK_RING_whatever works on the "back end" of a ring: here
103 * requests are taken off the ring and responses put on.
104 *
105 * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
106 * This is OK in 1-for-1 request-response situations where the
107 * requestor (front end) never has more than RING_SIZE()-1
108 * outstanding requests.
109 */
110
111/* Initialising empty rings */
112#define SHARED_RING_INIT(_s) do { \
113 (_s)->req_prod = (_s)->rsp_prod = 0; \
114 (_s)->req_event = (_s)->rsp_event = 1; \
115 memset((_s)->pad, 0, sizeof((_s)->pad)); \
116} while(0)
117
118#define FRONT_RING_INIT(_r, _s, __size) do { \
119 (_r)->req_prod_pvt = 0; \
120 (_r)->rsp_cons = 0; \
121 (_r)->nr_ents = __RING_SIZE(_s, __size); \
122 (_r)->sring = (_s); \
123} while (0)
124
125#define BACK_RING_INIT(_r, _s, __size) do { \
126 (_r)->rsp_prod_pvt = 0; \
127 (_r)->req_cons = 0; \
128 (_r)->nr_ents = __RING_SIZE(_s, __size); \
129 (_r)->sring = (_s); \
130} while (0)
131
132/* Initialize to existing shared indexes -- for recovery */
133#define FRONT_RING_ATTACH(_r, _s, __size) do { \
134 (_r)->sring = (_s); \
135 (_r)->req_prod_pvt = (_s)->req_prod; \
136 (_r)->rsp_cons = (_s)->rsp_prod; \
137 (_r)->nr_ents = __RING_SIZE(_s, __size); \
138} while (0)
139
140#define BACK_RING_ATTACH(_r, _s, __size) do { \
141 (_r)->sring = (_s); \
142 (_r)->rsp_prod_pvt = (_s)->rsp_prod; \
143 (_r)->req_cons = (_s)->req_prod; \
144 (_r)->nr_ents = __RING_SIZE(_s, __size); \
145} while (0)
146
147/* How big is this ring? */
148#define RING_SIZE(_r) \
149 ((_r)->nr_ents)
150
151/* Number of free requests (for use on front side only). */
152#define RING_FREE_REQUESTS(_r) \
153 (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
154
155/* Test if there is an empty slot available on the front ring.
156 * (This is only meaningful from the front. )
157 */
158#define RING_FULL(_r) \
159 (RING_FREE_REQUESTS(_r) == 0)
160
161/* Test if there are outstanding messages to be processed on a ring. */
162#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
163 ((_r)->sring->rsp_prod - (_r)->rsp_cons)
164
165#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
166 ({ \
167 unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \
168 unsigned int rsp = RING_SIZE(_r) - \
169 ((_r)->req_cons - (_r)->rsp_prod_pvt); \
170 req < rsp ? req : rsp; \
171 })
172
173/* Direct access to individual ring elements, by index. */
174#define RING_GET_REQUEST(_r, _idx) \
175 (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
176
177#define RING_GET_RESPONSE(_r, _idx) \
178 (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
179
180/* Loop termination condition: Would the specified index overflow the ring? */
181#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
182 (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
183
184#define RING_PUSH_REQUESTS(_r) do { \
185 wmb(); /* back sees requests /before/ updated producer index */ \
186 (_r)->sring->req_prod = (_r)->req_prod_pvt; \
187} while (0)
188
189#define RING_PUSH_RESPONSES(_r) do { \
190 wmb(); /* front sees responses /before/ updated producer index */ \
191 (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
192} while (0)
193
194/*
195 * Notification hold-off (req_event and rsp_event):
196 *
197 * When queueing requests or responses on a shared ring, it may not always be
198 * necessary to notify the remote end. For example, if requests are in flight
199 * in a backend, the front may be able to queue further requests without
200 * notifying the back (if the back checks for new requests when it queues
201 * responses).
202 *
203 * When enqueuing requests or responses:
204 *
205 * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
206 * is a boolean return value. True indicates that the receiver requires an
207 * asynchronous notification.
208 *
209 * After dequeuing requests or responses (before sleeping the connection):
210 *
211 * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
212 * The second argument is a boolean return value. True indicates that there
213 * are pending messages on the ring (i.e., the connection should not be put
214 * to sleep).
215 *
216 * These macros will set the req_event/rsp_event field to trigger a
217 * notification on the very next message that is enqueued. If you want to
218 * create batches of work (i.e., only receive a notification after several
219 * messages have been enqueued) then you will need to create a customised
220 * version of the FINAL_CHECK macro in your own code, which sets the event
221 * field appropriately.
222 */
223
224#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \
225 RING_IDX __old = (_r)->sring->req_prod; \
226 RING_IDX __new = (_r)->req_prod_pvt; \
227 wmb(); /* back sees requests /before/ updated producer index */ \
228 (_r)->sring->req_prod = __new; \
229 mb(); /* back sees new requests /before/ we check req_event */ \
230 (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \
231 (RING_IDX)(__new - __old)); \
232} while (0)
233
234#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \
235 RING_IDX __old = (_r)->sring->rsp_prod; \
236 RING_IDX __new = (_r)->rsp_prod_pvt; \
237 wmb(); /* front sees responses /before/ updated producer index */ \
238 (_r)->sring->rsp_prod = __new; \
239 mb(); /* front sees new responses /before/ we check rsp_event */ \
240 (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \
241 (RING_IDX)(__new - __old)); \
242} while (0)
243
244#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \
245 (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
246 if (_work_to_do) break; \
247 (_r)->sring->req_event = (_r)->req_cons + 1; \
248 mb(); \
249 (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
250} while (0)
251
252#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \
253 (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
254 if (_work_to_do) break; \
255 (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \
256 mb(); \
257 (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
258} while (0)
259
260#endif /* __XEN_PUBLIC_IO_RING_H__ */
diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h
new file mode 100644
index 000000000000..46508c7fa399
--- /dev/null
+++ b/include/xen/interface/io/xenbus.h
@@ -0,0 +1,44 @@
1/*****************************************************************************
2 * xenbus.h
3 *
4 * Xenbus protocol details.
5 *
6 * Copyright (C) 2005 XenSource Ltd.
7 */
8
9#ifndef _XEN_PUBLIC_IO_XENBUS_H
10#define _XEN_PUBLIC_IO_XENBUS_H
11
12/* The state of either end of the Xenbus, i.e. the current communication
13 status of initialisation across the bus. States here imply nothing about
14 the state of the connection between the driver and the kernel's device
15 layers. */
16enum xenbus_state
17{
18 XenbusStateUnknown = 0,
19 XenbusStateInitialising = 1,
20 XenbusStateInitWait = 2, /* Finished early
21 initialisation, but waiting
22 for information from the peer
23 or hotplug scripts. */
24 XenbusStateInitialised = 3, /* Initialised and waiting for a
25 connection from the peer. */
26 XenbusStateConnected = 4,
27 XenbusStateClosing = 5, /* The device is being closed
28 due to an error or an unplug
29 event. */
30 XenbusStateClosed = 6
31
32};
33
34#endif /* _XEN_PUBLIC_IO_XENBUS_H */
35
36/*
37 * Local variables:
38 * c-file-style: "linux"
39 * indent-tabs-mode: t
40 * c-indent-level: 8
41 * c-basic-offset: 8
42 * tab-width: 8
43 * End:
44 */
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
new file mode 100644
index 000000000000..99fcffb372d1
--- /dev/null
+++ b/include/xen/interface/io/xs_wire.h
@@ -0,0 +1,87 @@
1/*
2 * Details of the "wire" protocol between Xen Store Daemon and client
3 * library or guest kernel.
4 * Copyright (C) 2005 Rusty Russell IBM Corporation
5 */
6
7#ifndef _XS_WIRE_H
8#define _XS_WIRE_H
9
10enum xsd_sockmsg_type
11{
12 XS_DEBUG,
13 XS_DIRECTORY,
14 XS_READ,
15 XS_GET_PERMS,
16 XS_WATCH,
17 XS_UNWATCH,
18 XS_TRANSACTION_START,
19 XS_TRANSACTION_END,
20 XS_INTRODUCE,
21 XS_RELEASE,
22 XS_GET_DOMAIN_PATH,
23 XS_WRITE,
24 XS_MKDIR,
25 XS_RM,
26 XS_SET_PERMS,
27 XS_WATCH_EVENT,
28 XS_ERROR,
29 XS_IS_DOMAIN_INTRODUCED
30};
31
32#define XS_WRITE_NONE "NONE"
33#define XS_WRITE_CREATE "CREATE"
34#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
35
36/* We hand errors as strings, for portability. */
37struct xsd_errors
38{
39 int errnum;
40 const char *errstring;
41};
42#define XSD_ERROR(x) { x, #x }
43static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
44 XSD_ERROR(EINVAL),
45 XSD_ERROR(EACCES),
46 XSD_ERROR(EEXIST),
47 XSD_ERROR(EISDIR),
48 XSD_ERROR(ENOENT),
49 XSD_ERROR(ENOMEM),
50 XSD_ERROR(ENOSPC),
51 XSD_ERROR(EIO),
52 XSD_ERROR(ENOTEMPTY),
53 XSD_ERROR(ENOSYS),
54 XSD_ERROR(EROFS),
55 XSD_ERROR(EBUSY),
56 XSD_ERROR(EAGAIN),
57 XSD_ERROR(EISCONN)
58};
59
60struct xsd_sockmsg
61{
62 uint32_t type; /* XS_??? */
63 uint32_t req_id;/* Request identifier, echoed in daemon's response. */
64 uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
65 uint32_t len; /* Length of data following this. */
66
67 /* Generally followed by nul-terminated string(s). */
68};
69
70enum xs_watch_type
71{
72 XS_WATCH_PATH = 0,
73 XS_WATCH_TOKEN
74};
75
76/* Inter-domain shared memory communications. */
77#define XENSTORE_RING_SIZE 1024
78typedef uint32_t XENSTORE_RING_IDX;
79#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
80struct xenstore_domain_interface {
81 char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
82 char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
83 XENSTORE_RING_IDX req_cons, req_prod;
84 XENSTORE_RING_IDX rsp_cons, rsp_prod;
85};
86
87#endif /* _XS_WIRE_H */
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
new file mode 100644
index 000000000000..af36ead16817
--- /dev/null
+++ b/include/xen/interface/memory.h
@@ -0,0 +1,145 @@
1/******************************************************************************
2 * memory.h
3 *
4 * Memory reservation and information.
5 *
6 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
7 */
8
9#ifndef __XEN_PUBLIC_MEMORY_H__
10#define __XEN_PUBLIC_MEMORY_H__
11
12/*
13 * Increase or decrease the specified domain's memory reservation. Returns a
14 * -ve errcode on failure, or the # extents successfully allocated or freed.
15 * arg == addr of struct xen_memory_reservation.
16 */
17#define XENMEM_increase_reservation 0
18#define XENMEM_decrease_reservation 1
19#define XENMEM_populate_physmap 6
20struct xen_memory_reservation {
21
22 /*
23 * XENMEM_increase_reservation:
24 * OUT: MFN (*not* GMFN) bases of extents that were allocated
25 * XENMEM_decrease_reservation:
26 * IN: GMFN bases of extents to free
27 * XENMEM_populate_physmap:
28 * IN: GPFN bases of extents to populate with memory
29 * OUT: GMFN bases of extents that were allocated
30 * (NB. This command also updates the mach_to_phys translation table)
31 */
32 GUEST_HANDLE(ulong) extent_start;
33
34 /* Number of extents, and size/alignment of each (2^extent_order pages). */
35 unsigned long nr_extents;
36 unsigned int extent_order;
37
38 /*
39 * Maximum # bits addressable by the user of the allocated region (e.g.,
40 * I/O devices often have a 32-bit limitation even in 64-bit systems). If
41 * zero then the user has no addressing restriction.
42 * This field is not used by XENMEM_decrease_reservation.
43 */
44 unsigned int address_bits;
45
46 /*
47 * Domain whose reservation is being changed.
48 * Unprivileged domains can specify only DOMID_SELF.
49 */
50 domid_t domid;
51
52};
53DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
54
55/*
56 * Returns the maximum machine frame number of mapped RAM in this system.
57 * This command always succeeds (it never returns an error code).
58 * arg == NULL.
59 */
60#define XENMEM_maximum_ram_page 2
61
62/*
63 * Returns the current or maximum memory reservation, in pages, of the
64 * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
65 * arg == addr of domid_t.
66 */
67#define XENMEM_current_reservation 3
68#define XENMEM_maximum_reservation 4
69
70/*
71 * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
72 * mapping table. Architectures which do not have a m2p table do not implement
73 * this command.
74 * arg == addr of xen_machphys_mfn_list_t.
75 */
76#define XENMEM_machphys_mfn_list 5
77struct xen_machphys_mfn_list {
78 /*
79 * Size of the 'extent_start' array. Fewer entries will be filled if the
80 * machphys table is smaller than max_extents * 2MB.
81 */
82 unsigned int max_extents;
83
84 /*
85 * Pointer to buffer to fill with list of extent starts. If there are
86 * any large discontiguities in the machine address space, 2MB gaps in
87 * the machphys table will be represented by an MFN base of zero.
88 */
89 GUEST_HANDLE(ulong) extent_start;
90
91 /*
92 * Number of extents written to the above array. This will be smaller
93 * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
94 */
95 unsigned int nr_extents;
96};
97DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
98
99/*
100 * Sets the GPFN at which a particular page appears in the specified guest's
101 * pseudophysical address space.
102 * arg == addr of xen_add_to_physmap_t.
103 */
104#define XENMEM_add_to_physmap 7
105struct xen_add_to_physmap {
106 /* Which domain to change the mapping for. */
107 domid_t domid;
108
109 /* Source mapping space. */
110#define XENMAPSPACE_shared_info 0 /* shared info page */
111#define XENMAPSPACE_grant_table 1 /* grant table page */
112 unsigned int space;
113
114 /* Index into source mapping space. */
115 unsigned long idx;
116
117 /* GPFN where the source mapping page should appear. */
118 unsigned long gpfn;
119};
120DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
121
122/*
123 * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
124 * code on failure. This call only works for auto-translated guests.
125 */
126#define XENMEM_translate_gpfn_list 8
127struct xen_translate_gpfn_list {
128 /* Which domain to translate for? */
129 domid_t domid;
130
131 /* Length of list. */
132 unsigned long nr_gpfns;
133
134 /* List of GPFNs to translate. */
135 GUEST_HANDLE(ulong) gpfn_list;
136
137 /*
138 * Output list to contain MFN translations. May be the same as the input
139 * list (in which case each input GPFN is overwritten with the output MFN).
140 */
141 GUEST_HANDLE(ulong) mfn_list;
142};
143DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
144
145#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
new file mode 100644
index 000000000000..cd6939147cb6
--- /dev/null
+++ b/include/xen/interface/physdev.h
@@ -0,0 +1,145 @@
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy
3 * of this software and associated documentation files (the "Software"), to
4 * deal in the Software without restriction, including without limitation the
5 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
6 * sell copies of the Software, and to permit persons to whom the Software is
7 * furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in
10 * all copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
18 * DEALINGS IN THE SOFTWARE.
19 */
20
21#ifndef __XEN_PUBLIC_PHYSDEV_H__
22#define __XEN_PUBLIC_PHYSDEV_H__
23
24/*
25 * Prototype for this hypercall is:
26 * int physdev_op(int cmd, void *args)
27 * @cmd == PHYSDEVOP_??? (physdev operation).
28 * @args == Operation-specific extra arguments (NULL if none).
29 */
30
31/*
32 * Notify end-of-interrupt (EOI) for the specified IRQ.
33 * @arg == pointer to physdev_eoi structure.
34 */
35#define PHYSDEVOP_eoi 12
36struct physdev_eoi {
37 /* IN */
38 uint32_t irq;
39};
40
41/*
42 * Query the status of an IRQ line.
43 * @arg == pointer to physdev_irq_status_query structure.
44 */
45#define PHYSDEVOP_irq_status_query 5
46struct physdev_irq_status_query {
47 /* IN */
48 uint32_t irq;
49 /* OUT */
50 uint32_t flags; /* XENIRQSTAT_* */
51};
52
53/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
54#define _XENIRQSTAT_needs_eoi (0)
55#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi)
56
57/* IRQ shared by multiple guests? */
58#define _XENIRQSTAT_shared (1)
59#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared)
60
61/*
62 * Set the current VCPU's I/O privilege level.
63 * @arg == pointer to physdev_set_iopl structure.
64 */
65#define PHYSDEVOP_set_iopl 6
66struct physdev_set_iopl {
67 /* IN */
68 uint32_t iopl;
69};
70
71/*
72 * Set the current VCPU's I/O-port permissions bitmap.
73 * @arg == pointer to physdev_set_iobitmap structure.
74 */
75#define PHYSDEVOP_set_iobitmap 7
76struct physdev_set_iobitmap {
77 /* IN */
78 uint8_t * bitmap;
79 uint32_t nr_ports;
80};
81
82/*
83 * Read or write an IO-APIC register.
84 * @arg == pointer to physdev_apic structure.
85 */
86#define PHYSDEVOP_apic_read 8
87#define PHYSDEVOP_apic_write 9
88struct physdev_apic {
89 /* IN */
90 unsigned long apic_physbase;
91 uint32_t reg;
92 /* IN or OUT */
93 uint32_t value;
94};
95
96/*
97 * Allocate or free a physical upcall vector for the specified IRQ line.
98 * @arg == pointer to physdev_irq structure.
99 */
100#define PHYSDEVOP_alloc_irq_vector 10
101#define PHYSDEVOP_free_irq_vector 11
102struct physdev_irq {
103 /* IN */
104 uint32_t irq;
105 /* IN or OUT */
106 uint32_t vector;
107};
108
109/*
110 * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
111 * hypercall since 0x00030202.
112 */
113struct physdev_op {
114 uint32_t cmd;
115 union {
116 struct physdev_irq_status_query irq_status_query;
117 struct physdev_set_iopl set_iopl;
118 struct physdev_set_iobitmap set_iobitmap;
119 struct physdev_apic apic_op;
120 struct physdev_irq irq_op;
121 } u;
122};
123
124/*
125 * Notify that some PIRQ-bound event channels have been unmasked.
126 * ** This command is obsolete since interface version 0x00030202 and is **
127 * ** unsupported by newer versions of Xen. **
128 */
129#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4
130
131/*
132 * These all-capitals physdev operation names are superceded by the new names
133 * (defined above) since interface version 0x00030202.
134 */
135#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query
136#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl
137#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap
138#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read
139#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write
140#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector
141#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector
142#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
143#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared
144
145#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
new file mode 100644
index 000000000000..5fec575a800a
--- /dev/null
+++ b/include/xen/interface/sched.h
@@ -0,0 +1,77 @@
1/******************************************************************************
2 * sched.h
3 *
4 * Scheduler state interactions
5 *
6 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
7 */
8
9#ifndef __XEN_PUBLIC_SCHED_H__
10#define __XEN_PUBLIC_SCHED_H__
11
12#include "event_channel.h"
13
14/*
15 * The prototype for this hypercall is:
16 * long sched_op_new(int cmd, void *arg)
17 * @cmd == SCHEDOP_??? (scheduler operation).
18 * @arg == Operation-specific extra argument(s), as described below.
19 *
20 * **NOTE**:
21 * Versions of Xen prior to 3.0.2 provide only the following legacy version
22 * of this hypercall, supporting only the commands yield, block and shutdown:
23 * long sched_op(int cmd, unsigned long arg)
24 * @cmd == SCHEDOP_??? (scheduler operation).
25 * @arg == 0 (SCHEDOP_yield and SCHEDOP_block)
26 * == SHUTDOWN_* code (SCHEDOP_shutdown)
27 */
28
29/*
30 * Voluntarily yield the CPU.
31 * @arg == NULL.
32 */
33#define SCHEDOP_yield 0
34
35/*
36 * Block execution of this VCPU until an event is received for processing.
37 * If called with event upcalls masked, this operation will atomically
38 * reenable event delivery and check for pending events before blocking the
39 * VCPU. This avoids a "wakeup waiting" race.
40 * @arg == NULL.
41 */
42#define SCHEDOP_block 1
43
44/*
45 * Halt execution of this domain (all VCPUs) and notify the system controller.
46 * @arg == pointer to sched_shutdown structure.
47 */
48#define SCHEDOP_shutdown 2
49struct sched_shutdown {
50 unsigned int reason; /* SHUTDOWN_* */
51};
52DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
53
54/*
55 * Poll a set of event-channel ports. Return when one or more are pending. An
56 * optional timeout may be specified.
57 * @arg == pointer to sched_poll structure.
58 */
59#define SCHEDOP_poll 3
60struct sched_poll {
61 GUEST_HANDLE(evtchn_port_t) ports;
62 unsigned int nr_ports;
63 uint64_t timeout;
64};
65DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
66
67/*
68 * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
69 * software to determine the appropriate action. For the most part, Xen does
70 * not care about the shutdown code.
71 */
72#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */
73#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */
74#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */
75#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */
76
77#endif /* __XEN_PUBLIC_SCHED_H__ */
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
new file mode 100644
index 000000000000..ff61ea365997
--- /dev/null
+++ b/include/xen/interface/vcpu.h
@@ -0,0 +1,167 @@
1/******************************************************************************
2 * vcpu.h
3 *
4 * VCPU initialisation, query, and hotplug.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
25 */
26
27#ifndef __XEN_PUBLIC_VCPU_H__
28#define __XEN_PUBLIC_VCPU_H__
29
30/*
31 * Prototype for this hypercall is:
32 * int vcpu_op(int cmd, int vcpuid, void *extra_args)
33 * @cmd == VCPUOP_??? (VCPU operation).
34 * @vcpuid == VCPU to operate on.
35 * @extra_args == Operation-specific extra arguments (NULL if none).
36 */
37
38/*
39 * Initialise a VCPU. Each VCPU can be initialised only once. A
40 * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
41 *
42 * @extra_arg == pointer to vcpu_guest_context structure containing initial
43 * state for the VCPU.
44 */
45#define VCPUOP_initialise 0
46
47/*
48 * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
49 * if the VCPU has not been initialised (VCPUOP_initialise).
50 */
51#define VCPUOP_up 1
52
53/*
54 * Bring down a VCPU (i.e., make it non-runnable).
55 * There are a few caveats that callers should observe:
56 * 1. This operation may return, and VCPU_is_up may return false, before the
57 * VCPU stops running (i.e., the command is asynchronous). It is a good
58 * idea to ensure that the VCPU has entered a non-critical loop before
59 * bringing it down. Alternatively, this operation is guaranteed
60 * synchronous if invoked by the VCPU itself.
61 * 2. After a VCPU is initialised, there is currently no way to drop all its
62 * references to domain memory. Even a VCPU that is down still holds
63 * memory references via its pagetable base pointer and GDT. It is good
64 * practise to move a VCPU onto an 'idle' or default page table, LDT and
65 * GDT before bringing it down.
66 */
67#define VCPUOP_down 2
68
69/* Returns 1 if the given VCPU is up. */
70#define VCPUOP_is_up 3
71
72/*
73 * Return information about the state and running time of a VCPU.
74 * @extra_arg == pointer to vcpu_runstate_info structure.
75 */
76#define VCPUOP_get_runstate_info 4
77struct vcpu_runstate_info {
78 /* VCPU's current state (RUNSTATE_*). */
79 int state;
80 /* When was current state entered (system time, ns)? */
81 uint64_t state_entry_time;
82 /*
83 * Time spent in each RUNSTATE_* (ns). The sum of these times is
84 * guaranteed not to drift from system time.
85 */
86 uint64_t time[4];
87};
88
89/* VCPU is currently running on a physical CPU. */
90#define RUNSTATE_running 0
91
92/* VCPU is runnable, but not currently scheduled on any physical CPU. */
93#define RUNSTATE_runnable 1
94
95/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
96#define RUNSTATE_blocked 2
97
98/*
99 * VCPU is not runnable, but it is not blocked.
100 * This is a 'catch all' state for things like hotplug and pauses by the
101 * system administrator (or for critical sections in the hypervisor).
102 * RUNSTATE_blocked dominates this state (it is the preferred state).
103 */
104#define RUNSTATE_offline 3
105
106/*
107 * Register a shared memory area from which the guest may obtain its own
108 * runstate information without needing to execute a hypercall.
109 * Notes:
110 * 1. The registered address may be virtual or physical, depending on the
111 * platform. The virtual address should be registered on x86 systems.
112 * 2. Only one shared area may be registered per VCPU. The shared area is
113 * updated by the hypervisor each time the VCPU is scheduled. Thus
114 * runstate.state will always be RUNSTATE_running and
115 * runstate.state_entry_time will indicate the system time at which the
116 * VCPU was last scheduled to run.
117 * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
118 */
119#define VCPUOP_register_runstate_memory_area 5
120struct vcpu_register_runstate_memory_area {
121 union {
122 struct vcpu_runstate_info *v;
123 uint64_t p;
124 } addr;
125};
126
127/*
128 * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
129 * which can be set via these commands. Periods smaller than one millisecond
130 * may not be supported.
131 */
132#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */
133#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */
134struct vcpu_set_periodic_timer {
135 uint64_t period_ns;
136};
137
138/*
139 * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
140 * timer which can be set via these commands.
141 */
142#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */
143#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
144struct vcpu_set_singleshot_timer {
145 uint64_t timeout_abs_ns;
146 uint32_t flags; /* VCPU_SSHOTTMR_??? */
147};
148
149/* Flags to VCPUOP_set_singleshot_timer. */
150 /* Require the timeout to be in the future (return -ETIME if it's passed). */
151#define _VCPU_SSHOTTMR_future (0)
152#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future)
153
154/*
155 * Register a memory location in the guest address space for the
156 * vcpu_info structure. This allows the guest to place the vcpu_info
157 * structure in a convenient place, such as in a per-cpu data area.
158 * The pointer need not be page aligned, but the structure must not
159 * cross a page boundary.
160 */
161#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */
162struct vcpu_register_vcpu_info {
163 uint32_t mfn; /* mfn of page to place vcpu_info */
164 uint32_t offset; /* offset within page */
165};
166
167#endif /* __XEN_PUBLIC_VCPU_H__ */
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
new file mode 100644
index 000000000000..453235e923f0
--- /dev/null
+++ b/include/xen/interface/version.h
@@ -0,0 +1,60 @@
1/******************************************************************************
2 * version.h
3 *
4 * Xen version, type, and compile information.
5 *
6 * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com>
7 * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
8 */
9
10#ifndef __XEN_PUBLIC_VERSION_H__
11#define __XEN_PUBLIC_VERSION_H__
12
13/* NB. All ops return zero on success, except XENVER_version. */
14
15/* arg == NULL; returns major:minor (16:16). */
16#define XENVER_version 0
17
18/* arg == xen_extraversion_t. */
19#define XENVER_extraversion 1
20struct xen_extraversion {
21 char extraversion[16];
22};
23#define XEN_EXTRAVERSION_LEN (sizeof(struct xen_extraversion))
24
25/* arg == xen_compile_info_t. */
26#define XENVER_compile_info 2
27struct xen_compile_info {
28 char compiler[64];
29 char compile_by[16];
30 char compile_domain[32];
31 char compile_date[32];
32};
33
34#define XENVER_capabilities 3
35struct xen_capabilities_info {
36 char info[1024];
37};
38#define XEN_CAPABILITIES_INFO_LEN (sizeof(struct xen_capabilities_info))
39
40#define XENVER_changeset 4
41struct xen_changeset_info {
42 char info[64];
43};
44#define XEN_CHANGESET_INFO_LEN (sizeof(struct xen_changeset_info))
45
46#define XENVER_platform_parameters 5
47struct xen_platform_parameters {
48 unsigned long virt_start;
49};
50
51#define XENVER_get_features 6
52struct xen_feature_info {
53 unsigned int submap_idx; /* IN: which 32-bit submap to return */
54 uint32_t submap; /* OUT: 32-bit submap */
55};
56
57/* Declares the features reported by XENVER_get_features. */
58#include "features.h"
59
60#endif /* __XEN_PUBLIC_VERSION_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
new file mode 100644
index 000000000000..518a5bf79ed3
--- /dev/null
+++ b/include/xen/interface/xen.h
@@ -0,0 +1,447 @@
1/******************************************************************************
2 * xen.h
3 *
4 * Guest OS interface to Xen.
5 *
6 * Copyright (c) 2004, K A Fraser
7 */
8
9#ifndef __XEN_PUBLIC_XEN_H__
10#define __XEN_PUBLIC_XEN_H__
11
12#include <asm/xen/interface.h>
13
14/*
15 * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
16 */
17
18/*
19 * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
20 * EAX = return value
21 * (argument registers may be clobbered on return)
22 * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
23 * RAX = return value
24 * (argument registers not clobbered on return; RCX, R11 are)
25 */
26#define __HYPERVISOR_set_trap_table 0
27#define __HYPERVISOR_mmu_update 1
28#define __HYPERVISOR_set_gdt 2
29#define __HYPERVISOR_stack_switch 3
30#define __HYPERVISOR_set_callbacks 4
31#define __HYPERVISOR_fpu_taskswitch 5
32#define __HYPERVISOR_sched_op 6
33#define __HYPERVISOR_dom0_op 7
34#define __HYPERVISOR_set_debugreg 8
35#define __HYPERVISOR_get_debugreg 9
36#define __HYPERVISOR_update_descriptor 10
37#define __HYPERVISOR_memory_op 12
38#define __HYPERVISOR_multicall 13
39#define __HYPERVISOR_update_va_mapping 14
40#define __HYPERVISOR_set_timer_op 15
41#define __HYPERVISOR_event_channel_op_compat 16
42#define __HYPERVISOR_xen_version 17
43#define __HYPERVISOR_console_io 18
44#define __HYPERVISOR_physdev_op_compat 19
45#define __HYPERVISOR_grant_table_op 20
46#define __HYPERVISOR_vm_assist 21
47#define __HYPERVISOR_update_va_mapping_otherdomain 22
48#define __HYPERVISOR_iret 23 /* x86 only */
49#define __HYPERVISOR_vcpu_op 24
50#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
51#define __HYPERVISOR_mmuext_op 26
52#define __HYPERVISOR_acm_op 27
53#define __HYPERVISOR_nmi_op 28
54#define __HYPERVISOR_sched_op_new 29
55#define __HYPERVISOR_callback_op 30
56#define __HYPERVISOR_xenoprof_op 31
57#define __HYPERVISOR_event_channel_op 32
58#define __HYPERVISOR_physdev_op 33
59#define __HYPERVISOR_hvm_op 34
60
61/*
62 * VIRTUAL INTERRUPTS
63 *
64 * Virtual interrupts that a guest OS may receive from Xen.
65 */
66#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */
67#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */
68#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */
69#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */
70#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
71#define NR_VIRQS 8
72
73/*
74 * MMU-UPDATE REQUESTS
75 *
76 * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
77 * A foreigndom (FD) can be specified (or DOMID_SELF for none).
78 * Where the FD has some effect, it is described below.
79 * ptr[1:0] specifies the appropriate MMU_* command.
80 *
81 * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
82 * Updates an entry in a page table. If updating an L1 table, and the new
83 * table entry is valid/present, the mapped frame must belong to the FD, if
84 * an FD has been specified. If attempting to map an I/O page then the
85 * caller assumes the privilege of the FD.
86 * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
87 * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
88 * ptr[:2] -- Machine address of the page-table entry to modify.
89 * val -- Value to write.
90 *
91 * ptr[1:0] == MMU_MACHPHYS_UPDATE:
92 * Updates an entry in the machine->pseudo-physical mapping table.
93 * ptr[:2] -- Machine address within the frame whose mapping to modify.
94 * The frame must belong to the FD, if one is specified.
95 * val -- Value to write into the mapping entry.
96 */
97#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
98#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */
99
100/*
101 * MMU EXTENDED OPERATIONS
102 *
103 * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
104 * A foreigndom (FD) can be specified (or DOMID_SELF for none).
105 * Where the FD has some effect, it is described below.
106 *
107 * cmd: MMUEXT_(UN)PIN_*_TABLE
108 * mfn: Machine frame number to be (un)pinned as a p.t. page.
109 * The frame must belong to the FD, if one is specified.
110 *
111 * cmd: MMUEXT_NEW_BASEPTR
112 * mfn: Machine frame number of new page-table base to install in MMU.
113 *
114 * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
115 * mfn: Machine frame number of new page-table base to install in MMU
116 * when in user space.
117 *
118 * cmd: MMUEXT_TLB_FLUSH_LOCAL
119 * No additional arguments. Flushes local TLB.
120 *
121 * cmd: MMUEXT_INVLPG_LOCAL
122 * linear_addr: Linear address to be flushed from the local TLB.
123 *
124 * cmd: MMUEXT_TLB_FLUSH_MULTI
125 * vcpumask: Pointer to bitmap of VCPUs to be flushed.
126 *
127 * cmd: MMUEXT_INVLPG_MULTI
128 * linear_addr: Linear address to be flushed.
129 * vcpumask: Pointer to bitmap of VCPUs to be flushed.
130 *
131 * cmd: MMUEXT_TLB_FLUSH_ALL
132 * No additional arguments. Flushes all VCPUs' TLBs.
133 *
134 * cmd: MMUEXT_INVLPG_ALL
135 * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
136 *
137 * cmd: MMUEXT_FLUSH_CACHE
138 * No additional arguments. Writes back and flushes cache contents.
139 *
140 * cmd: MMUEXT_SET_LDT
141 * linear_addr: Linear address of LDT base (NB. must be page-aligned).
142 * nr_ents: Number of entries in LDT.
143 */
144#define MMUEXT_PIN_L1_TABLE 0
145#define MMUEXT_PIN_L2_TABLE 1
146#define MMUEXT_PIN_L3_TABLE 2
147#define MMUEXT_PIN_L4_TABLE 3
148#define MMUEXT_UNPIN_TABLE 4
149#define MMUEXT_NEW_BASEPTR 5
150#define MMUEXT_TLB_FLUSH_LOCAL 6
151#define MMUEXT_INVLPG_LOCAL 7
152#define MMUEXT_TLB_FLUSH_MULTI 8
153#define MMUEXT_INVLPG_MULTI 9
154#define MMUEXT_TLB_FLUSH_ALL 10
155#define MMUEXT_INVLPG_ALL 11
156#define MMUEXT_FLUSH_CACHE 12
157#define MMUEXT_SET_LDT 13
158#define MMUEXT_NEW_USER_BASEPTR 15
159
160#ifndef __ASSEMBLY__
161struct mmuext_op {
162 unsigned int cmd;
163 union {
164 /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
165 unsigned long mfn;
166 /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
167 unsigned long linear_addr;
168 } arg1;
169 union {
170 /* SET_LDT */
171 unsigned int nr_ents;
172 /* TLB_FLUSH_MULTI, INVLPG_MULTI */
173 void *vcpumask;
174 } arg2;
175};
176DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
177#endif
178
179/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
180/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */
181/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */
182#define UVMF_NONE (0UL<<0) /* No flushing at all. */
183#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */
184#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */
185#define UVMF_FLUSHTYPE_MASK (3UL<<0)
186#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */
187#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */
188#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */
189
190/*
191 * Commands to HYPERVISOR_console_io().
192 */
193#define CONSOLEIO_write 0
194#define CONSOLEIO_read 1
195
196/*
197 * Commands to HYPERVISOR_vm_assist().
198 */
199#define VMASST_CMD_enable 0
200#define VMASST_CMD_disable 1
201#define VMASST_TYPE_4gb_segments 0
202#define VMASST_TYPE_4gb_segments_notify 1
203#define VMASST_TYPE_writable_pagetables 2
204#define VMASST_TYPE_pae_extended_cr3 3
205#define MAX_VMASST_TYPE 3
206
207#ifndef __ASSEMBLY__
208
209typedef uint16_t domid_t;
210
211/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
212#define DOMID_FIRST_RESERVED (0x7FF0U)
213
214/* DOMID_SELF is used in certain contexts to refer to oneself. */
215#define DOMID_SELF (0x7FF0U)
216
217/*
218 * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
219 * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
220 * is useful to ensure that no mappings to the OS's own heap are accidentally
221 * installed. (e.g., in Linux this could cause havoc as reference counts
222 * aren't adjusted on the I/O-mapping code path).
223 * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
224 * be specified by any calling domain.
225 */
226#define DOMID_IO (0x7FF1U)
227
228/*
229 * DOMID_XEN is used to allow privileged domains to map restricted parts of
230 * Xen's heap space (e.g., the machine_to_phys table).
231 * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
232 * the caller is privileged.
233 */
234#define DOMID_XEN (0x7FF2U)
235
236/*
237 * Send an array of these to HYPERVISOR_mmu_update().
238 * NB. The fields are natural pointer/address size for this architecture.
239 */
240struct mmu_update {
241 uint64_t ptr; /* Machine address of PTE. */
242 uint64_t val; /* New contents of PTE. */
243};
244DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
245
246/*
247 * Send an array of these to HYPERVISOR_multicall().
248 * NB. The fields are natural register size for this architecture.
249 */
250struct multicall_entry {
251 unsigned long op;
252 long result;
253 unsigned long args[6];
254};
255DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
256
257/*
258 * Event channel endpoints per domain:
259 * 1024 if a long is 32 bits; 4096 if a long is 64 bits.
260 */
261#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
262
263struct vcpu_time_info {
264 /*
265 * Updates to the following values are preceded and followed
266 * by an increment of 'version'. The guest can therefore
267 * detect updates by looking for changes to 'version'. If the
268 * least-significant bit of the version number is set then an
269 * update is in progress and the guest must wait to read a
270 * consistent set of values. The correct way to interact with
271 * the version number is similar to Linux's seqlock: see the
272 * implementations of read_seqbegin/read_seqretry.
273 */
274 uint32_t version;
275 uint32_t pad0;
276 uint64_t tsc_timestamp; /* TSC at last update of time vals. */
277 uint64_t system_time; /* Time, in nanosecs, since boot. */
278 /*
279 * Current system time:
280 * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul
281 * CPU frequency (Hz):
282 * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
283 */
284 uint32_t tsc_to_system_mul;
285 int8_t tsc_shift;
286 int8_t pad1[3];
287}; /* 32 bytes */
288
289struct vcpu_info {
290 /*
291 * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
292 * a pending notification for a particular VCPU. It is then cleared
293 * by the guest OS /before/ checking for pending work, thus avoiding
294 * a set-and-check race. Note that the mask is only accessed by Xen
295 * on the CPU that is currently hosting the VCPU. This means that the
296 * pending and mask flags can be updated by the guest without special
297 * synchronisation (i.e., no need for the x86 LOCK prefix).
298 * This may seem suboptimal because if the pending flag is set by
299 * a different CPU then an IPI may be scheduled even when the mask
300 * is set. However, note:
301 * 1. The task of 'interrupt holdoff' is covered by the per-event-
302 * channel mask bits. A 'noisy' event that is continually being
303 * triggered can be masked at source at this very precise
304 * granularity.
305 * 2. The main purpose of the per-VCPU mask is therefore to restrict
306 * reentrant execution: whether for concurrency control, or to
307 * prevent unbounded stack usage. Whatever the purpose, we expect
308 * that the mask will be asserted only for short periods at a time,
309 * and so the likelihood of a 'spurious' IPI is suitably small.
310 * The mask is read before making an event upcall to the guest: a
311 * non-zero mask therefore guarantees that the VCPU will not receive
312 * an upcall activation. The mask is cleared when the VCPU requests
313 * to block: this avoids wakeup-waiting races.
314 */
315 uint8_t evtchn_upcall_pending;
316 uint8_t evtchn_upcall_mask;
317 unsigned long evtchn_pending_sel;
318 struct arch_vcpu_info arch;
319 struct vcpu_time_info time;
320}; /* 64 bytes (x86) */
321
322/*
323 * Xen/kernel shared data -- pointer provided in start_info.
324 * NB. We expect that this struct is smaller than a page.
325 */
326struct shared_info {
327 struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
328
329 /*
330 * A domain can create "event channels" on which it can send and receive
331 * asynchronous event notifications. There are three classes of event that
332 * are delivered by this mechanism:
333 * 1. Bi-directional inter- and intra-domain connections. Domains must
334 * arrange out-of-band to set up a connection (usually by allocating
335 * an unbound 'listener' port and avertising that via a storage service
336 * such as xenstore).
337 * 2. Physical interrupts. A domain with suitable hardware-access
338 * privileges can bind an event-channel port to a physical interrupt
339 * source.
340 * 3. Virtual interrupts ('events'). A domain can bind an event-channel
341 * port to a virtual interrupt source, such as the virtual-timer
342 * device or the emergency console.
343 *
344 * Event channels are addressed by a "port index". Each channel is
345 * associated with two bits of information:
346 * 1. PENDING -- notifies the domain that there is a pending notification
347 * to be processed. This bit is cleared by the guest.
348 * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING
349 * will cause an asynchronous upcall to be scheduled. This bit is only
350 * updated by the guest. It is read-only within Xen. If a channel
351 * becomes pending while the channel is masked then the 'edge' is lost
352 * (i.e., when the channel is unmasked, the guest must manually handle
353 * pending notifications as no upcall will be scheduled by Xen).
354 *
355 * To expedite scanning of pending notifications, any 0->1 pending
356 * transition on an unmasked channel causes a corresponding bit in a
357 * per-vcpu selector word to be set. Each bit in the selector covers a
358 * 'C long' in the PENDING bitfield array.
359 */
360 unsigned long evtchn_pending[sizeof(unsigned long) * 8];
361 unsigned long evtchn_mask[sizeof(unsigned long) * 8];
362
363 /*
364 * Wallclock time: updated only by control software. Guests should base
365 * their gettimeofday() syscall on this wallclock-base value.
366 */
367 uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */
368 uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
369 uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
370
371 struct arch_shared_info arch;
372
373};
374
375/*
376 * Start-of-day memory layout for the initial domain (DOM0):
377 * 1. The domain is started within contiguous virtual-memory region.
378 * 2. The contiguous region begins and ends on an aligned 4MB boundary.
379 * 3. The region start corresponds to the load address of the OS image.
380 * If the load address is not 4MB aligned then the address is rounded down.
381 * 4. This the order of bootstrap elements in the initial virtual region:
382 * a. relocated kernel image
383 * b. initial ram disk [mod_start, mod_len]
384 * c. list of allocated page frames [mfn_list, nr_pages]
385 * d. start_info_t structure [register ESI (x86)]
386 * e. bootstrap page tables [pt_base, CR3 (x86)]
387 * f. bootstrap stack [register ESP (x86)]
388 * 5. Bootstrap elements are packed together, but each is 4kB-aligned.
389 * 6. The initial ram disk may be omitted.
390 * 7. The list of page frames forms a contiguous 'pseudo-physical' memory
391 * layout for the domain. In particular, the bootstrap virtual-memory
392 * region is a 1:1 mapping to the first section of the pseudo-physical map.
393 * 8. All bootstrap elements are mapped read-writable for the guest OS. The
394 * only exception is the bootstrap page table, which is mapped read-only.
395 * 9. There is guaranteed to be at least 512kB padding after the final
396 * bootstrap element. If necessary, the bootstrap virtual region is
397 * extended by an extra 4MB to ensure this.
398 */
399
400#define MAX_GUEST_CMDLINE 1024
401struct start_info {
402 /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */
403 char magic[32]; /* "xen-<version>-<platform>". */
404 unsigned long nr_pages; /* Total pages allocated to this domain. */
405 unsigned long shared_info; /* MACHINE address of shared info struct. */
406 uint32_t flags; /* SIF_xxx flags. */
407 unsigned long store_mfn; /* MACHINE page number of shared page. */
408 uint32_t store_evtchn; /* Event channel for store communication. */
409 union {
410 struct {
411 unsigned long mfn; /* MACHINE page number of console page. */
412 uint32_t evtchn; /* Event channel for console page. */
413 } domU;
414 struct {
415 uint32_t info_off; /* Offset of console_info struct. */
416 uint32_t info_size; /* Size of console_info struct from start.*/
417 } dom0;
418 } console;
419 /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */
420 unsigned long pt_base; /* VIRTUAL address of page directory. */
421 unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */
422 unsigned long mfn_list; /* VIRTUAL address of page-frame list. */
423 unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
424 unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
425 int8_t cmd_line[MAX_GUEST_CMDLINE];
426};
427
428/* These flags are passed in the 'flags' field of start_info_t. */
429#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
430#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
431
432typedef uint64_t cpumap_t;
433
434typedef uint8_t xen_domain_handle_t[16];
435
436/* Turn a plain number into a C unsigned long constant. */
437#define __mk_unsigned_long(x) x ## UL
438#define mk_unsigned_long(x) __mk_unsigned_long(x)
439
440#else /* __ASSEMBLY__ */
441
442/* In assembly code we cannot use C numeric constant suffixes. */
443#define mk_unsigned_long(x) x
444
445#endif /* !__ASSEMBLY__ */
446
447#endif /* __XEN_PUBLIC_XEN_H__ */
diff --git a/include/xen/page.h b/include/xen/page.h
new file mode 100644
index 000000000000..1df6c1930578
--- /dev/null
+++ b/include/xen/page.h
@@ -0,0 +1,179 @@
1#ifndef __XEN_PAGE_H
2#define __XEN_PAGE_H
3
4#include <linux/pfn.h>
5
6#include <asm/uaccess.h>
7
8#include <xen/features.h>
9
10#ifdef CONFIG_X86_PAE
11/* Xen machine address */
12typedef struct xmaddr {
13 unsigned long long maddr;
14} xmaddr_t;
15
16/* Xen pseudo-physical address */
17typedef struct xpaddr {
18 unsigned long long paddr;
19} xpaddr_t;
20#else
21/* Xen machine address */
22typedef struct xmaddr {
23 unsigned long maddr;
24} xmaddr_t;
25
26/* Xen pseudo-physical address */
27typedef struct xpaddr {
28 unsigned long paddr;
29} xpaddr_t;
30#endif
31
32#define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
33#define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
34
35/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
36#define INVALID_P2M_ENTRY (~0UL)
37#define FOREIGN_FRAME_BIT (1UL<<31)
38#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
39
40extern unsigned long *phys_to_machine_mapping;
41
42static inline unsigned long pfn_to_mfn(unsigned long pfn)
43{
44 if (xen_feature(XENFEAT_auto_translated_physmap))
45 return pfn;
46
47 return phys_to_machine_mapping[(unsigned int)(pfn)] &
48 ~FOREIGN_FRAME_BIT;
49}
50
51static inline int phys_to_machine_mapping_valid(unsigned long pfn)
52{
53 if (xen_feature(XENFEAT_auto_translated_physmap))
54 return 1;
55
56 return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
57}
58
59static inline unsigned long mfn_to_pfn(unsigned long mfn)
60{
61 unsigned long pfn;
62
63 if (xen_feature(XENFEAT_auto_translated_physmap))
64 return mfn;
65
66#if 0
67 if (unlikely((mfn >> machine_to_phys_order) != 0))
68 return max_mapnr;
69#endif
70
71 pfn = 0;
72 /*
73 * The array access can fail (e.g., device space beyond end of RAM).
74 * In such cases it doesn't matter what we return (we return garbage),
75 * but we must handle the fault without crashing!
76 */
77 __get_user(pfn, &machine_to_phys_mapping[mfn]);
78
79 return pfn;
80}
81
82static inline xmaddr_t phys_to_machine(xpaddr_t phys)
83{
84 unsigned offset = phys.paddr & ~PAGE_MASK;
85 return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
86}
87
88static inline xpaddr_t machine_to_phys(xmaddr_t machine)
89{
90 unsigned offset = machine.maddr & ~PAGE_MASK;
91 return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
92}
93
94/*
95 * We detect special mappings in one of two ways:
96 * 1. If the MFN is an I/O page then Xen will set the m2p entry
97 * to be outside our maximum possible pseudophys range.
98 * 2. If the MFN belongs to a different domain then we will certainly
99 * not have MFN in our p2m table. Conversely, if the page is ours,
100 * then we'll have p2m(m2p(MFN))==MFN.
101 * If we detect a special mapping then it doesn't have a 'struct page'.
102 * We force !pfn_valid() by returning an out-of-range pointer.
103 *
104 * NB. These checks require that, for any MFN that is not in our reservation,
105 * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
106 * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN.
107 * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
108 *
109 * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
110 * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
111 * require. In all the cases we care about, the FOREIGN_FRAME bit is
112 * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
113 */
114static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
115{
116 extern unsigned long max_mapnr;
117 unsigned long pfn = mfn_to_pfn(mfn);
118 if ((pfn < max_mapnr)
119 && !xen_feature(XENFEAT_auto_translated_physmap)
120 && (phys_to_machine_mapping[pfn] != mfn))
121 return max_mapnr; /* force !pfn_valid() */
122 return pfn;
123}
124
125static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
126{
127 if (xen_feature(XENFEAT_auto_translated_physmap)) {
128 BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
129 return;
130 }
131 phys_to_machine_mapping[pfn] = mfn;
132}
133
134/* VIRT <-> MACHINE conversion */
135#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
136#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v))))
137#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
138
139#ifdef CONFIG_X86_PAE
140#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
141 (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)))
142
143static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
144{
145 pte_t pte;
146
147 pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) |
148 (pgprot_val(pgprot) >> 32);
149 pte.pte_high &= (__supported_pte_mask >> 32);
150 pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
151 pte.pte_low &= __supported_pte_mask;
152
153 return pte;
154}
155
156static inline unsigned long long pte_val_ma(pte_t x)
157{
158 return ((unsigned long long)x.pte_high << 32) | x.pte_low;
159}
160#define pmd_val_ma(v) ((v).pmd)
161#define pud_val_ma(v) ((v).pgd.pgd)
162#define __pte_ma(x) ((pte_t) { .pte_low = (x), .pte_high = (x)>>32 } )
163#define __pmd_ma(x) ((pmd_t) { (x) } )
164#else /* !X86_PAE */
165#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
166#define mfn_pte(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
167#define pte_val_ma(x) ((x).pte_low)
168#define pmd_val_ma(v) ((v).pud.pgd.pgd)
169#define __pte_ma(x) ((pte_t) { (x) } )
170#endif /* CONFIG_X86_PAE */
171
172#define pgd_val_ma(x) ((x).pgd)
173
174
175xmaddr_t arbitrary_virt_to_machine(unsigned long address);
176void make_lowmem_page_readonly(void *vaddr);
177void make_lowmem_page_readwrite(void *vaddr);
178
179#endif /* __XEN_PAGE_H */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
new file mode 100644
index 000000000000..6f7c290651ae
--- /dev/null
+++ b/include/xen/xenbus.h
@@ -0,0 +1,234 @@
1/******************************************************************************
2 * xenbus.h
3 *
4 * Talks to Xen Store to figure out what devices we have.
5 *
6 * Copyright (C) 2005 Rusty Russell, IBM Corporation
7 * Copyright (C) 2005 XenSource Ltd.
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
33
34#ifndef _XEN_XENBUS_H
35#define _XEN_XENBUS_H
36
37#include <linux/device.h>
38#include <linux/notifier.h>
39#include <linux/mutex.h>
40#include <linux/completion.h>
41#include <linux/init.h>
42#include <xen/interface/xen.h>
43#include <xen/interface/grant_table.h>
44#include <xen/interface/io/xenbus.h>
45#include <xen/interface/io/xs_wire.h>
46
47/* Register callback to watch this node. */
48struct xenbus_watch
49{
50 struct list_head list;
51
52 /* Path being watched. */
53 const char *node;
54
55 /* Callback (executed in a process context with no locks held). */
56 void (*callback)(struct xenbus_watch *,
57 const char **vec, unsigned int len);
58};
59
60
61/* A xenbus device. */
62struct xenbus_device {
63 const char *devicetype;
64 const char *nodename;
65 const char *otherend;
66 int otherend_id;
67 struct xenbus_watch otherend_watch;
68 struct device dev;
69 enum xenbus_state state;
70 struct completion down;
71};
72
73static inline struct xenbus_device *to_xenbus_device(struct device *dev)
74{
75 return container_of(dev, struct xenbus_device, dev);
76}
77
78struct xenbus_device_id
79{
80 /* .../device/<device_type>/<identifier> */
81 char devicetype[32]; /* General class of device. */
82};
83
84/* A xenbus driver. */
85struct xenbus_driver {
86 char *name;
87 struct module *owner;
88 const struct xenbus_device_id *ids;
89 int (*probe)(struct xenbus_device *dev,
90 const struct xenbus_device_id *id);
91 void (*otherend_changed)(struct xenbus_device *dev,
92 enum xenbus_state backend_state);
93 int (*remove)(struct xenbus_device *dev);
94 int (*suspend)(struct xenbus_device *dev);
95 int (*suspend_cancel)(struct xenbus_device *dev);
96 int (*resume)(struct xenbus_device *dev);
97 int (*uevent)(struct xenbus_device *, char **, int, char *, int);
98 struct device_driver driver;
99 int (*read_otherend_details)(struct xenbus_device *dev);
100};
101
102static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
103{
104 return container_of(drv, struct xenbus_driver, driver);
105}
106
107int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
108 struct module *owner,
109 const char *mod_name);
110
111static inline int __must_check
112xenbus_register_frontend(struct xenbus_driver *drv)
113{
114 WARN_ON(drv->owner != THIS_MODULE);
115 return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
116}
117
118int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
119 struct module *owner,
120 const char *mod_name);
121static inline int __must_check
122xenbus_register_backend(struct xenbus_driver *drv)
123{
124 WARN_ON(drv->owner != THIS_MODULE);
125 return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
126}
127
128void xenbus_unregister_driver(struct xenbus_driver *drv);
129
130struct xenbus_transaction
131{
132 u32 id;
133};
134
135/* Nil transaction ID. */
136#define XBT_NIL ((struct xenbus_transaction) { 0 })
137
138int __init xenbus_dev_init(void);
139
140char **xenbus_directory(struct xenbus_transaction t,
141 const char *dir, const char *node, unsigned int *num);
142void *xenbus_read(struct xenbus_transaction t,
143 const char *dir, const char *node, unsigned int *len);
144int xenbus_write(struct xenbus_transaction t,
145 const char *dir, const char *node, const char *string);
146int xenbus_mkdir(struct xenbus_transaction t,
147 const char *dir, const char *node);
148int xenbus_exists(struct xenbus_transaction t,
149 const char *dir, const char *node);
150int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node);
151int xenbus_transaction_start(struct xenbus_transaction *t);
152int xenbus_transaction_end(struct xenbus_transaction t, int abort);
153
154/* Single read and scanf: returns -errno or num scanned if > 0. */
155int xenbus_scanf(struct xenbus_transaction t,
156 const char *dir, const char *node, const char *fmt, ...)
157 __attribute__((format(scanf, 4, 5)));
158
159/* Single printf and write: returns -errno or 0. */
160int xenbus_printf(struct xenbus_transaction t,
161 const char *dir, const char *node, const char *fmt, ...)
162 __attribute__((format(printf, 4, 5)));
163
164/* Generic read function: NULL-terminated triples of name,
165 * sprintf-style type string, and pointer. Returns 0 or errno.*/
166int xenbus_gather(struct xenbus_transaction t, const char *dir, ...);
167
168/* notifer routines for when the xenstore comes up */
169extern int xenstored_ready;
170int register_xenstore_notifier(struct notifier_block *nb);
171void unregister_xenstore_notifier(struct notifier_block *nb);
172
173int register_xenbus_watch(struct xenbus_watch *watch);
174void unregister_xenbus_watch(struct xenbus_watch *watch);
175void xs_suspend(void);
176void xs_resume(void);
177void xs_suspend_cancel(void);
178
179/* Used by xenbus_dev to borrow kernel's store connection. */
180void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);
181
182struct work_struct;
183
184/* Prepare for domain suspend: then resume or cancel the suspend. */
185void xenbus_suspend(void);
186void xenbus_resume(void);
187void xenbus_probe(struct work_struct *);
188void xenbus_suspend_cancel(void);
189
190#define XENBUS_IS_ERR_READ(str) ({ \
191 if (!IS_ERR(str) && strlen(str) == 0) { \
192 kfree(str); \
193 str = ERR_PTR(-ERANGE); \
194 } \
195 IS_ERR(str); \
196})
197
198#define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE)
199
200int xenbus_watch_path(struct xenbus_device *dev, const char *path,
201 struct xenbus_watch *watch,
202 void (*callback)(struct xenbus_watch *,
203 const char **, unsigned int));
204int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
205 void (*callback)(struct xenbus_watch *,
206 const char **, unsigned int),
207 const char *pathfmt, ...)
208 __attribute__ ((format (printf, 4, 5)));
209
210int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
211int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
212int xenbus_map_ring_valloc(struct xenbus_device *dev,
213 int gnt_ref, void **vaddr);
214int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
215 grant_handle_t *handle, void *vaddr);
216
217int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
218int xenbus_unmap_ring(struct xenbus_device *dev,
219 grant_handle_t handle, void *vaddr);
220
221int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
222int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
223int xenbus_free_evtchn(struct xenbus_device *dev, int port);
224
225enum xenbus_state xenbus_read_driver_state(const char *path);
226
227void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...);
228void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...);
229
230const char *xenbus_strstate(enum xenbus_state state);
231int xenbus_dev_is_online(struct xenbus_device *dev);
232int xenbus_frontend_closed(struct xenbus_device *dev);
233
234#endif /* _XEN_XENBUS_H */