-rw-r--r--  arch/frv/Kconfig                       |   1
-rw-r--r--  arch/frv/include/asm/atomic.h          |  68
-rw-r--r--  arch/frv/include/asm/perf_counter.h    |  17
-rw-r--r--  arch/frv/include/asm/system.h          |   2
-rw-r--r--  arch/frv/kernel/frv_ksyms.c            |   4
-rw-r--r--  arch/frv/lib/Makefile                  |   4
-rw-r--r--  arch/frv/lib/atomic-ops.S              |   3
-rw-r--r--  arch/frv/lib/atomic64-ops.S            | 162
-rw-r--r--  arch/frv/lib/perf_counter.c            |  19
-rw-r--r--  arch/sh/Kconfig.debug                  |   4
-rw-r--r--  arch/sh/boards/mach-se/7206/io.c       |   2
-rw-r--r--  arch/sh/boards/mach-se/7724/setup.c    | 110
-rw-r--r--  arch/sh/configs/migor_defconfig        |  53
-rw-r--r--  arch/sh/configs/se7724_defconfig       |  25
-rw-r--r--  arch/sh/include/asm/perf_counter.h     |   2
-rw-r--r--  arch/sh/include/asm/syscall_32.h       |   1
-rw-r--r--  arch/sh/include/mach-se/mach/se7724.h  |   5
-rw-r--r--  arch/sh/mm/fault_32.c                  |  61
-rw-r--r--  arch/sh/mm/tlbflush_64.c               |  15
-rw-r--r--  arch/x86/Kconfig                       |  15
-rw-r--r--  arch/x86/include/asm/proto.h           |  11
-rw-r--r--  arch/x86/kernel/e820.c                 |  16
-rw-r--r--  arch/x86/kernel/pci-dma.c              |   2
-rw-r--r--  drivers/clocksource/sh_tmu.c           |   2
-rw-r--r--  drivers/pci/intel-iommu.c              | 695
-rw-r--r--  drivers/scsi/cxgb3i/cxgb3i_iscsi.c     |   4
-rw-r--r--  drivers/scsi/fnic/fnic_main.c          |   8
-rw-r--r--  drivers/scsi/fnic/fnic_scsi.c          |   7
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvscsi.c       |   7
-rw-r--r--  drivers/scsi/scsi_transport_fc.c       |   5
-rw-r--r--  drivers/scsi/zalon.c                   |   2
-rw-r--r--  drivers/video/sh_mobile_lcdcfb.c       |  53
-rw-r--r--  fs/btrfs/async-thread.c                |   2
-rw-r--r--  fs/btrfs/ctree.h                       |   3
-rw-r--r--  fs/btrfs/extent-tree.c                 | 566
-rw-r--r--  fs/btrfs/file.c                        |   5
-rw-r--r--  fs/btrfs/inode.c                       |  25
-rw-r--r--  fs/btrfs/ioctl.c                       |   6
-rw-r--r--  fs/btrfs/relocation.c                  |   5
-rw-r--r--  fs/btrfs/transaction.c                 |   4
-rw-r--r--  mm/nommu.c                             |  21
-rw-r--r--  tools/perf/perf.h                      |  10
42 files changed, 1396 insertions, 636 deletions
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 8a5bd7a9c6f5..b86e19c9b5b0 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -7,6 +7,7 @@ config FRV
 	default y
 	select HAVE_IDE
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_PERF_COUNTERS
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 0409d981fd39..00a57af79afc 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -121,10 +121,72 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_dec_and_test(v)	(atomic_sub_return(1, (v)) == 0)
 #define atomic_inc_and_test(v)	(atomic_add_return(1, (v)) == 0)
 
+/*
+ * 64-bit atomic ops
+ */
+typedef struct {
+	volatile long long counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(i)	{ (i) }
+
+static inline long long atomic64_read(atomic64_t *v)
+{
+	long long counter;
+
+	asm("ldd%I1 %M1,%0"
+	    : "=e"(counter)
+	    : "m"(v->counter));
+	return counter;
+}
+
+static inline void atomic64_set(atomic64_t *v, long long i)
+{
+	asm volatile("std%I0 %1,%M0"
+		     : "=m"(v->counter)
+		     : "e"(i));
+}
+
+extern long long atomic64_inc_return(atomic64_t *v);
+extern long long atomic64_dec_return(atomic64_t *v);
+extern long long atomic64_add_return(long long i, atomic64_t *v);
+extern long long atomic64_sub_return(long long i, atomic64_t *v);
+
+static inline long long atomic64_add_negative(long long i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+
+static inline void atomic64_add(long long i, atomic64_t *v)
+{
+	atomic64_add_return(i, v);
+}
+
+static inline void atomic64_sub(long long i, atomic64_t *v)
+{
+	atomic64_sub_return(i, v);
+}
+
+static inline void atomic64_inc(atomic64_t *v)
+{
+	atomic64_inc_return(v);
+}
+
+static inline void atomic64_dec(atomic64_t *v)
+{
+	atomic64_dec_return(v);
+}
+
+#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_and_test(v)	(atomic64_inc_return((v)) == 0)
+
 /*****************************************************************************/
 /*
  * exchange value with memory
  */
+extern uint64_t __xchg_64(uint64_t i, volatile void *v);
+
 #ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
 
 #define xchg(ptr, x)	\
@@ -174,8 +236,10 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
 
 #define tas(ptr) (xchg((ptr), 1))
 
-#define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), old, new))
-#define atomic_xchg(v, new)		(xchg(&((v)->counter), new))
+#define atomic_cmpxchg(v, old, new)	(cmpxchg(&(v)->counter, old, new))
+#define atomic_xchg(v, new)		(xchg(&(v)->counter, new))
+#define atomic64_cmpxchg(v, old, new)	(__cmpxchg_64(old, new, &(v)->counter))
+#define atomic64_xchg(v, new)		(__xchg_64(new, &(v)->counter))
 
 static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 {
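
The new 64-bit ops mirror the existing 32-bit atomic_t interface, with the arithmetic helpers implemented out of line in arch/frv/lib/atomic64-ops.S. A minimal usage sketch in C (illustrative only; the counter name and values are not from the patch):

static atomic64_t bytes_done = ATOMIC64_INIT(0);

void account(long long n)
{
	atomic64_add(n, &bytes_done);		/* backed by atomic64_add_return() */

	/* atomic64_read()/atomic64_set() use a single 64-bit ldd/std,
	 * so no extra locking is needed around them */
	if (atomic64_read(&bytes_done) < 0)
		atomic64_set(&bytes_done, 0);
}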
diff --git a/arch/frv/include/asm/perf_counter.h b/arch/frv/include/asm/perf_counter.h
new file mode 100644
index 000000000000..ccf726e61b2e
--- /dev/null
+++ b/arch/frv/include/asm/perf_counter.h
@@ -0,0 +1,17 @@
+/* FRV performance counter support
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_PERF_COUNTER_H
+#define _ASM_PERF_COUNTER_H
+
+#define PERF_COUNTER_INDEX_OFFSET	0
+
+#endif /* _ASM_PERF_COUNTER_H */
diff --git a/arch/frv/include/asm/system.h b/arch/frv/include/asm/system.h
index 7742ec000cc4..efd22d9077ac 100644
--- a/arch/frv/include/asm/system.h
+++ b/arch/frv/include/asm/system.h
@@ -208,6 +208,8 @@ extern void free_initmem(void);
  * - if (*ptr == test) then orig = *ptr; *ptr = test;
  * - if (*ptr != test) then orig = *ptr;
  */
+extern uint64_t __cmpxchg_64(uint64_t test, uint64_t new, volatile uint64_t *v);
+
 #ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
 
 #define cmpxchg(ptr, test, new)	\
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index 0316b3c50eff..a89803b58b9a 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -67,6 +67,10 @@ EXPORT_SYMBOL(atomic_sub_return);
 EXPORT_SYMBOL(__xchg_32);
 EXPORT_SYMBOL(__cmpxchg_32);
 #endif
+EXPORT_SYMBOL(atomic64_add_return);
+EXPORT_SYMBOL(atomic64_sub_return);
+EXPORT_SYMBOL(__xchg_64);
+EXPORT_SYMBOL(__cmpxchg_64);
 
 EXPORT_SYMBOL(__debug_bug_printk);
 EXPORT_SYMBOL(__delay_loops_MHz);
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index 08be305c9f44..0a377210c89b 100644
--- a/arch/frv/lib/Makefile
+++ b/arch/frv/lib/Makefile
@@ -4,5 +4,5 @@
 
 lib-y := \
 	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
-	checksum.o memcpy.o memset.o atomic-ops.o \
-	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
+	checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
+	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_counter.o
diff --git a/arch/frv/lib/atomic-ops.S b/arch/frv/lib/atomic-ops.S
index ee0ac905fb08..5e9e6ab5dd0e 100644
--- a/arch/frv/lib/atomic-ops.S
+++ b/arch/frv/lib/atomic-ops.S
@@ -163,11 +163,10 @@ __cmpxchg_32:
 	ld.p		@(gr11,gr0),gr8
 	orcr		cc7,cc7,cc3
 	subcc		gr8,gr9,gr7,icc0
-	bne		icc0,#0,1f
+	bnelr		icc0,#0
 	cst.p		gr10,@(gr11,gr0)	,cc3,#1
 	corcc		gr29,gr29,gr0		,cc3,#1
 	beq		icc3,#0,0b
-1:
 	bralr
 
 	.size		__cmpxchg_32, .-__cmpxchg_32
diff --git a/arch/frv/lib/atomic64-ops.S b/arch/frv/lib/atomic64-ops.S
new file mode 100644
index 000000000000..b6194eeac127
--- /dev/null
+++ b/arch/frv/lib/atomic64-ops.S
@@ -0,0 +1,162 @@
+/* kernel atomic64 operations
+ *
+ * For an explanation of how atomic ops work in this arch, see:
+ * Documentation/frv/atomic-ops.txt
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/spr-regs.h>
+
+	.text
+	.balign 4
+
+
+###############################################################################
+#
+# long long atomic64_inc_return(atomic64_t *v)
+#
+###############################################################################
+	.globl		atomic64_inc_return
+	.type		atomic64_inc_return,@function
+atomic64_inc_return:
+	or.p		gr8,gr8,gr10
+0:
+	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
+	ckeq		icc3,cc7
+	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
+	orcr		cc7,cc7,cc3			/* set CC3 to true */
+	addicc		gr9,#1,gr9,icc0
+	addxi		gr8,#0,gr8,icc0
+	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
+	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
+	beq		icc3,#0,0b
+	bralr
+
+	.size		atomic64_inc_return, .-atomic64_inc_return
+
+###############################################################################
+#
+# long long atomic64_dec_return(atomic64_t *v)
+#
+###############################################################################
+	.globl		atomic64_dec_return
+	.type		atomic64_dec_return,@function
+atomic64_dec_return:
+	or.p		gr8,gr8,gr10
+0:
+	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
+	ckeq		icc3,cc7
+	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
+	orcr		cc7,cc7,cc3			/* set CC3 to true */
+	subicc		gr9,#1,gr9,icc0
+	subxi		gr8,#0,gr8,icc0
+	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
+	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
+	beq		icc3,#0,0b
+	bralr
+
+	.size		atomic64_dec_return, .-atomic64_dec_return
+
+###############################################################################
+#
+# long long atomic64_add_return(long long i, atomic64_t *v)
+#
+###############################################################################
+	.globl		atomic64_add_return
+	.type		atomic64_add_return,@function
+atomic64_add_return:
+	or.p		gr8,gr8,gr4
+	or		gr9,gr9,gr5
+0:
+	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
+	ckeq		icc3,cc7
+	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
+	orcr		cc7,cc7,cc3			/* set CC3 to true */
+	addcc		gr9,gr5,gr9,icc0
+	addx		gr8,gr4,gr8,icc0
+	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
+	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
+	beq		icc3,#0,0b
+	bralr
+
+	.size		atomic64_add_return, .-atomic64_add_return
+
+###############################################################################
+#
+# long long atomic64_sub_return(long long i, atomic64_t *v)
+#
+###############################################################################
+	.globl		atomic64_sub_return
+	.type		atomic64_sub_return,@function
+atomic64_sub_return:
+	or.p		gr8,gr8,gr4
+	or		gr9,gr9,gr5
+0:
+	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
+	ckeq		icc3,cc7
+	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
+	orcr		cc7,cc7,cc3			/* set CC3 to true */
+	subcc		gr9,gr5,gr9,icc0
+	subx		gr8,gr4,gr8,icc0
+	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
+	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
+	beq		icc3,#0,0b
+	bralr
+
+	.size		atomic64_sub_return, .-atomic64_sub_return
+
+###############################################################################
+#
+# uint64_t __xchg_64(uint64_t i, uint64_t *v)
+#
+###############################################################################
+	.globl		__xchg_64
+	.type		__xchg_64,@function
+__xchg_64:
+	or.p		gr8,gr8,gr4
+	or		gr9,gr9,gr5
+0:
+	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
+	ckeq		icc3,cc7
+	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
+	orcr		cc7,cc7,cc3			/* set CC3 to true */
+	cstd.p		gr4,@(gr10,gr0)		,cc3,#1
+	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
+	beq		icc3,#0,0b
+	bralr
+
+	.size		__xchg_64, .-__xchg_64
+
+###############################################################################
+#
+# uint64_t __cmpxchg_64(uint64_t test, uint64_t new, uint64_t *v)
+#
+###############################################################################
+	.globl		__cmpxchg_64
+	.type		__cmpxchg_64,@function
+__cmpxchg_64:
+	or.p		gr8,gr8,gr4
+	or		gr9,gr9,gr5
+0:
+	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
+	ckeq		icc3,cc7
+	ldd.p		@(gr12,gr0),gr8			/* LDD.P/ORCR must be atomic */
+	orcr		cc7,cc7,cc3
+	subcc		gr8,gr4,gr0,icc0
+	subcc.p		gr9,gr5,gr0,icc1
+	bnelr		icc0,#0
+	bnelr		icc1,#0
+	cstd.p		gr10,@(gr12,gr0)	,cc3,#1
+	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
+	beq		icc3,#0,0b
+	bralr
+
+	.size		__cmpxchg_64, .-__cmpxchg_64
+
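
Each routine follows the pattern referenced in Documentation/frv/atomic-ops.txt: ICC3.Z is set up, the 64-bit value is fetched with LDD.P while ORCR arms CC3, the result is computed, and CSTD.P stores it back only if nothing disturbed the sequence; otherwise the loop restarts. Roughly, in C terms (a sketch only; store_if_atomic() is a made-up stand-in for the conditional CSTD.P store, and the real implementation is the assembly above):

long long atomic64_add_return(long long i, atomic64_t *v)
{
	long long old, new;

	do {
		old = v->counter;			/* LDD.P: one 64-bit load */
		new = old + i;				/* addcc/addx pair */
	} while (!store_if_atomic(&v->counter, new));	/* CSTD.P + ICC3.Z retry */

	return new;
}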
diff --git a/arch/frv/lib/perf_counter.c b/arch/frv/lib/perf_counter.c
new file mode 100644
index 000000000000..2000feecd571
--- /dev/null
+++ b/arch/frv/lib/perf_counter.c
@@ -0,0 +1,19 @@
+/* Performance counter handling
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/perf_counter.h>
+
+/*
+ * mark the performance counter as pending
+ */
+void set_perf_counter_pending(void)
+{
+}
diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug
index 8ece0b5bd028..39224b57c6ef 100644
--- a/arch/sh/Kconfig.debug
+++ b/arch/sh/Kconfig.debug
@@ -61,10 +61,6 @@ config EARLY_PRINTK
 	  select both the EARLY_SCIF_CONSOLE and SH_STANDARD_BIOS, using
 	  the kernel command line option to toggle back and forth.
 
-config DEBUG_BOOTMEM
-	depends on DEBUG_KERNEL
-	bool "Debug BOOTMEM initialization"
-
 config DEBUG_STACKOVERFLOW
 	bool "Check for stack overflows"
 	depends on DEBUG_KERNEL && SUPERH32
diff --git a/arch/sh/boards/mach-se/7206/io.c b/arch/sh/boards/mach-se/7206/io.c
index 9c3a33210d61..180455642a43 100644
--- a/arch/sh/boards/mach-se/7206/io.c
+++ b/arch/sh/boards/mach-se/7206/io.c
@@ -50,7 +50,7 @@ unsigned char se7206_inb_p(unsigned long port)
 
 unsigned short se7206_inw(unsigned long port)
 {
-	return *port2adr(port);;
+	return *port2adr(port);
 }
 
 void se7206_outb(unsigned char value, unsigned long port)
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index 9cd04bd558b8..c050a8d76dfd 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -23,6 +23,8 @@
 #include <media/sh_mobile_ceu.h>
 #include <asm/io.h>
 #include <asm/heartbeat.h>
+#include <asm/sh_eth.h>
+#include <asm/clock.h>
 #include <asm/sh_keysc.h>
 #include <cpu/sh7724.h>
 #include <mach-se/mach/se7724.h>
@@ -272,6 +274,34 @@ static struct platform_device keysc_device = {
 	},
 };
 
+/* SH Eth */
+static struct resource sh_eth_resources[] = {
+	[0] = {
+		.start = SH_ETH_ADDR,
+		.end   = SH_ETH_ADDR + 0x1FC,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = 91,
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
+	},
+};
+
+struct sh_eth_plat_data sh_eth_plat = {
+	.phy = 0x1f, /* SMSC LAN8187 */
+	.edmac_endian = EDMAC_LITTLE_ENDIAN,
+};
+
+static struct platform_device sh_eth_device = {
+	.name = "sh-eth",
+	.id = 0,
+	.dev = {
+		.platform_data = &sh_eth_plat,
+	},
+	.num_resources = ARRAY_SIZE(sh_eth_resources),
+	.resource = sh_eth_resources,
+};
+
 static struct platform_device *ms7724se_devices[] __initdata = {
 	&heartbeat_device,
 	&smc91x_eth_device,
@@ -280,8 +310,57 @@ static struct platform_device *ms7724se_devices[] __initdata = {
 	&ceu0_device,
 	&ceu1_device,
 	&keysc_device,
+	&sh_eth_device,
 };
 
+#define EEPROM_OP   0xBA206000
+#define EEPROM_ADR  0xBA206004
+#define EEPROM_DATA 0xBA20600C
+#define EEPROM_STAT 0xBA206010
+#define EEPROM_STRT 0xBA206014
+static int __init sh_eth_is_eeprom_ready(void)
+{
+	int t = 10000;
+
+	while (t--) {
+		if (!ctrl_inw(EEPROM_STAT))
+			return 1;
+		cpu_relax();
+	}
+
+	printk(KERN_ERR "ms7724se can not access to eeprom\n");
+	return 0;
+}
+
+static void __init sh_eth_init(void)
+{
+	int i;
+	u16 mac[3];
+
+	/* check EEPROM status */
+	if (!sh_eth_is_eeprom_ready())
+		return;
+
+	/* read MAC addr from EEPROM */
+	for (i = 0 ; i < 3 ; i++) {
+		ctrl_outw(0x0, EEPROM_OP); /* read */
+		ctrl_outw(i*2, EEPROM_ADR);
+		ctrl_outw(0x1, EEPROM_STRT);
+		if (!sh_eth_is_eeprom_ready())
+			return;
+
+		mac[i] = ctrl_inw(EEPROM_DATA);
+		mac[i] = ((mac[i] & 0xFF) << 8) | (mac[i] >> 8); /* swap */
+	}
+
+	/* reset sh-eth */
+	ctrl_outl(0x1, SH_ETH_ADDR + 0x0);
+
+	/* set MAC addr */
+	ctrl_outl(((mac[0] << 16) | (mac[1])), SH_ETH_MAHR);
+	ctrl_outl((mac[2]), SH_ETH_MALR);
+}
+
 #define SW4140    0xBA201000
 #define FPGA_OUT  0xBA200400
 #define PORT_HIZA 0xA4050158
@@ -302,7 +381,8 @@ static int __init devices_setup(void)
 	ctrl_outw(ctrl_inw(FPGA_OUT) &
 		  ~((1 << 1)  | /* LAN */
 		    (1 << 6)  | /* VIDEO DAC */
-		    (1 << 12)), /* USB0 */
+		    (1 << 12) | /* USB0 */
+		    (1 << 14)), /* RMII */
 		  FPGA_OUT);
 
 	/* enable IRQ 0,1,2 */
@@ -374,7 +454,7 @@ static int __init devices_setup(void)
 	gpio_request(GPIO_FN_VIO0_CLK, NULL);
 	gpio_request(GPIO_FN_VIO0_FLD, NULL);
 	gpio_request(GPIO_FN_VIO0_HD,  NULL);
-	platform_resource_setup_memory(&ceu0_device, "ceu", 4 << 20);
+	platform_resource_setup_memory(&ceu0_device, "ceu0", 4 << 20);
 
 	/* enable CEU1 */
 	gpio_request(GPIO_FN_VIO1_D7, NULL);
@@ -389,7 +469,7 @@ static int __init devices_setup(void)
 	gpio_request(GPIO_FN_VIO1_HD,  NULL);
 	gpio_request(GPIO_FN_VIO1_VD,  NULL);
 	gpio_request(GPIO_FN_VIO1_CLK, NULL);
-	platform_resource_setup_memory(&ceu1_device, "ceu", 4 << 20);
+	platform_resource_setup_memory(&ceu1_device, "ceu1", 4 << 20);
 
 	/* KEYSC */
 	gpio_request(GPIO_FN_KEYOUT5_IN5, NULL);
@@ -404,6 +484,28 @@ static int __init devices_setup(void)
 	gpio_request(GPIO_FN_KEYOUT1, NULL);
 	gpio_request(GPIO_FN_KEYOUT0, NULL);
 
+	/*
+	 * enable SH-Eth
+	 *
+	 * please remove J33 pin from your board !!
+	 *
+	 * ms7724 board should not use GPIO_FN_LNKSTA pin
+	 * So, This time PTX5 is set to input pin
+	 */
+	gpio_request(GPIO_FN_RMII_RXD0,    NULL);
+	gpio_request(GPIO_FN_RMII_RXD1,    NULL);
+	gpio_request(GPIO_FN_RMII_TXD0,    NULL);
+	gpio_request(GPIO_FN_RMII_TXD1,    NULL);
+	gpio_request(GPIO_FN_RMII_REF_CLK, NULL);
+	gpio_request(GPIO_FN_RMII_TX_EN,   NULL);
+	gpio_request(GPIO_FN_RMII_RX_ER,   NULL);
+	gpio_request(GPIO_FN_RMII_CRS_DV,  NULL);
+	gpio_request(GPIO_FN_MDIO,         NULL);
+	gpio_request(GPIO_FN_MDC,          NULL);
+	gpio_request(GPIO_PTX5, NULL);
+	gpio_direction_input(GPIO_PTX5);
+	sh_eth_init();
+
 	if (sw & SW41_B) {
 		/* SVGA */
 		lcdc_info.ch[0].lcd_cfg.xres = 800;
@@ -437,7 +539,7 @@ static int __init devices_setup(void)
 	}
 
 	return platform_add_devices(ms7724se_devices,
-				ARRAY_SIZE(ms7724se_devices));
+				    ARRAY_SIZE(ms7724se_devices));
 }
 device_initcall(devices_setup);
 
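sh_eth_init() above reads three 16-bit words from the on-board EEPROM, byte-swaps each, and loads the result into the controller's MAC address registers: the first four bytes go to SH_ETH_MAHR and the last two to SH_ETH_MALR. A worked example in C for an illustrative address 00:11:22:33:44:55 (values not from the patch):

/* after the swap: mac[0] = 0x0011, mac[1] = 0x2233, mac[2] = 0x4455 */
ctrl_outl((0x0011 << 16) | 0x2233, SH_ETH_MAHR);	/* 0x00112233 */
ctrl_outl(0x4455, SH_ETH_MALR);				/* 0x00004455 */
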
diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig
index da627d22c009..b18cfd39cac6 100644
--- a/arch/sh/configs/migor_defconfig
+++ b/arch/sh/configs/migor_defconfig
@@ -309,7 +309,7 @@ CONFIG_ZERO_PAGE_OFFSET=0x00001000
 CONFIG_BOOT_LINK_OFFSET=0x00800000
 CONFIG_ENTRY_OFFSET=0x00001000
 CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttySC0,115200 earlyprintk=serial ip=on root=/dev/nfs ip=dhcp"
+CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 earlyprintk=serial ip=on root=/dev/nfs ip=dhcp"
 
 #
 # Bus options
@@ -858,7 +858,35 @@ CONFIG_VIDEO_SH_MOBILE_CEU=y
 #
 # CONFIG_VGASTATE is not set
 # CONFIG_VIDEO_OUTPUT_CONTROL is not set
-# CONFIG_FB is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+# CONFIG_FB_CFB_FILLRECT is not set
+# CONFIG_FB_CFB_COPYAREA is not set
+# CONFIG_FB_CFB_IMAGEBLIT is not set
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+CONFIG_FB_SYS_FILLRECT=y
+CONFIG_FB_SYS_COPYAREA=y
+CONFIG_FB_SYS_IMAGEBLIT=y
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+CONFIG_FB_SYS_FOPS=y
+CONFIG_FB_DEFERRED_IO=y
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+CONFIG_FB_SH_MOBILE_LCDC=y
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
@@ -870,6 +898,27 @@ CONFIG_VIDEO_SH_MOBILE_CEU=y
 # Console display driver support
 #
 CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+CONFIG_FONTS=y
+# CONFIG_FONT_8x8 is not set
+# CONFIG_FONT_8x16 is not set
+# CONFIG_FONT_6x11 is not set
+# CONFIG_FONT_7x14 is not set
+# CONFIG_FONT_PEARL_8x8 is not set
+# CONFIG_FONT_ACORN_8x8 is not set
+CONFIG_FONT_MINI_4x6=y
+# CONFIG_FONT_SUN8x16 is not set
+# CONFIG_FONT_SUN12x22 is not set
+# CONFIG_FONT_10x18 is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+# CONFIG_LOGO_SUPERH_MONO is not set
+CONFIG_LOGO_SUPERH_VGA16=y
+# CONFIG_LOGO_SUPERH_CLUT224 is not set
 # CONFIG_SOUND is not set
 CONFIG_HID_SUPPORT=y
 CONFIG_HID=y
diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig
index 3840270283e4..3ee783a0a075 100644
--- a/arch/sh/configs/se7724_defconfig
+++ b/arch/sh/configs/se7724_defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.30
-# Thu Jun 18 16:09:05 2009
+# Mon Jun 29 16:28:43 2009
 #
 CONFIG_SUPERH=y
 CONFIG_SUPERH32=y
@@ -14,6 +14,7 @@ CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_IRQ_PER_CPU=y
 CONFIG_GENERIC_GPIO=y
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CLOCKEVENTS=y
@@ -28,7 +29,9 @@ CONFIG_HAVE_LATENCYTOP_SUPPORT=y
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_ARCH_NO_VIRT_TO_BUS=y
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
+CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+CONFIG_CONSTRUCTORS=y
 
 #
 # General setup
@@ -88,10 +91,12 @@ CONFIG_TIMERFD=y
 CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
 CONFIG_AIO=y
+CONFIG_HAVE_PERF_COUNTERS=y
 
 #
 # Performance Counters
 #
+# CONFIG_PERF_COUNTERS is not set
 CONFIG_VM_EVENT_COUNTERS=y
 # CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_COMPAT_BRK=y
@@ -107,6 +112,10 @@ CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
 CONFIG_HAVE_CLK=y
 CONFIG_HAVE_DMA_API_DEBUG=y
+
+#
+# GCOV-based kernel profiling
+#
 # CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
@@ -119,7 +128,7 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_BLOCK=y
-# CONFIG_LBD is not set
+CONFIG_LBDAF=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -584,7 +593,6 @@ CONFIG_SCSI_WAIT_SCAN=m
 # CONFIG_SCSI_SRP_ATTRS is not set
 CONFIG_SCSI_LOWLEVEL=y
 # CONFIG_ISCSI_TCP is not set
-# CONFIG_SCSI_BNX2_ISCSI is not set
 # CONFIG_LIBFC is not set
 # CONFIG_LIBFCOE is not set
 # CONFIG_SCSI_DEBUG is not set
@@ -624,7 +632,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_AX88796 is not set
 # CONFIG_STNIC is not set
-# CONFIG_SH_ETH is not set
+CONFIG_SH_ETH=y
 CONFIG_SMC91X=y
 # CONFIG_ENC28J60 is not set
 # CONFIG_ETHOC is not set
@@ -801,6 +809,11 @@ CONFIG_SPI_BITBANG=y
 #
 # CONFIG_SPI_SPIDEV is not set
 # CONFIG_SPI_TLE62X0 is not set
+
+#
+# PPS support
+#
+# CONFIG_PPS is not set
 CONFIG_ARCH_REQUIRE_GPIOLIB=y
 CONFIG_GPIOLIB=y
 # CONFIG_GPIO_SYSFS is not set
@@ -851,6 +864,8 @@ CONFIG_SSB_POSSIBLE=y
 # CONFIG_MFD_WM8400 is not set
 # CONFIG_MFD_WM8350_I2C is not set
 # CONFIG_MFD_PCF50633 is not set
+# CONFIG_AB3100_CORE is not set
+# CONFIG_EZX_PCAP is not set
 # CONFIG_REGULATOR is not set
 CONFIG_MEDIA_SUPPORT=y
 
@@ -1196,6 +1211,7 @@ CONFIG_RTC_DRV_PCF8563=y
 # CONFIG_RTC_DRV_S35390A is not set
 # CONFIG_RTC_DRV_FM3130 is not set
 # CONFIG_RTC_DRV_RX8581 is not set
+# CONFIG_RTC_DRV_RX8025 is not set
 
 #
 # SPI RTC drivers
@@ -1260,6 +1276,7 @@ CONFIG_FS_MBCACHE=y
 # CONFIG_JFS_FS is not set
 CONFIG_FS_POSIX_ACL=y
 # CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
 CONFIG_FILE_LOCKING=y
diff --git a/arch/sh/include/asm/perf_counter.h b/arch/sh/include/asm/perf_counter.h
index a8153c2aa6fa..61c2b40c802c 100644
--- a/arch/sh/include/asm/perf_counter.h
+++ b/arch/sh/include/asm/perf_counter.h
@@ -2,6 +2,6 @@
 #define __ASM_SH_PERF_COUNTER_H
 
 /* SH only supports software counters through this interface. */
-#define set_perf_counter_pending()	do { } while (0)
+static inline void set_perf_counter_pending(void) {}
 
 #endif /* __ASM_SH_PERF_COUNTER_H */
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 5bc34681d994..6f83f2cc45c1 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/err.h>
 #include <asm/ptrace.h>
 
 /* The system call number is given by the user in R3 */
diff --git a/arch/sh/include/mach-se/mach/se7724.h b/arch/sh/include/mach-se/mach/se7724.h
index 74164b60d0db..29514a39d0f5 100644
--- a/arch/sh/include/mach-se/mach/se7724.h
+++ b/arch/sh/include/mach-se/mach/se7724.h
@@ -20,6 +20,11 @@
  */
 #include <asm/addrspace.h>
 
+/* SH Eth */
+#define SH_ETH_ADDR	(0xA4600000)
+#define SH_ETH_MAHR	(SH_ETH_ADDR + 0x1C0)
+#define SH_ETH_MALR	(SH_ETH_ADDR + 0x1C8)
+
 #define PA_LED		(0xba203000)	/* 8bit LED */
 #define IRQ_MODE	(0xba200010)
 #define IRQ0_SR		(0xba200014)
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index cc8ddbdf3d7a..71925946f1e1 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -15,12 +15,28 @@
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
-#include <linux/marker.h>
+#include <linux/perf_counter.h>
 #include <asm/io_trapped.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
 
+static inline int notify_page_fault(struct pt_regs *regs, int trap)
+{
+	int ret = 0;
+
+#ifdef CONFIG_KPROBES
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
+			ret = 1;
+		preempt_enable();
+	}
+#endif
+
+	return ret;
+}
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -87,13 +103,16 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		return;
 	}
 
+	mm = tsk->mm;
+
+	if (unlikely(notify_page_fault(regs, lookup_exception_vector())))
+		return;
+
 	/* Only enable interrupts if they were on before the fault */
-	if ((regs->sr & SR_IMASK) != SR_IMASK) {
-		trace_hardirqs_on();
+	if ((regs->sr & SR_IMASK) != SR_IMASK)
 		local_irq_enable();
-	}
 
-	mm = tsk->mm;
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -141,10 +160,15 @@ survive:
 			goto do_sigbus;
 		BUG();
 	}
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
+	} else {
 		tsk->min_flt++;
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
+	}
 
 	up_read(&mm->mmap_sem);
 	return;
@@ -245,22 +269,6 @@ do_sigbus:
 		goto no_context;
 }
 
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
-	int ret = 0;
-
-#ifdef CONFIG_KPROBES
-	if (!user_mode(regs)) {
-		preempt_disable();
-		if (kprobe_running() && kprobe_fault_handler(regs, trap))
-			ret = 1;
-		preempt_enable();
-	}
-#endif
-
-	return ret;
-}
-
 /*
  * Called with interrupts disabled.
  */
@@ -273,12 +281,7 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
-	int ret = 0;
-
-	if (notify_page_fault(regs, lookup_exception_vector()))
-		goto out;
-
-	ret = 1;
+	int ret = 1;
 
 	/*
 	 * We don't take page faults for P1, P2, and parts of P4, these
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index fcbb6e135cef..3ce40ea34824 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2000, 2001  Paolo Alberelli
  * Copyright (C) 2003  Richard Curnow (/proc/tlb, bug fixes)
- * Copyright (C) 2003  Paul Mundt
+ * Copyright (C) 2003 - 2009  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -20,6 +20,7 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/perf_counter.h>
 #include <linux/interrupt.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -115,6 +116,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 	/* Not an IO address, so reenable interrupts */
 	local_irq_enable();
 
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
 	/*
 	 * If we're in an interrupt or have no user
 	 * context, we must not take the fault..
@@ -195,10 +198,16 @@ survive:
 			goto do_sigbus;
 		BUG();
 	}
-	if (fault & VM_FAULT_MAJOR)
+
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
+	} else {
 		tsk->min_flt++;
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
+	}
 
 	/* If we get here, the page fault has been handled.  Do the TLB refill
 	   now from the newly-setup PTE, to avoid having to fault again right
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d1430ef6b4f9..c07f72205909 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1913,25 +1913,14 @@ config DMAR_DEFAULT_ON
 	  recommended you say N here while the DMAR code remains
 	  experimental.
 
-config DMAR_GFX_WA
-	def_bool y
-	prompt "Support for Graphics workaround"
-	depends on DMAR
-	---help---
-	  Current Graphics drivers tend to use physical address
-	  for DMA and avoid using DMA APIs. Setting this config
-	  option permits the IOMMU driver to set a unity map for
-	  all the OS-visible memory. Hence the driver can continue
-	  to use physical addresses for DMA.
-
 config DMAR_FLOPPY_WA
 	def_bool y
 	depends on DMAR
 	---help---
-	  Floppy disk drivers are know to bypass DMA API calls
+	  Floppy disk drivers are known to bypass DMA API calls
 	  thereby failing to work when IOMMU is enabled.  This
 	  workaround will setup a 1:1 mapping for the first
-	  16M to make floppy (an ISA device) work.
+	  16MiB to make floppy (an ISA device) work.
 
 config INTR_REMAP
 	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 49fb3ecf3bb3..621f56d73121 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -22,7 +22,14 @@ extern int reboot_force;
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
 
-#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
-#define round_down(x, y) ((x) & ~((y) - 1))
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x,y) ((__typeof__(x))((y)-1))
+#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
+#define round_down(x,y) ((x) & ~__round_mask(x,y))
 
 #endif /* _ASM_X86_PROTO_H */
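
The rewritten macros force the mask to the type of x and evaluate each argument once, which matters when x is 64-bit and the alignment is a 32-bit unsigned value. A small C sketch of the difference (illustrative values, not from the patch):

u64 addr = 0x123456789ULL;
unsigned int align = 1 << 20;		/* 1 MiB, 32-bit unsigned */

u64 up   = round_up(addr, align);	/* 0x123500000 */
u64 down = round_down(addr, align);	/* 0x123400000 */

/* With the old "(x) & ~((y) - 1)" form, ~((y) - 1) stays a 32-bit unsigned
 * value (0xfff00000) and zero-extends, so round_down() would wrongly clear
 * the upper 32 bits of addr and yield 0x23400000. */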
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7271fa33d791..c4ca89d9aaf4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1383,6 +1383,8 @@ static unsigned long ram_alignment(resource_size_t pos)
 	return 32*1024*1024;
 }
 
+#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
+
 void __init e820_reserve_resources_late(void)
 {
 	int i;
@@ -1400,17 +1402,19 @@ void __init e820_reserve_resources_late(void)
 	 * avoid stolen RAM:
 	 */
 	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *entry = &e820_saved.map[i];
-		resource_size_t start, end;
+		struct e820entry *entry = &e820.map[i];
+		u64 start, end;
 
 		if (entry->type != E820_RAM)
 			continue;
 		start = entry->addr + entry->size;
-		end = round_up(start, ram_alignment(start));
-		if (start == end)
+		end = round_up(start, ram_alignment(start)) - 1;
+		if (end > MAX_RESOURCE_SIZE)
+			end = MAX_RESOURCE_SIZE;
+		if (start >= end)
 			continue;
-		reserve_region_with_split(&iomem_resource, start,
-				end - 1, "RAM buffer");
+		reserve_region_with_split(&iomem_resource, start, end,
+					  "RAM buffer");
 	}
 }
 
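e820_reserve_resources_late() now walks the live e820 map, rounds each RAM region's end up to ram_alignment() and reserves the gap as a "RAM buffer" resource, clamping the end so it cannot overflow a narrow resource_size_t. The arithmetic for one made-up entry (a sketch, not driver output):

/* a RAM entry ending at 0x7f700000; ram_alignment() for that range is 32 MiB */
u64 start = 0x7f700000;
u64 end   = round_up(start, 32 << 20) - 1;	/* 0x7fffffff */

if (end > MAX_RESOURCE_SIZE)			/* only possible near the top of the map */
	end = MAX_RESOURCE_SIZE;
if (start < end)
	reserve_region_with_split(&iomem_resource, start, end, "RAM buffer");
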
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 47630479b067..1a041bcf506b 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -211,11 +211,11 @@ static __init int iommu_setup(char *p)
 #ifdef CONFIG_SWIOTLB
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
+#endif
 		if (!strncmp(p, "pt", 2)) {
 			iommu_pass_through = 1;
 			return 1;
 		}
-#endif
 
 		gart_parse_options(p);
 
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index 9ffb05f4095d..93c2322feab7 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -161,7 +161,7 @@ static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta,
 	if (periodic)
 		sh_tmu_write(p, TCOR, delta);
 	else
-		sh_tmu_write(p, TCOR, 0);
+		sh_tmu_write(p, TCOR, 0xffffffff);
 
 	sh_tmu_write(p, TCNT, delta);
 
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 420afa887283..53075424a434 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -56,14 +56,32 @@
 #define MAX_AGAW_WIDTH 64
 
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
+#define DOMAIN_MAX_PFN(gaw)  ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
 
 #define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
 #define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
 #define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))
 
-#ifndef PHYSICAL_PAGE_MASK
-#define PHYSICAL_PAGE_MASK PAGE_MASK
-#endif
+
+/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
+   are never going to work. */
+static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
+{
+	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+
+static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
+{
+	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
+}
+static inline unsigned long page_to_dma_pfn(struct page *pg)
+{
+	return mm_to_dma_pfn(page_to_pfn(pg));
+}
+static inline unsigned long virt_to_dma_pfn(void *p)
+{
+	return page_to_dma_pfn(virt_to_page(p));
+}
 
 /* global iommu list, set NULL for ignored DMAR units */
 static struct intel_iommu **g_iommus;
@@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
 
 static inline u64 dma_pte_addr(struct dma_pte *pte)
 {
-	return (pte->val & VTD_PAGE_MASK);
+#ifdef CONFIG_64BIT
+	return pte->val & VTD_PAGE_MASK;
+#else
+	/* Must have a full atomic 64-bit read */
+	return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
+#endif
 }
 
-static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
+static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
 {
-	pte->val |= (addr & VTD_PAGE_MASK);
+	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
 }
 
 static inline bool dma_pte_present(struct dma_pte *pte)
@@ -217,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }
 
+static inline int first_pte_in_page(struct dma_pte *pte)
+{
+	return !((unsigned long)pte & ~VTD_PAGE_MASK);
+}
+
 /*
  * This domain is a statically identity mapping domain.
  *	1. This domain creats a static 1:1 mapping to all usable memory.
@@ -244,7 +272,6 @@ struct dmar_domain {
 	struct iova_domain iovad;	/* iova's that belong to this domain */
 
 	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
 	int		gaw;		/* max guest address width */
 
 	/* adjusted guest address width, 0 is level 2 30-bit */
@@ -648,80 +675,78 @@ static inline int width_to_agaw(int width)
 
 static inline unsigned int level_to_offset_bits(int level)
 {
-	return (12 + (level - 1) * LEVEL_STRIDE);
+	return (level - 1) * LEVEL_STRIDE;
 }
 
-static inline int address_level_offset(u64 addr, int level)
+static inline int pfn_level_offset(unsigned long pfn, int level)
 {
-	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
+	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 }
 
-static inline u64 level_mask(int level)
+static inline unsigned long level_mask(int level)
 {
-	return ((u64)-1 << level_to_offset_bits(level));
+	return -1UL << level_to_offset_bits(level);
 }
 
-static inline u64 level_size(int level)
+static inline unsigned long level_size(int level)
 {
-	return ((u64)1 << level_to_offset_bits(level));
+	return 1UL << level_to_offset_bits(level);
 }
 
-static inline u64 align_to_level(u64 addr, int level)
+static inline unsigned long align_to_level(unsigned long pfn, int level)
 {
-	return ((addr + level_size(level) - 1) & level_mask(level));
+	return (pfn + level_size(level) - 1) & level_mask(level);
 }
 
-static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
+static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
+				      unsigned long pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 	struct dma_pte *parent, *pte = NULL;
 	int level = agaw_to_level(domain->agaw);
 	int offset;
-	unsigned long flags;
 
 	BUG_ON(!domain->pgd);
-
-	addr &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
 	parent = domain->pgd;
 
-	spin_lock_irqsave(&domain->mapping_lock, flags);
 	while (level > 0) {
 		void *tmp_page;
 
-		offset = address_level_offset(addr, level);
+		offset = pfn_level_offset(pfn, level);
 		pte = &parent[offset];
 		if (level == 1)
 			break;
 
 		if (!dma_pte_present(pte)) {
+			uint64_t pteval;
+
 			tmp_page = alloc_pgtable_page();
 
-			if (!tmp_page) {
-				spin_unlock_irqrestore(&domain->mapping_lock,
-					flags);
+			if (!tmp_page)
 				return NULL;
+
+			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
+			pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
+				/* Someone else set it while we were thinking; use theirs. */
+				free_pgtable_page(tmp_page);
+			} else {
+				dma_pte_addr(pte);
+				domain_flush_cache(domain, pte, sizeof(*pte));
 			}
-			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
-			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
-			/*
-			 * high level table always sets r/w, last level page
-			 * table control read/write
-			 */
-			dma_set_pte_readable(pte);
-			dma_set_pte_writable(pte);
-			domain_flush_cache(domain, pte, sizeof(*pte));
 		}
 		parent = phys_to_virt(dma_pte_addr(pte));
 		level--;
 	}
 
-	spin_unlock_irqrestore(&domain->mapping_lock, flags);
 	return pte;
 }
 
 /* return address's pte at specific level */
-static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
-					 int level)
+static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
+					 unsigned long pfn,
+					 int level)
 {
 	struct dma_pte *parent, *pte = NULL;
 	int total = agaw_to_level(domain->agaw);
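
With domain->mapping_lock gone, pfn_to_dma_pte() populates missing page-table levels locklessly: it allocates a candidate table, builds the PTE value, and publishes it with cmpxchg64(); if another CPU installed a table first, the allocation is simply freed and the winner's table is used. Reduced to its core (a sketch of the pattern, not the driver code verbatim):

if (!dma_pte_present(pte)) {
	void *tmp_page = alloc_pgtable_page();
	uint64_t pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) |
			  DMA_PTE_READ | DMA_PTE_WRITE;

	if (cmpxchg64(&pte->val, 0ULL, pteval))
		free_pgtable_page(tmp_page);	/* raced; somebody else's table won */
	else
		domain_flush_cache(domain, pte, sizeof(*pte));
}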
@@ -729,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 
 	parent = domain->pgd;
 	while (level <= total) {
-		offset = address_level_offset(addr, total);
+		offset = pfn_level_offset(pfn, total);
 		pte = &parent[offset];
 		if (level == total)
 			return pte;
@@ -742,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
 	return NULL;
 }
 
-/* clear one page's page table */
-static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
-{
-	struct dma_pte *pte = NULL;
-
-	/* get last level pte */
-	pte = dma_addr_level_pte(domain, addr, 1);
-
-	if (pte) {
-		dma_clear_pte(pte);
-		domain_flush_cache(domain, pte, sizeof(*pte));
-	}
-}
-
 /* clear last level pte, a tlb flush should be followed */
-static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
+static void dma_pte_clear_range(struct dmar_domain *domain,
+				unsigned long start_pfn,
+				unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	int npages;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
+
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 
-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
-	/* in case it's partial page */
-	start &= PAGE_MASK;
-	end = PAGE_ALIGN(end);
-	npages = (end - start) / VTD_PAGE_SIZE;
+	/* we don't need lock here; nobody else touches the iova range */
+	while (start_pfn <= last_pfn) {
+		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+		if (!pte) {
+			start_pfn = align_to_level(start_pfn + 1, 2);
+			continue;
+		}
+		do {
+			dma_clear_pte(pte);
+			start_pfn++;
+			pte++;
+		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 
-	/* we don't need lock here, nobody else touches the iova range */
-	while (npages--) {
-		dma_pte_clear_one(domain, start);
-		start += VTD_PAGE_SIZE;
+		domain_flush_cache(domain, first_pte,
+				   (void *)pte - (void *)first_pte);
 	}
 }
 
 /* free page table pages. last level pte should already be cleared */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
-				   u64 start, u64 end)
+				   unsigned long start_pfn,
+				   unsigned long last_pfn)
 {
-	int addr_width = agaw_to_width(domain->agaw);
-	struct dma_pte *pte;
+	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+	struct dma_pte *first_pte, *pte;
 	int total = agaw_to_level(domain->agaw);
 	int level;
-	u64 tmp;
+	unsigned long tmp;
 
-	start &= (((u64)1) << addr_width) - 1;
-	end &= (((u64)1) << addr_width) - 1;
+	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
+	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 
-	/* we don't need lock here, nobody else touches the iova range */
+	/* We don't need lock here; nobody else touches the iova range */
 	level = 2;
 	while (level <= total) {
-		tmp = align_to_level(start, level);
-		if (tmp >= end || (tmp + level_size(level) > end))
+		tmp = align_to_level(start_pfn, level);
+
+		/* If we can't even clear one PTE at this level, we're done */
+		if (tmp + level_size(level) - 1 > last_pfn)
 			return;
 
-		while (tmp < end) {
-			pte = dma_addr_level_pte(domain, tmp, level);
-			if (pte) {
-				free_pgtable_page(
-					phys_to_virt(dma_pte_addr(pte)));
-				dma_clear_pte(pte);
-				domain_flush_cache(domain, pte, sizeof(*pte));
+		while (tmp + level_size(level) - 1 <= last_pfn) {
+			first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+			if (!pte) {
+				tmp = align_to_level(tmp + 1, level + 1);
+				continue;
 			}
-			tmp += level_size(level);
+			do {
+				if (dma_pte_present(pte)) {
+					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
+					dma_clear_pte(pte);
+				}
+				pte++;
+				tmp += level_size(level);
+			} while (!first_pte_in_page(pte) &&
+				 tmp + level_size(level) - 1 <= last_pfn);
+
+			domain_flush_cache(domain, first_pte,
+					   (void *)pte - (void *)first_pte);
+
 		}
 		level++;
 	}
 	/* free pgd */
-	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
+	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		free_pgtable_page(domain->pgd);
 		domain->pgd = NULL;
 	}
@@ -1035,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 }
 
 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
-				  u64 addr, unsigned int pages)
+				  unsigned long pfn, unsigned int pages)
 {
 	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
+	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
 
-	BUG_ON(addr & (~VTD_PAGE_MASK));
 	BUG_ON(pages == 0);
 
 	/*
@@ -1054,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1054 else 1087 else
1055 iommu->flush.flush_iotlb(iommu, did, addr, mask, 1088 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1056 DMA_TLB_PSI_FLUSH); 1089 DMA_TLB_PSI_FLUSH);
1057 if (did) 1090
1091 /*
1092 * In caching mode, domain ID 0 is reserved for non-present to present
1093 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1094 */
1095 if (!cap_caching_mode(iommu->cap) || did)
1058 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1096 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1059} 1097}
1060 1098
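For orientation (editorial sketch, not part of the patch): the page-selective invalidation mask is the log2 of the page count rounded up to a power of two, so the flush always covers a naturally aligned power-of-two range.

	/* e.g. pages = 3 -> __roundup_pow_of_two(3) = 4 -> mask = 2,
	 * i.e. a 4-page (16KiB with 4KiB pages) aligned invalidation */
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));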
@@ -1279,7 +1317,6 @@ static void dmar_init_reserved_ranges(void)
1279 struct pci_dev *pdev = NULL; 1317 struct pci_dev *pdev = NULL;
1280 struct iova *iova; 1318 struct iova *iova;
1281 int i; 1319 int i;
1282 u64 addr, size;
1283 1320
1284 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); 1321 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1285 1322
@@ -1302,12 +1339,9 @@ static void dmar_init_reserved_ranges(void)
1302 r = &pdev->resource[i]; 1339 r = &pdev->resource[i];
1303 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1340 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1304 continue; 1341 continue;
1305 addr = r->start; 1342 iova = reserve_iova(&reserved_iova_list,
1306 addr &= PHYSICAL_PAGE_MASK; 1343 IOVA_PFN(r->start),
1307 size = r->end - addr; 1344 IOVA_PFN(r->end));
1308 size = PAGE_ALIGN(size);
1309 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1310 IOVA_PFN(size + addr) - 1);
1311 if (!iova) 1345 if (!iova)
1312 printk(KERN_ERR "Reserve iova failed\n"); 1346 printk(KERN_ERR "Reserve iova failed\n");
1313 } 1347 }
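The simplification above works because reserve_iova() deals in page frame numbers and the driver's IOVA_PFN() is a plain shift, so (editorial example, made-up BAR address) a resource spanning 0xfebf0000..0xfebf3fff reserves pfns 0xfebf0 through 0xfebf3 inclusive, with no separate align/size arithmetic.

	/* sketch of the macro the driver uses */
	#define IOVA_PFN(addr)	((addr) >> PAGE_SHIFT)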
@@ -1341,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
1341 unsigned long sagaw; 1375 unsigned long sagaw;
1342 1376
1343 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 1377 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1344 spin_lock_init(&domain->mapping_lock);
1345 spin_lock_init(&domain->iommu_lock); 1378 spin_lock_init(&domain->iommu_lock);
1346 1379
1347 domain_reserve_special_ranges(domain); 1380 domain_reserve_special_ranges(domain);
@@ -1388,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain)
1388{ 1421{
1389 struct dmar_drhd_unit *drhd; 1422 struct dmar_drhd_unit *drhd;
1390 struct intel_iommu *iommu; 1423 struct intel_iommu *iommu;
1391 u64 end;
1392 1424
1393 /* Domain 0 is reserved, so don't process it */ 1425 /* Domain 0 is reserved, so don't process it */

1394 if (!domain) 1426 if (!domain)
@@ -1397,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain)
1397 domain_remove_dev_info(domain); 1429 domain_remove_dev_info(domain);
1398 /* destroy iovas */ 1430 /* destroy iovas */
1399 put_iova_domain(&domain->iovad); 1431 put_iova_domain(&domain->iovad);
1400 end = DOMAIN_MAX_ADDR(domain->gaw);
1401 end = end & (~PAGE_MASK);
1402 1432
1403 /* clear ptes */ 1433 /* clear ptes */
1404 dma_pte_clear_range(domain, 0, end); 1434 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1405 1435
1406 /* free page tables */ 1436 /* free page tables */
1407 dma_pte_free_pagetable(domain, 0, end); 1437 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1408 1438
1409 for_each_active_iommu(iommu, drhd) 1439 for_each_active_iommu(iommu, drhd)
1410 if (test_bit(iommu->seq_id, &domain->iommu_bmp)) 1440 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
@@ -1618,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev)
1618 tmp->devfn); 1648 tmp->devfn);
1619} 1649}
1620 1650
1621static int 1651static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1622domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, 1652 struct scatterlist *sg, unsigned long phys_pfn,
1623 u64 hpa, size_t size, int prot) 1653 unsigned long nr_pages, int prot)
1624{ 1654{
1625 u64 start_pfn, end_pfn; 1655 struct dma_pte *first_pte = NULL, *pte = NULL;
1626 struct dma_pte *pte; 1656 phys_addr_t uninitialized_var(pteval);
1627 int index; 1657 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1628 int addr_width = agaw_to_width(domain->agaw); 1658 unsigned long sg_res;
1629 1659
1630 hpa &= (((u64)1) << addr_width) - 1; 1660 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1631 1661
1632 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 1662 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1633 return -EINVAL; 1663 return -EINVAL;
1634 iova &= PAGE_MASK; 1664
1635 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; 1665 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1636 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; 1666
1637 index = 0; 1667 if (sg)
1638 while (start_pfn < end_pfn) { 1668 sg_res = 0;
1639 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); 1669 else {
1640 if (!pte) 1670 sg_res = nr_pages + 1;
1641 return -ENOMEM; 1671 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1672 }
1673
1674 while (nr_pages--) {
1675 uint64_t tmp;
1676
1677 if (!sg_res) {
1678 sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT;
1679 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1680 sg->dma_length = sg->length;
1681 pteval = page_to_phys(sg_page(sg)) | prot;
1682 }
1683 if (!pte) {
1684 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1685 if (!pte)
1686 return -ENOMEM;
1687 }
1642 /* We don't need lock here, nobody else 1688 /* We don't need lock here, nobody else
1643 * touches the iova range 1689 * touches the iova range
1644 */ 1690 */
1645 BUG_ON(dma_pte_addr(pte)); 1691 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1646 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); 1692 if (tmp) {
1647 dma_set_pte_prot(pte, prot); 1693 static int dumps = 5;
1648 if (prot & DMA_PTE_SNP) 1694 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1649 dma_set_pte_snp(pte); 1695 iov_pfn, tmp, (unsigned long long)pteval);
1650 domain_flush_cache(domain, pte, sizeof(*pte)); 1696 if (dumps) {
1651 start_pfn++; 1697 dumps--;
1652 index++; 1698 debug_dma_dump_mappings(NULL);
1699 }
1700 WARN_ON(1);
1701 }
1702 pte++;
1703 if (!nr_pages || first_pte_in_page(pte)) {
1704 domain_flush_cache(domain, first_pte,
1705 (void *)pte - (void *)first_pte);
1706 pte = NULL;
1707 }
1708 iov_pfn++;
1709 pteval += VTD_PAGE_SIZE;
1710 sg_res--;
1711 if (!sg_res)
1712 sg = sg_next(sg);
1653 } 1713 }
1654 return 0; 1714 return 0;
1655} 1715}
1656 1716
1717static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1718 struct scatterlist *sg, unsigned long nr_pages,
1719 int prot)
1720{
1721 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1722}
1723
1724static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1725 unsigned long phys_pfn, unsigned long nr_pages,
1726 int prot)
1727{
1728 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1729}
1730
1657static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 1731static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1658{ 1732{
1659 if (!iommu) 1733 if (!iommu)
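A rough usage sketch (not part of the patch) of the two new wrappers around __domain_mapping(); variable names are illustrative only. A physically contiguous range is described by its first pfn, a scatter-gather request by its sglist, and both paths batch the cache flush per page of PTEs.

	/* contiguous: map nr_pages starting at phys_pfn onto iov_pfn */
	ret = domain_pfn_mapping(domain, iov_pfn, phys_pfn, nr_pages,
				 DMA_PTE_READ | DMA_PTE_WRITE);

	/* scatter-gather: the pteval is taken from each sg entry in turn */
	ret = domain_sg_mapping(domain, iov_pfn, sglist, nr_pages,
				DMA_PTE_READ | DMA_PTE_WRITE);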
@@ -1844,58 +1918,61 @@ error:
1844 1918
1845static int iommu_identity_mapping; 1919static int iommu_identity_mapping;
1846 1920
1921static int iommu_domain_identity_map(struct dmar_domain *domain,
1922 unsigned long long start,
1923 unsigned long long end)
1924{
1925 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1926 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1927
1928 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1929 dma_to_mm_pfn(last_vpfn))) {
1930 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1931 return -ENOMEM;
1932 }
1933
1934 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1935 start, end, domain->id);
1936 /*
1937 * RMRR range might have overlap with physical memory range,
1938 * clear it first
1939 */
1940 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1941
1942 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1943 last_vpfn - first_vpfn + 1,
1944 DMA_PTE_READ|DMA_PTE_WRITE);
1945}
1946
1847static int iommu_prepare_identity_map(struct pci_dev *pdev, 1947static int iommu_prepare_identity_map(struct pci_dev *pdev,
1848 unsigned long long start, 1948 unsigned long long start,
1849 unsigned long long end) 1949 unsigned long long end)
1850{ 1950{
1851 struct dmar_domain *domain; 1951 struct dmar_domain *domain;
1852 unsigned long size;
1853 unsigned long long base;
1854 int ret; 1952 int ret;
1855 1953
1856 printk(KERN_INFO 1954 printk(KERN_INFO
1857 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 1955 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1858 pci_name(pdev), start, end); 1956 pci_name(pdev), start, end);
1859 if (iommu_identity_mapping) 1957
1860 domain = si_domain; 1958 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1861 else
1862 /* page table init */
1863 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1864 if (!domain) 1959 if (!domain)
1865 return -ENOMEM; 1960 return -ENOMEM;
1866 1961
1867 /* The address might not be aligned */ 1962 ret = iommu_domain_identity_map(domain, start, end);
1868 base = start & PAGE_MASK;
1869 size = end - base;
1870 size = PAGE_ALIGN(size);
1871 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1872 IOVA_PFN(base + size) - 1)) {
1873 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1874 ret = -ENOMEM;
1875 goto error;
1876 }
1877
1878 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1879 size, base, pci_name(pdev));
1880 /*
1881 * RMRR range might have overlap with physical memory range,
1882 * clear it first
1883 */
1884 dma_pte_clear_range(domain, base, base + size);
1885
1886 ret = domain_page_mapping(domain, base, base, size,
1887 DMA_PTE_READ|DMA_PTE_WRITE);
1888 if (ret) 1963 if (ret)
1889 goto error; 1964 goto error;
1890 1965
1891 /* context entry init */ 1966 /* context entry init */
1892 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); 1967 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
1893 if (!ret) 1968 if (ret)
1894 return 0; 1969 goto error;
1895error: 1970
1971 return 0;
1972
1973 error:
1896 domain_exit(domain); 1974 domain_exit(domain);
1897 return ret; 1975 return ret;
1898
1899} 1976}
1900 1977
1901static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 1978static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
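Editorial example of the arithmetic in the new iommu_domain_identity_map(): byte addresses are converted to virtual pfns with a shift and then mapped 1:1 onto the same physical pfns (the RMRR range below is made up).

	/* RMRR 0xbf800000 .. 0xbfffffff */
	first_vpfn = 0xbf800000ULL >> VTD_PAGE_SHIFT;	/* 0xbf800 */
	last_vpfn  = 0xbfffffffULL >> VTD_PAGE_SHIFT;	/* 0xbffff */
	/* 0x800 pages reserved in the iova tree and mapped read/write 1:1 */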
@@ -1907,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1907 rmrr->end_address + 1); 1984 rmrr->end_address + 1);
1908} 1985}
1909 1986
1910struct iommu_prepare_data {
1911 struct pci_dev *pdev;
1912 int ret;
1913};
1914
1915static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1916 unsigned long end_pfn, void *datax)
1917{
1918 struct iommu_prepare_data *data;
1919
1920 data = (struct iommu_prepare_data *)datax;
1921
1922 data->ret = iommu_prepare_identity_map(data->pdev,
1923 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1924 return data->ret;
1925
1926}
1927
1928static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1929{
1930 int nid;
1931 struct iommu_prepare_data data;
1932
1933 data.pdev = pdev;
1934 data.ret = 0;
1935
1936 for_each_online_node(nid) {
1937 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1938 if (data.ret)
1939 return data.ret;
1940 }
1941 return data.ret;
1942}
1943
1944#ifdef CONFIG_DMAR_GFX_WA
1945static void __init iommu_prepare_gfx_mapping(void)
1946{
1947 struct pci_dev *pdev = NULL;
1948 int ret;
1949
1950 for_each_pci_dev(pdev) {
1951 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1952 !IS_GFX_DEVICE(pdev))
1953 continue;
1954 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1955 pci_name(pdev));
1956 ret = iommu_prepare_with_active_regions(pdev);
1957 if (ret)
1958 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1959 }
1960}
1961#else /* !CONFIG_DMAR_GFX_WA */
1962static inline void iommu_prepare_gfx_mapping(void)
1963{
1964 return;
1965}
1966#endif
1967
1968#ifdef CONFIG_DMAR_FLOPPY_WA 1987#ifdef CONFIG_DMAR_FLOPPY_WA
1969static inline void iommu_prepare_isa(void) 1988static inline void iommu_prepare_isa(void)
1970{ 1989{
@@ -1975,12 +1994,12 @@ static inline void iommu_prepare_isa(void)
1975 if (!pdev) 1994 if (!pdev)
1976 return; 1995 return;
1977 1996
1978 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); 1997 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
1979 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); 1998 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1980 1999
1981 if (ret) 2000 if (ret)
1982 printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " 2001 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
1983 "floppy might not work\n"); 2002 "floppy might not work\n");
1984 2003
1985} 2004}
1986#else 2005#else
@@ -2008,16 +2027,30 @@ static int __init init_context_pass_through(void)
2008} 2027}
2009 2028
2010static int md_domain_init(struct dmar_domain *domain, int guest_width); 2029static int md_domain_init(struct dmar_domain *domain, int guest_width);
2030
2031static int __init si_domain_work_fn(unsigned long start_pfn,
2032 unsigned long end_pfn, void *datax)
2033{
2034 int *ret = datax;
2035
2036 *ret = iommu_domain_identity_map(si_domain,
2037 (uint64_t)start_pfn << PAGE_SHIFT,
2038 (uint64_t)end_pfn << PAGE_SHIFT);
2039 return *ret;
2040
2041}
2042
2011static int si_domain_init(void) 2043static int si_domain_init(void)
2012{ 2044{
2013 struct dmar_drhd_unit *drhd; 2045 struct dmar_drhd_unit *drhd;
2014 struct intel_iommu *iommu; 2046 struct intel_iommu *iommu;
2015 int ret = 0; 2047 int nid, ret = 0;
2016 2048
2017 si_domain = alloc_domain(); 2049 si_domain = alloc_domain();
2018 if (!si_domain) 2050 if (!si_domain)
2019 return -EFAULT; 2051 return -EFAULT;
2020 2052
2053 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2021 2054
2022 for_each_active_iommu(iommu, drhd) { 2055 for_each_active_iommu(iommu, drhd) {
2023 ret = iommu_attach_domain(si_domain, iommu); 2056 ret = iommu_attach_domain(si_domain, iommu);
@@ -2034,6 +2067,12 @@ static int si_domain_init(void)
2034 2067
2035 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; 2068 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2036 2069
2070 for_each_online_node(nid) {
2071 work_with_active_regions(nid, si_domain_work_fn, &ret);
2072 if (ret)
2073 return ret;
2074 }
2075
2037 return 0; 2076 return 0;
2038} 2077}
2039 2078
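For context (sketch, not part of the patch): work_with_active_regions() invokes the supplied function once per contiguous RAM region of the node, passing that region's start and end pfns, which is why si_domain_work_fn() above can identity-map exactly the populated memory. The callback shape is assumed to be:

	typedef int (*work_fn_t)(unsigned long start_pfn,
				 unsigned long end_pfn, void *data);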
@@ -2087,13 +2126,14 @@ static int iommu_prepare_static_identity_mapping(void)
2087 if (ret) 2126 if (ret)
2088 return -EFAULT; 2127 return -EFAULT;
2089 2128
2090 printk(KERN_INFO "IOMMU: Setting identity map:\n");
2091 for_each_pci_dev(pdev) { 2129 for_each_pci_dev(pdev) {
2092 ret = iommu_prepare_with_active_regions(pdev); 2130 printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
2093 if (ret) { 2131 pci_name(pdev));
2094 printk(KERN_INFO "1:1 mapping to one domain failed.\n"); 2132
2095 return -EFAULT; 2133 ret = domain_context_mapping(si_domain, pdev,
2096 } 2134 CONTEXT_TT_MULTI_LEVEL);
2135 if (ret)
2136 return ret;
2097 ret = domain_add_dev_info(si_domain, pdev); 2137 ret = domain_add_dev_info(si_domain, pdev);
2098 if (ret) 2138 if (ret)
2099 return ret; 2139 return ret;
@@ -2284,8 +2324,6 @@ int __init init_dmars(void)
2284 } 2324 }
2285 } 2325 }
2286 2326
2287 iommu_prepare_gfx_mapping();
2288
2289 iommu_prepare_isa(); 2327 iommu_prepare_isa();
2290 } 2328 }
2291 2329
@@ -2330,50 +2368,40 @@ error:
2330 return ret; 2368 return ret;
2331} 2369}
2332 2370
2333static inline u64 aligned_size(u64 host_addr, size_t size) 2371static inline unsigned long aligned_nrpages(unsigned long host_addr,
2334{ 2372 size_t size)
2335 u64 addr;
2336 addr = (host_addr & (~PAGE_MASK)) + size;
2337 return PAGE_ALIGN(addr);
2338}
2339
2340struct iova *
2341iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2342{ 2373{
2343 struct iova *piova; 2374 host_addr &= ~PAGE_MASK;
2375 host_addr += size + PAGE_SIZE - 1;
2344 2376
2345 /* Make sure it's in range */ 2377 return host_addr >> VTD_PAGE_SHIFT;
2346 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2347 if (!size || (IOVA_START_ADDR + size > end))
2348 return NULL;
2349
2350 piova = alloc_iova(&domain->iovad,
2351 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2352 return piova;
2353} 2378}
2354 2379
2355static struct iova * 2380static struct iova *intel_alloc_iova(struct device *dev,
2356__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, 2381 struct dmar_domain *domain,
2357 size_t size, u64 dma_mask) 2382 unsigned long nrpages, uint64_t dma_mask)
2358{ 2383{
2359 struct pci_dev *pdev = to_pci_dev(dev); 2384 struct pci_dev *pdev = to_pci_dev(dev);
2360 struct iova *iova = NULL; 2385 struct iova *iova = NULL;
2361 2386
2362 if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) 2387 /* Restrict dma_mask to the width that the iommu can handle */
2363 iova = iommu_alloc_iova(domain, size, dma_mask); 2388 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2364 else { 2389
2390 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2365 /* 2391 /*
2366 * First try to allocate an io virtual address in 2392 * First try to allocate an io virtual address in
2367 * DMA_BIT_MASK(32) and if that fails then try allocating 2393 * DMA_BIT_MASK(32) and if that fails then try allocating
2368 * from higher range 2394 * from higher range
2369 */ 2395 */
2370 iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); 2396 iova = alloc_iova(&domain->iovad, nrpages,
2371 if (!iova) 2397 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2372 iova = iommu_alloc_iova(domain, size, dma_mask); 2398 if (iova)
2373 } 2399 return iova;
2374 2400 }
2375 if (!iova) { 2401 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2376 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); 2402 if (unlikely(!iova)) {
2403 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2404 nrpages, pci_name(pdev));
2377 return NULL; 2405 return NULL;
2378 } 2406 }
2379 2407
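Worked example (editorial) for the new aligned_nrpages(): the sub-page offset of the buffer is added back before rounding, so a 0x1200-byte buffer starting at page offset 0x600 needs two 4KiB pages, not one. intel_alloc_iova() then allocates that many frames, trying below 4GiB first unless forcedac is set.

	/* host_addr offset 0x600, size 0x1200:
	 *   0x600 + 0x1200 + 0xfff = 0x27ff, >> 12 = 2 pages */
	nrpages = aligned_nrpages(0x600, 0x1200);	/* == 2 */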
@@ -2476,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2476 return 0; 2504 return 0;
2477 2505
2478 iommu = domain_get_iommu(domain); 2506 iommu = domain_get_iommu(domain);
2479 size = aligned_size((u64)paddr, size); 2507 size = aligned_nrpages(paddr, size);
2480 2508
2481 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2509 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2482 if (!iova) 2510 if (!iova)
2483 goto error; 2511 goto error;
2484 2512
2485 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2486
2487 /* 2513 /*
2488 * Check if DMAR supports zero-length reads on write only 2514 * Check if DMAR supports zero-length reads on write only
2489 * mappings.. 2515 * mappings..
@@ -2499,20 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2499 * might have two guest_addr mappings to the same host paddr, but this 2525 * might have two guest_addr mappings to the same host paddr, but this
2500 * is not a big problem 2526 * is not a big problem
2501 */ 2527 */
2502 ret = domain_page_mapping(domain, start_paddr, 2528 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2503 ((u64)paddr) & PHYSICAL_PAGE_MASK, 2529 paddr >> VTD_PAGE_SHIFT, size, prot);
2504 size, prot);
2505 if (ret) 2530 if (ret)
2506 goto error; 2531 goto error;
2507 2532
2508 /* it's a non-present to present mapping. Only flush if caching mode */ 2533 /* it's a non-present to present mapping. Only flush if caching mode */
2509 if (cap_caching_mode(iommu->cap)) 2534 if (cap_caching_mode(iommu->cap))
2510 iommu_flush_iotlb_psi(iommu, 0, start_paddr, 2535 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2511 size >> VTD_PAGE_SHIFT);
2512 else 2536 else
2513 iommu_flush_write_buffer(iommu); 2537 iommu_flush_write_buffer(iommu);
2514 2538
2515 return start_paddr + ((u64)paddr & (~PAGE_MASK)); 2539 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2540 start_paddr += paddr & ~PAGE_MASK;
2541 return start_paddr;
2516 2542
2517error: 2543error:
2518 if (iova) 2544 if (iova)
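Editorial note on the reordered return path above: the bus address handed back is the first pfn of the allocated iova plus the original sub-page offset, e.g. (made-up values):

	/* iova->pfn_lo = 0xffffe, paddr = 0x12345678 */
	start_paddr = (0xffffeUL << PAGE_SHIFT) + (0x12345678UL & ~PAGE_MASK);
	/* = 0xffffe000 + 0x678 = 0xffffe678 */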
@@ -2605,7 +2631,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2605{ 2631{
2606 struct pci_dev *pdev = to_pci_dev(dev); 2632 struct pci_dev *pdev = to_pci_dev(dev);
2607 struct dmar_domain *domain; 2633 struct dmar_domain *domain;
2608 unsigned long start_addr; 2634 unsigned long start_pfn, last_pfn;
2609 struct iova *iova; 2635 struct iova *iova;
2610 struct intel_iommu *iommu; 2636 struct intel_iommu *iommu;
2611 2637
@@ -2618,22 +2644,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2618 iommu = domain_get_iommu(domain); 2644 iommu = domain_get_iommu(domain);
2619 2645
2620 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 2646 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2621 if (!iova) 2647 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2648 (unsigned long long)dev_addr))
2622 return; 2649 return;
2623 2650
2624 start_addr = iova->pfn_lo << PAGE_SHIFT; 2651 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2625 size = aligned_size((u64)dev_addr, size); 2652 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2626 2653
2627 pr_debug("Device %s unmapping: %zx@%llx\n", 2654 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2628 pci_name(pdev), size, (unsigned long long)start_addr); 2655 pci_name(pdev), start_pfn, last_pfn);
2629 2656
2630 /* clear the whole page */ 2657 /* clear the whole page */
2631 dma_pte_clear_range(domain, start_addr, start_addr + size); 2658 dma_pte_clear_range(domain, start_pfn, last_pfn);
2659
2632 /* free page tables */ 2660 /* free page tables */
2633 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2661 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2662
2634 if (intel_iommu_strict) { 2663 if (intel_iommu_strict) {
2635 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2664 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2636 size >> VTD_PAGE_SHIFT); 2665 last_pfn - start_pfn + 1);
2637 /* free iova */ 2666 /* free iova */
2638 __free_iova(&domain->iovad, iova); 2667 __free_iova(&domain->iovad, iova);
2639 } else { 2668 } else {
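Sketch (not part of the patch) of the pfn conversion the reworked unmap path uses; with 4KiB pages, mm pfns and VT-d pfns coincide, so an iova allocation of four frames clears exactly four PTEs:

	/* iova->pfn_lo = 0xffff0, iova->pfn_hi = 0xffff3 */
	start_pfn = mm_to_dma_pfn(iova->pfn_lo);		/* 0xffff0 */
	last_pfn  = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;	/* 0xffff3 */
	/* dma_pte_clear_range() then drops pfns 0xffff0..0xffff3 */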
@@ -2691,14 +2720,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2691 int nelems, enum dma_data_direction dir, 2720 int nelems, enum dma_data_direction dir,
2692 struct dma_attrs *attrs) 2721 struct dma_attrs *attrs)
2693{ 2722{
2694 int i;
2695 struct pci_dev *pdev = to_pci_dev(hwdev); 2723 struct pci_dev *pdev = to_pci_dev(hwdev);
2696 struct dmar_domain *domain; 2724 struct dmar_domain *domain;
2697 unsigned long start_addr; 2725 unsigned long start_pfn, last_pfn;
2698 struct iova *iova; 2726 struct iova *iova;
2699 size_t size = 0;
2700 phys_addr_t addr;
2701 struct scatterlist *sg;
2702 struct intel_iommu *iommu; 2727 struct intel_iommu *iommu;
2703 2728
2704 if (iommu_no_mapping(pdev)) 2729 if (iommu_no_mapping(pdev))
@@ -2710,22 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2710 iommu = domain_get_iommu(domain); 2735 iommu = domain_get_iommu(domain);
2711 2736
2712 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); 2737 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2713 if (!iova) 2738 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2739 (unsigned long long)sglist[0].dma_address))
2714 return; 2740 return;
2715 for_each_sg(sglist, sg, nelems, i) {
2716 addr = page_to_phys(sg_page(sg)) + sg->offset;
2717 size += aligned_size((u64)addr, sg->length);
2718 }
2719 2741
2720 start_addr = iova->pfn_lo << PAGE_SHIFT; 2742 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2743 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2721 2744
2722 /* clear the whole page */ 2745 /* clear the whole page */
2723 dma_pte_clear_range(domain, start_addr, start_addr + size); 2746 dma_pte_clear_range(domain, start_pfn, last_pfn);
2747
2724 /* free page tables */ 2748 /* free page tables */
2725 dma_pte_free_pagetable(domain, start_addr, start_addr + size); 2749 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2726 2750
2727 iommu_flush_iotlb_psi(iommu, domain->id, start_addr, 2751 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2728 size >> VTD_PAGE_SHIFT); 2752 (last_pfn - start_pfn + 1));
2729 2753
2730 /* free iova */ 2754 /* free iova */
2731 __free_iova(&domain->iovad, iova); 2755 __free_iova(&domain->iovad, iova);
@@ -2748,17 +2772,16 @@ static int intel_nontranslate_map_sg(struct device *hddev,
2748static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, 2772static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2749 enum dma_data_direction dir, struct dma_attrs *attrs) 2773 enum dma_data_direction dir, struct dma_attrs *attrs)
2750{ 2774{
2751 phys_addr_t addr;
2752 int i; 2775 int i;
2753 struct pci_dev *pdev = to_pci_dev(hwdev); 2776 struct pci_dev *pdev = to_pci_dev(hwdev);
2754 struct dmar_domain *domain; 2777 struct dmar_domain *domain;
2755 size_t size = 0; 2778 size_t size = 0;
2756 int prot = 0; 2779 int prot = 0;
2757 size_t offset = 0; 2780 size_t offset_pfn = 0;
2758 struct iova *iova = NULL; 2781 struct iova *iova = NULL;
2759 int ret; 2782 int ret;
2760 struct scatterlist *sg; 2783 struct scatterlist *sg;
2761 unsigned long start_addr; 2784 unsigned long start_vpfn;
2762 struct intel_iommu *iommu; 2785 struct intel_iommu *iommu;
2763 2786
2764 BUG_ON(dir == DMA_NONE); 2787 BUG_ON(dir == DMA_NONE);
@@ -2771,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2771 2794
2772 iommu = domain_get_iommu(domain); 2795 iommu = domain_get_iommu(domain);
2773 2796
2774 for_each_sg(sglist, sg, nelems, i) { 2797 for_each_sg(sglist, sg, nelems, i)
2775 addr = page_to_phys(sg_page(sg)) + sg->offset; 2798 size += aligned_nrpages(sg->offset, sg->length);
2776 size += aligned_size((u64)addr, sg->length);
2777 }
2778 2799
2779 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); 2800 iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2780 if (!iova) { 2801 if (!iova) {
2781 sglist->dma_length = 0; 2802 sglist->dma_length = 0;
2782 return 0; 2803 return 0;
@@ -2792,35 +2813,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
2792 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 2813 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2793 prot |= DMA_PTE_WRITE; 2814 prot |= DMA_PTE_WRITE;
2794 2815
2795 start_addr = iova->pfn_lo << PAGE_SHIFT; 2816 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2796 offset = 0; 2817
2797 for_each_sg(sglist, sg, nelems, i) { 2818 ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot);
2798 addr = page_to_phys(sg_page(sg)) + sg->offset; 2819 if (unlikely(ret)) {
2799 size = aligned_size((u64)addr, sg->length); 2820 /* clear the page */
2800 ret = domain_page_mapping(domain, start_addr + offset, 2821 dma_pte_clear_range(domain, start_vpfn,
2801 ((u64)addr) & PHYSICAL_PAGE_MASK, 2822 start_vpfn + size - 1);
2802 size, prot); 2823 /* free page tables */
2803 if (ret) { 2824 dma_pte_free_pagetable(domain, start_vpfn,
2804 /* clear the page */ 2825 start_vpfn + size - 1);
2805 dma_pte_clear_range(domain, start_addr, 2826 /* free iova */
2806 start_addr + offset); 2827 __free_iova(&domain->iovad, iova);
2807 /* free page tables */ 2828 return 0;
2808 dma_pte_free_pagetable(domain, start_addr,
2809 start_addr + offset);
2810 /* free iova */
2811 __free_iova(&domain->iovad, iova);
2812 return 0;
2813 }
2814 sg->dma_address = start_addr + offset +
2815 ((u64)addr & (~PAGE_MASK));
2816 sg->dma_length = sg->length;
2817 offset += size;
2818 } 2829 }
2819 2830
2820 /* it's a non-present to present mapping. Only flush if caching mode */ 2831 /* it's a non-present to present mapping. Only flush if caching mode */
2821 if (cap_caching_mode(iommu->cap)) 2832 if (cap_caching_mode(iommu->cap))
2822 iommu_flush_iotlb_psi(iommu, 0, start_addr, 2833 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2823 offset >> VTD_PAGE_SHIFT);
2824 else 2834 else
2825 iommu_flush_write_buffer(iommu); 2835 iommu_flush_write_buffer(iommu);
2826 2836
@@ -3325,7 +3335,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
3325 int adjust_width; 3335 int adjust_width;
3326 3336
3327 init_iova_domain(&domain->iovad, DMA_32BIT_PFN); 3337 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3328 spin_lock_init(&domain->mapping_lock);
3329 spin_lock_init(&domain->iommu_lock); 3338 spin_lock_init(&domain->iommu_lock);
3330 3339
3331 domain_reserve_special_ranges(domain); 3340 domain_reserve_special_ranges(domain);
@@ -3379,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain)
3379 3388
3380static void vm_domain_exit(struct dmar_domain *domain) 3389static void vm_domain_exit(struct dmar_domain *domain)
3381{ 3390{
3382 u64 end;
3383
3384 /* Domain 0 is reserved, so don't process it */ 3391 /* Domain 0 is reserved, so don't process it */
3385 if (!domain) 3392 if (!domain)
3386 return; 3393 return;
@@ -3388,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain)
3388 vm_domain_remove_all_dev_info(domain); 3395 vm_domain_remove_all_dev_info(domain);
3389 /* destroy iovas */ 3396 /* destroy iovas */
3390 put_iova_domain(&domain->iovad); 3397 put_iova_domain(&domain->iovad);
3391 end = DOMAIN_MAX_ADDR(domain->gaw);
3392 end = end & (~VTD_PAGE_MASK);
3393 3398
3394 /* clear ptes */ 3399 /* clear ptes */
3395 dma_pte_clear_range(domain, 0, end); 3400 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3396 3401
3397 /* free page tables */ 3402 /* free page tables */
3398 dma_pte_free_pagetable(domain, 0, end); 3403 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3399 3404
3400 iommu_free_vm_domain(domain); 3405 iommu_free_vm_domain(domain);
3401 free_domain_mem(domain); 3406 free_domain_mem(domain);
@@ -3504,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3504 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) 3509 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3505 prot |= DMA_PTE_SNP; 3510 prot |= DMA_PTE_SNP;
3506 3511
3507 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); 3512 max_addr = iova + size;
3508 if (dmar_domain->max_addr < max_addr) { 3513 if (dmar_domain->max_addr < max_addr) {
3509 int min_agaw; 3514 int min_agaw;
3510 u64 end; 3515 u64 end;
@@ -3522,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
3522 } 3527 }
3523 dmar_domain->max_addr = max_addr; 3528 dmar_domain->max_addr = max_addr;
3524 } 3529 }
3525 3530 /* Round up size to next multiple of PAGE_SIZE, if it and
3526 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); 3531 the low bits of hpa would take us onto the next page */
3532 size = aligned_nrpages(hpa, size);
3533 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3534 hpa >> VTD_PAGE_SHIFT, size, prot);
3527 return ret; 3535 return ret;
3528} 3536}
3529 3537
@@ -3531,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain,
3531 unsigned long iova, size_t size) 3539 unsigned long iova, size_t size)
3532{ 3540{
3533 struct dmar_domain *dmar_domain = domain->priv; 3541 struct dmar_domain *dmar_domain = domain->priv;
3534 dma_addr_t base;
3535 3542
3536 /* The address might not be aligned */ 3543 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3537 base = iova & VTD_PAGE_MASK; 3544 (iova + size - 1) >> VTD_PAGE_SHIFT);
3538 size = VTD_PAGE_ALIGN(size);
3539 dma_pte_clear_range(dmar_domain, base, base + size);
3540 3545
3541 if (dmar_domain->max_addr == base + size) 3546 if (dmar_domain->max_addr == iova + size)
3542 dmar_domain->max_addr = base; 3547 dmar_domain->max_addr = iova;
3543} 3548}
3544 3549
3545static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, 3550static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -3549,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3549 struct dma_pte *pte; 3554 struct dma_pte *pte;
3550 u64 phys = 0; 3555 u64 phys = 0;
3551 3556
3552 pte = addr_to_dma_pte(dmar_domain, iova); 3557 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3553 if (pte) 3558 if (pte)
3554 phys = dma_pte_addr(pte); 3559 phys = dma_pte_addr(pte);
3555 3560
diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
index 74369a3f963b..c399f485aa7d 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/inet.h> 14#include <linux/inet.h>
15#include <linux/crypto.h> 15#include <linux/crypto.h>
16#include <linux/if_vlan.h>
16#include <net/dst.h> 17#include <net/dst.h>
17#include <net/tcp.h> 18#include <net/tcp.h>
18#include <scsi/scsi_cmnd.h> 19#include <scsi/scsi_cmnd.h>
@@ -184,6 +185,9 @@ static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev)
184 struct cxgb3i_adapter *snic; 185 struct cxgb3i_adapter *snic;
185 int i; 186 int i;
186 187
188 if (ndev->priv_flags & IFF_802_1Q_VLAN)
189 ndev = vlan_dev_real_dev(ndev);
190
187 read_lock(&cxgb3i_snic_rwlock); 191 read_lock(&cxgb3i_snic_rwlock);
188 list_for_each_entry(snic, &cxgb3i_snic_list, list_head) { 192 list_for_each_entry(snic, &cxgb3i_snic_list, list_head) {
189 for (i = 0; i < snic->hba_cnt; i++) { 193 for (i = 0; i < snic->hba_cnt; i++) {
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index a84072865fc2..2c266c01dc5a 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -473,16 +473,16 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
473 * limitation for the device. Try 40-bit first, and 473 * limitation for the device. Try 40-bit first, and
474 * fail to 32-bit. 474 * fail to 32-bit.
475 */ 475 */
476 err = pci_set_dma_mask(pdev, DMA_40BIT_MASK); 476 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
477 if (err) { 477 if (err) {
478 err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); 478 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
479 if (err) { 479 if (err) {
480 shost_printk(KERN_ERR, fnic->lport->host, 480 shost_printk(KERN_ERR, fnic->lport->host,
481 "No usable DMA configuration " 481 "No usable DMA configuration "
482 "aborting\n"); 482 "aborting\n");
483 goto err_out_release_regions; 483 goto err_out_release_regions;
484 } 484 }
485 err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); 485 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
486 if (err) { 486 if (err) {
487 shost_printk(KERN_ERR, fnic->lport->host, 487 shost_printk(KERN_ERR, fnic->lport->host,
488 "Unable to obtain 32-bit DMA " 488 "Unable to obtain 32-bit DMA "
@@ -490,7 +490,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev,
490 goto err_out_release_regions; 490 goto err_out_release_regions;
491 } 491 }
492 } else { 492 } else {
493 err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK); 493 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
494 if (err) { 494 if (err) {
495 shost_printk(KERN_ERR, fnic->lport->host, 495 shost_printk(KERN_ERR, fnic->lport->host,
496 "Unable to obtain 40-bit DMA " 496 "Unable to obtain 40-bit DMA "
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index eabf36502856..bfc996971b81 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -245,7 +245,7 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
245 struct vnic_wq_copy *wq, 245 struct vnic_wq_copy *wq,
246 struct fnic_io_req *io_req, 246 struct fnic_io_req *io_req,
247 struct scsi_cmnd *sc, 247 struct scsi_cmnd *sc,
248 u32 sg_count) 248 int sg_count)
249{ 249{
250 struct scatterlist *sg; 250 struct scatterlist *sg;
251 struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); 251 struct fc_rport *rport = starget_to_rport(scsi_target(sc->device));
@@ -260,9 +260,6 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic,
260 char msg[2]; 260 char msg[2];
261 261
262 if (sg_count) { 262 if (sg_count) {
263 BUG_ON(sg_count < 0);
264 BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT);
265
266 /* For each SGE, create a device desc entry */ 263 /* For each SGE, create a device desc entry */
267 desc = io_req->sgl_list; 264 desc = io_req->sgl_list;
268 for_each_sg(scsi_sglist(sc), sg, sg_count, i) { 265 for_each_sg(scsi_sglist(sc), sg, sg_count, i) {
@@ -344,7 +341,7 @@ int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
344 struct fnic *fnic; 341 struct fnic *fnic;
345 struct vnic_wq_copy *wq; 342 struct vnic_wq_copy *wq;
346 int ret; 343 int ret;
347 u32 sg_count; 344 int sg_count;
348 unsigned long flags; 345 unsigned long flags;
349 unsigned long ptr; 346 unsigned long ptr;
350 347
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 869a11bdccbd..9928704e235f 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -1095,9 +1095,14 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct)
1095 MAX_INDIRECT_BUFS); 1095 MAX_INDIRECT_BUFS);
1096 hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; 1096 hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
1097 } 1097 }
1098
1099 if (hostdata->madapter_info.os_type == 3) {
1100 enable_fast_fail(hostdata);
1101 return;
1102 }
1098 } 1103 }
1099 1104
1100 enable_fast_fail(hostdata); 1105 send_srp_login(hostdata);
1101} 1106}
1102 1107
1103/** 1108/**
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 2eee9e6e4fe8..292c02f810d0 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3670,13 +3670,14 @@ static void
3670fc_bsg_goose_queue(struct fc_rport *rport) 3670fc_bsg_goose_queue(struct fc_rport *rport)
3671{ 3671{
3672 int flagset; 3672 int flagset;
3673 unsigned long flags;
3673 3674
3674 if (!rport->rqst_q) 3675 if (!rport->rqst_q)
3675 return; 3676 return;
3676 3677
3677 get_device(&rport->dev); 3678 get_device(&rport->dev);
3678 3679
3679 spin_lock(rport->rqst_q->queue_lock); 3680 spin_lock_irqsave(rport->rqst_q->queue_lock, flags);
3680 flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) && 3681 flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
3681 !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); 3682 !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
3682 if (flagset) 3683 if (flagset)
@@ -3684,7 +3685,7 @@ fc_bsg_goose_queue(struct fc_rport *rport)
3684 __blk_run_queue(rport->rqst_q); 3685 __blk_run_queue(rport->rqst_q);
3685 if (flagset) 3686 if (flagset)
3686 queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); 3687 queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
3687 spin_unlock(rport->rqst_q->queue_lock); 3688 spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
3688 3689
3689 put_device(&rport->dev); 3690 put_device(&rport->dev);
3690} 3691}
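Editorial sketch of the locking pattern adopted above: queue_lock can also be taken from interrupt context, so the bsg path must save and restore the interrupt state rather than use a plain spin_lock() (names below are illustrative).

	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	/* ... manipulate queue flags, run the queue ... */
	spin_unlock_irqrestore(q->queue_lock, flags);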
diff --git a/drivers/scsi/zalon.c b/drivers/scsi/zalon.c
index 97f3158fa7b5..27e84e4b1fa9 100644
--- a/drivers/scsi/zalon.c
+++ b/drivers/scsi/zalon.c
@@ -134,7 +134,7 @@ zalon_probe(struct parisc_device *dev)
134 134
135 host = ncr_attach(&zalon7xx_template, unit, &device); 135 host = ncr_attach(&zalon7xx_template, unit, &device);
136 if (!host) 136 if (!host)
137 goto fail; 137 return -ENODEV;
138 138
139 if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) { 139 if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) {
140 dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ", 140 dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ",
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index f10d2fbeda06..da983b720f08 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -17,6 +17,7 @@
17#include <linux/platform_device.h> 17#include <linux/platform_device.h>
18#include <linux/dma-mapping.h> 18#include <linux/dma-mapping.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/vmalloc.h>
20#include <video/sh_mobile_lcdc.h> 21#include <video/sh_mobile_lcdc.h>
21#include <asm/atomic.h> 22#include <asm/atomic.h>
22 23
@@ -33,6 +34,7 @@ struct sh_mobile_lcdc_chan {
33 struct fb_info info; 34 struct fb_info info;
34 dma_addr_t dma_handle; 35 dma_addr_t dma_handle;
35 struct fb_deferred_io defio; 36 struct fb_deferred_io defio;
37 struct scatterlist *sglist;
36 unsigned long frame_end; 38 unsigned long frame_end;
37 wait_queue_head_t frame_end_wait; 39 wait_queue_head_t frame_end_wait;
38}; 40};
@@ -206,16 +208,38 @@ static void sh_mobile_lcdc_clk_on(struct sh_mobile_lcdc_priv *priv) {}
206static void sh_mobile_lcdc_clk_off(struct sh_mobile_lcdc_priv *priv) {} 208static void sh_mobile_lcdc_clk_off(struct sh_mobile_lcdc_priv *priv) {}
207#endif 209#endif
208 210
211static int sh_mobile_lcdc_sginit(struct fb_info *info,
212 struct list_head *pagelist)
213{
214 struct sh_mobile_lcdc_chan *ch = info->par;
215 unsigned int nr_pages_max = info->fix.smem_len >> PAGE_SHIFT;
216 struct page *page;
217 int nr_pages = 0;
218
219 sg_init_table(ch->sglist, nr_pages_max);
220
221 list_for_each_entry(page, pagelist, lru)
222 sg_set_page(&ch->sglist[nr_pages++], page, PAGE_SIZE, 0);
223
224 return nr_pages;
225}
226
209static void sh_mobile_lcdc_deferred_io(struct fb_info *info, 227static void sh_mobile_lcdc_deferred_io(struct fb_info *info,
210 struct list_head *pagelist) 228 struct list_head *pagelist)
211{ 229{
212 struct sh_mobile_lcdc_chan *ch = info->par; 230 struct sh_mobile_lcdc_chan *ch = info->par;
231 unsigned int nr_pages;
213 232
214 /* enable clocks before accessing hardware */ 233 /* enable clocks before accessing hardware */
215 sh_mobile_lcdc_clk_on(ch->lcdc); 234 sh_mobile_lcdc_clk_on(ch->lcdc);
216 235
236 nr_pages = sh_mobile_lcdc_sginit(info, pagelist);
237 dma_map_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE);
238
217 /* trigger panel update */ 239 /* trigger panel update */
218 lcdc_write_chan(ch, LDSM2R, 1); 240 lcdc_write_chan(ch, LDSM2R, 1);
241
242 dma_unmap_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE);
219} 243}
220 244
221static void sh_mobile_lcdc_deferred_io_touch(struct fb_info *info) 245static void sh_mobile_lcdc_deferred_io_touch(struct fb_info *info)
@@ -846,21 +870,31 @@ static int __init sh_mobile_lcdc_probe(struct platform_device *pdev)
846 } 870 }
847 871
848 for (i = 0; i < j; i++) { 872 for (i = 0; i < j; i++) {
849 error = register_framebuffer(&priv->ch[i].info); 873 struct sh_mobile_lcdc_chan *ch = priv->ch + i;
874
875 info = &ch->info;
876
877 if (info->fbdefio) {
878 priv->ch->sglist = vmalloc(sizeof(struct scatterlist) *
879 info->fix.smem_len >> PAGE_SHIFT);
880 if (!priv->ch->sglist) {
881 dev_err(&pdev->dev, "cannot allocate sglist\n");
882 goto err1;
883 }
884 }
885
886 error = register_framebuffer(info);
850 if (error < 0) 887 if (error < 0)
851 goto err1; 888 goto err1;
852 }
853 889
854 for (i = 0; i < j; i++) {
855 info = &priv->ch[i].info;
856 dev_info(info->dev, 890 dev_info(info->dev,
857 "registered %s/%s as %dx%d %dbpp.\n", 891 "registered %s/%s as %dx%d %dbpp.\n",
858 pdev->name, 892 pdev->name,
859 (priv->ch[i].cfg.chan == LCDC_CHAN_MAINLCD) ? 893 (ch->cfg.chan == LCDC_CHAN_MAINLCD) ?
860 "mainlcd" : "sublcd", 894 "mainlcd" : "sublcd",
861 (int) priv->ch[i].cfg.lcd_cfg.xres, 895 (int) ch->cfg.lcd_cfg.xres,
862 (int) priv->ch[i].cfg.lcd_cfg.yres, 896 (int) ch->cfg.lcd_cfg.yres,
863 priv->ch[i].cfg.bpp); 897 ch->cfg.bpp);
864 898
865 /* deferred io mode: disable clock to save power */ 899 /* deferred io mode: disable clock to save power */
866 if (info->fbdefio) 900 if (info->fbdefio)
@@ -892,6 +926,9 @@ static int sh_mobile_lcdc_remove(struct platform_device *pdev)
892 if (!info->device) 926 if (!info->device)
893 continue; 927 continue;
894 928
929 if (priv->ch[i].sglist)
930 vfree(priv->ch[i].sglist);
931
895 dma_free_coherent(&pdev->dev, info->fix.smem_len, 932 dma_free_coherent(&pdev->dev, info->fix.smem_len,
896 info->screen_base, priv->ch[i].dma_handle); 933 info->screen_base, priv->ch[i].dma_handle);
897 fb_dealloc_cmap(&info->cmap); 934 fb_dealloc_cmap(&info->cmap);
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7f88628a1a72..6e4f6c50a120 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
299 "btrfs-%s-%d", workers->name, 299 "btrfs-%s-%d", workers->name,
300 workers->num_workers + i); 300 workers->num_workers + i);
301 if (IS_ERR(worker->task)) { 301 if (IS_ERR(worker->task)) {
302 kfree(worker);
303 ret = PTR_ERR(worker->task); 302 ret = PTR_ERR(worker->task);
303 kfree(worker);
304 goto fail; 304 goto fail;
305 } 305 }
306 306
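Editorial note: the two-line swap above matters because worker->task lives inside the allocation being freed; reading the error code after kfree() would be a use-after-free. The fixed ordering is:

	if (IS_ERR(worker->task)) {
		ret = PTR_ERR(worker->task);	/* read before freeing worker */
		kfree(worker);
		goto fail;
	}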
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2779c2f5360a..98a873838717 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2074,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2074int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 2074int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
2075int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); 2075int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
2076int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 2076int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
2077int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 2077int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
2078 *root);
2079int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 2078int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2080 struct btrfs_root *root, 2079 struct btrfs_root *root,
2081 struct extent_buffer *node, 2080 struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..a5aca3997d42 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
990 return type; 990 return type;
991} 991}
992 992
993static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) 993static int find_next_key(struct btrfs_path *path, int level,
994 struct btrfs_key *key)
994 995
995{ 996{
996 int level; 997 for (; level < BTRFS_MAX_LEVEL; level++) {
997 BUG_ON(!path->keep_locks);
998 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
999 if (!path->nodes[level]) 998 if (!path->nodes[level])
1000 break; 999 break;
1001 btrfs_assert_tree_locked(path->nodes[level]);
1002 if (path->slots[level] + 1 >= 1000 if (path->slots[level] + 1 >=
1003 btrfs_header_nritems(path->nodes[level])) 1001 btrfs_header_nritems(path->nodes[level]))
1004 continue; 1002 continue;
@@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1158 * For simplicity, we just do not add new inline back 1156 * For simplicity, we just do not add new inline back
1159 * ref if there is any kind of item for this block 1157 * ref if there is any kind of item for this block
1160 */ 1158 */
1161 if (find_next_key(path, &key) == 0 && key.objectid == bytenr && 1159 if (find_next_key(path, 0, &key) == 0 &&
1160 key.objectid == bytenr &&
1162 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1161 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1163 err = -EAGAIN; 1162 err = -EAGAIN;
1164 goto out; 1163 goto out;
@@ -2697,7 +2696,7 @@ again:
2697 2696
2698 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2697 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2699 ", %llu bytes_used, %llu bytes_reserved, " 2698 ", %llu bytes_used, %llu bytes_reserved, "
2700 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 2699 "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
2701 "%llu total\n", (unsigned long long)bytes, 2700 "%llu total\n", (unsigned long long)bytes,
2702 (unsigned long long)data_sinfo->bytes_delalloc, 2701 (unsigned long long)data_sinfo->bytes_delalloc,
2703 (unsigned long long)data_sinfo->bytes_used, 2702 (unsigned long long)data_sinfo->bytes_used,
@@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4128 return buf; 4127 return buf;
4129} 4128}
4130 4129
4130#if 0
4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4132 struct btrfs_root *root, struct extent_buffer *leaf) 4132 struct btrfs_root *root, struct extent_buffer *leaf)
4133{ 4133{
@@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4171 return 0; 4171 return 0;
4172} 4172}
4173 4173
4174#if 0
4175
4176static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4174static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4177 struct btrfs_root *root, 4175 struct btrfs_root *root,
4178 struct btrfs_leaf_ref *ref) 4176 struct btrfs_leaf_ref *ref)
@@ -4553,262 +4551,471 @@ out:
4553} 4551}
4554#endif 4552#endif
4555 4553
4554struct walk_control {
4555 u64 refs[BTRFS_MAX_LEVEL];
4556 u64 flags[BTRFS_MAX_LEVEL];
4557 struct btrfs_key update_progress;
4558 int stage;
4559 int level;
4560 int shared_level;
4561 int update_ref;
4562 int keep_locks;
4563};
4564
4565#define DROP_REFERENCE 1
4566#define UPDATE_BACKREF 2
4567
4556/* 4568/*
4557 * helper function for drop_subtree, this function is similar to 4569 * helper to process tree block while walking down the tree.
4558 * walk_down_tree. The main difference is that it checks reference 4570 *
4559 * counts while tree blocks are locked. 4571 * when wc->stage == DROP_REFERENCE, this function checks
4572 * reference count of the block. if the block is shared and
4573 * we need to update back refs for the subtree rooted at the
4574 * block, this function changes wc->stage to UPDATE_BACKREF
4575 *
4576 * when wc->stage == UPDATE_BACKREF, this function updates
4577 * back refs for pointers in the block.
4578 *
4579 * NOTE: return value 1 means we should stop walking down.
4560 */ 4580 */
4561static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4581static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4562 struct btrfs_root *root, 4582 struct btrfs_root *root,
4563 struct btrfs_path *path, int *level) 4583 struct btrfs_path *path,
4584 struct walk_control *wc)
4564{ 4585{
4565 struct extent_buffer *next; 4586 int level = wc->level;
4566 struct extent_buffer *cur; 4587 struct extent_buffer *eb = path->nodes[level];
4567 struct extent_buffer *parent; 4588 struct btrfs_key key;
4568 u64 bytenr; 4589 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4569 u64 ptr_gen;
4570 u64 refs;
4571 u64 flags;
4572 u32 blocksize;
4573 int ret; 4590 int ret;
4574 4591
4575 cur = path->nodes[*level]; 4592 if (wc->stage == UPDATE_BACKREF &&
4576 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, 4593 btrfs_header_owner(eb) != root->root_key.objectid)
4577 &refs, &flags); 4594 return 1;
4578 BUG_ON(ret);
4579 if (refs > 1)
4580 goto out;
4581 4595
4582 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4596 /*
4597 * when reference count of tree block is 1, it won't increase
4598 * again. once full backref flag is set, we never clear it.
4599 */
4600 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4601 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
4602 BUG_ON(!path->locks[level]);
4603 ret = btrfs_lookup_extent_info(trans, root,
4604 eb->start, eb->len,
4605 &wc->refs[level],
4606 &wc->flags[level]);
4607 BUG_ON(ret);
4608 BUG_ON(wc->refs[level] == 0);
4609 }
4583 4610
4584 while (*level >= 0) { 4611 if (wc->stage == DROP_REFERENCE &&
4585 cur = path->nodes[*level]; 4612 wc->update_ref && wc->refs[level] > 1) {
4586 if (*level == 0) { 4613 BUG_ON(eb == root->node);
4587 ret = btrfs_drop_leaf_ref(trans, root, cur); 4614 BUG_ON(path->slots[level] > 0);
4588 BUG_ON(ret); 4615 if (level == 0)
4589 clean_tree_block(trans, root, cur); 4616 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4590 break; 4617 else
4591 } 4618 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4592 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 4619 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4593 clean_tree_block(trans, root, cur); 4620 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4594 break; 4621 wc->stage = UPDATE_BACKREF;
4622 wc->shared_level = level;
4595 } 4623 }
4624 }
4596 4625
4597 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4626 if (wc->stage == DROP_REFERENCE) {
4598 blocksize = btrfs_level_size(root, *level - 1); 4627 if (wc->refs[level] > 1)
4599 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4628 return 1;
4600 4629
4601 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4630 if (path->locks[level] && !wc->keep_locks) {
4602 btrfs_tree_lock(next); 4631 btrfs_tree_unlock(eb);
4603 btrfs_set_lock_blocking(next); 4632 path->locks[level] = 0;
4633 }
4634 return 0;
4635 }
4604 4636
4605 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4637 /* wc->stage == UPDATE_BACKREF */
4606 &refs, &flags); 4638 if (!(wc->flags[level] & flag)) {
4639 BUG_ON(!path->locks[level]);
4640 ret = btrfs_inc_ref(trans, root, eb, 1);
4607 BUG_ON(ret); 4641 BUG_ON(ret);
4608 if (refs > 1) { 4642 ret = btrfs_dec_ref(trans, root, eb, 0);
4609 parent = path->nodes[*level]; 4643 BUG_ON(ret);
4610 ret = btrfs_free_extent(trans, root, bytenr, 4644 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
4611 blocksize, parent->start, 4645 eb->len, flag, 0);
4612 btrfs_header_owner(parent), 4646 BUG_ON(ret);
4613 *level - 1, 0); 4647 wc->flags[level] |= flag;
4648 }
4649
4650 /*
4651 * the block is shared by multiple trees, so it's not good to
4652 * keep the tree lock
4653 */
4654 if (path->locks[level] && level > 0) {
4655 btrfs_tree_unlock(eb);
4656 path->locks[level] = 0;
4657 }
4658 return 0;
4659}
4660
4661/*
4662 * helper to process tree block while walking up the tree.
4663 *
4664 * when wc->stage == DROP_REFERENCE, this function drops
4665 * reference count on the block.
4666 *
4667 * when wc->stage == UPDATE_BACKREF, this function changes
4668 * wc->stage back to DROP_REFERENCE if we changed wc->stage
4669 * to UPDATE_BACKREF previously while processing the block.
4670 *
4671 * NOTE: return value 1 means we should stop walking up.
4672 */
4673static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4674 struct btrfs_root *root,
4675 struct btrfs_path *path,
4676 struct walk_control *wc)
4677{
4678 int ret = 0;
4679 int level = wc->level;
4680 struct extent_buffer *eb = path->nodes[level];
4681 u64 parent = 0;
4682
4683 if (wc->stage == UPDATE_BACKREF) {
4684 BUG_ON(wc->shared_level < level);
4685 if (level < wc->shared_level)
4686 goto out;
4687
4688 BUG_ON(wc->refs[level] <= 1);
4689 ret = find_next_key(path, level + 1, &wc->update_progress);
4690 if (ret > 0)
4691 wc->update_ref = 0;
4692
4693 wc->stage = DROP_REFERENCE;
4694 wc->shared_level = -1;
4695 path->slots[level] = 0;
4696
4697 /*
4698 * check reference count again if the block isn't locked.
4699 * we should start walking down the tree again if reference
4700 * count is one.
4701 */
4702 if (!path->locks[level]) {
4703 BUG_ON(level == 0);
4704 btrfs_tree_lock(eb);
4705 btrfs_set_lock_blocking(eb);
4706 path->locks[level] = 1;
4707
4708 ret = btrfs_lookup_extent_info(trans, root,
4709 eb->start, eb->len,
4710 &wc->refs[level],
4711 &wc->flags[level]);
4614 BUG_ON(ret); 4712 BUG_ON(ret);
4615 path->slots[*level]++; 4713 BUG_ON(wc->refs[level] == 0);
4616 btrfs_tree_unlock(next); 4714 if (wc->refs[level] == 1) {
4617 free_extent_buffer(next); 4715 btrfs_tree_unlock(eb);
4618 continue; 4716 path->locks[level] = 0;
4717 return 1;
4718 }
4719 } else {
4720 BUG_ON(level != 0);
4619 } 4721 }
4722 }
4620 4723
4621 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4724 /* wc->stage == DROP_REFERENCE */
4725 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
4622 4726
4623 *level = btrfs_header_level(next); 4727 if (wc->refs[level] == 1) {
4624 path->nodes[*level] = next; 4728 if (level == 0) {
4625 path->slots[*level] = 0; 4729 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4626 path->locks[*level] = 1; 4730 ret = btrfs_dec_ref(trans, root, eb, 1);
4627 cond_resched(); 4731 else
4732 ret = btrfs_dec_ref(trans, root, eb, 0);
4733 BUG_ON(ret);
4734 }
4735 /* make block locked assertion in clean_tree_block happy */
4736 if (!path->locks[level] &&
4737 btrfs_header_generation(eb) == trans->transid) {
4738 btrfs_tree_lock(eb);
4739 btrfs_set_lock_blocking(eb);
4740 path->locks[level] = 1;
4741 }
4742 clean_tree_block(trans, root, eb);
4743 }
4744
4745 if (eb == root->node) {
4746 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4747 parent = eb->start;
4748 else
4749 BUG_ON(root->root_key.objectid !=
4750 btrfs_header_owner(eb));
4751 } else {
4752 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4753 parent = path->nodes[level + 1]->start;
4754 else
4755 BUG_ON(root->root_key.objectid !=
4756 btrfs_header_owner(path->nodes[level + 1]));
4628 } 4757 }
4629out:
4630 if (path->nodes[*level] == root->node)
4631 parent = path->nodes[*level];
4632 else
4633 parent = path->nodes[*level + 1];
4634 bytenr = path->nodes[*level]->start;
4635 blocksize = path->nodes[*level]->len;
4636 4758
4637 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, 4759 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
4638 btrfs_header_owner(parent), *level, 0); 4760 root->root_key.objectid, level, 0);
4639 BUG_ON(ret); 4761 BUG_ON(ret);
4762out:
4763 wc->refs[level] = 0;
4764 wc->flags[level] = 0;
4765 return ret;
4766}
4767
4768static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4769 struct btrfs_root *root,
4770 struct btrfs_path *path,
4771 struct walk_control *wc)
4772{
4773 struct extent_buffer *next;
4774 struct extent_buffer *cur;
4775 u64 bytenr;
4776 u64 ptr_gen;
4777 u32 blocksize;
4778 int level = wc->level;
4779 int ret;
4780
4781 while (level >= 0) {
4782 cur = path->nodes[level];
4783 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
4640 4784
4641 if (path->locks[*level]) { 4785 ret = walk_down_proc(trans, root, path, wc);
4642 btrfs_tree_unlock(path->nodes[*level]); 4786 if (ret > 0)
4643 path->locks[*level] = 0; 4787 break;
4788
4789 if (level == 0)
4790 break;
4791
4792 bytenr = btrfs_node_blockptr(cur, path->slots[level]);
4793 blocksize = btrfs_level_size(root, level - 1);
4794 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
4795
4796 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4797 btrfs_tree_lock(next);
4798 btrfs_set_lock_blocking(next);
4799
4800 level--;
4801 BUG_ON(level != btrfs_header_level(next));
4802 path->nodes[level] = next;
4803 path->slots[level] = 0;
4804 path->locks[level] = 1;
4805 wc->level = level;
4644 } 4806 }
4645 free_extent_buffer(path->nodes[*level]);
4646 path->nodes[*level] = NULL;
4647 *level += 1;
4648 cond_resched();
4649 return 0; 4807 return 0;
4650} 4808}
4651 4809
4652/*
4653 * helper for dropping snapshots. This walks back up the tree in the path
4654 * to find the first node higher up where we haven't yet gone through
4655 * all the slots
4656 */
4657static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 4810static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
4658 struct btrfs_root *root, 4811 struct btrfs_root *root,
4659 struct btrfs_path *path, 4812 struct btrfs_path *path,
4660 int *level, int max_level) 4813 struct walk_control *wc, int max_level)
4661{ 4814{
4662 struct btrfs_root_item *root_item = &root->root_item; 4815 int level = wc->level;
4663 int i;
4664 int slot;
4665 int ret; 4816 int ret;
4666 4817
4667 for (i = *level; i < max_level && path->nodes[i]; i++) { 4818 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
4668 slot = path->slots[i]; 4819 while (level < max_level && path->nodes[level]) {
4669 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 4820 wc->level = level;
4670 /* 4821 if (path->slots[level] + 1 <
4671 * there is more work to do in this level. 4822 btrfs_header_nritems(path->nodes[level])) {
4672 * Update the drop_progress marker to reflect 4823 path->slots[level]++;
4673 * the work we've done so far, and then bump
4674 * the slot number
4675 */
4676 path->slots[i]++;
4677 WARN_ON(*level == 0);
4678 if (max_level == BTRFS_MAX_LEVEL) {
4679 btrfs_node_key(path->nodes[i],
4680 &root_item->drop_progress,
4681 path->slots[i]);
4682 root_item->drop_level = i;
4683 }
4684 *level = i;
4685 return 0; 4824 return 0;
4686 } else { 4825 } else {
4687 struct extent_buffer *parent; 4826 ret = walk_up_proc(trans, root, path, wc);
4688 4827 if (ret > 0)
4689 /* 4828 return 0;
4690 * this whole node is done, free our reference
4691 * on it and go up one level
4692 */
4693 if (path->nodes[*level] == root->node)
4694 parent = path->nodes[*level];
4695 else
4696 parent = path->nodes[*level + 1];
4697 4829
4698 clean_tree_block(trans, root, path->nodes[i]); 4830 if (path->locks[level]) {
4699 ret = btrfs_free_extent(trans, root, 4831 btrfs_tree_unlock(path->nodes[level]);
4700 path->nodes[i]->start, 4832 path->locks[level] = 0;
4701 path->nodes[i]->len,
4702 parent->start,
4703 btrfs_header_owner(parent),
4704 *level, 0);
4705 BUG_ON(ret);
4706 if (path->locks[*level]) {
4707 btrfs_tree_unlock(path->nodes[i]);
4708 path->locks[i] = 0;
4709 } 4833 }
4710 free_extent_buffer(path->nodes[i]); 4834 free_extent_buffer(path->nodes[level]);
4711 path->nodes[i] = NULL; 4835 path->nodes[level] = NULL;
4712 *level = i + 1; 4836 level++;
4713 } 4837 }
4714 } 4838 }
4715 return 1; 4839 return 1;
4716} 4840}
4717 4841
4718/* 4842/*
4719 * drop the reference count on the tree rooted at 'snap'. This traverses 4843 * drop a subvolume tree.
4720 * the tree freeing any blocks that have a ref count of zero after being 4844 *
4721 * decremented. 4845 * this function traverses the tree freeing any blocks that are only
4846 * referenced by the tree.
4847 *
4848 * when a shared tree block is found, this function decreases its
4849 * reference count by one. if update_ref is true, this function
4850 * also makes sure backrefs for the shared block and all lower level
4851 * blocks are properly updated.
4722 */ 4852 */
4723int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 4853int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
4724 *root)
4725{ 4854{
4726 int ret = 0;
4727 int wret;
4728 int level;
4729 struct btrfs_path *path; 4855 struct btrfs_path *path;
4730 int update_count; 4856 struct btrfs_trans_handle *trans;
4857 struct btrfs_root *tree_root = root->fs_info->tree_root;
4731 struct btrfs_root_item *root_item = &root->root_item; 4858 struct btrfs_root_item *root_item = &root->root_item;
4859 struct walk_control *wc;
4860 struct btrfs_key key;
4861 int err = 0;
4862 int ret;
4863 int level;
4732 4864
4733 path = btrfs_alloc_path(); 4865 path = btrfs_alloc_path();
4734 BUG_ON(!path); 4866 BUG_ON(!path);
4735 4867
4736 level = btrfs_header_level(root->node); 4868 wc = kzalloc(sizeof(*wc), GFP_NOFS);
4869 BUG_ON(!wc);
4870
4871 trans = btrfs_start_transaction(tree_root, 1);
4872
4737 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4873 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4874 level = btrfs_header_level(root->node);
4738 path->nodes[level] = btrfs_lock_root_node(root); 4875 path->nodes[level] = btrfs_lock_root_node(root);
4739 btrfs_set_lock_blocking(path->nodes[level]); 4876 btrfs_set_lock_blocking(path->nodes[level]);
4740 path->slots[level] = 0; 4877 path->slots[level] = 0;
4741 path->locks[level] = 1; 4878 path->locks[level] = 1;
4879 memset(&wc->update_progress, 0,
4880 sizeof(wc->update_progress));
4742 } else { 4881 } else {
4743 struct btrfs_key key;
4744 struct btrfs_disk_key found_key;
4745 struct extent_buffer *node;
4746
4747 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 4882 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4883 memcpy(&wc->update_progress, &key,
4884 sizeof(wc->update_progress));
4885
4748 level = root_item->drop_level; 4886 level = root_item->drop_level;
4887 BUG_ON(level == 0);
4749 path->lowest_level = level; 4888 path->lowest_level = level;
4750 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4889 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4751 if (wret < 0) { 4890 path->lowest_level = 0;
4752 ret = wret; 4891 if (ret < 0) {
4892 err = ret;
4753 goto out; 4893 goto out;
4754 } 4894 }
4755 node = path->nodes[level]; 4895 btrfs_node_key_to_cpu(path->nodes[level], &key,
4756 btrfs_node_key(node, &found_key, path->slots[level]); 4896 path->slots[level]);
4757 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 4897 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
4758 sizeof(found_key))); 4898
4759 /* 4899 /*
4760 * unlock our path, this is safe because only this 4900 * unlock our path, this is safe because only this
4761 * function is allowed to delete this snapshot 4901 * function is allowed to delete this snapshot
4762 */ 4902 */
4763 btrfs_unlock_up_safe(path, 0); 4903 btrfs_unlock_up_safe(path, 0);
4904
4905 level = btrfs_header_level(root->node);
4906 while (1) {
4907 btrfs_tree_lock(path->nodes[level]);
4908 btrfs_set_lock_blocking(path->nodes[level]);
4909
4910 ret = btrfs_lookup_extent_info(trans, root,
4911 path->nodes[level]->start,
4912 path->nodes[level]->len,
4913 &wc->refs[level],
4914 &wc->flags[level]);
4915 BUG_ON(ret);
4916 BUG_ON(wc->refs[level] == 0);
4917
4918 if (level == root_item->drop_level)
4919 break;
4920
4921 btrfs_tree_unlock(path->nodes[level]);
4922 WARN_ON(wc->refs[level] != 1);
4923 level--;
4924 }
4764 } 4925 }
4926
4927 wc->level = level;
4928 wc->shared_level = -1;
4929 wc->stage = DROP_REFERENCE;
4930 wc->update_ref = update_ref;
4931 wc->keep_locks = 0;
4932
4765 while (1) { 4933 while (1) {
4766 unsigned long update; 4934 ret = walk_down_tree(trans, root, path, wc);
4767 wret = walk_down_tree(trans, root, path, &level); 4935 if (ret < 0) {
4768 if (wret > 0) 4936 err = ret;
4769 break; 4937 break;
4770 if (wret < 0) 4938 }
4771 ret = wret;
4772 4939
4773 wret = walk_up_tree(trans, root, path, &level, 4940 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
4774 BTRFS_MAX_LEVEL); 4941 if (ret < 0) {
4775 if (wret > 0) 4942 err = ret;
4776 break; 4943 break;
4777 if (wret < 0) 4944 }
4778 ret = wret; 4945
4779 if (trans->transaction->in_commit || 4946 if (ret > 0) {
4780 trans->transaction->delayed_refs.flushing) { 4947 BUG_ON(wc->stage != DROP_REFERENCE);
4781 ret = -EAGAIN;
4782 break; 4948 break;
4783 } 4949 }
4784 for (update_count = 0; update_count < 16; update_count++) { 4950
4951 if (wc->stage == DROP_REFERENCE) {
4952 level = wc->level;
4953 btrfs_node_key(path->nodes[level],
4954 &root_item->drop_progress,
4955 path->slots[level]);
4956 root_item->drop_level = level;
4957 }
4958
4959 BUG_ON(wc->level == 0);
4960 if (trans->transaction->in_commit ||
4961 trans->transaction->delayed_refs.flushing) {
4962 ret = btrfs_update_root(trans, tree_root,
4963 &root->root_key,
4964 root_item);
4965 BUG_ON(ret);
4966
4967 btrfs_end_transaction(trans, tree_root);
4968 trans = btrfs_start_transaction(tree_root, 1);
4969 } else {
4970 unsigned long update;
4785 update = trans->delayed_ref_updates; 4971 update = trans->delayed_ref_updates;
4786 trans->delayed_ref_updates = 0; 4972 trans->delayed_ref_updates = 0;
4787 if (update) 4973 if (update)
4788 btrfs_run_delayed_refs(trans, root, update); 4974 btrfs_run_delayed_refs(trans, tree_root,
4789 else 4975 update);
4790 break;
4791 } 4976 }
4792 } 4977 }
4978 btrfs_release_path(root, path);
4979 BUG_ON(err);
4980
4981 ret = btrfs_del_root(trans, tree_root, &root->root_key);
4982 BUG_ON(ret);
4983
4984 free_extent_buffer(root->node);
4985 free_extent_buffer(root->commit_root);
4986 kfree(root);
4793out: 4987out:
4988 btrfs_end_transaction(trans, tree_root);
4989 kfree(wc);
4794 btrfs_free_path(path); 4990 btrfs_free_path(path);
4795 return ret; 4991 return err;
4796} 4992}
4797 4993
4994/*
4995 * drop subtree rooted at tree block 'node'.
4996 *
4997 * NOTE: this function will unlock and release tree block 'node'
4998 */
4798int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 4999int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4799 struct btrfs_root *root, 5000 struct btrfs_root *root,
4800 struct extent_buffer *node, 5001 struct extent_buffer *node,
4801 struct extent_buffer *parent) 5002 struct extent_buffer *parent)
4802{ 5003{
4803 struct btrfs_path *path; 5004 struct btrfs_path *path;
5005 struct walk_control *wc;
4804 int level; 5006 int level;
4805 int parent_level; 5007 int parent_level;
4806 int ret = 0; 5008 int ret = 0;
4807 int wret; 5009 int wret;
4808 5010
5011 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5012
4809 path = btrfs_alloc_path(); 5013 path = btrfs_alloc_path();
4810 BUG_ON(!path); 5014 BUG_ON(!path);
4811 5015
5016 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5017 BUG_ON(!wc);
5018
4812 btrfs_assert_tree_locked(parent); 5019 btrfs_assert_tree_locked(parent);
4813 parent_level = btrfs_header_level(parent); 5020 parent_level = btrfs_header_level(parent);
4814 extent_buffer_get(parent); 5021 extent_buffer_get(parent);
@@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4817 5024
4818 btrfs_assert_tree_locked(node); 5025 btrfs_assert_tree_locked(node);
4819 level = btrfs_header_level(node); 5026 level = btrfs_header_level(node);
4820 extent_buffer_get(node);
4821 path->nodes[level] = node; 5027 path->nodes[level] = node;
4822 path->slots[level] = 0; 5028 path->slots[level] = 0;
5029 path->locks[level] = 1;
5030
5031 wc->refs[parent_level] = 1;
5032 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5033 wc->level = level;
5034 wc->shared_level = -1;
5035 wc->stage = DROP_REFERENCE;
5036 wc->update_ref = 0;
5037 wc->keep_locks = 1;
4823 5038
4824 while (1) { 5039 while (1) {
4825 wret = walk_down_tree(trans, root, path, &level); 5040 wret = walk_down_tree(trans, root, path, wc);
4826 if (wret < 0) 5041 if (wret < 0) {
4827 ret = wret; 5042 ret = wret;
4828 if (wret != 0)
4829 break; 5043 break;
5044 }
4830 5045
4831 wret = walk_up_tree(trans, root, path, &level, parent_level); 5046 wret = walk_up_tree(trans, root, path, wc, parent_level);
4832 if (wret < 0) 5047 if (wret < 0)
4833 ret = wret; 5048 ret = wret;
4834 if (wret != 0) 5049 if (wret != 0)
4835 break; 5050 break;
4836 } 5051 }
4837 5052
5053 kfree(wc);
4838 btrfs_free_path(path); 5054 btrfs_free_path(path);
4839 return ret; 5055 return ret;
4840} 5056}
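The extent-tree.c rework above replaces the old per-block free logic with a walk_control-driven loop: walk_down_tree() pushes blocks onto the path until walk_down_proc() says stop, and walk_up_tree() frees exhausted blocks via walk_up_proc() before popping a level. Below is a minimal, self-contained sketch of that iterative post-order teardown shape; it is illustrative only, and every name in it (struct node, struct path, walk_down, walk_up) is made up rather than taken from btrfs.

/*
 * Minimal sketch of the iterative post-order teardown shape used by
 * walk_down_tree()/walk_up_tree() above.  Illustrative only: no
 * locking, no backrefs, made-up names.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_LEVEL 8

struct node {
	int id;
	int nritems;			/* number of children, 0 for a leaf */
	struct node *child[4];
};

struct path {
	struct node *nodes[MAX_LEVEL];
	int slots[MAX_LEVEL];
};

static struct node *new_node(int id, int nritems)
{
	struct node *n = calloc(1, sizeof(*n));

	if (!n)
		exit(1);
	n->id = id;
	n->nritems = nritems;
	return n;
}

/* descend to the leftmost unvisited leaf below path->nodes[*level] */
static void walk_down(struct path *path, int *level)
{
	while (*level > 0) {
		struct node *cur = path->nodes[*level];
		struct node *next = cur->child[path->slots[*level]];

		(*level)--;
		path->nodes[*level] = next;
		path->slots[*level] = 0;
	}
}

/*
 * walk back up: advance to the next sibling slot when one exists,
 * otherwise free the exhausted node and pop a level.  Returns 1 once
 * the root itself has been freed.
 */
static int walk_up(struct path *path, int *level, int max_level)
{
	int i;

	for (i = *level; i < max_level && path->nodes[i]; i++) {
		if (path->slots[i] + 1 < path->nodes[i]->nritems) {
			path->slots[i]++;
			*level = i;
			return 0;
		}
		printf("freeing node %d\n", path->nodes[i]->id);
		free(path->nodes[i]);
		path->nodes[i] = NULL;
	}
	return 1;
}

int main(void)
{
	struct node *root = new_node(0, 3);	/* level 1, three leaf children */
	struct path path = { { NULL }, { 0 } };
	int level = 1;
	int i;

	for (i = 0; i < 3; i++)
		root->child[i] = new_node(i + 1, 0);

	path.nodes[level] = root;
	while (1) {
		walk_down(&path, &level);
		if (walk_up(&path, &level, MAX_LEVEL))
			break;
	}
	return 0;
}

Keeping the cursor in nodes[]/slots[] rather than on the call stack is what lets the real code record drop_progress, drop locks, and restart the transaction mid-walk, as the btrfs_drop_snapshot() loop above does.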
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 126477eaecf5..7c3cd248d8d6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
151 } 151 }
152 if (end_pos > isize) { 152 if (end_pos > isize) {
153 i_size_write(inode, end_pos); 153 i_size_write(inode, end_pos);
154 btrfs_update_inode(trans, root, inode); 154 /* we've only changed i_size in ram, and we haven't updated
155 * the disk i_size. There is no need to log the inode
156 * at this time.
157 */
155 } 158 }
156 err = btrfs_end_transaction(trans, root); 159 err = btrfs_end_transaction(trans, root);
157out_unlock: 160out_unlock:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dbe1aabf96cd..7ffa3d34ea19 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3580,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3580 owner = 1; 3580 owner = 1;
3581 BTRFS_I(inode)->block_group = 3581 BTRFS_I(inode)->block_group =
3582 btrfs_find_block_group(root, 0, alloc_hint, owner); 3582 btrfs_find_block_group(root, 0, alloc_hint, owner);
3583 if ((mode & S_IFREG)) {
3584 if (btrfs_test_opt(root, NODATASUM))
3585 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3586 if (btrfs_test_opt(root, NODATACOW))
3587 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3588 }
3589 3583
3590 key[0].objectid = objectid; 3584 key[0].objectid = objectid;
3591 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 3585 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3640,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3640 3634
3641 btrfs_inherit_iflags(inode, dir); 3635 btrfs_inherit_iflags(inode, dir);
3642 3636
3637 if ((mode & S_IFREG)) {
3638 if (btrfs_test_opt(root, NODATASUM))
3639 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3640 if (btrfs_test_opt(root, NODATACOW))
3641 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3642 }
3643
3643 insert_inode_hash(inode); 3644 insert_inode_hash(inode);
3644 inode_tree_add(inode); 3645 inode_tree_add(inode);
3645 return inode; 3646 return inode;
@@ -5082,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5082 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5083 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5083 struct extent_map *em; 5084 struct extent_map *em;
5084 struct btrfs_trans_handle *trans; 5085 struct btrfs_trans_handle *trans;
5086 struct btrfs_root *root;
5085 int ret; 5087 int ret;
5086 5088
5087 alloc_start = offset & ~mask; 5089 alloc_start = offset & ~mask;
@@ -5100,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5100 goto out; 5102 goto out;
5101 } 5103 }
5102 5104
5105 root = BTRFS_I(inode)->root;
5106
5107 ret = btrfs_check_data_free_space(root, inode,
5108 alloc_end - alloc_start);
5109 if (ret)
5110 goto out;
5111
5103 locked_end = alloc_end - 1; 5112 locked_end = alloc_end - 1;
5104 while (1) { 5113 while (1) {
5105 struct btrfs_ordered_extent *ordered; 5114 struct btrfs_ordered_extent *ordered;
@@ -5107,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5107 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); 5116 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5108 if (!trans) { 5117 if (!trans) {
5109 ret = -EIO; 5118 ret = -EIO;
5110 goto out; 5119 goto out_free;
5111 } 5120 }
5112 5121
5113 /* the extent lock is ordered inside the running 5122 /* the extent lock is ordered inside the running
@@ -5168,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5168 GFP_NOFS); 5177 GFP_NOFS);
5169 5178
5170 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5179 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5180out_free:
5181 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5171out: 5182out:
5172 mutex_unlock(&inode->i_mutex); 5183 mutex_unlock(&inode->i_mutex);
5173 return ret; 5184 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index eff18f5b5362..9f4db848db10 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1028 struct btrfs_file_extent_item); 1028 struct btrfs_file_extent_item);
1029 comp = btrfs_file_extent_compression(leaf, extent); 1029 comp = btrfs_file_extent_compression(leaf, extent);
1030 type = btrfs_file_extent_type(leaf, extent); 1030 type = btrfs_file_extent_type(leaf, extent);
1031 if (type == BTRFS_FILE_EXTENT_REG) { 1031 if (type == BTRFS_FILE_EXTENT_REG ||
1032 type == BTRFS_FILE_EXTENT_PREALLOC) {
1032 disko = btrfs_file_extent_disk_bytenr(leaf, 1033 disko = btrfs_file_extent_disk_bytenr(leaf,
1033 extent); 1034 extent);
1034 diskl = btrfs_file_extent_disk_num_bytes(leaf, 1035 diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1051 new_key.objectid = inode->i_ino; 1052 new_key.objectid = inode->i_ino;
1052 new_key.offset = key.offset + destoff - off; 1053 new_key.offset = key.offset + destoff - off;
1053 1054
1054 if (type == BTRFS_FILE_EXTENT_REG) { 1055 if (type == BTRFS_FILE_EXTENT_REG ||
1056 type == BTRFS_FILE_EXTENT_PREALLOC) {
1055 ret = btrfs_insert_empty_item(trans, root, path, 1057 ret = btrfs_insert_empty_item(trans, root, path,
1056 &new_key, size); 1058 &new_key, size);
1057 if (ret) 1059 if (ret)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b23dc209ae10..008397934778 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work)
1788 btrfs_end_transaction(trans, root); 1788 btrfs_end_transaction(trans, root);
1789 } 1789 }
1790 1790
1791 btrfs_drop_dead_root(reloc_root); 1791 btrfs_drop_snapshot(reloc_root, 0);
1792 1792
1793 if (atomic_dec_and_test(async->num_pending)) 1793 if (atomic_dec_and_test(async->num_pending))
1794 complete(async->done); 1794 complete(async->done);
@@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2075 2075
2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2077 BUG_ON(ret); 2077 BUG_ON(ret);
2078
2079 btrfs_tree_unlock(eb);
2080 free_extent_buffer(eb);
2081 } 2078 }
2082 if (!lowest) { 2079 if (!lowest) {
2083 btrfs_tree_unlock(upper->eb); 2080 btrfs_tree_unlock(upper->eb);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4e83457ea253..2dbf1c1f56ee 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
593 return 0; 593 return 0;
594} 594}
595 595
596#if 0
596/* 597/*
597 * when dropping snapshots, we generate a ton of delayed refs, and it makes 598 * when dropping snapshots, we generate a ton of delayed refs, and it makes
598 * sense not to join the transaction while it is trying to flush the current 599 * sense not to join the transaction while it is trying to flush the current
@@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
681 btrfs_btree_balance_dirty(tree_root, nr); 682 btrfs_btree_balance_dirty(tree_root, nr);
682 return ret; 683 return ret;
683} 684}
685#endif
684 686
685/* 687/*
686 * new snapshots need to be created at a very specific time in the 688 * new snapshots need to be created at a very specific time in the
@@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1081 while (!list_empty(&list)) { 1083 while (!list_empty(&list)) {
1082 root = list_entry(list.next, struct btrfs_root, root_list); 1084 root = list_entry(list.next, struct btrfs_root, root_list);
1083 list_del_init(&root->root_list); 1085 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root); 1086 btrfs_drop_snapshot(root, 0);
1085 } 1087 }
1086 return 0; 1088 return 0;
1087} 1089}
diff --git a/mm/nommu.c b/mm/nommu.c
index bf0cc762a7d2..53cab10fece4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -238,6 +238,27 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
238} 238}
239EXPORT_SYMBOL(get_user_pages); 239EXPORT_SYMBOL(get_user_pages);
240 240
241/**
242 * follow_pfn - look up PFN at a user virtual address
243 * @vma: memory mapping
244 * @address: user virtual address
245 * @pfn: location to store found PFN
246 *
247 * Only IO mappings and raw PFN mappings are allowed.
248 *
249 * Returns zero and the pfn at @pfn on success, -ve otherwise.
250 */
251int follow_pfn(struct vm_area_struct *vma, unsigned long address,
252 unsigned long *pfn)
253{
254 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
255 return -EINVAL;
256
257 *pfn = address >> PAGE_SHIFT;
258 return 0;
259}
260EXPORT_SYMBOL(follow_pfn);
261
241DEFINE_RWLOCK(vmlist_lock); 262DEFINE_RWLOCK(vmlist_lock);
242struct vm_struct *vmlist; 263struct vm_struct *vmlist;
243 264
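The nommu follow_pfn() added above accepts only VM_IO/VM_PFNMAP mappings and hands the PFN back through its third argument, matching the MMU version's contract. A hypothetical caller might look like the sketch below; lookup_user_pfn() is a made-up helper, while find_vma(), down_read() and follow_pfn() are the normal kernel APIs.

/*
 * Hypothetical caller of follow_pfn(), not part of this patch: look up
 * the PFN backing a user address while holding mmap_sem for read.
 */
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/errno.h>

static int lookup_user_pfn(struct mm_struct *mm, unsigned long addr,
			   unsigned long *pfn)
{
	struct vm_area_struct *vma;
	int ret = -EINVAL;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
	if (vma && addr >= vma->vm_start)
		ret = follow_pfn(vma, addr, pfn);  /* fails unless VM_IO/VM_PFNMAP */
	up_read(&mm->mmap_sem);

	return ret;
}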
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index ce394192c85a..8f729aedc1a3 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -19,6 +19,16 @@
19#define cpu_relax() asm volatile("" ::: "memory"); 19#define cpu_relax() asm volatile("" ::: "memory");
20#endif 20#endif
21 21
22#ifdef __sh__
23#include "../../arch/sh/include/asm/unistd.h"
24#if defined(__SH4A__) || defined(__SH5__)
25# define rmb() asm volatile("synco" ::: "memory")
26#else
27# define rmb() asm volatile("" ::: "memory")
28#endif
29#define cpu_relax() asm volatile("" ::: "memory")
30#endif
31
22#include <time.h> 32#include <time.h>
23#include <unistd.h> 33#include <unistd.h>
24#include <sys/types.h> 34#include <sys/types.h>
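The perf.h hunk above gives sh its own rmb() (synco on SH4A/SH5, a plain compiler barrier otherwise) and cpu_relax(). tools/perf needs rmb() because the ring-buffer reader loads data_head and then the records behind it, and those loads must stay ordered. A standalone sketch of that read side follows; struct mmap_page and drain() are stand-ins, not the real perf structures or helpers.

/*
 * Why a per-arch rmb() matters to the perf reader.  Sketch only.
 */
#include <stdint.h>
#include <stddef.h>

#if defined(__sh__) && (defined(__SH4A__) || defined(__SH5__))
# define rmb() asm volatile("synco" ::: "memory")
#elif defined(__sh__)
# define rmb() asm volatile("" ::: "memory")
#else
# define rmb() __sync_synchronize()	/* generic fallback for this sketch */
#endif

struct mmap_page {
	volatile uint64_t data_head;	/* advanced by the kernel */
};

/* copy everything between tail and data_head out of the ring */
static uint64_t drain(struct mmap_page *pc, const char *data, size_t size,
		      uint64_t tail, char *out)
{
	uint64_t head = pc->data_head;

	rmb();		/* order the data_head load before the record loads */

	while (tail < head) {
		*out++ = data[tail % size];
		tail++;
	}
	return tail;
}

int main(void)
{
	static const char ring[] = "perfdata";
	char out[sizeof(ring)];
	struct mmap_page pc = { .data_head = sizeof(ring) - 1 };

	return drain(&pc, ring, sizeof(ring) - 1, 0, out) == sizeof(ring) - 1 ? 0 : 1;
}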