-rw-r--r--  arch/sparc/Kconfig | 7
-rw-r--r--  arch/sparc/include/asm/cpudata_32.h | 5
-rw-r--r--  arch/sparc/include/asm/floppy_32.h | 40
-rw-r--r--  arch/sparc/include/asm/io.h | 13
-rw-r--r--  arch/sparc/include/asm/irq_32.h | 6
-rw-r--r--  arch/sparc/include/asm/leon.h | 41
-rw-r--r--  arch/sparc/include/asm/pcic.h | 12
-rw-r--r--  arch/sparc/include/asm/pgtable_32.h | 6
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h | 3
-rw-r--r--  arch/sparc/include/asm/setup.h | 12
-rw-r--r--  arch/sparc/include/asm/smp_32.h | 37
-rw-r--r--  arch/sparc/include/asm/smp_64.h | 4
-rw-r--r--  arch/sparc/include/asm/spinlock_32.h | 1
-rw-r--r--  arch/sparc/include/asm/system_32.h | 5
-rw-r--r--  arch/sparc/include/asm/system_64.h | 4
-rw-r--r--  arch/sparc/include/asm/winmacro.h | 9
-rw-r--r--  arch/sparc/kernel/Makefile | 4
-rw-r--r--  arch/sparc/kernel/cpu.c | 139
-rw-r--r--  arch/sparc/kernel/cpumap.c | 4
-rw-r--r--  arch/sparc/kernel/devices.c | 4
-rw-r--r--  arch/sparc/kernel/ds.c | 14
-rw-r--r--  arch/sparc/kernel/entry.S | 41
-rw-r--r--  arch/sparc/kernel/head_32.S | 51
-rw-r--r--  arch/sparc/kernel/ioport.c | 42
-rw-r--r--  arch/sparc/kernel/irq.h | 51
-rw-r--r--  arch/sparc/kernel/irq_32.c | 513
-rw-r--r--  arch/sparc/kernel/irq_64.c | 6
-rw-r--r--  arch/sparc/kernel/kernel.h | 5
-rw-r--r--  arch/sparc/kernel/leon_kernel.c | 365
-rw-r--r--  arch/sparc/kernel/leon_smp.c | 148
-rw-r--r--  arch/sparc/kernel/mdesc.c | 2
-rw-r--r--  arch/sparc/kernel/of_device_64.c | 3
-rw-r--r--  arch/sparc/kernel/pci_msi.c | 3
-rw-r--r--  arch/sparc/kernel/pcic.c | 83
-rw-r--r--  arch/sparc/kernel/perf_event.c | 1
-rw-r--r--  arch/sparc/kernel/process_32.c | 12
-rw-r--r--  arch/sparc/kernel/prom_32.c | 1
-rw-r--r--  arch/sparc/kernel/setup_32.c | 87
-rw-r--r--  arch/sparc/kernel/setup_64.c | 78
-rw-r--r--  arch/sparc/kernel/smp_32.c | 103
-rw-r--r--  arch/sparc/kernel/smp_64.c | 58
-rw-r--r--  arch/sparc/kernel/sun4c_irq.c | 150
-rw-r--r--  arch/sparc/kernel/sun4d_irq.c | 494
-rw-r--r--  arch/sparc/kernel/sun4d_smp.c | 93
-rw-r--r--  arch/sparc/kernel/sun4m_irq.c | 179
-rw-r--r--  arch/sparc/kernel/sun4m_smp.c | 51
-rw-r--r--  arch/sparc/kernel/sysfs.c | 3
-rw-r--r--  arch/sparc/kernel/time_32.c | 10
-rw-r--r--  arch/sparc/kernel/us2e_cpufreq.c | 4
-rw-r--r--  arch/sparc/kernel/us3_cpufreq.c | 4
-rw-r--r--  arch/sparc/lib/Makefile | 1
-rw-r--r--  arch/sparc/lib/rwsem_32.S | 204
-rw-r--r--  arch/sparc/mm/init_64.c | 14
-rw-r--r--  arch/x86/kernel/cpu/common.c | 4
-rw-r--r--  drivers/block/drbd/drbd_int.h | 1
-rw-r--r--  drivers/dma/ioat/dma.c | 1
-rw-r--r--  drivers/dma/ioat/dma_v2.c | 1
-rw-r--r--  drivers/dma/ioat/dma_v3.c | 1
-rw-r--r--  drivers/ide/ide-acpi.c | 4
-rw-r--r--  drivers/ide/ide-floppy.c | 2
-rw-r--r--  drivers/ide/ide-scan-pci.c | 2
-rw-r--r--  drivers/ide/pmac.c | 4
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.c | 1
-rw-r--r--  drivers/md/bitmap.c | 10
-rw-r--r--  drivers/md/md.c | 23
-rw-r--r--  drivers/md/multipath.c | 60
-rw-r--r--  drivers/md/multipath.h | 1
-rw-r--r--  drivers/md/raid1.c | 506
-rw-r--r--  drivers/md/raid1.h | 4
-rw-r--r--  drivers/md/raid10.c | 424
-rw-r--r--  drivers/md/raid5.c | 41
-rw-r--r--  drivers/net/igb/igb_main.c | 2
-rw-r--r--  fs/compat.c | 235
-rw-r--r--  fs/exec.c | 125
-rw-r--r--  fs/gfs2/bmap.c | 2
-rw-r--r--  fs/gfs2/log.c | 29
-rw-r--r--  fs/gfs2/rgrp.c | 4
-rw-r--r--  fs/nilfs2/alloc.c | 12
-rw-r--r--  fs/nilfs2/bmap.c | 4
-rw-r--r--  fs/nilfs2/btnode.c | 19
-rw-r--r--  fs/nilfs2/btnode.h | 4
-rw-r--r--  fs/nilfs2/btree.c | 38
-rw-r--r--  fs/nilfs2/cpfile.c | 24
-rw-r--r--  fs/nilfs2/dat.c | 4
-rw-r--r--  fs/nilfs2/file.c | 1
-rw-r--r--  fs/nilfs2/gcinode.c | 25
-rw-r--r--  fs/nilfs2/ifile.c | 4
-rw-r--r--  fs/nilfs2/inode.c | 23
-rw-r--r--  fs/nilfs2/ioctl.c | 61
-rw-r--r--  fs/nilfs2/mdt.c | 8
-rw-r--r--  fs/nilfs2/mdt.h | 9
-rw-r--r--  fs/nilfs2/nilfs.h | 7
-rw-r--r--  fs/nilfs2/page.c | 79
-rw-r--r--  fs/nilfs2/page.h | 7
-rw-r--r--  fs/nilfs2/recovery.c | 12
-rw-r--r--  fs/nilfs2/segbuf.c | 17
-rw-r--r--  fs/nilfs2/segment.c | 190
-rw-r--r--  fs/nilfs2/segment.h | 2
-rw-r--r--  fs/nilfs2/sufile.c | 274
-rw-r--r--  fs/nilfs2/sufile.h | 4
-rw-r--r--  fs/nilfs2/super.c | 131
-rw-r--r--  fs/nilfs2/the_nilfs.c | 24
-rw-r--r--  fs/nilfs2/the_nilfs.h | 2
-rw-r--r--  include/linux/binfmts.h | 4
-rw-r--r--  include/linux/ide.h | 2
-rw-r--r--  include/linux/nilfs2_fs.h | 4
-rw-r--r--  include/linux/skbuff.h | 6
-rw-r--r--  include/net/mac80211.h | 3
-rw-r--r--  init/Kconfig | 6
-rw-r--r--  kernel/sched.c | 2
-rw-r--r--  net/ipv4/fib_trie.c | 2
-rwxr-xr-x  tools/testing/ktest/ktest.pl | 156
-rw-r--r--  tools/testing/ktest/sample.conf | 93
113 files changed, 3086 insertions(+), 2894 deletions(-)
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e560d102215a..63a027c9ada5 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -25,6 +25,10 @@ config SPARC
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_API_DEBUG
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_GENERIC_HARDIRQS
+	select GENERIC_HARDIRQS_NO_DEPRECATED
+	select GENERIC_IRQ_SHOW
+	select USE_GENERIC_SMP_HELPERS if SMP
 
 config SPARC32
 	def_bool !64BIT
@@ -43,15 +47,12 @@ config SPARC64
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_SYSCALL_TRACEPOINTS
-	select USE_GENERIC_SMP_HELPERS if SMP
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
 	select RTC_DRV_STARFIRE
 	select HAVE_PERF_EVENTS
 	select PERF_USE_VMALLOC
-	select HAVE_GENERIC_HARDIRQS
-	select GENERIC_IRQ_SHOW
 	select IRQ_PREFLOW_FASTEOI
 
 config ARCH_DEFCONFIG
diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h
index 31d48a0e32c7..a4c5a938b936 100644
--- a/arch/sparc/include/asm/cpudata_32.h
+++ b/arch/sparc/include/asm/cpudata_32.h
@@ -16,6 +16,10 @@ typedef struct {
 	unsigned long clock_tick;
 	unsigned int multiplier;
 	unsigned int counter;
+#ifdef CONFIG_SMP
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+#endif
 	int prom_node;
 	int mid;
 	int next;
@@ -23,5 +27,6 @@ typedef struct {
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
 #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() __get_cpu_var(__cpu_data)
 
 #endif /* _SPARC_CPUDATA_H */
diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h
index 86666f70322e..482c79e2a416 100644
--- a/arch/sparc/include/asm/floppy_32.h
+++ b/arch/sparc/include/asm/floppy_32.h
@@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void)
 	pdma_areasize = pdma_size;
 }
 
-/* Our low-level entry point in arch/sparc/kernel/entry.S */
-extern int sparc_floppy_request_irq(int irq, unsigned long flags,
-				    irq_handler_t irq_handler);
+extern int sparc_floppy_request_irq(unsigned int irq,
+				    irq_handler_t irq_handler);
 
 static int sun_fd_request_irq(void)
 {
 	static int once = 0;
-	int error;
 
-	if(!once) {
+	if (!once) {
 		once = 1;
-		error = sparc_floppy_request_irq(FLOPPY_IRQ,
-						 IRQF_DISABLED,
-						 floppy_interrupt);
-		return ((error == 0) ? 0 : -1);
-	} else return 0;
+		return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt);
+	} else {
+		return 0;
+	}
 }
 
 static struct linux_prom_registers fd_regs[2];
 
 static int sun_floppy_init(void)
 {
+	struct platform_device *op;
+	struct device_node *dp;
 	char state[128];
 	phandle tnode, fd_node;
 	int num_regs;
@@ -310,7 +309,6 @@ static int sun_floppy_init(void)
 
 	use_virtual_dma = 1;
 
-	FLOPPY_IRQ = 11;
 	/* Forget it if we aren't on a machine that could possibly
 	 * ever have a floppy drive.
 	 */
@@ -349,6 +347,26 @@ static int sun_floppy_init(void)
 	sun_fdc = (struct sun_flpy_controller *)
 	    of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy");
 
+	/* Look up irq in platform_device.
+	 * We try "SUNW,fdtwo" and "fd"
+	 */
+	for_each_node_by_name(dp, "SUNW,fdtwo") {
+		op = of_find_device_by_node(dp);
+		if (op)
+			break;
+	}
+	if (!op) {
+		for_each_node_by_name(dp, "fd") {
+			op = of_find_device_by_node(dp);
+			if (op)
+				break;
+		}
+	}
+	if (!op)
+		goto no_sun_fdc;
+
+	FLOPPY_IRQ = op->archdata.irqs[0];
+
 	/* Last minute sanity check... */
 	if(sun_fdc->status_82072 == 0xff) {
 		sun_fdc = NULL;
diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h
index a34b2994937a..f6902cf3cbe9 100644
--- a/arch/sparc/include/asm/io.h
+++ b/arch/sparc/include/asm/io.h
@@ -5,4 +5,17 @@
 #else
 #include <asm/io_32.h>
 #endif
+
+/*
+ * Defines used for both SPARC32 and SPARC64
+ */
+
+/* Big endian versions of memory read/write routines */
+#define readb_be(__addr)	__raw_readb(__addr)
+#define readw_be(__addr)	__raw_readw(__addr)
+#define readl_be(__addr)	__raw_readl(__addr)
+#define writeb_be(__b, __addr)	__raw_writeb(__b, __addr)
+#define writel_be(__w, __addr)	__raw_writel(__w, __addr)
+#define writew_be(__l, __addr)	__raw_writew(__l, __addr)
+
 #endif
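On sparc the __raw_* accessors perform no byte swapping, so the *_be macros above read and write big-endian device registers directly. A minimal usage sketch follows; the device name, resource and REG_STATUS offset are illustrative, not part of this patch:

	void __iomem *regs = of_ioremap(&res, 0, resource_size(&res), "mydev");
	u32 status = readl_be(regs + REG_STATUS);	/* raw big-endian load */
	writel_be(status | 0x1, regs + REG_STATUS);	/* raw big-endian store */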
diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h
index eced3e3ebd30..2ae3acaeb1b3 100644
--- a/arch/sparc/include/asm/irq_32.h
+++ b/arch/sparc/include/asm/irq_32.h
@@ -6,7 +6,11 @@
 #ifndef _SPARC_IRQ_H
 #define _SPARC_IRQ_H
 
-#define NR_IRQS    16
+/* Allocated number of logical irq numbers.
+ * sun4d boxes (ss2000e) should be OK with ~32.
+ * Be on the safe side and make room for 64
+ */
+#define NR_IRQS    64
 
 #include <linux/interrupt.h>
 
diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
index c04f96fb753c..6bdaf1e43d2a 100644
--- a/arch/sparc/include/asm/leon.h
+++ b/arch/sparc/include/asm/leon.h
@@ -52,29 +52,6 @@
 #define LEON_DIAGF_VALID	0x2000
 #define LEON_DIAGF_VALID_SHIFT	13
 
-/*
- * Interrupt Sources
- *
- * The interrupt source numbers directly map to the trap type and to
- * the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask,
- * and the Interrupt Pending Registers.
- */
-#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR	1
-#define LEON_INTERRUPT_UART_1_RX_TX		2
-#define LEON_INTERRUPT_UART_0_RX_TX		3
-#define LEON_INTERRUPT_EXTERNAL_0		4
-#define LEON_INTERRUPT_EXTERNAL_1		5
-#define LEON_INTERRUPT_EXTERNAL_2		6
-#define LEON_INTERRUPT_EXTERNAL_3		7
-#define LEON_INTERRUPT_TIMER1			8
-#define LEON_INTERRUPT_TIMER2			9
-#define LEON_INTERRUPT_EMPTY1			10
-#define LEON_INTERRUPT_EMPTY2			11
-#define LEON_INTERRUPT_OPEN_ETH			12
-#define LEON_INTERRUPT_EMPTY4			13
-#define LEON_INTERRUPT_EMPTY5			14
-#define LEON_INTERRUPT_EMPTY6			15
-
 /* irq masks */
 #define LEON_HARD_INT(x)	(1 << (x))	/* irq 0-15 */
 #define LEON_IRQMASK_R		0x0000fffe	/* bit 15- 1 of lregs.irqmask */
@@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void)
 /* macro access for leon_readnobuffer_reg() */
 #define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x))
 
-extern void sparc_leon_eirq_register(int eirq);
 extern void leon_init(void);
 extern void leon_switch_mm(void);
 extern void leon_init_IRQ(void);
@@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void)
 #endif /*!__ASSEMBLY__*/
 
 #ifdef CONFIG_SMP
-# define LEON3_IRQ_RESCHEDULE		13
-# define LEON3_IRQ_TICKER		(leon_percpu_timer_dev[0].irq)
+# define LEON3_IRQ_IPI_DEFAULT		13
+# define LEON3_IRQ_TICKER		(leon3_ticker_irq)
 # define LEON3_IRQ_CROSS_CALL		15
 #endif
 
@@ -339,9 +315,9 @@ struct leon2_cacheregs {
 #include <linux/interrupt.h>
 
 struct device_node;
-extern int sparc_leon_eirq_get(int eirq, int cpu);
-extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id);
-extern void sparc_leon_eirq_register(int eirq);
+extern unsigned int leon_build_device_irq(unsigned int real_irq,
+					  irq_flow_handler_t flow_handler,
+					  const char *name, int do_ack);
 extern void leon_clear_clock_irq(void);
 extern void leon_load_profile_irq(int cpu, unsigned int limit);
 extern void leon_init_timers(irq_handler_t counter_fn);
@@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs);
 extern int leon_flush_needed(void);
 extern void leon_switch_mm(void);
 extern int srmmu_swprobe_trace;
+extern int leon3_ticker_irq;
 
 #ifdef CONFIG_SMP
 extern int leon_smp_nrcpus(void);
@@ -366,17 +343,19 @@ extern void leon_smp_done(void);
 extern void leon_boot_cpus(void);
 extern int leon_boot_one_cpu(int i);
 void leon_init_smp(void);
-extern void cpu_probe(void);
 extern void cpu_idle(void);
 extern void init_IRQ(void);
 extern void cpu_panic(void);
 extern int __leon_processor_id(void);
 void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu);
+extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused);
 
-extern unsigned int real_irq_entry[], smpleon_ticker[];
+extern unsigned int real_irq_entry[];
+extern unsigned int smpleon_ipi[];
 extern unsigned int patchme_maybe_smp_msg[];
 extern unsigned int t_nmi[], linux_trap_ipi15_leon[];
 extern unsigned int linux_trap_ipi15_sun4m[];
+extern int leon_ipi_irq;
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h
index f20ef562b265..7eb5d78f5211 100644
--- a/arch/sparc/include/asm/pcic.h
+++ b/arch/sparc/include/asm/pcic.h
@@ -29,11 +29,17 @@ struct linux_pcic {
 	int			pcic_imdim;
 };
 
-extern int pcic_probe(void);
-/* Erm... MJ redefined pcibios_present() so that it does not work early. */
+#ifdef CONFIG_PCI
 extern int pcic_present(void);
+extern int pcic_probe(void);
+extern void pci_time_init(void);
 extern void sun4m_pci_init_IRQ(void);
-
+#else
+static inline int pcic_present(void) { return 0; }
+static inline int pcic_probe(void) { return 0; }
+static inline void pci_time_init(void) {}
+static inline void sun4m_pci_init_IRQ(void) {}
+#endif
 #endif
 
 /* Size of PCI I/O space which we relocate. */
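The #ifdef CONFIG_PCI split above is the usual compile-time stub pattern: callers may invoke pcic_present() and friends unconditionally, and non-PCI builds compile the calls away to nothing. A generic sketch of the pattern (CONFIG_FOO and foo_probe() are illustrative names, not from this patch):

	#ifdef CONFIG_FOO
	extern int foo_probe(void);			/* real version in foo.c */
	#else
	static inline int foo_probe(void) { return 0; }	/* optimized out */
	#endif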
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 303bd4dc8292..5b31a8e89823 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -8,6 +8,8 @@
  * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/const.h>
+
 #ifndef __ASSEMBLY__
 #include <asm-generic/4level-fixup.h>
 
@@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma,
 
 #endif /* !(__ASSEMBLY__) */
 
-#define VMALLOC_START    0xfe600000
+#define VMALLOC_START    _AC(0xfe600000,UL)
 /* XXX Alter this when I get around to fixing sun4c - Anton */
-#define VMALLOC_END      0xffc00000
+#define VMALLOC_END      _AC(0xffc00000,UL)
 
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
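The _AC() macro from <linux/const.h> (now included above) lets one constant serve both C and assembler; its definition is roughly:

	#ifdef __ASSEMBLY__
	#define _AC(X, Y)	X		/* assembler sees bare 0xfe600000 */
	#else
	#define __AC(X, Y)	(X##Y)
	#define _AC(X, Y)	__AC(X, Y)	/* C sees 0xfe600000UL */
	#endif

so VMALLOC_START gains the correct unsigned long type in C code without breaking assembly users of the header.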
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index f8dddb7045bb..b77128c80524 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048];
 extern void paging_init(void);
 extern unsigned long find_ecache_flush_span(unsigned long size);
 
+struct seq_file;
+extern void mmu_info(struct seq_file *);
+
 /* These do nothing with the way I have things setup. */
 #define mmu_lockarea(vaddr, len)		(vaddr)
 #define mmu_unlockarea(vaddr, len)		do { } while(0)
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 2643c62f4ac0..64718ba26434 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -11,4 +11,16 @@
 # define COMMAND_LINE_SIZE 256
 #endif
 
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SPARC32
+/* The CPU that was used for booting
+ * Only sun4d + leon may have boot_cpu_id != 0
+ */
+extern unsigned char boot_cpu_id;
+extern unsigned char boot_cpu_id4;
+#endif
+
+#endif /* __KERNEL__ */
+
 #endif /* _SPARC_SETUP_H */
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
index d82d7f4c0a79..093f10843ff2 100644
--- a/arch/sparc/include/asm/smp_32.h
+++ b/arch/sparc/include/asm/smp_32.h
@@ -50,42 +50,38 @@ void smp_callin(void);
 void smp_boot_cpus(void);
 void smp_store_cpu_info(int);
 
+void smp_resched_interrupt(void);
+void smp_call_function_single_interrupt(void);
+void smp_call_function_interrupt(void);
+
 struct seq_file;
 void smp_bogo(struct seq_file *);
 void smp_info(struct seq_file *);
 
 BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long)
 BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void)
+BTFIXUPDEF_CALL(void, smp_ipi_resched, int);
+BTFIXUPDEF_CALL(void, smp_ipi_single, int);
+BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int);
 BTFIXUPDEF_BLACKBOX(hard_smp_processor_id)
 BTFIXUPDEF_BLACKBOX(load_current)
 
 #define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4)
 
-static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); }
+static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); }
 static inline void xc1(smpfunc_t func, unsigned long arg1)
-{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); }
 static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); }
 static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2,
 		       unsigned long arg3)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); }
 static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2,
 		       unsigned long arg3, unsigned long arg4)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); }
-
-static inline int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
-	xc1((smpfunc_t)func, (unsigned long)info);
-	return 0;
-}
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); }
 
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
-					   void *info, int wait)
-{
-	smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid),
-		       (unsigned long) info, 0, 0, 0);
-	return 0;
-}
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 static inline int cpu_logical_map(int cpu)
 {
@@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void)
 	      __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t"
 				   "nop; nop" :
 				   "=&r" (cpuid));
+	      - leon
+	      __asm__ __volatile__("rd %asr17, %0\n\t"
+				   "srl %0, 0x1c, %0\n\t"
+				   "nop\n\t" :
+				   "=&r" (cpuid));
 	See btfixup.h and btfixupprep.c to understand how a blackbox works.
  */
 __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t"
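With USE_GENERIC_SMP_HELPERS selected, the open-coded smp_call_function*() inlines removed above are replaced by the common implementation in kernel/smp.c, built on the two arch_send_call_function_*() hooks declared in this header. Caller-side code is unchanged; a sketch under that assumption (do_flush is an illustrative callback, not from this patch):

	static void do_flush(void *info)
	{
		/* runs on every other online CPU */
	}

	static void flush_all_cpus(void)
	{
		/* generic kernel/smp.c version; it raises the IPIs via
		 * arch_send_call_function_ipi_mask() declared above */
		smp_call_function(do_flush, NULL, 1);
	}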
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index f49e11cd4ded..20bca8950710 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -49,6 +49,10 @@ extern void cpu_play_dead(void);
 
 extern void smp_fetch_global_regs(void);
 
+struct seq_file;
+void smp_bogo(struct seq_file *);
+void smp_info(struct seq_file *);
+
 #ifdef CONFIG_HOTPLUG_CPU
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 7f9b9dba38a6..5f5b8bf3f50d 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -9,6 +9,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/psr.h>
+#include <asm/processor.h> /* for cpu_relax */
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
diff --git a/arch/sparc/include/asm/system_32.h b/arch/sparc/include/asm/system_32.h
index 890036b3689a..47a7e862474e 100644
--- a/arch/sparc/include/asm/system_32.h
+++ b/arch/sparc/include/asm/system_32.h
@@ -15,11 +15,6 @@
 
 #include <linux/irqflags.h>
 
-static inline unsigned int probe_irq_mask(unsigned long val)
-{
-	return 0;
-}
-
 /*
  * Sparc (general) CPU types
  */
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
index e3b65d8cf41b..3c96d3bb9f15 100644
--- a/arch/sparc/include/asm/system_64.h
+++ b/arch/sparc/include/asm/system_64.h
@@ -29,10 +29,6 @@ enum sparc_cpu {
 /* This cannot ever be a sun4c :) That's just history. */
 #define ARCH_SUN4C 0
 
-extern const char *sparc_cpu_type;
-extern const char *sparc_fpu_type;
-extern const char *sparc_pmu_type;
-
 extern char reboot_command[];
 
 /* These are here in an effort to more fully work around Spitfire Errata
diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
index 5b0a06dc3bcb..a9be04b0d049 100644
--- a/arch/sparc/include/asm/winmacro.h
+++ b/arch/sparc/include/asm/winmacro.h
@@ -103,6 +103,7 @@
 	st	%scratch, [%cur_reg + TI_W_SAVED];
 
 #ifdef CONFIG_SMP
+/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */
 #define LOAD_CURRENT4M(dest_reg, idreg) \
 	rd	%tbr, %idreg; \
 	sethi	%hi(current_set), %dest_reg; \
@@ -118,6 +119,14 @@
 	or	%dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \
 	ld	[%idreg + %dest_reg], %dest_reg;
 
+#define LOAD_CURRENT_LEON(dest_reg, idreg) \
+	rd	%asr17, %idreg; \
+	sethi	%hi(current_set), %dest_reg; \
+	srl	%idreg, 0x1c, %idreg; \
+	or	%dest_reg, %lo(current_set), %dest_reg; \
+	sll	%idreg, 0x2, %idreg; \
+	ld	[%idreg + %dest_reg], %dest_reg;
+
 /* Blackbox - take care with this... - check smp4m and smp4d before changing this. */
 #define LOAD_CURRENT(dest_reg, idreg) \
 	sethi	 %hi(___b_load_current), %idreg; \
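LOAD_CURRENT_LEON relies on LEON keeping the CPU index in the top four bits of %asr17; in rough C terms (read_asr17() is pseudo-code for the rd instruction):

	unsigned int cpu = read_asr17() >> 28;		/* srl %idreg, 0x1c */
	struct task_struct *cur = current_set[cpu];	/* sll 0x2 scales the index
							 * to a pointer-array offset */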
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 99aa4db6e9c2..9cff2709a96d 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64) += pcr.o
 obj-$(CONFIG_SPARC64)   += nmi.o
 obj-$(CONFIG_SPARC64_SMP) += cpumap.o
 
-# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
-obj-$(CONFIG_SPARC32)     += devres.o
-devres-y := ../../../kernel/irq/devres.o
-
 obj-y                   += dma.o
 
 obj-$(CONFIG_SPARC32_PCI) += pcic.o
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 7925c54f4133..138dbbc8dc84 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -4,6 +4,7 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/seq_file.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -11,7 +12,9 @@
 #include <linux/threads.h>
 
 #include <asm/spitfire.h>
+#include <asm/pgtable.h>
 #include <asm/oplib.h>
+#include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/head.h>
 #include <asm/psr.h>
@@ -23,6 +26,9 @@
 DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
 EXPORT_PER_CPU_SYMBOL(__cpu_data);
 
+int ncpus_probed;
+unsigned int fsr_storage;
+
 struct cpu_info {
 	int psr_vers;
 	const char *name;
@@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = {
  * machine type value into consideration too. I will fix this.
  */
 
-const char *sparc_cpu_type;
-const char *sparc_fpu_type;
+static const char *sparc_cpu_type;
+static const char *sparc_fpu_type;
 const char *sparc_pmu_type;
 
-unsigned int fsr_storage;
 
-static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
 {
 	const struct manufacturer_info *manuf;
 	int i;
@@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
 }
 
 #ifdef CONFIG_SPARC32
-void __cpuinit cpu_probe(void)
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+	seq_printf(m,
+		   "cpu\t\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "promlib\t\t: Version %d Revision %d\n"
+		   "prom\t\t: %d.%d\n"
+		   "type\t\t: %s\n"
+		   "ncpus probed\t: %d\n"
+		   "ncpus active\t: %d\n"
+#ifndef CONFIG_SMP
+		   "CPU0Bogo\t: %lu.%02lu\n"
+		   "CPU0ClkTck\t: %ld\n"
+#endif
+		   ,
+		   sparc_cpu_type,
+		   sparc_fpu_type ,
+		   romvec->pv_romvers,
+		   prom_rev,
+		   romvec->pv_printrev >> 16,
+		   romvec->pv_printrev & 0xffff,
+		   &cputypval[0],
+		   ncpus_probed,
+		   num_online_cpus()
+#ifndef CONFIG_SMP
+		   , cpu_data(0).udelay_val/(500000/HZ),
+		   (cpu_data(0).udelay_val/(5000/HZ)) % 100,
+		   cpu_data(0).clock_tick
+#endif
+		);
+
+#ifdef CONFIG_SMP
+	smp_bogo(m);
+#endif
+	mmu_info(m);
+#ifdef CONFIG_SMP
+	smp_info(m);
+#endif
+	return 0;
+}
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+unsigned int dcache_parity_tl1_occurred;
+unsigned int icache_parity_tl1_occurred;
+
+
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+	seq_printf(m,
+		   "cpu\t\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "pmu\t\t: %s\n"
+		   "prom\t\t: %s\n"
+		   "type\t\t: %s\n"
+		   "ncpus probed\t: %d\n"
+		   "ncpus active\t: %d\n"
+		   "D$ parity tl1\t: %u\n"
+		   "I$ parity tl1\t: %u\n"
+#ifndef CONFIG_SMP
+		   "Cpu0ClkTck\t: %016lx\n"
+#endif
+		   ,
+		   sparc_cpu_type,
+		   sparc_fpu_type,
+		   sparc_pmu_type,
+		   prom_version,
+		   ((tlb_type == hypervisor) ?
+		    "sun4v" :
+		    "sun4u"),
+		   ncpus_probed,
+		   num_online_cpus(),
+		   dcache_parity_tl1_occurred,
+		   icache_parity_tl1_occurred
+#ifndef CONFIG_SMP
+		   , cpu_data(0).clock_tick
+#endif
+		);
+#ifdef CONFIG_SMP
+	smp_bogo(m);
+#endif
+	mmu_info(m);
+#ifdef CONFIG_SMP
+	smp_info(m);
+#endif
+	return 0;
+}
+#endif /* CONFIG_SPARC64 */
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	/* The pointer we are returning is arbitrary,
+	 * it just has to be non-NULL and not IS_ERR
+	 * in the success case.
+	 */
+	return *pos == 0 ? &c_start : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start =c_start,
+	.next =	c_next,
+	.stop =	c_stop,
+	.show =	show_cpuinfo,
+};
+
+#ifdef CONFIG_SPARC32
+static int __init cpu_type_probe(void)
 {
 	int psr_impl, psr_vers, fpu_vers;
 	int psr;
@@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void)
 	put_psr(psr);
 
 	set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+
+	return 0;
 }
-#else
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
 static void __init sun4v_cpu_probe(void)
 {
 	switch (sun4v_chip_type) {
@@ -374,6 +499,6 @@ static int __init cpu_type_probe(void)
 	}
 	return 0;
 }
+#endif /* CONFIG_SPARC64 */
 
 early_initcall(cpu_type_probe);
-#endif
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index 8de64c8126bc..d91fd782743a 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
 	new_tree->total_nodes = n;
 	memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
 
-	prev_cpu = cpu = first_cpu(cpu_online_map);
+	prev_cpu = cpu = cpumask_first(cpu_online_mask);
 
 	/* Initialize all levels in the tree with the first CPU */
 	for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
@@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index)
 	}
 
 	/* Impossible, since num_online_cpus() <= num_possible_cpus() */
-	return first_cpu(cpu_online_map);
+	return cpumask_first(cpu_online_mask);
 }
 
 static int _map_to_cpu(unsigned int index)
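These two hunks are part of the tree-wide move from the struct-valued cpumask API to the pointer-based one; the old and new spellings compare as:

	int cpu;
	cpu = first_cpu(cpu_online_map);	/* old: passes the whole cpumask_t by value */
	cpu = cpumask_first(cpu_online_mask);	/* new: passes a const struct cpumask * */

The ds.c hunks below (for_each_cpu_mask to for_each_cpu, cpus_weight to cpumask_weight, cpu_set to cpumask_set_cpu) follow the same conversion.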
diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c
index d2eddd6647cd..113c052c3043 100644
--- a/arch/sparc/kernel/devices.c
+++ b/arch/sparc/kernel/devices.c
@@ -20,7 +20,6 @@
 #include <asm/system.h>
 #include <asm/cpudata.h>
 
-extern void cpu_probe(void);
 extern void clock_stop_probe(void); /* tadpole.c */
 extern void sun4c_probe_memerr_reg(void);
 
@@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node)
 
 void __init device_scan(void)
 {
-	prom_printf("Booting Linux...\n");
+	printk(KERN_NOTICE "Booting Linux...\n");
 
 #ifndef CONFIG_SMP
 	{
@@ -133,7 +132,6 @@ void __init device_scan(void)
 	}
 #endif /* !CONFIG_SMP */
 
-	cpu_probe();
 	{
 		extern void auxio_probe(void);
 		extern void auxio_power_probe(void);
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index 3add4de8a1a9..dd1342c0a3be 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
 	tag->num_records = ncpus;
 
 	i = 0;
-	for_each_cpu_mask(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		ent[i].cpu = cpu;
 		ent[i].result = DR_CPU_RES_OK;
 		ent[i].stat = default_stat;
@@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
 	int resp_len, ncpus, cpu;
 	unsigned long flags;
 
-	ncpus = cpus_weight(*mask);
+	ncpus = cpumask_weight(mask);
 	resp_len = dr_cpu_size_response(ncpus);
 	resp = kzalloc(resp_len, GFP_KERNEL);
 	if (!resp)
@@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
 	mdesc_populate_present_mask(mask);
 	mdesc_fill_in_cpu_data(mask);
 
-	for_each_cpu_mask(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		int err;
 
 		printk(KERN_INFO "ds-%llu: Starting cpu %d...\n",
@@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
 	int resp_len, ncpus, cpu;
 	unsigned long flags;
 
-	ncpus = cpus_weight(*mask);
+	ncpus = cpumask_weight(mask);
 	resp_len = dr_cpu_size_response(ncpus);
 	resp = kzalloc(resp_len, GFP_KERNEL);
 	if (!resp)
@@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
 			       resp_len, ncpus, mask,
 			       DR_CPU_STAT_UNCONFIGURED);
 
-	for_each_cpu_mask(cpu, *mask) {
+	for_each_cpu(cpu, mask) {
 		int err;
 
 		printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n",
@@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp,
 
 	purge_dups(cpu_list, tag->num_records);
 
-	cpus_clear(mask);
+	cpumask_clear(&mask);
 	for (i = 0; i < tag->num_records; i++) {
 		if (cpu_list[i] == CPU_SENTINEL)
 			continue;
 
 		if (cpu_list[i] < nr_cpu_ids)
-			cpu_set(cpu_list[i], mask);
+			cpumask_set_cpu(cpu_list[i], &mask);
 	}
 
 	if (tag->type == DR_CPU_CONFIGURE)
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 6da784a5612b..8341963f4c84 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -269,19 +269,22 @@ smp4m_ticker:
 	/* Here is where we check for possible SMP IPI passed to us
 	 * on some level other than 15 which is the NMI and only used
 	 * for cross calls.  That has a separate entry point below.
+	 *
+	 * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*.
 	 */
 maybe_smp4m_msg:
 	GET_PROCESSOR4M_ID(o3)
 	sethi	%hi(sun4m_irq_percpu), %l5
 	sll	%o3, 2, %o3
 	or	%l5, %lo(sun4m_irq_percpu), %o5
-	sethi	%hi(0x40000000), %o2
+	sethi	%hi(0x70000000), %o2	! Check all soft-IRQs
 	ld	[%o5 + %o3], %o1
 	ld	[%o1 + 0x00], %o3	! sun4m_irq_percpu[cpu]->pending
 	andcc	%o3, %o2, %g0
 	be,a	smp4m_ticker
 	 cmp	%l7, 14
-	st	%o2, [%o1 + 0x04]	! sun4m_irq_percpu[cpu]->clear=0x40000000
+	/* Soft-IRQ IPI */
+	st	%o2, [%o1 + 0x04]	! sun4m_irq_percpu[cpu]->clear=0x70000000
 	WRITE_PAUSE
 	ld	[%o1 + 0x00], %g0	! sun4m_irq_percpu[cpu]->pending
 	WRITE_PAUSE
@@ -290,9 +293,27 @@ maybe_smp4m_msg:
 	WRITE_PAUSE
 	wr	%l4, PSR_ET, %psr
 	WRITE_PAUSE
-	call	smp_reschedule_irq
+	sll	%o2, 28, %o2		! shift for simpler checks below
+maybe_smp4m_msg_check_single:
+	andcc	%o2, 0x1, %g0
+	beq,a	maybe_smp4m_msg_check_mask
+	 andcc	%o2, 0x2, %g0
+	call	smp_call_function_single_interrupt
 	 nop
-
+	andcc	%o2, 0x2, %g0
+maybe_smp4m_msg_check_mask:
+	beq,a	maybe_smp4m_msg_check_resched
+	 andcc	%o2, 0x4, %g0
+	call	smp_call_function_interrupt
+	 nop
+	andcc	%o2, 0x4, %g0
+maybe_smp4m_msg_check_resched:
+	/* rescheduling is done in RESTORE_ALL regardless, but incr stats */
+	beq,a	maybe_smp4m_msg_out
+	 nop
+	call	smp_resched_interrupt
+	 nop
+maybe_smp4m_msg_out:
 	RESTORE_ALL
 
 	.align	4
@@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d:
 1:	b,a	1b
 
 #ifdef CONFIG_SPARC_LEON
-
-	.globl	smpleon_ticker
-	/* SMP per-cpu ticker interrupts are handled specially. */
-smpleon_ticker:
+	.globl	smpleon_ipi
+	.extern leon_ipi_interrupt
+	/* SMP per-cpu IPI interrupts are handled specially. */
+smpleon_ipi:
 	SAVE_ALL
 	or	%l0, PSR_PIL, %g2
 	wr	%g2, 0x0, %psr
 	WRITE_PAUSE
 	wr	%g2, PSR_ET, %psr
 	WRITE_PAUSE
-	call	leon_percpu_timer_interrupt
-	 add	%sp, STACKFRAME_SZ, %o0
+	call	leonsmp_ipi_interrupt
+	 add	%sp, STACKFRAME_SZ, %o1 ! pt_regs
 	wr	%l0, PSR_ET, %psr
 	WRITE_PAUSE
 	RESTORE_ALL
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index 59423491cef8..587785759838 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -810,31 +810,25 @@ found_version:
 got_prop:
 #ifdef CONFIG_SPARC_LEON
 	/* no cpu-type check is needed, it is a SPARC-LEON */
-#ifdef CONFIG_SMP
-	ba leon_smp_init
-	 nop
 
-	.global leon_smp_init
-leon_smp_init:
-	sethi	%hi(boot_cpu_id), %g1	! master always 0
-	stb	%g0, [%g1 + %lo(boot_cpu_id)]
-	sethi	%hi(boot_cpu_id4), %g1	! master always 0
-	stb	%g0, [%g1 + %lo(boot_cpu_id4)]
+	sethi	%hi(boot_cpu_id), %g2	! boot-cpu index
 
-	rd	%asr17,%g1
-	srl	%g1,28,%g1
+#ifdef CONFIG_SMP
+	ldub	[%g2 + %lo(boot_cpu_id)], %g1
+	cmp	%g1, 0xff	! unset means first CPU
+	bne	leon_smp_cpu_startup ! continue only with master
+	 nop
+#endif
+	/* Get CPU-ID from most significant 4-bit of ASR17 */
+	rd	%asr17, %g1
+	srl	%g1, 28, %g1
 
-	cmp	%g0,%g1
-	beq	sun4c_continue_boot	!continue with master
-	 nop
+	/* Update boot_cpu_id only on boot cpu */
+	stub	%g1, [%g2 + %lo(boot_cpu_id)]
 
-	ba	leon_smp_cpu_startup
-	 nop
-#else
 	ba sun4c_continue_boot
 	 nop
 #endif
-#endif
 	set cputypval, %o2
 	ldub [%o2 + 0x4], %l1
 
@@ -893,9 +887,6 @@ sun4d_init:
 	sta	%g4, [%g0] ASI_M_VIKING_TMP1
 	sethi	%hi(boot_cpu_id), %g5
 	stb	%g4, [%g5 + %lo(boot_cpu_id)]
-	sll	%g4, 2, %g4
-	sethi	%hi(boot_cpu_id4), %g5
-	stb	%g4, [%g5 + %lo(boot_cpu_id4)]
 #endif
 
 	/* Fall through to sun4m_init */
@@ -1024,14 +1015,28 @@ sun4c_continue_boot:
 	bl	1b
 	 add	%o0, 0x1, %o0
 
+	/* If boot_cpu_id has not been setup by machine specific
+	 * init-code above we default it to zero.
+	 */
+	sethi	%hi(boot_cpu_id), %g2
+	ldub	[%g2 + %lo(boot_cpu_id)], %g3
+	cmp	%g3, 0xff
+	bne	1f
+	 nop
+	mov	%g0, %g3
+	stub	%g3, [%g2 + %lo(boot_cpu_id)]
+
+1:	/* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */
+	sll	%g3, 2, %g3
+	sethi	%hi(boot_cpu_id4), %g2
+	stub	%g3, [%g2 + %lo(boot_cpu_id4)]
+
 	/* Initialize the uwinmask value for init task just in case.
 	 * But first make current_set[boot_cpu_id] point to something useful.
 	 */
 	set	init_thread_union, %g6
 	set	current_set, %g2
 #ifdef CONFIG_SMP
-	sethi	%hi(boot_cpu_id4), %g3
-	ldub	[%g3 + %lo(boot_cpu_id4)], %g3
 	st	%g6, [%g2]
 	add	%g2, %g3, %g2
 #endif
1037#endif 1042#endif
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index c6ce9a6a4790..1c9c80a1a86a 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -50,10 +50,15 @@
 #include <asm/io-unit.h>
 #include <asm/leon.h>
 
+/* This function must make sure that caches and memory are coherent after DMA
+ * On LEON systems without cache snooping it flushes the entire D-CACHE.
+ */
 #ifndef CONFIG_SPARC_LEON
-#define mmu_inval_dma_area(p, l)	/* Anton pulled it out for 2.4.0-xx */
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
+{
+}
 #else
-static inline void mmu_inval_dma_area(void *va, unsigned long len)
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
 {
 	if (!sparc_leon3_snooping_enabled())
 		leon_flush_dcache_all();
@@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
 		printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
 		goto err_nova;
 	}
-	mmu_inval_dma_area((void *)va, len_total);
 
 	// XXX The mmu_map_dma_area does this for us below, see comments.
 	// sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
@@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p,
 	release_resource(res);
 	kfree(res);
 
-	/* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */
 	pgv = virt_to_page(p);
 	mmu_unmap_dma_area(dev, ba, n);
 
@@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
 		printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
 		goto err_nova;
 	}
-	mmu_inval_dma_area(va, len_total);
 	sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
 
 	*pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
@@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
 			       dma_addr_t ba)
 {
 	struct resource *res;
-	void *pgp;
 
 	if ((res = _sparc_find_resource(&_sparc_dvma,
 	    (unsigned long)p)) == NULL) {
@@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
 		return;
 	}
 
-	pgp = phys_to_virt(ba);	/* bus_to_virt actually */
-	mmu_inval_dma_area(pgp, n);
+	dma_make_coherent(ba, n);
 	sparc_unmapiorange((unsigned long)p, n);
 
 	release_resource(res);
 	kfree(res);
-
-	free_pages((unsigned long)pgp, get_order(n));
+	free_pages((unsigned long)phys_to_virt(ba), get_order(n));
 }
 
 /*
@@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
 			     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	if (dir != PCI_DMA_TODEVICE)
-		mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size));
+		dma_make_coherent(ba, PAGE_ALIGN(size));
 }
 
 /* Map a set of buffers described by scatterlist in streaming
@@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
 
 	/* IIep is write-through, not flushing. */
 	for_each_sg(sgl, sg, nents, n) {
-		BUG_ON(page_address(sg_page(sg)) == NULL);
-		sg->dma_address = virt_to_phys(sg_virt(sg));
+		sg->dma_address = sg_phys(sg);
 		sg->dma_length = sg->length;
 	}
 	return nents;
@@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
 
 	if (dir != PCI_DMA_TODEVICE) {
 		for_each_sg(sgl, sg, nents, n) {
-			BUG_ON(page_address(sg_page(sg)) == NULL);
-			mmu_inval_dma_area(page_address(sg_page(sg)),
-					   PAGE_ALIGN(sg->length));
+			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
 		}
 	}
 }
@@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
 				     size_t size, enum dma_data_direction dir)
 {
 	if (dir != PCI_DMA_TODEVICE) {
-		mmu_inval_dma_area(phys_to_virt(ba),
-				   PAGE_ALIGN(size));
+		dma_make_coherent(ba, PAGE_ALIGN(size));
 	}
 }
 
@@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
 					size_t size, enum dma_data_direction dir)
 {
 	if (dir != PCI_DMA_TODEVICE) {
-		mmu_inval_dma_area(phys_to_virt(ba),
-				   PAGE_ALIGN(size));
+		dma_make_coherent(ba, PAGE_ALIGN(size));
 	}
 }
 
@@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
 
 	if (dir != PCI_DMA_TODEVICE) {
 		for_each_sg(sgl, sg, nents, n) {
-			BUG_ON(page_address(sg_page(sg)) == NULL);
-			mmu_inval_dma_area(page_address(sg_page(sg)),
-					   PAGE_ALIGN(sg->length));
+			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
 		}
 	}
 }
@@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *
 
 	if (dir != PCI_DMA_TODEVICE) {
 		for_each_sg(sgl, sg, nents, n) {
-			BUG_ON(page_address(sg_page(sg)) == NULL);
-			mmu_inval_dma_area(page_address(sg_page(sg)),
-					   PAGE_ALIGN(sg->length));
+			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
 		}
 	}
 }
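dma_make_coherent() and the reworked map/unmap paths above operate on physical addresses throughout, which is why the BUG_ON(page_address(...) == NULL) checks could be dropped: sg_phys() needs only the page and offset, never a kernel virtual mapping. Its kernel definition is roughly:

	static inline dma_addr_t sg_phys_sketch(struct scatterlist *sg)
	{
		/* equivalent of the kernel's sg_phys(); the name here is illustrative */
		return page_to_phys(sg_page(sg)) + sg->offset;
	}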
diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h
index 008453b798ec..100b9c204e78 100644
--- a/arch/sparc/kernel/irq.h
+++ b/arch/sparc/kernel/irq.h
@@ -2,6 +2,23 @@
 
 #include <asm/btfixup.h>
 
+struct irq_bucket {
+	struct irq_bucket *next;
+	unsigned int real_irq;
+	unsigned int irq;
+	unsigned int pil;
+};
+
+#define SUN4D_MAX_BOARD 10
+#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5)
+
+/* Map between the irq identifier used in hw to the
+ * irq_bucket. The map is sufficient large to hold
+ * the sun4d hw identifiers.
+ */
+extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+
+
 /* sun4m specific type definitions */
 
 /* This maps direct to CPU specific interrupt registers */
@@ -35,6 +52,10 @@ struct sparc_irq_config {
 };
 extern struct sparc_irq_config sparc_irq_config;
 
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil);
+void irq_link(unsigned int irq);
+void irq_unlink(unsigned int irq);
+void handler_irq(unsigned int pil, struct pt_regs *regs);
 
 /* Dave Redman (djhr@tadpole.co.uk)
  * changed these to function pointers.. it saves cycles and will allow
@@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config;
  * Changed these to btfixup entities... It saves cycles :)
  */
 
-BTFIXUPDEF_CALL(void, disable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int)
 BTFIXUPDEF_CALL(void, clear_clock_irq, void)
 BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int)
 
-static inline void __disable_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(disable_irq)(irq);
-}
-
-static inline void __enable_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(enable_irq)(irq);
-}
-
-static inline void disable_pil_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(disable_pil_irq)(irq);
-}
-
-static inline void enable_pil_irq(unsigned int irq)
-{
-	BTFIXUP_CALL(enable_pil_irq)(irq);
-}
-
 static inline void clear_clock_irq(void)
 {
 	BTFIXUP_CALL(clear_clock_irq)();
@@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
 #define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level)
 #define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level)
 #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
+
+/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */
+#define SUN4D_IPI_IRQ 14
+
+extern void sun4d_ipi_interrupt(void);
+
 #endif
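irq_map[] chains each hardware identifier to the irq_bucket entries that share one PIL; a flow handler would consume the chain roughly like this (a sketch of the intended use, not code from this hunk):

	struct irq_bucket *p = irq_map[pil];

	while (p) {
		struct irq_bucket *next = p->next;	/* handler may unlink p */

		generic_handle_irq(p->irq);		/* dispatch the virtual irq */
		p = next;
	}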
diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
index 7c93df4099cb..9b89d842913c 100644
--- a/arch/sparc/kernel/irq_32.c
+++ b/arch/sparc/kernel/irq_32.c
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cpudata.h>
 #include <asm/pcic.h>
 #include <asm/leon.h>
 
@@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore);
101 * directed CPU interrupts using the existing enable/disable irq code 102 * directed CPU interrupts using the existing enable/disable irq code
102 * with tweaks. 103 * with tweaks.
103 * 104 *
105 * Sun4d complicates things even further. IRQ numbers are arbitrary
106 * 32-bit values in that case. Since this is similar to sparc64,
107 * we adopt a virtual IRQ numbering scheme as is done there.
108 * Virutal interrupt numbers are allocated by build_irq(). So NR_IRQS
109 * just becomes a limit of how many interrupt sources we can handle in
110 * a single system. Even fully loaded SS2000 machines top off at
111 * about 32 interrupt sources or so, therefore a NR_IRQS value of 64
112 * is more than enough.
113 *
114 * We keep a map of per-PIL enable interrupts. These get wired
115 * up via the irq_chip->startup() method which gets invoked by
116 * the generic IRQ layer during request_irq().
104 */ 117 */
105 118
106 119
 120/* Table of allocated irqs. Unused entries have irq == 0 */
121static struct irq_bucket irq_table[NR_IRQS];
122/* Protect access to irq_table */
123static DEFINE_SPINLOCK(irq_table_lock);
107 124
 108/* 125/* Map from the irq identifier used in hw to the irq_bucket. */
109 * Dave Redman (djhr@tadpole.co.uk) 126struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
110 * 127/* Protect access to irq_map */
111 * There used to be extern calls and hard coded values here.. very sucky! 128static DEFINE_SPINLOCK(irq_map_lock);
112 * instead, because some of the devices attach very early, I do something
113 * equally sucky but at least we'll never try to free statically allocated
114 * space or call kmalloc before kmalloc_init :(.
115 *
116 * In fact it's the timer10 that attaches first.. then timer14
117 * then kmalloc_init is called.. then the tty interrupts attach.
118 * hmmm....
119 *
120 */
121#define MAX_STATIC_ALLOC 4
122struct irqaction static_irqaction[MAX_STATIC_ALLOC];
123int static_irq_count;
124
125static struct {
126 struct irqaction *action;
127 int flags;
128} sparc_irq[NR_IRQS];
129#define SPARC_IRQ_INPROGRESS 1
130
131/* Used to protect the IRQ action lists */
132DEFINE_SPINLOCK(irq_action_lock);
133 129
134int show_interrupts(struct seq_file *p, void *v) 130/* Allocate a new irq from the irq_table */
131unsigned int irq_alloc(unsigned int real_irq, unsigned int pil)
135{ 132{
136 int i = *(loff_t *)v;
137 struct irqaction *action;
138 unsigned long flags; 133 unsigned long flags;
139#ifdef CONFIG_SMP 134 unsigned int i;
140 int j; 135
141#endif 136 spin_lock_irqsave(&irq_table_lock, flags);
137 for (i = 1; i < NR_IRQS; i++) {
138 if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil)
139 goto found;
140 }
142 141
143 if (sparc_cpu_model == sun4d) 142 for (i = 1; i < NR_IRQS; i++) {
144 return show_sun4d_interrupts(p, v); 143 if (!irq_table[i].irq)
144 break;
145 }
145 146
146 spin_lock_irqsave(&irq_action_lock, flags);
147 if (i < NR_IRQS) { 147 if (i < NR_IRQS) {
148 action = sparc_irq[i].action; 148 irq_table[i].real_irq = real_irq;
149 if (!action) 149 irq_table[i].irq = i;
150 goto out_unlock; 150 irq_table[i].pil = pil;
151 seq_printf(p, "%3d: ", i); 151 } else {
152#ifndef CONFIG_SMP 152 printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
153 seq_printf(p, "%10u ", kstat_irqs(i)); 153 i = 0;
154#else
155 for_each_online_cpu(j) {
156 seq_printf(p, "%10u ",
157 kstat_cpu(j).irqs[i]);
158 }
159#endif
160 seq_printf(p, " %c %s",
161 (action->flags & IRQF_DISABLED) ? '+' : ' ',
162 action->name);
163 for (action = action->next; action; action = action->next) {
164 seq_printf(p, ",%s %s",
165 (action->flags & IRQF_DISABLED) ? " +" : "",
166 action->name);
167 }
168 seq_putc(p, '\n');
169 } 154 }
170out_unlock: 155found:
171 spin_unlock_irqrestore(&irq_action_lock, flags); 156 spin_unlock_irqrestore(&irq_table_lock, flags);
172 return 0; 157
158 return i;
173} 159}
174 160
 175void free_irq(unsigned int irq, void *dev_id) 161/* For a single pil, handler_irq may need to call several
 162 * interrupt handlers. Use irq_map as the entry point into irq_table,
 163 * and let each irq_table entry point to the next entry on the same pil.
164 */
165void irq_link(unsigned int irq)
176{ 166{
177 struct irqaction *action; 167 struct irq_bucket *p;
178 struct irqaction **actionp;
179 unsigned long flags; 168 unsigned long flags;
180 unsigned int cpu_irq; 169 unsigned int pil;
181
182 if (sparc_cpu_model == sun4d) {
183 sun4d_free_irq(irq, dev_id);
184 return;
185 }
186 cpu_irq = irq & (NR_IRQS - 1);
187 if (cpu_irq > 14) { /* 14 irq levels on the sparc */
188 printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq);
189 return;
190 }
191 170
192 spin_lock_irqsave(&irq_action_lock, flags); 171 BUG_ON(irq >= NR_IRQS);
193 172
194 actionp = &sparc_irq[cpu_irq].action; 173 spin_lock_irqsave(&irq_map_lock, flags);
195 action = *actionp;
196 174
197 if (!action->handler) { 175 p = &irq_table[irq];
198 printk(KERN_ERR "Trying to free free IRQ%d\n", irq); 176 pil = p->pil;
199 goto out_unlock; 177 BUG_ON(pil > SUN4D_MAX_IRQ);
200 } 178 p->next = irq_map[pil];
201 if (dev_id) { 179 irq_map[pil] = p;
202 for (; action; action = action->next) {
203 if (action->dev_id == dev_id)
204 break;
205 actionp = &action->next;
206 }
207 if (!action) {
208 printk(KERN_ERR "Trying to free free shared IRQ%d\n",
209 irq);
210 goto out_unlock;
211 }
212 } else if (action->flags & IRQF_SHARED) {
213 printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
214 irq);
215 goto out_unlock;
216 }
217 if (action->flags & SA_STATIC_ALLOC) {
218 /*
219 * This interrupt is marked as specially allocated
220 * so it is a bad idea to free it.
221 */
222 printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
223 irq, action->name);
224 goto out_unlock;
225 }
226
227 *actionp = action->next;
228 180
229 spin_unlock_irqrestore(&irq_action_lock, flags); 181 spin_unlock_irqrestore(&irq_map_lock, flags);
182}
230 183
231 synchronize_irq(irq); 184void irq_unlink(unsigned int irq)
185{
186 struct irq_bucket *p, **pnext;
187 unsigned long flags;
232 188
233 spin_lock_irqsave(&irq_action_lock, flags); 189 BUG_ON(irq >= NR_IRQS);
234 190
235 kfree(action); 191 spin_lock_irqsave(&irq_map_lock, flags);
236 192
237 if (!sparc_irq[cpu_irq].action) 193 p = &irq_table[irq];
238 __disable_irq(irq); 194 BUG_ON(p->pil > SUN4D_MAX_IRQ);
195 pnext = &irq_map[p->pil];
196 while (*pnext != p)
197 pnext = &(*pnext)->next;
198 *pnext = p->next;
239 199
240out_unlock: 200 spin_unlock_irqrestore(&irq_map_lock, flags);
241 spin_unlock_irqrestore(&irq_action_lock, flags);
242} 201}
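
[Illustrative sketch, not part of the patch: how a platform IRQ controller is expected to use irq_alloc()/irq_link() above. The chip and numbers are hypothetical; compare leon_build_device_irq() and pcic_build_device_irq() later in this series.]

static struct irq_chip example_chip;	/* hypothetical irq_chip */

static unsigned int example_build_device_irq(unsigned int real_irq)
{
	unsigned int irq;

	/* Map the hardware number to a virtual IRQ; the simple
	 * controllers converted here use pil == real_irq.
	 */
	irq = irq_alloc(real_irq, real_irq);
	if (irq == 0)
		return 0;

	irq_set_chip_and_handler_name(irq, &example_chip,
				      handle_level_irq, "example");
	/* The chip's ->irq_startup() is then expected to call
	 * irq_link(irq) so handler_irq() can find this bucket
	 * via irq_map[pil].
	 */
	return irq;
}
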
243EXPORT_SYMBOL(free_irq);
244
245/*
246 * This is called when we want to synchronize with
247 * interrupts. We may for example tell a device to
248 * stop sending interrupts: but to make sure there
249 * are no interrupts that are executing on another
250 * CPU we need to call this function.
251 */
252#ifdef CONFIG_SMP
253void synchronize_irq(unsigned int irq)
254{
255 unsigned int cpu_irq;
256 202
257 cpu_irq = irq & (NR_IRQS - 1);
258 while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS)
259 cpu_relax();
260}
261EXPORT_SYMBOL(synchronize_irq);
262#endif /* SMP */
263 203
264void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs) 204/* /proc/interrupts printing */
205int arch_show_interrupts(struct seq_file *p, int prec)
265{ 206{
266 int i; 207 int j;
267 struct irqaction *action;
268 unsigned int cpu_irq;
269 208
270 cpu_irq = irq & (NR_IRQS - 1); 209#ifdef CONFIG_SMP
271 action = sparc_irq[cpu_irq].action; 210 seq_printf(p, "RES: ");
272 211 for_each_online_cpu(j)
273 printk(KERN_ERR "IO device interrupt, irq = %d\n", irq); 212 seq_printf(p, "%10u ", cpu_data(j).irq_resched_count);
274 printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc, 213 seq_printf(p, " IPI rescheduling interrupts\n");
275 regs->npc, regs->u_regs[14]); 214 seq_printf(p, "CAL: ");
276 if (action) { 215 for_each_online_cpu(j)
277 printk(KERN_ERR "Expecting: "); 216 seq_printf(p, "%10u ", cpu_data(j).irq_call_count);
278 for (i = 0; i < 16; i++) 217 seq_printf(p, " IPI function call interrupts\n");
279 if (action->handler) 218#endif
280 printk(KERN_CONT "[%s:%d:0x%x] ", action->name, 219 seq_printf(p, "NMI: ");
281 i, (unsigned int)action->handler); 220 for_each_online_cpu(j)
282 } 221 seq_printf(p, "%10u ", cpu_data(j).counter);
283 printk(KERN_ERR "AIEEE\n"); 222 seq_printf(p, " Non-maskable interrupts\n");
284 panic("bogus interrupt received"); 223 return 0;
285} 224}
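
[For reference: on an SMP kernel with two CPUs online, the handler above appends lines of the following shape to /proc/interrupts; the counts shown are hypothetical, the format follows the seq_printf() calls directly.]

RES:         12         34   IPI rescheduling interrupts
CAL:          5          7   IPI function call interrupts
NMI:          0          0   Non-maskable interrupts
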
286 225
287void handler_irq(int pil, struct pt_regs *regs) 226void handler_irq(unsigned int pil, struct pt_regs *regs)
288{ 227{
289 struct pt_regs *old_regs; 228 struct pt_regs *old_regs;
290 struct irqaction *action; 229 struct irq_bucket *p;
291 int cpu = smp_processor_id();
292 230
231 BUG_ON(pil > 15);
293 old_regs = set_irq_regs(regs); 232 old_regs = set_irq_regs(regs);
294 irq_enter(); 233 irq_enter();
295 disable_pil_irq(pil); 234
296#ifdef CONFIG_SMP 235 p = irq_map[pil];
297 /* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */ 236 while (p) {
298 if ((sparc_cpu_model==sun4m) && (pil < 10)) 237 struct irq_bucket *next = p->next;
299 smp4m_irq_rotate(cpu); 238
300#endif 239 generic_handle_irq(p->irq);
301 action = sparc_irq[pil].action; 240 p = next;
302 sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS; 241 }
303 kstat_cpu(cpu).irqs[pil]++;
304 do {
305 if (!action || !action->handler)
306 unexpected_irq(pil, NULL, regs);
307 action->handler(pil, action->dev_id);
308 action = action->next;
309 } while (action);
310 sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS;
311 enable_pil_irq(pil);
312 irq_exit(); 242 irq_exit();
313 set_irq_regs(old_regs); 243 set_irq_regs(old_regs);
314} 244}
315 245
316#if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE) 246#if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE)
247static unsigned int floppy_irq;
317 248
318/* 249int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler)
319 * Fast IRQs on the Sparc can only have one routine attached to them,
320 * thus no sharing possible.
321 */
322static int request_fast_irq(unsigned int irq,
323 void (*handler)(void),
324 unsigned long irqflags, const char *devname)
325{ 250{
326 struct irqaction *action;
327 unsigned long flags;
328 unsigned int cpu_irq; 251 unsigned int cpu_irq;
329 int ret; 252 int err;
253
330#if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON 254#if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON
331 struct tt_entry *trap_table; 255 struct tt_entry *trap_table;
332#endif 256#endif
333 cpu_irq = irq & (NR_IRQS - 1);
334 if (cpu_irq > 14) {
335 ret = -EINVAL;
336 goto out;
337 }
338 if (!handler) {
339 ret = -EINVAL;
340 goto out;
341 }
342 257
343 spin_lock_irqsave(&irq_action_lock, flags); 258 err = request_irq(irq, irq_handler, 0, "floppy", NULL);
259 if (err)
260 return -1;
344 261
345 action = sparc_irq[cpu_irq].action; 262 /* Save for later use in floppy interrupt handler */
346 if (action) { 263 floppy_irq = irq;
347 if (action->flags & IRQF_SHARED)
348 panic("Trying to register fast irq when already shared.\n");
349 if (irqflags & IRQF_SHARED)
350 panic("Trying to register fast irq as shared.\n");
351 264
352 /* Anyway, someone already owns it so cannot be made fast. */ 265 cpu_irq = (irq & (NR_IRQS - 1));
353 printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n");
354 ret = -EBUSY;
355 goto out_unlock;
356 }
357
358 /*
359 * If this is flagged as statically allocated then we use our
360 * private struct which is never freed.
361 */
362 if (irqflags & SA_STATIC_ALLOC) {
363 if (static_irq_count < MAX_STATIC_ALLOC)
364 action = &static_irqaction[static_irq_count++];
365 else
366 printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
367 irq, devname);
368 }
369
370 if (action == NULL)
371 action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
372 if (!action) {
373 ret = -ENOMEM;
374 goto out_unlock;
375 }
376 266
377 /* Dork with trap table if we get this far. */ 267 /* Dork with trap table if we get this far. */
378#define INSTANTIATE(table) \ 268#define INSTANTIATE(table) \
379 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \ 269 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \
380 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \ 270 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \
381 SPARC_BRANCH((unsigned long) handler, \ 271 SPARC_BRANCH((unsigned long) floppy_hardint, \
382 (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\ 272 (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\
383 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \ 273 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \
384 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP; 274 table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP;
@@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq,
399 * writing we have no CPU-neutral interface to fine-grained flushes. 289 * writing we have no CPU-neutral interface to fine-grained flushes.
400 */ 290 */
401 flush_cache_all(); 291 flush_cache_all();
402 292 return 0;
403 action->flags = irqflags;
404 action->name = devname;
405 action->dev_id = NULL;
406 action->next = NULL;
407
408 sparc_irq[cpu_irq].action = action;
409
410 __enable_irq(irq);
411
412 ret = 0;
413out_unlock:
414 spin_unlock_irqrestore(&irq_action_lock, flags);
415out:
416 return ret;
417} 293}
294EXPORT_SYMBOL(sparc_floppy_request_irq);
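
[Illustrative sketch, not part of the patch: how floppy setup code might hook its handler through the new API. The IRQ number and handler name are placeholders, not the real floppy glue.]

static irqreturn_t example_floppy_handler(int irq, void *dev_id)
{
	/* the usual floppy interrupt work would go here */
	return IRQ_HANDLED;
}

static int example_floppy_init(void)
{
	/* registers the handler with the generic IRQ layer and patches
	 * the trap table so the fast path enters floppy_hardint
	 */
	return sparc_floppy_request_irq(11, example_floppy_handler);
}
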
418 295
419/* 296/*
420 * These variables are used to access state from the assembler 297 * These variables are used to access state from the assembler
@@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base);
440unsigned long pdma_areasize; 317unsigned long pdma_areasize;
441EXPORT_SYMBOL(pdma_areasize); 318EXPORT_SYMBOL(pdma_areasize);
442 319
 443static irq_handler_t floppy_irq_handler; 320/* Use the generic irq support to call floppy_interrupt,
 444 321 * which was set up using request_irq() in sparc_floppy_request_irq().
 322 * We only have one floppy interrupt, so we do not need to check
 323 * for additional handlers being wired up by irq_link().
324 */
445void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs) 325void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs)
446{ 326{
447 struct pt_regs *old_regs; 327 struct pt_regs *old_regs;
448 int cpu = smp_processor_id();
449 328
450 old_regs = set_irq_regs(regs); 329 old_regs = set_irq_regs(regs);
451 disable_pil_irq(irq);
452 irq_enter(); 330 irq_enter();
453 kstat_cpu(cpu).irqs[irq]++; 331 generic_handle_irq(floppy_irq);
454 floppy_irq_handler(irq, dev_id);
455 irq_exit(); 332 irq_exit();
456 enable_pil_irq(irq);
457 set_irq_regs(old_regs); 333 set_irq_regs(old_regs);
458 /*
459 * XXX Eek, it's totally changed with preempt_count() and such
460 * if (softirq_pending(cpu))
461 * do_softirq();
462 */
463}
464
465int sparc_floppy_request_irq(int irq, unsigned long flags,
466 irq_handler_t irq_handler)
467{
468 floppy_irq_handler = irq_handler;
469 return request_fast_irq(irq, floppy_hardint, flags, "floppy");
470} 334}
471EXPORT_SYMBOL(sparc_floppy_request_irq);
472
473#endif 335#endif
474 336
475int request_irq(unsigned int irq,
476 irq_handler_t handler,
477 unsigned long irqflags, const char *devname, void *dev_id)
478{
479 struct irqaction *action, **actionp;
480 unsigned long flags;
481 unsigned int cpu_irq;
482 int ret;
483
484 if (sparc_cpu_model == sun4d)
485 return sun4d_request_irq(irq, handler, irqflags, devname, dev_id);
486
487 cpu_irq = irq & (NR_IRQS - 1);
488 if (cpu_irq > 14) {
489 ret = -EINVAL;
490 goto out;
491 }
492 if (!handler) {
493 ret = -EINVAL;
494 goto out;
495 }
496
497 spin_lock_irqsave(&irq_action_lock, flags);
498
499 actionp = &sparc_irq[cpu_irq].action;
500 action = *actionp;
501 if (action) {
502 if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) {
503 ret = -EBUSY;
504 goto out_unlock;
505 }
506 if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) {
507 printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
508 irq);
509 ret = -EBUSY;
510 goto out_unlock;
511 }
512 for ( ; action; action = *actionp)
513 actionp = &action->next;
514 }
515
516 /* If this is flagged as statically allocated then we use our
517 * private struct which is never freed.
518 */
519 if (irqflags & SA_STATIC_ALLOC) {
520 if (static_irq_count < MAX_STATIC_ALLOC)
521 action = &static_irqaction[static_irq_count++];
522 else
523 printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
524 irq, devname);
525 }
526 if (action == NULL)
527 action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
528 if (!action) {
529 ret = -ENOMEM;
530 goto out_unlock;
531 }
532
533 action->handler = handler;
534 action->flags = irqflags;
535 action->name = devname;
536 action->next = NULL;
537 action->dev_id = dev_id;
538
539 *actionp = action;
540
541 __enable_irq(irq);
542
543 ret = 0;
544out_unlock:
545 spin_unlock_irqrestore(&irq_action_lock, flags);
546out:
547 return ret;
548}
549EXPORT_SYMBOL(request_irq);
550
551void disable_irq_nosync(unsigned int irq)
552{
553 __disable_irq(irq);
554}
555EXPORT_SYMBOL(disable_irq_nosync);
556
557void disable_irq(unsigned int irq)
558{
559 __disable_irq(irq);
560}
561EXPORT_SYMBOL(disable_irq);
562
563void enable_irq(unsigned int irq)
564{
565 __enable_irq(irq);
566}
567EXPORT_SYMBOL(enable_irq);
568
569/*
570 * We really don't need these at all on the Sparc. We only have
571 * stubs here because they are exported to modules.
572 */
573unsigned long probe_irq_on(void)
574{
575 return 0;
576}
577EXPORT_SYMBOL(probe_irq_on);
578
579int probe_irq_off(unsigned long mask)
580{
581 return 0;
582}
583EXPORT_SYMBOL(probe_irq_off);
584
585static unsigned int build_device_irq(struct platform_device *op,
586 unsigned int real_irq)
587{
588 return real_irq;
589}
590
591/* djhr 337/* djhr
592 * This could probably be made indirect too and assigned in the CPU 338 * This could probably be made indirect too and assigned in the CPU
593 * bits of the code. That would be much nicer I think and would also 339 * bits of the code. That would be much nicer I think and would also
@@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op,
598 344
599void __init init_IRQ(void) 345void __init init_IRQ(void)
600{ 346{
601 sparc_irq_config.build_device_irq = build_device_irq;
602
603 switch (sparc_cpu_model) { 347 switch (sparc_cpu_model) {
604 case sun4c: 348 case sun4c:
605 case sun4: 349 case sun4:
@@ -607,14 +351,11 @@ void __init init_IRQ(void)
607 break; 351 break;
608 352
609 case sun4m: 353 case sun4m:
610#ifdef CONFIG_PCI
611 pcic_probe(); 354 pcic_probe();
612 if (pcic_present()) { 355 if (pcic_present())
613 sun4m_pci_init_IRQ(); 356 sun4m_pci_init_IRQ();
614 break; 357 else
615 } 358 sun4m_init_IRQ();
616#endif
617 sun4m_init_IRQ();
618 break; 359 break;
619 360
620 case sun4d: 361 case sun4d:
@@ -632,9 +373,3 @@ void __init init_IRQ(void)
632 btfixup(); 373 btfixup();
633} 374}
634 375
635#ifdef CONFIG_PROC_FS
636void init_irq_proc(void)
637{
638 /* For now, nothing... */
639}
640#endif /* CONFIG_PROC_FS */
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index b1d275ce3435..4e78862d12fd 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
224 int cpuid; 224 int cpuid;
225 225
226 cpumask_copy(&mask, affinity); 226 cpumask_copy(&mask, affinity);
227 if (cpus_equal(mask, cpu_online_map)) { 227 if (cpumask_equal(&mask, cpu_online_mask)) {
228 cpuid = map_to_cpu(irq); 228 cpuid = map_to_cpu(irq);
229 } else { 229 } else {
230 cpumask_t tmp; 230 cpumask_t tmp;
231 231
232 cpus_and(tmp, cpu_online_map, mask); 232 cpumask_and(&tmp, cpu_online_mask, &mask);
233 cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp); 233 cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
234 } 234 }
235 235
236 return cpuid; 236 return cpuid;
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 24ad449886be..6f6544cfa0ef 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -6,11 +6,9 @@
6#include <asm/traps.h> 6#include <asm/traps.h>
7 7
8/* cpu.c */ 8/* cpu.c */
9extern const char *sparc_cpu_type;
10extern const char *sparc_pmu_type; 9extern const char *sparc_pmu_type;
11extern const char *sparc_fpu_type;
12
13extern unsigned int fsr_storage; 10extern unsigned int fsr_storage;
11extern int ncpus_probed;
14 12
15#ifdef CONFIG_SPARC32 13#ifdef CONFIG_SPARC32
16/* cpu.c */ 14/* cpu.c */
@@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void);
37extern unsigned int lvl14_resolution; 35extern unsigned int lvl14_resolution;
38 36
39extern void sun4m_init_IRQ(void); 37extern void sun4m_init_IRQ(void);
38extern void sun4m_unmask_profile_irq(void);
40extern void sun4m_clear_profile_irq(int cpu); 39extern void sun4m_clear_profile_irq(int cpu);
41 40
42/* sun4d_irq.c */ 41/* sun4d_irq.c */
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 2969f777fa11..2f538ac2e139 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -19,53 +19,70 @@
19#include <asm/leon_amba.h> 19#include <asm/leon_amba.h>
20#include <asm/traps.h> 20#include <asm/traps.h>
21#include <asm/cacheflush.h> 21#include <asm/cacheflush.h>
22#include <asm/smp.h>
23#include <asm/setup.h>
22 24
23#include "prom.h" 25#include "prom.h"
24#include "irq.h" 26#include "irq.h"
25 27
26struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */ 28struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */
27struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */ 29struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */
28struct amba_apb_device leon_percpu_timer_dev[16];
29 30
30int leondebug_irq_disable; 31int leondebug_irq_disable;
31int leon_debug_irqout; 32int leon_debug_irqout;
32static int dummy_master_l10_counter; 33static int dummy_master_l10_counter;
33unsigned long amba_system_id; 34unsigned long amba_system_id;
35static DEFINE_SPINLOCK(leon_irq_lock);
34 36
35unsigned long leon3_gptimer_irq; /* interrupt controller irq number */ 37unsigned long leon3_gptimer_irq; /* interrupt controller irq number */
36unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */ 38unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */
39int leon3_ticker_irq; /* Timer ticker IRQ */
37unsigned int sparc_leon_eirq; 40unsigned int sparc_leon_eirq;
38#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0])) 41#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu])
42#define LEON_IACK (&leon3_irqctrl_regs->iclear)
43#define LEON_DO_ACK_HW 1
39 44
 40/* Return the IRQ of the pending IRQ on the extended IRQ controller */ 45/* Return the IRQ last ACKed by the Extended IRQ controller. It has already
41int sparc_leon_eirq_get(int eirq, int cpu) 46 * been (automatically) ACKed when the CPU takes the trap.
47 */
48static inline unsigned int leon_eirq_get(int cpu)
42{ 49{
43 return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f; 50 return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f;
44} 51}
45 52
46irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id) 53/* Handle one or multiple IRQs from the extended interrupt controller */
54static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
47{ 55{
48 printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n"); 56 unsigned int eirq;
49 return IRQ_HANDLED; 57 int cpu = sparc_leon3_cpuid();
58
59 eirq = leon_eirq_get(cpu);
60 if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */
61 generic_handle_irq(irq_map[eirq]->irq);
50} 62}
51 63
52/* The extended IRQ controller has been found, this function registers it */ 64/* The extended IRQ controller has been found, this function registers it */
53void sparc_leon_eirq_register(int eirq) 65void leon_eirq_setup(unsigned int eirq)
54{ 66{
55 int irq; 67 unsigned long mask, oldmask;
68 unsigned int veirq;
56 69
57 /* Register a "BAD" handler for this interrupt, it should never happen */ 70 if (eirq < 1 || eirq > 0xf) {
58 irq = request_irq(eirq, sparc_leon_eirq_isr, 71 printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq);
59 (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL); 72 return;
60
61 if (irq) {
62 printk(KERN_ERR
63 "sparc_leon_eirq_register: unable to attach IRQ%d\n",
64 eirq);
65 } else {
66 sparc_leon_eirq = eirq;
67 } 73 }
68 74
75 veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0);
76
77 /*
78 * Unmask the Extended IRQ, the IRQs routed through the Ext-IRQ
79 * controller have a mask-bit of their own, so this is safe.
80 */
81 irq_link(veirq);
82 mask = 1 << eirq;
83 oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id));
84 LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask));
85 sparc_leon_eirq = eirq;
69} 86}
70 87
71static inline unsigned long get_irqmask(unsigned int irq) 88static inline unsigned long get_irqmask(unsigned int irq)
@@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq)
83 return mask; 100 return mask;
84} 101}
85 102
86static void leon_enable_irq(unsigned int irq_nr) 103#ifdef CONFIG_SMP
104static int irq_choose_cpu(const struct cpumask *affinity)
87{ 105{
88 unsigned long mask, flags; 106 cpumask_t mask;
89 mask = get_irqmask(irq_nr); 107
90 local_irq_save(flags); 108 cpus_and(mask, cpu_online_map, *affinity);
91 LEON3_BYPASS_STORE_PA(LEON_IMASK, 109 if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask))
92 (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask))); 110 return boot_cpu_id;
93 local_irq_restore(flags); 111 else
112 return first_cpu(mask);
94} 113}
114#else
115#define irq_choose_cpu(affinity) boot_cpu_id
116#endif
95 117
96static void leon_disable_irq(unsigned int irq_nr) 118static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest,
119 bool force)
97{ 120{
98 unsigned long mask, flags; 121 unsigned long mask, oldmask, flags;
99 mask = get_irqmask(irq_nr); 122 int oldcpu, newcpu;
100 local_irq_save(flags); 123
101 LEON3_BYPASS_STORE_PA(LEON_IMASK, 124 mask = (unsigned long)data->chip_data;
102 (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask))); 125 oldcpu = irq_choose_cpu(data->affinity);
103 local_irq_restore(flags); 126 newcpu = irq_choose_cpu(dest);
127
128 if (oldcpu == newcpu)
129 goto out;
130
 131 /* mask on the old CPU first before unmasking on the selected CPU */
132 spin_lock_irqsave(&leon_irq_lock, flags);
133 oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu));
134 LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask));
135 oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu));
136 LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask));
137 spin_unlock_irqrestore(&leon_irq_lock, flags);
138out:
139 return IRQ_SET_MASK_OK;
140}
141
142static void leon_unmask_irq(struct irq_data *data)
143{
144 unsigned long mask, oldmask, flags;
145 int cpu;
146
147 mask = (unsigned long)data->chip_data;
148 cpu = irq_choose_cpu(data->affinity);
149 spin_lock_irqsave(&leon_irq_lock, flags);
150 oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
151 LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask));
152 spin_unlock_irqrestore(&leon_irq_lock, flags);
153}
154
155static void leon_mask_irq(struct irq_data *data)
156{
157 unsigned long mask, oldmask, flags;
158 int cpu;
159
160 mask = (unsigned long)data->chip_data;
161 cpu = irq_choose_cpu(data->affinity);
162 spin_lock_irqsave(&leon_irq_lock, flags);
163 oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
164 LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask));
165 spin_unlock_irqrestore(&leon_irq_lock, flags);
166}
167
168static unsigned int leon_startup_irq(struct irq_data *data)
169{
170 irq_link(data->irq);
171 leon_unmask_irq(data);
172 return 0;
173}
104 174
175static void leon_shutdown_irq(struct irq_data *data)
176{
177 leon_mask_irq(data);
178 irq_unlink(data->irq);
179}
180
 181/* Used by external level-sensitive IRQ handlers on the LEON: ACK IRQ ctrl */
182static void leon_eoi_irq(struct irq_data *data)
183{
184 unsigned long mask = (unsigned long)data->chip_data;
185
186 if (mask & LEON_DO_ACK_HW)
187 LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW);
188}
189
190static struct irq_chip leon_irq = {
191 .name = "leon",
192 .irq_startup = leon_startup_irq,
193 .irq_shutdown = leon_shutdown_irq,
194 .irq_mask = leon_mask_irq,
195 .irq_unmask = leon_unmask_irq,
196 .irq_eoi = leon_eoi_irq,
197 .irq_set_affinity = leon_set_affinity,
198};
199
200/*
 201 * Build a LEON IRQ for the edge-triggered LEON IRQ controller:
202 * Edge (normal) IRQ - handle_simple_irq, ack=DONT-CARE, never ack
203 * Level IRQ (PCI|Level-GPIO) - handle_fasteoi_irq, ack=1, ack after ISR
204 * Per-CPU Edge - handle_percpu_irq, ack=0
205 */
206unsigned int leon_build_device_irq(unsigned int real_irq,
207 irq_flow_handler_t flow_handler,
208 const char *name, int do_ack)
209{
210 unsigned int irq;
211 unsigned long mask;
212
213 irq = 0;
214 mask = get_irqmask(real_irq);
215 if (mask == 0)
216 goto out;
217
218 irq = irq_alloc(real_irq, real_irq);
219 if (irq == 0)
220 goto out;
221
222 if (do_ack)
223 mask |= LEON_DO_ACK_HW;
224
225 irq_set_chip_and_handler_name(irq, &leon_irq,
226 flow_handler, name);
227 irq_set_chip_data(irq, (void *)mask);
228
229out:
230 return irq;
231}
232
233static unsigned int _leon_build_device_irq(struct platform_device *op,
234 unsigned int real_irq)
235{
236 return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0);
105} 237}
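
[Illustrative sketch, not part of the patch: requesting each of the three IRQ flavours described in the comment above. The real_irq numbers are hypothetical; each returned virtual irq would then be passed to request_irq() as usual.]

static void example_leon_irqs(void)
{
	unsigned int edge, level, percpu;

	/* edge (normal) device IRQ, controller never ACKed */
	edge = leon_build_device_irq(3, handle_simple_irq, "edge", 0);

	/* level IRQ (e.g. PCI), ACK the controller after the ISR */
	level = leon_build_device_irq(5, handle_fasteoi_irq, "level", 1);

	/* per-CPU IRQ such as the broadcast ticker */
	percpu = leon_build_device_irq(8, handle_percpu_irq, "per-cpu", 0);
}
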
106 238
107void __init leon_init_timers(irq_handler_t counter_fn) 239void __init leon_init_timers(irq_handler_t counter_fn)
108{ 240{
109 int irq; 241 int irq, eirq;
110 struct device_node *rootnp, *np, *nnp; 242 struct device_node *rootnp, *np, *nnp;
111 struct property *pp; 243 struct property *pp;
112 int len; 244 int len;
113 int cpu, icsel; 245 int icsel;
114 int ampopts; 246 int ampopts;
247 int err;
115 248
116 leondebug_irq_disable = 0; 249 leondebug_irq_disable = 0;
117 leon_debug_irqout = 0; 250 leon_debug_irqout = 0;
@@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn)
173 leon3_gptimer_irq = *(unsigned int *)pp->value; 306 leon3_gptimer_irq = *(unsigned int *)pp->value;
174 } while (0); 307 } while (0);
175 308
176 if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) { 309 if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq))
177 LEON3_BYPASS_STORE_PA( 310 goto bad;
178 &leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0); 311
179 LEON3_BYPASS_STORE_PA( 312 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
180 &leon3_gptimer_regs->e[leon3_gptimer_idx].rld, 313 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
181 (((1000000 / HZ) - 1))); 314 (((1000000 / HZ) - 1)));
182 LEON3_BYPASS_STORE_PA( 315 LEON3_BYPASS_STORE_PA(
183 &leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0); 316 &leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0);
184 317
185#ifdef CONFIG_SMP 318#ifdef CONFIG_SMP
186 leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs; 319 leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx;
187 leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 +
188 leon3_gptimer_idx;
189
190 if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
191 (1<<LEON3_GPTIMER_SEPIRQ))) {
192 prom_printf("irq timer not configured with separate irqs\n");
193 BUG();
194 }
195 320
196 LEON3_BYPASS_STORE_PA( 321 if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
197 &leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0); 322 (1<<LEON3_GPTIMER_SEPIRQ))) {
198 LEON3_BYPASS_STORE_PA( 323 printk(KERN_ERR "timer not configured with separate irqs\n");
199 &leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld, 324 BUG();
200 (((1000000/HZ) - 1)));
201 LEON3_BYPASS_STORE_PA(
202 &leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0);
203# endif
204
205 /*
206 * The IRQ controller may (if implemented) consist of multiple
207 * IRQ controllers, each mapped on a 4Kb boundary.
208 * Each CPU may be routed to different IRQCTRLs, however
209 * we assume that all CPUs (in SMP system) is routed to the
210 * same IRQ Controller, and for non-SMP only one IRQCTRL is
211 * accessed anyway.
212 * In AMP systems, Linux must run on CPU0 for the time being.
213 */
214 cpu = sparc_leon3_cpuid();
215 icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]);
216 icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf;
217 leon3_irqctrl_regs += icsel;
218 } else {
219 goto bad;
220 } 325 }
221 326
222 irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx, 327 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val,
223 counter_fn, 328 0);
224 (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); 329 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
330 (((1000000/HZ) - 1)));
331 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
332 0);
333#endif
225 334
226 if (irq) { 335 /*
227 printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n", 336 * The IRQ controller may (if implemented) consist of multiple
228 LEON_INTERRUPT_TIMER1); 337 * IRQ controllers, each mapped on a 4Kb boundary.
338 * Each CPU may be routed to different IRQCTRLs, however
 339 * we assume that all CPUs (in an SMP system) are routed to the
 340 * same IRQ controller; for non-SMP only one IRQCTRL is
341 * accessed anyway.
342 * In AMP systems, Linux must run on CPU0 for the time being.
343 */
344 icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
345 icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
346 leon3_irqctrl_regs += icsel;
347
348 /* Mask all IRQs on boot-cpu IRQ controller */
349 LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0);
350
351 /* Probe extended IRQ controller */
352 eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus)
353 >> 16) & 0xf;
354 if (eirq != 0)
355 leon_eirq_setup(eirq);
356
357 irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx);
358 err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
359 if (err) {
360 printk(KERN_ERR "unable to attach timer IRQ%d\n", irq);
229 prom_halt(); 361 prom_halt();
230 } 362 }
231 363
232# ifdef CONFIG_SMP 364 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
233 { 365 LEON3_GPTIMER_EN |
234 unsigned long flags; 366 LEON3_GPTIMER_RL |
235 struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)]; 367 LEON3_GPTIMER_LD |
236 368 LEON3_GPTIMER_IRQEN);
237 /* For SMP we use the level 14 ticker, however the bootup code
238 * has copied the firmwares level 14 vector into boot cpu's
239 * trap table, we must fix this now or we get squashed.
240 */
241 local_irq_save(flags);
242
243 patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */
244
245 /* Adjust so that we jump directly to smpleon_ticker */
246 trap_table->inst_three += smpleon_ticker - real_irq_entry;
247 369
248 local_flush_cache_all(); 370#ifdef CONFIG_SMP
 249 local_irq_restore(flags); 371 /* Install per-cpu IRQ handler for the broadcast ticker */
372 irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq,
373 "per-cpu", 0);
374 err = request_irq(irq, leon_percpu_timer_interrupt,
375 IRQF_PERCPU | IRQF_TIMER, "ticker",
376 NULL);
377 if (err) {
378 printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq);
379 prom_halt();
250 } 380 }
251# endif
252
253 if (leon3_gptimer_regs) {
254 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
255 LEON3_GPTIMER_EN |
256 LEON3_GPTIMER_RL |
257 LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN);
258 381
259#ifdef CONFIG_SMP 382 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
260 LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 383 LEON3_GPTIMER_EN |
261 LEON3_GPTIMER_EN | 384 LEON3_GPTIMER_RL |
262 LEON3_GPTIMER_RL | 385 LEON3_GPTIMER_LD |
263 LEON3_GPTIMER_LD | 386 LEON3_GPTIMER_IRQEN);
264 LEON3_GPTIMER_IRQEN);
265#endif 387#endif
266
267 }
268 return; 388 return;
269bad: 389bad:
270 printk(KERN_ERR "No Timer/irqctrl found\n"); 390 printk(KERN_ERR "No Timer/irqctrl found\n");
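
[Illustrative sketch, not part of the patch: the icsel decode performed above, in isolation. Each 32-bit icsel word packs eight 4-bit fields; the field for CPU n selects which IRQ controller instance (mapped 4 KB apart) that CPU is routed to.]

static int example_icsel_index(unsigned int icsel_word, int cpu)
{
	/* CPU (cpu & 7) occupies the nibble counted from the top:
	 * cpu 0 -> bits 31:28, ..., cpu 7 -> bits 3:0
	 */
	return (icsel_word >> ((7 - (cpu & 0x7)) * 4)) & 0xf;
}
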
@@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit)
281 BUG(); 401 BUG();
282} 402}
283 403
284
285
286
287void __init leon_trans_init(struct device_node *dp) 404void __init leon_trans_init(struct device_node *dp)
288{ 405{
289 if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) { 406 if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) {
@@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu)
337{ 454{
338 unsigned long mask, flags, *addr; 455 unsigned long mask, flags, *addr;
339 mask = get_irqmask(irq_nr); 456 mask = get_irqmask(irq_nr);
340 local_irq_save(flags); 457 spin_lock_irqsave(&leon_irq_lock, flags);
341 addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]); 458 addr = (unsigned long *)LEON_IMASK(cpu);
342 LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask))); 459 LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask));
343 local_irq_restore(flags); 460 spin_unlock_irqrestore(&leon_irq_lock, flags);
344} 461}
345 462
346#endif 463#endif
347 464
348void __init leon_init_IRQ(void) 465void __init leon_init_IRQ(void)
349{ 466{
350 sparc_irq_config.init_timers = leon_init_timers; 467 sparc_irq_config.init_timers = leon_init_timers;
351 468 sparc_irq_config.build_device_irq = _leon_build_device_irq;
352 BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM);
353 BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM);
354 BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM);
355 BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM);
356 469
357 BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq, 470 BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq,
358 BTFIXUPCALL_NORM); 471 BTFIXUPCALL_NORM);
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 8f5de4aa3c0a..fe8fb44c609c 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -14,6 +14,7 @@
14#include <linux/smp.h> 14#include <linux/smp.h>
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/kernel_stat.h> 16#include <linux/kernel_stat.h>
17#include <linux/of.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/spinlock.h> 19#include <linux/spinlock.h>
19#include <linux/mm.h> 20#include <linux/mm.h>
@@ -29,6 +30,7 @@
29#include <asm/ptrace.h> 30#include <asm/ptrace.h>
30#include <asm/atomic.h> 31#include <asm/atomic.h>
31#include <asm/irq_regs.h> 32#include <asm/irq_regs.h>
33#include <asm/traps.h>
32 34
33#include <asm/delay.h> 35#include <asm/delay.h>
34#include <asm/irq.h> 36#include <asm/irq.h>
@@ -50,9 +52,12 @@
50extern ctxd_t *srmmu_ctx_table_phys; 52extern ctxd_t *srmmu_ctx_table_phys;
51static int smp_processors_ready; 53static int smp_processors_ready;
52extern volatile unsigned long cpu_callin_map[NR_CPUS]; 54extern volatile unsigned long cpu_callin_map[NR_CPUS];
53extern unsigned char boot_cpu_id;
54extern cpumask_t smp_commenced_mask; 55extern cpumask_t smp_commenced_mask;
55void __init leon_configure_cache_smp(void); 56void __init leon_configure_cache_smp(void);
57static void leon_ipi_init(void);
58
59/* IRQ number of LEON IPIs */
60int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT;
56 61
57static inline unsigned long do_swap(volatile unsigned long *ptr, 62static inline unsigned long do_swap(volatile unsigned long *ptr,
58 unsigned long val) 63 unsigned long val)
@@ -94,8 +99,6 @@ void __cpuinit leon_callin(void)
94 local_flush_cache_all(); 99 local_flush_cache_all();
95 local_flush_tlb_all(); 100 local_flush_tlb_all();
96 101
97 cpu_probe();
98
99 /* Fix idle thread fields. */ 102 /* Fix idle thread fields. */
100 __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(&current_set[cpuid]) 103 __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(&current_set[cpuid])
101 : "memory" /* paranoid */); 104 : "memory" /* paranoid */);
@@ -104,11 +107,11 @@ void __cpuinit leon_callin(void)
104 atomic_inc(&init_mm.mm_count); 107 atomic_inc(&init_mm.mm_count);
105 current->active_mm = &init_mm; 108 current->active_mm = &init_mm;
106 109
107 while (!cpu_isset(cpuid, smp_commenced_mask)) 110 while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
108 mb(); 111 mb();
109 112
110 local_irq_enable(); 113 local_irq_enable();
111 cpu_set(cpuid, cpu_online_map); 114 set_cpu_online(cpuid, true);
112} 115}
113 116
114/* 117/*
@@ -179,13 +182,16 @@ void __init leon_boot_cpus(void)
179 int nrcpu = leon_smp_nrcpus(); 182 int nrcpu = leon_smp_nrcpus();
180 int me = smp_processor_id(); 183 int me = smp_processor_id();
181 184
185 /* Setup IPI */
186 leon_ipi_init();
187
182 printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me, 188 printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me,
183 (unsigned int)nrcpu, (unsigned int)NR_CPUS, 189 (unsigned int)nrcpu, (unsigned int)NR_CPUS,
184 (unsigned int)&(leon3_irqctrl_regs->mpstatus)); 190 (unsigned int)&(leon3_irqctrl_regs->mpstatus));
185 191
186 leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me); 192 leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me);
187 leon_enable_irq_cpu(LEON3_IRQ_TICKER, me); 193 leon_enable_irq_cpu(LEON3_IRQ_TICKER, me);
188 leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me); 194 leon_enable_irq_cpu(leon_ipi_irq, me);
189 195
190 leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER); 196 leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER);
191 197
@@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i)
220 (unsigned int)&leon3_irqctrl_regs->mpstatus); 226 (unsigned int)&leon3_irqctrl_regs->mpstatus);
221 local_flush_cache_all(); 227 local_flush_cache_all();
222 228
 229 /* Make sure all IRQs are off from the start for this new CPU */
230 LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0);
231
232 /* Wake one CPU */
223 LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i); 233 LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i);
224 234
225 /* wheee... it's going... */ 235 /* wheee... it's going... */
@@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i)
236 } else { 246 } else {
237 leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i); 247 leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i);
238 leon_enable_irq_cpu(LEON3_IRQ_TICKER, i); 248 leon_enable_irq_cpu(LEON3_IRQ_TICKER, i);
239 leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i); 249 leon_enable_irq_cpu(leon_ipi_irq, i);
240 } 250 }
241 251
242 local_flush_cache_all(); 252 local_flush_cache_all();
@@ -262,21 +272,21 @@ void __init leon_smp_done(void)
262 local_flush_cache_all(); 272 local_flush_cache_all();
263 273
264 /* Free unneeded trap tables */ 274 /* Free unneeded trap tables */
265 if (!cpu_isset(1, cpu_present_map)) { 275 if (!cpu_present(1)) {
266 ClearPageReserved(virt_to_page(&trapbase_cpu1)); 276 ClearPageReserved(virt_to_page(&trapbase_cpu1));
267 init_page_count(virt_to_page(&trapbase_cpu1)); 277 init_page_count(virt_to_page(&trapbase_cpu1));
268 free_page((unsigned long)&trapbase_cpu1); 278 free_page((unsigned long)&trapbase_cpu1);
269 totalram_pages++; 279 totalram_pages++;
270 num_physpages++; 280 num_physpages++;
271 } 281 }
272 if (!cpu_isset(2, cpu_present_map)) { 282 if (!cpu_present(2)) {
273 ClearPageReserved(virt_to_page(&trapbase_cpu2)); 283 ClearPageReserved(virt_to_page(&trapbase_cpu2));
274 init_page_count(virt_to_page(&trapbase_cpu2)); 284 init_page_count(virt_to_page(&trapbase_cpu2));
275 free_page((unsigned long)&trapbase_cpu2); 285 free_page((unsigned long)&trapbase_cpu2);
276 totalram_pages++; 286 totalram_pages++;
277 num_physpages++; 287 num_physpages++;
278 } 288 }
279 if (!cpu_isset(3, cpu_present_map)) { 289 if (!cpu_present(3)) {
280 ClearPageReserved(virt_to_page(&trapbase_cpu3)); 290 ClearPageReserved(virt_to_page(&trapbase_cpu3));
281 init_page_count(virt_to_page(&trapbase_cpu3)); 291 init_page_count(virt_to_page(&trapbase_cpu3));
282 free_page((unsigned long)&trapbase_cpu3); 292 free_page((unsigned long)&trapbase_cpu3);
@@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu)
292{ 302{
293} 303}
294 304
305struct leon_ipi_work {
306 int single;
307 int msk;
308 int resched;
309};
310
311static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work);
312
 313/* Initialize IPIs on the LEON; to save IRQ resources, only one IRQ
 314 * is used for all three types of IPIs.
315 */
316static void __init leon_ipi_init(void)
317{
318 int cpu, len;
319 struct leon_ipi_work *work;
320 struct property *pp;
321 struct device_node *rootnp;
322 struct tt_entry *trap_table;
323 unsigned long flags;
324
325 /* Find IPI IRQ or stick with default value */
326 rootnp = of_find_node_by_path("/ambapp0");
327 if (rootnp) {
328 pp = of_find_property(rootnp, "ipi_num", &len);
329 if (pp && (*(int *)pp->value))
330 leon_ipi_irq = *(int *)pp->value;
331 }
332 printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq);
333
334 /* Adjust so that we jump directly to smpleon_ipi */
335 local_irq_save(flags);
336 trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)];
337 trap_table->inst_three += smpleon_ipi - real_irq_entry;
338 local_flush_cache_all();
339 local_irq_restore(flags);
340
341 for_each_possible_cpu(cpu) {
342 work = &per_cpu(leon_ipi_work, cpu);
343 work->single = work->msk = work->resched = 0;
344 }
345}
346
347static void leon_ipi_single(int cpu)
348{
349 struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
350
351 /* Mark work */
352 work->single = 1;
353
354 /* Generate IRQ on the CPU */
355 set_cpu_int(cpu, leon_ipi_irq);
356}
357
358static void leon_ipi_mask_one(int cpu)
359{
360 struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
361
362 /* Mark work */
363 work->msk = 1;
364
365 /* Generate IRQ on the CPU */
366 set_cpu_int(cpu, leon_ipi_irq);
367}
368
369static void leon_ipi_resched(int cpu)
370{
371 struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
372
373 /* Mark work */
374 work->resched = 1;
375
376 /* Generate IRQ on the CPU (any IRQ will cause resched) */
377 set_cpu_int(cpu, leon_ipi_irq);
378}
379
380void leonsmp_ipi_interrupt(void)
381{
382 struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work);
383
384 if (work->single) {
385 work->single = 0;
386 smp_call_function_single_interrupt();
387 }
388 if (work->msk) {
389 work->msk = 0;
390 smp_call_function_interrupt();
391 }
392 if (work->resched) {
393 work->resched = 0;
394 smp_resched_interrupt();
395 }
396}
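
[Illustrative sketch, not part of the patch: because the three message types are independent per-cpu flags, the receiver drains them all in one trap. Setting two flags before the target takes the IRQ results in a single leonsmp_ipi_interrupt() call that services both; the cpu number is hypothetical.]

static void example_coalesced_ipi(int cpu)
{
	struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);

	work->single = 1;
	work->resched = 1;
	/* one interrupt, both flags consumed by the receiver */
	set_cpu_int(cpu, leon_ipi_irq);
}
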
397
295static struct smp_funcall { 398static struct smp_funcall {
296 smpfunc_t func; 399 smpfunc_t func;
297 unsigned long arg1; 400 unsigned long arg1;
@@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
337 { 440 {
338 register int i; 441 register int i;
339 442
340 cpu_clear(smp_processor_id(), mask); 443 cpumask_clear_cpu(smp_processor_id(), &mask);
341 cpus_and(mask, cpu_online_map, mask); 444 cpumask_and(&mask, cpu_online_mask, &mask);
342 for (i = 0; i <= high; i++) { 445 for (i = 0; i <= high; i++) {
343 if (cpu_isset(i, mask)) { 446 if (cpumask_test_cpu(i, &mask)) {
344 ccall_info.processors_in[i] = 0; 447 ccall_info.processors_in[i] = 0;
345 ccall_info.processors_out[i] = 0; 448 ccall_info.processors_out[i] = 0;
346 set_cpu_int(i, LEON3_IRQ_CROSS_CALL); 449 set_cpu_int(i, LEON3_IRQ_CROSS_CALL);
@@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
354 457
355 i = 0; 458 i = 0;
356 do { 459 do {
357 if (!cpu_isset(i, mask)) 460 if (!cpumask_test_cpu(i, &mask))
358 continue; 461 continue;
359 462
360 while (!ccall_info.processors_in[i]) 463 while (!ccall_info.processors_in[i])
@@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
363 466
364 i = 0; 467 i = 0;
365 do { 468 do {
366 if (!cpu_isset(i, mask)) 469 if (!cpumask_test_cpu(i, &mask))
367 continue; 470 continue;
368 471
369 while (!ccall_info.processors_out[i]) 472 while (!ccall_info.processors_out[i])
@@ -386,27 +489,23 @@ void leon_cross_call_irq(void)
386 ccall_info.processors_out[i] = 1; 489 ccall_info.processors_out[i] = 1;
387} 490}
388 491
389void leon_percpu_timer_interrupt(struct pt_regs *regs) 492irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused)
390{ 493{
391 struct pt_regs *old_regs;
392 int cpu = smp_processor_id(); 494 int cpu = smp_processor_id();
393 495
394 old_regs = set_irq_regs(regs);
395
396 leon_clear_profile_irq(cpu); 496 leon_clear_profile_irq(cpu);
397 497
398 profile_tick(CPU_PROFILING); 498 profile_tick(CPU_PROFILING);
399 499
400 if (!--prof_counter(cpu)) { 500 if (!--prof_counter(cpu)) {
401 int user = user_mode(regs); 501 int user = user_mode(get_irq_regs());
402 502
403 irq_enter();
404 update_process_times(user); 503 update_process_times(user);
405 irq_exit();
406 504
407 prof_counter(cpu) = prof_multiplier(cpu); 505 prof_counter(cpu) = prof_multiplier(cpu);
408 } 506 }
409 set_irq_regs(old_regs); 507
508 return IRQ_HANDLED;
410} 509}
411 510
412static void __init smp_setup_percpu_timer(void) 511static void __init smp_setup_percpu_timer(void)
@@ -449,6 +548,9 @@ void __init leon_init_smp(void)
449 BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM); 548 BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM);
450 BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id, 549 BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id,
451 BTFIXUPCALL_NORM); 550 BTFIXUPCALL_NORM);
551 BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM);
552 BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM);
553 BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM);
452} 554}
453 555
454#endif /* CONFIG_SPARC_LEON */ 556#endif /* CONFIG_SPARC_LEON */
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 56db06432ce9..42f28c7420e1 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl
768 cpuid, NR_CPUS); 768 cpuid, NR_CPUS);
769 continue; 769 continue;
770 } 770 }
771 if (!cpu_isset(cpuid, *mask)) 771 if (!cpumask_test_cpu(cpuid, mask))
772 continue; 772 continue;
773#endif 773#endif
774 774
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 5c149689bb20..3bb2eace58cf 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
622out: 622out:
623 nid = of_node_to_nid(dp); 623 nid = of_node_to_nid(dp);
624 if (nid != -1) { 624 if (nid != -1) {
625 cpumask_t numa_mask = *cpumask_of_node(nid); 625 cpumask_t numa_mask;
626 626
627 cpumask_copy(&numa_mask, cpumask_of_node(nid));
627 irq_set_affinity(irq, &numa_mask); 628 irq_set_affinity(irq, &numa_mask);
628 } 629 }
629 630
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 30982e9ab626..580651af73f2 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
284 284
285 nid = pbm->numa_node; 285 nid = pbm->numa_node;
286 if (nid != -1) { 286 if (nid != -1) {
287 cpumask_t numa_mask = *cpumask_of_node(nid); 287 cpumask_t numa_mask;
288 288
289 cpumask_copy(&numa_mask, cpumask_of_node(nid));
289 irq_set_affinity(irq, &numa_mask); 290 irq_set_affinity(irq, &numa_mask);
290 } 291 }
291 err = request_irq(irq, sparc64_msiq_interrupt, 0, 292 err = request_irq(irq, sparc64_msiq_interrupt, 0,
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index 2cdc131b50ac..948601a066ff 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -164,6 +164,9 @@ void __iomem *pcic_regs;
164volatile int pcic_speculative; 164volatile int pcic_speculative;
165volatile int pcic_trapped; 165volatile int pcic_trapped;
166 166
167/* forward */
168unsigned int pcic_build_device_irq(struct platform_device *op,
169 unsigned int real_irq);
167 170
168#define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3)) 171#define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3))
169 172
@@ -523,6 +526,7 @@ static void
523pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) 526pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
524{ 527{
525 struct pcic_ca2irq *p; 528 struct pcic_ca2irq *p;
529 unsigned int real_irq;
526 int i, ivec; 530 int i, ivec;
527 char namebuf[64]; 531 char namebuf[64];
528 532
@@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
551 i = p->pin; 555 i = p->pin;
552 if (i >= 0 && i < 4) { 556 if (i >= 0 && i < 4) {
553 ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO); 557 ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO);
554 dev->irq = ivec >> (i << 2) & 0xF; 558 real_irq = ivec >> (i << 2) & 0xF;
555 } else if (i >= 4 && i < 8) { 559 } else if (i >= 4 && i < 8) {
556 ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI); 560 ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI);
557 dev->irq = ivec >> ((i-4) << 2) & 0xF; 561 real_irq = ivec >> ((i-4) << 2) & 0xF;
558 } else { /* Corrupted map */ 562 } else { /* Corrupted map */
559 printk("PCIC: BAD PIN %d\n", i); for (;;) {} 563 printk("PCIC: BAD PIN %d\n", i); for (;;) {}
560 } 564 }
561/* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */ 565/* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */
562 566
 563 /* 567 /* real_irq == 0 means PROM did not bother to program the upper
564 * dev->irq=0 means PROM did not bother to program the upper
565 * half of PCIC. This happens on JS-E with PROM 3.11, for instance. 568 * half of PCIC. This happens on JS-E with PROM 3.11, for instance.
566 */ 569 */
567 if (dev->irq == 0 || p->force) { 570 if (real_irq == 0 || p->force) {
568 if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */ 571 if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */
569 printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {} 572 printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {}
570 } 573 }
571 printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n", 574 printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n",
572 p->irq, p->pin, dev->bus->number, dev->devfn); 575 p->irq, p->pin, dev->bus->number, dev->devfn);
573 dev->irq = p->irq; 576 real_irq = p->irq;
574 577
575 i = p->pin; 578 i = p->pin;
576 if (i >= 4) { 579 if (i >= 4) {
@@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
584 ivec |= p->irq << (i << 2); 587 ivec |= p->irq << (i << 2);
585 writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO); 588 writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO);
586 } 589 }
587 } 590 }
591 dev->irq = pcic_build_device_irq(NULL, real_irq);
588} 592}
589 593
590/* 594/*
@@ -729,6 +733,7 @@ void __init pci_time_init(void)
729 struct linux_pcic *pcic = &pcic0; 733 struct linux_pcic *pcic = &pcic0;
730 unsigned long v; 734 unsigned long v;
731 int timer_irq, irq; 735 int timer_irq, irq;
736 int err;
732 737
733 do_arch_gettimeoffset = pci_gettimeoffset; 738 do_arch_gettimeoffset = pci_gettimeoffset;
734 739
@@ -740,9 +745,10 @@ void __init pci_time_init(void)
740 timer_irq = PCI_COUNTER_IRQ_SYS(v); 745 timer_irq = PCI_COUNTER_IRQ_SYS(v);
741 writel (PCI_COUNTER_IRQ_SET(timer_irq, 0), 746 writel (PCI_COUNTER_IRQ_SET(timer_irq, 0),
742 pcic->pcic_regs+PCI_COUNTER_IRQ); 747 pcic->pcic_regs+PCI_COUNTER_IRQ);
743 irq = request_irq(timer_irq, pcic_timer_handler, 748 irq = pcic_build_device_irq(NULL, timer_irq);
744 (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); 749 err = request_irq(irq, pcic_timer_handler,
745 if (irq) { 750 IRQF_TIMER, "timer", NULL);
751 if (err) {
746 prom_printf("time_init: unable to attach IRQ%d\n", timer_irq); 752 prom_printf("time_init: unable to attach IRQ%d\n", timer_irq);
747 prom_halt(); 753 prom_halt();
748 } 754 }
@@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr)
803 return 1 << irq_nr; 809 return 1 << irq_nr;
804} 810}
805 811
806static void pcic_disable_irq(unsigned int irq_nr) 812static void pcic_mask_irq(struct irq_data *data)
807{ 813{
808 unsigned long mask, flags; 814 unsigned long mask, flags;
809 815
810 mask = get_irqmask(irq_nr); 816 mask = (unsigned long)data->chip_data;
811 local_irq_save(flags); 817 local_irq_save(flags);
812 writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); 818 writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
813 local_irq_restore(flags); 819 local_irq_restore(flags);
814} 820}
815 821
816static void pcic_enable_irq(unsigned int irq_nr) 822static void pcic_unmask_irq(struct irq_data *data)
817{ 823{
818 unsigned long mask, flags; 824 unsigned long mask, flags;
819 825
820 mask = get_irqmask(irq_nr); 826 mask = (unsigned long)data->chip_data;
821 local_irq_save(flags); 827 local_irq_save(flags);
822 writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); 828 writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
823 local_irq_restore(flags); 829 local_irq_restore(flags);
824} 830}
825 831
826static void pcic_load_profile_irq(int cpu, unsigned int limit) 832static unsigned int pcic_startup_irq(struct irq_data *data)
827{ 833{
828 printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__); 834 irq_link(data->irq);
835 pcic_unmask_irq(data);
836 return 0;
829} 837}
830 838
831/* We assume the caller has disabled local interrupts when these are called, 839static struct irq_chip pcic_irq = {
832 * or else very bizarre behavior will result. 840 .name = "pcic",
833 */ 841 .irq_startup = pcic_startup_irq,
834static void pcic_disable_pil_irq(unsigned int pil) 842 .irq_mask = pcic_mask_irq,
843 .irq_unmask = pcic_unmask_irq,
844};
845
846unsigned int pcic_build_device_irq(struct platform_device *op,
847 unsigned int real_irq)
835{ 848{
836 writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET); 849 unsigned int irq;
850 unsigned long mask;
851
852 irq = 0;
853 mask = get_irqmask(real_irq);
854 if (mask == 0)
855 goto out;
856
857 irq = irq_alloc(real_irq, real_irq);
858 if (irq == 0)
859 goto out;
860
861 irq_set_chip_and_handler_name(irq, &pcic_irq,
862 handle_level_irq, "PCIC");
863 irq_set_chip_data(irq, (void *)mask);
864
865out:
866 return irq;
837} 867}
838 868
839static void pcic_enable_pil_irq(unsigned int pil) 869
870static void pcic_load_profile_irq(int cpu, unsigned int limit)
840{ 871{
841 writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR); 872 printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
842} 873}
843 874
844void __init sun4m_pci_init_IRQ(void) 875void __init sun4m_pci_init_IRQ(void)
845{ 876{
846 BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM); 877 sparc_irq_config.build_device_irq = pcic_build_device_irq;
847 BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM); 878
848 BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM);
849 BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM);
850 BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM); 879 BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM);
851 BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM); 880 BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM);
852} 881}
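
The pcic_irq chip stores the precomputed register mask in chip_data at mapping time, so irq_mask()/irq_unmask() become a single register write with no per-call get_irqmask() lookup. A hedged sketch of that chip_data idiom in isolation; the register offsets are invented for illustration:

    #include <linux/irq.h>
    #include <linux/io.h>

    static void __iomem *chip_regs;         /* assumed ioremap()ed at probe */
    #define MASK_SET_OFF    0x0             /* hypothetical offsets */
    #define MASK_CLR_OFF    0x4

    static void sketch_mask(struct irq_data *d)
    {
            writel((unsigned long)d->chip_data, chip_regs + MASK_SET_OFF);
    }

    static void sketch_unmask(struct irq_data *d)
    {
            writel((unsigned long)d->chip_data, chip_regs + MASK_CLR_OFF);
    }

    static struct irq_chip sketch_chip = {
            .name           = "sketch",
            .irq_mask       = sketch_mask,
            .irq_unmask     = sketch_unmask,
    };

    /* at mapping time: irq_set_chip_data(irq, (void *)mask); */
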
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index ee8426ede7c7..2cb0e1c001e2 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -26,6 +26,7 @@
26#include <asm/nmi.h> 26#include <asm/nmi.h>
27#include <asm/pcr.h> 27#include <asm/pcr.h>
28 28
29#include "kernel.h"
29#include "kstack.h" 30#include "kstack.h"
30 31
31/* Sparc64 chips have two performance counters, 32-bits each, with 32/* Sparc64 chips have two performance counters, 32-bits each, with
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 17529298c50a..c8cc461ff75f 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -128,8 +128,16 @@ void cpu_idle(void)
128 set_thread_flag(TIF_POLLING_NRFLAG); 128 set_thread_flag(TIF_POLLING_NRFLAG);
129 /* endless idle loop with no priority at all */ 129 /* endless idle loop with no priority at all */
130 while(1) { 130 while(1) {
131 while (!need_resched()) 131#ifdef CONFIG_SPARC_LEON
132 cpu_relax(); 132 if (pm_idle) {
133 while (!need_resched())
134 (*pm_idle)();
135 } else
136#endif
137 {
138 while (!need_resched())
139 cpu_relax();
140 }
133 preempt_enable_no_resched(); 141 preempt_enable_no_resched();
134 schedule(); 142 schedule();
135 preempt_disable(); 143 preempt_disable();
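
With this hunk a LEON platform can substitute its own low-power wait for the default cpu_relax() spin. A sketch of how such a hook would be installed, assuming the pm_idle function pointer from <linux/pm.h>; the power-down access itself is a placeholder:

    #include <linux/pm.h>

    static void leon_low_power_idle(void)
    {
            /* placeholder: the real routine would write the LEON
             * power-down register and sleep until the next interrupt
             */
    }

    static void __init leon_setup_idle(void)
    {
            pm_idle = leon_low_power_idle;  /* consumed by cpu_idle() above */
    }
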
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 05fb25330583..5ce3d15a99b0 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -326,7 +326,6 @@ void __init of_console_init(void)
326 of_console_options = NULL; 326 of_console_options = NULL;
327 } 327 }
328 328
329 prom_printf(msg, of_console_path);
330 printk(msg, of_console_path); 329 printk(msg, of_console_path);
331} 330}
332 331
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index 7b8b76c9557f..3609bdee9ed2 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0;
103/* Exported for mm/init.c:paging_init. */ 103/* Exported for mm/init.c:paging_init. */
104unsigned long cmdline_memory_size __initdata = 0; 104unsigned long cmdline_memory_size __initdata = 0;
105 105
106/* which CPU booted us (0xff = not set) */
107unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */
108unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */
109
106static void 110static void
107prom_console_write(struct console *con, const char *s, unsigned n) 111prom_console_write(struct console *con, const char *s, unsigned n)
108{ 112{
109 prom_write(s, n); 113 prom_write(s, n);
110} 114}
111 115
112static struct console prom_debug_console = { 116static struct console prom_early_console = {
113 .name = "debug", 117 .name = "earlyprom",
114 .write = prom_console_write, 118 .write = prom_console_write,
115 .flags = CON_PRINTBUFFER, 119 .flags = CON_PRINTBUFFER | CON_BOOT,
116 .index = -1, 120 .index = -1,
117}; 121};
118 122
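
Renaming the console and adding CON_BOOT turns it into a true early console: buffered printk output is replayed (CON_PRINTBUFFER) and the console is unregistered automatically once a real console driver comes up. The same pattern in isolation, assuming a prom_write()-style firmware output primitive:

    #include <linux/console.h>

    static void early_prom_write(struct console *con, const char *s,
                                 unsigned n)
    {
            prom_write(s, n);               /* firmware output primitive */
    }

    static struct console early_prom_console = {
            .name   = "earlyprom",
            .write  = early_prom_write,
            .flags  = CON_PRINTBUFFER | CON_BOOT,   /* auto-removed later */
            .index  = -1,
    };

    /* register_console(&early_prom_console) early in setup_arch() */
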
@@ -133,8 +137,7 @@ static void __init process_switch(char c)
133 prom_halt(); 137 prom_halt();
134 break; 138 break;
135 case 'p': 139 case 'p':
136 /* Use PROM debug console. */ 140 /* Just ignore, this behavior is now the default. */
137 register_console(&prom_debug_console);
138 break; 141 break;
139 default: 142 default:
140 printk("Unknown boot switch (-%c)\n", c); 143 printk("Unknown boot switch (-%c)\n", c);
@@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p)
215 strcpy(boot_command_line, *cmdline_p); 218 strcpy(boot_command_line, *cmdline_p);
216 parse_early_param(); 219 parse_early_param();
217 220
221 boot_flags_init(*cmdline_p);
222
223 register_console(&prom_early_console);
224
218 /* Set sparc_cpu_model */ 225 /* Set sparc_cpu_model */
219 sparc_cpu_model = sun_unknown; 226 sparc_cpu_model = sun_unknown;
220 if (!strcmp(&cputypval[0], "sun4 ")) 227 if (!strcmp(&cputypval[0], "sun4 "))
@@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p)
265#ifdef CONFIG_DUMMY_CONSOLE 272#ifdef CONFIG_DUMMY_CONSOLE
266 conswitchp = &dummy_con; 273 conswitchp = &dummy_con;
267#endif 274#endif
268 boot_flags_init(*cmdline_p);
269 275
270 idprom_init(); 276 idprom_init();
271 if (ARCH_SUN4C) 277 if (ARCH_SUN4C)
@@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p)
311 smp_setup_cpu_possible_map(); 317 smp_setup_cpu_possible_map();
312} 318}
313 319
314static int ncpus_probed;
315
316static int show_cpuinfo(struct seq_file *m, void *__unused)
317{
318 seq_printf(m,
319 "cpu\t\t: %s\n"
320 "fpu\t\t: %s\n"
321 "promlib\t\t: Version %d Revision %d\n"
322 "prom\t\t: %d.%d\n"
323 "type\t\t: %s\n"
324 "ncpus probed\t: %d\n"
325 "ncpus active\t: %d\n"
326#ifndef CONFIG_SMP
327 "CPU0Bogo\t: %lu.%02lu\n"
328 "CPU0ClkTck\t: %ld\n"
329#endif
330 ,
331 sparc_cpu_type,
332 sparc_fpu_type ,
333 romvec->pv_romvers,
334 prom_rev,
335 romvec->pv_printrev >> 16,
336 romvec->pv_printrev & 0xffff,
337 &cputypval[0],
338 ncpus_probed,
339 num_online_cpus()
340#ifndef CONFIG_SMP
341 , cpu_data(0).udelay_val/(500000/HZ),
342 (cpu_data(0).udelay_val/(5000/HZ)) % 100,
343 cpu_data(0).clock_tick
344#endif
345 );
346
347#ifdef CONFIG_SMP
348 smp_bogo(m);
349#endif
350 mmu_info(m);
351#ifdef CONFIG_SMP
352 smp_info(m);
353#endif
354 return 0;
355}
356
357static void *c_start(struct seq_file *m, loff_t *pos)
358{
359 /* The pointer we are returning is arbitrary,
360 * it just has to be non-NULL and not IS_ERR
361 * in the success case.
362 */
363 return *pos == 0 ? &c_start : NULL;
364}
365
366static void *c_next(struct seq_file *m, void *v, loff_t *pos)
367{
368 ++*pos;
369 return c_start(m, pos);
370}
371
372static void c_stop(struct seq_file *m, void *v)
373{
374}
375
376const struct seq_operations cpuinfo_op = {
377 .start =c_start,
378 .next = c_next,
379 .stop = c_stop,
380 .show = show_cpuinfo,
381};
382
383extern int stop_a_enabled; 320extern int stop_a_enabled;
384 321
385void sun_do_break(void) 322void sun_do_break(void)
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 29bafe051bb1..f3b6850cc8db 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p)
339 paging_init(); 339 paging_init();
340} 340}
341 341
342/* BUFFER is PAGE_SIZE bytes long. */
343
344extern void smp_info(struct seq_file *);
345extern void smp_bogo(struct seq_file *);
346extern void mmu_info(struct seq_file *);
347
348unsigned int dcache_parity_tl1_occurred;
349unsigned int icache_parity_tl1_occurred;
350
351int ncpus_probed;
352
353static int show_cpuinfo(struct seq_file *m, void *__unused)
354{
355 seq_printf(m,
356 "cpu\t\t: %s\n"
357 "fpu\t\t: %s\n"
358 "pmu\t\t: %s\n"
359 "prom\t\t: %s\n"
360 "type\t\t: %s\n"
361 "ncpus probed\t: %d\n"
362 "ncpus active\t: %d\n"
363 "D$ parity tl1\t: %u\n"
364 "I$ parity tl1\t: %u\n"
365#ifndef CONFIG_SMP
366 "Cpu0ClkTck\t: %016lx\n"
367#endif
368 ,
369 sparc_cpu_type,
370 sparc_fpu_type,
371 sparc_pmu_type,
372 prom_version,
373 ((tlb_type == hypervisor) ?
374 "sun4v" :
375 "sun4u"),
376 ncpus_probed,
377 num_online_cpus(),
378 dcache_parity_tl1_occurred,
379 icache_parity_tl1_occurred
380#ifndef CONFIG_SMP
381 , cpu_data(0).clock_tick
382#endif
383 );
384#ifdef CONFIG_SMP
385 smp_bogo(m);
386#endif
387 mmu_info(m);
388#ifdef CONFIG_SMP
389 smp_info(m);
390#endif
391 return 0;
392}
393
394static void *c_start(struct seq_file *m, loff_t *pos)
395{
396 /* The pointer we are returning is arbitrary,
397 * it just has to be non-NULL and not IS_ERR
398 * in the success case.
399 */
400 return *pos == 0 ? &c_start : NULL;
401}
402
403static void *c_next(struct seq_file *m, void *v, loff_t *pos)
404{
405 ++*pos;
406 return c_start(m, pos);
407}
408
409static void c_stop(struct seq_file *m, void *v)
410{
411}
412
413const struct seq_operations cpuinfo_op = {
414 .start =c_start,
415 .next = c_next,
416 .stop = c_stop,
417 .show = show_cpuinfo,
418};
419
420extern int stop_a_enabled; 342extern int stop_a_enabled;
421 343
422void sun_do_break(void) 344void sun_do_break(void)
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 442286d83435..d5b3958be0b4 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -37,8 +37,6 @@
37#include "irq.h" 37#include "irq.h"
38 38
39volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; 39volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
40unsigned char boot_cpu_id = 0;
41unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
42 40
43cpumask_t smp_commenced_mask = CPU_MASK_NONE; 41cpumask_t smp_commenced_mask = CPU_MASK_NONE;
44 42
@@ -130,14 +128,57 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
130void smp_send_reschedule(int cpu) 128void smp_send_reschedule(int cpu)
131{ 129{
132 /* 130 /*
133 * XXX missing reschedule IPI, see scheduler_ipi() 131 * CPU model dependent way of implementing IPI generation targeting
132 * a single CPU. The trap handler needs only to do trap entry/return
133 * to call schedule.
134 */ 134 */
135 BTFIXUP_CALL(smp_ipi_resched)(cpu);
135} 136}
136 137
137void smp_send_stop(void) 138void smp_send_stop(void)
138{ 139{
139} 140}
140 141
142void arch_send_call_function_single_ipi(int cpu)
143{
144 /* trigger one IPI single call on one CPU */
145 BTFIXUP_CALL(smp_ipi_single)(cpu);
146}
147
148void arch_send_call_function_ipi_mask(const struct cpumask *mask)
149{
150 int cpu;
151
152 /* trigger IPI mask call on each CPU */
153 for_each_cpu(cpu, mask)
154 BTFIXUP_CALL(smp_ipi_mask_one)(cpu);
155}
156
157void smp_resched_interrupt(void)
158{
159 irq_enter();
160 scheduler_ipi();
161 local_cpu_data().irq_resched_count++;
162 irq_exit();
163 /* re-schedule routine called by interrupt return code. */
164}
165
166void smp_call_function_single_interrupt(void)
167{
168 irq_enter();
169 generic_smp_call_function_single_interrupt();
170 local_cpu_data().irq_call_count++;
171 irq_exit();
172}
173
174void smp_call_function_interrupt(void)
175{
176 irq_enter();
177 generic_smp_call_function_interrupt();
178 local_cpu_data().irq_call_count++;
179 irq_exit();
180}
181
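
These handlers are the receive side of the new IPI plumbing: the generic kernel queues work, the arch_send_* hooks above raise a CPU-model-specific IPI through BTFIXUP, and the trap handler on the target CPU lands here. From a caller's point of view only the generic API is visible; a hedged usage sketch (bump and poke_cpu are illustrative names):

    #include <linux/smp.h>

    static void bump(void *info)
    {
            (*(int *)info)++;               /* runs on the target CPU */
    }

    static int poke_cpu(int cpu)
    {
            int hits = 0;

            /* Queues the call; arch_send_call_function_single_ipi()
             * above fires the IPI, and wait=1 spins until bump() ran.
             */
            return smp_call_function_single(cpu, bump, &hits, 1);
    }
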
141void smp_flush_cache_all(void) 182void smp_flush_cache_all(void)
142{ 183{
143 xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all)); 184 xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all));
@@ -153,9 +194,10 @@ void smp_flush_tlb_all(void)
153void smp_flush_cache_mm(struct mm_struct *mm) 194void smp_flush_cache_mm(struct mm_struct *mm)
154{ 195{
155 if(mm->context != NO_CONTEXT) { 196 if(mm->context != NO_CONTEXT) {
156 cpumask_t cpu_mask = *mm_cpumask(mm); 197 cpumask_t cpu_mask;
157 cpu_clear(smp_processor_id(), cpu_mask); 198 cpumask_copy(&cpu_mask, mm_cpumask(mm));
158 if (!cpus_empty(cpu_mask)) 199 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
200 if (!cpumask_empty(&cpu_mask))
159 xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm); 201 xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm);
160 local_flush_cache_mm(mm); 202 local_flush_cache_mm(mm);
161 } 203 }
@@ -164,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm)
164void smp_flush_tlb_mm(struct mm_struct *mm) 206void smp_flush_tlb_mm(struct mm_struct *mm)
165{ 207{
166 if(mm->context != NO_CONTEXT) { 208 if(mm->context != NO_CONTEXT) {
167 cpumask_t cpu_mask = *mm_cpumask(mm); 209 cpumask_t cpu_mask;
168 cpu_clear(smp_processor_id(), cpu_mask); 210 cpumask_copy(&cpu_mask, mm_cpumask(mm));
169 if (!cpus_empty(cpu_mask)) { 211 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
212 if (!cpumask_empty(&cpu_mask)) {
170 xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm); 213 xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm);
171 if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) 214 if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
172 cpumask_copy(mm_cpumask(mm), 215 cpumask_copy(mm_cpumask(mm),
@@ -182,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
182 struct mm_struct *mm = vma->vm_mm; 225 struct mm_struct *mm = vma->vm_mm;
183 226
184 if (mm->context != NO_CONTEXT) { 227 if (mm->context != NO_CONTEXT) {
185 cpumask_t cpu_mask = *mm_cpumask(mm); 228 cpumask_t cpu_mask;
186 cpu_clear(smp_processor_id(), cpu_mask); 229 cpumask_copy(&cpu_mask, mm_cpumask(mm));
187 if (!cpus_empty(cpu_mask)) 230 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
231 if (!cpumask_empty(&cpu_mask))
188 xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end); 232 xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end);
189 local_flush_cache_range(vma, start, end); 233 local_flush_cache_range(vma, start, end);
190 } 234 }
@@ -196,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
196 struct mm_struct *mm = vma->vm_mm; 240 struct mm_struct *mm = vma->vm_mm;
197 241
198 if (mm->context != NO_CONTEXT) { 242 if (mm->context != NO_CONTEXT) {
199 cpumask_t cpu_mask = *mm_cpumask(mm); 243 cpumask_t cpu_mask;
200 cpu_clear(smp_processor_id(), cpu_mask); 244 cpumask_copy(&cpu_mask, mm_cpumask(mm));
201 if (!cpus_empty(cpu_mask)) 245 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
246 if (!cpumask_empty(&cpu_mask))
202 xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end); 247 xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end);
203 local_flush_tlb_range(vma, start, end); 248 local_flush_tlb_range(vma, start, end);
204 } 249 }
@@ -209,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
209 struct mm_struct *mm = vma->vm_mm; 254 struct mm_struct *mm = vma->vm_mm;
210 255
211 if(mm->context != NO_CONTEXT) { 256 if(mm->context != NO_CONTEXT) {
212 cpumask_t cpu_mask = *mm_cpumask(mm); 257 cpumask_t cpu_mask;
213 cpu_clear(smp_processor_id(), cpu_mask); 258 cpumask_copy(&cpu_mask, mm_cpumask(mm));
214 if (!cpus_empty(cpu_mask)) 259 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
260 if (!cpumask_empty(&cpu_mask))
215 xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page); 261 xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page);
216 local_flush_cache_page(vma, page); 262 local_flush_cache_page(vma, page);
217 } 263 }
@@ -222,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
222 struct mm_struct *mm = vma->vm_mm; 268 struct mm_struct *mm = vma->vm_mm;
223 269
224 if(mm->context != NO_CONTEXT) { 270 if(mm->context != NO_CONTEXT) {
225 cpumask_t cpu_mask = *mm_cpumask(mm); 271 cpumask_t cpu_mask;
226 cpu_clear(smp_processor_id(), cpu_mask); 272 cpumask_copy(&cpu_mask, mm_cpumask(mm));
227 if (!cpus_empty(cpu_mask)) 273 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
274 if (!cpumask_empty(&cpu_mask))
228 xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page); 275 xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page);
229 local_flush_tlb_page(vma, page); 276 local_flush_tlb_page(vma, page);
230 } 277 }
231} 278}
232 279
233void smp_reschedule_irq(void)
234{
235 set_need_resched();
236}
237
238void smp_flush_page_to_ram(unsigned long page) 280void smp_flush_page_to_ram(unsigned long page)
239{ 281{
240 /* Current theory is that those who call this are the ones 282
@@ -251,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page)
251 293
252void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) 294void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
253{ 295{
254 cpumask_t cpu_mask = *mm_cpumask(mm); 296 cpumask_t cpu_mask;
255 cpu_clear(smp_processor_id(), cpu_mask); 297 cpumask_copy(&cpu_mask, mm_cpumask(mm));
256 if (!cpus_empty(cpu_mask)) 298 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
299 if (!cpumask_empty(&cpu_mask))
257 xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr); 300 xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr);
258 local_flush_sig_insns(mm, insn_addr); 301 local_flush_sig_insns(mm, insn_addr);
259} 302}
@@ -407,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
407 }; 450 };
408 451
409 if (!ret) { 452 if (!ret) {
410 cpu_set(cpu, smp_commenced_mask); 453 cpumask_set_cpu(cpu, &smp_commenced_mask);
411 while (!cpu_online(cpu)) 454 while (!cpu_online(cpu))
412 mb(); 455 mb();
413 } 456 }
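
Every flush routine in this file repeats the same four-line conversion: take a private copy of the mm's CPU mask with cpumask_copy() instead of a struct assignment, drop the local CPU, and cross-call only if anyone is left. Isolated, the pattern looks like this (others_need_flush is an illustrative name):

    #include <linux/types.h>
    #include <linux/cpumask.h>
    #include <linux/mm_types.h>
    #include <linux/smp.h>

    static bool others_need_flush(struct mm_struct *mm)
    {
            cpumask_t cpu_mask;

            cpumask_copy(&cpu_mask, mm_cpumask(mm));        /* private copy */
            cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
            return !cpumask_empty(&cpu_mask);       /* true: cross-call */
    }
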
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 9478da7fdb3e..99cb17251bb5 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -121,11 +121,11 @@ void __cpuinit smp_callin(void)
121 /* inform the notifiers about the new cpu */ 121 /* inform the notifiers about the new cpu */
122 notify_cpu_starting(cpuid); 122 notify_cpu_starting(cpuid);
123 123
124 while (!cpu_isset(cpuid, smp_commenced_mask)) 124 while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
125 rmb(); 125 rmb();
126 126
127 ipi_call_lock_irq(); 127 ipi_call_lock_irq();
128 cpu_set(cpuid, cpu_online_map); 128 set_cpu_online(cpuid, true);
129 ipi_call_unlock_irq(); 129 ipi_call_unlock_irq();
130 130
131 /* idle thread is expected to have preempt disabled */ 131 /* idle thread is expected to have preempt disabled */
@@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
785 785
786/* Send cross call to all processors mentioned in MASK_P 786/* Send cross call to all processors mentioned in MASK_P
787 * except self. Really, there are only two cases currently, 787 * except self. Really, there are only two cases currently,
788 * "&cpu_online_map" and "&mm->cpu_vm_mask". 788 * "cpu_online_mask" and "mm_cpumask(mm)".
789 */ 789 */
790static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask) 790static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
791{ 791{
@@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d
797/* Send cross call to all processors except self. */ 797/* Send cross call to all processors except self. */
798static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) 798static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
799{ 799{
800 smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map); 800 smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
801} 801}
802 802
803extern unsigned long xcall_sync_tick; 803extern unsigned long xcall_sync_tick;
@@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick;
805static void smp_start_sync_tick_client(int cpu) 805static void smp_start_sync_tick_client(int cpu)
806{ 806{
807 xcall_deliver((u64) &xcall_sync_tick, 0, 0, 807 xcall_deliver((u64) &xcall_sync_tick, 0, 0,
808 &cpumask_of_cpu(cpu)); 808 cpumask_of(cpu));
809} 809}
810 810
811extern unsigned long xcall_call_function; 811extern unsigned long xcall_call_function;
@@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single;
820void arch_send_call_function_single_ipi(int cpu) 820void arch_send_call_function_single_ipi(int cpu)
821{ 821{
822 xcall_deliver((u64) &xcall_call_function_single, 0, 0, 822 xcall_deliver((u64) &xcall_call_function_single, 0, 0,
823 &cpumask_of_cpu(cpu)); 823 cpumask_of(cpu));
824} 824}
825 825
826void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) 826void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
@@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
918 } 918 }
919 if (data0) { 919 if (data0) {
920 xcall_deliver(data0, __pa(pg_addr), 920 xcall_deliver(data0, __pa(pg_addr),
921 (u64) pg_addr, &cpumask_of_cpu(cpu)); 921 (u64) pg_addr, cpumask_of(cpu));
922#ifdef CONFIG_DEBUG_DCFLUSH 922#ifdef CONFIG_DEBUG_DCFLUSH
923 atomic_inc(&dcpage_flushes_xcall); 923 atomic_inc(&dcpage_flushes_xcall);
924#endif 924#endif
@@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
954 } 954 }
955 if (data0) { 955 if (data0) {
956 xcall_deliver(data0, __pa(pg_addr), 956 xcall_deliver(data0, __pa(pg_addr),
957 (u64) pg_addr, &cpu_online_map); 957 (u64) pg_addr, cpu_online_mask);
958#ifdef CONFIG_DEBUG_DCFLUSH 958#ifdef CONFIG_DEBUG_DCFLUSH
959 atomic_inc(&dcpage_flushes_xcall); 959 atomic_inc(&dcpage_flushes_xcall);
960#endif 960#endif
@@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void)
1197 for_each_present_cpu(i) { 1197 for_each_present_cpu(i) {
1198 unsigned int j; 1198 unsigned int j;
1199 1199
1200 cpus_clear(cpu_core_map[i]); 1200 cpumask_clear(&cpu_core_map[i]);
1201 if (cpu_data(i).core_id == 0) { 1201 if (cpu_data(i).core_id == 0) {
1202 cpu_set(i, cpu_core_map[i]); 1202 cpumask_set_cpu(i, &cpu_core_map[i]);
1203 continue; 1203 continue;
1204 } 1204 }
1205 1205
1206 for_each_present_cpu(j) { 1206 for_each_present_cpu(j) {
1207 if (cpu_data(i).core_id == 1207 if (cpu_data(i).core_id ==
1208 cpu_data(j).core_id) 1208 cpu_data(j).core_id)
1209 cpu_set(j, cpu_core_map[i]); 1209 cpumask_set_cpu(j, &cpu_core_map[i]);
1210 } 1210 }
1211 } 1211 }
1212 1212
1213 for_each_present_cpu(i) { 1213 for_each_present_cpu(i) {
1214 unsigned int j; 1214 unsigned int j;
1215 1215
1216 cpus_clear(per_cpu(cpu_sibling_map, i)); 1216 cpumask_clear(&per_cpu(cpu_sibling_map, i));
1217 if (cpu_data(i).proc_id == -1) { 1217 if (cpu_data(i).proc_id == -1) {
1218 cpu_set(i, per_cpu(cpu_sibling_map, i)); 1218 cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
1219 continue; 1219 continue;
1220 } 1220 }
1221 1221
1222 for_each_present_cpu(j) { 1222 for_each_present_cpu(j) {
1223 if (cpu_data(i).proc_id == 1223 if (cpu_data(i).proc_id ==
1224 cpu_data(j).proc_id) 1224 cpu_data(j).proc_id)
1225 cpu_set(j, per_cpu(cpu_sibling_map, i)); 1225 cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
1226 } 1226 }
1227 } 1227 }
1228} 1228}
@@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
1232 int ret = smp_boot_one_cpu(cpu); 1232 int ret = smp_boot_one_cpu(cpu);
1233 1233
1234 if (!ret) { 1234 if (!ret) {
1235 cpu_set(cpu, smp_commenced_mask); 1235 cpumask_set_cpu(cpu, &smp_commenced_mask);
1236 while (!cpu_isset(cpu, cpu_online_map)) 1236 while (!cpu_online(cpu))
1237 mb(); 1237 mb();
1238 if (!cpu_isset(cpu, cpu_online_map)) { 1238 if (!cpu_online(cpu)) {
1239 ret = -ENODEV; 1239 ret = -ENODEV;
1240 } else { 1240 } else {
1241 /* On SUN4V, writes to %tick and %stick are 1241 /* On SUN4V, writes to %tick and %stick are
@@ -1269,7 +1269,7 @@ void cpu_play_dead(void)
1269 tb->nonresum_mondo_pa, 0); 1269 tb->nonresum_mondo_pa, 0);
1270 } 1270 }
1271 1271
1272 cpu_clear(cpu, smp_commenced_mask); 1272 cpumask_clear_cpu(cpu, &smp_commenced_mask);
1273 membar_safe("#Sync"); 1273 membar_safe("#Sync");
1274 1274
1275 local_irq_disable(); 1275 local_irq_disable();
@@ -1290,13 +1290,13 @@ int __cpu_disable(void)
1290 cpuinfo_sparc *c; 1290 cpuinfo_sparc *c;
1291 int i; 1291 int i;
1292 1292
1293 for_each_cpu_mask(i, cpu_core_map[cpu]) 1293 for_each_cpu(i, &cpu_core_map[cpu])
1294 cpu_clear(cpu, cpu_core_map[i]); 1294 cpumask_clear_cpu(cpu, &cpu_core_map[i]);
1295 cpus_clear(cpu_core_map[cpu]); 1295 cpumask_clear(&cpu_core_map[cpu]);
1296 1296
1297 for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu)) 1297 for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
1298 cpu_clear(cpu, per_cpu(cpu_sibling_map, i)); 1298 cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
1299 cpus_clear(per_cpu(cpu_sibling_map, cpu)); 1299 cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
1300 1300
1301 c = &cpu_data(cpu); 1301 c = &cpu_data(cpu);
1302 1302
@@ -1313,7 +1313,7 @@ int __cpu_disable(void)
1313 local_irq_disable(); 1313 local_irq_disable();
1314 1314
1315 ipi_call_lock(); 1315 ipi_call_lock();
1316 cpu_clear(cpu, cpu_online_map); 1316 set_cpu_online(cpu, false);
1317 ipi_call_unlock(); 1317 ipi_call_unlock();
1318 1318
1319 cpu_map_rebuild(); 1319 cpu_map_rebuild();
@@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu)
1327 1327
1328 for (i = 0; i < 100; i++) { 1328 for (i = 0; i < 100; i++) {
1329 smp_rmb(); 1329 smp_rmb();
1330 if (!cpu_isset(cpu, smp_commenced_mask)) 1330 if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
1331 break; 1331 break;
1332 msleep(100); 1332 msleep(100);
1333 } 1333 }
1334 if (cpu_isset(cpu, smp_commenced_mask)) { 1334 if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
1335 printk(KERN_ERR "CPU %u didn't die...\n", cpu); 1335 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1336 } else { 1336 } else {
1337#if defined(CONFIG_SUN_LDOMS) 1337#if defined(CONFIG_SUN_LDOMS)
@@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu)
1341 do { 1341 do {
1342 hv_err = sun4v_cpu_stop(cpu); 1342 hv_err = sun4v_cpu_stop(cpu);
1343 if (hv_err == HV_EOK) { 1343 if (hv_err == HV_EOK) {
1344 cpu_clear(cpu, cpu_present_map); 1344 set_cpu_present(cpu, false);
1345 break; 1345 break;
1346 } 1346 }
1347 } while (--limit > 0); 1347 } while (--limit > 0);
@@ -1362,7 +1362,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
1362void smp_send_reschedule(int cpu) 1362void smp_send_reschedule(int cpu)
1363{ 1363{
1364 xcall_deliver((u64) &xcall_receive_signal, 0, 0, 1364 xcall_deliver((u64) &xcall_receive_signal, 0, 0,
1365 &cpumask_of_cpu(cpu)); 1365 cpumask_of(cpu));
1366} 1366}
1367 1367
1368void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) 1368void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
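
The sparc64 half of the conversion swaps on-stack cpumask_of_cpu() values and the cpu_online_map variable for const pointers (cpumask_of(), cpu_online_mask), avoiding a cpumask_t copy per call once NR_CPUS grows. A sketch of the calling convention, with deliver() as a stand-in for xcall_deliver():

    #include <linux/cpumask.h>

    static void deliver(const struct cpumask *mask)
    {
            int cpu;

            for_each_cpu(cpu, mask)
                    ;       /* the real code sends one mondo per CPU */
    }

    static void send_one(int cpu)
    {
            deliver(cpumask_of(cpu));       /* no on-stack copy */
    }

    static void send_all(void)
    {
            deliver(cpu_online_mask);
    }
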
diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c
index 90eea38ad66f..f6bf25a2ff80 100644
--- a/arch/sparc/kernel/sun4c_irq.c
+++ b/arch/sparc/kernel/sun4c_irq.c
@@ -65,62 +65,94 @@
65 */ 65 */
66unsigned char __iomem *interrupt_enable; 66unsigned char __iomem *interrupt_enable;
67 67
68static void sun4c_disable_irq(unsigned int irq_nr) 68static void sun4c_mask_irq(struct irq_data *data)
69{ 69{
70 unsigned long flags; 70 unsigned long mask = (unsigned long)data->chip_data;
71 unsigned char current_mask, new_mask; 71
72 72 if (mask) {
73 local_irq_save(flags); 73 unsigned long flags;
74 irq_nr &= (NR_IRQS - 1); 74
75 current_mask = sbus_readb(interrupt_enable); 75 local_irq_save(flags);
76 switch (irq_nr) { 76 mask = sbus_readb(interrupt_enable) & ~mask;
77 case 1: 77 sbus_writeb(mask, interrupt_enable);
78 new_mask = ((current_mask) & (~(SUN4C_INT_E1)));
79 break;
80 case 8:
81 new_mask = ((current_mask) & (~(SUN4C_INT_E8)));
82 break;
83 case 10:
84 new_mask = ((current_mask) & (~(SUN4C_INT_E10)));
85 break;
86 case 14:
87 new_mask = ((current_mask) & (~(SUN4C_INT_E14)));
88 break;
89 default:
90 local_irq_restore(flags); 78 local_irq_restore(flags);
91 return;
92 } 79 }
93 sbus_writeb(new_mask, interrupt_enable);
94 local_irq_restore(flags);
95} 80}
96 81
97static void sun4c_enable_irq(unsigned int irq_nr) 82static void sun4c_unmask_irq(struct irq_data *data)
98{ 83{
99 unsigned long flags; 84 unsigned long mask = (unsigned long)data->chip_data;
100 unsigned char current_mask, new_mask; 85
101 86 if (mask) {
102 local_irq_save(flags); 87 unsigned long flags;
103 irq_nr &= (NR_IRQS - 1); 88
104 current_mask = sbus_readb(interrupt_enable); 89 local_irq_save(flags);
105 switch (irq_nr) { 90 mask = sbus_readb(interrupt_enable) | mask;
106 case 1: 91 sbus_writeb(mask, interrupt_enable);
107 new_mask = ((current_mask) | SUN4C_INT_E1);
108 break;
109 case 8:
110 new_mask = ((current_mask) | SUN4C_INT_E8);
111 break;
112 case 10:
113 new_mask = ((current_mask) | SUN4C_INT_E10);
114 break;
115 case 14:
116 new_mask = ((current_mask) | SUN4C_INT_E14);
117 break;
118 default:
119 local_irq_restore(flags); 92 local_irq_restore(flags);
120 return;
121 } 93 }
122 sbus_writeb(new_mask, interrupt_enable); 94}
123 local_irq_restore(flags); 95
96static unsigned int sun4c_startup_irq(struct irq_data *data)
97{
98 irq_link(data->irq);
99 sun4c_unmask_irq(data);
100
101 return 0;
102}
103
104static void sun4c_shutdown_irq(struct irq_data *data)
105{
106 sun4c_mask_irq(data);
107 irq_unlink(data->irq);
108}
109
110static struct irq_chip sun4c_irq = {
111 .name = "sun4c",
112 .irq_startup = sun4c_startup_irq,
113 .irq_shutdown = sun4c_shutdown_irq,
114 .irq_mask = sun4c_mask_irq,
115 .irq_unmask = sun4c_unmask_irq,
116};
117
118static unsigned int sun4c_build_device_irq(struct platform_device *op,
119 unsigned int real_irq)
120{
121 unsigned int irq;
122
123 if (real_irq >= 16) {
124 prom_printf("Bogus sun4c IRQ %u\n", real_irq);
125 prom_halt();
126 }
127
128 irq = irq_alloc(real_irq, real_irq);
129 if (irq) {
130 unsigned long mask = 0UL;
131
132 switch (real_irq) {
133 case 1:
134 mask = SUN4C_INT_E1;
135 break;
136 case 8:
137 mask = SUN4C_INT_E8;
138 break;
139 case 10:
140 mask = SUN4C_INT_E10;
141 break;
142 case 14:
143 mask = SUN4C_INT_E14;
144 break;
145 default:
146 /* All the rest are either always enabled,
147 * or are for signalling software interrupts.
148 */
149 break;
150 }
151 irq_set_chip_and_handler_name(irq, &sun4c_irq,
152 handle_level_irq, "level");
153 irq_set_chip_data(irq, (void *)mask);
154 }
155 return irq;
124} 156}
125 157
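
sun4c_build_device_irq() folds the old per-call switch statements into a one-time mask computation stored as chip_data, leaving mask/unmask as a single read-modify-write of the enable register. A hedged consumer sketch, assuming a device wired to hardware interrupt 8 (SUN4C_INT_E8):

    #include <linux/interrupt.h>
    #include <linux/errno.h>

    static irqreturn_t demo_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int __init demo_attach(void)
    {
            unsigned int irq = sun4c_build_device_irq(NULL, 8 /* hw irq */);

            if (!irq)
                    return -EINVAL;
            /* .irq_startup above will irq_link() and unmask the line */
            return request_irq(irq, demo_handler, 0, "demo", NULL);
    }
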
126struct sun4c_timer_info { 158struct sun4c_timer_info {
@@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit)
144 176
145static void __init sun4c_init_timers(irq_handler_t counter_fn) 177static void __init sun4c_init_timers(irq_handler_t counter_fn)
146{ 178{
147 const struct linux_prom_irqs *irq; 179 const struct linux_prom_irqs *prom_irqs;
148 struct device_node *dp; 180 struct device_node *dp;
181 unsigned int irq;
149 const u32 *addr; 182 const u32 *addr;
150 int err; 183 int err;
151 184
@@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
163 196
164 sun4c_timers = (void __iomem *) (unsigned long) addr[0]; 197 sun4c_timers = (void __iomem *) (unsigned long) addr[0];
165 198
166 irq = of_get_property(dp, "intr", NULL); 199 prom_irqs = of_get_property(dp, "intr", NULL);
167 of_node_put(dp); 200 of_node_put(dp);
168 if (!irq) { 201 if (!prom_irqs) {
169 prom_printf("sun4c_init_timers: No intr property\n"); 202 prom_printf("sun4c_init_timers: No intr property\n");
170 prom_halt(); 203 prom_halt();
171 } 204 }
@@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
178 211
179 master_l10_counter = &sun4c_timers->l10_count; 212 master_l10_counter = &sun4c_timers->l10_count;
180 213
181 err = request_irq(irq[0].pri, counter_fn, 214 irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri);
182 (IRQF_DISABLED | SA_STATIC_ALLOC), 215 err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
183 "timer", NULL);
184 if (err) { 216 if (err) {
185 prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err); 217 prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err);
186 prom_halt(); 218 prom_halt();
187 } 219 }
188 220
189 sun4c_disable_irq(irq[1].pri); 221 /* disable timer interrupt */
222 sun4c_mask_irq(irq_get_irq_data(irq));
190} 223}
191 224
192#ifdef CONFIG_SMP 225#ifdef CONFIG_SMP
@@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void)
215 248
216 interrupt_enable = (void __iomem *) (unsigned long) addr[0]; 249 interrupt_enable = (void __iomem *) (unsigned long) addr[0];
217 250
218 BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
219 BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
220 BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
221 BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
222 BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM); 251 BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM);
223 BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP); 252 BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP);
224 253
225 sparc_irq_config.init_timers = sun4c_init_timers; 254 sparc_irq_config.init_timers = sun4c_init_timers;
255 sparc_irq_config.build_device_irq = sun4c_build_device_irq;
226 256
227#ifdef CONFIG_SMP 257#ifdef CONFIG_SMP
228 BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP); 258 BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP);
diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c
index 77b4a8992710..a9ea60eb2c10 100644
--- a/arch/sparc/kernel/sun4d_irq.c
+++ b/arch/sparc/kernel/sun4d_irq.c
@@ -14,6 +14,7 @@
14#include <asm/io.h> 14#include <asm/io.h>
15#include <asm/sbi.h> 15#include <asm/sbi.h>
16#include <asm/cacheflush.h> 16#include <asm/cacheflush.h>
17#include <asm/setup.h>
17 18
18#include "kernel.h" 19#include "kernel.h"
19#include "irq.h" 20#include "irq.h"
@@ -22,22 +23,20 @@
22 * cpu local. CPU local interrupts cover the timer interrupts 23 * cpu local. CPU local interrupts cover the timer interrupts
23 * and whatnot, and we encode those as normal PILs between 24 * and whatnot, and we encode those as normal PILs between
24 * 0 and 15. 25 * 0 and 15.
25 * 26 * SBUS interrupts are encoded as a combination of board, level and slot.
26 * SBUS interrupts are encoded integers including the board number
27 * (plus one), the SBUS level, and the SBUS slot number. Sun4D
28 * IRQ dispatch is done by:
29 *
30 * 1) Reading the BW local interrupt table in order to get the bus
31 * interrupt mask.
32 *
33 * This table is indexed by SBUS interrupt level which can be
34 * derived from the PIL we got interrupted on.
35 *
36 * 2) For each bus showing interrupt pending from #1, read the
37 * SBI interrupt state register. This will indicate which slots
38 * have interrupts pending for that SBUS interrupt level.
39 */ 27 */
40 28
29struct sun4d_handler_data {
30 unsigned int cpuid; /* target cpu */
31 unsigned int real_irq; /* interrupt level */
32};
33
34
35static unsigned int sun4d_encode_irq(int board, int lvl, int slot)
36{
37 return (board + 1) << 5 | (lvl << 2) | slot;
38}
39
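
The encoding places the board number (biased by one) in bits 7:5, the SBUS level in bits 4:2 and the slot in bits 1:0. A quick self-checking example, with values chosen for illustration:

    #include <assert.h>

    static unsigned int encode(int board, int lvl, int slot)
    {
            return (board + 1) << 5 | (lvl << 2) | slot;
    }

    int main(void)
    {
            /* board 2, level 3, slot 1: 0x60 | 0x0c | 0x01 == 0x6d */
            assert(encode(2, 3, 1) == 0x6d);
            return 0;
    }
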
41struct sun4d_timer_regs { 40struct sun4d_timer_regs {
42 u32 l10_timer_limit; 41 u32 l10_timer_limit;
43 u32 l10_cur_countx; 42 u32 l10_cur_countx;
@@ -48,17 +47,12 @@ struct sun4d_timer_regs {
48 47
49static struct sun4d_timer_regs __iomem *sun4d_timers; 48static struct sun4d_timer_regs __iomem *sun4d_timers;
50 49
51#define TIMER_IRQ 10 50#define SUN4D_TIMER_IRQ 10
52
53#define MAX_STATIC_ALLOC 4
54static unsigned char sbus_tid[32];
55
56static struct irqaction *irq_action[NR_IRQS];
57 51
58static struct sbus_action { 52/* Specify which cpu handles interrupts from which board.
59 struct irqaction *action; 53 * Index is board - value is cpu.
60 /* For SMP this needs to be extended */ 54 */
61} *sbus_actions; 55static unsigned char board_to_cpu[32];
62 56
63static int pil_to_sbus[] = { 57static int pil_to_sbus[] = {
64 0, 58 0,
@@ -79,152 +73,81 @@ static int pil_to_sbus[] = {
79 0, 73 0,
80}; 74};
81 75
82static int sbus_to_pil[] = {
83 0,
84 2,
85 3,
86 5,
87 7,
88 9,
89 11,
90 13,
91};
92
93static int nsbi;
94
95/* Exported for sun4d_smp.c */ 76/* Exported for sun4d_smp.c */
96DEFINE_SPINLOCK(sun4d_imsk_lock); 77DEFINE_SPINLOCK(sun4d_imsk_lock);
97 78
98int show_sun4d_interrupts(struct seq_file *p, void *v) 79/* SBUS interrupts are encoded integers including the board number
80 * (plus one), the SBUS level, and the SBUS slot number. Sun4D
81 * IRQ dispatch is done by:
82 *
83 * 1) Reading the BW local interrupt table in order to get the bus
84 * interrupt mask.
85 *
86 * This table is indexed by SBUS interrupt level which can be
87 * derived from the PIL we got interrupted on.
88 *
89 * 2) For each bus showing interrupt pending from #1, read the
90 * SBI interrupt state register. This will indicate which slots
91 * have interrupts pending for that SBUS interrupt level.
92 *
93 * 3) Call the generic IRQ support.
94 */
95static void sun4d_sbus_handler_irq(int sbusl)
99{ 96{
100 int i = *(loff_t *) v, j = 0, k = 0, sbusl; 97 unsigned int bus_mask;
101 struct irqaction *action; 98 unsigned int sbino, slot;
102 unsigned long flags; 99 unsigned int sbil;
103#ifdef CONFIG_SMP 100
104 int x; 101 bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
105#endif 102 bw_clear_intr_mask(sbusl, bus_mask);
106 103
107 spin_lock_irqsave(&irq_action_lock, flags); 104 sbil = (sbusl << 2);
108 if (i < NR_IRQS) { 105 /* Loop for each pending SBI */
109 sbusl = pil_to_sbus[i]; 106 for (sbino = 0; bus_mask; sbino++) {
110 if (!sbusl) { 107 unsigned int idx, mask;
111 action = *(i + irq_action); 108
112 if (!action) 109 bus_mask >>= 1;
113 goto out_unlock; 110 if (!(bus_mask & 1))
114 } else { 111 continue;
115 for (j = 0; j < nsbi; j++) { 112 /* XXX This seems to ACK the irq twice. acquire_sbi()
116 for (k = 0; k < 4; k++) 113 * XXX uses swap, therefore this writes 0xf << sbil,
117 action = sbus_actions[(j << 5) + (sbusl << 2) + k].action; 114 * XXX then later release_sbi() will write the individual
118 if (action) 115 * XXX bits which were set again.
119 goto found_it; 116 */
120 } 117 mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
121 goto out_unlock; 118 mask &= (0xf << sbil);
122 } 119
123found_it: seq_printf(p, "%3d: ", i); 120 /* Loop for each pending SBI slot */
124#ifndef CONFIG_SMP 121 idx = 0;
125 seq_printf(p, "%10u ", kstat_irqs(i)); 122 slot = (1 << sbil);
126#else 123 while (mask != 0) {
127 for_each_online_cpu(x) 124 unsigned int pil;
128 seq_printf(p, "%10u ", 125 struct irq_bucket *p;
129 kstat_cpu(cpu_logical_map(x)).irqs[i]); 126
130#endif 127 idx++;
131 seq_printf(p, "%c %s", 128 slot <<= 1;
132 (action->flags & IRQF_DISABLED) ? '+' : ' ', 129 if (!(mask & slot))
133 action->name); 130 continue;
134 action = action->next; 131
135 for (;;) { 132 mask &= ~slot;
136 for (; action; action = action->next) { 133 pil = sun4d_encode_irq(sbino, sbil, idx);
137 seq_printf(p, ",%s %s", 134
138 (action->flags & IRQF_DISABLED) ? " +" : "", 135 p = irq_map[pil];
139 action->name); 136 while (p) {
140 } 137 struct irq_bucket *next;
141 if (!sbusl) 138
142 break; 139 next = p->next;
143 k++; 140 generic_handle_irq(p->irq);
144 if (k < 4) { 141 p = next;
145 action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
146 } else {
147 j++;
148 if (j == nsbi)
149 break;
150 k = 0;
151 action = sbus_actions[(j << 5) + (sbusl << 2)].action;
152 } 142 }
143 release_sbi(SBI2DEVID(sbino), slot);
153 } 144 }
154 seq_putc(p, '\n');
155 } 145 }
156out_unlock:
157 spin_unlock_irqrestore(&irq_action_lock, flags);
158 return 0;
159}
160
161void sun4d_free_irq(unsigned int irq, void *dev_id)
162{
163 struct irqaction *action, **actionp;
164 struct irqaction *tmp = NULL;
165 unsigned long flags;
166
167 spin_lock_irqsave(&irq_action_lock, flags);
168 if (irq < 15)
169 actionp = irq + irq_action;
170 else
171 actionp = &(sbus_actions[irq - (1 << 5)].action);
172 action = *actionp;
173 if (!action) {
174 printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
175 goto out_unlock;
176 }
177 if (dev_id) {
178 for (; action; action = action->next) {
179 if (action->dev_id == dev_id)
180 break;
181 tmp = action;
182 }
183 if (!action) {
184 printk(KERN_ERR "Trying to free free shared IRQ%d\n",
185 irq);
186 goto out_unlock;
187 }
188 } else if (action->flags & IRQF_SHARED) {
189 printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
190 irq);
191 goto out_unlock;
192 }
193 if (action->flags & SA_STATIC_ALLOC) {
194 /*
195 * This interrupt is marked as specially allocated
196 * so it is a bad idea to free it.
197 */
198 printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
199 irq, action->name);
200 goto out_unlock;
201 }
202
203 if (tmp)
204 tmp->next = action->next;
205 else
206 *actionp = action->next;
207
208 spin_unlock_irqrestore(&irq_action_lock, flags);
209
210 synchronize_irq(irq);
211
212 spin_lock_irqsave(&irq_action_lock, flags);
213
214 kfree(action);
215
216 if (!(*actionp))
217 __disable_irq(irq);
218
219out_unlock:
220 spin_unlock_irqrestore(&irq_action_lock, flags);
221} 146}
222 147
223void sun4d_handler_irq(int pil, struct pt_regs *regs) 148void sun4d_handler_irq(int pil, struct pt_regs *regs)
224{ 149{
225 struct pt_regs *old_regs; 150 struct pt_regs *old_regs;
226 struct irqaction *action;
227 int cpu = smp_processor_id();
228 /* SBUS IRQ level (1 - 7) */ 151 /* SBUS IRQ level (1 - 7) */
229 int sbusl = pil_to_sbus[pil]; 152 int sbusl = pil_to_sbus[pil];
230 153
@@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs)
233 156
234 cc_set_iclr(1 << pil); 157 cc_set_iclr(1 << pil);
235 158
159#ifdef CONFIG_SMP
160 /*
161 * Check IPI data structures after IRQ has been cleared. Hard and Soft
162 * IRQ can happen at the same time, so both cases are always handled.
163 */
164 if (pil == SUN4D_IPI_IRQ)
165 sun4d_ipi_interrupt();
166#endif
167
236 old_regs = set_irq_regs(regs); 168 old_regs = set_irq_regs(regs);
237 irq_enter(); 169 irq_enter();
238 kstat_cpu(cpu).irqs[pil]++; 170 if (sbusl == 0) {
239 if (!sbusl) { 171 /* cpu interrupt */
240 action = *(pil + irq_action); 172 struct irq_bucket *p;
241 if (!action) 173
242 unexpected_irq(pil, NULL, regs); 174 p = irq_map[pil];
243 do { 175 while (p) {
244 action->handler(pil, action->dev_id); 176 struct irq_bucket *next;
245 action = action->next; 177
246 } while (action); 178 next = p->next;
179 generic_handle_irq(p->irq);
180 p = next;
181 }
247 } else { 182 } else {
248 int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff; 183 /* SBUS interrupt */
249 int sbino; 184 sun4d_sbus_handler_irq(sbusl);
250 struct sbus_action *actionp;
251 unsigned mask, slot;
252 int sbil = (sbusl << 2);
253
254 bw_clear_intr_mask(sbusl, bus_mask);
255
256 /* Loop for each pending SBI */
257 for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1)
258 if (bus_mask & 1) {
259 mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
260 mask &= (0xf << sbil);
261 actionp = sbus_actions + (sbino << 5) + (sbil);
262 /* Loop for each pending SBI slot */
263 for (slot = (1 << sbil); mask; slot <<= 1, actionp++)
264 if (mask & slot) {
265 mask &= ~slot;
266 action = actionp->action;
267
268 if (!action)
269 unexpected_irq(pil, NULL, regs);
270 do {
271 action->handler(pil, action->dev_id);
272 action = action->next;
273 } while (action);
274 release_sbi(SBI2DEVID(sbino), slot);
275 }
276 }
277 } 185 }
278 irq_exit(); 186 irq_exit();
279 set_irq_regs(old_regs); 187 set_irq_regs(old_regs);
280} 188}
281 189
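
After the rewrite the trap-time handler only picks a path: PIL-local interrupts and decoded SBUS interrupts both end in generic_handle_irq() via the shared irq_map[] bucket lists. The bucket walk, isolated; the next pointer is read before dispatch because the handler may unlink the current bucket:

    /* field layout as used by the sparc32 irq code above */
    struct irq_bucket {
            struct irq_bucket *next;
            unsigned int real_irq;
            unsigned int irq;
            unsigned int pil;
    };

    static void walk_buckets(struct irq_bucket *p,
                             void (*dispatch)(unsigned int irq))
    {
            while (p) {
                    struct irq_bucket *next = p->next;  /* save first */

                    dispatch(p->irq);
                    p = next;
            }
    }
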
282int sun4d_request_irq(unsigned int irq, 190
283 irq_handler_t handler, 191static void sun4d_mask_irq(struct irq_data *data)
284 unsigned long irqflags, const char *devname, void *dev_id)
285{ 192{
286 struct irqaction *action, *tmp = NULL, **actionp; 193 struct sun4d_handler_data *handler_data = data->handler_data;
194 unsigned int real_irq;
195#ifdef CONFIG_SMP
196 int cpuid = handler_data->cpuid;
287 unsigned long flags; 197 unsigned long flags;
288 int ret; 198#endif
289 199 real_irq = handler_data->real_irq;
290 if (irq > 14 && irq < (1 << 5)) { 200#ifdef CONFIG_SMP
291 ret = -EINVAL; 201 spin_lock_irqsave(&sun4d_imsk_lock, flags);
292 goto out; 202 cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq));
293 } 203 spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
294 204#else
295 if (!handler) { 205 cc_set_imsk(cc_get_imsk() | (1 << real_irq));
296 ret = -EINVAL; 206#endif
297 goto out;
298 }
299
300 spin_lock_irqsave(&irq_action_lock, flags);
301
302 if (irq >= (1 << 5))
303 actionp = &(sbus_actions[irq - (1 << 5)].action);
304 else
305 actionp = irq + irq_action;
306 action = *actionp;
307
308 if (action) {
309 if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) {
310 for (tmp = action; tmp->next; tmp = tmp->next)
311 /* find last entry - tmp used below */;
312 } else {
313 ret = -EBUSY;
314 goto out_unlock;
315 }
316 if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) {
317 printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
318 irq);
319 ret = -EBUSY;
320 goto out_unlock;
321 }
322 action = NULL; /* Or else! */
323 }
324
325 /* If this is flagged as statically allocated then we use our
326 * private struct which is never freed.
327 */
328 if (irqflags & SA_STATIC_ALLOC) {
329 if (static_irq_count < MAX_STATIC_ALLOC)
330 action = &static_irqaction[static_irq_count++];
331 else
332 printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
333 irq, devname);
334 }
335
336 if (action == NULL)
337 action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
338
339 if (!action) {
340 ret = -ENOMEM;
341 goto out_unlock;
342 }
343
344 action->handler = handler;
345 action->flags = irqflags;
346 action->name = devname;
347 action->next = NULL;
348 action->dev_id = dev_id;
349
350 if (tmp)
351 tmp->next = action;
352 else
353 *actionp = action;
354
355 __enable_irq(irq);
356
357 ret = 0;
358out_unlock:
359 spin_unlock_irqrestore(&irq_action_lock, flags);
360out:
361 return ret;
362} 207}
363 208
364static void sun4d_disable_irq(unsigned int irq) 209static void sun4d_unmask_irq(struct irq_data *data)
365{ 210{
366 int tid = sbus_tid[(irq >> 5) - 1]; 211 struct sun4d_handler_data *handler_data = data->handler_data;
212 unsigned int real_irq;
213#ifdef CONFIG_SMP
214 int cpuid = handler_data->cpuid;
367 unsigned long flags; 215 unsigned long flags;
216#endif
217 real_irq = handler_data->real_irq;
368 218
369 if (irq < NR_IRQS) 219#ifdef CONFIG_SMP
370 return;
371
372 spin_lock_irqsave(&sun4d_imsk_lock, flags); 220 spin_lock_irqsave(&sun4d_imsk_lock, flags);
373 cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7])); 221 cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) & ~(1 << real_irq));
374 spin_unlock_irqrestore(&sun4d_imsk_lock, flags); 222 spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
223#else
224 cc_set_imsk(cc_get_imsk() & ~(1 << real_irq));
225#endif
375} 226}
376 227
377static void sun4d_enable_irq(unsigned int irq) 228static unsigned int sun4d_startup_irq(struct irq_data *data)
378{ 229{
379 int tid = sbus_tid[(irq >> 5) - 1]; 230 irq_link(data->irq);
380 unsigned long flags; 231 sun4d_unmask_irq(data);
381 232 return 0;
382 if (irq < NR_IRQS) 233}
383 return;
384 234
385 spin_lock_irqsave(&sun4d_imsk_lock, flags); 235static void sun4d_shutdown_irq(struct irq_data *data)
386 cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7])); 236{
387 spin_unlock_irqrestore(&sun4d_imsk_lock, flags); 237 sun4d_mask_irq(data);
238 irq_unlink(data->irq);
388} 239}
389 240
241struct irq_chip sun4d_irq = {
242 .name = "sun4d",
243 .irq_startup = sun4d_startup_irq,
244 .irq_shutdown = sun4d_shutdown_irq,
245 .irq_unmask = sun4d_unmask_irq,
246 .irq_mask = sun4d_mask_irq,
247};
248
390#ifdef CONFIG_SMP 249#ifdef CONFIG_SMP
391static void sun4d_set_cpu_int(int cpu, int level) 250static void sun4d_set_cpu_int(int cpu, int level)
392{ 251{
@@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void)
413 for_each_node_by_name(dp, "sbi") { 272 for_each_node_by_name(dp, "sbi") {
414 int devid = of_getintprop_default(dp, "device-id", 0); 273 int devid = of_getintprop_default(dp, "device-id", 0);
415 int board = of_getintprop_default(dp, "board#", 0); 274 int board = of_getintprop_default(dp, "board#", 0);
416 sbus_tid[board] = cpuid; 275 board_to_cpu[board] = cpuid;
417 set_sbi_tid(devid, cpuid << 3); 276 set_sbi_tid(devid, cpuid << 3);
418 } 277 }
419 printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid); 278 printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid);
@@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void)
443unsigned int sun4d_build_device_irq(struct platform_device *op, 302unsigned int sun4d_build_device_irq(struct platform_device *op,
444 unsigned int real_irq) 303 unsigned int real_irq)
445{ 304{
446 static int pil_to_sbus[] = {
447 0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0,
448 };
449 struct device_node *dp = op->dev.of_node; 305 struct device_node *dp = op->dev.of_node;
450 struct device_node *io_unit, *sbi = dp->parent; 306 struct device_node *io_unit, *sbi = dp->parent;
451 const struct linux_prom_registers *regs; 307 const struct linux_prom_registers *regs;
308 struct sun4d_handler_data *handler_data;
309 unsigned int pil;
310 unsigned int irq;
452 int board, slot; 311 int board, slot;
453 int sbusl; 312 int sbusl;
454 313
314 irq = 0;
455 while (sbi) { 315 while (sbi) {
456 if (!strcmp(sbi->name, "sbi")) 316 if (!strcmp(sbi->name, "sbi"))
457 break; 317 break;
@@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op,
484 344
485 sbusl = pil_to_sbus[real_irq]; 345 sbusl = pil_to_sbus[real_irq];
486 if (sbusl) 346 if (sbusl)
487 return (((board + 1) << 5) + (sbusl << 2) + slot); 347 pil = sun4d_encode_irq(board, sbusl, slot);
348 else
349 pil = real_irq;
350
351 irq = irq_alloc(real_irq, pil);
352 if (irq == 0)
353 goto err_out;
354
355 handler_data = irq_get_handler_data(irq);
356 if (unlikely(handler_data))
357 goto err_out;
358
359 handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC);
360 if (unlikely(!handler_data)) {
361 prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n");
362 prom_halt();
363 }
364 handler_data->cpuid = board_to_cpu[board];
365 handler_data->real_irq = real_irq;
366 irq_set_chip_and_handler_name(irq, &sun4d_irq,
367 handle_level_irq, "level");
368 irq_set_handler_data(irq, handler_data);
488 369
489err_out: 370err_out:
490 return real_irq; 371 return real_irq;
@@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
518{ 399{
519 struct device_node *dp; 400 struct device_node *dp;
520 struct resource res; 401 struct resource res;
402 unsigned int irq;
521 const u32 *reg; 403 const u32 *reg;
522 int err; 404 int err;
523 405
@@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
552 434
553 master_l10_counter = &sun4d_timers->l10_cur_count; 435 master_l10_counter = &sun4d_timers->l10_cur_count;
554 436
555 err = request_irq(TIMER_IRQ, counter_fn, 437 irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ);
556 (IRQF_DISABLED | SA_STATIC_ALLOC), 438 err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
557 "timer", NULL);
558 if (err) { 439 if (err) {
559 prom_printf("sun4d_init_timers: request_irq() failed with %d\n", 440 prom_printf("sun4d_init_timers: request_irq() failed with %d\n",
560 err); 441 err);
@@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
567void __init sun4d_init_sbi_irq(void) 448void __init sun4d_init_sbi_irq(void)
568{ 449{
569 struct device_node *dp; 450 struct device_node *dp;
570 int target_cpu = 0; 451 int target_cpu;
571 452
572#ifdef CONFIG_SMP
573 target_cpu = boot_cpu_id; 453 target_cpu = boot_cpu_id;
574#endif
575
576 nsbi = 0;
577 for_each_node_by_name(dp, "sbi")
578 nsbi++;
579 sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC);
580 if (!sbus_actions) {
581 prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n");
582 prom_halt();
583 }
584 for_each_node_by_name(dp, "sbi") { 454 for_each_node_by_name(dp, "sbi") {
585 int devid = of_getintprop_default(dp, "device-id", 0); 455 int devid = of_getintprop_default(dp, "device-id", 0);
586 int board = of_getintprop_default(dp, "board#", 0); 456 int board = of_getintprop_default(dp, "board#", 0);
587 unsigned int mask; 457 unsigned int mask;
588 458
589 set_sbi_tid(devid, target_cpu << 3); 459 set_sbi_tid(devid, target_cpu << 3);
590 sbus_tid[board] = target_cpu; 460 board_to_cpu[board] = target_cpu;
591 461
592 /* Get rid of pending irqs from PROM */ 462 /* Get rid of pending irqs from PROM */
593 mask = acquire_sbi(devid, 0xffffffff); 463 mask = acquire_sbi(devid, 0xffffffff);
@@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void)
603{ 473{
604 local_irq_disable(); 474 local_irq_disable();
605 475
606 BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM);
607 BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM);
608 BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM); 476 BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM);
609 BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM); 477 BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM);
610 478
611 sparc_irq_config.init_timers = sun4d_init_timers; 479 sparc_irq_config.init_timers = sun4d_init_timers;
612 sparc_irq_config.build_device_irq = sun4d_build_device_irq; 480 sparc_irq_config.build_device_irq = sun4d_build_device_irq;
613 481
614#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 475d50b96cd0..133387980b56 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon
32 return val; 32 return val;
33} 33}
34 34
35static void smp4d_ipi_init(void);
35static void smp_setup_percpu_timer(void); 36static void smp_setup_percpu_timer(void);
36 37
37static unsigned char cpu_leds[32]; 38static unsigned char cpu_leds[32];
@@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void)
80 local_flush_cache_all(); 81 local_flush_cache_all();
81 local_flush_tlb_all(); 82 local_flush_tlb_all();
82 83
83 cpu_probe();
84
85 while ((unsigned long)current_set[cpuid] < PAGE_OFFSET) 84 while ((unsigned long)current_set[cpuid] < PAGE_OFFSET)
86 barrier(); 85 barrier();
87 86
@@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void)
105 104
106 local_irq_enable(); /* We don't allow PIL 14 yet */ 105 local_irq_enable(); /* We don't allow PIL 14 yet */
107 106
108 while (!cpu_isset(cpuid, smp_commenced_mask)) 107 while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
109 barrier(); 108 barrier();
110 109
111 spin_lock_irqsave(&sun4d_imsk_lock, flags); 110 spin_lock_irqsave(&sun4d_imsk_lock, flags);
@@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void)
120 */ 119 */
121void __init smp4d_boot_cpus(void) 120void __init smp4d_boot_cpus(void)
122{ 121{
122 smp4d_ipi_init();
123 if (boot_cpu_id) 123 if (boot_cpu_id)
124 current_set[0] = NULL; 124 current_set[0] = NULL;
125 smp_setup_percpu_timer(); 125 smp_setup_percpu_timer();
@@ -191,6 +191,80 @@ void __init smp4d_smp_done(void)
191 sun4d_distribute_irqs(); 191 sun4d_distribute_irqs();
192} 192}
193 193
194/* Memory structure giving interrupt handler information about IPI generated */
195struct sun4d_ipi_work {
196 int single;
197 int msk;
198 int resched;
199};
200
201static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work);
202
203/* Initialize IPIs on the SUN4D SMP machine */
204static void __init smp4d_ipi_init(void)
205{
206 int cpu;
207 struct sun4d_ipi_work *work;
208
209 printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ);
210
211 for_each_possible_cpu(cpu) {
212 work = &per_cpu(sun4d_ipi_work, cpu);
213 work->single = work->msk = work->resched = 0;
214 }
215}
216
217void sun4d_ipi_interrupt(void)
218{
219 struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work);
220
221 if (work->single) {
222 work->single = 0;
223 smp_call_function_single_interrupt();
224 }
225 if (work->msk) {
226 work->msk = 0;
227 smp_call_function_interrupt();
228 }
229 if (work->resched) {
230 work->resched = 0;
231 smp_resched_interrupt();
232 }
233}
234
235static void smp4d_ipi_single(int cpu)
236{
237 struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
238
239 /* Mark work */
240 work->single = 1;
241
242 /* Generate IRQ on the CPU */
243 sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
244}
245
246static void smp4d_ipi_mask_one(int cpu)
247{
248 struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
249
250 /* Mark work */
251 work->msk = 1;
252
253 /* Generate IRQ on the CPU */
254 sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
255}
256
257static void smp4d_ipi_resched(int cpu)
258{
259 struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
260
261 /* Mark work */
262 work->resched = 1;
263
264 /* Generate IRQ on the CPU (any IRQ will cause resched) */
265 sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
266}
267
194static struct smp_funcall { 268static struct smp_funcall {
195 smpfunc_t func; 269 smpfunc_t func;
196 unsigned long arg1; 270 unsigned long arg1;
@@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
239 { 313 {
240 register int i; 314 register int i;
241 315
242 cpu_clear(smp_processor_id(), mask); 316 cpumask_clear_cpu(smp_processor_id(), &mask);
243 cpus_and(mask, cpu_online_map, mask); 317 cpumask_and(&mask, cpu_online_mask, &mask);
244 for (i = 0; i <= high; i++) { 318 for (i = 0; i <= high; i++) {
245 if (cpu_isset(i, mask)) { 319 if (cpumask_test_cpu(i, &mask)) {
246 ccall_info.processors_in[i] = 0; 320 ccall_info.processors_in[i] = 0;
247 ccall_info.processors_out[i] = 0; 321 ccall_info.processors_out[i] = 0;
248 sun4d_send_ipi(i, IRQ_CROSS_CALL); 322 sun4d_send_ipi(i, IRQ_CROSS_CALL);
@@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
255 329
256 i = 0; 330 i = 0;
257 do { 331 do {
258 if (!cpu_isset(i, mask)) 332 if (!cpumask_test_cpu(i, &mask))
259 continue; 333 continue;
260 while (!ccall_info.processors_in[i]) 334 while (!ccall_info.processors_in[i])
261 barrier(); 335 barrier();
@@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
263 337
264 i = 0; 338 i = 0;
265 do { 339 do {
266 if (!cpu_isset(i, mask)) 340 if (!cpumask_test_cpu(i, &mask))
267 continue; 341 continue;
268 while (!ccall_info.processors_out[i]) 342 while (!ccall_info.processors_out[i])
269 barrier(); 343 barrier();
@@ -356,6 +430,9 @@ void __init sun4d_init_smp(void)
356 BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current); 430 BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current);
357 BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM); 431 BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM);
358 BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM); 432 BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM);
433 BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM);
434 BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM);
435 BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM);
359 436
360 for (i = 0; i < NR_CPUS; i++) { 437 for (i = 0; i < NR_CPUS; i++) {
361 ccall_info.processors_in[i] = 1; 438 ccall_info.processors_in[i] = 1;
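
The sun4d hunks above multiplex three IPI kinds (call-function-single, call-function, resched) over one hardware IRQ: the sender sets a per-cpu flag in sun4d_ipi_work and raises SUN4D_IPI_IRQ, and the handler tests and clears each flag. Below is a minimal user-space sketch of that flag-plus-interrupt pattern; the struct layout mirrors the patch, but send_ipi() and the plain int flags are illustrative stand-ins, not kernel API.

#include <stdio.h>

struct ipi_work { int single, msk, resched; };
static struct ipi_work ipi_work[4];            /* one slot per "cpu" */

static void send_ipi(int cpu) { (void)cpu; }   /* stand-in for sun4d_send_ipi() */

static void ipi_single(int cpu)   { ipi_work[cpu].single  = 1; send_ipi(cpu); }
static void ipi_mask_one(int cpu) { ipi_work[cpu].msk     = 1; send_ipi(cpu); }
static void ipi_resched(int cpu)  { ipi_work[cpu].resched = 1; send_ipi(cpu); }

/* What the receiving cpu runs from its interrupt handler. */
static void ipi_interrupt(int cpu)
{
        struct ipi_work *w = &ipi_work[cpu];

        if (w->single)  { w->single  = 0; printf("cpu%d: call-single\n", cpu); }
        if (w->msk)     { w->msk     = 0; printf("cpu%d: call-mask\n", cpu); }
        if (w->resched) { w->resched = 0; printf("cpu%d: resched\n", cpu); }
}

int main(void)
{
        ipi_resched(2);
        ipi_single(2);
        ipi_mask_one(2);
        ipi_interrupt(2);       /* handles all three pending kinds at once */
        return 0;
}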
diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c
index 69df6257a32e..422c16dad1f6 100644
--- a/arch/sparc/kernel/sun4m_irq.c
+++ b/arch/sparc/kernel/sun4m_irq.c
@@ -100,6 +100,11 @@
100struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS]; 100struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS];
101struct sun4m_irq_global __iomem *sun4m_irq_global; 101struct sun4m_irq_global __iomem *sun4m_irq_global;
102 102
103struct sun4m_handler_data {
104 bool percpu;
105 long mask;
106};
107
103/* Dave Redman (djhr@tadpole.co.uk) 108/* Dave Redman (djhr@tadpole.co.uk)
104 * The sun4m interrupt registers. 109 * The sun4m interrupt registers.
105 */ 110 */
@@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global;
142#define OBP_INT_LEVEL_VME 0x40 147#define OBP_INT_LEVEL_VME 0x40
143 148
144#define SUN4M_TIMER_IRQ (OBP_INT_LEVEL_ONBOARD | 10) 149#define SUN4M_TIMER_IRQ (OBP_INT_LEVEL_ONBOARD | 10)
145#define SUM4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14) 150#define SUN4M_PROFILE_IRQ (OBP_INT_LEVEL_ONBOARD | 14)
146 151
147static unsigned long irq_mask[0x50] = { 152static unsigned long sun4m_imask[0x50] = {
148 /* 0x00 - SMP */ 153 /* 0x00 - SMP */
149 0, SUN4M_SOFT_INT(1), 154 0, SUN4M_SOFT_INT(1),
150 SUN4M_SOFT_INT(2), SUN4M_SOFT_INT(3), 155 SUN4M_SOFT_INT(2), SUN4M_SOFT_INT(3),
@@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = {
169 SUN4M_INT_VIDEO, SUN4M_INT_MODULE, 174 SUN4M_INT_VIDEO, SUN4M_INT_MODULE,
170 SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY, 175 SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY,
171 (SUN4M_INT_SERIAL | SUN4M_INT_KBDMS), 176 (SUN4M_INT_SERIAL | SUN4M_INT_KBDMS),
172 SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR, 177 SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR,
173 /* 0x30 - sbus */ 178 /* 0x30 - sbus */
174 0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1), 179 0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1),
175 0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3), 180 0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3),
@@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = {
182 0, SUN4M_INT_VME(6), 0, 0 187 0, SUN4M_INT_VME(6), 0, 0
183}; 188};
184 189
185static unsigned long sun4m_get_irqmask(unsigned int irq) 190static void sun4m_mask_irq(struct irq_data *data)
186{ 191{
187 unsigned long mask; 192 struct sun4m_handler_data *handler_data = data->handler_data;
188 193 int cpu = smp_processor_id();
189 if (irq < 0x50)
190 mask = irq_mask[irq];
191 else
192 mask = 0;
193 194
194 if (!mask) 195 if (handler_data->mask) {
195 printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n", 196 unsigned long flags;
196 irq);
197 197
198 return mask; 198 local_irq_save(flags);
199 if (handler_data->percpu) {
200 sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set);
201 } else {
202 sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set);
203 }
204 local_irq_restore(flags);
205 }
199} 206}
200 207
201static void sun4m_disable_irq(unsigned int irq_nr) 208static void sun4m_unmask_irq(struct irq_data *data)
202{ 209{
203 unsigned long mask, flags; 210 struct sun4m_handler_data *handler_data = data->handler_data;
204 int cpu = smp_processor_id(); 211 int cpu = smp_processor_id();
205 212
206 mask = sun4m_get_irqmask(irq_nr); 213 if (handler_data->mask) {
207 local_irq_save(flags); 214 unsigned long flags;
208 if (irq_nr > 15)
209 sbus_writel(mask, &sun4m_irq_global->mask_set);
210 else
211 sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
212 local_irq_restore(flags);
213}
214
215static void sun4m_enable_irq(unsigned int irq_nr)
216{
217 unsigned long mask, flags;
218 int cpu = smp_processor_id();
219 215
220 /* Dreadful floppy hack. When we use 0x2b instead of
221 * 0x0b the system blows (it starts to whistle!).
222 * So we continue to use 0x0b. Fixme ASAP. --P3
223 */
224 if (irq_nr != 0x0b) {
225 mask = sun4m_get_irqmask(irq_nr);
226 local_irq_save(flags);
227 if (irq_nr > 15)
228 sbus_writel(mask, &sun4m_irq_global->mask_clear);
229 else
230 sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
231 local_irq_restore(flags);
232 } else {
233 local_irq_save(flags); 216 local_irq_save(flags);
234 sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear); 217 if (handler_data->percpu) {
218 sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear);
219 } else {
220 sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear);
221 }
235 local_irq_restore(flags); 222 local_irq_restore(flags);
236 } 223 }
237} 224}
238 225
239static unsigned long cpu_pil_to_imask[16] = { 226static unsigned int sun4m_startup_irq(struct irq_data *data)
240/*0*/ 0x00000000, 227{
241/*1*/ 0x00000000, 228 irq_link(data->irq);
242/*2*/ SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0), 229 sun4m_unmask_irq(data);
243/*3*/ SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1), 230 return 0;
244/*4*/ SUN4M_INT_SCSI, 231}
245/*5*/ SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2),
246/*6*/ SUN4M_INT_ETHERNET,
247/*7*/ SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3),
248/*8*/ SUN4M_INT_VIDEO,
249/*9*/ SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR,
250/*10*/ SUN4M_INT_REALTIME,
251/*11*/ SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY,
252/*12*/ SUN4M_INT_SERIAL | SUN4M_INT_KBDMS,
253/*13*/ SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO,
254/*14*/ SUN4M_INT_E14,
255/*15*/ SUN4M_INT_ERROR,
256};
257 232
258/* We assume the caller has disabled local interrupts when these are called, 233static void sun4m_shutdown_irq(struct irq_data *data)
259 * or else very bizarre behavior will result.
260 */
261static void sun4m_disable_pil_irq(unsigned int pil)
262{ 234{
263 sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set); 235 sun4m_mask_irq(data);
236 irq_unlink(data->irq);
264} 237}
265 238
266static void sun4m_enable_pil_irq(unsigned int pil) 239static struct irq_chip sun4m_irq = {
240 .name = "sun4m",
241 .irq_startup = sun4m_startup_irq,
242 .irq_shutdown = sun4m_shutdown_irq,
243 .irq_mask = sun4m_mask_irq,
244 .irq_unmask = sun4m_unmask_irq,
245};
246
247
248static unsigned int sun4m_build_device_irq(struct platform_device *op,
249 unsigned int real_irq)
267{ 250{
268 sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear); 251 struct sun4m_handler_data *handler_data;
252 unsigned int irq;
253 unsigned int pil;
254
255 if (real_irq >= OBP_INT_LEVEL_VME) {
256 prom_printf("Bogus sun4m IRQ %u\n", real_irq);
257 prom_halt();
258 }
259 pil = (real_irq & 0xf);
260 irq = irq_alloc(real_irq, pil);
261
262 if (irq == 0)
263 goto out;
264
265 handler_data = irq_get_handler_data(irq);
266 if (unlikely(handler_data))
267 goto out;
268
269 handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC);
270 if (unlikely(!handler_data)) {
271 prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n");
272 prom_halt();
273 }
274
275 handler_data->mask = sun4m_imask[real_irq];
276 handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD;
277 irq_set_chip_and_handler_name(irq, &sun4m_irq,
278 handle_level_irq, "level");
279 irq_set_handler_data(irq, handler_data);
280
281out:
282 return irq;
269} 283}
270 284
271#ifdef CONFIG_SMP 285#ifdef CONFIG_SMP
272static void sun4m_send_ipi(int cpu, int level) 286static void sun4m_send_ipi(int cpu, int level)
273{ 287{
274 unsigned long mask = sun4m_get_irqmask(level); 288 sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set);
275
276 sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
277} 289}
278 290
279static void sun4m_clear_ipi(int cpu, int level) 291static void sun4m_clear_ipi(int cpu, int level)
280{ 292{
281 unsigned long mask = sun4m_get_irqmask(level); 293 sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear);
282
283 sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
284} 294}
285 295
286static void sun4m_set_udt(int cpu) 296static void sun4m_set_udt(int cpu)
@@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs)
343 prom_halt(); 353 prom_halt();
344} 354}
345 355
346/* Exported for sun4m_smp.c */ 356void sun4m_unmask_profile_irq(void)
357{
358 unsigned long flags;
359
360 local_irq_save(flags);
361 sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear);
362 local_irq_restore(flags);
363}
364
347void sun4m_clear_profile_irq(int cpu) 365void sun4m_clear_profile_irq(int cpu)
348{ 366{
349 sbus_readl(&timers_percpu[cpu]->l14_limit); 367 sbus_readl(&timers_percpu[cpu]->l14_limit);
@@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
358{ 376{
359 struct device_node *dp = of_find_node_by_name(NULL, "counter"); 377 struct device_node *dp = of_find_node_by_name(NULL, "counter");
360 int i, err, len, num_cpu_timers; 378 int i, err, len, num_cpu_timers;
379 unsigned int irq;
361 const u32 *addr; 380 const u32 *addr;
362 381
363 if (!dp) { 382 if (!dp) {
@@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
384 403
385 master_l10_counter = &timers_global->l10_count; 404 master_l10_counter = &timers_global->l10_count;
386 405
387 err = request_irq(SUN4M_TIMER_IRQ, counter_fn, 406 irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ);
388 (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL); 407
408 err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
389 if (err) { 409 if (err) {
390 printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n", 410 printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n",
391 err); 411 err);
@@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void)
452 if (num_cpu_iregs == 4) 472 if (num_cpu_iregs == 4)
453 sbus_writel(0, &sun4m_irq_global->interrupt_target); 473 sbus_writel(0, &sun4m_irq_global->interrupt_target);
454 474
455 BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM);
456 BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM);
457 BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM);
458 BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM);
459 BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM); 475 BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM);
460 BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM); 476 BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM);
461 477
462 sparc_irq_config.init_timers = sun4m_init_timers; 478 sparc_irq_config.init_timers = sun4m_init_timers;
479 sparc_irq_config.build_device_irq = sun4m_build_device_irq;
463 480
464#ifdef CONFIG_SMP 481#ifdef CONFIG_SMP
465 BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM); 482 BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM);
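
The sun4m conversion above replaces the open-coded enable/disable entry points with a generic irq_chip whose mask/unmask callbacks consult per-IRQ handler data: a precomputed hardware mask plus a percpu flag that selects the per-cpu or global register bank. A small sketch of that dispatch, with two toy register words standing in for the sbus_writel() accesses:

#include <stdio.h>
#include <stdbool.h>

/* Stand-ins for the two sun4m register banks. */
static unsigned long global_mask;
static unsigned long percpu_mask[4];

struct handler_data { bool percpu; unsigned long mask; };

static void mask_irq(const struct handler_data *hd, int cpu)
{
        if (!hd->mask)
                return;                         /* no valid mask: nothing to do */
        if (hd->percpu)
                percpu_mask[cpu] |= hd->mask;   /* models percpu ->set */
        else
                global_mask |= hd->mask;        /* models global ->mask_set */
}

static void unmask_irq(const struct handler_data *hd, int cpu)
{
        if (!hd->mask)
                return;
        if (hd->percpu)
                percpu_mask[cpu] &= ~hd->mask;  /* models percpu ->clear */
        else
                global_mask &= ~hd->mask;       /* models global ->mask_clear */
}

int main(void)
{
        struct handler_data timer = { .percpu = true,  .mask = 1UL << 10 };
        struct handler_data scsi  = { .percpu = false, .mask = 1UL << 4  };

        mask_irq(&timer, 0);
        mask_irq(&scsi, 0);
        unmask_irq(&scsi, 0);
        printf("global=%#lx cpu0=%#lx\n", global_mask, percpu_mask[0]);
        return 0;
}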
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 5cc7dc51de3d..594768686525 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -15,6 +15,9 @@
15#include "irq.h" 15#include "irq.h"
16#include "kernel.h" 16#include "kernel.h"
17 17
18#define IRQ_IPI_SINGLE 12
19#define IRQ_IPI_MASK 13
20#define IRQ_IPI_RESCHED 14
18#define IRQ_CROSS_CALL 15 21#define IRQ_CROSS_CALL 15
19 22
20static inline unsigned long 23static inline unsigned long
@@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val)
26 return val; 29 return val;
27} 30}
28 31
32static void smp4m_ipi_init(void);
29static void smp_setup_percpu_timer(void); 33static void smp_setup_percpu_timer(void);
30 34
31void __cpuinit smp4m_callin(void) 35void __cpuinit smp4m_callin(void)
@@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void)
59 local_flush_cache_all(); 63 local_flush_cache_all();
60 local_flush_tlb_all(); 64 local_flush_tlb_all();
61 65
62 cpu_probe();
63
64 /* Fix idle thread fields. */ 66 /* Fix idle thread fields. */
65 __asm__ __volatile__("ld [%0], %%g6\n\t" 67 __asm__ __volatile__("ld [%0], %%g6\n\t"
66 : : "r" (&current_set[cpuid]) 68 : : "r" (&current_set[cpuid])
@@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void)
70 atomic_inc(&init_mm.mm_count); 72 atomic_inc(&init_mm.mm_count);
71 current->active_mm = &init_mm; 73 current->active_mm = &init_mm;
72 74
73 while (!cpu_isset(cpuid, smp_commenced_mask)) 75 while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
74 mb(); 76 mb();
75 77
76 local_irq_enable(); 78 local_irq_enable();
@@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void)
83 */ 85 */
84void __init smp4m_boot_cpus(void) 86void __init smp4m_boot_cpus(void)
85{ 87{
88 smp4m_ipi_init();
86 smp_setup_percpu_timer(); 89 smp_setup_percpu_timer();
87 local_flush_cache_all(); 90 local_flush_cache_all();
88} 91}
@@ -150,18 +153,25 @@ void __init smp4m_smp_done(void)
150 /* Ok, they are spinning and ready to go. */ 153 /* Ok, they are spinning and ready to go. */
151} 154}
152 155
153/* At each hardware IRQ, we get this called to forward IRQ reception 156
154 * to the next processor. The caller must disable the IRQ level being 157/* Initialize IPIs on the SUN4M SMP machine */
155 * serviced globally so that there are no double interrupts received. 158static void __init smp4m_ipi_init(void)
156 * 159{
157 * XXX See sparc64 irq.c. 160}
158 */ 161
159void smp4m_irq_rotate(int cpu) 162static void smp4m_ipi_resched(int cpu)
163{
164 set_cpu_int(cpu, IRQ_IPI_RESCHED);
165}
166
167static void smp4m_ipi_single(int cpu)
160{ 168{
161 int next = cpu_data(cpu).next; 169 set_cpu_int(cpu, IRQ_IPI_SINGLE);
170}
162 171
163 if (next != cpu) 172static void smp4m_ipi_mask_one(int cpu)
164 set_irq_udt(next); 173{
174 set_cpu_int(cpu, IRQ_IPI_MASK);
165} 175}
166 176
167static struct smp_funcall { 177static struct smp_funcall {
@@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
199 { 209 {
200 register int i; 210 register int i;
201 211
202 cpu_clear(smp_processor_id(), mask); 212 cpumask_clear_cpu(smp_processor_id(), &mask);
203 cpus_and(mask, cpu_online_map, mask); 213 cpumask_and(&mask, cpu_online_mask, &mask);
204 for (i = 0; i < ncpus; i++) { 214 for (i = 0; i < ncpus; i++) {
205 if (cpu_isset(i, mask)) { 215 if (cpumask_test_cpu(i, &mask)) {
206 ccall_info.processors_in[i] = 0; 216 ccall_info.processors_in[i] = 0;
207 ccall_info.processors_out[i] = 0; 217 ccall_info.processors_out[i] = 0;
208 set_cpu_int(i, IRQ_CROSS_CALL); 218 set_cpu_int(i, IRQ_CROSS_CALL);
@@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
218 228
219 i = 0; 229 i = 0;
220 do { 230 do {
221 if (!cpu_isset(i, mask)) 231 if (!cpumask_test_cpu(i, &mask))
222 continue; 232 continue;
223 while (!ccall_info.processors_in[i]) 233 while (!ccall_info.processors_in[i])
224 barrier(); 234 barrier();
@@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
226 236
227 i = 0; 237 i = 0;
228 do { 238 do {
229 if (!cpu_isset(i, mask)) 239 if (!cpumask_test_cpu(i, &mask))
230 continue; 240 continue;
231 while (!ccall_info.processors_out[i]) 241 while (!ccall_info.processors_out[i])
232 barrier(); 242 barrier();
@@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void)
277 load_profile_irq(cpu, lvl14_resolution); 287 load_profile_irq(cpu, lvl14_resolution);
278 288
279 if (cpu == boot_cpu_id) 289 if (cpu == boot_cpu_id)
280 enable_pil_irq(14); 290 sun4m_unmask_profile_irq();
281} 291}
282 292
283static void __init smp4m_blackbox_id(unsigned *addr) 293static void __init smp4m_blackbox_id(unsigned *addr)
@@ -306,4 +316,7 @@ void __init sun4m_init_smp(void)
306 BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current); 316 BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current);
307 BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM); 317 BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM);
308 BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM); 318 BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM);
319 BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM);
320 BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM);
321 BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM);
309} 322}
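
Where sun4d has to multiplex one IRQ, sun4m can dedicate a soft-interrupt level to each IPI kind (12, 13 and 14 above), so each sender is a one-liner around set_cpu_int(). A sketch under that assumption, with set_cpu_int() stubbed out:

#include <stdio.h>

#define IRQ_IPI_SINGLE  12
#define IRQ_IPI_MASK    13
#define IRQ_IPI_RESCHED 14

/* Stand-in for set_cpu_int(): raising SUN4M_SOFT_INT(level) on one cpu. */
static void set_cpu_int(int cpu, int level)
{
        printf("cpu%d <- soft int level %d\n", cpu, level);
}

static void ipi_resched(int cpu)  { set_cpu_int(cpu, IRQ_IPI_RESCHED); }
static void ipi_single(int cpu)   { set_cpu_int(cpu, IRQ_IPI_SINGLE); }
static void ipi_mask_one(int cpu) { set_cpu_int(cpu, IRQ_IPI_MASK); }

int main(void)
{
        ipi_resched(1);
        ipi_single(1);
        ipi_mask_one(1);
        return 0;
}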
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
index 1eb8b00aed75..7408201d7efb 100644
--- a/arch/sparc/kernel/sysfs.c
+++ b/arch/sparc/kernel/sysfs.c
@@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu,
103 unsigned long (*func)(unsigned long), 103 unsigned long (*func)(unsigned long),
104 unsigned long arg) 104 unsigned long arg)
105{ 105{
106 cpumask_t old_affinity = current->cpus_allowed; 106 cpumask_t old_affinity;
107 unsigned long ret; 107 unsigned long ret;
108 108
109 cpumask_copy(&old_affinity, tsk_cpus_allowed(current));
109 /* should return -EINVAL to userspace */ 110 /* should return -EINVAL to userspace */
110 if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) 111 if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
111 return 0; 112 return 0;
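
The sysfs.c hunk snapshots the caller's affinity with cpumask_copy() instead of a struct assignment; the surrounding run_on_cpu() idiom (save affinity, pin to the target cpu, call the function, restore) has a direct user-space analogue via sched_setaffinity(), sketched below. Linux-specific and illustrative only, with minimal error handling.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

/* Run func(arg) pinned to one cpu, then restore the old affinity.
 * User-space analogue of the kernel's run_on_cpu(). */
static long run_on_cpu(int cpu, long (*func)(long), long arg)
{
        cpu_set_t old, one;
        long ret;

        if (sched_getaffinity(0, sizeof(old), &old))    /* save */
                return -1;
        CPU_ZERO(&one);
        CPU_SET(cpu, &one);
        if (sched_setaffinity(0, sizeof(one), &one))    /* pin */
                return -1;
        ret = func(arg);
        sched_setaffinity(0, sizeof(old), &old);        /* restore */
        return ret;
}

static long which_cpu(long unused)
{
        (void)unused;
        return sched_getcpu();
}

int main(void)
{
        printf("ran on cpu %ld\n", run_on_cpu(0, which_cpu, 0));
        return 0;
}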
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 96046a4024c2..1060e0672a4b 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -228,14 +228,10 @@ static void __init sbus_time_init(void)
228 228
229void __init time_init(void) 229void __init time_init(void)
230{ 230{
231#ifdef CONFIG_PCI 231 if (pcic_present())
232 extern void pci_time_init(void);
233 if (pcic_present()) {
234 pci_time_init(); 232 pci_time_init();
235 return; 233 else
236 } 234 sbus_time_init();
237#endif
238 sbus_time_init();
239} 235}
240 236
241 237
diff --git a/arch/sparc/kernel/us2e_cpufreq.c b/arch/sparc/kernel/us2e_cpufreq.c
index 8f982b76c712..531d54fc9829 100644
--- a/arch/sparc/kernel/us2e_cpufreq.c
+++ b/arch/sparc/kernel/us2e_cpufreq.c
@@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu)
237 if (!cpu_online(cpu)) 237 if (!cpu_online(cpu))
238 return 0; 238 return 0;
239 239
240 cpus_allowed = current->cpus_allowed; 240 cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
241 set_cpus_allowed_ptr(current, cpumask_of(cpu)); 241 set_cpus_allowed_ptr(current, cpumask_of(cpu));
242 242
243 clock_tick = sparc64_get_clock_tick(cpu) / 1000; 243 clock_tick = sparc64_get_clock_tick(cpu) / 1000;
@@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
258 if (!cpu_online(cpu)) 258 if (!cpu_online(cpu))
259 return; 259 return;
260 260
261 cpus_allowed = current->cpus_allowed; 261 cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
262 set_cpus_allowed_ptr(current, cpumask_of(cpu)); 262 set_cpus_allowed_ptr(current, cpumask_of(cpu));
263 263
264 new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; 264 new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c
index f35d1e794548..9a8ceb700833 100644
--- a/arch/sparc/kernel/us3_cpufreq.c
+++ b/arch/sparc/kernel/us3_cpufreq.c
@@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu)
85 if (!cpu_online(cpu)) 85 if (!cpu_online(cpu))
86 return 0; 86 return 0;
87 87
88 cpus_allowed = current->cpus_allowed; 88 cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
89 set_cpus_allowed_ptr(current, cpumask_of(cpu)); 89 set_cpus_allowed_ptr(current, cpumask_of(cpu));
90 90
91 reg = read_safari_cfg(); 91 reg = read_safari_cfg();
@@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index)
105 if (!cpu_online(cpu)) 105 if (!cpu_online(cpu))
106 return; 106 return;
107 107
108 cpus_allowed = current->cpus_allowed; 108 cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
109 set_cpus_allowed_ptr(current, cpumask_of(cpu)); 109 set_cpus_allowed_ptr(current, cpumask_of(cpu));
110 110
111 new_freq = sparc64_get_clock_tick(cpu) / 1000; 111 new_freq = sparc64_get_clock_tick(cpu) / 1000;
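
The cpufreq hunks above and the sysfs.c change are the same migration: "cpus_allowed = current->cpus_allowed" becomes cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)). Going through an accessor plus an explicit copy keeps callers working if the task's mask ever stops being an embedded struct (for instance an off-stack bitmap), where plain assignment bakes the layout into every call site. A toy model of the indirection:

#include <stdio.h>
#include <string.h>

#define NBITS 64
struct cpumask { unsigned long bits[NBITS / (8 * sizeof(unsigned long))]; };

struct task { struct cpumask cpus_allowed; };

/* Accessor in the spirit of tsk_cpus_allowed(): callers never name the
 * field directly, so its representation can change underneath them. */
static const struct cpumask *task_cpus_allowed(const struct task *t)
{
        return &t->cpus_allowed;
}

static void cpumask_copy(struct cpumask *dst, const struct cpumask *src)
{
        memcpy(dst, src, sizeof(*dst));
}

int main(void)
{
        struct task t = { .cpus_allowed = { { 0x5 } } };
        struct cpumask saved;

        cpumask_copy(&saved, task_cpus_allowed(&t));
        printf("saved bits: %#lx\n", saved.bits[0]);
        return 0;
}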
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 846d1c4374ea..7f01b8fce8bc 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
15lib-$(CONFIG_SPARC32) += copy_user.o locks.o 15lib-$(CONFIG_SPARC32) += copy_user.o locks.o
16lib-y += atomic_$(BITS).o 16lib-y += atomic_$(BITS).o
17lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o 17lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
18lib-$(CONFIG_SPARC32) += rwsem_32.o
19lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o 18lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
20 19
21lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o 20lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S
deleted file mode 100644
index 9675268e7fde..000000000000
--- a/arch/sparc/lib/rwsem_32.S
+++ /dev/null
@@ -1,204 +0,0 @@
1/*
2 * Assembly part of rw semaphores.
3 *
4 * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
5 */
6
7#include <asm/ptrace.h>
8#include <asm/psr.h>
9
10 .section .sched.text, "ax"
11 .align 4
12
13 .globl ___down_read
14___down_read:
15 rd %psr, %g3
16 nop
17 nop
18 nop
19 or %g3, PSR_PIL, %g7
20 wr %g7, 0, %psr
21 nop
22 nop
23 nop
24#ifdef CONFIG_SMP
251: ldstub [%g1 + 4], %g7
26 tst %g7
27 bne 1b
28 ld [%g1], %g7
29 sub %g7, 1, %g7
30 st %g7, [%g1]
31 stb %g0, [%g1 + 4]
32#else
33 ld [%g1], %g7
34 sub %g7, 1, %g7
35 st %g7, [%g1]
36#endif
37 wr %g3, 0, %psr
38 add %g7, 1, %g7
39 nop
40 nop
41 subcc %g7, 1, %g7
42 bneg 3f
43 nop
442: jmpl %o7, %g0
45 mov %g4, %o7
463: save %sp, -64, %sp
47 mov %g1, %l1
48 mov %g4, %l4
49 bcs 4f
50 mov %g5, %l5
51 call down_read_failed
52 mov %l1, %o0
53 mov %l1, %g1
54 mov %l4, %g4
55 ba ___down_read
56 restore %l5, %g0, %g5
574: call down_read_failed_biased
58 mov %l1, %o0
59 mov %l1, %g1
60 mov %l4, %g4
61 ba 2b
62 restore %l5, %g0, %g5
63
64 .globl ___down_write
65___down_write:
66 rd %psr, %g3
67 nop
68 nop
69 nop
70 or %g3, PSR_PIL, %g7
71 wr %g7, 0, %psr
72 sethi %hi(0x01000000), %g2
73 nop
74 nop
75#ifdef CONFIG_SMP
761: ldstub [%g1 + 4], %g7
77 tst %g7
78 bne 1b
79 ld [%g1], %g7
80 sub %g7, %g2, %g7
81 st %g7, [%g1]
82 stb %g0, [%g1 + 4]
83#else
84 ld [%g1], %g7
85 sub %g7, %g2, %g7
86 st %g7, [%g1]
87#endif
88 wr %g3, 0, %psr
89 add %g7, %g2, %g7
90 nop
91 nop
92 subcc %g7, %g2, %g7
93 bne 3f
94 nop
952: jmpl %o7, %g0
96 mov %g4, %o7
973: save %sp, -64, %sp
98 mov %g1, %l1
99 mov %g4, %l4
100 bcs 4f
101 mov %g5, %l5
102 call down_write_failed
103 mov %l1, %o0
104 mov %l1, %g1
105 mov %l4, %g4
106 ba ___down_write
107 restore %l5, %g0, %g5
1084: call down_write_failed_biased
109 mov %l1, %o0
110 mov %l1, %g1
111 mov %l4, %g4
112 ba 2b
113 restore %l5, %g0, %g5
114
115 .text
116 .globl ___up_read
117___up_read:
118 rd %psr, %g3
119 nop
120 nop
121 nop
122 or %g3, PSR_PIL, %g7
123 wr %g7, 0, %psr
124 nop
125 nop
126 nop
127#ifdef CONFIG_SMP
1281: ldstub [%g1 + 4], %g7
129 tst %g7
130 bne 1b
131 ld [%g1], %g7
132 add %g7, 1, %g7
133 st %g7, [%g1]
134 stb %g0, [%g1 + 4]
135#else
136 ld [%g1], %g7
137 add %g7, 1, %g7
138 st %g7, [%g1]
139#endif
140 wr %g3, 0, %psr
141 nop
142 nop
143 nop
144 cmp %g7, 0
145 be 3f
146 nop
1472: jmpl %o7, %g0
148 mov %g4, %o7
1493: save %sp, -64, %sp
150 mov %g1, %l1
151 mov %g4, %l4
152 mov %g5, %l5
153 clr %o1
154 call __rwsem_wake
155 mov %l1, %o0
156 mov %l1, %g1
157 mov %l4, %g4
158 ba 2b
159 restore %l5, %g0, %g5
160
161 .globl ___up_write
162___up_write:
163 rd %psr, %g3
164 nop
165 nop
166 nop
167 or %g3, PSR_PIL, %g7
168 wr %g7, 0, %psr
169 sethi %hi(0x01000000), %g2
170 nop
171 nop
172#ifdef CONFIG_SMP
1731: ldstub [%g1 + 4], %g7
174 tst %g7
175 bne 1b
176 ld [%g1], %g7
177 add %g7, %g2, %g7
178 st %g7, [%g1]
179 stb %g0, [%g1 + 4]
180#else
181 ld [%g1], %g7
182 add %g7, %g2, %g7
183 st %g7, [%g1]
184#endif
185 wr %g3, 0, %psr
186 sub %g7, %g2, %g7
187 nop
188 nop
189 addcc %g7, %g2, %g7
190 bcs 3f
191 nop
1922: jmpl %o7, %g0
193 mov %g4, %o7
1943: save %sp, -64, %sp
195 mov %g1, %l1
196 mov %g4, %l4
197 mov %g5, %l5
198 mov %g7, %o1
199 call __rwsem_wake
200 mov %l1, %o0
201 mov %l1, %g1
202 mov %l4, %g4
203 ba 2b
204 restore %l5, %g0, %g5
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 2f6ae1d1fb6b..e10cd03fab80 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void)
862 for (i = 0; i < NR_CPUS; i++) 862 for (i = 0; i < NR_CPUS; i++)
863 numa_cpu_lookup_table[i] = 0; 863 numa_cpu_lookup_table[i] = 0;
864 864
865 numa_cpumask_lookup_table[0] = CPU_MASK_ALL; 865 cpumask_setall(&numa_cpumask_lookup_table[0]);
866} 866}
867 867
868#ifdef CONFIG_NEED_MULTIPLE_NODES 868#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
1080{ 1080{
1081 u64 arc; 1081 u64 arc;
1082 1082
1083 cpus_clear(*mask); 1083 cpumask_clear(mask);
1084 1084
1085 mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) { 1085 mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
1086 u64 target = mdesc_arc_target(md, arc); 1086 u64 target = mdesc_arc_target(md, arc);
@@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
1091 continue; 1091 continue;
1092 id = mdesc_get_property(md, target, "id", NULL); 1092 id = mdesc_get_property(md, target, "id", NULL);
1093 if (*id < nr_cpu_ids) 1093 if (*id < nr_cpu_ids)
1094 cpu_set(*id, *mask); 1094 cpumask_set_cpu(*id, mask);
1095 } 1095 }
1096} 1096}
1097 1097
@@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
1153 1153
1154 numa_parse_mdesc_group_cpus(md, grp, &mask); 1154 numa_parse_mdesc_group_cpus(md, grp, &mask);
1155 1155
1156 for_each_cpu_mask(cpu, mask) 1156 for_each_cpu(cpu, &mask)
1157 numa_cpu_lookup_table[cpu] = index; 1157 numa_cpu_lookup_table[cpu] = index;
1158 numa_cpumask_lookup_table[index] = mask; 1158 cpumask_copy(&numa_cpumask_lookup_table[index], &mask);
1159 1159
1160 if (numa_debug) { 1160 if (numa_debug) {
1161 printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index); 1161 printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
1162 for_each_cpu_mask(cpu, mask) 1162 for_each_cpu(cpu, &mask)
1163 printk("%d ", cpu); 1163 printk("%d ", cpu);
1164 printk("]\n"); 1164 printk("]\n");
1165 } 1165 }
@@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void)
1218 index = 0; 1218 index = 0;
1219 for_each_present_cpu(cpu) { 1219 for_each_present_cpu(cpu) {
1220 numa_cpu_lookup_table[cpu] = index; 1220 numa_cpu_lookup_table[cpu] = index;
1221 numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu); 1221 cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
1222 node_masks[index].mask = ~((1UL << 36UL) - 1UL); 1222 node_masks[index].mask = ~((1UL << 36UL) - 1UL);
1223 node_masks[index].val = cpu << 36UL; 1223 node_masks[index].val = cpu << 36UL;
1224 1224
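
The init_64.c NUMA hunks apply the same cpumask API sweep to mask construction and iteration: cpus_clear/cpu_set/for_each_cpu_mask become cpumask_clear/cpumask_set_cpu/for_each_cpu, and whole-struct stores become cpumask_copy(). A compact stand-alone model of building a group mask and filling the node lookup table, with a plain bit word standing in for struct cpumask:

#include <stdio.h>

#define NR_CPUS 16
typedef unsigned int mask_t;                 /* toy stand-in for struct cpumask */

static void mask_clear(mask_t *m)          { *m = 0; }
static void mask_set_cpu(int c, mask_t *m) { *m |= 1u << c; }
static int  mask_test_cpu(int c, mask_t m) { return (m >> c) & 1u; }

int main(void)
{
        int lookup[NR_CPUS];
        mask_t mask;
        int cpu;

        mask_clear(&mask);
        mask_set_cpu(2, &mask);
        mask_set_cpu(5, &mask);

        /* for_each_cpu(cpu, &mask): record node index 0 for members */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (mask_test_cpu(cpu, mask))
                        lookup[cpu] = 0;

        printf("cpu2 -> node %d\n", lookup[2]);
        return 0;
}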
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cbc70a27430c..c8b41623377f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -254,7 +254,7 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
254} 254}
255#endif 255#endif
256 256
257static int disable_smep __initdata; 257static int disable_smep __cpuinitdata;
258static __init int setup_disable_smep(char *arg) 258static __init int setup_disable_smep(char *arg)
259{ 259{
260 disable_smep = 1; 260 disable_smep = 1;
@@ -262,7 +262,7 @@ static __init int setup_disable_smep(char *arg)
262} 262}
263__setup("nosmep", setup_disable_smep); 263__setup("nosmep", setup_disable_smep);
264 264
265static __init void setup_smep(struct cpuinfo_x86 *c) 265static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
266{ 266{
267 if (cpu_has(c, X86_FEATURE_SMEP)) { 267 if (cpu_has(c, X86_FEATURE_SMEP)) {
268 if (unlikely(disable_smep)) { 268 if (unlikely(disable_smep)) {
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b2699bb2e530..d871b14ed5a1 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -42,6 +42,7 @@
42#include <linux/genhd.h> 42#include <linux/genhd.h>
43#include <net/tcp.h> 43#include <net/tcp.h>
44#include <linux/lru_cache.h> 44#include <linux/lru_cache.h>
45#include <linux/prefetch.h>
45 46
46#ifdef __CHECKER__ 47#ifdef __CHECKER__
47# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) 48# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index c9213ead4a26..a4d6cb0c0343 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -34,6 +34,7 @@
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/dma-mapping.h> 35#include <linux/dma-mapping.h>
36#include <linux/workqueue.h> 36#include <linux/workqueue.h>
37#include <linux/prefetch.h>
37#include <linux/i7300_idle.h> 38#include <linux/i7300_idle.h>
38#include "dma.h" 39#include "dma.h"
39#include "registers.h" 40#include "registers.h"
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index effd140fc042..f4a51d4d0349 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -34,6 +34,7 @@
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/dma-mapping.h> 35#include <linux/dma-mapping.h>
36#include <linux/workqueue.h> 36#include <linux/workqueue.h>
37#include <linux/prefetch.h>
37#include <linux/i7300_idle.h> 38#include <linux/i7300_idle.h>
38#include "dma.h" 39#include "dma.h"
39#include "dma_v2.h" 40#include "dma_v2.h"
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index d0f499098479..d845dc4b7103 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -60,6 +60,7 @@
60#include <linux/gfp.h> 60#include <linux/gfp.h>
61#include <linux/dmaengine.h> 61#include <linux/dmaengine.h>
62#include <linux/dma-mapping.h> 62#include <linux/dma-mapping.h>
63#include <linux/prefetch.h>
63#include "registers.h" 64#include "registers.h"
64#include "hw.h" 65#include "hw.h"
65#include "dma.h" 66#include "dma.h"
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index c26c11905ffe..2af8cb460a3b 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -416,21 +416,21 @@ void ide_acpi_get_timing(ide_hwif_t *hwif)
416 416
417 out_obj = output.pointer; 417 out_obj = output.pointer;
418 if (out_obj->type != ACPI_TYPE_BUFFER) { 418 if (out_obj->type != ACPI_TYPE_BUFFER) {
419 kfree(output.pointer);
420 DEBPRINT("Run _GTM: error: " 419 DEBPRINT("Run _GTM: error: "
421 "expected object type of ACPI_TYPE_BUFFER, " 420 "expected object type of ACPI_TYPE_BUFFER, "
422 "got 0x%x\n", out_obj->type); 421 "got 0x%x\n", out_obj->type);
422 kfree(output.pointer);
423 return; 423 return;
424 } 424 }
425 425
426 if (!out_obj->buffer.length || !out_obj->buffer.pointer || 426 if (!out_obj->buffer.length || !out_obj->buffer.pointer ||
427 out_obj->buffer.length != sizeof(struct GTM_buffer)) { 427 out_obj->buffer.length != sizeof(struct GTM_buffer)) {
428 kfree(output.pointer);
429 printk(KERN_ERR 428 printk(KERN_ERR
430 "%s: unexpected _GTM length (0x%x)[should be 0x%zx] or " 429 "%s: unexpected _GTM length (0x%x)[should be 0x%zx] or "
431 "addr (0x%p)\n", 430 "addr (0x%p)\n",
432 __func__, out_obj->buffer.length, 431 __func__, out_obj->buffer.length,
433 sizeof(struct GTM_buffer), out_obj->buffer.pointer); 432 sizeof(struct GTM_buffer), out_obj->buffer.pointer);
433 kfree(output.pointer);
434 return; 434 return;
435 } 435 }
436 436
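
The ide-acpi.c hunks fix a use-after-free: out_obj points into output.pointer, so the old code freed the buffer and then printed fields of *out_obj in the error message. The fix is pure ordering, last use first, kfree() after. A minimal sketch of the hazard and the corrected order:

#include <stdio.h>
#include <stdlib.h>

struct obj { int type; };

static void report_and_free(struct obj *o, int want)
{
        if (o->type != want) {
                /* Use the object first... */
                fprintf(stderr, "error: expected type %d, got %d\n",
                        want, o->type);
                /* ...and only then release it (the bug freed it first). */
                free(o);
                return;
        }
        free(o);
}

int main(void)
{
        struct obj *o = malloc(sizeof(*o));
        if (!o)
                return 1;
        o->type = 3;
        report_and_free(o, 4);
        return 0;
}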
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 5a702d02c848..61fdf544fbd6 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -73,7 +73,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
73 drive->failed_pc = NULL; 73 drive->failed_pc = NULL;
74 74
75 if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 || 75 if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
76 (rq && rq->cmd_type == REQ_TYPE_BLOCK_PC)) 76 rq->cmd_type == REQ_TYPE_BLOCK_PC)
77 uptodate = 1; /* FIXME */ 77 uptodate = 1; /* FIXME */
78 else if (pc->c[0] == GPCMD_REQUEST_SENSE) { 78 else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
79 79
diff --git a/drivers/ide/ide-scan-pci.c b/drivers/ide/ide-scan-pci.c
index 0e79efff1deb..c3da53e7bb2b 100644
--- a/drivers/ide/ide-scan-pci.c
+++ b/drivers/ide/ide-scan-pci.c
@@ -88,7 +88,7 @@ static int __init ide_scan_pcibus(void)
88 struct list_head *l, *n; 88 struct list_head *l, *n;
89 89
90 pre_init = 0; 90 pre_init = 0;
91 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev))) 91 for_each_pci_dev(dev)
92 ide_scan_pcidev(dev); 92 ide_scan_pcidev(dev);
93 93
94 /* 94 /*
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index ebcf8e470a97..1db7c4368dbf 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1334,7 +1334,7 @@ out_free_pmif:
1334static int 1334static int
1335pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) 1335pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
1336{ 1336{
1337 pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev); 1337 pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
1338 int rc = 0; 1338 int rc = 0;
1339 1339
1340 if (mesg.event != pdev->dev.power.power_state.event 1340 if (mesg.event != pdev->dev.power.power_state.event
@@ -1350,7 +1350,7 @@ pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
1350static int 1350static int
1351pmac_ide_pci_resume(struct pci_dev *pdev) 1351pmac_ide_pci_resume(struct pci_dev *pdev)
1352{ 1352{
1353 pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev); 1353 pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
1354 int rc = 0; 1354 int rc = 0;
1355 1355
1356 if (pdev->dev.power.power_state.event != PM_EVENT_ON) { 1356 if (pdev->dev.power.power_state.event != PM_EVENT_ON) {
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index dc85d777578e..0cfc455630d0 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -47,6 +47,7 @@
47#include <linux/init.h> 47#include <linux/init.h>
48#include <linux/dma-mapping.h> 48#include <linux/dma-mapping.h>
49#include <linux/slab.h> 49#include <linux/slab.h>
50#include <linux/prefetch.h>
50 51
51#include <asm/io.h> 52#include <asm/io.h>
52#include <asm/irq.h> 53#include <asm/irq.h>
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5c9362792f1d..70bd738b8b99 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -493,11 +493,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
493 spin_unlock_irqrestore(&bitmap->lock, flags); 493 spin_unlock_irqrestore(&bitmap->lock, flags);
494 sb = kmap_atomic(bitmap->sb_page, KM_USER0); 494 sb = kmap_atomic(bitmap->sb_page, KM_USER0);
495 sb->events = cpu_to_le64(bitmap->mddev->events); 495 sb->events = cpu_to_le64(bitmap->mddev->events);
496 if (bitmap->mddev->events < bitmap->events_cleared) { 496 if (bitmap->mddev->events < bitmap->events_cleared)
497 /* rocking back to read-only */ 497 /* rocking back to read-only */
498 bitmap->events_cleared = bitmap->mddev->events; 498 bitmap->events_cleared = bitmap->mddev->events;
499 sb->events_cleared = cpu_to_le64(bitmap->events_cleared); 499 sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
500 } 500 sb->state = cpu_to_le32(bitmap->flags);
501 /* Just in case these have been changed via sysfs: */ 501 /* Just in case these have been changed via sysfs: */
502 sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); 502 sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
503 sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); 503 sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
@@ -618,7 +618,7 @@ success:
618 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 618 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
619 bitmap->flags |= BITMAP_HOSTENDIAN; 619 bitmap->flags |= BITMAP_HOSTENDIAN;
620 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 620 bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
621 if (sb->state & cpu_to_le32(BITMAP_STALE)) 621 if (bitmap->flags & BITMAP_STALE)
622 bitmap->events_cleared = bitmap->mddev->events; 622 bitmap->events_cleared = bitmap->mddev->events;
623 err = 0; 623 err = 0;
624out: 624out:
@@ -652,9 +652,11 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
652 switch (op) { 652 switch (op) {
653 case MASK_SET: 653 case MASK_SET:
654 sb->state |= cpu_to_le32(bits); 654 sb->state |= cpu_to_le32(bits);
655 bitmap->flags |= bits;
655 break; 656 break;
656 case MASK_UNSET: 657 case MASK_UNSET:
657 sb->state &= cpu_to_le32(~bits); 658 sb->state &= cpu_to_le32(~bits);
659 bitmap->flags &= ~bits;
658 break; 660 break;
659 default: 661 default:
660 BUG(); 662 BUG();
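
bitmap_mask_state() now mirrors every superblock state change into the in-memory bitmap->flags, so later tests such as the "bitmap->flags & BITMAP_STALE" check changed above agree with what was written to disk. A tiny model of the mirrored update:

#include <stdio.h>

#define BITMAP_STALE 0x2

struct sb   { unsigned int state; };    /* on-disk copy */
struct bmap { unsigned int flags; struct sb sb; };

static void mask_state(struct bmap *b, unsigned int bits, int set)
{
        if (set) {
                b->sb.state |= bits;    /* on-disk */
                b->flags    |= bits;    /* in-memory mirror (the fix) */
        } else {
                b->sb.state &= ~bits;
                b->flags    &= ~bits;
        }
}

int main(void)
{
        struct bmap b = { 0, { 0 } };

        mask_state(&b, BITMAP_STALE, 1);
        printf("stale in memory: %d\n", !!(b.flags & BITMAP_STALE));
        return 0;
}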
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7d6f7f18a920..aa640a85bb21 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3324,7 +3324,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
3324 char *e; 3324 char *e;
3325 unsigned long long n = simple_strtoull(buf, &e, 10); 3325 unsigned long long n = simple_strtoull(buf, &e, 10);
3326 3326
3327 if (mddev->pers) 3327 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3328 return -EBUSY; 3328 return -EBUSY;
3329 if (cmd_match(buf, "none")) 3329 if (cmd_match(buf, "none"))
3330 n = MaxSector; 3330 n = MaxSector;
@@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name)
4347 disk->fops = &md_fops; 4347 disk->fops = &md_fops;
4348 disk->private_data = mddev; 4348 disk->private_data = mddev;
4349 disk->queue = mddev->queue; 4349 disk->queue = mddev->queue;
4350 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4350 /* Allow extended partitions. This makes the 4351 /* Allow extended partitions. This makes the
4351 * 'mdp' device redundant, but we can't really 4352 * 'mdp' device redundant, but we can't really
4352 * remove it now. 4353 * remove it now.
4353 */ 4354 */
4354 disk->flags |= GENHD_FL_EXT_DEVT; 4355 disk->flags |= GENHD_FL_EXT_DEVT;
4355 add_disk(disk);
4356 mddev->gendisk = disk; 4356 mddev->gendisk = disk;
4357 /* As soon as we call add_disk(), another thread could get
4358 * through to md_open, so make sure it doesn't get too far
4359 */
4360 mutex_lock(&mddev->open_mutex);
4361 add_disk(disk);
4362
4357 error = kobject_init_and_add(&mddev->kobj, &md_ktype, 4363 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
4358 &disk_to_dev(disk)->kobj, "%s", "md"); 4364 &disk_to_dev(disk)->kobj, "%s", "md");
4359 if (error) { 4365 if (error) {
@@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name)
4367 if (mddev->kobj.sd && 4373 if (mddev->kobj.sd &&
4368 sysfs_create_group(&mddev->kobj, &md_bitmap_group)) 4374 sysfs_create_group(&mddev->kobj, &md_bitmap_group))
4369 printk(KERN_DEBUG "pointless warning\n"); 4375 printk(KERN_DEBUG "pointless warning\n");
4370 4376 mutex_unlock(&mddev->open_mutex);
4371 blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
4372 abort: 4377 abort:
4373 mutex_unlock(&disks_mutex); 4378 mutex_unlock(&disks_mutex);
4374 if (!error && mddev->kobj.sd) { 4379 if (!error && mddev->kobj.sd) {
@@ -5211,6 +5216,16 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
5211 } else 5216 } else
5212 super_types[mddev->major_version]. 5217 super_types[mddev->major_version].
5213 validate_super(mddev, rdev); 5218 validate_super(mddev, rdev);
5219 if ((info->state & (1<<MD_DISK_SYNC)) &&
5220 (!test_bit(In_sync, &rdev->flags) ||
5221 rdev->raid_disk != info->raid_disk)) {
5222 /* This was a hot-add request, but events doesn't
5223 * match, so reject it.
5224 */
5225 export_rdev(rdev);
5226 return -EINVAL;
5227 }
5228
5214 if (test_bit(In_sync, &rdev->flags)) 5229 if (test_bit(In_sync, &rdev->flags))
5215 rdev->saved_raid_disk = rdev->raid_disk; 5230 rdev->saved_raid_disk = rdev->raid_disk;
5216 else 5231 else
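
The md_alloc() hunk closes a startup race: add_disk() makes the device node visible, so a concurrent md_open() could run before setup finished. Taking open_mutex before add_disk() and releasing it only after the sysfs pieces are in place makes the opener block until the object is complete. A pthreads sketch of that lock-before-publish idea (build with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t open_mutex = PTHREAD_MUTEX_INITIALIZER;
static int ready;                       /* completed while still holding the lock */

static void *opener(void *unused)
{
        (void)unused;
        pthread_mutex_lock(&open_mutex);        /* the md_open() path */
        printf("open sees ready=%d\n", ready);  /* always 1 */
        pthread_mutex_unlock(&open_mutex);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_mutex_lock(&open_mutex);        /* lock BEFORE publishing */
        pthread_create(&t, NULL, opener, NULL); /* models add_disk() visibility */
        ready = 1;                              /* finish setup */
        pthread_mutex_unlock(&open_mutex);

        pthread_join(t, NULL);
        return 0;
}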
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index c35890990985..3535c23af288 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -146,7 +146,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
146 int i; 146 int i;
147 147
148 seq_printf (seq, " [%d/%d] [", conf->raid_disks, 148 seq_printf (seq, " [%d/%d] [", conf->raid_disks,
149 conf->working_disks); 149 conf->raid_disks - mddev->degraded);
150 for (i = 0; i < conf->raid_disks; i++) 150 for (i = 0; i < conf->raid_disks; i++)
151 seq_printf (seq, "%s", 151 seq_printf (seq, "%s",
152 conf->multipaths[i].rdev && 152 conf->multipaths[i].rdev &&
@@ -186,35 +186,36 @@ static int multipath_congested(void *data, int bits)
186static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) 186static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
187{ 187{
188 multipath_conf_t *conf = mddev->private; 188 multipath_conf_t *conf = mddev->private;
189 char b[BDEVNAME_SIZE];
189 190
190 if (conf->working_disks <= 1) { 191 if (conf->raid_disks - mddev->degraded <= 1) {
191 /* 192 /*
192 * Uh oh, we can do nothing if this is our last path, but 193 * Uh oh, we can do nothing if this is our last path, but
193 * first check if this is a queued request for a device 194 * first check if this is a queued request for a device
194 * which has just failed. 195 * which has just failed.
195 */ 196 */
196 printk(KERN_ALERT 197 printk(KERN_ALERT
197 "multipath: only one IO path left and IO error.\n"); 198 "multipath: only one IO path left and IO error.\n");
198 /* leave it active... it's all we have */ 199 /* leave it active... it's all we have */
199 } else { 200 return;
200 /* 201 }
201 * Mark disk as unusable 202 /*
202 */ 203 * Mark disk as unusable
203 if (!test_bit(Faulty, &rdev->flags)) { 204 */
204 char b[BDEVNAME_SIZE]; 205 if (test_and_clear_bit(In_sync, &rdev->flags)) {
205 clear_bit(In_sync, &rdev->flags); 206 unsigned long flags;
206 set_bit(Faulty, &rdev->flags); 207 spin_lock_irqsave(&conf->device_lock, flags);
207 set_bit(MD_CHANGE_DEVS, &mddev->flags); 208 mddev->degraded++;
208 conf->working_disks--; 209 spin_unlock_irqrestore(&conf->device_lock, flags);
209 mddev->degraded++;
210 printk(KERN_ALERT "multipath: IO failure on %s,"
211 " disabling IO path.\n"
212 "multipath: Operation continuing"
213 " on %d IO paths.\n",
214 bdevname (rdev->bdev,b),
215 conf->working_disks);
216 }
217 } 210 }
211 set_bit(Faulty, &rdev->flags);
212 set_bit(MD_CHANGE_DEVS, &mddev->flags);
213 printk(KERN_ALERT "multipath: IO failure on %s,"
214 " disabling IO path.\n"
215 "multipath: Operation continuing"
216 " on %d IO paths.\n",
217 bdevname(rdev->bdev, b),
218 conf->raid_disks - mddev->degraded);
218} 219}
219 220
220static void print_multipath_conf (multipath_conf_t *conf) 221static void print_multipath_conf (multipath_conf_t *conf)
@@ -227,7 +228,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
227 printk("(conf==NULL)\n"); 228 printk("(conf==NULL)\n");
228 return; 229 return;
229 } 230 }
230 printk(" --- wd:%d rd:%d\n", conf->working_disks, 231 printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
231 conf->raid_disks); 232 conf->raid_disks);
232 233
233 for (i = 0; i < conf->raid_disks; i++) { 234 for (i = 0; i < conf->raid_disks; i++) {
@@ -274,10 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
274 PAGE_CACHE_SIZE - 1); 275 PAGE_CACHE_SIZE - 1);
275 } 276 }
276 277
277 conf->working_disks++; 278 spin_lock_irq(&conf->device_lock);
278 mddev->degraded--; 279 mddev->degraded--;
279 rdev->raid_disk = path; 280 rdev->raid_disk = path;
280 set_bit(In_sync, &rdev->flags); 281 set_bit(In_sync, &rdev->flags);
282 spin_unlock_irq(&conf->device_lock);
281 rcu_assign_pointer(p->rdev, rdev); 283 rcu_assign_pointer(p->rdev, rdev);
282 err = 0; 284 err = 0;
283 md_integrity_add_rdev(rdev, mddev); 285 md_integrity_add_rdev(rdev, mddev);
@@ -391,6 +393,7 @@ static int multipath_run (mddev_t *mddev)
391 int disk_idx; 393 int disk_idx;
392 struct multipath_info *disk; 394 struct multipath_info *disk;
393 mdk_rdev_t *rdev; 395 mdk_rdev_t *rdev;
396 int working_disks;
394 397
395 if (md_check_no_bitmap(mddev)) 398 if (md_check_no_bitmap(mddev))
396 return -EINVAL; 399 return -EINVAL;
@@ -424,7 +427,7 @@ static int multipath_run (mddev_t *mddev)
424 goto out_free_conf; 427 goto out_free_conf;
425 } 428 }
426 429
427 conf->working_disks = 0; 430 working_disks = 0;
428 list_for_each_entry(rdev, &mddev->disks, same_set) { 431 list_for_each_entry(rdev, &mddev->disks, same_set) {
429 disk_idx = rdev->raid_disk; 432 disk_idx = rdev->raid_disk;
430 if (disk_idx < 0 || 433 if (disk_idx < 0 ||
@@ -446,7 +449,7 @@ static int multipath_run (mddev_t *mddev)
446 } 449 }
447 450
448 if (!test_bit(Faulty, &rdev->flags)) 451 if (!test_bit(Faulty, &rdev->flags))
449 conf->working_disks++; 452 working_disks++;
450 } 453 }
451 454
452 conf->raid_disks = mddev->raid_disks; 455 conf->raid_disks = mddev->raid_disks;
@@ -454,12 +457,12 @@ static int multipath_run (mddev_t *mddev)
454 spin_lock_init(&conf->device_lock); 457 spin_lock_init(&conf->device_lock);
455 INIT_LIST_HEAD(&conf->retry_list); 458 INIT_LIST_HEAD(&conf->retry_list);
456 459
457 if (!conf->working_disks) { 460 if (!working_disks) {
458 printk(KERN_ERR "multipath: no operational IO paths for %s\n", 461 printk(KERN_ERR "multipath: no operational IO paths for %s\n",
459 mdname(mddev)); 462 mdname(mddev));
460 goto out_free_conf; 463 goto out_free_conf;
461 } 464 }
462 mddev->degraded = conf->raid_disks - conf->working_disks; 465 mddev->degraded = conf->raid_disks - working_disks;
463 466
464 conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS, 467 conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
465 sizeof(struct multipath_bh)); 468 sizeof(struct multipath_bh));
@@ -481,7 +484,8 @@ static int multipath_run (mddev_t *mddev)
481 484
482 printk(KERN_INFO 485 printk(KERN_INFO
483 "multipath: array %s active with %d out of %d IO paths\n", 486 "multipath: array %s active with %d out of %d IO paths\n",
484 mdname(mddev), conf->working_disks, mddev->raid_disks); 487 mdname(mddev), conf->raid_disks - mddev->degraded,
488 mddev->raid_disks);
485 /* 489 /*
486 * Ok, everything is just fine now 490 * Ok, everything is just fine now
487 */ 491 */
diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h
index d1c2a8d78395..3c5a45eb5f8a 100644
--- a/drivers/md/multipath.h
+++ b/drivers/md/multipath.h
@@ -9,7 +9,6 @@ struct multipath_private_data {
9 mddev_t *mddev; 9 mddev_t *mddev;
10 struct multipath_info *multipaths; 10 struct multipath_info *multipaths;
11 int raid_disks; 11 int raid_disks;
12 int working_disks;
13 spinlock_t device_lock; 12 spinlock_t device_lock;
14 struct list_head retry_list; 13 struct list_head retry_list;
15 14
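
The multipath changes drop the cached conf->working_disks counter and derive the live-path count as raid_disks - mddev->degraded, with degraded itself adjusted under device_lock; keeping one counter means the two values can never drift apart. A minimal illustration of derived versus cached state:

#include <stdio.h>

struct conf { int raid_disks; int degraded; };

/* Derive instead of caching: the count cannot fall out of sync. */
static int working_disks(const struct conf *c)
{
        return c->raid_disks - c->degraded;
}

int main(void)
{
        struct conf c = { .raid_disks = 4, .degraded = 0 };

        c.degraded++;                   /* one path failed */
        printf("[%d/%d] paths alive\n", c.raid_disks, working_disks(&c));
        return 0;
}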
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2b7a7ff401dc..5d096096f958 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
297 rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); 297 rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
298} 298}
299 299
300static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv, 300static void r1_bio_write_done(r1bio_t *r1_bio)
301 int behind)
302{ 301{
303 if (atomic_dec_and_test(&r1_bio->remaining)) 302 if (atomic_dec_and_test(&r1_bio->remaining))
304 { 303 {
305 /* it really is the end of this request */ 304 /* it really is the end of this request */
306 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { 305 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
307 /* free extra copy of the data pages */ 306 /* free extra copy of the data pages */
308 int i = vcnt; 307 int i = r1_bio->behind_page_count;
309 while (i--) 308 while (i--)
310 safe_put_page(bv[i].bv_page); 309 safe_put_page(r1_bio->behind_pages[i]);
310 kfree(r1_bio->behind_pages);
311 r1_bio->behind_pages = NULL;
311 } 312 }
312 /* clear the bitmap if all writes complete successfully */ 313 /* clear the bitmap if all writes complete successfully */
313 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, 314 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
314 r1_bio->sectors, 315 r1_bio->sectors,
315 !test_bit(R1BIO_Degraded, &r1_bio->state), 316 !test_bit(R1BIO_Degraded, &r1_bio->state),
316 behind); 317 test_bit(R1BIO_BehindIO, &r1_bio->state));
317 md_write_end(r1_bio->mddev); 318 md_write_end(r1_bio->mddev);
318 raid_end_bio_io(r1_bio); 319 raid_end_bio_io(r1_bio);
319 } 320 }
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
386 * Let's see if all mirrored write operations have finished 387 * Let's see if all mirrored write operations have finished
387 * already. 388 * already.
388 */ 389 */
389 r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind); 390 r1_bio_write_done(r1_bio);
390 391
391 if (to_put) 392 if (to_put)
392 bio_put(to_put); 393 bio_put(to_put);
@@ -411,10 +412,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
411{ 412{
412 const sector_t this_sector = r1_bio->sector; 413 const sector_t this_sector = r1_bio->sector;
413 const int sectors = r1_bio->sectors; 414 const int sectors = r1_bio->sectors;
414 int new_disk = -1;
415 int start_disk; 415 int start_disk;
416 int best_disk;
416 int i; 417 int i;
417 sector_t new_distance, current_distance; 418 sector_t best_dist;
418 mdk_rdev_t *rdev; 419 mdk_rdev_t *rdev;
419 int choose_first; 420 int choose_first;
420 421
@@ -425,6 +426,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
425 * We take the first readable disk when above the resync window. 426 * We take the first readable disk when above the resync window.
426 */ 427 */
427 retry: 428 retry:
429 best_disk = -1;
430 best_dist = MaxSector;
428 if (conf->mddev->recovery_cp < MaxSector && 431 if (conf->mddev->recovery_cp < MaxSector &&
429 (this_sector + sectors >= conf->next_resync)) { 432 (this_sector + sectors >= conf->next_resync)) {
430 choose_first = 1; 433 choose_first = 1;
@@ -434,8 +437,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
434 start_disk = conf->last_used; 437 start_disk = conf->last_used;
435 } 438 }
436 439
437 /* make sure the disk is operational */
438 for (i = 0 ; i < conf->raid_disks ; i++) { 440 for (i = 0 ; i < conf->raid_disks ; i++) {
441 sector_t dist;
439 int disk = start_disk + i; 442 int disk = start_disk + i;
440 if (disk >= conf->raid_disks) 443 if (disk >= conf->raid_disks)
441 disk -= conf->raid_disks; 444 disk -= conf->raid_disks;
@@ -443,60 +446,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
443 rdev = rcu_dereference(conf->mirrors[disk].rdev); 446 rdev = rcu_dereference(conf->mirrors[disk].rdev);
444 if (r1_bio->bios[disk] == IO_BLOCKED 447 if (r1_bio->bios[disk] == IO_BLOCKED
445 || rdev == NULL 448 || rdev == NULL
446 || !test_bit(In_sync, &rdev->flags)) 449 || test_bit(Faulty, &rdev->flags))
447 continue; 450 continue;
448 451 if (!test_bit(In_sync, &rdev->flags) &&
449 new_disk = disk; 452 rdev->recovery_offset < this_sector + sectors)
450 if (!test_bit(WriteMostly, &rdev->flags))
451 break;
452 }
453
454 if (new_disk < 0 || choose_first)
455 goto rb_out;
456
457 /*
458 * Don't change to another disk for sequential reads:
459 */
460 if (conf->next_seq_sect == this_sector)
461 goto rb_out;
462 if (this_sector == conf->mirrors[new_disk].head_position)
463 goto rb_out;
464
465 current_distance = abs(this_sector
466 - conf->mirrors[new_disk].head_position);
467
468 /* look for a better disk - i.e. head is closer */
469 start_disk = new_disk;
470 for (i = 1; i < conf->raid_disks; i++) {
471 int disk = start_disk + 1;
472 if (disk >= conf->raid_disks)
473 disk -= conf->raid_disks;
474
475 rdev = rcu_dereference(conf->mirrors[disk].rdev);
476 if (r1_bio->bios[disk] == IO_BLOCKED
477 || rdev == NULL
478 || !test_bit(In_sync, &rdev->flags)
479 || test_bit(WriteMostly, &rdev->flags))
480 continue; 453 continue;
481 454 if (test_bit(WriteMostly, &rdev->flags)) {
482 if (!atomic_read(&rdev->nr_pending)) { 455 /* Don't balance among write-mostly, just
483 new_disk = disk; 456 * use the first as a last resort */
457 if (best_disk < 0)
458 best_disk = disk;
459 continue;
460 }
461 /* This is a reasonable device to use. It might
462 * even be best.
463 */
464 dist = abs(this_sector - conf->mirrors[disk].head_position);
465 if (choose_first
466 /* Don't change to another disk for sequential reads */
467 || conf->next_seq_sect == this_sector
468 || dist == 0
469 /* If device is idle, use it */
470 || atomic_read(&rdev->nr_pending) == 0) {
471 best_disk = disk;
484 break; 472 break;
485 } 473 }
486 new_distance = abs(this_sector - conf->mirrors[disk].head_position); 474 if (dist < best_dist) {
487 if (new_distance < current_distance) { 475 best_dist = dist;
488 current_distance = new_distance; 476 best_disk = disk;
489 new_disk = disk;
490 } 477 }
491 } 478 }
492 479
493 rb_out: 480 if (best_disk >= 0) {
494 if (new_disk >= 0) { 481 rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
495 rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
496 if (!rdev) 482 if (!rdev)
497 goto retry; 483 goto retry;
498 atomic_inc(&rdev->nr_pending); 484 atomic_inc(&rdev->nr_pending);
499 if (!test_bit(In_sync, &rdev->flags)) { 485 if (test_bit(Faulty, &rdev->flags)) {
500 /* cannot risk returning a device that failed 486 /* cannot risk returning a device that failed
501 * before we inc'ed nr_pending 487 * before we inc'ed nr_pending
502 */ 488 */
@@ -504,11 +490,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
504 goto retry; 490 goto retry;
505 } 491 }
506 conf->next_seq_sect = this_sector + sectors; 492 conf->next_seq_sect = this_sector + sectors;
507 conf->last_used = new_disk; 493 conf->last_used = best_disk;
508 } 494 }
509 rcu_read_unlock(); 495 rcu_read_unlock();
510 496
511 return new_disk; 497 return best_disk;
512} 498}
513 499
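
The rewrite above folds read_balance()'s two scan loops into a single pass: write-mostly devices are kept only as a last resort, a sequential read, a zero seek distance, or an idle disk ends the scan immediately, and otherwise the smallest head distance wins. A minimal userspace model of that selection pass (field and function names are simplified stand-ins, not the kernel's):

#include <stdlib.h>

struct mirror { long long head_pos; int in_sync, write_mostly, pending; };

/* Model of the single-pass selection: returns chosen index or -1. */
static int pick_read_disk(struct mirror *m, int ndisks,
                          long long sector, long long last_seq_sector)
{
    int best_disk = -1;
    long long best_dist = -1; /* -1 == "infinity" */

    for (int i = 0; i < ndisks; i++) {
        long long dist;

        if (!m[i].in_sync)
            continue;
        if (m[i].write_mostly) {
            /* never balanced among; only kept as a last resort */
            if (best_disk < 0)
                best_disk = i;
            continue;
        }
        dist = llabs(sector - m[i].head_pos);
        /* sequential read, zero seek, or idle disk: take it now */
        if (sector == last_seq_sector || dist == 0 || m[i].pending == 0)
            return i;
        if (best_dist < 0 || dist < best_dist) {
            best_dist = dist;
            best_disk = i;
        }
    }
    return best_disk;
}

Doing the Faulty/In_sync filtering and the write-mostly fallback in the same loop is what lets each device be examined exactly once.
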
514static int raid1_congested(void *data, int bits) 500static int raid1_congested(void *data, int bits)
@@ -675,37 +661,36 @@ static void unfreeze_array(conf_t *conf)
675 661
676 662
677/* duplicate the data pages for behind I/O 663/* duplicate the data pages for behind I/O
678 * We return a list of bio_vec rather than just page pointers
679 * as it makes freeing easier
680 */ 664 */
681static struct bio_vec *alloc_behind_pages(struct bio *bio) 665static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
682{ 666{
683 int i; 667 int i;
684 struct bio_vec *bvec; 668 struct bio_vec *bvec;
685 struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), 669 struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
686 GFP_NOIO); 670 GFP_NOIO);
687 if (unlikely(!pages)) 671 if (unlikely(!pages))
688 goto do_sync_io; 672 return;
689 673
690 bio_for_each_segment(bvec, bio, i) { 674 bio_for_each_segment(bvec, bio, i) {
691 pages[i].bv_page = alloc_page(GFP_NOIO); 675 pages[i] = alloc_page(GFP_NOIO);
692 if (unlikely(!pages[i].bv_page)) 676 if (unlikely(!pages[i]))
693 goto do_sync_io; 677 goto do_sync_io;
694 memcpy(kmap(pages[i].bv_page) + bvec->bv_offset, 678 memcpy(kmap(pages[i]) + bvec->bv_offset,
695 kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); 679 kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
696 kunmap(pages[i].bv_page); 680 kunmap(pages[i]);
697 kunmap(bvec->bv_page); 681 kunmap(bvec->bv_page);
698 } 682 }
699 683 r1_bio->behind_pages = pages;
700 return pages; 684 r1_bio->behind_page_count = bio->bi_vcnt;
685 set_bit(R1BIO_BehindIO, &r1_bio->state);
686 return;
701 687
702do_sync_io: 688do_sync_io:
703 if (pages) 689 for (i = 0; i < bio->bi_vcnt; i++)
704 for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++) 690 if (pages[i])
705 put_page(pages[i].bv_page); 691 put_page(pages[i]);
706 kfree(pages); 692 kfree(pages);
707 PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); 693 PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
708 return NULL;
709} 694}
710 695
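
With the copied pages now stored in the r1bio itself (behind_pages/behind_page_count), alloc_behind_pages() can return void: failure simply leaves the fields NULL and the caller falls back to synchronous completion. A sketch of the same allocate-copy-or-unwind shape in plain C (names invented, fixed-size buffers standing in for pages):

#include <stdlib.h>
#include <string.h>

/* Copy n fixed-size buffers; on any failure free everything and
 * return NULL so the caller silently falls back to the sync path. */
static void **copy_buffers(void *const *src, size_t n, size_t len)
{
    void **pages = calloc(n, sizeof(*pages));
    if (!pages)
        return NULL;
    for (size_t i = 0; i < n; i++) {
        pages[i] = malloc(len);
        if (!pages[i])
            goto unwind;
        memcpy(pages[i], src[i], len);
    }
    return pages;
unwind:
    for (size_t i = 0; i < n; i++)
        free(pages[i]);            /* free(NULL) is a no-op */
    free(pages);
    return NULL;
}
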
711static int make_request(mddev_t *mddev, struct bio * bio) 696static int make_request(mddev_t *mddev, struct bio * bio)
@@ -717,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
717 int i, targets = 0, disks; 702 int i, targets = 0, disks;
718 struct bitmap *bitmap; 703 struct bitmap *bitmap;
719 unsigned long flags; 704 unsigned long flags;
720 struct bio_vec *behind_pages = NULL;
721 const int rw = bio_data_dir(bio); 705 const int rw = bio_data_dir(bio);
722 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); 706 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
723 const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); 707 const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -870,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
870 if (bitmap && 854 if (bitmap &&
871 (atomic_read(&bitmap->behind_writes) 855 (atomic_read(&bitmap->behind_writes)
872 < mddev->bitmap_info.max_write_behind) && 856 < mddev->bitmap_info.max_write_behind) &&
873 !waitqueue_active(&bitmap->behind_wait) && 857 !waitqueue_active(&bitmap->behind_wait))
874 (behind_pages = alloc_behind_pages(bio)) != NULL) 858 alloc_behind_pages(bio, r1_bio);
875 set_bit(R1BIO_BehindIO, &r1_bio->state);
876 859
877 atomic_set(&r1_bio->remaining, 1); 860 atomic_set(&r1_bio->remaining, 1);
878 atomic_set(&r1_bio->behind_remaining, 0); 861 atomic_set(&r1_bio->behind_remaining, 0);
@@ -893,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
893 mbio->bi_rw = WRITE | do_flush_fua | do_sync; 876 mbio->bi_rw = WRITE | do_flush_fua | do_sync;
894 mbio->bi_private = r1_bio; 877 mbio->bi_private = r1_bio;
895 878
896 if (behind_pages) { 879 if (r1_bio->behind_pages) {
897 struct bio_vec *bvec; 880 struct bio_vec *bvec;
898 int j; 881 int j;
899 882
@@ -905,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
905 * them all 888 * them all
906 */ 889 */
907 __bio_for_each_segment(bvec, mbio, j, 0) 890 __bio_for_each_segment(bvec, mbio, j, 0)
908 bvec->bv_page = behind_pages[j].bv_page; 891 bvec->bv_page = r1_bio->behind_pages[j];
909 if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) 892 if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
910 atomic_inc(&r1_bio->behind_remaining); 893 atomic_inc(&r1_bio->behind_remaining);
911 } 894 }
@@ -915,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
915 bio_list_add(&conf->pending_bio_list, mbio); 898 bio_list_add(&conf->pending_bio_list, mbio);
916 spin_unlock_irqrestore(&conf->device_lock, flags); 899 spin_unlock_irqrestore(&conf->device_lock, flags);
917 } 900 }
918 r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); 901 r1_bio_write_done(r1_bio);
919 kfree(behind_pages); /* the behind pages are attached to the bios now */
920 902
921 /* In case raid1d snuck in to freeze_array */ 903 /* In case raid1d snuck in to freeze_array */
922 wake_up(&conf->wait_barrier); 904 wake_up(&conf->wait_barrier);
@@ -1196,194 +1178,210 @@ static void end_sync_write(struct bio *bio, int error)
1196 } 1178 }
1197} 1179}
1198 1180
1199static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) 1181static int fix_sync_read_error(r1bio_t *r1_bio)
1200{ 1182{
1183 /* Try some synchronous reads of other devices to get
1184 * good data, much like with normal read errors. Only
1185 * read into the pages we already have so we don't
1186 * need to re-issue the read request.
1187 * We don't need to freeze the array, because being in an
1188 * active sync request, there is no normal IO, and
1189 * no overlapping syncs.
1190 */
1191 mddev_t *mddev = r1_bio->mddev;
1201 conf_t *conf = mddev->private; 1192 conf_t *conf = mddev->private;
1202 int i; 1193 struct bio *bio = r1_bio->bios[r1_bio->read_disk];
1203 int disks = conf->raid_disks; 1194 sector_t sect = r1_bio->sector;
1204 struct bio *bio, *wbio; 1195 int sectors = r1_bio->sectors;
1205 1196 int idx = 0;
1206 bio = r1_bio->bios[r1_bio->read_disk];
1207 1197
1198 while(sectors) {
1199 int s = sectors;
1200 int d = r1_bio->read_disk;
1201 int success = 0;
1202 mdk_rdev_t *rdev;
1203 int start;
1208 1204
1209 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { 1205 if (s > (PAGE_SIZE>>9))
1210 /* We have read all readable devices. If we haven't 1206 s = PAGE_SIZE >> 9;
1211 * got the block, then there is no hope left. 1207 do {
1212 * If we have, then we want to do a comparison 1208 if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
1213 * and skip the write if everything is the same. 1209 /* No rcu protection needed here devices
1214 * If any blocks failed to read, then we need to 1210 * can only be removed when no resync is
1215 * attempt an over-write 1211 * active, and resync is currently active
1216 */ 1212 */
1217 int primary; 1213 rdev = conf->mirrors[d].rdev;
1218 if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { 1214 if (sync_page_io(rdev,
1219 for (i=0; i<mddev->raid_disks; i++) 1215 sect,
1220 if (r1_bio->bios[i]->bi_end_io == end_sync_read) 1216 s<<9,
1221 md_error(mddev, conf->mirrors[i].rdev); 1217 bio->bi_io_vec[idx].bv_page,
1218 READ, false)) {
1219 success = 1;
1220 break;
1221 }
1222 }
1223 d++;
1224 if (d == conf->raid_disks)
1225 d = 0;
1226 } while (!success && d != r1_bio->read_disk);
1222 1227
1223 md_done_sync(mddev, r1_bio->sectors, 1); 1228 if (!success) {
1229 char b[BDEVNAME_SIZE];
1230 /* Cannot read from anywhere, array is toast */
1231 md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
1232 printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
1233 " for block %llu\n",
1234 mdname(mddev),
1235 bdevname(bio->bi_bdev, b),
1236 (unsigned long long)r1_bio->sector);
1237 md_done_sync(mddev, r1_bio->sectors, 0);
1224 put_buf(r1_bio); 1238 put_buf(r1_bio);
1225 return; 1239 return 0;
1226 } 1240 }
1227 for (primary=0; primary<mddev->raid_disks; primary++)
1228 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
1229 test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
1230 r1_bio->bios[primary]->bi_end_io = NULL;
1231 rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
1232 break;
1233 }
1234 r1_bio->read_disk = primary;
1235 for (i=0; i<mddev->raid_disks; i++)
1236 if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
1237 int j;
1238 int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
1239 struct bio *pbio = r1_bio->bios[primary];
1240 struct bio *sbio = r1_bio->bios[i];
1241
1242 if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
1243 for (j = vcnt; j-- ; ) {
1244 struct page *p, *s;
1245 p = pbio->bi_io_vec[j].bv_page;
1246 s = sbio->bi_io_vec[j].bv_page;
1247 if (memcmp(page_address(p),
1248 page_address(s),
1249 PAGE_SIZE))
1250 break;
1251 }
1252 } else
1253 j = 0;
1254 if (j >= 0)
1255 mddev->resync_mismatches += r1_bio->sectors;
1256 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
1257 && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
1258 sbio->bi_end_io = NULL;
1259 rdev_dec_pending(conf->mirrors[i].rdev, mddev);
1260 } else {
1261 /* fixup the bio for reuse */
1262 int size;
1263 sbio->bi_vcnt = vcnt;
1264 sbio->bi_size = r1_bio->sectors << 9;
1265 sbio->bi_idx = 0;
1266 sbio->bi_phys_segments = 0;
1267 sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1268 sbio->bi_flags |= 1 << BIO_UPTODATE;
1269 sbio->bi_next = NULL;
1270 sbio->bi_sector = r1_bio->sector +
1271 conf->mirrors[i].rdev->data_offset;
1272 sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1273 size = sbio->bi_size;
1274 for (j = 0; j < vcnt ; j++) {
1275 struct bio_vec *bi;
1276 bi = &sbio->bi_io_vec[j];
1277 bi->bv_offset = 0;
1278 if (size > PAGE_SIZE)
1279 bi->bv_len = PAGE_SIZE;
1280 else
1281 bi->bv_len = size;
1282 size -= PAGE_SIZE;
1283 memcpy(page_address(bi->bv_page),
1284 page_address(pbio->bi_io_vec[j].bv_page),
1285 PAGE_SIZE);
1286 }
1287 1241
1288 } 1242 start = d;
1289 } 1243 /* write it back and re-read */
1244 while (d != r1_bio->read_disk) {
1245 if (d == 0)
1246 d = conf->raid_disks;
1247 d--;
1248 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1249 continue;
1250 rdev = conf->mirrors[d].rdev;
1251 if (sync_page_io(rdev,
1252 sect,
1253 s<<9,
1254 bio->bi_io_vec[idx].bv_page,
1255 WRITE, false) == 0) {
1256 r1_bio->bios[d]->bi_end_io = NULL;
1257 rdev_dec_pending(rdev, mddev);
1258 md_error(mddev, rdev);
1259 } else
1260 atomic_add(s, &rdev->corrected_errors);
1261 }
1262 d = start;
1263 while (d != r1_bio->read_disk) {
1264 if (d == 0)
1265 d = conf->raid_disks;
1266 d--;
1267 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1268 continue;
1269 rdev = conf->mirrors[d].rdev;
1270 if (sync_page_io(rdev,
1271 sect,
1272 s<<9,
1273 bio->bi_io_vec[idx].bv_page,
1274 READ, false) == 0)
1275 md_error(mddev, rdev);
1276 }
1277 sectors -= s;
1278 sect += s;
1279 idx ++;
1290 } 1280 }
1291 if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { 1281 set_bit(R1BIO_Uptodate, &r1_bio->state);
1292 /* ouch - failed to read all of that. 1282 set_bit(BIO_UPTODATE, &bio->bi_flags);
1293 * Try some synchronous reads of other devices to get 1283 return 1;
1294 * good data, much like with normal read errors. Only 1284}
1295 * read into the pages we already have so we don't 1285
1296 * need to re-issue the read request. 1286static int process_checks(r1bio_t *r1_bio)
1297 * We don't need to freeze the array, because being in an 1287{
1298 * active sync request, there is no normal IO, and 1288 /* We have read all readable devices. If we haven't
1299 * no overlapping syncs. 1289 * got the block, then there is no hope left.
1300 */ 1290 * If we have, then we want to do a comparison
1301 sector_t sect = r1_bio->sector; 1291 * and skip the write if everything is the same.
1302 int sectors = r1_bio->sectors; 1292 * If any blocks failed to read, then we need to
1303 int idx = 0; 1293 * attempt an over-write
1304 1294 */
1305 while(sectors) { 1295 mddev_t *mddev = r1_bio->mddev;
1306 int s = sectors; 1296 conf_t *conf = mddev->private;
1307 int d = r1_bio->read_disk; 1297 int primary;
1308 int success = 0; 1298 int i;
1309 mdk_rdev_t *rdev; 1299
1310 1300 for (primary = 0; primary < conf->raid_disks; primary++)
1311 if (s > (PAGE_SIZE>>9)) 1301 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
1312 s = PAGE_SIZE >> 9; 1302 test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
1313 do { 1303 r1_bio->bios[primary]->bi_end_io = NULL;
1314 if (r1_bio->bios[d]->bi_end_io == end_sync_read) { 1304 rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
1315 /* No rcu protection needed here devices 1305 break;
1316 * can only be removed when no resync is 1306 }
1317 * active, and resync is currently active 1307 r1_bio->read_disk = primary;
1318 */ 1308 for (i = 0; i < conf->raid_disks; i++) {
1319 rdev = conf->mirrors[d].rdev; 1309 int j;
1320 if (sync_page_io(rdev, 1310 int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
1321 sect, 1311 struct bio *pbio = r1_bio->bios[primary];
1322 s<<9, 1312 struct bio *sbio = r1_bio->bios[i];
1323 bio->bi_io_vec[idx].bv_page, 1313 int size;
1324 READ, false)) { 1314
1325 success = 1; 1315 if (r1_bio->bios[i]->bi_end_io != end_sync_read)
1326 break; 1316 continue;
1327 } 1317
1328 } 1318 if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
1329 d++; 1319 for (j = vcnt; j-- ; ) {
1330 if (d == conf->raid_disks) 1320 struct page *p, *s;
1331 d = 0; 1321 p = pbio->bi_io_vec[j].bv_page;
1332 } while (!success && d != r1_bio->read_disk); 1322 s = sbio->bi_io_vec[j].bv_page;
1333 1323 if (memcmp(page_address(p),
1334 if (success) { 1324 page_address(s),
1335 int start = d; 1325 PAGE_SIZE))
1336 /* write it back and re-read */ 1326 break;
1337 set_bit(R1BIO_Uptodate, &r1_bio->state);
1338 while (d != r1_bio->read_disk) {
1339 if (d == 0)
1340 d = conf->raid_disks;
1341 d--;
1342 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1343 continue;
1344 rdev = conf->mirrors[d].rdev;
1345 atomic_add(s, &rdev->corrected_errors);
1346 if (sync_page_io(rdev,
1347 sect,
1348 s<<9,
1349 bio->bi_io_vec[idx].bv_page,
1350 WRITE, false) == 0)
1351 md_error(mddev, rdev);
1352 }
1353 d = start;
1354 while (d != r1_bio->read_disk) {
1355 if (d == 0)
1356 d = conf->raid_disks;
1357 d--;
1358 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1359 continue;
1360 rdev = conf->mirrors[d].rdev;
1361 if (sync_page_io(rdev,
1362 sect,
1363 s<<9,
1364 bio->bi_io_vec[idx].bv_page,
1365 READ, false) == 0)
1366 md_error(mddev, rdev);
1367 }
1368 } else {
1369 char b[BDEVNAME_SIZE];
1370 /* Cannot read from anywhere, array is toast */
1371 md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
1372 printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
1373 " for block %llu\n",
1374 mdname(mddev),
1375 bdevname(bio->bi_bdev, b),
1376 (unsigned long long)r1_bio->sector);
1377 md_done_sync(mddev, r1_bio->sectors, 0);
1378 put_buf(r1_bio);
1379 return;
1380 } 1327 }
1381 sectors -= s; 1328 } else
1382 sect += s; 1329 j = 0;
1383 idx ++; 1330 if (j >= 0)
1331 mddev->resync_mismatches += r1_bio->sectors;
1332 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
1333 && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
1334 /* No need to write to this device. */
1335 sbio->bi_end_io = NULL;
1336 rdev_dec_pending(conf->mirrors[i].rdev, mddev);
1337 continue;
1338 }
1339 /* fixup the bio for reuse */
1340 sbio->bi_vcnt = vcnt;
1341 sbio->bi_size = r1_bio->sectors << 9;
1342 sbio->bi_idx = 0;
1343 sbio->bi_phys_segments = 0;
1344 sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1345 sbio->bi_flags |= 1 << BIO_UPTODATE;
1346 sbio->bi_next = NULL;
1347 sbio->bi_sector = r1_bio->sector +
1348 conf->mirrors[i].rdev->data_offset;
1349 sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1350 size = sbio->bi_size;
1351 for (j = 0; j < vcnt ; j++) {
1352 struct bio_vec *bi;
1353 bi = &sbio->bi_io_vec[j];
1354 bi->bv_offset = 0;
1355 if (size > PAGE_SIZE)
1356 bi->bv_len = PAGE_SIZE;
1357 else
1358 bi->bv_len = size;
1359 size -= PAGE_SIZE;
1360 memcpy(page_address(bi->bv_page),
1361 page_address(pbio->bi_io_vec[j].bv_page),
1362 PAGE_SIZE);
1384 } 1363 }
1385 } 1364 }
1365 return 0;
1366}
1386 1367
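
fix_sync_read_error() above walks the failed range in page-sized chunks; for each chunk it hunts forward from read_disk for any device that still returns the data, then makes two backward passes over the other mirrors, one writing the good chunk back and one re-reading it to verify, failing any device whose I/O errors out. A compact model of that per-chunk repair (device I/O abstracted behind a callback; the bio and rdev bookkeeping is omitted):

typedef int (*chunk_io_t)(int dev, long long sect, int nsect, int write);

/* Model of the per-chunk repair: find any readable copy, then walk
 * backwards writing the good data and re-reading it to verify.
 * Returns 1 if the whole range could be recovered. */
static int repair_range(chunk_io_t io, int ndevs, int read_dev,
                        long long sect, int sectors, int chunk)
{
    while (sectors) {
        int s = sectors < chunk ? sectors : chunk;
        int d = read_dev, success = 0, start;

        do {                                  /* hunt for a good copy */
            if (io(d, sect, s, 0)) {
                success = 1;
                break;
            }
            d = (d + 1) % ndevs;
        } while (d != read_dev);
        if (!success)
            return 0;                         /* no copy readable */

        start = d;
        d = start;
        while (d != read_dev) {               /* write good data back */
            d = (d ? d : ndevs) - 1;
            io(d, sect, s, 1);
        }
        d = start;
        while (d != read_dev) {               /* re-read to verify */
            d = (d ? d : ndevs) - 1;
            io(d, sect, s, 0);
        }
        sectors -= s;
        sect += s;
    }
    return 1;
}

Splitting this and process_checks() out of sync_request_write() leaves the latter a short driver: fix read errors, optionally compare copies, then schedule writes.
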
1368static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1369{
1370 conf_t *conf = mddev->private;
1371 int i;
1372 int disks = conf->raid_disks;
1373 struct bio *bio, *wbio;
1374
1375 bio = r1_bio->bios[r1_bio->read_disk];
1376
1377 if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
1378 /* ouch - failed to read all of that. */
1379 if (!fix_sync_read_error(r1_bio))
1380 return;
1381
1382 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
1383 if (process_checks(r1_bio) < 0)
1384 return;
1387 /* 1385 /*
1388 * schedule writes 1386 * schedule writes
1389 */ 1387 */
@@ -2063,7 +2061,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
2063 set_capacity(mddev->gendisk, mddev->array_sectors); 2061 set_capacity(mddev->gendisk, mddev->array_sectors);
2064 revalidate_disk(mddev->gendisk); 2062 revalidate_disk(mddev->gendisk);
2065 if (sectors > mddev->dev_sectors && 2063 if (sectors > mddev->dev_sectors &&
2066 mddev->recovery_cp == MaxSector) { 2064 mddev->recovery_cp > mddev->dev_sectors) {
2067 mddev->recovery_cp = mddev->dev_sectors; 2065 mddev->recovery_cp = mddev->dev_sectors;
2068 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 2066 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
2069 } 2067 }
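
The resize test change is subtle: `recovery_cp == MaxSector` only rearmed a resync on a fully clean array, while `recovery_cp > mddev->dev_sectors` also pulls back a checkpoint lying beyond the old device size, so newly grown space is never silently treated as in sync. Restated as a hypothetical standalone helper (`need_recovery` stands in for MD_RECOVERY_NEEDED):

/* Hypothetical restatement of the new test: after growing the array,
 * pull the resync checkpoint back unless it already covers the region
 * past the old end. */
static void resize_checkpoint(unsigned long long new_sectors,
                              unsigned long long old_sectors,
                              unsigned long long *recovery_cp,
                              int *need_recovery)
{
    if (new_sectors > old_sectors && *recovery_cp > old_sectors) {
        *recovery_cp = old_sectors;   /* resync everything past old end */
        *need_recovery = 1;
    }
}
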
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index cbfdf1a6acd9..5fc4ca1af863 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -94,7 +94,9 @@ struct r1bio_s {
94 int read_disk; 94 int read_disk;
95 95
96 struct list_head retry_list; 96 struct list_head retry_list;
97 struct bitmap_update *bitmap_update; 97 /* Next two are only valid when R1BIO_BehindIO is set */
98 struct page **behind_pages;
99 int behind_page_count;
98 /* 100 /*
99 * if the IO is in WRITE direction, then multiple bios are used. 101 * if the IO is in WRITE direction, then multiple bios are used.
100 * We choose the number when they are allocated. 102 * We choose the number when they are allocated.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8e9462626ec5..6e846688962f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error)
271 */ 271 */
272 set_bit(R10BIO_Uptodate, &r10_bio->state); 272 set_bit(R10BIO_Uptodate, &r10_bio->state);
273 raid_end_bio_io(r10_bio); 273 raid_end_bio_io(r10_bio);
274 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
274 } else { 275 } else {
275 /* 276 /*
276 * oops, read error: 277 * oops, read error - keep the refcount on the rdev
277 */ 278 */
278 char b[BDEVNAME_SIZE]; 279 char b[BDEVNAME_SIZE];
279 if (printk_ratelimit()) 280 if (printk_ratelimit())
@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error)
282 bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); 283 bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
283 reschedule_retry(r10_bio); 284 reschedule_retry(r10_bio);
284 } 285 }
285
286 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
287} 286}
288 287
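
The rdev_dec_pending() move encodes an ownership rule: a successful read completion drops its device reference immediately, while the error path deliberately keeps it, because the raid10d retry path still dereferences the rdev and releases the reference itself (see the later hunk adding rdev_dec_pending() to raid10d). A toy restatement of the hand-off, with all types invented:

struct dev  { int refs; };
struct req  { struct dev *dev; };

static void dev_put(struct dev *d)        { d->refs--; }
static void complete_io(struct req *r)    { (void)r; /* finish bio */ }
static void schedule_retry(struct req *r) { (void)r; /* queue for worker */ }

/* On success the completion releases its device reference; on error
 * the reference is handed to the retry path, which releases it after
 * the fix_read_error()-style handling is done. */
static void end_read(struct req *r, int ok)
{
    if (ok) {
        complete_io(r);
        dev_put(r->dev);
    } else {
        schedule_retry(r);   /* retry worker calls dev_put() later */
    }
}
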
289static void raid10_end_write_request(struct bio *bio, int error) 288static void raid10_end_write_request(struct bio *bio, int error)
@@ -488,13 +487,19 @@ static int raid10_mergeable_bvec(struct request_queue *q,
488static int read_balance(conf_t *conf, r10bio_t *r10_bio) 487static int read_balance(conf_t *conf, r10bio_t *r10_bio)
489{ 488{
490 const sector_t this_sector = r10_bio->sector; 489 const sector_t this_sector = r10_bio->sector;
491 int disk, slot, nslot; 490 int disk, slot;
492 const int sectors = r10_bio->sectors; 491 const int sectors = r10_bio->sectors;
493 sector_t new_distance, current_distance; 492 sector_t new_distance, best_dist;
494 mdk_rdev_t *rdev; 493 mdk_rdev_t *rdev;
494 int do_balance;
495 int best_slot;
495 496
496 raid10_find_phys(conf, r10_bio); 497 raid10_find_phys(conf, r10_bio);
497 rcu_read_lock(); 498 rcu_read_lock();
499retry:
500 best_slot = -1;
501 best_dist = MaxSector;
502 do_balance = 1;
498 /* 503 /*
499 * Check if we can balance. We can balance on the whole 504 * Check if we can balance. We can balance on the whole
500 * device if no resync is going on (recovery is ok), or below 505 * device if no resync is going on (recovery is ok), or below
@@ -502,86 +507,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
502 * above the resync window. 507 * above the resync window.
503 */ 508 */
504 if (conf->mddev->recovery_cp < MaxSector 509 if (conf->mddev->recovery_cp < MaxSector
505 && (this_sector + sectors >= conf->next_resync)) { 510 && (this_sector + sectors >= conf->next_resync))
506 /* make sure that disk is operational */ 511 do_balance = 0;
507 slot = 0;
508 disk = r10_bio->devs[slot].devnum;
509
510 while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
511 r10_bio->devs[slot].bio == IO_BLOCKED ||
512 !test_bit(In_sync, &rdev->flags)) {
513 slot++;
514 if (slot == conf->copies) {
515 slot = 0;
516 disk = -1;
517 break;
518 }
519 disk = r10_bio->devs[slot].devnum;
520 }
521 goto rb_out;
522 }
523
524 512
525 /* make sure the disk is operational */ 513 for (slot = 0; slot < conf->copies ; slot++) {
526 slot = 0; 514 if (r10_bio->devs[slot].bio == IO_BLOCKED)
527 disk = r10_bio->devs[slot].devnum; 515 continue;
528 while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
529 r10_bio->devs[slot].bio == IO_BLOCKED ||
530 !test_bit(In_sync, &rdev->flags)) {
531 slot ++;
532 if (slot == conf->copies) {
533 disk = -1;
534 goto rb_out;
535 }
536 disk = r10_bio->devs[slot].devnum; 516 disk = r10_bio->devs[slot].devnum;
537 } 517 rdev = rcu_dereference(conf->mirrors[disk].rdev);
538 518 if (rdev == NULL)
539
540 current_distance = abs(r10_bio->devs[slot].addr -
541 conf->mirrors[disk].head_position);
542
543 /* Find the disk whose head is closest,
544 * or - for far > 1 - find the closest to partition beginning */
545
546 for (nslot = slot; nslot < conf->copies; nslot++) {
547 int ndisk = r10_bio->devs[nslot].devnum;
548
549
550 if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
551 r10_bio->devs[nslot].bio == IO_BLOCKED ||
552 !test_bit(In_sync, &rdev->flags))
553 continue; 519 continue;
520 if (!test_bit(In_sync, &rdev->flags))
521 continue;
522
523 if (!do_balance)
524 break;
554 525
555 /* This optimisation is debatable, and completely destroys 526 /* This optimisation is debatable, and completely destroys
556 * sequential read speed for 'far copies' arrays. So only 527 * sequential read speed for 'far copies' arrays. So only
557 * keep it for 'near' arrays, and review those later. 528 * keep it for 'near' arrays, and review those later.
558 */ 529 */
559 if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) { 530 if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending))
560 disk = ndisk;
561 slot = nslot;
562 break; 531 break;
563 }
564 532
565 /* for far > 1 always use the lowest address */ 533 /* for far > 1 always use the lowest address */
566 if (conf->far_copies > 1) 534 if (conf->far_copies > 1)
567 new_distance = r10_bio->devs[nslot].addr; 535 new_distance = r10_bio->devs[slot].addr;
568 else 536 else
569 new_distance = abs(r10_bio->devs[nslot].addr - 537 new_distance = abs(r10_bio->devs[slot].addr -
570 conf->mirrors[ndisk].head_position); 538 conf->mirrors[disk].head_position);
571 if (new_distance < current_distance) { 539 if (new_distance < best_dist) {
572 current_distance = new_distance; 540 best_dist = new_distance;
573 disk = ndisk; 541 best_slot = slot;
574 slot = nslot;
575 } 542 }
576 } 543 }
544 if (slot == conf->copies)
545 slot = best_slot;
577 546
578rb_out: 547 if (slot >= 0) {
579 r10_bio->read_slot = slot; 548 disk = r10_bio->devs[slot].devnum;
580/* conf->next_seq_sect = this_sector + sectors;*/ 549 rdev = rcu_dereference(conf->mirrors[disk].rdev);
581 550 if (!rdev)
582 if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL) 551 goto retry;
583 atomic_inc(&conf->mirrors[disk].rdev->nr_pending); 552 atomic_inc(&rdev->nr_pending);
584 else 553 if (test_bit(Faulty, &rdev->flags)) {
554 /* Cannot risk returning a device that failed
555 * before we inc'ed nr_pending
556 */
557 rdev_dec_pending(rdev, conf->mddev);
558 goto retry;
559 }
560 r10_bio->read_slot = slot;
561 } else
585 disk = -1; 562 disk = -1;
586 rcu_read_unlock(); 563 rcu_read_unlock();
587 564
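
The raid10 read_balance() now mirrors the raid1 version: one pass over the slots, an early break when balancing is disabled or a near-copies disk is idle, and per-slot scoring that uses the raw device address for far-copies layouts, since head distance would destroy their sequential throughput, as the comment notes. A userspace model of the scoring pass (simplified fields, no RCU or retry handling):

#include <stdlib.h>

struct slot { long long addr, head_pos; int in_sync, blocked, pending; };

/* Returns the chosen slot, or -1. far_copies > 1 ranks by address. */
static int pick_slot(struct slot *s, int nslots,
                     int near_copies, int far_copies, int do_balance)
{
    int best = -1;
    long long best_dist = -1;
    int slot;

    for (slot = 0; slot < nslots; slot++) {
        long long d;

        if (s[slot].blocked || !s[slot].in_sync)
            continue;
        if (!do_balance)
            break;                       /* first usable slot wins */
        if (near_copies > 1 && s[slot].pending == 0)
            break;                       /* idle disk on a near array */
        d = far_copies > 1 ? s[slot].addr
                           : llabs(s[slot].addr - s[slot].head_pos);
        if (best_dist < 0 || d < best_dist) {
            best_dist = d;
            best = slot;
        }
    }
    return slot < nslots ? slot : best;  /* early break returns 'slot' */
}

The retry label in the real code handles the race where the chosen rdev disappears or turns Faulty between selection and the nr_pending increment.
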
@@ -1460,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1460 int max_read_errors = atomic_read(&mddev->max_corr_read_errors); 1437 int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
1461 int d = r10_bio->devs[r10_bio->read_slot].devnum; 1438 int d = r10_bio->devs[r10_bio->read_slot].devnum;
1462 1439
1463 rcu_read_lock(); 1440 /* still own a reference to this rdev, so it cannot
1464 rdev = rcu_dereference(conf->mirrors[d].rdev); 1441 * have been cleared recently.
1465 if (rdev) { /* If rdev is not NULL */ 1442 */
1466 char b[BDEVNAME_SIZE]; 1443 rdev = conf->mirrors[d].rdev;
1467 int cur_read_error_count = 0;
1468 1444
1469 bdevname(rdev->bdev, b); 1445 if (test_bit(Faulty, &rdev->flags))
1446 /* drive has already been failed, just ignore any
1447 more fix_read_error() attempts */
1448 return;
1470 1449
1471 if (test_bit(Faulty, &rdev->flags)) { 1450 check_decay_read_errors(mddev, rdev);
1472 rcu_read_unlock(); 1451 atomic_inc(&rdev->read_errors);
1473 /* drive has already been failed, just ignore any 1452 if (atomic_read(&rdev->read_errors) > max_read_errors) {
1474 more fix_read_error() attempts */ 1453 char b[BDEVNAME_SIZE];
1475 return; 1454 bdevname(rdev->bdev, b);
1476 }
1477 1455
1478 check_decay_read_errors(mddev, rdev); 1456 printk(KERN_NOTICE
1479 atomic_inc(&rdev->read_errors); 1457 "md/raid10:%s: %s: Raid device exceeded "
1480 cur_read_error_count = atomic_read(&rdev->read_errors); 1458 "read_error threshold [cur %d:max %d]\n",
1481 if (cur_read_error_count > max_read_errors) { 1459 mdname(mddev), b,
1482 rcu_read_unlock(); 1460 atomic_read(&rdev->read_errors), max_read_errors);
1483 printk(KERN_NOTICE 1461 printk(KERN_NOTICE
1484 "md/raid10:%s: %s: Raid device exceeded " 1462 "md/raid10:%s: %s: Failing raid device\n",
1485 "read_error threshold " 1463 mdname(mddev), b);
1486 "[cur %d:max %d]\n", 1464 md_error(mddev, conf->mirrors[d].rdev);
1487 mdname(mddev), 1465 return;
1488 b, cur_read_error_count, max_read_errors);
1489 printk(KERN_NOTICE
1490 "md/raid10:%s: %s: Failing raid "
1491 "device\n", mdname(mddev), b);
1492 md_error(mddev, conf->mirrors[d].rdev);
1493 return;
1494 }
1495 } 1466 }
1496 rcu_read_unlock();
1497 1467
1498 while(sectors) { 1468 while(sectors) {
1499 int s = sectors; 1469 int s = sectors;
@@ -1562,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1562 "write failed" 1532 "write failed"
1563 " (%d sectors at %llu on %s)\n", 1533 " (%d sectors at %llu on %s)\n",
1564 mdname(mddev), s, 1534 mdname(mddev), s,
1565 (unsigned long long)(sect+ 1535 (unsigned long long)(
1566 rdev->data_offset), 1536 sect + rdev->data_offset),
1567 bdevname(rdev->bdev, b)); 1537 bdevname(rdev->bdev, b));
1568 printk(KERN_NOTICE "md/raid10:%s: %s: failing " 1538 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
1569 "drive\n", 1539 "drive\n",
@@ -1599,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1599 "corrected sectors" 1569 "corrected sectors"
1600 " (%d sectors at %llu on %s)\n", 1570 " (%d sectors at %llu on %s)\n",
1601 mdname(mddev), s, 1571 mdname(mddev), s,
1602 (unsigned long long)(sect+ 1572 (unsigned long long)(
1603 rdev->data_offset), 1573 sect + rdev->data_offset),
1604 bdevname(rdev->bdev, b)); 1574 bdevname(rdev->bdev, b));
1605 printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", 1575 printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
1606 mdname(mddev), 1576 mdname(mddev),
@@ -1612,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1612 "md/raid10:%s: read error corrected" 1582 "md/raid10:%s: read error corrected"
1613 " (%d sectors at %llu on %s)\n", 1583 " (%d sectors at %llu on %s)\n",
1614 mdname(mddev), s, 1584 mdname(mddev), s,
1615 (unsigned long long)(sect+ 1585 (unsigned long long)(
1616 rdev->data_offset), 1586 sect + rdev->data_offset),
1617 bdevname(rdev->bdev, b)); 1587 bdevname(rdev->bdev, b));
1618 } 1588 }
1619 1589
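
Because the caller now holds an rdev reference, fix_read_error() can skip the RCU lookup and go straight to accounting: check_decay_read_errors() ages the per-device counter, one error is charged, and the device is failed once the counter exceeds max_corr_read_errors. A rough userspace model of a decaying error counter (the one-hour half-life is an assumption standing in for check_decay_read_errors()'s internal policy):

#include <time.h>

struct errcnt { int errors; time_t last_decay; };

/* Halve the count once per hour of quiet, then charge one error;
 * returns nonzero when the device should be failed. */
static int charge_read_error(struct errcnt *c, int max_errors)
{
    time_t now = time(NULL);

    while (now - c->last_decay >= 3600 && c->errors) {
        c->errors /= 2;
        c->last_decay += 3600;
    }
    if (!c->errors)
        c->last_decay = now;
    return ++c->errors > max_errors;
}
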
@@ -1663,7 +1633,8 @@ static void raid10d(mddev_t *mddev)
1663 else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) 1633 else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
1664 recovery_request_write(mddev, r10_bio); 1634 recovery_request_write(mddev, r10_bio);
1665 else { 1635 else {
1666 int mirror; 1636 int slot = r10_bio->read_slot;
1637 int mirror = r10_bio->devs[slot].devnum;
1667 /* we got a read error. Maybe the drive is bad. Maybe just 1638 /* we got a read error. Maybe the drive is bad. Maybe just
1668 * the block and we can fix it. 1639 * the block and we can fix it.
1669 * We freeze all other IO, and try reading the block from 1640 * We freeze all other IO, and try reading the block from
@@ -1677,9 +1648,10 @@ static void raid10d(mddev_t *mddev)
1677 fix_read_error(conf, mddev, r10_bio); 1648 fix_read_error(conf, mddev, r10_bio);
1678 unfreeze_array(conf); 1649 unfreeze_array(conf);
1679 } 1650 }
1651 rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
1680 1652
1681 bio = r10_bio->devs[r10_bio->read_slot].bio; 1653 bio = r10_bio->devs[slot].bio;
1682 r10_bio->devs[r10_bio->read_slot].bio = 1654 r10_bio->devs[slot].bio =
1683 mddev->ro ? IO_BLOCKED : NULL; 1655 mddev->ro ? IO_BLOCKED : NULL;
1684 mirror = read_balance(conf, r10_bio); 1656 mirror = read_balance(conf, r10_bio);
1685 if (mirror == -1) { 1657 if (mirror == -1) {
@@ -1693,6 +1665,7 @@ static void raid10d(mddev_t *mddev)
1693 } else { 1665 } else {
1694 const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); 1666 const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
1695 bio_put(bio); 1667 bio_put(bio);
1668 slot = r10_bio->read_slot;
1696 rdev = conf->mirrors[mirror].rdev; 1669 rdev = conf->mirrors[mirror].rdev;
1697 if (printk_ratelimit()) 1670 if (printk_ratelimit())
1698 printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to" 1671 printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to"
@@ -1702,8 +1675,8 @@ static void raid10d(mddev_t *mddev)
1702 (unsigned long long)r10_bio->sector); 1675 (unsigned long long)r10_bio->sector);
1703 bio = bio_clone_mddev(r10_bio->master_bio, 1676 bio = bio_clone_mddev(r10_bio->master_bio,
1704 GFP_NOIO, mddev); 1677 GFP_NOIO, mddev);
1705 r10_bio->devs[r10_bio->read_slot].bio = bio; 1678 r10_bio->devs[slot].bio = bio;
1706 bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr 1679 bio->bi_sector = r10_bio->devs[slot].addr
1707 + rdev->data_offset; 1680 + rdev->data_offset;
1708 bio->bi_bdev = rdev->bdev; 1681 bio->bi_bdev = rdev->bdev;
1709 bio->bi_rw = READ | do_sync; 1682 bio->bi_rw = READ | do_sync;
@@ -1763,13 +1736,13 @@ static int init_resync(conf_t *conf)
1763 * 1736 *
1764 */ 1737 */
1765 1738
1766static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) 1739static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
1740 int *skipped, int go_faster)
1767{ 1741{
1768 conf_t *conf = mddev->private; 1742 conf_t *conf = mddev->private;
1769 r10bio_t *r10_bio; 1743 r10bio_t *r10_bio;
1770 struct bio *biolist = NULL, *bio; 1744 struct bio *biolist = NULL, *bio;
1771 sector_t max_sector, nr_sectors; 1745 sector_t max_sector, nr_sectors;
1772 int disk;
1773 int i; 1746 int i;
1774 int max_sync; 1747 int max_sync;
1775 sector_t sync_blocks; 1748 sector_t sync_blocks;
@@ -1858,108 +1831,114 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1858 int j, k; 1831 int j, k;
1859 r10_bio = NULL; 1832 r10_bio = NULL;
1860 1833
1861 for (i=0 ; i<conf->raid_disks; i++) 1834 for (i=0 ; i<conf->raid_disks; i++) {
1862 if (conf->mirrors[i].rdev && 1835 int still_degraded;
1863 !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) { 1836 r10bio_t *rb2;
1864 int still_degraded = 0; 1837 sector_t sect;
1865 /* want to reconstruct this device */ 1838 int must_sync;
1866 r10bio_t *rb2 = r10_bio;
1867 sector_t sect = raid10_find_virt(conf, sector_nr, i);
1868 int must_sync;
1869 /* Unless we are doing a full sync, we only need
1870 * to recover the block if it is set in the bitmap
1871 */
1872 must_sync = bitmap_start_sync(mddev->bitmap, sect,
1873 &sync_blocks, 1);
1874 if (sync_blocks < max_sync)
1875 max_sync = sync_blocks;
1876 if (!must_sync &&
1877 !conf->fullsync) {
1878 /* yep, skip the sync_blocks here, but don't assume
1879 * that there will never be anything to do here
1880 */
1881 chunks_skipped = -1;
1882 continue;
1883 }
1884 1839
1885 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); 1840 if (conf->mirrors[i].rdev == NULL ||
1886 raise_barrier(conf, rb2 != NULL); 1841 test_bit(In_sync, &conf->mirrors[i].rdev->flags))
1887 atomic_set(&r10_bio->remaining, 0); 1842 continue;
1888 1843
1889 r10_bio->master_bio = (struct bio*)rb2; 1844 still_degraded = 0;
1890 if (rb2) 1845 /* want to reconstruct this device */
1891 atomic_inc(&rb2->remaining); 1846 rb2 = r10_bio;
1892 r10_bio->mddev = mddev; 1847 sect = raid10_find_virt(conf, sector_nr, i);
1893 set_bit(R10BIO_IsRecover, &r10_bio->state); 1848 /* Unless we are doing a full sync, we only need
1894 r10_bio->sector = sect; 1849 * to recover the block if it is set in the bitmap
1850 */
1851 must_sync = bitmap_start_sync(mddev->bitmap, sect,
1852 &sync_blocks, 1);
1853 if (sync_blocks < max_sync)
1854 max_sync = sync_blocks;
1855 if (!must_sync &&
1856 !conf->fullsync) {
1857 /* yep, skip the sync_blocks here, but don't assume
1858 * that there will never be anything to do here
1859 */
1860 chunks_skipped = -1;
1861 continue;
1862 }
1895 1863
1896 raid10_find_phys(conf, r10_bio); 1864 r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
1865 raise_barrier(conf, rb2 != NULL);
1866 atomic_set(&r10_bio->remaining, 0);
1897 1867
1898 /* Need to check if the array will still be 1868 r10_bio->master_bio = (struct bio*)rb2;
1899 * degraded 1869 if (rb2)
1900 */ 1870 atomic_inc(&rb2->remaining);
1901 for (j=0; j<conf->raid_disks; j++) 1871 r10_bio->mddev = mddev;
1902 if (conf->mirrors[j].rdev == NULL || 1872 set_bit(R10BIO_IsRecover, &r10_bio->state);
1903 test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { 1873 r10_bio->sector = sect;
1904 still_degraded = 1;
1905 break;
1906 }
1907
1908 must_sync = bitmap_start_sync(mddev->bitmap, sect,
1909 &sync_blocks, still_degraded);
1910
1911 for (j=0; j<conf->copies;j++) {
1912 int d = r10_bio->devs[j].devnum;
1913 if (conf->mirrors[d].rdev &&
1914 test_bit(In_sync, &conf->mirrors[d].rdev->flags)) {
1915 /* This is where we read from */
1916 bio = r10_bio->devs[0].bio;
1917 bio->bi_next = biolist;
1918 biolist = bio;
1919 bio->bi_private = r10_bio;
1920 bio->bi_end_io = end_sync_read;
1921 bio->bi_rw = READ;
1922 bio->bi_sector = r10_bio->devs[j].addr +
1923 conf->mirrors[d].rdev->data_offset;
1924 bio->bi_bdev = conf->mirrors[d].rdev->bdev;
1925 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
1926 atomic_inc(&r10_bio->remaining);
1927 /* and we write to 'i' */
1928
1929 for (k=0; k<conf->copies; k++)
1930 if (r10_bio->devs[k].devnum == i)
1931 break;
1932 BUG_ON(k == conf->copies);
1933 bio = r10_bio->devs[1].bio;
1934 bio->bi_next = biolist;
1935 biolist = bio;
1936 bio->bi_private = r10_bio;
1937 bio->bi_end_io = end_sync_write;
1938 bio->bi_rw = WRITE;
1939 bio->bi_sector = r10_bio->devs[k].addr +
1940 conf->mirrors[i].rdev->data_offset;
1941 bio->bi_bdev = conf->mirrors[i].rdev->bdev;
1942
1943 r10_bio->devs[0].devnum = d;
1944 r10_bio->devs[1].devnum = i;
1945 1874
1946 break; 1875 raid10_find_phys(conf, r10_bio);
1947 } 1876
1948 } 1877 /* Need to check if the array will still be
1949 if (j == conf->copies) { 1878 * degraded
1950 /* Cannot recover, so abort the recovery */ 1879 */
1951 put_buf(r10_bio); 1880 for (j=0; j<conf->raid_disks; j++)
1952 if (rb2) 1881 if (conf->mirrors[j].rdev == NULL ||
1953 atomic_dec(&rb2->remaining); 1882 test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
1954 r10_bio = rb2; 1883 still_degraded = 1;
1955 if (!test_and_set_bit(MD_RECOVERY_INTR,
1956 &mddev->recovery))
1957 printk(KERN_INFO "md/raid10:%s: insufficient "
1958 "working devices for recovery.\n",
1959 mdname(mddev));
1960 break; 1884 break;
1961 } 1885 }
1886
1887 must_sync = bitmap_start_sync(mddev->bitmap, sect,
1888 &sync_blocks, still_degraded);
1889
1890 for (j=0; j<conf->copies;j++) {
1891 int d = r10_bio->devs[j].devnum;
1892 if (!conf->mirrors[d].rdev ||
1893 !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
1894 continue;
1895 /* This is where we read from */
1896 bio = r10_bio->devs[0].bio;
1897 bio->bi_next = biolist;
1898 biolist = bio;
1899 bio->bi_private = r10_bio;
1900 bio->bi_end_io = end_sync_read;
1901 bio->bi_rw = READ;
1902 bio->bi_sector = r10_bio->devs[j].addr +
1903 conf->mirrors[d].rdev->data_offset;
1904 bio->bi_bdev = conf->mirrors[d].rdev->bdev;
1905 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
1906 atomic_inc(&r10_bio->remaining);
1907 /* and we write to 'i' */
1908
1909 for (k=0; k<conf->copies; k++)
1910 if (r10_bio->devs[k].devnum == i)
1911 break;
1912 BUG_ON(k == conf->copies);
1913 bio = r10_bio->devs[1].bio;
1914 bio->bi_next = biolist;
1915 biolist = bio;
1916 bio->bi_private = r10_bio;
1917 bio->bi_end_io = end_sync_write;
1918 bio->bi_rw = WRITE;
1919 bio->bi_sector = r10_bio->devs[k].addr +
1920 conf->mirrors[i].rdev->data_offset;
1921 bio->bi_bdev = conf->mirrors[i].rdev->bdev;
1922
1923 r10_bio->devs[0].devnum = d;
1924 r10_bio->devs[1].devnum = i;
1925
1926 break;
1927 }
1928 if (j == conf->copies) {
1929 /* Cannot recover, so abort the recovery */
1930 put_buf(r10_bio);
1931 if (rb2)
1932 atomic_dec(&rb2->remaining);
1933 r10_bio = rb2;
1934 if (!test_and_set_bit(MD_RECOVERY_INTR,
1935 &mddev->recovery))
1936 printk(KERN_INFO "md/raid10:%s: insufficient "
1937 "working devices for recovery.\n",
1938 mdname(mddev));
1939 break;
1962 } 1940 }
1941 }
1963 if (biolist == NULL) { 1942 if (biolist == NULL) {
1964 while (r10_bio) { 1943 while (r10_bio) {
1965 r10bio_t *rb2 = r10_bio; 1944 r10bio_t *rb2 = r10_bio;
@@ -1977,7 +1956,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1977 1956
1978 if (!bitmap_start_sync(mddev->bitmap, sector_nr, 1957 if (!bitmap_start_sync(mddev->bitmap, sector_nr,
1979 &sync_blocks, mddev->degraded) && 1958 &sync_blocks, mddev->degraded) &&
1980 !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { 1959 !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
1960 &mddev->recovery)) {
1981 /* We can skip this block */ 1961 /* We can skip this block */
1982 *skipped = 1; 1962 *skipped = 1;
1983 return sync_blocks + sectors_skipped; 1963 return sync_blocks + sectors_skipped;
@@ -2022,7 +2002,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
2022 for (i=0; i<conf->copies; i++) { 2002 for (i=0; i<conf->copies; i++) {
2023 int d = r10_bio->devs[i].devnum; 2003 int d = r10_bio->devs[i].devnum;
2024 if (r10_bio->devs[i].bio->bi_end_io) 2004 if (r10_bio->devs[i].bio->bi_end_io)
2025 rdev_dec_pending(conf->mirrors[d].rdev, mddev); 2005 rdev_dec_pending(conf->mirrors[d].rdev,
2006 mddev);
2026 } 2007 }
2027 put_buf(r10_bio); 2008 put_buf(r10_bio);
2028 biolist = NULL; 2009 biolist = NULL;
@@ -2047,26 +2028,27 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
2047 do { 2028 do {
2048 struct page *page; 2029 struct page *page;
2049 int len = PAGE_SIZE; 2030 int len = PAGE_SIZE;
2050 disk = 0;
2051 if (sector_nr + (len>>9) > max_sector) 2031 if (sector_nr + (len>>9) > max_sector)
2052 len = (max_sector - sector_nr) << 9; 2032 len = (max_sector - sector_nr) << 9;
2053 if (len == 0) 2033 if (len == 0)
2054 break; 2034 break;
2055 for (bio= biolist ; bio ; bio=bio->bi_next) { 2035 for (bio= biolist ; bio ; bio=bio->bi_next) {
2036 struct bio *bio2;
2056 page = bio->bi_io_vec[bio->bi_vcnt].bv_page; 2037 page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
2057 if (bio_add_page(bio, page, len, 0) == 0) { 2038 if (bio_add_page(bio, page, len, 0))
2058 /* stop here */ 2039 continue;
2059 struct bio *bio2; 2040
2060 bio->bi_io_vec[bio->bi_vcnt].bv_page = page; 2041 /* stop here */
2061 for (bio2 = biolist; bio2 && bio2 != bio; bio2 = bio2->bi_next) { 2042 bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
2062 /* remove last page from this bio */ 2043 for (bio2 = biolist;
2063 bio2->bi_vcnt--; 2044 bio2 && bio2 != bio;
2064 bio2->bi_size -= len; 2045 bio2 = bio2->bi_next) {
2065 bio2->bi_flags &= ~(1<< BIO_SEG_VALID); 2046 /* remove last page from this bio */
2066 } 2047 bio2->bi_vcnt--;
2067 goto bio_full; 2048 bio2->bi_size -= len;
2049 bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
2068 } 2050 }
2069 disk = i; 2051 goto bio_full;
2070 } 2052 }
2071 nr_sectors += len>>9; 2053 nr_sectors += len>>9;
2072 sector_nr += len>>9; 2054 sector_nr += len>>9;
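
The restructured loop above preserves the invariant that every bio in the resync chain carries the same pages at the same lengths: when bio_add_page() refuses the page for any bio, the page is rolled back out of every earlier bio (bi_vcnt and bi_size adjusted) and the chain is submitted at the shorter length via bio_full. The same all-or-nothing append, modelled with an invented fixed-capacity type:

#include <stddef.h>

struct xbio { int cnt; size_t size; size_t lens[16]; };

/* Hypothetical capacity check standing in for bio_add_page() == 0. */
static int xbio_add(struct xbio *b, size_t len)
{
    if (b->cnt >= 16)
        return 0;             /* refused */
    b->lens[b->cnt++] = len;
    b->size += len;
    return 1;
}

/* Append len to every bio, or to none: on the first refusal, roll the
 * page back out of every bio that already took it.  Returns 1 on
 * success; on 0 the caller submits what it already has. */
static int add_to_all(struct xbio *bios, int nbios, size_t len)
{
    for (int i = 0; i < nbios; i++) {
        if (xbio_add(&bios[i], len))
            continue;
        for (int j = 0; j < i; j++) {   /* undo earlier additions */
            bios[j].cnt--;
            bios[j].size -= len;
        }
        return 0;
    }
    return 1;
}
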
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49bf5f891435..34dd54539f7b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1700,27 +1700,25 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1700 raid5_conf_t *conf = mddev->private; 1700 raid5_conf_t *conf = mddev->private;
1701 pr_debug("raid456: error called\n"); 1701 pr_debug("raid456: error called\n");
1702 1702
1703 if (!test_bit(Faulty, &rdev->flags)) { 1703 if (test_and_clear_bit(In_sync, &rdev->flags)) {
1704 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1704 unsigned long flags;
1705 if (test_and_clear_bit(In_sync, &rdev->flags)) { 1705 spin_lock_irqsave(&conf->device_lock, flags);
1706 unsigned long flags; 1706 mddev->degraded++;
1707 spin_lock_irqsave(&conf->device_lock, flags); 1707 spin_unlock_irqrestore(&conf->device_lock, flags);
1708 mddev->degraded++; 1708 /*
1709 spin_unlock_irqrestore(&conf->device_lock, flags); 1709 * if recovery was running, make sure it aborts.
1710 /* 1710 */
1711 * if recovery was running, make sure it aborts. 1711 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1712 */
1713 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1714 }
1715 set_bit(Faulty, &rdev->flags);
1716 printk(KERN_ALERT
1717 "md/raid:%s: Disk failure on %s, disabling device.\n"
1718 "md/raid:%s: Operation continuing on %d devices.\n",
1719 mdname(mddev),
1720 bdevname(rdev->bdev, b),
1721 mdname(mddev),
1722 conf->raid_disks - mddev->degraded);
1723 } 1712 }
1713 set_bit(Faulty, &rdev->flags);
1714 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1715 printk(KERN_ALERT
1716 "md/raid:%s: Disk failure on %s, disabling device.\n"
1717 "md/raid:%s: Operation continuing on %d devices.\n",
1718 mdname(mddev),
1719 bdevname(rdev->bdev, b),
1720 mdname(mddev),
1721 conf->raid_disks - mddev->degraded);
1724} 1722}
1725 1723
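
The raid5 error() refactor hoists set_bit(Faulty) and MD_CHANGE_DEVS out of the old `!Faulty` guard, making repeated failures of the same device harmless: only the In_sync-to-degraded transition remains one-shot. The resulting shape, as a self-contained sketch with locking and logging elided:

struct dev_state { int in_sync, faulty; };

static int test_and_clear_in_sync(struct dev_state *d)
{
    int was = d->in_sync;
    d->in_sync = 0;
    return was;
}

/* Only the In_sync -> degraded transition is guarded; setting Faulty
 * and dirtying the superblock are idempotent and run every time. */
static void fail_device(struct dev_state *d, int *degraded, int *sb_dirty)
{
    if (test_and_clear_in_sync(d))
        (*degraded)++;
    d->faulty = 1;
    *sb_dirty = 1;      /* MD_CHANGE_DEVS stand-in */
}
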
1726/* 1724/*
@@ -5391,7 +5389,8 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
5391 return -EINVAL; 5389 return -EINVAL;
5392 set_capacity(mddev->gendisk, mddev->array_sectors); 5390 set_capacity(mddev->gendisk, mddev->array_sectors);
5393 revalidate_disk(mddev->gendisk); 5391 revalidate_disk(mddev->gendisk);
5394 if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { 5392 if (sectors > mddev->dev_sectors &&
5393 mddev->recovery_cp > mddev->dev_sectors) {
5395 mddev->recovery_cp = mddev->dev_sectors; 5394 mddev->recovery_cp = mddev->dev_sectors;
5396 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5395 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5397 } 5396 }
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 0ae93529bfc1..18fccf913635 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -45,9 +45,9 @@
45#include <linux/interrupt.h> 45#include <linux/interrupt.h>
46#include <linux/if_ether.h> 46#include <linux/if_ether.h>
47#include <linux/aer.h> 47#include <linux/aer.h>
48#include <linux/prefetch.h>
48#ifdef CONFIG_IGB_DCA 49#ifdef CONFIG_IGB_DCA
49#include <linux/dca.h> 50#include <linux/dca.h>
50#include <linux/prefetch.h>
51#endif 51#endif
52#include "igb.h" 52#include "igb.h"
53 53
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cda9108..0ea00832de23 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
1306 return do_sys_open(dfd, filename, flags, mode); 1306 return do_sys_open(dfd, filename, flags, mode);
1307} 1307}
1308 1308
1309/*
1310 * compat_count() counts the number of arguments/envelopes. It is basically
1311 * a copy of count() from fs/exec.c, except that it works with 32 bit argv
1312 * and envp pointers.
1313 */
1314static int compat_count(compat_uptr_t __user *argv, int max)
1315{
1316 int i = 0;
1317
1318 if (argv != NULL) {
1319 for (;;) {
1320 compat_uptr_t p;
1321
1322 if (get_user(p, argv))
1323 return -EFAULT;
1324 if (!p)
1325 break;
1326 argv++;
1327 if (i++ >= max)
1328 return -E2BIG;
1329
1330 if (fatal_signal_pending(current))
1331 return -ERESTARTNOHAND;
1332 cond_resched();
1333 }
1334 }
1335 return i;
1336}
1337
1338/*
1339 * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
1340 * except that it works with 32 bit argv and envp pointers.
1341 */
1342static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1343 struct linux_binprm *bprm)
1344{
1345 struct page *kmapped_page = NULL;
1346 char *kaddr = NULL;
1347 unsigned long kpos = 0;
1348 int ret;
1349
1350 while (argc-- > 0) {
1351 compat_uptr_t str;
1352 int len;
1353 unsigned long pos;
1354
1355 if (get_user(str, argv+argc) ||
1356 !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
1357 ret = -EFAULT;
1358 goto out;
1359 }
1360
1361 if (len > MAX_ARG_STRLEN) {
1362 ret = -E2BIG;
1363 goto out;
1364 }
1365
1366 /* We're going to work our way backwards. */
1367 pos = bprm->p;
1368 str += len;
1369 bprm->p -= len;
1370
1371 while (len > 0) {
1372 int offset, bytes_to_copy;
1373
1374 if (fatal_signal_pending(current)) {
1375 ret = -ERESTARTNOHAND;
1376 goto out;
1377 }
1378 cond_resched();
1379
1380 offset = pos % PAGE_SIZE;
1381 if (offset == 0)
1382 offset = PAGE_SIZE;
1383
1384 bytes_to_copy = offset;
1385 if (bytes_to_copy > len)
1386 bytes_to_copy = len;
1387
1388 offset -= bytes_to_copy;
1389 pos -= bytes_to_copy;
1390 str -= bytes_to_copy;
1391 len -= bytes_to_copy;
1392
1393 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
1394 struct page *page;
1395
1396 page = get_arg_page(bprm, pos, 1);
1397 if (!page) {
1398 ret = -E2BIG;
1399 goto out;
1400 }
1401
1402 if (kmapped_page) {
1403 flush_kernel_dcache_page(kmapped_page);
1404 kunmap(kmapped_page);
1405 put_page(kmapped_page);
1406 }
1407 kmapped_page = page;
1408 kaddr = kmap(kmapped_page);
1409 kpos = pos & PAGE_MASK;
1410 flush_cache_page(bprm->vma, kpos,
1411 page_to_pfn(kmapped_page));
1412 }
1413 if (copy_from_user(kaddr+offset, compat_ptr(str),
1414 bytes_to_copy)) {
1415 ret = -EFAULT;
1416 goto out;
1417 }
1418 }
1419 }
1420 ret = 0;
1421out:
1422 if (kmapped_page) {
1423 flush_kernel_dcache_page(kmapped_page);
1424 kunmap(kmapped_page);
1425 put_page(kmapped_page);
1426 }
1427 return ret;
1428}
1429
1430/*
1431 * compat_do_execve() is mostly a copy of do_execve(), with the exception
1432 * that it processes 32 bit argv and envp pointers.
1433 */
1434int compat_do_execve(char * filename,
1435 compat_uptr_t __user *argv,
1436 compat_uptr_t __user *envp,
1437 struct pt_regs * regs)
1438{
1439 struct linux_binprm *bprm;
1440 struct file *file;
1441 struct files_struct *displaced;
1442 bool clear_in_exec;
1443 int retval;
1444
1445 retval = unshare_files(&displaced);
1446 if (retval)
1447 goto out_ret;
1448
1449 retval = -ENOMEM;
1450 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1451 if (!bprm)
1452 goto out_files;
1453
1454 retval = prepare_bprm_creds(bprm);
1455 if (retval)
1456 goto out_free;
1457
1458 retval = check_unsafe_exec(bprm);
1459 if (retval < 0)
1460 goto out_free;
1461 clear_in_exec = retval;
1462 current->in_execve = 1;
1463
1464 file = open_exec(filename);
1465 retval = PTR_ERR(file);
1466 if (IS_ERR(file))
1467 goto out_unmark;
1468
1469 sched_exec();
1470
1471 bprm->file = file;
1472 bprm->filename = filename;
1473 bprm->interp = filename;
1474
1475 retval = bprm_mm_init(bprm);
1476 if (retval)
1477 goto out_file;
1478
1479 bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
1480 if ((retval = bprm->argc) < 0)
1481 goto out;
1482
1483 bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
1484 if ((retval = bprm->envc) < 0)
1485 goto out;
1486
1487 retval = prepare_binprm(bprm);
1488 if (retval < 0)
1489 goto out;
1490
1491 retval = copy_strings_kernel(1, &bprm->filename, bprm);
1492 if (retval < 0)
1493 goto out;
1494
1495 bprm->exec = bprm->p;
1496 retval = compat_copy_strings(bprm->envc, envp, bprm);
1497 if (retval < 0)
1498 goto out;
1499
1500 retval = compat_copy_strings(bprm->argc, argv, bprm);
1501 if (retval < 0)
1502 goto out;
1503
1504 retval = search_binary_handler(bprm, regs);
1505 if (retval < 0)
1506 goto out;
1507
1508 /* execve succeeded */
1509 current->fs->in_exec = 0;
1510 current->in_execve = 0;
1511 acct_update_integrals(current);
1512 free_bprm(bprm);
1513 if (displaced)
1514 put_files_struct(displaced);
1515 return retval;
1516
1517out:
1518 if (bprm->mm) {
1519 acct_arg_size(bprm, 0);
1520 mmput(bprm->mm);
1521 }
1522
1523out_file:
1524 if (bprm->file) {
1525 allow_write_access(bprm->file);
1526 fput(bprm->file);
1527 }
1528
1529out_unmark:
1530 if (clear_in_exec)
1531 current->fs->in_exec = 0;
1532 current->in_execve = 0;
1533
1534out_free:
1535 free_bprm(bprm);
1536
1537out_files:
1538 if (displaced)
1539 reset_files_struct(displaced);
1540out_ret:
1541 return retval;
1542}
1543
1544#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1309#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1545 1310
1546static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, 1311static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
diff --git a/fs/exec.c b/fs/exec.c
index 8328beb9016f..c016896dcbb2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
55#include <linux/fs_struct.h> 55#include <linux/fs_struct.h>
56#include <linux/pipe_fs_i.h> 56#include <linux/pipe_fs_i.h>
57#include <linux/oom.h> 57#include <linux/oom.h>
58#include <linux/compat.h>
58 59
59#include <asm/uaccess.h> 60#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 61#include <asm/mmu_context.h>
@@ -166,8 +167,13 @@ out:
166} 167}
167 168
168#ifdef CONFIG_MMU 169#ifdef CONFIG_MMU
169 170/*
170void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) 171 * The nascent bprm->mm is not visible until exec_mmap() but it can
172 * use a lot of memory, account these pages in current->mm temporary
173 * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
174 * change the counter back via acct_arg_size(0).
175 */
176static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
171{ 177{
172 struct mm_struct *mm = current->mm; 178 struct mm_struct *mm = current->mm;
173 long diff = (long)(pages - bprm->vma_pages); 179 long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
186#endif 192#endif
187} 193}
188 194
189struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 195static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
190 int write) 196 int write)
191{ 197{
192 struct page *page; 198 struct page *page;
@@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
305 311
306#else 312#else
307 313
308void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) 314static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
309{ 315{
310} 316}
311 317
312struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 318static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
313 int write) 319 int write)
314{ 320{
315 struct page *page; 321 struct page *page;
@@ -398,22 +404,56 @@ err:
398 return err; 404 return err;
399} 405}
400 406
407struct user_arg_ptr {
408#ifdef CONFIG_COMPAT
409 bool is_compat;
410#endif
411 union {
412 const char __user *const __user *native;
413#ifdef CONFIG_COMPAT
414 compat_uptr_t __user *compat;
415#endif
416 } ptr;
417};
418
419static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
420{
421 const char __user *native;
422
423#ifdef CONFIG_COMPAT
424 if (unlikely(argv.is_compat)) {
425 compat_uptr_t compat;
426
427 if (get_user(compat, argv.ptr.compat + nr))
428 return ERR_PTR(-EFAULT);
429
430 return compat_ptr(compat);
431 }
432#endif
433
434 if (get_user(native, argv.ptr.native + nr))
435 return ERR_PTR(-EFAULT);
436
437 return native;
438}
439
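
user_arg_ptr is the heart of this cleanup: one tagged union hides the two pointer widths, get_user_arg_ptr() performs the compat_ptr() widening and reports faults as ERR_PTR(-EFAULT), and count()/copy_strings() then serve both ABIs, which is what lets the compat clones in fs/compat.c be deleted wholesale. A userspace model of the same trick (fault handling and the E2BIG limit are simplified; plain array indexing stands in for get_user()):

#include <stdint.h>
#include <stddef.h>

struct arg_ptr {
    int is_compat;
    union {
        const char *const *native;
        const uint32_t    *compat;   /* 32-bit user pointers */
    } ptr;
};

/* One accessor serves both layouts, like get_user_arg_ptr(). */
static const char *get_arg(struct arg_ptr argv, int nr)
{
    if (argv.is_compat)
        return (const char *)(uintptr_t)argv.ptr.compat[nr];
    return argv.ptr.native[nr];
}

/* count()-style loop: works unchanged for native and compat callers;
 * returns -1 where the kernel would return -E2BIG. */
static int count_args(struct arg_ptr argv, int max)
{
    int i = 0;
    while (get_arg(argv, i)) {
        if (++i > max)
            return -1;
    }
    return i;
}
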
401/* 440/*
402 * count() counts the number of strings in array ARGV. 441 * count() counts the number of strings in array ARGV.
403 */ 442 */
404static int count(const char __user * const __user * argv, int max) 443static int count(struct user_arg_ptr argv, int max)
405{ 444{
406 int i = 0; 445 int i = 0;
407 446
408 if (argv != NULL) { 447 if (argv.ptr.native != NULL) {
409 for (;;) { 448 for (;;) {
410 const char __user * p; 449 const char __user *p = get_user_arg_ptr(argv, i);
411 450
412 if (get_user(p, argv))
413 return -EFAULT;
414 if (!p) 451 if (!p)
415 break; 452 break;
416 argv++; 453
454 if (IS_ERR(p))
455 return -EFAULT;
456
417 if (i++ >= max) 457 if (i++ >= max)
418 return -E2BIG; 458 return -E2BIG;
419 459
@@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max)
430 * process's memory to the new process's stack. The call to get_user_pages() 470 * process's memory to the new process's stack. The call to get_user_pages()
431 * ensures the destination page is created and not swapped out. 471 * ensures the destination page is created and not swapped out.
432 */ 472 */
433static int copy_strings(int argc, const char __user *const __user *argv, 473static int copy_strings(int argc, struct user_arg_ptr argv,
434 struct linux_binprm *bprm) 474 struct linux_binprm *bprm)
435{ 475{
436 struct page *kmapped_page = NULL; 476 struct page *kmapped_page = NULL;
@@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
443 int len; 483 int len;
444 unsigned long pos; 484 unsigned long pos;
445 485
446 if (get_user(str, argv+argc) || 486 ret = -EFAULT;
447 !(len = strnlen_user(str, MAX_ARG_STRLEN))) { 487 str = get_user_arg_ptr(argv, argc);
448 ret = -EFAULT; 488 if (IS_ERR(str))
449 goto out; 489 goto out;
450 }
451 490
452 if (!valid_arg_len(bprm, len)) { 491 len = strnlen_user(str, MAX_ARG_STRLEN);
453 ret = -E2BIG; 492 if (!len)
493 goto out;
494
495 ret = -E2BIG;
496 if (!valid_arg_len(bprm, len))
454 goto out; 497 goto out;
455 }
456 498
457 /* We're going to work our way backwards. */ 499 /* We're going to work our way backwards. */
458 pos = bprm->p; 500 pos = bprm->p;
@@ -519,14 +561,19 @@ out:
519/* 561/*
520 * Like copy_strings, but get argv and its values from kernel memory. 562 * Like copy_strings, but get argv and its values from kernel memory.
521 */ 563 */
522int copy_strings_kernel(int argc, const char *const *argv, 564int copy_strings_kernel(int argc, const char *const *__argv,
523 struct linux_binprm *bprm) 565 struct linux_binprm *bprm)
524{ 566{
525 int r; 567 int r;
526 mm_segment_t oldfs = get_fs(); 568 mm_segment_t oldfs = get_fs();
569 struct user_arg_ptr argv = {
570 .ptr.native = (const char __user *const __user *)__argv,
571 };
572
527 set_fs(KERNEL_DS); 573 set_fs(KERNEL_DS);
528 r = copy_strings(argc, (const char __user *const __user *)argv, bprm); 574 r = copy_strings(argc, argv, bprm);
529 set_fs(oldfs); 575 set_fs(oldfs);
576
530 return r; 577 return r;
531} 578}
532EXPORT_SYMBOL(copy_strings_kernel); 579EXPORT_SYMBOL(copy_strings_kernel);
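
copy_strings_kernel() keeps its classic shape: wrap the kernel-resident argv in a native user_arg_ptr, flip the address limit with set_fs(KERNEL_DS) so copy_strings()'s user accesses accept kernel pointers, then restore it. The bracket generalizes to a save/flip/restore guard; a hypothetical macro version for value-returning calls (GNU statement expression, not part of this patch):

/* Hypothetical guard: run a value-returning expression with the
 * address limit lifted, always restoring the old limit. */
#define WITH_KERNEL_DS(expr) ({               \
    mm_segment_t __old = get_fs();            \
    set_fs(KERNEL_DS);                        \
    typeof(expr) __ret = (expr);              \
    set_fs(__old);                            \
    __ret; })
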
@@ -1379,10 +1426,10 @@ EXPORT_SYMBOL(search_binary_handler);
1379/* 1426/*
1380 * sys_execve() executes a new program. 1427 * sys_execve() executes a new program.
1381 */ 1428 */
1382int do_execve(const char * filename, 1429static int do_execve_common(const char *filename,
1383 const char __user *const __user *argv, 1430 struct user_arg_ptr argv,
1384 const char __user *const __user *envp, 1431 struct user_arg_ptr envp,
1385 struct pt_regs * regs) 1432 struct pt_regs *regs)
1386{ 1433{
1387 struct linux_binprm *bprm; 1434 struct linux_binprm *bprm;
1388 struct file *file; 1435 struct file *file;
@@ -1489,6 +1536,34 @@ out_ret:
1489 return retval; 1536 return retval;
1490} 1537}
1491 1538
1539int do_execve(const char *filename,
1540 const char __user *const __user *__argv,
1541 const char __user *const __user *__envp,
1542 struct pt_regs *regs)
1543{
1544 struct user_arg_ptr argv = { .ptr.native = __argv };
1545 struct user_arg_ptr envp = { .ptr.native = __envp };
1546 return do_execve_common(filename, argv, envp, regs);
1547}
1548
1549#ifdef CONFIG_COMPAT
1550int compat_do_execve(char *filename,
1551 compat_uptr_t __user *__argv,
1552 compat_uptr_t __user *__envp,
1553 struct pt_regs *regs)
1554{
1555 struct user_arg_ptr argv = {
1556 .is_compat = true,
1557 .ptr.compat = __argv,
1558 };
1559 struct user_arg_ptr envp = {
1560 .is_compat = true,
1561 .ptr.compat = __envp,
1562 };
1563 return do_execve_common(filename, argv, envp, regs);
1564}
1565#endif
1566
1492void set_binfmt(struct linux_binfmt *new) 1567void set_binfmt(struct linux_binfmt *new)
1493{ 1568{
1494 struct mm_struct *mm = current->mm; 1569 struct mm_struct *mm = current->mm;
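The fs/exec.c changes above collapse the native and CONFIG_COMPAT argv walks into one path: struct user_arg_ptr tags which array layout it holds, and get_user_arg_ptr() is the single accessor that count(), copy_strings() and do_execve_common() go through. A minimal userspace sketch of the same tagged-union pattern (the names arg_ptr, get_arg and the compat_uptr typedef are illustrative, and no get_user() copying is modeled):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t compat_uptr;	/* stand-in for the kernel's compat_uptr_t */

struct arg_ptr {
	int is_compat;
	union {
		const char *const *native;
		const compat_uptr *compat;
	} ptr;
};

/* mirrors get_user_arg_ptr(): one accessor hides both array layouts */
static const char *get_arg(struct arg_ptr argv, int nr)
{
	if (argv.is_compat)
		return (const char *)(uintptr_t)argv.ptr.compat[nr];
	return argv.ptr.native[nr];
}

int main(void)
{
	const char *args[] = { "ls", "-l", NULL };
	struct arg_ptr argv = { .is_compat = 0, .ptr.native = args };
	int i;

	for (i = 0; get_arg(argv, i) != NULL; i++)	/* same loop shape as count() */
		printf("argv[%d] = %s\n", i, get_arg(argv, i));
	return 0;
}

The payoff shows in do_execve() and compat_do_execve(): each becomes a thin wrapper that fills in the tag and hands off to do_execve_common().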
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2ddcc3f..e65493a8ac00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
780 metadata = (height != ip->i_height - 1); 780 metadata = (height != ip->i_height - 1);
781 if (metadata) 781 if (metadata)
782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
783 else if (ip->i_depth)
784 revokes = sdp->sd_inptrs;
783 785
784 if (ip != GFS2_I(sdp->sd_rindex)) 786 if (ip != GFS2_I(sdp->sd_rindex))
785 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); 787 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cec26c00b50d..903115f2bb34 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -228,6 +228,27 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
228 return ret; 228 return ret;
229} 229}
230 230
231static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
232{
233 struct gfs2_ail *ai;
234 struct gfs2_bufdata *bd;
235 struct buffer_head *bh;
236
237 spin_lock(&sdp->sd_ail_lock);
238 list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
239 list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
240 bh = bd->bd_bh;
241 if (!buffer_locked(bh))
242 continue;
243 get_bh(bh);
244 spin_unlock(&sdp->sd_ail_lock);
245 wait_on_buffer(bh);
246 brelse(bh);
247 return;
248 }
249 }
250 spin_unlock(&sdp->sd_ail_lock);
251}
231 252
232/** 253/**
233 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced 254 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -878,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
878 gfs2_log_flush(sdp, NULL); 899 gfs2_log_flush(sdp, NULL);
879 for (;;) { 900 for (;;) {
880 gfs2_ail1_start(sdp); 901 gfs2_ail1_start(sdp);
902 gfs2_ail1_wait(sdp);
881 if (gfs2_ail1_empty(sdp)) 903 if (gfs2_ail1_empty(sdp))
882 break; 904 break;
883 msleep(10);
884 } 905 }
885} 906}
886 907
@@ -920,12 +941,14 @@ int gfs2_logd(void *data)
920 941
921 if (gfs2_ail_flush_reqd(sdp)) { 942 if (gfs2_ail_flush_reqd(sdp)) {
922 gfs2_ail1_start(sdp); 943 gfs2_ail1_start(sdp);
923 io_schedule(); 944 gfs2_ail1_wait(sdp);
924 gfs2_ail1_empty(sdp); 945 gfs2_ail1_empty(sdp);
925 gfs2_log_flush(sdp, NULL); 946 gfs2_log_flush(sdp, NULL);
926 } 947 }
927 948
928 wake_up(&sdp->sd_log_waitq); 949 if (!gfs2_ail_flush_reqd(sdp))
950 wake_up(&sdp->sd_log_waitq);
951
929 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 952 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
930 if (freezing(current)) 953 if (freezing(current))
931 refrigerator(); 954 refrigerator();
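The log.c hunks above replace polling with event-driven waiting: instead of msleep(10) in gfs2_meta_syncfs() (and a bare io_schedule() in gfs2_logd()), gfs2_ail1_wait() takes a reference to one locked buffer under sd_ail_lock, drops the lock, and sleeps in wait_on_buffer() until that buffer's I/O completes. The userspace analogue below shows the poll-versus-block distinction with a condition variable standing in for the buffer lock bit; it is a sketch of the idea, not GFS2 code:

/* build: cc ail_wait.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int io_complete;

static void *writer(void *arg)
{
	usleep(20000);			/* simulated I/O latency */
	pthread_mutex_lock(&lock);
	io_complete = 1;		/* like the buffer lock bit clearing */
	pthread_cond_signal(&done);	/* like the wakeup on I/O completion */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, writer, NULL);

	/* old scheme: loop with a 10ms sleep and re-check;
	 * new scheme: sleep until woken, like wait_on_buffer() */
	pthread_mutex_lock(&lock);
	while (!io_complete)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	puts("buffer written");
	return 0;
}

Note also that gfs2_logd() now wakes sd_log_waitq only when no further AIL flush is required, avoiding spurious wakeups of log writers.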
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7273ad3c85ba..9b780df3fd54 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1629,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1629 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1629 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1630 1630
1631 gfs2_trans_add_rg(rgd); 1631 gfs2_trans_add_rg(rgd);
1632
1633 /* Directories keep their data in the metadata address space */
1634 if (ip->i_depth)
1635 gfs2_meta_wipe(ip, bstart, blen);
1632} 1636}
1633 1637
1634/** 1638/**
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index f7684483785e..eed4d7b26249 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
489void nilfs_palloc_commit_alloc_entry(struct inode *inode, 489void nilfs_palloc_commit_alloc_entry(struct inode *inode,
490 struct nilfs_palloc_req *req) 490 struct nilfs_palloc_req *req)
491{ 491{
492 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 492 mark_buffer_dirty(req->pr_bitmap_bh);
493 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 493 mark_buffer_dirty(req->pr_desc_bh);
494 nilfs_mdt_mark_dirty(inode); 494 nilfs_mdt_mark_dirty(inode);
495 495
496 brelse(req->pr_bitmap_bh); 496 brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
527 kunmap(req->pr_bitmap_bh->b_page); 527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page); 528 kunmap(req->pr_desc_bh->b_page);
529 529
530 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 530 mark_buffer_dirty(req->pr_desc_bh);
531 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 531 mark_buffer_dirty(req->pr_bitmap_bh);
532 nilfs_mdt_mark_dirty(inode); 532 nilfs_mdt_mark_dirty(inode);
533 533
534 brelse(req->pr_bitmap_bh); 534 brelse(req->pr_bitmap_bh);
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
683 kunmap(bitmap_bh->b_page); 683 kunmap(bitmap_bh->b_page);
684 kunmap(desc_bh->b_page); 684 kunmap(desc_bh->b_page);
685 685
686 nilfs_mdt_mark_buffer_dirty(desc_bh); 686 mark_buffer_dirty(desc_bh);
687 nilfs_mdt_mark_buffer_dirty(bitmap_bh); 687 mark_buffer_dirty(bitmap_bh);
688 nilfs_mdt_mark_dirty(inode); 688 nilfs_mdt_mark_dirty(inode);
689 689
690 brelse(bitmap_bh); 690 brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04e9b12..aadbd0b5e3e8 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
34 34
35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) 35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
36{ 36{
37 return NILFS_I_NILFS(bmap->b_inode)->ns_dat; 37 struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
38
39 return nilfs->ns_dat;
38} 40}
39 41
40static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, 42static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd223eea8..a35ae35e6932 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37void nilfs_btnode_cache_init(struct address_space *btnc,
38 struct backing_dev_info *bdi)
39{
40 nilfs_mapping_init(btnc, bdi);
41}
42
43void nilfs_btnode_cache_clear(struct address_space *btnc) 37void nilfs_btnode_cache_clear(struct address_space *btnc)
44{ 38{
45 invalidate_mapping_pages(btnc, 0, -1); 39 invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
62 BUG(); 56 BUG();
63 } 57 }
64 memset(bh->b_data, 0, 1 << inode->i_blkbits); 58 memset(bh->b_data, 0, 1 << inode->i_blkbits);
65 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 59 bh->b_bdev = inode->i_sb->s_bdev;
66 bh->b_blocknr = blocknr; 60 bh->b_blocknr = blocknr;
67 set_buffer_mapped(bh); 61 set_buffer_mapped(bh);
68 set_buffer_uptodate(bh); 62 set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
94 if (pblocknr == 0) { 88 if (pblocknr == 0) {
95 pblocknr = blocknr; 89 pblocknr = blocknr;
96 if (inode->i_ino != NILFS_DAT_INO) { 90 if (inode->i_ino != NILFS_DAT_INO) {
97 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; 91 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
98 92
99 /* blocknr is a virtual block number */ 93 /* blocknr is a virtual block number */
100 err = nilfs_dat_translate(dat, blocknr, &pblocknr); 94 err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
95 &pblocknr);
101 if (unlikely(err)) { 96 if (unlikely(err)) {
102 brelse(bh); 97 brelse(bh);
103 goto out_locked; 98 goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
120 goto found; 115 goto found;
121 } 116 }
122 set_buffer_mapped(bh); 117 set_buffer_mapped(bh);
123 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 118 bh->b_bdev = inode->i_sb->s_bdev;
124 bh->b_blocknr = pblocknr; /* set block address for read */ 119 bh->b_blocknr = pblocknr; /* set block address for read */
125 bh->b_end_io = end_buffer_read_sync; 120 bh->b_end_io = end_buffer_read_sync;
126 get_bh(bh); 121 get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
259 "invalid oldkey %lld (newkey=%lld)", 254 "invalid oldkey %lld (newkey=%lld)",
260 (unsigned long long)oldkey, 255 (unsigned long long)oldkey,
261 (unsigned long long)newkey); 256 (unsigned long long)newkey);
262 nilfs_btnode_mark_dirty(obh); 257 mark_buffer_dirty(obh);
263 258
264 spin_lock_irq(&btnc->tree_lock); 259 spin_lock_irq(&btnc->tree_lock);
265 radix_tree_delete(&btnc->page_tree, oldkey); 260 radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
271 unlock_page(opage); 266 unlock_page(opage);
272 } else { 267 } else {
273 nilfs_copy_buffer(nbh, obh); 268 nilfs_copy_buffer(nbh, obh);
274 nilfs_btnode_mark_dirty(nbh); 269 mark_buffer_dirty(nbh);
275 270
276 nbh->b_blocknr = newkey; 271 nbh->b_blocknr = newkey;
277 ctxt->bh = nbh; 272 ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd888c28..3a4dd2d8d3fc 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
41void nilfs_btnode_cache_clear(struct address_space *); 40void nilfs_btnode_cache_clear(struct address_space *);
42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 41struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
43 __u64 blocknr); 42 __u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
51void nilfs_btnode_abort_change_key(struct address_space *, 50void nilfs_btnode_abort_change_key(struct address_space *,
52 struct nilfs_btnode_chkey_ctxt *); 51 struct nilfs_btnode_chkey_ctxt *);
53 52
54#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh)
55
56
57#endif /* _NILFS_BTNODE_H */ 53#endif /* _NILFS_BTNODE_H */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0e0bf3..7eafe468a29c 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
714 nilfs_btree_get_nonroot_node(path, level), 714 nilfs_btree_get_nonroot_node(path, level),
715 path[level].bp_index, key); 715 path[level].bp_index, key);
716 if (!buffer_dirty(path[level].bp_bh)) 716 if (!buffer_dirty(path[level].bp_bh))
717 nilfs_btnode_mark_dirty(path[level].bp_bh); 717 mark_buffer_dirty(path[level].bp_bh);
718 } while ((path[level].bp_index == 0) && 718 } while ((path[level].bp_index == 0) &&
719 (++level < nilfs_btree_height(btree) - 1)); 719 (++level < nilfs_btree_height(btree) - 1));
720 } 720 }
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
739 nilfs_btree_node_insert(node, path[level].bp_index, 739 nilfs_btree_node_insert(node, path[level].bp_index,
740 *keyp, *ptrp, ncblk); 740 *keyp, *ptrp, ncblk);
741 if (!buffer_dirty(path[level].bp_bh)) 741 if (!buffer_dirty(path[level].bp_bh))
742 nilfs_btnode_mark_dirty(path[level].bp_bh); 742 mark_buffer_dirty(path[level].bp_bh);
743 743
744 if (path[level].bp_index == 0) 744 if (path[level].bp_index == 0)
745 nilfs_btree_promote_key(btree, path, level + 1, 745 nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
778 778
779 if (!buffer_dirty(path[level].bp_bh)) 779 if (!buffer_dirty(path[level].bp_bh))
780 nilfs_btnode_mark_dirty(path[level].bp_bh); 780 mark_buffer_dirty(path[level].bp_bh);
781 if (!buffer_dirty(path[level].bp_sib_bh)) 781 if (!buffer_dirty(path[level].bp_sib_bh))
782 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 782 mark_buffer_dirty(path[level].bp_sib_bh);
783 783
784 nilfs_btree_promote_key(btree, path, level + 1, 784 nilfs_btree_promote_key(btree, path, level + 1,
785 nilfs_btree_node_get_key(node, 0)); 785 nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
824 824
825 if (!buffer_dirty(path[level].bp_bh)) 825 if (!buffer_dirty(path[level].bp_bh))
826 nilfs_btnode_mark_dirty(path[level].bp_bh); 826 mark_buffer_dirty(path[level].bp_bh);
827 if (!buffer_dirty(path[level].bp_sib_bh)) 827 if (!buffer_dirty(path[level].bp_sib_bh))
828 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 828 mark_buffer_dirty(path[level].bp_sib_bh);
829 829
830 path[level + 1].bp_index++; 830 path[level + 1].bp_index++;
831 nilfs_btree_promote_key(btree, path, level + 1, 831 nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
871 871
872 if (!buffer_dirty(path[level].bp_bh)) 872 if (!buffer_dirty(path[level].bp_bh))
873 nilfs_btnode_mark_dirty(path[level].bp_bh); 873 mark_buffer_dirty(path[level].bp_bh);
874 if (!buffer_dirty(path[level].bp_sib_bh)) 874 if (!buffer_dirty(path[level].bp_sib_bh))
875 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 875 mark_buffer_dirty(path[level].bp_sib_bh);
876 876
877 newkey = nilfs_btree_node_get_key(right, 0); 877 newkey = nilfs_btree_node_get_key(right, 0);
878 newptr = path[level].bp_newreq.bpr_ptr; 878 newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
919 nilfs_btree_node_set_level(root, level + 1); 919 nilfs_btree_node_set_level(root, level + 1);
920 920
921 if (!buffer_dirty(path[level].bp_sib_bh)) 921 if (!buffer_dirty(path[level].bp_sib_bh))
922 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 922 mark_buffer_dirty(path[level].bp_sib_bh);
923 923
924 path[level].bp_bh = path[level].bp_sib_bh; 924 path[level].bp_bh = path[level].bp_sib_bh;
925 path[level].bp_sib_bh = NULL; 925 path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1194 nilfs_btree_node_delete(node, path[level].bp_index, 1194 nilfs_btree_node_delete(node, path[level].bp_index,
1195 keyp, ptrp, ncblk); 1195 keyp, ptrp, ncblk);
1196 if (!buffer_dirty(path[level].bp_bh)) 1196 if (!buffer_dirty(path[level].bp_bh))
1197 nilfs_btnode_mark_dirty(path[level].bp_bh); 1197 mark_buffer_dirty(path[level].bp_bh);
1198 if (path[level].bp_index == 0) 1198 if (path[level].bp_index == 0)
1199 nilfs_btree_promote_key(btree, path, level + 1, 1199 nilfs_btree_promote_key(btree, path, level + 1,
1200 nilfs_btree_node_get_key(node, 0)); 1200 nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227 1227
1228 if (!buffer_dirty(path[level].bp_bh)) 1228 if (!buffer_dirty(path[level].bp_bh))
1229 nilfs_btnode_mark_dirty(path[level].bp_bh); 1229 mark_buffer_dirty(path[level].bp_bh);
1230 if (!buffer_dirty(path[level].bp_sib_bh)) 1230 if (!buffer_dirty(path[level].bp_sib_bh))
1231 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1231 mark_buffer_dirty(path[level].bp_sib_bh);
1232 1232
1233 nilfs_btree_promote_key(btree, path, level + 1, 1233 nilfs_btree_promote_key(btree, path, level + 1,
1234 nilfs_btree_node_get_key(node, 0)); 1234 nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259 1259
1260 if (!buffer_dirty(path[level].bp_bh)) 1260 if (!buffer_dirty(path[level].bp_bh))
1261 nilfs_btnode_mark_dirty(path[level].bp_bh); 1261 mark_buffer_dirty(path[level].bp_bh);
1262 if (!buffer_dirty(path[level].bp_sib_bh)) 1262 if (!buffer_dirty(path[level].bp_sib_bh))
1263 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1263 mark_buffer_dirty(path[level].bp_sib_bh);
1264 1264
1265 path[level + 1].bp_index++; 1265 path[level + 1].bp_index++;
1266 nilfs_btree_promote_key(btree, path, level + 1, 1266 nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290 1290
1291 if (!buffer_dirty(path[level].bp_sib_bh)) 1291 if (!buffer_dirty(path[level].bp_sib_bh))
1292 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1292 mark_buffer_dirty(path[level].bp_sib_bh);
1293 1293
1294 nilfs_btnode_delete(path[level].bp_bh); 1294 nilfs_btnode_delete(path[level].bp_bh);
1295 path[level].bp_bh = path[level].bp_sib_bh; 1295 path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316 1316
1317 if (!buffer_dirty(path[level].bp_bh)) 1317 if (!buffer_dirty(path[level].bp_bh))
1318 nilfs_btnode_mark_dirty(path[level].bp_bh); 1318 mark_buffer_dirty(path[level].bp_bh);
1319 1319
1320 nilfs_btnode_delete(path[level].bp_sib_bh); 1320 nilfs_btnode_delete(path[level].bp_sib_bh);
1321 path[level].bp_sib_bh = NULL; 1321 path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1711 if (!buffer_dirty(bh)) 1711 if (!buffer_dirty(bh))
1712 nilfs_btnode_mark_dirty(bh); 1712 mark_buffer_dirty(bh);
1713 if (!nilfs_bmap_dirty(btree)) 1713 if (!nilfs_bmap_dirty(btree))
1714 nilfs_bmap_set_dirty(btree); 1714 nilfs_bmap_set_dirty(btree);
1715 1715
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1787{ 1787{
1788 while ((++level < nilfs_btree_height(btree) - 1) && 1788 while ((++level < nilfs_btree_height(btree) - 1) &&
1789 !buffer_dirty(path[level].bp_bh)) 1789 !buffer_dirty(path[level].bp_bh))
1790 nilfs_btnode_mark_dirty(path[level].bp_bh); 1790 mark_buffer_dirty(path[level].bp_bh);
1791 1791
1792 return 0; 1792 return 0;
1793} 1793}
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2229 } 2229 }
2230 2230
2231 if (!buffer_dirty(bh)) 2231 if (!buffer_dirty(bh))
2232 nilfs_btnode_mark_dirty(bh); 2232 mark_buffer_dirty(bh);
2233 brelse(bh); 2233 brelse(bh);
2234 if (!nilfs_bmap_dirty(btree)) 2234 if (!nilfs_bmap_dirty(btree))
2235 nilfs_bmap_set_dirty(btree); 2235 nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8a1024..c9b342c8b503 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
216 if (!nilfs_cpfile_is_in_first(cpfile, cno)) 216 if (!nilfs_cpfile_is_in_first(cpfile, cno))
217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, 217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
218 kaddr, 1); 218 kaddr, 1);
219 nilfs_mdt_mark_buffer_dirty(cp_bh); 219 mark_buffer_dirty(cp_bh);
220 220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr); 223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1); 224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0); 225 kunmap_atomic(kaddr, KM_USER0);
226 nilfs_mdt_mark_buffer_dirty(header_bh); 226 mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile); 227 nilfs_mdt_mark_dirty(cpfile);
228 } 228 }
229 229
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
326 } 326 }
327 if (nicps > 0) { 327 if (nicps > 0) {
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) { 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 count = 332 count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
358 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 358 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
359 kaddr); 359 kaddr);
360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); 360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
361 nilfs_mdt_mark_buffer_dirty(header_bh); 361 mark_buffer_dirty(header_bh);
362 nilfs_mdt_mark_dirty(cpfile); 362 nilfs_mdt_mark_dirty(cpfile);
363 kunmap_atomic(kaddr, KM_USER0); 363 kunmap_atomic(kaddr, KM_USER0);
364 } 364 }
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
671 le64_add_cpu(&header->ch_nsnapshots, 1); 671 le64_add_cpu(&header->ch_nsnapshots, 1);
672 kunmap_atomic(kaddr, KM_USER0); 672 kunmap_atomic(kaddr, KM_USER0);
673 673
674 nilfs_mdt_mark_buffer_dirty(prev_bh); 674 mark_buffer_dirty(prev_bh);
675 nilfs_mdt_mark_buffer_dirty(curr_bh); 675 mark_buffer_dirty(curr_bh);
676 nilfs_mdt_mark_buffer_dirty(cp_bh); 676 mark_buffer_dirty(cp_bh);
677 nilfs_mdt_mark_buffer_dirty(header_bh); 677 mark_buffer_dirty(header_bh);
678 nilfs_mdt_mark_dirty(cpfile); 678 nilfs_mdt_mark_dirty(cpfile);
679 679
680 brelse(prev_bh); 680 brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
774 le64_add_cpu(&header->ch_nsnapshots, -1); 774 le64_add_cpu(&header->ch_nsnapshots, -1);
775 kunmap_atomic(kaddr, KM_USER0); 775 kunmap_atomic(kaddr, KM_USER0);
776 776
777 nilfs_mdt_mark_buffer_dirty(next_bh); 777 mark_buffer_dirty(next_bh);
778 nilfs_mdt_mark_buffer_dirty(prev_bh); 778 mark_buffer_dirty(prev_bh);
779 nilfs_mdt_mark_buffer_dirty(cp_bh); 779 mark_buffer_dirty(cp_bh);
780 nilfs_mdt_mark_buffer_dirty(header_bh); 780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(cpfile); 781 nilfs_mdt_mark_dirty(cpfile);
782 782
783 brelse(prev_bh); 783 brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe742f7b..fcc2f869af16 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
54static void nilfs_dat_commit_entry(struct inode *dat, 54static void nilfs_dat_commit_entry(struct inode *dat,
55 struct nilfs_palloc_req *req) 55 struct nilfs_palloc_req *req)
56{ 56{
57 nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); 57 mark_buffer_dirty(req->pr_entry_bh);
58 nilfs_mdt_mark_dirty(dat); 58 nilfs_mdt_mark_dirty(dat);
59 brelse(req->pr_entry_bh); 59 brelse(req->pr_entry_bh);
60} 60}
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
361 entry->de_blocknr = cpu_to_le64(blocknr); 361 entry->de_blocknr = cpu_to_le64(blocknr);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
363 363
364 nilfs_mdt_mark_buffer_dirty(entry_bh); 364 mark_buffer_dirty(entry_bh);
365 nilfs_mdt_mark_dirty(dat); 365 nilfs_mdt_mark_dirty(dat);
366 366
367 brelse(entry_bh); 367 brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e73258631..d7eeca62febd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
111 nilfs_transaction_commit(inode->i_sb); 111 nilfs_transaction_commit(inode->i_sb);
112 112
113 mapped: 113 mapped:
114 SetPageChecked(page);
115 wait_on_page_writeback(page); 114 wait_on_page_writeback(page);
116 return VM_FAULT_LOCKED; 115 return VM_FAULT_LOCKED;
117} 116}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e23f8b2..08a07a218d26 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
48#include "dat.h" 48#include "dat.h"
49#include "ifile.h" 49#include "ifile.h"
50 50
51static const struct address_space_operations def_gcinode_aops = {
52};
53
54/* 51/*
55 * nilfs_gccache_submit_read_data() - add data buffer and submit read request 52 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
56 * @inode - gc inode 53 * @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
87 goto out; 84 goto out;
88 85
89 if (pbn == 0) { 86 if (pbn == 0) {
90 struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; 87 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
91 /* use original dat, not gc dat. */ 88
92 err = nilfs_dat_translate(dat_inode, vbn, &pbn); 89 err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
93 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ 90 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
94 brelse(bh); 91 brelse(bh);
95 goto failed; 92 goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
103 } 100 }
104 101
105 if (!buffer_mapped(bh)) { 102 if (!buffer_mapped(bh)) {
106 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 103 bh->b_bdev = inode->i_sb->s_bdev;
107 set_buffer_mapped(bh); 104 set_buffer_mapped(bh);
108 } 105 }
109 bh->b_blocknr = pbn; 106 bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
160 if (buffer_dirty(bh)) 157 if (buffer_dirty(bh))
161 return -EEXIST; 158 return -EEXIST;
162 159
163 if (buffer_nilfs_node(bh)) { 160 if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
164 if (nilfs_btree_broken_node_block(bh)) { 161 clear_buffer_uptodate(bh);
165 clear_buffer_uptodate(bh); 162 return -EIO;
166 return -EIO;
167 }
168 nilfs_btnode_mark_dirty(bh);
169 } else {
170 nilfs_mark_buffer_dirty(bh);
171 } 163 }
164 mark_buffer_dirty(bh);
172 return 0; 165 return 0;
173} 166}
174 167
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
178 171
179 inode->i_mode = S_IFREG; 172 inode->i_mode = S_IFREG;
180 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 173 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
181 inode->i_mapping->a_ops = &def_gcinode_aops; 174 inode->i_mapping->a_ops = &empty_aops;
182 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; 175 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
183 176
184 ii->i_flags = 0; 177 ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3a30ed..684d76300a80 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
80 return ret; 80 return ret;
81 } 81 }
82 nilfs_palloc_commit_alloc_entry(ifile, &req); 82 nilfs_palloc_commit_alloc_entry(ifile, &req);
83 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 83 mark_buffer_dirty(req.pr_entry_bh);
84 nilfs_mdt_mark_dirty(ifile); 84 nilfs_mdt_mark_dirty(ifile);
85 *out_ino = (ino_t)req.pr_entry_nr; 85 *out_ino = (ino_t)req.pr_entry_nr;
86 *out_bh = req.pr_entry_bh; 86 *out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
128 raw_inode->i_flags = 0; 128 raw_inode->i_flags = 0;
129 kunmap_atomic(kaddr, KM_USER0); 129 kunmap_atomic(kaddr, KM_USER0);
130 130
131 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 131 mark_buffer_dirty(req.pr_entry_bh);
132 brelse(req.pr_entry_bh); 132 brelse(req.pr_entry_bh);
133 133
134 nilfs_palloc_commit_free_entry(ifile, &req); 134 nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa27490c02..587f18432832 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
74 struct buffer_head *bh_result, int create) 74 struct buffer_head *bh_result, int create)
75{ 75{
76 struct nilfs_inode_info *ii = NILFS_I(inode); 76 struct nilfs_inode_info *ii = NILFS_I(inode);
77 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
77 __u64 blknum = 0; 78 __u64 blknum = 0;
78 int err = 0, ret; 79 int err = 0, ret;
79 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; 80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
81 81
82 down_read(&NILFS_MDT(dat)->mi_sem); 82 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); 83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
84 up_read(&NILFS_MDT(dat)->mi_sem); 84 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
85 if (ret >= 0) { /* found */ 85 if (ret >= 0) { /* found */
86 map_bh(bh_result, inode->i_sb, blknum); 86 map_bh(bh_result, inode->i_sb, blknum);
87 if (ret > 0) 87 if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
596 raw_inode->i_flags = cpu_to_le32(ii->i_flags); 596 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
597 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 597 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
598 598
599 if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
600 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
601
602 /* zero-fill the unused portion in the case of a super root block */
603 raw_inode->i_xattr = 0;
604 raw_inode->i_pad = 0;
605 memset((void *)raw_inode + sizeof(*raw_inode), 0,
606 nilfs->ns_inode_size - sizeof(*raw_inode));
607 }
608
599 if (has_bmap) 609 if (has_bmap)
600 nilfs_bmap_write(ii->i_bmap, raw_inode); 610 nilfs_bmap_write(ii->i_bmap, raw_inode);
601 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 611 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
872 return -EINVAL; /* NILFS_I_DIRTY may remain for 882 return -EINVAL; /* NILFS_I_DIRTY may remain for
873 freeing inode */ 883 freeing inode */
874 } 884 }
875 list_del(&ii->i_dirty); 885 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
876 list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
877 set_bit(NILFS_I_QUEUED, &ii->i_state); 886 set_bit(NILFS_I_QUEUED, &ii->i_state);
878 } 887 }
879 spin_unlock(&nilfs->ns_inode_lock); 888 spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
892 return err; 901 return err;
893 } 902 }
894 nilfs_update_inode(inode, ibh); 903 nilfs_update_inode(inode, ibh);
895 nilfs_mdt_mark_buffer_dirty(ibh); 904 mark_buffer_dirty(ibh);
896 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); 905 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
897 brelse(ibh); 906 brelse(ibh);
898 return 0; 907 return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
931int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 940int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
932 __u64 start, __u64 len) 941 __u64 start, __u64 len)
933{ 942{
934 struct the_nilfs *nilfs = NILFS_I_NILFS(inode); 943 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
935 __u64 logical = 0, phys = 0, size = 0; 944 __u64 logical = 0, phys = 0, size = 0;
936 __u32 flags = 0; 945 __u32 flags = 0;
937 loff_t isize; 946 loff_t isize;
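The nilfs_write_inode_common() hunk zero-fills the slack between sizeof(struct nilfs_inode) and the run-time ns_inode_size for root metadata files, so stale memory never reaches a super root block; it pairs with the srsz-based sizing in segbuf.c and segment.c further down. The shape of that tail fill, with made-up sizes and a two-field stand-in for the on-disk inode:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* hypothetical on-disk head; the real nilfs_inode has many more fields */
struct raw_inode { uint64_t i_size; uint64_t i_blocks; };

int main(void)
{
	union {
		unsigned char bytes[64];	/* ns_inode_size analogue */
		struct raw_inode ri;
	} rec;

	memset(rec.bytes, 0xaa, sizeof(rec.bytes));	/* pretend stale data */
	rec.ri.i_size = 4096;
	rec.ri.i_blocks = 1;

	/* the hunk's memset(raw_inode + sizeof(*raw_inode), 0, ...) */
	memset(rec.bytes + sizeof(rec.ri), 0,
	       sizeof(rec.bytes) - sizeof(rec.ri));

	printf("last byte now 0x%02x\n", rec.bytes[sizeof(rec.bytes) - 1]);
	return 0;
}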
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba6246b..41d6743d303c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
698 return 0; 698 return 0;
699} 699}
700 700
701static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
702 void __user *argp)
703{
704 __u64 newsize;
705 int ret = -EPERM;
706
707 if (!capable(CAP_SYS_ADMIN))
708 goto out;
709
710 ret = mnt_want_write(filp->f_path.mnt);
711 if (ret)
712 goto out;
713
714 ret = -EFAULT;
715 if (copy_from_user(&newsize, argp, sizeof(newsize)))
716 goto out_drop_write;
717
718 ret = nilfs_resize_fs(inode->i_sb, newsize);
719
720out_drop_write:
721 mnt_drop_write(filp->f_path.mnt);
722out:
723 return ret;
724}
725
726static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
727{
728 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
729 __u64 range[2];
730 __u64 minseg, maxseg;
731 unsigned long segbytes;
732 int ret = -EPERM;
733
734 if (!capable(CAP_SYS_ADMIN))
735 goto out;
736
737 ret = -EFAULT;
738 if (copy_from_user(range, argp, sizeof(__u64[2])))
739 goto out;
740
741 ret = -ERANGE;
742 if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
743 goto out;
744
745 segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
746
747 minseg = range[0] + segbytes - 1;
748 do_div(minseg, segbytes);
749 maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
750 do_div(maxseg, segbytes);
751 maxseg--;
752
753 ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
754out:
755 return ret;
756}
757
701static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, 758static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
702 unsigned int cmd, void __user *argp, 759 unsigned int cmd, void __user *argp,
703 size_t membsz, 760 size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
763 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); 820 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
764 case NILFS_IOCTL_SYNC: 821 case NILFS_IOCTL_SYNC:
765 return nilfs_ioctl_sync(inode, filp, cmd, argp); 822 return nilfs_ioctl_sync(inode, filp, cmd, argp);
823 case NILFS_IOCTL_RESIZE:
824 return nilfs_ioctl_resize(inode, filp, argp);
825 case NILFS_IOCTL_SET_ALLOC_RANGE:
826 return nilfs_ioctl_set_alloc_range(inode, argp);
766 default: 827 default:
767 return -ENOTTY; 828 return -ENOTTY;
768 } 829 }
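nilfs_ioctl_set_alloc_range() turns a byte range into an inclusive segment range: the start is rounded up to the next whole segment, the end is rounded down and stepped back by one. A userspace rendering of that arithmetic (plain 64-bit division in place of do_div(), and the NILFS_SB2_OFFSET_BYTES() backup-superblock adjustment left out):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t range[2] = { 1 << 20, 1ULL << 30 };	/* example byte range */
	uint64_t segbytes = 2048 * 4096;	/* blocks per segment * blocksize */
	uint64_t minseg, maxseg;

	minseg = (range[0] + segbytes - 1) / segbytes;	/* round start up */
	maxseg = range[1] / segbytes - 1;	/* round end down, step back */

	printf("segments %llu..%llu\n",
	       (unsigned long long)minseg, (unsigned long long)maxseg);
	return 0;
}

With a 1 MiB start and a 1 GiB end this yields segments 1..127 for 8 MiB segments, matching the round-up/round-down intent of the kernel code.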
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05f7069..800e8d78a83b 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr, KM_USER0);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 nilfs_mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
70 nilfs_mdt_mark_dirty(inode); 70 nilfs_mdt_mark_dirty(inode);
71 return 0; 71 return 0;
72} 72}
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
355 err = nilfs_mdt_read_block(inode, block, 0, &bh); 355 err = nilfs_mdt_read_block(inode, block, 0, &bh);
356 if (unlikely(err)) 356 if (unlikely(err))
357 return err; 357 return err;
358 nilfs_mark_buffer_dirty(bh); 358 mark_buffer_dirty(bh);
359 nilfs_mdt_mark_dirty(inode); 359 nilfs_mdt_mark_dirty(inode);
360 brelse(bh); 360 brelse(bh);
361 return 0; 361 return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
450 450
451 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
452 address_space_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
453 nilfs_mapping_init(&shadow->frozen_data, bdi); 453 nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
454 address_space_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi); 455 nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
456 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
457 return 0; 457 return 0;
458} 458}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563ec708..ab20a4baa50f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
64 return inode->i_private; 64 return inode->i_private;
65} 65}
66 66
67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
68{
69 return inode->i_sb->s_fs_info;
70}
71
72/* Default GFP flags using highmem */ 67/* Default GFP flags using highmem */
73#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) 68#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
74 69
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
93struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, 88struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
94 struct buffer_head *bh); 89 struct buffer_head *bh);
95 90
96#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
97
98static inline void nilfs_mdt_mark_dirty(struct inode *inode) 91static inline void nilfs_mdt_mark_dirty(struct inode *inode)
99{ 92{
100 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) 93 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
108 101
109static inline __u64 nilfs_mdt_cno(struct inode *inode) 102static inline __u64 nilfs_mdt_cno(struct inode *inode)
110{ 103{
111 return NILFS_I_NILFS(inode)->ns_cno; 104 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
112} 105}
113 106
114#define nilfs_mdt_bgl_lock(inode, bg) \ 107#define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344303cb..a9c6a531f80c 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
80 return &ii->vfs_inode; 80 return &ii->vfs_inode;
81} 81}
82 82
83static inline struct inode *NILFS_AS_I(struct address_space *mapping)
84{
85 return (mapping->host) ? :
86 container_of(mapping, struct inode, i_data);
87}
88
89/* 83/*
90 * Dynamic state flags of NILFS on-memory inode (i_state) 84 * Dynamic state flags of NILFS on-memory inode (i_state)
91 */ 85 */
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
298 int flip); 292 int flip);
299int nilfs_commit_super(struct super_block *sb, int flag); 293int nilfs_commit_super(struct super_block *sb, int flag);
300int nilfs_cleanup_super(struct super_block *sb); 294int nilfs_cleanup_super(struct super_block *sb);
295int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
301int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, 296int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
302 struct nilfs_root **root); 297 struct nilfs_root **root);
303int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); 298int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059c7efd..65221a04c6f0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
37 37
38#define NILFS_BUFFER_INHERENT_BITS \ 38#define NILFS_BUFFER_INHERENT_BITS \
39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ 39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ 40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
41 (1UL << BH_NILFS_Checked))
42 41
43static struct buffer_head * 42static struct buffer_head *
44__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, 43__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
59 return bh; 58 return bh;
60} 59}
61 60
62/*
63 * Since the page cache of B-tree node pages or data page cache of pseudo
64 * inodes does not have a valid mapping->host pointer, calling
65 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
66 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
67 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
68 */
69void nilfs_mark_buffer_dirty(struct buffer_head *bh)
70{
71 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
72 __set_page_dirty_nobuffers(bh->b_page);
73}
74
75struct buffer_head *nilfs_grab_buffer(struct inode *inode, 61struct buffer_head *nilfs_grab_buffer(struct inode *inode,
76 struct address_space *mapping, 62 struct address_space *mapping,
77 unsigned long blkoff, 63 unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
183void nilfs_page_bug(struct page *page) 169void nilfs_page_bug(struct page *page)
184{ 170{
185 struct address_space *m; 171 struct address_space *m;
186 unsigned long ino = 0; 172 unsigned long ino;
187 173
188 if (unlikely(!page)) { 174 if (unlikely(!page)) {
189 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); 175 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
191 } 177 }
192 178
193 m = page->mapping; 179 m = page->mapping;
194 if (m) { 180 ino = m ? m->host->i_ino : 0;
195 struct inode *inode = NILFS_AS_I(m); 181
196 if (inode != NULL)
197 ino = inode->i_ino;
198 }
199 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " 182 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
200 "mapping=%p ino=%lu\n", 183 "mapping=%p ino=%lu\n",
201 page, atomic_read(&page->_count), 184 page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
217} 200}
218 201
219/** 202/**
220 * nilfs_alloc_private_page - allocate a private page with buffer heads
221 *
222 * Return Value: On success, a pointer to the allocated page is returned.
223 * On error, NULL is returned.
224 */
225struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
226 unsigned long state)
227{
228 struct buffer_head *bh, *head, *tail;
229 struct page *page;
230
231 page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
232 if (unlikely(!page))
233 return NULL;
234
235 lock_page(page);
236 head = alloc_page_buffers(page, size, 0);
237 if (unlikely(!head)) {
238 unlock_page(page);
239 __free_page(page);
240 return NULL;
241 }
242
243 bh = head;
244 do {
245 bh->b_state = (1UL << BH_NILFS_Allocated) | state;
246 tail = bh;
247 bh->b_bdev = bdev;
248 bh = bh->b_this_page;
249 } while (bh);
250
251 tail->b_this_page = head;
252 attach_page_buffers(page, head);
253
254 return page;
255}
256
257void nilfs_free_private_page(struct page *page)
258{
259 BUG_ON(!PageLocked(page));
260 BUG_ON(page->mapping);
261
262 if (page_has_buffers(page) && !try_to_free_buffers(page))
263 NILFS_PAGE_BUG(page, "failed to free page");
264
265 unlock_page(page);
266 __free_page(page);
267}
268
269/**
270 * nilfs_copy_page -- copy the page with buffers 203 * nilfs_copy_page -- copy the page with buffers
271 * @dst: destination page 204 * @dst: destination page
272 * @src: source page 205 * @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 425 return nc;
493} 426}
494 427
495void nilfs_mapping_init(struct address_space *mapping, 428void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
496 struct backing_dev_info *bdi) 429 struct backing_dev_info *bdi)
497{ 430{
498 mapping->host = NULL; 431 mapping->host = inode;
499 mapping->flags = 0; 432 mapping->flags = 0;
500 mapping_set_gfp_mask(mapping, GFP_NOFS); 433 mapping_set_gfp_mask(mapping, GFP_NOFS);
501 mapping->assoc_mapping = NULL; 434 mapping->assoc_mapping = NULL;
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79ad7493..fb7de71605a0 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
38 BH_NILFS_Redirected, 38 BH_NILFS_Redirected,
39}; 39};
40 40
41BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
42BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ 41BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
43BUFFER_FNS(NILFS_Volatile, nilfs_volatile) 42BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
44BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ 43BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
45BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ 44BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
46 45
47 46
48void nilfs_mark_buffer_dirty(struct buffer_head *bh);
49int __nilfs_clear_page_dirty(struct page *); 47int __nilfs_clear_page_dirty(struct page *);
50 48
51struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, 49struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
54void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); 52void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
55int nilfs_page_buffers_clean(struct page *); 53int nilfs_page_buffers_clean(struct page *);
56void nilfs_page_bug(struct page *); 54void nilfs_page_bug(struct page *);
57struct page *nilfs_alloc_private_page(struct block_device *, int,
58 unsigned long);
59void nilfs_free_private_page(struct page *);
60 55
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 56int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 57void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 58void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init(struct address_space *mapping, 59void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
65 struct backing_dev_info *bdi); 60 struct backing_dev_info *bdi);
66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 61unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
67unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 62unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a64518f38..a604ac0331b2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
387static void dispose_recovery_list(struct list_head *head) 387static void dispose_recovery_list(struct list_head *head)
388{ 388{
389 while (!list_empty(head)) { 389 while (!list_empty(head)) {
390 struct nilfs_recovery_block *rb 390 struct nilfs_recovery_block *rb;
391 = list_entry(head->next, 391
392 struct nilfs_recovery_block, list); 392 rb = list_first_entry(head, struct nilfs_recovery_block, list);
393 list_del(&rb->list); 393 list_del(&rb->list);
394 kfree(rb); 394 kfree(rb);
395 } 395 }
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
416void nilfs_dispose_segment_list(struct list_head *head) 416void nilfs_dispose_segment_list(struct list_head *head)
417{ 417{
418 while (!list_empty(head)) { 418 while (!list_empty(head)) {
419 struct nilfs_segment_entry *ent 419 struct nilfs_segment_entry *ent;
420 = list_entry(head->next, 420
421 struct nilfs_segment_entry, list); 421 ent = list_first_entry(head, struct nilfs_segment_entry, list);
422 list_del(&ent->list); 422 list_del(&ent->list);
423 kfree(ent); 423 kfree(ent);
424 } 424 }
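Both dispose loops above now use list_first_entry(), which is simply list_entry(head->next, ...) spelled to show intent; the mechanism underneath is a container_of() offset subtraction. A self-contained userspace version of the macros and a one-element list (types and names chosen for the example):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member) container_of(ptr, type, member)
#define list_first_entry(head, type, member) \
	list_entry((head)->next, type, member)

struct recovery_block {
	int blocknr;
	struct list_head list;
};

int main(void)
{
	struct recovery_block rb = { .blocknr = 42 };
	struct list_head head = { &rb.list, &rb.list };
	struct recovery_block *first;

	rb.list.next = rb.list.prev = &head;

	first = list_first_entry(&head, struct recovery_block, list);
	printf("first block: %d\n", first->blocknr);
	return 0;
}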
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff20f85a..850a7c0228fb 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
239 u32 seed) 239 u32 seed)
240{ 240{
241 struct nilfs_super_root *raw_sr; 241 struct nilfs_super_root *raw_sr;
242 struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
243 unsigned srsize;
242 u32 crc; 244 u32 crc;
243 245
244 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; 246 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
247 srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
245 crc = crc32_le(seed, 248 crc = crc32_le(seed,
246 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), 249 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
247 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); 250 srsize - sizeof(raw_sr->sr_sum));
248 raw_sr->sr_sum = cpu_to_le32(crc); 251 raw_sr->sr_sum = cpu_to_le32(crc);
249} 252}
250 253
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
254 257
255 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { 258 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
256 list_del_init(&bh->b_assoc_buffers); 259 list_del_init(&bh->b_assoc_buffers);
257 if (buffer_nilfs_allocated(bh)) {
258 struct page *clone_page = bh->b_page;
259
260 /* remove clone page */
261 brelse(bh);
262 page_cache_release(clone_page); /* for each bh */
263 if (page_count(clone_page) <= 2) {
264 lock_page(clone_page);
265 nilfs_free_private_page(clone_page);
266 }
267 continue;
268 }
269 brelse(bh); 260 brelse(bh);
270 } 261 }
271} 262}
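The segbuf.c hunk stops treating NILFS_SR_BYTES as a fixed constant and derives the checksummed length from the run-time inode size, matching the sr_bytes and memset changes in segment.c just below. The invariant is that the CRC covers everything after the sum field itself. In userspace, with zlib's crc32() standing in for the kernel's crc32_le() and an illustrative struct layout:

/* build: cc sr_crc.c -lz */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>

struct super_root {
	uint32_t sr_sum;		/* checksum field, excluded from the CRC */
	unsigned char body[252];	/* srsize - sizeof(sr_sum) payload bytes */
};

int main(void)
{
	struct super_root sr;
	size_t srsize = sizeof(sr);	/* kernel: NILFS_SR_BYTES(ns_inode_size) */
	uint32_t crc;

	memset(&sr, 0, sizeof(sr));
	memcpy(sr.body, "payload", 7);

	crc = crc32(0L, (const unsigned char *)&sr + sizeof(sr.sr_sum),
		    srsize - sizeof(sr.sr_sum));
	sr.sr_sum = crc;
	printf("sr_sum = 0x%08x\n", (unsigned)sr.sr_sum);
	return 0;
}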
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f2183454..141646e88fb5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
655 if (unlikely(page->index > last)) 655 if (unlikely(page->index > last))
656 break; 656 break;
657 657
658 if (mapping->host) { 658 lock_page(page);
659 lock_page(page); 659 if (!page_has_buffers(page))
660 if (!page_has_buffers(page)) 660 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
661 create_empty_buffers(page, 661 unlock_page(page);
662 1 << inode->i_blkbits, 0);
663 unlock_page(page);
664 }
665 662
666 bh = head = page_buffers(page); 663 bh = head = page_buffers(page);
667 do { 664 do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
809 /* The following code duplicates cpfile code, but it is 806 /* The following code duplicates cpfile code, but it is
810 needed to collect the checkpoint even if it was not newly 807 needed to collect the checkpoint even if it was not newly
811 created */ 808 created */
812 nilfs_mdt_mark_buffer_dirty(bh_cp); 809 mark_buffer_dirty(bh_cp);
813 nilfs_mdt_mark_dirty(nilfs->ns_cpfile); 810 nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
814 nilfs_cpfile_put_checkpoint( 811 nilfs_cpfile_put_checkpoint(
815 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); 812 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
889{ 886{
890 struct buffer_head *bh_sr; 887 struct buffer_head *bh_sr;
891 struct nilfs_super_root *raw_sr; 888 struct nilfs_super_root *raw_sr;
892 unsigned isz = nilfs->ns_inode_size; 889 unsigned isz, srsz;
893 890
894 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; 891 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
895 raw_sr = (struct nilfs_super_root *)bh_sr->b_data; 892 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
893 isz = nilfs->ns_inode_size;
894 srsz = NILFS_SR_BYTES(isz);
896 895
897 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 896 raw_sr->sr_bytes = cpu_to_le16(srsz);
898 raw_sr->sr_nongc_ctime 897 raw_sr->sr_nongc_ctime
899 = cpu_to_le64(nilfs_doing_gc() ? 898 = cpu_to_le64(nilfs_doing_gc() ?
900 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 899 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
906 NILFS_SR_CPFILE_OFFSET(isz), 1); 905 NILFS_SR_CPFILE_OFFSET(isz), 1);
907 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + 906 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
908 NILFS_SR_SUFILE_OFFSET(isz), 1); 907 NILFS_SR_SUFILE_OFFSET(isz), 1);
908 memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
909} 909}
910 910
911static void nilfs_redirty_inodes(struct list_head *head) 911static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
954 954
955 dispose_buffers: 955 dispose_buffers:
956 while (!list_empty(listp)) { 956 while (!list_empty(listp)) {
957 bh = list_entry(listp->next, struct buffer_head, 957 bh = list_first_entry(listp, struct buffer_head,
958 b_assoc_buffers); 958 b_assoc_buffers);
959 list_del_init(&bh->b_assoc_buffers); 959 list_del_init(&bh->b_assoc_buffers);
960 brelse(bh); 960 brelse(bh);
961 } 961 }
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1500 nblocks = le32_to_cpu(finfo->fi_nblocks); 1500 nblocks = le32_to_cpu(finfo->fi_nblocks);
1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk); 1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1502 1502
1503 if (buffer_nilfs_node(bh)) 1503 inode = bh->b_page->mapping->host;
1504 inode = NILFS_BTNC_I(bh->b_page->mapping);
1505 else
1506 inode = NILFS_AS_I(bh->b_page->mapping);
1507 1504
1508 if (mode == SC_LSEG_DSYNC) 1505 if (mode == SC_LSEG_DSYNC)
1509 sc_op = &nilfs_sc_dsync_ops; 1506 sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1556 return 0; 1553 return 0;
1557} 1554}
1558 1555
1559static int 1556static void nilfs_begin_page_io(struct page *page)
1560nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1561{
1562 struct page *clone_page;
1563 struct buffer_head *bh, *head, *bh2;
1564 void *kaddr;
1565
1566 bh = head = page_buffers(page);
1567
1568 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
1569 if (unlikely(!clone_page))
1570 return -ENOMEM;
1571
1572 bh2 = page_buffers(clone_page);
1573 kaddr = kmap_atomic(page, KM_USER0);
1574 do {
1575 if (list_empty(&bh->b_assoc_buffers))
1576 continue;
1577 get_bh(bh2);
1578 page_cache_get(clone_page); /* for each bh */
1579 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
1580 bh2->b_blocknr = bh->b_blocknr;
1581 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
1582 list_add_tail(&bh->b_assoc_buffers, out);
1583 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
1584 kunmap_atomic(kaddr, KM_USER0);
1585
1586 if (!TestSetPageWriteback(clone_page))
1587 account_page_writeback(clone_page);
1588 unlock_page(clone_page);
1589
1590 return 0;
1591}
1592
1593static int nilfs_test_page_to_be_frozen(struct page *page)
1594{
1595 struct address_space *mapping = page->mapping;
1596
1597 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
1598 return 0;
1599
1600 if (page_mapped(page)) {
1601 ClearPageChecked(page);
1602 return 1;
1603 }
1604 return PageChecked(page);
1605}
1606
1607static int nilfs_begin_page_io(struct page *page, struct list_head *out)
1608{ 1557{
1609 if (!page || PageWriteback(page)) 1558 if (!page || PageWriteback(page))
1610 /* For split b-tree node pages, this function may be called 1559 /* For split b-tree node pages, this function may be called
1611 twice. We ignore the 2nd or later calls by this check. */ 1560 twice. We ignore the 2nd or later calls by this check. */
1612 return 0; 1561 return;
1613 1562
1614 lock_page(page); 1563 lock_page(page);
1615 clear_page_dirty_for_io(page); 1564 clear_page_dirty_for_io(page);
1616 set_page_writeback(page); 1565 set_page_writeback(page);
1617 unlock_page(page); 1566 unlock_page(page);
1618
1619 if (nilfs_test_page_to_be_frozen(page)) {
1620 int err = nilfs_copy_replace_page_buffers(page, out);
1621 if (unlikely(err))
1622 return err;
1623 }
1624 return 0;
1625} 1567}
1626 1568
1627static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, 1569static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1628 struct page **failed_page)
1629{ 1570{
1630 struct nilfs_segment_buffer *segbuf; 1571 struct nilfs_segment_buffer *segbuf;
1631 struct page *bd_page = NULL, *fs_page = NULL; 1572 struct page *bd_page = NULL, *fs_page = NULL;
1632 struct list_head *list = &sci->sc_copied_buffers;
1633 int err;
1634 1573
1635 *failed_page = NULL;
1636 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1574 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1637 struct buffer_head *bh; 1575 struct buffer_head *bh;
1638 1576
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1662 break; 1600 break;
1663 } 1601 }
1664 if (bh->b_page != fs_page) { 1602 if (bh->b_page != fs_page) {
1665 err = nilfs_begin_page_io(fs_page, list); 1603 nilfs_begin_page_io(fs_page);
1666 if (unlikely(err)) {
1667 *failed_page = fs_page;
1668 goto out;
1669 }
1670 fs_page = bh->b_page; 1604 fs_page = bh->b_page;
1671 } 1605 }
1672 } 1606 }
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1677 set_page_writeback(bd_page); 1611 set_page_writeback(bd_page);
1678 unlock_page(bd_page); 1612 unlock_page(bd_page);
1679 } 1613 }
1680 err = nilfs_begin_page_io(fs_page, list); 1614 nilfs_begin_page_io(fs_page);
1681 if (unlikely(err))
1682 *failed_page = fs_page;
1683 out:
1684 return err;
1685} 1615}
1686 1616
1687static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1617static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1694 return ret; 1624 return ret;
1695} 1625}
1696 1626
1697static void __nilfs_end_page_io(struct page *page, int err)
1698{
1699 if (!err) {
1700 if (!nilfs_page_buffers_clean(page))
1701 __set_page_dirty_nobuffers(page);
1702 ClearPageError(page);
1703 } else {
1704 __set_page_dirty_nobuffers(page);
1705 SetPageError(page);
1706 }
1707
1708 if (buffer_nilfs_allocated(page_buffers(page))) {
1709 if (TestClearPageWriteback(page))
1710 dec_zone_page_state(page, NR_WRITEBACK);
1711 } else
1712 end_page_writeback(page);
1713}
1714
1715static void nilfs_end_page_io(struct page *page, int err) 1627static void nilfs_end_page_io(struct page *page, int err)
1716{ 1628{
1717 if (!page) 1629 if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
1738 return; 1650 return;
1739 } 1651 }
1740 1652
1741 __nilfs_end_page_io(page, err); 1653 if (!err) {
1742} 1654 if (!nilfs_page_buffers_clean(page))
1743 1655 __set_page_dirty_nobuffers(page);
1744static void nilfs_clear_copied_buffers(struct list_head *list, int err) 1656 ClearPageError(page);
1745{ 1657 } else {
1746 struct buffer_head *bh, *head; 1658 __set_page_dirty_nobuffers(page);
1747 struct page *page; 1659 SetPageError(page);
1748
1749 while (!list_empty(list)) {
1750 bh = list_entry(list->next, struct buffer_head,
1751 b_assoc_buffers);
1752 page = bh->b_page;
1753 page_cache_get(page);
1754 head = bh = page_buffers(page);
1755 do {
1756 if (!list_empty(&bh->b_assoc_buffers)) {
1757 list_del_init(&bh->b_assoc_buffers);
1758 if (!err) {
1759 set_buffer_uptodate(bh);
1760 clear_buffer_dirty(bh);
1761 clear_buffer_delay(bh);
1762 clear_buffer_nilfs_volatile(bh);
1763 }
1764 brelse(bh); /* for b_assoc_buffers */
1765 }
1766 } while ((bh = bh->b_this_page) != head);
1767
1768 __nilfs_end_page_io(page, err);
1769 page_cache_release(page);
1770 } 1660 }
1661
1662 end_page_writeback(page);
1771} 1663}
1772 1664
1773static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1665static void nilfs_abort_logs(struct list_head *logs, int err)
1774 int err)
1775{ 1666{
1776 struct nilfs_segment_buffer *segbuf; 1667 struct nilfs_segment_buffer *segbuf;
1777 struct page *bd_page = NULL, *fs_page = NULL; 1668 struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1801 } 1692 }
1802 if (bh->b_page != fs_page) { 1693 if (bh->b_page != fs_page) {
1803 nilfs_end_page_io(fs_page, err); 1694 nilfs_end_page_io(fs_page, err);
1804 if (fs_page && fs_page == failed_page)
1805 return;
1806 fs_page = bh->b_page; 1695 fs_page = bh->b_page;
1807 } 1696 }
1808 } 1697 }
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1821 1710
1822 list_splice_tail_init(&sci->sc_write_logs, &logs); 1711 list_splice_tail_init(&sci->sc_write_logs, &logs);
1823 ret = nilfs_wait_on_logs(&logs); 1712 ret = nilfs_wait_on_logs(&logs);
1824 nilfs_abort_logs(&logs, NULL, ret ? : err); 1713 nilfs_abort_logs(&logs, ret ? : err);
1825 1714
1826 list_splice_tail_init(&sci->sc_segbufs, &logs); 1715 list_splice_tail_init(&sci->sc_segbufs, &logs);
1827 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1716 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1828 nilfs_free_incomplete_logs(&logs, nilfs); 1717 nilfs_free_incomplete_logs(&logs, nilfs);
1829 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1830 1718
1831 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1719 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1832 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1720 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1920 1808
1921 nilfs_end_page_io(fs_page, 0); 1809 nilfs_end_page_io(fs_page, 0);
1922 1810
1923 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
1924
1925 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1811 nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1926 1812
1927 if (nilfs_doing_gc()) 1813 if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1979 "failed to get inode block.\n"); 1865 "failed to get inode block.\n");
1980 return err; 1866 return err;
1981 } 1867 }
1982 nilfs_mdt_mark_buffer_dirty(ibh); 1868 mark_buffer_dirty(ibh);
1983 nilfs_mdt_mark_dirty(ifile); 1869 nilfs_mdt_mark_dirty(ifile);
1984 spin_lock(&nilfs->ns_inode_lock); 1870 spin_lock(&nilfs->ns_inode_lock);
1985 if (likely(!ii->i_bh)) 1871 if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1991 1877
1992 clear_bit(NILFS_I_QUEUED, &ii->i_state); 1878 clear_bit(NILFS_I_QUEUED, &ii->i_state);
1993 set_bit(NILFS_I_BUSY, &ii->i_state); 1879 set_bit(NILFS_I_BUSY, &ii->i_state);
1994 list_del(&ii->i_dirty); 1880 list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
1995 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
1996 } 1881 }
1997 spin_unlock(&nilfs->ns_inode_lock); 1882 spin_unlock(&nilfs->ns_inode_lock);
1998 1883
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2014 clear_bit(NILFS_I_BUSY, &ii->i_state); 1899 clear_bit(NILFS_I_BUSY, &ii->i_state);
2015 brelse(ii->i_bh); 1900 brelse(ii->i_bh);
2016 ii->i_bh = NULL; 1901 ii->i_bh = NULL;
2017 list_del(&ii->i_dirty); 1902 list_move_tail(&ii->i_dirty, &ti->ti_garbage);
2018 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2019 } 1903 }
2020 spin_unlock(&nilfs->ns_inode_lock); 1904 spin_unlock(&nilfs->ns_inode_lock);
2021} 1905}
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2026static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 1910static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2027{ 1911{
2028 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 1912 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2029 struct page *failed_page;
2030 int err; 1913 int err;
2031 1914
2032 sci->sc_stage.scnt = NILFS_ST_INIT; 1915 sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2081 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 1964 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2082 1965
2083 /* Write partial segments */ 1966 /* Write partial segments */
2084 err = nilfs_segctor_prepare_write(sci, &failed_page); 1967 nilfs_segctor_prepare_write(sci);
2085 if (err) {
2086 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
2087 goto failed_to_write;
2088 }
2089 1968
2090 nilfs_add_checksums_on_logs(&sci->sc_segbufs, 1969 nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2091 nilfs->ns_crc_seed); 1970 nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2687 INIT_LIST_HEAD(&sci->sc_segbufs); 2566 INIT_LIST_HEAD(&sci->sc_segbufs);
2688 INIT_LIST_HEAD(&sci->sc_write_logs); 2567 INIT_LIST_HEAD(&sci->sc_write_logs);
2689 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2568 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2690 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2691 init_timer(&sci->sc_timer); 2569 init_timer(&sci->sc_timer);
2692 2570
2693 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2571 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2741 if (flag || !nilfs_segctor_confirm(sci)) 2619 if (flag || !nilfs_segctor_confirm(sci))
2742 nilfs_segctor_write_out(sci); 2620 nilfs_segctor_write_out(sci);
2743 2621
2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2745
2746 if (!list_empty(&sci->sc_dirty_files)) { 2622 if (!list_empty(&sci->sc_dirty_files)) {
2747 nilfs_warning(sci->sc_super, __func__, 2623 nilfs_warning(sci->sc_super, __func__,
2748 "dirty file(s) after the final construction\n"); 2624 "dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86745fb..38a1d0013314 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
92 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
93 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
94 * @sc_gc_inodes: List of GC inodes having blocks to be written 94 * @sc_gc_inodes: List of GC inodes having blocks to be written
95 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
96 * @sc_freesegs: array of segment numbers to be freed 95 * @sc_freesegs: array of segment numbers to be freed
97 * @sc_nfreesegs: number of segments on @sc_freesegs 96 * @sc_nfreesegs: number of segments on @sc_freesegs
98 * @sc_dsync_inode: inode whose data pages are written for a sync operation 97 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
136 135
137 struct list_head sc_dirty_files; 136 struct list_head sc_dirty_files;
138 struct list_head sc_gc_inodes; 137 struct list_head sc_gc_inodes;
139 struct list_head sc_copied_buffers;
140 138
141 __u64 *sc_freesegs; 139 __u64 *sc_freesegs;
142 size_t sc_nfreesegs; 140 size_t sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488ccae8..0a0aba617d8a 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
33 33
34struct nilfs_sufile_info { 34struct nilfs_sufile_info {
35 struct nilfs_mdt_info mi; 35 struct nilfs_mdt_info mi;
36 unsigned long ncleansegs; 36 unsigned long ncleansegs;/* number of clean segments */
37 __u64 allocmin; /* lower limit of allocatable segment range */
38 __u64 allocmax; /* upper limit of allocatable segment range */
37}; 39};
38 40
39static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) 41static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
96 create, NULL, bhp); 98 create, NULL, bhp);
97} 99}
98 100
101static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
102 __u64 segnum)
103{
104 return nilfs_mdt_delete_block(sufile,
105 nilfs_sufile_get_blkoff(sufile, segnum));
106}
107
99static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, 108static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
100 u64 ncleanadd, u64 ndirtyadd) 109 u64 ncleanadd, u64 ndirtyadd)
101{ 110{
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
108 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
109 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr, KM_USER0);
110 119
111 nilfs_mdt_mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
112} 121}
113 122
114/** 123/**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
248} 257}
249 258
250/** 259/**
260 * nilfs_sufile_set_alloc_range - limit range of segment to be allocated
261 * @sufile: inode of segment usage file
262 * @start: minimum segment number of allocatable region (inclusive)
263 * @end: maximum segment number of allocatable region (inclusive)
264 *
265 * Return Value: On success, 0 is returned. On error, one of the
266 * following negative error codes is returned.
267 *
268 * %-ERANGE - invalid segment region
269 */
270int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
271{
272 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
273 __u64 nsegs;
274 int ret = -ERANGE;
275
276 down_write(&NILFS_MDT(sufile)->mi_sem);
277 nsegs = nilfs_sufile_get_nsegments(sufile);
278
279 if (start <= end && end < nsegs) {
280 sui->allocmin = start;
281 sui->allocmax = end;
282 ret = 0;
283 }
284 up_write(&NILFS_MDT(sufile)->mi_sem);
285 return ret;
286}
287
288/**
251 * nilfs_sufile_alloc - allocate a segment 289 * nilfs_sufile_alloc - allocate a segment
252 * @sufile: inode of segment usage file 290 * @sufile: inode of segment usage file
253 * @segnump: pointer to segment number 291 * @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
269 struct buffer_head *header_bh, *su_bh; 307 struct buffer_head *header_bh, *su_bh;
270 struct nilfs_sufile_header *header; 308 struct nilfs_sufile_header *header;
271 struct nilfs_segment_usage *su; 309 struct nilfs_segment_usage *su;
310 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
272 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 311 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
273 __u64 segnum, maxsegnum, last_alloc; 312 __u64 segnum, maxsegnum, last_alloc;
274 void *kaddr; 313 void *kaddr;
275 unsigned long nsegments, ncleansegs, nsus; 314 unsigned long nsegments, ncleansegs, nsus, cnt;
276 int ret, i, j; 315 int ret, j;
277 316
278 down_write(&NILFS_MDT(sufile)->mi_sem); 317 down_write(&NILFS_MDT(sufile)->mi_sem);
279 318
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
287 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr, KM_USER0);
288 327
289 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax;
290 segnum = last_alloc + 1; 330 segnum = last_alloc + 1;
291 maxsegnum = nsegments - 1; 331 if (segnum < sui->allocmin || segnum > sui->allocmax)
292 for (i = 0; i < nsegments; i += nsus) { 332 segnum = sui->allocmin;
293 if (segnum >= nsegments) { 333
294 /* wrap around */ 334 for (cnt = 0; cnt < nsegments; cnt += nsus) {
295 segnum = 0; 335 if (segnum > maxsegnum) {
296 maxsegnum = last_alloc; 336 if (cnt < sui->allocmax - sui->allocmin + 1) {
337 /*
338 * wrap around in the limited region.
339 * if allocation started from
340 * sui->allocmin, this never happens.
341 */
342 segnum = sui->allocmin;
343 maxsegnum = last_alloc;
344 } else if (segnum > sui->allocmin &&
345 sui->allocmax + 1 < nsegments) {
346 segnum = sui->allocmax + 1;
347 maxsegnum = nsegments - 1;
348 } else if (sui->allocmin > 0) {
349 segnum = 0;
350 maxsegnum = sui->allocmin - 1;
351 } else {
352 break; /* never happens */
353 }
297 } 354 }
298 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, 355 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
299 &su_bh); 356 &su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
320 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr, KM_USER0);
321 378
322 NILFS_SUI(sufile)->ncleansegs--; 379 sui->ncleansegs--;
323 nilfs_mdt_mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
324 nilfs_mdt_mark_buffer_dirty(su_bh); 381 mark_buffer_dirty(su_bh);
325 nilfs_mdt_mark_dirty(sufile); 382 nilfs_mdt_mark_dirty(sufile);
326 brelse(su_bh); 383 brelse(su_bh);
327 *segnump = segnum; 384 *segnump = segnum;
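
The window-walking branches above are easiest to verify with concrete numbers. Below is a minimal standalone sketch (plain userspace C; struct range and walk_windows are made-up names, and the per-block segment-usage lookups are elided) that replays the same branch order: wrap inside [allocmin, allocmax] first, then the area above allocmax, then the area below allocmin.

    /*
     * Standalone sketch of the allocation-cursor walk introduced above.
     * Not kernel code; it mirrors the branch structure of
     * nilfs_sufile_alloc() but consumes whole windows at once.
     */
    #include <stdio.h>

    struct range {
    	unsigned long allocmin, allocmax, nsegments, last_alloc;
    };

    static void walk_windows(const struct range *r)
    {
    	unsigned long segnum = r->last_alloc + 1;
    	unsigned long maxsegnum = r->allocmax;
    	unsigned long cnt = 0;

    	/* restart from allocmin if the cursor is outside the region */
    	if (segnum < r->allocmin || segnum > r->allocmax)
    		segnum = r->allocmin;

    	while (cnt < r->nsegments) {
    		if (segnum > maxsegnum) {
    			if (cnt < r->allocmax - r->allocmin + 1) {
    				/* wrap around inside the limited region */
    				segnum = r->allocmin;
    				maxsegnum = r->last_alloc;
    			} else if (segnum > r->allocmin &&
    				   r->allocmax + 1 < r->nsegments) {
    				/* spill into the area above allocmax */
    				segnum = r->allocmax + 1;
    				maxsegnum = r->nsegments - 1;
    			} else if (r->allocmin > 0) {
    				/* finally the area below allocmin */
    				segnum = 0;
    				maxsegnum = r->allocmin - 1;
    			} else {
    				break;	/* whole array already covered */
    			}
    		}
    		printf("window: [%lu..%lu]\n", segnum, maxsegnum);
    		cnt += maxsegnum - segnum + 1;
    		segnum = maxsegnum + 1;
    	}
    }

    int main(void)
    {
    	struct range r = {
    		.allocmin = 4, .allocmax = 15,
    		.nsegments = 16, .last_alloc = 7,
    	};
    	walk_windows(&r);	/* prints [8..15], [4..7], [0..3] */
    	return 0;
    }

Note that the final "never happens" break is reachable only when allocmin == 0 and allocmax == nsegments - 1, i.e. when the limited region already spans the whole array and the loop counter has therefore run out first.
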
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
364 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
365 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
366 423
367 nilfs_mdt_mark_buffer_dirty(su_bh); 424 mark_buffer_dirty(su_bh);
368 nilfs_mdt_mark_dirty(sufile); 425 nilfs_mdt_mark_dirty(sufile);
369} 426}
370 427
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
395 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
396 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
397 454
398 nilfs_mdt_mark_buffer_dirty(su_bh); 455 mark_buffer_dirty(su_bh);
399 nilfs_mdt_mark_dirty(sufile); 456 nilfs_mdt_mark_dirty(sufile);
400} 457}
401 458
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
421 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
422 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
423 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr, KM_USER0);
424 nilfs_mdt_mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
425 482
426 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
427 NILFS_SUI(sufile)->ncleansegs++; 484 NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
441 498
442 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); 499 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
443 if (!ret) { 500 if (!ret) {
444 nilfs_mdt_mark_buffer_dirty(bh); 501 mark_buffer_dirty(bh);
445 nilfs_mdt_mark_dirty(sufile); 502 nilfs_mdt_mark_dirty(sufile);
446 brelse(bh); 503 brelse(bh);
447 } 504 }
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
476 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
477 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr, KM_USER0);
478 535
479 nilfs_mdt_mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
480 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
481 brelse(bh); 538 brelse(bh);
482 539
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
505{ 562{
506 struct buffer_head *header_bh; 563 struct buffer_head *header_bh;
507 struct nilfs_sufile_header *header; 564 struct nilfs_sufile_header *header;
508 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 565 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
509 void *kaddr; 566 void *kaddr;
510 int ret; 567 int ret;
511 568
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
555 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 NILFS_SUI(sufile)->ncleansegs--; 613 NILFS_SUI(sufile)->ncleansegs--;
557 } 614 }
558 nilfs_mdt_mark_buffer_dirty(su_bh); 615 mark_buffer_dirty(su_bh);
559 nilfs_mdt_mark_dirty(sufile); 616 nilfs_mdt_mark_dirty(sufile);
560} 617}
561 618
562/** 619/**
620 * nilfs_sufile_truncate_range - truncate range of segment array
621 * @sufile: inode of segment usage file
622 * @start: start segment number (inclusive)
623 * @end: end segment number (inclusive)
624 *
625 * Return Value: On success, 0 is returned. On error, one of the
626 * following negative error codes is returned.
627 *
628 * %-EIO - I/O error.
629 *
630 * %-ENOMEM - Insufficient amount of memory available.
631 *
632 * %-EINVAL - Invalid number of segments specified
633 *
634 * %-EBUSY - Dirty or active segments are present in the range
635 */
636static int nilfs_sufile_truncate_range(struct inode *sufile,
637 __u64 start, __u64 end)
638{
639 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
640 struct buffer_head *header_bh;
641 struct buffer_head *su_bh;
642 struct nilfs_segment_usage *su, *su2;
643 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
644 unsigned long segusages_per_block;
645 unsigned long nsegs, ncleaned;
646 __u64 segnum;
647 void *kaddr;
648 ssize_t n, nc;
649 int ret;
650 int j;
651
652 nsegs = nilfs_sufile_get_nsegments(sufile);
653
654 ret = -EINVAL;
655 if (start > end || start >= nsegs)
656 goto out;
657
658 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
659 if (ret < 0)
660 goto out;
661
662 segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
663 ncleaned = 0;
664
665 for (segnum = start; segnum <= end; segnum += n) {
666 n = min_t(unsigned long,
667 segusages_per_block -
668 nilfs_sufile_get_offset(sufile, segnum),
669 end - segnum + 1);
670 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
671 &su_bh);
672 if (ret < 0) {
673 if (ret != -ENOENT)
674 goto out_header;
675 /* hole */
676 continue;
677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr);
681 su2 = su;
682 for (j = 0; j < n; j++, su = (void *)su + susz) {
683 if ((le32_to_cpu(su->su_flags) &
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0);
688 brelse(su_bh);
689 goto out_header;
690 }
691 }
692 nc = 0;
693 for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
694 if (nilfs_segment_usage_error(su)) {
695 nilfs_segment_usage_set_clean(su);
696 nc++;
697 }
698 }
699 kunmap_atomic(kaddr, KM_USER0);
700 if (nc > 0) {
701 mark_buffer_dirty(su_bh);
702 ncleaned += nc;
703 }
704 brelse(su_bh);
705
706 if (n == segusages_per_block) {
707 /* make hole */
708 nilfs_sufile_delete_segment_usage_block(sufile, segnum);
709 }
710 }
711 ret = 0;
712
713out_header:
714 if (ncleaned > 0) {
715 NILFS_SUI(sufile)->ncleansegs += ncleaned;
716 nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
717 nilfs_mdt_mark_dirty(sufile);
718 }
719 brelse(header_bh);
720out:
721 return ret;
722}
723
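
The truncate loop above visits the segment-usage file one block at a time: n is the number of entries remaining in the current block, clipped to the end of the requested range, and a window covering a whole block lets the code punch a hole instead of rewriting entries. A small sketch of that windowing arithmetic (userspace C; the 4 KiB block and 16-byte entry sizes are illustrative assumptions, and segnum % entries_per_block stands in for nilfs_sufile_get_offset()):

    #include <stdio.h>

    #define ENTRIES_PER_BLOCK 256UL	/* 4096 / 16, an assumed layout */

    static unsigned long min_ul(unsigned long a, unsigned long b)
    {
    	return a < b ? a : b;
    }

    int main(void)
    {
    	unsigned long start = 1000, end = 1600, segnum, n;

    	for (segnum = start; segnum <= end; segnum += n) {
    		n = min_ul(ENTRIES_PER_BLOCK - segnum % ENTRIES_PER_BLOCK,
    			   end - segnum + 1);
    		/* a full window (n == ENTRIES_PER_BLOCK) is where the
    		 * kernel code deletes the block to make a hole */
    		printf("block %lu: entries [%lu..%lu]%s\n",
    		       segnum / ENTRIES_PER_BLOCK, segnum, segnum + n - 1,
    		       n == ENTRIES_PER_BLOCK ? " (whole block)" : "");
    	}
    	return 0;
    }
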
724/**
725 * nilfs_sufile_resize - resize segment array
726 * @sufile: inode of segment usage file
727 * @newnsegs: new number of segments
728 *
729 * Return Value: On success, 0 is returned. On error, one of the
730 * following negative error codes is returned.
731 *
732 * %-EIO - I/O error.
733 *
734 * %-ENOMEM - Insufficient amount of memory available.
735 *
 736 * %-ENOSPC - Not enough free space is left for shrinking
737 *
738 * %-EBUSY - Dirty or active segments exist in the region to be truncated
739 */
740int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
741{
742 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
743 struct buffer_head *header_bh;
744 struct nilfs_sufile_header *header;
745 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
746 void *kaddr;
747 unsigned long nsegs, nrsvsegs;
748 int ret = 0;
749
750 down_write(&NILFS_MDT(sufile)->mi_sem);
751
752 nsegs = nilfs_sufile_get_nsegments(sufile);
753 if (nsegs == newnsegs)
754 goto out;
755
756 ret = -ENOSPC;
757 nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
758 if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
759 goto out;
760
761 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
762 if (ret < 0)
763 goto out;
764
765 if (newnsegs > nsegs) {
766 sui->ncleansegs += newnsegs - nsegs;
767 } else /* newnsegs < nsegs */ {
768 ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
769 if (ret < 0)
770 goto out_header;
771
772 sui->ncleansegs -= nsegs - newnsegs;
773 }
774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0);
779
780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile);
782 nilfs_set_nsegments(nilfs, newnsegs);
783
784out_header:
785 brelse(header_bh);
786out:
787 up_write(&NILFS_MDT(sufile)->mi_sem);
788 return ret;
789}
790
791/**
563 * nilfs_sufile_get_suinfo - 792 * nilfs_sufile_get_suinfo -
564 * @sufile: inode of segment usage file 793 * @sufile: inode of segment usage file
565 * @segnum: segment number to start looking 794 * @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
583 struct nilfs_segment_usage *su; 812 struct nilfs_segment_usage *su;
584 struct nilfs_suinfo *si = buf; 813 struct nilfs_suinfo *si = buf;
585 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 814 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
586 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 815 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
587 void *kaddr; 816 void *kaddr;
588 unsigned long nsegs, segusages_per_block; 817 unsigned long nsegs, segusages_per_block;
589 ssize_t n; 818 ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
679 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr, KM_USER0);
680 brelse(header_bh); 909 brelse(header_bh);
681 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
912 sui->allocmin = 0;
913
682 unlock_new_inode(sufile); 914 unlock_new_inode(sufile);
683 out: 915 out:
684 *inodep = sufile; 916 *inodep = sufile;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fbacb45b..e84bc5b51fc1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
31 31
32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) 32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
33{ 33{
34 return NILFS_I_NILFS(sufile)->ns_nsegments; 34 return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
35} 35}
36 36
37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); 37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
38 38
39int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
39int nilfs_sufile_alloc(struct inode *, __u64 *); 40int nilfs_sufile_alloc(struct inode *, __u64 *);
40int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); 41int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
41int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, 42int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
61void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 62void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
62 struct buffer_head *); 63 struct buffer_head *);
63 64
65int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
64int nilfs_sufile_read(struct super_block *sb, size_t susize, 66int nilfs_sufile_read(struct super_block *sb, size_t susize,
65 struct nilfs_inode *raw_inode, struct inode **inodep); 67 struct nilfs_inode *raw_inode, struct inode **inodep);
66 68
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca065195..8351c44a7320 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
56#include "btnode.h" 56#include "btnode.h"
57#include "page.h" 57#include "page.h"
58#include "cpfile.h" 58#include "cpfile.h"
59#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
59#include "ifile.h" 60#include "ifile.h"
60#include "dat.h" 61#include "dat.h"
61#include "segment.h" 62#include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
165 ii->i_state = 0; 166 ii->i_state = 0;
166 ii->i_cno = 0; 167 ii->i_cno = 0;
167 ii->vfs_inode.i_version = 1; 168 ii->vfs_inode.i_version = 1;
168 nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); 169 nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
169 return &ii->vfs_inode; 170 return &ii->vfs_inode;
170} 171}
171 172
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
347 return ret; 348 return ret;
348} 349}
349 350
351/**
352 * nilfs_move_2nd_super - relocate secondary super block
353 * @sb: super block instance
354 * @sb2off: new offset of the secondary super block (in bytes)
355 */
356static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
357{
358 struct the_nilfs *nilfs = sb->s_fs_info;
359 struct buffer_head *nsbh;
360 struct nilfs_super_block *nsbp;
361 sector_t blocknr, newblocknr;
362 unsigned long offset;
363 int sb2i = -1; /* array index of the secondary superblock */
364 int ret = 0;
365
366 /* nilfs->ns_sem must be locked by the caller. */
367 if (nilfs->ns_sbh[1] &&
368 nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
369 sb2i = 1;
370 blocknr = nilfs->ns_sbh[1]->b_blocknr;
371 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
372 sb2i = 0;
373 blocknr = nilfs->ns_sbh[0]->b_blocknr;
374 }
375 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
376 goto out; /* super block location is unchanged */
377
378 /* Get new super block buffer */
379 newblocknr = sb2off >> nilfs->ns_blocksize_bits;
380 offset = sb2off & (nilfs->ns_blocksize - 1);
381 nsbh = sb_getblk(sb, newblocknr);
382 if (!nsbh) {
383 printk(KERN_WARNING
384 "NILFS warning: unable to move secondary superblock "
385 "to block %llu\n", (unsigned long long)newblocknr);
386 ret = -EIO;
387 goto out;
388 }
389 nsbp = (void *)nsbh->b_data + offset;
390 memset(nsbp, 0, nilfs->ns_blocksize);
391
392 if (sb2i >= 0) {
393 memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
394 brelse(nilfs->ns_sbh[sb2i]);
395 nilfs->ns_sbh[sb2i] = nsbh;
396 nilfs->ns_sbp[sb2i] = nsbp;
397 } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
398 /* secondary super block will be restored to index 1 */
399 nilfs->ns_sbh[1] = nsbh;
400 nilfs->ns_sbp[1] = nsbp;
401 } else {
402 brelse(nsbh);
403 }
404out:
405 return ret;
406}
407
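
For orientation, the mainline header defines NILFS_SB2_OFFSET_BYTES(devsize) as ((((devsize) >> 12) - 1) << 12), the last 4 KiB-aligned offset below the device end; treating that definition as an assumption, a worked example of where the secondary superblock lands:

    /* Worked example of the sb2 offset used by nilfs_resize_fs() below.
     * NILFS_SB2_OFFSET_BYTES is assumed to match the mainline definition. */
    #include <stdio.h>

    #define NILFS_SB2_OFFSET_BYTES(devsize) ((((devsize) >> 12) - 1) << 12)

    int main(void)
    {
    	unsigned long long newsize = 8ULL << 30;	/* 8 GiB device */
    	unsigned long long sb2off = NILFS_SB2_OFFSET_BYTES(newsize);

    	/* 8589930496 = 8 GiB - 4 KiB; with 4 KiB blocks that is
    	 * block number 2097151, one block below the device end */
    	printf("sb2off=%llu blocknr=%llu\n", sb2off, sb2off >> 12);
    	return 0;
    }
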
408/**
409 * nilfs_resize_fs - resize the filesystem
410 * @sb: super block instance
411 * @newsize: new size of the filesystem (in bytes)
412 */
413int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
414{
415 struct the_nilfs *nilfs = sb->s_fs_info;
416 struct nilfs_super_block **sbp;
417 __u64 devsize, newnsegs;
418 loff_t sb2off;
419 int ret;
420
421 ret = -ERANGE;
422 devsize = i_size_read(sb->s_bdev->bd_inode);
423 if (newsize > devsize)
424 goto out;
425
426 /*
427 * Write lock is required to protect some functions depending
428 * on the number of segments, the number of reserved segments,
429 * and so forth.
430 */
431 down_write(&nilfs->ns_segctor_sem);
432
433 sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
434 newnsegs = sb2off >> nilfs->ns_blocksize_bits;
435 do_div(newnsegs, nilfs->ns_blocks_per_segment);
436
437 ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
438 up_write(&nilfs->ns_segctor_sem);
439 if (ret < 0)
440 goto out;
441
442 ret = nilfs_construct_segment(sb);
443 if (ret < 0)
444 goto out;
445
446 down_write(&nilfs->ns_sem);
447 nilfs_move_2nd_super(sb, sb2off);
448 ret = -EIO;
449 sbp = nilfs_prepare_super(sb, 0);
450 if (likely(sbp)) {
451 nilfs_set_log_cursor(sbp[0], nilfs);
452 /*
453 * Drop NILFS_RESIZE_FS flag for compatibility with
454 * mount-time resize which may be implemented in a
455 * future release.
456 */
457 sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
458 ~NILFS_RESIZE_FS);
459 sbp[0]->s_dev_size = cpu_to_le64(newsize);
460 sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
461 if (sbp[1])
462 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
463 ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
464 }
465 up_write(&nilfs->ns_sem);
466
467 /*
468 * Reset the range of allocatable segments last. This order
469 * is important in the case of expansion because the secondary
470 * superblock must be protected from log write until migration
471 * completes.
472 */
473 if (!ret)
474 nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
475out:
476 return ret;
477}
478
350static void nilfs_put_super(struct super_block *sb) 479static void nilfs_put_super(struct super_block *sb)
351{ 480{
352 struct the_nilfs *nilfs = sb->s_fs_info; 481 struct the_nilfs *nilfs = sb->s_fs_info;
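
nilfs_resize_fs() is reached from userspace through the NILFS_IOCTL_RESIZE ioctl added to include/linux/nilfs2_fs.h later in this patch; in practice the nilfs-resize(8) utility drives it. A hedged usage sketch (the mount point is a placeholder; it assumes the updated header is installed and that the ioctl is issued on a descriptor inside the mounted filesystem):

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>
    #include <linux/nilfs2_fs.h>	/* NILFS_IOCTL_RESIZE */

    int main(void)
    {
    	__u64 newsize = 8ULL << 30;		/* new device size: 8 GiB */
    	int fd = open("/mnt/nilfs", O_RDONLY);	/* placeholder mount point */

    	if (fd < 0 || ioctl(fd, NILFS_IOCTL_RESIZE, &newsize) < 0)
    		perror("NILFS_IOCTL_RESIZE");
    	if (fd >= 0)
    		close(fd);
    	return 0;
    }

Note the ordering constraint spelled out in the code above: the allocatable range is widened only after the secondary superblock has been migrated and the super blocks committed.
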
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a651f3..d32714094375 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
363 return res; 363 return res;
364} 364}
365 365
366/**
367 * nilfs_nrsvsegs - calculate the number of reserved segments
368 * @nilfs: nilfs object
369 * @nsegs: total number of segments
370 */
371unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
372{
373 return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
374 DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
375 100));
376}
377
378void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
379{
380 nilfs->ns_nsegments = nsegs;
381 nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
382}
383
366static int nilfs_store_disk_layout(struct the_nilfs *nilfs, 384static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
367 struct nilfs_super_block *sbp) 385 struct nilfs_super_block *sbp)
368{ 386{
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
389 } 407 }
390 408
391 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); 409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
392 nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
393 nilfs->ns_r_segments_percentage = 410 nilfs->ns_r_segments_percentage =
394 le32_to_cpu(sbp->s_r_segments_percentage); 411 le32_to_cpu(sbp->s_r_segments_percentage);
395 nilfs->ns_nrsvsegs = 412 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
396 max_t(unsigned long, NILFS_MIN_NRSVSEGS,
397 DIV_ROUND_UP(nilfs->ns_nsegments *
398 nilfs->ns_r_segments_percentage, 100));
399 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); 413 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
400 return 0; 414 return 0;
401} 415}
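
The reservation formula factored out above is easy to check by hand: reserved = max(NILFS_MIN_NRSVSEGS, ceil(nsegs * percentage / 100)). A sketch, taking NILFS_MIN_NRSVSEGS to be 8 as in the mainline header (an assumption here):

    #include <stdio.h>

    #define NILFS_MIN_NRSVSEGS 8UL		/* assumed; see nilfs2_fs.h */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    static unsigned long nrsvsegs(unsigned long nsegs, unsigned long pct)
    {
    	unsigned long reserved = DIV_ROUND_UP(nsegs * pct, 100);

    	return reserved > NILFS_MIN_NRSVSEGS ? reserved : NILFS_MIN_NRSVSEGS;
    }

    int main(void)
    {
    	/* 1000 segments at 5% -> 50 reserved; a tiny fs floors at 8 */
    	printf("%lu %lu\n", nrsvsegs(1000, 5), nrsvsegs(40, 5));
    	return 0;
    }
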
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f4968145c2a3..9992b11312ff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
268void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
271unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
272void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 273int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 274int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 275struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index c3d6512eded1..8845613fd7e3 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -60,10 +60,6 @@ struct linux_binprm {
60 unsigned long loader, exec; 60 unsigned long loader, exec;
61}; 61};
62 62
63extern void acct_arg_size(struct linux_binprm *bprm, unsigned long pages);
64extern struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
65 int write);
66
67#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 63#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
68#define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) 64#define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT)
69 65
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 072fe8c93e6f..42557851b12e 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -18,13 +18,13 @@
18#include <linux/pci.h> 18#include <linux/pci.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/pm.h> 20#include <linux/pm.h>
21#include <linux/mutex.h>
21#ifdef CONFIG_BLK_DEV_IDEACPI 22#ifdef CONFIG_BLK_DEV_IDEACPI
22#include <acpi/acpi.h> 23#include <acpi/acpi.h>
23#endif 24#endif
24#include <asm/byteorder.h> 25#include <asm/byteorder.h>
25#include <asm/system.h> 26#include <asm/system.h>
26#include <asm/io.h> 27#include <asm/io.h>
27#include <asm/mutex.h>
28 28
29/* for request_sense */ 29/* for request_sense */
30#include <linux/cdrom.h> 30#include <linux/cdrom.h>
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 8768c469e93e..7454ad7451b4 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -107,7 +107,7 @@ struct nilfs_super_root {
107#define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0) 107#define NILFS_SR_DAT_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 0)
108#define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1) 108#define NILFS_SR_CPFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 1)
109#define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2) 109#define NILFS_SR_SUFILE_OFFSET(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 2)
110#define NILFS_SR_BYTES (sizeof(struct nilfs_super_root)) 110#define NILFS_SR_BYTES(inode_size) NILFS_SR_MDT_OFFSET(inode_size, 3)
111 111
112/* 112/*
113 * Maximal mount counts 113 * Maximal mount counts
@@ -845,5 +845,7 @@ struct nilfs_bdesc {
845 _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64) 845 _IOR(NILFS_IOCTL_IDENT, 0x8A, __u64)
846#define NILFS_IOCTL_RESIZE \ 846#define NILFS_IOCTL_RESIZE \
847 _IOW(NILFS_IOCTL_IDENT, 0x8B, __u64) 847 _IOW(NILFS_IOCTL_IDENT, 0x8B, __u64)
848#define NILFS_IOCTL_SET_ALLOC_RANGE \
849 _IOW(NILFS_IOCTL_IDENT, 0x8C, __u64[2])
848 850
849#endif /* _LINUX_NILFS_FS_H */ 851#endif /* _LINUX_NILFS_FS_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aeaad97e6815..e8b78ce14474 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1782,7 +1782,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
1782 1782
1783#define skb_queue_walk(queue, skb) \ 1783#define skb_queue_walk(queue, skb) \
1784 for (skb = (queue)->next; \ 1784 for (skb = (queue)->next; \
1785 (skb != (struct sk_buff *)(queue)); \ 1785 skb != (struct sk_buff *)(queue); \
1786 skb = skb->next) 1786 skb = skb->next)
1787 1787
1788#define skb_queue_walk_safe(queue, skb, tmp) \ 1788#define skb_queue_walk_safe(queue, skb, tmp) \
@@ -1791,7 +1791,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
1791 skb = tmp, tmp = skb->next) 1791 skb = tmp, tmp = skb->next)
1792 1792
1793#define skb_queue_walk_from(queue, skb) \ 1793#define skb_queue_walk_from(queue, skb) \
1794 for (; (skb != (struct sk_buff *)(queue)); \ 1794 for (; skb != (struct sk_buff *)(queue); \
1795 skb = skb->next) 1795 skb = skb->next)
1796 1796
1797#define skb_queue_walk_from_safe(queue, skb, tmp) \ 1797#define skb_queue_walk_from_safe(queue, skb, tmp) \
@@ -1801,7 +1801,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
1801 1801
1802#define skb_queue_reverse_walk(queue, skb) \ 1802#define skb_queue_reverse_walk(queue, skb) \
1803 for (skb = (queue)->prev; \ 1803 for (skb = (queue)->prev; \
1804 (skb != (struct sk_buff *)(queue)); \ 1804 skb != (struct sk_buff *)(queue); \
1805 skb = skb->prev) 1805 skb = skb->prev)
1806 1806
1807#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ 1807#define skb_queue_reverse_walk_safe(queue, skb, tmp) \
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8c7189c3f6ed..e6d6a66a8f71 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -538,7 +538,7 @@ struct ieee80211_tx_info {
538}; 538};
539 539
540/** 540/**
541 * ieee80211_sched_scan_ies - scheduled scan IEs 541 * struct ieee80211_sched_scan_ies - scheduled scan IEs
542 * 542 *
543 * This structure is used to pass the appropriate IEs to be used in scheduled 543 * This structure is used to pass the appropriate IEs to be used in scheduled
544 * scans for all bands. It contains both the IEs passed from the userspace 544 * scans for all bands. It contains both the IEs passed from the userspace
@@ -2278,6 +2278,7 @@ static inline int ieee80211_sta_ps_transition_ni(struct ieee80211_sta *sta,
2278 2278
2279/** 2279/**
2280 * ieee80211_sta_set_tim - set the TIM bit for a sleeping station 2280 * ieee80211_sta_set_tim - set the TIM bit for a sleeping station
2281 * @sta: &struct ieee80211_sta pointer for the sleeping station
2281 * 2282 *
2282 * If a driver buffers frames for a powersave station instead of passing 2283 * If a driver buffers frames for a powersave station instead of passing
2283 * them back to mac80211 for retransmission, the station needs to be told 2284 * them back to mac80211 for retransmission, the station needs to be told
diff --git a/init/Kconfig b/init/Kconfig
index 4986ecc49e65..c8b172efaa65 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -827,11 +827,6 @@ config SCHED_AUTOGROUP
827 desktop applications. Task group autogeneration is currently based 827 desktop applications. Task group autogeneration is currently based
828 upon task session. 828 upon task session.
829 829
830config SCHED_TTWU_QUEUE
831 bool
832 depends on !SPARC32
833 default y
834
835config MM_OWNER 830config MM_OWNER
836 bool 831 bool
837 832
@@ -908,7 +903,6 @@ endif
908 903
909config CC_OPTIMIZE_FOR_SIZE 904config CC_OPTIMIZE_FOR_SIZE
910 bool "Optimize for size" 905 bool "Optimize for size"
911 default y
912 help 906 help
913 Enabling this option will pass "-Os" instead of "-O2" to gcc 907 Enabling this option will pass "-Os" instead of "-O2" to gcc
914 resulting in a smaller kernel. 908 resulting in a smaller kernel.
diff --git a/kernel/sched.c b/kernel/sched.c
index c62acf45d3b9..0516af415085 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2564,7 +2564,7 @@ static void ttwu_queue(struct task_struct *p, int cpu)
2564{ 2564{
2565 struct rq *rq = cpu_rq(cpu); 2565 struct rq *rq = cpu_rq(cpu);
2566 2566
2567#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) 2567#if defined(CONFIG_SMP)
2568 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { 2568 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
2569 ttwu_queue_remote(p, cpu); 2569 ttwu_queue_remote(p, cpu);
2570 return; 2570 return;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index dd5a3206423e..58c25ea5a5c1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -67,12 +67,12 @@
67#include <linux/if_arp.h> 67#include <linux/if_arp.h>
68#include <linux/proc_fs.h> 68#include <linux/proc_fs.h>
69#include <linux/rcupdate.h> 69#include <linux/rcupdate.h>
70#include <linux/prefetch.h>
71#include <linux/skbuff.h> 70#include <linux/skbuff.h>
72#include <linux/netlink.h> 71#include <linux/netlink.h>
73#include <linux/init.h> 72#include <linux/init.h>
74#include <linux/list.h> 73#include <linux/list.h>
75#include <linux/slab.h> 74#include <linux/slab.h>
75#include <linux/prefetch.h>
76#include <net/net_namespace.h> 76#include <net/net_namespace.h>
77#include <net/ip.h> 77#include <net/ip.h>
78#include <net/protocol.h> 78#include <net/protocol.h>
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8ce792ea08e9..1fd29b2daa92 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -36,6 +36,7 @@ $default{"REBOOT_ON_SUCCESS"} = 1;
36$default{"POWEROFF_ON_SUCCESS"} = 0; 36$default{"POWEROFF_ON_SUCCESS"} = 0;
37$default{"BUILD_OPTIONS"} = ""; 37$default{"BUILD_OPTIONS"} = "";
38$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects 38$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects
39$default{"PATCHCHECK_SLEEP_TIME"} = 60; # sleep time between patch checks
39$default{"CLEAR_LOG"} = 0; 40$default{"CLEAR_LOG"} = 0;
40$default{"BISECT_MANUAL"} = 0; 41$default{"BISECT_MANUAL"} = 0;
41$default{"BISECT_SKIP"} = 1; 42$default{"BISECT_SKIP"} = 1;
@@ -96,6 +97,7 @@ my $monitor_pid;
96my $monitor_cnt = 0; 97my $monitor_cnt = 0;
97my $sleep_time; 98my $sleep_time;
98my $bisect_sleep_time; 99my $bisect_sleep_time;
100my $patchcheck_sleep_time;
99my $store_failures; 101my $store_failures;
100my $timeout; 102my $timeout;
101my $booted_timeout; 103my $booted_timeout;
@@ -112,6 +114,7 @@ my $successes = 0;
112 114
113my %entered_configs; 115my %entered_configs;
114my %config_help; 116my %config_help;
117my %variable;
115 118
116$config_help{"MACHINE"} = << "EOF" 119$config_help{"MACHINE"} = << "EOF"
117 The machine hostname that you will test. 120 The machine hostname that you will test.
@@ -260,6 +263,39 @@ sub get_ktest_configs {
260 } 263 }
261} 264}
262 265
266sub process_variables {
267 my ($value) = @_;
268 my $retval = "";
269
 270	# We want to check for '\', and it is just easier
 271	# to check the character before '$' than to worry
 272	# about '$' being the first character. By adding
 273	# a space to $value, we can just check [^\\]\$ and
274 # it will still work.
275 $value = " $value";
276
277 while ($value =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
278 my $begin = $1;
279 my $var = $2;
280 my $end = $3;
281 # append beginning of value to retval
282 $retval = "$retval$begin";
283 if (defined($variable{$var})) {
284 $retval = "$retval$variable{$var}";
285 } else {
 286	# put back the original piece.
287 $retval = "$retval\$\{$var\}";
288 }
289 $value = $end;
290 }
291 $retval = "$retval$value";
292
293 # remove the space added in the beginning
294 $retval =~ s/ //;
295
296 return "$retval"
297}
298
263sub set_value { 299sub set_value {
264 my ($lvalue, $rvalue) = @_; 300 my ($lvalue, $rvalue) = @_;
265 301
@@ -269,10 +305,22 @@ sub set_value {
269 if ($rvalue =~ /^\s*$/) { 305 if ($rvalue =~ /^\s*$/) {
270 delete $opt{$lvalue}; 306 delete $opt{$lvalue};
271 } else { 307 } else {
308 $rvalue = process_variables($rvalue);
272 $opt{$lvalue} = $rvalue; 309 $opt{$lvalue} = $rvalue;
273 } 310 }
274} 311}
275 312
313sub set_variable {
314 my ($lvalue, $rvalue) = @_;
315
316 if ($rvalue =~ /^\s*$/) {
317 delete $variable{$lvalue};
318 } else {
319 $rvalue = process_variables($rvalue);
320 $variable{$lvalue} = $rvalue;
321 }
322}
323
276sub read_config { 324sub read_config {
277 my ($config) = @_; 325 my ($config) = @_;
278 326
@@ -385,6 +433,22 @@ sub read_config {
385 $repeats{$val} = $repeat; 433 $repeats{$val} = $repeat;
386 } 434 }
387 } 435 }
436 } elsif (/^\s*([A-Z_\[\]\d]+)\s*:=\s*(.*?)\s*$/) {
437 next if ($skip);
438
439 my $lvalue = $1;
440 my $rvalue = $2;
441
442 # process config variables.
443 # Config variables are only active while reading the
444 # config and can be defined anywhere. They also ignore
445 # TEST_START and DEFAULTS, but are skipped if they are in
 446	# one of these sections that have SKIP defined.
 447	# The same variable can be
 448	# defined multiple times and the new one simply overrides
 449	# the previous one.
450 set_variable($lvalue, $rvalue);
451
388 } else { 452 } else {
389 die "$name: $.: Garbage found in config\n$_"; 453 die "$name: $.: Garbage found in config\n$_";
390 } 454 }
@@ -838,6 +902,7 @@ sub monitor {
838 902
839 if ($stop_test_after > 0 && !$booted && !$bug) { 903 if ($stop_test_after > 0 && !$booted && !$bug) {
840 if (time - $monitor_start > $stop_test_after) { 904 if (time - $monitor_start > $stop_test_after) {
905 doprint "STOP_TEST_AFTER ($stop_test_after seconds) timed out\n";
841 $done = 1; 906 $done = 1;
842 } 907 }
843 } 908 }
@@ -907,7 +972,7 @@ sub install {
907 return if (!defined($post_install)); 972 return if (!defined($post_install));
908 973
909 my $cp_post_install = $post_install; 974 my $cp_post_install = $post_install;
910 $cp_post_install = s/\$KERNEL_VERSION/$version/g; 975 $cp_post_install =~ s/\$KERNEL_VERSION/$version/g;
911 run_command "$cp_post_install" or 976 run_command "$cp_post_install" or
912 dodie "Failed to run post install"; 977 dodie "Failed to run post install";
913} 978}
@@ -1247,14 +1312,14 @@ sub run_bisect_test {
1247 1312
1248 if ($failed) { 1313 if ($failed) {
1249 $result = 0; 1314 $result = 0;
1250
1251 # reboot the box to a good kernel
1252 if ($type ne "build") {
1253 bisect_reboot;
1254 }
1255 } else { 1315 } else {
1256 $result = 1; 1316 $result = 1;
1257 } 1317 }
1318
1319 # reboot the box to a kernel we can ssh to
1320 if ($type ne "build") {
1321 bisect_reboot;
1322 }
1258 $in_bisect = 0; 1323 $in_bisect = 0;
1259 1324
1260 return $result; 1325 return $result;
@@ -1763,6 +1828,14 @@ sub config_bisect {
1763 success $i; 1828 success $i;
1764} 1829}
1765 1830
1831sub patchcheck_reboot {
1832 doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
1833 reboot;
1834 start_monitor;
1835 wait_for_monitor $patchcheck_sleep_time;
1836 end_monitor;
1837}
1838
1766sub patchcheck { 1839sub patchcheck {
1767 my ($i) = @_; 1840 my ($i) = @_;
1768 1841
@@ -1854,6 +1927,8 @@ sub patchcheck {
1854 end_monitor; 1927 end_monitor;
1855 return 0 if ($failed); 1928 return 0 if ($failed);
1856 1929
1930 patchcheck_reboot;
1931
1857 } 1932 }
1858 $in_patchcheck = 0; 1933 $in_patchcheck = 0;
1859 success $i; 1934 success $i;
@@ -1944,7 +2019,7 @@ for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
1944 } 2019 }
1945} 2020}
1946 2021
1947sub set_test_option { 2022sub __set_test_option {
1948 my ($name, $i) = @_; 2023 my ($name, $i) = @_;
1949 2024
1950 my $option = "$name\[$i\]"; 2025 my $option = "$name\[$i\]";
@@ -1970,6 +2045,72 @@ sub set_test_option {
1970 return undef; 2045 return undef;
1971} 2046}
1972 2047
2048sub eval_option {
2049 my ($option, $i) = @_;
2050
2051 # Add space to evaluate the character before $
2052 $option = " $option";
2053 my $retval = "";
2054
2055 while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
2056 my $start = $1;
2057 my $var = $2;
2058 my $end = $3;
2059
2060 # Append beginning of line
2061 $retval = "$retval$start";
2062
2063 # If the iteration option OPT[$i] exists, then use that.
2064 # otherwise see if the default OPT (without [$i]) exists.
2065
2066 my $o = "$var\[$i\]";
2067
2068 if (defined($opt{$o})) {
2069 $o = $opt{$o};
2070 $retval = "$retval$o";
2071 } elsif (defined($opt{$var})) {
2072 $o = $opt{$var};
2073 $retval = "$retval$o";
2074 } else {
2075 $retval = "$retval\$\{$var\}";
2076 }
2077
2078 $option = $end;
2079 }
2080
2081 $retval = "$retval$option";
2082
2083 $retval =~ s/^ //;
2084
2085 return $retval;
2086}
2087
2088sub set_test_option {
2089 my ($name, $i) = @_;
2090
2091 my $option = __set_test_option($name, $i);
2092 return $option if (!defined($option));
2093
2094 my $prev = "";
2095
2096 # Since an option can evaluate to another option,
2097 # keep iterating until we do not evaluate any more
2098 # options.
2099 my $r = 0;
2100 while ($prev ne $option) {
2101 # Check for recursive evaluations.
2102 # 100 deep should be more than enough.
2103 if ($r++ > 100) {
2104 die "Over 100 evaluations accurred with $name\n" .
2105 "Check for recursive variables\n";
2106 }
2107 $prev = $option;
2108 $option = eval_option($option, $i);
2109 }
2110
2111 return $option;
2112}
2113
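
set_test_option() now expands options to a fixed point, with a depth cap guarding against self-referential definitions. The same shape in a compact C sketch (expand() is a toy stand-in for eval_option()):

    /* Fixed-point expansion with a recursion guard, mirroring the loop
     * in set_test_option() above.  expand() is a toy stand-in that
     * rewrites "${A}" to "${B}" and "${B}" to "done". */
    #include <stdio.h>
    #include <string.h>

    static void expand(char *buf, size_t len)
    {
    	if (strcmp(buf, "${A}") == 0)
    		snprintf(buf, len, "${B}");
    	else if (strcmp(buf, "${B}") == 0)
    		snprintf(buf, len, "done");
    }

    int main(void)
    {
    	char option[16] = "${A}", prev[16] = "";
    	int r = 0;

    	while (strcmp(prev, option) != 0) {
    		if (r++ > 100) {	/* recursion guard, as in ktest */
    			fprintf(stderr, "recursive option\n");
    			return 1;
    		}
    		strcpy(prev, option);
    		expand(option, sizeof(option));
    	}
    	printf("%s\n", option);	/* "done" after two passes */
    	return 0;
    }
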
1973# First we need to do is the builds 2114# First we need to do is the builds
1974for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { 2115for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
1975 2116
@@ -2003,6 +2144,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
2003 $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i); 2144 $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i);
2004 $sleep_time = set_test_option("SLEEP_TIME", $i); 2145 $sleep_time = set_test_option("SLEEP_TIME", $i);
2005 $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i); 2146 $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i);
2147 $patchcheck_sleep_time = set_test_option("PATCHCHECK_SLEEP_TIME", $i);
2006 $bisect_manual = set_test_option("BISECT_MANUAL", $i); 2148 $bisect_manual = set_test_option("BISECT_MANUAL", $i);
2007 $bisect_skip = set_test_option("BISECT_SKIP", $i); 2149 $bisect_skip = set_test_option("BISECT_SKIP", $i);
2008 $store_failures = set_test_option("STORE_FAILURES", $i); 2150 $store_failures = set_test_option("STORE_FAILURES", $i);
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 4c5d6bd74a02..48cbcc80602a 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -73,6 +73,95 @@
73# ktest will fail to execute, and no tests will run. 73# ktest will fail to execute, and no tests will run.
74# 74#
75 75
76#### Config variables ####
77#
78# This config file can also contain "config variables".
79# These are assigned with ":=" instead of the ktest option
 80# assignment "=".
81#
82# The difference between ktest options and config variables
83# is that config variables can be used multiple times,
84# where each instance will override the previous instance.
 85# Also, they only live while this config file is being processed.
86#
 87# The advantage of config variables is that they can be used
 88# by any option or any other config variable to define things
 89# that you may use over and over again in the options.
90#
91# For example:
92#
93# USER := root
94# TARGET := mybox
95# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test
96#
97# TEST_START
98# MIN_CONFIG = config1
99# TEST = ${TEST_CASE}
100#
101# TEST_START
102# MIN_CONFIG = config2
103# TEST = ${TEST_CASE}
104#
105# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test2
106#
107# TEST_START
108# MIN_CONFIG = config1
109# TEST = ${TEST_CASE}
110#
111# TEST_START
112# MIN_CONFIG = config2
113# TEST = ${TEST_CASE}
114#
115# TEST_DIR := /home/me/test
116#
117# BUILD_DIR = ${TEST_DIR}/linux.git
118# OUTPUT_DIR = ${TEST_DIR}/test
119#
120# Note, the config variables are evaluated immediately, thus
121# updating TARGET after TEST_CASE has been assigned does nothing
122# to TEST_CASE.
123#
124# As shown in the example, to evaluate a config variable, you
 125# use the ${X} convention. A plain $X will not work.
126#
127# If the config variable does not exist, the ${X} will not
128# be evaluated. Thus:
129#
130# MAKE_CMD = PATH=/mypath:${PATH} make
131#
132# If PATH is not a config variable, then the ${PATH} in
133# the MAKE_CMD option will be evaluated by the shell when
134# the MAKE_CMD option is passed into shell processing.
135
136#### Using options in other options ####
137#
138# Options that are defined in the config file may also be used
 139# by other options. All options are evaluated at time of
140# use (except that config variables are evaluated at config
141# processing time).
142#
 143# If a ktest option is used within another option, instead of
 144# typing it again in that option you can simply reference it,
 145# just as you can a config variable.
146#
147# MACHINE = mybox
148#
149# TEST = ssh root@${MACHINE} /path/to/test
150#
151# The option will be used per test case. Thus:
152#
153# TEST_TYPE = test
 154# TEST = ssh root@${MACHINE}
155#
156# TEST_START
157# MACHINE = box1
158#
159# TEST_START
160# MACHINE = box2
161#
162# For both test cases, MACHINE will be evaluated at the time
163# of the test case. The first test will run ssh root@box1
164# and the second will run ssh root@box2.
76 165
77#### Mandatory Default Options #### 166#### Mandatory Default Options ####
78 167
@@ -366,6 +455,10 @@
366# (default 60) 455# (default 60)
367#BISECT_SLEEP_TIME = 60 456#BISECT_SLEEP_TIME = 60
368 457
458# The time in between patch checks to sleep (in seconds)
459# (default 60)
460#PATCHCHECK_SLEEP_TIME = 60
461
369# Reboot the target box on error (default 0) 462# Reboot the target box on error (default 0)
370#REBOOT_ON_ERROR = 0 463#REBOOT_ON_ERROR = 0
371 464