From 96f339c6b95344942fe5fce012ff4f2a3b2cb80d Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Wed, 28 Sep 2005 17:05:19 -0500 Subject: [IA64] mbcs_init() should give up unless running on sn2 CONFIG_SGI_MBCS is enabled in generic kernels, but the driver may oops some other platforms. Check whether we are running on sn2 and bail out if we are not before doing anything dangerous. Acked-by: Bruce Losure Signed-off-by: Greg Edwards Signed-off-by: Tony Luck --- drivers/char/mbcs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c index 3fa64c6311..c268ee04b2 100644 --- a/drivers/char/mbcs.c +++ b/drivers/char/mbcs.c @@ -830,6 +830,9 @@ static int __init mbcs_init(void) { int rv; + if (!ia64_platform_is("sn2")) + return -ENODEV; + // Put driver into chrdevs[]. Get major number. rv = register_chrdev(mbcs_major, DEVICE_NAME, &mbcs_ops); if (rv < 0) { -- cgit v1.2.2 From 50688ea9ed6fe154058c065bc7dc60980533a2c8 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 13 Oct 2005 22:04:36 +0100 Subject: [ARM] 3007/1: BAST - add CONFIG_ISA to build Patch from Ben Dooks The Simtec EB2410ITX (BAST) has a PC/104 slot, and therefore we should enable CONFIG_ISA to allow the drivers for ISA peripherals to be selected Signed-off-by: Ben Dooks Signed-off-by: Russell King --- arch/arm/mach-s3c2410/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-s3c2410/Kconfig b/arch/arm/mach-s3c2410/Kconfig index 06807c6ee6..c796bcdd61 100644 --- a/arch/arm/mach-s3c2410/Kconfig +++ b/arch/arm/mach-s3c2410/Kconfig @@ -12,6 +12,7 @@ config MACH_ANUBIS config ARCH_BAST bool "Simtec Electronics BAST (EB2410ITX)" select CPU_S3C2410 + select ISA help Say Y here if you are using the Simtec Electronics EB2410ITX development board (also known as BAST) -- cgit v1.2.2 From 13b1f64c16e2eb96a021b49cf3986528046ba3dc Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 13 Oct 2005 22:04:37 +0100 Subject: [ARM] 3008/1: the exception table is not read-only Patch from Nicolas Pitre ... and therefore should not live in the .text section. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/vmlinux.lds.S | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 08e58ecd44..0d5db5279c 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -89,13 +89,6 @@ SECTIONS *(.got) /* Global offset table */ } - . = ALIGN(16); - __ex_table : { /* Exception table */ - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - RODATA _etext = .; /* End of text and rodata section */ @@ -137,6 +130,14 @@ SECTIONS . = ALIGN(32); *(.data.cacheline_aligned) + /* + * The exception fixup table (might need resorting at runtime) + */ + . = ALIGN(32); + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + /* * and the usual data section */ -- cgit v1.2.2 From 51e8513615ed8202b22ba9a43b0c7376ea4f6868 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 21:10:08 -0700 Subject: [SPARC64]: Consolidate common PCI IOMMU init code. All the PCI controller drivers were doing the same thing setting up the IOMMU software state, put it all in one spot. Signed-off-by: David S. Miller --- arch/sparc64/kernel/pci_iommu.c | 54 ++++++++++++++++++++++++++++++++++--- arch/sparc64/kernel/pci_psycho.c | 44 ++++--------------------------- arch/sparc64/kernel/pci_sabre.c | 39 +++------------------------ arch/sparc64/kernel/pci_schizo.c | 57 +++------------------------------------- include/asm-sparc64/pbm.h | 2 +- 5 files changed, 63 insertions(+), 133 deletions(-) diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index 425c60cfea..cdee7d9fed 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -80,13 +80,59 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } -void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize) +void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask) { - int i; + unsigned long i, tsbbase, order; + + /* Setup initial software IOMMU state. */ + spin_lock_init(&iommu->lock); + iommu->ctx_lowest_free = 1; + iommu->page_table_map_base = dma_offset; + iommu->dma_addr_mask = dma_addr_mask; + + switch (tsbsize / (8 * 1024)) { + case 64: + iommu->page_table_sz_bits = 16; + break; + case 128: + iommu->page_table_sz_bits = 17; + break; + default: + prom_printf("PCI_IOMMU: Illegal TSB size %d\n", + tsbsize / (8 * 1024)); + prom_halt(); + break; + }; + + iommu->lowest_consistent_map = + 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); + + for (i = 0; i < PBM_NCLUSTERS; i++) { + iommu->alloc_info[i].flush = 0; + iommu->alloc_info[i].next = 0; + } - tsbsize /= sizeof(iopte_t); + /* Allocate and initialize the dummy page which we + * set inactive IO PTEs to point to. + */ + iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); + if (!iommu->dummy_page) { + prom_printf("PCI_IOMMU: Error, gfp(dummy_page) failed.\n"); + prom_halt(); + } + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); + iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); + + /* Now allocate and setup the IOMMU page table itself. */ + order = get_order(tsbsize); + tsbbase = __get_free_pages(GFP_KERNEL, order); + if (!tsbbase) { + prom_printf("PCI_IOMMU: Error, gfp(tsb) failed.\n"); + prom_halt(); + } + iommu->page_table = (iopte_t *)tsbbase; - for (i = 0; i < tsbsize; i++) + for (i = 0; i < tsbsize / sizeof(iopte_t); i++) iopte_make_dummy(iommu, &iommu->page_table[i]); } diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 6ed1ef25e0..c03ed5f49d 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -1207,13 +1207,9 @@ static void psycho_scan_bus(struct pci_controller_info *p) static void psycho_iommu_init(struct pci_controller_info *p) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE; @@ -1240,40 +1236,10 @@ static void psycho_iommu_init(struct pci_controller_info *p) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - tsbbase = __get_free_pages(GFP_KERNEL, get_order(IO_TSB_SIZE)); - if (!tsbbase) { - prom_printf("PSYCHO_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_sz_bits = 17; - iommu->page_table_map_base = 0xc0000000; - iommu->dma_addr_mask = 0xffffffff; - pci_iommu_table_init(iommu, IO_TSB_SIZE); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, __pa(tsbbase)); + psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = psycho_read(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL); control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ); @@ -1281,7 +1247,7 @@ static void psycho_iommu_init(struct pci_controller_info *p) psycho_write(p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL, control); /* If necessary, hook us up for starfire IRQ translations. */ - if(this_is_starfire) + if (this_is_starfire) p->starfire_cookie = starfire_hookup(p->pbm_A.portid); else p->starfire_cookie = NULL; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 0ee6bd5b9a..da8e136419 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -1267,13 +1267,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, u32 dma_mask) { struct pci_iommu *iommu = p->pbm_A.iommu; - unsigned long tsbbase, i, order; + unsigned long i; u64 control; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses. */ iommu->iommu_control = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL; iommu->iommu_tsbbase = p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE; @@ -1295,26 +1291,10 @@ static void sabre_iommu_init(struct pci_controller_info *p, /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 1024 * 8, dvma_offset, dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - tsbbase = __get_free_pages(GFP_KERNEL, order = get_order(tsbsize * 1024 * 8)); - if (!tsbbase) { - prom_printf("SABRE_IOMMU: Error, gfp(tsb) failed.\n"); - prom_halt(); - } - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = dvma_offset; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, __pa(tsbbase)); + sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_TSBBASE, + __pa(iommu->page_table)); control = sabre_read(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL); control &= ~(SABRE_IOMMUCTRL_TSBSZ | SABRE_IOMMUCTRL_TBWSZ); @@ -1322,11 +1302,9 @@ static void sabre_iommu_init(struct pci_controller_info *p, switch(tsbsize) { case 64: control |= SABRE_IOMMU_TSBSZ_64K; - iommu->page_table_sz_bits = 16; break; case 128: control |= SABRE_IOMMU_TSBSZ_128K; - iommu->page_table_sz_bits = 17; break; default: prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); @@ -1334,15 +1312,6 @@ static void sabre_iommu_init(struct pci_controller_info *p, break; } sabre_write(p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL, control); - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } } static void pbm_register_toplevel_resources(struct pci_controller_info *p, diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index cae5b61fe2..d8c4e0919b 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -1765,7 +1765,7 @@ static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm) static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) { struct pci_iommu *iommu = pbm->iommu; - unsigned long tsbbase, i, tagbase, database, order; + unsigned long i, tagbase, database; u32 vdma[2], dma_mask; u64 control; int err, tsbsize; @@ -1800,10 +1800,6 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) prom_halt(); }; - /* Setup initial software IOMMU state. */ - spin_lock_init(&iommu->lock); - iommu->ctx_lowest_free = 1; - /* Register addresses, SCHIZO has iommu ctx flushing. */ iommu->iommu_control = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL; iommu->iommu_tsbbase = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE; @@ -1832,56 +1828,9 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ + pci_iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); - iommu->dummy_page = __get_free_pages(GFP_KERNEL, 0); - if (!iommu->dummy_page) { - prom_printf("PSYCHO_IOMMU: Error, gfp(dummy_page) failed.\n"); - prom_halt(); - } - memset((void *)iommu->dummy_page, 0, PAGE_SIZE); - iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); - - /* Using assumed page size 8K with 128K entries we need 1MB iommu page - * table (128K ioptes * 8 bytes per iopte). This is - * page order 7 on UltraSparc. - */ - order = get_order(tsbsize * 8 * 1024); - tsbbase = __get_free_pages(GFP_KERNEL, order); - if (!tsbbase) { - prom_printf("%s: Error, gfp(tsb) failed.\n", pbm->name); - prom_halt(); - } - - iommu->page_table = (iopte_t *)tsbbase; - iommu->page_table_map_base = vdma[0]; - iommu->dma_addr_mask = dma_mask; - pci_iommu_table_init(iommu, PAGE_SIZE << order); - - switch (tsbsize) { - case 64: - iommu->page_table_sz_bits = 16; - break; - - case 128: - iommu->page_table_sz_bits = 17; - break; - - default: - prom_printf("iommu_init: Illegal TSB size %d\n", tsbsize); - prom_halt(); - break; - }; - - /* We start with no consistent mappings. */ - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; - } - - schizo_write(iommu->iommu_tsbbase, __pa(tsbbase)); + schizo_write(iommu->iommu_tsbbase, __pa(iommu->page_table)); control = schizo_read(iommu->iommu_control); control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ); diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index 38bbbccb40..c067407de0 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -102,7 +102,7 @@ struct pci_iommu { u32 dma_addr_mask; }; -extern void pci_iommu_table_init(struct pci_iommu *, int); +extern void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask); /* This describes a PCI bus module's streaming buffer. */ struct pci_strbuf { -- cgit v1.2.2 From 688cb30bdc3e398d97682a6a58f825821ee838c2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 13 Oct 2005 22:15:24 -0700 Subject: [SPARC64]: Eliminate PCI IOMMU dma mapping size limit. The hairy fast allocator in the sparc64 PCI IOMMU code has a hard limit of 256 pages. Certain devices can exceed this when performing very large I/Os. So replace with a more simple allocator, based largely upon the arch/ppc64/kernel/iommu.c code. Signed-off-by: David S. Miller --- arch/sparc64/kernel/pci_iommu.c | 365 +++++++++++++++------------------------- include/asm-sparc64/pbm.h | 28 +-- 2 files changed, 145 insertions(+), 248 deletions(-) diff --git a/arch/sparc64/kernel/pci_iommu.c b/arch/sparc64/kernel/pci_iommu.c index cdee7d9fed..a11910be10 100644 --- a/arch/sparc64/kernel/pci_iommu.c +++ b/arch/sparc64/kernel/pci_iommu.c @@ -49,12 +49,6 @@ static void __iommu_flushall(struct pci_iommu *iommu) /* Ensure completion of previous PIO writes. */ (void) pci_iommu_read(iommu->write_complete_reg); - - /* Now update everyone's flush point. */ - for (entry = 0; entry < PBM_NCLUSTERS; entry++) { - iommu->alloc_info[entry].flush = - iommu->alloc_info[entry].next; - } } #define IOPTE_CONSISTENT(CTX) \ @@ -80,9 +74,61 @@ static void inline iopte_make_dummy(struct pci_iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } +/* Based largely upon the ppc64 iommu allocator. */ +static long pci_arena_alloc(struct pci_iommu *iommu, unsigned long npages) +{ + struct pci_iommu_arena *arena = &iommu->arena; + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + __iommu_flushall(iommu); + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. */ + return -1; + } + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; + } + } + + for (i = n; i < end; i++) + __set_bit(i, arena->map); + + arena->hint = end; + + return n; +} + +static void pci_arena_free(struct pci_iommu_arena *arena, unsigned long base, unsigned long npages) +{ + unsigned long i; + + for (i = base; i < (base + npages); i++) + __clear_bit(i, arena->map); +} + void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask) { - unsigned long i, tsbbase, order; + unsigned long i, tsbbase, order, sz, num_tsb_entries; + + num_tsb_entries = tsbsize / sizeof(iopte_t); /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); @@ -90,27 +136,16 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, iommu->page_table_map_base = dma_offset; iommu->dma_addr_mask = dma_addr_mask; - switch (tsbsize / (8 * 1024)) { - case 64: - iommu->page_table_sz_bits = 16; - break; - case 128: - iommu->page_table_sz_bits = 17; - break; - default: - prom_printf("PCI_IOMMU: Illegal TSB size %d\n", - tsbsize / (8 * 1024)); + /* Allocate and initialize the free area map. */ + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kmalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); prom_halt(); - break; - }; - - iommu->lowest_consistent_map = - 1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS); - - for (i = 0; i < PBM_NCLUSTERS; i++) { - iommu->alloc_info[i].flush = 0; - iommu->alloc_info[i].next = 0; } + memset(iommu->arena.map, 0, sz); + iommu->arena.limit = num_tsb_entries; /* Allocate and initialize the dummy page which we * set inactive IO PTEs to point to. @@ -132,114 +167,24 @@ void pci_iommu_table_init(struct pci_iommu *iommu, int tsbsize, u32 dma_offset, } iommu->page_table = (iopte_t *)tsbbase; - for (i = 0; i < tsbsize / sizeof(iopte_t); i++) + for (i = 0; i < num_tsb_entries; i++) iopte_make_dummy(iommu, &iommu->page_table[i]); } -static iopte_t *alloc_streaming_cluster(struct pci_iommu *iommu, unsigned long npages) +static inline iopte_t *alloc_npages(struct pci_iommu *iommu, unsigned long npages) { - iopte_t *iopte, *limit, *first; - unsigned long cnum, ent, flush_point; - - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; - iopte = (iommu->page_table + - (cnum << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - - if (cnum == 0) - limit = (iommu->page_table + - iommu->lowest_consistent_map); - else - limit = (iopte + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); + long entry; - iopte += ((ent = iommu->alloc_info[cnum].next) << cnum); - flush_point = iommu->alloc_info[cnum].flush; - - first = iopte; - for (;;) { - if (IOPTE_IS_DUMMY(iommu, iopte)) { - if ((iopte + (1 << cnum)) >= limit) - ent = 0; - else - ent = ent + 1; - iommu->alloc_info[cnum].next = ent; - if (ent == flush_point) - __iommu_flushall(iommu); - break; - } - iopte += (1 << cnum); - ent++; - if (iopte >= limit) { - iopte = (iommu->page_table + - (cnum << - (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - ent = 0; - } - if (ent == flush_point) - __iommu_flushall(iommu); - if (iopte == first) - goto bad; - } - - /* I've got your streaming cluster right here buddy boy... */ - return iopte; + entry = pci_arena_alloc(iommu, npages); + if (unlikely(entry < 0)) + return NULL; -bad: - printk(KERN_EMERG "pci_iommu: alloc_streaming_cluster of npages(%ld) failed!\n", - npages); - return NULL; + return iommu->page_table + entry; } -static void free_streaming_cluster(struct pci_iommu *iommu, dma_addr_t base, - unsigned long npages, unsigned long ctx) +static inline void free_npages(struct pci_iommu *iommu, dma_addr_t base, unsigned long npages) { - unsigned long cnum, ent; - - cnum = 0; - while ((1UL << cnum) < npages) - cnum++; - - ent = (base << (32 - IO_PAGE_SHIFT + PBM_LOGCLUSTERS - iommu->page_table_sz_bits)) - >> (32 + PBM_LOGCLUSTERS + cnum - iommu->page_table_sz_bits); - - /* If the global flush might not have caught this entry, - * adjust the flush point such that we will flush before - * ever trying to reuse it. - */ -#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y))) - if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush)) - iommu->alloc_info[cnum].flush = ent; -#undef between -} - -/* We allocate consistent mappings from the end of cluster zero. */ -static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long npages) -{ - iopte_t *iopte; - - iopte = iommu->page_table + (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS)); - while (iopte > iommu->page_table) { - iopte--; - if (IOPTE_IS_DUMMY(iommu, iopte)) { - unsigned long tmp = npages; - - while (--tmp) { - iopte--; - if (!IOPTE_IS_DUMMY(iommu, iopte)) - break; - } - if (tmp == 0) { - u32 entry = (iopte - iommu->page_table); - - if (entry < iommu->lowest_consistent_map) - iommu->lowest_consistent_map = entry; - return iopte; - } - } - } - return NULL; + pci_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages); } static int iommu_alloc_ctx(struct pci_iommu *iommu) @@ -279,7 +224,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, first_page, ctx; + unsigned long flags, order, first_page; void *ret; int npages; @@ -297,9 +242,10 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad iommu = pcp->pbm->iommu; spin_lock_irqsave(&iommu->lock, flags); - iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT); - if (iopte == NULL) { - spin_unlock_irqrestore(&iommu->lock, flags); + iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (unlikely(iopte == NULL)) { free_pages(first_page, order); return NULL; } @@ -308,31 +254,15 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); ret = (void *) first_page; npages = size >> IO_PAGE_SHIFT; - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); first_page = __pa(first_page); while (npages--) { - iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) | + iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) | IOPTE_WRITE | (first_page & IOPTE_PAGE)); iopte++; first_page += IO_PAGE_SIZE; } - { - int i; - u32 daddr = *dma_addrp; - - npages = size >> IO_PAGE_SHIFT; - for (i = 0; i < npages; i++) { - pci_iommu_write(iommu->iommu_flush, daddr); - daddr += IO_PAGE_SIZE; - } - } - - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; } @@ -342,7 +272,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ struct pcidev_cookie *pcp; struct pci_iommu *iommu; iopte_t *iopte; - unsigned long flags, order, npages, i, ctx; + unsigned long flags, order, npages; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; pcp = pdev->sysdata; @@ -352,46 +282,7 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_ spin_lock_irqsave(&iommu->lock, flags); - if ((iopte - iommu->page_table) == - iommu->lowest_consistent_map) { - iopte_t *walk = iopte + npages; - iopte_t *limit; - - limit = (iommu->page_table + - (1 << (iommu->page_table_sz_bits - PBM_LOGCLUSTERS))); - while (walk < limit) { - if (!IOPTE_IS_DUMMY(iommu, walk)) - break; - walk++; - } - iommu->lowest_consistent_map = - (walk - iommu->page_table); - } - - /* Data for consistent mappings cannot enter the streaming - * buffers, so we only need to update the TSB. We flush - * the IOMMU here as well to prevent conflicts with the - * streaming mapping deferred tlb flush scheme. - */ - - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; - - for (i = 0; i < npages; i++, iopte++) - iopte_make_dummy(iommu, iopte); - - if (iommu->iommu_ctxflush) { - pci_iommu_write(iommu->iommu_ctxflush, ctx); - } else { - for (i = 0; i < npages; i++) { - u32 daddr = dvma + (i << IO_PAGE_SHIFT); - - pci_iommu_write(iommu->iommu_flush, daddr); - } - } - - iommu_free_ctx(iommu, ctx); + free_npages(iommu, dvma, npages); spin_unlock_irqrestore(&iommu->lock, flags); @@ -418,25 +309,27 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; oaddr = (unsigned long)ptr; npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); - if (base == NULL) + if (unlikely(!base)) goto bad; + bus_addr = (iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else @@ -447,12 +340,13 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE) iopte_val(*base) = iopte_protection | base_paddr; - spin_unlock_irqrestore(&iommu->lock, flags); - return ret; bad: - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); return PCI_DMA_ERROR_CODE; } @@ -527,10 +421,13 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int struct pci_iommu *iommu; struct pci_strbuf *strbuf; iopte_t *base; - unsigned long flags, npages, ctx; + unsigned long flags, npages, ctx, i; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + return; + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -556,13 +453,14 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int /* Step 1: Kick data out of streaming buffers if necessary. */ if (strbuf->strbuf_enabled) - pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); + pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, + npages, direction); - /* Step 2: Clear out first TSB entry. */ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); @@ -667,6 +565,8 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int pci_map_single(pdev, (page_address(sglist->page) + sglist->offset), sglist->length, direction); + if (unlikely(sglist->dma_address == PCI_DMA_ERROR_CODE)) + return 0; sglist->dma_length = sglist->length; return 1; } @@ -675,21 +575,29 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) + goto bad_no_ctx; /* Step 1: Prepare scatter list. */ npages = prepare_sg(sglist, nelems); - /* Step 2: Allocate a cluster. */ + /* Step 2: Allocate a cluster and context, if necessary. */ spin_lock_irqsave(&iommu->lock, flags); - base = alloc_streaming_cluster(iommu, npages); + base = alloc_npages(iommu, npages); + ctx = 0; + if (iommu->iommu_ctxflush) + ctx = iommu_alloc_ctx(iommu); + + spin_unlock_irqrestore(&iommu->lock, flags); + if (base == NULL) goto bad; - dma_base = iommu->page_table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT); + + dma_base = iommu->page_table_map_base + + ((base - iommu->page_table) << IO_PAGE_SHIFT); /* Step 3: Normalize DMA addresses. */ used = nelems; @@ -702,30 +610,28 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int } used = nelems - used; - /* Step 4: Choose a context if necessary. */ - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = iommu_alloc_ctx(iommu); - - /* Step 5: Create the mappings. */ + /* Step 4: Create the mappings. */ if (strbuf->strbuf_enabled) iopte_protection = IOPTE_STREAMING(ctx); else iopte_protection = IOPTE_CONSISTENT(ctx); if (direction != PCI_DMA_TODEVICE) iopte_protection |= IOPTE_WRITE; - fill_sg (base, sglist, used, nelems, iopte_protection); + + fill_sg(base, sglist, used, nelems, iopte_protection); + #ifdef VERIFY_SG verify_sglist(sglist, nelems, base, npages); #endif - spin_unlock_irqrestore(&iommu->lock, flags); - return used; bad: - spin_unlock_irqrestore(&iommu->lock, flags); - return PCI_DMA_ERROR_CODE; + iommu_free_ctx(iommu, ctx); +bad_no_ctx: + if (printk_ratelimit()) + WARN_ON(1); + return 0; } /* Unmap a set of streaming mode DMA translations. */ @@ -738,8 +644,10 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, unsigned long flags, ctx, i, npages; u32 bus_addr; - if (direction == PCI_DMA_NONE) - BUG(); + if (unlikely(direction == PCI_DMA_NONE)) { + if (printk_ratelimit()) + WARN_ON(1); + } pcp = pdev->sysdata; iommu = pcp->pbm->iommu; @@ -751,7 +659,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (sglist[i].dma_length == 0) break; i--; - npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> IO_PAGE_SHIFT; + npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - + bus_addr) >> IO_PAGE_SHIFT; base = iommu->page_table + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); @@ -772,11 +681,11 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, if (strbuf->strbuf_enabled) pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); - /* Step 2: Clear out first TSB entry. */ - iopte_make_dummy(iommu, base); + /* Step 2: Clear out the TSB entries. */ + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base, - npages, ctx); + free_npages(iommu, bus_addr - iommu->page_table_map_base, npages); iommu_free_ctx(iommu, ctx); diff --git a/include/asm-sparc64/pbm.h b/include/asm-sparc64/pbm.h index c067407de0..dd35a2c779 100644 --- a/include/asm-sparc64/pbm.h +++ b/include/asm-sparc64/pbm.h @@ -27,23 +27,27 @@ * PCI bus. */ -#define PBM_LOGCLUSTERS 3 -#define PBM_NCLUSTERS (1 << PBM_LOGCLUSTERS) - struct pci_controller_info; /* This contains the software state necessary to drive a PCI * controller's IOMMU. */ +struct pci_iommu_arena { + unsigned long *map; + unsigned int hint; + unsigned int limit; +}; + struct pci_iommu { /* This protects the controller's IOMMU and all * streaming buffers underneath. */ spinlock_t lock; + struct pci_iommu_arena arena; + /* IOMMU page table, a linear array of ioptes. */ iopte_t *page_table; /* The page table itself. */ - int page_table_sz_bits; /* log2 of ow many pages does it map? */ /* Base PCI memory space address where IOMMU mappings * begin. @@ -62,12 +66,6 @@ struct pci_iommu { */ unsigned long write_complete_reg; - /* The lowest used consistent mapping entry. Since - * we allocate consistent maps out of cluster 0 this - * is relative to the beginning of closter 0. - */ - u32 lowest_consistent_map; - /* In order to deal with some buggy third-party PCI bridges that * do wrong prefetching, we never mark valid mappings as invalid. * Instead we point them at this dummy page. @@ -75,16 +73,6 @@ struct pci_iommu { unsigned long dummy_page; unsigned long dummy_page_pa; - /* If PBM_NCLUSTERS is ever decreased to 4 or lower, - * or if largest supported page_table_sz * 8K goes above - * 2GB, you must increase the size of the type of - * these counters. You have been duly warned. -DaveM - */ - struct { - u16 next; - u16 flush; - } alloc_info[PBM_NCLUSTERS]; - /* CTX allocation. */ unsigned long ctx_lowest_free; unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)]; -- cgit v1.2.2 From 6205d158d16d2619bf30f0aff47a8e09b07106e9 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 14 Oct 2005 12:24:24 +0100 Subject: [ARM] 3009/1: S3C2410 - io.h offsets too large for LDRH/STRH Patch from Ben Dooks The __inwc/__outwc calls are capable of creating LDRH and STRH instructions with offsets over 8bits as GCC does not have a constraint for an 8bit offset. Signed-off-by: Ben Dooks Signed-off-by: Russell King --- include/asm-arm/arch-s3c2410/io.h | 58 ++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/include/asm-arm/arch-s3c2410/io.h b/include/asm-arm/arch-s3c2410/io.h index 418233a7ee..4bf272ed9a 100644 --- a/include/asm-arm/arch-s3c2410/io.h +++ b/include/asm-arm/arch-s3c2410/io.h @@ -9,7 +9,7 @@ * 06-Dec-1997 RMK Created. * 02-Sep-2003 BJD Modified for S3C2410 * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA - * + * 13-Oct-2005 BJD Fixed problems with LDRH/STRH offset range */ #ifndef __ASM_ARM_ARCH_IO_H @@ -97,7 +97,7 @@ DECLARE_IO(int,l,"") else \ __asm__ __volatile__( \ "strb %0, [%1, #0] @ outbc" \ - : : "r" (value), "r" ((port))); \ + : : "r" (value), "r" ((port))); \ }) #define __inbc(port) \ @@ -110,35 +110,61 @@ DECLARE_IO(int,l,"") else \ __asm__ __volatile__( \ "ldrb %0, [%1, #0] @ inbc" \ - : "=r" (result) : "r" ((port))); \ + : "=r" (result) : "r" ((port))); \ result; \ }) #define __outwc(value,port) \ ({ \ unsigned long v = value; \ - if (__PORT_PCIO((port))) \ - __asm__ __volatile__( \ - "strh %0, [%1, %2] @ outwc" \ - : : "r" (v), "r" (PCIO_BASE), "Jr" ((port))); \ - else \ + if (__PORT_PCIO((port))) { \ + if ((port) < 256 && (port) > -256) \ + __asm__ __volatile__( \ + "strh %0, [%1, %2] @ outwc" \ + : : "r" (v), "r" (PCIO_BASE), "Jr" ((port))); \ + else if ((port) > 0) \ + __asm__ __volatile__( \ + "strh %0, [%1, %2] @ outwc" \ + : : "r" (v), \ + "r" (PCIO_BASE + ((port) & ~0xff)), \ + "Jr" (((port) & 0xff))); \ + else \ + __asm__ __volatile__( \ + "strh %0, [%1, #0] @ outwc" \ + : : "r" (v), \ + "r" (PCIO_BASE + (port))); \ + } else \ __asm__ __volatile__( \ "strh %0, [%1, #0] @ outwc" \ - : : "r" (v), "r" ((port))); \ + : : "r" (v), "r" ((port))); \ }) #define __inwc(port) \ ({ \ unsigned short result; \ - if (__PORT_PCIO((port))) \ - __asm__ __volatile__( \ - "ldrh %0, [%1, %2] @ inwc" \ - : "=r" (result) : "r" (PCIO_BASE), "Jr" ((port))); \ - else \ + if (__PORT_PCIO((port))) { \ + if ((port) < 256 && (port) > -256 ) \ + __asm__ __volatile__( \ + "ldrh %0, [%1, %2] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE), \ + "Jr" ((port))); \ + else if ((port) > 0) \ + __asm__ __volatile__( \ + "ldrh %0, [%1, %2] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE + ((port) & ~0xff)), \ + "Jr" (((port) & 0xff))); \ + else \ + __asm__ __volatile__( \ + "ldrh %0, [%1, #0] @ inwc" \ + : "=r" (result) \ + : "r" (PCIO_BASE + ((port)))); \ + } else \ __asm__ __volatile__( \ "ldrh %0, [%1, #0] @ inwc" \ - : "=r" (result) : "r" ((port))); \ - result; \ + : "=r" (result) : "r" ((port))); \ + result; \ }) #define __outlc(value,port) \ -- cgit v1.2.2 From cb90d681ae439e525de9de519508ac9041342321 Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Fri, 14 Oct 2005 12:49:15 +0100 Subject: [ARM] 2980/1: Fix L7200 core.c compile Patch from Deepak Saxena This patch fixes L7200 so that it builds in 2.6.latest. I do not have the hardware so don't know if it actually still works, but the changes are fairly trivial. I am not even sure if anyone still maintains, uses, or cares about this machine type. Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- arch/arm/mach-l7200/core.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-l7200/core.c b/arch/arm/mach-l7200/core.c index 5fd8c9f97f..03ed742ae2 100644 --- a/arch/arm/mach-l7200/core.c +++ b/arch/arm/mach-l7200/core.c @@ -7,11 +7,17 @@ */ #include #include +#include +#include +#include +#include #include #include +#include #include +#include /* * IRQ base register @@ -47,6 +53,12 @@ static void l7200_unmask_irq(unsigned int irq) { IRQ_ENABLE = 1 << irq; } + +static struct irqchip l7200_irq_chip = { + .ack = l7200_mask_irq, + .mask = l7200_mask_irq, + .unmask = l7200_unmask_irq +}; static void __init l7200_init_irq(void) { @@ -56,11 +68,9 @@ static void __init l7200_init_irq(void) FIQ_ENABLECLEAR = 0xffffffff; /* clear all fast interrupt enables */ for (irq = 0; irq < NR_IRQS; irq++) { - irq_desc[irq].valid = 1; - irq_desc[irq].probe_ok = 1; - irq_desc[irq].mask_ack = l7200_mask_irq; - irq_desc[irq].mask = l7200_mask_irq; - irq_desc[irq].unmask = l7200_unmask_irq; + set_irq_chip(irq, &l7200_irq_chip); + set_irq_flags(irq, IRQF_VALID); + set_irq_handler(irq, do_level_IRQ); } init_FIQ(); -- cgit v1.2.2 From 04f03bf7dbd04c15c30d91c6a277f6970cc4ef14 Mon Sep 17 00:00:00 2001 From: Baris Cicek Date: Fri, 14 Oct 2005 14:32:40 +0100 Subject: [SERIAL] Add SupraExpress 336i Sp ASVD modem ID Signed-off-by: Russell King --- drivers/serial/8250_pnp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index 6b321e82ca..c2786fc41c 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -272,6 +272,8 @@ static const struct pnp_device_id pnp_dev_table[] = { { "SUP1421", 0 }, /* SupraExpress 33.6 Data/Fax PnP modem */ { "SUP1590", 0 }, + /* SupraExpress 336i Sp ASVD */ + { "SUP1620", 0 }, /* SupraExpress 33.6 Data/Fax PnP modem */ { "SUP1760", 0 }, /* Phoebe Micro */ -- cgit v1.2.2 From cb38c569e5ecf9e922e66963b6da2751b4f13d81 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 14 Oct 2005 16:07:25 +0100 Subject: [ARM] 3011/1: pxafb: Add ability to set device parent + fix spitz compile error Patch from Richard Purdie Add a function to allow machines to set the parent of the pxa framebuffer device. This means the power up/down sequence can be controlled where required by the machine. Update spitz to use the new function, fixing a compile error. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/mach-pxa/generic.c | 5 +++++ arch/arm/mach-pxa/spitz.c | 2 +- include/asm-arm/arch-pxa/pxafb.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/generic.c b/arch/arm/mach-pxa/generic.c index d0660a8c4b..d327c127ed 100644 --- a/arch/arm/mach-pxa/generic.c +++ b/arch/arm/mach-pxa/generic.c @@ -208,6 +208,11 @@ static struct platform_device pxafb_device = { .resource = pxafb_resources, }; +void __init set_pxa_fb_parent(struct device *parent_dev) +{ + pxafb_device.dev.parent = parent_dev; +} + static struct platform_device ffuart_device = { .name = "pxa2xx-uart", .id = 0, diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 568afe3d6e..7eaeb24aae 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -328,7 +328,7 @@ static void __init common_init(void) platform_add_devices(devices, ARRAY_SIZE(devices)); pxa_set_mci_info(&spitz_mci_platform_data); - pxafb_device.dev.parent = &spitzssp_device.dev; + set_pxa_fb_parent(&spitzssp_device.dev); set_pxa_fb_info(&spitz_pxafb_info); } diff --git a/include/asm-arm/arch-pxa/pxafb.h b/include/asm-arm/arch-pxa/pxafb.h index 21c0e16dce..aba9b30f42 100644 --- a/include/asm-arm/arch-pxa/pxafb.h +++ b/include/asm-arm/arch-pxa/pxafb.h @@ -66,4 +66,5 @@ struct pxafb_mach_info { }; void set_pxa_fb_info(struct pxafb_mach_info *hard_pxa_fb_info); +void set_pxa_fb_parent(struct device *parent_dev); unsigned long pxafb_get_hsync_time(struct device *dev); -- cgit v1.2.2 From 10f92eb7c6b4e8069c2defd2ad23b74f31f8962d Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 14 Oct 2005 16:07:26 +0100 Subject: [ARM] 3012/1: Corgi/Spitz LCD: Use bus_find_device to locate pxafb - fix compile error Patch from Richard Purdie Update corgi_lcd to use bus_find_device to locate the pxafb device hence fixing a compile error. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/mach-pxa/corgi_lcd.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/corgi_lcd.c b/arch/arm/mach-pxa/corgi_lcd.c index c02ef7c0f7..850538fade 100644 --- a/arch/arm/mach-pxa/corgi_lcd.c +++ b/arch/arm/mach-pxa/corgi_lcd.c @@ -467,6 +467,7 @@ void corgi_put_hsync(void) { if (get_hsync_time) symbol_put(w100fb_get_hsynclen); + get_hsync_time = NULL; } void corgi_wait_hsync(void) @@ -476,20 +477,37 @@ void corgi_wait_hsync(void) #endif #ifdef CONFIG_PXA_SHARP_Cxx00 +static struct device *spitz_pxafb_dev; + +static int is_pxafb_device(struct device * dev, void * data) +{ + struct platform_device *pdev = container_of(dev, struct platform_device, dev); + + return (strncmp(pdev->name, "pxa2xx-fb", 9) == 0); +} + unsigned long spitz_get_hsync_len(void) { + if (!spitz_pxafb_dev) { + spitz_pxafb_dev = bus_find_device(&platform_bus_type, NULL, NULL, is_pxafb_device); + if (!spitz_pxafb_dev) + return 0; + } if (!get_hsync_time) get_hsync_time = symbol_get(pxafb_get_hsync_time); if (!get_hsync_time) return 0; - return pxafb_get_hsync_time(&pxafb_device.dev); + return pxafb_get_hsync_time(spitz_pxafb_dev); } void spitz_put_hsync(void) { + put_device(spitz_pxafb_dev); if (get_hsync_time) symbol_put(pxafb_get_hsync_time); + spitz_pxafb_dev = NULL; + get_hsync_time = NULL; } void spitz_wait_hsync(void) -- cgit v1.2.2 From 72023b63cc512d2d7c2a31c6bc1be497eafbd834 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 14 Oct 2005 16:07:27 +0100 Subject: [ARM] 3013/1: Spitz: Fix compile errors Patch from Richard Purdie Remove a couple of lines of accidently added code causing compile errors. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/mach-pxa/spitz.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 7eaeb24aae..d0ab428c2d 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -304,7 +303,6 @@ static struct platform_device *devices[] __initdata = { &spitzkbd_device, &spitzts_device, &spitzbl_device, - &spitzbattery_device, }; static void __init common_init(void) -- cgit v1.2.2 From 414894938b88c1ad2e9cea6502ceccefb30605c4 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 14 Oct 2005 16:07:28 +0100 Subject: [ARM] 3014/1: Spitz keyboard: Correct the right shift key Patch from Richard Purdie Correct the right shift key entry in the spitz keyboard driver. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- drivers/input/keyboard/spitzkbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c index 1714045a18..344f460054 100644 --- a/drivers/input/keyboard/spitzkbd.c +++ b/drivers/input/keyboard/spitzkbd.c @@ -53,7 +53,7 @@ static unsigned char spitzkbd_keycode[NR_SCANCODES] = { KEY_LEFTCTRL, KEY_1, KEY_3, KEY_5, KEY_6, KEY_7, KEY_9, KEY_0, KEY_BACKSPACE, SPITZ_KEY_EXOK, SPITZ_KEY_EXCANCEL, 0, 0, 0, 0, 0, /* 1-16 */ 0, KEY_2, KEY_4, KEY_R, KEY_Y, KEY_8, KEY_I, KEY_O, KEY_P, SPITZ_KEY_EXJOGDOWN, SPITZ_KEY_EXJOGUP, 0, 0, 0, 0, 0, /* 17-32 */ KEY_TAB, KEY_Q, KEY_E, KEY_T, KEY_G, KEY_U, KEY_J, KEY_K, 0, 0, 0, 0, 0, 0, 0, 0, /* 33-48 */ - SPITZ_KEY_CALENDER, KEY_W, KEY_S, KEY_F, KEY_V, KEY_H, KEY_M, KEY_L, 0, 0, KEY_RIGHTSHIFT, 0, 0, 0, 0, 0, /* 49-64 */ + SPITZ_KEY_CALENDER, KEY_W, KEY_S, KEY_F, KEY_V, KEY_H, KEY_M, KEY_L, 0, KEY_RIGHTSHIFT, 0, 0, 0, 0, 0, 0, /* 49-64 */ SPITZ_KEY_ADDRESS, KEY_A, KEY_D, KEY_C, KEY_B, KEY_N, KEY_DOT, 0, KEY_ENTER, KEY_LEFTSHIFT, 0, 0, 0, 0, 0, 0, /* 65-80 */ SPITZ_KEY_MAIL, KEY_Z, KEY_X, KEY_MINUS, KEY_SPACE, KEY_COMMA, 0, KEY_UP, 0, 0, SPITZ_KEY_FN, 0, 0, 0, 0, 0, /* 81-96 */ KEY_SYSRQ, SPITZ_KEY_JAP1, SPITZ_KEY_JAP2, SPITZ_KEY_CANCEL, SPITZ_KEY_OK, SPITZ_KEY_MENU, KEY_LEFT, KEY_DOWN, KEY_RIGHT, 0, 0, 0, 0, 0, 0, 0 /* 97-112 */ -- cgit v1.2.2 From f75884d28a6eae5a422d0454b982da3842f777af Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 14 Oct 2005 13:44:32 -0700 Subject: [QLOGICPTI]: Handle INQUIRY response sniffing correctly. These days, in 2.6.x, even INQUIRY commands are sent using scatter gather lists. Bug reported by Tom 'spot' Callaway. Signed-off-by: David S. Miller --- drivers/scsi/qlogicpti.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c index a917ab7475..1fd5fc6d0f 100644 --- a/drivers/scsi/qlogicpti.c +++ b/drivers/scsi/qlogicpti.c @@ -1119,6 +1119,36 @@ static inline void update_can_queue(struct Scsi_Host *host, u_int in_ptr, u_int host->sg_tablesize = QLOGICPTI_MAX_SG(num_free); } +static unsigned int scsi_rbuf_get(struct scsi_cmnd *cmd, unsigned char **buf_out) +{ + unsigned char *buf; + unsigned int buflen; + + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + buflen = sg->length; + } else { + buf = cmd->request_buffer; + buflen = cmd->request_bufflen; + } + + *buf_out = buf; + return buflen; +} + +static void scsi_rbuf_put(struct scsi_cmnd *cmd, unsigned char *buf) +{ + if (cmd->use_sg) { + struct scatterlist *sg; + + sg = (struct scatterlist *) cmd->request_buffer; + kunmap_atomic(buf - sg->offset, KM_IRQ0); + } +} + /* * Until we scan the entire bus with inquiries, go throught this fella... */ @@ -1145,11 +1175,9 @@ static void ourdone(struct scsi_cmnd *Cmnd) int ok = host_byte(Cmnd->result) == DID_OK; if (Cmnd->cmnd[0] == 0x12 && ok) { unsigned char *iqd; + unsigned int iqd_len; - if (Cmnd->use_sg != 0) - BUG(); - - iqd = ((unsigned char *)Cmnd->buffer); + iqd_len = scsi_rbuf_get(Cmnd, &iqd); /* tags handled in midlayer */ /* enable sync mode? */ @@ -1163,6 +1191,9 @@ static void ourdone(struct scsi_cmnd *Cmnd) if (iqd[7] & 0x20) { qpti->dev_param[tgt].device_flags |= 0x20; } + + scsi_rbuf_put(Cmnd, iqd); + qpti->sbits |= (1 << tgt); } else if (!ok) { qpti->sbits |= (1 << tgt); -- cgit v1.2.2 From b4d1b825785847cddee6d104113da913f2ca8efb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 14 Oct 2005 15:26:08 -0700 Subject: [SPARC64]: Fix powering off on SMP. Doing a "SUNW,stop-self" firmware call on the other cpus is not the correct thing to do when dropping into the firmware for a halt, reboot, or power-off. For now, just do nothing to quiet the other cpus, as the system should be quiescent enough. Later we may decide to implement smp_send_stop() like the other SMP platforms do. Based upon a report from Christopher Zimmermann. Signed-off-by: David S. Miller --- arch/sparc64/kernel/smp.c | 7 ------- arch/sparc64/mm/ultra.S | 16 ---------------- arch/sparc64/prom/misc.c | 12 ------------ 3 files changed, 35 deletions(-) diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 590df5a16f..b137fd63f5 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1001,13 +1001,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) preempt_enable(); } -extern unsigned long xcall_promstop; - -void smp_promstop_others(void) -{ - smp_cross_call(&xcall_promstop, 0, 0, 0); -} - #define prof_multiplier(__cpu) cpu_data(__cpu).multiplier #define prof_counter(__cpu) cpu_data(__cpu).counter diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 058b8126c1..e4c9151fa1 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -453,22 +453,6 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address nop nop - .globl xcall_promstop -xcall_promstop: - rdpr %pstate, %g2 - wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate - rdpr %pil, %g2 - wrpr %g0, 15, %pil - sethi %hi(109f), %g7 - b,pt %xcc, etrap_irq -109: or %g7, %lo(109b), %g7 - flushw - call prom_stopself - nop - /* We should not return, just spin if we do... */ -1: b,a,pt %xcc, 1b - nop - .data errata32_hwbug: diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index 9b895faf07..87f5cfce23 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -68,19 +68,11 @@ void prom_cmdline(void) local_irq_restore(flags); } -#ifdef CONFIG_SMP -extern void smp_promstop_others(void); -#endif - /* Drop into the prom, but completely terminate the program. * No chance of continuing. */ void prom_halt(void) { -#ifdef CONFIG_SMP - smp_promstop_others(); - udelay(8000); -#endif again: p1275_cmd("exit", P1275_INOUT(0, 0)); goto again; /* PROM is out to get me -DaveM */ @@ -88,10 +80,6 @@ again: void prom_halt_power_off(void) { -#ifdef CONFIG_SMP - smp_promstop_others(); - udelay(8000); -#endif p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0)); /* if nothing else helps, we just halt */ -- cgit v1.2.2 From 43d2c4ca385b02ab7a604aa09a9da36f1668bee6 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Fri, 14 Oct 2005 15:59:00 -0700 Subject: [PATCH] mips: fix build error in TANBAC TB0226 arch/mips/pci/fixup-tb0226.c: In function `pcibios_map_irq': arch/mips/pci/fixup-tb0226.c:31: warning: implicit declaration of function `vr41xx_set_irq_trigger' arch/mips/pci/fixup-tb0226.c:32: error: `TRIGGER_LEVEL' undeclared (first use in this function) arch/mips/pci/fixup-tb0226.c:32: error: (Each undeclared identifier is reported only once arch/mips/pci/fixup-tb0226.c:32: error: for each function it appears in.) arch/mips/pci/fixup-tb0226.c:33: error: `SIGNAL_THROUGH' undeclared (first use in this function) arch/mips/pci/fixup-tb0226.c:34: warning: implicit declaration of function `vr41xx_set_irq_level' arch/mips/pci/fixup-tb0226.c:34: error: `LEVEL_LOW' undeclared (first use in this function) Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/pci/fixup-tb0226.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c index 61513d5d97..b5d42b12de 100644 --- a/arch/mips/pci/fixup-tb0226.c +++ b/arch/mips/pci/fixup-tb0226.c @@ -1,7 +1,7 @@ /* * fixup-tb0226.c, The TANBAC TB0226 specific PCI fixups. * - * Copyright (C) 2002-2004 Yoichi Yuasa + * Copyright (C) 2002-2005 Yoichi Yuasa * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -20,6 +20,7 @@ #include #include +#include #include int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin) @@ -29,42 +30,42 @@ int __init pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin) switch (slot) { case 12: vr41xx_set_irq_trigger(GD82559_1_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); - vr41xx_set_irq_level(GD82559_1_PIN, LEVEL_LOW); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); + vr41xx_set_irq_level(GD82559_1_PIN, IRQ_LEVEL_LOW); irq = GD82559_1_IRQ; break; case 13: vr41xx_set_irq_trigger(GD82559_2_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); - vr41xx_set_irq_level(GD82559_2_PIN, LEVEL_LOW); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); + vr41xx_set_irq_level(GD82559_2_PIN, IRQ_LEVEL_LOW); irq = GD82559_2_IRQ; break; case 14: switch (pin) { case 1: vr41xx_set_irq_trigger(UPD720100_INTA_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); vr41xx_set_irq_level(UPD720100_INTA_PIN, - LEVEL_LOW); + IRQ_LEVEL_LOW); irq = UPD720100_INTA_IRQ; break; case 2: vr41xx_set_irq_trigger(UPD720100_INTB_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); vr41xx_set_irq_level(UPD720100_INTB_PIN, - LEVEL_LOW); + IRQ_LEVEL_LOW); irq = UPD720100_INTB_IRQ; break; case 3: vr41xx_set_irq_trigger(UPD720100_INTC_PIN, - TRIGGER_LEVEL, - SIGNAL_THROUGH); + IRQ_TRIGGER_LEVEL, + IRQ_SIGNAL_THROUGH); vr41xx_set_irq_level(UPD720100_INTC_PIN, - LEVEL_LOW); + IRQ_LEVEL_LOW); irq = UPD720100_INTC_IRQ; break; default: -- cgit v1.2.2 From 6edb7467be2195e7eeb6844e37668253af216100 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Fri, 14 Oct 2005 15:59:01 -0700 Subject: [PATCH] zaurus: fix compilation with cpufreq disabled This fixes compilation with CPU_FREQ disabled. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/sa1100fb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c index beeec7b514..8000890e42 100644 --- a/drivers/video/sa1100fb.c +++ b/drivers/video/sa1100fb.c @@ -592,6 +592,7 @@ sa1100fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, return ret; } +#ifdef CONFIG_CPU_FREQ /* * sa1100fb_display_dma_period() * Calculate the minimum period (in picoseconds) between two DMA @@ -606,6 +607,7 @@ static inline unsigned int sa1100fb_display_dma_period(struct fb_var_screeninfo */ return var->pixclock * 8 * 16 / var->bits_per_pixel; } +#endif /* * sa1100fb_check_var(): -- cgit v1.2.2 From 4846d0172dd7fb6a77843644caa6d9a8909225b9 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Fri, 14 Oct 2005 15:59:02 -0700 Subject: [PATCH] zaurus: fix soc_common.c This fixes wrong comments, non-working debug subsystem, and some potentially dangerous macros. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/pcmcia/soc_common.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index 888b70e6a4..9e7ccd8a43 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -66,7 +66,7 @@ void soc_pcmcia_debug(struct soc_pcmcia_socket *skt, const char *func, if (pc_debug > lvl) { printk(KERN_DEBUG "skt%u: %s: ", skt->nr, func); va_start(args, fmt); - printk(fmt, args); + vprintk(fmt, args); va_end(args); } } @@ -321,8 +321,6 @@ soc_common_pcmcia_get_socket(struct pcmcia_socket *sock, socket_state_t *state) * less punt all of this work and let the kernel handle the details * of power configuration, reset, &c. We also record the value of * `state' in order to regurgitate it to the PCMCIA core later. - * - * Returns: 0 */ static int soc_common_pcmcia_set_socket(struct pcmcia_socket *sock, socket_state_t *state) @@ -407,7 +405,7 @@ soc_common_pcmcia_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *m * the map speed as requested, but override the address ranges * supplied by Card Services. * - * Returns: 0 on success, -1 on error + * Returns: 0 on success, -ERRNO on error */ static int soc_common_pcmcia_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map *map) @@ -655,8 +653,8 @@ static void soc_pcmcia_cpufreq_unregister(void) } #else -#define soc_pcmcia_cpufreq_register() -#define soc_pcmcia_cpufreq_unregister() +static int soc_pcmcia_cpufreq_register(void) { return 0; } +static void soc_pcmcia_cpufreq_unregister(void) {} #endif int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr) @@ -738,7 +736,7 @@ int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops goto out_err_5; } - if ( list_empty(&soc_pcmcia_sockets) ) + if (list_empty(&soc_pcmcia_sockets)) soc_pcmcia_cpufreq_register(); list_add(&skt->node, &soc_pcmcia_sockets); @@ -839,7 +837,7 @@ int soc_common_drv_pcmcia_remove(struct device *dev) release_resource(&skt->res_io); release_resource(&skt->res_skt); } - if ( list_empty(&soc_pcmcia_sockets) ) + if (list_empty(&soc_pcmcia_sockets)) soc_pcmcia_cpufreq_unregister(); up(&soc_pcmcia_sockets_lock); -- cgit v1.2.2 From 65d406ace3b44e042807d3f9a2e71088818e80f2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Fri, 14 Oct 2005 15:59:03 -0700 Subject: [PATCH] zaurus: fix dependencies on collie keyboard This fixes depenencies of collie keyboard. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/input/keyboard/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 444f7756fe..571a68691a 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -93,7 +93,7 @@ config KEYBOARD_LKKBD config KEYBOARD_LOCOMO tristate "LoCoMo Keyboard Support" - depends on SHARP_LOCOMO + depends on SHARP_LOCOMO && INPUT_KEYBOARD help Say Y here if you are running Linux on a Sharp Zaurus Collie or Poodle based PDA -- cgit v1.2.2 From c6ecf7ed3131961e5aeedb0efd217afa0808798f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Oct 2005 15:59:03 -0700 Subject: [PATCH] Add missing export of getnstimeofday() Adds the missing EXPORT_SYMBOL_GPL for getnstimeofday() when CONFIG_TIME_INTERPOLATION isn't set. Needed by drivers/char/mmtimer.c Signed-off-by: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time.c b/kernel/time.c index dd5ae1162a..40c2410ac9 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -570,6 +570,7 @@ void getnstimeofday(struct timespec *tv) tv->tv_sec = x.tv_sec; tv->tv_nsec = x.tv_usec * NSEC_PER_USEC; } +EXPORT_SYMBOL_GPL(getnstimeofday); #endif #if (BITS_PER_LONG < 64) -- cgit v1.2.2 From f1ac046d7b297186f755fb213589b539426e1406 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 14 Oct 2005 15:59:04 -0700 Subject: [PATCH] radio-cadet: check request_region() return value correctly Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/radio/radio-cadet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/radio/radio-cadet.c b/drivers/media/radio/radio-cadet.c index 022913da8c..9b0406318f 100644 --- a/drivers/media/radio/radio-cadet.c +++ b/drivers/media/radio/radio-cadet.c @@ -543,7 +543,7 @@ static int cadet_probe(void) for(i=0;i<8;i++) { io=iovals[i]; - if(request_region(io,2, "cadet-probe")>=0) { + if (request_region(io, 2, "cadet-probe")) { cadet_setfreq(1410); if(cadet_getfreq()==1410) { release_region(io, 2); -- cgit v1.2.2 From e26148d934762b61133a64b6862f870624ff617d Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Fri, 14 Oct 2005 15:59:05 -0700 Subject: [PATCH] Fix copy-and-paste error in BSD accounting Fix copy and paste error in jiffies_to_AHZ conversion which leads to wrong BSD accounting information on alpha and ia64 when CONFIG_BSD_PROCESS_ACCT_V3 is turned on. Also update comment to match reorganised header files. Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/acct.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/acct.h b/include/linux/acct.h index 1993a36917..19f70462b3 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -162,13 +162,13 @@ typedef struct acct acct_t; #ifdef __KERNEL__ /* * Yet another set of HZ to *HZ helper functions. - * See for the original. + * See for the original. */ static inline u32 jiffies_to_AHZ(unsigned long x) { #if (TICK_NSEC % (NSEC_PER_SEC / AHZ)) == 0 - return x / (HZ / USER_HZ); + return x / (HZ / AHZ); #else u64 tmp = (u64)x * TICK_NSEC; do_div(tmp, (NSEC_PER_SEC / AHZ)); -- cgit v1.2.2 From 6593b58cfb54138781c5cd88f605e2ae663301b0 Mon Sep 17 00:00:00 2001 From: Matteo Croce <3297627799@wind.it> Date: Fri, 14 Oct 2005 15:59:06 -0700 Subject: [PATCH] wireless/airo: Build fix The aironet PCI driver has a build dependency on ISA that prevent the driver to compile on systems that doesn't support ISA, like x86_64. The driver really doesn't depend on ISA, it does some ISA stuff in the initialization code, since the driver supports both ISA and PCI cards. So the driver should depend on ISA_DMA_API to build on all systems, and this will not hurt PCI at all. Signed-off-by: Matteo Croce <3297627799@wind.it> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/wireless/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 00a07f32a8..7187958e40 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -243,7 +243,7 @@ config IPW_DEBUG config AIRO tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards" - depends on NET_RADIO && ISA && (PCI || BROKEN) + depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN) ---help--- This is the standard Linux driver to support Cisco/Aironet ISA and PCI 802.11 wireless cards. -- cgit v1.2.2 From a90933fb4e7b85587e5cbdf863deeb16695c19bd Mon Sep 17 00:00:00 2001 From: Hirokazu Takata Date: Fri, 14 Oct 2005 15:59:07 -0700 Subject: [PATCH] m32r: Fix smp.c for preempt kernel This patch fixes the following BUG message of arch/m32r/smp.c for CONFIG_DEBUG_PREEMPT: BUG: using smp_processor_id() in preemptible This message is displayed by an smp_processor_id() execution during kernel's preemptible-state. Signed-off-by: Hitoshi Yamamoto Signed-off-by: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/smp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index a4576ac7e8..8b1f6eb768 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -275,12 +275,14 @@ static void flush_tlb_all_ipi(void *info) *==========================================================================*/ void smp_flush_tlb_mm(struct mm_struct *mm) { - int cpu_id = smp_processor_id(); + int cpu_id; cpumask_t cpu_mask; - unsigned long *mmc = &mm->context[cpu_id]; + unsigned long *mmc; unsigned long flags; preempt_disable(); + cpu_id = smp_processor_id(); + mmc = &mm->context[cpu_id]; cpu_mask = mm->cpu_vm_mask; cpu_clear(cpu_id, cpu_mask); @@ -343,12 +345,14 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - int cpu_id = smp_processor_id(); + int cpu_id; cpumask_t cpu_mask; - unsigned long *mmc = &mm->context[cpu_id]; + unsigned long *mmc; unsigned long flags; preempt_disable(); + cpu_id = smp_processor_id(); + mmc = &mm->context[cpu_id]; cpu_mask = mm->cpu_vm_mask; cpu_clear(cpu_id, cpu_mask); -- cgit v1.2.2 From 1350843cf0fa46e2f633c78b335777aac3d054b2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 14 Oct 2005 15:59:09 -0700 Subject: [PATCH] ppc64: Fix G5 model in /proc/cpuinfo Andreas Schwab spotted that recent kernels broke reporting of the PowerMac machine model in /proc/cpuinfo. This fixes it. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/pmac_setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 2575525206..fa8121d53b 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -115,7 +115,7 @@ static void __pmac pmac_show_cpuinfo(struct seq_file *m) /* find motherboard type */ seq_printf(m, "machine\t\t: "); - np = find_devices("device-tree"); + np = of_find_node_by_path("/"); if (np != NULL) { pp = (char *) get_property(np, "model", NULL); if (pp != NULL) @@ -133,6 +133,7 @@ static void __pmac pmac_show_cpuinfo(struct seq_file *m) } seq_printf(m, "\n"); } + of_node_put(np); } else seq_printf(m, "PowerMac\n"); -- cgit v1.2.2 From 0e7734d3ca24302a513e69dd24a560c34047c038 Mon Sep 17 00:00:00 2001 From: Mark Haverkamp Date: Fri, 14 Oct 2005 15:59:10 -0700 Subject: [PATCH] aacraid: host_lock not released fix While doing some testing of error cases I ran into this bug. In some cases the reset handler can exit with the host_lock still held. Signed-off-by: Mark Haverkamp Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/aacraid/linit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index de8490a928..a1f9ceef0a 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -453,9 +453,9 @@ static int aac_eh_reset(struct scsi_cmnd* cmd) /* * We can exit If all the commands are complete */ + spin_unlock_irq(host->host_lock); if (active == 0) return SUCCESS; - spin_unlock_irq(host->host_lock); ssleep(1); spin_lock_irq(host->host_lock); } -- cgit v1.2.2 From 2d1f87a728a5ddd9ee0418e14a12e5cb0372fad1 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Fri, 14 Oct 2005 15:59:11 -0700 Subject: [PATCH] Dallas's 1-wire bus compile error drivers/built-in.o: In function `w1_alloc_dev': undefined reference to `netlink_kernel_create' drivers/built-in.o: In function `w1_alloc_dev': undefined reference to `sock_release' Signed-off-by: Evgeniy Polyakov Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/w1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 1b6b74c116..14016b1cd9 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -77,8 +77,7 @@ static void w1_master_release(struct device *dev) dev_dbg(dev, "%s: Releasing %s.\n", __func__, md->name); - if (md->nls && md->nls->sk_socket) - sock_release(md->nls->sk_socket); + dev_fini_netlink(md); memset(md, 0, sizeof(struct w1_master) + sizeof(struct w1_bus_master)); kfree(md); } -- cgit v1.2.2 From 63c6764ce4c650245a41a95a2235207d25ca4fde Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Fri, 14 Oct 2005 15:59:11 -0700 Subject: [PATCH] nommu build error fix "proc_smaps_operations" is not defined in case of "CONFIG_MMU=n". Signed-off-by: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b33f94020..a170450aad 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -103,7 +103,9 @@ enum pid_directory_inos { PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, PROC_TGID_WCHAN, +#ifdef CONFIG_MMU PROC_TGID_SMAPS, +#endif #ifdef CONFIG_SCHEDSTATS PROC_TGID_SCHEDSTAT, #endif @@ -141,7 +143,9 @@ enum pid_directory_inos { PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, PROC_TID_WCHAN, +#ifdef CONFIG_MMU PROC_TID_SMAPS, +#endif #ifdef CONFIG_SCHEDSTATS PROC_TID_SCHEDSTAT, #endif @@ -195,7 +199,9 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_MMU E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -235,7 +241,9 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_MMU E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -630,6 +638,7 @@ static struct file_operations proc_numa_maps_operations = { }; #endif +#ifdef CONFIG_MMU extern struct seq_operations proc_pid_smaps_op; static int smaps_open(struct inode *inode, struct file *file) { @@ -648,6 +657,7 @@ static struct file_operations proc_smaps_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) @@ -1681,10 +1691,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MOUNTS: inode->i_fop = &proc_mounts_operations; break; +#ifdef CONFIG_MMU case PROC_TID_SMAPS: case PROC_TGID_SMAPS: inode->i_fop = &proc_smaps_operations; break; +#endif #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; -- cgit v1.2.2 From d656901bca2e19057ca7a6e48bc56dec9ca7003e Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Fri, 14 Oct 2005 15:59:12 -0700 Subject: [PATCH] sh-sci.c sci_start_tx error Argument does not agree. Signed-off-by: Yoshinori Sato Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 5122663078..430754ebac 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -967,7 +967,7 @@ static int sci_startup(struct uart_port *port) #endif sci_request_irq(s); - sci_start_tx(port, 1); + sci_start_tx(port); sci_start_rx(port, 1); return 0; -- cgit v1.2.2 From 757e0108473787f470294ce77bf703fedddfce7d Mon Sep 17 00:00:00 2001 From: "Kolli, Neela Syam" Date: Fri, 14 Oct 2005 15:59:13 -0700 Subject: [PATCH] megaraid maintainers entry I am taking over all Megaraid SCSI drivers. Here is the patch for the MAINTENERS file. Signed-off-by: Neela Syam Kolli Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index abf7f7a17a..767fb61096 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1618,6 +1618,13 @@ M: vandrove@vc.cvut.cz L: linux-fbdev-devel@lists.sourceforge.net S: Maintained +MEGARAID SCSI DRIVERS +P: Neela Syam Kolli +M: Neela.Kolli@engenio.com +S: linux-scsi@vger.kernel.org +W: http://megaraid.lsilogic.com +S: Maintained + MEMORY TECHNOLOGY DEVICES P: David Woodhouse M: dwmw2@infradead.org -- cgit v1.2.2 From 7a3ca7d2b5ec31b2cfa594b961d77e68075e33c7 Mon Sep 17 00:00:00 2001 From: Randall Nortman Date: Fri, 14 Oct 2005 17:21:50 -0700 Subject: [PATCH] usbserial: Regression in USB generic serial driver Kernel version 2.6.13 introduced a regression in the generic USB serial converter driver (usbserial.o, drivers/usb/serial/generic.c). The bug manifests, as far as I can tell, whenever you attempt to write to the device -- the write will never complete (write() returns 0, or blocks). Signed-off-by: Randall Nortman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/usb/serial/generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index ddde5fb13f..5f7d3193d3 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -223,7 +223,7 @@ int usb_serial_generic_write_room (struct usb_serial_port *port) dbg("%s - port %d", __FUNCTION__, port->number); if (serial->num_bulk_out) { - if (port->write_urb_busy) + if (!(port->write_urb_busy)) room = port->bulk_out_size; } -- cgit v1.2.2 From e6850cce8f0fcb0e16b981f13cb9c69618bbdaf1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 15 Oct 2005 16:15:38 -0700 Subject: [NETFILTER]: Fix ip6_table.c build with NETFILTER_DEBUG enabled. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv6/netfilter/ip6_tables.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index b03e90649e..21deec25a1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -975,7 +975,6 @@ replace_table(struct ip6t_table *table, struct ip6t_entry *table_base; unsigned int i; - for (i = 0; i < num_possible_cpus(); i++) { for_each_cpu(i) { table_base = (void *)newinfo->entries -- cgit v1.2.2 From 688ce17b8599abc548b406c00e4d18ae0dec954f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Oct 2005 00:17:33 -0700 Subject: [PATCH]: highest_possible_processor_id() has to be a macro ... otherwise, things like alpha and sparc64 break and break badly. They define cpu_possible_map to something else in smp.h *AFTER* having included cpumask.h. If that puppy is a macro, expansion will happen at the actual caller, when we'd already seen #define cpu_possible_map ... and we will get the right thing used. As an inline helper it will be tokenized before we get to that define and that's it; no matter what we define later, it won't affect anything. We get modules with dependency on cpu_possible_map instead of the right symbol (phys_cpu_present_map in case of sparc64), or outright link errors if they are built-in. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/cpumask.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe9778301d..9bdba8169b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -393,15 +393,13 @@ extern cpumask_t cpu_present_map; #define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) /* Find the highest possible smp_processor_id() */ -static inline unsigned int highest_possible_processor_id(void) -{ - unsigned int cpu, highest = 0; - - for_each_cpu_mask(cpu, cpu_possible_map) - highest = cpu; - - return highest; -} +#define highest_possible_processor_id() \ +({ \ + unsigned int cpu, highest = 0; \ + for_each_cpu_mask(cpu, cpu_possible_map) \ + highest = cpu; \ + highest; \ +}) #endif /* __LINUX_CPUMASK_H */ -- cgit v1.2.2 From 3d80636a0d5f056ffc26472d05b6027a7a9f6e1c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Oct 2005 17:36:06 -0700 Subject: Fix memory ordering bug in page reclaim As noticed by Nick Piggin, we need to make sure that we check the page count before we check for PageDirty, since the dirty check is only valid if the count implies that we're the only possible ones holding the page. We always did do this, but the code needs a read-memory-barrier to make sure that the orderign is also honored by the CPU. (The writer side is ordered due to the atomic decrement and test on the page count, see the discussion on linux-kernel) Signed-off-by: Linus Torvalds --- mm/vmscan.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0ea71e887b..64f9570cff 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc) * PageDirty _after_ making sure that the page is freeable and * not in use by anybody. (pagecache + us == 2) */ - if (page_count(page) != 2 || PageDirty(page)) { - write_unlock_irq(&mapping->tree_lock); - goto keep_locked; - } + if (unlikely(page_count(page) != 2)) + goto cannot_free; + smp_rmb(); + if (unlikely(PageDirty(page))) + goto cannot_free; #ifdef CONFIG_SWAP if (PageSwapCache(page)) { @@ -538,6 +539,10 @@ free_it: __pagevec_release_nonlru(&freed_pvec); continue; +cannot_free: + write_unlock_irq(&mapping->tree_lock); + goto keep_locked; + activate_locked: SetPageActive(page); pgactivate++; -- cgit v1.2.2 From b24d18aa743dad0c42918157c5d717686269d3a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 16 Oct 2005 20:29:20 -0700 Subject: [PATCH] list: add missing rcu_dereference on first element It seems that all the list_*_rcu primitives are missing a memory barrier on the very first dereference. For example, #define list_for_each_rcu(pos, head) \ for (pos = (head)->next; prefetch(pos->next), pos != (head); \ pos = rcu_dereference(pos->next)) It will go something like: pos = (head)->next prefetch(pos->next) pos != (head) do stuff We're missing a barrier here. pos = rcu_dereference(pos->next) fetch pos->next barrier given by rcu_dereference(pos->next) store pos Without the missing barrier, the pos->next value may turn out to be stale. In fact, if "do stuff" were also dereferencing pos and relying on list_for_each_rcu to provide the barrier then it may also break. So here is a patch to make sure that we have a barrier for the first element in the list. Signed-off-by: Herbert Xu Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/include/linux/list.h b/include/linux/list.h index e6ec596822..084971f333 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -442,12 +442,14 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; pos != (head); \ - pos = rcu_dereference(pos->next)) + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) /** * list_for_each_safe_rcu - iterate over an rcu-protected list safe @@ -461,8 +463,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = rcu_dereference(n), n = pos->next) + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -474,11 +477,11 @@ static inline void list_splice_init(struct list_head *list, * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = rcu_dereference(list_entry(pos->member.next, \ - typeof(*pos), member))) +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) /** @@ -492,8 +495,9 @@ static inline void list_splice_init(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) /* * Double linked lists with a single pointer list head. @@ -696,8 +700,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, pos = n) #define hlist_for_each_rcu(pos, head) \ - for ((pos) = (head)->first; pos && ({ prefetch((pos)->next); 1; }); \ - (pos) = rcu_dereference((pos)->next)) + for ((pos) = (head)->first; \ + rcu_dereference((pos)) && ({ prefetch((pos)->next); 1; }); \ + (pos) = (pos)->next) /** * hlist_for_each_entry - iterate over list of given type @@ -762,9 +767,9 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, */ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1;}) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) + pos = pos->next) #else #warning "don't include kernel headers in userspace" -- cgit v1.2.2 From 0aec4867dca149e2049e8439b76bd82ad9dac52c Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 16 Oct 2005 20:29:22 -0700 Subject: [PATCH] SVGATextMode fix Fix bug 5441. I didn't know about messy programs like svgatextmode... Couldn't this be integrated in some linux/drivers/video/console/svgacon.c ?... So because of the existence of the svgatextmode program, the kernel is not supposed to touch to CRT_OVERFLOW/SYNC_END/DISP/DISP_END/OFFSET ? Disabling the check in vgacon_resize() might help indeed, but I'm really not sure whether it will work for any chipset: in my patch, CRT registers are set at each console switch, since stty rows/cols apply to consoles separately... The attached solution is to keep the test, but if it fails, we assume that the caller knows what it does (i.e. it is svgatextmode) and then disable any further call to vgacon_doresize. Svgatextmode is usually used to _expand_ the display, not to shrink it. And it is harmless in the case of a too big stty rows/cols: the display will just be cropped. I tested it on my laptop, and it works fine with svgatextmode. A better solution would be that svgatextmode explicitely tells the kernel not to care about video timing, but for this an interface needs be defined and svgatextmode be patched. Signed-off-by: Samuel Thibault Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/console/vgacon.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 6ef6f7760e..809fee2140 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -565,7 +565,11 @@ static int vgacon_switch(struct vc_data *c) scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf, c->vc_screenbuf_size > vga_vram_size ? vga_vram_size : c->vc_screenbuf_size); - vgacon_doresize(c, c->vc_cols, c->vc_rows); + if (!(vga_video_num_columns % 2) && + vga_video_num_columns <= ORIG_VIDEO_COLS && + vga_video_num_lines <= (ORIG_VIDEO_LINES * + vga_default_font_height) / c->vc_font.height) + vgacon_doresize(c, c->vc_cols, c->vc_rows); } return 0; /* Redrawing not needed */ @@ -1023,7 +1027,8 @@ static int vgacon_resize(struct vc_data *c, unsigned int width, if (width % 2 || width > ORIG_VIDEO_COLS || height > (ORIG_VIDEO_LINES * vga_default_font_height)/ c->vc_font.height) - return -EINVAL; + /* let svgatextmode tinker with video timings */ + return 0; if (CON_IS_VISIBLE(c) && !vga_is_gfx) /* who knows */ vgacon_doresize(c, width, height); -- cgit v1.2.2 From 9b3acc21d7677787ef456d17574a084a1c1193ae Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sun, 16 Oct 2005 20:29:24 -0700 Subject: [PATCH] fix vpx3220 offset issue in SECAM Fix bug #5404 in kernel bugzilla. It basically updates the vpx3220 initialization tables with some newer values that we've had in CVS for a while (and that, for some reason, never ended up in the kernel... must've gotten lost). Those fix a ~16 pixels noise at the top of the picture in at least SECAM, although (now that I think about it) PAL was probably affected, also. Signed-off-by: Ronald S. Bultje Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/video/vpx3220.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/media/video/vpx3220.c b/drivers/media/video/vpx3220.c index 4437bdebe2..2bafca45c0 100644 --- a/drivers/media/video/vpx3220.c +++ b/drivers/media/video/vpx3220.c @@ -203,7 +203,7 @@ static const unsigned short init_ntsc[] = { 0x8c, 640, /* Horizontal length */ 0x8d, 640, /* Number of pixels */ 0x8f, 0xc00, /* Disable window 2 */ - 0xf0, 0x173, /* 13.5 MHz transport, Forced + 0xf0, 0x73, /* 13.5 MHz transport, Forced * mode, latch windows */ 0xf2, 0x13, /* NTSC M, composite input */ 0xe7, 0x1e1, /* Enable vertical standard @@ -212,38 +212,36 @@ static const unsigned short init_ntsc[] = { static const unsigned short init_pal[] = { 0x88, 23, /* Window 1 vertical begin */ - 0x89, 288 + 16, /* Vertical lines in (16 lines + 0x89, 288, /* Vertical lines in (16 lines * skipped by the VFE) */ - 0x8a, 288 + 16, /* Vertical lines out (16 lines + 0x8a, 288, /* Vertical lines out (16 lines * skipped by the VFE) */ 0x8b, 16, /* Horizontal begin */ 0x8c, 768, /* Horizontal length */ 0x8d, 784, /* Number of pixels * Must be >= Horizontal begin + Horizontal length */ 0x8f, 0xc00, /* Disable window 2 */ - 0xf0, 0x177, /* 13.5 MHz transport, Forced + 0xf0, 0x77, /* 13.5 MHz transport, Forced * mode, latch windows */ 0xf2, 0x3d1, /* PAL B,G,H,I, composite input */ - 0xe7, 0x261, /* PAL/SECAM set to 288 + 16 lines - * change to 0x241 for 288 lines */ + 0xe7, 0x241, /* PAL/SECAM set to 288 lines */ }; static const unsigned short init_secam[] = { - 0x88, 23 - 16, /* Window 1 vertical begin */ - 0x89, 288 + 16, /* Vertical lines in (16 lines + 0x88, 23, /* Window 1 vertical begin */ + 0x89, 288, /* Vertical lines in (16 lines * skipped by the VFE) */ - 0x8a, 288 + 16, /* Vertical lines out (16 lines + 0x8a, 288, /* Vertical lines out (16 lines * skipped by the VFE) */ 0x8b, 16, /* Horizontal begin */ 0x8c, 768, /* Horizontal length */ 0x8d, 784, /* Number of pixels * Must be >= Horizontal begin + Horizontal length */ 0x8f, 0xc00, /* Disable window 2 */ - 0xf0, 0x177, /* 13.5 MHz transport, Forced + 0xf0, 0x77, /* 13.5 MHz transport, Forced * mode, latch windows */ 0xf2, 0x3d5, /* SECAM, composite input */ - 0xe7, 0x261, /* PAL/SECAM set to 288 + 16 lines - * change to 0x241 for 288 lines */ + 0xe7, 0x241, /* PAL/SECAM set to 288 lines */ }; static const unsigned char init_common[] = { -- cgit v1.2.2 From de21eb63add932c61e018d20a760dcaed8c3e40c Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sun, 16 Oct 2005 20:29:25 -0700 Subject: [PATCH] fix black/white-only svideo input in vpx3220 decoder Fix the fact that the svideo input will only give input in black/white in some circumstances. Reason is that in the PCI controller driver (zr36067), after setting input, we reset norm, which overwrites the input register with the default. This patch makes it always set the correct value for the input when changing norm. Signed-off-by: Ronald S. Bultje Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/video/vpx3220.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/media/video/vpx3220.c b/drivers/media/video/vpx3220.c index 2bafca45c0..137b58f2c6 100644 --- a/drivers/media/video/vpx3220.c +++ b/drivers/media/video/vpx3220.c @@ -408,6 +408,12 @@ vpx3220_command (struct i2c_client *client, case DECODER_SET_NORM: { int *iarg = arg, data; + int temp_input; + + /* Here we back up the input selection because it gets + overwritten when we fill the registers with the + choosen video norm */ + temp_input = vpx3220_fp_read(client, 0xf2); dprintk(1, KERN_DEBUG "%s: DECODER_SET_NORM %d\n", I2C_NAME(client), *iarg); @@ -447,6 +453,10 @@ vpx3220_command (struct i2c_client *client, } decoder->norm = *iarg; + + /* And here we set the backed up video input again */ + vpx3220_fp_write(client, 0xf2, temp_input | 0x0010); + udelay(10); } break; -- cgit v1.2.2 From 2cc78eb52bc1ae89f0a4fa5a00eb998dffde4a9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 17 Oct 2005 09:10:15 -0700 Subject: Increase default RCU batching sharply Dipankar made RCU limit the batch size to improve latency, but that approach is unworkable: it can cause the RCU queues to grow without bounds, since the batch limiter ended up limiting the callbacks. So make the limit much higher, and start planning on instead limiting the batch size by doing RCU callbacks more often if the queue looks like it might be growing too long. Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index bef3b6901b..dd99415b15 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -71,7 +71,7 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; /* Fake initialization required by compiler */ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int maxbatch = 10; +static int maxbatch = 10000; #ifndef __HAVE_ARCH_CMPXCHG /* -- cgit v1.2.2 From e9b765decfb49ddc105d303d491e1bee9769436f Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Mon, 17 Oct 2005 14:30:43 -0700 Subject: [PATCH] isp116x-hcd: fix handling of short transfers Increased use of scatter-gather by usb-storage driver after 2.6.13 has exposed a buggy codepath in isp116x-hcd, which was probably never visited before: bug happened only for those urbs, for which URB_SHORT_NOT_OK was set AND short transfer occurred. The fix attached was tested in 2 ways: (a) it fixed failing initialization of a flash drive with an embedded hub; (b) the fix was tested with 'usbtest' against a modified g_zero driver (on top of net2280), which generated short bulk IN transfers of various lengths including multiples and non-multiples of max_packet_length. Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/usb/host/isp116x-hcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 41bbae83fc..e142056b0d 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -326,7 +326,8 @@ static void postproc_atl_queue(struct isp116x *isp116x) usb_settoggle(udev, ep->epnum, ep->nextpid == USB_PID_OUT, - PTD_GET_TOGGLE(ptd) ^ 1); + PTD_GET_TOGGLE(ptd)); + urb->actual_length += PTD_GET_COUNT(ptd); urb->status = cc_to_error[TD_DATAUNDERRUN]; spin_unlock(&urb->lock); continue; -- cgit v1.2.2 From 13b58ee51802a45d2b8853ffe0003d9fa768195c Mon Sep 17 00:00:00 2001 From: Christian Krause Date: Mon, 17 Oct 2005 14:30:48 -0700 Subject: [PATCH] USB: fix bug in handling of highspeed usb HID devices During the development of an USB device I found a bug in the handling of Highspeed HID devices in the kernel. What happened? Highspeed HID devices are correctly recognized and enumerated by the kernel. But even if usbhid kernel module is loaded, no HID reports are received by the kernel. The output of the hardware USB analyzer told me that the host doesn't even poll for interrupt IN transfers (even the "interrupt in" USB transfer are polled by the host). After some debugging in hid-core.c I've found the reason. In case of a highspeed device, the endpoint interval is re-calculated in driver/usb/input/hid-core.c: line 1669: /* handle potential highspeed HID correctly */ interval = endpoint->bInterval; if (dev->speed == USB_SPEED_HIGH) interval = 1 << (interval - 1); Basically this calculation is correct (refer to USB 2.0 spec, 9.6.6). This new calculated value of "interval" is used as input for usb_fill_int_urb: line 1685: usb_fill_int_urb(hid->urbin, dev, pipe, hid->inbuf, 0, hid_irq_in, hid, interval); Unfortunately the same calculation as above is done a second time in usb_fill_int_urb in the file include/linux/usb.h: line 933: if (dev->speed == USB_SPEED_HIGH) urb->interval = 1 << (interval - 1); else urb->interval = interval; This means, that if the endpoint descriptor (of a high speed device) specifies e.g. bInterval = 7, the urb->interval gets the value: hid-core.c: interval = 1 << (7-1) = 0x40 = 64 urb->interval = 1 << (interval -1) = 1 << (63) = integer overflow Because of this the value of urb->interval is sometimes negative and is rejected in core/urb.c: line 353: /* too small? */ if (urb->interval <= 0) return -EINVAL; The conclusion is, that the recalculaton of the interval (which is necessary for highspeed) should not be made twice, because this is simply wrong. ;-) Re-calculation in usb_fill_int_urb makes more sense, because it is the most general approach. So it would make sense to remove it from hid-core.c. Because in hid-core.c the interval variable is only used for calling usb_fill_int_urb, it is no problem to remove the highspeed re-calculation in this file. Signed-off-by: Christian Krause Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/usb/input/hid-core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index a99865c689..41f92b9247 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1702,10 +1702,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf) if ((endpoint->bmAttributes & 3) != 3) /* Not an interrupt endpoint */ continue; - /* handle potential highspeed HID correctly */ interval = endpoint->bInterval; - if (dev->speed == USB_SPEED_HIGH) - interval = 1 << (interval - 1); /* Change the polling interval of mice. */ if (hid->collection->usage == HID_GD_MOUSE && hid_mousepoll_interval > 0) -- cgit v1.2.2 From b3c52da33ce95747b1bff86cce716d4f1397f14a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2005 06:02:00 -0400 Subject: [PATCH] NFS: Fix cache consistency races If the data cache has been marked as potentially invalid by nfs_refresh_inode, we should invalidate it rather than assume that changes are due to our own activity. Also ensure that we always start with a valid cache before declaring it to be protected by a delegation. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/delegation.c | 4 ++++ fs/nfs/file.c | 3 ++- fs/nfs/inode.c | 7 ++----- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d7f7eb669d..4a36839f0b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -85,6 +85,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_delegation *delegation; int status = 0; + /* Ensure we first revalidate the attributes and page cache! */ + if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR))) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + delegation = nfs_alloc_delegation(); if (delegation == NULL) return -ENOMEM; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f6b9eda925..6bdcfa95de 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -137,7 +137,8 @@ static int nfs_revalidate_file(struct inode *inode, struct file *filp) struct nfs_inode *nfsi = NFS_I(inode); int retval = 0; - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)) + || nfs_attribute_timeout(inode)) retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); nfs_revalidate_mapping(inode, filp->f_mapping); return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6922469d6f..6be46d21c0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1226,10 +1226,6 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) loff_t cur_size, new_isize; int data_unstable; - /* Do we hold a delegation? */ - if (nfs_have_delegation(inode, FMODE_READ)) - return 0; - spin_lock(&inode->i_lock); /* Are we in the process of updating data on the server? */ @@ -1350,7 +1346,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign nfsi->read_cache_jiffies = fattr->timestamp; /* Are we racing with known updates of the metadata on the server? */ - data_unstable = ! nfs_verify_change_attribute(inode, verifier); + data_unstable = ! (nfs_verify_change_attribute(inode, verifier) || + (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)); /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); -- cgit v1.2.2 From 6ce969171d5187f7621be68c0ebbc7fb02ec53f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2005 06:03:23 -0400 Subject: [PATCH] NFS: Fix Oopsable/unnecessary i_count manipulations in nfs_wait_on_inode() Oopsable since nfs_wait_on_inode() can get called as part of iput_final(). Unnecessary since the caller had better be damned sure that the inode won't disappear from underneath it anyway. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6be46d21c0..d4eadeea12 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -877,12 +877,10 @@ static int nfs_wait_on_inode(struct inode *inode) sigset_t oldmask; int error; - atomic_inc(&inode->i_count); rpc_clnt_sigmask(clnt, &oldmask); error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, nfs_wait_schedule, TASK_INTERRUPTIBLE); rpc_clnt_sigunmask(clnt, &oldmask); - iput(inode); return error; } -- cgit v1.2.2 From 47d6b08334a43fafa61a587f721fa21ef65d81be Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Oct 2005 18:49:42 +0400 Subject: [PATCH] posix-timers: fix task accounting Make sure we release the task struct properly when releasing pending timers. release_task() does write_lock_irq(&tasklist_lock), so it can't race with run_posix_cpu_timers() on any cpu. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index ad85d3f0dc..7a51a5597c 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -424,6 +424,7 @@ static void cleanup_timers(struct list_head *head, cputime_t ptime = cputime_add(utime, stime); list_for_each_entry_safe(timer, next, head, entry) { + put_task_struct(timer->task); timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, ptime)) { @@ -436,6 +437,7 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { + put_task_struct(timer->task); timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, utime)) { @@ -448,6 +450,7 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { + put_task_struct(timer->task); timer->task = NULL; list_del_init(&timer->entry); if (timer->expires.sched < sched_time) { -- cgit v1.2.2 From cc675230a9ca17010694bc8bd3c69ca9adf2efef Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 17 Oct 2005 13:01:57 -0400 Subject: [PATCH] Fix and clean up quirk_intel_ide_combined() configuration This change makes quirk_intel_ide_combined() dependent on the precise conditions under which it is needed: * IDE is built in * IDE SATA option is not set * ata_piix or ahci drivers are enabled This fixes an issue where some modular configurations would not cause the quirk to be enabled. Signed-off-by: Jeff Garzik Signed-off-by: Linus torvalds --- drivers/pci/quirks.c | 4 ++-- drivers/scsi/Kconfig | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 11ca44387c..a6a630a950 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1233,7 +1233,7 @@ static void __init quirk_alder_ioapic(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic ); #endif -#ifdef CONFIG_SCSI_SATA +#ifdef CONFIG_SCSI_SATA_INTEL_COMBINED static void __devinit quirk_intel_ide_combined(struct pci_dev *pdev) { u8 prog, comb, tmp; @@ -1310,7 +1310,7 @@ static void __devinit quirk_intel_ide_combined(struct pci_dev *pdev) request_region(0x170, 8, "libata"); /* port 1 */ } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_intel_ide_combined ); -#endif /* CONFIG_SCSI_SATA */ +#endif /* CONFIG_SCSI_SATA_INTEL_COMBINED */ int pcie_mch_quirk; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 20019b82b4..3ee9b8b33b 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -553,6 +553,11 @@ config SCSI_SATA_VITESSE If unsure, say N. +config SCSI_SATA_INTEL_COMBINED + bool + depends on IDE=y && !BLK_DEV_IDE_SATA && (SCSI_SATA_AHCI || SCSI_ATA_PIIX) + default y + config SCSI_BUSLOGIC tristate "BusLogic SCSI support" depends on (PCI || ISA || MCA) && SCSI && ISA_DMA_API -- cgit v1.2.2 From 5ee832dbc6770135ec8d63296af0a4374557bb79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2005 20:01:21 +0200 Subject: [PATCH] rcu: keep rcu callback event counter This makes call_rcu() keep track of how many events there are on the RCU list, and cause a reschedule event when the list gets too long. This helps keep RCU event lists down. Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 1 + kernel/rcupdate.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4e65eb44ad..70191a5a14 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -94,6 +94,7 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; + long count; /* # of queued items */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index dd99415b15..2559d4b8f2 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -109,6 +109,10 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + + if (unlikely(++rdp->count > 10000)) + set_need_resched(); + local_irq_restore(flags); } @@ -140,6 +144,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + rdp->count++; +/* + * Should we directly call rcu_do_batch() here ? + * if (unlikely(rdp->count > 10000)) + * rcu_do_batch(rdp); + */ local_irq_restore(flags); } @@ -157,6 +167,7 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; + rdp->count--; if (++count >= maxbatch) break; } -- cgit v1.2.2 From b65574fec5db1211bce7fc8bec7a2b32486e0670 Mon Sep 17 00:00:00 2001 From: David McCullough Date: Mon, 17 Oct 2005 16:43:29 -0700 Subject: [PATCH] output of /proc/maps on nommu systems is incomplete Currently you do not get all the map entries on nommu systems because the start function doesn't index into the list using the value of "pos". Signed-off-by: David McCullough Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/nommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index f3bf016d5e..cff10ab1af 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -91,6 +91,7 @@ static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) next = _rb; break; } + pos--; } return next; -- cgit v1.2.2 From 9ac0b9c1927228a38a71216536176af8811a435a Mon Sep 17 00:00:00 2001 From: Stephan Brodkorb Date: Mon, 17 Oct 2005 16:43:30 -0700 Subject: [PATCH] n_r3964 mod_timer() fix Since Revision 1.10 was released the n_r3964 module wasn't able to receive any data. The reason for that behavior is because there were some wrong calls of mod_timer(...) in the function receive_char (...). This patch should fix this problem and was successfully tested with talking to some kuka industrial robots. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/n_r3964.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 97d6dc24b8..853c98cee6 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -695,7 +695,7 @@ static void receive_char(struct r3964_info *pInfo, const unsigned char c) { TRACE_PE("IDLE - got STX but no space in rx_queue!"); pInfo->state=R3964_WAIT_FOR_RX_BUF; - mod_timer(&pInfo->tmr, R3964_TO_NO_BUF); + mod_timer(&pInfo->tmr, jiffies + R3964_TO_NO_BUF); break; } start_receiving: @@ -705,7 +705,7 @@ start_receiving: pInfo->last_rx = 0; pInfo->flags &= ~R3964_ERROR; pInfo->state=R3964_RECEIVING; - mod_timer(&pInfo->tmr, R3964_TO_ZVZ); + mod_timer(&pInfo->tmr, jiffies + R3964_TO_ZVZ); pInfo->nRetry = 0; put_char(pInfo, DLE); flush(pInfo); @@ -732,7 +732,7 @@ start_receiving: if(pInfo->flags & R3964_BCC) { pInfo->state = R3964_WAIT_FOR_BCC; - mod_timer(&pInfo->tmr, R3964_TO_ZVZ); + mod_timer(&pInfo->tmr, jiffies + R3964_TO_ZVZ); } else { @@ -744,7 +744,7 @@ start_receiving: pInfo->last_rx = c; char_to_buf: pInfo->rx_buf[pInfo->rx_position++] = c; - mod_timer(&pInfo->tmr, R3964_TO_ZVZ); + mod_timer(&pInfo->tmr, jiffies + R3964_TO_ZVZ); } } /* else: overflow-msg? BUF_SIZE>MTU; should not happen? */ -- cgit v1.2.2 From 5cc9eeef9a9567acdfc2f6943f24381bf460f008 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 17 Oct 2005 16:43:31 -0700 Subject: [PATCH] Fix /proc/acpi/events around suspend Fix -EIO on /proc/acpi/events after suspends. This actually breaks suspending by power button in many setups. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/event.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index bfa8b76de4..2dbb1b0f11 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -58,9 +58,8 @@ acpi_system_read_event(struct file *file, char __user * buffer, size_t count, return_VALUE(-EAGAIN); result = acpi_bus_receive_event(&event); - if (result) { - return_VALUE(-EIO); - } + if (result) + return_VALUE(result); chars_remaining = sprintf(str, "%s %s %08x %08x\n", event.device_class ? event. -- cgit v1.2.2 From e7507ed91e093b9e4e218e41ebfdce05458258fc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 17 Oct 2005 16:43:32 -0700 Subject: [PATCH] uniput - fix crash on SMP Only signal completion after marking request slot as free, otherwise other processor can free request structure before we finish using it. Signed-off-by: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/input/misc/uinput.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index d5c5b32045..4015a91f4b 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -90,11 +90,11 @@ static inline int uinput_request_reserve_slot(struct uinput_device *udev, struct static void uinput_request_done(struct uinput_device *udev, struct uinput_request *request) { - complete(&request->done); - /* Mark slot as available */ udev->requests[request->id] = NULL; wake_up_interruptible(&udev->requests_waitq); + + complete(&request->done); } static int uinput_request_submit(struct input_dev *dev, struct uinput_request *request) -- cgit v1.2.2 From 4faa5285283fad081443e3612ca426a311bb6c7e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2005 16:43:33 -0700 Subject: [PATCH] aio: revert lock_kiocb() lock_kiocb() was introduced to serialize retrying and cancellation. In the process of doing so it tried to sleep waiting for KIF_LOCKED while holding the ctx_lock spinlock. Recent fixes have ensured that multiple concurrent retries won't be attempted for a given iocb. Cancel has other problems and has no significant in-tree users that have been complaining about it. So for the immediate future we'll revert sleeping with the lock held and will address proper cancellation and retry serialization in the future. Signed-off-by: Zach Brown Acked-by: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 26 +------------------------- include/linux/aio.h | 7 ++++++- 2 files changed, 7 insertions(+), 26 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index d6b1551342..9fe7216457 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -398,7 +398,7 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx) if (unlikely(!req)) return NULL; - req->ki_flags = 1 << KIF_LOCKED; + req->ki_flags = 0; req->ki_users = 2; req->ki_key = 0; req->ki_ctx = ctx; @@ -547,25 +547,6 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id) return ioctx; } -static int lock_kiocb_action(void *param) -{ - schedule(); - return 0; -} - -static inline void lock_kiocb(struct kiocb *iocb) -{ - wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action, - TASK_UNINTERRUPTIBLE); -} - -static inline void unlock_kiocb(struct kiocb *iocb) -{ - kiocbClearLocked(iocb); - smp_mb__after_clear_bit(); - wake_up_bit(&iocb->ki_flags, KIF_LOCKED); -} - /* * use_mm * Makes the calling kernel thread take on the specified @@ -796,9 +777,7 @@ static int __aio_run_iocbs(struct kioctx *ctx) * Hold an extra reference while retrying i/o. */ iocb->ki_users++; /* grab extra reference */ - lock_kiocb(iocb); aio_run_iocb(iocb); - unlock_kiocb(iocb); if (__aio_put_req(ctx, iocb)) /* drop extra ref */ put_ioctx(ctx); } @@ -1542,7 +1521,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, spin_lock_irq(&ctx->ctx_lock); aio_run_iocb(req); - unlock_kiocb(req); if (!list_empty(&ctx->run_list)) { /* drain the run list */ while (__aio_run_iocbs(ctx)) @@ -1674,7 +1652,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (NULL != cancel) { struct io_event tmp; pr_debug("calling cancel\n"); - lock_kiocb(kiocb); memset(&tmp, 0, sizeof(tmp)); tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user; tmp.data = kiocb->ki_user_data; @@ -1686,7 +1663,6 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, if (copy_to_user(result, &tmp, sizeof(tmp))) ret = -EFAULT; } - unlock_kiocb(kiocb); } else ret = -EINVAL; diff --git a/include/linux/aio.h b/include/linux/aio.h index 60def658b2..0decf66117 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -24,7 +24,12 @@ struct kioctx; #define KIOCB_SYNC_KEY (~0U) /* ki_flags bits */ -#define KIF_LOCKED 0 +/* + * This may be used for cancel/retry serialization in the future, but + * for now it's unused and we probably don't want modules to even + * think they can use it. + */ +/* #define KIF_LOCKED 0 */ #define KIF_KICKED 1 #define KIF_CANCELLED 2 -- cgit v1.2.2 From 39ca371c45b04cd50d0974030ae051906fc516b6 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 17 Oct 2005 16:43:34 -0700 Subject: [PATCH] kbuild: Eliminate build error when KALLSYMS not defined The following build error happens with 2.6.14-rc4 when CONFIG_KALLSYMS is not defined. The error message in a fragment of the output was: CC arch/i386/lib/usercopy.o AR arch/i386/lib/lib.a /bin/sh: line 1: +@: command not found make[3]: warning: jobserver unavailable: using -j1. Add `+' to parent make rule. CHK include/linux/compile.h Signed-off-by: Mark Rustad Signed-off-by: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 504ba3ceb2..be33d758c0 100644 --- a/Makefile +++ b/Makefile @@ -660,8 +660,10 @@ quiet_cmd_sysmap = SYSMAP # Link of vmlinux # If CONFIG_KALLSYMS is set .version is already updated # Generate System.map and verify that the content is consistent - +# Use + in front of the vmlinux_version rule to silent warning with make -j2 +# First command is ':' to allow us to use + in front of the rule define rule_vmlinux__ + : $(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version)) $(call cmd,vmlinux__) -- cgit v1.2.2 From ea635a517e350eb03ab5f01618417f31b82a9a4d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 6 Oct 2005 23:12:58 -0400 Subject: SUNRPC: Retry rpcbind requests if the server's portmapper isn't up After a server crash/reboot, rebinding should always retry, otherwise requests on "hard" mounts will fail when they shouldn't. Test plan: Run a lock-intensive workload against a server while rebooting the server repeatedly. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a8f01d726..a5f7029b1d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -759,7 +759,8 @@ call_bind_status(struct rpc_task *task) case -EACCES: dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", task->tk_pid); - break; + rpc_delay(task, 3*HZ); + goto retry_bind; case -ETIMEDOUT: dprintk("RPC: %4d rpcbind request timed out\n", task->tk_pid); -- cgit v1.2.2 From 5e5ce5be6f0161d2a069a4f8a1154fe639c5c02f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:11 -0700 Subject: RPC: allow call_encode() to delay transmission of an RPC call. Currently, call_encode will cause the entire RPC call to abort if it returns an error. This is unnecessarily rigid, and gets in the way of attempts to allow the NFSv4 layer to order RPC calls that carry sequence ids. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 23 ++++++++++++----------- net/sunrpc/xprt.c | 8 ++++++++ 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 99cad3ead8..068e1fb086 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -211,6 +211,7 @@ int xprt_reserve_xprt(struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); +void xprt_abort_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a5f7029b1d..5342740563 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -678,13 +678,11 @@ call_allocate(struct rpc_task *task) static void call_encode(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; struct rpc_rqst *req = task->tk_rqstp; struct xdr_buf *sndbuf = &req->rq_snd_buf; struct xdr_buf *rcvbuf = &req->rq_rcv_buf; unsigned int bufsiz; kxdrproc_t encode; - int status; u32 *p; dprintk("RPC: %4d call_encode (status %d)\n", @@ -712,12 +710,9 @@ call_encode(struct rpc_task *task) rpc_exit(task, -EIO); return; } - if (encode && (status = rpcauth_wrap_req(task, encode, req, p, - task->tk_msg.rpc_argp)) < 0) { - printk(KERN_WARNING "%s: can't encode arguments: %d\n", - clnt->cl_protname, -status); - rpc_exit(task, status); - } + if (encode != NULL) + task->tk_status = rpcauth_wrap_req(task, encode, req, p, + task->tk_msg.rpc_argp); } /* @@ -865,10 +860,12 @@ call_transmit(struct rpc_task *task) if (task->tk_status != 0) return; /* Encode here so that rpcsec_gss can use correct sequence number. */ - if (!task->tk_rqstp->rq_bytes_sent) + if (task->tk_rqstp->rq_bytes_sent == 0) { call_encode(task); - if (task->tk_status < 0) - return; + /* Did the encode result in an error condition? */ + if (task->tk_status != 0) + goto out_nosend; + } xprt_transmit(task); if (task->tk_status < 0) return; @@ -876,6 +873,10 @@ call_transmit(struct rpc_task *task) task->tk_action = NULL; rpc_wake_up_task(task); } + return; +out_nosend: + /* release socket write lock before attempting to handle error */ + xprt_abort_transmit(task); } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 215be0d0ef..1ba55dc38b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -709,6 +709,14 @@ out_unlock: return err; } +void +xprt_abort_transmit(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + xprt_release_write(xprt, task); +} + /** * xprt_transmit - send an RPC request on a transport * @task: controlling RPC task -- cgit v1.2.2 From cee54fc944422c44e476736c045a9e8053cb0644 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:12 -0700 Subject: NFSv4: Add functions to order RPC calls NFSv4 file state-changing functions such as OPEN, CLOSE, LOCK,... are all labelled with "sequence identifiers" in order to prevent the server from reordering RPC requests, as this could cause its file state to become out of sync with the client. Currently the NFS client code enforces this ordering locally using semaphores to restrict access to structures until the RPC call is done. This, of course, only works with synchronous RPC calls, since the user process must first grab the semaphore. By dropping semaphores, and instead teaching the RPC engine to hold the RPC calls until they are ready to be sent, we can extend this process to work nicely with asynchronous RPC calls too. This patch adds a new list called "rpc_sequence" that defines the order of the RPC calls to be sent. We add one such list for each state_owner. When an RPC call is ready to be sent, it checks if it is top of the rpc_sequence list. If so, it proceeds. If not, it goes back to sleep, and loops until it hits top of the list. Once the RPC call has completed, it can then bump the sequence id counter, and remove itself from the rpc_sequence list, and then wake up the next sleeper. Note that the state_owner sequence ids and lock_owner sequence ids are all indexed to the same rpc_sequence list, so OPEN, LOCK,... requests are all ordered w.r.t. each other. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 42 ++++++++++++-- fs/nfs/nfs4proc.c | 111 +++++++++++++++++++++++------------- fs/nfs/nfs4state.c | 145 +++++++++++++++++++++++++++++++++++++++--------- fs/nfs/nfs4xdr.c | 43 +++++++++++--- include/linux/nfs_xdr.h | 15 ++--- 5 files changed, 273 insertions(+), 83 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ec1a22d7b8..6ac6708484 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -92,6 +92,35 @@ struct nfs4_client { unsigned char cl_id_uniquifier; }; +/* + * struct rpc_sequence ensures that RPC calls are sent in the exact + * order that they appear on the list. + */ +struct rpc_sequence { + struct rpc_wait_queue wait; /* RPC call delay queue */ + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ +}; + +#define NFS_SEQID_CONFIRMED 1 +struct nfs_seqid_counter { + struct rpc_sequence *sequence; + int flags; + u32 counter; +}; + +struct nfs_seqid { + struct list_head list; + struct nfs_seqid_counter *sequence; + struct rpc_task *task; +}; + +static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) +{ + if (seqid_mutating_err(-status)) + seqid->flags |= NFS_SEQID_CONFIRMED; +} + /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only @@ -106,12 +135,13 @@ struct nfs4_state_owner { struct nfs4_client *so_client; u32 so_id; /* 32-bit identifier, unique */ struct semaphore so_sema; - u32 so_seqid; /* protected by so_sema */ atomic_t so_count; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; struct list_head so_delegations; + struct nfs_seqid_counter so_seqid; + struct rpc_sequence so_sequence; }; /* @@ -132,7 +162,7 @@ struct nfs4_lock_state { fl_owner_t ls_owner; /* POSIX lock owner */ #define NFS_LOCK_INITIALIZED 1 int ls_flags; - u32 ls_seqid; + struct nfs_seqid_counter ls_seqid; u32 ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; @@ -224,12 +254,16 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); -extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern void nfs4_schedule_state_recovery(struct nfs4_client *); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); +extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); +extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); +extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); +extern void nfs_free_seqid(struct nfs_seqid *seqid); + extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9701ca8c94..9ba89e7cdd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -218,7 +218,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st struct nfs_delegation *delegation = NFS_I(inode)->delegation; struct nfs_openargs o_arg = { .fh = NFS_FH(inode), - .seqid = sp->so_seqid, .id = sp->so_id, .open_flags = state->state, .clientid = server->nfs4_state->cl_clientid, @@ -245,8 +244,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st } o_arg.u.delegation_type = delegation->type; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, o_arg.seqid); if (status == 0) { memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); if (o_res.delegation_type != 0) { @@ -256,6 +260,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st nfs_async_inode_return_delegation(inode, &o_res.stateid); } } + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); /* Ensure we update the inode attributes */ NFS_CACHEINV(inode); @@ -307,16 +312,20 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state goto out; if (state->state == 0) goto out; - arg.seqid = sp->so_seqid; + arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (arg.seqid == NULL) + goto out; arg.open_flags = state->state; memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + nfs_increment_open_seqid(status, arg.seqid); if (status >= 0) { memcpy(state->stateid.data, res.stateid.data, sizeof(state->stateid.data)); clear_bit(NFS_DELEGATED_STATE, &state->flags); } + nfs_free_seqid(arg.seqid); out: up(&sp->so_sema); dput(parent); @@ -345,11 +354,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) return err; } -static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) { struct nfs_open_confirmargs arg = { .fh = fh, - .seqid = sp->so_seqid, + .seqid = seqid, .stateid = *stateid, }; struct nfs_open_confirmres res; @@ -362,7 +371,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf int status; status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + /* Confirm the sequence as being established */ + nfs_confirm_seqid(&sp->so_seqid, status); + nfs_increment_open_seqid(status, seqid); if (status >= 0) memcpy(stateid, &res.stateid, sizeof(*stateid)); return status; @@ -380,21 +391,21 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru int status; /* Update sequence id. The caller must serialize! */ - o_arg->seqid = sp->so_seqid; o_arg->id = sp->so_id; o_arg->clientid = sp->so_client->cl_clientid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); + nfs_increment_open_seqid(status, o_arg->seqid); if (status != 0) goto out; update_changeattr(dir, &o_res->cinfo); if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(server->client, &o_res->fh, - sp, &o_res->stateid); + sp, &o_res->stateid, o_arg->seqid); if (status != 0) goto out; } + nfs_confirm_seqid(&sp->so_seqid, 0); if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); out: @@ -465,6 +476,10 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st set_bit(NFS_DELEGATED_STATE, &state->flags); goto out; } + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + status = -ENOMEM; + if (o_arg.seqid == NULL) + goto out; status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_nodeleg; @@ -490,6 +505,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); } out_nodeleg: + nfs_free_seqid(o_arg.seqid); clear_bit(NFS_DELEGATED_STATE, &state->flags); out: dput(parent); @@ -667,6 +683,9 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st /* Serialization for the sequence id */ down(&sp->so_sema); + o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (o_arg.seqid == NULL) + return -ENOMEM; status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); if (status != 0) goto out_err; @@ -681,6 +700,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st update_open_stateid(state, &o_res.stateid, flags); if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); + nfs_free_seqid(o_arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); @@ -690,6 +710,7 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); + nfs_free_seqid(o_arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); } @@ -718,7 +739,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * It is actually a sign of a bug on the client or on the server. * * If we receive a BAD_SEQID error in the particular case of - * doing an OPEN, we assume that nfs4_increment_seqid() will + * doing an OPEN, we assume that nfs_increment_open_seqid() will * have unhashed the old state_owner for us, and that we can * therefore safely retry using a new one. We should still warn * the user though... @@ -799,7 +820,7 @@ static void nfs4_close_done(struct rpc_task *task) /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ - nfs4_increment_seqid(task->tk_status, sp); + nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: memcpy(&state->stateid, &calldata->res.stateid, @@ -818,6 +839,7 @@ static void nfs4_close_done(struct rpc_task *task) } state->state = calldata->arg.open_flags; nfs4_put_open_state(state); + nfs_free_seqid(calldata->arg.seqid); up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&server->nfs4_state->cl_sem); @@ -865,7 +887,11 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) calldata->state = state; calldata->arg.fh = NFS_FH(inode); /* Serialization for the sequence id */ - calldata->arg.seqid = state->owner->so_seqid; + calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); + if (calldata->arg.seqid == NULL) { + kfree(calldata); + return -ENOMEM; + } calldata->arg.open_flags = mode; memcpy(&calldata->arg.stateid, &state->stateid, sizeof(calldata->arg.stateid)); @@ -2729,15 +2755,19 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock /* We might have lost the locks! */ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) goto out; - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); + status = -ENOMEM; + if (luargs.seqid == NULL) + goto out; + memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data)); arg.u.locku = &luargs; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); + nfs_increment_lock_seqid(status, luargs.seqid); if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, - sizeof(lsp->ls_stateid)); + memcpy(lsp->ls_stateid.data, res.u.stateid.data, + sizeof(lsp->ls_stateid.data)); + nfs_free_seqid(luargs.seqid); out: up(&state->lock_sema); if (status == 0) @@ -2783,9 +2813,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r .reclaim = reclaim, .new_lock_owner = 0, }; - int status; + struct nfs_seqid *lock_seqid; + int status = -ENOMEM; - if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { + lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (lock_seqid == NULL) + return -ENOMEM; + if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { .lock_owner = { @@ -2793,39 +2827,40 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }, }; - otl.lock_seqid = lsp->ls_seqid; + otl.lock_seqid = lock_seqid; otl.lock_owner.id = lsp->ls_id; memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); largs.u.open_lock = &otl; largs.new_lock_owner = 1; arg.u.lock = &largs; down(&owner->so_sema); - otl.open_seqid = owner->so_seqid; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment open_owner seqid on success, and - * seqid mutating errors */ - nfs4_increment_seqid(status, owner); - up(&owner->so_sema); - if (status == 0) { - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - lsp->ls_seqid++; + otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (otl.open_seqid != NULL) { + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment seqid on success, and seqid mutating errors */ + nfs_increment_open_seqid(status, otl.open_seqid); + nfs_free_seqid(otl.open_seqid); } + up(&owner->so_sema); + if (status == 0) + nfs_confirm_seqid(&lsp->ls_seqid, 0); } else { - struct nfs_exist_lock el = { - .seqid = lsp->ls_seqid, - }; + struct nfs_exist_lock el; memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); largs.u.exist_lock = ⪙ arg.u.lock = &largs; + el.seqid = lock_seqid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); } + /* increment seqid on success, and seqid mutating errors*/ + nfs_increment_lock_seqid(status, lock_seqid); /* save the returned stateid. */ - if (status == 0) - memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - else if (status == -NFS4ERR_DENIED) + if (status == 0) { + memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data)); + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; + nfs_free_seqid(lock_seqid); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index afe587d82f..f535c219cf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void) { struct nfs4_state_owner *sp; - sp = kmalloc(sizeof(*sp),GFP_KERNEL); + sp = kzalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; init_MUTEX(&sp->so_sema); - sp->so_seqid = 0; /* arbitrary */ INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); + rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); + sp->so_seqid.sequence = &sp->so_sequence; + spin_lock_init(&sp->so_sequence.lock); + INIT_LIST_HEAD(&sp->so_sequence.list); atomic_set(&sp->so_count, 1); return sp; } @@ -553,12 +556,10 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f struct nfs4_lock_state *lsp; struct nfs4_client *clp = state->owner->so_client; - lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); if (lsp == NULL) return NULL; - lsp->ls_flags = 0; - lsp->ls_seqid = 0; /* arbitrary */ - memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + lsp->ls_seqid.sequence = &state->owner->so_sequence; atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; spin_lock(&clp->cl_lock); @@ -673,29 +674,102 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f nfs4_put_lock_state(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) +{ + struct rpc_sequence *sequence = counter->sequence; + struct nfs_seqid *new; + + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (new != NULL) { + new->sequence = counter; + new->task = NULL; + spin_lock(&sequence->lock); + list_add_tail(&new->list, &sequence->list); + spin_unlock(&sequence->lock); + } + return new; +} + +void nfs_free_seqid(struct nfs_seqid *seqid) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; + struct rpc_sequence *sequence = seqid->sequence->sequence; + struct rpc_task *next = NULL; + + spin_lock(&sequence->lock); + list_del(&seqid->list); + if (!list_empty(&sequence->list)) { + next = list_entry(sequence->list.next, struct nfs_seqid, list)->task; + if (next) + rpc_wake_up_task(next); + } + spin_unlock(&sequence->lock); + kfree(seqid); } /* -* Called with sp->so_sema and clp->cl_sem held. -* -* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or -* failed with a seqid incrementing error - -* see comments nfs_fs.h:seqid_mutating_error() -*/ -void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) -{ - if (status == NFS_OK || seqid_mutating_err(-status)) - sp->so_seqid++; - /* If the server returns BAD_SEQID, unhash state_owner here */ - if (status == -NFS4ERR_BAD_SEQID) + * Called with sp->so_sema and clp->cl_sem held. + * + * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid) +{ + switch (status) { + case 0: + break; + case -NFS4ERR_BAD_SEQID: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_BADXDR: + case -NFS4ERR_RESOURCE: + case -NFS4ERR_NOFILEHANDLE: + /* Non-seqid mutating errors */ + return; + }; + /* + * Note: no locking needed as we are guaranteed to be first + * on the sequence list + */ + seqid->sequence->counter++; +} + +void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) +{ + if (status == -NFS4ERR_BAD_SEQID) { + struct nfs4_state_owner *sp = container_of(seqid->sequence, + struct nfs4_state_owner, so_seqid); nfs4_drop_state_owner(sp); + } + return nfs_increment_seqid(status, seqid); +} + +/* + * Called with ls->lock_sema and clp->cl_sem held. + * + * Increment the seqid if the LOCK/LOCKU succeeded, or + * failed with a seqid incrementing error - + * see comments nfs_fs.h:seqid_mutating_error() + */ +void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) +{ + return nfs_increment_seqid(status, seqid); +} + +int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) +{ + struct rpc_sequence *sequence = seqid->sequence->sequence; + int status = 0; + + spin_lock(&sequence->lock); + if (sequence->list.next != &seqid->list) { + seqid->task = task; + rpc_sleep_on(&sequence->wait, task, NULL, NULL); + status = -EAGAIN; + } + spin_unlock(&sequence->lock); + return status; } static int reclaimer(void *); @@ -791,8 +865,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n if (state->state == 0) continue; status = ops->recover_open(sp, state); - list_for_each_entry(lock, &state->lock_states, ls_locks) - lock->ls_flags &= ~NFS_LOCK_INITIALIZED; if (status >= 0) { status = nfs4_reclaim_locks(ops, state); if (status < 0) @@ -831,6 +903,26 @@ out_err: return status; } +static void nfs4_state_mark_reclaim(struct nfs4_client *clp) +{ + struct nfs4_state_owner *sp; + struct nfs4_state *state; + struct nfs4_lock_state *lock; + + /* Reset all sequence ids to zero */ + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + sp->so_seqid.counter = 0; + sp->so_seqid.flags = 0; + list_for_each_entry(state, &sp->so_states, open_states) { + list_for_each_entry(lock, &state->lock_states, ls_locks) { + lock->ls_seqid.counter = 0; + lock->ls_seqid.flags = 0; + lock->ls_flags &= ~NFS_LOCK_INITIALIZED; + } + } + } +} + static int reclaimer(void *ptr) { struct reclaimer_args *args = (struct reclaimer_args *)ptr; @@ -864,6 +956,7 @@ restart_loop: default: ops = &nfs4_network_partition_recovery_ops; }; + nfs4_state_mark_reclaim(clp); status = __nfs4_init_client(clp); if (status) goto out_error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6c564ef948..fcd28a29a2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -604,7 +604,7 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_CLOSE); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); return 0; @@ -732,9 +732,9 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) struct nfs_open_to_lock *ol = opargs->u.open_lock; RESERVE_SPACE(40); - WRITE32(ol->open_seqid); + WRITE32(ol->open_seqid->sequence->counter); WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); - WRITE32(ol->lock_seqid); + WRITE32(ol->lock_seqid->sequence->counter); WRITE64(ol->lock_owner.clientid); WRITE32(4); WRITE32(ol->lock_owner.id); @@ -744,7 +744,7 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) RESERVE_SPACE(20); WRITEMEM(&el->stateid, sizeof(el->stateid)); - WRITE32(el->seqid); + WRITE32(el->seqid->sequence->counter); } return 0; @@ -775,7 +775,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) RESERVE_SPACE(44); WRITE32(OP_LOCKU); WRITE32(arg->type); - WRITE32(opargs->seqid); + WRITE32(opargs->seqid->sequence->counter); WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); WRITE64(arg->offset); WRITE64(arg->length); @@ -826,7 +826,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena */ RESERVE_SPACE(8); WRITE32(OP_OPEN); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); RESERVE_SPACE(16); WRITE64(arg->clientid); @@ -941,7 +941,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_CONFIRM); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); return 0; } @@ -953,7 +953,7 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_DOWNGRADE); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; } @@ -1416,6 +1416,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1437,6 +1440,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1464,6 +1470,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1485,6 +1494,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1506,6 +1518,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct }; int status; + status = nfs_wait_on_sequence(args->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1525,8 +1540,17 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka struct compound_hdr hdr = { .nops = 2, }; + struct nfs_lock_opargs *opargs = args->u.lock; + struct nfs_seqid *seqid; int status; + if (opargs->new_lock_owner) + seqid = opargs->u.open_lock->lock_seqid; + else + seqid = opargs->u.exist_lock->seqid; + status = nfs_wait_on_sequence(seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1569,6 +1593,9 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock }; int status; + status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task); + if (status != 0) + goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index a2bf6914ff..d578912bf9 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -96,12 +96,13 @@ struct nfs4_change_info { u64 after; }; +struct nfs_seqid; /* * Arguments to the open call. */ struct nfs_openargs { const struct nfs_fh * fh; - __u32 seqid; + struct nfs_seqid * seqid; int open_flags; __u64 clientid; __u32 id; @@ -136,7 +137,7 @@ struct nfs_openres { struct nfs_open_confirmargs { const struct nfs_fh * fh; nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; }; struct nfs_open_confirmres { @@ -149,7 +150,7 @@ struct nfs_open_confirmres { struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; int open_flags; }; @@ -165,15 +166,15 @@ struct nfs_lowner { }; struct nfs_open_to_lock { - __u32 open_seqid; + struct nfs_seqid * open_seqid; nfs4_stateid open_stateid; - __u32 lock_seqid; + struct nfs_seqid * lock_seqid; struct nfs_lowner lock_owner; }; struct nfs_exist_lock { nfs4_stateid stateid; - __u32 seqid; + struct nfs_seqid * seqid; }; struct nfs_lock_opargs { @@ -186,7 +187,7 @@ struct nfs_lock_opargs { }; struct nfs_locku_opargs { - __u32 seqid; + struct nfs_seqid * seqid; nfs4_stateid stateid; }; -- cgit v1.2.2 From 9512135df14f8293b9bc5e8fb22d4279dee5ff66 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:12 -0700 Subject: NFSv4: Fix a potential CLOSE race Once the state_owner and lock_owner semaphores get removed, it will be possible for other OPEN requests to reopen the same file if they have lower sequence ids than our CLOSE call. This patch ensures that we recheck the file state once nfs_wait_on_sequence() has completed waiting. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 110 +++++++++++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 6 ++- fs/nfs/nfs4xdr.c | 14 ++---- include/linux/nfs_xdr.h | 2 +- 4 files changed, 87 insertions(+), 45 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9ba89e7cdd..5154ddf6d9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -189,6 +189,21 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf nfsi->change_attr = cinfo->after; } +/* Helper for asynchronous RPC calls */ +static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin, + rpc_action tk_exit, void *calldata) +{ + struct rpc_task *task; + + if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC))) + return -ENOMEM; + + task->tk_calldata = calldata; + task->tk_action = tk_begin; + rpc_execute(task); + return 0; +} + static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) { struct inode *inode = state->inode; @@ -810,11 +825,24 @@ struct nfs4_closedata { struct nfs_closeres res; }; +static void nfs4_free_closedata(struct nfs4_closedata *calldata) +{ + struct nfs4_state *state = calldata->state; + struct nfs4_state_owner *sp = state->owner; + struct nfs_server *server = NFS_SERVER(calldata->inode); + + nfs4_put_open_state(calldata->state); + nfs_free_seqid(calldata->arg.seqid); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&server->nfs4_state->cl_sem); + kfree(calldata); +} + static void nfs4_close_done(struct rpc_task *task) { struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; struct nfs4_state *state = calldata->state; - struct nfs4_state_owner *sp = state->owner; struct nfs_server *server = NFS_SERVER(calldata->inode); /* hmm. we are done with the inode, and in the process of freeing @@ -838,25 +866,46 @@ static void nfs4_close_done(struct rpc_task *task) } } state->state = calldata->arg.open_flags; - nfs4_put_open_state(state); - nfs_free_seqid(calldata->arg.seqid); - up(&sp->so_sema); - nfs4_put_state_owner(sp); - up_read(&server->nfs4_state->cl_sem); - kfree(calldata); + nfs4_free_closedata(calldata); } -static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata) +static void nfs4_close_begin(struct rpc_task *task) { + struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata; + struct nfs4_state *state = calldata->state; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &calldata->arg, .rpc_resp = &calldata->res, - .rpc_cred = calldata->state->owner->so_cred, + .rpc_cred = state->owner->so_cred, }; - if (calldata->arg.open_flags != 0) + int mode = 0; + int status; + + status = nfs_wait_on_sequence(calldata->arg.seqid, task); + if (status != 0) + return; + /* Don't reorder reads */ + smp_rmb(); + /* Recalculate the new open mode in case someone reopened the file + * while we were waiting in line to be scheduled. + */ + if (state->nreaders != 0) + mode |= FMODE_READ; + if (state->nwriters != 0) + mode |= FMODE_WRITE; + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + state->state = mode; + if (mode == state->state) { + nfs4_free_closedata(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + if (mode != 0) msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata); + calldata->arg.open_flags = mode; + rpc_call_setup(task, &msg, 0); } /* @@ -873,35 +922,30 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata * int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) { struct nfs4_closedata *calldata; - int status; + int status = -ENOMEM; - /* Tell caller we're done */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { - state->state = mode; - return 0; - } - calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) - return -ENOMEM; + goto out; calldata->inode = inode; calldata->state = state; calldata->arg.fh = NFS_FH(inode); + calldata->arg.stateid = &state->stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); - if (calldata->arg.seqid == NULL) { - kfree(calldata); - return -ENOMEM; - } - calldata->arg.open_flags = mode; - memcpy(&calldata->arg.stateid, &state->stateid, - sizeof(calldata->arg.stateid)); - status = nfs4_close_call(NFS_SERVER(inode)->client, calldata); - /* - * Return -EINPROGRESS on success in order to indicate to the - * caller that an asynchronous RPC call has been launched, and - * that it will release the semaphores on completion. - */ - return (status == 0) ? -EINPROGRESS : status; + if (calldata->arg.seqid == NULL) + goto out_free_calldata; + + status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin, + nfs4_close_done, calldata); + if (status == 0) + goto out; + + nfs_free_seqid(calldata->arg.seqid); +out_free_calldata: + kfree(calldata); +out: + return status; } struct inode * diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f535c219cf..59c93f37e1 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -518,7 +518,11 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) newstate |= FMODE_WRITE; if (state->state == newstate) goto out; - if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS) + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { + state->state = newstate; + goto out; + } + if (nfs4_do_close(inode, state, newstate) == 0) return; } out: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fcd28a29a2..934ec50ea6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -602,10 +602,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_CLOSE); WRITE32(arg->seqid->sequence->counter); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); return 0; } @@ -950,9 +950,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid->data)); WRITE32(OP_OPEN_DOWNGRADE); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->open_flags); return 0; @@ -1416,9 +1416,6 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -1518,9 +1515,6 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct }; int status; - status = nfs_wait_on_sequence(args->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d578912bf9..cac0df950c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -149,7 +149,7 @@ struct nfs_open_confirmres { */ struct nfs_closeargs { struct nfs_fh * fh; - nfs4_stateid stateid; + nfs4_stateid * stateid; struct nfs_seqid * seqid; int open_flags; }; -- cgit v1.2.2 From e6dfa553cffcb9740f932311dff42f81d6ac63bb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:13 -0700 Subject: NFSv4: Remove obsolete state_owner and lock_owner semaphores OPEN, CLOSE, etc no longer need these semaphores to ensure ordering of requests. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 6 ------ fs/nfs/nfs4proc.c | 18 ------------------ fs/nfs/nfs4state.c | 15 ++++----------- 3 files changed, 4 insertions(+), 35 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 6ac6708484..d4fcb5d0ce 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -125,16 +125,11 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. - * - * The ->so_sema is held during all state_owner seqid-mutating operations: - * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize - * so_seqid. */ struct nfs4_state_owner { struct list_head so_list; /* per-clientid list of state_owners */ struct nfs4_client *so_client; u32 so_id; /* 32-bit identifier, unique */ - struct semaphore so_sema; atomic_t so_count; struct rpc_cred *so_cred; /* Associated cred */ @@ -183,7 +178,6 @@ struct nfs4_state { struct inode *inode; /* Pointer to the inode */ unsigned long flags; /* Do we hold any locks? */ - struct semaphore lock_sema; /* Serializes file locking operations */ spinlock_t state_lock; /* Protects the lock_states list */ nfs4_stateid stateid; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5154ddf6d9..25bab6032d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -224,7 +224,6 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, /* * OPEN_RECLAIM: * reclaim state on the server after a reboot. - * Assumes caller is holding the sp->so_sem */ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) { @@ -322,7 +321,6 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state }; int status = 0; - down(&sp->so_sema); if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) goto out; if (state->state == 0) @@ -342,7 +340,6 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state } nfs_free_seqid(arg.seqid); out: - up(&sp->so_sema); dput(parent); return status; } @@ -595,7 +592,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); goto out_err; } - down(&sp->so_sema); state = nfs4_get_open_state(inode, sp); if (state == NULL) goto out_err; @@ -620,7 +616,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred set_bit(NFS_DELEGATED_STATE, &state->flags); update_open_stateid(state, &delegation->stateid, open_flags); out_ok: - up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&nfsi->rwsem); up_read(&clp->cl_sem); @@ -631,7 +626,6 @@ out_err: if (sp != NULL) { if (state != NULL) nfs4_put_open_state(state); - up(&sp->so_sema); nfs4_put_state_owner(sp); } up_read(&nfsi->rwsem); @@ -696,7 +690,6 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st } else o_arg.u.attrs = sattr; /* Serialization for the sequence id */ - down(&sp->so_sema); o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); if (o_arg.seqid == NULL) @@ -716,7 +709,6 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); nfs_free_seqid(o_arg.seqid); - up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&clp->cl_sem); *res = state; @@ -726,7 +718,6 @@ out_err: if (state != NULL) nfs4_put_open_state(state); nfs_free_seqid(o_arg.seqid); - up(&sp->so_sema); nfs4_put_state_owner(sp); } /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */ @@ -833,7 +824,6 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata) nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); - up(&sp->so_sema); nfs4_put_state_owner(sp); up_read(&server->nfs4_state->cl_sem); kfree(calldata); @@ -2702,7 +2692,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; - down(&state->lock_sema); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; @@ -2729,7 +2718,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = 0; } out: - up(&state->lock_sema); up_read(&clp->cl_sem); return status; } @@ -2791,7 +2779,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock int status; down_read(&clp->cl_sem); - down(&state->lock_sema); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; @@ -2813,7 +2800,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock sizeof(lsp->ls_stateid.data)); nfs_free_seqid(luargs.seqid); out: - up(&state->lock_sema); if (status == 0) do_vfs_lock(request->fl_file, request); up_read(&clp->cl_sem); @@ -2877,7 +2863,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r largs.u.open_lock = &otl; largs.new_lock_owner = 1; arg.u.lock = &largs; - down(&owner->so_sema); otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); if (otl.open_seqid != NULL) { status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); @@ -2885,7 +2870,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r nfs_increment_open_seqid(status, otl.open_seqid); nfs_free_seqid(otl.open_seqid); } - up(&owner->so_sema); if (status == 0) nfs_confirm_seqid(&lsp->ls_seqid, 0); } else { @@ -2944,11 +2928,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock int status; down_read(&clp->cl_sem); - down(&state->lock_sema); status = nfs4_set_lock_state(state, request); if (status == 0) status = _nfs4_do_setlk(state, cmd, request, 0); - up(&state->lock_sema); if (status == 0) { /* Note: we always want to sleep here! */ request->fl_flags |= FL_SLEEP; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 59c93f37e1..86c08c165c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -267,7 +267,6 @@ nfs4_alloc_state_owner(void) sp = kzalloc(sizeof(*sp),GFP_KERNEL); if (!sp) return NULL; - init_MUTEX(&sp->so_sema); INIT_LIST_HEAD(&sp->so_states); INIT_LIST_HEAD(&sp->so_delegations); rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); @@ -362,7 +361,6 @@ nfs4_alloc_open_state(void) memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); - init_MUTEX(&state->lock_sema); spin_lock_init(&state->state_lock); return state; } @@ -444,7 +442,6 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) state = __nfs4_find_state_byowner(inode, owner); if (state == NULL && new != NULL) { state = new; - /* Caller *must* be holding owner->so_sem */ /* Note: The reclaim code dictates that we add stateless * and read-only stateids to the end of the list */ list_add_tail(&state->open_states, &owner->so_states); @@ -464,7 +461,7 @@ out: /* * Beware! Caller must be holding exactly one - * reference to clp->cl_sem and owner->so_sema! + * reference to clp->cl_sem! */ void nfs4_put_open_state(struct nfs4_state *state) { @@ -485,7 +482,6 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Beware! Caller must be holding no references to clp->cl_sem! - * of owner->so_sema! */ void nfs4_close_state(struct nfs4_state *state, mode_t mode) { @@ -496,7 +492,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) atomic_inc(&owner->so_count); down_read(&clp->cl_sem); - down(&owner->so_sema); /* Protect against nfs4_find_state() */ spin_lock(&inode->i_lock); if (mode & FMODE_READ) @@ -527,7 +522,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) } out: nfs4_put_open_state(state); - up(&owner->so_sema); nfs4_put_state_owner(owner); up_read(&clp->cl_sem); } @@ -553,7 +547,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema */ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { @@ -577,7 +570,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. * - * The caller must be holding state->lock_sema and clp->cl_sem + * The caller must be holding clp->cl_sem */ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { @@ -711,7 +704,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid) } /* - * Called with sp->so_sema and clp->cl_sem held. + * Called with clp->cl_sem held. * * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - @@ -750,7 +743,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) } /* - * Called with ls->lock_sema and clp->cl_sem held. + * Called with clp->cl_sem held. * * Increment the seqid if the LOCK/LOCKU succeeded, or * failed with a seqid incrementing error - -- cgit v1.2.2 From 83c9d41e456033000ccfadf535f3098d8739488d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:13 -0700 Subject: NFSv4: Remove nfs4_client->cl_sem from close() path We no longer need to worry about collisions between close() and the state recovery code, since the new close will automatically recheck the file state once it is done waiting on its sequence slot. Ditto for the nfs4_proc_locku() procedure. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ----- fs/nfs/nfs4state.c | 9 +-------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 25bab6032d..611052baca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -820,12 +820,10 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata) { struct nfs4_state *state = calldata->state; struct nfs4_state_owner *sp = state->owner; - struct nfs_server *server = NFS_SERVER(calldata->inode); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); - up_read(&server->nfs4_state->cl_sem); kfree(calldata); } @@ -2758,7 +2756,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_client *clp = server->nfs4_state; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -2778,7 +2775,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock struct nfs_locku_opargs luargs; int status; - down_read(&clp->cl_sem); status = nfs4_set_lock_state(state, request); if (status != 0) goto out; @@ -2802,7 +2798,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock out: if (status == 0) do_vfs_lock(request->fl_file, request); - up_read(&clp->cl_sem); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 86c08c165c..bb35743619 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -481,17 +481,15 @@ void nfs4_put_open_state(struct nfs4_state *state) } /* - * Beware! Caller must be holding no references to clp->cl_sem! + * Close the current file. */ void nfs4_close_state(struct nfs4_state *state, mode_t mode) { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - struct nfs4_client *clp = owner->so_client; int newstate; atomic_inc(&owner->so_count); - down_read(&clp->cl_sem); /* Protect against nfs4_find_state() */ spin_lock(&inode->i_lock); if (mode & FMODE_READ) @@ -523,7 +521,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) out: nfs4_put_open_state(state); nfs4_put_state_owner(owner); - up_read(&clp->cl_sem); } /* @@ -704,8 +701,6 @@ void nfs_free_seqid(struct nfs_seqid *seqid) } /* - * Called with clp->cl_sem held. - * * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() @@ -743,8 +738,6 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) } /* - * Called with clp->cl_sem held. - * * Increment the seqid if the LOCK/LOCKU succeeded, or * failed with a seqid incrementing error - * see comments nfs_fs.h:seqid_mutating_error() -- cgit v1.2.2 From 0a8838f972883112f0a7b259141b24db17583c2d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:14 -0700 Subject: NFSv4: Add missing handling of OPEN_CONFIRM requests on CLAIM_DELEGATE_CUR. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 611052baca..f57dba8150 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -56,6 +56,7 @@ #define NFS4_POLL_RETRY_MIN (1*HZ) #define NFS4_POLL_RETRY_MAX (15*HZ) +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); @@ -333,11 +334,21 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); nfs_increment_open_seqid(status, arg.seqid); + if (status != 0) + goto out_free; + if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode), + sp, &res.stateid, arg.seqid); + if (status != 0) + goto out_free; + } + nfs_confirm_seqid(&sp->so_seqid, 0); if (status >= 0) { memcpy(state->stateid.data, res.stateid.data, sizeof(state->stateid.data)); clear_bit(NFS_DELEGATED_STATE, &state->flags); } +out_free: nfs_free_seqid(arg.seqid); out: dput(parent); @@ -366,7 +377,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) return err; } -static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid) { struct nfs_open_confirmargs arg = { .fh = fh, -- cgit v1.2.2 From faf5f49c2d9c0af2847837c232a432cc146e203b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:15 -0700 Subject: NFSv4: Make NFS clean up byte range locks asynchronously Currently we fail to do so if the process was signalled. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 161 +++++++++++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 2 +- fs/nfs/nfs4xdr.c | 9 +-- include/linux/nfs_xdr.h | 2 +- 5 files changed, 116 insertions(+), 59 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d4fcb5d0ce..2215cdee43 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -249,6 +249,7 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f57dba8150..612a9a14ae 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -58,9 +58,9 @@ static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); +static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -2422,7 +2422,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen } static int -nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { struct nfs4_client *clp = server->nfs4_state; @@ -2500,7 +2500,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs4_client *clp = server->nfs4_state; int ret = errorcode; @@ -2763,68 +2763,127 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) return res; } -static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +struct nfs4_unlockdata { + struct nfs_lockargs arg; + struct nfs_locku_opargs luargs; + struct nfs_lockres res; + struct nfs4_lock_state *lsp; + struct nfs_open_context *ctx; + atomic_t refcount; + struct completion completion; +}; + +static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata) { - struct inode *inode = state->inode; - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_lockargs arg = { - .fh = NFS_FH(inode), - .type = nfs4_lck_type(cmd, request), - .offset = request->fl_start, - .length = nfs4_lck_length(request), - }; - struct nfs_lockres res = { - .server = server, - }; + if (atomic_dec_and_test(&calldata->refcount)) { + nfs_free_seqid(calldata->luargs.seqid); + nfs4_put_lock_state(calldata->lsp); + put_nfs_open_context(calldata->ctx); + kfree(calldata); + } +} + +static void nfs4_locku_complete(struct nfs4_unlockdata *calldata) +{ + complete(&calldata->completion); + nfs4_locku_release_calldata(calldata); +} + +static void nfs4_locku_done(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; + + nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid); + switch (task->tk_status) { + case 0: + memcpy(calldata->lsp->ls_stateid.data, + calldata->res.u.stateid.data, + sizeof(calldata->lsp->ls_stateid.data)); + break; + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_state_recovery(calldata->res.server->nfs4_state); + break; + default: + if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) { + rpc_restart_call(task); + return; + } + } + nfs4_locku_complete(calldata); +} + +static void nfs4_locku_begin(struct rpc_task *task) +{ + struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], - .rpc_argp = &arg, - .rpc_resp = &res, - .rpc_cred = state->owner->so_cred, + .rpc_argp = &calldata->arg, + .rpc_resp = &calldata->res, + .rpc_cred = calldata->lsp->ls_state->owner->so_cred, }; + int status; + + status = nfs_wait_on_sequence(calldata->luargs.seqid, task); + if (status != 0) + return; + if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { + nfs4_locku_complete(calldata); + task->tk_exit = NULL; + rpc_exit(task, 0); + return; + } + rpc_call_setup(task, &msg, 0); +} + +static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_unlockdata *calldata; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp; - struct nfs_locku_opargs luargs; int status; - + status = nfs4_set_lock_state(state, request); if (status != 0) - goto out; + return status; lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! */ if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) - goto out; - luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); - status = -ENOMEM; - if (luargs.seqid == NULL) - goto out; - memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs_increment_lock_seqid(status, luargs.seqid); - - if (status == 0) - memcpy(lsp->ls_stateid.data, res.u.stateid.data, - sizeof(lsp->ls_stateid.data)); - nfs_free_seqid(luargs.seqid); -out: + return 0; + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + return -ENOMEM; + calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (calldata->luargs.seqid == NULL) { + kfree(calldata); + return -ENOMEM; + } + calldata->luargs.stateid = &lsp->ls_stateid; + calldata->arg.fh = NFS_FH(inode); + calldata->arg.type = nfs4_lck_type(cmd, request); + calldata->arg.offset = request->fl_start; + calldata->arg.length = nfs4_lck_length(request); + calldata->arg.u.locku = &calldata->luargs; + calldata->res.server = server; + calldata->lsp = lsp; + atomic_inc(&lsp->ls_count); + + /* Ensure we don't close file until we're done freeing locks! */ + calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data); + + atomic_set(&calldata->refcount, 2); + init_completion(&calldata->completion); + + status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin, + nfs4_locku_done, calldata); if (status == 0) - do_vfs_lock(request->fl_file, request); + wait_for_completion_interruptible(&calldata->completion); + do_vfs_lock(request->fl_file, request); + nfs4_locku_release_calldata(calldata); return status; } -static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) -{ - struct nfs4_exception exception = { }; - int err; - - do { - err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_unlck(state, cmd, request), - &exception); - } while (exception.retry); - return err; -} - static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim) { struct inode *inode = state->inode; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bb35743619..23834c8fb7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -600,7 +600,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ * Release reference to lock_state, and free it if we see that * it is no longer in use */ -static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) +void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { struct nfs4_state *state; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 934ec50ea6..4706192cfb 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -776,7 +776,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg) WRITE32(OP_LOCKU); WRITE32(arg->type); WRITE32(opargs->seqid->sequence->counter); - WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data)); WRITE64(arg->offset); WRITE64(arg->length); @@ -1587,9 +1587,6 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock }; int status; - status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task); - if (status != 0) - goto out; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -2934,8 +2931,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) status = decode_op_hdr(xdr, OP_LOCKU); if (status == 0) { - READ_BUF(sizeof(nfs4_stateid)); - COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + READ_BUF(sizeof(res->u.stateid.data)); + COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); } return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cac0df950c..849f95c5fa 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -188,7 +188,7 @@ struct nfs_lock_opargs { struct nfs_locku_opargs { struct nfs_seqid * seqid; - nfs4_stateid stateid; + nfs4_stateid * stateid; }; struct nfs_lockargs { -- cgit v1.2.2 From 06735b3454824bd561decbde46111f144e905923 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:15 -0700 Subject: NFSv4: Fix up handling of open_to_lock sequence ids Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 69 +++++++++++++++++++++++-------------------------- fs/nfs/nfs4xdr.c | 32 ++++++++++------------- include/linux/nfs_xdr.h | 19 +++----------- 3 files changed, 49 insertions(+), 71 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 612a9a14ae..35da15342e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2889,11 +2889,23 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; + struct nfs_lock_opargs largs = { + .lock_stateid = &lsp->ls_stateid, + .open_stateid = &state->stateid, + .lock_owner = { + .clientid = server->nfs4_state->cl_clientid, + .id = lsp->ls_id, + }, + .reclaim = reclaim, + }; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), .offset = request->fl_start, .length = nfs4_lck_length(request), + .u = { + .lock = &largs, + }, }; struct nfs_lockres res = { .server = server, @@ -2904,56 +2916,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; - struct nfs_lock_opargs largs = { - .reclaim = reclaim, - .new_lock_owner = 0, - }; - struct nfs_seqid *lock_seqid; int status = -ENOMEM; - lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); - if (lock_seqid == NULL) + largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + if (largs.lock_seqid == NULL) return -ENOMEM; if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) { struct nfs4_state_owner *owner = state->owner; - struct nfs_open_to_lock otl = { - .lock_owner = { - .clientid = server->nfs4_state->cl_clientid, - }, - }; - - otl.lock_seqid = lock_seqid; - otl.lock_owner.id = lsp->ls_id; - memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); - largs.u.open_lock = &otl; + + largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid); + if (largs.open_seqid == NULL) + goto out; largs.new_lock_owner = 1; - arg.u.lock = &largs; - otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid); - if (otl.open_seqid != NULL) { - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - /* increment seqid on success, and seqid mutating errors */ - nfs_increment_open_seqid(status, otl.open_seqid); - nfs_free_seqid(otl.open_seqid); + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment open seqid on success, and seqid mutating errors */ + if (largs.new_lock_owner != 0) { + nfs_increment_open_seqid(status, largs.open_seqid); + if (status == 0) + nfs_confirm_seqid(&lsp->ls_seqid, 0); } - if (status == 0) - nfs_confirm_seqid(&lsp->ls_seqid, 0); - } else { - struct nfs_exist_lock el; - memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); - largs.u.exist_lock = ⪙ - arg.u.lock = &largs; - el.seqid = lock_seqid; + nfs_free_seqid(largs.open_seqid); + } else status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - } - /* increment seqid on success, and seqid mutating errors*/ - nfs_increment_lock_seqid(status, lock_seqid); + /* increment lock seqid on success, and seqid mutating errors*/ + nfs_increment_lock_seqid(status, largs.lock_seqid); /* save the returned stateid. */ if (status == 0) { - memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data)); + memcpy(lsp->ls_stateid.data, res.u.stateid.data, + sizeof(lsp->ls_stateid.data)); lsp->ls_flags |= NFS_LOCK_INITIALIZED; } else if (status == -NFS4ERR_DENIED) status = -EAGAIN; - nfs_free_seqid(lock_seqid); +out: + nfs_free_seqid(largs.lock_seqid); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4706192cfb..c5c75235c5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -729,22 +729,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg) WRITE64(arg->length); WRITE32(opargs->new_lock_owner); if (opargs->new_lock_owner){ - struct nfs_open_to_lock *ol = opargs->u.open_lock; - RESERVE_SPACE(40); - WRITE32(ol->open_seqid->sequence->counter); - WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); - WRITE32(ol->lock_seqid->sequence->counter); - WRITE64(ol->lock_owner.clientid); + WRITE32(opargs->open_seqid->sequence->counter); + WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data)); + WRITE32(opargs->lock_seqid->sequence->counter); + WRITE64(opargs->lock_owner.clientid); WRITE32(4); - WRITE32(ol->lock_owner.id); + WRITE32(opargs->lock_owner.id); } else { - struct nfs_exist_lock *el = opargs->u.exist_lock; - RESERVE_SPACE(20); - WRITEMEM(&el->stateid, sizeof(el->stateid)); - WRITE32(el->seqid->sequence->counter); + WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data)); + WRITE32(opargs->lock_seqid->sequence->counter); } return 0; @@ -1535,16 +1531,14 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka .nops = 2, }; struct nfs_lock_opargs *opargs = args->u.lock; - struct nfs_seqid *seqid; int status; - if (opargs->new_lock_owner) - seqid = opargs->u.open_lock->lock_seqid; - else - seqid = opargs->u.exist_lock->seqid; - status = nfs_wait_on_sequence(seqid, req->rq_task); + status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task); if (status != 0) goto out; + /* Do we need to do an open_to_lock_owner? */ + if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED) + opargs->new_lock_owner = 0; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); status = encode_putfh(&xdr, args->fh); @@ -2908,8 +2902,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) status = decode_op_hdr(xdr, OP_LOCK); if (status == 0) { - READ_BUF(sizeof(nfs4_stateid)); - COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + READ_BUF(sizeof(res->u.stateid.data)); + COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data)); } else if (status == -NFS4ERR_DENIED) return decode_lock_denied(xdr, &res->u.denied); return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 849f95c5fa..57efcc27f2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -165,25 +165,14 @@ struct nfs_lowner { u32 id; }; -struct nfs_open_to_lock { - struct nfs_seqid * open_seqid; - nfs4_stateid open_stateid; +struct nfs_lock_opargs { struct nfs_seqid * lock_seqid; + nfs4_stateid * lock_stateid; + struct nfs_seqid * open_seqid; + nfs4_stateid * open_stateid; struct nfs_lowner lock_owner; -}; - -struct nfs_exist_lock { - nfs4_stateid stateid; - struct nfs_seqid * seqid; -}; - -struct nfs_lock_opargs { __u32 reclaim; __u32 new_lock_owner; - union { - struct nfs_open_to_lock *open_lock; - struct nfs_exist_lock *exist_lock; - } u; }; struct nfs_locku_opargs { -- cgit v1.2.2 From 039c4d7a82d8268ec71f59679460b41d0dd9b225 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:16 -0700 Subject: NFS: Fix up a race in the NFS implementation of GETLK ...and fix a memory corruption bug due to improper use of memcpy() on a struct file_lock. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6bdcfa95de..572d859348 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -376,22 +376,31 @@ out_swapfile: static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { + struct file_lock *cfl; struct inode *inode = filp->f_mapping->host; int status = 0; lock_kernel(); - /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) - status = NFS_PROTO(inode)->lock(filp, cmd, fl); - else { - struct file_lock *cfl = posix_test_lock(filp, fl); - - fl->fl_type = F_UNLCK; - if (cfl != NULL) - memcpy(fl, cfl, sizeof(*fl)); + /* Try local locking first */ + cfl = posix_test_lock(filp, fl); + if (cfl != NULL) { + locks_copy_lock(fl, cfl); + goto out; } + + if (nfs_have_delegation(inode, FMODE_READ)) + goto out_noconflict; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) + goto out_noconflict; + + status = NFS_PROTO(inode)->lock(filp, cmd, fl); +out: unlock_kernel(); return status; +out_noconflict: + fl->fl_type = F_UNLCK; + goto out; } static int do_vfs_lock(struct file *file, struct file_lock *fl) -- cgit v1.2.2 From 834f2a4a1554dc5b2598038b3fe8703defcbe467 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:16 -0700 Subject: VFS: Allow the filesystem to return a full file pointer on open intent This is needed by NFSv4 for atomicity reasons: our open command is in fact a lookup+open, so we need to be able to propagate open context information from lookup() into the resulting struct file's private_data field. Signed-off-by: Trond Myklebust --- fs/exec.c | 12 +++---- fs/namei.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++---- fs/open.c | 79 +++++++++++++++++++++++++++++++++++-------- include/linux/namei.h | 8 +++++ 4 files changed, 165 insertions(+), 27 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index a04a575ad4..d2208f7c87 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library) struct nameidata nd; int error; - nd.intent.open.flags = FMODE_READ; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ); if (error) goto out; @@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library) if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library) out: return error; exit: + release_open_intent(&nd); path_release(&nd); goto out; } @@ -490,8 +490,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - nd.intent.open.flags = FMODE_READ; - err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ); file = ERR_PTR(err); if (!err) { @@ -504,7 +503,7 @@ struct file *open_exec(const char *name) err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nameidata_to_filp(&nd, O_RDONLY); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -516,6 +515,7 @@ out: return file; } } + release_open_intent(&nd); path_release(&nd); } goto out; diff --git a/fs/namei.c b/fs/namei.c index aa62dbda93..0d1dff7d3d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd) mntput_no_expire(nd->mnt); } +/** + * release_open_intent - free up open intent resources + * @nd: pointer to nameidata + */ +void release_open_intent(struct nameidata *nd) +{ + if (nd->intent.open.file->f_dentry == NULL) + put_filp(nd->intent.open.file); + else + fput(nd->intent.open.file); +} + /* * Internal lookup() using the new generic dcache. * SMP-safe @@ -1052,6 +1065,70 @@ out: return retval; } +static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + struct file *filp = get_empty_filp(); + int err; + + if (filp == NULL) + return -ENFILE; + nd->intent.open.file = filp; + nd->intent.open.flags = open_flags; + nd->intent.open.create_mode = create_mode; + err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd); + if (IS_ERR(nd->intent.open.file)) { + if (err == 0) { + err = PTR_ERR(nd->intent.open.file); + path_release(nd); + } + } else if (err != 0) + release_open_intent(nd); + return err; +} + +/** + * path_lookup_open - lookup a file path with open intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + */ +int path_lookup_open(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + return __path_lookup_intent_open(name, lookup_flags, nd, + open_flags, 0); +} + +/** + * path_lookup_create - lookup a file path with open + create intent + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + * @create_mode: create intent flags + */ +int path_lookup_create(const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags, int create_mode) +{ + return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd, + open_flags, create_mode); +} + +int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) +{ + char *tmp = getname(name); + int err = PTR_ERR(tmp); + + if (!IS_ERR(tmp)) { + err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0); + putname(tmp); + } + return err; +} + /* * Restricted form of lookup. Doesn't follow links, single-component only, * needs parent already locked. Doesn't follow mounts. @@ -1416,27 +1493,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) */ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) { - int acc_mode, error = 0; + int acc_mode, error; struct path path; struct dentry *dir; int count = 0; acc_mode = ACC_MODE(flag); + /* O_TRUNC implies we need access checks for write permissions */ + if (flag & O_TRUNC) + acc_mode |= MAY_WRITE; + /* Allow the LSM permission hook to distinguish append access from general write access. */ if (flag & O_APPEND) acc_mode |= MAY_APPEND; - /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; - /* * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { - error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd); + error = path_lookup_open(pathname, lookup_flags(flag), nd, flag); if (error) return error; goto ok; @@ -1445,7 +1522,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) /* * Create - we need to know the parent. */ - error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode); if (error) return error; @@ -1520,6 +1597,8 @@ ok: exit_dput: dput_path(&path, nd); exit: + if (!IS_ERR(nd->intent.open.file)) + release_open_intent(nd); path_release(nd); return error; diff --git a/fs/open.c b/fs/open.c index f0d90cf049..8d06ec911f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -739,7 +739,8 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) } static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - int flags, struct file *f) + int flags, struct file *f, + int (*open)(struct inode *, struct file *)) { struct inode *inode; int error; @@ -761,11 +762,14 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_op = fops_get(inode->i_fop); file_move(f, &inode->i_sb->s_files); - if (f->f_op && f->f_op->open) { - error = f->f_op->open(inode,f); + if (!open && f->f_op) + open = f->f_op->open; + if (open) { + error = open(inode, f); if (error) goto cleanup_all; } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); @@ -814,28 +818,75 @@ struct file *filp_open(const char * filename, int flags, int mode) { int namei_flags, error; struct nameidata nd; - struct file *f; namei_flags = flags; if ((namei_flags+1) & O_ACCMODE) namei_flags++; - if (namei_flags & O_TRUNC) - namei_flags |= 2; - - error = -ENFILE; - f = get_empty_filp(); - if (f == NULL) - return ERR_PTR(error); error = open_namei(filename, namei_flags, mode, &nd); if (!error) - return __dentry_open(nd.dentry, nd.mnt, flags, f); + return nameidata_to_filp(&nd, flags); - put_filp(f); return ERR_PTR(error); } EXPORT_SYMBOL(filp_open); +/** + * lookup_instantiate_filp - instantiates the open intent filp + * @nd: pointer to nameidata + * @dentry: pointer to dentry + * @open: open callback + * + * Helper for filesystems that want to use lookup open intents and pass back + * a fully instantiated struct file to the caller. + * This function is meant to be called from within a filesystem's + * lookup method. + * Note that in case of error, nd->intent.open.file is destroyed, but the + * path information remains valid. + * If the open callback is set to NULL, then the standard f_op->open() + * filesystem callback is substituted. + */ +struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)) +{ + if (IS_ERR(nd->intent.open.file)) + goto out; + if (IS_ERR(dentry)) + goto out_err; + nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt), + nd->intent.open.flags - 1, + nd->intent.open.file, + open); +out: + return nd->intent.open.file; +out_err: + release_open_intent(nd); + nd->intent.open.file = (struct file *)dentry; + goto out; +} +EXPORT_SYMBOL_GPL(lookup_instantiate_filp); + +/** + * nameidata_to_filp - convert a nameidata to an open filp. + * @nd: pointer to nameidata + * @flags: open flags + * + * Note that this function destroys the original nameidata + */ +struct file *nameidata_to_filp(struct nameidata *nd, int flags) +{ + struct file *filp; + + /* Pick up the filp from the open intent */ + filp = nd->intent.open.file; + /* Has the filesystem initialised the file for us? */ + if (filp->f_dentry == NULL) + filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL); + else + path_release(nd); + return filp; +} + struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) { int error; @@ -846,7 +897,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) if (f == NULL) return ERR_PTR(error); - return __dentry_open(dentry, mnt, flags, f); + return __dentry_open(dentry, mnt, flags, f, NULL); } EXPORT_SYMBOL(dentry_open); diff --git a/include/linux/namei.h b/include/linux/namei.h index 7db67b008c..1c975d0d9e 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -8,6 +8,7 @@ struct vfsmount; struct open_intent { int flags; int create_mode; + struct file *file; }; enum { MAX_NESTED_LINKS = 5 }; @@ -65,6 +66,13 @@ extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); +extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); +extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags); +extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); +extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); +extern void release_open_intent(struct nameidata *); + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); -- cgit v1.2.2 From 02a913a73b52071e93f4b76db3e86138d19efffd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:17 -0700 Subject: NFSv4: Eliminate nfsv4 open race... Make NFSv4 return the fully initialized file pointer with the stateid that it created in the lookup w/intent. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 29 +++++----- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4_fs.h | 4 +- fs/nfs/nfs4proc.c | 137 ++++++++++++++++++++---------------------------- fs/nfs/proc.c | 2 +- include/linux/nfs_xdr.h | 2 +- 6 files changed, 74 insertions(+), 102 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c70eabd6d1..a6e251f21f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -914,7 +914,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd) static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *res = NULL; - struct inode *inode = NULL; int error; /* Check that we are indeed trying to open this file */ @@ -928,8 +927,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ - if (nd->intent.open.flags & O_EXCL) - goto no_entry; + if (nd->intent.open.flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } /* Open the file on the server */ lock_kernel(); @@ -943,18 +944,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry if (nd->intent.open.flags & O_CREAT) { nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); } else - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); + if (IS_ERR(res)) { + error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ case -ENOENT: - inode = NULL; - break; + res = NULL; + goto out; /* This turned out not to be a regular file */ case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) @@ -962,13 +963,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry /* case -EISDIR: */ /* case -EINVAL: */ default: - res = ERR_PTR(error); goto out; } - } -no_entry: - res = d_add_unique(dentry, inode); - if (res != NULL) + } else if (res != NULL) dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1012,7 +1009,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) */ lock_kernel(); verifier = nfs_save_change_attribute(dir); - ret = nfs4_open_revalidate(dir, dentry, openflags); + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) nfs_set_verifier(dentry, verifier); unlock_kernel(); @@ -1135,7 +1132,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); nfs_end_data_update(dir); if (error != 0) goto out_err; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index edc9551404..df80477c5a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -299,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 2215cdee43..8a37881990 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -215,8 +215,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 35da15342e..c9ecb81196 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -947,12 +948,26 @@ out: return status; } -struct inode * +static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +{ + struct file *filp; + + filp = lookup_instantiate_filp(nd, dentry, NULL); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + } else + nfs4_close_state(state, nd->intent.open.flags); +} + +struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; + struct dentry *res; if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -966,16 +981,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); if (IS_ERR(cred)) - return (struct inode *)cred; + return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; - return state->inode; + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) + d_add(dentry, NULL); + return (struct dentry *)state; + } + res = d_add_unique(dentry, state->inode); + if (res != NULL) + dentry = res; + nfs4_intent_set_file(nd, dentry, state); + return res; } int -nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { struct rpc_cred *cred; struct nfs4_state *state; @@ -988,18 +1010,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) if (IS_ERR(state)) state = nfs4_do_open(dir, dentry, openflags, NULL, cred); put_rpccred(cred); - if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) - return 1; - if (IS_ERR(state)) - return 0; + if (IS_ERR(state)) { + switch (PTR_ERR(state)) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + lookup_instantiate_filp(nd, (struct dentry *)state, NULL); + return 1; + case -ENOENT: + if (dentry->d_inode == NULL) + return 1; + } + goto out_drop; + } inode = state->inode; + iput(inode); if (inode == dentry->d_inode) { - iput(inode); + nfs4_intent_set_file(nd, dentry, state); return 1; } - d_drop(dentry); nfs4_close_state(state, openflags); - iput(inode); +out_drop: + d_drop(dentry); return 0; } @@ -1500,7 +1534,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata) static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs4_state *state; struct rpc_cred *cred; @@ -1522,13 +1556,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) { + if (status == 0) nfs_setattr_update_inode(state->inode, sattr); - goto out; - } - } else if (flags != 0) - goto out; - nfs4_close_state(state, flags); + } + if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) + nfs4_intent_set_file(nd, dentry, state); + else + nfs4_close_state(state, flags); out: return status; } @@ -2175,65 +2209,6 @@ nfs4_proc_renew(struct nfs4_client *clp) return 0; } -/* - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this open method is prone to inefficiency and race conditions - * due to the lookup, potential create, and open VFS calls from sys_open() - * placed on the wire. - */ -static int -nfs4_proc_file_open(struct inode *inode, struct file *filp) -{ - struct dentry *dentry = filp->f_dentry; - struct nfs_open_context *ctx; - struct nfs4_state *state = NULL; - struct rpc_cred *cred; - int status = -ENOMEM; - - dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", - (int)dentry->d_parent->d_name.len, - dentry->d_parent->d_name.name, - (int)dentry->d_name.len, dentry->d_name.name); - - - /* Find our open stateid */ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = alloc_nfs_open_context(dentry, cred); - put_rpccred(cred); - if (unlikely(ctx == NULL)) - return -ENOMEM; - status = -EIO; /* ERACE actually */ - state = nfs4_find_state(inode, cred, filp->f_mode); - if (unlikely(state == NULL)) - goto no_state; - ctx->state = state; - nfs4_close_state(state, filp->f_mode); - ctx->mode = filp->f_mode; - nfs_file_set_open_context(filp, ctx); - put_nfs_open_context(ctx); - if (filp->f_mode & FMODE_WRITE) - nfs_begin_data_update(inode); - return 0; -no_state: - printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - put_nfs_open_context(ctx); - return status; -} - -/* - * Release our state - */ -static int -nfs4_proc_file_release(struct inode *inode, struct file *filp) -{ - if (filp->f_mode & FMODE_WRITE) - nfs_end_data_update(inode); - nfs_file_clear_open_context(filp); - return 0; -} - static inline int nfs4_server_supports_acls(struct nfs_server *server) { return (server->caps & NFS_CAP_ACLS) @@ -3145,8 +3120,8 @@ struct nfs_rpc_ops nfs_v4_clientops = { .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, .commit_setup = nfs4_proc_commit_setup, - .file_open = nfs4_proc_file_open, - .file_release = nfs4_proc_file_release, + .file_open = nfs_open, + .file_release = nfs_release, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, }; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index be23c3fb92..8fef86523d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -216,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 57efcc27f2..60086dac11 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -712,7 +712,7 @@ struct nfs_rpc_ops { int (*write) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *); int (*create) (struct inode *, struct dentry *, - struct iattr *, int); + struct iattr *, int, struct nameidata *); int (*remove) (struct inode *, struct qstr *); int (*unlink_setup) (struct rpc_message *, struct dentry *, struct qstr *); -- cgit v1.2.2 From 6f926b5ba75c568296ec227e7d782db4ddbdca5c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:18 -0700 Subject: [NFS]: Check that the server returns a valid regular file to our OPEN request Since it appears that some servers don't... Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 +++- fs/nfs/nfs4proc.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a6e251f21f..ac331d2d4c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -957,10 +957,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry res = NULL; goto out; /* This turned out not to be a regular file */ + case -EISDIR: + case -ENOTDIR: + goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; - /* case -EISDIR: */ /* case -EINVAL: */ default: goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c9ecb81196..3db1c9f0b0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -419,6 +419,22 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, stru o_arg->clientid = sp->so_client->cl_clientid; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + if (status == 0) { + /* OPEN on anything except a regular file is disallowed in NFSv4 */ + switch (o_res->f_attr->mode & S_IFMT) { + case S_IFREG: + break; + case S_IFLNK: + status = -ELOOP; + break; + case S_IFDIR: + status = -EISDIR; + break; + default: + status = -ENOTDIR; + } + } + nfs_increment_open_seqid(status, o_arg->seqid); if (status != 0) goto out; -- cgit v1.2.2 From cdce5d6b94b6182f6d8a5b7b52923933e98cbc92 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:18 -0700 Subject: VFS: Make link_path_walk set LOOKUP_CONTINUE before calling permission(). This will allow nfs_permission() to perform additional optimizations when walking the path, by folding the ACCESS(MAY_EXEC) call on the directory into the lookup revalidation. Signed-off-by: Trond Myklebust --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 0d1dff7d3d..aaaa810362 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -763,6 +763,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) struct qstr this; unsigned int c; + nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode, nd); if (err == -EAGAIN) { err = permission(inode, MAY_EXEC, nd); @@ -815,7 +816,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) if (err < 0) break; } - nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. */ err = do_lookup(nd, &this, &next); if (err) -- cgit v1.2.2 From cae7a073a4c5484cc5713eab606bf54b46724ab3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:19 -0700 Subject: NFSv4: Return delegation upon rename or removal of file. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 16 +++++++++++++++- fs/nfs/dir.c | 3 +++ fs/nfs/inode.c | 3 +-- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 4a36839f0b..44135af989 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -142,7 +142,7 @@ static void nfs_msync_inode(struct inode *inode) /* * Basic procedure for returning a delegation to the server */ -int nfs_inode_return_delegation(struct inode *inode) +int __nfs_inode_return_delegation(struct inode *inode) { struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; struct nfs_inode *nfsi = NFS_I(inode); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 3f6c45a29d..8017846b56 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -25,7 +25,7 @@ struct nfs_delegation { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); -int nfs_inode_return_delegation(struct inode *inode); +int __nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); @@ -47,11 +47,25 @@ static inline int nfs_have_delegation(struct inode *inode, int flags) return 1; return 0; } + +static inline int nfs_inode_return_delegation(struct inode *inode) +{ + int err = 0; + + if (NFS_I(inode)->delegation != NULL) + err = __nfs_inode_return_delegation(inode); + return err; +} #else static inline int nfs_have_delegation(struct inode *inode, int flags) { return 0; } + +static inline int nfs_inode_return_delegation(struct inode *inode) +{ + return 0; +} #endif #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ac331d2d4c..72f50c0117 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -801,6 +801,7 @@ static int nfs_dentry_delete(struct dentry *dentry) */ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) { + nfs_inode_return_delegation(inode); if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { lock_kernel(); inode->i_nlink--; @@ -1329,6 +1330,7 @@ static int nfs_safe_remove(struct dentry *dentry) nfs_begin_data_update(dir); if (inode != NULL) { + nfs_inode_return_delegation(inode); nfs_begin_data_update(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ @@ -1547,6 +1549,7 @@ go_ahead: nfs_wb_all(old_inode); shrink_dcache_parent(old_dentry); } + nfs_inode_return_delegation(old_inode); if (new_inode) d_delete(new_dentry); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d7abaa0047..6b0f7fe6bd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1671,8 +1671,7 @@ static void nfs4_clear_inode(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); /* If we are holding a delegation, return it! */ - if (nfsi->delegation != NULL) - nfs_inode_return_delegation(inode); + nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); /* Now clear out any remaining state */ -- cgit v1.2.2 From 642ac54923e0291ae2c8e42edde18d8c9c0a3c84 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:19 -0700 Subject: NFSv4: Return delegations in case we're changing ACLs Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +++++ fs/nfs/nfs4proc.c | 1 + 2 files changed, 6 insertions(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6b0f7fe6bd..65d5ab45dd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -853,6 +853,11 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) filemap_fdatawait(inode->i_mapping); nfs_wb_all(inode); } + /* + * Return any delegations if we're going to change ACLs + */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + nfs_inode_return_delegation(inode); error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3db1c9f0b0..c10dcd12af 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2405,6 +2405,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; + nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); if (ret == 0) -- cgit v1.2.2 From b8e5c4c2978fa666287525a9de2fa648a7581262 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:20 -0700 Subject: NFSv4: If a delegated open fails, ensure that we return the delegation Unless of course the open fails due to permission issues. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c10dcd12af..a8be9af610 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -658,6 +658,8 @@ out_err: } up_read(&nfsi->rwsem); up_read(&clp->cl_sem); + if (err != -EACCES) + nfs_inode_return_delegation(inode); return err; } -- cgit v1.2.2 From 550f57470c6506f5ef3c708335dea6bd48bf3dc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:21 -0700 Subject: NFSv4: Ensure that we recover from the OPEN + OPEN_CONFIRM BAD_STATEID race If the server is in the unconfirmed OPEN state for a given open owner and receives a second OPEN for the same open owner, it will cancel the state of the first request and set up an OPEN_CONFIRM for the second. This can cause a race that is discussed in rfc3530 on page 181. The following patch allows the client to recover by retrying the original open request. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a8be9af610..ff378126cc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -785,6 +785,16 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, exception.retry = 1; continue; } + /* + * BAD_STATEID on OPEN means that the server cancelled our + * state before it received the OPEN_CONFIRM. + * Recover by retrying the request as per the discussion + * on Page 181 of RFC3530. + */ + if (status == -NFS4ERR_BAD_STATEID) { + exception.retry = 1; + continue; + } res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), status, &exception)); } while (exception.retry); -- cgit v1.2.2 From 4c780a4688b421baa896b59778c05d7e068e479f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:21 -0700 Subject: Fix Connectathon locking test failure We currently fail Connectathon test 6.10 in the case of 32-bit locks due to incorrect error checking. Also add support for l->l_len < 0 to 64-bit locks. Signed-off-by: Trond Myklebust --- fs/locks.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 7eb1d77b92..a1e8b22480 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -316,21 +316,22 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ start += l->l_start; - end = start + l->l_len - 1; - if (l->l_len < 0) { + if (start < 0) + return -EINVAL; + fl->fl_end = OFFSET_MAX; + if (l->l_len > 0) { + end = start + l->l_len - 1; + fl->fl_end = end; + } else if (l->l_len < 0) { end = start - 1; + fl->fl_end = end; start += l->l_len; + if (start < 0) + return -EINVAL; } - - if (start < 0) - return -EINVAL; - if (l->l_len > 0 && end < 0) - return -EOVERFLOW; - fl->fl_start = start; /* we record the absolute position */ - fl->fl_end = end; - if (l->l_len == 0) - fl->fl_end = OFFSET_MAX; + if (fl->fl_end < fl->fl_start) + return -EOVERFLOW; fl->fl_owner = current->files; fl->fl_pid = current->tgid; @@ -362,14 +363,21 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, return -EINVAL; } - if (((start += l->l_start) < 0) || (l->l_len < 0)) + start += l->l_start; + if (start < 0) return -EINVAL; - fl->fl_end = start + l->l_len - 1; - if (l->l_len > 0 && fl->fl_end < 0) - return -EOVERFLOW; + fl->fl_end = OFFSET_MAX; + if (l->l_len > 0) { + fl->fl_end = start + l->l_len - 1; + } else if (l->l_len < 0) { + fl->fl_end = start - 1; + start += l->l_len; + if (start < 0) + return -EINVAL; + } fl->fl_start = start; /* we record the absolute position */ - if (l->l_len == 0) - fl->fl_end = OFFSET_MAX; + if (fl->fl_end < fl->fl_start) + return -EOVERFLOW; fl->fl_owner = current->files; fl->fl_pid = current->tgid; -- cgit v1.2.2 From 6fe43f9e3701f7a9f2be151a5e6cfe94b87e92f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Oct 2005 14:20:22 -0700 Subject: NFS: Fix rename of directory onto empty directory If someone tries to rename a directory onto an empty directory, we currently fail and return EBUSY. This patch ensures that we try the rename if both source and target are directories, and that we fail with a correct error of EISDIR if the source is not a directory. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 72f50c0117..eb50c19fc2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1511,9 +1511,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ if (!new_inode) goto go_ahead; - if (S_ISDIR(new_inode->i_mode)) - goto out; - else if (atomic_read(&new_dentry->d_count) > 2) { + if (S_ISDIR(new_inode->i_mode)) { + error = -EISDIR; + if (!S_ISDIR(old_inode->i_mode)) + goto out; + } else if (atomic_read(&new_dentry->d_count) > 2) { int err; /* copy the target dentry's name */ dentry = d_alloc(new_dentry->d_parent, -- cgit v1.2.2