aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-bus-rbd7
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/include/asm/unwind.h16
-rw-r--r--arch/arm/kernel/perf_event.c3
-rw-r--r--arch/arm/kernel/setup.c14
-rw-r--r--arch/arm/kernel/unwind.c129
-rw-r--r--arch/x86/include/asm/e820.h8
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/kernel/e820.c3
-rw-r--r--arch/x86/kernel/setup.c21
-rw-r--r--arch/x86/platform/efi/efi.c29
-rw-r--r--arch/x86/platform/efi/efi_32.c48
-rw-r--r--arch/x86/platform/efi/efi_64.c17
-rw-r--r--drivers/block/rbd.c101
-rw-r--r--drivers/hwmon/jz4740-hwmon.c4
-rw-r--r--drivers/mmc/card/block.c8
-rw-r--r--drivers/mmc/core/core.c98
-rw-r--r--drivers/mmc/core/mmc.c12
-rw-r--r--drivers/mmc/host/mxcmmc.c1
-rw-r--r--drivers/mmc/host/omap_hsmmc.c7
-rw-r--r--drivers/mmc/host/sdhci-cns3xxx.c1
-rw-r--r--drivers/mmc/host/sdhci-s3c.c2
-rw-r--r--drivers/mmc/host/sh_mmcif.c2
-rw-r--r--drivers/mmc/host/tmio_mmc_pio.c2
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/caps.c187
-rw-r--r--fs/ceph/dir.c24
-rw-r--r--fs/ceph/file.c23
-rw-r--r--fs/ceph/inode.c53
-rw-r--r--fs/ceph/ioctl.c4
-rw-r--r--fs/ceph/mds_client.c33
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/snap.c16
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/super.h31
-rw-r--r--fs/ceph/xattr.c42
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c36
-rw-r--r--fs/fs-writeback.c5
-rw-r--r--fs/fuse/dev.c3
-rw-r--r--fs/fuse/file.c6
-rw-r--r--fs/fuse/inode.c24
-rw-r--r--fs/ncpfs/inode.c8
-rw-r--r--fs/proc/root.c8
-rw-r--r--fs/ubifs/super.c18
-rw-r--r--include/linux/log2.h1
-rw-r--r--include/linux/mmc/card.h6
-rw-r--r--ipc/mqueue.c8
-rw-r--r--ipc/msgutil.c5
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/page-writeback.c32
-rw-r--r--net/ceph/crush/mapper.c35
52 files changed, 564 insertions, 605 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index fa72ccb2282e..dbedafb095e2 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -57,13 +57,6 @@ create_snap
57 57
58 $ echo <snap-name> > /sys/bus/rbd/devices/<dev-id>/snap_create 58 $ echo <snap-name> > /sys/bus/rbd/devices/<dev-id>/snap_create
59 59
60rollback_snap
61
62 Rolls back data to the specified snapshot. This goes over the entire
63 list of rados blocks and sends a rollback command to each.
64
65 $ echo <snap-name> > /sys/bus/rbd/devices/<dev-id>/snap_rollback
66
67snap_* 60snap_*
68 61
69 A directory per each snapshot 62 A directory per each snapshot
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e084b7e981e8..776d76b8cb69 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -220,8 +220,9 @@ config NEED_MACH_MEMORY_H
220 be avoided when possible. 220 be avoided when possible.
221 221
222config PHYS_OFFSET 222config PHYS_OFFSET
223 hex "Physical address of main memory" 223 hex "Physical address of main memory" if MMU
224 depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H 224 depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
225 default DRAM_BASE if !MMU
225 help 226 help
226 Please provide the physical address corresponding to the 227 Please provide the physical address corresponding to the
227 location of main memory in your system. 228 location of main memory in your system.
diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h
index a5edf421005c..d1c3f3a71c94 100644
--- a/arch/arm/include/asm/unwind.h
+++ b/arch/arm/include/asm/unwind.h
@@ -30,14 +30,15 @@ enum unwind_reason_code {
30}; 30};
31 31
32struct unwind_idx { 32struct unwind_idx {
33 unsigned long addr; 33 unsigned long addr_offset;
34 unsigned long insn; 34 unsigned long insn;
35}; 35};
36 36
37struct unwind_table { 37struct unwind_table {
38 struct list_head list; 38 struct list_head list;
39 struct unwind_idx *start; 39 const struct unwind_idx *start;
40 struct unwind_idx *stop; 40 const struct unwind_idx *origin;
41 const struct unwind_idx *stop;
41 unsigned long begin_addr; 42 unsigned long begin_addr;
42 unsigned long end_addr; 43 unsigned long end_addr;
43}; 44};
@@ -49,15 +50,6 @@ extern struct unwind_table *unwind_table_add(unsigned long start,
49extern void unwind_table_del(struct unwind_table *tab); 50extern void unwind_table_del(struct unwind_table *tab);
50extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk); 51extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk);
51 52
52#ifdef CONFIG_ARM_UNWIND
53extern int __init unwind_init(void);
54#else
55static inline int __init unwind_init(void)
56{
57 return 0;
58}
59#endif
60
61#endif /* !__ASSEMBLY__ */ 53#endif /* !__ASSEMBLY__ */
62 54
63#ifdef CONFIG_ARM_UNWIND 55#ifdef CONFIG_ARM_UNWIND
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 8e9c98edc068..88b0941ce51e 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -640,6 +640,9 @@ static struct platform_device_id armpmu_plat_device_ids[] = {
640 640
641static int __devinit armpmu_device_probe(struct platform_device *pdev) 641static int __devinit armpmu_device_probe(struct platform_device *pdev)
642{ 642{
643 if (!cpu_pmu)
644 return -ENODEV;
645
643 cpu_pmu->plat_device = pdev; 646 cpu_pmu->plat_device = pdev;
644 return 0; 647 return 0;
645} 648}
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 3448a3f9cc8c..8fc2c8fcbdc6 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -895,8 +895,6 @@ void __init setup_arch(char **cmdline_p)
895{ 895{
896 struct machine_desc *mdesc; 896 struct machine_desc *mdesc;
897 897
898 unwind_init();
899
900 setup_processor(); 898 setup_processor();
901 mdesc = setup_machine_fdt(__atags_pointer); 899 mdesc = setup_machine_fdt(__atags_pointer);
902 if (!mdesc) 900 if (!mdesc)
@@ -904,6 +902,12 @@ void __init setup_arch(char **cmdline_p)
904 machine_desc = mdesc; 902 machine_desc = mdesc;
905 machine_name = mdesc->name; 903 machine_name = mdesc->name;
906 904
905#ifdef CONFIG_ZONE_DMA
906 if (mdesc->dma_zone_size) {
907 extern unsigned long arm_dma_zone_size;
908 arm_dma_zone_size = mdesc->dma_zone_size;
909 }
910#endif
907 if (mdesc->soft_reboot) 911 if (mdesc->soft_reboot)
908 reboot_setup("s"); 912 reboot_setup("s");
909 913
@@ -934,12 +938,6 @@ void __init setup_arch(char **cmdline_p)
934 938
935 tcm_init(); 939 tcm_init();
936 940
937#ifdef CONFIG_ZONE_DMA
938 if (mdesc->dma_zone_size) {
939 extern unsigned long arm_dma_zone_size;
940 arm_dma_zone_size = mdesc->dma_zone_size;
941 }
942#endif
943#ifdef CONFIG_MULTI_IRQ_HANDLER 941#ifdef CONFIG_MULTI_IRQ_HANDLER
944 handle_arch_irq = mdesc->handle_irq; 942 handle_arch_irq = mdesc->handle_irq;
945#endif 943#endif
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index e7e8365795c3..3f03fe0c3269 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -67,7 +67,7 @@ EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2);
67 67
68struct unwind_ctrl_block { 68struct unwind_ctrl_block {
69 unsigned long vrs[16]; /* virtual register set */ 69 unsigned long vrs[16]; /* virtual register set */
70 unsigned long *insn; /* pointer to the current instructions word */ 70 const unsigned long *insn; /* pointer to the current instructions word */
71 int entries; /* number of entries left to interpret */ 71 int entries; /* number of entries left to interpret */
72 int byte; /* current byte number in the instructions word */ 72 int byte; /* current byte number in the instructions word */
73}; 73};
@@ -83,8 +83,9 @@ enum regs {
83 PC = 15 83 PC = 15
84}; 84};
85 85
86extern struct unwind_idx __start_unwind_idx[]; 86extern const struct unwind_idx __start_unwind_idx[];
87extern struct unwind_idx __stop_unwind_idx[]; 87static const struct unwind_idx *__origin_unwind_idx;
88extern const struct unwind_idx __stop_unwind_idx[];
88 89
89static DEFINE_SPINLOCK(unwind_lock); 90static DEFINE_SPINLOCK(unwind_lock);
90static LIST_HEAD(unwind_tables); 91static LIST_HEAD(unwind_tables);
@@ -98,45 +99,99 @@ static LIST_HEAD(unwind_tables);
98}) 99})
99 100
100/* 101/*
101 * Binary search in the unwind index. The entries entries are 102 * Binary search in the unwind index. The entries are
102 * guaranteed to be sorted in ascending order by the linker. 103 * guaranteed to be sorted in ascending order by the linker.
104 *
105 * start = first entry
106 * origin = first entry with positive offset (or stop if there is no such entry)
107 * stop - 1 = last entry
103 */ 108 */
104static struct unwind_idx *search_index(unsigned long addr, 109static const struct unwind_idx *search_index(unsigned long addr,
105 struct unwind_idx *first, 110 const struct unwind_idx *start,
106 struct unwind_idx *last) 111 const struct unwind_idx *origin,
112 const struct unwind_idx *stop)
107{ 113{
108 pr_debug("%s(%08lx, %p, %p)\n", __func__, addr, first, last); 114 unsigned long addr_prel31;
115
116 pr_debug("%s(%08lx, %p, %p, %p)\n",
117 __func__, addr, start, origin, stop);
118
119 /*
120 * only search in the section with the matching sign. This way the
121 * prel31 numbers can be compared as unsigned longs.
122 */
123 if (addr < (unsigned long)start)
124 /* negative offsets: [start; origin) */
125 stop = origin;
126 else
127 /* positive offsets: [origin; stop) */
128 start = origin;
129
130 /* prel31 for address relavive to start */
131 addr_prel31 = (addr - (unsigned long)start) & 0x7fffffff;
109 132
110 if (addr < first->addr) { 133 while (start < stop - 1) {
134 const struct unwind_idx *mid = start + ((stop - start) >> 1);
135
136 /*
137 * As addr_prel31 is relative to start an offset is needed to
138 * make it relative to mid.
139 */
140 if (addr_prel31 - ((unsigned long)mid - (unsigned long)start) <
141 mid->addr_offset)
142 stop = mid;
143 else {
144 /* keep addr_prel31 relative to start */
145 addr_prel31 -= ((unsigned long)mid -
146 (unsigned long)start);
147 start = mid;
148 }
149 }
150
151 if (likely(start->addr_offset <= addr_prel31))
152 return start;
153 else {
111 pr_warning("unwind: Unknown symbol address %08lx\n", addr); 154 pr_warning("unwind: Unknown symbol address %08lx\n", addr);
112 return NULL; 155 return NULL;
113 } else if (addr >= last->addr) 156 }
114 return last; 157}
115 158
116 while (first < last - 1) { 159static const struct unwind_idx *unwind_find_origin(
117 struct unwind_idx *mid = first + ((last - first + 1) >> 1); 160 const struct unwind_idx *start, const struct unwind_idx *stop)
161{
162 pr_debug("%s(%p, %p)\n", __func__, start, stop);
163 while (start < stop - 1) {
164 const struct unwind_idx *mid = start + ((stop - start) >> 1);
118 165
119 if (addr < mid->addr) 166 if (mid->addr_offset >= 0x40000000)
120 last = mid; 167 /* negative offset */
168 start = mid;
121 else 169 else
122 first = mid; 170 /* positive offset */
171 stop = mid;
123 } 172 }
124 173 pr_debug("%s -> %p\n", __func__, stop);
125 return first; 174 return stop;
126} 175}
127 176
128static struct unwind_idx *unwind_find_idx(unsigned long addr) 177static const struct unwind_idx *unwind_find_idx(unsigned long addr)
129{ 178{
130 struct unwind_idx *idx = NULL; 179 const struct unwind_idx *idx = NULL;
131 unsigned long flags; 180 unsigned long flags;
132 181
133 pr_debug("%s(%08lx)\n", __func__, addr); 182 pr_debug("%s(%08lx)\n", __func__, addr);
134 183
135 if (core_kernel_text(addr)) 184 if (core_kernel_text(addr)) {
185 if (unlikely(!__origin_unwind_idx))
186 __origin_unwind_idx =
187 unwind_find_origin(__start_unwind_idx,
188 __stop_unwind_idx);
189
136 /* main unwind table */ 190 /* main unwind table */
137 idx = search_index(addr, __start_unwind_idx, 191 idx = search_index(addr, __start_unwind_idx,
138 __stop_unwind_idx - 1); 192 __origin_unwind_idx,
139 else { 193 __stop_unwind_idx);
194 } else {
140 /* module unwind tables */ 195 /* module unwind tables */
141 struct unwind_table *table; 196 struct unwind_table *table;
142 197
@@ -145,7 +200,8 @@ static struct unwind_idx *unwind_find_idx(unsigned long addr)
145 if (addr >= table->begin_addr && 200 if (addr >= table->begin_addr &&
146 addr < table->end_addr) { 201 addr < table->end_addr) {
147 idx = search_index(addr, table->start, 202 idx = search_index(addr, table->start,
148 table->stop - 1); 203 table->origin,
204 table->stop);
149 /* Move-to-front to exploit common traces */ 205 /* Move-to-front to exploit common traces */
150 list_move(&table->list, &unwind_tables); 206 list_move(&table->list, &unwind_tables);
151 break; 207 break;
@@ -274,7 +330,7 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl)
274int unwind_frame(struct stackframe *frame) 330int unwind_frame(struct stackframe *frame)
275{ 331{
276 unsigned long high, low; 332 unsigned long high, low;
277 struct unwind_idx *idx; 333 const struct unwind_idx *idx;
278 struct unwind_ctrl_block ctrl; 334 struct unwind_ctrl_block ctrl;
279 335
280 /* only go to a higher address on the stack */ 336 /* only go to a higher address on the stack */
@@ -399,7 +455,6 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
399 unsigned long text_size) 455 unsigned long text_size)
400{ 456{
401 unsigned long flags; 457 unsigned long flags;
402 struct unwind_idx *idx;
403 struct unwind_table *tab = kmalloc(sizeof(*tab), GFP_KERNEL); 458 struct unwind_table *tab = kmalloc(sizeof(*tab), GFP_KERNEL);
404 459
405 pr_debug("%s(%08lx, %08lx, %08lx, %08lx)\n", __func__, start, size, 460 pr_debug("%s(%08lx, %08lx, %08lx, %08lx)\n", __func__, start, size,
@@ -408,15 +463,12 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size,
408 if (!tab) 463 if (!tab)
409 return tab; 464 return tab;
410 465
411 tab->start = (struct unwind_idx *)start; 466 tab->start = (const struct unwind_idx *)start;
412 tab->stop = (struct unwind_idx *)(start + size); 467 tab->stop = (const struct unwind_idx *)(start + size);
468 tab->origin = unwind_find_origin(tab->start, tab->stop);
413 tab->begin_addr = text_addr; 469 tab->begin_addr = text_addr;
414 tab->end_addr = text_addr + text_size; 470 tab->end_addr = text_addr + text_size;
415 471
416 /* Convert the symbol addresses to absolute values */
417 for (idx = tab->start; idx < tab->stop; idx++)
418 idx->addr = prel31_to_addr(&idx->addr);
419
420 spin_lock_irqsave(&unwind_lock, flags); 472 spin_lock_irqsave(&unwind_lock, flags);
421 list_add_tail(&tab->list, &unwind_tables); 473 list_add_tail(&tab->list, &unwind_tables);
422 spin_unlock_irqrestore(&unwind_lock, flags); 474 spin_unlock_irqrestore(&unwind_lock, flags);
@@ -437,16 +489,3 @@ void unwind_table_del(struct unwind_table *tab)
437 489
438 kfree(tab); 490 kfree(tab);
439} 491}
440
441int __init unwind_init(void)
442{
443 struct unwind_idx *idx;
444
445 /* Convert the symbol addresses to absolute values */
446 for (idx = __start_unwind_idx; idx < __stop_unwind_idx; idx++)
447 idx->addr = prel31_to_addr(&idx->addr);
448
449 pr_debug("unwind: ARM stack unwinding initialised\n");
450
451 return 0;
452}
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index c9547033e38e..908b96957d88 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -53,13 +53,6 @@
53 */ 53 */
54#define E820_RESERVED_KERN 128 54#define E820_RESERVED_KERN 128
55 55
56/*
57 * Address ranges that need to be mapped by the kernel direct
58 * mapping. This is used to make sure regions such as
59 * EFI_RUNTIME_SERVICES_DATA are directly mapped. See setup_arch().
60 */
61#define E820_RESERVED_EFI 129
62
63#ifndef __ASSEMBLY__ 56#ifndef __ASSEMBLY__
64#include <linux/types.h> 57#include <linux/types.h>
65struct e820entry { 58struct e820entry {
@@ -122,7 +115,6 @@ static inline void early_memtest(unsigned long start, unsigned long end)
122} 115}
123#endif 116#endif
124 117
125extern unsigned long e820_end_pfn(unsigned long limit_pfn, unsigned type);
126extern unsigned long e820_end_of_ram_pfn(void); 118extern unsigned long e820_end_of_ram_pfn(void);
127extern unsigned long e820_end_of_low_ram_pfn(void); 119extern unsigned long e820_end_of_low_ram_pfn(void);
128extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 120extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index b8d8bfcd44a9..7093e4a6a0bc 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -33,6 +33,8 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
33#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ 33#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
34 efi_call_virt(f, a1, a2, a3, a4, a5, a6) 34 efi_call_virt(f, a1, a2, a3, a4, a5, a6)
35 35
36#define efi_ioremap(addr, size, type) ioremap_cache(addr, size)
37
36#else /* !CONFIG_X86_32 */ 38#else /* !CONFIG_X86_32 */
37 39
38extern u64 efi_call0(void *fp); 40extern u64 efi_call0(void *fp);
@@ -82,6 +84,9 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
82 efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ 84 efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
83 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) 85 (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
84 86
87extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
88 u32 type);
89
85#endif /* CONFIG_X86_32 */ 90#endif /* CONFIG_X86_32 */
86 91
87extern int add_efi_memmap; 92extern int add_efi_memmap;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 65ffd110a81b..303a0e48f076 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -135,7 +135,6 @@ static void __init e820_print_type(u32 type)
135 printk(KERN_CONT "(usable)"); 135 printk(KERN_CONT "(usable)");
136 break; 136 break;
137 case E820_RESERVED: 137 case E820_RESERVED:
138 case E820_RESERVED_EFI:
139 printk(KERN_CONT "(reserved)"); 138 printk(KERN_CONT "(reserved)");
140 break; 139 break;
141 case E820_ACPI: 140 case E820_ACPI:
@@ -784,7 +783,7 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
784/* 783/*
785 * Find the highest page frame number we have available 784 * Find the highest page frame number we have available
786 */ 785 */
787unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) 786static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
788{ 787{
789 int i; 788 int i;
790 unsigned long last_pfn = 0; 789 unsigned long last_pfn = 0;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9a9e40fb091c..cf0ef986cb6d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -691,8 +691,6 @@ early_param("reservelow", parse_reservelow);
691 691
692void __init setup_arch(char **cmdline_p) 692void __init setup_arch(char **cmdline_p)
693{ 693{
694 unsigned long end_pfn;
695
696#ifdef CONFIG_X86_32 694#ifdef CONFIG_X86_32
697 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 695 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
698 visws_early_detect(); 696 visws_early_detect();
@@ -934,24 +932,7 @@ void __init setup_arch(char **cmdline_p)
934 init_gbpages(); 932 init_gbpages();
935 933
936 /* max_pfn_mapped is updated here */ 934 /* max_pfn_mapped is updated here */
937 end_pfn = max_low_pfn; 935 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
938
939#ifdef CONFIG_X86_64
940 /*
941 * There may be regions after the last E820_RAM region that we
942 * want to include in the kernel direct mapping, such as
943 * EFI_RUNTIME_SERVICES_DATA.
944 */
945 if (efi_enabled) {
946 unsigned long efi_end;
947
948 efi_end = e820_end_pfn(MAXMEM>>PAGE_SHIFT, E820_RESERVED_EFI);
949 if (efi_end > max_low_pfn)
950 end_pfn = efi_end;
951 }
952#endif
953
954 max_low_pfn_mapped = init_memory_mapping(0, end_pfn << PAGE_SHIFT);
955 max_pfn_mapped = max_low_pfn_mapped; 936 max_pfn_mapped = max_low_pfn_mapped;
956 937
957#ifdef CONFIG_X86_64 938#ifdef CONFIG_X86_64
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index c9718a16be15..37718f0f053d 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -323,13 +323,10 @@ static void __init do_add_efi_memmap(void)
323 case EFI_UNUSABLE_MEMORY: 323 case EFI_UNUSABLE_MEMORY:
324 e820_type = E820_UNUSABLE; 324 e820_type = E820_UNUSABLE;
325 break; 325 break;
326 case EFI_RUNTIME_SERVICES_DATA:
327 e820_type = E820_RESERVED_EFI;
328 break;
329 default: 326 default:
330 /* 327 /*
331 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE 328 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
332 * EFI_MEMORY_MAPPED_IO 329 * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
333 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE 330 * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
334 */ 331 */
335 e820_type = E820_RESERVED; 332 e820_type = E820_RESERVED;
@@ -674,21 +671,10 @@ void __init efi_enter_virtual_mode(void)
674 end_pfn = PFN_UP(end); 671 end_pfn = PFN_UP(end);
675 if (end_pfn <= max_low_pfn_mapped 672 if (end_pfn <= max_low_pfn_mapped
676 || (end_pfn > (1UL << (32 - PAGE_SHIFT)) 673 || (end_pfn > (1UL << (32 - PAGE_SHIFT))
677 && end_pfn <= max_pfn_mapped)) { 674 && end_pfn <= max_pfn_mapped))
678 va = __va(md->phys_addr); 675 va = __va(md->phys_addr);
679 676 else
680 if (!(md->attribute & EFI_MEMORY_WB)) { 677 va = efi_ioremap(md->phys_addr, size, md->type);
681 addr = (u64) (unsigned long)va;
682 npages = md->num_pages;
683 memrange_efi_to_native(&addr, &npages);
684 set_memory_uc(addr, npages);
685 }
686 } else {
687 if (!(md->attribute & EFI_MEMORY_WB))
688 va = ioremap_nocache(md->phys_addr, size);
689 else
690 va = ioremap_cache(md->phys_addr, size);
691 }
692 678
693 md->virt_addr = (u64) (unsigned long) va; 679 md->virt_addr = (u64) (unsigned long) va;
694 680
@@ -698,6 +684,13 @@ void __init efi_enter_virtual_mode(void)
698 continue; 684 continue;
699 } 685 }
700 686
687 if (!(md->attribute & EFI_MEMORY_WB)) {
688 addr = md->virt_addr;
689 npages = md->num_pages;
690 memrange_efi_to_native(&addr, &npages);
691 set_memory_uc(addr, npages);
692 }
693
701 systab = (u64) (unsigned long) efi_phys.systab; 694 systab = (u64) (unsigned long) efi_phys.systab;
702 if (md->phys_addr <= systab && systab < end) { 695 if (md->phys_addr <= systab && systab < end) {
703 systab += md->virt_addr - md->phys_addr; 696 systab += md->virt_addr - md->phys_addr;
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index e36bf714cb77..40e446941dd7 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -39,43 +39,14 @@
39 */ 39 */
40 40
41static unsigned long efi_rt_eflags; 41static unsigned long efi_rt_eflags;
42static pgd_t efi_bak_pg_dir_pointer[2];
43 42
44void efi_call_phys_prelog(void) 43void efi_call_phys_prelog(void)
45{ 44{
46 unsigned long cr4;
47 unsigned long temp;
48 struct desc_ptr gdt_descr; 45 struct desc_ptr gdt_descr;
49 46
50 local_irq_save(efi_rt_eflags); 47 local_irq_save(efi_rt_eflags);
51 48
52 /* 49 load_cr3(initial_page_table);
53 * If I don't have PAE, I should just duplicate two entries in page
54 * directory. If I have PAE, I just need to duplicate one entry in
55 * page directory.
56 */
57 cr4 = read_cr4_safe();
58
59 if (cr4 & X86_CR4_PAE) {
60 efi_bak_pg_dir_pointer[0].pgd =
61 swapper_pg_dir[pgd_index(0)].pgd;
62 swapper_pg_dir[0].pgd =
63 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
64 } else {
65 efi_bak_pg_dir_pointer[0].pgd =
66 swapper_pg_dir[pgd_index(0)].pgd;
67 efi_bak_pg_dir_pointer[1].pgd =
68 swapper_pg_dir[pgd_index(0x400000)].pgd;
69 swapper_pg_dir[pgd_index(0)].pgd =
70 swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd;
71 temp = PAGE_OFFSET + 0x400000;
72 swapper_pg_dir[pgd_index(0x400000)].pgd =
73 swapper_pg_dir[pgd_index(temp)].pgd;
74 }
75
76 /*
77 * After the lock is released, the original page table is restored.
78 */
79 __flush_tlb_all(); 50 __flush_tlb_all();
80 51
81 gdt_descr.address = __pa(get_cpu_gdt_table(0)); 52 gdt_descr.address = __pa(get_cpu_gdt_table(0));
@@ -85,28 +56,13 @@ void efi_call_phys_prelog(void)
85 56
86void efi_call_phys_epilog(void) 57void efi_call_phys_epilog(void)
87{ 58{
88 unsigned long cr4;
89 struct desc_ptr gdt_descr; 59 struct desc_ptr gdt_descr;
90 60
91 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); 61 gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
92 gdt_descr.size = GDT_SIZE - 1; 62 gdt_descr.size = GDT_SIZE - 1;
93 load_gdt(&gdt_descr); 63 load_gdt(&gdt_descr);
94 64
95 cr4 = read_cr4_safe(); 65 load_cr3(swapper_pg_dir);
96
97 if (cr4 & X86_CR4_PAE) {
98 swapper_pg_dir[pgd_index(0)].pgd =
99 efi_bak_pg_dir_pointer[0].pgd;
100 } else {
101 swapper_pg_dir[pgd_index(0)].pgd =
102 efi_bak_pg_dir_pointer[0].pgd;
103 swapper_pg_dir[pgd_index(0x400000)].pgd =
104 efi_bak_pg_dir_pointer[1].pgd;
105 }
106
107 /*
108 * After the lock is released, the original page table is restored.
109 */
110 __flush_tlb_all(); 66 __flush_tlb_all();
111 67
112 local_irq_restore(efi_rt_eflags); 68 local_irq_restore(efi_rt_eflags);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 312250c6b2de..ac3aa54e2654 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -80,3 +80,20 @@ void __init efi_call_phys_epilog(void)
80 local_irq_restore(efi_flags); 80 local_irq_restore(efi_flags);
81 early_code_mapping_set_exec(0); 81 early_code_mapping_set_exec(0);
82} 82}
83
84void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
85 u32 type)
86{
87 unsigned long last_map_pfn;
88
89 if (type == EFI_MEMORY_MAPPED_IO)
90 return ioremap(phys_addr, size);
91
92 last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
93 if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
94 unsigned long top = last_map_pfn << PAGE_SHIFT;
95 efi_ioremap(top, size - (top - phys_addr), type);
96 }
97
98 return (void __iomem *)__va(phys_addr);
99}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 65cc424359b0..148ab944378d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -183,10 +183,6 @@ static LIST_HEAD(rbd_client_list); /* clients */
183 183
184static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); 184static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
185static void rbd_dev_release(struct device *dev); 185static void rbd_dev_release(struct device *dev);
186static ssize_t rbd_snap_rollback(struct device *dev,
187 struct device_attribute *attr,
188 const char *buf,
189 size_t size);
190static ssize_t rbd_snap_add(struct device *dev, 186static ssize_t rbd_snap_add(struct device *dev,
191 struct device_attribute *attr, 187 struct device_attribute *attr,
192 const char *buf, 188 const char *buf,
@@ -461,6 +457,10 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
461 u32 snap_count = le32_to_cpu(ondisk->snap_count); 457 u32 snap_count = le32_to_cpu(ondisk->snap_count);
462 int ret = -ENOMEM; 458 int ret = -ENOMEM;
463 459
460 if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) {
461 return -ENXIO;
462 }
463
464 init_rwsem(&header->snap_rwsem); 464 init_rwsem(&header->snap_rwsem);
465 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); 465 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
466 header->snapc = kmalloc(sizeof(struct ceph_snap_context) + 466 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
@@ -1356,32 +1356,6 @@ fail:
1356} 1356}
1357 1357
1358/* 1358/*
1359 * Request sync osd rollback
1360 */
1361static int rbd_req_sync_rollback_obj(struct rbd_device *dev,
1362 u64 snapid,
1363 const char *obj)
1364{
1365 struct ceph_osd_req_op *ops;
1366 int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_ROLLBACK, 0);
1367 if (ret < 0)
1368 return ret;
1369
1370 ops[0].snap.snapid = snapid;
1371
1372 ret = rbd_req_sync_op(dev, NULL,
1373 CEPH_NOSNAP,
1374 0,
1375 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
1376 ops,
1377 1, obj, 0, 0, NULL, NULL, NULL);
1378
1379 rbd_destroy_ops(ops);
1380
1381 return ret;
1382}
1383
1384/*
1385 * Request sync osd read 1359 * Request sync osd read
1386 */ 1360 */
1387static int rbd_req_sync_exec(struct rbd_device *dev, 1361static int rbd_req_sync_exec(struct rbd_device *dev,
@@ -1610,8 +1584,13 @@ static int rbd_read_header(struct rbd_device *rbd_dev,
1610 goto out_dh; 1584 goto out_dh;
1611 1585
1612 rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); 1586 rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL);
1613 if (rc < 0) 1587 if (rc < 0) {
1588 if (rc == -ENXIO) {
1589 pr_warning("unrecognized header format"
1590 " for image %s", rbd_dev->obj);
1591 }
1614 goto out_dh; 1592 goto out_dh;
1593 }
1615 1594
1616 if (snap_count != header->total_snaps) { 1595 if (snap_count != header->total_snaps) {
1617 snap_count = header->total_snaps; 1596 snap_count = header->total_snaps;
@@ -1882,7 +1861,6 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
1882static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 1861static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
1883static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 1862static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
1884static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add); 1863static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
1885static DEVICE_ATTR(rollback_snap, S_IWUSR, NULL, rbd_snap_rollback);
1886 1864
1887static struct attribute *rbd_attrs[] = { 1865static struct attribute *rbd_attrs[] = {
1888 &dev_attr_size.attr, 1866 &dev_attr_size.attr,
@@ -1893,7 +1871,6 @@ static struct attribute *rbd_attrs[] = {
1893 &dev_attr_current_snap.attr, 1871 &dev_attr_current_snap.attr,
1894 &dev_attr_refresh.attr, 1872 &dev_attr_refresh.attr,
1895 &dev_attr_create_snap.attr, 1873 &dev_attr_create_snap.attr,
1896 &dev_attr_rollback_snap.attr,
1897 NULL 1874 NULL
1898}; 1875};
1899 1876
@@ -2424,64 +2401,6 @@ err_unlock:
2424 return ret; 2401 return ret;
2425} 2402}
2426 2403
2427static ssize_t rbd_snap_rollback(struct device *dev,
2428 struct device_attribute *attr,
2429 const char *buf,
2430 size_t count)
2431{
2432 struct rbd_device *rbd_dev = dev_to_rbd(dev);
2433 int ret;
2434 u64 snapid;
2435 u64 cur_ofs;
2436 char *seg_name = NULL;
2437 char *snap_name = kmalloc(count + 1, GFP_KERNEL);
2438 ret = -ENOMEM;
2439 if (!snap_name)
2440 return ret;
2441
2442 /* parse snaps add command */
2443 snprintf(snap_name, count, "%s", buf);
2444 seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
2445 if (!seg_name)
2446 goto done;
2447
2448 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2449
2450 ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
2451 if (ret < 0)
2452 goto done_unlock;
2453
2454 dout("snapid=%lld\n", snapid);
2455
2456 cur_ofs = 0;
2457 while (cur_ofs < rbd_dev->header.image_size) {
2458 cur_ofs += rbd_get_segment(&rbd_dev->header,
2459 rbd_dev->obj,
2460 cur_ofs, (u64)-1,
2461 seg_name, NULL);
2462 dout("seg_name=%s\n", seg_name);
2463
2464 ret = rbd_req_sync_rollback_obj(rbd_dev, snapid, seg_name);
2465 if (ret < 0)
2466 pr_warning("could not roll back obj %s err=%d\n",
2467 seg_name, ret);
2468 }
2469
2470 ret = __rbd_update_snaps(rbd_dev);
2471 if (ret < 0)
2472 goto done_unlock;
2473
2474 ret = count;
2475
2476done_unlock:
2477 mutex_unlock(&ctl_mutex);
2478done:
2479 kfree(seg_name);
2480 kfree(snap_name);
2481
2482 return ret;
2483}
2484
2485static struct bus_attribute rbd_bus_attrs[] = { 2404static struct bus_attribute rbd_bus_attrs[] = {
2486 __ATTR(add, S_IWUSR, NULL, rbd_add), 2405 __ATTR(add, S_IWUSR, NULL, rbd_add),
2487 __ATTR(remove, S_IWUSR, NULL, rbd_remove), 2406 __ATTR(remove, S_IWUSR, NULL, rbd_remove),
diff --git a/drivers/hwmon/jz4740-hwmon.c b/drivers/hwmon/jz4740-hwmon.c
index 7a48b1eb4233..5253d23361d9 100644
--- a/drivers/hwmon/jz4740-hwmon.c
+++ b/drivers/hwmon/jz4740-hwmon.c
@@ -59,7 +59,7 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev,
59{ 59{
60 struct jz4740_hwmon *hwmon = dev_get_drvdata(dev); 60 struct jz4740_hwmon *hwmon = dev_get_drvdata(dev);
61 struct completion *completion = &hwmon->read_completion; 61 struct completion *completion = &hwmon->read_completion;
62 unsigned long t; 62 long t;
63 unsigned long val; 63 unsigned long val;
64 int ret; 64 int ret;
65 65
@@ -203,7 +203,7 @@ static int __devexit jz4740_hwmon_remove(struct platform_device *pdev)
203 return 0; 203 return 0;
204} 204}
205 205
206struct platform_driver jz4740_hwmon_driver = { 206static struct platform_driver jz4740_hwmon_driver = {
207 .probe = jz4740_hwmon_probe, 207 .probe = jz4740_hwmon_probe,
208 .remove = __devexit_p(jz4740_hwmon_remove), 208 .remove = __devexit_p(jz4740_hwmon_remove),
209 .driver = { 209 .driver = {
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index a1cb21f95302..1e0e27cbe987 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1606,6 +1606,14 @@ static const struct mmc_fixup blk_fixups[] =
1606 MMC_QUIRK_BLK_NO_CMD23), 1606 MMC_QUIRK_BLK_NO_CMD23),
1607 MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc, 1607 MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
1608 MMC_QUIRK_BLK_NO_CMD23), 1608 MMC_QUIRK_BLK_NO_CMD23),
1609
1610 /*
1611 * Some Micron MMC cards needs longer data read timeout than
1612 * indicated in CSD.
1613 */
1614 MMC_FIXUP(CID_NAME_ANY, 0x13, 0x200, add_quirk_mmc,
1615 MMC_QUIRK_LONG_READ_TIME),
1616
1609 END_FIXUP 1617 END_FIXUP
1610}; 1618};
1611 1619
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5278ffb20e74..950b97d7412a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -529,6 +529,18 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
529 data->timeout_clks = 0; 529 data->timeout_clks = 0;
530 } 530 }
531 } 531 }
532
533 /*
534 * Some cards require longer data read timeout than indicated in CSD.
535 * Address this by setting the read timeout to a "reasonably high"
536 * value. For the cards tested, 300ms has proven enough. If necessary,
537 * this value can be increased if other problematic cards require this.
538 */
539 if (mmc_card_long_read_time(card) && data->flags & MMC_DATA_READ) {
540 data->timeout_ns = 300000000;
541 data->timeout_clks = 0;
542 }
543
532 /* 544 /*
533 * Some cards need very high timeouts if driven in SPI mode. 545 * Some cards need very high timeouts if driven in SPI mode.
534 * The worst observed timeout was 900ms after writing a 546 * The worst observed timeout was 900ms after writing a
@@ -1213,6 +1225,46 @@ void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type)
1213 mmc_host_clk_release(host); 1225 mmc_host_clk_release(host);
1214} 1226}
1215 1227
1228static void mmc_poweroff_notify(struct mmc_host *host)
1229{
1230 struct mmc_card *card;
1231 unsigned int timeout;
1232 unsigned int notify_type = EXT_CSD_NO_POWER_NOTIFICATION;
1233 int err = 0;
1234
1235 card = host->card;
1236
1237 /*
1238 * Send power notify command only if card
1239 * is mmc and notify state is powered ON
1240 */
1241 if (card && mmc_card_mmc(card) &&
1242 (card->poweroff_notify_state == MMC_POWERED_ON)) {
1243
1244 if (host->power_notify_type == MMC_HOST_PW_NOTIFY_SHORT) {
1245 notify_type = EXT_CSD_POWER_OFF_SHORT;
1246 timeout = card->ext_csd.generic_cmd6_time;
1247 card->poweroff_notify_state = MMC_POWEROFF_SHORT;
1248 } else {
1249 notify_type = EXT_CSD_POWER_OFF_LONG;
1250 timeout = card->ext_csd.power_off_longtime;
1251 card->poweroff_notify_state = MMC_POWEROFF_LONG;
1252 }
1253
1254 err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
1255 EXT_CSD_POWER_OFF_NOTIFICATION,
1256 notify_type, timeout);
1257
1258 if (err && err != -EBADMSG)
1259 pr_err("Device failed to respond within %d poweroff "
1260 "time. Forcefully powering down the device\n",
1261 timeout);
1262
1263 /* Set the card state to no notification after the poweroff */
1264 card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION;
1265 }
1266}
1267
1216/* 1268/*
1217 * Apply power to the MMC stack. This is a two-stage process. 1269 * Apply power to the MMC stack. This is a two-stage process.
1218 * First, we enable power to the card without the clock running. 1270 * First, we enable power to the card without the clock running.
@@ -1269,42 +1321,12 @@ static void mmc_power_up(struct mmc_host *host)
1269 1321
1270void mmc_power_off(struct mmc_host *host) 1322void mmc_power_off(struct mmc_host *host)
1271{ 1323{
1272 struct mmc_card *card;
1273 unsigned int notify_type;
1274 unsigned int timeout;
1275 int err;
1276
1277 mmc_host_clk_hold(host); 1324 mmc_host_clk_hold(host);
1278 1325
1279 card = host->card;
1280 host->ios.clock = 0; 1326 host->ios.clock = 0;
1281 host->ios.vdd = 0; 1327 host->ios.vdd = 0;
1282 1328
1283 if (card && mmc_card_mmc(card) && 1329 mmc_poweroff_notify(host);
1284 (card->poweroff_notify_state == MMC_POWERED_ON)) {
1285
1286 if (host->power_notify_type == MMC_HOST_PW_NOTIFY_SHORT) {
1287 notify_type = EXT_CSD_POWER_OFF_SHORT;
1288 timeout = card->ext_csd.generic_cmd6_time;
1289 card->poweroff_notify_state = MMC_POWEROFF_SHORT;
1290 } else {
1291 notify_type = EXT_CSD_POWER_OFF_LONG;
1292 timeout = card->ext_csd.power_off_longtime;
1293 card->poweroff_notify_state = MMC_POWEROFF_LONG;
1294 }
1295
1296 err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
1297 EXT_CSD_POWER_OFF_NOTIFICATION,
1298 notify_type, timeout);
1299
1300 if (err && err != -EBADMSG)
1301 pr_err("Device failed to respond within %d poweroff "
1302 "time. Forcefully powering down the device\n",
1303 timeout);
1304
1305 /* Set the card state to no notification after the poweroff */
1306 card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION;
1307 }
1308 1330
1309 /* 1331 /*
1310 * Reset ocr mask to be the highest possible voltage supported for 1332 * Reset ocr mask to be the highest possible voltage supported for
@@ -2196,7 +2218,7 @@ int mmc_card_sleep(struct mmc_host *host)
2196 2218
2197 mmc_bus_get(host); 2219 mmc_bus_get(host);
2198 2220
2199 if (host->bus_ops && !host->bus_dead && host->bus_ops->awake) 2221 if (host->bus_ops && !host->bus_dead && host->bus_ops->sleep)
2200 err = host->bus_ops->sleep(host); 2222 err = host->bus_ops->sleep(host);
2201 2223
2202 mmc_bus_put(host); 2224 mmc_bus_put(host);
@@ -2302,8 +2324,17 @@ int mmc_suspend_host(struct mmc_host *host)
2302 * pre-claim the host. 2324 * pre-claim the host.
2303 */ 2325 */
2304 if (mmc_try_claim_host(host)) { 2326 if (mmc_try_claim_host(host)) {
2305 if (host->bus_ops->suspend) 2327 if (host->bus_ops->suspend) {
2328 /*
2329 * For eMMC 4.5 device send notify command
2330 * before sleep, because in sleep state eMMC 4.5
2331 * devices respond to only RESET and AWAKE cmd
2332 */
2333 mmc_poweroff_notify(host);
2306 err = host->bus_ops->suspend(host); 2334 err = host->bus_ops->suspend(host);
2335 }
2336 mmc_do_release_host(host);
2337
2307 if (err == -ENOSYS || !host->bus_ops->resume) { 2338 if (err == -ENOSYS || !host->bus_ops->resume) {
2308 /* 2339 /*
2309 * We simply "remove" the card in this case. 2340 * We simply "remove" the card in this case.
@@ -2318,7 +2349,6 @@ int mmc_suspend_host(struct mmc_host *host)
2318 host->pm_flags = 0; 2349 host->pm_flags = 0;
2319 err = 0; 2350 err = 0;
2320 } 2351 }
2321 mmc_do_release_host(host);
2322 } else { 2352 } else {
2323 err = -EBUSY; 2353 err = -EBUSY;
2324 } 2354 }
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index dbf421a6279c..d240427c1246 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -876,17 +876,21 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
876 * set the notification byte in the ext_csd register of device 876 * set the notification byte in the ext_csd register of device
877 */ 877 */
878 if ((host->caps2 & MMC_CAP2_POWEROFF_NOTIFY) && 878 if ((host->caps2 & MMC_CAP2_POWEROFF_NOTIFY) &&
879 (card->poweroff_notify_state == MMC_NO_POWER_NOTIFICATION)) { 879 (card->ext_csd.rev >= 6)) {
880 err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, 880 err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
881 EXT_CSD_POWER_OFF_NOTIFICATION, 881 EXT_CSD_POWER_OFF_NOTIFICATION,
882 EXT_CSD_POWER_ON, 882 EXT_CSD_POWER_ON,
883 card->ext_csd.generic_cmd6_time); 883 card->ext_csd.generic_cmd6_time);
884 if (err && err != -EBADMSG) 884 if (err && err != -EBADMSG)
885 goto free_card; 885 goto free_card;
886 }
887 886
888 if (!err) 887 /*
889 card->poweroff_notify_state = MMC_POWERED_ON; 888 * The err can be -EBADMSG or 0,
889 * so check for success and update the flag
890 */
891 if (!err)
892 card->poweroff_notify_state = MMC_POWERED_ON;
893 }
890 894
891 /* 895 /*
892 * Activate high speed (if supported) 896 * Activate high speed (if supported)
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 325ea61e12d3..8e0fbe994047 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -732,6 +732,7 @@ static void mxcmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
732 "failed to config DMA channel. Falling back to PIO\n"); 732 "failed to config DMA channel. Falling back to PIO\n");
733 dma_release_channel(host->dma); 733 dma_release_channel(host->dma);
734 host->do_dma = 0; 734 host->do_dma = 0;
735 host->dma = NULL;
735 } 736 }
736 } 737 }
737 738
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 101cd31c8220..d5fe43d53c51 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1010,6 +1010,7 @@ static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno)
1010 host->data->sg_len, 1010 host->data->sg_len,
1011 omap_hsmmc_get_dma_dir(host, host->data)); 1011 omap_hsmmc_get_dma_dir(host, host->data));
1012 omap_free_dma(dma_ch); 1012 omap_free_dma(dma_ch);
1013 host->data->host_cookie = 0;
1013 } 1014 }
1014 host->data = NULL; 1015 host->data = NULL;
1015} 1016}
@@ -1575,8 +1576,10 @@ static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
1575 struct mmc_data *data = mrq->data; 1576 struct mmc_data *data = mrq->data;
1576 1577
1577 if (host->use_dma) { 1578 if (host->use_dma) {
1578 dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, 1579 if (data->host_cookie)
1579 omap_hsmmc_get_dma_dir(host, data)); 1580 dma_unmap_sg(mmc_dev(host->mmc), data->sg,
1581 data->sg_len,
1582 omap_hsmmc_get_dma_dir(host, data));
1580 data->host_cookie = 0; 1583 data->host_cookie = 0;
1581 } 1584 }
1582} 1585}
diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index 4b920b7621cf..87b6f079b6e0 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c
@@ -15,6 +15,7 @@
15#include <linux/delay.h> 15#include <linux/delay.h>
16#include <linux/device.h> 16#include <linux/device.h>
17#include <linux/mmc/host.h> 17#include <linux/mmc/host.h>
18#include <linux/module.h>
18#include <mach/cns3xxx.h> 19#include <mach/cns3xxx.h>
19#include "sdhci-pltfm.h" 20#include "sdhci-pltfm.h"
20 21
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 3d00e722efc9..cb60c4197e0a 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -644,8 +644,6 @@ static int sdhci_s3c_resume(struct platform_device *dev)
644static struct platform_driver sdhci_s3c_driver = { 644static struct platform_driver sdhci_s3c_driver = {
645 .probe = sdhci_s3c_probe, 645 .probe = sdhci_s3c_probe,
646 .remove = __devexit_p(sdhci_s3c_remove), 646 .remove = __devexit_p(sdhci_s3c_remove),
647 .suspend = sdhci_s3c_suspend,
648 .resume = sdhci_s3c_resume,
649 .driver = { 647 .driver = {
650 .owner = THIS_MODULE, 648 .owner = THIS_MODULE,
651 .name = "s3c-sdhci", 649 .name = "s3c-sdhci",
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 369366c8e205..d5505f3fe2a1 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -908,7 +908,7 @@ static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
908 if (host->power) { 908 if (host->power) {
909 pm_runtime_put(&host->pd->dev); 909 pm_runtime_put(&host->pd->dev);
910 host->power = false; 910 host->power = false;
911 if (p->down_pwr) 911 if (p->down_pwr && ios->power_mode == MMC_POWER_OFF)
912 p->down_pwr(host->pd); 912 p->down_pwr(host->pd);
913 } 913 }
914 host->state = STATE_IDLE; 914 host->state = STATE_IDLE;
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index d85a60cda167..4208b3958069 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -798,7 +798,7 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
798 /* start bus clock */ 798 /* start bus clock */
799 tmio_mmc_clk_start(host); 799 tmio_mmc_clk_start(host);
800 } else if (ios->power_mode != MMC_POWER_UP) { 800 } else if (ios->power_mode != MMC_POWER_UP) {
801 if (host->set_pwr) 801 if (host->set_pwr && ios->power_mode == MMC_POWER_OFF)
802 host->set_pwr(host->pdev, 0); 802 host->set_pwr(host->pdev, 0);
803 if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && 803 if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
804 pdata->power) { 804 pdata->power) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4144caf2f9d3..173b1d22e59b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); 87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
88 88
89 /* dirty the head */ 89 /* dirty the head */
90 spin_lock(&inode->i_lock); 90 spin_lock(&ci->i_ceph_lock);
91 if (ci->i_head_snapc == NULL) 91 if (ci->i_head_snapc == NULL)
92 ci->i_head_snapc = ceph_get_snap_context(snapc); 92 ci->i_head_snapc = ceph_get_snap_context(snapc);
93 ++ci->i_wrbuffer_ref_head; 93 ++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, 100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, 101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
102 snapc, snapc->seq, snapc->num_snaps); 102 snapc, snapc->seq, snapc->num_snaps);
103 spin_unlock(&inode->i_lock); 103 spin_unlock(&ci->i_ceph_lock);
104 104
105 /* now adjust page */ 105 /* now adjust page */
106 spin_lock_irq(&mapping->tree_lock); 106 spin_lock_irq(&mapping->tree_lock);
@@ -391,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
391 struct ceph_snap_context *snapc = NULL; 391 struct ceph_snap_context *snapc = NULL;
392 struct ceph_cap_snap *capsnap = NULL; 392 struct ceph_cap_snap *capsnap = NULL;
393 393
394 spin_lock(&inode->i_lock); 394 spin_lock(&ci->i_ceph_lock);
395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
397 capsnap->context, capsnap->dirty_pages); 397 capsnap->context, capsnap->dirty_pages);
@@ -407,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
407 dout(" head snapc %p has %d dirty pages\n", 407 dout(" head snapc %p has %d dirty pages\n",
408 snapc, ci->i_wrbuffer_ref_head); 408 snapc, ci->i_wrbuffer_ref_head);
409 } 409 }
410 spin_unlock(&inode->i_lock); 410 spin_unlock(&ci->i_ceph_lock);
411 return snapc; 411 return snapc;
412} 412}
413 413
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0f327c6c9679..8b53193e4f7c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
309/* 309/*
310 * Find ceph_cap for given mds, if any. 310 * Find ceph_cap for given mds, if any.
311 * 311 *
312 * Called with i_lock held. 312 * Called with i_ceph_lock held.
313 */ 313 */
314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) 314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
315{ 315{
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
332{ 332{
333 struct ceph_cap *cap; 333 struct ceph_cap *cap;
334 334
335 spin_lock(&ci->vfs_inode.i_lock); 335 spin_lock(&ci->i_ceph_lock);
336 cap = __get_cap_for_mds(ci, mds); 336 cap = __get_cap_for_mds(ci, mds);
337 spin_unlock(&ci->vfs_inode.i_lock); 337 spin_unlock(&ci->i_ceph_lock);
338 return cap; 338 return cap;
339} 339}
340 340
@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
361 361
362int ceph_get_cap_mds(struct inode *inode) 362int ceph_get_cap_mds(struct inode *inode)
363{ 363{
364 struct ceph_inode_info *ci = ceph_inode(inode);
364 int mds; 365 int mds;
365 spin_lock(&inode->i_lock); 366 spin_lock(&ci->i_ceph_lock);
366 mds = __ceph_get_cap_mds(ceph_inode(inode)); 367 mds = __ceph_get_cap_mds(ceph_inode(inode));
367 spin_unlock(&inode->i_lock); 368 spin_unlock(&ci->i_ceph_lock);
368 return mds; 369 return mds;
369} 370}
370 371
371/* 372/*
372 * Called under i_lock. 373 * Called under i_ceph_lock.
373 */ 374 */
374static void __insert_cap_node(struct ceph_inode_info *ci, 375static void __insert_cap_node(struct ceph_inode_info *ci,
375 struct ceph_cap *new) 376 struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
415 * 416 *
416 * If I_FLUSH is set, leave the inode at the front of the list. 417 * If I_FLUSH is set, leave the inode at the front of the list.
417 * 418 *
418 * Caller holds i_lock 419 * Caller holds i_ceph_lock
419 * -> we take mdsc->cap_delay_lock 420 * -> we take mdsc->cap_delay_lock
420 */ 421 */
421static void __cap_delay_requeue(struct ceph_mds_client *mdsc, 422static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
457/* 458/*
458 * Cancel delayed work on cap. 459 * Cancel delayed work on cap.
459 * 460 *
460 * Caller must hold i_lock. 461 * Caller must hold i_ceph_lock.
461 */ 462 */
462static void __cap_delay_cancel(struct ceph_mds_client *mdsc, 463static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
463 struct ceph_inode_info *ci) 464 struct ceph_inode_info *ci)
@@ -532,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
532 wanted |= ceph_caps_for_mode(fmode); 533 wanted |= ceph_caps_for_mode(fmode);
533 534
534retry: 535retry:
535 spin_lock(&inode->i_lock); 536 spin_lock(&ci->i_ceph_lock);
536 cap = __get_cap_for_mds(ci, mds); 537 cap = __get_cap_for_mds(ci, mds);
537 if (!cap) { 538 if (!cap) {
538 if (new_cap) { 539 if (new_cap) {
539 cap = new_cap; 540 cap = new_cap;
540 new_cap = NULL; 541 new_cap = NULL;
541 } else { 542 } else {
542 spin_unlock(&inode->i_lock); 543 spin_unlock(&ci->i_ceph_lock);
543 new_cap = get_cap(mdsc, caps_reservation); 544 new_cap = get_cap(mdsc, caps_reservation);
544 if (new_cap == NULL) 545 if (new_cap == NULL)
545 return -ENOMEM; 546 return -ENOMEM;
@@ -625,7 +626,7 @@ retry:
625 626
626 if (fmode >= 0) 627 if (fmode >= 0)
627 __ceph_get_fmode(ci, fmode); 628 __ceph_get_fmode(ci, fmode);
628 spin_unlock(&inode->i_lock); 629 spin_unlock(&ci->i_ceph_lock);
629 wake_up_all(&ci->i_cap_wq); 630 wake_up_all(&ci->i_cap_wq);
630 return 0; 631 return 0;
631} 632}
@@ -792,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
792 struct rb_node *p; 793 struct rb_node *p;
793 int ret = 0; 794 int ret = 0;
794 795
795 spin_lock(&inode->i_lock); 796 spin_lock(&ci->i_ceph_lock);
796 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 797 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
797 cap = rb_entry(p, struct ceph_cap, ci_node); 798 cap = rb_entry(p, struct ceph_cap, ci_node);
798 if (__cap_is_valid(cap) && 799 if (__cap_is_valid(cap) &&
@@ -801,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
801 break; 802 break;
802 } 803 }
803 } 804 }
804 spin_unlock(&inode->i_lock); 805 spin_unlock(&ci->i_ceph_lock);
805 dout("ceph_caps_revoking %p %s = %d\n", inode, 806 dout("ceph_caps_revoking %p %s = %d\n", inode,
806 ceph_cap_string(mask), ret); 807 ceph_cap_string(mask), ret);
807 return ret; 808 return ret;
@@ -855,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
855} 856}
856 857
857/* 858/*
858 * called under i_lock 859 * called under i_ceph_lock
859 */ 860 */
860static int __ceph_is_any_caps(struct ceph_inode_info *ci) 861static int __ceph_is_any_caps(struct ceph_inode_info *ci)
861{ 862{
@@ -865,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
865/* 866/*
866 * Remove a cap. Take steps to deal with a racing iterate_session_caps. 867 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
867 * 868 *
868 * caller should hold i_lock. 869 * caller should hold i_ceph_lock.
869 * caller will not hold session s_mutex if called from destroy_inode. 870 * caller will not hold session s_mutex if called from destroy_inode.
870 */ 871 */
871void __ceph_remove_cap(struct ceph_cap *cap) 872void __ceph_remove_cap(struct ceph_cap *cap)
@@ -1028,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
1028 1029
1029/* 1030/*
1030 * Queue cap releases when an inode is dropped from our cache. Since 1031 * Queue cap releases when an inode is dropped from our cache. Since
1031 * inode is about to be destroyed, there is no need for i_lock. 1032 * inode is about to be destroyed, there is no need for i_ceph_lock.
1032 */ 1033 */
1033void ceph_queue_caps_release(struct inode *inode) 1034void ceph_queue_caps_release(struct inode *inode)
1034{ 1035{
@@ -1049,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)
1049 1050
1050/* 1051/*
1051 * Send a cap msg on the given inode. Update our caps state, then 1052 * Send a cap msg on the given inode. Update our caps state, then
1052 * drop i_lock and send the message. 1053 * drop i_ceph_lock and send the message.
1053 * 1054 *
1054 * Make note of max_size reported/requested from mds, revoked caps 1055 * Make note of max_size reported/requested from mds, revoked caps
1055 * that have now been implemented. 1056 * that have now been implemented.
@@ -1061,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
1061 * Return non-zero if delayed release, or we experienced an error 1062 * Return non-zero if delayed release, or we experienced an error
1062 * such that the caller should requeue + retry later. 1063 * such that the caller should requeue + retry later.
1063 * 1064 *
1064 * called with i_lock, then drops it. 1065 * called with i_ceph_lock, then drops it.
1065 * caller should hold snap_rwsem (read), s_mutex. 1066 * caller should hold snap_rwsem (read), s_mutex.
1066 */ 1067 */
1067static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, 1068static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1068 int op, int used, int want, int retain, int flushing, 1069 int op, int used, int want, int retain, int flushing,
1069 unsigned *pflush_tid) 1070 unsigned *pflush_tid)
1070 __releases(cap->ci->vfs_inode->i_lock) 1071 __releases(cap->ci->i_ceph_lock)
1071{ 1072{
1072 struct ceph_inode_info *ci = cap->ci; 1073 struct ceph_inode_info *ci = cap->ci;
1073 struct inode *inode = &ci->vfs_inode; 1074 struct inode *inode = &ci->vfs_inode;
@@ -1170,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1170 xattr_version = ci->i_xattrs.version; 1171 xattr_version = ci->i_xattrs.version;
1171 } 1172 }
1172 1173
1173 spin_unlock(&inode->i_lock); 1174 spin_unlock(&ci->i_ceph_lock);
1174 1175
1175 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1176 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1176 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1177 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1198,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1198 * Unless @again is true, skip cap_snaps that were already sent to 1199 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session). 1200 * the MDS (i.e., during this session).
1200 * 1201 *
1201 * Called under i_lock. Takes s_mutex as needed. 1202 * Called under i_ceph_lock. Takes s_mutex as needed.
1202 */ 1203 */
1203void __ceph_flush_snaps(struct ceph_inode_info *ci, 1204void __ceph_flush_snaps(struct ceph_inode_info *ci,
1204 struct ceph_mds_session **psession, 1205 struct ceph_mds_session **psession,
1205 int again) 1206 int again)
1206 __releases(ci->vfs_inode->i_lock) 1207 __releases(ci->i_ceph_lock)
1207 __acquires(ci->vfs_inode->i_lock) 1208 __acquires(ci->i_ceph_lock)
1208{ 1209{
1209 struct inode *inode = &ci->vfs_inode; 1210 struct inode *inode = &ci->vfs_inode;
1210 int mds; 1211 int mds;
@@ -1261,7 +1262,7 @@ retry:
1261 session = NULL; 1262 session = NULL;
1262 } 1263 }
1263 if (!session) { 1264 if (!session) {
1264 spin_unlock(&inode->i_lock); 1265 spin_unlock(&ci->i_ceph_lock);
1265 mutex_lock(&mdsc->mutex); 1266 mutex_lock(&mdsc->mutex);
1266 session = __ceph_lookup_mds_session(mdsc, mds); 1267 session = __ceph_lookup_mds_session(mdsc, mds);
1267 mutex_unlock(&mdsc->mutex); 1268 mutex_unlock(&mdsc->mutex);
@@ -1275,7 +1276,7 @@ retry:
1275 * deletion or migration. retry, and we'll 1276 * deletion or migration. retry, and we'll
1276 * get a better @mds value next time. 1277 * get a better @mds value next time.
1277 */ 1278 */
1278 spin_lock(&inode->i_lock); 1279 spin_lock(&ci->i_ceph_lock);
1279 goto retry; 1280 goto retry;
1280 } 1281 }
1281 1282
@@ -1285,7 +1286,7 @@ retry:
1285 list_del_init(&capsnap->flushing_item); 1286 list_del_init(&capsnap->flushing_item);
1286 list_add_tail(&capsnap->flushing_item, 1287 list_add_tail(&capsnap->flushing_item,
1287 &session->s_cap_snaps_flushing); 1288 &session->s_cap_snaps_flushing);
1288 spin_unlock(&inode->i_lock); 1289 spin_unlock(&ci->i_ceph_lock);
1289 1290
1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", 1291 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1291 inode, capsnap, capsnap->follows, capsnap->flush_tid); 1292 inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1302,7 +1303,7 @@ retry:
1302 next_follows = capsnap->follows + 1; 1303 next_follows = capsnap->follows + 1;
1303 ceph_put_cap_snap(capsnap); 1304 ceph_put_cap_snap(capsnap);
1304 1305
1305 spin_lock(&inode->i_lock); 1306 spin_lock(&ci->i_ceph_lock);
1306 goto retry; 1307 goto retry;
1307 } 1308 }
1308 1309
@@ -1322,11 +1323,9 @@ out:
1322 1323
1323static void ceph_flush_snaps(struct ceph_inode_info *ci) 1324static void ceph_flush_snaps(struct ceph_inode_info *ci)
1324{ 1325{
1325 struct inode *inode = &ci->vfs_inode; 1326 spin_lock(&ci->i_ceph_lock);
1326
1327 spin_lock(&inode->i_lock);
1328 __ceph_flush_snaps(ci, NULL, 0); 1327 __ceph_flush_snaps(ci, NULL, 0);
1329 spin_unlock(&inode->i_lock); 1328 spin_unlock(&ci->i_ceph_lock);
1330} 1329}
1331 1330
1332/* 1331/*
@@ -1373,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1373 * Add dirty inode to the flushing list. Assigned a seq number so we 1372 * Add dirty inode to the flushing list. Assigned a seq number so we
1374 * can wait for caps to flush without starving. 1373 * can wait for caps to flush without starving.
1375 * 1374 *
1376 * Called under i_lock. 1375 * Called under i_ceph_lock.
1377 */ 1376 */
1378static int __mark_caps_flushing(struct inode *inode, 1377static int __mark_caps_flushing(struct inode *inode,
1379 struct ceph_mds_session *session) 1378 struct ceph_mds_session *session)
@@ -1421,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
1421 struct ceph_inode_info *ci = ceph_inode(inode); 1420 struct ceph_inode_info *ci = ceph_inode(inode);
1422 u32 invalidating_gen = ci->i_rdcache_gen; 1421 u32 invalidating_gen = ci->i_rdcache_gen;
1423 1422
1424 spin_unlock(&inode->i_lock); 1423 spin_unlock(&ci->i_ceph_lock);
1425 invalidate_mapping_pages(&inode->i_data, 0, -1); 1424 invalidate_mapping_pages(&inode->i_data, 0, -1);
1426 spin_lock(&inode->i_lock); 1425 spin_lock(&ci->i_ceph_lock);
1427 1426
1428 if (inode->i_data.nrpages == 0 && 1427 if (inode->i_data.nrpages == 0 &&
1429 invalidating_gen == ci->i_rdcache_gen) { 1428 invalidating_gen == ci->i_rdcache_gen) {
@@ -1470,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1470 if (mdsc->stopping) 1469 if (mdsc->stopping)
1471 is_delayed = 1; 1470 is_delayed = 1;
1472 1471
1473 spin_lock(&inode->i_lock); 1472 spin_lock(&ci->i_ceph_lock);
1474 1473
1475 if (ci->i_ceph_flags & CEPH_I_FLUSH) 1474 if (ci->i_ceph_flags & CEPH_I_FLUSH)
1476 flags |= CHECK_CAPS_FLUSH; 1475 flags |= CHECK_CAPS_FLUSH;
@@ -1480,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1480 __ceph_flush_snaps(ci, &session, 0); 1479 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1480 goto retry_locked;
1482retry: 1481retry:
1483 spin_lock(&inode->i_lock); 1482 spin_lock(&ci->i_ceph_lock);
1484retry_locked: 1483retry_locked:
1485 file_wanted = __ceph_caps_file_wanted(ci); 1484 file_wanted = __ceph_caps_file_wanted(ci);
1486 used = __ceph_caps_used(ci); 1485 used = __ceph_caps_used(ci);
@@ -1634,7 +1633,7 @@ ack:
1634 if (mutex_trylock(&session->s_mutex) == 0) { 1633 if (mutex_trylock(&session->s_mutex) == 0) {
1635 dout("inverting session/ino locks on %p\n", 1634 dout("inverting session/ino locks on %p\n",
1636 session); 1635 session);
1637 spin_unlock(&inode->i_lock); 1636 spin_unlock(&ci->i_ceph_lock);
1638 if (took_snap_rwsem) { 1637 if (took_snap_rwsem) {
1639 up_read(&mdsc->snap_rwsem); 1638 up_read(&mdsc->snap_rwsem);
1640 took_snap_rwsem = 0; 1639 took_snap_rwsem = 0;
@@ -1648,7 +1647,7 @@ ack:
1648 if (down_read_trylock(&mdsc->snap_rwsem) == 0) { 1647 if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
1649 dout("inverting snap/in locks on %p\n", 1648 dout("inverting snap/in locks on %p\n",
1650 inode); 1649 inode);
1651 spin_unlock(&inode->i_lock); 1650 spin_unlock(&ci->i_ceph_lock);
1652 down_read(&mdsc->snap_rwsem); 1651 down_read(&mdsc->snap_rwsem);
1653 took_snap_rwsem = 1; 1652 took_snap_rwsem = 1;
1654 goto retry; 1653 goto retry;
@@ -1664,10 +1663,10 @@ ack:
1664 mds = cap->mds; /* remember mds, so we don't repeat */ 1663 mds = cap->mds; /* remember mds, so we don't repeat */
1665 sent++; 1664 sent++;
1666 1665
1667 /* __send_cap drops i_lock */ 1666 /* __send_cap drops i_ceph_lock */
1668 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, 1667 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
1669 retain, flushing, NULL); 1668 retain, flushing, NULL);
1670 goto retry; /* retake i_lock and restart our cap scan. */ 1669 goto retry; /* retake i_ceph_lock and restart our cap scan. */
1671 } 1670 }
1672 1671
1673 /* 1672 /*
@@ -1681,7 +1680,7 @@ ack:
1681 else if (!is_delayed || force_requeue) 1680 else if (!is_delayed || force_requeue)
1682 __cap_delay_requeue(mdsc, ci); 1681 __cap_delay_requeue(mdsc, ci);
1683 1682
1684 spin_unlock(&inode->i_lock); 1683 spin_unlock(&ci->i_ceph_lock);
1685 1684
1686 if (queue_invalidate) 1685 if (queue_invalidate)
1687 ceph_queue_invalidate(inode); 1686 ceph_queue_invalidate(inode);
@@ -1704,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
1704 int flushing = 0; 1703 int flushing = 0;
1705 1704
1706retry: 1705retry:
1707 spin_lock(&inode->i_lock); 1706 spin_lock(&ci->i_ceph_lock);
1708 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { 1707 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
1709 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); 1708 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
1710 goto out; 1709 goto out;
@@ -1716,7 +1715,7 @@ retry:
1716 int delayed; 1715 int delayed;
1717 1716
1718 if (!session) { 1717 if (!session) {
1719 spin_unlock(&inode->i_lock); 1718 spin_unlock(&ci->i_ceph_lock);
1720 session = cap->session; 1719 session = cap->session;
1721 mutex_lock(&session->s_mutex); 1720 mutex_lock(&session->s_mutex);
1722 goto retry; 1721 goto retry;
@@ -1727,18 +1726,18 @@ retry:
1727 1726
1728 flushing = __mark_caps_flushing(inode, session); 1727 flushing = __mark_caps_flushing(inode, session);
1729 1728
1730 /* __send_cap drops i_lock */ 1729 /* __send_cap drops i_ceph_lock */
1731 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, 1730 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
1732 cap->issued | cap->implemented, flushing, 1731 cap->issued | cap->implemented, flushing,
1733 flush_tid); 1732 flush_tid);
1734 if (!delayed) 1733 if (!delayed)
1735 goto out_unlocked; 1734 goto out_unlocked;
1736 1735
1737 spin_lock(&inode->i_lock); 1736 spin_lock(&ci->i_ceph_lock);
1738 __cap_delay_requeue(mdsc, ci); 1737 __cap_delay_requeue(mdsc, ci);
1739 } 1738 }
1740out: 1739out:
1741 spin_unlock(&inode->i_lock); 1740 spin_unlock(&ci->i_ceph_lock);
1742out_unlocked: 1741out_unlocked:
1743 if (session && unlock_session) 1742 if (session && unlock_session)
1744 mutex_unlock(&session->s_mutex); 1743 mutex_unlock(&session->s_mutex);
@@ -1753,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1753 struct ceph_inode_info *ci = ceph_inode(inode); 1752 struct ceph_inode_info *ci = ceph_inode(inode);
1754 int i, ret = 1; 1753 int i, ret = 1;
1755 1754
1756 spin_lock(&inode->i_lock); 1755 spin_lock(&ci->i_ceph_lock);
1757 for (i = 0; i < CEPH_CAP_BITS; i++) 1756 for (i = 0; i < CEPH_CAP_BITS; i++)
1758 if ((ci->i_flushing_caps & (1 << i)) && 1757 if ((ci->i_flushing_caps & (1 << i)) &&
1759 ci->i_cap_flush_tid[i] <= tid) { 1758 ci->i_cap_flush_tid[i] <= tid) {
@@ -1761,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1761 ret = 0; 1760 ret = 0;
1762 break; 1761 break;
1763 } 1762 }
1764 spin_unlock(&inode->i_lock); 1763 spin_unlock(&ci->i_ceph_lock);
1765 return ret; 1764 return ret;
1766} 1765}
1767 1766
@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
1868 struct ceph_mds_client *mdsc = 1867 struct ceph_mds_client *mdsc =
1869 ceph_sb_to_client(inode->i_sb)->mdsc; 1868 ceph_sb_to_client(inode->i_sb)->mdsc;
1870 1869
1871 spin_lock(&inode->i_lock); 1870 spin_lock(&ci->i_ceph_lock);
1872 if (__ceph_caps_dirty(ci)) 1871 if (__ceph_caps_dirty(ci))
1873 __cap_delay_requeue_front(mdsc, ci); 1872 __cap_delay_requeue_front(mdsc, ci);
1874 spin_unlock(&inode->i_lock); 1873 spin_unlock(&ci->i_ceph_lock);
1875 } 1874 }
1876 return err; 1875 return err;
1877} 1876}
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 struct inode *inode = &ci->vfs_inode; 1893 struct inode *inode = &ci->vfs_inode;
1895 struct ceph_cap *cap; 1894 struct ceph_cap *cap;
1896 1895
1897 spin_lock(&inode->i_lock); 1896 spin_lock(&ci->i_ceph_lock);
1898 cap = ci->i_auth_cap; 1897 cap = ci->i_auth_cap;
1899 if (cap && cap->session == session) { 1898 if (cap && cap->session == session) {
1900 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1899 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1904 pr_err("%p auth cap %p not mds%d ???\n", inode, 1903 pr_err("%p auth cap %p not mds%d ???\n", inode,
1905 cap, session->s_mds); 1904 cap, session->s_mds);
1906 } 1905 }
1907 spin_unlock(&inode->i_lock); 1906 spin_unlock(&ci->i_ceph_lock);
1908 } 1907 }
1909} 1908}
1910 1909
@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1921 struct ceph_cap *cap; 1920 struct ceph_cap *cap;
1922 int delayed = 0; 1921 int delayed = 0;
1923 1922
1924 spin_lock(&inode->i_lock); 1923 spin_lock(&ci->i_ceph_lock);
1925 cap = ci->i_auth_cap; 1924 cap = ci->i_auth_cap;
1926 if (cap && cap->session == session) { 1925 if (cap && cap->session == session) {
1927 dout("kick_flushing_caps %p cap %p %s\n", inode, 1926 dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1932 cap->issued | cap->implemented, 1931 cap->issued | cap->implemented,
1933 ci->i_flushing_caps, NULL); 1932 ci->i_flushing_caps, NULL);
1934 if (delayed) { 1933 if (delayed) {
1935 spin_lock(&inode->i_lock); 1934 spin_lock(&ci->i_ceph_lock);
1936 __cap_delay_requeue(mdsc, ci); 1935 __cap_delay_requeue(mdsc, ci);
1937 spin_unlock(&inode->i_lock); 1936 spin_unlock(&ci->i_ceph_lock);
1938 } 1937 }
1939 } else { 1938 } else {
1940 pr_err("%p auth cap %p not mds%d ???\n", inode, 1939 pr_err("%p auth cap %p not mds%d ???\n", inode,
1941 cap, session->s_mds); 1940 cap, session->s_mds);
1942 spin_unlock(&inode->i_lock); 1941 spin_unlock(&ci->i_ceph_lock);
1943 } 1942 }
1944 } 1943 }
1945} 1944}
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1952 struct ceph_cap *cap; 1951 struct ceph_cap *cap;
1953 int delayed = 0; 1952 int delayed = 0;
1954 1953
1955 spin_lock(&inode->i_lock); 1954 spin_lock(&ci->i_ceph_lock);
1956 cap = ci->i_auth_cap; 1955 cap = ci->i_auth_cap;
1957 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, 1956 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
1958 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); 1957 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1964 cap->issued | cap->implemented, 1963 cap->issued | cap->implemented,
1965 ci->i_flushing_caps, NULL); 1964 ci->i_flushing_caps, NULL);
1966 if (delayed) { 1965 if (delayed) {
1967 spin_lock(&inode->i_lock); 1966 spin_lock(&ci->i_ceph_lock);
1968 __cap_delay_requeue(mdsc, ci); 1967 __cap_delay_requeue(mdsc, ci);
1969 spin_unlock(&inode->i_lock); 1968 spin_unlock(&ci->i_ceph_lock);
1970 } 1969 }
1971 } else { 1970 } else {
1972 spin_unlock(&inode->i_lock); 1971 spin_unlock(&ci->i_ceph_lock);
1973 } 1972 }
1974} 1973}
1975 1974
@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1978 * Take references to capabilities we hold, so that we don't release 1977 * Take references to capabilities we hold, so that we don't release
1979 * them to the MDS prematurely. 1978 * them to the MDS prematurely.
1980 * 1979 *
1981 * Protected by i_lock. 1980 * Protected by i_ceph_lock.
1982 */ 1981 */
1983static void __take_cap_refs(struct ceph_inode_info *ci, int got) 1982static void __take_cap_refs(struct ceph_inode_info *ci, int got)
1984{ 1983{
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2016 2015
2017 dout("get_cap_refs %p need %s want %s\n", inode, 2016 dout("get_cap_refs %p need %s want %s\n", inode,
2018 ceph_cap_string(need), ceph_cap_string(want)); 2017 ceph_cap_string(need), ceph_cap_string(want));
2019 spin_lock(&inode->i_lock); 2018 spin_lock(&ci->i_ceph_lock);
2020 2019
2021 /* make sure file is actually open */ 2020 /* make sure file is actually open */
2022 file_wanted = __ceph_caps_file_wanted(ci); 2021 file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2077 ceph_cap_string(have), ceph_cap_string(need)); 2076 ceph_cap_string(have), ceph_cap_string(need));
2078 } 2077 }
2079out: 2078out:
2080 spin_unlock(&inode->i_lock); 2079 spin_unlock(&ci->i_ceph_lock);
2081 dout("get_cap_refs %p ret %d got %s\n", inode, 2080 dout("get_cap_refs %p ret %d got %s\n", inode,
2082 ret, ceph_cap_string(*got)); 2081 ret, ceph_cap_string(*got));
2083 return ret; 2082 return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2094 int check = 0; 2093 int check = 0;
2095 2094
2096 /* do we need to explicitly request a larger max_size? */ 2095 /* do we need to explicitly request a larger max_size? */
2097 spin_lock(&inode->i_lock); 2096 spin_lock(&ci->i_ceph_lock);
2098 if ((endoff >= ci->i_max_size || 2097 if ((endoff >= ci->i_max_size ||
2099 endoff > (inode->i_size << 1)) && 2098 endoff > (inode->i_size << 1)) &&
2100 endoff > ci->i_wanted_max_size) { 2099 endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2103 ci->i_wanted_max_size = endoff; 2102 ci->i_wanted_max_size = endoff;
2104 check = 1; 2103 check = 1;
2105 } 2104 }
2106 spin_unlock(&inode->i_lock); 2105 spin_unlock(&ci->i_ceph_lock);
2107 if (check) 2106 if (check)
2108 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2107 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2109} 2108}
@@ -2140,9 +2139,9 @@ retry:
2140 */ 2139 */
2141void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) 2140void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
2142{ 2141{
2143 spin_lock(&ci->vfs_inode.i_lock); 2142 spin_lock(&ci->i_ceph_lock);
2144 __take_cap_refs(ci, caps); 2143 __take_cap_refs(ci, caps);
2145 spin_unlock(&ci->vfs_inode.i_lock); 2144 spin_unlock(&ci->i_ceph_lock);
2146} 2145}
2147 2146
2148/* 2147/*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2160 int last = 0, put = 0, flushsnaps = 0, wake = 0; 2159 int last = 0, put = 0, flushsnaps = 0, wake = 0;
2161 struct ceph_cap_snap *capsnap; 2160 struct ceph_cap_snap *capsnap;
2162 2161
2163 spin_lock(&inode->i_lock); 2162 spin_lock(&ci->i_ceph_lock);
2164 if (had & CEPH_CAP_PIN) 2163 if (had & CEPH_CAP_PIN)
2165 --ci->i_pin_ref; 2164 --ci->i_pin_ref;
2166 if (had & CEPH_CAP_FILE_RD) 2165 if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2193 } 2192 }
2194 } 2193 }
2195 } 2194 }
2196 spin_unlock(&inode->i_lock); 2195 spin_unlock(&ci->i_ceph_lock);
2197 2196
2198 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), 2197 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
2199 last ? " last" : "", put ? " put" : ""); 2198 last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2225 int found = 0; 2224 int found = 0;
2226 struct ceph_cap_snap *capsnap = NULL; 2225 struct ceph_cap_snap *capsnap = NULL;
2227 2226
2228 spin_lock(&inode->i_lock); 2227 spin_lock(&ci->i_ceph_lock);
2229 ci->i_wrbuffer_ref -= nr; 2228 ci->i_wrbuffer_ref -= nr;
2230 last = !ci->i_wrbuffer_ref; 2229 last = !ci->i_wrbuffer_ref;
2231 2230
@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2274 } 2273 }
2275 } 2274 }
2276 2275
2277 spin_unlock(&inode->i_lock); 2276 spin_unlock(&ci->i_ceph_lock);
2278 2277
2279 if (last) { 2278 if (last) {
2280 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2279 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2291 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2290 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2292 * actually be a revocation if it specifies a smaller cap set.) 2291 * actually be a revocation if it specifies a smaller cap set.)
2293 * 2292 *
2294 * caller holds s_mutex and i_lock, we drop both. 2293 * caller holds s_mutex and i_ceph_lock, we drop both.
2295 * 2294 *
2296 * return value: 2295 * return value:
2297 * 0 - ok 2296 * 0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2302 struct ceph_mds_session *session, 2301 struct ceph_mds_session *session,
2303 struct ceph_cap *cap, 2302 struct ceph_cap *cap,
2304 struct ceph_buffer *xattr_buf) 2303 struct ceph_buffer *xattr_buf)
2305 __releases(inode->i_lock) 2304 __releases(ci->i_ceph_lock)
2306{ 2305{
2307 struct ceph_inode_info *ci = ceph_inode(inode); 2306 struct ceph_inode_info *ci = ceph_inode(inode);
2308 int mds = session->s_mds; 2307 int mds = session->s_mds;
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2453 } 2452 }
2454 BUG_ON(cap->issued & ~cap->implemented); 2453 BUG_ON(cap->issued & ~cap->implemented);
2455 2454
2456 spin_unlock(&inode->i_lock); 2455 spin_unlock(&ci->i_ceph_lock);
2457 if (writeback) 2456 if (writeback)
2458 /* 2457 /*
2459 * queue inode for writeback: we can't actually call 2458 * queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2483 struct ceph_mds_caps *m, 2482 struct ceph_mds_caps *m,
2484 struct ceph_mds_session *session, 2483 struct ceph_mds_session *session,
2485 struct ceph_cap *cap) 2484 struct ceph_cap *cap)
2486 __releases(inode->i_lock) 2485 __releases(ci->i_ceph_lock)
2487{ 2486{
2488 struct ceph_inode_info *ci = ceph_inode(inode); 2487 struct ceph_inode_info *ci = ceph_inode(inode);
2489 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 2488 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2539 wake_up_all(&ci->i_cap_wq); 2538 wake_up_all(&ci->i_cap_wq);
2540 2539
2541out: 2540out:
2542 spin_unlock(&inode->i_lock); 2541 spin_unlock(&ci->i_ceph_lock);
2543 if (drop) 2542 if (drop)
2544 iput(inode); 2543 iput(inode);
2545} 2544}
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2562 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", 2561 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
2563 inode, ci, session->s_mds, follows); 2562 inode, ci, session->s_mds, follows);
2564 2563
2565 spin_lock(&inode->i_lock); 2564 spin_lock(&ci->i_ceph_lock);
2566 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 2565 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
2567 if (capsnap->follows == follows) { 2566 if (capsnap->follows == follows) {
2568 if (capsnap->flush_tid != flush_tid) { 2567 if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2585 capsnap, capsnap->follows); 2584 capsnap, capsnap->follows);
2586 } 2585 }
2587 } 2586 }
2588 spin_unlock(&inode->i_lock); 2587 spin_unlock(&ci->i_ceph_lock);
2589 if (drop) 2588 if (drop)
2590 iput(inode); 2589 iput(inode);
2591} 2590}
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2598static void handle_cap_trunc(struct inode *inode, 2597static void handle_cap_trunc(struct inode *inode,
2599 struct ceph_mds_caps *trunc, 2598 struct ceph_mds_caps *trunc,
2600 struct ceph_mds_session *session) 2599 struct ceph_mds_session *session)
2601 __releases(inode->i_lock) 2600 __releases(ci->i_ceph_lock)
2602{ 2601{
2603 struct ceph_inode_info *ci = ceph_inode(inode); 2602 struct ceph_inode_info *ci = ceph_inode(inode);
2604 int mds = session->s_mds; 2603 int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
2617 inode, mds, seq, truncate_size, truncate_seq); 2616 inode, mds, seq, truncate_size, truncate_seq);
2618 queue_trunc = ceph_fill_file_size(inode, issued, 2617 queue_trunc = ceph_fill_file_size(inode, issued,
2619 truncate_seq, truncate_size, size); 2618 truncate_seq, truncate_size, size);
2620 spin_unlock(&inode->i_lock); 2619 spin_unlock(&ci->i_ceph_lock);
2621 2620
2622 if (queue_trunc) 2621 if (queue_trunc)
2623 ceph_queue_vmtruncate(inode); 2622 ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2646 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n", 2645 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
2647 inode, ci, mds, mseq); 2646 inode, ci, mds, mseq);
2648 2647
2649 spin_lock(&inode->i_lock); 2648 spin_lock(&ci->i_ceph_lock);
2650 2649
2651 /* make sure we haven't seen a higher mseq */ 2650 /* make sure we haven't seen a higher mseq */
2652 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 2651 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2690 } 2689 }
2691 /* else, we already released it */ 2690 /* else, we already released it */
2692 2691
2693 spin_unlock(&inode->i_lock); 2692 spin_unlock(&ci->i_ceph_lock);
2694} 2693}
2695 2694
2696/* 2695/*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2745 up_read(&mdsc->snap_rwsem); 2744 up_read(&mdsc->snap_rwsem);
2746 2745
2747 /* make sure we re-request max_size, if necessary */ 2746 /* make sure we re-request max_size, if necessary */
2748 spin_lock(&inode->i_lock); 2747 spin_lock(&ci->i_ceph_lock);
2749 ci->i_requested_max_size = 0; 2748 ci->i_requested_max_size = 0;
2750 spin_unlock(&inode->i_lock); 2749 spin_unlock(&ci->i_ceph_lock);
2751} 2750}
2752 2751
2753/* 2752/*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2762 struct ceph_mds_client *mdsc = session->s_mdsc; 2761 struct ceph_mds_client *mdsc = session->s_mdsc;
2763 struct super_block *sb = mdsc->fsc->sb; 2762 struct super_block *sb = mdsc->fsc->sb;
2764 struct inode *inode; 2763 struct inode *inode;
2764 struct ceph_inode_info *ci;
2765 struct ceph_cap *cap; 2765 struct ceph_cap *cap;
2766 struct ceph_mds_caps *h; 2766 struct ceph_mds_caps *h;
2767 int mds = session->s_mds; 2767 int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2815 2815
2816 /* lookup ino */ 2816 /* lookup ino */
2817 inode = ceph_find_inode(sb, vino); 2817 inode = ceph_find_inode(sb, vino);
2818 ci = ceph_inode(inode);
2818 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, 2819 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
2819 vino.snap, inode); 2820 vino.snap, inode);
2820 if (!inode) { 2821 if (!inode) {
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2844 } 2845 }
2845 2846
2846 /* the rest require a cap */ 2847 /* the rest require a cap */
2847 spin_lock(&inode->i_lock); 2848 spin_lock(&ci->i_ceph_lock);
2848 cap = __get_cap_for_mds(ceph_inode(inode), mds); 2849 cap = __get_cap_for_mds(ceph_inode(inode), mds);
2849 if (!cap) { 2850 if (!cap) {
2850 dout(" no cap on %p ino %llx.%llx from mds%d\n", 2851 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2851 inode, ceph_ino(inode), ceph_snap(inode), mds); 2852 inode, ceph_ino(inode), ceph_snap(inode), mds);
2852 spin_unlock(&inode->i_lock); 2853 spin_unlock(&ci->i_ceph_lock);
2853 goto flush_cap_releases; 2854 goto flush_cap_releases;
2854 } 2855 }
2855 2856
2856 /* note that each of these drops i_lock for us */ 2857 /* note that each of these drops i_ceph_lock for us */
2857 switch (op) { 2858 switch (op) {
2858 case CEPH_CAP_OP_REVOKE: 2859 case CEPH_CAP_OP_REVOKE:
2859 case CEPH_CAP_OP_GRANT: 2860 case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2869 break; 2870 break;
2870 2871
2871 default: 2872 default:
2872 spin_unlock(&inode->i_lock); 2873 spin_unlock(&ci->i_ceph_lock);
2873 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op, 2874 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
2874 ceph_cap_op_name(op)); 2875 ceph_cap_op_name(op));
2875 } 2876 }
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
2962 struct inode *inode = &ci->vfs_inode; 2963 struct inode *inode = &ci->vfs_inode;
2963 int last = 0; 2964 int last = 0;
2964 2965
2965 spin_lock(&inode->i_lock); 2966 spin_lock(&ci->i_ceph_lock);
2966 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode, 2967 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
2967 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1); 2968 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
2968 BUG_ON(ci->i_nr_by_mode[fmode] == 0); 2969 BUG_ON(ci->i_nr_by_mode[fmode] == 0);
2969 if (--ci->i_nr_by_mode[fmode] == 0) 2970 if (--ci->i_nr_by_mode[fmode] == 0)
2970 last++; 2971 last++;
2971 spin_unlock(&inode->i_lock); 2972 spin_unlock(&ci->i_ceph_lock);
2972 2973
2973 if (last && ci->i_vino.snap == CEPH_NOSNAP) 2974 if (last && ci->i_vino.snap == CEPH_NOSNAP)
2974 ceph_check_caps(ci, 0, NULL); 2975 ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
2991 int used, dirty; 2992 int used, dirty;
2992 int ret = 0; 2993 int ret = 0;
2993 2994
2994 spin_lock(&inode->i_lock); 2995 spin_lock(&ci->i_ceph_lock);
2995 used = __ceph_caps_used(ci); 2996 used = __ceph_caps_used(ci);
2996 dirty = __ceph_caps_dirty(ci); 2997 dirty = __ceph_caps_dirty(ci);
2997 2998
@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
3046 inode, cap, ceph_cap_string(cap->issued)); 3047 inode, cap, ceph_cap_string(cap->issued));
3047 } 3048 }
3048 } 3049 }
3049 spin_unlock(&inode->i_lock); 3050 spin_unlock(&ci->i_ceph_lock);
3050 return ret; 3051 return ret;
3051} 3052}
3052 3053
@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
3061 3062
3062 /* 3063 /*
3063 * force an record for the directory caps if we have a dentry lease. 3064 * force an record for the directory caps if we have a dentry lease.
3064 * this is racy (can't take i_lock and d_lock together), but it 3065 * this is racy (can't take i_ceph_lock and d_lock together), but it
3065 * doesn't have to be perfect; the mds will revoke anything we don't 3066 * doesn't have to be perfect; the mds will revoke anything we don't
3066 * release. 3067 * release.
3067 */ 3068 */
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index bca3948e9dbf..3eeb97661262 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
281 } 281 }
282 282
283 /* can we use the dcache? */ 283 /* can we use the dcache? */
284 spin_lock(&inode->i_lock); 284 spin_lock(&ci->i_ceph_lock);
285 if ((filp->f_pos == 2 || fi->dentry) && 285 if ((filp->f_pos == 2 || fi->dentry) &&
286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
287 ceph_snap(inode) != CEPH_SNAPDIR && 287 ceph_snap(inode) != CEPH_SNAPDIR &&
288 ceph_dir_test_complete(inode) && 288 ceph_dir_test_complete(inode) &&
289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
290 spin_unlock(&inode->i_lock); 290 spin_unlock(&ci->i_ceph_lock);
291 err = __dcache_readdir(filp, dirent, filldir); 291 err = __dcache_readdir(filp, dirent, filldir);
292 if (err != -EAGAIN) 292 if (err != -EAGAIN)
293 return err; 293 return err;
294 } else { 294 } else {
295 spin_unlock(&inode->i_lock); 295 spin_unlock(&ci->i_ceph_lock);
296 } 296 }
297 if (fi->dentry) { 297 if (fi->dentry) {
298 err = note_last_dentry(fi, fi->dentry->d_name.name, 298 err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -428,12 +428,12 @@ more:
428 * were released during the whole readdir, and we should have 428 * were released during the whole readdir, and we should have
429 * the complete dir contents in our cache. 429 * the complete dir contents in our cache.
430 */ 430 */
431 spin_lock(&inode->i_lock); 431 spin_lock(&ci->i_ceph_lock);
432 if (ci->i_release_count == fi->dir_release_count) { 432 if (ci->i_release_count == fi->dir_release_count) {
433 ceph_dir_set_complete(inode); 433 ceph_dir_set_complete(inode);
434 ci->i_max_offset = filp->f_pos; 434 ci->i_max_offset = filp->f_pos;
435 } 435 }
436 spin_unlock(&inode->i_lock); 436 spin_unlock(&ci->i_ceph_lock);
437 437
438 dout("readdir %p filp %p done.\n", inode, filp); 438 dout("readdir %p filp %p done.\n", inode, filp);
439 return 0; 439 return 0;
@@ -607,7 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
607 struct ceph_inode_info *ci = ceph_inode(dir); 607 struct ceph_inode_info *ci = ceph_inode(dir);
608 struct ceph_dentry_info *di = ceph_dentry(dentry); 608 struct ceph_dentry_info *di = ceph_dentry(dentry);
609 609
610 spin_lock(&dir->i_lock); 610 spin_lock(&ci->i_ceph_lock);
611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
612 if (strncmp(dentry->d_name.name, 612 if (strncmp(dentry->d_name.name,
613 fsc->mount_options->snapdir_name, 613 fsc->mount_options->snapdir_name,
@@ -615,13 +615,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
615 !is_root_ceph_dentry(dir, dentry) && 615 !is_root_ceph_dentry(dir, dentry) &&
616 ceph_dir_test_complete(dir) && 616 ceph_dir_test_complete(dir) &&
617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
618 spin_unlock(&dir->i_lock); 618 spin_unlock(&ci->i_ceph_lock);
619 dout(" dir %p complete, -ENOENT\n", dir); 619 dout(" dir %p complete, -ENOENT\n", dir);
620 d_add(dentry, NULL); 620 d_add(dentry, NULL);
621 di->lease_shared_gen = ci->i_shared_gen; 621 di->lease_shared_gen = ci->i_shared_gen;
622 return NULL; 622 return NULL;
623 } 623 }
624 spin_unlock(&dir->i_lock); 624 spin_unlock(&ci->i_ceph_lock);
625 } 625 }
626 626
627 op = ceph_snap(dir) == CEPH_SNAPDIR ? 627 op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -841,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
841 struct ceph_inode_info *ci = ceph_inode(inode); 841 struct ceph_inode_info *ci = ceph_inode(inode);
842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
843 843
844 spin_lock(&inode->i_lock); 844 spin_lock(&ci->i_ceph_lock);
845 if (inode->i_nlink == 1) { 845 if (inode->i_nlink == 1) {
846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
847 ci->i_ceph_flags |= CEPH_I_NODELAY; 847 ci->i_ceph_flags |= CEPH_I_NODELAY;
848 } 848 }
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 return drop; 850 return drop;
851} 851}
852 852
@@ -1015,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
1015 struct ceph_dentry_info *di = ceph_dentry(dentry); 1015 struct ceph_dentry_info *di = ceph_dentry(dentry);
1016 int valid = 0; 1016 int valid = 0;
1017 1017
1018 spin_lock(&dir->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 if (ci->i_shared_gen == di->lease_shared_gen) 1019 if (ci->i_shared_gen == di->lease_shared_gen)
1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
1021 spin_unlock(&dir->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
1023 dir, (unsigned)ci->i_shared_gen, dentry, 1023 dir, (unsigned)ci->i_shared_gen, dentry,
1024 (unsigned)di->lease_shared_gen, valid); 1024 (unsigned)di->lease_shared_gen, valid);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ce549d31eeb7..ed72428d9c75 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
147 147
148 /* trivially open snapdir */ 148 /* trivially open snapdir */
149 if (ceph_snap(inode) == CEPH_SNAPDIR) { 149 if (ceph_snap(inode) == CEPH_SNAPDIR) {
150 spin_lock(&inode->i_lock); 150 spin_lock(&ci->i_ceph_lock);
151 __ceph_get_fmode(ci, fmode); 151 __ceph_get_fmode(ci, fmode);
152 spin_unlock(&inode->i_lock); 152 spin_unlock(&ci->i_ceph_lock);
153 return ceph_init_file(inode, file, fmode); 153 return ceph_init_file(inode, file, fmode);
154 } 154 }
155 155
@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
158 * write) or any MDS (for read). Update wanted set 158 * write) or any MDS (for read). Update wanted set
159 * asynchronously. 159 * asynchronously.
160 */ 160 */
161 spin_lock(&inode->i_lock); 161 spin_lock(&ci->i_ceph_lock);
162 if (__ceph_is_any_real_caps(ci) && 162 if (__ceph_is_any_real_caps(ci) &&
163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
164 int mds_wanted = __ceph_caps_mds_wanted(ci); 164 int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
168 inode, fmode, ceph_cap_string(wanted), 168 inode, fmode, ceph_cap_string(wanted),
169 ceph_cap_string(issued)); 169 ceph_cap_string(issued));
170 __ceph_get_fmode(ci, fmode); 170 __ceph_get_fmode(ci, fmode);
171 spin_unlock(&inode->i_lock); 171 spin_unlock(&ci->i_ceph_lock);
172 172
173 /* adjust wanted? */ 173 /* adjust wanted? */
174 if ((issued & wanted) != wanted && 174 if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
180 } else if (ceph_snap(inode) != CEPH_NOSNAP && 180 } else if (ceph_snap(inode) != CEPH_NOSNAP &&
181 (ci->i_snap_caps & wanted) == wanted) { 181 (ci->i_snap_caps & wanted) == wanted) {
182 __ceph_get_fmode(ci, fmode); 182 __ceph_get_fmode(ci, fmode);
183 spin_unlock(&inode->i_lock); 183 spin_unlock(&ci->i_ceph_lock);
184 return ceph_init_file(inode, file, fmode); 184 return ceph_init_file(inode, file, fmode);
185 } 185 }
186 spin_unlock(&inode->i_lock); 186 spin_unlock(&ci->i_ceph_lock);
187 187
188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); 188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
189 req = prepare_open_request(inode->i_sb, flags, 0); 189 req = prepare_open_request(inode->i_sb, flags, 0);
@@ -743,9 +743,9 @@ retry_snap:
743 */ 743 */
744 int dirty; 744 int dirty;
745 745
746 spin_lock(&inode->i_lock); 746 spin_lock(&ci->i_ceph_lock);
747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
748 spin_unlock(&inode->i_lock); 748 spin_unlock(&ci->i_ceph_lock);
749 ceph_put_cap_refs(ci, got); 749 ceph_put_cap_refs(ci, got);
750 750
751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -764,9 +764,9 @@ retry_snap:
764 764
765 if (ret >= 0) { 765 if (ret >= 0) {
766 int dirty; 766 int dirty;
767 spin_lock(&inode->i_lock); 767 spin_lock(&ci->i_ceph_lock);
768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
769 spin_unlock(&inode->i_lock); 769 spin_unlock(&ci->i_ceph_lock);
770 if (dirty) 770 if (dirty)
771 __mark_inode_dirty(inode, dirty); 771 __mark_inode_dirty(inode, dirty);
772 } 772 }
@@ -797,7 +797,8 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
797 797
798 mutex_lock(&inode->i_mutex); 798 mutex_lock(&inode->i_mutex);
799 __ceph_do_pending_vmtruncate(inode); 799 __ceph_do_pending_vmtruncate(inode);
800 if (origin != SEEK_CUR || origin != SEEK_SET) { 800
801 if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
801 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 802 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
802 if (ret < 0) { 803 if (ret < 0) {
803 offset = ret; 804 offset = ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 116f36502f17..87fb132fb330 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
297 297
298 dout("alloc_inode %p\n", &ci->vfs_inode); 298 dout("alloc_inode %p\n", &ci->vfs_inode);
299 299
300 spin_lock_init(&ci->i_ceph_lock);
301
300 ci->i_version = 0; 302 ci->i_version = 0;
301 ci->i_time_warp_seq = 0; 303 ci->i_time_warp_seq = 0;
302 ci->i_ceph_flags = 0; 304 ci->i_ceph_flags = 0;
@@ -583,7 +585,7 @@ static int fill_inode(struct inode *inode,
583 iinfo->xattr_len); 585 iinfo->xattr_len);
584 } 586 }
585 587
586 spin_lock(&inode->i_lock); 588 spin_lock(&ci->i_ceph_lock);
587 589
588 /* 590 /*
589 * provided version will be odd if inode value is projected, 591 * provided version will be odd if inode value is projected,
@@ -680,7 +682,7 @@ static int fill_inode(struct inode *inode,
680 char *sym; 682 char *sym;
681 683
682 BUG_ON(symlen != inode->i_size); 684 BUG_ON(symlen != inode->i_size);
683 spin_unlock(&inode->i_lock); 685 spin_unlock(&ci->i_ceph_lock);
684 686
685 err = -ENOMEM; 687 err = -ENOMEM;
686 sym = kmalloc(symlen+1, GFP_NOFS); 688 sym = kmalloc(symlen+1, GFP_NOFS);
@@ -689,7 +691,7 @@ static int fill_inode(struct inode *inode,
689 memcpy(sym, iinfo->symlink, symlen); 691 memcpy(sym, iinfo->symlink, symlen);
690 sym[symlen] = 0; 692 sym[symlen] = 0;
691 693
692 spin_lock(&inode->i_lock); 694 spin_lock(&ci->i_ceph_lock);
693 if (!ci->i_symlink) 695 if (!ci->i_symlink)
694 ci->i_symlink = sym; 696 ci->i_symlink = sym;
695 else 697 else
@@ -715,7 +717,7 @@ static int fill_inode(struct inode *inode,
715 } 717 }
716 718
717no_change: 719no_change:
718 spin_unlock(&inode->i_lock); 720 spin_unlock(&ci->i_ceph_lock);
719 721
720 /* queue truncate if we saw i_size decrease */ 722 /* queue truncate if we saw i_size decrease */
721 if (queue_trunc) 723 if (queue_trunc)
@@ -750,13 +752,13 @@ no_change:
750 info->cap.flags, 752 info->cap.flags,
751 caps_reservation); 753 caps_reservation);
752 } else { 754 } else {
753 spin_lock(&inode->i_lock); 755 spin_lock(&ci->i_ceph_lock);
754 dout(" %p got snap_caps %s\n", inode, 756 dout(" %p got snap_caps %s\n", inode,
755 ceph_cap_string(le32_to_cpu(info->cap.caps))); 757 ceph_cap_string(le32_to_cpu(info->cap.caps)));
756 ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 758 ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
757 if (cap_fmode >= 0) 759 if (cap_fmode >= 0)
758 __ceph_get_fmode(ci, cap_fmode); 760 __ceph_get_fmode(ci, cap_fmode);
759 spin_unlock(&inode->i_lock); 761 spin_unlock(&ci->i_ceph_lock);
760 } 762 }
761 } else if (cap_fmode >= 0) { 763 } else if (cap_fmode >= 0) {
762 pr_warning("mds issued no caps on %llx.%llx\n", 764 pr_warning("mds issued no caps on %llx.%llx\n",
@@ -849,19 +851,20 @@ static void ceph_set_dentry_offset(struct dentry *dn)
849{ 851{
850 struct dentry *dir = dn->d_parent; 852 struct dentry *dir = dn->d_parent;
851 struct inode *inode = dir->d_inode; 853 struct inode *inode = dir->d_inode;
854 struct ceph_inode_info *ci = ceph_inode(inode);
852 struct ceph_dentry_info *di; 855 struct ceph_dentry_info *di;
853 856
854 BUG_ON(!inode); 857 BUG_ON(!inode);
855 858
856 di = ceph_dentry(dn); 859 di = ceph_dentry(dn);
857 860
858 spin_lock(&inode->i_lock); 861 spin_lock(&ci->i_ceph_lock);
859 if (!ceph_dir_test_complete(inode)) { 862 if (!ceph_dir_test_complete(inode)) {
860 spin_unlock(&inode->i_lock); 863 spin_unlock(&ci->i_ceph_lock);
861 return; 864 return;
862 } 865 }
863 di->offset = ceph_inode(inode)->i_max_offset++; 866 di->offset = ceph_inode(inode)->i_max_offset++;
864 spin_unlock(&inode->i_lock); 867 spin_unlock(&ci->i_ceph_lock);
865 868
866 spin_lock(&dir->d_lock); 869 spin_lock(&dir->d_lock);
867 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); 870 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1308,7 +1311,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1308 struct ceph_inode_info *ci = ceph_inode(inode); 1311 struct ceph_inode_info *ci = ceph_inode(inode);
1309 int ret = 0; 1312 int ret = 0;
1310 1313
1311 spin_lock(&inode->i_lock); 1314 spin_lock(&ci->i_ceph_lock);
1312 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size); 1315 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
1313 inode->i_size = size; 1316 inode->i_size = size;
1314 inode->i_blocks = (size + (1 << 9) - 1) >> 9; 1317 inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1318,7 +1321,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1318 (ci->i_reported_size << 1) < ci->i_max_size) 1321 (ci->i_reported_size << 1) < ci->i_max_size)
1319 ret = 1; 1322 ret = 1;
1320 1323
1321 spin_unlock(&inode->i_lock); 1324 spin_unlock(&ci->i_ceph_lock);
1322 return ret; 1325 return ret;
1323} 1326}
1324 1327
@@ -1376,20 +1379,20 @@ static void ceph_invalidate_work(struct work_struct *work)
1376 u32 orig_gen; 1379 u32 orig_gen;
1377 int check = 0; 1380 int check = 0;
1378 1381
1379 spin_lock(&inode->i_lock); 1382 spin_lock(&ci->i_ceph_lock);
1380 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1383 dout("invalidate_pages %p gen %d revoking %d\n", inode,
1381 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1384 ci->i_rdcache_gen, ci->i_rdcache_revoking);
1382 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 1385 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1383 /* nevermind! */ 1386 /* nevermind! */
1384 spin_unlock(&inode->i_lock); 1387 spin_unlock(&ci->i_ceph_lock);
1385 goto out; 1388 goto out;
1386 } 1389 }
1387 orig_gen = ci->i_rdcache_gen; 1390 orig_gen = ci->i_rdcache_gen;
1388 spin_unlock(&inode->i_lock); 1391 spin_unlock(&ci->i_ceph_lock);
1389 1392
1390 truncate_inode_pages(&inode->i_data, 0); 1393 truncate_inode_pages(&inode->i_data, 0);
1391 1394
1392 spin_lock(&inode->i_lock); 1395 spin_lock(&ci->i_ceph_lock);
1393 if (orig_gen == ci->i_rdcache_gen && 1396 if (orig_gen == ci->i_rdcache_gen &&
1394 orig_gen == ci->i_rdcache_revoking) { 1397 orig_gen == ci->i_rdcache_revoking) {
1395 dout("invalidate_pages %p gen %d successful\n", inode, 1398 dout("invalidate_pages %p gen %d successful\n", inode,
@@ -1401,7 +1404,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1401 inode, orig_gen, ci->i_rdcache_gen, 1404 inode, orig_gen, ci->i_rdcache_gen,
1402 ci->i_rdcache_revoking); 1405 ci->i_rdcache_revoking);
1403 } 1406 }
1404 spin_unlock(&inode->i_lock); 1407 spin_unlock(&ci->i_ceph_lock);
1405 1408
1406 if (check) 1409 if (check)
1407 ceph_check_caps(ci, 0, NULL); 1410 ceph_check_caps(ci, 0, NULL);
@@ -1460,10 +1463,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
1460 int wrbuffer_refs, wake = 0; 1463 int wrbuffer_refs, wake = 0;
1461 1464
1462retry: 1465retry:
1463 spin_lock(&inode->i_lock); 1466 spin_lock(&ci->i_ceph_lock);
1464 if (ci->i_truncate_pending == 0) { 1467 if (ci->i_truncate_pending == 0) {
1465 dout("__do_pending_vmtruncate %p none pending\n", inode); 1468 dout("__do_pending_vmtruncate %p none pending\n", inode);
1466 spin_unlock(&inode->i_lock); 1469 spin_unlock(&ci->i_ceph_lock);
1467 return; 1470 return;
1468 } 1471 }
1469 1472
@@ -1474,7 +1477,7 @@ retry:
1474 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { 1477 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
1475 dout("__do_pending_vmtruncate %p flushing snaps first\n", 1478 dout("__do_pending_vmtruncate %p flushing snaps first\n",
1476 inode); 1479 inode);
1477 spin_unlock(&inode->i_lock); 1480 spin_unlock(&ci->i_ceph_lock);
1478 filemap_write_and_wait_range(&inode->i_data, 0, 1481 filemap_write_and_wait_range(&inode->i_data, 0,
1479 inode->i_sb->s_maxbytes); 1482 inode->i_sb->s_maxbytes);
1480 goto retry; 1483 goto retry;
@@ -1484,15 +1487,15 @@ retry:
1484 wrbuffer_refs = ci->i_wrbuffer_ref; 1487 wrbuffer_refs = ci->i_wrbuffer_ref;
1485 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode, 1488 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
1486 ci->i_truncate_pending, to); 1489 ci->i_truncate_pending, to);
1487 spin_unlock(&inode->i_lock); 1490 spin_unlock(&ci->i_ceph_lock);
1488 1491
1489 truncate_inode_pages(inode->i_mapping, to); 1492 truncate_inode_pages(inode->i_mapping, to);
1490 1493
1491 spin_lock(&inode->i_lock); 1494 spin_lock(&ci->i_ceph_lock);
1492 ci->i_truncate_pending--; 1495 ci->i_truncate_pending--;
1493 if (ci->i_truncate_pending == 0) 1496 if (ci->i_truncate_pending == 0)
1494 wake = 1; 1497 wake = 1;
1495 spin_unlock(&inode->i_lock); 1498 spin_unlock(&ci->i_ceph_lock);
1496 1499
1497 if (wrbuffer_refs == 0) 1500 if (wrbuffer_refs == 0)
1498 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 1501 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1547,7 +1550,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1547 if (IS_ERR(req)) 1550 if (IS_ERR(req))
1548 return PTR_ERR(req); 1551 return PTR_ERR(req);
1549 1552
1550 spin_lock(&inode->i_lock); 1553 spin_lock(&ci->i_ceph_lock);
1551 issued = __ceph_caps_issued(ci, NULL); 1554 issued = __ceph_caps_issued(ci, NULL);
1552 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); 1555 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
1553 1556
@@ -1695,7 +1698,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1695 } 1698 }
1696 1699
1697 release &= issued; 1700 release &= issued;
1698 spin_unlock(&inode->i_lock); 1701 spin_unlock(&ci->i_ceph_lock);
1699 1702
1700 if (inode_dirty_flags) 1703 if (inode_dirty_flags)
1701 __mark_inode_dirty(inode, inode_dirty_flags); 1704 __mark_inode_dirty(inode, inode_dirty_flags);
@@ -1717,7 +1720,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1717 __ceph_do_pending_vmtruncate(inode); 1720 __ceph_do_pending_vmtruncate(inode);
1718 return err; 1721 return err;
1719out: 1722out:
1720 spin_unlock(&inode->i_lock); 1723 spin_unlock(&ci->i_ceph_lock);
1721 ceph_mdsc_put_request(req); 1724 ceph_mdsc_put_request(req);
1722 return err; 1725 return err;
1723} 1726}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 5a14c29cbba6..790914a598dd 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -241,11 +241,11 @@ static long ceph_ioctl_lazyio(struct file *file)
241 struct ceph_inode_info *ci = ceph_inode(inode); 241 struct ceph_inode_info *ci = ceph_inode(inode);
242 242
243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { 243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
244 spin_lock(&inode->i_lock); 244 spin_lock(&ci->i_ceph_lock);
245 ci->i_nr_by_mode[fi->fmode]--; 245 ci->i_nr_by_mode[fi->fmode]--;
246 fi->fmode |= CEPH_FILE_MODE_LAZY; 246 fi->fmode |= CEPH_FILE_MODE_LAZY;
247 ci->i_nr_by_mode[fi->fmode]++; 247 ci->i_nr_by_mode[fi->fmode]++;
248 spin_unlock(&inode->i_lock); 248 spin_unlock(&ci->i_ceph_lock);
249 dout("ioctl_layzio: file %p marked lazy\n", file); 249 dout("ioctl_layzio: file %p marked lazy\n", file);
250 250
251 ceph_check_caps(ci, 0, NULL); 251 ceph_check_caps(ci, 0, NULL);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 264ab701154f..6203d805eb45 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -732,21 +732,21 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
732 } 732 }
733 } 733 }
734 734
735 spin_lock(&inode->i_lock); 735 spin_lock(&ci->i_ceph_lock);
736 cap = NULL; 736 cap = NULL;
737 if (mode == USE_AUTH_MDS) 737 if (mode == USE_AUTH_MDS)
738 cap = ci->i_auth_cap; 738 cap = ci->i_auth_cap;
739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps)) 739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); 740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
741 if (!cap) { 741 if (!cap) {
742 spin_unlock(&inode->i_lock); 742 spin_unlock(&ci->i_ceph_lock);
743 goto random; 743 goto random;
744 } 744 }
745 mds = cap->session->s_mds; 745 mds = cap->session->s_mds;
746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n", 746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
747 inode, ceph_vinop(inode), mds, 747 inode, ceph_vinop(inode), mds,
748 cap == ci->i_auth_cap ? "auth " : "", cap); 748 cap == ci->i_auth_cap ? "auth " : "", cap);
749 spin_unlock(&inode->i_lock); 749 spin_unlock(&ci->i_ceph_lock);
750 return mds; 750 return mds;
751 751
752random: 752random:
@@ -951,7 +951,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
951 951
952 dout("removing cap %p, ci is %p, inode is %p\n", 952 dout("removing cap %p, ci is %p, inode is %p\n",
953 cap, ci, &ci->vfs_inode); 953 cap, ci, &ci->vfs_inode);
954 spin_lock(&inode->i_lock); 954 spin_lock(&ci->i_ceph_lock);
955 __ceph_remove_cap(cap); 955 __ceph_remove_cap(cap);
956 if (!__ceph_is_any_real_caps(ci)) { 956 if (!__ceph_is_any_real_caps(ci)) {
957 struct ceph_mds_client *mdsc = 957 struct ceph_mds_client *mdsc =
@@ -984,7 +984,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
984 } 984 }
985 spin_unlock(&mdsc->cap_dirty_lock); 985 spin_unlock(&mdsc->cap_dirty_lock);
986 } 986 }
987 spin_unlock(&inode->i_lock); 987 spin_unlock(&ci->i_ceph_lock);
988 while (drop--) 988 while (drop--)
989 iput(inode); 989 iput(inode);
990 return 0; 990 return 0;
@@ -1015,10 +1015,10 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
1015 1015
1016 wake_up_all(&ci->i_cap_wq); 1016 wake_up_all(&ci->i_cap_wq);
1017 if (arg) { 1017 if (arg) {
1018 spin_lock(&inode->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 ci->i_wanted_max_size = 0; 1019 ci->i_wanted_max_size = 0;
1020 ci->i_requested_max_size = 0; 1020 ci->i_requested_max_size = 0;
1021 spin_unlock(&inode->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 } 1022 }
1023 return 0; 1023 return 0;
1024} 1024}
@@ -1151,7 +1151,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1151 if (session->s_trim_caps <= 0) 1151 if (session->s_trim_caps <= 0)
1152 return -1; 1152 return -1;
1153 1153
1154 spin_lock(&inode->i_lock); 1154 spin_lock(&ci->i_ceph_lock);
1155 mine = cap->issued | cap->implemented; 1155 mine = cap->issued | cap->implemented;
1156 used = __ceph_caps_used(ci); 1156 used = __ceph_caps_used(ci);
1157 oissued = __ceph_caps_issued_other(ci, cap); 1157 oissued = __ceph_caps_issued_other(ci, cap);
@@ -1170,7 +1170,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1170 __ceph_remove_cap(cap); 1170 __ceph_remove_cap(cap);
1171 } else { 1171 } else {
1172 /* try to drop referring dentries */ 1172 /* try to drop referring dentries */
1173 spin_unlock(&inode->i_lock); 1173 spin_unlock(&ci->i_ceph_lock);
1174 d_prune_aliases(inode); 1174 d_prune_aliases(inode);
1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n", 1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n",
1176 inode, cap, atomic_read(&inode->i_count)); 1176 inode, cap, atomic_read(&inode->i_count));
@@ -1178,7 +1178,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1178 } 1178 }
1179 1179
1180out: 1180out:
1181 spin_unlock(&inode->i_lock); 1181 spin_unlock(&ci->i_ceph_lock);
1182 return 0; 1182 return 0;
1183} 1183}
1184 1184
@@ -1296,7 +1296,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1296 i_flushing_item); 1296 i_flushing_item);
1297 struct inode *inode = &ci->vfs_inode; 1297 struct inode *inode = &ci->vfs_inode;
1298 1298
1299 spin_lock(&inode->i_lock); 1299 spin_lock(&ci->i_ceph_lock);
1300 if (ci->i_cap_flush_seq <= want_flush_seq) { 1300 if (ci->i_cap_flush_seq <= want_flush_seq) {
1301 dout("check_cap_flush still flushing %p " 1301 dout("check_cap_flush still flushing %p "
1302 "seq %lld <= %lld to mds%d\n", inode, 1302 "seq %lld <= %lld to mds%d\n", inode,
@@ -1304,7 +1304,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1304 session->s_mds); 1304 session->s_mds);
1305 ret = 0; 1305 ret = 0;
1306 } 1306 }
1307 spin_unlock(&inode->i_lock); 1307 spin_unlock(&ci->i_ceph_lock);
1308 } 1308 }
1309 mutex_unlock(&session->s_mutex); 1309 mutex_unlock(&session->s_mutex);
1310 ceph_put_mds_session(session); 1310 ceph_put_mds_session(session);
@@ -1495,6 +1495,7 @@ retry:
1495 pos, temp); 1495 pos, temp);
1496 } else if (stop_on_nosnap && inode && 1496 } else if (stop_on_nosnap && inode &&
1497 ceph_snap(inode) == CEPH_NOSNAP) { 1497 ceph_snap(inode) == CEPH_NOSNAP) {
1498 spin_unlock(&temp->d_lock);
1498 break; 1499 break;
1499 } else { 1500 } else {
1500 pos -= temp->d_name.len; 1501 pos -= temp->d_name.len;
@@ -2011,10 +2012,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
2011 struct ceph_inode_info *ci = ceph_inode(inode); 2012 struct ceph_inode_info *ci = ceph_inode(inode);
2012 2013
2013 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode); 2014 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
2014 spin_lock(&inode->i_lock); 2015 spin_lock(&ci->i_ceph_lock);
2015 ceph_dir_clear_complete(inode); 2016 ceph_dir_clear_complete(inode);
2016 ci->i_release_count++; 2017 ci->i_release_count++;
2017 spin_unlock(&inode->i_lock); 2018 spin_unlock(&ci->i_ceph_lock);
2018 2019
2019 if (req->r_dentry) 2020 if (req->r_dentry)
2020 ceph_invalidate_dentry_lease(req->r_dentry); 2021 ceph_invalidate_dentry_lease(req->r_dentry);
@@ -2422,7 +2423,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2422 if (err) 2423 if (err)
2423 goto out_free; 2424 goto out_free;
2424 2425
2425 spin_lock(&inode->i_lock); 2426 spin_lock(&ci->i_ceph_lock);
2426 cap->seq = 0; /* reset cap seq */ 2427 cap->seq = 0; /* reset cap seq */
2427 cap->issue_seq = 0; /* and issue_seq */ 2428 cap->issue_seq = 0; /* and issue_seq */
2428 2429
@@ -2445,7 +2446,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2445 rec.v1.pathbase = cpu_to_le64(pathbase); 2446 rec.v1.pathbase = cpu_to_le64(pathbase);
2446 reclen = sizeof(rec.v1); 2447 reclen = sizeof(rec.v1);
2447 } 2448 }
2448 spin_unlock(&inode->i_lock); 2449 spin_unlock(&ci->i_ceph_lock);
2449 2450
2450 if (recon_state->flock) { 2451 if (recon_state->flock) {
2451 int num_fcntl_locks, num_flock_locks; 2452 int num_fcntl_locks, num_flock_locks;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4bb239921dbd..a50ca0e39475 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -20,7 +20,7 @@
20 * 20 *
21 * mdsc->snap_rwsem 21 * mdsc->snap_rwsem
22 * 22 *
23 * inode->i_lock 23 * ci->i_ceph_lock
24 * mdsc->snap_flush_lock 24 * mdsc->snap_flush_lock
25 * mdsc->cap_delay_lock 25 * mdsc->cap_delay_lock
26 * 26 *
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e26437191333..a559c80f127a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -446,7 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
446 return; 446 return;
447 } 447 }
448 448
449 spin_lock(&inode->i_lock); 449 spin_lock(&ci->i_ceph_lock);
450 used = __ceph_caps_used(ci); 450 used = __ceph_caps_used(ci);
451 dirty = __ceph_caps_dirty(ci); 451 dirty = __ceph_caps_dirty(ci);
452 452
@@ -528,7 +528,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
528 kfree(capsnap); 528 kfree(capsnap);
529 } 529 }
530 530
531 spin_unlock(&inode->i_lock); 531 spin_unlock(&ci->i_ceph_lock);
532} 532}
533 533
534/* 534/*
@@ -537,7 +537,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
537 * 537 *
538 * If capsnap can now be flushed, add to snap_flush list, and return 1. 538 * If capsnap can now be flushed, add to snap_flush list, and return 1.
539 * 539 *
540 * Caller must hold i_lock. 540 * Caller must hold i_ceph_lock.
541 */ 541 */
542int __ceph_finish_cap_snap(struct ceph_inode_info *ci, 542int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
543 struct ceph_cap_snap *capsnap) 543 struct ceph_cap_snap *capsnap)
@@ -739,9 +739,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
739 inode = &ci->vfs_inode; 739 inode = &ci->vfs_inode;
740 ihold(inode); 740 ihold(inode);
741 spin_unlock(&mdsc->snap_flush_lock); 741 spin_unlock(&mdsc->snap_flush_lock);
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743 __ceph_flush_snaps(ci, &session, 0); 743 __ceph_flush_snaps(ci, &session, 0);
744 spin_unlock(&inode->i_lock); 744 spin_unlock(&ci->i_ceph_lock);
745 iput(inode); 745 iput(inode);
746 spin_lock(&mdsc->snap_flush_lock); 746 spin_lock(&mdsc->snap_flush_lock);
747 } 747 }
@@ -847,7 +847,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
847 continue; 847 continue;
848 ci = ceph_inode(inode); 848 ci = ceph_inode(inode);
849 849
850 spin_lock(&inode->i_lock); 850 spin_lock(&ci->i_ceph_lock);
851 if (!ci->i_snap_realm) 851 if (!ci->i_snap_realm)
852 goto skip_inode; 852 goto skip_inode;
853 /* 853 /*
@@ -876,7 +876,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
876 oldrealm = ci->i_snap_realm; 876 oldrealm = ci->i_snap_realm;
877 ci->i_snap_realm = realm; 877 ci->i_snap_realm = realm;
878 spin_unlock(&realm->inodes_with_caps_lock); 878 spin_unlock(&realm->inodes_with_caps_lock);
879 spin_unlock(&inode->i_lock); 879 spin_unlock(&ci->i_ceph_lock);
880 880
881 ceph_get_snap_realm(mdsc, realm); 881 ceph_get_snap_realm(mdsc, realm);
882 ceph_put_snap_realm(mdsc, oldrealm); 882 ceph_put_snap_realm(mdsc, oldrealm);
@@ -885,7 +885,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
885 continue; 885 continue;
886 886
887skip_inode: 887skip_inode:
888 spin_unlock(&inode->i_lock); 888 spin_unlock(&ci->i_ceph_lock);
889 iput(inode); 889 iput(inode);
890 } 890 }
891 891
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8dc73a594a90..b48f15f101a0 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -383,7 +383,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
383 if (fsopt->rsize != CEPH_RSIZE_DEFAULT) 383 if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
384 seq_printf(m, ",rsize=%d", fsopt->rsize); 384 seq_printf(m, ",rsize=%d", fsopt->rsize);
385 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 385 if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
386 seq_printf(m, ",rasize=%d", fsopt->rsize); 386 seq_printf(m, ",rasize=%d", fsopt->rasize);
387 if (fsopt->congestion_kb != default_congestion_kb()) 387 if (fsopt->congestion_kb != default_congestion_kb())
388 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 388 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
389 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 389 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 01bf189e08a9..edcbf3774a56 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -220,7 +220,7 @@ struct ceph_dentry_info {
220 * The locking for D_COMPLETE is a bit odd: 220 * The locking for D_COMPLETE is a bit odd:
221 * - we can clear it at almost any time (see ceph_d_prune) 221 * - we can clear it at almost any time (see ceph_d_prune)
222 * - it is only meaningful if: 222 * - it is only meaningful if:
223 * - we hold dir inode i_lock 223 * - we hold dir inode i_ceph_lock
224 * - we hold dir FILE_SHARED caps 224 * - we hold dir FILE_SHARED caps
225 * - the dentry D_COMPLETE is set 225 * - the dentry D_COMPLETE is set
226 */ 226 */
@@ -250,6 +250,8 @@ struct ceph_inode_xattrs_info {
250struct ceph_inode_info { 250struct ceph_inode_info {
251 struct ceph_vino i_vino; /* ceph ino + snap */ 251 struct ceph_vino i_vino; /* ceph ino + snap */
252 252
253 spinlock_t i_ceph_lock;
254
253 u64 i_version; 255 u64 i_version;
254 u32 i_time_warp_seq; 256 u32 i_time_warp_seq;
255 257
@@ -271,7 +273,7 @@ struct ceph_inode_info {
271 273
272 struct ceph_inode_xattrs_info i_xattrs; 274 struct ceph_inode_xattrs_info i_xattrs;
273 275
274 /* capabilities. protected _both_ by i_lock and cap->session's 276 /* capabilities. protected _both_ by i_ceph_lock and cap->session's
275 * s_mutex. */ 277 * s_mutex. */
276 struct rb_root i_caps; /* cap list */ 278 struct rb_root i_caps; /* cap list */
277 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ 279 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
@@ -437,18 +439,18 @@ static inline void ceph_i_clear(struct inode *inode, unsigned mask)
437{ 439{
438 struct ceph_inode_info *ci = ceph_inode(inode); 440 struct ceph_inode_info *ci = ceph_inode(inode);
439 441
440 spin_lock(&inode->i_lock); 442 spin_lock(&ci->i_ceph_lock);
441 ci->i_ceph_flags &= ~mask; 443 ci->i_ceph_flags &= ~mask;
442 spin_unlock(&inode->i_lock); 444 spin_unlock(&ci->i_ceph_lock);
443} 445}
444 446
445static inline void ceph_i_set(struct inode *inode, unsigned mask) 447static inline void ceph_i_set(struct inode *inode, unsigned mask)
446{ 448{
447 struct ceph_inode_info *ci = ceph_inode(inode); 449 struct ceph_inode_info *ci = ceph_inode(inode);
448 450
449 spin_lock(&inode->i_lock); 451 spin_lock(&ci->i_ceph_lock);
450 ci->i_ceph_flags |= mask; 452 ci->i_ceph_flags |= mask;
451 spin_unlock(&inode->i_lock); 453 spin_unlock(&ci->i_ceph_lock);
452} 454}
453 455
454static inline bool ceph_i_test(struct inode *inode, unsigned mask) 456static inline bool ceph_i_test(struct inode *inode, unsigned mask)
@@ -456,9 +458,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
456 struct ceph_inode_info *ci = ceph_inode(inode); 458 struct ceph_inode_info *ci = ceph_inode(inode);
457 bool r; 459 bool r;
458 460
459 spin_lock(&inode->i_lock); 461 spin_lock(&ci->i_ceph_lock);
460 r = (ci->i_ceph_flags & mask) == mask; 462 r = (ci->i_ceph_flags & mask) == mask;
461 spin_unlock(&inode->i_lock); 463 spin_unlock(&ci->i_ceph_lock);
462 return r; 464 return r;
463} 465}
464 466
@@ -508,9 +510,9 @@ extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
508static inline int ceph_caps_issued(struct ceph_inode_info *ci) 510static inline int ceph_caps_issued(struct ceph_inode_info *ci)
509{ 511{
510 int issued; 512 int issued;
511 spin_lock(&ci->vfs_inode.i_lock); 513 spin_lock(&ci->i_ceph_lock);
512 issued = __ceph_caps_issued(ci, NULL); 514 issued = __ceph_caps_issued(ci, NULL);
513 spin_unlock(&ci->vfs_inode.i_lock); 515 spin_unlock(&ci->i_ceph_lock);
514 return issued; 516 return issued;
515} 517}
516 518
@@ -518,9 +520,9 @@ static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
518 int touch) 520 int touch)
519{ 521{
520 int r; 522 int r;
521 spin_lock(&ci->vfs_inode.i_lock); 523 spin_lock(&ci->i_ceph_lock);
522 r = __ceph_caps_issued_mask(ci, mask, touch); 524 r = __ceph_caps_issued_mask(ci, mask, touch);
523 spin_unlock(&ci->vfs_inode.i_lock); 525 spin_unlock(&ci->i_ceph_lock);
524 return r; 526 return r;
525} 527}
526 528
@@ -743,10 +745,9 @@ extern int ceph_add_cap(struct inode *inode,
743extern void __ceph_remove_cap(struct ceph_cap *cap); 745extern void __ceph_remove_cap(struct ceph_cap *cap);
744static inline void ceph_remove_cap(struct ceph_cap *cap) 746static inline void ceph_remove_cap(struct ceph_cap *cap)
745{ 747{
746 struct inode *inode = &cap->ci->vfs_inode; 748 spin_lock(&cap->ci->i_ceph_lock);
747 spin_lock(&inode->i_lock);
748 __ceph_remove_cap(cap); 749 __ceph_remove_cap(cap);
749 spin_unlock(&inode->i_lock); 750 spin_unlock(&cap->ci->i_ceph_lock);
750} 751}
751extern void ceph_put_cap(struct ceph_mds_client *mdsc, 752extern void ceph_put_cap(struct ceph_mds_client *mdsc,
752 struct ceph_cap *cap); 753 struct ceph_cap *cap);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 96c6739a0280..a5e36e4488a7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -343,8 +343,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
343} 343}
344 344
345static int __build_xattrs(struct inode *inode) 345static int __build_xattrs(struct inode *inode)
346 __releases(inode->i_lock) 346 __releases(ci->i_ceph_lock)
347 __acquires(inode->i_lock) 347 __acquires(ci->i_ceph_lock)
348{ 348{
349 u32 namelen; 349 u32 namelen;
350 u32 numattr = 0; 350 u32 numattr = 0;
@@ -372,7 +372,7 @@ start:
372 end = p + ci->i_xattrs.blob->vec.iov_len; 372 end = p + ci->i_xattrs.blob->vec.iov_len;
373 ceph_decode_32_safe(&p, end, numattr, bad); 373 ceph_decode_32_safe(&p, end, numattr, bad);
374 xattr_version = ci->i_xattrs.version; 374 xattr_version = ci->i_xattrs.version;
375 spin_unlock(&inode->i_lock); 375 spin_unlock(&ci->i_ceph_lock);
376 376
377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), 377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
378 GFP_NOFS); 378 GFP_NOFS);
@@ -387,7 +387,7 @@ start:
387 goto bad_lock; 387 goto bad_lock;
388 } 388 }
389 389
390 spin_lock(&inode->i_lock); 390 spin_lock(&ci->i_ceph_lock);
391 if (ci->i_xattrs.version != xattr_version) { 391 if (ci->i_xattrs.version != xattr_version) {
392 /* lost a race, retry */ 392 /* lost a race, retry */
393 for (i = 0; i < numattr; i++) 393 for (i = 0; i < numattr; i++)
@@ -418,7 +418,7 @@ start:
418 418
419 return err; 419 return err;
420bad_lock: 420bad_lock:
421 spin_lock(&inode->i_lock); 421 spin_lock(&ci->i_ceph_lock);
422bad: 422bad:
423 if (xattrs) { 423 if (xattrs) {
424 for (i = 0; i < numattr; i++) 424 for (i = 0; i < numattr; i++)
@@ -512,7 +512,7 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
512 if (vxattrs) 512 if (vxattrs)
513 vxattr = ceph_match_vxattr(vxattrs, name); 513 vxattr = ceph_match_vxattr(vxattrs, name);
514 514
515 spin_lock(&inode->i_lock); 515 spin_lock(&ci->i_ceph_lock);
516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
517 ci->i_xattrs.version, ci->i_xattrs.index_version); 517 ci->i_xattrs.version, ci->i_xattrs.index_version);
518 518
@@ -520,14 +520,14 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
521 goto get_xattr; 521 goto get_xattr;
522 } else { 522 } else {
523 spin_unlock(&inode->i_lock); 523 spin_unlock(&ci->i_ceph_lock);
524 /* get xattrs from mds (if we don't already have them) */ 524 /* get xattrs from mds (if we don't already have them) */
525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
526 if (err) 526 if (err)
527 return err; 527 return err;
528 } 528 }
529 529
530 spin_lock(&inode->i_lock); 530 spin_lock(&ci->i_ceph_lock);
531 531
532 if (vxattr && vxattr->readonly) { 532 if (vxattr && vxattr->readonly) {
533 err = vxattr->getxattr_cb(ci, value, size); 533 err = vxattr->getxattr_cb(ci, value, size);
@@ -558,7 +558,7 @@ get_xattr:
558 memcpy(value, xattr->val, xattr->val_len); 558 memcpy(value, xattr->val, xattr->val_len);
559 559
560out: 560out:
561 spin_unlock(&inode->i_lock); 561 spin_unlock(&ci->i_ceph_lock);
562 return err; 562 return err;
563} 563}
564 564
@@ -573,7 +573,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
573 u32 len; 573 u32 len;
574 int i; 574 int i;
575 575
576 spin_lock(&inode->i_lock); 576 spin_lock(&ci->i_ceph_lock);
577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
578 ci->i_xattrs.version, ci->i_xattrs.index_version); 578 ci->i_xattrs.version, ci->i_xattrs.index_version);
579 579
@@ -581,13 +581,13 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
582 goto list_xattr; 582 goto list_xattr;
583 } else { 583 } else {
584 spin_unlock(&inode->i_lock); 584 spin_unlock(&ci->i_ceph_lock);
585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
586 if (err) 586 if (err)
587 return err; 587 return err;
588 } 588 }
589 589
590 spin_lock(&inode->i_lock); 590 spin_lock(&ci->i_ceph_lock);
591 591
592 err = __build_xattrs(inode); 592 err = __build_xattrs(inode);
593 if (err < 0) 593 if (err < 0)
@@ -619,7 +619,7 @@ list_xattr:
619 } 619 }
620 620
621out: 621out:
622 spin_unlock(&inode->i_lock); 622 spin_unlock(&ci->i_ceph_lock);
623 return err; 623 return err;
624} 624}
625 625
@@ -739,7 +739,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
739 if (!xattr) 739 if (!xattr)
740 goto out; 740 goto out;
741 741
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743retry: 743retry:
744 issued = __ceph_caps_issued(ci, NULL); 744 issued = __ceph_caps_issued(ci, NULL);
745 if (!(issued & CEPH_CAP_XATTR_EXCL)) 745 if (!(issued & CEPH_CAP_XATTR_EXCL))
@@ -752,12 +752,12 @@ retry:
752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
753 struct ceph_buffer *blob = NULL; 753 struct ceph_buffer *blob = NULL;
754 754
755 spin_unlock(&inode->i_lock); 755 spin_unlock(&ci->i_ceph_lock);
756 dout(" preaallocating new blob size=%d\n", required_blob_size); 756 dout(" preaallocating new blob size=%d\n", required_blob_size);
757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
758 if (!blob) 758 if (!blob)
759 goto out; 759 goto out;
760 spin_lock(&inode->i_lock); 760 spin_lock(&ci->i_ceph_lock);
761 if (ci->i_xattrs.prealloc_blob) 761 if (ci->i_xattrs.prealloc_blob)
762 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 762 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
763 ci->i_xattrs.prealloc_blob = blob; 763 ci->i_xattrs.prealloc_blob = blob;
@@ -770,13 +770,13 @@ retry:
770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
771 ci->i_xattrs.dirty = true; 771 ci->i_xattrs.dirty = true;
772 inode->i_ctime = CURRENT_TIME; 772 inode->i_ctime = CURRENT_TIME;
773 spin_unlock(&inode->i_lock); 773 spin_unlock(&ci->i_ceph_lock);
774 if (dirty) 774 if (dirty)
775 __mark_inode_dirty(inode, dirty); 775 __mark_inode_dirty(inode, dirty);
776 return err; 776 return err;
777 777
778do_sync: 778do_sync:
779 spin_unlock(&inode->i_lock); 779 spin_unlock(&ci->i_ceph_lock);
780 err = ceph_sync_setxattr(dentry, name, value, size, flags); 780 err = ceph_sync_setxattr(dentry, name, value, size, flags);
781out: 781out:
782 kfree(newname); 782 kfree(newname);
@@ -833,7 +833,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
833 return -EOPNOTSUPP; 833 return -EOPNOTSUPP;
834 } 834 }
835 835
836 spin_lock(&inode->i_lock); 836 spin_lock(&ci->i_ceph_lock);
837 __build_xattrs(inode); 837 __build_xattrs(inode);
838 issued = __ceph_caps_issued(ci, NULL); 838 issued = __ceph_caps_issued(ci, NULL);
839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -846,12 +846,12 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
846 ci->i_xattrs.dirty = true; 846 ci->i_xattrs.dirty = true;
847 inode->i_ctime = CURRENT_TIME; 847 inode->i_ctime = CURRENT_TIME;
848 848
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 if (dirty) 850 if (dirty)
851 __mark_inode_dirty(inode, dirty); 851 __mark_inode_dirty(inode, dirty);
852 return err; 852 return err;
853do_sync: 853do_sync:
854 spin_unlock(&inode->i_lock); 854 spin_unlock(&ci->i_ceph_lock);
855 err = ceph_send_removexattr(dentry, name); 855 err = ceph_send_removexattr(dentry, name);
856 return err; 856 return err;
857} 857}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index ca418aaf6352..9d8715c45f25 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -292,7 +292,7 @@ int __init configfs_inode_init(void)
292 return bdi_init(&configfs_backing_dev_info); 292 return bdi_init(&configfs_backing_dev_info);
293} 293}
294 294
295void __exit configfs_inode_exit(void) 295void configfs_inode_exit(void)
296{ 296{
297 bdi_destroy(&configfs_backing_dev_info); 297 bdi_destroy(&configfs_backing_dev_info);
298} 298}
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index ecc62178beda..276e15cafd58 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -143,28 +143,26 @@ static int __init configfs_init(void)
143 goto out; 143 goto out;
144 144
145 config_kobj = kobject_create_and_add("config", kernel_kobj); 145 config_kobj = kobject_create_and_add("config", kernel_kobj);
146 if (!config_kobj) { 146 if (!config_kobj)
147 kmem_cache_destroy(configfs_dir_cachep); 147 goto out2;
148 configfs_dir_cachep = NULL; 148
149 goto out; 149 err = configfs_inode_init();
150 } 150 if (err)
151 goto out3;
151 152
152 err = register_filesystem(&configfs_fs_type); 153 err = register_filesystem(&configfs_fs_type);
153 if (err) { 154 if (err)
154 printk(KERN_ERR "configfs: Unable to register filesystem!\n"); 155 goto out4;
155 kobject_put(config_kobj);
156 kmem_cache_destroy(configfs_dir_cachep);
157 configfs_dir_cachep = NULL;
158 goto out;
159 }
160 156
161 err = configfs_inode_init(); 157 return 0;
162 if (err) { 158out4:
163 unregister_filesystem(&configfs_fs_type); 159 printk(KERN_ERR "configfs: Unable to register filesystem!\n");
164 kobject_put(config_kobj); 160 configfs_inode_exit();
165 kmem_cache_destroy(configfs_dir_cachep); 161out3:
166 configfs_dir_cachep = NULL; 162 kobject_put(config_kobj);
167 } 163out2:
164 kmem_cache_destroy(configfs_dir_cachep);
165 configfs_dir_cachep = NULL;
168out: 166out:
169 return err; 167 return err;
170} 168}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73c3992b2bb4..ac86f8b3e3cb 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -156,6 +156,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
156 * bdi_start_writeback - start writeback 156 * bdi_start_writeback - start writeback
157 * @bdi: the backing device to write from 157 * @bdi: the backing device to write from
158 * @nr_pages: the number of pages to write 158 * @nr_pages: the number of pages to write
159 * @reason: reason why some writeback work was initiated
159 * 160 *
160 * Description: 161 * Description:
161 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 162 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
@@ -1223,6 +1224,7 @@ static void wait_sb_inodes(struct super_block *sb)
1223 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block 1224 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
1224 * @sb: the superblock 1225 * @sb: the superblock
1225 * @nr: the number of pages to write 1226 * @nr: the number of pages to write
1227 * @reason: reason why some writeback work initiated
1226 * 1228 *
1227 * Start writeback on some inodes on this super_block. No guarantees are made 1229 * Start writeback on some inodes on this super_block. No guarantees are made
1228 * on how many (if any) will be written, and this function does not wait 1230 * on how many (if any) will be written, and this function does not wait
@@ -1251,6 +1253,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
1251/** 1253/**
1252 * writeback_inodes_sb - writeback dirty inodes from given super_block 1254 * writeback_inodes_sb - writeback dirty inodes from given super_block
1253 * @sb: the superblock 1255 * @sb: the superblock
1256 * @reason: reason why some writeback work was initiated
1254 * 1257 *
1255 * Start writeback on some inodes on this super_block. No guarantees are made 1258 * Start writeback on some inodes on this super_block. No guarantees are made
1256 * on how many (if any) will be written, and this function does not wait 1259 * on how many (if any) will be written, and this function does not wait
@@ -1265,6 +1268,7 @@ EXPORT_SYMBOL(writeback_inodes_sb);
1265/** 1268/**
1266 * writeback_inodes_sb_if_idle - start writeback if none underway 1269 * writeback_inodes_sb_if_idle - start writeback if none underway
1267 * @sb: the superblock 1270 * @sb: the superblock
1271 * @reason: reason why some writeback work was initiated
1268 * 1272 *
1269 * Invoke writeback_inodes_sb if no writeback is currently underway. 1273 * Invoke writeback_inodes_sb if no writeback is currently underway.
1270 * Returns 1 if writeback was started, 0 if not. 1274 * Returns 1 if writeback was started, 0 if not.
@@ -1285,6 +1289,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1285 * writeback_inodes_sb_if_idle - start writeback if none underway 1289 * writeback_inodes_sb_if_idle - start writeback if none underway
1286 * @sb: the superblock 1290 * @sb: the superblock
1287 * @nr: the number of pages to write 1291 * @nr: the number of pages to write
1292 * @reason: reason why some writeback work was initiated
1288 * 1293 *
1289 * Invoke writeback_inodes_sb if no writeback is currently underway. 1294 * Invoke writeback_inodes_sb if no writeback is currently underway.
1290 * Returns 1 if writeback was started, 0 if not. 1295 * Returns 1 if writeback was started, 0 if not.
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5cb8614508c3..2aaf3eaaf13d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1512,7 +1512,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1512 else if (outarg->offset + num > file_size) 1512 else if (outarg->offset + num > file_size)
1513 num = file_size - outarg->offset; 1513 num = file_size - outarg->offset;
1514 1514
1515 while (num) { 1515 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
1516 struct page *page; 1516 struct page *page;
1517 unsigned int this_num; 1517 unsigned int this_num;
1518 1518
@@ -1526,6 +1526,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1526 1526
1527 num -= this_num; 1527 num -= this_num;
1528 total_len += this_num; 1528 total_len += this_num;
1529 index++;
1529 } 1530 }
1530 req->misc.retrieve_in.offset = outarg->offset; 1531 req->misc.retrieve_in.offset = outarg->offset;
1531 req->misc.retrieve_in.size = total_len; 1532 req->misc.retrieve_in.size = total_len;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 594f07a81c28..0c84100acd44 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1556,7 +1556,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1556 struct inode *inode = file->f_path.dentry->d_inode; 1556 struct inode *inode = file->f_path.dentry->d_inode;
1557 1557
1558 mutex_lock(&inode->i_mutex); 1558 mutex_lock(&inode->i_mutex);
1559 if (origin != SEEK_CUR || origin != SEEK_SET) { 1559 if (origin != SEEK_CUR && origin != SEEK_SET) {
1560 retval = fuse_update_attributes(inode, NULL, file, NULL); 1560 retval = fuse_update_attributes(inode, NULL, file, NULL);
1561 if (retval) 1561 if (retval)
1562 goto exit; 1562 goto exit;
@@ -1567,6 +1567,10 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1567 offset += i_size_read(inode); 1567 offset += i_size_read(inode);
1568 break; 1568 break;
1569 case SEEK_CUR: 1569 case SEEK_CUR:
1570 if (offset == 0) {
1571 retval = file->f_pos;
1572 goto exit;
1573 }
1570 offset += file->f_pos; 1574 offset += file->f_pos;
1571 break; 1575 break;
1572 case SEEK_DATA: 1576 case SEEK_DATA:
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3e6d72756479..aa83109b9431 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1138,28 +1138,28 @@ static int __init fuse_fs_init(void)
1138{ 1138{
1139 int err; 1139 int err;
1140 1140
1141 err = register_filesystem(&fuse_fs_type);
1142 if (err)
1143 goto out;
1144
1145 err = register_fuseblk();
1146 if (err)
1147 goto out_unreg;
1148
1149 fuse_inode_cachep = kmem_cache_create("fuse_inode", 1141 fuse_inode_cachep = kmem_cache_create("fuse_inode",
1150 sizeof(struct fuse_inode), 1142 sizeof(struct fuse_inode),
1151 0, SLAB_HWCACHE_ALIGN, 1143 0, SLAB_HWCACHE_ALIGN,
1152 fuse_inode_init_once); 1144 fuse_inode_init_once);
1153 err = -ENOMEM; 1145 err = -ENOMEM;
1154 if (!fuse_inode_cachep) 1146 if (!fuse_inode_cachep)
1155 goto out_unreg2; 1147 goto out;
1148
1149 err = register_fuseblk();
1150 if (err)
1151 goto out2;
1152
1153 err = register_filesystem(&fuse_fs_type);
1154 if (err)
1155 goto out3;
1156 1156
1157 return 0; 1157 return 0;
1158 1158
1159 out_unreg2: 1159 out3:
1160 unregister_fuseblk(); 1160 unregister_fuseblk();
1161 out_unreg: 1161 out2:
1162 unregister_filesystem(&fuse_fs_type); 1162 kmem_cache_destroy(fuse_inode_cachep);
1163 out: 1163 out:
1164 return err; 1164 return err;
1165} 1165}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 5b5fa33b6b9d..cbd1a61c110a 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -548,7 +548,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
548 548
549 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); 549 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
550 if (error) 550 if (error)
551 goto out_bdi; 551 goto out_fput;
552 552
553 server->ncp_filp = ncp_filp; 553 server->ncp_filp = ncp_filp;
554 server->ncp_sock = sock; 554 server->ncp_sock = sock;
@@ -559,7 +559,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
559 error = -EBADF; 559 error = -EBADF;
560 server->info_filp = fget(data.info_fd); 560 server->info_filp = fget(data.info_fd);
561 if (!server->info_filp) 561 if (!server->info_filp)
562 goto out_fput; 562 goto out_bdi;
563 error = -ENOTSOCK; 563 error = -ENOTSOCK;
564 sock_inode = server->info_filp->f_path.dentry->d_inode; 564 sock_inode = server->info_filp->f_path.dentry->d_inode;
565 if (!S_ISSOCK(sock_inode->i_mode)) 565 if (!S_ISSOCK(sock_inode->i_mode))
@@ -746,9 +746,9 @@ out_nls:
746out_fput2: 746out_fput2:
747 if (server->info_filp) 747 if (server->info_filp)
748 fput(server->info_filp); 748 fput(server->info_filp);
749out_fput:
750 bdi_destroy(&server->bdi);
751out_bdi: 749out_bdi:
750 bdi_destroy(&server->bdi);
751out_fput:
752 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 752 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
753 * 753 *
754 * The previously used put_filp(ncp_filp); was bogus, since 754 * The previously used put_filp(ncp_filp); was bogus, since
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b77b874..03102d978180 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -91,20 +91,18 @@ static struct file_system_type proc_fs_type = {
91 91
92void __init proc_root_init(void) 92void __init proc_root_init(void)
93{ 93{
94 struct vfsmount *mnt;
95 int err; 94 int err;
96 95
97 proc_init_inodecache(); 96 proc_init_inodecache();
98 err = register_filesystem(&proc_fs_type); 97 err = register_filesystem(&proc_fs_type);
99 if (err) 98 if (err)
100 return; 99 return;
101 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 100 err = pid_ns_prepare_proc(&init_pid_ns);
102 if (IS_ERR(mnt)) { 101 if (err) {
103 unregister_filesystem(&proc_fs_type); 102 unregister_filesystem(&proc_fs_type);
104 return; 103 return;
105 } 104 }
106 105
107 init_pid_ns.proc_mnt = mnt;
108 proc_symlink("mounts", NULL, "self/mounts"); 106 proc_symlink("mounts", NULL, "self/mounts");
109 107
110 proc_net_init(); 108 proc_net_init();
@@ -209,5 +207,5 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
209 207
210void pid_ns_release_proc(struct pid_namespace *ns) 208void pid_ns_release_proc(struct pid_namespace *ns)
211{ 209{
212 mntput(ns->proc_mnt); 210 kern_unmount(ns->proc_mnt);
213} 211}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 20403dc5d437..ae0e76bb6ebf 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2264,19 +2264,12 @@ static int __init ubifs_init(void)
2264 return -EINVAL; 2264 return -EINVAL;
2265 } 2265 }
2266 2266
2267 err = register_filesystem(&ubifs_fs_type);
2268 if (err) {
2269 ubifs_err("cannot register file system, error %d", err);
2270 return err;
2271 }
2272
2273 err = -ENOMEM;
2274 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", 2267 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
2275 sizeof(struct ubifs_inode), 0, 2268 sizeof(struct ubifs_inode), 0,
2276 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, 2269 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
2277 &inode_slab_ctor); 2270 &inode_slab_ctor);
2278 if (!ubifs_inode_slab) 2271 if (!ubifs_inode_slab)
2279 goto out_reg; 2272 return -ENOMEM;
2280 2273
2281 register_shrinker(&ubifs_shrinker_info); 2274 register_shrinker(&ubifs_shrinker_info);
2282 2275
@@ -2288,15 +2281,20 @@ static int __init ubifs_init(void)
2288 if (err) 2281 if (err)
2289 goto out_compr; 2282 goto out_compr;
2290 2283
2284 err = register_filesystem(&ubifs_fs_type);
2285 if (err) {
2286 ubifs_err("cannot register file system, error %d", err);
2287 goto out_dbg;
2288 }
2291 return 0; 2289 return 0;
2292 2290
2291out_dbg:
2292 dbg_debugfs_exit();
2293out_compr: 2293out_compr:
2294 ubifs_compressors_exit(); 2294 ubifs_compressors_exit();
2295out_shrinker: 2295out_shrinker:
2296 unregister_shrinker(&ubifs_shrinker_info); 2296 unregister_shrinker(&ubifs_shrinker_info);
2297 kmem_cache_destroy(ubifs_inode_slab); 2297 kmem_cache_destroy(ubifs_inode_slab);
2298out_reg:
2299 unregister_filesystem(&ubifs_fs_type);
2300 return err; 2298 return err;
2301} 2299}
2302/* late_initcall to let compressors initialize first */ 2300/* late_initcall to let compressors initialize first */
diff --git a/include/linux/log2.h b/include/linux/log2.h
index 25b808631cd9..fd7ff3d91e6a 100644
--- a/include/linux/log2.h
+++ b/include/linux/log2.h
@@ -185,7 +185,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
185#define rounddown_pow_of_two(n) \ 185#define rounddown_pow_of_two(n) \
186( \ 186( \
187 __builtin_constant_p(n) ? ( \ 187 __builtin_constant_p(n) ? ( \
188 (n == 1) ? 0 : \
189 (1UL << ilog2(n))) : \ 188 (1UL << ilog2(n))) : \
190 __rounddown_pow_of_two(n) \ 189 __rounddown_pow_of_two(n) \
191 ) 190 )
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 415f2db414e1..c8ef9bc54d50 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -218,6 +218,7 @@ struct mmc_card {
218#define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ 218#define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */
219#define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */ 219#define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */
220#define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8) /* Avoid sending 512 bytes in */ 220#define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8) /* Avoid sending 512 bytes in */
221#define MMC_QUIRK_LONG_READ_TIME (1<<9) /* Data read time > CSD says */
221 /* byte mode */ 222 /* byte mode */
222 unsigned int poweroff_notify_state; /* eMMC4.5 notify feature */ 223 unsigned int poweroff_notify_state; /* eMMC4.5 notify feature */
223#define MMC_NO_POWER_NOTIFICATION 0 224#define MMC_NO_POWER_NOTIFICATION 0
@@ -433,6 +434,11 @@ static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c)
433 return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512; 434 return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512;
434} 435}
435 436
437static inline int mmc_card_long_read_time(const struct mmc_card *c)
438{
439 return c->quirks & MMC_QUIRK_LONG_READ_TIME;
440}
441
436#define mmc_card_name(c) ((c)->cid.prod_name) 442#define mmc_card_name(c) ((c)->cid.prod_name)
437#define mmc_card_id(c) (dev_name(&(c)->dev)) 443#define mmc_card_id(c) (dev_name(&(c)->dev))
438 444
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 2e0ecfcc881d..5b4293d9819d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1269,7 +1269,7 @@ void mq_clear_sbinfo(struct ipc_namespace *ns)
1269 1269
1270void mq_put_mnt(struct ipc_namespace *ns) 1270void mq_put_mnt(struct ipc_namespace *ns)
1271{ 1271{
1272 mntput(ns->mq_mnt); 1272 kern_unmount(ns->mq_mnt);
1273} 1273}
1274 1274
1275static int __init init_mqueue_fs(void) 1275static int __init init_mqueue_fs(void)
@@ -1291,11 +1291,9 @@ static int __init init_mqueue_fs(void)
1291 1291
1292 spin_lock_init(&mq_lock); 1292 spin_lock_init(&mq_lock);
1293 1293
1294 init_ipc_ns.mq_mnt = kern_mount_data(&mqueue_fs_type, &init_ipc_ns); 1294 error = mq_init_ns(&init_ipc_ns);
1295 if (IS_ERR(init_ipc_ns.mq_mnt)) { 1295 if (error)
1296 error = PTR_ERR(init_ipc_ns.mq_mnt);
1297 goto out_filesystem; 1296 goto out_filesystem;
1298 }
1299 1297
1300 return 0; 1298 return 0;
1301 1299
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 8b5ce5d3f3ef..5652101cdac0 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -27,11 +27,6 @@ DEFINE_SPINLOCK(mq_lock);
27 */ 27 */
28struct ipc_namespace init_ipc_ns = { 28struct ipc_namespace init_ipc_ns = {
29 .count = ATOMIC_INIT(1), 29 .count = ATOMIC_INIT(1),
30#ifdef CONFIG_POSIX_MQUEUE
31 .mq_queues_max = DFLT_QUEUESMAX,
32 .mq_msg_max = DFLT_MSGMAX,
33 .mq_msgsize_max = DFLT_MSGSIZEMAX,
34#endif
35 .user_ns = &init_user_ns, 30 .user_ns = &init_user_ns,
36}; 31};
37 32
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e0..c106d3b3cc64 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2407,7 +2407,6 @@ static ssize_t generic_perform_write(struct file *file,
2407 iov_iter_count(i)); 2407 iov_iter_count(i));
2408 2408
2409again: 2409again:
2410
2411 /* 2410 /*
2412 * Bring in the user page that we will copy from _first_. 2411 * Bring in the user page that we will copy from _first_.
2413 * Otherwise there's a nasty deadlock on copying from the 2412 * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2462,10 @@ again:
2463 written += copied; 2462 written += copied;
2464 2463
2465 balance_dirty_pages_ratelimited(mapping); 2464 balance_dirty_pages_ratelimited(mapping);
2466 2465 if (fatal_signal_pending(current)) {
2466 status = -EINTR;
2467 break;
2468 }
2467 } while (iov_iter_count(i)); 2469 } while (iov_iter_count(i));
2468 2470
2469 return written ? written : status; 2471 return written ? written : status;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 71252486bc6f..50f08241f981 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
411 * 411 *
412 * Returns @bdi's dirty limit in pages. The term "dirty" in the context of 412 * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
413 * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages. 413 * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
414 * And the "limit" in the name is not seriously taken as hard limit in 414 *
415 * balance_dirty_pages(). 415 * Note that balance_dirty_pages() will only seriously take it as a hard limit
416 * when sleeping max_pause per page is not enough to keep the dirty pages under
417 * control. For example, when the device is completely stalled due to some error
418 * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
419 * In the other normal situations, it acts more gently by throttling the tasks
420 * more (rather than completely block them) when the bdi dirty pages go high.
416 * 421 *
417 * It allocates high/low dirty limits to fast/slow devices, in order to prevent 422 * It allocates high/low dirty limits to fast/slow devices, in order to prevent
418 * - starving fast devices 423 * - starving fast devices
@@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
594 */ 599 */
595 if (unlikely(bdi_thresh > thresh)) 600 if (unlikely(bdi_thresh > thresh))
596 bdi_thresh = thresh; 601 bdi_thresh = thresh;
602 /*
603 * It's very possible that bdi_thresh is close to 0 not because the
604 * device is slow, but that it has remained inactive for long time.
605 * Honour such devices a reasonable good (hopefully IO efficient)
606 * threshold, so that the occasional writes won't be blocked and active
607 * writes can rampup the threshold quickly.
608 */
597 bdi_thresh = max(bdi_thresh, (limit - dirty) / 8); 609 bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
598 /* 610 /*
599 * scale global setpoint to bdi's: 611 * scale global setpoint to bdi's:
@@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
977 * 989 *
978 * 8 serves as the safety ratio. 990 * 8 serves as the safety ratio.
979 */ 991 */
980 if (bdi_dirty) 992 t = min(t, bdi_dirty * HZ / (8 * bw + 1));
981 t = min(t, bdi_dirty * HZ / (8 * bw + 1));
982 993
983 /* 994 /*
984 * The pause time will be settled within range (max_pause/4, max_pause). 995 * The pause time will be settled within range (max_pause/4, max_pause).
@@ -1136,6 +1147,19 @@ pause:
1136 if (task_ratelimit) 1147 if (task_ratelimit)
1137 break; 1148 break;
1138 1149
1150 /*
1151 * In the case of an unresponding NFS server and the NFS dirty
1152 * pages exceeds dirty_thresh, give the other good bdi's a pipe
1153 * to go through, so that tasks on them still remain responsive.
1154 *
1155 * In theory 1 page is enough to keep the comsumer-producer
1156 * pipe going: the flusher cleans 1 page => the task dirties 1
1157 * more page. However bdi_dirty has accounting errors. So use
1158 * the larger and more IO friendly bdi_stat_error.
1159 */
1160 if (bdi_dirty <= bdi_stat_error(bdi))
1161 break;
1162
1139 if (fatal_signal_pending(current)) 1163 if (fatal_signal_pending(current))
1140 break; 1164 break;
1141 } 1165 }
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 42599e31dcad..3a94eae7abe9 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -477,7 +477,6 @@ int crush_do_rule(struct crush_map *map,
477 int i, j; 477 int i, j;
478 int numrep; 478 int numrep;
479 int firstn; 479 int firstn;
480 int rc = -1;
481 480
482 BUG_ON(ruleno >= map->max_rules); 481 BUG_ON(ruleno >= map->max_rules);
483 482
@@ -491,23 +490,18 @@ int crush_do_rule(struct crush_map *map,
491 * that this may or may not correspond to the specific types 490 * that this may or may not correspond to the specific types
492 * referenced by the crush rule. 491 * referenced by the crush rule.
493 */ 492 */
494 if (force >= 0) { 493 if (force >= 0 &&
495 if (force >= map->max_devices || 494 force < map->max_devices &&
496 map->device_parents[force] == 0) { 495 map->device_parents[force] != 0 &&
497 /*dprintk("CRUSH: forcefed device dne\n");*/ 496 !is_out(map, weight, force, x)) {
498 rc = -1; /* force fed device dne */ 497 while (1) {
499 goto out; 498 force_context[++force_pos] = force;
500 } 499 if (force >= 0)
501 if (!is_out(map, weight, force, x)) { 500 force = map->device_parents[force];
502 while (1) { 501 else
503 force_context[++force_pos] = force; 502 force = map->bucket_parents[-1-force];
504 if (force >= 0) 503 if (force == 0)
505 force = map->device_parents[force]; 504 break;
506 else
507 force = map->bucket_parents[-1-force];
508 if (force == 0)
509 break;
510 }
511 } 505 }
512 } 506 }
513 507
@@ -600,10 +594,7 @@ int crush_do_rule(struct crush_map *map,
600 BUG_ON(1); 594 BUG_ON(1);
601 } 595 }
602 } 596 }
603 rc = result_len; 597 return result_len;
604
605out:
606 return rc;
607} 598}
608 599
609 600