aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/i386/xen/enlighten.c4
-rw-r--r--arch/powerpc/kernel/time.c8
-rw-r--r--arch/powerpc/kernel/vdso.c12
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c4
-rw-r--r--arch/um/include/kern_util.h2
-rw-r--r--arch/um/kernel/irq.c7
-rw-r--r--arch/um/os-Linux/file.c3
-rw-r--r--arch/um/os-Linux/signal.c4
-rw-r--r--arch/x86_64/mm/fault.c7
-rw-r--r--drivers/base/core.c29
-rw-r--r--drivers/block/DAC960.c1
-rw-r--r--drivers/char/agp/agp.h3
-rw-r--r--drivers/char/agp/intel-agp.c2
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c3
-rw-r--r--drivers/char/mspec.c69
-rw-r--r--drivers/ide/ide-disk.c1
-rw-r--r--drivers/ide/ppc/pmac.c2
-rw-r--r--drivers/media/video/usbvision/usbvision-cards.c1
-rw-r--r--drivers/mtd/nand/cafe_nand.c3
-rw-r--r--drivers/rtc/rtc-ds1553.c2
-rw-r--r--drivers/rtc/rtc-ds1742.c2
-rw-r--r--drivers/serial/sunsab.c107
-rw-r--r--drivers/video/intelfb/intelfbhw.c2
-rw-r--r--fs/ext3/namei.c73
-rw-r--r--fs/ext4/namei.c73
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
-rw-r--r--fs/xfs/xfs_buf_item.h5
-rw-r--r--fs/xfs/xfs_filestream.c3
-rw-r--r--fs/xfs/xfs_log_recover.c51
-rw-r--r--fs/xfs/xfs_mru_cache.c72
-rw-r--r--fs/xfs/xfs_mru_cache.h6
-rw-r--r--fs/xfs/xfs_trans_buf.c1
-rw-r--r--fs/xfs/xfs_vnodeops.c20
-rw-r--r--include/asm-powerpc/time.h5
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/user_namespace.h2
-rw-r--r--init/Kconfig1
-rw-r--r--init/do_mounts_initrd.c4
-rw-r--r--kernel/user.c45
-rw-r--r--kernel/user_namespace.c4
-rw-r--r--kernel/utsname.c2
-rw-r--r--mm/hugetlb.c4
-rw-r--r--mm/mempolicy.c79
46 files changed, 519 insertions, 223 deletions
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
index f0c37511d8da..f01bfcd4bdee 100644
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -623,8 +623,8 @@ static unsigned long xen_read_cr2_direct(void)
623 623
624static void xen_write_cr4(unsigned long cr4) 624static void xen_write_cr4(unsigned long cr4)
625{ 625{
626 /* never allow TSC to be disabled */ 626 /* Just ignore cr4 changes; Xen doesn't allow us to do
627 native_write_cr4(cr4 & ~X86_CR4_TSD); 627 anything anyway. */
628} 628}
629 629
630static unsigned long xen_read_cr3(void) 630static unsigned long xen_read_cr3(void)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 727a6699f2f4..c627cf86d1e3 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -239,7 +239,7 @@ static void snapshot_tb_and_purr(void *data)
239 struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); 239 struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
240 240
241 local_irq_save(flags); 241 local_irq_save(flags);
242 p->tb = mftb(); 242 p->tb = get_tb_or_rtc();
243 p->purr = mfspr(SPRN_PURR); 243 p->purr = mfspr(SPRN_PURR);
244 wmb(); 244 wmb();
245 p->initialized = 1; 245 p->initialized = 1;
@@ -317,7 +317,7 @@ static void snapshot_purr(void)
317 */ 317 */
318void snapshot_timebase(void) 318void snapshot_timebase(void)
319{ 319{
320 __get_cpu_var(last_jiffy) = get_tb(); 320 __get_cpu_var(last_jiffy) = get_tb_or_rtc();
321 snapshot_purr(); 321 snapshot_purr();
322} 322}
323 323
@@ -684,6 +684,8 @@ void timer_interrupt(struct pt_regs * regs)
684 684
685 write_seqlock(&xtime_lock); 685 write_seqlock(&xtime_lock);
686 tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy; 686 tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
687 if (__USE_RTC() && tb_next_jiffy >= 1000000000)
688 tb_next_jiffy -= 1000000000;
687 if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) { 689 if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
688 tb_last_jiffy = tb_next_jiffy; 690 tb_last_jiffy = tb_next_jiffy;
689 do_timer(1); 691 do_timer(1);
@@ -977,7 +979,7 @@ void __init time_init(void)
977 tb_to_ns_scale = scale; 979 tb_to_ns_scale = scale;
978 tb_to_ns_shift = shift; 980 tb_to_ns_shift = shift;
979 /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */ 981 /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
980 boot_tb = get_tb(); 982 boot_tb = get_tb_or_rtc();
981 983
982 tm = get_boot_time(); 984 tm = get_boot_time();
983 985
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index cef01e4e8989..213fa31ac537 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -98,6 +98,18 @@ static struct vdso_patch_def vdso_patches[] = {
98 CPU_FTR_USE_TB, 0, 98 CPU_FTR_USE_TB, 0,
99 "__kernel_gettimeofday", NULL 99 "__kernel_gettimeofday", NULL
100 }, 100 },
101 {
102 CPU_FTR_USE_TB, 0,
103 "__kernel_clock_gettime", NULL
104 },
105 {
106 CPU_FTR_USE_TB, 0,
107 "__kernel_clock_getres", NULL
108 },
109 {
110 CPU_FTR_USE_TB, 0,
111 "__kernel_get_tbfreq", NULL
112 },
101}; 113};
102 114
103/* 115/*
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index c784edd40ea7..5bebe7fbe056 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -579,7 +579,7 @@ static struct spu *find_victim(struct spu_context *ctx)
579 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { 579 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
580 struct spu_context *tmp = spu->ctx; 580 struct spu_context *tmp = spu->ctx;
581 581
582 if (tmp->prio > ctx->prio && 582 if (tmp && tmp->prio > ctx->prio &&
583 (!victim || tmp->prio > victim->prio)) 583 (!victim || tmp->prio > victim->prio))
584 victim = spu->ctx; 584 victim = spu->ctx;
585 } 585 }
@@ -611,9 +611,9 @@ static struct spu *find_victim(struct spu_context *ctx)
611 611
612 mutex_lock(&cbe_spu_info[node].list_mutex); 612 mutex_lock(&cbe_spu_info[node].list_mutex);
613 cbe_spu_info[node].nr_active--; 613 cbe_spu_info[node].nr_active--;
614 spu_unbind_context(spu, victim);
614 mutex_unlock(&cbe_spu_info[node].list_mutex); 615 mutex_unlock(&cbe_spu_info[node].list_mutex);
615 616
616 spu_unbind_context(spu, victim);
617 victim->stats.invol_ctx_switch++; 617 victim->stats.invol_ctx_switch++;
618 spu->stats.invol_ctx_switch++; 618 spu->stats.invol_ctx_switch++;
619 mutex_unlock(&victim->state_mutex); 619 mutex_unlock(&victim->state_mutex);
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
index 8d7f7c1cb9c6..6c2be26f1d7d 100644
--- a/arch/um/include/kern_util.h
+++ b/arch/um/include/kern_util.h
@@ -117,7 +117,7 @@ extern void sigio_handler(int sig, union uml_pt_regs *regs);
117 117
118extern void copy_sc(union uml_pt_regs *regs, void *from); 118extern void copy_sc(union uml_pt_regs *regs, void *from);
119 119
120unsigned long to_irq_stack(int sig, unsigned long *mask_out); 120extern unsigned long to_irq_stack(unsigned long *mask_out);
121unsigned long from_irq_stack(int nested); 121unsigned long from_irq_stack(int nested);
122 122
123#endif 123#endif
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9870febdbead..cf0dd9cf8c43 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -518,13 +518,13 @@ int init_aio_irq(int irq, char *name, irq_handler_t handler)
518 518
519static unsigned long pending_mask; 519static unsigned long pending_mask;
520 520
521unsigned long to_irq_stack(int sig, unsigned long *mask_out) 521unsigned long to_irq_stack(unsigned long *mask_out)
522{ 522{
523 struct thread_info *ti; 523 struct thread_info *ti;
524 unsigned long mask, old; 524 unsigned long mask, old;
525 int nested; 525 int nested;
526 526
527 mask = xchg(&pending_mask, 1 << sig); 527 mask = xchg(&pending_mask, *mask_out);
528 if(mask != 0){ 528 if(mask != 0){
529 /* If any interrupts come in at this point, we want to 529 /* If any interrupts come in at this point, we want to
530 * make sure that their bits aren't lost by our 530 * make sure that their bits aren't lost by our
@@ -534,7 +534,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out)
534 * and pending_mask contains a bit for each interrupt 534 * and pending_mask contains a bit for each interrupt
535 * that came in. 535 * that came in.
536 */ 536 */
537 old = 1 << sig; 537 old = *mask_out;
538 do { 538 do {
539 old |= mask; 539 old |= mask;
540 mask = xchg(&pending_mask, old); 540 mask = xchg(&pending_mask, old);
@@ -550,6 +550,7 @@ unsigned long to_irq_stack(int sig, unsigned long *mask_out)
550 550
551 task = cpu_tasks[ti->cpu].task; 551 task = cpu_tasks[ti->cpu].task;
552 tti = task_thread_info(task); 552 tti = task_thread_info(task);
553
553 *ti = *tti; 554 *ti = *tti;
554 ti->real_thread = tti; 555 ti->real_thread = tti;
555 task->stack = ti; 556 task->stack = ti;
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 6f92f732d253..c3ecc2a84e0c 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -320,7 +320,8 @@ int os_file_size(char *file, unsigned long long *size_out)
320 } 320 }
321 321
322 if(S_ISBLK(buf.ust_mode)){ 322 if(S_ISBLK(buf.ust_mode)){
323 int fd, blocks; 323 int fd;
324 long blocks;
324 325
325 fd = os_open_file(file, of_read(OPENFLAGS()), 0); 326 fd = os_open_file(file, of_read(OPENFLAGS()), 0);
326 if(fd < 0){ 327 if(fd < 0){
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 18e5c8b67eb8..b98f7ea2d2f6 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -119,7 +119,7 @@ void (*handlers[_NSIG])(int sig, struct sigcontext *sc);
119 119
120void handle_signal(int sig, struct sigcontext *sc) 120void handle_signal(int sig, struct sigcontext *sc)
121{ 121{
122 unsigned long pending = 0; 122 unsigned long pending = 1UL << sig;
123 123
124 do { 124 do {
125 int nested, bail; 125 int nested, bail;
@@ -134,7 +134,7 @@ void handle_signal(int sig, struct sigcontext *sc)
134 * have to return, and the upper handler will deal 134 * have to return, and the upper handler will deal
135 * with this interrupt. 135 * with this interrupt.
136 */ 136 */
137 bail = to_irq_stack(sig, &pending); 137 bail = to_irq_stack(&pending);
138 if(bail) 138 if(bail)
139 return; 139 return;
140 140
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 327c9f2fa626..54816adb8e93 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -374,6 +374,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
374 if (unlikely(in_atomic() || !mm)) 374 if (unlikely(in_atomic() || !mm))
375 goto bad_area_nosemaphore; 375 goto bad_area_nosemaphore;
376 376
377 /*
378 * User-mode registers count as a user access even for any
379 * potential system fault or CPU buglet.
380 */
381 if (user_mode_vm(regs))
382 error_code |= PF_USER;
383
377 again: 384 again:
378 /* When running in the kernel we expect faults to occur only to 385 /* When running in the kernel we expect faults to occur only to
379 * addresses in user space. All other faults represent errors in the 386 * addresses in user space. All other faults represent errors in the
diff --git a/drivers/base/core.c b/drivers/base/core.c
index e6738bcbe5a9..6de33d7a29ba 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -679,14 +679,26 @@ static int device_add_class_symlinks(struct device *dev)
679 goto out_subsys; 679 goto out_subsys;
680 } 680 }
681 if (dev->parent) { 681 if (dev->parent) {
682 error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
683 "device");
684 if (error)
685 goto out_busid;
686#ifdef CONFIG_SYSFS_DEPRECATED 682#ifdef CONFIG_SYSFS_DEPRECATED
687 { 683 {
688 char * class_name = make_class_name(dev->class->name, 684 struct device *parent = dev->parent;
689 &dev->kobj); 685 char *class_name;
686
687 /*
688 * In old sysfs stacked class devices had 'device'
689 * link pointing to real device instead of parent
690 */
691 while (parent->class && !parent->bus && parent->parent)
692 parent = parent->parent;
693
694 error = sysfs_create_link(&dev->kobj,
695 &parent->kobj,
696 "device");
697 if (error)
698 goto out_busid;
699
700 class_name = make_class_name(dev->class->name,
701 &dev->kobj);
690 if (class_name) 702 if (class_name)
691 error = sysfs_create_link(&dev->parent->kobj, 703 error = sysfs_create_link(&dev->parent->kobj,
692 &dev->kobj, class_name); 704 &dev->kobj, class_name);
@@ -694,6 +706,11 @@ static int device_add_class_symlinks(struct device *dev)
694 if (error) 706 if (error)
695 goto out_device; 707 goto out_device;
696 } 708 }
709#else
710 error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
711 "device");
712 if (error)
713 goto out_busid;
697#endif 714#endif
698 } 715 }
699 return 0; 716 return 0;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 504a95d888b2..84d6aa500e26 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -31,6 +31,7 @@
31#include <linux/genhd.h> 31#include <linux/genhd.h>
32#include <linux/hdreg.h> 32#include <linux/hdreg.h>
33#include <linux/blkpg.h> 33#include <linux/blkpg.h>
34#include <linux/dma-mapping.h>
34#include <linux/interrupt.h> 35#include <linux/interrupt.h>
35#include <linux/ioport.h> 36#include <linux/ioport.h>
36#include <linux/mm.h> 37#include <linux/mm.h>
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 35ab1a9f8e8b..8955e7ff759a 100644
--- a/drivers/char/agp/agp.h
+++ b/drivers/char/agp/agp.h
@@ -176,7 +176,7 @@ struct agp_bridge_data {
176#define I830_GMCH_MEM_MASK 0x1 176#define I830_GMCH_MEM_MASK 0x1
177#define I830_GMCH_MEM_64M 0x1 177#define I830_GMCH_MEM_64M 0x1
178#define I830_GMCH_MEM_128M 0 178#define I830_GMCH_MEM_128M 0
179#define I830_GMCH_GMS_MASK 0xF0 179#define I830_GMCH_GMS_MASK 0x70
180#define I830_GMCH_GMS_DISABLED 0x00 180#define I830_GMCH_GMS_DISABLED 0x00
181#define I830_GMCH_GMS_LOCAL 0x10 181#define I830_GMCH_GMS_LOCAL 0x10
182#define I830_GMCH_GMS_STOLEN_512 0x20 182#define I830_GMCH_GMS_STOLEN_512 0x20
@@ -190,6 +190,7 @@ struct agp_bridge_data {
190#define INTEL_I830_ERRSTS 0x92 190#define INTEL_I830_ERRSTS 0x92
191 191
192/* Intel 855GM/852GM registers */ 192/* Intel 855GM/852GM registers */
193#define I855_GMCH_GMS_MASK 0xF0
193#define I855_GMCH_GMS_STOLEN_0M 0x0 194#define I855_GMCH_GMS_STOLEN_0M 0x0
194#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) 195#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4)
195#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) 196#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4)
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 7c69bf259caa..a5d0e95a227a 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -511,7 +511,7 @@ static void intel_i830_init_gtt_entries(void)
511 */ 511 */
512 if (IS_G33) 512 if (IS_G33)
513 size = 0; 513 size = 0;
514 switch (gmch_ctrl & I830_GMCH_GMS_MASK) { 514 switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
515 case I855_GMCH_GMS_STOLEN_1M: 515 case I855_GMCH_GMS_STOLEN_1M:
516 gtt_entries = MB(1) - KB(size); 516 gtt_entries = MB(1) - KB(size);
517 break; 517 break;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 9b07f7851061..dd441ff4af56 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2215,7 +2215,8 @@ static int ipmi_pci_resume(struct pci_dev *pdev)
2215 2215
2216static struct pci_device_id ipmi_pci_devices[] = { 2216static struct pci_device_id ipmi_pci_devices[] = {
2217 { PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) }, 2217 { PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) },
2218 { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) } 2218 { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) },
2219 { 0, }
2219}; 2220};
2220MODULE_DEVICE_TABLE(pci, ipmi_pci_devices); 2221MODULE_DEVICE_TABLE(pci, ipmi_pci_devices);
2221 2222
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index c08a4152ee8f..049a46cc9f87 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -67,7 +67,7 @@
67/* 67/*
68 * Page types allocated by the device. 68 * Page types allocated by the device.
69 */ 69 */
70enum { 70enum mspec_page_type {
71 MSPEC_FETCHOP = 1, 71 MSPEC_FETCHOP = 1,
72 MSPEC_CACHED, 72 MSPEC_CACHED,
73 MSPEC_UNCACHED 73 MSPEC_UNCACHED
@@ -83,15 +83,25 @@ static int is_sn2;
83 * One of these structures is allocated when an mspec region is mmaped. The 83 * One of these structures is allocated when an mspec region is mmaped. The
84 * structure is pointed to by the vma->vm_private_data field in the vma struct. 84 * structure is pointed to by the vma->vm_private_data field in the vma struct.
85 * This structure is used to record the addresses of the mspec pages. 85 * This structure is used to record the addresses of the mspec pages.
86 * This structure is shared by all vma's that are split off from the
87 * original vma when split_vma()'s are done.
88 *
89 * The refcnt is incremented atomically because mm->mmap_sem does not
90 * protect in fork case where multiple tasks share the vma_data.
86 */ 91 */
87struct vma_data { 92struct vma_data {
88 atomic_t refcnt; /* Number of vmas sharing the data. */ 93 atomic_t refcnt; /* Number of vmas sharing the data. */
89 spinlock_t lock; /* Serialize access to the vma. */ 94 spinlock_t lock; /* Serialize access to this structure. */
90 int count; /* Number of pages allocated. */ 95 int count; /* Number of pages allocated. */
91 int type; /* Type of pages allocated. */ 96 enum mspec_page_type type; /* Type of pages allocated. */
97 int flags; /* See VMD_xxx below. */
98 unsigned long vm_start; /* Original (unsplit) base. */
99 unsigned long vm_end; /* Original (unsplit) end. */
92 unsigned long maddr[0]; /* Array of MSPEC addresses. */ 100 unsigned long maddr[0]; /* Array of MSPEC addresses. */
93}; 101};
94 102
103#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */
104
95/* used on shub2 to clear FOP cache in the HUB */ 105/* used on shub2 to clear FOP cache in the HUB */
96static unsigned long scratch_page[MAX_NUMNODES]; 106static unsigned long scratch_page[MAX_NUMNODES];
97#define SH2_AMO_CACHE_ENTRIES 4 107#define SH2_AMO_CACHE_ENTRIES 4
@@ -129,8 +139,8 @@ mspec_zero_block(unsigned long addr, int len)
129 * mspec_open 139 * mspec_open
130 * 140 *
131 * Called when a device mapping is created by a means other than mmap 141 * Called when a device mapping is created by a means other than mmap
132 * (via fork, etc.). Increments the reference count on the underlying 142 * (via fork, munmap, etc.). Increments the reference count on the
133 * mspec data so it is not freed prematurely. 143 * underlying mspec data so it is not freed prematurely.
134 */ 144 */
135static void 145static void
136mspec_open(struct vm_area_struct *vma) 146mspec_open(struct vm_area_struct *vma)
@@ -151,34 +161,44 @@ static void
151mspec_close(struct vm_area_struct *vma) 161mspec_close(struct vm_area_struct *vma)
152{ 162{
153 struct vma_data *vdata; 163 struct vma_data *vdata;
154 int i, pages, result, vdata_size; 164 int index, last_index, result;
165 unsigned long my_page;
155 166
156 vdata = vma->vm_private_data; 167 vdata = vma->vm_private_data;
157 if (!atomic_dec_and_test(&vdata->refcnt))
158 return;
159 168
160 pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 169 BUG_ON(vma->vm_start < vdata->vm_start || vma->vm_end > vdata->vm_end);
161 vdata_size = sizeof(struct vma_data) + pages * sizeof(long); 170
162 for (i = 0; i < pages; i++) { 171 spin_lock(&vdata->lock);
163 if (vdata->maddr[i] == 0) 172 index = (vma->vm_start - vdata->vm_start) >> PAGE_SHIFT;
173 last_index = (vma->vm_end - vdata->vm_start) >> PAGE_SHIFT;
174 for (; index < last_index; index++) {
175 if (vdata->maddr[index] == 0)
164 continue; 176 continue;
165 /* 177 /*
166 * Clear the page before sticking it back 178 * Clear the page before sticking it back
167 * into the pool. 179 * into the pool.
168 */ 180 */
169 result = mspec_zero_block(vdata->maddr[i], PAGE_SIZE); 181 my_page = vdata->maddr[index];
182 vdata->maddr[index] = 0;
183 spin_unlock(&vdata->lock);
184 result = mspec_zero_block(my_page, PAGE_SIZE);
170 if (!result) 185 if (!result)
171 uncached_free_page(vdata->maddr[i]); 186 uncached_free_page(my_page);
172 else 187 else
173 printk(KERN_WARNING "mspec_close(): " 188 printk(KERN_WARNING "mspec_close(): "
174 "failed to zero page %i\n", 189 "failed to zero page %i\n",
175 result); 190 result);
191 spin_lock(&vdata->lock);
176 } 192 }
193 spin_unlock(&vdata->lock);
177 194
178 if (vdata_size <= PAGE_SIZE) 195 if (!atomic_dec_and_test(&vdata->refcnt))
179 kfree(vdata); 196 return;
180 else 197
198 if (vdata->flags & VMD_VMALLOCED)
181 vfree(vdata); 199 vfree(vdata);
200 else
201 kfree(vdata);
182} 202}
183 203
184 204
@@ -195,7 +215,8 @@ mspec_nopfn(struct vm_area_struct *vma, unsigned long address)
195 int index; 215 int index;
196 struct vma_data *vdata = vma->vm_private_data; 216 struct vma_data *vdata = vma->vm_private_data;
197 217
198 index = (address - vma->vm_start) >> PAGE_SHIFT; 218 BUG_ON(address < vdata->vm_start || address >= vdata->vm_end);
219 index = (address - vdata->vm_start) >> PAGE_SHIFT;
199 maddr = (volatile unsigned long) vdata->maddr[index]; 220 maddr = (volatile unsigned long) vdata->maddr[index];
200 if (maddr == 0) { 221 if (maddr == 0) {
201 maddr = uncached_alloc_page(numa_node_id()); 222 maddr = uncached_alloc_page(numa_node_id());
@@ -237,10 +258,11 @@ static struct vm_operations_struct mspec_vm_ops = {
237 * underlying pages. 258 * underlying pages.
238 */ 259 */
239static int 260static int
240mspec_mmap(struct file *file, struct vm_area_struct *vma, int type) 261mspec_mmap(struct file *file, struct vm_area_struct *vma,
262 enum mspec_page_type type)
241{ 263{
242 struct vma_data *vdata; 264 struct vma_data *vdata;
243 int pages, vdata_size; 265 int pages, vdata_size, flags = 0;
244 266
245 if (vma->vm_pgoff != 0) 267 if (vma->vm_pgoff != 0)
246 return -EINVAL; 268 return -EINVAL;
@@ -255,12 +277,17 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma, int type)
255 vdata_size = sizeof(struct vma_data) + pages * sizeof(long); 277 vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
256 if (vdata_size <= PAGE_SIZE) 278 if (vdata_size <= PAGE_SIZE)
257 vdata = kmalloc(vdata_size, GFP_KERNEL); 279 vdata = kmalloc(vdata_size, GFP_KERNEL);
258 else 280 else {
259 vdata = vmalloc(vdata_size); 281 vdata = vmalloc(vdata_size);
282 flags = VMD_VMALLOCED;
283 }
260 if (!vdata) 284 if (!vdata)
261 return -ENOMEM; 285 return -ENOMEM;
262 memset(vdata, 0, vdata_size); 286 memset(vdata, 0, vdata_size);
263 287
288 vdata->vm_start = vma->vm_start;
289 vdata->vm_end = vma->vm_end;
290 vdata->flags = flags;
264 vdata->type = type; 291 vdata->type = type;
265 spin_lock_init(&vdata->lock); 292 spin_lock_init(&vdata->lock);
266 vdata->refcnt = ATOMIC_INIT(1); 293 vdata->refcnt = ATOMIC_INIT(1);
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index eba1adbc1b6a..4754769eda97 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -487,6 +487,7 @@ static inline int idedisk_supports_lba48(const struct hd_driveid *id)
487 */ 487 */
488static const struct drive_list_entry hpa_list[] = { 488static const struct drive_list_entry hpa_list[] = {
489 { "ST340823A", NULL }, 489 { "ST340823A", NULL },
490 { "ST320413A", NULL },
490 { NULL, NULL } 491 { NULL, NULL }
491}; 492};
492 493
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index 4b13cd9a027d..f19eb6daeefd 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1802,9 +1802,7 @@ pmac_ide_dma_check(ide_drive_t *drive)
1802{ 1802{
1803 struct hd_driveid *id = drive->id; 1803 struct hd_driveid *id = drive->id;
1804 ide_hwif_t *hwif = HWIF(drive); 1804 ide_hwif_t *hwif = HWIF(drive);
1805 pmac_ide_hwif_t* pmif = (pmac_ide_hwif_t *)hwif->hwif_data;
1806 int enable = 1; 1805 int enable = 1;
1807 int map;
1808 drive->using_dma = 0; 1806 drive->using_dma = 0;
1809 1807
1810 if (drive->media == ide_floppy) 1808 if (drive->media == ide_floppy)
diff --git a/drivers/media/video/usbvision/usbvision-cards.c b/drivers/media/video/usbvision/usbvision-cards.c
index 380564cd3317..f09eb102731b 100644
--- a/drivers/media/video/usbvision/usbvision-cards.c
+++ b/drivers/media/video/usbvision/usbvision-cards.c
@@ -1081,6 +1081,7 @@ struct usb_device_id usbvision_table [] = {
1081 { USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL }, 1081 { USB_DEVICE(0x2304, 0x0301), .driver_info=PINNA_LINX_VD_IN_CAB_PAL },
1082 { USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM }, 1082 { USB_DEVICE(0x2304, 0x0419), .driver_info=PINNA_PCTV_BUNGEE_PAL_FM },
1083 { USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV }, 1083 { USB_DEVICE(0x2400, 0x4200), .driver_info=HPG_WINTV },
1084 { }, /* terminate list */
1084}; 1085};
1085 1086
1086MODULE_DEVICE_TABLE (usb, usbvision_table); 1087MODULE_DEVICE_TABLE (usb, usbvision_table);
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index cff969d05d4a..6f32a35eb106 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -816,7 +816,8 @@ static void __devexit cafe_nand_remove(struct pci_dev *pdev)
816} 816}
817 817
818static struct pci_device_id cafe_nand_tbl[] = { 818static struct pci_device_id cafe_nand_tbl[] = {
819 { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 } 819 { 0x11ab, 0x4100, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_MEMORY_FLASH << 8, 0xFFFF0 },
820 { 0, }
820}; 821};
821 822
822MODULE_DEVICE_TABLE(pci, cafe_nand_tbl); 823MODULE_DEVICE_TABLE(pci, cafe_nand_tbl);
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 46da5714932c..5ab3492817d1 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -61,7 +61,7 @@
61struct rtc_plat_data { 61struct rtc_plat_data {
62 struct rtc_device *rtc; 62 struct rtc_device *rtc;
63 void __iomem *ioaddr; 63 void __iomem *ioaddr;
64 unsigned long baseaddr; 64 resource_size_t baseaddr;
65 unsigned long last_jiffies; 65 unsigned long last_jiffies;
66 int irq; 66 int irq;
67 unsigned int irqen; 67 unsigned int irqen;
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index b2e5481ba3b6..67291b0f8283 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -55,7 +55,7 @@ struct rtc_plat_data {
55 void __iomem *ioaddr_rtc; 55 void __iomem *ioaddr_rtc;
56 size_t size_nvram; 56 size_t size_nvram;
57 size_t size; 57 size_t size;
58 unsigned long baseaddr; 58 resource_size_t baseaddr;
59 unsigned long last_jiffies; 59 unsigned long last_jiffies;
60}; 60};
61 61
diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index bca57bb94939..e348ba684050 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -58,6 +58,7 @@ struct uart_sunsab_port {
58 unsigned char interrupt_mask1;/* ISR1 masking */ 58 unsigned char interrupt_mask1;/* ISR1 masking */
59 unsigned char pvr_dtr_bit; /* Which PVR bit is DTR */ 59 unsigned char pvr_dtr_bit; /* Which PVR bit is DTR */
60 unsigned char pvr_dsr_bit; /* Which PVR bit is DSR */ 60 unsigned char pvr_dsr_bit; /* Which PVR bit is DSR */
61 unsigned int gis_shift;
61 int type; /* SAB82532 version */ 62 int type; /* SAB82532 version */
62 63
63 /* Setting configuration bits while the transmitter is active 64 /* Setting configuration bits while the transmitter is active
@@ -305,13 +306,15 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id)
305 struct tty_struct *tty; 306 struct tty_struct *tty;
306 union sab82532_irq_status status; 307 union sab82532_irq_status status;
307 unsigned long flags; 308 unsigned long flags;
309 unsigned char gis;
308 310
309 spin_lock_irqsave(&up->port.lock, flags); 311 spin_lock_irqsave(&up->port.lock, flags);
310 312
311 status.stat = 0; 313 status.stat = 0;
312 if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA0) 314 gis = readb(&up->regs->r.gis) >> up->gis_shift;
315 if (gis & 1)
313 status.sreg.isr0 = readb(&up->regs->r.isr0); 316 status.sreg.isr0 = readb(&up->regs->r.isr0);
314 if (readb(&up->regs->r.gis) & SAB82532_GIS_ISA1) 317 if (gis & 2)
315 status.sreg.isr1 = readb(&up->regs->r.isr1); 318 status.sreg.isr1 = readb(&up->regs->r.isr1);
316 319
317 tty = NULL; 320 tty = NULL;
@@ -327,35 +330,6 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id)
327 transmit_chars(up, &status); 330 transmit_chars(up, &status);
328 } 331 }
329 332
330 spin_unlock(&up->port.lock);
331
332 if (tty)
333 tty_flip_buffer_push(tty);
334
335 up++;
336
337 spin_lock(&up->port.lock);
338
339 status.stat = 0;
340 if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB0)
341 status.sreg.isr0 = readb(&up->regs->r.isr0);
342 if (readb(&up->regs->r.gis) & SAB82532_GIS_ISB1)
343 status.sreg.isr1 = readb(&up->regs->r.isr1);
344
345 tty = NULL;
346 if (status.stat) {
347 if ((status.sreg.isr0 & (SAB82532_ISR0_TCD | SAB82532_ISR0_TIME |
348 SAB82532_ISR0_RFO | SAB82532_ISR0_RPF)) ||
349 (status.sreg.isr1 & SAB82532_ISR1_BRK))
350
351 tty = receive_chars(up, &status);
352 if ((status.sreg.isr0 & SAB82532_ISR0_CDSC) ||
353 (status.sreg.isr1 & (SAB82532_ISR1_BRK | SAB82532_ISR1_CSC)))
354 check_status(up, &status);
355 if (status.sreg.isr1 & (SAB82532_ISR1_ALLS | SAB82532_ISR1_XPR))
356 transmit_chars(up, &status);
357 }
358
359 spin_unlock_irqrestore(&up->port.lock, flags); 333 spin_unlock_irqrestore(&up->port.lock, flags);
360 334
361 if (tty) 335 if (tty)
@@ -539,6 +513,10 @@ static int sunsab_startup(struct uart_port *port)
539 struct uart_sunsab_port *up = (struct uart_sunsab_port *) port; 513 struct uart_sunsab_port *up = (struct uart_sunsab_port *) port;
540 unsigned long flags; 514 unsigned long flags;
541 unsigned char tmp; 515 unsigned char tmp;
516 int err = request_irq(up->port.irq, sunsab_interrupt,
517 IRQF_SHARED, "sab", up);
518 if (err)
519 return err;
542 520
543 spin_lock_irqsave(&up->port.lock, flags); 521 spin_lock_irqsave(&up->port.lock, flags);
544 522
@@ -641,6 +619,7 @@ static void sunsab_shutdown(struct uart_port *port)
641#endif 619#endif
642 620
643 spin_unlock_irqrestore(&up->port.lock, flags); 621 spin_unlock_irqrestore(&up->port.lock, flags);
622 free_irq(up->port.irq, up);
644} 623}
645 624
646/* 625/*
@@ -1008,9 +987,11 @@ static int __devinit sunsab_init_one(struct uart_sunsab_port *up,
1008 if ((up->port.line & 0x1) == 0) { 987 if ((up->port.line & 0x1) == 0) {
1009 up->pvr_dsr_bit = (1 << 0); 988 up->pvr_dsr_bit = (1 << 0);
1010 up->pvr_dtr_bit = (1 << 1); 989 up->pvr_dtr_bit = (1 << 1);
990 up->gis_shift = 2;
1011 } else { 991 } else {
1012 up->pvr_dsr_bit = (1 << 3); 992 up->pvr_dsr_bit = (1 << 3);
1013 up->pvr_dtr_bit = (1 << 2); 993 up->pvr_dtr_bit = (1 << 2);
994 up->gis_shift = 0;
1014 } 995 }
1015 up->cached_pvr = (1 << 1) | (1 << 2) | (1 << 4); 996 up->cached_pvr = (1 << 1) | (1 << 2) | (1 << 4);
1016 writeb(up->cached_pvr, &up->regs->w.pvr); 997 writeb(up->cached_pvr, &up->regs->w.pvr);
@@ -1023,19 +1004,6 @@ static int __devinit sunsab_init_one(struct uart_sunsab_port *up,
1023 up->tec_timeout = SAB82532_MAX_TEC_TIMEOUT; 1004 up->tec_timeout = SAB82532_MAX_TEC_TIMEOUT;
1024 up->cec_timeout = SAB82532_MAX_CEC_TIMEOUT; 1005 up->cec_timeout = SAB82532_MAX_CEC_TIMEOUT;
1025 1006
1026 if (!(up->port.line & 0x01)) {
1027 int err;
1028
1029 err = request_irq(up->port.irq, sunsab_interrupt,
1030 IRQF_SHARED, "sab", up);
1031 if (err) {
1032 of_iounmap(&op->resource[0],
1033 up->port.membase,
1034 sizeof(union sab82532_async_regs));
1035 return err;
1036 }
1037 }
1038
1039 return 0; 1007 return 0;
1040} 1008}
1041 1009
@@ -1051,52 +1019,60 @@ static int __devinit sab_probe(struct of_device *op, const struct of_device_id *
1051 0, 1019 0,
1052 (inst * 2) + 0); 1020 (inst * 2) + 0);
1053 if (err) 1021 if (err)
1054 return err; 1022 goto out;
1055 1023
1056 err = sunsab_init_one(&up[1], op, 1024 err = sunsab_init_one(&up[1], op,
1057 sizeof(union sab82532_async_regs), 1025 sizeof(union sab82532_async_regs),
1058 (inst * 2) + 1); 1026 (inst * 2) + 1);
1059 if (err) { 1027 if (err)
1060 of_iounmap(&op->resource[0], 1028 goto out1;
1061 up[0].port.membase,
1062 sizeof(union sab82532_async_regs));
1063 free_irq(up[0].port.irq, &up[0]);
1064 return err;
1065 }
1066 1029
1067 sunserial_console_match(SUNSAB_CONSOLE(), op->node, 1030 sunserial_console_match(SUNSAB_CONSOLE(), op->node,
1068 &sunsab_reg, up[0].port.line); 1031 &sunsab_reg, up[0].port.line);
1069 uart_add_one_port(&sunsab_reg, &up[0].port);
1070 1032
1071 sunserial_console_match(SUNSAB_CONSOLE(), op->node, 1033 sunserial_console_match(SUNSAB_CONSOLE(), op->node,
1072 &sunsab_reg, up[1].port.line); 1034 &sunsab_reg, up[1].port.line);
1073 uart_add_one_port(&sunsab_reg, &up[1].port); 1035
1036 err = uart_add_one_port(&sunsab_reg, &up[0].port);
1037 if (err)
1038 goto out2;
1039
1040 err = uart_add_one_port(&sunsab_reg, &up[1].port);
1041 if (err)
1042 goto out3;
1074 1043
1075 dev_set_drvdata(&op->dev, &up[0]); 1044 dev_set_drvdata(&op->dev, &up[0]);
1076 1045
1077 inst++; 1046 inst++;
1078 1047
1079 return 0; 1048 return 0;
1080}
1081
1082static void __devexit sab_remove_one(struct uart_sunsab_port *up)
1083{
1084 struct of_device *op = to_of_device(up->port.dev);
1085 1049
1086 uart_remove_one_port(&sunsab_reg, &up->port); 1050out3:
1087 if (!(up->port.line & 1)) 1051 uart_remove_one_port(&sunsab_reg, &up[0].port);
1088 free_irq(up->port.irq, up); 1052out2:
1089 of_iounmap(&op->resource[0], 1053 of_iounmap(&op->resource[0],
1090 up->port.membase, 1054 up[1].port.membase,
1091 sizeof(union sab82532_async_regs)); 1055 sizeof(union sab82532_async_regs));
1056out1:
1057 of_iounmap(&op->resource[0],
1058 up[0].port.membase,
1059 sizeof(union sab82532_async_regs));
1060out:
1061 return err;
1092} 1062}
1093 1063
1094static int __devexit sab_remove(struct of_device *op) 1064static int __devexit sab_remove(struct of_device *op)
1095{ 1065{
1096 struct uart_sunsab_port *up = dev_get_drvdata(&op->dev); 1066 struct uart_sunsab_port *up = dev_get_drvdata(&op->dev);
1097 1067
1098 sab_remove_one(&up[0]); 1068 uart_remove_one_port(&sunsab_reg, &up[1].port);
1099 sab_remove_one(&up[1]); 1069 uart_remove_one_port(&sunsab_reg, &up[0].port);
1070 of_iounmap(&op->resource[0],
1071 up[1].port.membase,
1072 sizeof(union sab82532_async_regs));
1073 of_iounmap(&op->resource[0],
1074 up[0].port.membase,
1075 sizeof(union sab82532_async_regs));
1100 1076
1101 dev_set_drvdata(&op->dev, NULL); 1077 dev_set_drvdata(&op->dev, NULL);
1102 1078
@@ -1143,6 +1119,7 @@ static int __init sunsab_init(void)
1143 1119
1144 sunsab_reg.minor = sunserial_current_minor; 1120 sunsab_reg.minor = sunserial_current_minor;
1145 sunsab_reg.nr = num_channels; 1121 sunsab_reg.nr = num_channels;
1122 sunsab_reg.cons = SUNSAB_CONSOLE();
1146 1123
1147 err = uart_register_driver(&sunsab_reg); 1124 err = uart_register_driver(&sunsab_reg);
1148 if (err) { 1125 if (err) {
diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c
index b21d0dec9283..6a47682d8614 100644
--- a/drivers/video/intelfb/intelfbhw.c
+++ b/drivers/video/intelfb/intelfbhw.c
@@ -1352,7 +1352,7 @@ intelfbhw_program_mode(struct intelfb_info *dinfo,
1352 1352
1353 /* turn off PLL */ 1353 /* turn off PLL */
1354 tmp = INREG(dpll_reg); 1354 tmp = INREG(dpll_reg);
1355 dpll_reg &= ~DPLL_VCO_ENABLE; 1355 tmp &= ~DPLL_VCO_ENABLE;
1356 OUTREG(dpll_reg, tmp); 1356 OUTREG(dpll_reg, tmp);
1357 1357
1358 /* Set PLL parameters */ 1358 /* Set PLL parameters */
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b8177..c1fa1908dba0 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
140struct dx_map_entry 140struct dx_map_entry
141{ 141{
142 u32 hash; 142 u32 hash;
143 u32 offs; 143 u16 offs;
144 u16 size;
144}; 145};
145 146
146#ifdef CONFIG_EXT3_INDEX 147#ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
379 380
380 entries = (struct dx_entry *) (((char *)&root->info) + 381 entries = (struct dx_entry *) (((char *)&root->info) +
381 root->info.info_length); 382 root->info.info_length);
382 assert(dx_get_limit(entries) == dx_root_limit(dir, 383
383 root->info.info_length)); 384 if (dx_get_limit(entries) != dx_root_limit(dir,
385 root->info.info_length)) {
386 ext3_warning(dir->i_sb, __FUNCTION__,
387 "dx entry: limit != root limit");
388 brelse(bh);
389 *err = ERR_BAD_DX_DIR;
390 goto fail;
391 }
392
384 dxtrace (printk("Look up %x", hash)); 393 dxtrace (printk("Look up %x", hash));
385 while (1) 394 while (1)
386 { 395 {
387 count = dx_get_count(entries); 396 count = dx_get_count(entries);
388 assert (count && count <= dx_get_limit(entries)); 397 if (!count || count > dx_get_limit(entries)) {
398 ext3_warning(dir->i_sb, __FUNCTION__,
399 "dx entry: no count or count > limit");
400 brelse(bh);
401 *err = ERR_BAD_DX_DIR;
402 goto fail2;
403 }
404
389 p = entries + 1; 405 p = entries + 1;
390 q = entries + count - 1; 406 q = entries + count - 1;
391 while (p <= q) 407 while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
423 if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) 439 if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
424 goto fail2; 440 goto fail2;
425 at = entries = ((struct dx_node *) bh->b_data)->entries; 441 at = entries = ((struct dx_node *) bh->b_data)->entries;
426 assert (dx_get_limit(entries) == dx_node_limit (dir)); 442 if (dx_get_limit(entries) != dx_node_limit (dir)) {
443 ext3_warning(dir->i_sb, __FUNCTION__,
444 "dx entry: limit != node limit");
445 brelse(bh);
446 *err = ERR_BAD_DX_DIR;
447 goto fail2;
448 }
427 frame++; 449 frame++;
450 frame->bh = NULL;
428 } 451 }
429fail2: 452fail2:
430 while (frame >= frame_in) { 453 while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
432 frame--; 455 frame--;
433 } 456 }
434fail: 457fail:
458 if (*err == ERR_BAD_DX_DIR)
459 ext3_warning(dir->i_sb, __FUNCTION__,
460 "Corrupt dir inode %ld, running e2fsck is "
461 "recommended.", dir->i_ino);
435 return NULL; 462 return NULL;
436} 463}
437 464
@@ -671,6 +698,10 @@ errout:
671 * Directory block splitting, compacting 698 * Directory block splitting, compacting
672 */ 699 */
673 700
701/*
702 * Create map of hash values, offsets, and sizes, stored at end of block.
703 * Returns number of entries mapped.
704 */
674static int dx_make_map (struct ext3_dir_entry_2 *de, int size, 705static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
675 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) 706 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
676{ 707{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
684 ext3fs_dirhash(de->name, de->name_len, &h); 715 ext3fs_dirhash(de->name, de->name_len, &h);
685 map_tail--; 716 map_tail--;
686 map_tail->hash = h.hash; 717 map_tail->hash = h.hash;
687 map_tail->offs = (u32) ((char *) de - base); 718 map_tail->offs = (u16) ((char *) de - base);
719 map_tail->size = le16_to_cpu(de->rec_len);
688 count++; 720 count++;
689 cond_resched(); 721 cond_resched();
690 } 722 }
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
694 return count; 726 return count;
695} 727}
696 728
729/* Sort map by hash value */
697static void dx_sort_map (struct dx_map_entry *map, unsigned count) 730static void dx_sort_map (struct dx_map_entry *map, unsigned count)
698{ 731{
699 struct dx_map_entry *p, *q, *top = map + count - 1; 732 struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
1091} 1124}
1092 1125
1093#ifdef CONFIG_EXT3_INDEX 1126#ifdef CONFIG_EXT3_INDEX
1127/*
1128 * Move count entries from end of map between two memory locations.
1129 * Returns pointer to last entry moved.
1130 */
1094static struct ext3_dir_entry_2 * 1131static struct ext3_dir_entry_2 *
1095dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) 1132dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1096{ 1133{
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1109 return (struct ext3_dir_entry_2 *) (to - rec_len); 1146 return (struct ext3_dir_entry_2 *) (to - rec_len);
1110} 1147}
1111 1148
1149/*
1150 * Compact each dir entry in the range to the minimal rec_len.
1151 * Returns pointer to last entry in range.
1152 */
1112static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) 1153static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
1113{ 1154{
1114 struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; 1155 struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
1131 return prev; 1172 return prev;
1132} 1173}
1133 1174
1175/*
1176 * Split a full leaf block to make room for a new dir entry.
1177 * Allocate a new block, and move entries so that they are approx. equally full.
1178 * Returns pointer to de in block into which the new entry will be inserted.
1179 */
1134static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, 1180static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1135 struct buffer_head **bh,struct dx_frame *frame, 1181 struct buffer_head **bh,struct dx_frame *frame,
1136 struct dx_hash_info *hinfo, int *error) 1182 struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1142 u32 hash2; 1188 u32 hash2;
1143 struct dx_map_entry *map; 1189 struct dx_map_entry *map;
1144 char *data1 = (*bh)->b_data, *data2; 1190 char *data1 = (*bh)->b_data, *data2;
1145 unsigned split; 1191 unsigned split, move, size, i;
1146 struct ext3_dir_entry_2 *de = NULL, *de2; 1192 struct ext3_dir_entry_2 *de = NULL, *de2;
1147 int err = 0; 1193 int err = 0;
1148 1194
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1170 count = dx_make_map ((struct ext3_dir_entry_2 *) data1, 1216 count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
1171 blocksize, hinfo, map); 1217 blocksize, hinfo, map);
1172 map -= count; 1218 map -= count;
1173 split = count/2; // need to adjust to actual middle
1174 dx_sort_map (map, count); 1219 dx_sort_map (map, count);
1220 /* Split the existing block in the middle, size-wise */
1221 size = 0;
1222 move = 0;
1223 for (i = count-1; i >= 0; i--) {
1224 /* is more than half of this entry in 2nd half of the block? */
1225 if (size + map[i].size/2 > blocksize/2)
1226 break;
1227 size += map[i].size;
1228 move++;
1229 }
1230 /* map index at which we will split */
1231 split = count - move;
1175 hash2 = map[split].hash; 1232 hash2 = map[split].hash;
1176 continued = hash2 == map[split - 1].hash; 1233 continued = hash2 == map[split - 1].hash;
1177 dxtrace(printk("Split block %i at %x, %i/%i\n", 1234 dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af78..5fdb862e71c4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
140struct dx_map_entry 140struct dx_map_entry
141{ 141{
142 u32 hash; 142 u32 hash;
143 u32 offs; 143 u16 offs;
144 u16 size;
144}; 145};
145 146
146#ifdef CONFIG_EXT4_INDEX 147#ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
379 380
380 entries = (struct dx_entry *) (((char *)&root->info) + 381 entries = (struct dx_entry *) (((char *)&root->info) +
381 root->info.info_length); 382 root->info.info_length);
382 assert(dx_get_limit(entries) == dx_root_limit(dir, 383
383 root->info.info_length)); 384 if (dx_get_limit(entries) != dx_root_limit(dir,
385 root->info.info_length)) {
386 ext4_warning(dir->i_sb, __FUNCTION__,
387 "dx entry: limit != root limit");
388 brelse(bh);
389 *err = ERR_BAD_DX_DIR;
390 goto fail;
391 }
392
384 dxtrace (printk("Look up %x", hash)); 393 dxtrace (printk("Look up %x", hash));
385 while (1) 394 while (1)
386 { 395 {
387 count = dx_get_count(entries); 396 count = dx_get_count(entries);
388 assert (count && count <= dx_get_limit(entries)); 397 if (!count || count > dx_get_limit(entries)) {
398 ext4_warning(dir->i_sb, __FUNCTION__,
399 "dx entry: no count or count > limit");
400 brelse(bh);
401 *err = ERR_BAD_DX_DIR;
402 goto fail2;
403 }
404
389 p = entries + 1; 405 p = entries + 1;
390 q = entries + count - 1; 406 q = entries + count - 1;
391 while (p <= q) 407 while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
423 if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) 439 if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
424 goto fail2; 440 goto fail2;
425 at = entries = ((struct dx_node *) bh->b_data)->entries; 441 at = entries = ((struct dx_node *) bh->b_data)->entries;
426 assert (dx_get_limit(entries) == dx_node_limit (dir)); 442 if (dx_get_limit(entries) != dx_node_limit (dir)) {
443 ext4_warning(dir->i_sb, __FUNCTION__,
444 "dx entry: limit != node limit");
445 brelse(bh);
446 *err = ERR_BAD_DX_DIR;
447 goto fail2;
448 }
427 frame++; 449 frame++;
450 frame->bh = NULL;
428 } 451 }
429fail2: 452fail2:
430 while (frame >= frame_in) { 453 while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
432 frame--; 455 frame--;
433 } 456 }
434fail: 457fail:
458 if (*err == ERR_BAD_DX_DIR)
459 ext4_warning(dir->i_sb, __FUNCTION__,
460 "Corrupt dir inode %ld, running e2fsck is "
461 "recommended.", dir->i_ino);
435 return NULL; 462 return NULL;
436} 463}
437 464
@@ -671,6 +698,10 @@ errout:
671 * Directory block splitting, compacting 698 * Directory block splitting, compacting
672 */ 699 */
673 700
701/*
702 * Create map of hash values, offsets, and sizes, stored at end of block.
703 * Returns number of entries mapped.
704 */
674static int dx_make_map (struct ext4_dir_entry_2 *de, int size, 705static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
675 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) 706 struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
676{ 707{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
684 ext4fs_dirhash(de->name, de->name_len, &h); 715 ext4fs_dirhash(de->name, de->name_len, &h);
685 map_tail--; 716 map_tail--;
686 map_tail->hash = h.hash; 717 map_tail->hash = h.hash;
687 map_tail->offs = (u32) ((char *) de - base); 718 map_tail->offs = (u16) ((char *) de - base);
719 map_tail->size = le16_to_cpu(de->rec_len);
688 count++; 720 count++;
689 cond_resched(); 721 cond_resched();
690 } 722 }
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
694 return count; 726 return count;
695} 727}
696 728
729/* Sort map by hash value */
697static void dx_sort_map (struct dx_map_entry *map, unsigned count) 730static void dx_sort_map (struct dx_map_entry *map, unsigned count)
698{ 731{
699 struct dx_map_entry *p, *q, *top = map + count - 1; 732 struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
1089} 1122}
1090 1123
1091#ifdef CONFIG_EXT4_INDEX 1124#ifdef CONFIG_EXT4_INDEX
1125/*
1126 * Move count entries from end of map between two memory locations.
1127 * Returns pointer to last entry moved.
1128 */
1092static struct ext4_dir_entry_2 * 1129static struct ext4_dir_entry_2 *
1093dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) 1130dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1094{ 1131{
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
1107 return (struct ext4_dir_entry_2 *) (to - rec_len); 1144 return (struct ext4_dir_entry_2 *) (to - rec_len);
1108} 1145}
1109 1146
1147/*
1148 * Compact each dir entry in the range to the minimal rec_len.
1149 * Returns pointer to last entry in range.
1150 */
1110static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) 1151static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
1111{ 1152{
1112 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; 1153 struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
1129 return prev; 1170 return prev;
1130} 1171}
1131 1172
1173/*
1174 * Split a full leaf block to make room for a new dir entry.
1175 * Allocate a new block, and move entries so that they are approx. equally full.
1176 * Returns pointer to de in block into which the new entry will be inserted.
1177 */
1132static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, 1178static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1133 struct buffer_head **bh,struct dx_frame *frame, 1179 struct buffer_head **bh,struct dx_frame *frame,
1134 struct dx_hash_info *hinfo, int *error) 1180 struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1140 u32 hash2; 1186 u32 hash2;
1141 struct dx_map_entry *map; 1187 struct dx_map_entry *map;
1142 char *data1 = (*bh)->b_data, *data2; 1188 char *data1 = (*bh)->b_data, *data2;
1143 unsigned split; 1189 unsigned split, move, size, i;
1144 struct ext4_dir_entry_2 *de = NULL, *de2; 1190 struct ext4_dir_entry_2 *de = NULL, *de2;
1145 int err = 0; 1191 int err = 0;
1146 1192
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1168 count = dx_make_map ((struct ext4_dir_entry_2 *) data1, 1214 count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
1169 blocksize, hinfo, map); 1215 blocksize, hinfo, map);
1170 map -= count; 1216 map -= count;
1171 split = count/2; // need to adjust to actual middle
1172 dx_sort_map (map, count); 1217 dx_sort_map (map, count);
1218 /* Split the existing block in the middle, size-wise */
1219 size = 0;
1220 move = 0;
1221 for (i = count-1; i >= 0; i--) {
1222 /* is more than half of this entry in 2nd half of the block? */
1223 if (size + map[i].size/2 > blocksize/2)
1224 break;
1225 size += map[i].size;
1226 move++;
1227 }
1228 /* map index at which we will split */
1229 split = count - move;
1173 hash2 = map[split].hash; 1230 hash2 = map[split].hash;
1174 continued = hash2 == map[split - 1].hash; 1231 continued = hash2 == map[split - 1].hash;
1175 dxtrace(printk("Split block %i at %x, %i/%i\n", 1232 dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8ed593766f16..b878528b64c1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
345 unregister_shrinker(&acl_shrinker); 345 unregister_shrinker(&acl_shrinker);
346#ifdef CONFIG_NFS_V4 346#ifdef CONFIG_NFS_V4
347 unregister_filesystem(&nfs4_fs_type); 347 unregister_filesystem(&nfs4_fs_type);
348 nfs_unregister_sysctl();
349#endif 348#endif
349 nfs_unregister_sysctl();
350 unregister_filesystem(&nfs_fs_type); 350 unregister_filesystem(&nfs_fs_type);
351} 351}
352 352
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d9c40fe64195..5f152f60d74d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
181 ip->i_d.di_size = isize; 181 ip->i_d.di_size = isize;
182 ip->i_update_core = 1; 182 ip->i_update_core = 1;
183 ip->i_update_size = 1; 183 ip->i_update_size = 1;
184 mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
184 } 185 }
185 186
186 xfs_iunlock(ip, XFS_ILOCK_EXCL); 187 xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4528f9a3f304..491d1f4f202d 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -415,8 +415,10 @@ xfs_fs_write_inode(
415 415
416 if (vp) { 416 if (vp) {
417 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); 417 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
418 if (sync) 418 if (sync) {
419 filemap_fdatawait(inode->i_mapping);
419 flags |= FLUSH_SYNC; 420 flags |= FLUSH_SYNC;
421 }
420 error = bhv_vop_iflush(vp, flags); 422 error = bhv_vop_iflush(vp, flags);
421 if (error == EAGAIN) 423 if (error == EAGAIN)
422 error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0; 424 error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index d7e136143066..fa25b7dcc6c3 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t {
52#define XFS_BLI_UDQUOT_BUF 0x4 52#define XFS_BLI_UDQUOT_BUF 0x4
53#define XFS_BLI_PDQUOT_BUF 0x8 53#define XFS_BLI_PDQUOT_BUF 0x8
54#define XFS_BLI_GDQUOT_BUF 0x10 54#define XFS_BLI_GDQUOT_BUF 0x10
55/*
56 * This flag indicates that the buffer contains newly allocated
57 * inodes.
58 */
59#define XFS_BLI_INODE_NEW_BUF 0x20
55 60
56#define XFS_BLI_CHUNK 128 61#define XFS_BLI_CHUNK 128
57#define XFS_BLI_SHIFT 7 62#define XFS_BLI_SHIFT 7
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index ce2278611bb7..16f8e175167d 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -467,8 +467,7 @@ void
467xfs_filestream_flush( 467xfs_filestream_flush(
468 xfs_mount_t *mp) 468 xfs_mount_t *mp)
469{ 469{
470 /* point in time flush, so keep the reaper running */ 470 xfs_mru_cache_flush(mp->m_filestream);
471 xfs_mru_cache_flush(mp->m_filestream, 1);
472} 471}
473 472
474/* 473/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ae6e8e5f3db..dacb19739cc2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer(
1874/*ARGSUSED*/ 1874/*ARGSUSED*/
1875STATIC void 1875STATIC void
1876xlog_recover_do_reg_buffer( 1876xlog_recover_do_reg_buffer(
1877 xfs_mount_t *mp,
1877 xlog_recover_item_t *item, 1878 xlog_recover_item_t *item,
1878 xfs_buf_t *bp, 1879 xfs_buf_t *bp,
1879 xfs_buf_log_format_t *buf_f) 1880 xfs_buf_log_format_t *buf_f)
@@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer(
1884 unsigned int *data_map = NULL; 1885 unsigned int *data_map = NULL;
1885 unsigned int map_size = 0; 1886 unsigned int map_size = 0;
1886 int error; 1887 int error;
1888 int stale_buf = 1;
1889
1890 /*
1891 * Scan through the on-disk inode buffer and attempt to
1892 * determine if it has been written to since it was logged.
1893 *
1894 * - If any of the magic numbers are incorrect then the buffer is stale
1895 * - If any of the modes are non-zero then the buffer is not stale
1896 * - If all of the modes are zero and at least one of the generation
1897 * counts is non-zero then the buffer is stale
1898 *
1899 * If the end result is a stale buffer then the log buffer is replayed
1900 * otherwise it is skipped.
1901 *
1902 * This heuristic is not perfect. It can be improved by scanning the
1903 * entire inode chunk for evidence that any of the inode clusters have
1904 * been updated. To fix this problem completely we will need a major
1905 * architectural change to the logging system.
1906 */
1907 if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
1908 xfs_dinode_t *dip;
1909 int inodes_per_buf;
1910 int mode_count = 0;
1911 int gen_count = 0;
1912
1913 stale_buf = 0;
1914 inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
1915 for (i = 0; i < inodes_per_buf; i++) {
1916 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
1917 i * mp->m_sb.sb_inodesize);
1918 if (be16_to_cpu(dip->di_core.di_magic) !=
1919 XFS_DINODE_MAGIC) {
1920 stale_buf = 1;
1921 break;
1922 }
1923 if (be16_to_cpu(dip->di_core.di_mode))
1924 mode_count++;
1925 if (be16_to_cpu(dip->di_core.di_gen))
1926 gen_count++;
1927 }
1928
1929 if (!mode_count && gen_count)
1930 stale_buf = 1;
1931 }
1887 1932
1888 switch (buf_f->blf_type) { 1933 switch (buf_f->blf_type) {
1889 case XFS_LI_BUF: 1934 case XFS_LI_BUF:
@@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer(
1917 -1, 0, XFS_QMOPT_DOWARN, 1962 -1, 0, XFS_QMOPT_DOWARN,
1918 "dquot_buf_recover"); 1963 "dquot_buf_recover");
1919 } 1964 }
1920 if (!error) 1965 if (!error && stale_buf)
1921 memcpy(xfs_buf_offset(bp, 1966 memcpy(xfs_buf_offset(bp,
1922 (uint)bit << XFS_BLI_SHIFT), /* dest */ 1967 (uint)bit << XFS_BLI_SHIFT), /* dest */
1923 item->ri_buf[i].i_addr, /* source */ 1968 item->ri_buf[i].i_addr, /* source */
@@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer(
2089 if (log->l_quotaoffs_flag & type) 2134 if (log->l_quotaoffs_flag & type)
2090 return; 2135 return;
2091 2136
2092 xlog_recover_do_reg_buffer(item, bp, buf_f); 2137 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2093} 2138}
2094 2139
2095/* 2140/*
@@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans(
2190 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2235 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
2191 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2236 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2192 } else { 2237 } else {
2193 xlog_recover_do_reg_buffer(item, bp, buf_f); 2238 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2194 } 2239 }
2195 if (error) 2240 if (error)
2196 return XFS_ERROR(error); 2241 return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 7deb9e3cbbd3..e0b358c1c533 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
206 */ 206 */
207 if (!_xfs_mru_cache_migrate(mru, now)) { 207 if (!_xfs_mru_cache_migrate(mru, now)) {
208 mru->time_zero = now; 208 mru->time_zero = now;
209 if (!mru->next_reap) 209 if (!mru->queued) {
210 mru->next_reap = mru->grp_count * mru->grp_time; 210 mru->queued = 1;
211 queue_delayed_work(xfs_mru_reap_wq, &mru->work,
212 mru->grp_count * mru->grp_time);
213 }
211 } else { 214 } else {
212 grp = (now - mru->time_zero) / mru->grp_time; 215 grp = (now - mru->time_zero) / mru->grp_time;
213 grp = (mru->lru_grp + grp) % mru->grp_count; 216 grp = (mru->lru_grp + grp) % mru->grp_count;
@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
271 struct work_struct *work) 274 struct work_struct *work)
272{ 275{
273 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); 276 xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work);
274 unsigned long now; 277 unsigned long now, next;
275 278
276 ASSERT(mru && mru->lists); 279 ASSERT(mru && mru->lists);
277 if (!mru || !mru->lists) 280 if (!mru || !mru->lists)
278 return; 281 return;
279 282
280 mutex_spinlock(&mru->lock); 283 mutex_spinlock(&mru->lock);
281 now = jiffies; 284 next = _xfs_mru_cache_migrate(mru, jiffies);
282 if (mru->reap_all || 285 _xfs_mru_cache_clear_reap_list(mru);
283 (mru->next_reap && time_after(now, mru->next_reap))) { 286
284 if (mru->reap_all) 287 mru->queued = next;
285 now += mru->grp_count * mru->grp_time * 2; 288 if ((mru->queued > 0)) {
286 mru->next_reap = _xfs_mru_cache_migrate(mru, now); 289 now = jiffies;
287 _xfs_mru_cache_clear_reap_list(mru); 290 if (next <= now)
291 next = 0;
292 else
293 next -= now;
294 queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
288 } 295 }
289 296
290 /*
291 * the process that triggered the reap_all is responsible
292 * for restating the periodic reap if it is required.
293 */
294 if (!mru->reap_all)
295 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
296 mru->reap_all = 0;
297 mutex_spinunlock(&mru->lock, 0); 297 mutex_spinunlock(&mru->lock, 0);
298} 298}
299 299
@@ -352,7 +352,7 @@ xfs_mru_cache_create(
352 352
353 /* An extra list is needed to avoid reaping up to a grp_time early. */ 353 /* An extra list is needed to avoid reaping up to a grp_time early. */
354 mru->grp_count = grp_count + 1; 354 mru->grp_count = grp_count + 1;
355 mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); 355 mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
356 356
357 if (!mru->lists) { 357 if (!mru->lists) {
358 err = ENOMEM; 358 err = ENOMEM;
@@ -374,11 +374,6 @@ xfs_mru_cache_create(
374 mru->grp_time = grp_time; 374 mru->grp_time = grp_time;
375 mru->free_func = free_func; 375 mru->free_func = free_func;
376 376
377 /* start up the reaper event */
378 mru->next_reap = 0;
379 mru->reap_all = 0;
380 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
381
382 *mrup = mru; 377 *mrup = mru;
383 378
384exit: 379exit:
@@ -394,35 +389,25 @@ exit:
394 * Call xfs_mru_cache_flush() to flush out all cached entries, calling their 389 * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
395 * free functions as they're deleted. When this function returns, the caller is 390 * free functions as they're deleted. When this function returns, the caller is
396 * guaranteed that all the free functions for all the elements have finished 391 * guaranteed that all the free functions for all the elements have finished
397 * executing. 392 * executing and the reaper is not running.
398 *
399 * While we are flushing, we stop the periodic reaper event from triggering.
400 * Normally, we want to restart this periodic event, but if we are shutting
401 * down the cache we do not want it restarted. hence the restart parameter
402 * where 0 = do not restart reaper and 1 = restart reaper.
403 */ 393 */
404void 394void
405xfs_mru_cache_flush( 395xfs_mru_cache_flush(
406 xfs_mru_cache_t *mru, 396 xfs_mru_cache_t *mru)
407 int restart)
408{ 397{
409 if (!mru || !mru->lists) 398 if (!mru || !mru->lists)
410 return; 399 return;
411 400
412 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
413
414 mutex_spinlock(&mru->lock); 401 mutex_spinlock(&mru->lock);
415 mru->reap_all = 1; 402 if (mru->queued) {
416 mutex_spinunlock(&mru->lock, 0); 403 mutex_spinunlock(&mru->lock, 0);
404 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
405 mutex_spinlock(&mru->lock);
406 }
417 407
418 queue_work(xfs_mru_reap_wq, &mru->work.work); 408 _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
419 flush_workqueue(xfs_mru_reap_wq); 409 _xfs_mru_cache_clear_reap_list(mru);
420 410
421 mutex_spinlock(&mru->lock);
422 WARN_ON_ONCE(mru->reap_all != 0);
423 mru->reap_all = 0;
424 if (restart)
425 queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
426 mutex_spinunlock(&mru->lock, 0); 411 mutex_spinunlock(&mru->lock, 0);
427} 412}
428 413
@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
433 if (!mru || !mru->lists) 418 if (!mru || !mru->lists)
434 return; 419 return;
435 420
436 /* we don't want the reaper to restart here */ 421 xfs_mru_cache_flush(mru);
437 xfs_mru_cache_flush(mru, 0);
438 422
439 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 423 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
440 kmem_free(mru, sizeof(*mru)); 424 kmem_free(mru, sizeof(*mru));
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 624fd10ee8e5..dd58ea1bbebe 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
32 unsigned int grp_time; /* Time period spanned by grps. */ 32 unsigned int grp_time; /* Time period spanned by grps. */
33 unsigned int lru_grp; /* Group containing time zero. */ 33 unsigned int lru_grp; /* Group containing time zero. */
34 unsigned long time_zero; /* Time first element was added. */ 34 unsigned long time_zero; /* Time first element was added. */
35 unsigned long next_reap; /* Time that the reaper should
36 next do something. */
37 unsigned int reap_all; /* if set, reap all lists */
38 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ 35 xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
39 struct delayed_work work; /* Workqueue data for reaping. */ 36 struct delayed_work work; /* Workqueue data for reaping. */
37 unsigned int queued; /* work has been queued */
40} xfs_mru_cache_t; 38} xfs_mru_cache_t;
41 39
42int xfs_mru_cache_init(void); 40int xfs_mru_cache_init(void);
@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
44int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, 42int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
45 unsigned int grp_count, 43 unsigned int grp_count,
46 xfs_mru_cache_free_func_t free_func); 44 xfs_mru_cache_free_func_t free_func);
47void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart); 45void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
48void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); 46void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
49int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, 47int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
50 void *value); 48 void *value);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..95fff6872a2f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf(
966 ASSERT(atomic_read(&bip->bli_refcount) > 0); 966 ASSERT(atomic_read(&bip->bli_refcount) > 0);
967 967
968 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 968 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
969 bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
969} 970}
970 971
971 972
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1a5ad8cd97b0..603459229904 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1082,6 +1082,9 @@ xfs_fsync(
1082 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1082 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
1083 return XFS_ERROR(EIO); 1083 return XFS_ERROR(EIO);
1084 1084
1085 if (flag & FSYNC_DATA)
1086 filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
1087
1085 /* 1088 /*
1086 * We always need to make sure that the required inode state 1089 * We always need to make sure that the required inode state
1087 * is safe on disk. The vnode might be clean but because 1090 * is safe on disk. The vnode might be clean but because
@@ -3769,12 +3772,16 @@ xfs_inode_flush(
3769 sync_lsn = log->l_last_sync_lsn; 3772 sync_lsn = log->l_last_sync_lsn;
3770 GRANT_UNLOCK(log, s); 3773 GRANT_UNLOCK(log, s);
3771 3774
3772 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) 3775 if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
3773 return 0; 3776 if (flags & FLUSH_SYNC)
3777 log_flags |= XFS_LOG_SYNC;
3778 error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
3779 if (error)
3780 return error;
3781 }
3774 3782
3775 if (flags & FLUSH_SYNC) 3783 if (ip->i_update_core == 0)
3776 log_flags |= XFS_LOG_SYNC; 3784 return 0;
3777 return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
3778 } 3785 }
3779 } 3786 }
3780 3787
@@ -3788,9 +3795,6 @@ xfs_inode_flush(
3788 if (flags & FLUSH_INODE) { 3795 if (flags & FLUSH_INODE) {
3789 int flush_flags; 3796 int flush_flags;
3790 3797
3791 if (xfs_ipincount(ip))
3792 return EAGAIN;
3793
3794 if (flags & FLUSH_SYNC) { 3798 if (flags & FLUSH_SYNC) {
3795 xfs_ilock(ip, XFS_ILOCK_SHARED); 3799 xfs_ilock(ip, XFS_ILOCK_SHARED);
3796 xfs_iflock(ip); 3800 xfs_iflock(ip);
diff --git a/include/asm-powerpc/time.h b/include/asm-powerpc/time.h
index d7f5ddfbaac7..c104c15c6625 100644
--- a/include/asm-powerpc/time.h
+++ b/include/asm-powerpc/time.h
@@ -149,6 +149,11 @@ static inline u64 get_tb(void)
149} 149}
150#endif /* !CONFIG_PPC64 */ 150#endif /* !CONFIG_PPC64 */
151 151
152static inline u64 get_tb_or_rtc(void)
153{
154 return __USE_RTC() ? get_rtc() : get_tb();
155}
156
152static inline void set_tb(unsigned int upper, unsigned int lower) 157static inline void set_tb(unsigned int upper, unsigned int lower)
153{ 158{
154 mtspr(SPRN_TBWL, 0); 159 mtspr(SPRN_TBWL, 0);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5bdd656e88cf..a020eb2d4e2a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -159,7 +159,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
159 159
160extern struct mempolicy default_policy; 160extern struct mempolicy default_policy;
161extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, 161extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
162 unsigned long addr, gfp_t gfp_flags); 162 unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol);
163extern unsigned slab_node(struct mempolicy *policy); 163extern unsigned slab_node(struct mempolicy *policy);
164 164
165extern enum zone_type policy_zone; 165extern enum zone_type policy_zone;
@@ -256,7 +256,7 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
256#define set_cpuset_being_rebound(x) do {} while (0) 256#define set_cpuset_being_rebound(x) do {} while (0)
257 257
258static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, 258static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
259 unsigned long addr, gfp_t gfp_flags) 259 unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
260{ 260{
261 return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); 261 return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
262} 262}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f4e324ed2e44..5445eaec6908 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -593,7 +593,7 @@ struct user_struct {
593#endif 593#endif
594 594
595 /* Hash table maintenance information */ 595 /* Hash table maintenance information */
596 struct list_head uidhash_list; 596 struct hlist_node uidhash_node;
597 uid_t uid; 597 uid_t uid;
598}; 598};
599 599
@@ -1472,6 +1472,7 @@ static inline struct user_struct *get_uid(struct user_struct *u)
1472} 1472}
1473extern void free_uid(struct user_struct *); 1473extern void free_uid(struct user_struct *);
1474extern void switch_uid(struct user_struct *); 1474extern void switch_uid(struct user_struct *);
1475extern void release_uids(struct user_namespace *ns);
1475 1476
1476#include <asm/current.h> 1477#include <asm/current.h>
1477 1478
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 1101b0ce878f..b5f41d4c2eec 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -11,7 +11,7 @@
11 11
12struct user_namespace { 12struct user_namespace {
13 struct kref kref; 13 struct kref kref;
14 struct list_head uidhash_table[UIDHASH_SZ]; 14 struct hlist_head uidhash_table[UIDHASH_SZ];
15 struct user_struct *root_user; 15 struct user_struct *root_user;
16}; 16};
17 17
diff --git a/init/Kconfig b/init/Kconfig
index 96b54595f1dc..d54d0cadcc06 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -488,6 +488,7 @@ config SIGNALFD
488config TIMERFD 488config TIMERFD
489 bool "Enable timerfd() system call" if EMBEDDED 489 bool "Enable timerfd() system call" if EMBEDDED
490 select ANON_INODES 490 select ANON_INODES
491 depends on BROKEN
491 default y 492 default y
492 help 493 help
493 Enable the timerfd() system call that allows to receive timer 494 Enable the timerfd() system call that allows to receive timer
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a6b4c0c08e13..fd4fc12d2624 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -57,8 +57,10 @@ static void __init handle_initrd(void)
57 57
58 pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); 58 pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
59 if (pid > 0) 59 if (pid > 0)
60 while (pid != sys_wait4(-1, NULL, 0, NULL)) 60 while (pid != sys_wait4(-1, NULL, 0, NULL)) {
61 try_to_freeze();
61 yield(); 62 yield();
63 }
62 64
63 /* move initrd to rootfs' /old */ 65 /* move initrd to rootfs' /old */
64 sys_fchdir(old_fd); 66 sys_fchdir(old_fd);
diff --git a/kernel/user.c b/kernel/user.c
index e7d11cef6998..9ca2848fc356 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -55,25 +55,22 @@ struct user_struct root_user = {
55/* 55/*
56 * These routines must be called with the uidhash spinlock held! 56 * These routines must be called with the uidhash spinlock held!
57 */ 57 */
58static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent) 58static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
59{ 59{
60 list_add(&up->uidhash_list, hashent); 60 hlist_add_head(&up->uidhash_node, hashent);
61} 61}
62 62
63static inline void uid_hash_remove(struct user_struct *up) 63static inline void uid_hash_remove(struct user_struct *up)
64{ 64{
65 list_del(&up->uidhash_list); 65 hlist_del_init(&up->uidhash_node);
66} 66}
67 67
68static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) 68static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
69{ 69{
70 struct list_head *up; 70 struct user_struct *user;
71 71 struct hlist_node *h;
72 list_for_each(up, hashent) {
73 struct user_struct *user;
74
75 user = list_entry(up, struct user_struct, uidhash_list);
76 72
73 hlist_for_each_entry(user, h, hashent, uidhash_node) {
77 if(user->uid == uid) { 74 if(user->uid == uid) {
78 atomic_inc(&user->__count); 75 atomic_inc(&user->__count);
79 return user; 76 return user;
@@ -122,7 +119,7 @@ void free_uid(struct user_struct *up)
122 119
123struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) 120struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
124{ 121{
125 struct list_head *hashent = uidhashentry(ns, uid); 122 struct hlist_head *hashent = uidhashentry(ns, uid);
126 struct user_struct *up; 123 struct user_struct *up;
127 124
128 spin_lock_irq(&uidhash_lock); 125 spin_lock_irq(&uidhash_lock);
@@ -202,6 +199,30 @@ void switch_uid(struct user_struct *new_user)
202 suid_keys(current); 199 suid_keys(current);
203} 200}
204 201
202void release_uids(struct user_namespace *ns)
203{
204 int i;
205 unsigned long flags;
206 struct hlist_head *head;
207 struct hlist_node *nd;
208
209 spin_lock_irqsave(&uidhash_lock, flags);
210 /*
211 * collapse the chains so that the user_struct-s will
212 * be still alive, but not in hashes. subsequent free_uid()
213 * will free them.
214 */
215 for (i = 0; i < UIDHASH_SZ; i++) {
216 head = ns->uidhash_table + i;
217 while (!hlist_empty(head)) {
218 nd = head->first;
219 hlist_del_init(nd);
220 }
221 }
222 spin_unlock_irqrestore(&uidhash_lock, flags);
223
224 free_uid(ns->root_user);
225}
205 226
206static int __init uid_cache_init(void) 227static int __init uid_cache_init(void)
207{ 228{
@@ -211,7 +232,7 @@ static int __init uid_cache_init(void)
211 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 232 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
212 233
213 for(n = 0; n < UIDHASH_SZ; ++n) 234 for(n = 0; n < UIDHASH_SZ; ++n)
214 INIT_LIST_HEAD(init_user_ns.uidhash_table + n); 235 INIT_HLIST_HEAD(init_user_ns.uidhash_table + n);
215 236
216 /* Insert the root user immediately (init already runs as root) */ 237 /* Insert the root user immediately (init already runs as root) */
217 spin_lock_irq(&uidhash_lock); 238 spin_lock_irq(&uidhash_lock);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 85af9422ea6e..7af90fc4f0fd 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -39,7 +39,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
39 kref_init(&ns->kref); 39 kref_init(&ns->kref);
40 40
41 for (n = 0; n < UIDHASH_SZ; ++n) 41 for (n = 0; n < UIDHASH_SZ; ++n)
42 INIT_LIST_HEAD(ns->uidhash_table + n); 42 INIT_HLIST_HEAD(ns->uidhash_table + n);
43 43
44 /* Insert new root user. */ 44 /* Insert new root user. */
45 ns->root_user = alloc_uid(ns, 0); 45 ns->root_user = alloc_uid(ns, 0);
@@ -81,7 +81,7 @@ void free_user_ns(struct kref *kref)
81 struct user_namespace *ns; 81 struct user_namespace *ns;
82 82
83 ns = container_of(kref, struct user_namespace, kref); 83 ns = container_of(kref, struct user_namespace, kref);
84 free_uid(ns->root_user); 84 release_uids(ns);
85 kfree(ns); 85 kfree(ns);
86} 86}
87 87
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 9d8180a0f0d8..816d7b24fa03 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -28,7 +28,9 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
28 if (!ns) 28 if (!ns)
29 return ERR_PTR(-ENOMEM); 29 return ERR_PTR(-ENOMEM);
30 30
31 down_read(&uts_sem);
31 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 32 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
33 up_read(&uts_sem);
32 kref_init(&ns->kref); 34 kref_init(&ns->kref);
33 return ns; 35 return ns;
34} 36}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index de4cf458d6e1..84c795ee2d65 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
71{ 71{
72 int nid; 72 int nid;
73 struct page *page = NULL; 73 struct page *page = NULL;
74 struct mempolicy *mpol;
74 struct zonelist *zonelist = huge_zonelist(vma, address, 75 struct zonelist *zonelist = huge_zonelist(vma, address,
75 htlb_alloc_mask); 76 htlb_alloc_mask, &mpol);
76 struct zone **z; 77 struct zone **z;
77 78
78 for (z = zonelist->zones; *z; z++) { 79 for (z = zonelist->zones; *z; z++) {
@@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
87 break; 88 break;
88 } 89 }
89 } 90 }
91 mpol_free(mpol); /* unref if mpol !NULL */
90 return page; 92 return page;
91} 93}
92 94
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bb54b88c3d5a..3d6ac9505d07 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1077,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
1077 1077
1078#endif 1078#endif
1079 1079
1080/* Return effective policy for a VMA */ 1080/*
1081 * get_vma_policy(@task, @vma, @addr)
1082 * @task - task for fallback if vma policy == default
1083 * @vma - virtual memory area whose policy is sought
1084 * @addr - address in @vma for shared policy lookup
1085 *
1086 * Returns effective policy for a VMA at specified address.
1087 * Falls back to @task or system default policy, as necessary.
1088 * Returned policy has extra reference count if shared, vma,
1089 * or some other task's policy [show_numa_maps() can pass
1090 * @task != current]. It is the caller's responsibility to
1091 * free the reference in these cases.
1092 */
1081static struct mempolicy * get_vma_policy(struct task_struct *task, 1093static struct mempolicy * get_vma_policy(struct task_struct *task,
1082 struct vm_area_struct *vma, unsigned long addr) 1094 struct vm_area_struct *vma, unsigned long addr)
1083{ 1095{
1084 struct mempolicy *pol = task->mempolicy; 1096 struct mempolicy *pol = task->mempolicy;
1097 int shared_pol = 0;
1085 1098
1086 if (vma) { 1099 if (vma) {
1087 if (vma->vm_ops && vma->vm_ops->get_policy) 1100 if (vma->vm_ops && vma->vm_ops->get_policy) {
1088 pol = vma->vm_ops->get_policy(vma, addr); 1101 pol = vma->vm_ops->get_policy(vma, addr);
1089 else if (vma->vm_policy && 1102 shared_pol = 1; /* if pol non-NULL, add ref below */
1103 } else if (vma->vm_policy &&
1090 vma->vm_policy->policy != MPOL_DEFAULT) 1104 vma->vm_policy->policy != MPOL_DEFAULT)
1091 pol = vma->vm_policy; 1105 pol = vma->vm_policy;
1092 } 1106 }
1093 if (!pol) 1107 if (!pol)
1094 pol = &default_policy; 1108 pol = &default_policy;
1109 else if (!shared_pol && pol != current->mempolicy)
1110 mpol_get(pol); /* vma or other task's policy */
1095 return pol; 1111 return pol;
1096} 1112}
1097 1113
@@ -1207,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
1207} 1223}
1208 1224
1209#ifdef CONFIG_HUGETLBFS 1225#ifdef CONFIG_HUGETLBFS
1210/* Return a zonelist suitable for a huge page allocation. */ 1226/*
1227 * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
1228 * @vma = virtual memory area whose policy is sought
1229 * @addr = address in @vma for shared policy lookup and interleave policy
1230 * @gfp_flags = for requested zone
1231 * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy
1232 *
1233 * Returns a zonelist suitable for a huge page allocation.
1234 * If the effective policy is 'BIND, returns pointer to policy's zonelist.
1235 * If it is also a policy for which get_vma_policy() returns an extra
1236 * reference, we must hold that reference until after allocation.
1237 * In that case, return policy via @mpol so hugetlb allocation can drop
1238 * the reference. For non-'BIND referenced policies, we can/do drop the
1239 * reference here, so the caller doesn't need to know about the special case
1240 * for default and current task policy.
1241 */
1211struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, 1242struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1212 gfp_t gfp_flags) 1243 gfp_t gfp_flags, struct mempolicy **mpol)
1213{ 1244{
1214 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1245 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1246 struct zonelist *zl;
1215 1247
1248 *mpol = NULL; /* probably no unref needed */
1216 if (pol->policy == MPOL_INTERLEAVE) { 1249 if (pol->policy == MPOL_INTERLEAVE) {
1217 unsigned nid; 1250 unsigned nid;
1218 1251
1219 nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); 1252 nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
1253 __mpol_free(pol); /* finished with pol */
1220 return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); 1254 return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
1221 } 1255 }
1222 return zonelist_policy(GFP_HIGHUSER, pol); 1256
1257 zl = zonelist_policy(GFP_HIGHUSER, pol);
1258 if (unlikely(pol != &default_policy && pol != current->mempolicy)) {
1259 if (pol->policy != MPOL_BIND)
1260 __mpol_free(pol); /* finished with pol */
1261 else
1262 *mpol = pol; /* unref needed after allocation */
1263 }
1264 return zl;
1223} 1265}
1224#endif 1266#endif
1225 1267
@@ -1264,6 +1306,7 @@ struct page *
1264alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) 1306alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1265{ 1307{
1266 struct mempolicy *pol = get_vma_policy(current, vma, addr); 1308 struct mempolicy *pol = get_vma_policy(current, vma, addr);
1309 struct zonelist *zl;
1267 1310
1268 cpuset_update_task_memory_state(); 1311 cpuset_update_task_memory_state();
1269 1312
@@ -1273,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
1273 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); 1316 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
1274 return alloc_page_interleave(gfp, 0, nid); 1317 return alloc_page_interleave(gfp, 0, nid);
1275 } 1318 }
1276 return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); 1319 zl = zonelist_policy(gfp, pol);
1320 if (pol != &default_policy && pol != current->mempolicy) {
1321 /*
1322 * slow path: ref counted policy -- shared or vma
1323 */
1324 struct page *page = __alloc_pages(gfp, 0, zl);
1325 __mpol_free(pol);
1326 return page;
1327 }
1328 /*
1329 * fast path: default or task policy
1330 */
1331 return __alloc_pages(gfp, 0, zl);
1277} 1332}
1278 1333
1279/** 1334/**
@@ -1872,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v)
1872 struct numa_maps *md; 1927 struct numa_maps *md;
1873 struct file *file = vma->vm_file; 1928 struct file *file = vma->vm_file;
1874 struct mm_struct *mm = vma->vm_mm; 1929 struct mm_struct *mm = vma->vm_mm;
1930 struct mempolicy *pol;
1875 int n; 1931 int n;
1876 char buffer[50]; 1932 char buffer[50];
1877 1933
@@ -1882,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v)
1882 if (!md) 1938 if (!md)
1883 return 0; 1939 return 0;
1884 1940
1885 mpol_to_str(buffer, sizeof(buffer), 1941 pol = get_vma_policy(priv->task, vma, vma->vm_start);
1886 get_vma_policy(priv->task, vma, vma->vm_start)); 1942 mpol_to_str(buffer, sizeof(buffer), pol);
1943 /*
1944 * unref shared or other task's mempolicy
1945 */
1946 if (pol != &default_policy && pol != current->mempolicy)
1947 __mpol_free(pol);
1887 1948
1888 seq_printf(m, "%08lx %s", vma->vm_start, buffer); 1949 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1889 1950