author	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-28 20:19:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-28 20:19:28 -0400
commit	532bfc851a7475fb6a36c1e953aa395798a7cca7 (patch)
tree	a7892e5a31330dd59f31959efbe9fda1803784fd
parent	0195c00244dc2e9f522475868fa278c473ba7339 (diff)
parent	8da00edc1069f01c34510fa405dc15d96c090a3f (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge third batch of patches from Andrew Morton:
 - Some MM stragglers
 - core SMP library cleanups (on_each_cpu_mask)
 - Some IPI optimisations
 - kexec
 - kdump
 - IPMI
 - the radix-tree iterator work
 - various other misc bits.

"That'll do for -rc1. I still have ~10 patches for 3.4, will send those
along when they've baked a little more."

* emailed from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
  backlight: fix typo in tosa_lcd.c
  crc32: add help text for the algorithm select option
  mm: move hugepage test examples to tools/testing/selftests/vm
  mm: move slabinfo.c to tools/vm
  mm: move page-types.c from Documentation to tools/vm
  selftests/Makefile: make `run_tests' depend on `all'
  selftests: launch individual selftests from the main Makefile
  radix-tree: use iterators in find_get_pages* functions
  radix-tree: rewrite gang lookup using iterator
  radix-tree: introduce bit-optimized iterator
  fs/proc/namespaces.c: prevent crash when ns_entries[] is empty
  nbd: rename the nbd_device variable from lo to nbd
  pidns: add reboot_pid_ns() to handle the reboot syscall
  sysctl: use bitmap library functions
  ipmi: use locks on watchdog timeout set on reboot
  ipmi: simplify locking
  ipmi: fix message handling during panics
  ipmi: use a tasklet for handling received messages
  ipmi: increase KCS timeouts
  ipmi: decrease the IPMI message transaction time in interrupt mode
  ...
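The on_each_cpu_mask() item above ties the arch hunks below together: the arm and tile
private copies of the helper are deleted and every caller is switched to a common version
(see kernel/smp.c and include/linux/smp.h in the diffstat). As a rough sketch, the generic
helper follows the tile implementation removed below; the exact upstream signature (for
instance the use of smp_call_func_t) may differ slightly:

	/*
	 * Sketch of the generic helper this series adds to kernel/smp.c,
	 * mirroring the tile version removed in this merge; treat the
	 * details as approximate rather than the exact upstream code.
	 */
	void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
			      void *info, bool wait)
	{
		int cpu = get_cpu();	/* pin ourselves to this CPU */

		smp_call_function_many(mask, func, info, wait);
		if (cpumask_test_cpu(cpu, mask)) {
			/* the cross-call skips the local CPU; run func here
			 * with interrupts off to match IPI context */
			local_irq_disable();
			func(info);
			local_irq_enable();
		}
		put_cpu();
	}

Callers now pass the mask first, which is why hunks such as arch/arm/kernel/smp_tlb.c below
turn on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm)) into
on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1).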
-rw-r--r--  Documentation/Makefile | 2
-rw-r--r--  Documentation/vm/Makefile | 8
-rw-r--r--  arch/arm/kernel/smp_tlb.c | 20
-rw-r--r--  arch/ia64/kernel/acpi.c | 6
-rw-r--r--  arch/ia64/kernel/irq_ia64.c | 8
-rw-r--r--  arch/ia64/kernel/mca.c | 6
-rw-r--r--  arch/ia64/kernel/msi_ia64.c | 4
-rw-r--r--  arch/ia64/kernel/setup.c | 2
-rw-r--r--  arch/ia64/kernel/smp.c | 2
-rw-r--r--  arch/ia64/kernel/smpboot.c | 19
-rw-r--r--  arch/ia64/kernel/topology.c | 3
-rw-r--r--  arch/tile/include/asm/smp.h | 7
-rw-r--r--  arch/tile/kernel/smp.c | 19
-rw-r--r--  arch/x86/kernel/setup.c | 11
-rw-r--r--  drivers/block/nbd.c | 295
-rw-r--r--  drivers/char/ipmi/ipmi_kcs_sm.c | 4
-rw-r--r--  drivers/char/ipmi/ipmi_msghandler.c | 242
-rw-r--r--  drivers/char/ipmi/ipmi_si_intf.c | 72
-rw-r--r--  drivers/char/ipmi/ipmi_watchdog.c | 21
-rw-r--r--  drivers/video/backlight/tosa_lcd.c | 2
-rw-r--r--  fs/buffer.c | 15
-rw-r--r--  fs/proc/array.c | 2
-rw-r--r--  fs/proc/namespaces.c | 6
-rw-r--r--  fs/proc/task_mmu.c | 5
-rw-r--r--  include/linux/cpumask.h | 3
-rw-r--r--  include/linux/mm.h | 2
-rw-r--r--  include/linux/pid_namespace.h | 8
-rw-r--r--  include/linux/radix-tree.h | 196
-rw-r--r--  include/linux/smp.h | 46
-rw-r--r--  include/linux/swap.h | 3
-rw-r--r--  kernel/kexec.c | 6
-rw-r--r--  kernel/pid_namespace.c | 33
-rw-r--r--  kernel/smp.c | 90
-rw-r--r--  kernel/sys.c | 9
-rw-r--r--  kernel/sysctl.c | 8
-rw-r--r--  lib/Kconfig | 4
-rw-r--r--  lib/cpumask.c | 12
-rw-r--r--  lib/radix-tree.c | 442
-rw-r--r--  mm/filemap.c | 86
-rw-r--r--  mm/memcontrol.c | 4
-rw-r--r--  mm/page_alloc.c | 44
-rw-r--r--  mm/slub.c | 10
-rw-r--r--  mm/swapfile.c | 3
-rw-r--r--  mm/truncate.c | 40
-rw-r--r--  tools/testing/selftests/Makefile | 7
-rw-r--r--  tools/testing/selftests/breakpoints/Makefile | 7
-rw-r--r--  tools/testing/selftests/run_tests | 8
-rw-r--r--  tools/testing/selftests/vm/Makefile | 14
-rw-r--r--  tools/testing/selftests/vm/hugepage-mmap.c (renamed from Documentation/vm/hugepage-mmap.c) | 13
-rw-r--r--  tools/testing/selftests/vm/hugepage-shm.c (renamed from Documentation/vm/hugepage-shm.c) | 10
-rw-r--r--  tools/testing/selftests/vm/map_hugetlb.c (renamed from Documentation/vm/map_hugetlb.c) | 10
-rw-r--r--  tools/testing/selftests/vm/run_vmtests | 77
-rw-r--r--  tools/vm/Makefile | 11
-rw-r--r--  tools/vm/page-types.c (renamed from Documentation/vm/page-types.c) | 6
-rw-r--r--  tools/vm/slabinfo.c (renamed from tools/slub/slabinfo.c) | 0
55 files changed, 1225 insertions, 768 deletions
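The radix-tree entries in the diffstat (include/linux/radix-tree.h, lib/radix-tree.c,
mm/filemap.c) carry the bit-optimized iterator named in the commit list; those hunks are not
reproduced in this excerpt. A hedged usage sketch of the new API, using the iterator names
that series introduces (assumed here, since the header diff is not shown in this page):

	#include <linux/radix-tree.h>

	/*
	 * Walk every occupied slot starting at 'start'; iter.index carries the
	 * key of the current slot. A sketch only; see the real
	 * radix_tree_for_each_slot() in include/linux/radix-tree.h.
	 */
	static unsigned long count_items(struct radix_tree_root *root,
					 unsigned long start)
	{
		struct radix_tree_iter iter;
		void **slot;
		unsigned long n = 0;

		radix_tree_for_each_slot(slot, root, &iter, start)
			n++;
		return n;
	}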
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 9b4bc5c76f33..30b656ece7aa 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -1,3 +1,3 @@
 obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
 	filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \
-	pcmcia/ spi/ timers/ vm/ watchdog/src/
+	pcmcia/ spi/ timers/ watchdog/src/
diff --git a/Documentation/vm/Makefile b/Documentation/vm/Makefile
deleted file mode 100644
index 3fa4d0668864..000000000000
--- a/Documentation/vm/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# kbuild trick to avoid linker error. Can be omitted if a module is built.
-obj- := dummy.o
-
-# List of programs to build
-hostprogs-y := page-types hugepage-mmap hugepage-shm map_hugetlb
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 7dcb35285be7..02c5d2ce23bf 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -13,18 +13,6 @@
 #include <asm/smp_plat.h>
 #include <asm/tlbflush.h>
 
-static void on_each_cpu_mask(void (*func)(void *), void *info, int wait,
-	const struct cpumask *mask)
-{
-	preempt_disable();
-
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(smp_processor_id(), mask))
-		func(info);
-
-	preempt_enable();
-}
-
 /**********************************************************************/
 
 /*
@@ -87,7 +75,7 @@ void flush_tlb_all(void)
 void flush_tlb_mm(struct mm_struct *mm)
 {
 	if (tlb_ops_need_broadcast())
-		on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
+		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
 	else
 		local_flush_tlb_mm(mm);
 }
@@ -98,7 +86,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 		struct tlb_args ta;
 		ta.ta_vma = vma;
 		ta.ta_start = uaddr;
-		on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
+					&ta, 1);
 	} else
 		local_flush_tlb_page(vma, uaddr);
 }
@@ -121,7 +110,8 @@ void flush_tlb_range(struct vm_area_struct *vma,
 		ta.ta_vma = vma;
 		ta.ta_start = start;
 		ta.ta_end = end;
-		on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
+		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+					&ta, 1);
 	} else
 		local_flush_tlb_range(vma, start, end);
 }
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index d1cc81e63ba6..ac795d311f44 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -843,7 +843,7 @@ early_param("additional_cpus", setup_additional_cpus);
  * are onlined, or offlined. The reason is per-cpu data-structures
  * are allocated by some modules at init time, and dont expect to
  * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
+ * cpu_present_mask on the other hand can change dynamically.
  * In case when cpu_hotplug is not compiled, then we resort to current
  * behaviour, which is cpu_possible == cpu_present.
  * - Ashok Raj
@@ -921,7 +921,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 
 	acpi_map_cpu2node(handle, cpu, physid);
 
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_present(cpu, true);
 	ia64_cpu_to_sapicid[cpu] = physid;
 
 	acpi_processor_set_pdc(handle);
@@ -940,7 +940,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 int acpi_unmap_lsapic(int cpu)
 {
 	ia64_cpu_to_sapicid[cpu] = -1;
-	cpu_clear(cpu, cpu_present_map);
+	set_cpu_present(cpu, false);
 
 #ifdef CONFIG_ACPI_NUMA
 	/* NUMA specific cleanup's */
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 08113b1d30f7..5c3e0888265a 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -117,7 +117,7 @@ static inline int find_unassigned_vector(cpumask_t domain)
 	cpumask_t mask;
 	int pos, vector;
 
-	cpus_and(mask, domain, cpu_online_map);
+	cpumask_and(&mask, &domain, cpu_online_mask);
 	if (cpus_empty(mask))
 		return -EINVAL;
 
@@ -140,7 +140,7 @@ static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
 	BUG_ON((unsigned)irq >= NR_IRQS);
 	BUG_ON((unsigned)vector >= IA64_NUM_VECTORS);
 
-	cpus_and(mask, domain, cpu_online_map);
+	cpumask_and(&mask, &domain, cpu_online_mask);
 	if (cpus_empty(mask))
 		return -EINVAL;
 	if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
@@ -178,7 +178,7 @@ static void __clear_irq_vector(int irq)
 	BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED);
 	vector = cfg->vector;
 	domain = cfg->domain;
-	cpus_and(mask, cfg->domain, cpu_online_map);
+	cpumask_and(&mask, &cfg->domain, cpu_online_mask);
 	for_each_cpu_mask(cpu, mask)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 	cfg->vector = IRQ_VECTOR_UNASSIGNED;
@@ -321,7 +321,7 @@ void irq_complete_move(unsigned irq)
 	if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain)))
 		return;
 
-	cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+	cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask);
 	cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 	for_each_cpu_mask(i, cleanup_mask)
 		platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index a39fe098a732..65bf9cd39044 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1514,7 +1514,8 @@ static void
 ia64_mca_cmc_poll (unsigned long dummy)
 {
 	/* Trigger a CMC interrupt cascade */
-	platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+	platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR,
+			  IA64_IPI_DM_INT, 0);
 }
 
 /*
@@ -1590,7 +1591,8 @@ static void
 ia64_mca_cpe_poll (unsigned long dummy)
 {
 	/* Trigger a CPE interrupt cascade */
-	platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+	platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR,
+			  IA64_IPI_DM_INT, 0);
 }
 
 #endif /* CONFIG_ACPI */
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 94e0db72d4a6..fb2f1e622877 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -57,7 +57,7 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
 		return irq;
 
 	irq_set_msi_desc(irq, desc);
-	cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
 	dest_phys_id = cpu_physical_id(first_cpu(mask));
 	vector = irq_to_vector(irq);
 
@@ -179,7 +179,7 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 	unsigned dest;
 	cpumask_t mask;
 
-	cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+	cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
 	dest = cpu_physical_id(first_cpu(mask));
 
 	msg->address_hi = 0;
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c45e6ddb4ddb..aaefd9b94f2f 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -485,7 +485,7 @@ mark_bsp_online (void)
 {
 #ifdef CONFIG_SMP
 	/* If we register an early console, allow CPU 0 to printk */
-	cpu_set(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), true);
 #endif
 }
 
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index e27f925032ae..9fcd4e63048f 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -76,7 +76,7 @@ stop_this_cpu(void)
 	/*
 	 * Remove this CPU:
 	 */
-	cpu_clear(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), false);
 	max_xtp();
 	local_irq_disable();
 	cpu_halt();
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index fb7927be75c4..796f6a5b966a 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -400,7 +400,7 @@ smp_callin (void)
 	/* Setup the per cpu irq handling data structures */
 	__setup_vector_irq(cpuid);
 	notify_cpu_starting(cpuid);
-	cpu_set(cpuid, cpu_online_map);
+	set_cpu_online(cpuid, true);
 	per_cpu(cpu_state, cpuid) = CPU_ONLINE;
 	spin_unlock(&vector_lock);
 	ipi_call_unlock_irq();
@@ -547,7 +547,7 @@ do_rest:
 	if (!cpu_isset(cpu, cpu_callin_map)) {
 		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
 		ia64_cpu_to_sapicid[cpu] = -1;
-		cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */
+		set_cpu_online(cpu, false); /* was set in smp_callin() */
 		return -EINVAL;
 	}
 	return 0;
@@ -577,8 +577,7 @@ smp_build_cpu_map (void)
 	}
 
 	ia64_cpu_to_sapicid[0] = boot_cpu_id;
-	cpus_clear(cpu_present_map);
-	set_cpu_present(0, true);
+	init_cpu_present(cpumask_of(0));
 	set_cpu_possible(0, true);
 	for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
 		sapicid = smp_boot_data.cpu_phys_id[i];
@@ -605,10 +604,6 @@ smp_prepare_cpus (unsigned int max_cpus)
 
 	smp_setup_percpu_timer();
 
-	/*
-	 * We have the boot CPU online for sure.
-	 */
-	cpu_set(0, cpu_online_map);
 	cpu_set(0, cpu_callin_map);
 
 	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
@@ -632,7 +627,7 @@ smp_prepare_cpus (unsigned int max_cpus)
 
 void __devinit smp_prepare_boot_cpu(void)
 {
-	cpu_set(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), true);
 	cpu_set(smp_processor_id(), cpu_callin_map);
 	set_numa_node(cpu_to_node_map[smp_processor_id()]);
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
@@ -689,7 +684,7 @@ int migrate_platform_irqs(unsigned int cpu)
 	/*
 	 * Now re-target the CPEI to a different processor
 	 */
-	new_cpei_cpu = any_online_cpu(cpu_online_map);
+	new_cpei_cpu = cpumask_any(cpu_online_mask);
 	mask = cpumask_of(new_cpei_cpu);
 	set_cpei_target_cpu(new_cpei_cpu);
 	data = irq_get_irq_data(ia64_cpe_irq);
@@ -731,10 +726,10 @@ int __cpu_disable(void)
 		return -EBUSY;
 	}
 
-	cpu_clear(cpu, cpu_online_map);
+	set_cpu_online(cpu, false);
 
 	if (migrate_platform_irqs(cpu)) {
-		cpu_set(cpu, cpu_online_map);
+		set_cpu_online(cpu, true);
 		return -EBUSY;
 	}
 
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 9deb21dbf629..c64460b9c704 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -220,7 +220,8 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
 	ssize_t len;
 	cpumask_t shared_cpu_map;
 
-	cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
+	cpumask_and(&shared_cpu_map,
+		    &this_leaf->shared_cpu_map, cpu_online_mask);
 	len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
 	len += sprintf(buf+len, "\n");
 	return len;
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 532124ae4b12..1aa759aeb5b3 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -43,10 +43,6 @@ void evaluate_message(int tag);
 /* Boot a secondary cpu */
 void online_secondary(void);
 
-/* Call a function on a specified set of CPUs (may include this one). */
-extern void on_each_cpu_mask(const struct cpumask *mask,
-			     void (*func)(void *), void *info, bool wait);
-
 /* Topology of the supervisor tile grid, and coordinates of boot processor */
 extern HV_Topology smp_topology;
 
@@ -91,9 +87,6 @@ void print_disabled_cpus(void);
 
 #else /* !CONFIG_SMP */
 
-#define on_each_cpu_mask(mask, func, info, wait) \
-	do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
-
 #define smp_master_cpu		0
 #define smp_height		1
 #define smp_width		1
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index c52224d5ed45..a44e103c5a63 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -87,25 +87,6 @@ void send_IPI_allbutself(int tag)
 	send_IPI_many(&mask, tag);
 }
 
-
-/*
- * Provide smp_call_function_mask, but also run function locally
- * if specified in the mask.
- */
-void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
-		      void *info, bool wait)
-{
-	int cpu = get_cpu();
-	smp_call_function_many(mask, func, info, wait);
-	if (cpumask_test_cpu(cpu, mask)) {
-		local_irq_disable();
-		func(info);
-		local_irq_enable();
-	}
-	put_cpu();
-}
-
-
 /*
  * Functions related to starting/stopping cpus.
  */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 8cbeb7209c3e..1a2901562059 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -508,15 +508,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 
 #ifdef CONFIG_KEXEC
 
-static inline unsigned long long get_total_mem(void)
-{
-	unsigned long long total;
-
-	total = max_pfn - min_low_pfn;
-
-	return total << PAGE_SHIFT;
-}
-
 /*
  * Keep the crash kernel below this limit. On 32 bits earlier kernels
  * would limit the kernel to the low 512 MiB due to mapping restrictions.
@@ -535,7 +526,7 @@ static void __init reserve_crashkernel(void)
 	unsigned long long crash_size, crash_base;
 	int ret;
 
-	total_mem = get_total_mem();
+	total_mem = memblock_phys_mem_size();
 
 	ret = parse_crashkernel(boot_command_line, total_mem,
 				&crash_size, &crash_base);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index c7ba11f9b203..061427a75d37 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -38,7 +38,7 @@
38 38
39#include <linux/nbd.h> 39#include <linux/nbd.h>
40 40
41#define LO_MAGIC 0x68797548 41#define NBD_MAGIC 0x68797548
42 42
43#ifdef NDEBUG 43#ifdef NDEBUG
44#define dprintk(flags, fmt...) 44#define dprintk(flags, fmt...)
@@ -115,7 +115,7 @@ static void nbd_end_request(struct request *req)
115 spin_unlock_irqrestore(q->queue_lock, flags); 115 spin_unlock_irqrestore(q->queue_lock, flags);
116} 116}
117 117
118static void sock_shutdown(struct nbd_device *lo, int lock) 118static void sock_shutdown(struct nbd_device *nbd, int lock)
119{ 119{
120 /* Forcibly shutdown the socket causing all listeners 120 /* Forcibly shutdown the socket causing all listeners
121 * to error 121 * to error
@@ -124,14 +124,14 @@ static void sock_shutdown(struct nbd_device *lo, int lock)
124 * there should be a more generic interface rather than 124 * there should be a more generic interface rather than
125 * calling socket ops directly here */ 125 * calling socket ops directly here */
126 if (lock) 126 if (lock)
127 mutex_lock(&lo->tx_lock); 127 mutex_lock(&nbd->tx_lock);
128 if (lo->sock) { 128 if (nbd->sock) {
129 dev_warn(disk_to_dev(lo->disk), "shutting down socket\n"); 129 dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
130 kernel_sock_shutdown(lo->sock, SHUT_RDWR); 130 kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
131 lo->sock = NULL; 131 nbd->sock = NULL;
132 } 132 }
133 if (lock) 133 if (lock)
134 mutex_unlock(&lo->tx_lock); 134 mutex_unlock(&nbd->tx_lock);
135} 135}
136 136
137static void nbd_xmit_timeout(unsigned long arg) 137static void nbd_xmit_timeout(unsigned long arg)
@@ -146,17 +146,17 @@ static void nbd_xmit_timeout(unsigned long arg)
146/* 146/*
147 * Send or receive packet. 147 * Send or receive packet.
148 */ 148 */
149static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, 149static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
150 int msg_flags) 150 int msg_flags)
151{ 151{
152 struct socket *sock = lo->sock; 152 struct socket *sock = nbd->sock;
153 int result; 153 int result;
154 struct msghdr msg; 154 struct msghdr msg;
155 struct kvec iov; 155 struct kvec iov;
156 sigset_t blocked, oldset; 156 sigset_t blocked, oldset;
157 157
158 if (unlikely(!sock)) { 158 if (unlikely(!sock)) {
159 dev_err(disk_to_dev(lo->disk), 159 dev_err(disk_to_dev(nbd->disk),
160 "Attempted %s on closed socket in sock_xmit\n", 160 "Attempted %s on closed socket in sock_xmit\n",
161 (send ? "send" : "recv")); 161 (send ? "send" : "recv"));
162 return -EINVAL; 162 return -EINVAL;
@@ -180,15 +180,15 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
180 if (send) { 180 if (send) {
181 struct timer_list ti; 181 struct timer_list ti;
182 182
183 if (lo->xmit_timeout) { 183 if (nbd->xmit_timeout) {
184 init_timer(&ti); 184 init_timer(&ti);
185 ti.function = nbd_xmit_timeout; 185 ti.function = nbd_xmit_timeout;
186 ti.data = (unsigned long)current; 186 ti.data = (unsigned long)current;
187 ti.expires = jiffies + lo->xmit_timeout; 187 ti.expires = jiffies + nbd->xmit_timeout;
188 add_timer(&ti); 188 add_timer(&ti);
189 } 189 }
190 result = kernel_sendmsg(sock, &msg, &iov, 1, size); 190 result = kernel_sendmsg(sock, &msg, &iov, 1, size);
191 if (lo->xmit_timeout) 191 if (nbd->xmit_timeout)
192 del_timer_sync(&ti); 192 del_timer_sync(&ti);
193 } else 193 } else
194 result = kernel_recvmsg(sock, &msg, &iov, 1, size, 194 result = kernel_recvmsg(sock, &msg, &iov, 1, size,
@@ -200,7 +200,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
200 task_pid_nr(current), current->comm, 200 task_pid_nr(current), current->comm,
201 dequeue_signal_lock(current, &current->blocked, &info)); 201 dequeue_signal_lock(current, &current->blocked, &info));
202 result = -EINTR; 202 result = -EINTR;
203 sock_shutdown(lo, !send); 203 sock_shutdown(nbd, !send);
204 break; 204 break;
205 } 205 }
206 206
@@ -218,18 +218,19 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
218 return result; 218 return result;
219} 219}
220 220
221static inline int sock_send_bvec(struct nbd_device *lo, struct bio_vec *bvec, 221static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
222 int flags) 222 int flags)
223{ 223{
224 int result; 224 int result;
225 void *kaddr = kmap(bvec->bv_page); 225 void *kaddr = kmap(bvec->bv_page);
226 result = sock_xmit(lo, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); 226 result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset,
227 bvec->bv_len, flags);
227 kunmap(bvec->bv_page); 228 kunmap(bvec->bv_page);
228 return result; 229 return result;
229} 230}
230 231
231/* always call with the tx_lock held */ 232/* always call with the tx_lock held */
232static int nbd_send_req(struct nbd_device *lo, struct request *req) 233static int nbd_send_req(struct nbd_device *nbd, struct request *req)
233{ 234{
234 int result, flags; 235 int result, flags;
235 struct nbd_request request; 236 struct nbd_request request;
@@ -242,14 +243,14 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
242 memcpy(request.handle, &req, sizeof(req)); 243 memcpy(request.handle, &req, sizeof(req));
243 244
244 dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n", 245 dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
245 lo->disk->disk_name, req, 246 nbd->disk->disk_name, req,
246 nbdcmd_to_ascii(nbd_cmd(req)), 247 nbdcmd_to_ascii(nbd_cmd(req)),
247 (unsigned long long)blk_rq_pos(req) << 9, 248 (unsigned long long)blk_rq_pos(req) << 9,
248 blk_rq_bytes(req)); 249 blk_rq_bytes(req));
249 result = sock_xmit(lo, 1, &request, sizeof(request), 250 result = sock_xmit(nbd, 1, &request, sizeof(request),
250 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); 251 (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
251 if (result <= 0) { 252 if (result <= 0) {
252 dev_err(disk_to_dev(lo->disk), 253 dev_err(disk_to_dev(nbd->disk),
253 "Send control failed (result %d)\n", result); 254 "Send control failed (result %d)\n", result);
254 goto error_out; 255 goto error_out;
255 } 256 }
@@ -266,10 +267,10 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
266 if (!rq_iter_last(req, iter)) 267 if (!rq_iter_last(req, iter))
267 flags = MSG_MORE; 268 flags = MSG_MORE;
268 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", 269 dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
269 lo->disk->disk_name, req, bvec->bv_len); 270 nbd->disk->disk_name, req, bvec->bv_len);
270 result = sock_send_bvec(lo, bvec, flags); 271 result = sock_send_bvec(nbd, bvec, flags);
271 if (result <= 0) { 272 if (result <= 0) {
272 dev_err(disk_to_dev(lo->disk), 273 dev_err(disk_to_dev(nbd->disk),
273 "Send data failed (result %d)\n", 274 "Send data failed (result %d)\n",
274 result); 275 result);
275 goto error_out; 276 goto error_out;
@@ -282,25 +283,25 @@ error_out:
282 return -EIO; 283 return -EIO;
283} 284}
284 285
285static struct request *nbd_find_request(struct nbd_device *lo, 286static struct request *nbd_find_request(struct nbd_device *nbd,
286 struct request *xreq) 287 struct request *xreq)
287{ 288{
288 struct request *req, *tmp; 289 struct request *req, *tmp;
289 int err; 290 int err;
290 291
291 err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); 292 err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
292 if (unlikely(err)) 293 if (unlikely(err))
293 goto out; 294 goto out;
294 295
295 spin_lock(&lo->queue_lock); 296 spin_lock(&nbd->queue_lock);
296 list_for_each_entry_safe(req, tmp, &lo->queue_head, queuelist) { 297 list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
297 if (req != xreq) 298 if (req != xreq)
298 continue; 299 continue;
299 list_del_init(&req->queuelist); 300 list_del_init(&req->queuelist);
300 spin_unlock(&lo->queue_lock); 301 spin_unlock(&nbd->queue_lock);
301 return req; 302 return req;
302 } 303 }
303 spin_unlock(&lo->queue_lock); 304 spin_unlock(&nbd->queue_lock);
304 305
305 err = -ENOENT; 306 err = -ENOENT;
306 307
@@ -308,78 +309,78 @@ out:
308 return ERR_PTR(err); 309 return ERR_PTR(err);
309} 310}
310 311
311static inline int sock_recv_bvec(struct nbd_device *lo, struct bio_vec *bvec) 312static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
312{ 313{
313 int result; 314 int result;
314 void *kaddr = kmap(bvec->bv_page); 315 void *kaddr = kmap(bvec->bv_page);
315 result = sock_xmit(lo, 0, kaddr + bvec->bv_offset, bvec->bv_len, 316 result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len,
316 MSG_WAITALL); 317 MSG_WAITALL);
317 kunmap(bvec->bv_page); 318 kunmap(bvec->bv_page);
318 return result; 319 return result;
319} 320}
320 321
321/* NULL returned = something went wrong, inform userspace */ 322/* NULL returned = something went wrong, inform userspace */
322static struct request *nbd_read_stat(struct nbd_device *lo) 323static struct request *nbd_read_stat(struct nbd_device *nbd)
323{ 324{
324 int result; 325 int result;
325 struct nbd_reply reply; 326 struct nbd_reply reply;
326 struct request *req; 327 struct request *req;
327 328
328 reply.magic = 0; 329 reply.magic = 0;
329 result = sock_xmit(lo, 0, &reply, sizeof(reply), MSG_WAITALL); 330 result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
330 if (result <= 0) { 331 if (result <= 0) {
331 dev_err(disk_to_dev(lo->disk), 332 dev_err(disk_to_dev(nbd->disk),
332 "Receive control failed (result %d)\n", result); 333 "Receive control failed (result %d)\n", result);
333 goto harderror; 334 goto harderror;
334 } 335 }
335 336
336 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) { 337 if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
337 dev_err(disk_to_dev(lo->disk), "Wrong magic (0x%lx)\n", 338 dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
338 (unsigned long)ntohl(reply.magic)); 339 (unsigned long)ntohl(reply.magic));
339 result = -EPROTO; 340 result = -EPROTO;
340 goto harderror; 341 goto harderror;
341 } 342 }
342 343
343 req = nbd_find_request(lo, *(struct request **)reply.handle); 344 req = nbd_find_request(nbd, *(struct request **)reply.handle);
344 if (IS_ERR(req)) { 345 if (IS_ERR(req)) {
345 result = PTR_ERR(req); 346 result = PTR_ERR(req);
346 if (result != -ENOENT) 347 if (result != -ENOENT)
347 goto harderror; 348 goto harderror;
348 349
349 dev_err(disk_to_dev(lo->disk), "Unexpected reply (%p)\n", 350 dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
350 reply.handle); 351 reply.handle);
351 result = -EBADR; 352 result = -EBADR;
352 goto harderror; 353 goto harderror;
353 } 354 }
354 355
355 if (ntohl(reply.error)) { 356 if (ntohl(reply.error)) {
356 dev_err(disk_to_dev(lo->disk), "Other side returned error (%d)\n", 357 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
357 ntohl(reply.error)); 358 ntohl(reply.error));
358 req->errors++; 359 req->errors++;
359 return req; 360 return req;
360 } 361 }
361 362
362 dprintk(DBG_RX, "%s: request %p: got reply\n", 363 dprintk(DBG_RX, "%s: request %p: got reply\n",
363 lo->disk->disk_name, req); 364 nbd->disk->disk_name, req);
364 if (nbd_cmd(req) == NBD_CMD_READ) { 365 if (nbd_cmd(req) == NBD_CMD_READ) {
365 struct req_iterator iter; 366 struct req_iterator iter;
366 struct bio_vec *bvec; 367 struct bio_vec *bvec;
367 368
368 rq_for_each_segment(bvec, req, iter) { 369 rq_for_each_segment(bvec, req, iter) {
369 result = sock_recv_bvec(lo, bvec); 370 result = sock_recv_bvec(nbd, bvec);
370 if (result <= 0) { 371 if (result <= 0) {
371 dev_err(disk_to_dev(lo->disk), "Receive data failed (result %d)\n", 372 dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
372 result); 373 result);
373 req->errors++; 374 req->errors++;
374 return req; 375 return req;
375 } 376 }
376 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", 377 dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
377 lo->disk->disk_name, req, bvec->bv_len); 378 nbd->disk->disk_name, req, bvec->bv_len);
378 } 379 }
379 } 380 }
380 return req; 381 return req;
381harderror: 382harderror:
382 lo->harderror = result; 383 nbd->harderror = result;
383 return NULL; 384 return NULL;
384} 385}
385 386
@@ -397,48 +398,48 @@ static struct device_attribute pid_attr = {
397 .show = pid_show, 398 .show = pid_show,
398}; 399};
399 400
400static int nbd_do_it(struct nbd_device *lo) 401static int nbd_do_it(struct nbd_device *nbd)
401{ 402{
402 struct request *req; 403 struct request *req;
403 int ret; 404 int ret;
404 405
405 BUG_ON(lo->magic != LO_MAGIC); 406 BUG_ON(nbd->magic != NBD_MAGIC);
406 407
407 lo->pid = task_pid_nr(current); 408 nbd->pid = task_pid_nr(current);
408 ret = device_create_file(disk_to_dev(lo->disk), &pid_attr); 409 ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
409 if (ret) { 410 if (ret) {
410 dev_err(disk_to_dev(lo->disk), "device_create_file failed!\n"); 411 dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
411 lo->pid = 0; 412 nbd->pid = 0;
412 return ret; 413 return ret;
413 } 414 }
414 415
415 while ((req = nbd_read_stat(lo)) != NULL) 416 while ((req = nbd_read_stat(nbd)) != NULL)
416 nbd_end_request(req); 417 nbd_end_request(req);
417 418
418 device_remove_file(disk_to_dev(lo->disk), &pid_attr); 419 device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
419 lo->pid = 0; 420 nbd->pid = 0;
420 return 0; 421 return 0;
421} 422}
422 423
423static void nbd_clear_que(struct nbd_device *lo) 424static void nbd_clear_que(struct nbd_device *nbd)
424{ 425{
425 struct request *req; 426 struct request *req;
426 427
427 BUG_ON(lo->magic != LO_MAGIC); 428 BUG_ON(nbd->magic != NBD_MAGIC);
428 429
429 /* 430 /*
430 * Because we have set lo->sock to NULL under the tx_lock, all 431 * Because we have set nbd->sock to NULL under the tx_lock, all
431 * modifications to the list must have completed by now. For 432 * modifications to the list must have completed by now. For
432 * the same reason, the active_req must be NULL. 433 * the same reason, the active_req must be NULL.
433 * 434 *
434 * As a consequence, we don't need to take the spin lock while 435 * As a consequence, we don't need to take the spin lock while
435 * purging the list here. 436 * purging the list here.
436 */ 437 */
437 BUG_ON(lo->sock); 438 BUG_ON(nbd->sock);
438 BUG_ON(lo->active_req); 439 BUG_ON(nbd->active_req);
439 440
440 while (!list_empty(&lo->queue_head)) { 441 while (!list_empty(&nbd->queue_head)) {
441 req = list_entry(lo->queue_head.next, struct request, 442 req = list_entry(nbd->queue_head.next, struct request,
442 queuelist); 443 queuelist);
443 list_del_init(&req->queuelist); 444 list_del_init(&req->queuelist);
444 req->errors++; 445 req->errors++;
@@ -447,7 +448,7 @@ static void nbd_clear_que(struct nbd_device *lo)
447} 448}
448 449
449 450
450static void nbd_handle_req(struct nbd_device *lo, struct request *req) 451static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
451{ 452{
452 if (req->cmd_type != REQ_TYPE_FS) 453 if (req->cmd_type != REQ_TYPE_FS)
453 goto error_out; 454 goto error_out;
@@ -455,8 +456,8 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
455 nbd_cmd(req) = NBD_CMD_READ; 456 nbd_cmd(req) = NBD_CMD_READ;
456 if (rq_data_dir(req) == WRITE) { 457 if (rq_data_dir(req) == WRITE) {
457 nbd_cmd(req) = NBD_CMD_WRITE; 458 nbd_cmd(req) = NBD_CMD_WRITE;
458 if (lo->flags & NBD_READ_ONLY) { 459 if (nbd->flags & NBD_READ_ONLY) {
459 dev_err(disk_to_dev(lo->disk), 460 dev_err(disk_to_dev(nbd->disk),
460 "Write on read-only\n"); 461 "Write on read-only\n");
461 goto error_out; 462 goto error_out;
462 } 463 }
@@ -464,29 +465,29 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
464 465
465 req->errors = 0; 466 req->errors = 0;
466 467
467 mutex_lock(&lo->tx_lock); 468 mutex_lock(&nbd->tx_lock);
468 if (unlikely(!lo->sock)) { 469 if (unlikely(!nbd->sock)) {
469 mutex_unlock(&lo->tx_lock); 470 mutex_unlock(&nbd->tx_lock);
470 dev_err(disk_to_dev(lo->disk), 471 dev_err(disk_to_dev(nbd->disk),
471 "Attempted send on closed socket\n"); 472 "Attempted send on closed socket\n");
472 goto error_out; 473 goto error_out;
473 } 474 }
474 475
475 lo->active_req = req; 476 nbd->active_req = req;
476 477
477 if (nbd_send_req(lo, req) != 0) { 478 if (nbd_send_req(nbd, req) != 0) {
478 dev_err(disk_to_dev(lo->disk), "Request send failed\n"); 479 dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
479 req->errors++; 480 req->errors++;
480 nbd_end_request(req); 481 nbd_end_request(req);
481 } else { 482 } else {
482 spin_lock(&lo->queue_lock); 483 spin_lock(&nbd->queue_lock);
483 list_add(&req->queuelist, &lo->queue_head); 484 list_add(&req->queuelist, &nbd->queue_head);
484 spin_unlock(&lo->queue_lock); 485 spin_unlock(&nbd->queue_lock);
485 } 486 }
486 487
487 lo->active_req = NULL; 488 nbd->active_req = NULL;
488 mutex_unlock(&lo->tx_lock); 489 mutex_unlock(&nbd->tx_lock);
489 wake_up_all(&lo->active_wq); 490 wake_up_all(&nbd->active_wq);
490 491
491 return; 492 return;
492 493
@@ -497,28 +498,28 @@ error_out:
497 498
498static int nbd_thread(void *data) 499static int nbd_thread(void *data)
499{ 500{
500 struct nbd_device *lo = data; 501 struct nbd_device *nbd = data;
501 struct request *req; 502 struct request *req;
502 503
503 set_user_nice(current, -20); 504 set_user_nice(current, -20);
504 while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { 505 while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
505 /* wait for something to do */ 506 /* wait for something to do */
506 wait_event_interruptible(lo->waiting_wq, 507 wait_event_interruptible(nbd->waiting_wq,
507 kthread_should_stop() || 508 kthread_should_stop() ||
508 !list_empty(&lo->waiting_queue)); 509 !list_empty(&nbd->waiting_queue));
509 510
510 /* extract request */ 511 /* extract request */
511 if (list_empty(&lo->waiting_queue)) 512 if (list_empty(&nbd->waiting_queue))
512 continue; 513 continue;
513 514
514 spin_lock_irq(&lo->queue_lock); 515 spin_lock_irq(&nbd->queue_lock);
515 req = list_entry(lo->waiting_queue.next, struct request, 516 req = list_entry(nbd->waiting_queue.next, struct request,
516 queuelist); 517 queuelist);
517 list_del_init(&req->queuelist); 518 list_del_init(&req->queuelist);
518 spin_unlock_irq(&lo->queue_lock); 519 spin_unlock_irq(&nbd->queue_lock);
519 520
520 /* handle request */ 521 /* handle request */
521 nbd_handle_req(lo, req); 522 nbd_handle_req(nbd, req);
522 } 523 }
523 return 0; 524 return 0;
524} 525}
@@ -526,7 +527,7 @@ static int nbd_thread(void *data)
526/* 527/*
527 * We always wait for result of write, for now. It would be nice to make it optional 528 * We always wait for result of write, for now. It would be nice to make it optional
528 * in future 529 * in future
529 * if ((rq_data_dir(req) == WRITE) && (lo->flags & NBD_WRITE_NOCHK)) 530 * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
530 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); } 531 * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
531 */ 532 */
532 533
@@ -535,19 +536,19 @@ static void do_nbd_request(struct request_queue *q)
535 struct request *req; 536 struct request *req;
536 537
537 while ((req = blk_fetch_request(q)) != NULL) { 538 while ((req = blk_fetch_request(q)) != NULL) {
538 struct nbd_device *lo; 539 struct nbd_device *nbd;
539 540
540 spin_unlock_irq(q->queue_lock); 541 spin_unlock_irq(q->queue_lock);
541 542
542 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", 543 dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
543 req->rq_disk->disk_name, req, req->cmd_type); 544 req->rq_disk->disk_name, req, req->cmd_type);
544 545
545 lo = req->rq_disk->private_data; 546 nbd = req->rq_disk->private_data;
546 547
547 BUG_ON(lo->magic != LO_MAGIC); 548 BUG_ON(nbd->magic != NBD_MAGIC);
548 549
549 if (unlikely(!lo->sock)) { 550 if (unlikely(!nbd->sock)) {
550 dev_err(disk_to_dev(lo->disk), 551 dev_err(disk_to_dev(nbd->disk),
551 "Attempted send on closed socket\n"); 552 "Attempted send on closed socket\n");
552 req->errors++; 553 req->errors++;
553 nbd_end_request(req); 554 nbd_end_request(req);
@@ -555,11 +556,11 @@ static void do_nbd_request(struct request_queue *q)
555 continue; 556 continue;
556 } 557 }
557 558
558 spin_lock_irq(&lo->queue_lock); 559 spin_lock_irq(&nbd->queue_lock);
559 list_add_tail(&req->queuelist, &lo->waiting_queue); 560 list_add_tail(&req->queuelist, &nbd->waiting_queue);
560 spin_unlock_irq(&lo->queue_lock); 561 spin_unlock_irq(&nbd->queue_lock);
561 562
562 wake_up(&lo->waiting_wq); 563 wake_up(&nbd->waiting_wq);
563 564
564 spin_lock_irq(q->queue_lock); 565 spin_lock_irq(q->queue_lock);
565 } 566 }
@@ -567,32 +568,32 @@ static void do_nbd_request(struct request_queue *q)
567 568
568/* Must be called with tx_lock held */ 569/* Must be called with tx_lock held */
569 570
570static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, 571static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
571 unsigned int cmd, unsigned long arg) 572 unsigned int cmd, unsigned long arg)
572{ 573{
573 switch (cmd) { 574 switch (cmd) {
574 case NBD_DISCONNECT: { 575 case NBD_DISCONNECT: {
575 struct request sreq; 576 struct request sreq;
576 577
577 dev_info(disk_to_dev(lo->disk), "NBD_DISCONNECT\n"); 578 dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
578 579
579 blk_rq_init(NULL, &sreq); 580 blk_rq_init(NULL, &sreq);
580 sreq.cmd_type = REQ_TYPE_SPECIAL; 581 sreq.cmd_type = REQ_TYPE_SPECIAL;
581 nbd_cmd(&sreq) = NBD_CMD_DISC; 582 nbd_cmd(&sreq) = NBD_CMD_DISC;
582 if (!lo->sock) 583 if (!nbd->sock)
583 return -EINVAL; 584 return -EINVAL;
584 nbd_send_req(lo, &sreq); 585 nbd_send_req(nbd, &sreq);
585 return 0; 586 return 0;
586 } 587 }
587 588
588 case NBD_CLEAR_SOCK: { 589 case NBD_CLEAR_SOCK: {
589 struct file *file; 590 struct file *file;
590 591
591 lo->sock = NULL; 592 nbd->sock = NULL;
592 file = lo->file; 593 file = nbd->file;
593 lo->file = NULL; 594 nbd->file = NULL;
594 nbd_clear_que(lo); 595 nbd_clear_que(nbd);
595 BUG_ON(!list_empty(&lo->queue_head)); 596 BUG_ON(!list_empty(&nbd->queue_head));
596 if (file) 597 if (file)
597 fput(file); 598 fput(file);
598 return 0; 599 return 0;
@@ -600,14 +601,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
600 601
601 case NBD_SET_SOCK: { 602 case NBD_SET_SOCK: {
602 struct file *file; 603 struct file *file;
603 if (lo->file) 604 if (nbd->file)
604 return -EBUSY; 605 return -EBUSY;
605 file = fget(arg); 606 file = fget(arg);
606 if (file) { 607 if (file) {
607 struct inode *inode = file->f_path.dentry->d_inode; 608 struct inode *inode = file->f_path.dentry->d_inode;
608 if (S_ISSOCK(inode->i_mode)) { 609 if (S_ISSOCK(inode->i_mode)) {
609 lo->file = file; 610 nbd->file = file;
610 lo->sock = SOCKET_I(inode); 611 nbd->sock = SOCKET_I(inode);
611 if (max_part > 0) 612 if (max_part > 0)
612 bdev->bd_invalidated = 1; 613 bdev->bd_invalidated = 1;
613 return 0; 614 return 0;
@@ -619,29 +620,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
619 } 620 }
620 621
621 case NBD_SET_BLKSIZE: 622 case NBD_SET_BLKSIZE:
622 lo->blksize = arg; 623 nbd->blksize = arg;
623 lo->bytesize &= ~(lo->blksize-1); 624 nbd->bytesize &= ~(nbd->blksize-1);
624 bdev->bd_inode->i_size = lo->bytesize; 625 bdev->bd_inode->i_size = nbd->bytesize;
625 set_blocksize(bdev, lo->blksize); 626 set_blocksize(bdev, nbd->blksize);
626 set_capacity(lo->disk, lo->bytesize >> 9); 627 set_capacity(nbd->disk, nbd->bytesize >> 9);
627 return 0; 628 return 0;
628 629
629 case NBD_SET_SIZE: 630 case NBD_SET_SIZE:
630 lo->bytesize = arg & ~(lo->blksize-1); 631 nbd->bytesize = arg & ~(nbd->blksize-1);
631 bdev->bd_inode->i_size = lo->bytesize; 632 bdev->bd_inode->i_size = nbd->bytesize;
632 set_blocksize(bdev, lo->blksize); 633 set_blocksize(bdev, nbd->blksize);
633 set_capacity(lo->disk, lo->bytesize >> 9); 634 set_capacity(nbd->disk, nbd->bytesize >> 9);
634 return 0; 635 return 0;
635 636
636 case NBD_SET_TIMEOUT: 637 case NBD_SET_TIMEOUT:
637 lo->xmit_timeout = arg * HZ; 638 nbd->xmit_timeout = arg * HZ;
638 return 0; 639 return 0;
639 640
640 case NBD_SET_SIZE_BLOCKS: 641 case NBD_SET_SIZE_BLOCKS:
641 lo->bytesize = ((u64) arg) * lo->blksize; 642 nbd->bytesize = ((u64) arg) * nbd->blksize;
642 bdev->bd_inode->i_size = lo->bytesize; 643 bdev->bd_inode->i_size = nbd->bytesize;
643 set_blocksize(bdev, lo->blksize); 644 set_blocksize(bdev, nbd->blksize);
644 set_capacity(lo->disk, lo->bytesize >> 9); 645 set_capacity(nbd->disk, nbd->bytesize >> 9);
645 return 0; 646 return 0;
646 647
647 case NBD_DO_IT: { 648 case NBD_DO_IT: {
@@ -649,38 +650,38 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
649 struct file *file; 650 struct file *file;
650 int error; 651 int error;
651 652
652 if (lo->pid) 653 if (nbd->pid)
653 return -EBUSY; 654 return -EBUSY;
654 if (!lo->file) 655 if (!nbd->file)
655 return -EINVAL; 656 return -EINVAL;
656 657
657 mutex_unlock(&lo->tx_lock); 658 mutex_unlock(&nbd->tx_lock);
658 659
659 thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); 660 thread = kthread_create(nbd_thread, nbd, nbd->disk->disk_name);
660 if (IS_ERR(thread)) { 661 if (IS_ERR(thread)) {
661 mutex_lock(&lo->tx_lock); 662 mutex_lock(&nbd->tx_lock);
662 return PTR_ERR(thread); 663 return PTR_ERR(thread);
663 } 664 }
664 wake_up_process(thread); 665 wake_up_process(thread);
665 error = nbd_do_it(lo); 666 error = nbd_do_it(nbd);
666 kthread_stop(thread); 667 kthread_stop(thread);
667 668
668 mutex_lock(&lo->tx_lock); 669 mutex_lock(&nbd->tx_lock);
669 if (error) 670 if (error)
670 return error; 671 return error;
671 sock_shutdown(lo, 0); 672 sock_shutdown(nbd, 0);
672 file = lo->file; 673 file = nbd->file;
673 lo->file = NULL; 674 nbd->file = NULL;
674 nbd_clear_que(lo); 675 nbd_clear_que(nbd);
675 dev_warn(disk_to_dev(lo->disk), "queue cleared\n"); 676 dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
676 if (file) 677 if (file)
677 fput(file); 678 fput(file);
678 lo->bytesize = 0; 679 nbd->bytesize = 0;
679 bdev->bd_inode->i_size = 0; 680 bdev->bd_inode->i_size = 0;
680 set_capacity(lo->disk, 0); 681 set_capacity(nbd->disk, 0);
681 if (max_part > 0) 682 if (max_part > 0)
682 ioctl_by_bdev(bdev, BLKRRPART, 0); 683 ioctl_by_bdev(bdev, BLKRRPART, 0);
683 return lo->harderror; 684 return nbd->harderror;
684 } 685 }
685 686
686 case NBD_CLEAR_QUE: 687 case NBD_CLEAR_QUE:
@@ -688,14 +689,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
688 * This is for compatibility only. The queue is always cleared 689 * This is for compatibility only. The queue is always cleared
689 * by NBD_DO_IT or NBD_CLEAR_SOCK. 690 * by NBD_DO_IT or NBD_CLEAR_SOCK.
690 */ 691 */
691 BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); 692 BUG_ON(!nbd->sock && !list_empty(&nbd->queue_head));
692 return 0; 693 return 0;
693 694
694 case NBD_PRINT_DEBUG: 695 case NBD_PRINT_DEBUG:
695 dev_info(disk_to_dev(lo->disk), 696 dev_info(disk_to_dev(nbd->disk),
696 "next = %p, prev = %p, head = %p\n", 697 "next = %p, prev = %p, head = %p\n",
697 lo->queue_head.next, lo->queue_head.prev, 698 nbd->queue_head.next, nbd->queue_head.prev,
698 &lo->queue_head); 699 &nbd->queue_head);
699 return 0; 700 return 0;
700 } 701 }
701 return -ENOTTY; 702 return -ENOTTY;
@@ -704,21 +705,21 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
704static int nbd_ioctl(struct block_device *bdev, fmode_t mode, 705static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
705 unsigned int cmd, unsigned long arg) 706 unsigned int cmd, unsigned long arg)
706{ 707{
707 struct nbd_device *lo = bdev->bd_disk->private_data; 708 struct nbd_device *nbd = bdev->bd_disk->private_data;
708 int error; 709 int error;
709 710
710 if (!capable(CAP_SYS_ADMIN)) 711 if (!capable(CAP_SYS_ADMIN))
711 return -EPERM; 712 return -EPERM;
712 713
713 BUG_ON(lo->magic != LO_MAGIC); 714 BUG_ON(nbd->magic != NBD_MAGIC);
714 715
715 /* Anyone capable of this syscall can do *real bad* things */ 716 /* Anyone capable of this syscall can do *real bad* things */
716 dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n", 717 dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
717 lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); 718 nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
718 719
719 mutex_lock(&lo->tx_lock); 720 mutex_lock(&nbd->tx_lock);
720 error = __nbd_ioctl(bdev, lo, cmd, arg); 721 error = __nbd_ioctl(bdev, nbd, cmd, arg);
721 mutex_unlock(&lo->tx_lock); 722 mutex_unlock(&nbd->tx_lock);
722 723
723 return error; 724 return error;
724} 725}
@@ -804,7 +805,7 @@ static int __init nbd_init(void)
804 for (i = 0; i < nbds_max; i++) { 805 for (i = 0; i < nbds_max; i++) {
805 struct gendisk *disk = nbd_dev[i].disk; 806 struct gendisk *disk = nbd_dev[i].disk;
806 nbd_dev[i].file = NULL; 807 nbd_dev[i].file = NULL;
807 nbd_dev[i].magic = LO_MAGIC; 808 nbd_dev[i].magic = NBD_MAGIC;
808 nbd_dev[i].flags = 0; 809 nbd_dev[i].flags = 0;
809 INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); 810 INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
810 spin_lock_init(&nbd_dev[i].queue_lock); 811 spin_lock_init(&nbd_dev[i].queue_lock);
diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c
index cf82fedae099..e53fc24c6af3 100644
--- a/drivers/char/ipmi/ipmi_kcs_sm.c
+++ b/drivers/char/ipmi/ipmi_kcs_sm.c
@@ -118,8 +118,8 @@ enum kcs_states {
 #define MAX_KCS_WRITE_SIZE IPMI_MAX_MSG_LENGTH
 
 /* Timeouts in microseconds. */
-#define IBF_RETRY_TIMEOUT 1000000
-#define OBF_RETRY_TIMEOUT 1000000
+#define IBF_RETRY_TIMEOUT 5000000
+#define OBF_RETRY_TIMEOUT 5000000
 #define MAX_ERROR_RETRIES 10
 #define ERROR0_OBF_WAIT_JIFFIES (2*HZ)
 
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index c90e9390b78c..2c29942b1326 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -45,6 +45,7 @@
45#include <linux/init.h> 45#include <linux/init.h>
46#include <linux/proc_fs.h> 46#include <linux/proc_fs.h>
47#include <linux/rcupdate.h> 47#include <linux/rcupdate.h>
48#include <linux/interrupt.h>
48 49
49#define PFX "IPMI message handler: " 50#define PFX "IPMI message handler: "
50 51
@@ -52,6 +53,8 @@
52 53
53static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); 54static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void);
54static int ipmi_init_msghandler(void); 55static int ipmi_init_msghandler(void);
56static void smi_recv_tasklet(unsigned long);
57static void handle_new_recv_msgs(ipmi_smi_t intf);
55 58
56static int initialized; 59static int initialized;
57 60
@@ -354,12 +357,15 @@ struct ipmi_smi {
354 int curr_seq; 357 int curr_seq;
355 358
356 /* 359 /*
357 * Messages that were delayed for some reason (out of memory, 360 * Messages queued for delivery. If delivery fails (out of memory
358 * for instance), will go in here to be processed later in a 361 * for instance), They will stay in here to be processed later in a
359 * periodic timer interrupt. 362 * periodic timer interrupt. The tasklet is for handling received
363 * messages directly from the handler.
360 */ 364 */
361 spinlock_t waiting_msgs_lock; 365 spinlock_t waiting_msgs_lock;
362 struct list_head waiting_msgs; 366 struct list_head waiting_msgs;
367 atomic_t watchdog_pretimeouts_to_deliver;
368 struct tasklet_struct recv_tasklet;
363 369
364 /* 370 /*
365 * The list of command receivers that are registered for commands 371 * The list of command receivers that are registered for commands
@@ -492,6 +498,8 @@ static void clean_up_interface_data(ipmi_smi_t intf)
492 struct cmd_rcvr *rcvr, *rcvr2; 498 struct cmd_rcvr *rcvr, *rcvr2;
493 struct list_head list; 499 struct list_head list;
494 500
501 tasklet_kill(&intf->recv_tasklet);
502
495 free_smi_msg_list(&intf->waiting_msgs); 503 free_smi_msg_list(&intf->waiting_msgs);
496 free_recv_msg_list(&intf->waiting_events); 504 free_recv_msg_list(&intf->waiting_events);
497 505
@@ -2785,12 +2793,17 @@ channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg)
2785 return; 2793 return;
2786} 2794}
2787 2795
2788void ipmi_poll_interface(ipmi_user_t user) 2796static void ipmi_poll(ipmi_smi_t intf)
2789{ 2797{
2790 ipmi_smi_t intf = user->intf;
2791
2792 if (intf->handlers->poll) 2798 if (intf->handlers->poll)
2793 intf->handlers->poll(intf->send_info); 2799 intf->handlers->poll(intf->send_info);
2800 /* In case something came in */
2801 handle_new_recv_msgs(intf);
2802}
2803
2804void ipmi_poll_interface(ipmi_user_t user)
2805{
2806 ipmi_poll(user->intf);
2794} 2807}
2795EXPORT_SYMBOL(ipmi_poll_interface); 2808EXPORT_SYMBOL(ipmi_poll_interface);
2796 2809
@@ -2859,6 +2872,10 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers,
2859#endif 2872#endif
2860 spin_lock_init(&intf->waiting_msgs_lock); 2873 spin_lock_init(&intf->waiting_msgs_lock);
2861 INIT_LIST_HEAD(&intf->waiting_msgs); 2874 INIT_LIST_HEAD(&intf->waiting_msgs);
2875 tasklet_init(&intf->recv_tasklet,
2876 smi_recv_tasklet,
2877 (unsigned long) intf);
2878 atomic_set(&intf->watchdog_pretimeouts_to_deliver, 0);
2862 spin_lock_init(&intf->events_lock); 2879 spin_lock_init(&intf->events_lock);
2863 INIT_LIST_HEAD(&intf->waiting_events); 2880 INIT_LIST_HEAD(&intf->waiting_events);
2864 intf->waiting_events_count = 0; 2881 intf->waiting_events_count = 0;
@@ -3621,11 +3638,11 @@ static int handle_bmc_rsp(ipmi_smi_t intf,
3621} 3638}
3622 3639
3623/* 3640/*
3624 * Handle a new message. Return 1 if the message should be requeued, 3641 * Handle a received message. Return 1 if the message should be requeued,
3625 * 0 if the message should be freed, or -1 if the message should not 3642 * 0 if the message should be freed, or -1 if the message should not
3626 * be freed or requeued. 3643 * be freed or requeued.
3627 */ 3644 */
3628static int handle_new_recv_msg(ipmi_smi_t intf, 3645static int handle_one_recv_msg(ipmi_smi_t intf,
3629 struct ipmi_smi_msg *msg) 3646 struct ipmi_smi_msg *msg)
3630{ 3647{
3631 int requeue; 3648 int requeue;
@@ -3783,12 +3800,72 @@ static int handle_new_recv_msg(ipmi_smi_t intf,
3783 return requeue; 3800 return requeue;
3784} 3801}
3785 3802
3803/*
3804 * If there are messages in the queue or pretimeouts, handle them.
3805 */
3806static void handle_new_recv_msgs(ipmi_smi_t intf)
3807{
3808 struct ipmi_smi_msg *smi_msg;
3809 unsigned long flags = 0;
3810 int rv;
3811 int run_to_completion = intf->run_to_completion;
3812
3813 /* See if any waiting messages need to be processed. */
3814 if (!run_to_completion)
3815 spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
3816 while (!list_empty(&intf->waiting_msgs)) {
3817 smi_msg = list_entry(intf->waiting_msgs.next,
3818 struct ipmi_smi_msg, link);
3819 list_del(&smi_msg->link);
3820 if (!run_to_completion)
3821 spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
3822 rv = handle_one_recv_msg(intf, smi_msg);
3823 if (!run_to_completion)
3824 spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
3825 if (rv == 0) {
3826 /* Message handled */
3827 ipmi_free_smi_msg(smi_msg);
3828 } else if (rv < 0) {
3829 /* Fatal error on the message, delete but don't free. */
3830 } else {
3831 /*
3832 * To preserve message order, quit if we
3833 * can't handle a message.
3834 */
3835 list_add(&smi_msg->link, &intf->waiting_msgs);
3836 break;
3837 }
3838 }
3839 if (!run_to_completion)
3840 spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
3841
3842 /*
3843 * If the pretimeout count is non-zero, decrement it and
3844 * deliver pretimeouts to all the users.
3845 */
3846 if (atomic_add_unless(&intf->watchdog_pretimeouts_to_deliver, -1, 0)) {
3847 ipmi_user_t user;
3848
3849 rcu_read_lock();
3850 list_for_each_entry_rcu(user, &intf->users, link) {
3851 if (user->handler->ipmi_watchdog_pretimeout)
3852 user->handler->ipmi_watchdog_pretimeout(
3853 user->handler_data);
3854 }
3855 rcu_read_unlock();
3856 }
3857}
3858
3859static void smi_recv_tasklet(unsigned long val)
3860{
3861 handle_new_recv_msgs((ipmi_smi_t) val);
3862}
3863
3786/* Handle a new message from the lower layer. */ 3864/* Handle a new message from the lower layer. */
3787void ipmi_smi_msg_received(ipmi_smi_t intf, 3865void ipmi_smi_msg_received(ipmi_smi_t intf,
3788 struct ipmi_smi_msg *msg) 3866 struct ipmi_smi_msg *msg)
3789{ 3867{
3790 unsigned long flags = 0; /* keep us warning-free. */ 3868 unsigned long flags = 0; /* keep us warning-free. */
3791 int rv;
3792 int run_to_completion; 3869 int run_to_completion;
3793 3870
3794 3871
@@ -3842,31 +3919,11 @@ void ipmi_smi_msg_received(ipmi_smi_t intf,
3842 run_to_completion = intf->run_to_completion; 3919 run_to_completion = intf->run_to_completion;
3843 if (!run_to_completion) 3920 if (!run_to_completion)
3844 spin_lock_irqsave(&intf->waiting_msgs_lock, flags); 3921 spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
3845 if (!list_empty(&intf->waiting_msgs)) { 3922 list_add_tail(&msg->link, &intf->waiting_msgs);
3846 list_add_tail(&msg->link, &intf->waiting_msgs);
3847 if (!run_to_completion)
3848 spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
3849 goto out;
3850 }
3851 if (!run_to_completion) 3923 if (!run_to_completion)
3852 spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); 3924 spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
3853 3925
3854 rv = handle_new_recv_msg(intf, msg); 3926 tasklet_schedule(&intf->recv_tasklet);
3855 if (rv > 0) {
3856 /*
3857 * Could not handle the message now, just add it to a
3858 * list to handle later.
3859 */
3860 run_to_completion = intf->run_to_completion;
3861 if (!run_to_completion)
3862 spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
3863 list_add_tail(&msg->link, &intf->waiting_msgs);
3864 if (!run_to_completion)
3865 spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
3866 } else if (rv == 0) {
3867 ipmi_free_smi_msg(msg);
3868 }
3869
3870 out: 3927 out:
3871 return; 3928 return;
3872} 3929}
@@ -3874,16 +3931,8 @@ EXPORT_SYMBOL(ipmi_smi_msg_received);
3874 3931
3875void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf) 3932void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf)
3876{ 3933{
3877 ipmi_user_t user; 3934 atomic_set(&intf->watchdog_pretimeouts_to_deliver, 1);
3878 3935 tasklet_schedule(&intf->recv_tasklet);
3879 rcu_read_lock();
3880 list_for_each_entry_rcu(user, &intf->users, link) {
3881 if (!user->handler->ipmi_watchdog_pretimeout)
3882 continue;
3883
3884 user->handler->ipmi_watchdog_pretimeout(user->handler_data);
3885 }
3886 rcu_read_unlock();
3887} 3936}
3888EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout); 3937EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout);
3889 3938
@@ -3997,28 +4046,12 @@ static void ipmi_timeout_handler(long timeout_period)
3997 ipmi_smi_t intf; 4046 ipmi_smi_t intf;
3998 struct list_head timeouts; 4047 struct list_head timeouts;
3999 struct ipmi_recv_msg *msg, *msg2; 4048 struct ipmi_recv_msg *msg, *msg2;
4000 struct ipmi_smi_msg *smi_msg, *smi_msg2;
4001 unsigned long flags; 4049 unsigned long flags;
4002 int i; 4050 int i;
4003 4051
4004 rcu_read_lock(); 4052 rcu_read_lock();
4005 list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { 4053 list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
4006 /* See if any waiting messages need to be processed. */ 4054 tasklet_schedule(&intf->recv_tasklet);
4007 spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
4008 list_for_each_entry_safe(smi_msg, smi_msg2,
4009 &intf->waiting_msgs, link) {
4010 if (!handle_new_recv_msg(intf, smi_msg)) {
4011 list_del(&smi_msg->link);
4012 ipmi_free_smi_msg(smi_msg);
4013 } else {
4014 /*
4015 * To preserve message order, quit if we
4016 * can't handle a message.
4017 */
4018 break;
4019 }
4020 }
4021 spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
4022 4055
4023 /* 4056 /*
4024 * Go through the seq table and find any messages that 4057 * Go through the seq table and find any messages that
@@ -4172,12 +4205,48 @@ EXPORT_SYMBOL(ipmi_free_recv_msg);
4172 4205
4173#ifdef CONFIG_IPMI_PANIC_EVENT 4206#ifdef CONFIG_IPMI_PANIC_EVENT
4174 4207
4208static atomic_t panic_done_count = ATOMIC_INIT(0);
4209
4175static void dummy_smi_done_handler(struct ipmi_smi_msg *msg) 4210static void dummy_smi_done_handler(struct ipmi_smi_msg *msg)
4176{ 4211{
4212 atomic_dec(&panic_done_count);
4177} 4213}
4178 4214
4179static void dummy_recv_done_handler(struct ipmi_recv_msg *msg) 4215static void dummy_recv_done_handler(struct ipmi_recv_msg *msg)
4180{ 4216{
4217 atomic_dec(&panic_done_count);
4218}
4219
4220/*
4221 * Inside a panic, send a message and wait for a response.
4222 */
4223static void ipmi_panic_request_and_wait(ipmi_smi_t intf,
4224 struct ipmi_addr *addr,
4225 struct kernel_ipmi_msg *msg)
4226{
4227 struct ipmi_smi_msg smi_msg;
4228 struct ipmi_recv_msg recv_msg;
4229 int rv;
4230
4231 smi_msg.done = dummy_smi_done_handler;
4232 recv_msg.done = dummy_recv_done_handler;
4233 atomic_add(2, &panic_done_count);
4234 rv = i_ipmi_request(NULL,
4235 intf,
4236 addr,
4237 0,
4238 msg,
4239 intf,
4240 &smi_msg,
4241 &recv_msg,
4242 0,
4243 intf->channels[0].address,
4244 intf->channels[0].lun,
4245 0, 1); /* Don't retry, and don't wait. */
4246 if (rv)
4247 atomic_sub(2, &panic_done_count);
4248 while (atomic_read(&panic_done_count) != 0)
4249 ipmi_poll(intf);
4181} 4250}
4182 4251
4183#ifdef CONFIG_IPMI_PANIC_STRING 4252#ifdef CONFIG_IPMI_PANIC_STRING
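ipmi_panic_request_and_wait() relies on a simple counting idiom: sleeping is impossible during a panic, so the two expected completions (the SMI message and the receive message) are accounted for up front, the count is undone if submission fails, and the caller spins in ipmi_poll() until both done handlers have decremented it. A stripped-down sketch of the idiom with hypothetical submit/poll callbacks, not the IPMI code itself:

#include <linux/atomic.h>

static atomic_t done_count = ATOMIC_INIT(0);

static void smi_done(void)	/* hypothetical: SMI message completion handler */
{
	atomic_dec(&done_count);
}

static void recv_done(void)	/* hypothetical: receive message completion handler */
{
	atomic_dec(&done_count);
}

static void request_and_wait(int (*submit)(void), void (*poll_hw)(void))
{
	atomic_add(2, &done_count);		/* expect two completions per request */
	if (submit())				/* hypothetical submit, 0 on success */
		atomic_sub(2, &done_count);	/* nothing will complete on failure */
	while (atomic_read(&done_count) != 0)
		poll_hw();			/* spin: sleeping is not possible in a panic */
}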
@@ -4216,8 +4285,6 @@ static void send_panic_events(char *str)
4216 unsigned char data[16]; 4285 unsigned char data[16];
4217 struct ipmi_system_interface_addr *si; 4286 struct ipmi_system_interface_addr *si;
4218 struct ipmi_addr addr; 4287 struct ipmi_addr addr;
4219 struct ipmi_smi_msg smi_msg;
4220 struct ipmi_recv_msg recv_msg;
4221 4288
4222 si = (struct ipmi_system_interface_addr *) &addr; 4289 si = (struct ipmi_system_interface_addr *) &addr;
4223 si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; 4290 si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
@@ -4245,9 +4312,6 @@ static void send_panic_events(char *str)
4245 data[7] = str[2]; 4312 data[7] = str[2];
4246 } 4313 }
4247 4314
4248 smi_msg.done = dummy_smi_done_handler;
4249 recv_msg.done = dummy_recv_done_handler;
4250
4251 /* For every registered interface, send the event. */ 4315 /* For every registered interface, send the event. */
4252 list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { 4316 list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
4253 if (!intf->handlers) 4317 if (!intf->handlers)
@@ -4257,18 +4321,7 @@ static void send_panic_events(char *str)
4257 intf->run_to_completion = 1; 4321 intf->run_to_completion = 1;
4258 /* Send the event announcing the panic. */ 4322 /* Send the event announcing the panic. */
4259 intf->handlers->set_run_to_completion(intf->send_info, 1); 4323 intf->handlers->set_run_to_completion(intf->send_info, 1);
4260 i_ipmi_request(NULL, 4324 ipmi_panic_request_and_wait(intf, &addr, &msg);
4261 intf,
4262 &addr,
4263 0,
4264 &msg,
4265 intf,
4266 &smi_msg,
4267 &recv_msg,
4268 0,
4269 intf->channels[0].address,
4270 intf->channels[0].lun,
4271 0, 1); /* Don't retry, and don't wait. */
4272 } 4325 }
4273 4326
4274#ifdef CONFIG_IPMI_PANIC_STRING 4327#ifdef CONFIG_IPMI_PANIC_STRING
@@ -4316,18 +4369,7 @@ static void send_panic_events(char *str)
4316 msg.data = NULL; 4369 msg.data = NULL;
4317 msg.data_len = 0; 4370 msg.data_len = 0;
4318 intf->null_user_handler = device_id_fetcher; 4371 intf->null_user_handler = device_id_fetcher;
4319 i_ipmi_request(NULL, 4372 ipmi_panic_request_and_wait(intf, &addr, &msg);
4320 intf,
4321 &addr,
4322 0,
4323 &msg,
4324 intf,
4325 &smi_msg,
4326 &recv_msg,
4327 0,
4328 intf->channels[0].address,
4329 intf->channels[0].lun,
4330 0, 1); /* Don't retry, and don't wait. */
4331 4373
4332 if (intf->local_event_generator) { 4374 if (intf->local_event_generator) {
4333 /* Request the event receiver from the local MC. */ 4375 /* Request the event receiver from the local MC. */
@@ -4336,18 +4378,7 @@ static void send_panic_events(char *str)
4336 msg.data = NULL; 4378 msg.data = NULL;
4337 msg.data_len = 0; 4379 msg.data_len = 0;
4338 intf->null_user_handler = event_receiver_fetcher; 4380 intf->null_user_handler = event_receiver_fetcher;
4339 i_ipmi_request(NULL, 4381 ipmi_panic_request_and_wait(intf, &addr, &msg);
4340 intf,
4341 &addr,
4342 0,
4343 &msg,
4344 intf,
4345 &smi_msg,
4346 &recv_msg,
4347 0,
4348 intf->channels[0].address,
4349 intf->channels[0].lun,
4350 0, 1); /* no retry, and no wait. */
4351 } 4382 }
4352 intf->null_user_handler = NULL; 4383 intf->null_user_handler = NULL;
4353 4384
@@ -4404,18 +4435,7 @@ static void send_panic_events(char *str)
4404 strncpy(data+5, p, 11); 4435 strncpy(data+5, p, 11);
4405 p += size; 4436 p += size;
4406 4437
4407 i_ipmi_request(NULL, 4438 ipmi_panic_request_and_wait(intf, &addr, &msg);
4408 intf,
4409 &addr,
4410 0,
4411 &msg,
4412 intf,
4413 &smi_msg,
4414 &recv_msg,
4415 0,
4416 intf->channels[0].address,
4417 intf->channels[0].lun,
4418 0, 1); /* no retry, and no wait. */
4419 } 4439 }
4420 } 4440 }
4421#endif /* CONFIG_IPMI_PANIC_STRING */ 4441#endif /* CONFIG_IPMI_PANIC_STRING */
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index f9fdc114b31d..1e638fff40ea 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -170,7 +170,6 @@ struct smi_info {
170 struct si_sm_handlers *handlers; 170 struct si_sm_handlers *handlers;
171 enum si_type si_type; 171 enum si_type si_type;
172 spinlock_t si_lock; 172 spinlock_t si_lock;
173 spinlock_t msg_lock;
174 struct list_head xmit_msgs; 173 struct list_head xmit_msgs;
175 struct list_head hp_xmit_msgs; 174 struct list_head hp_xmit_msgs;
176 struct ipmi_smi_msg *curr_msg; 175 struct ipmi_smi_msg *curr_msg;
@@ -319,16 +318,8 @@ static int register_xaction_notifier(struct notifier_block *nb)
319static void deliver_recv_msg(struct smi_info *smi_info, 318static void deliver_recv_msg(struct smi_info *smi_info,
320 struct ipmi_smi_msg *msg) 319 struct ipmi_smi_msg *msg)
321{ 320{
322 /* Deliver the message to the upper layer with the lock 321 /* Deliver the message to the upper layer. */
323 released. */ 322 ipmi_smi_msg_received(smi_info->intf, msg);
324
325 if (smi_info->run_to_completion) {
326 ipmi_smi_msg_received(smi_info->intf, msg);
327 } else {
328 spin_unlock(&(smi_info->si_lock));
329 ipmi_smi_msg_received(smi_info->intf, msg);
330 spin_lock(&(smi_info->si_lock));
331 }
332} 323}
333 324
334static void return_hosed_msg(struct smi_info *smi_info, int cCode) 325static void return_hosed_msg(struct smi_info *smi_info, int cCode)
@@ -357,13 +348,6 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
357 struct timeval t; 348 struct timeval t;
358#endif 349#endif
359 350
360 /*
361 * No need to save flags, we already have interrupts off and we
362 * already hold the SMI lock.
363 */
364 if (!smi_info->run_to_completion)
365 spin_lock(&(smi_info->msg_lock));
366
367 /* Pick the high priority queue first. */ 351 /* Pick the high priority queue first. */
368 if (!list_empty(&(smi_info->hp_xmit_msgs))) { 352 if (!list_empty(&(smi_info->hp_xmit_msgs))) {
369 entry = smi_info->hp_xmit_msgs.next; 353 entry = smi_info->hp_xmit_msgs.next;
@@ -401,9 +385,6 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
401 rv = SI_SM_CALL_WITHOUT_DELAY; 385 rv = SI_SM_CALL_WITHOUT_DELAY;
402 } 386 }
403 out: 387 out:
404 if (!smi_info->run_to_completion)
405 spin_unlock(&(smi_info->msg_lock));
406
407 return rv; 388 return rv;
408} 389}
409 390
@@ -480,9 +461,7 @@ static void handle_flags(struct smi_info *smi_info)
480 461
481 start_clear_flags(smi_info); 462 start_clear_flags(smi_info);
482 smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; 463 smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
483 spin_unlock(&(smi_info->si_lock));
484 ipmi_smi_watchdog_pretimeout(smi_info->intf); 464 ipmi_smi_watchdog_pretimeout(smi_info->intf);
485 spin_lock(&(smi_info->si_lock));
486 } else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) { 465 } else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) {
487 /* Messages available. */ 466 /* Messages available. */
488 smi_info->curr_msg = ipmi_alloc_smi_msg(); 467 smi_info->curr_msg = ipmi_alloc_smi_msg();
@@ -888,19 +867,6 @@ static void sender(void *send_info,
888 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec); 867 printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
889#endif 868#endif
890 869
891 /*
892 * last_timeout_jiffies is updated here to avoid
893 * smi_timeout() handler passing very large time_diff
894 * value to smi_event_handler() that causes
895 * the send command to abort.
896 */
897 smi_info->last_timeout_jiffies = jiffies;
898
899 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
900
901 if (smi_info->thread)
902 wake_up_process(smi_info->thread);
903
904 if (smi_info->run_to_completion) { 870 if (smi_info->run_to_completion) {
905 /* 871 /*
906 * If we are running to completion, then throw it in 872 * If we are running to completion, then throw it in
@@ -923,16 +889,29 @@ static void sender(void *send_info,
923 return; 889 return;
924 } 890 }
925 891
926 spin_lock_irqsave(&smi_info->msg_lock, flags); 892 spin_lock_irqsave(&smi_info->si_lock, flags);
927 if (priority > 0) 893 if (priority > 0)
928 list_add_tail(&msg->link, &smi_info->hp_xmit_msgs); 894 list_add_tail(&msg->link, &smi_info->hp_xmit_msgs);
929 else 895 else
930 list_add_tail(&msg->link, &smi_info->xmit_msgs); 896 list_add_tail(&msg->link, &smi_info->xmit_msgs);
931 spin_unlock_irqrestore(&smi_info->msg_lock, flags);
932 897
933 spin_lock_irqsave(&smi_info->si_lock, flags); 898 if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) {
934 if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) 899 /*
900 * last_timeout_jiffies is updated here to avoid
901 * smi_timeout() handler passing very large time_diff
902 * value to smi_event_handler() that causes
903 * the send command to abort.
904 */
905 smi_info->last_timeout_jiffies = jiffies;
906
907 mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
908
909 if (smi_info->thread)
910 wake_up_process(smi_info->thread);
911
935 start_next_msg(smi_info); 912 start_next_msg(smi_info);
913 smi_event_handler(smi_info, 0);
914 }
936 spin_unlock_irqrestore(&smi_info->si_lock, flags); 915 spin_unlock_irqrestore(&smi_info->si_lock, flags);
937} 916}
938 917
@@ -1033,16 +1012,19 @@ static int ipmi_thread(void *data)
1033static void poll(void *send_info) 1012static void poll(void *send_info)
1034{ 1013{
1035 struct smi_info *smi_info = send_info; 1014 struct smi_info *smi_info = send_info;
1036 unsigned long flags; 1015 unsigned long flags = 0;
1016 int run_to_completion = smi_info->run_to_completion;
1037 1017
1038 /* 1018 /*
1039 * Make sure there is some delay in the poll loop so we can 1019 * Make sure there is some delay in the poll loop so we can
1040 * drive time forward and timeout things. 1020 * drive time forward and timeout things.
1041 */ 1021 */
1042 udelay(10); 1022 udelay(10);
1043 spin_lock_irqsave(&smi_info->si_lock, flags); 1023 if (!run_to_completion)
1024 spin_lock_irqsave(&smi_info->si_lock, flags);
1044 smi_event_handler(smi_info, 10); 1025 smi_event_handler(smi_info, 10);
1045 spin_unlock_irqrestore(&smi_info->si_lock, flags); 1026 if (!run_to_completion)
1027 spin_unlock_irqrestore(&smi_info->si_lock, flags);
1046} 1028}
1047 1029
1048static void request_events(void *send_info) 1030static void request_events(void *send_info)
@@ -1679,10 +1661,8 @@ static struct smi_info *smi_info_alloc(void)
1679{ 1661{
1680 struct smi_info *info = kzalloc(sizeof(*info), GFP_KERNEL); 1662 struct smi_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
1681 1663
1682 if (info) { 1664 if (info)
1683 spin_lock_init(&info->si_lock); 1665 spin_lock_init(&info->si_lock);
1684 spin_lock_init(&info->msg_lock);
1685 }
1686 return info; 1666 return info;
1687} 1667}
1688 1668
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 020a6aec2d86..7ed356e52035 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -520,6 +520,7 @@ static void panic_halt_ipmi_heartbeat(void)
520 msg.cmd = IPMI_WDOG_RESET_TIMER; 520 msg.cmd = IPMI_WDOG_RESET_TIMER;
521 msg.data = NULL; 521 msg.data = NULL;
522 msg.data_len = 0; 522 msg.data_len = 0;
523 atomic_add(2, &panic_done_count);
523 rv = ipmi_request_supply_msgs(watchdog_user, 524 rv = ipmi_request_supply_msgs(watchdog_user,
524 (struct ipmi_addr *) &addr, 525 (struct ipmi_addr *) &addr,
525 0, 526 0,
@@ -528,8 +529,8 @@ static void panic_halt_ipmi_heartbeat(void)
528 &panic_halt_heartbeat_smi_msg, 529 &panic_halt_heartbeat_smi_msg,
529 &panic_halt_heartbeat_recv_msg, 530 &panic_halt_heartbeat_recv_msg,
530 1); 531 1);
531 if (!rv) 532 if (rv)
532 atomic_add(2, &panic_done_count); 533 atomic_sub(2, &panic_done_count);
533} 534}
534 535
535static struct ipmi_smi_msg panic_halt_smi_msg = { 536static struct ipmi_smi_msg panic_halt_smi_msg = {
@@ -553,16 +554,18 @@ static void panic_halt_ipmi_set_timeout(void)
553 /* Wait for the messages to be free. */ 554 /* Wait for the messages to be free. */
554 while (atomic_read(&panic_done_count) != 0) 555 while (atomic_read(&panic_done_count) != 0)
555 ipmi_poll_interface(watchdog_user); 556 ipmi_poll_interface(watchdog_user);
557 atomic_add(2, &panic_done_count);
556 rv = i_ipmi_set_timeout(&panic_halt_smi_msg, 558 rv = i_ipmi_set_timeout(&panic_halt_smi_msg,
557 &panic_halt_recv_msg, 559 &panic_halt_recv_msg,
558 &send_heartbeat_now); 560 &send_heartbeat_now);
559 if (!rv) { 561 if (rv) {
560 atomic_add(2, &panic_done_count); 562 atomic_sub(2, &panic_done_count);
561 if (send_heartbeat_now)
562 panic_halt_ipmi_heartbeat();
563 } else
564 printk(KERN_WARNING PFX 563 printk(KERN_WARNING PFX
565 "Unable to extend the watchdog timeout."); 564 "Unable to extend the watchdog timeout.");
565 } else {
566 if (send_heartbeat_now)
567 panic_halt_ipmi_heartbeat();
568 }
566 while (atomic_read(&panic_done_count) != 0) 569 while (atomic_read(&panic_done_count) != 0)
567 ipmi_poll_interface(watchdog_user); 570 ipmi_poll_interface(watchdog_user);
568} 571}
@@ -1164,7 +1167,7 @@ static int wdog_reboot_handler(struct notifier_block *this,
1164 if (code == SYS_POWER_OFF || code == SYS_HALT) { 1167 if (code == SYS_POWER_OFF || code == SYS_HALT) {
1165 /* Disable the WDT if we are shutting down. */ 1168 /* Disable the WDT if we are shutting down. */
1166 ipmi_watchdog_state = WDOG_TIMEOUT_NONE; 1169 ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
1167 panic_halt_ipmi_set_timeout(); 1170 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
1168 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { 1171 } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
1169 /* Set a long timer to let the reboot happen, but 1172 /* Set a long timer to let the reboot happen, but
1170 reboot if it hangs, but only if the watchdog 1173 reboot if it hangs, but only if the watchdog
@@ -1172,7 +1175,7 @@ static int wdog_reboot_handler(struct notifier_block *this,
1172 timeout = 120; 1175 timeout = 120;
1173 pretimeout = 0; 1176 pretimeout = 0;
1174 ipmi_watchdog_state = WDOG_TIMEOUT_RESET; 1177 ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
1175 panic_halt_ipmi_set_timeout(); 1178 ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
1176 } 1179 }
1177 } 1180 }
1178 return NOTIFY_OK; 1181 return NOTIFY_OK;
diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c
index a2161f631a83..2231aec23918 100644
--- a/drivers/video/backlight/tosa_lcd.c
+++ b/drivers/video/backlight/tosa_lcd.c
@@ -271,7 +271,7 @@ static int tosa_lcd_resume(struct spi_device *spi)
271} 271}
272#else 272#else
273#define tosa_lcd_suspend NULL 273#define tosa_lcd_suspend NULL
274#define tosa_lcd_reume NULL 274#define tosa_lcd_resume NULL
275#endif 275#endif
276 276
277static struct spi_driver tosa_lcd_driver = { 277static struct spi_driver tosa_lcd_driver = {
diff --git a/fs/buffer.c b/fs/buffer.c
index 70e2017edd70..36d66653b931 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1384,10 +1384,23 @@ static void invalidate_bh_lru(void *arg)
1384 } 1384 }
1385 put_cpu_var(bh_lrus); 1385 put_cpu_var(bh_lrus);
1386} 1386}
1387
1388static bool has_bh_in_lru(int cpu, void *dummy)
1389{
1390 struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
1391 int i;
1387 1392
1393 for (i = 0; i < BH_LRU_SIZE; i++) {
1394 if (b->bhs[i])
1395 return 1;
1396 }
1397
1398 return 0;
1399}
1400
1388void invalidate_bh_lrus(void) 1401void invalidate_bh_lrus(void)
1389{ 1402{
1390 on_each_cpu(invalidate_bh_lru, NULL, 1); 1403 on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
1391} 1404}
1392EXPORT_SYMBOL_GPL(invalidate_bh_lrus); 1405EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
1393 1406
diff --git a/fs/proc/array.c b/fs/proc/array.c
index fbb53c249086..f9bd395b3473 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -550,7 +550,7 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
550 seq_put_decimal_ull(m, ' ', shared); 550 seq_put_decimal_ull(m, ' ', shared);
551 seq_put_decimal_ull(m, ' ', text); 551 seq_put_decimal_ull(m, ' ', text);
552 seq_put_decimal_ull(m, ' ', 0); 552 seq_put_decimal_ull(m, ' ', 0);
553 seq_put_decimal_ull(m, ' ', text); 553 seq_put_decimal_ull(m, ' ', data);
554 seq_put_decimal_ull(m, ' ', 0); 554 seq_put_decimal_ull(m, ' ', 0);
555 seq_putc(m, '\n'); 555 seq_putc(m, '\n');
556 556
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 3551f1f839eb..0d9e23a39e49 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -156,15 +156,15 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
156 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 156 if (!ptrace_may_access(task, PTRACE_MODE_READ))
157 goto out; 157 goto out;
158 158
159 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; 159 last = &ns_entries[ARRAY_SIZE(ns_entries)];
160 for (entry = ns_entries; entry <= last; entry++) { 160 for (entry = ns_entries; entry < last; entry++) {
161 if (strlen((*entry)->name) != len) 161 if (strlen((*entry)->name) != len)
162 continue; 162 continue;
163 if (!memcmp(dentry->d_name.name, (*entry)->name, len)) 163 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
164 break; 164 break;
165 } 165 }
166 error = ERR_PTR(-ENOENT); 166 error = ERR_PTR(-ENOENT);
167 if (entry > last) 167 if (entry == last)
168 goto out; 168 goto out;
169 169
170 error = proc_ns_instantiate(dir, dentry, task, *entry); 170 error = proc_ns_instantiate(dir, dentry, task, *entry);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9694cc283511..c283832d411d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -781,9 +781,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
781 int err = 0; 781 int err = 0;
782 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); 782 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
783 783
784 if (pmd_trans_unstable(pmd))
785 return 0;
786
787 /* find the first VMA at or above 'addr' */ 784 /* find the first VMA at or above 'addr' */
788 vma = find_vma(walk->mm, addr); 785 vma = find_vma(walk->mm, addr);
789 spin_lock(&walk->mm->page_table_lock); 786 spin_lock(&walk->mm->page_table_lock);
@@ -802,6 +799,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
802 return err; 799 return err;
803 } 800 }
804 801
802 if (pmd_trans_unstable(pmd))
803 return 0;
805 for (; addr != end; addr += PAGE_SIZE) { 804 for (; addr != end; addr += PAGE_SIZE) {
806 805
807 /* check to see if we've left 'vma' behind 806 /* check to see if we've left 'vma' behind
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 7b9b75a529be..1ffdb9856bb9 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -810,11 +810,10 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
810#else /* NR_CPUS > 1 */ 810#else /* NR_CPUS > 1 */
811int __first_cpu(const cpumask_t *srcp); 811int __first_cpu(const cpumask_t *srcp);
812int __next_cpu(int n, const cpumask_t *srcp); 812int __next_cpu(int n, const cpumask_t *srcp);
813int __any_online_cpu(const cpumask_t *mask);
814 813
815#define first_cpu(src) __first_cpu(&(src)) 814#define first_cpu(src) __first_cpu(&(src))
816#define next_cpu(n, src) __next_cpu((n), &(src)) 815#define next_cpu(n, src) __next_cpu((n), &(src))
817#define any_online_cpu(mask) __any_online_cpu(&(mask)) 816#define any_online_cpu(mask) cpumask_any_and(&mask, cpu_online_mask)
818#define for_each_cpu_mask(cpu, mask) \ 817#define for_each_cpu_mask(cpu, mask) \
819 for ((cpu) = -1; \ 818 for ((cpu) = -1; \
820 (cpu) = next_cpu((cpu), (mask)), \ 819 (cpu) = next_cpu((cpu), (mask)), \
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f2a60dde8c9e..d8738a464b94 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -954,7 +954,7 @@ extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
954extern void truncate_setsize(struct inode *inode, loff_t newsize); 954extern void truncate_setsize(struct inode *inode, loff_t newsize);
955extern int vmtruncate(struct inode *inode, loff_t offset); 955extern int vmtruncate(struct inode *inode, loff_t offset);
956extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); 956extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
957 957void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
958int truncate_inode_page(struct address_space *mapping, struct page *page); 958int truncate_inode_page(struct address_space *mapping, struct page *page);
959int generic_error_remove_page(struct address_space *mapping, struct page *page); 959int generic_error_remove_page(struct address_space *mapping, struct page *page);
960 960
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f5bd679be46b..b067bd8c49d0 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -33,6 +33,7 @@ struct pid_namespace {
33#endif 33#endif
34 gid_t pid_gid; 34 gid_t pid_gid;
35 int hide_pid; 35 int hide_pid;
36 int reboot; /* group exit code if this pidns was rebooted */
36}; 37};
37 38
38extern struct pid_namespace init_pid_ns; 39extern struct pid_namespace init_pid_ns;
@@ -48,6 +49,7 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
48extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); 49extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
49extern void free_pid_ns(struct kref *kref); 50extern void free_pid_ns(struct kref *kref);
50extern void zap_pid_ns_processes(struct pid_namespace *pid_ns); 51extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
52extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
51 53
52static inline void put_pid_ns(struct pid_namespace *ns) 54static inline void put_pid_ns(struct pid_namespace *ns)
53{ 55{
@@ -75,11 +77,15 @@ static inline void put_pid_ns(struct pid_namespace *ns)
75{ 77{
76} 78}
77 79
78
79static inline void zap_pid_ns_processes(struct pid_namespace *ns) 80static inline void zap_pid_ns_processes(struct pid_namespace *ns)
80{ 81{
81 BUG(); 82 BUG();
82} 83}
84
85static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
86{
87 return 0;
88}
83#endif /* CONFIG_PID_NS */ 89#endif /* CONFIG_PID_NS */
84 90
85extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); 91extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index e9a48234e693..0d04cd69ab9b 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -2,6 +2,7 @@
2 * Copyright (C) 2001 Momchil Velikov 2 * Copyright (C) 2001 Momchil Velikov
3 * Portions Copyright (C) 2001 Christoph Hellwig 3 * Portions Copyright (C) 2001 Christoph Hellwig
4 * Copyright (C) 2006 Nick Piggin 4 * Copyright (C) 2006 Nick Piggin
5 * Copyright (C) 2012 Konstantin Khlebnikov
5 * 6 *
6 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as 8 * modify it under the terms of the GNU General Public License as
@@ -257,4 +258,199 @@ static inline void radix_tree_preload_end(void)
257 preempt_enable(); 258 preempt_enable();
258} 259}
259 260
261/**
262 * struct radix_tree_iter - radix tree iterator state
263 *
264 * @index: index of current slot
265 * @next_index: one past the last index in this chunk
266 * @tags: bit-mask for tag-iterating
267 *
268 * This radix tree iterator works in terms of "chunks" of slots. A chunk is a
269 * subinterval of slots contained within one radix tree leaf node. It is
270 * described by a pointer to its first slot and a struct radix_tree_iter
271 * which holds the chunk's position in the tree and its size. For tagged
272 * iteration radix_tree_iter also holds the slots' bit-mask for one chosen
273 * radix tree tag.
274 */
275struct radix_tree_iter {
276 unsigned long index;
277 unsigned long next_index;
278 unsigned long tags;
279};
280
281#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */
282#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */
283#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */
284
285/**
286 * radix_tree_iter_init - initialize radix tree iterator
287 *
288 * @iter: pointer to iterator state
289 * @start: iteration starting index
290 * Returns: NULL
291 */
292static __always_inline void **
293radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
294{
295 /*
296 * Leave iter->tags uninitialized. radix_tree_next_chunk() will fill it
297 * in the case of a successful tagged chunk lookup. If the lookup was
298 * unsuccessful or non-tagged then nobody cares about ->tags.
299 *
300 * Set index to zero to bypass next_index overflow protection.
301 * See the comment in radix_tree_next_chunk() for details.
302 */
303 iter->index = 0;
304 iter->next_index = start;
305 return NULL;
306}
307
308/**
309 * radix_tree_next_chunk - find next chunk of slots for iteration
310 *
311 * @root: radix tree root
312 * @iter: iterator state
313 * @flags: RADIX_TREE_ITER_* flags and tag index
314 * Returns: pointer to chunk first slot, or NULL if there are no more left
315 *
316 * This function looks up the next chunk in the radix tree starting from
317 * @iter->next_index. It returns a pointer to the chunk's first slot.
318 * It also fills @iter with data about the chunk: its position in the tree (index),
319 * its end (next_index), and a bit mask for tagged iteration (tags).
320 */
321void **radix_tree_next_chunk(struct radix_tree_root *root,
322 struct radix_tree_iter *iter, unsigned flags);
323
324/**
325 * radix_tree_chunk_size - get current chunk size
326 *
327 * @iter: pointer to radix tree iterator
328 * Returns: current chunk size
329 */
330static __always_inline unsigned
331radix_tree_chunk_size(struct radix_tree_iter *iter)
332{
333 return iter->next_index - iter->index;
334}
335
336/**
337 * radix_tree_next_slot - find next slot in chunk
338 *
339 * @slot: pointer to current slot
340 * @iter: pointer to iterator state
341 * @flags: RADIX_TREE_ITER_*, should be constant
342 * Returns: pointer to next slot, or NULL if there are no more left
343 *
344 * This function updates @iter->index in the case of a successful lookup.
345 * For tagged lookup it also eats @iter->tags.
346 */
347static __always_inline void **
348radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
349{
350 if (flags & RADIX_TREE_ITER_TAGGED) {
351 iter->tags >>= 1;
352 if (likely(iter->tags & 1ul)) {
353 iter->index++;
354 return slot + 1;
355 }
356 if (!(flags & RADIX_TREE_ITER_CONTIG) && likely(iter->tags)) {
357 unsigned offset = __ffs(iter->tags);
358
359 iter->tags >>= offset;
360 iter->index += offset + 1;
361 return slot + offset + 1;
362 }
363 } else {
364 unsigned size = radix_tree_chunk_size(iter) - 1;
365
366 while (size--) {
367 slot++;
368 iter->index++;
369 if (likely(*slot))
370 return slot;
371 if (flags & RADIX_TREE_ITER_CONTIG)
372 break;
373 }
374 }
375 return NULL;
376}
377
378/**
379 * radix_tree_for_each_chunk - iterate over chunks
380 *
381 * @slot: the void** variable for pointer to chunk first slot
382 * @root: the struct radix_tree_root pointer
383 * @iter: the struct radix_tree_iter pointer
384 * @start: iteration starting index
385 * @flags: RADIX_TREE_ITER_* and tag index
386 *
387 * Locks can be released and reacquired between iterations.
388 */
389#define radix_tree_for_each_chunk(slot, root, iter, start, flags) \
390 for (slot = radix_tree_iter_init(iter, start) ; \
391 (slot = radix_tree_next_chunk(root, iter, flags)) ;)
392
393/**
394 * radix_tree_for_each_chunk_slot - iterate over slots in one chunk
395 *
396 * @slot: the void** variable; at the start it points to the chunk's first slot
397 * @iter: the struct radix_tree_iter pointer
398 * @flags: RADIX_TREE_ITER_*, should be constant
399 *
400 * This macro is designed to be nested inside radix_tree_for_each_chunk().
401 * @slot points to the radix tree slot, @iter->index contains its index.
402 */
403#define radix_tree_for_each_chunk_slot(slot, iter, flags) \
404 for (; slot ; slot = radix_tree_next_slot(slot, iter, flags))
405
406/**
407 * radix_tree_for_each_slot - iterate over non-empty slots
408 *
409 * @slot: the void** variable for pointer to slot
410 * @root: the struct radix_tree_root pointer
411 * @iter: the struct radix_tree_iter pointer
412 * @start: iteration starting index
413 *
414 * @slot points to radix tree slot, @iter->index contains its index.
415 */
416#define radix_tree_for_each_slot(slot, root, iter, start) \
417 for (slot = radix_tree_iter_init(iter, start) ; \
418 slot || (slot = radix_tree_next_chunk(root, iter, 0)) ; \
419 slot = radix_tree_next_slot(slot, iter, 0))
420
421/**
422 * radix_tree_for_each_contig - iterate over contiguous slots
423 *
424 * @slot: the void** variable for pointer to slot
425 * @root: the struct radix_tree_root pointer
426 * @iter: the struct radix_tree_iter pointer
427 * @start: iteration starting index
428 *
429 * @slot points to radix tree slot, @iter->index contains its index.
430 */
431#define radix_tree_for_each_contig(slot, root, iter, start) \
432 for (slot = radix_tree_iter_init(iter, start) ; \
433 slot || (slot = radix_tree_next_chunk(root, iter, \
434 RADIX_TREE_ITER_CONTIG)) ; \
435 slot = radix_tree_next_slot(slot, iter, \
436 RADIX_TREE_ITER_CONTIG))
437
438/**
439 * radix_tree_for_each_tagged - iterate over tagged slots
440 *
441 * @slot: the void** variable for pointer to slot
442 * @root: the struct radix_tree_root pointer
443 * @iter: the struct radix_tree_iter pointer
444 * @start: iteration starting index
445 * @tag: tag index
446 *
447 * @slot points to radix tree slot, @iter->index contains its index.
448 */
449#define radix_tree_for_each_tagged(slot, root, iter, start, tag) \
450 for (slot = radix_tree_iter_init(iter, start) ; \
451 slot || (slot = radix_tree_next_chunk(root, iter, \
452 RADIX_TREE_ITER_TAGGED | tag)) ; \
453 slot = radix_tree_next_slot(slot, iter, \
454 RADIX_TREE_ITER_TAGGED))
455
260#endif /* _LINUX_RADIX_TREE_H */ 456#endif /* _LINUX_RADIX_TREE_H */
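The iterator macros above replace the open-coded gang-lookup loops rewritten later in this series. A minimal usage sketch under rcu_read_lock(), ignoring the deref-retry handling a concurrently shrinking tree would need (hypothetical walk_items() helper):

#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

static void walk_items(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	void **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, root, &iter, 0) {
		void *item = radix_tree_deref_slot(slot);

		if (!item)
			continue;		/* the slot was cleared under us */
		/* use item here; iter.index is its index in the tree */
	}
	rcu_read_unlock();
}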
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 8cc38d3bab0c..10530d92c04b 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -102,6 +102,22 @@ static inline void call_function_init(void) { }
102int on_each_cpu(smp_call_func_t func, void *info, int wait); 102int on_each_cpu(smp_call_func_t func, void *info, int wait);
103 103
104/* 104/*
105 * Call a function on processors specified by mask, which might include
106 * the local one.
107 */
108void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
109 void *info, bool wait);
110
111/*
112 * Call a function on each processor for which the supplied function
113 * cond_func returns a positive value. This may include the local
114 * processor.
115 */
116void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
117 smp_call_func_t func, void *info, bool wait,
118 gfp_t gfp_flags);
119
120/*
105 * Mark the boot cpu "online" so that it can call console drivers in 121 * Mark the boot cpu "online" so that it can call console drivers in
106 * printk() and can access its per-cpu storage. 122 * printk() and can access its per-cpu storage.
107 */ 123 */
@@ -132,6 +148,36 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
132 local_irq_enable(); \ 148 local_irq_enable(); \
133 0; \ 149 0; \
134 }) 150 })
151/*
152 * Note we still need to test the mask even for UP
153 * because we actually can get an empty mask from
154 * code that on SMP might call us without the local
155 * CPU in the mask.
156 */
157#define on_each_cpu_mask(mask, func, info, wait) \
158 do { \
159 if (cpumask_test_cpu(0, (mask))) { \
160 local_irq_disable(); \
161 (func)(info); \
162 local_irq_enable(); \
163 } \
164 } while (0)
165/*
166 * Preemption is disabled here to make sure the cond_func is called under the
167 * same condtions in UP and SMP.
168 */
169#define on_each_cpu_cond(cond_func, func, info, wait, gfp_flags)\
170 do { \
171 void *__info = (info); \
172 preempt_disable(); \
173 if ((cond_func)(0, __info)) { \
174 local_irq_disable(); \
175 (func)(__info); \
176 local_irq_enable(); \
177 } \
178 preempt_enable(); \
179 } while (0)
180
135static inline void smp_send_reschedule(int cpu) { } 181static inline void smp_send_reschedule(int cpu) { }
136#define num_booting_cpus() 1 182#define num_booting_cpus() 1
137#define smp_prepare_boot_cpu() do {} while (0) 183#define smp_prepare_boot_cpu() do {} while (0)
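The UP fallbacks above mirror the SMP implementations added in kernel/smp.c further down. A minimal usage sketch of on_each_cpu_mask() with a hypothetical counter, assuming the caller may sleep and has interrupts enabled:

#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/atomic.h>

static void count_cpu(void *info)		/* runs on each target CPU, IRQs off */
{
	atomic_inc((atomic_t *)info);
}

static int count_cpus_in(const struct cpumask *mask)
{
	atomic_t hits = ATOMIC_INIT(0);

	/* IPIs only the online CPUs in @mask; runs locally too if we are in it */
	on_each_cpu_mask(mask, count_cpu, &hits, true);
	return atomic_read(&hits);
}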
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b86b5c20617d..8dc0ea7caf02 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -21,6 +21,9 @@ struct bio;
21#define SWAP_FLAG_PRIO_SHIFT 0 21#define SWAP_FLAG_PRIO_SHIFT 0
22#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ 22#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */
23 23
24#define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
25 SWAP_FLAG_DISCARD)
26
24static inline int current_is_kswapd(void) 27static inline int current_is_kswapd(void)
25{ 28{
26 return current->flags & PF_KSWAPD; 29 return current->flags & PF_KSWAPD;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 2a0deffa5dbe..4e2e472f6aeb 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1358,6 +1358,10 @@ static int __init parse_crashkernel_simple(char *cmdline,
1358 1358
1359 if (*cur == '@') 1359 if (*cur == '@')
1360 *crash_base = memparse(cur+1, &cur); 1360 *crash_base = memparse(cur+1, &cur);
1361 else if (*cur != ' ' && *cur != '\0') {
1362 pr_warning("crashkernel: unrecognized char\n");
1363 return -EINVAL;
1364 }
1361 1365
1362 return 0; 1366 return 0;
1363} 1367}
@@ -1461,7 +1465,9 @@ static int __init crash_save_vmcoreinfo_init(void)
1461 1465
1462 VMCOREINFO_SYMBOL(init_uts_ns); 1466 VMCOREINFO_SYMBOL(init_uts_ns);
1463 VMCOREINFO_SYMBOL(node_online_map); 1467 VMCOREINFO_SYMBOL(node_online_map);
1468#ifdef CONFIG_MMU
1464 VMCOREINFO_SYMBOL(swapper_pg_dir); 1469 VMCOREINFO_SYMBOL(swapper_pg_dir);
1470#endif
1465 VMCOREINFO_SYMBOL(_stext); 1471 VMCOREINFO_SYMBOL(_stext);
1466 VMCOREINFO_SYMBOL(vmlist); 1472 VMCOREINFO_SYMBOL(vmlist);
1467 1473
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 17b232869a04..57bc1fd35b3c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,6 +15,7 @@
15#include <linux/acct.h> 15#include <linux/acct.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/reboot.h>
18 19
19#define BITS_PER_PAGE (PAGE_SIZE*8) 20#define BITS_PER_PAGE (PAGE_SIZE*8)
20 21
@@ -183,6 +184,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
183 rc = sys_wait4(-1, NULL, __WALL, NULL); 184 rc = sys_wait4(-1, NULL, __WALL, NULL);
184 } while (rc != -ECHILD); 185 } while (rc != -ECHILD);
185 186
187 if (pid_ns->reboot)
188 current->signal->group_exit_code = pid_ns->reboot;
189
186 acct_exit_ns(pid_ns); 190 acct_exit_ns(pid_ns);
187 return; 191 return;
188} 192}
@@ -217,6 +221,35 @@ static struct ctl_table pid_ns_ctl_table[] = {
217 221
218static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; 222static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
219 223
224int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
225{
226 if (pid_ns == &init_pid_ns)
227 return 0;
228
229 switch (cmd) {
230 case LINUX_REBOOT_CMD_RESTART2:
231 case LINUX_REBOOT_CMD_RESTART:
232 pid_ns->reboot = SIGHUP;
233 break;
234
235 case LINUX_REBOOT_CMD_POWER_OFF:
236 case LINUX_REBOOT_CMD_HALT:
237 pid_ns->reboot = SIGINT;
238 break;
239 default:
240 return -EINVAL;
241 }
242
243 read_lock(&tasklist_lock);
244 force_sig(SIGKILL, pid_ns->child_reaper);
245 read_unlock(&tasklist_lock);
246
247 do_exit(0);
248
249 /* Not reached */
250 return 0;
251}
252
220static __init int pid_namespaces_init(void) 253static __init int pid_namespaces_init(void)
221{ 254{
222 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); 255 pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/smp.c b/kernel/smp.c
index db197d60489b..2f8b10ecf759 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -701,3 +701,93 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
701 return ret; 701 return ret;
702} 702}
703EXPORT_SYMBOL(on_each_cpu); 703EXPORT_SYMBOL(on_each_cpu);
704
705/**
706 * on_each_cpu_mask(): Run a function on processors specified by
707 * cpumask, which may include the local processor.
708 * @mask: The set of cpus to run on (only runs on online subset).
709 * @func: The function to run. This must be fast and non-blocking.
710 * @info: An arbitrary pointer to pass to the function.
711 * @wait: If true, wait (atomically) until function has completed
712 * on other CPUs.
713 *
714 * If @wait is true, then returns once @func has returned.
715 *
716 * You must not call this function with disabled interrupts or
717 * from a hardware interrupt handler or from a bottom half handler.
718 */
719void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
720 void *info, bool wait)
721{
722 int cpu = get_cpu();
723
724 smp_call_function_many(mask, func, info, wait);
725 if (cpumask_test_cpu(cpu, mask)) {
726 local_irq_disable();
727 func(info);
728 local_irq_enable();
729 }
730 put_cpu();
731}
732EXPORT_SYMBOL(on_each_cpu_mask);
733
734/*
735 * on_each_cpu_cond(): Call a function on each processor for which
736 * the supplied function cond_func returns true, optionally waiting
737 * for all the required CPUs to finish. This may include the local
738 * processor.
739 * @cond_func: A callback function that is passed a cpu id and
740 * the info parameter. The function is called
741 * with preemption disabled. The function should
742 * return a boolean value indicating whether to IPI
743 * the specified CPU.
744 * @func: The function to run on all applicable CPUs.
745 * This must be fast and non-blocking.
746 * @info: An arbitrary pointer to pass to both functions.
747 * @wait: If true, wait (atomically) until function has
748 * completed on other CPUs.
749 * @gfp_flags: GFP flags to use when allocating the cpumask
750 * used internally by the function.
751 *
752 * The function might sleep if the GFP flags indicate a non-
753 * atomic allocation is allowed.
754 *
755 * Preemption is disabled to protect against CPUs going offline but not online.
756 * CPUs going online during the call will not be seen or sent an IPI.
757 *
758 * You must not call this function with disabled interrupts or
759 * from a hardware interrupt handler or from a bottom half handler.
760 */
761void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
762 smp_call_func_t func, void *info, bool wait,
763 gfp_t gfp_flags)
764{
765 cpumask_var_t cpus;
766 int cpu, ret;
767
768 might_sleep_if(gfp_flags & __GFP_WAIT);
769
770 if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
771 preempt_disable();
772 for_each_online_cpu(cpu)
773 if (cond_func(cpu, info))
774 cpumask_set_cpu(cpu, cpus);
775 on_each_cpu_mask(cpus, func, info, wait);
776 preempt_enable();
777 free_cpumask_var(cpus);
778 } else {
779 /*
780 * No free cpumask, bother. No matter, we'll
781 * just have to IPI them one by one.
782 */
783 preempt_disable();
784 for_each_online_cpu(cpu)
785 if (cond_func(cpu, info)) {
786 ret = smp_call_function_single(cpu, func,
787 info, wait);
788 WARN_ON_ONCE(ret);
789 }
790 preempt_enable();
791 }
792}
793EXPORT_SYMBOL(on_each_cpu_cond);
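The fs/buffer.c hunk earlier in this series is the first in-tree user of on_each_cpu_cond(); a minimal sketch of the same shape, with a hypothetical per-CPU flag rather than the buffer-head LRUs:

#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/gfp.h>

static DEFINE_PER_CPU(int, needs_flush);	/* hypothetical per-CPU dirty flag */

static bool cpu_needs_flush(int cpu, void *info)
{
	return per_cpu(needs_flush, cpu) != 0;	/* runs with preemption disabled */
}

static void do_flush(void *info)		/* runs only on CPUs that said yes */
{
	__this_cpu_write(needs_flush, 0);
	/* ... flush this CPU's private state ... */
}

static void flush_where_needed(void)
{
	on_each_cpu_cond(cpu_needs_flush, do_flush, NULL, true, GFP_KERNEL);
}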
diff --git a/kernel/sys.c b/kernel/sys.c
index 9eb7fcab8df6..e7006eb6c1e4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -444,6 +444,15 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
444 magic2 != LINUX_REBOOT_MAGIC2C)) 444 magic2 != LINUX_REBOOT_MAGIC2C))
445 return -EINVAL; 445 return -EINVAL;
446 446
447 /*
448 * If pid namespaces are enabled and the current task is in a child
449 * pid_namespace, the command is handled by reboot_pid_ns() which will
450 * call do_exit().
451 */
452 ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
453 if (ret)
454 return ret;
455
447 /* Instead of trying to make the power_off code look like 456 /* Instead of trying to make the power_off code look like
448 * halt when pm_power_off is not set do it the easy way. 457 * halt when pm_power_off is not set do it the easy way.
449 */ 458 */
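reboot_pid_ns() turns a reboot request from a child pid namespace's init into termination of that init with SIGHUP (restart) or SIGINT (halt or power off). A hedged user-space sketch of how the namespace creator might observe this, assuming the child init was started with CLONE_NEWPID and that the signal shows up in the wait status:

#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>

/* pid is the init process of a child pid namespace */
static void wait_for_container(pid_t pid)
{
	int status;

	if (waitpid(pid, &status, 0) < 0)
		return;
	if (WIFSIGNALED(status) && WTERMSIG(status) == SIGHUP)
		printf("container requested a restart\n");
	else if (WIFSIGNALED(status) && WTERMSIG(status) == SIGINT)
		printf("container requested halt or power off\n");
}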
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 803a374f6766..52b3a06a02f8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
23#include <linux/swap.h> 23#include <linux/swap.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/sysctl.h> 25#include <linux/sysctl.h>
26#include <linux/bitmap.h>
26#include <linux/signal.h> 27#include <linux/signal.h>
27#include <linux/printk.h> 28#include <linux/printk.h>
28#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
@@ -2395,9 +2396,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
2395 } 2396 }
2396 } 2397 }
2397 2398
2398 while (val_a <= val_b) 2399 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
2399 set_bit(val_a++, tmp_bitmap);
2400
2401 first = 0; 2400 first = 0;
2402 proc_skip_char(&kbuf, &left, '\n'); 2401 proc_skip_char(&kbuf, &left, '\n');
2403 } 2402 }
@@ -2440,8 +2439,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
2440 if (*ppos) 2439 if (*ppos)
2441 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); 2440 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2442 else 2441 else
2443 memcpy(bitmap, tmp_bitmap, 2442 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
2444 BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2445 } 2443 }
2446 kfree(tmp_bitmap); 2444 kfree(tmp_bitmap);
2447 *lenp -= left; 2445 *lenp -= left;
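The sysctl conversion above replaces an open-coded set_bit() loop and a byte-sized memcpy with the bitmap library. A minimal sketch of the two helpers' semantics, using hypothetical sizes rather than the sysctl bitmaps:

#include <linux/bitmap.h>

#define DEMO_BITS 128

static void bitmap_helpers_demo(void)
{
	DECLARE_BITMAP(src, DEMO_BITS);
	DECLARE_BITMAP(dst, DEMO_BITS);

	bitmap_zero(src, DEMO_BITS);
	bitmap_set(src, 10, 5);			/* sets 5 bits starting at bit 10, i.e. 10..14 */
	bitmap_copy(dst, src, DEMO_BITS);	/* the length is a bit count, not a byte count */
}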
diff --git a/lib/Kconfig b/lib/Kconfig
index a0e5900a9d85..4a8aba2e5cc0 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -88,6 +88,10 @@ choice
88 prompt "CRC32 implementation" 88 prompt "CRC32 implementation"
89 depends on CRC32 89 depends on CRC32
90 default CRC32_SLICEBY8 90 default CRC32_SLICEBY8
91 help
92 This option allows a kernel builder to override the default choice
93 of CRC32 algorithm. Choose the default ("slice by 8") unless you
94 know that you need one of the others.
91 95
92config CRC32_SLICEBY8 96config CRC32_SLICEBY8
93 bool "Slice by 8 bytes" 97 bool "Slice by 8 bytes"
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 0b660118ed91..402a54ac35cb 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -26,18 +26,6 @@ int __next_cpu_nr(int n, const cpumask_t *srcp)
26EXPORT_SYMBOL(__next_cpu_nr); 26EXPORT_SYMBOL(__next_cpu_nr);
27#endif 27#endif
28 28
29int __any_online_cpu(const cpumask_t *mask)
30{
31 int cpu;
32
33 for_each_cpu(cpu, mask) {
34 if (cpu_online(cpu))
35 break;
36 }
37 return cpu;
38}
39EXPORT_SYMBOL(__any_online_cpu);
40
41/** 29/**
42 * cpumask_next_and - get the next cpu in *src1p & *src2p 30 * cpumask_next_and - get the next cpu in *src1p & *src2p
43 * @n: the cpu prior to the place to search (ie. return will be > @n) 31 * @n: the cpu prior to the place to search (ie. return will be > @n)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 3e69c2b66c94..86516f5588e3 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -3,6 +3,7 @@
3 * Portions Copyright (C) 2001 Christoph Hellwig 3 * Portions Copyright (C) 2001 Christoph Hellwig
4 * Copyright (C) 2005 SGI, Christoph Lameter 4 * Copyright (C) 2005 SGI, Christoph Lameter
5 * Copyright (C) 2006 Nick Piggin 5 * Copyright (C) 2006 Nick Piggin
6 * Copyright (C) 2012 Konstantin Khlebnikov
6 * 7 *
7 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as 9 * modify it under the terms of the GNU General Public License as
@@ -146,6 +147,43 @@ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
146 } 147 }
147 return 0; 148 return 0;
148} 149}
150
151/**
152 * radix_tree_find_next_bit - find the next set bit in a memory region
153 *
154 * @addr: The address to base the search on
155 * @size: The bitmap size in bits
156 * @offset: The bitnumber to start searching at
157 *
158 * Unrollable variant of find_next_bit() for constant size arrays.
159 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
160 * Returns next bit offset, or size if nothing found.
161 */
162static __always_inline unsigned long
163radix_tree_find_next_bit(const unsigned long *addr,
164 unsigned long size, unsigned long offset)
165{
166 if (!__builtin_constant_p(size))
167 return find_next_bit(addr, size, offset);
168
169 if (offset < size) {
170 unsigned long tmp;
171
172 addr += offset / BITS_PER_LONG;
173 tmp = *addr >> (offset % BITS_PER_LONG);
174 if (tmp)
175 return __ffs(tmp) + offset;
176 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
177 while (offset < size) {
178 tmp = *++addr;
179 if (tmp)
180 return __ffs(tmp) + offset;
181 offset += BITS_PER_LONG;
182 }
183 }
184 return size;
185}
186
149/* 187/*
150 * This assumes that the caller has performed appropriate preallocation, and 188 * This assumes that the caller has performed appropriate preallocation, and
151 * that the caller has pinned this thread of control to the current CPU. 189 * that the caller has pinned this thread of control to the current CPU.
@@ -613,6 +651,119 @@ int radix_tree_tag_get(struct radix_tree_root *root,
613EXPORT_SYMBOL(radix_tree_tag_get); 651EXPORT_SYMBOL(radix_tree_tag_get);
614 652
615/** 653/**
654 * radix_tree_next_chunk - find next chunk of slots for iteration
655 *
656 * @root: radix tree root
657 * @iter: iterator state
658 * @flags: RADIX_TREE_ITER_* flags and tag index
659 * Returns: pointer to chunk first slot, or NULL if iteration is over
660 */
661void **radix_tree_next_chunk(struct radix_tree_root *root,
662 struct radix_tree_iter *iter, unsigned flags)
663{
664 unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
665 struct radix_tree_node *rnode, *node;
666 unsigned long index, offset;
667
668 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
669 return NULL;
670
671 /*
672 * Catch next_index overflow after ~0UL. iter->index never overflows
673 * during iterating; it can be zero only at the beginning.
674 * And we cannot overflow iter->next_index in a single step,
675 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
676 */
677 index = iter->next_index;
678 if (!index && iter->index)
679 return NULL;
680
681 rnode = rcu_dereference_raw(root->rnode);
682 if (radix_tree_is_indirect_ptr(rnode)) {
683 rnode = indirect_to_ptr(rnode);
684 } else if (rnode && !index) {
685 /* Single-slot tree */
686 iter->index = 0;
687 iter->next_index = 1;
688 iter->tags = 1;
689 return (void **)&root->rnode;
690 } else
691 return NULL;
692
693restart:
694 shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
695 offset = index >> shift;
696
697 /* Index outside of the tree */
698 if (offset >= RADIX_TREE_MAP_SIZE)
699 return NULL;
700
701 node = rnode;
702 while (1) {
703 if ((flags & RADIX_TREE_ITER_TAGGED) ?
704 !test_bit(offset, node->tags[tag]) :
705 !node->slots[offset]) {
706 /* Hole detected */
707 if (flags & RADIX_TREE_ITER_CONTIG)
708 return NULL;
709
710 if (flags & RADIX_TREE_ITER_TAGGED)
711 offset = radix_tree_find_next_bit(
712 node->tags[tag],
713 RADIX_TREE_MAP_SIZE,
714 offset + 1);
715 else
716 while (++offset < RADIX_TREE_MAP_SIZE) {
717 if (node->slots[offset])
718 break;
719 }
720 index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
721 index += offset << shift;
722 /* Overflow after ~0UL */
723 if (!index)
724 return NULL;
725 if (offset == RADIX_TREE_MAP_SIZE)
726 goto restart;
727 }
728
729 /* This is a leaf node */
730 if (!shift)
731 break;
732
733 node = rcu_dereference_raw(node->slots[offset]);
734 if (node == NULL)
735 goto restart;
736 shift -= RADIX_TREE_MAP_SHIFT;
737 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
738 }
739
740 /* Update the iterator state */
741 iter->index = index;
742 iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;
743
744 /* Construct iter->tags bit-mask from node->tags[tag] array */
745 if (flags & RADIX_TREE_ITER_TAGGED) {
746 unsigned tag_long, tag_bit;
747
748 tag_long = offset / BITS_PER_LONG;
749 tag_bit = offset % BITS_PER_LONG;
750 iter->tags = node->tags[tag][tag_long] >> tag_bit;
751 /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
752 if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
753 /* Pick tags from next element */
754 if (tag_bit)
755 iter->tags |= node->tags[tag][tag_long + 1] <<
756 (BITS_PER_LONG - tag_bit);
757 /* Clip chunk size, here only BITS_PER_LONG tags */
758 iter->next_index = index + BITS_PER_LONG;
759 }
760 }
761
762 return node->slots + offset;
763}
764EXPORT_SYMBOL(radix_tree_next_chunk);
765
766/**
616 * radix_tree_range_tag_if_tagged - for each item in given range set given 767 * radix_tree_range_tag_if_tagged - for each item in given range set given
617 * tag if item has another tag set 768 * tag if item has another tag set
618 * @root: radix tree root 769 * @root: radix tree root
@@ -817,57 +968,6 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
817} 968}
818EXPORT_SYMBOL(radix_tree_prev_hole); 969EXPORT_SYMBOL(radix_tree_prev_hole);
819 970
820static unsigned int
821__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
822 unsigned long index, unsigned int max_items, unsigned long *next_index)
823{
824 unsigned int nr_found = 0;
825 unsigned int shift, height;
826 unsigned long i;
827
828 height = slot->height;
829 if (height == 0)
830 goto out;
831 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
832
833 for ( ; height > 1; height--) {
834 i = (index >> shift) & RADIX_TREE_MAP_MASK;
835 for (;;) {
836 if (slot->slots[i] != NULL)
837 break;
838 index &= ~((1UL << shift) - 1);
839 index += 1UL << shift;
840 if (index == 0)
841 goto out; /* 32-bit wraparound */
842 i++;
843 if (i == RADIX_TREE_MAP_SIZE)
844 goto out;
845 }
846
847 shift -= RADIX_TREE_MAP_SHIFT;
848 slot = rcu_dereference_raw(slot->slots[i]);
849 if (slot == NULL)
850 goto out;
851 }
852
853 /* Bottom level: grab some items */
854 for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
855 if (slot->slots[i]) {
856 results[nr_found] = &(slot->slots[i]);
857 if (indices)
858 indices[nr_found] = index;
859 if (++nr_found == max_items) {
860 index++;
861 goto out;
862 }
863 }
864 index++;
865 }
866out:
867 *next_index = index;
868 return nr_found;
869}
870
871/** 971/**
872 * radix_tree_gang_lookup - perform multiple lookup on a radix tree 972 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
873 * @root: radix tree root 973 * @root: radix tree root
@@ -891,48 +991,19 @@ unsigned int
891radix_tree_gang_lookup(struct radix_tree_root *root, void **results, 991radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
892 unsigned long first_index, unsigned int max_items) 992 unsigned long first_index, unsigned int max_items)
893{ 993{
894 unsigned long max_index; 994 struct radix_tree_iter iter;
895 struct radix_tree_node *node; 995 void **slot;
896 unsigned long cur_index = first_index; 996 unsigned int ret = 0;
897 unsigned int ret;
898 997
899 node = rcu_dereference_raw(root->rnode); 998 if (unlikely(!max_items))
900 if (!node)
901 return 0; 999 return 0;
902 1000
903 if (!radix_tree_is_indirect_ptr(node)) { 1001 radix_tree_for_each_slot(slot, root, &iter, first_index) {
904 if (first_index > 0) 1002 results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
905 return 0; 1003 if (!results[ret])
906 results[0] = node; 1004 continue;
907 return 1; 1005 if (++ret == max_items)
908 }
909 node = indirect_to_ptr(node);
910
911 max_index = radix_tree_maxindex(node->height);
912
913 ret = 0;
914 while (ret < max_items) {
915 unsigned int nr_found, slots_found, i;
916 unsigned long next_index; /* Index of next search */
917
918 if (cur_index > max_index)
919 break;
920 slots_found = __lookup(node, (void ***)results + ret, NULL,
921 cur_index, max_items - ret, &next_index);
922 nr_found = 0;
923 for (i = 0; i < slots_found; i++) {
924 struct radix_tree_node *slot;
925 slot = *(((void ***)results)[ret + i]);
926 if (!slot)
927 continue;
928 results[ret + nr_found] =
929 indirect_to_ptr(rcu_dereference_raw(slot));
930 nr_found++;
931 }
932 ret += nr_found;
933 if (next_index == 0)
934 break; 1006 break;
935 cur_index = next_index;
936 } 1007 }
937 1008
938 return ret; 1009 return ret;
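
A caller-side sketch of the batched interface (the `my_tree` root and `consume()` are hypothetical): the function fills the results array with up to max_items item pointers starting at first_index; a caller that wants to continue must derive the next start index from the items themselves, since only pointers are returned:

	void *items[16];
	unsigned int i, n;

	n = radix_tree_gang_lookup(&my_tree, items, first_index, ARRAY_SIZE(items));
	for (i = 0; i < n; i++)
		consume(items[i]);	/* hypothetical consumer */
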
@@ -962,112 +1033,25 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root,
962 void ***results, unsigned long *indices, 1033 void ***results, unsigned long *indices,
963 unsigned long first_index, unsigned int max_items) 1034 unsigned long first_index, unsigned int max_items)
964{ 1035{
965 unsigned long max_index; 1036 struct radix_tree_iter iter;
966 struct radix_tree_node *node; 1037 void **slot;
967 unsigned long cur_index = first_index; 1038 unsigned int ret = 0;
968 unsigned int ret;
969 1039
970 node = rcu_dereference_raw(root->rnode); 1040 if (unlikely(!max_items))
971 if (!node)
972 return 0; 1041 return 0;
973 1042
974 if (!radix_tree_is_indirect_ptr(node)) { 1043 radix_tree_for_each_slot(slot, root, &iter, first_index) {
975 if (first_index > 0) 1044 results[ret] = slot;
976 return 0;
977 results[0] = (void **)&root->rnode;
978 if (indices) 1045 if (indices)
979 indices[0] = 0; 1046 indices[ret] = iter.index;
980 return 1; 1047 if (++ret == max_items)
981 }
982 node = indirect_to_ptr(node);
983
984 max_index = radix_tree_maxindex(node->height);
985
986 ret = 0;
987 while (ret < max_items) {
988 unsigned int slots_found;
989 unsigned long next_index; /* Index of next search */
990
991 if (cur_index > max_index)
992 break; 1048 break;
993 slots_found = __lookup(node, results + ret,
994 indices ? indices + ret : NULL,
995 cur_index, max_items - ret, &next_index);
996 ret += slots_found;
997 if (next_index == 0)
998 break;
999 cur_index = next_index;
1000 } 1049 }
1001 1050
1002 return ret; 1051 return ret;
1003} 1052}
1004EXPORT_SYMBOL(radix_tree_gang_lookup_slot); 1053EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
1005 1054
1006/*
1007 * FIXME: the two tag_get()s here should use find_next_bit() instead of
1008 * open-coding the search.
1009 */
1010static unsigned int
1011__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
1012 unsigned int max_items, unsigned long *next_index, unsigned int tag)
1013{
1014 unsigned int nr_found = 0;
1015 unsigned int shift, height;
1016
1017 height = slot->height;
1018 if (height == 0)
1019 goto out;
1020 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1021
1022 while (height > 0) {
1023 unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ;
1024
1025 for (;;) {
1026 if (tag_get(slot, tag, i))
1027 break;
1028 index &= ~((1UL << shift) - 1);
1029 index += 1UL << shift;
1030 if (index == 0)
1031 goto out; /* 32-bit wraparound */
1032 i++;
1033 if (i == RADIX_TREE_MAP_SIZE)
1034 goto out;
1035 }
1036 height--;
1037 if (height == 0) { /* Bottom level: grab some items */
1038 unsigned long j = index & RADIX_TREE_MAP_MASK;
1039
1040 for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
1041 index++;
1042 if (!tag_get(slot, tag, j))
1043 continue;
1044 /*
1045 * Even though the tag was found set, we need to
1046 * recheck that we have a non-NULL node, because
1047 * if this lookup is lockless, it may have been
1048 * subsequently deleted.
1049 *
1050 * Similar care must be taken in any place that
1051 * lookup ->slots[x] without a lock (ie. can't
1052 * rely on its value remaining the same).
1053 */
1054 if (slot->slots[j]) {
1055 results[nr_found++] = &(slot->slots[j]);
1056 if (nr_found == max_items)
1057 goto out;
1058 }
1059 }
1060 }
1061 shift -= RADIX_TREE_MAP_SHIFT;
1062 slot = rcu_dereference_raw(slot->slots[i]);
1063 if (slot == NULL)
1064 break;
1065 }
1066out:
1067 *next_index = index;
1068 return nr_found;
1069}
1070
1071/** 1055/**
1072 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 1056 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
1073 * based on a tag 1057 * based on a tag
@@ -1086,52 +1070,19 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
1086 unsigned long first_index, unsigned int max_items, 1070 unsigned long first_index, unsigned int max_items,
1087 unsigned int tag) 1071 unsigned int tag)
1088{ 1072{
1089 struct radix_tree_node *node; 1073 struct radix_tree_iter iter;
1090 unsigned long max_index; 1074 void **slot;
1091 unsigned long cur_index = first_index; 1075 unsigned int ret = 0;
1092 unsigned int ret;
1093
1094 /* check the root's tag bit */
1095 if (!root_tag_get(root, tag))
1096 return 0;
1097 1076
1098 node = rcu_dereference_raw(root->rnode); 1077 if (unlikely(!max_items))
1099 if (!node)
1100 return 0; 1078 return 0;
1101 1079
1102 if (!radix_tree_is_indirect_ptr(node)) { 1080 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
1103 if (first_index > 0) 1081 results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
1104 return 0; 1082 if (!results[ret])
1105 results[0] = node; 1083 continue;
1106 return 1; 1084 if (++ret == max_items)
1107 }
1108 node = indirect_to_ptr(node);
1109
1110 max_index = radix_tree_maxindex(node->height);
1111
1112 ret = 0;
1113 while (ret < max_items) {
1114 unsigned int nr_found, slots_found, i;
1115 unsigned long next_index; /* Index of next search */
1116
1117 if (cur_index > max_index)
1118 break;
1119 slots_found = __lookup_tag(node, (void ***)results + ret,
1120 cur_index, max_items - ret, &next_index, tag);
1121 nr_found = 0;
1122 for (i = 0; i < slots_found; i++) {
1123 struct radix_tree_node *slot;
1124 slot = *(((void ***)results)[ret + i]);
1125 if (!slot)
1126 continue;
1127 results[ret + nr_found] =
1128 indirect_to_ptr(rcu_dereference_raw(slot));
1129 nr_found++;
1130 }
1131 ret += nr_found;
1132 if (next_index == 0)
1133 break; 1085 break;
1134 cur_index = next_index;
1135 } 1086 }
1136 1087
1137 return ret; 1088 return ret;
@@ -1156,42 +1107,17 @@ radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
1156 unsigned long first_index, unsigned int max_items, 1107 unsigned long first_index, unsigned int max_items,
1157 unsigned int tag) 1108 unsigned int tag)
1158{ 1109{
1159 struct radix_tree_node *node; 1110 struct radix_tree_iter iter;
1160 unsigned long max_index; 1111 void **slot;
1161 unsigned long cur_index = first_index; 1112 unsigned int ret = 0;
1162 unsigned int ret;
1163 1113
1164 /* check the root's tag bit */ 1114 if (unlikely(!max_items))
1165 if (!root_tag_get(root, tag))
1166 return 0;
1167
1168 node = rcu_dereference_raw(root->rnode);
1169 if (!node)
1170 return 0; 1115 return 0;
1171 1116
1172 if (!radix_tree_is_indirect_ptr(node)) { 1117 radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
1173 if (first_index > 0) 1118 results[ret] = slot;
1174 return 0; 1119 if (++ret == max_items)
1175 results[0] = (void **)&root->rnode;
1176 return 1;
1177 }
1178 node = indirect_to_ptr(node);
1179
1180 max_index = radix_tree_maxindex(node->height);
1181
1182 ret = 0;
1183 while (ret < max_items) {
1184 unsigned int slots_found;
1185 unsigned long next_index; /* Index of next search */
1186
1187 if (cur_index > max_index)
1188 break;
1189 slots_found = __lookup_tag(node, results + ret,
1190 cur_index, max_items - ret, &next_index, tag);
1191 ret += slots_found;
1192 if (next_index == 0)
1193 break; 1120 break;
1194 cur_index = next_index;
1195 } 1121 }
1196 1122
1197 return ret; 1123 return ret;
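
The tagged variants visit only slots whose tag bit is set. A sketch of the underlying iterator, with a hypothetical `MY_TAG` and `handle_tagged()` (the page cache uses tags like dirty/writeback this way):

	struct radix_tree_iter iter;
	void **slot;

	rcu_read_lock();
	radix_tree_for_each_tagged(slot, &my_tree, &iter, 0, MY_TAG) {
		void *item = radix_tree_deref_slot(slot);

		if (unlikely(!item))
			continue;
		handle_tagged(iter.index, item);	/* hypothetical */
	}
	rcu_read_unlock();
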
diff --git a/mm/filemap.c b/mm/filemap.c
index c3811bc6b9e3..79c4b2b0b14e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -813,20 +813,19 @@ EXPORT_SYMBOL(find_or_create_page);
813unsigned find_get_pages(struct address_space *mapping, pgoff_t start, 813unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
814 unsigned int nr_pages, struct page **pages) 814 unsigned int nr_pages, struct page **pages)
815{ 815{
816 unsigned int i; 816 struct radix_tree_iter iter;
817 unsigned int ret; 817 void **slot;
818 unsigned int nr_found, nr_skip; 818 unsigned ret = 0;
819
820 if (unlikely(!nr_pages))
821 return 0;
819 822
820 rcu_read_lock(); 823 rcu_read_lock();
821restart: 824restart:
822 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, 825 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
823 (void ***)pages, NULL, start, nr_pages);
824 ret = 0;
825 nr_skip = 0;
826 for (i = 0; i < nr_found; i++) {
827 struct page *page; 826 struct page *page;
828repeat: 827repeat:
829 page = radix_tree_deref_slot((void **)pages[i]); 828 page = radix_tree_deref_slot(slot);
830 if (unlikely(!page)) 829 if (unlikely(!page))
831 continue; 830 continue;
832 831
@@ -837,7 +836,7 @@ repeat:
837 * when entry at index 0 moves out of or back 836 * when entry at index 0 moves out of or back
838 * to root: none yet gotten, safe to restart. 837 * to root: none yet gotten, safe to restart.
839 */ 838 */
840 WARN_ON(start | i); 839 WARN_ON(iter.index);
841 goto restart; 840 goto restart;
842 } 841 }
843 /* 842 /*
@@ -845,7 +844,6 @@ repeat:
845 * here as an exceptional entry: so skip over it - 844 * here as an exceptional entry: so skip over it -
846 * we only reach this from invalidate_mapping_pages(). 845 * we only reach this from invalidate_mapping_pages().
847 */ 846 */
848 nr_skip++;
849 continue; 847 continue;
850 } 848 }
851 849
@@ -853,21 +851,16 @@ repeat:
853 goto repeat; 851 goto repeat;
854 852
855 /* Has the page moved? */ 853 /* Has the page moved? */
856 if (unlikely(page != *((void **)pages[i]))) { 854 if (unlikely(page != *slot)) {
857 page_cache_release(page); 855 page_cache_release(page);
858 goto repeat; 856 goto repeat;
859 } 857 }
860 858
861 pages[ret] = page; 859 pages[ret] = page;
862 ret++; 860 if (++ret == nr_pages)
861 break;
863 } 862 }
864 863
865 /*
866 * If all entries were removed before we could secure them,
867 * try again, because callers stop trying once 0 is returned.
868 */
869 if (unlikely(!ret && nr_found > nr_skip))
870 goto restart;
871 rcu_read_unlock(); 864 rcu_read_unlock();
872 return ret; 865 return ret;
873} 866}
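
A caller-side sketch of find_get_pages() (everything except the function itself is hypothetical): each returned page carries a reference that the caller must drop:

	struct page *pages[16];
	unsigned int i, nr;

	nr = find_get_pages(mapping, start, ARRAY_SIZE(pages), pages);
	for (i = 0; i < nr; i++) {
		do_something_with(pages[i]);	/* hypothetical */
		page_cache_release(pages[i]);	/* drop the lookup reference */
	}
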
@@ -887,21 +880,22 @@ repeat:
887unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, 880unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
888 unsigned int nr_pages, struct page **pages) 881 unsigned int nr_pages, struct page **pages)
889{ 882{
890 unsigned int i; 883 struct radix_tree_iter iter;
891 unsigned int ret; 884 void **slot;
892 unsigned int nr_found; 885 unsigned int ret = 0;
886
887 if (unlikely(!nr_pages))
888 return 0;
893 889
894 rcu_read_lock(); 890 rcu_read_lock();
895restart: 891restart:
896 nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, 892 radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
897 (void ***)pages, NULL, index, nr_pages);
898 ret = 0;
899 for (i = 0; i < nr_found; i++) {
900 struct page *page; 893 struct page *page;
901repeat: 894repeat:
902 page = radix_tree_deref_slot((void **)pages[i]); 895 page = radix_tree_deref_slot(slot);
 896			/* A hole: no reason to continue */
903 if (unlikely(!page)) 897 if (unlikely(!page))
904 continue; 898 break;
905 899
906 if (radix_tree_exception(page)) { 900 if (radix_tree_exception(page)) {
907 if (radix_tree_deref_retry(page)) { 901 if (radix_tree_deref_retry(page)) {
@@ -924,7 +918,7 @@ repeat:
924 goto repeat; 918 goto repeat;
925 919
926 /* Has the page moved? */ 920 /* Has the page moved? */
927 if (unlikely(page != *((void **)pages[i]))) { 921 if (unlikely(page != *slot)) {
928 page_cache_release(page); 922 page_cache_release(page);
929 goto repeat; 923 goto repeat;
930 } 924 }
@@ -934,14 +928,14 @@ repeat:
934 * otherwise we can get both false positives and false 928 * otherwise we can get both false positives and false
935 * negatives, which is just confusing to the caller. 929 * negatives, which is just confusing to the caller.
936 */ 930 */
937 if (page->mapping == NULL || page->index != index) { 931 if (page->mapping == NULL || page->index != iter.index) {
938 page_cache_release(page); 932 page_cache_release(page);
939 break; 933 break;
940 } 934 }
941 935
942 pages[ret] = page; 936 pages[ret] = page;
943 ret++; 937 if (++ret == nr_pages)
944 index++; 938 break;
945 } 939 }
946 rcu_read_unlock(); 940 rcu_read_unlock();
947 return ret; 941 return ret;
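
Unlike find_get_pages(), the contiguous variant stops at the first hole, which is why the loop above breaks on an empty slot instead of continuing. A generic sketch of the same pattern, assuming the caller holds the tree's lock so slots cannot change underneath it (`my_tree` and `consume()` hypothetical):

	struct radix_tree_iter iter;
	void **slot;

	radix_tree_for_each_contig(slot, &my_tree, &iter, start) {
		if (!*slot)
			break;		/* first hole ends the contiguous run */
		consume(iter.index, *slot);	/* hypothetical */
	}
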
@@ -962,19 +956,20 @@ EXPORT_SYMBOL(find_get_pages_contig);
962unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, 956unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
963 int tag, unsigned int nr_pages, struct page **pages) 957 int tag, unsigned int nr_pages, struct page **pages)
964{ 958{
965 unsigned int i; 959 struct radix_tree_iter iter;
966 unsigned int ret; 960 void **slot;
967 unsigned int nr_found; 961 unsigned ret = 0;
962
963 if (unlikely(!nr_pages))
964 return 0;
968 965
969 rcu_read_lock(); 966 rcu_read_lock();
970restart: 967restart:
971 nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree, 968 radix_tree_for_each_tagged(slot, &mapping->page_tree,
972 (void ***)pages, *index, nr_pages, tag); 969 &iter, *index, tag) {
973 ret = 0;
974 for (i = 0; i < nr_found; i++) {
975 struct page *page; 970 struct page *page;
976repeat: 971repeat:
977 page = radix_tree_deref_slot((void **)pages[i]); 972 page = radix_tree_deref_slot(slot);
978 if (unlikely(!page)) 973 if (unlikely(!page))
979 continue; 974 continue;
980 975
@@ -998,21 +993,16 @@ repeat:
998 goto repeat; 993 goto repeat;
999 994
1000 /* Has the page moved? */ 995 /* Has the page moved? */
1001 if (unlikely(page != *((void **)pages[i]))) { 996 if (unlikely(page != *slot)) {
1002 page_cache_release(page); 997 page_cache_release(page);
1003 goto repeat; 998 goto repeat;
1004 } 999 }
1005 1000
1006 pages[ret] = page; 1001 pages[ret] = page;
1007 ret++; 1002 if (++ret == nr_pages)
1003 break;
1008 } 1004 }
1009 1005
1010 /*
1011 * If all entries were removed before we could secure them,
1012 * try again, because callers stop trying once 0 is returned.
1013 */
1014 if (unlikely(!ret && nr_found))
1015 goto restart;
1016 rcu_read_unlock(); 1006 rcu_read_unlock();
1017 1007
1018 if (ret) 1008 if (ret)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b2ee6df0e9bb..7d698df4a067 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5306,6 +5306,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
5306 return 0; 5306 return 0;
5307 } 5307 }
5308 5308
5309 if (pmd_trans_unstable(pmd))
5310 return 0;
5309 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 5311 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
5310 for (; addr != end; pte++, addr += PAGE_SIZE) 5312 for (; addr != end; pte++, addr += PAGE_SIZE)
5311 if (get_mctgt_type(vma, addr, *pte, NULL)) 5313 if (get_mctgt_type(vma, addr, *pte, NULL))
@@ -5502,6 +5504,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
5502 return 0; 5504 return 0;
5503 } 5505 }
5504 5506
5507 if (pmd_trans_unstable(pmd))
5508 return 0;
5505retry: 5509retry:
5506 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 5510 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
5507 for (; addr != end; addr += PAGE_SIZE) { 5511 for (; addr != end; addr += PAGE_SIZE) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index caea788628e4..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
1161} 1161}
1162 1162
1163/* 1163/*
1164 * Spill all the per-cpu pages from all CPUs back into the buddy allocator 1164 * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
1165 *
1166 * Note that this code is protected against sending an IPI to an offline
1167 * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
1168 * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
1169 * nothing keeps CPUs from showing up after we populated the cpumask and
1170 * before the call to on_each_cpu_mask().
1165 */ 1171 */
1166void drain_all_pages(void) 1172void drain_all_pages(void)
1167{ 1173{
1168 on_each_cpu(drain_local_pages, NULL, 1); 1174 int cpu;
1175 struct per_cpu_pageset *pcp;
1176 struct zone *zone;
1177
1178 /*
 1179	 * Allocate in the BSS so we won't require allocation in the
 1180	 * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
1181 */
1182 static cpumask_t cpus_with_pcps;
1183
1184 /*
 1185	 * We don't care about racing with a CPU hotplug event:
 1186	 * the offline notification makes the notified CPU drain
 1187	 * its pcps, and on_each_cpu_mask() disables preemption as
 1188	 * part of its processing.
1189 */
1190 for_each_online_cpu(cpu) {
1191 bool has_pcps = false;
1192 for_each_populated_zone(zone) {
1193 pcp = per_cpu_ptr(zone->pageset, cpu);
1194 if (pcp->pcp.count) {
1195 has_pcps = true;
1196 break;
1197 }
1198 }
1199 if (has_pcps)
1200 cpumask_set_cpu(cpu, &cpus_with_pcps);
1201 else
1202 cpumask_clear_cpu(cpu, &cpus_with_pcps);
1203 }
1204 on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
1169} 1205}
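
The general pattern drain_all_pages() now follows, sketched with hypothetical names: build a cpumask of the CPUs that actually need the work, then IPI only those and wait for completion (last argument = 1):

	static cpumask_t my_cpus;		/* hypothetical mask */

	static void my_ipi_func(void *info)
	{
		/* runs on every CPU set in the mask, with preemption disabled */
	}

	/* caller, in a context that may not allocate (hence the static mask): */
	cpumask_copy(&my_cpus, cpu_online_mask);
	on_each_cpu_mask(&my_cpus, my_ipi_func, NULL, 1);
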
1170 1206
1171#ifdef CONFIG_HIBERNATION 1207#ifdef CONFIG_HIBERNATION
@@ -2308,6 +2344,10 @@ rebalance:
2308 if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { 2344 if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
2309 if (oom_killer_disabled) 2345 if (oom_killer_disabled)
2310 goto nopage; 2346 goto nopage;
2347 /* Coredumps can quickly deplete all memory reserves */
2348 if ((current->flags & PF_DUMPCORE) &&
2349 !(gfp_mask & __GFP_NOFAIL))
2350 goto nopage;
2311 page = __alloc_pages_may_oom(gfp_mask, order, 2351 page = __alloc_pages_may_oom(gfp_mask, order,
2312 zonelist, high_zoneidx, 2352 zonelist, high_zoneidx,
2313 nodemask, preferred_zone, 2353 nodemask, preferred_zone,
diff --git a/mm/slub.c b/mm/slub.c
index 64d9966d16bc..ffe13fdf8144 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2035,9 +2035,17 @@ static void flush_cpu_slab(void *d)
2035 __flush_cpu_slab(s, smp_processor_id()); 2035 __flush_cpu_slab(s, smp_processor_id());
2036} 2036}
2037 2037
2038static bool has_cpu_slab(int cpu, void *info)
2039{
2040 struct kmem_cache *s = info;
2041 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2042
2043 return !!(c->page);
2044}
2045
2038static void flush_all(struct kmem_cache *s) 2046static void flush_all(struct kmem_cache *s)
2039{ 2047{
2040 on_each_cpu(flush_cpu_slab, s, 1); 2048 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2041} 2049}
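
on_each_cpu_cond() generalizes this further: a predicate is evaluated for every online CPU and the IPI is sent only where it returns true. A sketch with a hypothetical per-cpu flag:

	static DEFINE_PER_CPU(int, my_pending);	/* hypothetical per-cpu flag */

	static bool cpu_needs_work(int cpu, void *info)
	{
		return per_cpu(my_pending, cpu) != 0;
	}

	static void do_work(void *info)
	{
		this_cpu_write(my_pending, 0);
	}

	/* IPI only the CPUs whose predicate returned true; GFP is for the temporary mask */
	on_each_cpu_cond(cpu_needs_work, do_work, NULL, 1, GFP_ATOMIC);
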
2042 2050
2043/* 2051/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index dae42f380d6e..fafc26d1b1dc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2022,6 +2022,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2022 struct page *page = NULL; 2022 struct page *page = NULL;
2023 struct inode *inode = NULL; 2023 struct inode *inode = NULL;
2024 2024
2025 if (swap_flags & ~SWAP_FLAGS_VALID)
2026 return -EINVAL;
2027
2025 if (!capable(CAP_SYS_ADMIN)) 2028 if (!capable(CAP_SYS_ADMIN))
2026 return -EPERM; 2029 return -EPERM;
2027 2030
diff --git a/mm/truncate.c b/mm/truncate.c
index 18aded3a89fc..61a183b89df6 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -626,3 +626,43 @@ int vmtruncate_range(struct inode *inode, loff_t lstart, loff_t lend)
626 626
627 return 0; 627 return 0;
628} 628}
629
630/**
631 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
632 * @inode: inode
633 * @lstart: offset of beginning of hole
634 * @lend: offset of last byte of hole
635 *
636 * This function should typically be called before the filesystem
637 * releases resources associated with the freed range (eg. deallocates
638 * blocks). This way, pagecache will always stay logically coherent
639 * with on-disk format, and the filesystem would not have to deal with
640 * situations such as writepage being called for a page that has already
641 * had its underlying blocks deallocated.
642 */
643void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
644{
645 struct address_space *mapping = inode->i_mapping;
646 loff_t unmap_start = round_up(lstart, PAGE_SIZE);
647 loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
648 /*
649 * This rounding is currently just for example: unmap_mapping_range
650 * expands its hole outwards, whereas we want it to contract the hole
651 * inwards. However, existing callers of truncate_pagecache_range are
652 * doing their own page rounding first; and truncate_inode_pages_range
653 * currently BUGs if lend is not pagealigned-1 (it handles partial
654 * page at start of hole, but not partial page at end of hole). Note
655 * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
656 */
657
658 /*
659 * Unlike in truncate_pagecache, unmap_mapping_range is called only
660 * once (before truncating pagecache), and without "even_cows" flag:
661 * hole-punching should not remove private COWed pages from the hole.
662 */
663 if ((u64)unmap_end > (u64)unmap_start)
664 unmap_mapping_range(mapping, unmap_start,
665 1 + unmap_end - unmap_start, 0);
666 truncate_inode_pages_range(mapping, lstart, lend);
667}
668EXPORT_SYMBOL(truncate_pagecache_range);
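
A sketch of the intended call site, with all filesystem-specific names hypothetical: the pagecache over the punched range is dropped before the blocks backing it are freed, so writepage can never run against already-deallocated blocks:

	static int myfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
	{
		/* drop and unmap pagecache over the hole first ... */
		truncate_pagecache_range(inode, offset, offset + len - 1);
		/* ... then it is safe to release the underlying blocks */
		return myfs_free_blocks(inode, offset, len);	/* hypothetical */
	}
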
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 4ec84018cc13..28bc57ee757c 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,10 +1,15 @@
1TARGETS = breakpoints 1TARGETS = breakpoints vm
2 2
3all: 3all:
4 for TARGET in $(TARGETS); do \ 4 for TARGET in $(TARGETS); do \
5 make -C $$TARGET; \ 5 make -C $$TARGET; \
6 done; 6 done;
7 7
8run_tests: all
9 for TARGET in $(TARGETS); do \
10 make -C $$TARGET run_tests; \
11 done;
12
8clean: 13clean:
9 for TARGET in $(TARGETS); do \ 14 for TARGET in $(TARGETS); do \
10 make -C $$TARGET clean; \ 15 make -C $$TARGET clean; \
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index f362722cdce7..931278035f5c 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -11,10 +11,13 @@ endif
11 11
12all: 12all:
13ifeq ($(ARCH),x86) 13ifeq ($(ARCH),x86)
14 gcc breakpoint_test.c -o run_test 14 gcc breakpoint_test.c -o breakpoint_test
15else 15else
16 echo "Not an x86 target, can't build breakpoints selftests" 16 echo "Not an x86 target, can't build breakpoints selftests"
17endif 17endif
18 18
19run_tests:
20 ./breakpoint_test
21
19clean: 22clean:
20 rm -fr run_test 23 rm -fr breakpoint_test
diff --git a/tools/testing/selftests/run_tests b/tools/testing/selftests/run_tests
deleted file mode 100644
index 320718a4e6bf..000000000000
--- a/tools/testing/selftests/run_tests
+++ /dev/null
@@ -1,8 +0,0 @@
1#!/bin/bash
2
3TARGETS=breakpoints
4
5for TARGET in $TARGETS
6do
7 $TARGET/run_test
8done
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
new file mode 100644
index 000000000000..b336b24aa6c0
--- /dev/null
+++ b/tools/testing/selftests/vm/Makefile
@@ -0,0 +1,14 @@
1# Makefile for vm selftests
2
3CC = $(CROSS_COMPILE)gcc
4CFLAGS = -Wall -Wextra
5
6all: hugepage-mmap hugepage-shm map_hugetlb
7%: %.c
8 $(CC) $(CFLAGS) -o $@ $^
9
10run_tests: all
11 /bin/sh ./run_vmtests
12
13clean:
14 $(RM) hugepage-mmap hugepage-shm map_hugetlb
diff --git a/Documentation/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c
index db0dd9a33d54..a10f310d2362 100644
--- a/Documentation/vm/hugepage-mmap.c
+++ b/tools/testing/selftests/vm/hugepage-mmap.c
@@ -22,7 +22,7 @@
22#include <sys/mman.h> 22#include <sys/mman.h>
23#include <fcntl.h> 23#include <fcntl.h>
24 24
25#define FILE_NAME "/mnt/hugepagefile" 25#define FILE_NAME "huge/hugepagefile"
26#define LENGTH (256UL*1024*1024) 26#define LENGTH (256UL*1024*1024)
27#define PROTECTION (PROT_READ | PROT_WRITE) 27#define PROTECTION (PROT_READ | PROT_WRITE)
28 28
@@ -48,7 +48,7 @@ static void write_bytes(char *addr)
48 *(addr + i) = (char)i; 48 *(addr + i) = (char)i;
49} 49}
50 50
51static void read_bytes(char *addr) 51static int read_bytes(char *addr)
52{ 52{
53 unsigned long i; 53 unsigned long i;
54 54
@@ -56,14 +56,15 @@ static void read_bytes(char *addr)
56 for (i = 0; i < LENGTH; i++) 56 for (i = 0; i < LENGTH; i++)
57 if (*(addr + i) != (char)i) { 57 if (*(addr + i) != (char)i) {
58 printf("Mismatch at %lu\n", i); 58 printf("Mismatch at %lu\n", i);
59 break; 59 return 1;
60 } 60 }
61 return 0;
61} 62}
62 63
63int main(void) 64int main(void)
64{ 65{
65 void *addr; 66 void *addr;
66 int fd; 67 int fd, ret;
67 68
68 fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); 69 fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
69 if (fd < 0) { 70 if (fd < 0) {
@@ -81,11 +82,11 @@ int main(void)
81 printf("Returned address is %p\n", addr); 82 printf("Returned address is %p\n", addr);
82 check_bytes(addr); 83 check_bytes(addr);
83 write_bytes(addr); 84 write_bytes(addr);
84 read_bytes(addr); 85 ret = read_bytes(addr);
85 86
86 munmap(addr, LENGTH); 87 munmap(addr, LENGTH);
87 close(fd); 88 close(fd);
88 unlink(FILE_NAME); 89 unlink(FILE_NAME);
89 90
90 return 0; 91 return ret;
91} 92}
diff --git a/Documentation/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c
index 07956d8592c9..0d0ef4fc0c04 100644
--- a/Documentation/vm/hugepage-shm.c
+++ b/tools/testing/selftests/vm/hugepage-shm.c
@@ -57,8 +57,8 @@ int main(void)
57 unsigned long i; 57 unsigned long i;
58 char *shmaddr; 58 char *shmaddr;
59 59
60 if ((shmid = shmget(2, LENGTH, 60 shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
61 SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) { 61 if (shmid < 0) {
62 perror("shmget"); 62 perror("shmget");
63 exit(1); 63 exit(1);
64 } 64 }
@@ -82,14 +82,16 @@ int main(void)
82 82
83 dprintf("Starting the Check..."); 83 dprintf("Starting the Check...");
84 for (i = 0; i < LENGTH; i++) 84 for (i = 0; i < LENGTH; i++)
85 if (shmaddr[i] != (char)i) 85 if (shmaddr[i] != (char)i) {
86 printf("\nIndex %lu mismatched\n", i); 86 printf("\nIndex %lu mismatched\n", i);
87 exit(3);
88 }
87 dprintf("Done.\n"); 89 dprintf("Done.\n");
88 90
89 if (shmdt((const void *)shmaddr) != 0) { 91 if (shmdt((const void *)shmaddr) != 0) {
90 perror("Detach failure"); 92 perror("Detach failure");
91 shmctl(shmid, IPC_RMID, NULL); 93 shmctl(shmid, IPC_RMID, NULL);
92 exit(3); 94 exit(4);
93 } 95 }
94 96
95 shmctl(shmid, IPC_RMID, NULL); 97 shmctl(shmid, IPC_RMID, NULL);
diff --git a/Documentation/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index eda1a6d3578a..ac56639dd4a9 100644
--- a/Documentation/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -44,7 +44,7 @@ static void write_bytes(char *addr)
44 *(addr + i) = (char)i; 44 *(addr + i) = (char)i;
45} 45}
46 46
47static void read_bytes(char *addr) 47static int read_bytes(char *addr)
48{ 48{
49 unsigned long i; 49 unsigned long i;
50 50
@@ -52,13 +52,15 @@ static void read_bytes(char *addr)
52 for (i = 0; i < LENGTH; i++) 52 for (i = 0; i < LENGTH; i++)
53 if (*(addr + i) != (char)i) { 53 if (*(addr + i) != (char)i) {
54 printf("Mismatch at %lu\n", i); 54 printf("Mismatch at %lu\n", i);
55 break; 55 return 1;
56 } 56 }
57 return 0;
57} 58}
58 59
59int main(void) 60int main(void)
60{ 61{
61 void *addr; 62 void *addr;
63 int ret;
62 64
63 addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0); 65 addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
64 if (addr == MAP_FAILED) { 66 if (addr == MAP_FAILED) {
@@ -69,9 +71,9 @@ int main(void)
69 printf("Returned address is %p\n", addr); 71 printf("Returned address is %p\n", addr);
70 check_bytes(addr); 72 check_bytes(addr);
71 write_bytes(addr); 73 write_bytes(addr);
72 read_bytes(addr); 74 ret = read_bytes(addr);
73 75
74 munmap(addr, LENGTH); 76 munmap(addr, LENGTH);
75 77
76 return 0; 78 return ret;
77} 79}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
new file mode 100644
index 000000000000..8b40bd5e5cc2
--- /dev/null
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -0,0 +1,77 @@
1#!/bin/bash
2#please run as root
3
4#we need 256M, below is the size in kB
5needmem=262144
6mnt=./huge
7
8#get pagesize and freepages from /proc/meminfo
9while read name size unit; do
10 if [ "$name" = "HugePages_Free:" ]; then
11 freepgs=$size
12 fi
13 if [ "$name" = "Hugepagesize:" ]; then
14 pgsize=$size
15 fi
16done < /proc/meminfo
17
18#set proper nr_hugepages
19if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then
20 nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
21 needpgs=`expr $needmem / $pgsize`
22 if [ $freepgs -lt $needpgs ]; then
23 lackpgs=$(( $needpgs - $freepgs ))
24 echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
25 if [ $? -ne 0 ]; then
26 echo "Please run this test as root"
27 exit 1
28 fi
29 fi
30else
31 echo "no hugetlbfs support in kernel?"
32 exit 1
33fi
34
35mkdir $mnt
36mount -t hugetlbfs none $mnt
37
38echo "--------------------"
 39echo "running hugepage-mmap"
40echo "--------------------"
41./hugepage-mmap
42if [ $? -ne 0 ]; then
43 echo "[FAIL]"
44else
45 echo "[PASS]"
46fi
47
48shmmax=`cat /proc/sys/kernel/shmmax`
49shmall=`cat /proc/sys/kernel/shmall`
50echo 268435456 > /proc/sys/kernel/shmmax
51echo 4194304 > /proc/sys/kernel/shmall
52echo "--------------------"
 53echo "running hugepage-shm"
54echo "--------------------"
55./hugepage-shm
56if [ $? -ne 0 ]; then
57 echo "[FAIL]"
58else
59 echo "[PASS]"
60fi
61echo $shmmax > /proc/sys/kernel/shmmax
62echo $shmall > /proc/sys/kernel/shmall
63
64echo "--------------------"
 65echo "running map_hugetlb"
66echo "--------------------"
67./map_hugetlb
68if [ $? -ne 0 ]; then
69 echo "[FAIL]"
70else
71 echo "[PASS]"
72fi
73
74#cleanup
75umount $mnt
76rm -rf $mnt
77echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
new file mode 100644
index 000000000000..8e30e5c40f8a
--- /dev/null
+++ b/tools/vm/Makefile
@@ -0,0 +1,11 @@
1# Makefile for vm tools
2
3CC = $(CROSS_COMPILE)gcc
4CFLAGS = -Wall -Wextra
5
6all: page-types slabinfo
7%: %.c
8 $(CC) $(CFLAGS) -o $@ $^
9
10clean:
11 $(RM) page-types slabinfo
diff --git a/Documentation/vm/page-types.c b/tools/vm/page-types.c
index 0b13f02d4059..7dab7b25b5c6 100644
--- a/Documentation/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -124,7 +124,7 @@
124#define BIT(name) (1ULL << KPF_##name) 124#define BIT(name) (1ULL << KPF_##name)
125#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL)) 125#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL))
126 126
127static const char *page_flag_names[] = { 127static const char * const page_flag_names[] = {
128 [KPF_LOCKED] = "L:locked", 128 [KPF_LOCKED] = "L:locked",
129 [KPF_ERROR] = "E:error", 129 [KPF_ERROR] = "E:error",
130 [KPF_REFERENCED] = "R:referenced", 130 [KPF_REFERENCED] = "R:referenced",
@@ -166,7 +166,7 @@ static const char *page_flag_names[] = {
166}; 166};
167 167
168 168
169static const char *debugfs_known_mountpoints[] = { 169static const char * const debugfs_known_mountpoints[] = {
170 "/sys/kernel/debug", 170 "/sys/kernel/debug",
171 "/debug", 171 "/debug",
172 0, 172 0,
@@ -215,7 +215,7 @@ static int hwpoison_forget_fd;
215 215
216static unsigned long total_pages; 216static unsigned long total_pages;
217static unsigned long nr_pages[HASH_SIZE]; 217static unsigned long nr_pages[HASH_SIZE];
218static uint64_t page_flags[HASH_SIZE]; 218static uint64_t page_flags[HASH_SIZE];
219 219
220 220
221/* 221/*
diff --git a/tools/slub/slabinfo.c b/tools/vm/slabinfo.c
index 164cbcf61106..164cbcf61106 100644
--- a/tools/slub/slabinfo.c
+++ b/tools/vm/slabinfo.c